聊聊storm的JoinBolt

dykun

2019-06-28

序

本文主要研究一下storm的JoinBolt

实例

@Test
    public void testJoinBolt() throws InvalidTopologyException, AuthorizationException, AlreadyAliveException {
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout("uuid-spout", new RandomWordSpout(new String[]{"uuid", "timestamp"}), 1);
        builder.setSpout("word-spout", new RandomWordSpout(new String[]{"word", "timestamp"}), 1);

        JoinBolt joinBolt = new JoinBolt("uuid-spout", "timestamp")
                //from priorStream inner join newStream on newStream.field = priorStream.field1
                .join("word-spout", "timestamp", "uuid-spout")
                .select("uuid,word,timestamp")
                .withTumblingWindow(BaseWindowedBolt.Count.of(10));
        builder.setBolt("join", joinBolt,1)
                .fieldsGrouping("uuid-spout",new Fields("timestamp"))
                .fieldsGrouping("word-spout",new Fields("timestamp"));

        builder.setBolt("fileWriter",new FilePrinterBolt(),1).globalGrouping("join");
        SubmitHelper.submitRemote("windowTopology",builder.createTopology());
    }

JoinBolt

storm-2.0.0/storm-client/src/jvm/org/apache/storm/bolt/JoinBolt.java

public class JoinBolt extends BaseWindowedBolt {

    protected final Selector selectorType;
    // Map[StreamName -> JoinInfo]
    protected LinkedHashMap<String, JoinInfo> joinCriteria = new LinkedHashMap<>();
    protected FieldSelector[] outputFields;  // specified via bolt.select() ... used in declaring Output fields
    //    protected String[] dotSeparatedOutputFieldNames; // fieldNames in x.y.z format w/o stream name, used for naming output fields
    protected String outputStreamName;
    // Map[StreamName -> Map[Key -> List<Tuple>]  ]
    HashMap<String, HashMap<Object, ArrayList<Tuple>>> hashedInputs = new HashMap<>(); // holds remaining streams
    private OutputCollector collector;

    /**
     * Calls  JoinBolt(Selector.SOURCE, sourceId, fieldName)
     *
     * @param sourceId  Id of source component (spout/bolt) from which this bolt is receiving data
     * @param fieldName the field to use for joining the stream (x.y.z format)
     */
    public JoinBolt(String sourceId, String fieldName) {
        this(Selector.SOURCE, sourceId, fieldName);
    }


    /**
     * Introduces the first stream to start the join with. Equivalent SQL ... select .... from srcOrStreamId ...
     *
     * @param type          Specifies whether 'srcOrStreamId' refers to stream name/source component
     * @param srcOrStreamId name of stream OR source component
     * @param fieldName     the field to use for joining the stream (x.y.z format)
     */
    public JoinBolt(Selector type, String srcOrStreamId, String fieldName) {
        selectorType = type;

        joinCriteria.put(srcOrStreamId, new JoinInfo(new FieldSelector(srcOrStreamId, fieldName)));
    }

    /**
     * Optional. Allows naming the output stream of this bolt. If not specified, the emits will happen on 'default' stream.
     */
    public JoinBolt withOutputStream(String streamName) {
        this.outputStreamName = streamName;
        return this;
    }

    /**
     * Performs inner Join with the newStream. SQL    :   from priorStream inner join newStream on newStream.field = priorStream.field1 same
     * as:   new WindowedQueryBolt(priorStream,field1). join(newStream, field, priorStream);
     *
     * Note: priorStream must be previously joined. Valid ex:    new WindowedQueryBolt(s1,k1). join(s2,k2, s1). join(s3,k3, s2); Invalid ex:
     * new WindowedQueryBolt(s1,k1). join(s3,k3, s2). join(s2,k2, s1);
     *
     * @param newStream Either stream name or name of upstream component
     * @param field     the field on which to perform the join
     */
    public JoinBolt join(String newStream, String field, String priorStream) {
        return joinCommon(newStream, field, priorStream, JoinType.INNER);
    }

    /**
     * Performs left Join with the newStream. SQL    :   from stream1  left join stream2  on stream2.field = stream1.field1 same as:   new
     * WindowedQueryBolt(stream1, field1). leftJoin(stream2, field, stream1);
     *
     * Note: priorStream must be previously joined Valid ex:    new WindowedQueryBolt(s1,k1). leftJoin(s2,k2, s1). leftJoin(s3,k3, s2);
     * Invalid ex:  new WindowedQueryBolt(s1,k1). leftJoin(s3,k3, s2). leftJoin(s2,k2, s1);
     *
     * @param newStream Either a name of a stream or an upstream component
     * @param field     the field on which to perform the join
     */
    public JoinBolt leftJoin(String newStream, String field, String priorStream) {
        return joinCommon(newStream, field, priorStream, JoinType.LEFT);
    }

    private JoinBolt joinCommon(String newStream, String fieldDescriptor, String priorStream, JoinType joinType) {
        if (hashedInputs.containsKey(newStream)) {
            throw new IllegalArgumentException("'" + newStream + "' is already part of join. Cannot join with it more than once.");
        }
        hashedInputs.put(newStream, new HashMap<Object, ArrayList<Tuple>>());
        JoinInfo joinInfo = joinCriteria.get(priorStream);
        if (joinInfo == null) {
            throw new IllegalArgumentException("Stream '" + priorStream + "' was not previously declared");
        }

        FieldSelector field = new FieldSelector(newStream, fieldDescriptor);
        joinCriteria.put(newStream, new JoinInfo(field, priorStream, joinInfo, joinType));
        return this;
    }

    /**
     * Specify projection fields. i.e. Specifies the fields to include in the output. e.g: .select("field1, stream2:field2, field3") Nested
     * Key names are supported for nested types: e.g: .select("outerKey1.innerKey1, outerKey1.innerKey2, stream3:outerKey2.innerKey3)" Inner
     * types (non leaf) must be Map<> in order to support nested lookup using this dot notation This selected fields implicitly declare the
     * output fieldNames for the bolt based.
     *
     * @param commaSeparatedKeys
     * @return
     */
    public JoinBolt select(String commaSeparatedKeys) {
        String[] fieldNames = commaSeparatedKeys.split(",");

        outputFields = new FieldSelector[fieldNames.length];
        for (int i = 0; i < fieldNames.length; i++) {
            outputFields[i] = new FieldSelector(fieldNames[i]);
        }
        return this;
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        String[] outputFieldNames = new String[outputFields.length];
        for (int i = 0; i < outputFields.length; ++i) {
            outputFieldNames[i] = outputFields[i].getOutputName();
        }
        if (outputStreamName != null) {
            declarer.declareStream(outputStreamName, new Fields(outputFieldNames));
        } else {
            declarer.declare(new Fields(outputFieldNames));
        }
    }

    @Override
    public void prepare(Map<String, Object> topoConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
        // initialize the hashedInputs data structure
        int i = 0;
        for (String stream : joinCriteria.keySet()) {
            if (i > 0) {
                hashedInputs.put(stream, new HashMap<Object, ArrayList<Tuple>>());
            }
            ++i;
        }
        if (outputFields == null) {
            throw new IllegalArgumentException("Must specify output fields via .select() method.");
        }
    }

    @Override
    public void execute(TupleWindow inputWindow) {
        // 1) Perform Join
        List<Tuple> currentWindow = inputWindow.get();
        JoinAccumulator joinResult = hashJoin(currentWindow);

        // 2) Emit results
        for (ResultRecord resultRecord : joinResult.getRecords()) {
            ArrayList<Object> outputTuple = resultRecord.getOutputFields();
            if (outputStreamName == null) {
                // explicit anchoring emits to corresponding input tuples only, as default window anchoring will anchor them to all
                // tuples in window
                collector.emit(resultRecord.tupleList, outputTuple);
            } else {
                // explicitly anchor emits to corresponding input tuples only, as default window anchoring will anchor them to all tuples
                // in window
                collector.emit(outputStreamName, resultRecord.tupleList, outputTuple);
            }
        }
    }

    //......
}

JoinBolt继承了BaseWindowedBolt，定义了Selector selectorType、LinkedHashMap<String, JoinInfo> joinCriteria、FieldSelector[] outputFields等属性，用于记录关联类型及关联关系
join、leftJoin方法用于设置join关联关系，最后都是调用joinCommon方法，关联关系使用JoinInfo对象，存储在joinCriteria中
select方法用于选择结果集的列，最后设置到outputFields，用于declareOutputFields
execute就是join的核心逻辑了，这里调用了hashJoin

JoinBolt.hashJoin

storm-2.0.0/storm-client/src/jvm/org/apache/storm/bolt/JoinBolt.java

protected JoinAccumulator hashJoin(List<Tuple> tuples) {
        clearHashedInputs();

        JoinAccumulator probe = new JoinAccumulator();

        // 1) Build phase - Segregate tuples in the Window into streams.
        //    First stream's tuples go into probe, rest into HashMaps in hashedInputs
        String firstStream = joinCriteria.keySet().iterator().next();
        for (Tuple tuple : tuples) {
            String streamId = getStreamSelector(tuple);
            if (!streamId.equals(firstStream)) {
                Object field = getJoinField(streamId, tuple);
                ArrayList<Tuple> recs = hashedInputs.get(streamId).get(field);
                if (recs == null) {
                    recs = new ArrayList<Tuple>();
                    hashedInputs.get(streamId).put(field, recs);
                }
                recs.add(tuple);

            } else {
                ResultRecord probeRecord = new ResultRecord(tuple, joinCriteria.size() == 1);
                probe.insert(probeRecord);  // first stream's data goes into the probe
            }
        }

        // 2) Join the streams in order of streamJoinOrder
        int i = 0;
        for (String streamName : joinCriteria.keySet()) {
            boolean finalJoin = (i == joinCriteria.size() - 1);
            if (i > 0) {
                probe = doJoin(probe, hashedInputs.get(streamName), joinCriteria.get(streamName), finalJoin);
            }
            ++i;
        }


        return probe;
    }

hashJoin方法先遍历一下tuples，把tuples分为两类，firstStream的数据存到JoinAccumulator probe中，其余的存到HashMap<String, HashMap<Object, ArrayList<Tuple>>> hashedInputs
之后对剩余的streamId，挨个遍历调用doJoin，把结果整合到JoinAccumulator probe

JoinAccumulator

storm-2.0.0/storm-client/src/jvm/org/apache/storm/bolt/JoinBolt.java

protected class JoinAccumulator {
        ArrayList<ResultRecord> records = new ArrayList<>();

        public void insert(ResultRecord tuple) {
            records.add(tuple);
        }

        public Collection<ResultRecord> getRecords() {
            return records;
        }
    }

JoinAccumulator就是一个ArrayList<ResultRecord>

ResultRecord

storm-2.0.0/storm-client/src/jvm/org/apache/storm/bolt/JoinBolt.java

// Join helper to concat fields to the record
    protected class ResultRecord {

        ArrayList<Tuple> tupleList = new ArrayList<>(); // contains one Tuple per Stream being joined
        ArrayList<Object> outFields = null; // refs to fields that will be part of output fields

        // 'generateOutputFields' enables us to avoid projection unless it is the final stream being joined
        public ResultRecord(Tuple tuple, boolean generateOutputFields) {
            tupleList.add(tuple);
            if (generateOutputFields) {
                outFields = doProjection(tupleList, outputFields);
            }
        }

        public ResultRecord(ResultRecord lhs, Tuple rhs, boolean generateOutputFields) {
            if (lhs != null) {
                tupleList.addAll(lhs.tupleList);
            }
            if (rhs != null) {
                tupleList.add(rhs);
            }
            if (generateOutputFields) {
                outFields = doProjection(tupleList, outputFields);
            }
        }

        public ArrayList<Object> getOutputFields() {
            return outFields;
        }


        // 'stream' cannot be null,
        public Object getField(FieldSelector fieldSelector) {
            for (Tuple tuple : tupleList) {
                Object result = lookupField(fieldSelector, tuple);
                if (result != null) {
                    return result;
                }
            }
            return null;
        }
    }

    // Performs projection on the tuples based on 'projectionFields'
    protected ArrayList<Object> doProjection(ArrayList<Tuple> tuples, FieldSelector[] projectionFields) {
        ArrayList<Object> result = new ArrayList<>(projectionFields.length);
        // Todo: optimize this computation... perhaps inner loop can be outside to avoid rescanning tuples
        for (int i = 0; i < projectionFields.length; i++) {
            boolean missingField = true;
            for (Tuple tuple : tuples) {
                Object field = lookupField(projectionFields[i], tuple);
                if (field != null) {
                    result.add(field);
                    missingField = false;
                    break;
                }
            }
            if (missingField) { // add a null for missing fields (usually in case of outer joins)
                result.add(null);
            }
        }
        return result;
    }

    // Extract the field from tuple. Field may be nested field (x.y.z)
    protected Object lookupField(FieldSelector fieldSelector, Tuple tuple) {

        // very stream name matches, it stream name was specified
        if (fieldSelector.streamName != null &&
            !fieldSelector.streamName.equalsIgnoreCase(getStreamSelector(tuple))) {
            return null;
        }

        Object curr = null;
        for (int i = 0; i < fieldSelector.field.length; i++) {
            if (i == 0) {
                if (tuple.contains(fieldSelector.field[i])) {
                    curr = tuple.getValueByField(fieldSelector.field[i]);
                } else {
                    return null;
                }
            } else {
                curr = ((Map) curr).get(fieldSelector.field[i]);
                if (curr == null) {
                    return null;
                }
            }
        }
        return curr;
    }

ResultRecord用于存储joined之后的数据
当joinCriteria.size() == 1或者finalJoin为true的时候，ResultRecord的generateOutputFields为true，会调用doProjection对结果集进行projection操作
当遍历joinCriteria调用doJoin的时候，遍历到最后一条记录时为true

JoinBolt.doJoin

storm-2.0.0/storm-client/src/jvm/org/apache/storm/bolt/JoinBolt.java

// Dispatches to the right join method (inner/left/right/outer) based on the joinInfo.joinType
    protected JoinAccumulator doJoin(JoinAccumulator probe, HashMap<Object, ArrayList<Tuple>> buildInput, JoinInfo joinInfo,
                                     boolean finalJoin) {
        final JoinType joinType = joinInfo.getJoinType();
        switch (joinType) {
            case INNER:
                return doInnerJoin(probe, buildInput, joinInfo, finalJoin);
            case LEFT:
                return doLeftJoin(probe, buildInput, joinInfo, finalJoin);
            case RIGHT:
            case OUTER:
            default:
                throw new RuntimeException("Unsupported join type : " + joinType.name());
        }
    }

doJoin封装了各种join类型的方法，目前仅仅实现了INNER以及LEFT，分别调用doInnerJoin、doLeftJoin方法

doInnerJoin

storm-2.0.0/storm-client/src/jvm/org/apache/storm/bolt/JoinBolt.java

// inner join - core implementation
    protected JoinAccumulator doInnerJoin(JoinAccumulator probe, Map<Object, ArrayList<Tuple>> buildInput, JoinInfo joinInfo,
                                          boolean finalJoin) {
        String[] probeKeyName = joinInfo.getOtherField();
        JoinAccumulator result = new JoinAccumulator();
        FieldSelector fieldSelector = new FieldSelector(joinInfo.other.getStreamName(), probeKeyName);
        for (ResultRecord rec : probe.getRecords()) {
            Object probeKey = rec.getField(fieldSelector);
            if (probeKey != null) {
                ArrayList<Tuple> matchingBuildRecs = buildInput.get(probeKey);
                if (matchingBuildRecs != null) {
                    for (Tuple matchingRec : matchingBuildRecs) {
                        ResultRecord mergedRecord = new ResultRecord(rec, matchingRec, finalJoin);
                        result.insert(mergedRecord);
                    }
                }
            }
        }
        return result;
    }

这里挨个对JoinAccumulator probe的records遍历，然后通过probeKey从buildInput寻找对应的records，如果有找到则进行合并

doLeftJoin

storm-2.0.0/storm-client/src/jvm/org/apache/storm/bolt/JoinBolt.java

// left join - core implementation
    protected JoinAccumulator doLeftJoin(JoinAccumulator probe, Map<Object, ArrayList<Tuple>> buildInput, JoinInfo joinInfo,
                                         boolean finalJoin) {
        String[] probeKeyName = joinInfo.getOtherField();
        JoinAccumulator result = new JoinAccumulator();
        FieldSelector fieldSelector = new FieldSelector(joinInfo.other.getStreamName(), probeKeyName);
        for (ResultRecord rec : probe.getRecords()) {
            Object probeKey = rec.getField(fieldSelector);
            if (probeKey != null) {
                ArrayList<Tuple> matchingBuildRecs = buildInput.get(probeKey); // ok if its return null
                if (matchingBuildRecs != null && !matchingBuildRecs.isEmpty()) {
                    for (Tuple matchingRec : matchingBuildRecs) {
                        ResultRecord mergedRecord = new ResultRecord(rec, matchingRec, finalJoin);
                        result.insert(mergedRecord);
                    }
                } else {
                    ResultRecord mergedRecord = new ResultRecord(rec, null, finalJoin);
                    result.insert(mergedRecord);
                }

            }
        }
        return result;
    }

left join与inner join的区别就在于没有找到匹配记录的话，仍旧保留左边的记录

小结

JoinBolt继承了BaseWindowedBolt，目前仅仅支持inner join及left join，而且要求join的字段与fieldsGrouping的字段相同
JoinBolt对于多个stream数据的合并，使用分治的方式实现，采用JoinAccumulator不断累加结果集，循环遍历调用doJoin来完成
由于JoinBolt是在内存进行操作，又需要匹配数据，需要消耗CPU及内存，有几个点需要注意一下：
- window的时间窗口不宜过大，否则内存堆积的数据过多，容易OOM，可根据情况调整时间窗口或者通过Config.TOPOLOGY_WORKER_MAX_HEAP_SIZE_MB设置woker的内存大小
- 采取slding window会造成数据重复join，因而需要使用withTumblingWindow
- 如果开启tuple处理超时，则要求Config.TOPOLOGY_MESSAGE_TIMEOUT_SECS大于windowLength + slidingInterval + 处理时间，避免还没有处理完就误判为超时重新replayed
- 由于windowedBolt会自动对tupleWindow的数据进行anchor，数据量过多anchor操作会给整个topology造成压力，如无必要可以关闭ack(设置Config.TOPOLOGY_ACKER_EXECUTORS为0)
- Config.TOPOLOGY_MAX_SPOUT_PENDING要设置的大一点，给window的join操作及后续操作足够的时间，在一定程度上避免spout发送tuple速度过快，下游bolt消费不过来
- 生产上Config.TOPOLOGY_DEBUG设置为false关闭debug日志，Config.TOPOLOGY_EVENTLOGGER_EXECUTORS设置为0关闭event logger