Storm + Kafka Development

1、pom.xml

    <!--storm-->
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-core</artifactId>
      <version>1.1.1</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-kafka-client</artifactId>
      <version>1.1.1</version>
    </dependency>

    <!--kafka-->
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.11</artifactId>
      <version>RELEASE</version>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <version>RELEASE</version>
    </dependency>
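Note: RELEASE is a deprecated Maven metaversion that resolves to whatever the newest release happens to be. It is safer to pin kafka-clients explicitly to the 0.10.x line that storm-kafka-client 1.1.1 targets (see the reference at the end of this post); the exact version below is an assumption:

    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka-clients</artifactId>
      <!-- assumption: pick the 0.10.x release that matches your brokers -->
      <version>0.10.1.0</version>
    </dependency>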

2、Spout


import com.fasterxml.jackson.databind.ObjectMapper;
import com.util.PropUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.storm.kafka.spout.Func;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;

import java.io.IOException;
import java.util.List;

import static org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy.LATEST;


@Slf4j
public class SpoutConfig {

    /**
     * Kafka record translator: turns a ConsumerRecord into a Test object.
     */
    private static final Func<ConsumerRecord<String, String>, List<Object>> recordTranslator = new Func<ConsumerRecord<String, String>, List<Object>>() {
        ObjectMapper objectMapper = new ObjectMapper();

        @Override
        public List<Object> apply(ConsumerRecord<String, String> record) {
            String message = record.value();
            Test test = null;
            try {
                test = objectMapper.readValue(message, Test.class);
            } catch (IOException e) {
                log.error("Failed to deserialize Test from record: {}", message, e);
            }
            return new Values(test);
        }
    };

    /**
     * Kafka spout configuration.
     *
     * @return KafkaSpoutConfig
     */
    public static KafkaSpoutConfig<String, String> newKafkaSpoutConfig() {
        return KafkaSpoutConfig
                .builder(PropUtils.getProperty("kafka.servers"), PropUtils.getProperty("kafka.topic")) // bootstrap servers and topic
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, "busSpoutGroup") // consumer group
                .setProp(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, 600000) // max allowed interval between poll() calls (not the heartbeat interval)
                // .setProp(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 200)
                .setRecordTranslator(recordTranslator, new Fields("test")) // record translator defined above
                .setOffsetCommitPeriodMs(10000) // offset commit period
                .setFirstPollOffsetStrategy(LATEST) // where to start reading on the first poll
                // .setMaxUncommittedOffsets(500) // max number of polled offsets (records) that can be pending commit before another poll can take place
                .build();
    }
}
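The Test class that the translator deserializes into is not shown in the original. A minimal Jackson-friendly sketch (the field names are assumptions); note it must be serializable, since it travels inside tuples:

import java.io.Serializable;

public class Test implements Serializable {
    private String id;        // assumed field
    private long timestamp;   // assumed field

    public String getId() { return id; }
    public void setId(String id) { this.id = id; }
    public long getTimestamp() { return timestamp; }
    public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
}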

PropUtils.java

import lombok.extern.slf4j.Slf4j;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

@Slf4j
public class PropUtils {

    // local development environment
    private static final String devMode = "development";
    // production environment
    // private static final String devMode = "production";
    // test environment
    // private static final String devMode = "test";


    private static Properties prop = new Properties();

    static {
        try {
            Properties kafka = new Properties();

            String path = "profile/" + devMode + "/kafka.properties";
            InputStream in = PropUtils.class.getClassLoader().getResourceAsStream(path);
            if (in == null) {
                // getResourceAsStream returns null (not an IOException) when the file is missing
                throw new IOException("Resource not found on classpath: " + path);
            }
            kafka.load(in);

            prop.putAll(kafka);

        } catch (IOException e) {
            log.error("Failed to load configuration file!", e);
            System.exit(1);
        }
    }

    public static String getProperty(String p) {
        return prop.getProperty(p);
    }

    public static int getInt(String p) {
        return Integer.parseInt(prop.getProperty(p));
    }

    public static boolean getBoolean(String p) {
        return Boolean.parseBoolean(prop.getProperty(p));
    }

}

kafka.properties

kafka.servers=node1:6667,node2:6667,node3:6667
kafka.topic=test
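PropUtils loads profile/<devMode>/kafka.properties from the classpath, so one copy of this file is expected per environment, roughly laid out as follows (inferred from the loader code):

src/main/resources/profile/development/kafka.properties
src/main/resources/profile/test/kafka.properties
src/main/resources/profile/production/kafka.properties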

3、Bolt

public class TestBolt1 extends BaseWindowedBolt
public class TestBolt2 extends BaseBasicBolt
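The original only gives the two class declarations. A minimal hedged sketch of both bolts (each would live in its own file), assuming the spout's single "test" field and no downstream bolts:

import org.apache.storm.topology.BasicOutputCollector;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseBasicBolt;
import org.apache.storm.topology.base.BaseWindowedBolt;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.windowing.TupleWindow;

// Windowed bolt: execute() receives every tuple that fell into the current window.
public class TestBolt1 extends BaseWindowedBolt {
    @Override
    public void execute(TupleWindow window) {
        for (Tuple tuple : window.get()) {
            Test test = (Test) tuple.getValueByField("test");
            // ... aggregate or persist the windowed batch here
        }
    }
}

// Plain bolt: processes one tuple at a time; BaseBasicBolt acks automatically.
public class TestBolt2 extends BaseBasicBolt {
    @Override
    public void execute(Tuple tuple, BasicOutputCollector collector) {
        Test test = (Test) tuple.getValueByField("test");
        // ... handle a single record here
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt: nothing to declare
    }
}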

4、Building the topology

import lombok.extern.slf4j.Slf4j;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseWindowedBolt;

import java.util.List;
import java.util.concurrent.TimeUnit;



@Slf4j
public class TestTopology { // must not be named TopologyBuilder, which would clash with the imported org.apache.storm.topology.TopologyBuilder
    static final String TOPOLOGY_NAME = "TEST_TOPOLOGY";

    public static void main(String[] args) throws Exception {
        Config config = new Config();

        config.setMessageTimeoutSecs(3600); // tuple timeout
        config.setNumAckers(0); // number of acker executors (0 disables acking)
        config.setNumWorkers(2); // number of worker processes for the whole topology

        StormTopology topology = buildTopology();

        if (args.length == 0) {
            // local mode
            LocalCluster cluster = new LocalCluster();
            // submit the topology
            cluster.submitTopology(TOPOLOGY_NAME, config, topology);
            log.info("topology submit...");
            // run for a while, then shut everything down
            TimeUnit.HOURS.sleep(1);
            cluster.killTopology(TOPOLOGY_NAME);
            cluster.shutdown();
            log.info("topology shutdown...");
            System.exit(0);
        } else {
            // cluster mode
            config.put(Config.STORM_CLUSTER_MODE, "distributed");
            // submit the topology
            StormSubmitter.submitTopology(args[0], config, topology);
        }
    }

    /**
     * Build the topology.
     *
     * @return StormTopology
     */
    private static StormTopology buildTopology() {
        TopologyBuilder builder = new TopologyBuilder();
        // spout
        builder.setSpout("KafkaSpout", new KafkaSpout<>(SpoutConfig.newKafkaSpoutConfig()), 3);
        // bolts
        // tumbling window bolt
        builder.setBolt("TestBolt1", new TestBolt1().withTumblingWindow(new BaseWindowedBolt.Duration(5, TimeUnit.MINUTES)), 1).localOrShuffleGrouping("KafkaSpout");
        // plain (non-windowed) bolt
        builder.setBolt("TestBolt2", new TestBolt2(), 1).localOrShuffleGrouping("KafkaSpout");

        return builder.createTopology();
    }


}
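For cluster mode, package the topology and submit it with the storm CLI, passing the topology name as the first program argument; a hedged example (jar name and package are assumptions):

storm jar storm-kafka-demo.jar com.example.TestTopology TEST_TOPOLOGY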

 

Problems encountered:

1、No bolt in the topology throws an exception, yet the spout reports failed tuples.

Solution:

conf.setMaxSpoutPending(100000); // max number of tuples pending (emitted but not yet acked/failed) per spout task; keeps the tuple queue bounded, and only takes effect for reliable (acked) topologies
conf.setMessageTimeoutSecs(1000); // tuple timeout, default 30s

or

conf.setNumAckers(0); // set acker parallelism to 0, disabling Storm's acking (and with it the reliability guarantees)

2、Exception: org.apache.kafka.clients.consumer.CommitFailedException: Commit cannot be completed since the group has already rebalanced and assigned the partitions to another member. This means that the time between subsequent calls to poll() was longer than the configured max.poll.interval.ms, which typically implies that the poll loop is spending too much time message processing. You can address this either by increasing the session timeout or by reducing the maximum size of batches returned in poll() with max.poll.records.
Solution:

a) Increase max.poll.interval.ms (default 300000, i.e. 300s)

b) Decrease max.poll.records (default 500)
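Both settings can be applied on the KafkaSpoutConfig builder from section 2 (the values below are illustrative, not recommendations):

        KafkaSpoutConfig
                .builder(PropUtils.getProperty("kafka.servers"), PropUtils.getProperty("kafka.topic"))
                .setProp(ConsumerConfig.MAX_POLL_INTERVAL_MS_CONFIG, 600000) // a) raise max.poll.interval.ms
                .setProp(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, 200)        // b) lower max.poll.records
                .build();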

References: Kafka consumer exception handling

Storm 1.1.1 commit offsets against Kafka 0.10.x
