kafka(四):kafka javaAPI入庫程序

說明

  • 本博客每週五更新一次。
  • 本博客主要介紹如何用java代碼實現kafka數據入庫,並包含kerberos認證的實現。kafka版本爲2.3.0。

講解

maven導包

<!-- Kafka Java client library, pinned to 2.3.0 (the Kafka version this post targets). -->
<dependency>  
    <groupId>org.apache.kafka</groupId>  
    <artifactId>kafka-clients</artifactId>  
    <version>2.3.0</version>  
</dependency>  

連接kafka

// Both keys and values are sent as raw byte arrays, so one serializer class serves both.
String byteArraySerializer = "org.apache.kafka.common.serialization.ByteArraySerializer";

Properties props = new Properties();
// Broker address list; more than one broker may be given.
props.put("bootstrap.servers", Config.ipList);
// Require acknowledgement from all replicas before a send counts as successful.
props.put("acks", "all");
props.put("retries", "2");
props.put("key.serializer", byteArraySerializer);
props.put("value.serializer", byteArraySerializer);

KafkaProducer<byte[], byte[]> produce = new KafkaProducer<>(props);

kerberos認證

  • kerberos是大數據平臺的安全認證策略,可在項目啓動時先一步完成。這裏介紹兩種實現方式。

方式一

  • 指定認證文件
// Load the Kerberos (krb5) configuration file.
System.setProperty("java.security.krb5.conf", "/etc/krb5.conf"); 
// Load the Kerberos user file (the JAAS login configuration).
System.setProperty("java.security.auth.login.config", "/etc/kafka/conf/kafka_jaas.conf");

方式二

  • 某些時候,考慮到用戶切換、不同機器有不同的用戶信息,每個都要通過配置文件設置比較麻煩,因此考慮在java程序啓動時動態生成臨時文件(主要是炫技——微笑)。
// Load the Kerberos (krb5) configuration file.
System.setProperty("java.security.krb5.conf", "/etc/krb5.conf");
KafkaUtil.configureJAAS(Config.tabFile, Config.principal);  // keytab file and principal

/**
 * Writes a temporary JAAS configuration file and registers it with the JVM.
 *
 * <p>The file contains a single {@code KafkaClient} entry that uses
 * {@code Krb5LoginModule} with the given keytab and principal. It is marked for
 * deletion on JVM exit, and its absolute path is stored in the
 * {@code java.security.auth.login.config} system property.
 *
 * @param keyTab    location of the keytab file
 * @param principal Kerberos principal to authenticate as
 * @throws IllegalStateException if the temporary file cannot be created or written
 */
public static void configureJAAS(String keyTab, String principal) {
    String jaasTemplate =
            "KafkaClient {\n"
            + "com.sun.security.auth.module.Krb5LoginModule required\n"
            + "useKeyTab=true\n"
            + "keyTab=\"%1$s\"\n"
            + "principal=\"%2$s\";\n"
            + "};";
    String content = String.format(jaasTemplate, keyTab, principal);

    File jaasConf;
    try {
        jaasConf = File.createTempFile("jaas", ".conf");
        // Register for cleanup right away, so the file is removed even if writing fails.
        jaasConf.deleteOnExit();
        try (PrintWriter writer = new PrintWriter(jaasConf)) {
            writer.println(content);
            // PrintWriter never throws on write errors; it only exposes them via checkError().
            if (writer.checkError()) {
                throw new IOException("Failed to write JAAS configuration to " + jaasConf);
            }
        }
    } catch (IOException e) {
        // Fail with the real cause instead of a NullPointerException on a missing file,
        // and never point the JVM at a file that was not written.
        throw new IllegalStateException("Unable to create temporary JAAS configuration file", e);
    }
    System.setProperty("java.security.auth.login.config", jaasConf.getAbsolutePath());
}

應用

  • 實際線上使用時,考慮到數據傳輸效率和穩定性,要做以下優化。
    • 傳輸類爲線程類,線程池管理,增加傳輸效率。
    • 批量上傳數據。
    • 添加Callback處理機制,避免數據丟失。
  • 上傳線程類如下。
public class Performance extends Thread{
	private final static Logger log = LoggerFactory.getLogger(Performance.class);
	
	
	private List<ProducerRecord<byte[], byte[]>> recordList;

	/**
	 * Creates a sender thread for one batch of records.
	 *
	 * @param recordList the records that {@link #run()} sends, in iteration order
	 */
	public Performance(List<ProducerRecord<byte[], byte[]>> recordList) {
		this.recordList=recordList;
	}
	
	/**
	 * Ingestion throughput test: sends one fixed, pseudo-randomly generated payload to a
	 * hard-coded topic {@code numRecords} times and prints the collected statistics.
	 *
	 * <p>The loop counter is a {@code long} because the record target (10,000,000,000) does not
	 * fit in an {@code int}; an {@code int} counter would wrap around and the loop would never
	 * end. The producer is always closed, even when sending fails, and failures are logged at
	 * error level with their stack trace.
	 */
	public static void test() {
		log.info("Kafka Tool Test");

		try {
			String topicName = "test40";
			/* total number of records to send */
			long numRecords = 10000000000L;
			/* size of each record, in bytes */
			int recordSize = 1500;
			/* send-rate cap handed to the throttler (original note: max records per send round) */
			int throughput = 10000000;

			Properties props = new Properties();
			props.put("acks", "1");
			props.put("bootstrap.servers", "ip:6667,ip:6667");
			props.put("sasl.kerberos.service.name", "kafka");
			props.put("security.protocol", "SASL_PLAINTEXT");
			props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");
			props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.ByteArraySerializer");

			KafkaProducer<byte[], byte[]> producer = new KafkaProducer<byte[], byte[]>(props);
			Stats stats;
			try {
				/* build the test payload: upper-case ASCII letters from a fixed seed */
				byte[] payload = new byte[recordSize];
				Random random = new Random(0);
				for (int i = 0; i < payload.length; ++i) {
					payload[i] = (byte) (random.nextInt(26) + 65);
				}

				/* the same record instance is sent every time */
				ProducerRecord<byte[], byte[]> record = new ProducerRecord<byte[], byte[]>(topicName, payload);

				/* statistics model; reports a window every 5000 ms */
				stats = new Stats(numRecords, 5000);

				long startMs = System.currentTimeMillis();
				ThroughputThrottler throttler = new ThroughputThrottler(throughput, startMs);

				// NOTE(review): assumes shouldThrottle accepts a long amount, as the Kafka tools class does - confirm.
				for (long i = 0; i < numRecords; i++) {
					long sendStartMs = System.currentTimeMillis();
					Callback cb = stats.nextCompletion(sendStartMs, payload.length, stats, record.topic(), record.value());
					producer.send(record, cb);

					if (throttler.shouldThrottle(i, sendStartMs)) {
						throttler.throttle();
					}
				}
			} finally {
				/* always release the producer, also when a send throws */
				producer.close();
			}
			stats.printTotal();
		} catch (Exception e) {
			log.error("Test Error:", e);
		}
	}
	
    /**
     * Production ingestion: sends every record of {@code recordList} through a producer
     * obtained from {@link KafkaUtil#create()}, attaching a completion callback to each send.
     *
     * <p>A {@code null} or empty batch is treated as "nothing to send": no producer is created,
     * which also avoids building {@code Stats} for zero records. The producer is closed in a
     * {@code finally} block so it is released even if a send throws. In every non-failing case
     * the method ends by logging completion and incrementing {@code LogModel.sendNum}.
     */
    @Override
    public void run() {
        if (recordList != null && !recordList.isEmpty()) {
            KafkaUtil kafkaUtil = new KafkaUtil();
            KafkaProducer<byte[], byte[]> produce = kafkaUtil.create();
            try {
                // total number of records in this batch
                long size = recordList.size();

                /* send-rate cap handed to the throttler (original note: max records per send round) */
                int throughput = 900000;

                /* statistics model; reports a window every 5000 ms */
                Stats stats = new Stats(size, 5000);
                long startMs = System.currentTimeMillis();
                ThroughputThrottler throttler = new ThroughputThrottler(throughput, startMs);

                int i = 0;
                for (ProducerRecord<byte[], byte[]> record : recordList) {
                    long sendStartMs = System.currentTimeMillis();
                    // arguments: send start time, payload length, stats model, topic, payload
                    Callback cb = stats.nextCompletion(sendStartMs, record.value().length, stats, record.topic(), record.value());

                    produce.send(record, cb);
                    if (throttler.shouldThrottle(i, sendStartMs)) {
                        throttler.throttle();
                    }
                    i++;
                }
            } finally {
                // always release the producer, also when a send throws
                produce.close();
            }
        }
        log.info("Finish Data To Send");
        LogModel.sendNum++;
    }
	
    /**
     * Accumulates send statistics: record and byte counts, total and maximum latency, a sampled
     * latency array for percentiles, and a rolling reporting window that is printed whenever
     * {@code reportingInterval} milliseconds have passed since the window started.
     */
    private static class Stats {
        /** Upper bound used to derive the latency sampling rate. */
        private static final long MAX_SAMPLES = 500000;

        private final long start;
        private long windowStart;
        private final int[] latencies;
        private final int sampling;
        private int iteration;
        private int index;
        private long count;
        private long bytes;
        private int maxLatency;
        private long totalLatency;
        private long windowCount;
        private int windowMaxLatency;
        private long windowTotalLatency;
        private long windowBytes;
        private final long reportingInterval;

        /**
         * @param numRecords        expected number of records; values below 1 are accepted and
         *                          produce a one-slot sample array instead of a division by zero
         * @param reportingInterval minimum window length in milliseconds between window reports
         */
        public Stats(long numRecords, int reportingInterval) {
            this.start = System.currentTimeMillis();
            this.windowStart = System.currentTimeMillis();
            long expected = Math.max(0L, numRecords);
            // Clamp to at least 1 so an empty batch cannot divide by zero.
            long sampleSlots = Math.max(1L, Math.min(expected, MAX_SAMPLES));
            this.sampling = (int) Math.max(1L, expected / sampleSlots);
            this.latencies = new int[(int) (expected / this.sampling) + 1];
            this.reportingInterval = reportingInterval;
            // All counters start at their default value of zero.
        }

        /**
         * Records one completed send and prints a window report when the window has expired.
         *
         * @param iter    sequence number of the send, used to decide whether to sample its latency
         * @param latency latency of the send in milliseconds
         * @param bytes   payload size in bytes
         * @param time    completion time in milliseconds, compared against the window start
         */
        public void record(int iter, int latency, int bytes, long time) {
            this.count++;
            this.bytes += bytes;
            this.totalLatency += latency;
            this.maxLatency = Math.max(this.maxLatency, latency);
            this.windowCount++;
            this.windowBytes += bytes;
            this.windowTotalLatency += latency;
            this.windowMaxLatency = Math.max(windowMaxLatency, latency);
            // Bounds check: more completions than expected must not overflow the sample array.
            if (iter % this.sampling == 0 && this.index < this.latencies.length) {
                this.latencies[index] = latency;
                this.index++;
            }
            /* maybe report the recent perf */
            if (time - windowStart >= reportingInterval) {
                printWindow();
                newWindow();
            }
        }

        /**
         * Creates the completion callback for the next send and advances the iteration counter.
         *
         * @param start send start time in milliseconds
         * @param bytes payload size in bytes
         * @param stats statistics object the callback reports to
         * @param topic topic of the record, kept so the callback can rebuild the record on failure
         * @param data  payload of the record, kept for the same reason
         * @return the callback to pass to the producer's send call
         */
        public Callback nextCompletion(long start, int bytes, Stats stats,String topic,byte[] data) {
            Callback cb = new PerfCallback(this.iteration, start, bytes, stats,topic,data);
            this.iteration++;
            return cb;
        }

        /**
         * Prints throughput and latency for the current reporting window.
         */
        public void printWindow() {
            long elapsed = System.currentTimeMillis() - windowStart;
            double recsPerSec = 1000.0 * windowCount / (double) elapsed;
            double mbPerSec = 1000.0 * this.windowBytes / (double) elapsed / (1024.0 * 1024.0);
            System.out.printf("%d spend time,%d records sent, %.1f records/sec (%.2f MB/sec), %.1f ms avg latency, %.1f max latency.\n",
                              elapsed,
                              windowCount,
                              recsPerSec,
                              mbPerSec,
                              windowTotalLatency / (double) windowCount,
                              (double) windowMaxLatency);
        }

        /**
         * Starts a fresh reporting window with all window counters reset.
         */
        public void newWindow() {
            this.windowStart = System.currentTimeMillis();
            this.windowCount = 0;
            this.windowMaxLatency = 0;
            this.windowTotalLatency = 0;
            this.windowBytes = 0;
        }

        /**
         * Prints overall throughput, average/max latency and the 50th, 95th, 99th and 99.9th
         * latency percentiles computed from the sampled latencies.
         */
        public void printTotal() {
            long elapsed = System.currentTimeMillis() - start;
            double recsPerSec = 1000.0 * count / (double) elapsed;
            double mbPerSec = 1000.0 * this.bytes / (double) elapsed / (1024.0 * 1024.0);
            int[] percs = percentiles(this.latencies, index, 0.5, 0.95, 0.99, 0.999);
            System.out.printf("%d spend time,%d records sent, %f records/sec (%.2f MB/sec), %.2f ms avg latency, %.2f ms max latency, %d ms 50th, %d ms 95th, %d ms 99th, %d ms 99.9th.\n",
                              elapsed,
                              count,
                              recsPerSec,
                              mbPerSec,
                              totalLatency / (double) count,
                              (double) maxLatency,
                              percs[0],
                              percs[1],
                              percs[2],
                              percs[3]);
        }

        /**
         * Sorts the first {@code count} samples in place and picks the requested percentiles.
         *
         * @param latencies   sample array; its leading portion is sorted by this call
         * @param count       number of valid samples at the front of the array
         * @param percentiles fractions in the range [0, 1)
         * @return one value per requested percentile, in the same order
         */
        private static int[] percentiles(int[] latencies, int count, double... percentiles) {
            int size = Math.min(count, latencies.length);
            Arrays.sort(latencies, 0, size);
            int[] values = new int[percentiles.length];
            for (int i = 0; i < percentiles.length; i++) {
                int index = (int) (percentiles[i] * size);
                values[i] = latencies[index];
            }
            return values;
        }
    }

    private static final class PerfCallback implements Callback {
        private final long start;
        private final int iteration;
        private final int bytes;
        private final Stats stats;
        private final String topic;
        private final byte[] data;

        public PerfCallback(int iter, long start, int bytes, Stats stats,String topic,byte[] data) {
            this.start = start;
            this.stats = stats;
            this.iteration = iter;
            this.bytes = bytes;
            this.topic=topic;
            this.data=data;
        }

        public void onCompletion(RecordMetadata metadata, Exception exception) {
            long now = System.currentTimeMillis();
            int latency = (int) (now - start);
            this.stats.record(iteration, latency, bytes, now);
            if (exception != null){
            	ProducerRecord<byte[], byte[]> record=new ProducerRecord<byte[], byte[]>(topic,data);

                //將數據重新添加入數據隊列,二次上傳
            	ControlTask.recordList.add(record);
            	log.error("Send Error And Second To Send",exception);
            }
           
        }
    }

}
  • KafkaUtil.java
/**
 * Helper around the Kafka producer used for ingestion: builds the producer from
 * {@code Config}, forwards sends, closes it, and can generate a temporary JAAS login
 * configuration for Kerberos.
 */
public class KafkaUtil {

	/** Producer created by {@link #create()}; {@code null} until then. */
	private KafkaProducer<byte[], byte[]> produce;

	/**
	 * Creates the producer connection and remembers it for {@link #send} and {@link #close()}.
	 *
	 * @return the newly created producer
	 */
	public KafkaProducer<byte[], byte[]> create(){
		Properties props = new Properties();
		props.put("acks", "all");
		props.put("bootstrap.servers", Config.ipList);
		props.put("key.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
		props.put("value.serializer", "org.apache.kafka.common.serialization.ByteArraySerializer");
		// Optional tuning kept from the original author: raise ProducerConfig.MAX_BLOCK_MS_CONFIG
		// (e.g. to 120000) to lengthen the blocking wait.
		props.put("retries", "2");

		// Kerberos security settings.
		// NOTE(review): the value 0 appears to mean "Kerberos enabled" - confirm against Config.
		if(Config.kerberos==0){
			props.put("security.protocol", "SASL_PLAINTEXT");
			props.put("sasl.mechanism", "GSSAPI");
			props.put("sasl.kerberos.service.name", "kafka");
		}

		produce = new KafkaProducer<byte[], byte[]>(props);

		return produce;
	}

	/**
	 * Sends one record through the producer created by {@link #create()}.
	 *
	 * @param record record to send
	 * @param cb     callback invoked when the send completes
	 * @throws IllegalStateException if {@link #create()} has not been called yet
	 */
	public void send(ProducerRecord<byte[], byte[]> record,Callback cb){
		if (produce == null) {
			throw new IllegalStateException("create() must be called before send()");
		}
		produce.send(record,cb);
	}

	/**
	 * Flushes and closes the producer. Does nothing when no producer has been created.
	 */
	public void close(){
		if (produce == null) {
			return;
		}
		produce.flush();
		produce.close();
	}

	/**
	 * Writes a temporary JAAS configuration file and registers it with the JVM.
	 *
	 * <p>The file contains a single {@code KafkaClient} entry that uses
	 * {@code Krb5LoginModule} with the given keytab and principal. It is marked for
	 * deletion on JVM exit, and its absolute path is stored in the
	 * {@code java.security.auth.login.config} system property.
	 *
	 * @param keyTab    location of the keytab file
	 * @param principal Kerberos principal to authenticate as
	 * @throws IllegalStateException if the temporary file cannot be created or written
	 */
	public static void configureJAAS(String keyTab, String principal) {
		String jaasTemplate =
				"KafkaClient {\n"
				+ "com.sun.security.auth.module.Krb5LoginModule required\n"
				+ "useKeyTab=true\n"
				+ "keyTab=\"%1$s\"\n"
				+ "principal=\"%2$s\";\n"
				+ "};";
		String content = String.format(jaasTemplate, keyTab, principal);

		File jaasConf;
		try {
			jaasConf = File.createTempFile("jaas", ".conf");
			// Register for cleanup right away, so the file is removed even if writing fails.
			jaasConf.deleteOnExit();
			try (PrintWriter writer = new PrintWriter(jaasConf)) {
				writer.println(content);
				// PrintWriter never throws on write errors; it only exposes them via checkError().
				if (writer.checkError()) {
					throw new IOException("Failed to write JAAS configuration to " + jaasConf);
				}
			}
		} catch (IOException e) {
			// Fail with the real cause instead of a NullPointerException on a missing file,
			// and never point the JVM at a file that was not written.
			throw new IllegalStateException("Unable to create temporary JAAS configuration file", e);
		}
		System.setProperty("java.security.auth.login.config", jaasConf.getAbsolutePath());
	}
}

總結

  • 第一次做kafka入庫時,效率很差,最後反攻官方實例才確定了傳輸方案,但還是不理想,測試發現是java自身消息隊列太慢,無法滿足現有需求,最後將隊列改爲Disruptor事件隊列,才滿足要求。
  • 認真學習,好好奮鬥,做自己喜歡做的事,努力活成自己喜歡的樣子。
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章