GuozhongCrawler 的分佈式爬蟲仍在開發當中。以下是 GuozhongCrawler 基於 Redis 實現的隊列,供大家編寫其他分佈式爬蟲時參考。
package com.guozhong.queue;
import com.guozhong.request.BasicRequest;
/**
 * Contract for a thread-safe queue of {@link BasicRequest} objects that supports
 * blocking retrieval.
 *
 * @author 郭鍾 (QQ group: 202568714)
 */
public interface BlockingRequestQueue {

    // ---------------------------------------------------------------- insertion

    /**
     * Adds the given element to the queue.
     *
     * @param e the request to enqueue
     * @return a success flag; its exact meaning is left to the implementation
     */
    boolean add(BasicRequest e);

    // ------------------------------------------------------- destructive reads

    /**
     * Retrieves and removes the head of the queue.
     *
     * @return the head of the queue, or {@code null} when the queue is empty
     */
    BasicRequest poll();

    /**
     * Retrieves and removes the head of the queue, waiting for as long as the
     * queue holds no element.
     *
     * @return the head of the queue
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    BasicRequest take() throws InterruptedException;

    /**
     * Retrieves and removes the head of the queue. Unlike {@link #poll()}, an
     * empty queue is reported with an exception instead of {@code null}.
     *
     * @return the head of the queue
     * @throws java.util.NoSuchElementException if the queue is empty
     */
    BasicRequest remove();

    /**
     * Removes a single instance of the given element from the queue, if present.
     *
     * @param e the request to remove
     * @return a success flag; its exact meaning is left to the implementation
     */
    boolean remove(BasicRequest e);

    // --------------------------------------------------- non-destructive reads

    /**
     * Retrieves the head of the queue without removing it.
     *
     * @return the head of the queue, or {@code null} when the queue is empty
     */
    BasicRequest peek();

    /**
     * Retrieves the head of the queue without removing it. The only difference
     * from {@link #peek()} is that an empty queue is reported with an exception.
     *
     * @return the head of the queue
     */
    BasicRequest element();

    // ------------------------------------------------------------- bookkeeping

    /**
     * Returns the number of elements in the queue.
     *
     * @return the element count
     */
    int size();

    /**
     * Tells whether the queue holds no elements.
     *
     * @return {@code true} when the queue is empty
     */
    boolean isEmpty();

    /**
     * Removes every element from the queue.
     */
    void clear();
}
實現類如下:
package com.guozhong.queue;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.Random;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.regex.Pattern;
import javax.sound.midi.VoiceStatus;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.JedisPoolConfig;
import com.guozhong.component.BinaryProcessor;
import com.guozhong.component.PageProcessor;
import com.guozhong.component.PageScript;
import com.guozhong.model.Proccessable;
import com.guozhong.page.OkPage;
import com.guozhong.page.Page;
import com.guozhong.request.BasicRequest;
import com.guozhong.request.BinaryRequest;
import com.guozhong.request.PageRequest;
import com.guozhong.request.StartContext;
/**
 * A {@link BlockingRequestQueue} stored in a Redis list.
 *
 * <p>Requests are written with Java object serialization, pushed with {@code LPUSH}
 * and taken with {@code RPOP}, so they leave the queue in the order they were added
 * (FIFO). No priority ordering is applied.
 *
 * <p>Every operation borrows a connection from the Jedis pool only for the duration
 * of the Redis command(s) it issues. A failing Redis command is printed to the error
 * stream by {@link OperationJedis#exe()} and then surfaces as follows:
 * {@link #poll()} and {@link #peek()} return {@code null}, {@link #add(BasicRequest)}
 * and {@link #remove(BasicRequest)} return {@code false}, {@link #remove()} and
 * {@link #element()} throw {@link NoSuchElementException}, {@link #size()} and
 * {@link #isEmpty()} throw {@link IllegalStateException}, and {@link #take()} keeps
 * retrying.
 *
 * <p>NOTE(review): the class is declared {@link Serializable} but holds a
 * {@code JedisPool}, which is presumably not serializable - confirm before relying on
 * serializing instances of this class.
 *
 * @author Administrator
 */
public final class RedisRequestBlockingQueue implements BlockingRequestQueue, Serializable {

    private static final long serialVersionUID = 1L;

    /** Timeout, in milliseconds, passed to the {@code JedisPool} constructor. */
    private static final int TIMEOUT_MILLIS = 15000;

    /** Pause, in milliseconds, between two attempts of {@link #take()} on an empty queue. */
    private static final long TAKE_POLL_INTERVAL_MILLIS = 100L;

    /** Pool every Redis operation borrows its connection from. */
    private final JedisPool pool;

    /** UTF-8 encoded key of the Redis list that stores the queue. */
    private final byte[] queue;

    /**
     * Creates a queue that uses the supplied pool configuration.
     *
     * @param host   Redis host
     * @param port   Redis port
     * @param config pool configuration handed to the {@code JedisPool}
     * @param queue  name of the Redis list (key) backing this queue
     */
    public RedisRequestBlockingQueue(String host, int port, JedisPoolConfig config, String queue) {
        // Encode first so that an invalid name fails before a pool is created.
        this.queue = encodeQueueName(queue);
        this.pool = new JedisPool(config, host, port, TIMEOUT_MILLIS);
    }

    /**
     * Creates a queue that uses the built-in default pool configuration.
     *
     * @param host  Redis host
     * @param port  Redis port
     * @param queue name of the Redis list (key) backing this queue
     */
    public RedisRequestBlockingQueue(String host, int port, String queue) {
        this(host, port, defaultPoolConfig(), queue);
    }

    /** Encodes the queue name as UTF-8; fails loudly instead of leaving the key unset. */
    private static byte[] encodeQueueName(String queue) {
        try {
            return queue.getBytes("utf-8");
        } catch (UnsupportedEncodingException e) {
            throw new IllegalStateException("UTF-8 is not supported by this JVM", e);
        }
    }

    /** Builds the pool configuration used when the caller does not supply one. */
    private static JedisPoolConfig defaultPoolConfig() {
        JedisPoolConfig config = new JedisPoolConfig();
        // Make callers wait (up to maxWait) when the pool is exhausted instead of failing at once.
        config.setBlockWhenExhausted(true);
        // Eviction policy class used by the idle-connection evictor.
        config.setEvictionPolicyClassName("org.apache.commons.pool2.impl.DefaultEvictionPolicy");
        // Expose the pool through JMX.
        config.setJmxEnabled(true);
        // Prefix of the JMX MBean name registered for the pool.
        config.setJmxNamePrefix("pool");
        // Hand out the most recently returned connection first.
        config.setLifo(true);
        // Upper bound on idle connections kept in the pool.
        config.setMaxIdle(100);
        // Upper bound on connections managed by the pool.
        config.setMaxTotal(300);
        // Longest time, in milliseconds, a caller waits for a connection before the borrow fails.
        config.setMaxWaitMillis(10000);
        // Idle time (30 minutes) after which a connection may be evicted.
        config.setMinEvictableIdleTimeMillis(1800000);
        // Number of idle connections the pool tries to keep available.
        config.setMinIdle(20);
        // Number of connections examined during each evictor run.
        config.setNumTestsPerEvictionRun(3);
        // Soft idle threshold (30 minutes) consulted by the eviction policy.
        config.setSoftMinEvictableIdleTimeMillis(1800000);
        // Validate a connection before handing it to a caller.
        config.setTestOnBorrow(true);
        // Validate idle connections during evictor runs.
        config.setTestWhileIdle(true);
        // Interval between evictor runs, in milliseconds.
        // NOTE(review): 20 ms looks unusually aggressive (seconds may have been intended);
        // kept unchanged to preserve behaviour - confirm.
        config.setTimeBetweenEvictionRunsMillis(20);
        return config;
    }

    /**
     * Retrieves and removes the head of the queue.
     *
     * <p>The element is popped directly rather than checking the length first, so a
     * concurrent consumer cannot empty the queue between the check and the pop.
     *
     * @return the head of the queue; {@code null} only if the stored bytes cannot be
     *         deserialized
     * @throws NoSuchElementException if no element could be popped
     */
    @Override
    public BasicRequest remove() {
        byte[] data = popHead();
        if (data == null) {
            throw new NoSuchElementException("隊列長度爲0");
        }
        return byteToObject(data);
    }

    /**
     * Retrieves and removes the head of the queue.
     *
     * @return the head of the queue, or {@code null} if nothing could be popped
     */
    @Override
    public BasicRequest poll() {
        return byteToObject(popHead());
    }

    /**
     * Retrieves the head of the queue without removing it.
     *
     * @return the head of the queue; {@code null} only if the stored bytes cannot be
     *         deserialized
     * @throws NoSuchElementException if no element could be read
     */
    @Override
    public BasicRequest element() {
        byte[] data = readHead();
        if (data == null) {
            throw new NoSuchElementException("隊列長度爲0");
        }
        return byteToObject(data);
    }

    /**
     * Retrieves the head of the queue without removing it.
     *
     * @return the head of the queue, or {@code null} if nothing could be read
     */
    @Override
    public BasicRequest peek() {
        return byteToObject(readHead());
    }

    /**
     * Returns the number of elements in the queue, capped at {@link Integer#MAX_VALUE}.
     *
     * @return the length of the Redis list
     * @throws IllegalStateException if the length could not be read from Redis
     */
    @Override
    public int size() {
        long length = length();
        return length > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) length;
    }

    /**
     * Tells whether the queue holds no elements.
     *
     * @return {@code true} when the Redis list has length zero
     * @throws IllegalStateException if the length could not be read from Redis
     */
    @Override
    public boolean isEmpty() {
        return length() == 0;
    }

    /**
     * Removes every element by deleting the Redis key of the queue.
     */
    @Override
    public void clear() {
        new OperationJedis<Void>() {
            @Override
            protected Void operation(Jedis jedis) throws Exception {
                jedis.del(queue);
                return null;
            }
        }.exe();
    }

    /**
     * Serializes the request and pushes it onto the queue.
     *
     * @param e the request to enqueue
     * @return {@code true} if the request was pushed; {@code false} if it is
     *         {@code null}, could not be serialized, or the Redis command failed
     */
    @Override
    public boolean add(final BasicRequest e) {
        final byte[] data = objectToByte(e);
        if (data == null) {
            return false;
        }
        Boolean pushed = new OperationJedis<Boolean>() {
            @Override
            protected Boolean operation(Jedis jedis) throws Exception {
                jedis.lpush(queue, data);
                return Boolean.TRUE;
            }
        }.exe();
        return Boolean.TRUE.equals(pushed);
    }

    /**
     * Retrieves and removes the head of the queue, polling until an element is
     * available.
     *
     * <p>Each attempt borrows its own connection through {@link #poll()} and the pause
     * happens outside of any Redis operation, so a waiting thread does not hold a pooled
     * connection and an interrupt reaches the caller.
     *
     * @return the head of the queue, never {@code null}
     * @throws InterruptedException if the thread is interrupted while pausing
     */
    @Override
    public BasicRequest take() throws InterruptedException {
        while (true) {
            BasicRequest request = poll();
            if (request != null) {
                return request;
            }
            Thread.sleep(TAKE_POLL_INTERVAL_MILLIS);
        }
    }

    /**
     * Removes a single instance of the given request from the queue, if present.
     *
     * <p>Matching is done by Redis on the serialized bytes, so a stored element is only
     * found when it serializes to exactly the same byte sequence as {@code o}.
     *
     * @param o the request to remove
     * @return {@code true} if an element was removed
     */
    @Override
    public boolean remove(final BasicRequest o) {
        final byte[] data = objectToByte(o);
        if (data == null) {
            return false;
        }
        Long removed = new OperationJedis<Long>() {
            @Override
            protected Long operation(Jedis jedis) throws Exception {
                // A negative count scans from the tail, i.e. from the end poll() reads,
                // and -1 limits the removal to one occurrence.
                return jedis.lrem(queue, -1, data);
            }
        }.exe();
        return removed != null && removed.longValue() > 0;
    }

    /**
     * Serializes a request with Java object serialization.
     *
     * @param obj the request to serialize; may be {@code null}
     * @return the serialized bytes, or {@code null} if {@code obj} is {@code null} or
     *         serialization fails (the failure is printed to the error stream)
     */
    public byte[] objectToByte(BasicRequest obj) {
        if (obj == null) {
            return null;
        }
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        ObjectOutputStream out = null;
        try {
            out = new ObjectOutputStream(buffer);
            out.writeObject(obj);
            // Make sure everything reached the buffer before it is copied.
            out.flush();
            return buffer.toByteArray();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Restores a request from bytes produced by {@link #objectToByte(BasicRequest)}.
     *
     * <p>SECURITY NOTE: this uses Java native deserialization on data read from Redis;
     * it is only safe while every writer to that Redis key is trusted.
     *
     * @param bytes the serialized request; may be {@code null}
     * @return the request, or {@code null} if {@code bytes} is {@code null} or does not
     *         deserialize to a {@link BasicRequest} (the failure is printed to the error
     *         stream)
     */
    public BasicRequest byteToObject(byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        ObjectInputStream in = null;
        try {
            in = new ObjectInputStream(new ByteArrayInputStream(bytes));
            // Cast inside the try so that an unexpected type is reported like any other
            // decoding failure instead of escaping as a ClassCastException.
            return (BasicRequest) in.readObject();
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /** Pops the raw bytes at the consuming end of the list; {@code null} if none or on failure. */
    private byte[] popHead() {
        return new OperationJedis<byte[]>() {
            @Override
            protected byte[] operation(Jedis jedis) throws Exception {
                return jedis.rpop(queue);
            }
        }.exe();
    }

    /**
     * Reads, without removing, the raw bytes at the consuming end of the list;
     * {@code null} if none or on failure. Index -1 addresses the element RPOP would
     * return, and the read leaves the list untouched.
     */
    private byte[] readHead() {
        return new OperationJedis<byte[]>() {
            @Override
            protected byte[] operation(Jedis jedis) throws Exception {
                return jedis.lindex(queue, -1);
            }
        }.exe();
    }

    /** Reads the list length, failing explicitly when Redis gave no answer. */
    private long length() {
        Long length = new OperationJedis<Long>() {
            @Override
            protected Long operation(Jedis jedis) throws Exception {
                return jedis.llen(queue);
            }
        }.exe();
        if (length == null) {
            throw new IllegalStateException(
                    "Unable to read the length of the Redis list (see the logged error)");
        }
        return length.longValue();
    }

    /**
     * Template for one unit of work against Redis: borrows a connection from the pool,
     * runs {@link #operation(Jedis)} and hands the connection back.
     *
     * @param <E> result type of the operation
     */
    private abstract class OperationJedis<E> {

        /**
         * The Redis work to perform with the borrowed connection.
         *
         * @param jedis the borrowed connection
         * @return the result of the work
         * @throws Exception on any failure; handled by {@link #exe()}
         */
        protected abstract E operation(Jedis jedis) throws Exception;

        /**
         * Runs the operation.
         *
         * @return the operation's result, or {@code null} if borrowing the connection or
         *         the operation itself threw; the exception is printed and the
         *         connection, if one was borrowed, is returned to the pool as broken
         */
        public final E exe() {
            Jedis jedis = null;
            boolean failed = false;
            try {
                jedis = pool.getResource();
                return operation(jedis);
            } catch (Exception e) {
                failed = true;
                e.printStackTrace();
                return null;
            } finally {
                if (jedis != null) {
                    if (failed) {
                        pool.returnBrokenResource(jedis);
                    } else {
                        pool.returnResource(jedis);
                    }
                }
            }
        }
    }
}