webmagic每次在爬取一個網頁時都會將他丟到線程池中的線程去處理,線程池有相應的大小閥值,其中用到了ReentrantLock 重入鎖機制控,在超出閥值後等待,防止過多的任務進入
將目標url(request)放入線程池中執行
final Request requestFinal = request;
threadPool.execute(new Runnable() {
@Override
public void run() {
try {
processRequest(requestFinal);
onSuccess(requestFinal);
} catch (Exception e) {
onError(requestFinal);
logger.error("process request " + requestFinal + " error", e);
} finally {
if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
site.returnHttpProxyToPool((HttpHost) requestFinal.getExtra(Request.PROXY), (Integer) requestFinal
.getExtra(Request.STATUS_CODE));
}
pageCount.incrementAndGet();
signalNewUrl();
}
}
});
封裝的線程池源代碼
package us.codecraft.webmagic.thread;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
/**
* Thread pool for workers.<br><br>
* Use {@link java.util.concurrent.ExecutorService} as inner implement. <br><br>
* New feature: <br><br>
* 1. Block when thread pool is full to avoid poll many urls without process. <br><br>
* 2. Count of thread alive for monitor.
*
* @author [email protected]
* @since 0.5.0
*/
public class CountableThreadPool {
private int threadNum;
private AtomicInteger threadAlive = new AtomicInteger();
private ReentrantLock reentrantLock = new ReentrantLock();
private Condition condition = reentrantLock.newCondition();
public CountableThreadPool(int threadNum) {
this.threadNum = threadNum;
this.executorService = Executors.newFixedThreadPool(threadNum);
}
public CountableThreadPool(int threadNum, ExecutorService executorService) {
this.threadNum = threadNum;
this.executorService = executorService;
}
public void setExecutorService(ExecutorService executorService) {
this.executorService = executorService;
}
public int getThreadAlive() {
return threadAlive.get();
}
public int getThreadNum() {
return threadNum;
}
private ExecutorService executorService;
public void execute(final Runnable runnable) {
if (threadAlive.get() >= threadNum) {
try {
reentrantLock.lock();
while (threadAlive.get() >= threadNum) {
try {
condition.await();
} catch (InterruptedException e) {
}
}
} finally {
reentrantLock.unlock();
}
}
threadAlive.incrementAndGet();
executorService.execute(new Runnable() {
@Override
public void run() {
try {
runnable.run();
} finally {
try {
reentrantLock.lock();
threadAlive.decrementAndGet();
condition.signal();
} finally {
reentrantLock.unlock();
}
}
}
});
}
public boolean isShutdown() {
return executorService.isShutdown();
}
public void shutdown() {
executorService.shutdown();
}
}
初始化線程池
if (threadPool == null || threadPool.isShutdown()) {
if (executorService != null && !executorService.isShutdown()) {
threadPool = new CountableThreadPool(threadNum, executorService);
} else {
threadPool = new CountableThreadPool(threadNum);
}
}
--------------
spring mvc+tomcat源碼分析視頻 (複製鏈接在瀏覽器打開)