webmagic每次在爬取一个网页时都会将他丢到线程池中的线程去处理,线程池有相应的大小阀值,其中用到了ReentrantLock 重入锁机制控,在超出阀值后等待,防止过多的任务进入
将目标url(request)放入线程池中执行
final Request requestFinal = request;
threadPool.execute(new Runnable() {
@Override
public void run() {
try {
processRequest(requestFinal);
onSuccess(requestFinal);
} catch (Exception e) {
onError(requestFinal);
logger.error("process request " + requestFinal + " error", e);
} finally {
if (site.getHttpProxyPool()!=null && site.getHttpProxyPool().isEnable()) {
site.returnHttpProxyToPool((HttpHost) requestFinal.getExtra(Request.PROXY), (Integer) requestFinal
.getExtra(Request.STATUS_CODE));
}
pageCount.incrementAndGet();
signalNewUrl();
}
}
});
封装的线程池源代码
package us.codecraft.webmagic.thread;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
/**
* Thread pool for workers.<br><br>
* Use {@link java.util.concurrent.ExecutorService} as inner implement. <br><br>
* New feature: <br><br>
* 1. Block when thread pool is full to avoid poll many urls without process. <br><br>
* 2. Count of thread alive for monitor.
*
* @author [email protected]
* @since 0.5.0
*/
public class CountableThreadPool {
private int threadNum;
private AtomicInteger threadAlive = new AtomicInteger();
private ReentrantLock reentrantLock = new ReentrantLock();
private Condition condition = reentrantLock.newCondition();
public CountableThreadPool(int threadNum) {
this.threadNum = threadNum;
this.executorService = Executors.newFixedThreadPool(threadNum);
}
public CountableThreadPool(int threadNum, ExecutorService executorService) {
this.threadNum = threadNum;
this.executorService = executorService;
}
public void setExecutorService(ExecutorService executorService) {
this.executorService = executorService;
}
public int getThreadAlive() {
return threadAlive.get();
}
public int getThreadNum() {
return threadNum;
}
private ExecutorService executorService;
public void execute(final Runnable runnable) {
if (threadAlive.get() >= threadNum) {
try {
reentrantLock.lock();
while (threadAlive.get() >= threadNum) {
try {
condition.await();
} catch (InterruptedException e) {
}
}
} finally {
reentrantLock.unlock();
}
}
threadAlive.incrementAndGet();
executorService.execute(new Runnable() {
@Override
public void run() {
try {
runnable.run();
} finally {
try {
reentrantLock.lock();
threadAlive.decrementAndGet();
condition.signal();
} finally {
reentrantLock.unlock();
}
}
}
});
}
public boolean isShutdown() {
return executorService.isShutdown();
}
public void shutdown() {
executorService.shutdown();
}
}
初始化线程池
if (threadPool == null || threadPool.isShutdown()) {
if (executorService != null && !executorService.isShutdown()) {
threadPool = new CountableThreadPool(threadNum, executorService);
} else {
threadPool = new CountableThreadPool(threadNum);
}
}
--------------
spring mvc+tomcat源码分析视频 (复制链接在浏览器打开)