本文利用 Jsoup 工具從網站中爬取代理 IP,然後動態切換請求所使用的代理 IP 進行遠程訪問。
主要工作類:
/**
 * Entry point: scrapes a list of proxy addresses with Jsoup, then requests every
 * URL read from a local file once through each scraped proxy.
 */
public class Test {
    /** Page the proxy addresses are scraped from. */
    private static final String PROXY_LIST_URL = "http://www.xicidaili.com/nt";

    /** File handed to {@code FileUtil.getListFromFile}; each line is used as a URL to visit. */
    private static final String ARTICLE_LIST_PATH = "/Users/tianjia/Documents/article";

    /**
     * Runs {@link #parse()}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        parse();
    }

    /**
     * Fetches the proxy list, reads the URL list from disk and schedules one
     * {@code MyRun} task per URL on a cached thread pool.
     *
     * <p>If the proxy list cannot be fetched the error is printed and nothing is
     * scheduled, instead of handing a {@code null} list to the workers.
     */
    public static void parse() {
        List<String> list;
        try {
            list = getHtml();
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        List<String> articles = FileUtil.getListFromFile(ARTICLE_LIST_PATH);
        ExecutorService executorService = Executors.newCachedThreadPool();
        try {
            for (String article : articles) {
                executorService.execute(new MyRun(article, list));
            }
        } finally {
            // Already-submitted tasks still run to completion; this only stops the
            // pool from accepting more work so its threads can end afterwards.
            executorService.shutdown();
        }
    }

    /**
     * Downloads the proxy list page and extracts one {@code host:port} string per
     * matching table row.
     *
     * @return the scraped addresses, possibly empty, never {@code null}
     * @throws IOException if the page cannot be fetched
     */
    private static List<String> getHtml() throws IOException {
        // A failed fetch propagates to the caller; catching it here would leave
        // no document to parse.
        Document doc = Jsoup.connect(PROXY_LIST_URL)
                .userAgent("Mozilla")
                .get();

        List<String> list = new ArrayList<String>();
        for (Element row : doc.select("tr.odd")) {
            // Cells 1 and 2 are joined as host:port, the form visit() splits apart.
            // Rows without both cells are skipped.
            if (row.children().size() < 3) {
                continue;
            }
            list.add(row.child(1).text() + ":" + row.child(2).text());
        }
        return list;
    }

    /**
     * Requests {@code url} through the given proxy and discards the response.
     *
     * <p>The proxy is attached to this single connection rather than written to the
     * JVM-wide {@code http.proxyHost}/{@code http.proxyPort} properties, because this
     * method is called concurrently from pool threads and global properties would
     * let one worker's host be combined with another worker's port.
     *
     * @param ip  proxy address in {@code host:port} form; malformed values are
     *            reported on stderr and skipped
     * @param url the URL to request
     */
    public static void visit(String ip, String url) {
        if (ip == null) {
            System.err.println("Skipping null proxy address");
            return;
        }
        String[] r = ip.split(":");
        if (r.length != 2 || r[0].trim().isEmpty()) {
            System.err.println("Skipping malformed proxy address: " + ip);
            return;
        }

        int port;
        try {
            port = Integer.parseInt(r[1].trim());
        } catch (NumberFormatException e) {
            System.err.println("Skipping proxy with non-numeric port: " + ip);
            return;
        }
        if (port < 0 || port > 65535) {
            System.err.println("Skipping proxy with out-of-range port: " + ip);
            return;
        }

        try {
            Jsoup.connect(url)
                    .proxy(r[0].trim(), port)
                    .userAgent("Mozilla")
                    .get();
        } catch (IOException e) {
            // Best effort: a dead proxy must not stop the remaining visits.
            e.printStackTrace();
        }
    }
}
自定義線程類:
/**
 * Task that requests a single URL once through every proxy in a list, pausing
 * one second after each request.
 */
public class MyRun implements Runnable {
    /** Proxy addresses passed to {@code Test.visit}; not modified by this class. */
    private final List<String> list;
    /** URL requested through each proxy. */
    private final String urlString;

    /**
     * @param url  the URL to request
     * @param list proxy addresses to route the requests through
     */
    public MyRun(String url, List<String> list) {
        this.list = list;
        this.urlString = url;
    }

    /**
     * Visits the URL through each proxy in order. Does nothing when no proxy list
     * was supplied, and stops early if the thread is interrupted.
     */
    @Override
    public void run() {
        if (list == null) {
            return;
        }
        for (String proxy : list) {
            Test.visit(proxy, urlString);
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Restore the flag and stop, so that cancelling the task
                // (for example via shutdownNow on its executor) takes effect.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }
}
文件操作類:
/**
 * File helper for reading a text file into a list of lines.
 */
public class FileUtil {
    /**
     * Reads the file at {@code path} line by line, echoing each line to standard
     * output and collecting it into a list.
     *
     * <p>The file is decoded with the platform default charset. An I/O failure
     * (including a missing file) is printed and the lines read so far are returned.
     *
     * @param path path of the file to read
     * @return the lines in file order; empty if the file could not be opened
     */
    public static List<String> getListFromFile(String path) {
        List<String> list = new ArrayList<>();
        // try-with-resources closes the reader, and with it the underlying file
        // stream, on both the normal and the exceptional path.
        try (BufferedReader br =
                new BufferedReader(new InputStreamReader(new FileInputStream(path)))) {
            String data;
            while ((data = br.readLine()) != null) {
                System.out.println(data);
                list.add(data);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return list;
    }
}