前言
本文主要用於學習交流Java併發編程,錘鍊Java多線程使用的技能點
- 線程池
- 線程資源共享
- 多線程讀寫問題
使用IP代理方式,從代理商獲取IP,每天穩定增加訪問量2w(全部博客總量,而非單篇)左右。
此方式僅供學習研究,切勿實際使用!!!後果自負
實現思路
先通過博客列表頁面,獲得全部博客URL
開啓n個線程,每個線程依次間隔10s開始執行任務
獲取代理IP(200個),每個線程每隔60~259s的隨機時間間隔,通過代理訪問一個博客(對應代碼中的 nextInt(200) + 60)
如果你不停機,代理商不掛,此線程會刷到天荒地老!
此思路方法僅爲本人學習多線程,解決併發問題的模擬DEMO,不要真實使用到現實中,代碼提供僅供學習交流,希望大家有更好的思路可以提供!
還是要保障自己的博客質量高,纔是王道,喜歡的朋友點個讚唄~
使用攻略
- 初始化,執行StepOne(下面有源碼),獲取你所有的博客訪問地址
- 購買IP代理服務,我用的是大象代理
- 將訂單ID,按照代碼提示填寫
- 設置線程開啓數量
- 線程數量,電腦好點的自信點來個1000,大概一天能刷2w-4w,不建議設置太高size(50-2000)=訪問量(5k~50k)
- 默認300個,實測一天2W
- 開啓運行
具體代碼(實測可用)
import csdn.RefreshBlogThreadNew;
import java.util.HashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Entry point of the demo: builds the shared URL table and counters, then
 * starts a fixed-size pool of {@link RefreshBlogThreadNew} workers.
 *
 * <p>The URL table and the counter table are populated before any worker is
 * submitted and are only read afterwards; the per-URL and total counters are
 * {@link AtomicInteger}s so that concurrent increments are not lost.
 */
public class ExcuteLocal {

    /**
     * Usage:
     * <ol>
     *   <li>Run the main method of StepOne to print one {@code hashMap.put(...)}
     *       line per blog URL (or collect the URLs by hand).</li>
     *   <li>Paste those lines between the markers in {@link #getLocalBlogUrl()}.</li>
     *   <li>Put the order id issued by the proxy provider into {@code proxyOrderId}.</li>
     *   <li>Run this method.</li>
     * </ol>
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Order id issued by the proxy provider; replace the placeholder.
        String proxyOrderId = "55811xxxxx87931";

        // Number of worker threads; one long-running task is submitted per thread.
        int threadSize = 300;

        // URLs to request, keyed by a zero-based index.
        HashMap<Integer, String> localBlogUrl = getLocalBlogUrl();

        // One counter per URL, using exactly the same keys as the URL table so
        // that a worker can never pick a URL that has no counter.
        HashMap<Integer, AtomicInteger> localBlogUrlCount = new HashMap<Integer, AtomicInteger>();
        for (Integer key : localBlogUrl.keySet()) {
            localBlogUrlCount.put(key, new AtomicInteger());
        }

        // Total counter. An atomic type is used so that simultaneous increments
        // from several threads are not lost (a plain int++ is not atomic).
        AtomicInteger count = new AtomicInteger();

        // Name the pool's own worker threads. Wrapping a task in "new Thread(task, name)"
        // and handing it to execute() does not work: the pool runs it as a plain
        // Runnable on a worker thread, so the supplied name is never applied.
        final AtomicInteger threadSeq = new AtomicInteger();
        ExecutorService executorService = Executors.newFixedThreadPool(threadSize,
                new java.util.concurrent.ThreadFactory() {
                    @Override
                    public Thread newThread(Runnable task) {
                        return new Thread(task, "thread-refresh-" + threadSeq.incrementAndGet());
                    }
                });

        // Worker i waits i * 10 000 ms before it starts, staggering the start-up.
        for (int i = 1; i <= threadSize; i++) {
            executorService.execute(
                    new RefreshBlogThreadNew(i * 10000, localBlogUrl, localBlogUrlCount, count, proxyOrderId));
        }
    }

    /**
     * Builds the table of blog URLs.
     *
     * <p>Keys start at 0 and are contiguous. The workers choose a key with
     * {@code Random.nextInt(size)}, which yields {@code 0..size-1}; starting
     * the keys at 1 would leave key 0 empty and make the last URL unreachable.
     *
     * @return map from zero-based index to blog URL
     */
    public static HashMap<Integer, String> getLocalBlogUrl() {
        HashMap<Integer, String> hashMap = new HashMap<Integer, String>();
        int id = 0;
        // ---------------------------- paste generated lines below
        hashMap.put(id++, "https://blog.csdn.net/Mrkaizi/article/details/106178717");
        hashMap.put(id++, "https://blog.csdn.net/Mrkaizi/article/details/106242794");
        // ---------------------------- paste generated lines above
        return hashMap;
    }
}
package csdn;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Long-running worker: after an initial delay it fetches a list of proxy
 * endpoints, then repeatedly requests a randomly chosen URL from the shared
 * table, updating the shared counters after each successful request.
 *
 * <p>The URL table and counter table are shared between workers and are only
 * read here; the counters themselves are {@link AtomicInteger}s.
 *
 * <p>The task ends when its thread is interrupted or when the URL table is empty.
 */
public class RefreshBlogThreadNew implements Runnable {
    // Number of entries in the URL table; upper bound for the random key.
    private final int blogUrlSize;
    // URL table, key -> URL. Held per instance (it was a static field assigned from the constructor).
    private final HashMap<Integer, String> localBlogUrl;
    // Per-URL hit counters, expected to use the same keys as the URL table.
    private final HashMap<Integer, AtomicInteger> localBlogUrlCount;
    // Total number of successful requests across all workers.
    private final AtomicInteger count;
    // Initial delay before the worker starts, in milliseconds (passed straight to Thread.sleep).
    private final int sleepSec;
    private final String proxyOrderId;
    // One generator per worker instead of a new Random on every call.
    private final Random random = new Random();

    /**
     * @param sleepSec          initial delay in milliseconds, despite the name
     * @param localBlogUrl      URL table, key to URL
     * @param localBlogUrlCount per-URL counters
     * @param count             shared total counter
     * @param proxyOrderId      order id inserted into the proxy-list request URL
     */
    public RefreshBlogThreadNew(int sleepSec, HashMap<Integer, String> localBlogUrl, HashMap<Integer, AtomicInteger> localBlogUrlCount, AtomicInteger count, String proxyOrderId) {
        this.proxyOrderId = proxyOrderId;
        this.localBlogUrl = localBlogUrl;
        this.sleepSec = sleepSec;
        this.blogUrlSize = localBlogUrl.size();
        this.localBlogUrlCount = localBlogUrlCount;
        this.count = count;
    }

    @Override
    public void run() {
        String threadName = Thread.currentThread().getName();
        if (blogUrlSize == 0) {
            // Random.nextInt(0) would throw; there is nothing to do without URLs.
            System.out.println(threadName + " has no blog url configured, exiting");
            return;
        }
        System.out.println(threadName + "----sleep" + (sleepSec));
        try {
            Thread.sleep(sleepSec);
            System.out.println(threadName + "請求代理");
        } catch (InterruptedException e) {
            // Restore the flag and stop instead of carrying on as if nothing happened.
            Thread.currentThread().interrupt();
            return;
        }
        while (!Thread.currentThread().isInterrupted()) {
            // Ask the provider for 200 entries for the configured order id.
            String url = "http://tvp.daxiangdaili.com/ip/?tid=" + proxyOrderId + "&num=200&delay=5";
            List<MyIp> ipList = getIp(url);
            for (MyIp myIp : ipList) {
                // NOTE(review): these are JVM-wide system properties, so concurrent
                // workers overwrite each other's values.
                System.setProperty("http.maxRedirects", "50");
                System.getProperties().setProperty("proxySet", "true");
                System.getProperties().setProperty("http.proxyHost", myIp.getAddress());
                System.getProperties().setProperty("http.proxyPort", myIp.getPort());
                // NOTE(review): this loop only ends on interruption, so the remaining
                // entries of ipList are never reached. Left as in the original.
                while (true) {
                    visitRandomBlog(myIp);
                    try {
                        sleepThread(randomClick());
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        return;
                    }
                    show();
                }
            }
        }
    }

    // Requests one randomly chosen, non-blank URL and updates the counters on success.
    private void visitRandomBlog(MyIp myIp) {
        try {
            int id = 0;
            String urlStr = null;
            // Retry until the random key maps to a non-blank URL.
            while (StringUtils.isBlank(urlStr)) {
                id = randomBlogUrl();
                urlStr = localBlogUrl.get(id);
            }
            Document doc = Jsoup.connect(urlStr)
                    .userAgent("Mozilla")
                    .cookie("auth", "token")
                    .timeout(3000)
                    .get();
            if (doc != null) {
                count.incrementAndGet();
                AtomicInteger perUrl = localBlogUrlCount.get(id);
                if (perUrl != null) {
                    // Guard against a counter table that lacks this key.
                    perUrl.incrementAndGet();
                }
                System.out.print("ID: " + id + "\tAddress: " + (urlStr + "\t成功刷新次數: " + count + "\t") + "Proxy: " + myIp.toString() + "\t");
            }
        } catch (IOException e) {
            // Previously swallowed silently; report it so failures are visible.
            System.out.println("request failed: " + e);
        }
    }

    /**
     * Picks a random key for the URL table.
     *
     * @return a value in {@code [0, blogUrlSize)}
     */
    public int randomBlogUrl() {
        return random.nextInt(blogUrlSize);
    }

    /**
     * Picks the pause between two requests.
     *
     * @return a number of seconds in {@code [60, 259]}
     */
    public int randomClick() {
        return random.nextInt(200) + 60;
    }

    /**
     * Downloads the proxy list from {@code url} and parses it.
     *
     * <p>The response body is split on whitespace and every {@code host:port}
     * token becomes one {@code MyIp}. Tokens without both parts are skipped
     * (they used to cause an ArrayIndexOutOfBoundsException). If the request
     * fails or yields no usable entry, the method waits 5 s and tries again.
     *
     * @param url address of the proxy-list endpoint
     * @return the parsed entries; empty only if the thread was interrupted while waiting
     */
    public List<MyIp> getIp(String url) {
        List<MyIp> ipList = null;
        while (ipList == null) {
            try {
                // 1. Fetch the list.
                Document doc = Jsoup.connect(url)
                        .userAgent("Mozilla")
                        .cookie("auth", "token")
                        .timeout(3000)
                        .get();
                String bodyText = doc.body().text();
                System.out.println(bodyText);
                // 2. Normalise the text.
                String ipStr = bodyText.trim();
                System.out.println("當前使用ipStr----------" + ipStr);
                // 3. Split on whitespace and 4. convert each host:port token.
                List<MyIp> parsed = new ArrayList<MyIp>();
                for (final String ip : ipStr.split("\\s+")) {
                    String[] temp = ip.split(":");
                    if (temp.length < 2 || temp[0].trim().isEmpty() || temp[1].trim().isEmpty()) {
                        continue;
                    }
                    MyIp myIp = new MyIp();
                    myIp.setAddress(temp[0].trim());
                    myIp.setPort(temp[1].trim());
                    parsed.add(myIp);
                }
                if (parsed.isEmpty()) {
                    System.out.println("no usable host:port entry in response, retrying in 5s");
                    if (!pauseBeforeRetry()) {
                        return parsed;
                    }
                } else {
                    ipList = parsed;
                }
            } catch (IOException e) {
                System.out.println("加載文檔出錯,等待5s後重試");
                if (!pauseBeforeRetry()) {
                    return new ArrayList<MyIp>();
                }
            }
        }
        return ipList;
    }

    // Waits 5 s; returns false (with the interrupt flag restored) if interrupted.
    private boolean pauseBeforeRetry() {
        try {
            Thread.sleep(5000);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Sleeps for {@code s} seconds and then logs the duration.
     *
     * @param s pause length in seconds
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    public void sleepThread(int s) throws InterruptedException {
        // Multiply as long so that large values cannot overflow int.
        long ms = s * 1000L;
        Thread.sleep(ms);
        System.out.println("睡眠: " + s + "s");
    }

    /** Prints every per-URL counter for keys {@code 0..size-1}, then the total. */
    public void show() {
        System.out.println("訪問量統計:");
        for (int i = 0; i < localBlogUrlCount.size(); i++) {
            System.out.print("博客【" + i + "】:" + localBlogUrlCount.get(i) + "次\t");
        }
        System.out.println();
        System.out.println("總計:" + count + "次");
        System.out.println();
    }
}
package com.qyk;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Walks the article-list pages of one user and prints, for every article URL
 * found, a ready-to-paste {@code hashMap.put(id++, "...");} line.
 */
public class StepOne {
    // Blog user id, i.e. the path segment after https://blog.csdn.net/ on the user's home page.
    static String userId = "Mrkaizi";
    // Request counter; its parity selects which of two User-Agent strings doGet sends.
    static int random_num = 0;

    /**
     * Fetches list pages 1..99 with a one-second pause before each, collects
     * the article links, and stops at the first page containing the
     * "empty list" marker text.
     *
     * @param urlstr unused
     * @throws IOException          if a page cannot be fetched or read
     * @throws InterruptedException if the thread is interrupted during a pause
     */
    public static void main(String urlstr[]) throws IOException, InterruptedException {
        Set<String> urls = new HashSet<String>();
        // The page number is appended to this prefix.
        final String homeUrl = "https://blog.csdn.net/" + userId + "/article/list/";
        // userId is quoted so that regex metacharacters in it are matched literally.
        final String articleRegex = "(?<=href=\")https://blog.csdn.net/" + Pattern.quote(userId)
                + "/article/details/[0-9]{8,9}(?=\")";
        int totalPage = 0;
        for (int i = 1; i < 100; i++) {
            Thread.sleep(1000);
            System.out.println("finding page " + i);
            String curUrl = homeUrl + i;
            System.out.println(curUrl);
            String pageStr; // full HTML of the page
            InputStream is = doGet(curUrl);
            try {
                pageStr = inputStreamToString(is, "UTF-8");
            } finally {
                // The stream was never closed before, leaking one connection per page.
                is.close();
            }
            urls.addAll(getMatherSubstrs(pageStr, articleRegex));
            if (pageStr.lastIndexOf("空空如也") != -1) {
                System.out.println("No This Page!");
                break;
            } else {
                System.out.println("Success~");
            }
            totalPage = i;
        }
        System.out.println("總頁數爲: " + totalPage);
        System.out.println("打印每個鏈接");
        for (String s : urls) {
            System.out.println("hashMap.put(id++, \"" + s + "\");");
        }
    }

    /**
     * Opens a GET connection to {@code urlstr} with a browser-like User-Agent
     * (alternating between two values) and a Referer header.
     *
     * @param urlstr address to fetch
     * @return the response body stream; the caller is responsible for closing it
     * @throws IOException if the connection cannot be opened
     */
    public static InputStream doGet(String urlstr) throws IOException {
        URL url = new URL(urlstr);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        if (random_num++ % 2 == 0) {
            conn.setRequestProperty("User-Agent",
                    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36");
        } else {
            conn.setRequestProperty("User-Agent",
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36");
        }
        conn.setRequestProperty("Referer",
                "https://blog.csdn.net");
        return conn.getInputStream();
    }

    /**
     * Reads {@code is} to the end and decodes the bytes with {@code charset}.
     *
     * <p>All bytes are collected first and decoded once. Decoding each
     * 1024-byte chunk on its own corrupts any multi-byte character that
     * straddles a chunk boundary.
     *
     * @param is      stream to read; it is not closed by this method
     * @param charset name of the charset used for decoding
     * @return the decoded content
     * @throws IOException if reading fails or the charset is not supported
     */
    public static String inputStreamToString(InputStream is, String charset) throws IOException {
        // Fully qualified to avoid touching the file's import list.
        java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
        byte[] bytes = new byte[1024];
        int byteLength;
        while ((byteLength = is.read(bytes)) != -1) {
            collected.write(bytes, 0, byteLength);
        }
        return collected.toString(charset);
    }

    /**
     * Returns every substring of {@code str} matched by {@code regex}, in order
     * of appearance.
     *
     * @param str   text to search
     * @param regex regular expression
     * @return the matches; empty if there are none
     */
    public static List<String> getMatherSubstrs(String str, String regex) {
        List<String> list = new ArrayList<String>();
        Matcher m = Pattern.compile(regex).matcher(str);
        while (m.find()) {
            list.add(m.group());
        }
        return list;
    }
}
項目需要Maven依賴如下(另外,RefreshBlogThreadNew 中使用了 org.apache.commons.lang3.StringUtils,還需自行添加 commons-lang3 依賴)
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.3</version>
</dependency>
喜歡的朋友點個讚唄,收個藏唄~