注意:由於存在代理限制問題,需要自己搭建代理池,否則效果不明顯。
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Author : {Jack Zhao}
@Time : 2019/9/3 17:46
@Contact : {[email protected]}
@Desc :
'''
# !/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
from lxml import etree
import time
def auto_click(url, num=1, pages=2, timeout=10):
    """Visit every article listed on a CSDN blog to simulate reader clicks.

    Each round first requests ``url``; if that returns HTTP 200 the
    article-list pages ``<url>/article/list/<i>?`` for ``i`` in
    ``range(pages)`` are fetched, the article links are extracted with XPath
    and each article is requested once. After every round ``num`` is
    decremented and the function sleeps for two seconds.

    Args:
        url: Blog home page, e.g. ``http://blog.csdn.net/<user>``. The
            article-list links are derived from it.
        num: Number of rounds to run. Zero or a negative value performs no
            requests at all.
        pages: How many article-list pages to walk per round.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        None.
    """
    # Built once: neither the headers nor the list-page prefix change per round.
    req_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    }
    base_page_link = url.rstrip('/') + '/article/list/'

    # Compare against zero instead of relying on truthiness, so a negative
    # count cannot count down forever.
    while num > 0:
        try:
            # Request the blog home page; the round proceeds only if it is reachable.
            resp = requests.get(url, headers=req_headers, timeout=timeout)
            if resp.status_code == requests.codes.ok:
                for i in range(pages):
                    real_page_link = base_page_link + str(i) + '?'
                    # Collect all article links on this list page.
                    resp = requests.get(real_page_link, headers=req_headers, timeout=timeout)
                    if resp.status_code != requests.codes.ok:
                        continue
                    html = etree.HTML(resp.text)
                    article_links = html.xpath('//div[@class="article-list"]//h4/a/@href')
                    # Request every article once to simulate a click.
                    for article_link in article_links:
                        try:
                            requests.get(article_link, headers=req_headers, timeout=timeout)
                        except requests.RequestException:
                            # One unreachable article must not abort the round.
                            print('wrong!')
                            continue
                        print('正在第 [-{0}] 次點擊 {1}'.format(num, article_link))
        except requests.RequestException:
            # Home page or list page failed; give up on this round only.
            print('wrong!')
        num -= 1
        time.sleep(2)
if __name__ == '__main__':
    # Script entry point: run 500 click rounds against the blog home page.
    url = 'http://blog.csdn.net/weixin_40539952'
    num = 500
    auto_click(url, num)
多線程版本也很簡單,代碼如下:
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@Author : {Jack Zhao}
@Time : 2020/05/08 17:46
@Contact : {[email protected]}
@Desc : 增加多線程功能
'''
# !/usr/bin/env python
# -*- coding:utf-8 -*-
import requests
import time
from lxml import etree
from threading import Thread
def auto_click(url="http://blog.csdn.net/weixin_40539952", num=500, pages=4, timeout=10):
    """Visit every article listed on a CSDN blog to simulate reader clicks.

    Intended to be run as a thread target. Each round first requests
    ``url``; if that returns HTTP 200 the article-list pages
    ``<url>/article/list/<i>?`` for ``i`` in ``range(pages)`` are fetched,
    the article links are extracted with XPath and each article is requested
    once, printing ``OK!`` or ``wrong!`` per attempt. After every round
    ``num`` is decremented and the function sleeps for two seconds.

    Args:
        url: Blog home page, e.g. ``http://blog.csdn.net/<user>``. The
            article-list links are derived from it.
        num: Number of rounds to run. Zero or a negative value performs no
            requests at all.
        pages: How many article-list pages to walk per round.
        timeout: Per-request timeout in seconds passed to ``requests.get``.

    Returns:
        None.
    """
    # Built once: neither the headers nor the list-page prefix change per round.
    req_headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'
    }
    base_page_link = url.rstrip('/') + '/article/list/'

    # Compare against zero instead of relying on truthiness, so a negative
    # count cannot count down forever.
    while num > 0:
        try:
            # Request the blog home page; the round proceeds only if it is reachable.
            resp = requests.get(url, headers=req_headers, timeout=timeout)
            if resp.status_code == requests.codes.ok:
                for i in range(pages):
                    real_page_link = base_page_link + str(i) + '?'
                    # Collect all article links on this list page.
                    resp = requests.get(real_page_link, headers=req_headers, timeout=timeout)
                    if resp.status_code != requests.codes.ok:
                        continue
                    html = etree.HTML(resp.text)
                    article_links = html.xpath('//div[@class="article-list"]//h4/a/@href')
                    # Request every article once to simulate a click.
                    for article_link in article_links:
                        print('正在第 [-{0}] 次點擊 {1}'.format(num, article_link))
                        try:
                            requests.get(article_link, headers=req_headers, timeout=timeout)
                        except requests.RequestException:
                            # Only network/URL errors are swallowed, so
                            # KeyboardInterrupt and real bugs still surface.
                            print("wrong!")
                        else:
                            print('OK!')
        except requests.RequestException:
            # Home page or list page failed; keep the worker thread alive
            # and retry on the next round.
            print("wrong!")
        num -= 1
        time.sleep(2)
if __name__ == '__main__':
    # Create four worker threads, each running auto_click with its defaults.
    t_list = [Thread(target=auto_click) for _ in range(4)]
    # Start every thread.
    for t in t_list:
        t.start()
    # Block until all worker threads have finished.
    for t in t_list:
        t.join()