import requests
from lxml import etree
import random
import smtplib
import requests
from email.mime.text import MIMEText
import re
import schedule
import time
#請求連接獲取文章內容
def deal_url(url):
header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
proxyIPs = ['27.188.64.70','163.204.246.139','121.13.252.60','180.118.247.69','111.75.223.9','1.193.244.92']
proxyIP = random.choice(proxyIPs)
proxies = {
'http': proxyIP,
'https': proxyIP
}
html = requests.get(url,proxies).text
return html
#獲取更新的鏈接
def geturls():
url = 'https://language.chinadaily.com.cn/news_bilingual/'
html = deal_url(url)
alist = [i.start() for i in re.finditer('gy_box_txt2',html)] #查找到首頁中包含文章的鏈接起始位置
da = []
urllist = []
for i in range(len(alist)):
dd = html[alist[i]+68:alist[i]+150] #截取鏈接字段
if dd.count('"')>=2:
da = [i.start() for i in re.finditer('"',dd)] #定位到超鏈接引號數組位置
dd = dd[da[0]+1:da[1]] #提取兩個引號中的字段
urllist.append(dd) #取到的鏈接存入列表
return urllist
#獲取到正文和標題
def getContent():
#隨機獲取一篇文章
ran = random.randint(0,len(geturls())-1)
url = 'https:'+geturls()[0]
html = deal_url(url)
html0 = etree.HTML(html)
#分割出標題
title0 = ''
title = [i.start() for i in re.finditer('main_title1',html)]
tit = html[title[0]+12:title[0]+100]
a = tit.index('>')
b = tit.index('<')
title0 = tit[a+1:b]
#分割出正文部分
datas = ''
for i in range(1,100,1):
data = str(html0.xpath('//*[@id="Content"]/p['+str(i)+']/text()'))
ss = '\xa0|\[\]|\"]|\[\"|\[\'|\'\]' #正則匹配去除[''],[""]
data = re.sub(ss,'',data)
data = re.sub(r'\\xa0','',data) #正則匹配去除\xa0
data += '\n'*2
if data.find('來源:')<=0:
datas+=data
else:
break
return title0,datas
#郵箱發送
def sendtoEmail():
msg = MIMEText(getContent()[1]) #發送正文部分
msg["Subject"] = '【原視界文章推送】'+getContent()[0] #發送標題部分
msg["From"] = user
msg["To"] = to
try:
s = smtplib.SMTP_SSL("smtp.qq.com", 465)
s.login(user, pwd)
s.sendmail(user, to, msg.as_string())
s.quit()
print("發送成功!",time.ctime(time.time()))
except smtplib.SMTPException as e:
print ("發送出錯,%s" %e)
#設置定時發送
def send_mail_by_schedule():
schedule.every().day.at("12:00").do(sendtoEmail)
schedule.every().day.at("18:00").do(sendtoEmail)
while True:
schedule.run_pending()
time.sleep(1)
if __name__=="__main__":
print('=======訂閱系統=======')
global user,pwd,to
user = input('請輸入QQ號:')+'@qq.com'
pwd = input('請輸入郵箱授權碼:')
to = input('請輸入訂閱號:')+'@qq.com'
print('蟲子管家已爲您開啓訂閱,請勿退出此程序並保持網絡通暢,訂閱文章於每天12:00和18:00準時發送至您的郵箱!')
send_mail_by_schedule()