給女朋友推送睡前小故事

上一篇寫了給女朋友推送天氣的,但是未免太單調,來個推送睡前小故事

這次用的是這個網站:http://www.tom61.com/ertongwenxue/shuiqiangushi/index_2.html ,需要用到一丟丟爬蟲知識。
首先,得到列表頁所有故事的url,還有長度,也就是多少個故事

def get_list(first_page=2, last_page=10):
    """Collect story URLs from the tom61.com bedtime-story index pages.

    Index pages that cannot be fetched, or that do not contain the expected
    ``<dl class="txt_box">`` list, are skipped instead of aborting the run.

    Args:
        first_page: Number of the first ``index_N.html`` page to scrape.
        last_page: Number of the last index page to scrape (inclusive).

    Returns:
        A ``(links, len_links)`` tuple: the list of absolute story URLs that
        were found, and the length of that list.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from urllib.parse import urljoin

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    }
    links = []
    for page in range(first_page, last_page + 1):
        url = 'http://www.tom61.com/ertongwenxue/shuiqiangushi/index_{}.html'.format(page)
        print(url)
        print('*' * 50)
        try:
            # Without a timeout a stalled connection would block the job forever.
            rq = requests.get(url, headers=headers, timeout=10)
            rq.raise_for_status()
        except requests.RequestException as err:
            print(err)
            continue
        rq.encoding = 'utf8'
        soup = BeautifulSoup(rq.text, 'lxml')
        story_box = soup.find('dl', class_='txt_box')
        if story_box is None:
            # No story list on this page; move on to the next index page.
            continue
        for entry in story_box.find_all('dd'):
            anchor = entry.find('a')
            if anchor is None or not anchor.get('href'):
                continue
            # urljoin resolves root-relative, page-relative and absolute hrefs.
            links.append(urljoin(url, anchor['href']))
    len_links = len(links)
    print(len_links)
    return links, len_links

接下來是獲取故事內容了,說下思路。首先我們不能發送重複的故事。要解決這個問題,可以這樣做:每發送一個故事,就把它的url存起來,下次運行時先檢查是否已經發送過,我代碼裏的兩個with open就是幹這事的。
然後我們用了while循環,直到發送成功,return退出循環。然而我取的是第二頁到第十頁的故事,總有發完的時候,到時候一時不察,它一直循環也佔資源不是,而且想着也覺得累。所以這時候那個len_links(故事個數)就派上用場了,循環所有故事總個數的次數,因爲我用的random隨機取,循環這麼多次雖然不能肯定就沒故事可發了,但是估計也沒剩多少了。這時候讓它break就行了。要是你女朋友跟你說昨天沒給她發故事,到時候改下代碼,跑第十一頁往後的就行了。

def get_massage(urls,len_links):
    """Email one story that has not been sent before and record its URL.

    URLs that were already sent are read from ``gushi_url.txt`` (a
    comma-separated list); a missing file is treated as "nothing sent yet".
    The unsent URLs are tried in random order until one story page is fetched
    and parsed, that story is passed to ``email_s``, and its URL is appended
    to the history file.

    Args:
        urls: Story page URLs to choose from.
        len_links: Number of entries in ``urls``. Accepted for backward
            compatibility; the number of attempts is bounded by the unsent
            URLs themselves.

    Returns:
        ``False`` once a story has been emailed and recorded, ``None`` when
        no unsent story could be delivered.
    """
    history_path = "gushi_url.txt"
    try:
        with open(history_path, "r", encoding="utf-8") as history:
            # The file ends with a trailing comma, so drop empty fields.
            sent = set(filter(None, history.read().split(',')))
    except FileNotFoundError:
        sent = set()

    # Choosing only among unsent URLs guarantees that a remaining story is
    # found if there is one, and that the loop always terminates.
    candidates = [url for url in urls if url not in sent]
    random.shuffle(candidates)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    }
    for link in candidates:
        try:
            rq = requests.get(link, headers=headers, timeout=10)
            rq.raise_for_status()
        except requests.RequestException as err:
            print(err)
            continue
        print(link)
        rq.encoding = 'utf8'
        soup = BeautifulSoup(rq.text, 'lxml')
        title_box = soup.find('div', class_='t_news')
        heading = title_box.find('h1') if title_box is not None else None
        body = soup.find('div', class_='t_news_txt')
        if heading is None or body is None:
            # Page does not have the expected layout; try another story.
            continue
        title = heading.get_text()
        print(title)
        email_s(title, body.get_text())
        # Record the URL only after the email call returned without raising.
        with open(history_path, "a", encoding="utf-8") as history:
            history.write(link + ",")
        return False

    print('沒資源了')
    return None

接下來就是發送郵件了,這裏不詳細寫了,上一篇推送天氣已經寫了

def email_s(title,content):
    """Email one bedtime story through the 126.com SMTP server.

    The sender address, password and recipient address below are
    placeholders and must be replaced with real values before use.

    Args:
        title: Story title; used in the subject line and in the body.
        content: Story text placed in the body of the message.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from email.utils import formataddr

    # Sender email address and password:
    from_addr = '。。@126.com'
    password = '。。'
    # Recipient address:
    to_addr = '。。。。@qq.com'

    # SMTP server address:
    smtp_server = 'smtp.126.com'

    msg = MIMEText('寶貝兒,睡前小故事來嘍。\n{}\n{}\n今天的故事講完啦寶貝兒,快睡吧,晚安,開心每一天哦!'.format(str(title),str(content)), 'plain', 'utf-8')
    # A From header needs an actual mailbox next to the display name.
    msg['From'] = formataddr(('來自在乎你的人的問候', from_addr))
    msg['To'] = to_addr
    msg['Subject'] = Header('今天的故事是:'+title, 'utf-8')

    # The context manager closes the connection even if login or sending
    # raises. 25 is the default SMTP port.
    with smtplib.SMTP(smtp_server, 25) as server:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())

建立job函數作爲啓動函數

def job():
    """Run one delivery: gather the story links, then send one story."""
    story_urls, story_count = get_list()
    get_massage(story_urls, story_count)

最後,完整代碼在這

import requests
from bs4 import BeautifulSoup
from email.mime.text import MIMEText
from email.header import Header
import smtplib
import random
import time
from apscheduler.schedulers.blocking import BlockingScheduler
from datetime import datetime
def get_list(first_page=2, last_page=10):
    """Collect story URLs from the tom61.com bedtime-story index pages.

    Index pages that cannot be fetched, or that do not contain the expected
    ``<dl class="txt_box">`` list, are skipped instead of aborting the run.

    Args:
        first_page: Number of the first ``index_N.html`` page to scrape.
        last_page: Number of the last index page to scrape (inclusive).

    Returns:
        A ``(links, len_links)`` tuple: the list of absolute story URLs that
        were found, and the length of that list.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from urllib.parse import urljoin

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    }
    links = []
    for page in range(first_page, last_page + 1):
        url = 'http://www.tom61.com/ertongwenxue/shuiqiangushi/index_{}.html'.format(page)
        print(url)
        print('*' * 50)
        try:
            # Without a timeout a stalled connection would block the job forever.
            rq = requests.get(url, headers=headers, timeout=10)
            rq.raise_for_status()
        except requests.RequestException as err:
            print(err)
            continue
        rq.encoding = 'utf8'
        soup = BeautifulSoup(rq.text, 'lxml')
        story_box = soup.find('dl', class_='txt_box')
        if story_box is None:
            # No story list on this page; move on to the next index page.
            continue
        for entry in story_box.find_all('dd'):
            anchor = entry.find('a')
            if anchor is None or not anchor.get('href'):
                continue
            # urljoin resolves root-relative, page-relative and absolute hrefs.
            links.append(urljoin(url, anchor['href']))
    len_links = len(links)
    print(len_links)
    return links, len_links

def get_massage(urls,len_links):
    """Email one story that has not been sent before and record its URL.

    URLs that were already sent are read from ``gushi_url.txt`` (a
    comma-separated list); a missing file is treated as "nothing sent yet".
    The unsent URLs are tried in random order until one story page is fetched
    and parsed, that story is passed to ``email_s``, and its URL is appended
    to the history file.

    Args:
        urls: Story page URLs to choose from.
        len_links: Number of entries in ``urls``. Accepted for backward
            compatibility; the number of attempts is bounded by the unsent
            URLs themselves.

    Returns:
        ``False`` once a story has been emailed and recorded, ``None`` when
        no unsent story could be delivered.
    """
    history_path = "gushi_url.txt"
    try:
        with open(history_path, "r", encoding="utf-8") as history:
            # The file ends with a trailing comma, so drop empty fields.
            sent = set(filter(None, history.read().split(',')))
    except FileNotFoundError:
        sent = set()

    # Choosing only among unsent URLs guarantees that a remaining story is
    # found if there is one, and that the loop always terminates.
    candidates = [url for url in urls if url not in sent]
    random.shuffle(candidates)

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    }
    for link in candidates:
        try:
            rq = requests.get(link, headers=headers, timeout=10)
            rq.raise_for_status()
        except requests.RequestException as err:
            print(err)
            continue
        print(link)
        rq.encoding = 'utf8'
        soup = BeautifulSoup(rq.text, 'lxml')
        title_box = soup.find('div', class_='t_news')
        heading = title_box.find('h1') if title_box is not None else None
        body = soup.find('div', class_='t_news_txt')
        if heading is None or body is None:
            # Page does not have the expected layout; try another story.
            continue
        title = heading.get_text()
        print(title)
        # The story must actually be mailed before its URL is marked as sent.
        email_s(title, body.get_text())
        with open(history_path, "a", encoding="utf-8") as history:
            history.write(link + ",")
        return False

    print('沒資源了')
    return None
def email_s(title,content):
    """Email one bedtime story through the 126.com SMTP server.

    The sender address, password and recipient address below are
    placeholders and must be replaced with real values before use.

    Args:
        title: Story title; used in the subject line and in the body.
        content: Story text placed in the body of the message.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from email.utils import formataddr

    # Sender email address and password:
    from_addr = '。。@126.com'
    password = '。。'
    # Recipient address:
    to_addr = '。。。。@qq.com'

    # SMTP server address:
    smtp_server = 'smtp.126.com'

    msg = MIMEText('寶貝兒,睡前小故事來嘍。\n{}\n{}\n今天的故事講完啦寶貝兒,快睡吧,晚安,開心每一天哦!'.format(str(title),str(content)), 'plain', 'utf-8')
    # A From header needs an actual mailbox next to the display name.
    msg['From'] = formataddr(('來自在乎你的人的問候', from_addr))
    msg['To'] = to_addr
    msg['Subject'] = Header('今天的故事是:'+title, 'utf-8')

    # The context manager closes the connection even if login or sending
    # raises. 25 is the default SMTP port.
    with smtplib.SMTP(smtp_server, 25) as server:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())
def job():
    """Run one delivery: gather the story links, then send one story."""
    story_urls, story_count = get_list()
    get_massage(story_urls, story_count)
if __name__ == '__main__':
    # For a one-off run without the scheduler, call job() here instead.
    # Scheduled task: run the program every day at 23:30. The author's other
    # blog post covers how to configure different run times.
    daily_runner = BlockingScheduler()
    daily_runner.add_job(job, trigger='cron', minute=30, hour=23)
    daily_runner.start()

到此,程序就完成了,歡迎提問。
最後老規矩,看一下運行效果吧
在這裏插入圖片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章