给女朋友推送睡前小故事

上一篇写了给女朋友推送天气,但只推天气未免太单调,这次来个推送睡前小故事。

这次用的是这个网站:http://www.tom61.com/ertongwenxue/shuiqiangushi/index_2.html ,抓取它需要用到一丢丢爬虫知识。
首先,获取列表页中所有故事的url,以及url的总数,也就是一共有多少个故事。

def get_list():
    """Collect story URLs from list pages 2 through 10 of www.tom61.com.

    Each list page is fetched and every ``<dd>`` inside the
    ``<dl class="txt_box">`` element contributes the ``href`` of its first
    ``<a>`` tag, prefixed with the site root.

    Returns:
        tuple: ``(links, len_links)`` where ``links`` is the list of absolute
        story URLs and ``len_links`` is ``len(links)``.

    Raises:
        requests.exceptions.RequestException: propagated from ``requests.get``
            (for example when the 10-second timeout expires).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    }
    links = []
    for page in range(2, 11):
        url = 'http://www.tom61.com/ertongwenxue/shuiqiangushi/index_{}.html'.format(page)
        # Without a timeout a stalled connection would block the caller forever.
        rq = requests.get(url, headers=headers, timeout=10)
        print(url)
        print('*' * 50)
        rq.encoding = 'utf8'
        soup = BeautifulSoup(rq.text, 'lxml')
        box = soup.find('dl', class_='txt_box')
        if box is None:
            # The expected list container is absent on this page; skip it
            # instead of failing with AttributeError on None.
            continue
        for item in box.find_all('dd'):
            anchor = item.find('a')
            if anchor is None or not anchor.get('href'):
                continue
            links.append('http://www.tom61.com' + anchor['href'])
    len_links = len(links)
    print(len_links)
    return links, len_links

接下来是获取故事内容了,说下思路。首先我们不能发送重复的故事。要解决这个问题,可以每发送一个故事,就把它的url存起来,下次运行时先检查这个url是否已经发送过,我代码里的两个with open就是干这事的。
然后我们用了while循环,直到发送成功,return退出循环。然而我取的是第二页到第十页的故事,总有发完的时候,到时候一时不察,它一直循环也占资源不是,而且想着也觉得累。所以这时候那个len_links(故事个数)就派上用场了:最多循环故事总个数那么多次。因为我用的是random随机取,循环这么多次虽然不能肯定就没故事可发了,但是估计也没剩多少了,这时候让它break就行了。哪天你女朋友跟你说昨天没给她发故事,到时候改下代码,跑第十一页往后的就行了。

def get_massage(urls,len_links):
    """Pick a random story that has not been sent yet, email it, and record it.

    URLs already sent are stored comma-separated in ``gushi_url.txt``. Up to
    ``len_links`` random picks are made from ``urls``; the first pick that is
    not in the history is downloaded, mailed with ``email_s`` and appended to
    the history file.

    Args:
        urls: list of story URLs to choose from.
        len_links: maximum number of random picks before giving up.

    Returns:
        ``False`` once a story has been sent, ``None`` if every attempt hit an
        already-sent (or unparsable) story or ``urls`` is empty.
    """
    # Load the history once. The file only changes right before this function
    # returns, so re-reading it on every attempt is unnecessary. A missing
    # file simply means nothing has been sent yet (first run).
    try:
        with open("gushi_url.txt", "r", encoding="utf-8") as history:
            sent = {u for u in history.read().split(',') if u}
    except FileNotFoundError:
        sent = set()

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    }
    attempts = 0
    # `urls` is checked because random.choice raises IndexError on an empty
    # sequence; the counter allows exactly len_links attempts.
    while urls and attempts < len_links:
        attempts += 1
        link = random.choice(urls)
        if link in sent:
            continue
        # Without a timeout a stalled connection would block the caller forever.
        rq = requests.get(link, headers=headers, timeout=10)
        print(link)
        rq.encoding = 'utf8'
        soup = BeautifulSoup(rq.text, 'lxml')
        title_box = soup.find('div', class_='t_news')
        heading = title_box.find('h1') if title_box is not None else None
        body = soup.find('div', class_='t_news_txt')
        if heading is None or body is None:
            # The page lacks the expected title/text elements; try another pick.
            continue
        title = heading.get_text()
        print(title)
        content = body.get_text()
        email_s(title, content)
        # Record the URL only after the mail call returned without raising.
        with open("gushi_url.txt", "a", encoding="utf-8") as history:
            history.write(link + ",")
        return False
    print('没资源了')
    return None

接下来就是发送邮件了,这里不详细写了,上一篇推送天气已经写了

def email_s(title,content):
    """Send one bedtime story as a plain-text email through smtp.126.com.

    Args:
        title: story title; used in the subject line and in the body.
        content: story text placed in the body after the title.

    Raises:
        smtplib.SMTPException: propagated from login/sendmail on failure.
    """
    # Sender address and password (placeholders; fill in real values and
    # avoid committing real credentials).
    from_addr = '。。@126.com'
    password = '。。'
    # Recipient address (placeholder).
    to_addr = '。。。。@qq.com'

    # SMTP server of the sender's mail provider.
    smtp_server = 'smtp.126.com'

    msg = MIMEText('宝贝儿,睡前小故事来喽。\n{}\n{}\n今天的故事讲完啦宝贝儿,快睡吧,晚安,开心每一天哦!'.format(str(title),str(content)), 'plain', 'utf-8')
    msg['From'] = Header('来自在乎你的人的问候', 'utf-8')
    msg['To'] = to_addr
    # str() mirrors the body formatting above so a non-str title cannot break
    # the concatenation.
    msg['Subject'] = Header('今天的故事是:' + str(title), 'utf-8')

    # The context manager issues QUIT and closes the connection even when
    # login or sendmail raises. 25 is the default SMTP port.
    with smtplib.SMTP(smtp_server, 25) as server:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())

建立job函数作为启动函数

def job():
    """Run one push cycle: gather the story links, then send one story."""
    story_urls, story_count = get_list()
    get_massage(story_urls, story_count)

最后,完整代码在这

import requests
from bs4 import BeautifulSoup
from email.mime.text import MIMEText
from email.header import Header
import smtplib
import random
import time
from apscheduler.schedulers.blocking import BlockingScheduler
from datetime import datetime
def get_list():
    """Collect story URLs from list pages 2 through 10 of www.tom61.com.

    Each list page is fetched and every ``<dd>`` inside the
    ``<dl class="txt_box">`` element contributes the ``href`` of its first
    ``<a>`` tag, prefixed with the site root.

    Returns:
        tuple: ``(links, len_links)`` where ``links`` is the list of absolute
        story URLs and ``len_links`` is ``len(links)``.

    Raises:
        requests.exceptions.RequestException: propagated from ``requests.get``
            (for example when the 10-second timeout expires).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    }
    links = []
    for page in range(2, 11):
        url = 'http://www.tom61.com/ertongwenxue/shuiqiangushi/index_{}.html'.format(page)
        # Without a timeout a stalled connection would block the caller forever.
        rq = requests.get(url, headers=headers, timeout=10)
        print(url)
        print('*' * 50)
        rq.encoding = 'utf8'
        soup = BeautifulSoup(rq.text, 'lxml')
        box = soup.find('dl', class_='txt_box')
        if box is None:
            # The expected list container is absent on this page; skip it
            # instead of failing with AttributeError on None.
            continue
        for item in box.find_all('dd'):
            anchor = item.find('a')
            if anchor is None or not anchor.get('href'):
                continue
            links.append('http://www.tom61.com' + anchor['href'])
    len_links = len(links)
    print(len_links)
    return links, len_links

def get_massage(urls,len_links):
    """Pick a random story that has not been sent yet, email it, and record it.

    URLs already sent are stored comma-separated in ``gushi_url.txt``. Up to
    ``len_links`` random picks are made from ``urls``; the first pick that is
    not in the history is downloaded, mailed with ``email_s`` and appended to
    the history file.

    Args:
        urls: list of story URLs to choose from.
        len_links: maximum number of random picks before giving up.

    Returns:
        ``False`` once a story has been sent, ``None`` if every attempt hit an
        already-sent (or unparsable) story or ``urls`` is empty.
    """
    # Load the history once. The file only changes right before this function
    # returns, so re-reading it on every attempt is unnecessary. A missing
    # file simply means nothing has been sent yet (first run).
    try:
        with open("gushi_url.txt", "r", encoding="utf-8") as history:
            sent = {u for u in history.read().split(',') if u}
    except FileNotFoundError:
        sent = set()

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    }
    attempts = 0
    # `urls` is checked because random.choice raises IndexError on an empty
    # sequence; the counter allows exactly len_links attempts.
    while urls and attempts < len_links:
        attempts += 1
        link = random.choice(urls)
        if link in sent:
            continue
        # Without a timeout a stalled connection would block the caller forever.
        rq = requests.get(link, headers=headers, timeout=10)
        print(link)
        rq.encoding = 'utf8'
        soup = BeautifulSoup(rq.text, 'lxml')
        title_box = soup.find('div', class_='t_news')
        heading = title_box.find('h1') if title_box is not None else None
        body = soup.find('div', class_='t_news_txt')
        if heading is None or body is None:
            # The page lacks the expected title/text elements; try another pick.
            continue
        title = heading.get_text()
        print(title)
        content = body.get_text()
        # The story must actually be mailed before its URL is recorded as
        # sent; otherwise it would be skipped forever without being delivered.
        email_s(title, content)
        with open("gushi_url.txt", "a", encoding="utf-8") as history:
            history.write(link + ",")
        return False
    print('没资源了')
    return None
def email_s(title,content):
    """Send one bedtime story as a plain-text email through smtp.126.com.

    Args:
        title: story title; used in the subject line and in the body.
        content: story text placed in the body after the title.

    Raises:
        smtplib.SMTPException: propagated from login/sendmail on failure.
    """
    # Sender address and password (placeholders; fill in real values and
    # avoid committing real credentials).
    from_addr = '。。@126.com'
    password = '。。'
    # Recipient address (placeholder).
    to_addr = '。。。。@qq.com'

    # SMTP server of the sender's mail provider.
    smtp_server = 'smtp.126.com'

    msg = MIMEText('宝贝儿,睡前小故事来喽。\n{}\n{}\n今天的故事讲完啦宝贝儿,快睡吧,晚安,开心每一天哦!'.format(str(title),str(content)), 'plain', 'utf-8')
    msg['From'] = Header('来自在乎你的人的问候', 'utf-8')
    msg['To'] = to_addr
    # str() mirrors the body formatting above so a non-str title cannot break
    # the concatenation.
    msg['Subject'] = Header('今天的故事是:' + str(title), 'utf-8')

    # The context manager issues QUIT and closes the connection even when
    # login or sendmail raises. 25 is the default SMTP port.
    with smtplib.SMTP(smtp_server, 25) as server:
        server.set_debuglevel(1)
        server.login(from_addr, password)
        server.sendmail(from_addr, [to_addr], msg.as_string())
def job():
    """Run one push cycle: gather the story links, then send one story."""
    story_urls, story_count = get_list()
    get_massage(story_urls, story_count)
if __name__ == '__main__':
    # Scheduled task: run job() every day at 23:30 (call job() directly
    # instead for a single immediate run). For other run times, see the
    # author's other blog post on scheduling.
    cron_scheduler = BlockingScheduler()
    cron_scheduler.add_job(job, trigger='cron', hour=23, minute=30)
    cron_scheduler.start()

到此,程序就完成了,欢迎提问。
最后老规矩,看一下运行效果吧
在这里插入图片描述

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章