python 抓取"一個"網站文章信息放入數據庫

# coding:utf-8
"""Scrape the daily article and daily question from wufazhuce.com into MySQL.

The script downloads the site's front page, follows the first article link
and the first question link, extracts a few fields from each page and
inserts one row per page into the ``day_art`` table of the ``one`` database.
"""
import datetime
import json
import sys
import time

import pymysql
import requests
from bs4 import BeautifulSoup

if sys.version_info[0] == 2:
    # Python 2 only: ``reload`` is a builtin there and is needed to get
    # ``setdefaultencoding`` back; neither exists on Python 3.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Front page that links to the daily article and the daily question.
ONE_URL = "http://wufazhuce.com/"

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Placeholders (%s) let the driver escape every value; the scraped text must
# never be spliced into the SQL string itself.
INSERT_SQL = (
    "INSERT INTO day_art(title,curr,author,contents,add_time)"
    "VALUES(%s,%s,%s,%s,%s);"
)


def _fetch_soup(url):
    """Download *url* and return the page parsed with the ``lxml`` parser.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'lxml')


def _find(soup, name, css_class=None):
    """Return the first ``name`` tag (optionally with *css_class*) in *soup*.

    Raises:
        LookupError: if the page contains no such element.
    """
    if css_class is None:
        tag = soup.find(name)
    else:
        tag = soup.find(name, class_=css_class)
    if tag is None:
        raise LookupError(
            "element <%s class=%r> not found in page" % (name, css_class)
        )
    return tag


def _text(soup, name, css_class=None):
    """Return the whitespace-stripped text of the first matching element.

    ``get_text`` is used rather than ``.string`` because ``.string`` is
    ``None`` whenever the element has more than one child node.
    """
    return _find(soup, name, css_class).get_text(strip=True)


def _first_link(soup, css_class):
    """Return the ``href`` of the link inside the first ``<p class=css_class>``."""
    paragraph = _find(soup, "p", css_class)
    if paragraph.a is None:
        raise LookupError("no link inside <p class=%r>" % css_class)
    return paragraph.a.get('href')


def getartinfo(url):
    """Fetch an article page and extract its fields.

    Args:
        url: address of the article page.

    Returns:
        dict with keys ``curr``, ``title``, ``auchor`` (text of the
        ``comilla-cerrar``, ``articulo-titulo`` and ``articulo-autor``
        elements), ``contents`` (the ``articulo-contenido`` tag itself) and
        ``add_time`` (current Unix time as an int).

    Raises:
        LookupError: if an expected element is missing from the page.
    """
    soup = _fetch_soup(url)
    res = {}
    res['curr'] = _text(soup, 'div', "comilla-cerrar")
    res['title'] = _text(soup, 'h2', "articulo-titulo")
    res['auchor'] = _text(soup, 'p', "articulo-autor")
    res['contents'] = _find(soup, 'div', "articulo-contenido")
    res['add_time'] = int(time.time())
    return res


def getqueinfo(url):
    """Fetch a question page and extract its fields.

    Args:
        url: address of the question page.

    Returns:
        dict with keys ``title`` (text of the first ``<h4>``), ``curr`` (text
        of the first ``cuestion-contenido`` div), ``auchor`` (text of the
        ``cuestion-editor`` paragraph), ``contents`` (the second
        ``cuestion-contenido`` div as a tag) and ``add_time`` (current Unix
        time as an int).

    Raises:
        LookupError: if an expected element is missing from the page.
    """
    soup = _fetch_soup(url)
    blocks = soup.find_all('div', class_="cuestion-contenido")
    if len(blocks) < 2:
        raise LookupError(
            "expected two 'cuestion-contenido' blocks, found %d" % len(blocks)
        )
    res = {}
    res['title'] = _text(soup, 'h4')
    res['curr'] = blocks[0].get_text(strip=True)
    res['auchor'] = _text(soup, 'p', "cuestion-editor")
    res['contents'] = blocks[1]
    res['add_time'] = int(time.time())
    return res


def _row(info):
    """Return *info* as the parameter tuple matching ``INSERT_SQL``."""
    return (
        info['title'],
        info['curr'],
        info['auchor'],
        # ``contents`` is a BeautifulSoup tag; store its HTML markup.
        str(info['contents']),
        info['add_time'],
    )


def main():
    """Scrape today's article and question and insert both into the database."""
    soup = _fetch_soup(ONE_URL)

    # Daily article
    artinfo = getartinfo(_first_link(soup, "one-articulo-titulo"))

    # Daily question
    queinfo = getqueinfo(_first_link(soup, "one-cuestion-titulo"))

    # NOTE(review): credentials are hard-coded; consider reading them from
    # the environment or a config file.
    conn = pymysql.connect(host='localhost', port=3306, user='root',
                           password='root', db='one', charset='utf8')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(INSERT_SQL, _row(artinfo))
            # NOTE(review): the question is also written to day_art, as in
            # the original script -- confirm this is the intended table.
            cursor.execute(INSERT_SQL, _row(queinfo))
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Close the connection even when an insert fails.
        conn.close()
    print('ok')


if __name__ == "__main__":
    main()
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章