import sqlite3
import json
import codecs
import xlwt
# Define your item pipelines here
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
class RxfblogspiderPipeline(object):
    """Default pass-through pipeline: hands every item on unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` exactly as received.

        :param item: the scraped item passed into the pipeline.
        :param spider: the spider that produced the item (unused).
        :returns: the same ``item`` object, unmodified.
        """
        return item
# 用json的數據形式展示
# class JsonWritePipeline(object):
# def __init__(self):
# self.file = codecs.open('article.json', 'w+', encoding='utf-8')
# # 'article.json' is the JSON output file name; write a '[' first so the data is wrapped in a list
# self.file.write('[')
#
# def process_item(self, item, spider):
# # 將item轉化爲字典類型
# item = dict(item)
# # 把字典轉換爲json字符串
# json_str = json.dumps(item)+','
# # 把字符串寫入文件
# self.file.write(json_str)
# # self.file.write(',')
# return item
#
# def close_novel(self, spider):
# # 0 文件起始位置 1 當前位置 2 文件末尾
# self.file.seek(-1, 2)
# self.file.truncate()
# self.file.write(']')
# self.file.close()
#
#
# class ExcelWriterPipeline(object):
# # 1.初始化函數做寫入數據之前的準備工作
# def __init__(self):
# self.workbook = xlwt.Workbook(encoding='utf-8')
# self.sheet = self.workbook.add_sheet('精品微博')
# self.sheet.write(0, 0, 'name')
# self.sheet.write(0, 1, 'detail_href')
# self.sheet.write(0, 2, 'title')
# self.sheet.write(0, 3, 'introduce')
# self.sheet.write(0, 4, 'time')
# self.sheet.write(0, 5, 'number')
# # 記錄行號
# self.count = 1
#
# # 2.寫入數據
# def process_item(self, item, spider):
# self.sheet.write(self.count, 0, item['name'])
# self.sheet.write(self.count, 1, item['detail_href'])
# self.sheet.write(self.count, 2, item['title'])
# self.sheet.write(self.count, 3, item['introduce'])
# self.sheet.write(self.count, 4, item['time'])
# self.sheet.write(self.count, 5, item['number'])
# # 讓行號加1
# self.count += 1
# return item
#
# # 3.文件保存
# def close_spider(self, spider):
# self.workbook.save('csdn精品微博.xls')
# 保存到sqlite3數據庫
class SQLWriterPipeline(object):
    """Item pipeline that saves every processed item into a SQLite database.

    The database file is ``weibo.db`` (relative to the current working
    directory) and rows are written to the ``weibo`` table, which is created
    on first use.
    """

    def __init__(self):
        """Open the database connection and make sure the table exists."""
        self.connect = sqlite3.connect('weibo.db')
        # Cursor reused for every statement issued by this pipeline.
        self.cursor = self.connect.cursor()
        # IF NOT EXISTS makes re-running against an existing database a no-op
        # instead of raising "table weibo already exists"; any other database
        # error is a real problem and is allowed to propagate.
        self.cursor.execute(
            'CREATE TABLE IF NOT EXISTS weibo ('
            'id INTEGER PRIMARY KEY, name TEXT, detail_href TEXT, title TEXT, '
            'introduce TEXT, time TEXT, number TEXT)'
        )

    def process_item(self, item, spider):
        """Insert one item as a row of the ``weibo`` table and commit.

        :param item: mapping with the keys ``name``, ``detail_href``,
            ``title``, ``introduce``, ``time`` and ``number``. Only the first
            element of ``item['detail_href']`` is stored.
        :param spider: the spider that produced the item (unused).
        :returns: the same ``item``, unmodified.
        :raises KeyError: if one of the expected keys is missing.
        :raises sqlite3.Error: if the insert or the commit fails.
        """
        values = (
            item['name'],
            item['detail_href'][0],
            item['title'],
            item['introduce'],
            item['time'],
            item['number'],
        )
        # Bind the values through placeholders instead of formatting them
        # into the statement: quotes in the scraped text can then neither
        # break the SQL nor inject into it. str() keeps the stored text the
        # same as the former '%s' formatting produced.
        sql = (
            'INSERT INTO weibo(name, detail_href, title, introduce, time, number) '
            'VALUES (?, ?, ?, ?, ?, ?)'
        )
        self.cursor.execute(sql, tuple(str(value) for value in values))
        # Commit after every item so rows already written survive a crash.
        self.connect.commit()
        return item

    def close_spider(self, spider):
        """Close the cursor and the database connection.

        :param spider: the spider being closed (unused).
        """
        self.cursor.close()
        self.connect.close()