scrapy将爬取的python数据用json、Excel、database显示

原創

2020-06-21 21:01

# -*- coding: utf-8 -*-
import sqlite3
import json
import codecs
import xlwt
# Define your item pipelines here
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

class RxfblogspiderPipeline(object):
    """Default pass-through pipeline: hands every item on unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` untouched.

        Args:
            item: The scraped item.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.
        """
        return item

# Show the data as JSON
# class JsonWritePipeline(object):
#
#     def __init__(self):
#         self.file = codecs.open('article.json', 'w+', encoding='utf-8')
#         # article.json is the output file; write '[' first so the records are wrapped in a list
#         self.file.write('[')
#
#     def process_item(self, item, spider):
#         # 将item转化为字典类型
#         item = dict(item)
#         # 把字典转换为json字符串
#         json_str = json.dumps(item)+','
#         # 把字符串写入文件
#         self.file.write(json_str)
#         # self.file.write(',')
#         return item
#
#     def close_spider(self, spider):
#         # 0 文件起始位置 1 当前位置 2 文件末尾
#         self.file.seek(-1, 2)
#         self.file.truncate()
#         self.file.write(']')
#         self.file.close()
#
#
# class ExcelWriterPipeline(object):
#     # 1.初始化函数做写入数据之前的准备工作
#     def __init__(self):
#         self.workbook = xlwt.Workbook(encoding='utf-8')
#         self.sheet = self.workbook.add_sheet('精品微博')
#         self.sheet.write(0, 0, 'name')
#         self.sheet.write(0, 1, 'detail_href')
#         self.sheet.write(0, 2, 'title')
#         self.sheet.write(0, 3, 'introduce')
#         self.sheet.write(0, 4, 'time')
#         self.sheet.write(0, 5, 'number')
#         # 记录行号
#         self.count = 1
#
#     # 2.写入数据
#     def process_item(self, item, spider):
#         self.sheet.write(self.count, 0, item['name'])
#         self.sheet.write(self.count, 1, item['detail_href'])
#         self.sheet.write(self.count, 2, item['title'])
#         self.sheet.write(self.count, 3, item['introduce'])
#         self.sheet.write(self.count, 4, item['time'])
#         self.sheet.write(self.count, 5, item['number'])
#         # 让行号加1
#         self.count += 1
#         return item
#
#     # 3.文件保存
#     def close_spider(self, spider):
#         self.workbook.save('csdn精品微博.xls')

# 保存到sqlite3数据库

class SQLWriterPipeline(object):
    """Item pipeline that stores each scraped item as a row in SQLite.

    Rows are written to the ``weibo`` table, which is created when the
    pipeline is constructed if it does not exist yet.
    """

    def __init__(self, db_path='weibo.db'):
        """Open the database and make sure the ``weibo`` table exists.

        Args:
            db_path: Database path handed to ``sqlite3.connect``. Defaults
                to ``'weibo.db'``, the file this pipeline has always used.

        Raises:
            sqlite3.Error: If the database cannot be opened or the table
                cannot be created.
        """
        self.connect = sqlite3.connect(db_path)
        # Cursor reused for every statement issued by this pipeline.
        self.cursor = self.connect.cursor()
        # IF NOT EXISTS keeps re-runs against an existing database quiet
        # without having to swallow every other kind of error.
        self.cursor.execute(
            'CREATE TABLE IF NOT EXISTS weibo ('
            'id INTEGER PRIMARY KEY, name TEXT, detail_href TEXT, '
            'title TEXT,introduce TEXT, time TEXT, number TEXT)'
        )

    def process_item(self, item, spider):
        """Insert one item into the ``weibo`` table and commit.

        Args:
            item: Mapping with the keys ``name``, ``detail_href``,
                ``title``, ``introduce``, ``time`` and ``number``. Only the
                first element of ``item['detail_href']`` is stored.
            spider: The spider that produced the item (unused).

        Returns:
            The unchanged ``item``.
        """
        # Placeholders let sqlite3 do the quoting, so values containing
        # quotes can neither break the statement nor inject SQL.
        sql = (
            'INSERT INTO weibo(name,detail_href,title,introduce,time,number)'
            'VALUES(?,?,?,?,?,?)'
        )
        # str() keeps the stored text identical to what the previous
        # '%s' formatting produced for each value.
        values = (
            str(item['name']),
            str(item['detail_href'][0]),
            str(item['title']),
            str(item['introduce']),
            str(item['time']),
            str(item['number']),
        )
        self.cursor.execute(sql, values)
        # Commit after every item, as before.
        self.connect.commit()
        return item

    def close_spider(self, spider):
        """Close the cursor and the database connection.

        Args:
            spider: The spider being closed (unused).
        """
        self.cursor.close()
        self.connect.close()

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

scrapy将爬取的python数据用jsonExceldatabase显示

DAPPER 事务 TRANSACTION

數據可視化seaborn

爬取json數據網站

scrapy將爬取的python數據用jsonExceldatabase顯示

scrapy圖片文件下載

uwsgi+nginx+Ubuntu部署Django項目（阿里雲）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結