scrapy將爬取的python數據用json、Excel、database顯示

原創

2020-06-21 21:01

# -*- coding: utf-8 -*-
import sqlite3
import json
import codecs
import xlwt
# Define your item pipelines here
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

class RxfblogspiderPipeline(object):
    """Pass-through item pipeline that leaves every item untouched."""

    def process_item(self, item, spider):
        """Return ``item`` unchanged.

        Args:
            item: the item handed to this pipeline stage.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.
        """
        return item

# 用json的數據形式展示
# class JsonWritePipeline(object):
#
#     def __init__(self):
#         self.file = codecs.open('article.json', 'w+', encoding='utf-8')
#
#         # article.json json文件名     先寫入一個[ 將數據用列表包起來
#         self.file.write('[')
#
#     def process_item(self, item, spider):
#         # 將item轉化爲字典類型
#         item = dict(item)
#         # 把字典轉換爲json字符串
#         json_str = json.dumps(item)+','
#         # 把字符串寫入文件
#         self.file.write(json_str)
#         # self.file.write(',')
#         return item
#
#     def close_spider(self, spider):
#         # 0 文件起始位置 1 當前位置 2 文件末尾
#         self.file.seek(-1, 2)
#         self.file.truncate()
#         self.file.write(']')
#         self.file.close()
#
#
# class ExcelWriterPipeline(object):
#     # 1.初始化函數做寫入數據之前的準備工作
#     def __init__(self):
#         self.workbook = xlwt.Workbook(encoding='utf-8')
#         self.sheet = self.workbook.add_sheet('精品微博')
#         self.sheet.write(0, 0, 'name')
#         self.sheet.write(0, 1, 'detail_href')
#         self.sheet.write(0, 2, 'title')
#         self.sheet.write(0, 3, 'introduce')
#         self.sheet.write(0, 4, 'time')
#         self.sheet.write(0, 5, 'number')
#         # 記錄行號
#         self.count = 1
#
#     # 2.寫入數據
#     def process_item(self, item, spider):
#         self.sheet.write(self.count, 0, item['name'])
#         self.sheet.write(self.count, 1, item['detail_href'])
#         self.sheet.write(self.count, 2, item['title'])
#         self.sheet.write(self.count, 3, item['introduce'])
#         self.sheet.write(self.count, 4, item['time'])
#         self.sheet.write(self.count, 5, item['number'])
#         # 讓行號加1
#         self.count += 1
#         return item
#
#     # 3.文件保存
#     def close_spider(self, spider):
#         self.workbook.save('csdn精品微博.xls')

# 保存到sqlite3數據庫

class SQLWriterPipeline(object):
    """Item pipeline that stores each item as a row in a SQLite database.

    The connection to ``weibo.db`` is opened when the pipeline is
    constructed, one row is inserted and committed per item, and the
    cursor and connection are closed in ``close_spider``.
    """

    def __init__(self):
        """Open ``weibo.db`` and make sure the ``weibo`` table exists."""
        self.connect = sqlite3.connect('weibo.db')
        # Cursor used for every statement this pipeline executes.
        self.cursor = self.connect.cursor()
        # IF NOT EXISTS makes re-running against an existing database file a
        # no-op instead of relying on a caught "table already exists" error.
        sql = (
            'CREATE TABLE IF NOT EXISTS weibo ('
            'id INTEGER PRIMARY KEY, name TEXT, detail_href TEXT, '
            'title TEXT, introduce TEXT, time TEXT, number TEXT)'
        )
        try:
            self.cursor.execute(sql)
        except sqlite3.Error as e:
            # Best effort, as before: report the database error and carry on.
            print(e)

    def process_item(self, item, spider):
        """Insert one item into the ``weibo`` table and commit.

        Args:
            item: mapping that provides the keys ``name``, ``detail_href``,
                ``title``, ``introduce``, ``time`` and ``number``. Only the
                first element of ``item['detail_href']`` is stored.
            spider: the spider that produced the item (unused).

        Returns:
            The same ``item`` that was passed in.

        Raises:
            KeyError: if one of the expected keys is missing.
            sqlite3.Error: if the insert or the commit fails.
        """
        # Placeholders let sqlite3 do the quoting, so values that contain
        # quotes (or SQL fragments) are stored verbatim instead of breaking
        # or altering the statement.
        sql = (
            'INSERT INTO weibo(name, detail_href, title, introduce, time, number) '
            'VALUES (?, ?, ?, ?, ?, ?)'
        )
        values = (
            item['name'],
            item['detail_href'][0],
            item['title'],
            item['introduce'],
            item['time'],
            item['number'],
        )
        # '%s' formatting keeps the stored text identical to what the
        # previous string-built statement wrote for non-string values.
        params = tuple('%s' % (value,) for value in values)

        self.cursor.execute(sql, params)
        # Commit per item so rows already written survive a later failure.
        self.connect.commit()
        return item

    def close_spider(self, spider):
        """Close the cursor and the database connection.

        Args:
            spider: the spider being closed (unused).
        """
        self.cursor.close()
        self.connect.close()

發表評論

所有評論

還沒有人評論，想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.

scrapy將爬取的python數據用jsonExceldatabase顯示

自學編程兩個月，現在我月入 4 萬元

「實戰應用」如何用圖表控件LightningChart創建2D氣泡圖

百度安全多篇議題入選Blackhat Asia以硬技術發現“芯”問題

Google Chrome驅動程序 124.0.6367.62（正式版本）去哪下載？

數據可視化seaborn

爬取json數據網站

scrapy將爬取的python數據用jsonExceldatabase顯示

scrapy圖片文件下載

uwsgi+nginx+Ubuntu部署Django項目（阿里雲）

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結