統一在 pipelines 裏面實現
爬取的數據保存到文件中:
class ManscrapyPipeline(object):
    """Scrapy item pipeline that appends each scraped item to the file
    ``dataTest`` as one JSON object per line (JSON-lines format)."""

    def __init__(self):
        # Plain text-mode open replaces the fragile codecs.open('wb', encoding=...)
        # combination; json.dumps returns str, so text mode is what we need.
        self.file = open('dataTest', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Serialize *item* as a JSON line and write it to the output file.

        Returns the item unchanged so later pipelines still receive it.
        """
        # ensure_ascii=False keeps CJK text human-readable in the output file.
        line = json.dumps(dict(item), ensure_ascii=False) + '\n'
        self.file.write(line)
        return item

    def close_spider(self, spider):
        # Was missing in the original: release the file handle when the
        # spider finishes so buffered data is flushed and the fd is freed.
        self.file.close()
爬取的數據保存到數據庫中:
class ManscrapyPipeline(object):
    """Scrapy item pipeline that inserts each scraped item into the MySQL
    table ``crm_menu`` (database ``crmdata``) via pymysql."""

    def __init__(self):
        # One connection + cursor for the whole spider run; both are
        # released in close_spider.
        self.Create = pymysql.connect(host='localhost', user='root',
                                      password='123456', db='crmdata',
                                      use_unicode=True)
        self.Cc = self.Create.cursor()

    def process_item(self, item, spider):
        """Insert the scraped item into crm_menu and pass it on.

        Returns the item (was missing in the original — downstream
        pipelines would otherwise receive ``None``, dropping the item).
        """
        try:
            # Parameterized query: scraped text never reaches the SQL string.
            self.Cc.execute("INSERT INTO crm_menu VALUES(null, %s, %s)",
                            (item['name'], item['href']))
            self.Create.commit()
        except Exception:
            # Roll back the failed insert so the connection is left clean
            # and later items can still commit; re-raise for Scrapy to log.
            self.Create.rollback()
            raise
        return item

    def close_spider(self, spider):
        print('----------關閉數據庫資源-----------')
        # Close cursor first, then the connection.
        self.Cc.close()
        self.Create.close()