统一在 pipelines 里面实现
将爬取的数据保存到文件中:
class ManscrapyPipeline(object):
    """Scrapy item pipeline that appends each scraped item to the file
    'dataTest' as one JSON object per line (JSON Lines format)."""

    def __init__(self):
        # codecs.open with an explicit encoding handles UTF-8 itself;
        # the stream is held open for the whole crawl and closed in
        # close_spider (the original version leaked this handle).
        self.file = codecs.open('dataTest', 'wb', encoding='utf-8')

    def process_item(self, item, spider):
        """Serialize *item* to one JSON line and write it.

        Returns the item unchanged so later pipeline stages receive it.
        """
        # ensure_ascii=False keeps non-ASCII (e.g. Chinese) text readable.
        line = json.dumps(dict(item), ensure_ascii=False) + '\n'
        self.file.write(line)
        return item

    def close_spider(self, spider):
        # Fix: the original never closed the file — flush and release it
        # when the spider finishes.
        self.file.close()
将爬取的数据保存到数据库中:
class ManscrapyPipeline(object):
    """Scrapy item pipeline that inserts each scraped item into the
    MySQL table crm_menu (database 'crmdata')."""

    def __init__(self):
        # Open one shared connection and cursor for the spider's lifetime;
        # both are released in close_spider.
        self.Create = pymysql.connect(host='localhost', user='root',
                                      password='123456', db='crmdata',
                                      use_unicode=True)
        self.Cc = self.Create.cursor()

    def close_spider(self, spider):
        print('----------关闭数据库资源-----------')
        # Close the cursor first, then the underlying connection.
        self.Cc.close()
        self.Create.close()

    def process_item(self, item, spider):
        """Insert the scraped item into crm_menu and commit.

        Uses a parameterized query so scraped values cannot inject SQL.
        """
        self.Cc.execute("INSERT INTO crm_menu VALUES(null, %s, %s)",
                        (item['name'], item['href']))
        self.Create.commit()
        # Fix: Scrapy's pipeline contract requires returning the item so
        # any later pipeline stages receive it instead of None.
        return item