# -*- coding: utf-8 -*-
# Scrapy settings for demo1 project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
# Name of the Scrapy project. It is used to construct the default User-Agent
# and is also used for logging. It is filled in automatically when the project
# is created with the `startproject` command.
BOT_NAME = 'demo1'

# List of modules in which Scrapy looks for spiders. Default: [xxx.spiders]
SPIDER_MODULES = ['demo1.spiders']

# Module in which the `genspider` command creates new spiders.
# Default: 'xxx.spiders'
NEWSPIDER_MODULE = 'demo1.spiders'

# Default User-Agent used while crawling, unless overridden
#USER_AGENT = 'demo1 (+http://www.yourdomain.com)'

# If enabled, Scrapy obeys the robots.txt policy
ROBOTSTXT_OBEY = True

# Maximum number of concurrent requests performed by the Scrapy downloader
# (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests to the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
#
# Time the downloader waits before downloading the next page from the same
# website. It can be used to throttle the crawl speed and reduce the load on
# the server. Decimal values are supported as well (e.g. 0.25); the unit is
# seconds.
#DOWNLOAD_DELAY = 3

# The download delay setting honors only one of the following:
#
# Maximum number of concurrent requests performed against a single website.
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#
# Maximum number of concurrent requests performed against a single IP.
# If non-zero, CONCURRENT_REQUESTS_PER_DOMAIN is ignored and this setting is
# used instead; in other words, the concurrency limit applies per IP rather
# than per website. This setting also affects DOWNLOAD_DELAY: if
# CONCURRENT_REQUESTS_PER_IP is non-zero, the download delay is enforced per
# IP rather than per website.
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable the Telnet console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'demo1.middlewares.Demo1SpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
#DOWNLOADER_MIDDLEWARES = {
# 'demo1.middlewares.MyCustomDownloaderMiddleware': 543,
#}

# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
#ITEM_PIPELINES = {
# 'demo1.pipelines.Demo1Pipeline': 300,
#}

# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# References: