""" This module contains the default values for all settings used by Scrapy. For more information about these settings you can read the settings documentation in docs/topics/settings.rst Scrapy developers, if you add a setting here remember to: * add it in alphabetical order * group similar settings without leaving blank lines * add its documentation to the available settings documentation (docs/topics/settings.rst) """ import sys from importlib import import_module from os.path import join, abspath, dirname AJAXCRAWL_ENABLED = False ASYNCIO_EVENT_LOOP = None AUTOTHROTTLE_ENABLED = False AUTOTHROTTLE_DEBUG = False AUTOTHROTTLE_MAX_DELAY = 60.0 AUTOTHROTTLE_START_DELAY = 5.0 AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 BOT_NAME = 'scrapybot' CLOSESPIDER_TIMEOUT = 0 CLOSESPIDER_PAGECOUNT = 0 CLOSESPIDER_ITEMCOUNT = 0 CLOSESPIDER_ERRORCOUNT = 0 COMMANDS_MODULE = '' COMPRESSION_ENABLED = True CONCURRENT_ITEMS = 100 CONCURRENT_REQUESTS = 16 CONCURRENT_REQUESTS_PER_DOMAIN = 8 CONCURRENT_REQUESTS_PER_IP = 0 COOKIES_ENABLED = True COOKIES_DEBUG = False DEFAULT_ITEM_CLASS = 'scrapy.item.Item' DEFAULT_REQUEST_HEADERS = { 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en', } DEPTH_LIMIT = 0 DEPTH_STATS_VERBOSE = False DEPTH_PRIORITY = 0 DNSCACHE_ENABLED = True DNSCACHE_SIZE = 10000 DNS_RESOLVER = 'scrapy.resolver.CachingThreadedResolver' DNS_TIMEOUT = 60 DOWNLOAD_DELAY = 0 DOWNLOAD_HANDLERS = {} DOWNLOAD_HANDLERS_BASE = { 'data': 'scrapy.core.downloader.handlers.datauri.DataURIDownloadHandler', 'file': 'scrapy.core.downloader.handlers.file.FileDownloadHandler', 'http': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler', 'https': 'scrapy.core.downloader.handlers.http.HTTPDownloadHandler', 's3': 'scrapy.core.downloader.handlers.s3.S3DownloadHandler', 'ftp': 'scrapy.core.downloader.handlers.ftp.FTPDownloadHandler', } DOWNLOAD_TIMEOUT = 180 # 3mins DOWNLOAD_MAXSIZE = 1024 * 1024 * 1024 # 1024m DOWNLOAD_WARNSIZE = 32 * 1024 * 1024 # 32m DOWNLOAD_FAIL_ON_DATALOSS = True DOWNLOADER = 'scrapy.core.downloader.Downloader' DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory' DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory' DOWNLOADER_CLIENT_TLS_CIPHERS = 'DEFAULT' # Use highest TLS/SSL protocol version supported by the platform, also allowing negotiation: DOWNLOADER_CLIENT_TLS_METHOD = 'TLS' DOWNLOADER_CLIENT_TLS_VERBOSE_LOGGING = False DOWNLOADER_MIDDLEWARES = {} DOWNLOADER_MIDDLEWARES_BASE = { # Engine side 'scrapy.downloadermiddlewares.robotstxt.RobotsTxtMiddleware': 100, 'scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware': 300, 'scrapy.downloadermiddlewares.downloadtimeout.DownloadTimeoutMiddleware': 350, 'scrapy.downloadermiddlewares.defaultheaders.DefaultHeadersMiddleware': 400, 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': 500, 'scrapy.downloadermiddlewares.retry.RetryMiddleware': 550, 'scrapy.downloadermiddlewares.ajaxcrawl.AjaxCrawlMiddleware': 560, 'scrapy.downloadermiddlewares.redirect.MetaRefreshMiddleware': 580, 'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 590, 'scrapy.downloadermiddlewares.redirect.RedirectMiddleware': 600, 'scrapy.downloadermiddlewares.cookies.CookiesMiddleware': 700, 'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 750, 'scrapy.downloadermiddlewares.stats.DownloaderStats': 850, 'scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware': 900, # Downloader side } DOWNLOADER_STATS = True DUPEFILTER_CLASS = 'scrapy.dupefilters.RFPDupeFilter' EDITOR = 'vi' if sys.platform == 'win32': EDITOR = '%s -m idlelib.idle' EXTENSIONS = {} EXTENSIONS_BASE = { 'scrapy.extensions.corestats.CoreStats': 0, 'scrapy.extensions.telnet.TelnetConsole': 0, 'scrapy.extensions.memusage.MemoryUsage': 0, 'scrapy.extensions.memdebug.MemoryDebugger': 0, 'scrapy.extensions.closespider.CloseSpider': 0, 'scrapy.extensions.feedexport.FeedExporter': 0, 'scrapy.extensions.logstats.LogStats': 0, 'scrapy.extensions.spiderstate.SpiderState': 0, 'scrapy.extensions.throttle.AutoThrottle': 0, } FEED_TEMPDIR = None FEEDS = {} FEED_URI_PARAMS = None # a function to extend uri arguments FEED_STORE_EMPTY = False FEED_EXPORT_ENCODING = None FEED_EXPORT_FIELDS = None FEED_STORAGES = {} FEED_STORAGES_BASE = { '': 'scrapy.extensions.feedexport.FileFeedStorage', 'file': 'scrapy.extensions.feedexport.FileFeedStorage', 'ftp': 'scrapy.extensions.feedexport.FTPFeedStorage', 'gs': 'scrapy.extensions.feedexport.GCSFeedStorage', 's3': 'scrapy.extensions.feedexport.S3FeedStorage', 'stdout': 'scrapy.extensions.feedexport.StdoutFeedStorage', } FEED_EXPORT_BATCH_ITEM_COUNT = 0 FEED_EXPORTERS = {} FEED_EXPORTERS_BASE = { 'json': 'scrapy.exporters.JsonItemExporter', 'jsonlines': 'scrapy.exporters.JsonLinesItemExporter', 'jl': 'scrapy.exporters.JsonLinesItemExporter', 'csv': 'scrapy.exporters.CsvItemExporter', 'xml': 'scrapy.exporters.XmlItemExporter', 'marshal': 'scrapy.exporters.MarshalItemExporter', 'pickle': 'scrapy.exporters.PickleItemExporter', } FEED_EXPORT_INDENT = 0 FEED_STORAGE_FTP_ACTIVE = False FEED_STORAGE_GCS_ACL = '' FEED_STORAGE_S3_ACL = '' FILES_STORE_S3_ACL = 'private' FILES_STORE_GCS_ACL = '' FTP_USER = 'anonymous' FTP_PASSWORD = 'guest' FTP_PASSIVE_MODE = True GCS_PROJECT_ID = None HTTPCACHE_ENABLED = False HTTPCACHE_DIR = 'httpcache' HTTPCACHE_IGNORE_MISSING = False HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' HTTPCACHE_EXPIRATION_SECS = 0 HTTPCACHE_ALWAYS_STORE = False HTTPCACHE_IGNORE_HTTP_CODES = [] HTTPCACHE_IGNORE_SCHEMES = ['file'] HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS = [] HTTPCACHE_DBM_MODULE = 'dbm' HTTPCACHE_POLICY = 'scrapy.extensions.httpcache.DummyPolicy' HTTPCACHE_GZIP = False HTTPPROXY_ENABLED = True HTTPPROXY_AUTH_ENCODING = 'latin-1' IMAGES_STORE_S3_ACL = 'private' IMAGES_STORE_GCS_ACL = '' ITEM_PROCESSOR = 'scrapy.pipelines.ItemPipelineManager' ITEM_PIPELINES = {} ITEM_PIPELINES_BASE = {} LOG_ENABLED = True LOG_ENCODING = 'utf-8' LOG_FORMATTER = 'scrapy.logformatter.LogFormatter' LOG_FORMAT = '%(asctime)s [%(name)s] %(levelname)s: %(message)s' LOG_DATEFORMAT = '%Y-%m-%d %H:%M:%S' LOG_STDOUT = False LOG_LEVEL = 'DEBUG' LOG_FILE = None LOG_FILE_APPEND = True LOG_SHORT_NAMES = False SCHEDULER_DEBUG = False LOGSTATS_INTERVAL = 60.0 MAIL_HOST = 'localhost' MAIL_PORT = 25 MAIL_FROM = 'scrapy@localhost' MAIL_PASS = None MAIL_USER = None MEMDEBUG_ENABLED = False # enable memory debugging MEMDEBUG_NOTIFY = [] # send memory debugging report by mail at engine shutdown MEMUSAGE_CHECK_INTERVAL_SECONDS = 60.0 MEMUSAGE_ENABLED = True MEMUSAGE_LIMIT_MB = 0 MEMUSAGE_NOTIFY_MAIL = [] MEMUSAGE_WARNING_MB = 0 METAREFRESH_ENABLED = True METAREFRESH_IGNORE_TAGS = [] METAREFRESH_MAXDELAY = 100 NEWSPIDER_MODULE = '' RANDOMIZE_DOWNLOAD_DELAY = True REACTOR_THREADPOOL_MAXSIZE = 10 REDIRECT_ENABLED = True REDIRECT_MAX_TIMES = 20 # uses Firefox default setting REDIRECT_PRIORITY_ADJUST = +2 REFERER_ENABLED = True REFERRER_POLICY = 'scrapy.spidermiddlewares.referer.DefaultReferrerPolicy' RETRY_ENABLED = True RETRY_TIMES = 2 # initial response + 2 retries = 3 requests RETRY_HTTP_CODES = [500, 502, 503, 504, 522, 524, 408, 429] RETRY_PRIORITY_ADJUST = -1 ROBOTSTXT_OBEY = False ROBOTSTXT_PARSER = 'scrapy.robotstxt.ProtegoRobotParser' ROBOTSTXT_USER_AGENT = None SCHEDULER = 'scrapy.core.scheduler.Scheduler' SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleLifoDiskQueue' SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue' SCHEDULER_PRIORITY_QUEUE = 'scrapy.pqueues.ScrapyPriorityQueue' SCRAPER_SLOT_MAX_ACTIVE_SIZE = 5000000 SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader' SPIDER_LOADER_WARN_ONLY = False SPIDER_MIDDLEWARES = {} SPIDER_MIDDLEWARES_BASE = { # Engine side 'scrapy.spidermiddlewares.httperror.HttpErrorMiddleware': 50, 'scrapy.spidermiddlewares.offsite.OffsiteMiddleware': 500, 'scrapy.spidermiddlewares.referer.RefererMiddleware': 700, 'scrapy.spidermiddlewares.urllength.UrlLengthMiddleware': 800, 'scrapy.spidermiddlewares.depth.DepthMiddleware': 900, # Spider side } SPIDER_MODULES = [] STATS_CLASS = 'scrapy.statscollectors.MemoryStatsCollector' STATS_DUMP = True STATSMAILER_RCPTS = [] TEMPLATES_DIR = abspath(join(dirname(__file__), '..', 'templates')) URLLENGTH_LIMIT = 2083 USER_AGENT = f'Scrapy/{import_module("scrapy").__version__} (+https://scrapy.org)' TELNETCONSOLE_ENABLED = 1 TELNETCONSOLE_PORT = [6023, 6073] TELNETCONSOLE_HOST = '127.0.0.1' TELNETCONSOLE_USERNAME = 'scrapy' TELNETCONSOLE_PASSWORD = None TWISTED_REACTOR = None SPIDER_CONTRACTS = {} SPIDER_CONTRACTS_BASE = { 'scrapy.contracts.default.UrlContract': 1, 'scrapy.contracts.default.CallbackKeywordArgumentsContract': 1, 'scrapy.contracts.default.ReturnsContract': 2, 'scrapy.contracts.default.ScrapesContract': 3, }