"""Source code for scrapy.extensions.logstats."""

import logging

from twisted.internet import task

from scrapy import signals
from scrapy.exceptions import NotConfigured

logger = logging.getLogger(__name__)


class LogStats:
    """Log basic scraping stats (pages crawled, items scraped) periodically.

    Enabled through the ``LOGSTATS_INTERVAL`` setting, which gives the number
    of seconds between log lines; a falsy/zero interval disables the
    extension by raising :exc:`NotConfigured` from :meth:`from_crawler`.
    """

    def __init__(self, stats, interval=60.0):
        self.stats = stats
        self.interval = interval
        # Converts the per-interval delta of a counter into a per-minute rate.
        self.multiplier = 60.0 / self.interval
        # LoopingCall created on spider_opened; None until then so
        # spider_closed can run safely even if the spider never opened.
        self.task = None

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from crawler settings and connect spider signals.

        Raises:
            NotConfigured: if ``LOGSTATS_INTERVAL`` is unset or zero.
        """
        interval = crawler.settings.getfloat("LOGSTATS_INTERVAL")
        if not interval:
            raise NotConfigured
        o = cls(crawler.stats, interval)
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        crawler.signals.connect(o.spider_closed, signal=signals.spider_closed)
        return o

    def spider_opened(self, spider):
        # Reset baselines so rates are computed from the start of this crawl.
        self.pagesprev = 0
        self.itemsprev = 0
        # Schedule self.log(spider) every self.interval seconds on the reactor.
        self.task = task.LoopingCall(self.log, spider)
        self.task.start(self.interval)

    def log(self, spider):
        """Emit one stats line: totals plus per-minute rates since last call."""
        items = self.stats.get_value("item_scraped_count", 0)
        pages = self.stats.get_value("response_received_count", 0)
        irate = (items - self.itemsprev) * self.multiplier
        prate = (pages - self.pagesprev) * self.multiplier
        self.pagesprev, self.itemsprev = pages, items

        # %-style lazy formatting: args are only interpolated if the record
        # is actually emitted by a handler.
        msg = (
            "Crawled %(pages)d pages (at %(pagerate)d pages/min), "
            "scraped %(items)d items (at %(itemrate)d items/min)"
        )
        log_args = {
            "pages": pages,
            "pagerate": prate,
            "items": items,
            "itemrate": irate,
        }
        logger.info(msg, log_args, extra={"spider": spider})

    def spider_closed(self, spider, reason):
        # Guard against a spider that closed before spider_opened ever ran.
        if self.task and self.task.running:
            self.task.stop()