Source code for scrapy.statscollectors

"""
Scrapy extension for collecting scraping stats
"""

from __future__ import annotations

import logging
import pprint
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from scrapy import Spider
    from scrapy.crawler import Crawler


# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)


# Alias for a stats mapping: string keys mapped to values of any type.
StatsT = dict[str, Any]


class StatsCollector:
    """Collect scraping stats in a single in-memory dict.

    Every accessor takes an optional ``spider`` argument; none of the
    methods defined on this class read it, except ``close_spider`` which
    forwards it to the log record and to ``_persist_stats``.
    """

    def __init__(self, crawler: Crawler):
        """Read the ``STATS_DUMP`` setting and start with empty stats."""
        self._dump: bool = crawler.settings.getbool("STATS_DUMP")
        self._stats: StatsT = {}

    def get_value(
        self, key: str, default: Any = None, spider: Spider | None = None
    ) -> Any:
        """Return the value stored under ``key``, or ``default`` if absent."""
        stats = self._stats
        return stats.get(key, default)

    def get_stats(self, spider: Spider | None = None) -> StatsT:
        """Return the internal stats dict itself (not a copy)."""
        stats = self._stats
        return stats

    def set_value(self, key: str, value: Any, spider: Spider | None = None) -> None:
        """Store ``value`` under ``key``, replacing any previous value."""
        stats = self._stats
        stats[key] = value

    def set_stats(self, stats: StatsT, spider: Spider | None = None) -> None:
        """Replace the internal stats dict with ``stats`` (kept by reference)."""
        self._stats = stats

    def inc_value(
        self, key: str, count: int = 1, start: int = 0, spider: Spider | None = None
    ) -> None:
        """Add ``count`` to the value under ``key``.

        A missing key is first initialised to ``start``.
        """
        stats = self._stats
        # setdefault() inserts ``start`` before the addition is attempted.
        current = stats.setdefault(key, start)
        stats[key] = current + count

    def max_value(self, key: str, value: Any, spider: Spider | None = None) -> None:
        """Keep the larger of the stored value and ``value`` under ``key``.

        A missing key is initialised to ``value``.
        """
        current = self._stats.setdefault(key, value)
        self._stats[key] = max(current, value)

    def min_value(self, key: str, value: Any, spider: Spider | None = None) -> None:
        """Keep the smaller of the stored value and ``value`` under ``key``.

        A missing key is initialised to ``value``.
        """
        current = self._stats.setdefault(key, value)
        self._stats[key] = min(current, value)

    def clear_stats(self, spider: Spider | None = None) -> None:
        """Empty the current stats dict in place."""
        stats = self._stats
        stats.clear()

    def open_spider(self, spider: Spider) -> None:
        """Do nothing in this class."""

    def close_spider(self, spider: Spider, reason: str) -> None:
        """Log the stats when dumping is enabled, then persist them.

        ``reason`` is accepted but not used by this implementation.
        """
        if self._dump:
            message = "Dumping Scrapy stats:\n" + pprint.pformat(self._stats)
            logger.info(message, extra={"spider": spider})
        self._persist_stats(self._stats, spider)

    def _persist_stats(self, stats: StatsT, spider: Spider) -> None:
        """Do nothing in this class; called by ``close_spider``."""
class MemoryStatsCollector(StatsCollector):
    """Stats collector that keeps persisted stats in ``spider_stats``.

    ``spider_stats`` maps a spider's ``name`` to the stats dict handed to
    ``_persist_stats`` for that spider.
    """

    def __init__(self, crawler: Crawler):
        """Initialise the base collector and an empty ``spider_stats`` dict."""
        super().__init__(crawler)
        self.spider_stats: dict[str, StatsT] = {}

    def _persist_stats(self, stats: StatsT, spider: Spider) -> None:
        """Store ``stats`` (by reference) under ``spider.name``."""
        spider_name = spider.name
        self.spider_stats[spider_name] = stats
class DummyStatsCollector(StatsCollector):
    """Stats collector whose overridden read/write methods are no-ops.

    The methods overridden here never touch the stats dict; methods that
    are not overridden are inherited from ``StatsCollector``.
    """

    def get_value(
        self, key: str, default: Any = None, spider: Spider | None = None
    ) -> Any:
        """Always return ``default``, whatever ``key`` is."""
        return default

    def set_value(self, key: str, value: Any, spider: Spider | None = None) -> None:
        """Ignore the value; nothing is stored."""

    def set_stats(self, stats: StatsT, spider: Spider | None = None) -> None:
        """Ignore ``stats``; the internal dict is left untouched."""

    def inc_value(
        self, key: str, count: int = 1, start: int = 0, spider: Spider | None = None
    ) -> None:
        """Ignore the increment; nothing is stored."""

    def max_value(self, key: str, value: Any, spider: Spider | None = None) -> None:
        """Ignore the value; nothing is stored."""

    def min_value(self, key: str, value: Any, spider: Spider | None = None) -> None:
        """Ignore the value; nothing is stored."""