# Source code for scrapy.logformatter
from __future__ import annotations
import logging
import os
from typing import TYPE_CHECKING, Any, TypedDict
from twisted.python.failure import Failure
# working around https://github.com/sphinx-doc/sphinx/issues/10400
from scrapy import Request, Spider # noqa: TC001
from scrapy.http import Response # noqa: TC001
from scrapy.utils.python import global_object_name
from scrapy.utils.request import referer_str
if TYPE_CHECKING:
# typing.Self requires Python 3.11
from typing_extensions import Self
from scrapy.crawler import Crawler
# %-style message templates used by the log formatter. Each one is paired with
# an ``args`` mapping whose keys match the ``%(name)s`` placeholders below.

# Two-line templates: a headline, the platform line separator, then the item.
SCRAPEDMSG = f"Scraped from %(src)s{os.linesep}%(item)s"
DROPPEDMSG = f"Dropped: %(exception)s{os.linesep}%(item)s"

# Single-line templates.
CRAWLEDMSG = (
    "Crawled (%(status)s) %(request)s%(request_flags)s"
    " (referer: %(referer)s)%(response_flags)s"
)
ITEMERRORMSG = "Error processing %(item)s"
SPIDERERRORMSG = "Spider error processing %(request)s (referer: %(referer)s)"

# Download errors come in two flavours, without and with an error message.
DOWNLOADERRORMSG_SHORT = "Error downloading %(request)s"
DOWNLOADERRORMSG_LONG = "Error downloading %(request)s: %(errmsg)s"
class LogFormatterResult(TypedDict):
    """Shape of the dictionary returned by every ``LogFormatter`` method.

    The final log message is computed as ``msg % args``.
    """

    # Log level of the entry, e.g. ``logging.DEBUG`` or ``logging.ERROR``.
    level: int
    # %-style template of the message.
    msg: str
    # Values substituted into ``msg``: a mapping for ``%(name)s`` placeholders
    # or a tuple for positional ones.
    args: dict[str, Any] | tuple[Any, ...]
class LogFormatter:
    """Class for generating log messages for different actions.

    All methods must return a dictionary listing the parameters ``level``, ``msg``
    and ``args`` which are going to be used for constructing the log message when
    calling ``logging.log``.

    Dictionary keys for the method outputs:

    *   ``level`` is the log level for that action, you can use those from the
        `python logging library <https://docs.python.org/3/library/logging.html>`_ :
        ``logging.DEBUG``, ``logging.INFO``, ``logging.WARNING``, ``logging.ERROR``
        and ``logging.CRITICAL``.
    *   ``msg`` should be a string that can contain different formatting placeholders.
        This string, formatted with the provided ``args``, is going to be the long message
        for that action.
    *   ``args`` should be a tuple or dict with the formatting placeholders for ``msg``.
        The final log message is computed as ``msg % args``.

    Users can define their own ``LogFormatter`` class if they want to customize how
    each action is logged or if they want to omit it entirely. In order to omit
    logging an action the method must return ``None``.

    Here is an example on how to create a custom log formatter to lower the severity level of
    the log message when an item is dropped from the pipeline::

            class PoliteLogFormatter(logformatter.LogFormatter):
                def dropped(self, item, exception, response, spider):
                    return {
                        'level': logging.INFO, # lowering the level from logging.WARNING
                        'msg': "Dropped: %(exception)s" + os.linesep + "%(item)s",
                        'args': {
                            'exception': exception,
                            'item': item,
                        }
                    }
    """

    def crawled(
        self, request: Request, response: Response, spider: Spider
    ) -> LogFormatterResult:
        """Logs a message when the crawler finds a webpage.

        :param request: request that was crawled; its ``flags`` and referer
            are included in the message.
        :param response: response received; its ``status`` and ``flags`` are
            included in the message.
        :param spider: not used by this implementation.
        :returns: a ``logging.DEBUG`` entry built on ``CRAWLEDMSG``.
        """
        # Flags are rendered with a leading space only when there are any, so
        # the message carries no stray whitespace when they are empty.
        request_flags = f" {request.flags!s}" if request.flags else ""
        response_flags = f" {response.flags!s}" if response.flags else ""
        return {
            "level": logging.DEBUG,
            "msg": CRAWLEDMSG,
            "args": {
                "status": response.status,
                "request": request,
                "request_flags": request_flags,
                "referer": referer_str(request),
                "response_flags": response_flags,
                # backward compatibility with Scrapy logformatter below 1.4 version
                "flags": response_flags,
            },
        }

    def scraped(
        self, item: Any, response: Response | Failure | None, spider: Spider
    ) -> LogFormatterResult:
        """Logs a message when an item is scraped by a spider.

        :param item: the scraped item.
        :param response: where the item came from: a response, a ``Failure``,
            or ``None``.
        :param spider: spider that produced the item; its class name is used
            in the message when ``response`` is ``None``.
        :returns: a ``logging.DEBUG`` entry built on ``SCRAPEDMSG``.
        """
        src: Any
        if response is None:
            # No response: attribute the item to the spider's start_requests.
            src = f"{global_object_name(spider.__class__)}.start_requests"
        elif isinstance(response, Failure):
            # A failure is reported through its error message.
            src = response.getErrorMessage()
        else:
            src = response
        return {
            "level": logging.DEBUG,
            "msg": SCRAPEDMSG,
            "args": {
                "src": src,
                "item": item,
            },
        }

    def dropped(
        self,
        item: Any,
        exception: BaseException,
        response: Response | None,
        spider: Spider,
    ) -> LogFormatterResult:
        """Logs a message when an item is dropped while it is passing through the item pipeline.

        :param item: the dropped item.
        :param exception: exception reported as the reason for the drop.
        :param response: not used by this implementation.
        :param spider: not used by this implementation.
        :returns: a ``logging.WARNING`` entry built on ``DROPPEDMSG``.
        """
        return {
            "level": logging.WARNING,
            "msg": DROPPEDMSG,
            "args": {
                "exception": exception,
                "item": item,
            },
        }

    def item_error(
        self,
        item: Any,
        exception: BaseException,
        response: Response | None,
        spider: Spider,
    ) -> LogFormatterResult:
        """Logs a message when an item causes an error while it is passing
        through the item pipeline.

        Only ``item`` appears in the message; ``exception``, ``response`` and
        ``spider`` are not used by this implementation.

        :returns: a ``logging.ERROR`` entry built on ``ITEMERRORMSG``.

        .. versionadded:: 2.0
        """
        return {
            "level": logging.ERROR,
            "msg": ITEMERRORMSG,
            "args": {
                "item": item,
            },
        }

    def spider_error(
        self,
        failure: Failure,
        request: Request,
        response: Response | Failure,
        spider: Spider,
    ) -> LogFormatterResult:
        """Logs an error message from a spider.

        Only ``request`` and its referer appear in the message; ``failure``,
        ``response`` and ``spider`` are not used by this implementation.

        :returns: a ``logging.ERROR`` entry built on ``SPIDERERRORMSG``.

        .. versionadded:: 2.0
        """
        return {
            "level": logging.ERROR,
            "msg": SPIDERERRORMSG,
            "args": {
                "request": request,
                "referer": referer_str(request),
            },
        }

    def download_error(
        self,
        failure: Failure,
        request: Request,
        spider: Spider,
        errmsg: str | None = None,
    ) -> LogFormatterResult:
        """Logs a download error message from a spider (typically coming from
        the engine).

        :param failure: not used by this implementation.
        :param request: request whose download failed.
        :param spider: not used by this implementation.
        :param errmsg: optional error description; when it is non-empty the
            long template is used and the text is appended to the message.
        :returns: a ``logging.ERROR`` entry built on ``DOWNLOADERRORMSG_LONG``
            or ``DOWNLOADERRORMSG_SHORT``.

        .. versionadded:: 2.0
        """
        args: dict[str, Any] = {"request": request}
        if errmsg:
            msg = DOWNLOADERRORMSG_LONG
            args["errmsg"] = errmsg
        else:
            # ``None`` and the empty string both select the short template.
            msg = DOWNLOADERRORMSG_SHORT
        return {
            "level": logging.ERROR,
            "msg": msg,
            "args": args,
        }

    @classmethod
    def from_crawler(cls, crawler: Crawler) -> Self:
        """Return a new instance of ``cls``; ``crawler`` is not used here."""
        return cls()