Source code for scrapy.downloadermiddlewares.httpauth

HTTP basic auth downloader middleware

See documentation in docs/topics/downloader-middleware.rst
import warnings

from w3lib.http import basic_auth_header

from scrapy import signals
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.url import url_is_from_any_domain

class HttpAuthMiddleware:
    """Set Basic HTTP Authorization header
    (http_user and http_pass spider class attributes)

    The header value is built once, when the spider opens, from the spider's
    ``http_user`` / ``http_pass`` attributes.  It is then added to outgoing
    requests that do not already carry an ``Authorization`` header and whose
    URL is accepted for the configured ``http_auth_domain``.

    Instance attributes are created lazily in ``spider_opened`` and only when
    the spider defines credentials:

    * ``auth`` -- value returned by ``basic_auth_header(usr, pwd)``.
    * ``domain`` -- domain the credentials are restricted to; a falsy value
      means "no restriction".
    * ``domain_unset`` -- ``True`` while ``domain`` still has to be taken from
      the first request that has a hostname (deprecated behaviour).
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal.

        :param crawler: object exposing a ``signals.connect`` method.
        :return: the new middleware instance.
        """
        o = cls()
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        return o

    def spider_opened(self, spider):
        """Read the credentials and the auth domain from ``spider``.

        Does nothing when both ``http_user`` and ``http_pass`` are missing or
        empty.  Emits a ``ScrapyDeprecationWarning`` when credentials are set
        but the spider has no ``http_auth_domain`` attribute.
        """
        usr = getattr(spider, "http_user", "")
        pwd = getattr(spider, "http_pass", "")
        if not (usr or pwd):
            return

        self.auth = basic_auth_header(usr, pwd)
        if hasattr(spider, "http_auth_domain"):
            self.domain = spider.http_auth_domain
            self.domain_unset = False
            return

        warnings.warn(
            "Using HttpAuthMiddleware without http_auth_domain is deprecated and can cause security "
            "problems if the spider makes requests to several different domains. http_auth_domain "
            "will be set to the domain of the first request, please set it to the correct value "
            "explicitly.",
            category=ScrapyDeprecationWarning,
        )
        self.domain_unset = True

    def process_request(self, request, spider):
        """Add the stored ``Authorization`` header to ``request`` if allowed.

        The request is left untouched when no credentials were configured,
        when it already has an ``Authorization`` header, or when its URL is
        rejected by ``url_is_from_any_domain`` for the configured domain.
        Always returns ``None``.
        """
        auth = getattr(self, "auth", None)
        if not auth or b"Authorization" in request.headers:
            return

        if self.domain_unset:
            # Deprecated fallback: adopt the host of the first request.
            hostname = urlparse_cached(request).hostname
            if not hostname:
                # A URL without a host (hostname is None/empty) must not be
                # latched: a falsy ``self.domain`` disables the domain check
                # below, which would send the credentials to every domain
                # for the rest of the crawl.  Wait for a request with a host.
                return
            self.domain = hostname
            self.domain_unset = False

        # A falsy domain here can only come from an explicit
        # ``http_auth_domain`` on the spider and means "no restriction".
        if not self.domain or url_is_from_any_domain(request.url, [self.domain]):
            request.headers[b"Authorization"] = auth