# Source code for scrapy.downloadermiddlewares.httpauth

"""
HTTP basic auth downloader middleware

See documentation in docs/topics/downloader-middleware.rst
"""
import warnings

from w3lib.http import basic_auth_header

from scrapy import signals
from scrapy.exceptions import ScrapyDeprecationWarning
from scrapy.utils.httpobj import urlparse_cached
from scrapy.utils.url import url_is_from_any_domain


class HttpAuthMiddleware:
    """Set Basic HTTP Authorization header
    (http_user and http_pass spider class attributes)

    The header is only attached to requests whose URL matches the spider's
    ``http_auth_domain`` attribute. When the spider does not define that
    attribute, a deprecation warning is emitted and the hostname of the first
    request that has one is used instead. A falsy ``http_auth_domain``
    (for example ``None``) disables the domain check entirely.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and hook ``spider_opened`` to the crawler's
        ``spider_opened`` signal."""
        o = cls()
        crawler.signals.connect(o.spider_opened, signal=signals.spider_opened)
        return o

    def spider_opened(self, spider):
        """Read the credentials and auth domain from *spider*.

        Nothing is configured (and ``process_request`` stays a no-op) when
        both ``http_user`` and ``http_pass`` are missing or empty.
        """
        usr = getattr(spider, 'http_user', '')
        pwd = getattr(spider, 'http_pass', '')
        if not (usr or pwd):
            return

        self.auth = basic_auth_header(usr, pwd)
        if hasattr(spider, 'http_auth_domain'):
            self.domain = spider.http_auth_domain
            self.domain_unset = False
            return

        warnings.warn('Using HttpAuthMiddleware without http_auth_domain is deprecated and can cause security '
                      'problems if the spider makes requests to several different domains. http_auth_domain '
                      'will be set to the domain of the first request, please set it to the correct value '
                      'explicitly.',
                      category=ScrapyDeprecationWarning)
        # The domain is filled in lazily by process_request().
        self.domain_unset = True

    def process_request(self, request, spider):
        """Add the ``Authorization`` header to *request* when appropriate.

        The header is never overwritten if the request already carries one,
        and it is only added when the request URL belongs to the configured
        auth domain (or when the configured domain is falsy).
        """
        auth = getattr(self, 'auth', None)
        if not auth or b'Authorization' in request.headers:
            return

        if self.domain_unset:
            hostname = urlparse_cached(request).hostname
            if not hostname:
                # A URL without a hostname cannot define the auth domain.
                # Latching None here would make the check below pass for
                # every later request and leak the credentials to any
                # domain, so wait for a request that has a real hostname.
                return
            self.domain = hostname
            self.domain_unset = False

        if not self.domain or url_is_from_any_domain(request.url, [self.domain]):
            request.headers[b'Authorization'] = auth