Source code for scrapy.loader.processors

"""
This module provides some commonly used processors for Item Loaders.

See documentation in docs/topics/loaders.rst
"""
from collections import ChainMap

from scrapy.utils.misc import arg_to_iter
from scrapy.loader.common import wrap_loader_context


[docs]class MapCompose(object): def __init__(self, *functions, **default_loader_context): self.functions = functions self.default_loader_context = default_loader_context def __call__(self, value, loader_context=None): values = arg_to_iter(value) if loader_context: context = ChainMap(loader_context, self.default_loader_context) else: context = self.default_loader_context wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions] for func in wrapped_funcs: next_values = [] for v in values: try: next_values += arg_to_iter(func(v)) except Exception as e: raise ValueError("Error in MapCompose with " "%s value=%r error='%s: %s'" % (str(func), value, type(e).__name__, str(e))) values = next_values return values
[docs]class Compose(object): def __init__(self, *functions, **default_loader_context): self.functions = functions self.stop_on_none = default_loader_context.get('stop_on_none', True) self.default_loader_context = default_loader_context def __call__(self, value, loader_context=None): if loader_context: context = ChainMap(loader_context, self.default_loader_context) else: context = self.default_loader_context wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions] for func in wrapped_funcs: if value is None and self.stop_on_none: break try: value = func(value) except Exception as e: raise ValueError("Error in Compose with " "%s value=%r error='%s: %s'" % (str(func), value, type(e).__name__, str(e))) return value
[docs]class TakeFirst(object): def __call__(self, values): for value in values: if value is not None and value != '': return value
[docs]class Identity(object): def __call__(self, values): return values
[docs]class SelectJmes(object): """ Query the input string for the jmespath (given at instantiation), and return the answer Requires : jmespath(https://github.com/jmespath/jmespath) Note: SelectJmes accepts only one input element at a time. """ def __init__(self, json_path): self.json_path = json_path import jmespath self.compiled_path = jmespath.compile(self.json_path) def __call__(self, value): """Query value for the jmespath query and return answer :param value: a data structure (dict, list) to extract from :return: Element extracted according to jmespath query """ return self.compiled_path.search(value)
[docs]class Join(object): def __init__(self, separator=u' '): self.separator = separator def __call__(self, values): return self.separator.join(values)