Skip to content

Commit

Permalink
Merge pull request #80 from scrapy/add-typing
Browse files Browse the repository at this point in the history
Add typing.
  • Loading branch information
wRAR authored May 6, 2024
2 parents 854b996 + 5f56673 commit 6c8940d
Show file tree
Hide file tree
Showing 13 changed files with 244 additions and 83 deletions.
202 changes: 159 additions & 43 deletions itemloaders/__init__.py

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion itemloaders/common.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Common functions used in Item Loaders code"""

from functools import partial
from typing import Any, Callable, MutableMapping

from itemloaders.utils import get_func_args


def wrap_loader_context(function, context):
def wrap_loader_context(
function: Callable[..., Any], context: MutableMapping[str, Any]
) -> Callable[..., Any]:
"""Wrap functions that receive loader_context to contain the context
"pre-loaded" and expose an interface that receives only one argument
"""
Expand Down
37 changes: 23 additions & 14 deletions itemloaders/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

from collections import ChainMap
from typing import Any, Callable, Iterable, List, MutableMapping, Optional

from itemloaders.common import wrap_loader_context
from itemloaders.utils import arg_to_iter
Expand Down Expand Up @@ -54,19 +55,22 @@ class MapCompose:
.. _`parsel selectors`: https://parsel.readthedocs.io/en/latest/parsel.html#parsel.selector.Selector.extract
""" # noqa

def __init__(self, *functions, **default_loader_context):
def __init__(self, *functions: Callable[..., Any], **default_loader_context: Any):
self.functions = functions
self.default_loader_context = default_loader_context

def __call__(self, value, loader_context=None):
def __call__(
self, value: Any, loader_context: Optional[MutableMapping[str, Any]] = None
) -> Iterable[Any]:
values = arg_to_iter(value)
context: MutableMapping[str, Any]
if loader_context:
context = ChainMap(loader_context, self.default_loader_context)
else:
context = self.default_loader_context
wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions]
for func in wrapped_funcs:
next_values = []
next_values: List[Any] = []
for v in values:
try:
next_values += arg_to_iter(func(v))
Expand Down Expand Up @@ -109,12 +113,15 @@ class Compose:
<itemloaders.ItemLoader.context>` attribute.
"""

def __init__(self, *functions, **default_loader_context):
def __init__(self, *functions: Callable[..., Any], **default_loader_context: Any):
self.functions = functions
self.stop_on_none = default_loader_context.get("stop_on_none", True)
self.default_loader_context = default_loader_context

def __call__(self, value, loader_context=None):
def __call__(
self, value: Any, loader_context: Optional[MutableMapping[str, Any]] = None
) -> Any:
context: MutableMapping[str, Any]
if loader_context:
context = ChainMap(loader_context, self.default_loader_context)
else:
Expand Down Expand Up @@ -148,7 +155,7 @@ class TakeFirst:
'one'
"""

def __call__(self, values):
def __call__(self, values: Any) -> Any:
for value in values:
if value is not None and value != "":
return value
Expand All @@ -168,7 +175,7 @@ class Identity:
['one', 'two', 'three']
"""

def __call__(self, values):
def __call__(self, values: Any) -> Any:
return values


Expand Down Expand Up @@ -198,13 +205,15 @@ class SelectJmes:
['bar']
"""

def __init__(self, json_path):
self.json_path = json_path
import jmespath
def __init__(self, json_path: str):
self.json_path: str = json_path
import jmespath.parser

self.compiled_path = jmespath.compile(self.json_path)
self.compiled_path: jmespath.parser.ParsedResult = jmespath.compile(
self.json_path
)

def __call__(self, value):
def __call__(self, value: Any) -> Any:
"""Query value for the jmespath query and return answer
:param value: a data structure (dict, list) to extract from
:return: Element extracted according to jmespath query
Expand All @@ -231,8 +240,8 @@ class Join:
'one<br>two<br>three'
"""

def __init__(self, separator=" "):
def __init__(self, separator: str = " "):
self.separator = separator

def __call__(self, values):
def __call__(self, values: Any) -> str:
return self.separator.join(values)
8 changes: 4 additions & 4 deletions itemloaders/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

import inspect
from functools import partial
from typing import Generator
from typing import Any, Callable, Generator, Iterable, List


def arg_to_iter(arg):
def arg_to_iter(arg: Any) -> Iterable[Any]:
"""Return an iterable based on *arg*.
If *arg* is a list, a tuple or a generator, it will be returned as is.
Expand All @@ -25,12 +25,12 @@ def arg_to_iter(arg):
return [arg]


def get_func_args(func, stripself=False):
def get_func_args(func: Callable[..., Any], stripself: bool = False) -> List[str]:
"""Return the argument name list of a callable object"""
if not callable(func):
raise TypeError(f"func must be callable, got {type(func).__name__!r}")

args = []
args: List[str] = []
try:
sig = inspect.signature(func)
except ValueError:
Expand Down
9 changes: 8 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
[flake8]
ignore = E266, E501, W503
ignore = E266, E501, E704, W503
max-line-length = 100
select = B,C,E,F,W,T4,B9
exclude = .git,__pycache__,.venv

[isort]
profile = black

[mypy]

[mypy-tests.*]
# Allow test functions to be untyped
allow_untyped_defs = true
check_untyped_defs = true
Empty file added tests/__init__.py
Empty file.
8 changes: 4 additions & 4 deletions tests/test_base_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,17 @@ def test_output_processor_using_classes(self):
il.add_value("name", ["mar", "ta"])
self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"])

class TakeFirstItemLoader(CustomItemLoader):
class TakeFirstItemLoader1(CustomItemLoader):
name_out = Join()

il = TakeFirstItemLoader()
il = TakeFirstItemLoader1()
il.add_value("name", ["mar", "ta"])
self.assertEqual(il.get_output_value("name"), "Mar Ta")

class TakeFirstItemLoader(CustomItemLoader):
class TakeFirstItemLoader2(CustomItemLoader):
name_out = Join("<br>")

il = TakeFirstItemLoader()
il = TakeFirstItemLoader2()
il.add_value("name", ["mar", "ta"])
self.assertEqual(il.get_output_value("name"), "Mar<br>Ta")

Expand Down
33 changes: 22 additions & 11 deletions tests/test_loader_initialization.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,39 @@
import unittest
from typing import Any, Protocol

from itemloaders import ItemLoader


class InitializationTestProtocol(Protocol):
item_class: Any

def assertEqual(self, first: Any, second: Any, msg: Any = ...) -> None: ...

def assertIsInstance(self, obj: object, cls: type, msg: Any = None) -> None: ...


class InitializationTestMixin:
item_class = None
item_class: Any = None

def test_keep_single_value(self):
def test_keep_single_value(self: InitializationTestProtocol) -> None:
"""Loaded item should contain values from the initial item"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo"]})

def test_keep_list(self):
def test_keep_list(self: InitializationTestProtocol) -> None:
"""Loaded item should contain values from the initial item"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]})

def test_add_value_singlevalue_singlevalue(self):
def test_add_value_singlevalue_singlevalue(
self: InitializationTestProtocol,
) -> None:
"""Values added after initialization should be appended"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
Expand All @@ -31,7 +42,7 @@ def test_add_value_singlevalue_singlevalue(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]})

def test_add_value_singlevalue_list(self):
def test_add_value_singlevalue_list(self: InitializationTestProtocol) -> None:
"""Values added after initialization should be appended"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
Expand All @@ -40,7 +51,7 @@ def test_add_value_singlevalue_list(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "item", "loader"]})

def test_add_value_list_singlevalue(self):
def test_add_value_list_singlevalue(self: InitializationTestProtocol) -> None:
"""Values added after initialization should be appended"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
Expand All @@ -49,7 +60,7 @@ def test_add_value_list_singlevalue(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "qwerty"]})

def test_add_value_list_list(self):
def test_add_value_list_list(self: InitializationTestProtocol) -> None:
"""Values added after initialization should be appended"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
Expand All @@ -58,7 +69,7 @@ def test_add_value_list_list(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "item", "loader"]})

def test_get_output_value_singlevalue(self):
def test_get_output_value_singlevalue(self: InitializationTestProtocol) -> None:
"""Getting output value must not remove value from item"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
Expand All @@ -67,7 +78,7 @@ def test_get_output_value_singlevalue(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(loaded_item, {"name": ["foo"]})

def test_get_output_value_list(self):
def test_get_output_value_list(self: InitializationTestProtocol) -> None:
"""Getting output value must not remove value from item"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
Expand All @@ -76,13 +87,13 @@ def test_get_output_value_list(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(loaded_item, {"name": ["foo", "bar"]})

def test_values_single(self):
def test_values_single(self: InitializationTestProtocol) -> None:
"""Values from initial item must be added to loader._values"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
self.assertEqual(il._values.get("name"), ["foo"])

def test_values_list(self):
def test_values_list(self: InitializationTestProtocol) -> None:
"""Values from initial item must be added to loader._values"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
Expand Down
6 changes: 4 additions & 2 deletions tests/test_nested_items.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import unittest
from typing import Any

from itemloaders import ItemLoader


class NestedItemTest(unittest.TestCase):
"""Test that adding items as values works as expected."""

def _test_item(self, item):
def _test_item(self, item: Any) -> None:
il = ItemLoader()
il.add_value("item_list", item)
self.assertEqual(il.load_item(), {"item_list": [item]})
Expand Down Expand Up @@ -44,7 +45,8 @@ def test_scrapy_item(self):
except ImportError:
self.skipTest("Cannot import Field or Item from scrapy")

class TestItem(Item):
# needs py.typed in Scrapy
class TestItem(Item): # type: ignore[misc]
foo = Field()

self._test_item(TestItem(foo="bar"))
2 changes: 2 additions & 0 deletions tests/test_nested_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_nested_xpath(self):
nl = loader.nested_xpath("//header")
nl.add_xpath("name", "div/text()")
nl.add_css("name_div", "#id")
assert nl.selector
nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall())

self.assertEqual(loader.get_output_value("name"), ["marta"])
Expand All @@ -49,6 +50,7 @@ def test_nested_css(self):
nl = loader.nested_css("header")
nl.add_xpath("name", "div/text()")
nl.add_css("name_div", "#id")
assert nl.selector
nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall())

self.assertEqual(loader.get_output_value("name"), ["marta"])
Expand Down
5 changes: 3 additions & 2 deletions tests/test_output_processor.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import unittest
from typing import Any, Dict

from itemloaders import ItemLoader
from itemloaders.processors import Compose, Identity, TakeFirst


class TestOutputProcessorDict(unittest.TestCase):
def test_output_processor(self):
class TempDict(dict):
class TempDict(Dict[str, Any]):
def __init__(self, *args, **kwargs):
super(TempDict, self).__init__(self, *args, **kwargs)
self.setdefault("temp", 0.3)
Expand All @@ -28,7 +29,7 @@ class TempLoader(ItemLoader):
default_input_processor = Identity()
default_output_processor = Compose(TakeFirst())

item = {}
item: Dict[str, Any] = {}
item.setdefault("temp", 0.3)
loader = TempLoader(item=item)
item = loader.load_item()
Expand Down
3 changes: 2 additions & 1 deletion tests/test_utils_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import operator
import platform
import unittest
from typing import Any

from itemloaders.utils import get_func_args

Expand All @@ -18,7 +19,7 @@ def f3(a, b=None, *, c=None):
pass

class A:
def __init__(self, a, b, c):
def __init__(self, a: Any, b: Any, c: Any):
pass

def method(self, a, b, c):
Expand Down
9 changes: 9 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,12 @@ deps =
commands =
python -m build --sdist
twine check dist/*

[testenv:typing]
basepython = python3
deps =
mypy==1.10.0
types-attrs==19.1.0
types-jmespath==1.0.2.20240106
commands =
mypy --strict --ignore-missing-imports --implicit-reexport {posargs:itemloaders tests}

0 comments on commit 6c8940d

Please sign in to comment.