Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add typing. #80

Merged
merged 1 commit into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 159 additions & 43 deletions itemloaders/__init__.py

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion itemloaders/common.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Common functions used in Item Loaders code"""

from functools import partial
from typing import Any, Callable, MutableMapping

from itemloaders.utils import get_func_args


def wrap_loader_context(function, context):
def wrap_loader_context(
function: Callable[..., Any], context: MutableMapping[str, Any]
) -> Callable[..., Any]:
"""Wrap functions that receive loader_context to contain the context
"pre-loaded" and expose an interface that receives only one argument
"""
Expand Down
37 changes: 23 additions & 14 deletions itemloaders/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""

from collections import ChainMap
from typing import Any, Callable, Iterable, List, MutableMapping, Optional

from itemloaders.common import wrap_loader_context
from itemloaders.utils import arg_to_iter
Expand Down Expand Up @@ -54,19 +55,22 @@ class MapCompose:
.. _`parsel selectors`: https://parsel.readthedocs.io/en/latest/parsel.html#parsel.selector.Selector.extract
""" # noqa

def __init__(self, *functions, **default_loader_context):
def __init__(self, *functions: Callable[..., Any], **default_loader_context: Any):
self.functions = functions
self.default_loader_context = default_loader_context

def __call__(self, value, loader_context=None):
def __call__(
self, value: Any, loader_context: Optional[MutableMapping[str, Any]] = None
) -> Iterable[Any]:
values = arg_to_iter(value)
context: MutableMapping[str, Any]
if loader_context:
context = ChainMap(loader_context, self.default_loader_context)
else:
context = self.default_loader_context
wrapped_funcs = [wrap_loader_context(f, context) for f in self.functions]
for func in wrapped_funcs:
next_values = []
next_values: List[Any] = []
for v in values:
try:
next_values += arg_to_iter(func(v))
Expand Down Expand Up @@ -109,12 +113,15 @@ class Compose:
<itemloaders.ItemLoader.context>` attribute.
"""

def __init__(self, *functions, **default_loader_context):
def __init__(self, *functions: Callable[..., Any], **default_loader_context: Any):
self.functions = functions
self.stop_on_none = default_loader_context.get("stop_on_none", True)
self.default_loader_context = default_loader_context

def __call__(self, value, loader_context=None):
def __call__(
self, value: Any, loader_context: Optional[MutableMapping[str, Any]] = None
) -> Any:
context: MutableMapping[str, Any]
if loader_context:
context = ChainMap(loader_context, self.default_loader_context)
else:
Expand Down Expand Up @@ -148,7 +155,7 @@ class TakeFirst:
'one'
"""

def __call__(self, values):
def __call__(self, values: Any) -> Any:
for value in values:
if value is not None and value != "":
return value
Expand All @@ -168,7 +175,7 @@ class Identity:
['one', 'two', 'three']
"""

def __call__(self, values):
def __call__(self, values: Any) -> Any:
return values


Expand Down Expand Up @@ -198,13 +205,15 @@ class SelectJmes:
['bar']
"""

def __init__(self, json_path):
self.json_path = json_path
import jmespath
def __init__(self, json_path: str):
self.json_path: str = json_path
import jmespath.parser

self.compiled_path = jmespath.compile(self.json_path)
self.compiled_path: jmespath.parser.ParsedResult = jmespath.compile(
self.json_path
)

def __call__(self, value):
def __call__(self, value: Any) -> Any:
"""Query value for the jmespath query and return answer
:param value: a data structure (dict, list) to extract from
:return: Element extracted according to jmespath query
Expand All @@ -231,8 +240,8 @@ class Join:
'one<br>two<br>three'
"""

def __init__(self, separator=" "):
def __init__(self, separator: str = " "):
self.separator = separator

def __call__(self, values):
def __call__(self, values: Any) -> str:
return self.separator.join(values)
8 changes: 4 additions & 4 deletions itemloaders/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

import inspect
from functools import partial
from typing import Generator
from typing import Any, Callable, Generator, Iterable, List


def arg_to_iter(arg):
def arg_to_iter(arg: Any) -> Iterable[Any]:
"""Return an iterable based on *arg*.

If *arg* is a list, a tuple or a generator, it will be returned as is.
Expand All @@ -25,12 +25,12 @@ def arg_to_iter(arg):
return [arg]


def get_func_args(func, stripself=False):
def get_func_args(func: Callable[..., Any], stripself: bool = False) -> List[str]:
"""Return the argument name list of a callable object"""
if not callable(func):
raise TypeError(f"func must be callable, got {type(func).__name__!r}")

args = []
args: List[str] = []
try:
sig = inspect.signature(func)
except ValueError:
Expand Down
9 changes: 8 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
[flake8]
ignore = E266, E501, W503
ignore = E266, E501, E704, W503
max-line-length = 100
select = B,C,E,F,W,T4,B9
exclude = .git,__pycache__,.venv

[isort]
profile = black

[mypy]

[mypy-tests.*]
# Allow test functions to be untyped
allow_untyped_defs = true
check_untyped_defs = true
Empty file added tests/__init__.py
Empty file.
8 changes: 4 additions & 4 deletions tests/test_base_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,17 @@ def test_output_processor_using_classes(self):
il.add_value("name", ["mar", "ta"])
self.assertEqual(il.get_output_value("name"), ["Mar", "Ta"])

class TakeFirstItemLoader(CustomItemLoader):
class TakeFirstItemLoader1(CustomItemLoader):
name_out = Join()

il = TakeFirstItemLoader()
il = TakeFirstItemLoader1()
il.add_value("name", ["mar", "ta"])
self.assertEqual(il.get_output_value("name"), "Mar Ta")

class TakeFirstItemLoader(CustomItemLoader):
class TakeFirstItemLoader2(CustomItemLoader):
name_out = Join("<br>")

il = TakeFirstItemLoader()
il = TakeFirstItemLoader2()
il.add_value("name", ["mar", "ta"])
self.assertEqual(il.get_output_value("name"), "Mar<br>Ta")

Expand Down
33 changes: 22 additions & 11 deletions tests/test_loader_initialization.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,39 @@
import unittest
from typing import Any, Protocol

from itemloaders import ItemLoader


class InitializationTestProtocol(Protocol):
item_class: Any

def assertEqual(self, first: Any, second: Any, msg: Any = ...) -> None: ...

def assertIsInstance(self, obj: object, cls: type, msg: Any = None) -> None: ...


class InitializationTestMixin:
item_class = None
item_class: Any = None

def test_keep_single_value(self):
def test_keep_single_value(self: InitializationTestProtocol) -> None:
"""Loaded item should contain values from the initial item"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo"]})

def test_keep_list(self):
def test_keep_list(self: InitializationTestProtocol) -> None:
"""Loaded item should contain values from the initial item"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
loaded_item = il.load_item()
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]})

def test_add_value_singlevalue_singlevalue(self):
def test_add_value_singlevalue_singlevalue(
self: InitializationTestProtocol,
) -> None:
"""Values added after initialization should be appended"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
Expand All @@ -31,7 +42,7 @@ def test_add_value_singlevalue_singlevalue(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "bar"]})

def test_add_value_singlevalue_list(self):
def test_add_value_singlevalue_list(self: InitializationTestProtocol) -> None:
"""Values added after initialization should be appended"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
Expand All @@ -40,7 +51,7 @@ def test_add_value_singlevalue_list(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "item", "loader"]})

def test_add_value_list_singlevalue(self):
def test_add_value_list_singlevalue(self: InitializationTestProtocol) -> None:
"""Values added after initialization should be appended"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
Expand All @@ -49,7 +60,7 @@ def test_add_value_list_singlevalue(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "qwerty"]})

def test_add_value_list_list(self):
def test_add_value_list_list(self: InitializationTestProtocol) -> None:
"""Values added after initialization should be appended"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
Expand All @@ -58,7 +69,7 @@ def test_add_value_list_list(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(dict(loaded_item), {"name": ["foo", "bar", "item", "loader"]})

def test_get_output_value_singlevalue(self):
def test_get_output_value_singlevalue(self: InitializationTestProtocol) -> None:
"""Getting output value must not remove value from item"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
Expand All @@ -67,7 +78,7 @@ def test_get_output_value_singlevalue(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(loaded_item, {"name": ["foo"]})

def test_get_output_value_list(self):
def test_get_output_value_list(self: InitializationTestProtocol) -> None:
"""Getting output value must not remove value from item"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
Expand All @@ -76,13 +87,13 @@ def test_get_output_value_list(self):
self.assertIsInstance(loaded_item, self.item_class)
self.assertEqual(loaded_item, {"name": ["foo", "bar"]})

def test_values_single(self):
def test_values_single(self: InitializationTestProtocol) -> None:
"""Values from initial item must be added to loader._values"""
input_item = self.item_class(name="foo")
il = ItemLoader(item=input_item)
self.assertEqual(il._values.get("name"), ["foo"])

def test_values_list(self):
def test_values_list(self: InitializationTestProtocol) -> None:
"""Values from initial item must be added to loader._values"""
input_item = self.item_class(name=["foo", "bar"])
il = ItemLoader(item=input_item)
Expand Down
6 changes: 4 additions & 2 deletions tests/test_nested_items.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import unittest
from typing import Any

from itemloaders import ItemLoader


class NestedItemTest(unittest.TestCase):
"""Test that adding items as values works as expected."""

def _test_item(self, item):
def _test_item(self, item: Any) -> None:
il = ItemLoader()
il.add_value("item_list", item)
self.assertEqual(il.load_item(), {"item_list": [item]})
Expand Down Expand Up @@ -44,7 +45,8 @@ def test_scrapy_item(self):
except ImportError:
self.skipTest("Cannot import Field or Item from scrapy")

class TestItem(Item):
# needs py.typed in Scrapy
class TestItem(Item): # type: ignore[misc]
foo = Field()

self._test_item(TestItem(foo="bar"))
2 changes: 2 additions & 0 deletions tests/test_nested_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def test_nested_xpath(self):
nl = loader.nested_xpath("//header")
nl.add_xpath("name", "div/text()")
nl.add_css("name_div", "#id")
assert nl.selector
nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall())

self.assertEqual(loader.get_output_value("name"), ["marta"])
Expand All @@ -49,6 +50,7 @@ def test_nested_css(self):
nl = loader.nested_css("header")
nl.add_xpath("name", "div/text()")
nl.add_css("name_div", "#id")
assert nl.selector
nl.add_value("name_value", nl.selector.xpath('div[@id = "id"]/text()').getall())

self.assertEqual(loader.get_output_value("name"), ["marta"])
Expand Down
5 changes: 3 additions & 2 deletions tests/test_output_processor.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import unittest
from typing import Any, Dict

from itemloaders import ItemLoader
from itemloaders.processors import Compose, Identity, TakeFirst


class TestOutputProcessorDict(unittest.TestCase):
def test_output_processor(self):
class TempDict(dict):
class TempDict(Dict[str, Any]):
def __init__(self, *args, **kwargs):
super(TempDict, self).__init__(self, *args, **kwargs)
self.setdefault("temp", 0.3)
Expand All @@ -28,7 +29,7 @@ class TempLoader(ItemLoader):
default_input_processor = Identity()
default_output_processor = Compose(TakeFirst())

item = {}
item: Dict[str, Any] = {}
item.setdefault("temp", 0.3)
loader = TempLoader(item=item)
item = loader.load_item()
Expand Down
3 changes: 2 additions & 1 deletion tests/test_utils_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import operator
import platform
import unittest
from typing import Any

from itemloaders.utils import get_func_args

Expand All @@ -18,7 +19,7 @@ def f3(a, b=None, *, c=None):
pass

class A:
def __init__(self, a, b, c):
def __init__(self, a: Any, b: Any, c: Any):
pass

def method(self, a, b, c):
Expand Down
9 changes: 9 additions & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,12 @@ deps =
commands =
python -m build --sdist
twine check dist/*

[testenv:typing]
basepython = python3
deps =
mypy==1.10.0
types-attrs==19.1.0
types-jmespath==1.0.2.20240106
commands =
mypy --strict --ignore-missing-imports --implicit-reexport {posargs:itemloaders tests}
Loading