Skip to content

Commit

Permalink
Merge remote-tracking branch 'scrapy-plugins/main' into auto-fields
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Sep 26, 2024
2 parents 959bbdf + a2061e8 commit a5ffb08
Show file tree
Hide file tree
Showing 16 changed files with 861 additions and 250 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.22.0
current_version = 0.22.1
commit = True
tag = True
tag_name = {new_version}
Expand Down
8 changes: 8 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
Changes
=======

0.22.1 (2024-08-30)
-------------------

* Fixed an issue in the handling of excessive session initialization failures
during session refreshing, which would manifest as asyncio messages about
unretrieved ``TooManyBadSessionInits`` task exceptions instead of stopping
the spider as intended.

0.22.0 (2024-07-26)
-------------------

Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
project = "scrapy-zyte-api"
copyright = "2023, Zyte Group Ltd"
author = "Zyte Group Ltd"
release = "0.22.0"
release = "0.22.1"

sys.path.insert(0, str(Path(__file__).parent.absolute())) # _ext
extensions = [
Expand Down
40 changes: 40 additions & 0 deletions docs/usage/scrapy-poet.rst
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,46 @@ resulting page object:
return Product(is_valid=False)
return None
.. _custom-attrs:

Custom attribute extraction
---------------------------

You can request custom attribute extraction by using either a
:class:`scrapy_zyte_api.CustomAttributes` dependency (if you need both the
attribute values and the attribute extraction metadata) or a
:class:`scrapy_zyte_api.CustomAttributesValues` dependency (if you only need
the values). You need to annotate it with input data as a dictionary and, if
needed, a dictionary with extraction options. You should use the
:func:`scrapy_zyte_api.custom_attrs` function to create the annotation:

.. code-block:: python

    from typing import Annotated

    from scrapy_zyte_api import CustomAttributes, custom_attrs


    @attrs.define
    class MyPageObject(BasePage):
        product: Product
        custom_attributes: Annotated[
            CustomAttributes,
            custom_attrs(
                {"name": {"type": "string", "description": "name of the product"}},
                {"method": "generate"},
            ),
        ]

You can then access the results as the dependency value:

.. code-block:: python

    def parse_page(self, response: DummyResponse, page: MyPageObject):
        ...
        for k, v in page.custom_attributes.items():
            ...

Custom parameters
=================
Expand Down
9 changes: 9 additions & 0 deletions docs/usage/session.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,15 @@ To define a different session config for a given URL pattern, install

.. autofunction:: scrapy_zyte_api.session_config

If you only need to override the :meth:`SessionConfig.check
<scrapy_zyte_api.SessionConfig.check>` or :meth:`SessionConfig.params
<scrapy_zyte_api.SessionConfig.params>` methods for scenarios involving a
location, you may subclass :class:`~scrapy_zyte_api.LocationSessionConfig`
instead:

.. autoclass:: scrapy_zyte_api.LocationSessionConfig
    :members: location_check, location_params

If in a session config implementation or in any other Scrapy component you need
to tell whether a request is a :ref:`session initialization request
<session-init>` or not, use :func:`~scrapy_zyte_api.is_session_init_request`:
Expand Down
3 changes: 2 additions & 1 deletion scrapy_zyte_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")

from ._annotations import ExtractFrom, actions
from ._annotations import ExtractFrom, actions, custom_attrs
from ._middlewares import (
ScrapyZyteAPIDownloaderMiddleware,
ScrapyZyteAPISpiderMiddleware,
Expand All @@ -17,6 +17,7 @@
)
from ._session import SESSION_DEFAULT_RETRY_POLICY as _SESSION_DEFAULT_RETRY_POLICY
from ._session import (
LocationSessionConfig,
ScrapyZyteAPISessionDownloaderMiddleware,
SessionConfig,
is_session_init_request,
Expand Down
2 changes: 1 addition & 1 deletion scrapy_zyte_api/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.22.0"
__version__ = "0.22.1"
26 changes: 23 additions & 3 deletions scrapy_zyte_api/_annotations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Iterable, List, Optional, TypedDict
from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple, TypedDict


class ExtractFrom(str, Enum):
Expand Down Expand Up @@ -56,7 +56,8 @@ class _ActionResult(TypedDict, total=False):
error: Optional[str]


def make_hashable(obj):
def make_hashable(obj: Any) -> Any:
"""Converts input into hashable form, to use in ``Annotated``."""
if isinstance(obj, (tuple, list)):
return tuple((make_hashable(e) for e in obj))

Expand All @@ -66,7 +67,26 @@ def make_hashable(obj):
return obj


def actions(value: Iterable[Action]):
def _from_hashable(obj: Any) -> Any:
"""Converts a result of ``make_hashable`` back to original form."""
if isinstance(obj, tuple):
return [_from_hashable(o) for o in obj]

if isinstance(obj, frozenset):
return {_from_hashable(k): _from_hashable(v) for k, v in obj}

return obj


def actions(value: Iterable[Action]) -> Tuple[Any, ...]:
    """Return the :class:`~scrapy_zyte_api.Action` dicts in *value* as a hashable tuple."""
    # Dependency types have to be hashable, which neither lists nor dicts
    # are, so each action is converted before being packed into a tuple.
    return tuple(map(make_hashable, value))


def custom_attrs(
    input: Dict[str, Any], options: Optional[Dict[str, Any]] = None
) -> Tuple[FrozenSet[Any], Optional[FrozenSet[Any]]]:
    """Build the hashable ``(input, options)`` pair for custom attribute extraction.

    *input* is always converted with ``make_hashable``. *options* is
    converted the same way, unless it is ``None`` or empty, in which case
    ``None`` takes its place in the returned pair.
    """
    hashable_input = make_hashable(input)
    if not options:
        return hashable_input, None
    return hashable_input, make_hashable(options)
Loading

0 comments on commit a5ffb08

Please sign in to comment.