Skip to content

Commit

Permalink
Switch the custom attrs implementation to a dependency.
Browse files Browse the repository at this point in the history
  • Loading branch information
wRAR committed Sep 17, 2024
1 parent cf8962a commit 3f374f0
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 29 deletions.
40 changes: 37 additions & 3 deletions scrapy_zyte_api/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
AutoProductListPage,
AutoProductNavigationPage,
AutoProductPage,
CustomAttributes,
CustomAttributesMetadata,
CustomAttributesValues,
Item,
JobPosting,
Product,
Expand Down Expand Up @@ -76,6 +79,8 @@ class ZyteApiProvider(PageObjectInputProvider):
ArticleNavigation,
BrowserHtml,
BrowserResponse,
CustomAttributes,
CustomAttributesValues,
Geolocation,
JobPosting,
Product,
Expand Down Expand Up @@ -186,6 +191,17 @@ async def __call__( # noqa: C901
}
)
continue
if cls_stripped in {CustomAttributes, CustomAttributesValues}:
zyte_api_meta["customAttributes"] = {
k: (
dict(v)
if isinstance(v, frozenset)
else list(v) if isinstance(v, tuple) else v
)
for k, v in cls.__metadata__[0] # type: ignore[attr-defined]
}

continue

Check warning on line 204 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L204

Added line #L204 was not covered by tests
kw = _ITEM_KEYWORDS.get(cls_stripped)
if not kw:
continue
Expand Down Expand Up @@ -322,14 +338,32 @@ async def __call__( # noqa: C901
result = AnnotatedInstance(Actions(actions_result), cls.__metadata__) # type: ignore[attr-defined]
results.append(result)
continue
if cls_stripped is CustomAttributes and is_typing_annotated(cls):
custom_attrs_result = api_response.raw_api_response["customAttributes"]
result = AnnotatedInstance(

Check warning on line 343 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L342-L343

Added lines #L342 - L343 were not covered by tests
CustomAttributes(
CustomAttributesValues(custom_attrs_result["values"]),
CustomAttributesMetadata.from_dict(
custom_attrs_result["metadata"]
),
),
cls.__metadata__, # type: ignore[attr-defined]
)
results.append(result)
continue

Check warning on line 353 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L352-L353

Added lines #L352 - L353 were not covered by tests
if cls_stripped is CustomAttributesValues and is_typing_annotated(cls):
custom_attrs_result = api_response.raw_api_response["customAttributes"]
result = AnnotatedInstance(

Check warning on line 356 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L355-L356

Added lines #L355 - L356 were not covered by tests
CustomAttributesValues(custom_attrs_result["values"]),
cls.__metadata__, # type: ignore[attr-defined]
)
results.append(result)
continue

Check warning on line 361 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L360-L361

Added lines #L360 - L361 were not covered by tests
kw = _ITEM_KEYWORDS.get(cls_stripped)
if not kw:
continue
assert issubclass(cls_stripped, Item)
result = cls_stripped.from_dict(api_response.raw_api_response[kw]) # type: ignore[attr-defined]
custom_attrs = api_response.raw_api_response.get("customAttributes")
if custom_attrs:
result.customAttributes = custom_attrs.get("values", {}) # type: ignore[attr-defined]
if is_typing_annotated(cls):
result = AnnotatedInstance(result, cls.__metadata__) # type: ignore[attr-defined]
results.append(result)
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def get_version():
"andi>=0.6.0",
"scrapy-poet>=0.22.3",
"web-poet>=0.17.0",
# https://github.com/zytedata/zyte-common-items/pull/100
"zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs",
# https://github.com/zytedata/zyte-common-items/pull/106
"zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs-dep",
]
},
classifiers=[
Expand Down
2 changes: 1 addition & 1 deletion tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def render_POST(self, request):
if "customAttributes" in request_data:
response_data["customAttributes"] = {
"metadata": {
"totalInputTokens": "1000",
"textInputTokens": 1000,
},
"values": {
"attr1": "foo",
Expand Down
123 changes: 101 additions & 22 deletions tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pytest

from scrapy_zyte_api._annotations import make_hashable

pytest.importorskip("scrapy_poet")

import attrs
Expand All @@ -24,7 +26,14 @@
handle_urls,
)
from web_poet.pages import get_item_cls
from zyte_common_items import AutoProductPage, BasePage, BaseProductPage, Product
from zyte_common_items import (
AutoProductPage,
BasePage,
BaseProductPage,
CustomAttributes,
CustomAttributesValues,
Product,
)
from zyte_common_items.fields import auto_field

from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot, actions
Expand Down Expand Up @@ -394,31 +403,105 @@ def parse_(self, response: DummyResponse, page: GeoProductPage): # type: ignore
assert "Geolocation dependencies must be annotated" in caplog.text


@pytest.mark.skipif(
sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9"
)
@ensureDeferred
async def test_provider_custom_attrs(mockserver):
from typing import Annotated

@attrs.define
class CustomAttrsPage(BasePage):
product: Product
custom_attrs: Annotated[
CustomAttributes,
make_hashable(
{
"attr1": {"type": "string", "description": "descr1"},
"attr2": {"type": "number", "description": "descr2"},
}
),
]

class CustomAttrsZyteAPISpider(ZyteAPISpider):
def parse_(self, response: DummyResponse, page: CustomAttrsPage): # type: ignore[override]
yield {
"product": page.product,
"custom_attrs": page.custom_attrs,
}

settings = create_scrapy_settings()
settings["ZYTE_API_URL"] = mockserver.urljoin("/")
settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
settings["ZYTE_API_PROVIDER_PARAMS"] = {
"customAttributes": {
"attr1": {"type": "string", "description": "descr1"},
"attr2": {"type": "number", "description": "descr2"},
}
}

item, url, _ = await crawl_single_item(ZyteAPISpider, HtmlResource, settings)
item, url, _ = await crawl_single_item(
CustomAttrsZyteAPISpider, HtmlResource, settings
)
assert item["product"] == Product.from_dict(
dict(
url=url,
name="Product name",
price="10",
currency="USD",
customAttributes={
)
)
assert item["custom_attrs"] == CustomAttributes.from_dict(
{
"values": {
"attr1": "foo",
"attr2": 42,
},
"metadata": {"textInputTokens": 1000},
}
)


# Verifies that a page-object dependency annotated as
# Annotated[CustomAttributesValues, <schema>] is resolved by ZyteApiProvider
# and that the injected value compares equal to a plain mapping of attribute
# name -> extracted value, while the Product dependency is still provided.
@pytest.mark.skipif(
sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9"
)
@ensureDeferred
async def test_provider_custom_attrs_values(mockserver):
# Imported inside the test because the test is skipped on Python < 3.9
# (see the skipif reason above).
from typing import Annotated

# Page object requesting both a Product and the custom attribute values.
# The custom attribute schema is attached as Annotated metadata after being
# passed through make_hashable.
# NOTE(review): presumably make_hashable is needed because Annotated
# metadata must be hashable for dependency resolution - confirm.
@attrs.define
class CustomAttrsPage(BasePage):
product: Product
custom_attrs: Annotated[
CustomAttributesValues,
make_hashable(
{
"attr1": {"type": "string", "description": "descr1"},
"attr2": {"type": "number", "description": "descr2"},
}
),
]

# Spider whose callback yields both injected dependencies so the test can
# inspect them separately.
class CustomAttrsZyteAPISpider(ZyteAPISpider):
def parse_(self, response: DummyResponse, page: CustomAttrsPage): # type: ignore[override]
yield {
"product": page.product,
"custom_attrs": page.custom_attrs,
}

# Point Zyte API requests at the mock server and enable ZyteApiProvider as
# the scrapy-poet provider.
settings = create_scrapy_settings()
settings["ZYTE_API_URL"] = mockserver.urljoin("/")
settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}

item, url, _ = await crawl_single_item(
CustomAttrsZyteAPISpider, HtmlResource, settings
)
# The product is expected to carry only the regular product fields.
assert item["product"] == Product.from_dict(
dict(
url=url,
name="Product name",
price="10",
currency="USD",
)
)
# Only the extracted values are expected here (no metadata).
# NOTE(review): these values are assumed to be what the mock server
# returns under customAttributes["values"] - confirm in tests/mockserver.py.
assert item["custom_attrs"] == {
"attr1": "foo",
"attr2": 42,
}


class RecordingHandler(ScrapyZyteAPIDownloadHandler):
Expand Down Expand Up @@ -1184,9 +1267,9 @@ def parse(self, response: DummyResponse, product: Product):
assert auto_field_stats == {
"scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_partial_override.<locals>.MyProductPage": (
"additionalProperties aggregateRating availability breadcrumbs "
"canonicalUrl color currency currencyRaw customAttributes description "
"descriptionHtml features gtin images mainImage metadata mpn price "
"productId regularPrice size sku style url variants"
"canonicalUrl color currency currencyRaw description descriptionHtml "
"features gtin images mainImage metadata mpn price productId "
"regularPrice size sku style url variants"
),
}

Expand Down Expand Up @@ -1240,10 +1323,6 @@ def currency(self):
def currencyRaw(self):
return self.product.currencyRaw

@field
def customAttributes(self):
return self.product.customAttributes

@field
def description(self):
return self.product.description
Expand Down Expand Up @@ -1428,9 +1507,9 @@ def parse(self, response: DummyResponse, page: MyProductPage):
assert auto_field_stats == {
"scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_item_page_override.<locals>.MyProductPage": (
"additionalProperties aggregateRating availability breadcrumbs "
"canonicalUrl color currency currencyRaw customAttributes description "
"descriptionHtml features gtin images mainImage metadata mpn price "
"productId regularPrice size sku style url variants"
"canonicalUrl color currency currencyRaw description descriptionHtml "
"features gtin images mainImage metadata mpn price productId "
"regularPrice size sku style url variants"
),
}

Expand Down Expand Up @@ -1494,9 +1573,9 @@ def parse(self, response: DummyResponse, page: AltProductPage):
assert auto_field_stats == {
"scrapy-zyte-api/auto_fields/tests.test_providers.test_auto_field_stats_alt_page_override.<locals>.MyProductPage": (
"additionalProperties aggregateRating availability breadcrumbs "
"canonicalUrl color currency currencyRaw customAttributes description "
"descriptionHtml features gtin images mainImage metadata mpn price "
"productId regularPrice size sku style url variants"
"canonicalUrl color currency currencyRaw description descriptionHtml "
"features gtin images mainImage metadata mpn price productId "
"regularPrice size sku style url variants"
),
}

Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ deps =
andi==0.6.0
scrapy-poet==0.22.3
web-poet==0.17.0
zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs
zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs-dep

[testenv:pinned-extra]
basepython=python3.8
Expand Down

0 comments on commit 3f374f0

Please sign in to comment.