Skip to content

Commit

Permalink
Complete test coverage with a significant cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Jul 12, 2024
1 parent ab702fb commit 0795cf3
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 20 deletions.
21 changes: 2 additions & 19 deletions scrapy_zyte_api/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from scrapy import Request
from scrapy.crawler import Crawler
from scrapy.utils.defer import maybe_deferred_to_future
from scrapy_poet import InjectionMiddleware, PageObjectInputProvider
from scrapy_poet import PageObjectInputProvider
from web_poet import (
AnyResponse,
BrowserHtml,
Expand Down Expand Up @@ -86,7 +86,6 @@ class ZyteApiProvider(PageObjectInputProvider):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self._injection_mw = None
self._should_track_auto_fields = None
self._tracked_auto_fields = set()

Expand All @@ -102,23 +101,7 @@ def _track_auto_fields(self, crawler: Crawler, request: Request, cls: Type):
)
if self._should_track_auto_fields is False:
return
if self._injection_mw is None:
try:
self._injection_mw = crawler.get_downloader_middleware(
InjectionMiddleware
)
except AttributeError:
for component in crawler.engine.downloader.middleware.middlewares:
if isinstance(component, InjectionMiddleware):
self._injection_mw = component
break
if self._injection_mw is None:
raise RuntimeError(
"Could not find the InjectionMiddleware among enabled "
"downloader middlewares. Please, ensure you have properly "
"configured scrapy-poet."
)
cls = self._injection_mw.registry.page_cls_for_item(request.url, cls) or cls
cls = self.injector.registry.page_cls_for_item(request.url, cls) or cls
if cls in self._tracked_auto_fields:
return
self._tracked_auto_fields.add(cls)
Expand Down
32 changes: 31 additions & 1 deletion tests/test_providers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import sys
from collections import defaultdict

import pytest

Expand Down Expand Up @@ -1054,17 +1055,45 @@ async def test_auto_field_stats_no_override(mockserver):
"""When requesting an item directly from Zyte API, without an override to
change fields, stats reflect the entire list of item fields."""

from scrapy.statscollectors import MemoryStatsCollector

duplicate_stat_calls = defaultdict(int)

class OnlyOnceStatsCollector(MemoryStatsCollector):
    """Stats collector that counts repeated writes to auto-field stat keys.

    Any set/inc/max/min call targeting a ``scrapy-zyte-api/auto_fields/``
    key that is already present in the stats is recorded in the enclosing
    ``duplicate_stat_calls`` counter, so the test can assert each such key
    is only ever written once.
    """

    def track_duplicate_stat_calls(self, key):
        # Only auto-field stat keys are of interest.
        if not key.startswith("scrapy-zyte-api/auto_fields/"):
            return
        # First write for this key: nothing to record.
        if key not in self._stats:
            return
        duplicate_stat_calls[key] += 1

    def set_value(self, key, value, spider=None):
        self.track_duplicate_stat_calls(key)
        super().set_value(key, value, spider)

    def inc_value(self, key, count=1, start=1, spider=None):
        self.track_duplicate_stat_calls(key)
        super().inc_value(key, count, start, spider)

    def max_value(self, key, value, spider=None):
        self.track_duplicate_stat_calls(key)
        super().max_value(key, value, spider)

    def min_value(self, key, value, spider=None):
        self.track_duplicate_stat_calls(key)
        super().min_value(key, value, spider)

class TestSpider(Spider):
name = "test_spider"
url: str

def start_requests(self):
yield Request(self.url, callback=self.parse)
for url in ("data:,a", "data:,b"):
yield Request(url, callback=self.parse)

def parse(self, response: DummyResponse, product: Product):
pass

settings = create_scrapy_settings()
settings["STATS_CLASS"] = OnlyOnceStatsCollector
settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}
settings["ZYTE_API_AUTO_FIELD_STATS"] = True
settings["ZYTE_API_URL"] = mockserver.urljoin("/")
Expand All @@ -1080,6 +1109,7 @@ def parse(self, response: DummyResponse, product: Product):
"(all fields)"
),
}
assert all(value == 0 for value in duplicate_stat_calls.values())


@ensureDeferred
Expand Down

0 comments on commit 0795cf3

Please sign in to comment.