From fbdb8813fb4d4e88a2afa775f768284a6336b983 Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Fri, 28 Oct 2022 12:10:37 -0300 Subject: [PATCH 01/10] Issue #249 - Add strip to get() and getall() --- parsel/selector.py | 18 ++++++++++++---- tests/test_selector.py | 49 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/parsel/selector.py b/parsel/selector.py index b84b0308..3df09333 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -200,12 +200,15 @@ def re_first( return el return default - def getall(self) -> List[str]: + def getall(self, strip: bool = False) -> List[str]: """ Call the ``.get()`` method for each element is this list and return their results flattened, as a list of strings. """ - return [x.get() for x in self] + data = [x.get() for x in self] + if strip: + return [x.strip() if x else x for x in data] + return data extract = getall @@ -217,13 +220,20 @@ def get(self, default: None = None) -> Optional[str]: def get(self, default: str) -> str: pass - def get(self, default: Optional[str] = None) -> Optional[str]: + @typing.overload + def get(self, strip: bool) -> str: + pass + + def get( + self, default: Optional[str] = None, strip: Optional[bool] = False + ) -> Optional[str]: """ Return the result of ``.get()`` for the first element in this list. If the list is empty, return the default value. """ for x in self: - return x.get() + value = x.get() + return value.strip() if strip and value else value return default extract_first = get diff --git a/tests/test_selector.py b/tests/test_selector.py index d0bb2816..2afb1882 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -90,7 +90,7 @@ def test_simple_selection(self) -> None: def test_simple_selection_with_variables(self) -> None: """Using XPath variables""" body = "

" - sel = self.sscls(text=body) + sel = self.sscls(text=body) # Selector doesnt have default field self.assertEqual( [ @@ -323,6 +323,53 @@ def test_selectorlist_get_alias(self) -> None: self.assertEqual(sel.xpath("//ul/li").get(), '
  • 1
  • ') self.assertEqual(sel.xpath("//ul/li/text()").get(), "1") + def test_selector_get_strip(self) -> None: + body = '' + sel = self.sscls(text=body) + + self.assertEqual( + sel.xpath("//ul/li[position()>1]").get(), '
  • 2
  • ' + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]").get(strip=True), + '
  • 2
  • ', + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]/text()").get(), " 2 " + ) + self.assertEqual( + sel.xpath("//ul/li[position()>1]/text()").get(strip=True), "2" + ) + + def test_selector_getall_strip(self) -> None: + body = ( + '' + ) + sel = self.sscls(text=body) + + self.assertEqual( + sel.xpath("//ul/li").getall(), + [ + '
  • 1
  • ', + '
  • 2
  • ', + '
  • 3
  • ', + ], + ) + self.assertEqual( + sel.xpath("//ul/li").getall(strip=True), + [ + '
  • 1
  • ', + '
  • 2
  • ', + '
  • 3
  • ', + ], + ) + self.assertEqual( + sel.xpath("//ul/li/text()").getall(), ["1", " 2 ", " 3"] + ) + self.assertEqual( + sel.xpath("//ul/li/text()").getall(strip=True), ["1", "2", "3"] + ) + def test_re_first(self) -> None: """Test if re_first() returns first matched element""" body = '' From cbc6e89e31f318907443b728d5b3ada5ad33f856 Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Fri, 28 Oct 2022 12:23:17 -0300 Subject: [PATCH 02/10] cleanup --- parsel/selector.py | 4 ---- tests/test_selector.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/parsel/selector.py b/parsel/selector.py index 3df09333..e07fb756 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -220,10 +220,6 @@ def get(self, default: None = None) -> Optional[str]: def get(self, default: str) -> str: pass - @typing.overload - def get(self, strip: bool) -> str: - pass - def get( self, default: Optional[str] = None, strip: Optional[bool] = False ) -> Optional[str]: diff --git a/tests/test_selector.py b/tests/test_selector.py index 2afb1882..6d57cbac 100644 --- a/tests/test_selector.py +++ b/tests/test_selector.py @@ -90,7 +90,7 @@ def test_simple_selection(self) -> None: def test_simple_selection_with_variables(self) -> None: """Using XPath variables""" body = "

    " - sel = self.sscls(text=body) # Selector doesnt have default field + sel = self.sscls(text=body) self.assertEqual( [ From 02b09b0a488e534bee14b3617f91c634057430a0 Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Fri, 28 Oct 2022 12:26:54 -0300 Subject: [PATCH 03/10] Fix: No overload variant of "get" of "SelectorList" matches argument type "bool" --- parsel/selector.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/parsel/selector.py b/parsel/selector.py index e07fb756..3df09333 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -220,6 +220,10 @@ def get(self, default: None = None) -> Optional[str]: def get(self, default: str) -> str: pass + @typing.overload + def get(self, strip: bool) -> str: + pass + def get( self, default: Optional[str] = None, strip: Optional[bool] = False ) -> Optional[str]: From 4185e19a22f3aba75cb13e8b9bf2bae6ed790026 Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Fri, 28 Oct 2022 12:33:40 -0300 Subject: [PATCH 04/10] Keyword-only strip for getall. --- parsel/selector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsel/selector.py b/parsel/selector.py index 3df09333..6815ea8c 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -200,7 +200,7 @@ def re_first( return el return default - def getall(self, strip: bool = False) -> List[str]: + def getall(self, *, strip: bool = False) -> List[str]: """ Call the ``.get()`` method for each element is this list and return their results flattened, as a list of strings. From ff23fdb93dbd613580a58c895e637465263248ac Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Mon, 31 Oct 2022 12:09:49 -0300 Subject: [PATCH 05/10] Remove Optional from get strip argument --- parsel/selector.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/parsel/selector.py b/parsel/selector.py index 6815ea8c..2641ae8b 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -129,9 +129,7 @@ def xpath( selector.xpath('//a[href=$url]', url="http://www.example.com") """ return self.__class__( - flatten( - [x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self] - ) + flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]) ) def css(self, query: str) -> "SelectorList[_SelectorType]": @@ -155,9 +153,7 @@ def re( Passing ``replace_entities`` as ``False`` switches off these replacements. """ - return flatten( - [x.re(regex, replace_entities=replace_entities) for x in self] - ) + return flatten([x.re(regex, replace_entities=replace_entities) for x in self]) @typing.overload def re_first( @@ -224,9 +220,7 @@ def get(self, default: str) -> str: def get(self, strip: bool) -> str: pass - def get( - self, default: Optional[str] = None, strip: Optional[bool] = False - ) -> Optional[str]: + def get(self, default: Optional[str] = None, strip: bool = False) -> Optional[str]: """ Return the result of ``.get()`` for the first element in this list. If the list is empty, return the default value. @@ -432,9 +426,7 @@ def re( Passing ``replace_entities`` as ``False`` switches off these replacements. """ - return extract_regex( - regex, self.get(), replace_entities=replace_entities - ) + return extract_regex(regex, self.get(), replace_entities=replace_entities) @typing.overload def re_first( From 37dfd36d6f365f0989fcd6b34236686791818007 Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Mon, 31 Oct 2022 12:29:23 -0300 Subject: [PATCH 06/10] Develop, commit, fail black, adjust, repeat --- parsel/selector.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/parsel/selector.py b/parsel/selector.py index 2641ae8b..f7726f32 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -129,7 +129,9 @@ def xpath( selector.xpath('//a[href=$url]', url="http://www.example.com") """ return self.__class__( - flatten([x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self]) + flatten( + [x.xpath(xpath, namespaces=namespaces, **kwargs) for x in self] + ) ) def css(self, query: str) -> "SelectorList[_SelectorType]": @@ -153,7 +155,9 @@ def re( Passing ``replace_entities`` as ``False`` switches off these replacements. """ - return flatten([x.re(regex, replace_entities=replace_entities) for x in self]) + return flatten( + [x.re(regex, replace_entities=replace_entities) for x in self] + ) @typing.overload def re_first( @@ -220,7 +224,9 @@ def get(self, default: str) -> str: def get(self, strip: bool) -> str: pass - def get(self, default: Optional[str] = None, strip: bool = False) -> Optional[str]: + def get( + self, default: Optional[str] = None, strip: bool = False + ) -> Optional[str]: """ Return the result of ``.get()`` for the first element in this list. If the list is empty, return the default value. @@ -426,7 +432,9 @@ def re( Passing ``replace_entities`` as ``False`` switches off these replacements. """ - return extract_regex(regex, self.get(), replace_entities=replace_entities) + return extract_regex( + regex, self.get(), replace_entities=replace_entities + ) @typing.overload def re_first( From cbe7042d931270c5d385bacafb3573c3859311fb Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Mon, 31 Oct 2022 13:12:11 -0300 Subject: [PATCH 07/10] Adjust overload get --- parsel/selector.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/parsel/selector.py b/parsel/selector.py index f7726f32..b13edddb 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -213,15 +213,11 @@ def getall(self, *, strip: bool = False) -> List[str]: extract = getall @typing.overload - def get(self, default: None = None) -> Optional[str]: + def get(self, default: None, strip: bool = ...) -> Optional[str]: pass @typing.overload - def get(self, default: str) -> str: - pass - - @typing.overload - def get(self, strip: bool) -> str: + def get(self, default: str, strip: bool) -> str: pass def get( From 43ba89f3c8dd9f0ada29f97f3eca33895ccf2850 Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Mon, 31 Oct 2022 13:12:24 -0300 Subject: [PATCH 08/10] Adjust overload get --- parsel/selector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsel/selector.py b/parsel/selector.py index b13edddb..26a1863c 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -213,7 +213,7 @@ def getall(self, *, strip: bool = False) -> List[str]: extract = getall @typing.overload - def get(self, default: None, strip: bool = ...) -> Optional[str]: + def get(self, default: None = None, strip: bool = ...) -> Optional[str]: pass @typing.overload From 7ce8789bec94dc3325ffe3eaecd7be70e5e36d18 Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Mon, 31 Oct 2022 13:17:06 -0300 Subject: [PATCH 09/10] Attempt strip only overload on typing check --- parsel/selector.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/parsel/selector.py b/parsel/selector.py index 26a1863c..0e3acfbb 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -220,6 +220,10 @@ def get(self, default: None = None, strip: bool = ...) -> Optional[str]: def get(self, default: str, strip: bool) -> str: pass + @typing.overload + def get(self, strip: bool) -> Optional[str]: + pass + def get( self, default: Optional[str] = None, strip: bool = False ) -> Optional[str]: From b044b768de329cf69a3a5956b87e42d2dc1d28d6 Mon Sep 17 00:00:00 2001 From: Felipe Boff Nunes Date: Mon, 31 Oct 2022 13:22:12 -0300 Subject: [PATCH 10/10] This seems to fix typing (?) --- parsel/selector.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/parsel/selector.py b/parsel/selector.py index 0e3acfbb..1fa041f6 100644 --- a/parsel/selector.py +++ b/parsel/selector.py @@ -217,11 +217,7 @@ def get(self, default: None = None, strip: bool = ...) -> Optional[str]: pass @typing.overload - def get(self, default: str, strip: bool) -> str: - pass - - @typing.overload - def get(self, strip: bool) -> Optional[str]: + def get(self, default: str, strip: bool = ...) -> str: pass def get(