From e9cf9b8647a8804ddfa873c2597949f8ab2eba9f Mon Sep 17 00:00:00 2001 From: Konstantin Lopuhin Date: Mon, 29 May 2017 15:17:51 +0300 Subject: [PATCH] Cache method lookup, more readable loop conditions Thanks for the idea @kmike! --- html_text/html_text.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/html_text/html_text.py b/html_text/html_text.py index 4bf81ec..465ebad 100644 --- a/html_text/html_text.py +++ b/html_text/html_text.py @@ -42,9 +42,9 @@ def parse_html(html): _whitespace = re.compile(r'\s+') -_trailing_whitespace = re.compile(r'\s$') -_punct_after = re.compile(r'^[,:;.!?"\)]') -_punct_before = re.compile(r'\($') +_has_trailing_whitespace = re.compile(r'\s$').search +_has_punct_after = re.compile(r'^[,:;.!?"\)]').search +_has_punct_before = re.compile(r'\($').search def selector_to_text(sel, guess_punct_space=False): @@ -58,9 +58,9 @@ def selector_to_text(sel, guess_punct_space=False): def fragments(): prev = None for text in sel.xpath('//text()').extract(): - if prev is not None and (_trailing_whitespace.search(prev) - or (not _punct_after.search(text) and - not _punct_before.search(prev))): + if prev is not None and (_has_trailing_whitespace(prev) + or (not _has_punct_after(text) and + not _has_punct_before(prev))): yield ' ' yield text prev = text