From d30c8ca1814b27c493ff595d72ff9c5ced662221 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Tue, 30 Nov 2021 18:12:26 -0500 Subject: [PATCH 1/4] Adds a User-agent header to curl module requests The header will be of the form: peru/{version} Python-urllib/{version} Fixes #218 --- peru/resources/plugins/curl/curl_plugin.py | 20 ++++++++++++++++++-- tests/test_curl_plugin.py | 12 ++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/peru/resources/plugins/curl/curl_plugin.py b/peru/resources/plugins/curl/curl_plugin.py index 59c45796..71fedc27 100755 --- a/peru/resources/plugins/curl/curl_plugin.py +++ b/peru/resources/plugins/curl/curl_plugin.py @@ -9,10 +9,26 @@ import tarfile from urllib.error import HTTPError, URLError from urllib.parse import urlsplit +from urllib.request import Request +import peru.main import urllib.request import zipfile +def peru_version(): + return peru.main.get_version() + + +def build_request(url): + request = Request(url) + components = [ + f"peru/{peru_version()}", + urllib.request.URLopener.version + ] + request.add_header("User-agent", " ".join(components)) + return request + + def get_request_filename(request): '''Figure out the filename for an HTTP download.''' # Check to see if a filename is specified in the HTTP headers. @@ -80,7 +96,7 @@ def plugin_sync(url, sha1): # Download directly to the destination dir. 
download_dir = dest - with urllib.request.urlopen(url) as request: + with urllib.request.urlopen(build_request(url)) as request: filename = os.environ['PERU_MODULE_FILENAME'] if not filename: filename = get_request_filename(request) @@ -151,7 +167,7 @@ def __init__(self, message): def plugin_reup(url, sha1): reup_output = os.environ['PERU_REUP_OUTPUT'] - with urllib.request.urlopen(url) as request: + with urllib.request.urlopen(build_request(url)) as request: digest = download_file(request, None) with open(reup_output, 'w') as output_file: print('sha1:', digest, file=output_file) diff --git a/tests/test_curl_plugin.py b/tests/test_curl_plugin.py index fb3add45..df4bc245 100644 --- a/tests/test_curl_plugin.py +++ b/tests/test_curl_plugin.py @@ -2,6 +2,7 @@ import importlib.machinery import io from os.path import abspath, join, dirname +import urllib import peru import shared @@ -100,3 +101,14 @@ def test_evil_archives(self): tar_archive = shared.test_resources / (case + '.tar') with self.assertRaises(curl_plugin.EvilArchiveError): curl_plugin.extract_tar(str(tar_archive), dest) + + def test_request_has_user_agent_header(self): + actual = curl_plugin.build_request("http://example.test") + print(actual.header_items()) + self.assertTrue(actual.has_header("User-agent")) + ua_header = actual.get_header("User-agent") + peru_component, urllib_component = ua_header.split(' ') + _, peru_version = peru_component.split('/') + _, urllib_version = urllib_component.split('/') + self.assertEqual(peru.main.get_version(), peru_version) + self.assertEqual(urllib.request.__version__, urllib_version) From 13ca2cecf86c601f794ed65fc5bb2b0de2b4fac5 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Tue, 30 Nov 2021 20:40:40 -0500 Subject: [PATCH 2/4] Inline version resolution for User-agent and use %-formatting instead of an f-string to maintain compatibility with Python 3.5, since f-strings were introduced in Python 3.6. 
--- peru/resources/plugins/curl/curl_plugin.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/peru/resources/plugins/curl/curl_plugin.py b/peru/resources/plugins/curl/curl_plugin.py index 71fedc27..a3e3a926 100755 --- a/peru/resources/plugins/curl/curl_plugin.py +++ b/peru/resources/plugins/curl/curl_plugin.py @@ -15,14 +15,10 @@ import zipfile -def peru_version(): - return peru.main.get_version() - - def build_request(url): request = Request(url) components = [ - f"peru/{peru_version()}", + "peru/%s" % peru.main.get_version(), urllib.request.URLopener.version ] request.add_header("User-agent", " ".join(components)) From ca7926748513842b94cbcfcb5f81dba6e6130dc0 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Tue, 30 Nov 2021 20:47:57 -0500 Subject: [PATCH 3/4] Extract User-agent addition to a separate method This sets the stage for more request modifications. --- peru/resources/plugins/curl/curl_plugin.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/peru/resources/plugins/curl/curl_plugin.py b/peru/resources/plugins/curl/curl_plugin.py index a3e3a926..65ea3cd8 100755 --- a/peru/resources/plugins/curl/curl_plugin.py +++ b/peru/resources/plugins/curl/curl_plugin.py @@ -15,8 +15,7 @@ import zipfile -def build_request(url): - request = Request(url) +def add_user_agent_to_request(request): components = [ "peru/%s" % peru.main.get_version(), urllib.request.URLopener.version @@ -25,6 +24,11 @@ def build_request(url): return request +def build_request(url): + request = Request(url) + return add_user_agent_to_request(request) + + def get_request_filename(request): '''Figure out the filename for an HTTP download.''' # Check to see if a filename is specified in the HTTP headers. 
From e939d5fdc445df451d75f5597e799f3222ad4b67 Mon Sep 17 00:00:00 2001 From: Colin Dean Date: Thu, 2 Dec 2021 23:28:22 -0500 Subject: [PATCH 4/4] Remove a debugging print from UA header test --- tests/test_curl_plugin.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_curl_plugin.py b/tests/test_curl_plugin.py index df4bc245..0525bcb6 100644 --- a/tests/test_curl_plugin.py +++ b/tests/test_curl_plugin.py @@ -104,7 +104,6 @@ def test_evil_archives(self): def test_request_has_user_agent_header(self): actual = curl_plugin.build_request("http://example.test") - print(actual.header_items()) self.assertTrue(actual.has_header("User-agent")) ua_header = actual.get_header("User-agent") peru_component, urllib_component = ua_header.split(' ')