From 3379db00449f344cb637a40c5c1a5c62733ffa83 Mon Sep 17 00:00:00 2001 From: Frank Wiegand Date: Tue, 4 Jul 2023 12:46:40 +0200 Subject: [PATCH 1/5] Adjust to number of captures in previous regex --- src/zimwriterfs/zimcreatorfs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zimwriterfs/zimcreatorfs.cpp b/src/zimwriterfs/zimcreatorfs.cpp index 08119b71..b1c7e956 100644 --- a/src/zimwriterfs/zimcreatorfs.cpp +++ b/src/zimwriterfs/zimcreatorfs.cpp @@ -56,7 +56,7 @@ void ZimCreatorFS::add_redirectArticles_from_file(const std::string& path) while (std::getline(in_stream, line)) { std::regex line_regex("(.+)\\t(.+)\\t(.+)"); std::smatch matches; - if (!std::regex_search(line, matches, line_regex) || matches.size() != 5) { + if (!std::regex_search(line, matches, line_regex) || matches.size() != 4) { std::cerr << "zimwriterfs: line #" << line_number << " has invalid format in redirect file " << path << ": '" << line << "'" << std::endl; From 413eca8e716af3d82c07a0ed3ff401cd684ecbe0 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 17 Jul 2023 17:34:38 +0200 Subject: [PATCH 2/5] Add test on parsing of redirect file. - Refactor code to move parsing code in `parse_redirectArticles`. - Add test. --- src/zimwriterfs/zimcreatorfs.cpp | 46 ++++++++++++++++++------------ test/zimwriterfs-zimcreatorfs.cpp | 47 +++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 18 deletions(-) diff --git a/src/zimwriterfs/zimcreatorfs.cpp b/src/zimwriterfs/zimcreatorfs.cpp index b1c7e956..eb0976de 100644 --- a/src/zimwriterfs/zimcreatorfs.cpp +++ b/src/zimwriterfs/zimcreatorfs.cpp @@ -30,6 +30,28 @@ #include #include + +using redirect_handler = std::function; +void parse_redirectArticles(std::istream& in_stream, redirect_handler handler) { + std::string line; + int line_number = 1; + while (std::getline(in_stream, line)) { + std::regex line_regex("(.+)\\t(.+)\\t(.+)"); + std::smatch matches; + if (!std::regex_search(line, matches, line_regex) || matches.size() != 4) { + throw std::runtime_error( + Formatter() << "Invalid line #" << line_number << " : '" << line << "'" + ); + } + + auto path = matches[1].str(); + auto title = matches[2].str(); + auto redirectUrl = matches[3].str(); + handler(path, title, redirectUrl); + ++line_number; + } +} + bool isVerbose(); ZimCreatorFS::ZimCreatorFS(std::string _directoryPath) @@ -49,26 +71,14 @@ ZimCreatorFS::ZimCreatorFS(std::string _directoryPath) void ZimCreatorFS::add_redirectArticles_from_file(const std::string& path) { std::ifstream in_stream; - std::string line; in_stream.open(path.c_str()); - int line_number = 1; - while (std::getline(in_stream, line)) { - std::regex line_regex("(.+)\\t(.+)\\t(.+)"); - std::smatch matches; - if (!std::regex_search(line, matches, line_regex) || matches.size() != 4) { - std::cerr << "zimwriterfs: line #" << line_number - << " has invalid format in redirect file " << path << ": '" - << line << "'" << std::endl; - in_stream.close(); - exit(1); - } - - auto path = matches[1].str(); - auto title = matches[2].str(); - auto redirectUrl = matches[3].str(); - addRedirection(path, title, redirectUrl); - ++line_number; + try { + parse_redirectArticles(in_stream, [this](std::string path, std::string title, std::string redirectUrl) {this->addRedirection(path, title, redirectUrl);}); + } catch(const std::runtime_error& e) { + std::cerr << e.what() << "\nin redirect file " << path << std::endl; + in_stream.close(); + exit(1); } in_stream.close(); } diff --git a/test/zimwriterfs-zimcreatorfs.cpp b/test/zimwriterfs-zimcreatorfs.cpp index ffeb71f0..c58c528f 100644 --- a/test/zimwriterfs-zimcreatorfs.cpp +++ b/test/zimwriterfs-zimcreatorfs.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -127,3 +128,49 @@ TEST(ZimCreatorFSTest, ThrowsErrorIfDirectoryNotExist) ZimCreatorFS zimCreator("Non-existing-dir"); }, std::invalid_argument ); } + +struct Redirect { + std::string path; + std::string title; + std::string target; +}; + +bool operator==(const Redirect& a, const Redirect& b) { + return a.path == b.path && a.title == b.title && a.target == b.target; +} + +void parse_redirectArticles(std::istream& in_stream, std::function handler); + +TEST(ZimCreatorFSTest, ParseRedirect) +{ + { + std::stringstream ss; + ss << "path\ttitle\ttarget\n"; + ss << "A/path/to/somewhere\tAn amazing title\tAnother/path"; + + std::vector found; + parse_redirectArticles( + ss, + [&](std::string path, std::string title, std::string target) + {found.push_back({path, title, target});} + ); + + std::vector expected; + expected.push_back({"path", "title", "target"}); + expected.push_back({"A/path/to/somewhere", "An amazing title", "Another/path"}); + EXPECT_EQ(found, expected); + } + + + { + std::stringstream ss; + ss << "A/path\tOups, no target"; + EXPECT_THROW({ + parse_redirectArticles( + ss, + [&](std::string path, std::string title, std::string target) + {} + ); + }, std::runtime_error); + } +} From 7262d3ecf31f98668e73d89c325c5a4c5748ab99 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Mon, 17 Jul 2023 17:35:50 +0200 Subject: [PATCH 3/5] Add gtest wrap in subproject. This way, zim-tools is properly tested even if gtest is not available in the system. (This is what is done in libzim and libkiwix) --- subprojects/.gitignore | 3 +++ subprojects/gtest.wrap | 10 ++++++++++ 2 files changed, 13 insertions(+) create mode 100644 subprojects/.gitignore create mode 100644 subprojects/gtest.wrap diff --git a/subprojects/.gitignore b/subprojects/.gitignore new file mode 100644 index 00000000..7c5e972a --- /dev/null +++ b/subprojects/.gitignore @@ -0,0 +1,3 @@ +googletest-release-1.8.1 +googletest-1.13.0 +packagecache diff --git a/subprojects/gtest.wrap b/subprojects/gtest.wrap new file mode 100644 index 00000000..ba9c9b95 --- /dev/null +++ b/subprojects/gtest.wrap @@ -0,0 +1,10 @@ +[wrap-file] +directory = googletest-release-1.8.1 + +source_url = https://github.com/google/googletest/archive/release-1.8.1.zip +source_filename = gtest-1.8.1.zip +source_hash = 927827c183d01734cc5cfef85e0ff3f5a92ffe6188e0d18e909c5efebf28a0c7 + +patch_url = https://wrapdb.mesonbuild.com/v1/projects/gtest/1.8.1/1/get_zip +patch_filename = gtest-1.8.1-1-wrap.zip +patch_hash = f79f5fd46e09507b3f2e09a51ea6eb20020effe543335f5aee59f30cc8d15805 From 2302871f7455cb1f6b516808adaae65ac2654dfc Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 28 Jul 2023 10:41:29 +0200 Subject: [PATCH 4/5] fixup! Add test on parsing of redirect file. --- src/zimwriterfs/zimcreatorfs.cpp | 14 +++++++------- src/zimwriterfs/zimcreatorfs.h | 8 ++++++++ test/zimwriterfs-zimcreatorfs.cpp | 22 +++++++--------------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/zimwriterfs/zimcreatorfs.cpp b/src/zimwriterfs/zimcreatorfs.cpp index eb0976de..4aef4094 100644 --- a/src/zimwriterfs/zimcreatorfs.cpp +++ b/src/zimwriterfs/zimcreatorfs.cpp @@ -30,8 +30,6 @@ #include #include - -using redirect_handler = std::function; void parse_redirectArticles(std::istream& in_stream, redirect_handler handler) { std::string line; int line_number = 1; @@ -44,10 +42,12 @@ void parse_redirectArticles(std::istream& in_stream, redirect_handler handler) { ); } - auto path = matches[1].str(); - auto title = matches[2].str(); - auto redirectUrl = matches[3].str(); - handler(path, title, redirectUrl); + Redirect redirect = { + .path= matches[1].str(), + .title = matches[2].str(), + .target = matches[3].str() + }; + handler(redirect); ++line_number; } } @@ -74,7 +74,7 @@ void ZimCreatorFS::add_redirectArticles_from_file(const std::string& path) in_stream.open(path.c_str()); try { - parse_redirectArticles(in_stream, [this](std::string path, std::string title, std::string redirectUrl) {this->addRedirection(path, title, redirectUrl);}); + parse_redirectArticles(in_stream, [this](Redirect redirect) {this->addRedirection(redirect.path, redirect.title, redirect.target);}); } catch(const std::runtime_error& e) { std::cerr << e.what() << "\nin redirect file " << path << std::endl; in_stream.close(); diff --git a/src/zimwriterfs/zimcreatorfs.h b/src/zimwriterfs/zimcreatorfs.h index e56d2e59..eb53c61d 100644 --- a/src/zimwriterfs/zimcreatorfs.h +++ b/src/zimwriterfs/zimcreatorfs.h @@ -23,6 +23,7 @@ #include #include +#include #include @@ -54,4 +55,11 @@ class ZimCreatorFS : public zim::writer::Creator std::string canonical_basedir; }; +struct Redirect { + std::string path, title, target; +}; + +using redirect_handler = std::function; +void parse_redirectArticles(std::istream& in_stream, redirect_handler handler); + #endif // OPENZIM_ZIMWRITERFS_ARTICLESOURCE_H diff --git a/test/zimwriterfs-zimcreatorfs.cpp b/test/zimwriterfs-zimcreatorfs.cpp index c58c528f..d2622983 100644 --- a/test/zimwriterfs-zimcreatorfs.cpp +++ b/test/zimwriterfs-zimcreatorfs.cpp @@ -18,7 +18,6 @@ #include #include #include -#include #include @@ -129,18 +128,10 @@ TEST(ZimCreatorFSTest, ThrowsErrorIfDirectoryNotExist) }, std::invalid_argument ); } -struct Redirect { - std::string path; - std::string title; - std::string target; -}; - bool operator==(const Redirect& a, const Redirect& b) { return a.path == b.path && a.title == b.title && a.target == b.target; } -void parse_redirectArticles(std::istream& in_stream, std::function handler); - TEST(ZimCreatorFSTest, ParseRedirect) { { @@ -151,13 +142,14 @@ TEST(ZimCreatorFSTest, ParseRedirect) std::vector found; parse_redirectArticles( ss, - [&](std::string path, std::string title, std::string target) - {found.push_back({path, title, target});} + [&](Redirect redirect) + {found.push_back(redirect);} ); - std::vector expected; - expected.push_back({"path", "title", "target"}); - expected.push_back({"A/path/to/somewhere", "An amazing title", "Another/path"}); + const std::vector expected { + {"path", "title", "target"}, + {"A/path/to/somewhere", "An amazing title", "Another/path"} + }; EXPECT_EQ(found, expected); } @@ -168,7 +160,7 @@ TEST(ZimCreatorFSTest, ParseRedirect) EXPECT_THROW({ parse_redirectArticles( ss, - [&](std::string path, std::string title, std::string target) + [&](Redirect redirect) {} ); }, std::runtime_error); From e4dc68e224c8a440d6ac589fa304aaddc1c2c796 Mon Sep 17 00:00:00 2001 From: Matthieu Gautier Date: Fri, 28 Jul 2023 10:42:38 +0200 Subject: [PATCH 5/5] Add test with too many tabs and fix code. `(.+)` is a bit to greedy and matches `\t`. --- src/zimwriterfs/zimcreatorfs.cpp | 2 +- test/zimwriterfs-zimcreatorfs.cpp | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/zimwriterfs/zimcreatorfs.cpp b/src/zimwriterfs/zimcreatorfs.cpp index 4aef4094..0df0b031 100644 --- a/src/zimwriterfs/zimcreatorfs.cpp +++ b/src/zimwriterfs/zimcreatorfs.cpp @@ -34,7 +34,7 @@ void parse_redirectArticles(std::istream& in_stream, redirect_handler handler) { std::string line; int line_number = 1; while (std::getline(in_stream, line)) { - std::regex line_regex("(.+)\\t(.+)\\t(.+)"); + std::regex line_regex("^([^\\t]+)\\t([^\\t]+)\\t([^\\t]+)$"); std::smatch matches; if (!std::regex_search(line, matches, line_regex) || matches.size() != 4) { throw std::runtime_error( diff --git a/test/zimwriterfs-zimcreatorfs.cpp b/test/zimwriterfs-zimcreatorfs.cpp index d2622983..e56a7483 100644 --- a/test/zimwriterfs-zimcreatorfs.cpp +++ b/test/zimwriterfs-zimcreatorfs.cpp @@ -165,4 +165,16 @@ TEST(ZimCreatorFSTest, ParseRedirect) ); }, std::runtime_error); } + + { + std::stringstream ss; + ss << "A/path\ttitle\ttarget\tOups, too many tabs\n"; + EXPECT_THROW({ + parse_redirectArticles( + ss, + [&](Redirect redirect) + {} + ); + }, std::runtime_error); + } }