Skip to content

Commit

Permalink
Merge pull request #360 from openzim/fix_redirect_regex
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Jul 31, 2023
2 parents 82109ba + e4dc68e commit 662a593
Show file tree
Hide file tree
Showing 5 changed files with 100 additions and 18 deletions.
46 changes: 28 additions & 18 deletions src/zimwriterfs/zimcreatorfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,28 @@
#include <limits.h>
#include <cassert>

/**
 * Parse tab-separated redirect definitions from a stream.
 *
 * Every line must consist of exactly three non-empty fields separated by
 * single tab characters: `path<TAB>title<TAB>target`. For each valid line,
 * `handler` is called with the resulting Redirect, in input order.
 *
 * @param in_stream Stream read line by line until std::getline fails.
 * @param handler   Callback invoked once per successfully parsed line.
 * @throws std::runtime_error on the first line that does not match the
 *         expected format. The message carries the 1-based line number and
 *         the offending line. Redirects parsed before that line have already
 *         been handed to `handler`.
 */
void parse_redirectArticles(std::istream& in_stream, redirect_handler handler) {
  // Compiling a std::regex is costly and the pattern never changes, so build
  // it once per call instead of once per line.
  const std::regex line_regex("^([^\\t]+)\\t([^\\t]+)\\t([^\\t]+)$");

  std::string line;
  int line_number = 1;
  while (std::getline(in_stream, line)) {
    std::smatch matches;
    // A successful match always exposes 4 sub-matches (the whole match plus
    // the three capture groups), so no separate size check is required.
    if (!std::regex_search(line, matches, line_regex)) {
      throw std::runtime_error(
        Formatter() << "Invalid line #" << line_number << " : '" << line << "'"
      );
    }

    // Positional aggregate initialization (path, title, target): designated
    // initializers are only standard from C++20 onwards.
    Redirect redirect = {
      matches[1].str(),
      matches[2].str(),
      matches[3].str()
    };
    handler(redirect);
    ++line_number;
  }
}

// Forward declaration only; the definition is not visible in this part of the file.
// NOTE(review): presumably reports whether verbose output was requested — confirm against the definition.
bool isVerbose();

ZimCreatorFS::ZimCreatorFS(std::string _directoryPath)
Expand All @@ -49,26 +71,14 @@ ZimCreatorFS::ZimCreatorFS(std::string _directoryPath)
// Reads redirect entries from the file at `path` and registers each of them
// through addRedirection().
// NOTE(review): this span is a rendered diff of the function. The pre-change
// inline regex loop and the post-change try/catch appear to be interleaved, so
// the braces do not balance as shown.
void ZimCreatorFS::add_redirectArticles_from_file(const std::string& path)
{
std::ifstream in_stream;
std::string line;

// NOTE(review): the result of open() is not checked before reading.
in_stream.open(path.c_str());
int line_number = 1;
// Apparently the pre-change implementation: each line is parsed inline.
while (std::getline(in_stream, line)) {
std::regex line_regex("(.+)\\t(.+)\\t(.+)");
std::smatch matches;
// NOTE(review): with three capture groups a successful match has size 4, so
// `matches.size() != 5` looks always true — every line would be rejected.
if (!std::regex_search(line, matches, line_regex) || matches.size() != 5) {
// Report the malformed line on stderr, close the file and terminate the process.
std::cerr << "zimwriterfs: line #" << line_number
<< " has invalid format in redirect file " << path << ": '"
<< line << "'" << std::endl;
in_stream.close();
exit(1);
}

auto path = matches[1].str();
auto title = matches[2].str();
auto redirectUrl = matches[3].str();
addRedirection(path, title, redirectUrl);
++line_number;
// Apparently the post-change implementation: parsing is delegated to
// parse_redirectArticles() and each Redirect is forwarded to addRedirection().
try {
parse_redirectArticles(in_stream, [this](Redirect redirect) {this->addRedirection(redirect.path, redirect.title, redirect.target);});
} catch(const std::runtime_error& e) {
// A std::runtime_error from the parser is printed together with the file
// name, then the process exits with status 1.
std::cerr << e.what() << "\nin redirect file " << path << std::endl;
in_stream.close();
exit(1);
}
in_stream.close();
}
Expand Down
8 changes: 8 additions & 0 deletions src/zimwriterfs/zimcreatorfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <vector>
#include <string>
#include <functional>

#include <zim/writer/creator.h>

Expand Down Expand Up @@ -54,4 +55,11 @@ class ZimCreatorFS : public zim::writer::Creator
std::string canonical_basedir;
};

/// One redirect entry as read from a redirect file: the three tab-separated
/// fields of a line, in the order path, title, target.
struct Redirect {
  std::string path;    ///< First field of the line.
  std::string title;   ///< Second field of the line.
  std::string target;  ///< Third field of the line.
};

/// Callback type invoked by parse_redirectArticles() with each parsed Redirect.
using redirect_handler = std::function<void(Redirect)>;
/// Reads `in_stream` line by line, expecting `path<TAB>title<TAB>target` on
/// every line, and calls `handler` once per parsed line.
/// @throws std::runtime_error if a line does not have that format.
void parse_redirectArticles(std::istream& in_stream, redirect_handler handler);

#endif // OPENZIM_ZIMWRITERFS_ARTICLESOURCE_H
3 changes: 3 additions & 0 deletions subprojects/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
googletest-release-1.8.1
googletest-1.13.0
packagecache
10 changes: 10 additions & 0 deletions subprojects/gtest.wrap
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[wrap-file]
directory = googletest-release-1.8.1

source_url = https://github.com/google/googletest/archive/release-1.8.1.zip
source_filename = gtest-1.8.1.zip
source_hash = 927827c183d01734cc5cfef85e0ff3f5a92ffe6188e0d18e909c5efebf28a0c7

patch_url = https://wrapdb.mesonbuild.com/v1/projects/gtest/1.8.1/1/get_zip
patch_filename = gtest-1.8.1-1-wrap.zip
patch_hash = f79f5fd46e09507b3f2e09a51ea6eb20020effe543335f5aee59f30cc8d15805
51 changes: 51 additions & 0 deletions test/zimwriterfs-zimcreatorfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,54 @@ TEST(ZimCreatorFSTest, ThrowsErrorIfDirectoryNotExist)
ZimCreatorFS zimCreator("Non-existing-dir");
}, std::invalid_argument );
}

// Field-wise equality for Redirect, so that containers of Redirect can be
// compared in the assertions below.
bool operator==(const Redirect& a, const Redirect& b) {
  if (a.path != b.path) {
    return false;
  }
  if (a.title != b.title) {
    return false;
  }
  return a.target == b.target;
}

// Exercises parse_redirectArticles() with one well-formed input and two
// malformed ones (too few and too many tab-separated fields).
TEST(ZimCreatorFSTest, ParseRedirect)
{
  // Two valid lines (the last one without a trailing newline) must be
  // delivered to the handler in order.
  {
    std::stringstream input;
    input << "path\ttitle\ttarget\n"
          << "A/path/to/somewhere\tAn amazing title\tAnother/path";

    std::vector<Redirect> collected;
    parse_redirectArticles(
      input,
      [&collected](Redirect entry) { collected.push_back(entry); }
    );

    const std::vector<Redirect> expected {
      {"path", "title", "target"},
      {"A/path/to/somewhere", "An amazing title", "Another/path"}
    };
    EXPECT_EQ(collected, expected);
  }

  // Only two fields: the target is missing, so parsing must fail.
  {
    std::stringstream input;
    input << "A/path\tOups, no target";
    EXPECT_THROW(
      parse_redirectArticles(input, [](Redirect) {}),
      std::runtime_error
    );
  }

  // Four fields: one tab too many, so parsing must fail as well.
  {
    std::stringstream input;
    input << "A/path\ttitle\ttarget\tOups, too many tabs\n";
    EXPECT_THROW(
      parse_redirectArticles(input, [](Redirect) {}),
      std::runtime_error
    );
  }
}

0 comments on commit 662a593

Please sign in to comment.