Merge pull request #9 from lincc-frameworks/issue/6/new-output-formats

Support for the most recent table outputs
lincc-frameworks · Oct 31, 2023 · 7bf94f0 · 7bf94f0
2 parents 84d0f3d + d0f6033
commit 7bf94f0
Show file tree

Hide file tree

Showing 16 changed files with 430 additions and 136 deletions.
diff --git a/.codecov.yml b/.codecov.yml
@@ -0,0 +1,3 @@
+ignore:
+  - "src/lf_asv_formatter/_version.py"
+  - "src/lf_asv_formatter/__main__.py"
diff --git a/pyproject.toml b/pyproject.toml
@@ -41,3 +41,9 @@ write_to = "src/lf_asv_formatter/_version.py"
 testpaths = [
     "tests",
 ]
+
+[tool.coverage.run]
+omit = [
+    "src/lf_asv_formatter/_version.py",
+    "src/lf_asv_formatter/__main__.py",
+]
diff --git a/src/lf_asv_formatter/__main__.py b/src/lf_asv_formatter/__main__.py
@@ -1,8 +1,50 @@
-"""Main file to call from command line and github workflows."""
+"""Main file to call from command line and GitHub workflows."""
+import argparse
+import re
 
+from .simple_formatter import SimpleFormatter
+from .tabulate_formatter import TabulateFormatter
 
-from .asv_formatter import rewrite_file
 
-if __name__ == "__main__":
+def parse_asv_version():
+    """Parses asv version from command line arguments.
+
+    Returns
+    -------
+    str
+        The value of `--asv_version`, or "0.5.1" if it was not given.
+    """
+    parser = argparse.ArgumentParser("lf_asv_formatter")
+    # asv defaults to v0.5.1 for backward compatibility
+    parser.add_argument(
+        "--asv_version", help="Version of asv", type=str, default="0.5.1"
+    )
+    return parser.parse_args().asv_version
+
+
+def _uses_simple_format(asv_version):
+    """Checks whether the asv version is handled by the SimpleFormatter.
+
+    The major and minor numbers are compared as integers, because a
+    plain string comparison orders "0.10.0" before "0.6.0".
+
+    Parameters
+    ----------
+    asv_version : str
+        The asv version, e.g. "0.6.1".
+
+    Returns
+    -------
+    bool
+        True if the version is 0.6 or later.
+    """
+    major_minor = re.findall(r"\d+", asv_version)[:2]
+    return tuple(int(number) for number in major_minor) >= (0, 6)
+
 
-    rewrite_file()
+if __name__ == "__main__":
+    asv_version = parse_asv_version()
+    formatter_class = (
+        SimpleFormatter if _uses_simple_format(asv_version) else TabulateFormatter
+    )
+    formatter_class().rewrite_file()
diff --git a/src/lf_asv_formatter/asv_formatter.py b/src/lf_asv_formatter/asv_formatter.py
@@ -1,122 +1,60 @@
-"""Typical ASV table file (before processing):
-    
-All benchmarks:
-
-        before           after        ratio
-      [fcd6c976]       [bc939276]
-      <main>           <test-pr> 
-             2.1k             2.1k     1.00  benchmarks.MemSuite.mem_list
-           failed          304±2ms      n/a  benchmarks.TimeSuite.time_iterkeys
-      2.43±0.05μs        205±0.7ms 84400.48  benchmarks.TimeSuite.time_keys
-      9.67±0.03μs          505±1ms 52177.14  benchmarks.TimeSuite.time_range
-           failed          1.01±0s      n/a  benchmarks.TimeSuite.time_xrange
-
-Formatted ASV table file (github style):
-| Before      | After       | Ratio    | Method                             |
-|-------------|-------------|----------|------------------------------------|
-| [fcd6c976]  | [bc939276]  |          |                                    |
-| 2.1k        | 2.1k        | 1.00     | benchmarks.MemSuite.mem_list       |
-| failed      | 304±2ms     | n/a      | benchmarks.TimeSuite.time_iterkeys |
-| 2.43±0.05μs | 205±0.7ms   | 84400.48 | benchmarks.TimeSuite.time_keys     |
-| 9.67±0.03μs | 505±1ms     | 52177.14 | benchmarks.TimeSuite.time_range    |
-| failed      | 1.01±0s     | n/a      | benchmarks.TimeSuite.time_xrange   |
-"""
-
-import re
-
-from tabulate import tabulate
-
-DEFAULT_OUTPUT_FILE = "output"
-OUTPUT_TABLE_STYLE = "github"
-MAX_NUM_ROWS = 10
-
-
-def rewrite_file(input_file=None, output_file=None):
-    """Read input file as ASV output, and write new file with github-formatted data."""
-    if not input_file:
-        input_file = DEFAULT_OUTPUT_FILE
-    if not output_file:
-        output_file = DEFAULT_OUTPUT_FILE
-
-    headers, rows = format_asv_table_from_file(input_file)
-
-    with open(output_file, "w", encoding="utf-8") as file:
-        file.write(tabulate(rows, headers=headers, tablefmt=OUTPUT_TABLE_STYLE))
-
-
-def format_asv_table_from_file(filename):
-    """Parses and formats a table generated by `asv compare`.
-
-    Parameters
-    ----------
-    filename : str
-        Name of the file containing the table.
-
-    Returns
-    -------
-    tuple of str
-        Headers and rows of the formatted table.
-    """
-    headers = []
-    table_data = []
-
-    with open(filename, "r", encoding="utf-8") as file:
-        rows = parse_table_rows(file.readlines())
-        headers = format_headers(rows[0])
-        branch_data = [rows[1]]
-        bench_data = rows[2:]
-        num_results = min(MAX_NUM_ROWS, len(bench_data))
-        table_data = remove_first_column(bench_data[:num_results])
-
-    return headers, branch_data + table_data
-
-
-def parse_table_rows(rows):
-    """Splits the columns, for each row, by whitespace separator.
-
-    Parameters
-    ----------
-    rows : list of str
-        Lines read from a file.
-
-    Returns
-    -------
-    list of lists
-        List of columns for each row. The first three lines of the original
-        file are ignored as they do not contain useful information.
-    """
-    return [re.split(r"\s+", row.strip()) for row in rows[3:]]
-
-
-def format_headers(headers):
-    """Reads and capitalizes the table headers.
-
-    Parameters
-    ----------
-    headers : list of str
-        List of headers read from the file.
-
-    Returns
-    -------
-    list of str
-        Full list of capitalized table headers.
-    """
-    headers.append("method")
-    return [header.capitalize() for header in headers]
-
-
-def remove_first_column(lines):
-    """Discards the first column of the benchmarks table if it
-    does not contain useful information ("+" / "-" / "x").
-
-    Parameters
-    ----------
-    lines : list of lists
-        Lines containing benchmark information.
-
-    Returns
-    -------
-    list of lists
-        Lines containing benchmark information.
-    """
-    return [line[1:] if len(line) > 4 else line for line in lines]
+class AsvFormatter:
+    """Base class for ASV-specific formatters."""
+
+    MAX_NUM_ROWS = 10  # Maximum number of benchmark rows kept by formatters
+    DEFAULT_OUTPUT_FILE = "output"  # Fallback path for both input and output
+
+    def __init__(self, input_file=None, output_file=None):  # Falsy paths use the default
+        self.input_file = input_file if input_file else self.DEFAULT_OUTPUT_FILE
+        self.output_file = output_file if output_file else self.DEFAULT_OUTPUT_FILE
+
+    def extract_table_from_file(self):
+        """Extracts the asv table content from the input file.
+
+        Returns
+        -------
+        list of str
+            The file lines from the start of the table to the end of the file.
+        """
+        with open(self.input_file, "r", encoding="utf-8") as file:
+            lines = file.readlines()
+            start_index = self.get_table_start_index(lines)
+            return lines[start_index:]
+
+    def get_table_start_index(self, lines):
+        """Fetches the line index for the start of the table. The first empty
+        line of the `asv compare` output opens a 3 line banner, which is
+        discarded: the table starts 3 lines after that empty line.
+
+        Parameters
+        ----------
+        lines: list of str
+            The lines of the output file.
+
+        Returns
+        -------
+        int
+            The line index in which the table starts.
+        """
+        start_index = -1  # Sentinel: no empty line found yet
+        i = 0
+        while i < len(lines):
+            if lines[i].strip() == "":
+                # Skip the "All benchmarks" banner (3 lines, counting this one)
+                start_index = i + 3
+                break
+            i += 1
+        if start_index == -1 or start_index >= len(lines):
+            raise ValueError("Invalid asv table")  # No empty line, or nothing after the banner
+        return start_index
+
+    def write_output_to_file(self, output):
+        """Writes the formatted asv table to the output file, overwriting it.
+
+        Parameters
+        ----------
+        output : str
+            The formatted asv table.
+        """
+        with open(self.output_file, "w", encoding="utf-8") as file:
+            file.write(output)
diff --git a/src/lf_asv_formatter/simple_formatter.py b/src/lf_asv_formatter/simple_formatter.py
@@ -0,0 +1,96 @@
+import re
+
+from .asv_formatter import AsvFormatter
+
+# pylint: disable=line-too-long
+
+
+class SimpleFormatter(AsvFormatter):
+    """The SimpleFormatter is used for asv >= 0.6.0.
+
+    It extracts the table from the ASV output file, clipping it to the maximum
+    specified size. The table keeps its original GitHub styling.
+
+    Typical ASV table file (before processing):
+
+    All benchmarks:
+
+    | Change   | Before [bd9dfcb0]    | After [65151fad]    | Ratio   | Benchmark (Parameter)              |
+    |----------|----------------------|---------------------|---------|------------------------------------|
+    |          | 2.67±0.3s            | 4.05±0.2s           | ~1.52   | benchmarks.TimeSuite.time_xrange   |
+    |          | 1.97±1s              | 2.51±0.9s           | ~1.27   | benchmarks.TimeSuite.time_keys     |
+    |          | 2.82±1s              | 1.95±0.8s           | ~0.69   | benchmarks.TimeSuite.time_iterkeys |
+    | +        | 464                  | 3.89k               | 8.38    | benchmarks.MemSuite.mem_list       |
+    |          | 3.00±0.5s            | 2.97±1s             | 0.99    | benchmarks.TimeSuite.time_range    |
+
+    The output will be similar to:
+    | Before [bd9dfcb0]    | After [65151fad]    | Ratio   | Benchmark (Parameter)              |
+    |----------------------|---------------------|---------|------------------------------------|
+    | 2.67±0.3s            | 4.05±0.2s           | ~1.52   | benchmarks.TimeSuite.time_xrange   |
+    | 1.97±1s              | 2.51±0.9s           | ~1.27   | benchmarks.TimeSuite.time_keys     |
+    | 2.82±1s              | 1.95±0.8s           | ~0.69   | benchmarks.TimeSuite.time_iterkeys |
+    | 464                  | 3.89k               | 8.38    | benchmarks.MemSuite.mem_list       |
+    | 3.00±0.5s            | 2.97±1s             | 0.99    | benchmarks.TimeSuite.time_range    |
+    """
+
+    def rewrite_file(self):
+        """Reads ASV table and writes new file with transformed table."""
+        rows = self.extract_table_from_file()
+        output = self.format_asv_table_from_file(rows)
+        self.write_output_to_file(output)
+
+    def format_asv_table_from_file(self, rows):
+        """Clips the table generated by `asv compare` to at most MAX_NUM_ROWS
+        results, and drops the first column if there are more than 4 columns.
+
+        Parameters
+        ----------
+        rows : list of str
+            List of asv table rows (header, separator, then the results).
+
+        Returns
+        -------
+        str
+            The formatted asv table.
+        """
+        headers = self.parse_headers(rows[0])
+        bench_data = rows[2:]
+        max_row = min(self.MAX_NUM_ROWS, len(bench_data)) + 2  # +2 keeps header and separator rows
+        if len(headers) > 4:
+            # If number of columns > 4, there is a "Change"
+            # column that we wish to discard
+            rows = self.remove_first_column(rows)
+        return "".join(map(str, rows[:max_row]))
+
+    def parse_headers(self, headers_str):
+        """Parses table headers using a regular expression.
+
+        Parameters
+        ----------
+        headers_str: str
+            The string with the labels and respective "|" delimiters.
+
+        Returns
+        -------
+        list of str
+            A list with the table headers (surrounding spaces are kept).
+        """
+        # Regex iterator to find all columns between two "|" chars
+        iterator = re.finditer(r"\|(.+?)(?=\|)", headers_str)
+        return [match.group(1) for match in iterator]
+
+    def remove_first_column(self, lines):
+        """Discards the first column of the benchmarks table as it
+        does not contain useful information ("+" / "-" / "x").
+
+        Parameters
+        ----------
+        lines : list of str
+            Rows of the asv table, each one starting with "|".
+
+        Returns
+        -------
+        list of str
+            The same rows without their first column.
+        """
+        return [re.sub(r"^\|(.*?\|)", r"|", line) for line in lines]  # "|a|b|" -> "|b|"