Skip to content

Commit

Permalink
Merge pull request #9 from lincc-frameworks/issue/6/new-output-formats
Browse files Browse the repository at this point in the history
Support for the most recent table outputs
  • Loading branch information
camposandro authored Oct 31, 2023
2 parents 84d0f3d + d0f6033 commit 7bf94f0
Show file tree
Hide file tree
Showing 16 changed files with 430 additions and 136 deletions.
3 changes: 3 additions & 0 deletions .codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
ignore:
- "src/lf_asv_formatter/_version.py"
- "src/lf_asv_formatter/__main__.py"
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,9 @@ write_to = "src/lf_asv_formatter/_version.py"
testpaths = [
"tests",
]

[tool.coverage.run]
omit = [
"src/lf_asv_formatter/_version.py",
"src/lf_asv_formatter/__main__.py",
]
21 changes: 17 additions & 4 deletions src/lf_asv_formatter/__main__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
"""Main file to call from command line and github workflows."""
"""Main file to call from command line and GitHub workflows."""
import argparse

from .simple_formatter import SimpleFormatter
from .tabulate_formatter import TabulateFormatter

from .asv_formatter import rewrite_file

if __name__ == "__main__":
def parse_asv_version():
    """Read the asv version requested on the command line.

    Returns
    -------
    str
        The value given to ``--asv_version``, or "0.5.1" when the option
        is not present.
    """
    cli = argparse.ArgumentParser(prog="lf_asv_formatter")
    # When nothing is specified we fall back to v0.5.1, which keeps
    # existing callers (that never passed a version) working.
    cli.add_argument(
        "--asv_version",
        type=str,
        default="0.5.1",
        help="Version of asv",
    )
    namespace = cli.parse_args()
    return namespace.asv_version


rewrite_file()
def release_tuple(version):
    """Convert a dotted version string into a tuple of ints.

    Only the leading numeric release segment is considered, so suffixes
    such as ".dev3" or "rc1" are ignored. The result is padded with zeros
    to at least three components so that "0.6" equals "0.6.0".

    Parameters
    ----------
    version : str
        Version string, optionally prefixed with "v" (e.g. "0.6.1").

    Returns
    -------
    tuple of int
        The numeric release components, e.g. ``(0, 6, 1)``.

    Raises
    ------
    ValueError
        If the string does not start with a numeric release segment.
    """
    import re  # local import: only this helper needs it

    match = re.match(r"\s*[vV]?(\d+(?:\.\d+)*)", version)
    if match is None:
        raise ValueError(f"Invalid asv version: {version!r}")
    release = [int(part) for part in match.group(1).split(".")]
    release.extend([0] * (3 - len(release)))
    return tuple(release)


if __name__ == "__main__":
    asv_version = parse_asv_version()
    # Compare numerically: as plain strings "0.10.0" sorts before "0.6.0",
    # which would wrongly select the formatter for older asv releases.
    is_new_format = release_tuple(asv_version) >= (0, 6, 0)
    formatter = SimpleFormatter() if is_new_format else TabulateFormatter()
    formatter.rewrite_file()
182 changes: 60 additions & 122 deletions src/lf_asv_formatter/asv_formatter.py
Original file line number Diff line number Diff line change
@@ -1,122 +1,60 @@
"""Typical ASV table file (before processing):
All benchmarks:
before after ratio
[fcd6c976] [bc939276]
<main> <test-pr>
2.1k 2.1k 1.00 benchmarks.MemSuite.mem_list
failed 304±2ms n/a benchmarks.TimeSuite.time_iterkeys
2.43±0.05μs 205±0.7ms 84400.48 benchmarks.TimeSuite.time_keys
9.67±0.03μs 505±1ms 52177.14 benchmarks.TimeSuite.time_range
failed 1.01±0s n/a benchmarks.TimeSuite.time_xrange
Formatted ASV table file (github style):
| Before | After | Ratio | Method |
|-------------|-------------|----------|------------------------------------|
| [fcd6c976] | [bc939276] | | |
| 2.1k | 2.1k | 1.00 | benchmarks.MemSuite.mem_list |
| failed | 304±2ms | n/a | benchmarks.TimeSuite.time_iterkeys |
| 2.43±0.05μs | 205±0.7ms | 84400.48 | benchmarks.TimeSuite.time_keys |
| 9.67±0.03μs | 505±1ms | 52177.14 | benchmarks.TimeSuite.time_range |
| failed | 1.01±0s | n/a | benchmarks.TimeSuite.time_xrange |
"""

import re

from tabulate import tabulate

# Path used for both the input and the output file when none is given.
DEFAULT_OUTPUT_FILE = "output"
# Table format name handed to `tabulate` when rendering the output.
OUTPUT_TABLE_STYLE = "github"
# Upper bound on the number of benchmark rows kept in the formatted table.
MAX_NUM_ROWS = 10


def rewrite_file(input_file=None, output_file=None):
    """Convert an `asv compare` table on disk into a github-formatted table.

    Parameters
    ----------
    input_file : str, optional
        File holding the raw asv output. DEFAULT_OUTPUT_FILE is used when
        the value is falsy.
    output_file : str, optional
        File that receives the formatted table. DEFAULT_OUTPUT_FILE is used
        when the value is falsy.
    """
    source_path = input_file or DEFAULT_OUTPUT_FILE
    target_path = output_file or DEFAULT_OUTPUT_FILE

    # The input is fully parsed before the output is opened for writing,
    # which matters because both paths may point at the same file.
    column_names, table_rows = format_asv_table_from_file(source_path)

    with open(target_path, "w", encoding="utf-8") as handle:
        rendered = tabulate(
            table_rows, headers=column_names, tablefmt=OUTPUT_TABLE_STYLE
        )
        handle.write(rendered)


def format_asv_table_from_file(filename):
    """Parse and format a table generated by `asv compare`.

    Parameters
    ----------
    filename : str
        Name of the file containing the table.

    Returns
    -------
    tuple
        The capitalized headers, followed by the table rows: the branch
        row first, then at most MAX_NUM_ROWS benchmark rows.
    """
    with open(filename, "r", encoding="utf-8") as stream:
        parsed = parse_table_rows(stream.readlines())

    column_names = format_headers(parsed[0])
    branch_row = parsed[1]
    # Slicing already stops at the end of the list, so no explicit
    # min() against the number of available rows is required.
    benchmarks = parsed[2:][:MAX_NUM_ROWS]
    trimmed = remove_first_column(benchmarks)

    return column_names, [branch_row] + trimmed


def parse_table_rows(rows):
    """Split every table line into its whitespace-separated columns.

    Parameters
    ----------
    rows : list of str
        Lines read from a file.

    Returns
    -------
    list of lists
        The columns of each line. The first three lines of the input are
        dropped because they carry no useful information.
    """
    whitespace = re.compile(r"\s+")
    table_lines = rows[3:]
    return [whitespace.split(line.strip()) for line in table_lines]


def format_headers(headers):
    """Build the capitalized header row, including the "Method" column.

    The input is not modified: the extra "method" label is added to a
    copy, so callers can keep using their original list.

    Parameters
    ----------
    headers : list of str
        List of headers read from the file.

    Returns
    -------
    list of str
        Full list of capitalized table headers, ending with "Method".
    """
    return [header.capitalize() for header in [*headers, "method"]]


def remove_first_column(lines):
    """Drop the leading marker column ("+" / "-" / "x") where present.

    A line is considered to carry the marker when it has more than four
    columns; shorter lines are passed through unchanged.

    Parameters
    ----------
    lines : list of lists
        Lines containing benchmark information.

    Returns
    -------
    list of lists
        Lines containing benchmark information.
    """
    trimmed = []
    for columns in lines:
        if len(columns) > 4:
            columns = columns[1:]
        trimmed.append(columns)
    return trimmed
class AsvFormatter:
    """Common file handling shared by the ASV-specific formatters."""

    MAX_NUM_ROWS = 10
    DEFAULT_OUTPUT_FILE = "output"

    def __init__(self, input_file=None, output_file=None):
        # Falsy paths (None, "") fall back to the class default.
        self.input_file = input_file or self.DEFAULT_OUTPUT_FILE
        self.output_file = output_file or self.DEFAULT_OUTPUT_FILE

    def extract_table_from_file(self):
        """Read the input file and return the lines that form the asv table.

        Returns
        -------
        list of str
            The list of file rows that constitute the asv table.
        """
        with open(self.input_file, "r", encoding="utf-8") as stream:
            content = list(stream)
        first_row = self.get_table_start_index(content)
        return content[first_row:]

    def get_table_start_index(self, lines):
        """Locate the line on which the asv table begins.

        The table is expected three lines after the first empty line of
        the `asv compare` output; those three lines are the banner, which
        is discarded.

        Parameters
        ----------
        lines: list of str
            The lines of the output file.

        Returns
        -------
        int
            The line index in which the table starts.

        Raises
        ------
        ValueError
            If there is no empty line, or the table would start past the
            end of the file.
        """
        first_blank = next(
            (index for index, line in enumerate(lines) if not line.strip()),
            None,
        )
        if first_blank is None:
            raise ValueError("Invalid asv table")
        # Skip the "All benchmarks" banner
        table_start = first_blank + 3
        if table_start >= len(lines):
            raise ValueError("Invalid asv table")
        return table_start

    def write_output_to_file(self, output):
        """Persist the formatted asv table to the output file.

        Parameters
        ----------
        output : str
            The formatted asv table.
        """
        with open(self.output_file, "w", encoding="utf-8") as stream:
            stream.write(output)
96 changes: 96 additions & 0 deletions src/lf_asv_formatter/simple_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import re

from .asv_formatter import AsvFormatter

# pylint: disable=line-too-long


class SimpleFormatter(AsvFormatter):
    """Formatter for the output of asv >= 0.6.0.

    The table is taken from the ASV output file and clipped to the maximum
    number of rows. Its original GitHub styling is kept as is.

    Typical ASV table file (before processing):
    All benchmarks:
    | Change | Before [bd9dfcb0] | After [65151fad] | Ratio | Benchmark (Parameter) |
    |----------|----------------------|---------------------|---------|------------------------------------|
    | | 2.67±0.3s | 4.05±0.2s | ~1.52 | benchmarks.TimeSuite.time_xrange |
    | | 1.97±1s | 2.51±0.9s | ~1.27 | benchmarks.TimeSuite.time_keys |
    | | 2.82±1s | 1.95±0.8s | ~0.69 | benchmarks.TimeSuite.time_iterkeys |
    | + | 464 | 3.89k | 8.38 | benchmarks.MemSuite.mem_list |
    | | 3.00±0.5s | 2.97±1s | 0.99 | benchmarks.TimeSuite.time_range |

    The output will be similar to:
    | Before [bd9dfcb0] | After [65151fad] | Ratio | Benchmark (Parameter) |
    |----------------------|---------------------|---------|------------------------------------|
    | 2.67±0.3s | 4.05±0.2s | ~1.52 | benchmarks.TimeSuite.time_xrange |
    | 1.97±1s | 2.51±0.9s | ~1.27 | benchmarks.TimeSuite.time_keys |
    | 2.82±1s | 1.95±0.8s | ~0.69 | benchmarks.TimeSuite.time_iterkeys |
    | 464 | 3.89k | 8.38 | benchmarks.MemSuite.mem_list |
    | 3.00±0.5s | 2.97±1s | 0.99 | benchmarks.TimeSuite.time_range |
    """

    def rewrite_file(self):
        """Read the ASV table, transform it and write it back to disk."""
        table_lines = self.extract_table_from_file()
        formatted = self.format_asv_table_from_file(table_lines)
        self.write_output_to_file(formatted)

    def format_asv_table_from_file(self, rows):
        """Clip the `asv compare` table to at most MAX_NUM_ROWS results.

        Parameters
        ----------
        rows : list of str
            List of asv table rows.

        Returns
        -------
        str
            The formatted asv table.
        """
        column_names = self.parse_headers(rows[0])
        # The header row and the separator row precede the benchmark rows,
        # hence the offset of two.
        num_benchmarks = len(rows[2:])
        last_row = 2 + min(self.MAX_NUM_ROWS, num_benchmarks)
        if len(column_names) > 4:
            # If number of columns > 4, there is a "Change"
            # column that we wish to discard
            rows = self.remove_first_column(rows)
        clipped = rows[:last_row]
        return "".join(str(row) for row in clipped)

    def parse_headers(self, headers_str):
        """Extract the column labels from the table header line.

        Parameters
        ----------
        headers_str: str
            The string with the labels and respective "|" delimiters.

        Returns
        -------
        list of str
            A list with the table headers.
        """
        # Each label is the text between a "|" and the next "|"; the
        # lookahead leaves the closing delimiter for the following match.
        return re.findall(r"\|(.+?)(?=\|)", headers_str)

    def remove_first_column(self, lines):
        """Strip the first column ("+" / "-" / "x") from every table line.

        Parameters
        ----------
        lines : list of str
            Lines containing benchmark information.

        Returns
        -------
        list of str
            The same lines without their leading column.
        """
        first_column = re.compile(r"^\|(.*?\|)")
        return [first_column.sub(r"|", line) for line in lines]
Loading

0 comments on commit 7bf94f0

Please sign in to comment.