Skip to content

Commit

Permalink
Merge pull request #222 from roedoejet/main
Browse files Browse the repository at this point in the history
ICLDC Release
  • Loading branch information
roedoejet authored Feb 24, 2023
2 parents bb765d4 + 297b069 commit 927c818
Show file tree
Hide file tree
Showing 63 changed files with 546 additions and 239 deletions.
6 changes: 6 additions & 0 deletions .git-blame-ignore-revs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
9c26df474a70543588e4f8b3ce7d56a36f475da4
cd3165733bbb5cd21b28aa2a3115cc13887dfa90
9b156b87b63f6ff3c337c3e82ec1ecc45a2af03a
d6ae834863d309cd05096d32c7237eea35e21615
ce0a4b1b2aca9c3e3dcb09dc473c44e9014cc103
1fa3d9d34b4087c44047df64fb0f936db73cb09f
2 changes: 1 addition & 1 deletion .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ Pull request template for adding a new language
- [ ] Config.yaml file includes all author names, and settings necessary
- [ ] Please add some test data in `g2p/tests/public/data`. The added file should be a csv/tsv/psv file and each row should have the format `[input_mapping_code,output_mapping_code,input_string,output_string]`
- [ ] As the last step, G2P has been updated by running `g2p update` locally and committing the change
- [ ] You agree to license your contribution under the same license as this project (see [LICENSE](https://github.com/roedoejet/g2p/blob/master/LICENSE) file).
- [ ] You agree to license your contribution under the same license as this project (see [LICENSE](https://github.com/roedoejet/g2p/blob/main/LICENSE) file).

* **Other information**:
71 changes: 71 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL"

on:
push:
branches: [ main, release ]
schedule:
- cron: '41 2 * * 0'

jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write

strategy:
fail-fast: false
matrix:
language: [ 'javascript', 'python' ]
# CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
# Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

steps:
- name: Checkout repository
uses: actions/checkout@v3

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
# By default, queries listed here will override any specified in a config file.
# Prefix the list here with "+" to use these queries and those in the config file.

# For details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
# queries: security-extended,security-and-quality


# Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2

# ℹ️ Command-line programs to run using the OS shell.
# 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

# If the Autobuild fails above, remove it and uncomment the following three lines.
# Modify them (or add more) to build your code; please refer to the EXAMPLE below for guidance.

# - run: |
# echo "Run, Build Application using script"
# ./location_of_script_within_repo/buildscript.sh

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
with:
category: "/language:${{matrix.language}}"
4 changes: 2 additions & 2 deletions .github/workflows/matrix-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ jobs:
pre-release-matrix-test:
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
os: [ubuntu-20.04, windows-latest, macos-latest]
python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
runs-on: ${{ matrix.os }}
steps:
Expand All @@ -18,6 +18,6 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip3 install -e .
pip install -e .
- name: Run tests
run: python run_tests.py dev
4 changes: 2 additions & 2 deletions .github/workflows/pythonpublish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v1
uses: actions/setup-python@v4
with:
python-version: '3.x'
- name: Install dependencies
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/studio-release-tests.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
name: Run Tests for Studio
on:
pull_request:
branches: [release, master]
branches: [release, main]
push:
branches: [release, master]
branches: [release, main]
jobs:
test-studio:
runs-on: ubuntu-latest
Expand All @@ -16,13 +16,13 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip3 install -r requirements.txt
pip3 install -r requirements/requirements.test.txt
pip3 install -e .
pip install -r requirements.txt
pip install -r requirements/requirements.test.txt
pip install -e .
- name: Ensure browser is installed
run: python -m playwright install --with-deps chromium
- name: Run tests
run: |
gunicorn --worker-class eventlet -w 1 g2p.app:APP --no-sendfile --bind 0.0.0.0:5000 --daemon
sleep 5
cd g2p/tests && python3 test_studio.py
cd g2p/tests && python test_studio.py
17 changes: 9 additions & 8 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
- pull_request
jobs:
test:
runs-on: ubuntu-latest
runs-on: ubuntu-20.04
# #no-ci in the commit log flags commits we don't want CI-validated
if: ${{ !contains(github.event.head_commit.message, '#no-ci') }}
steps:
Expand All @@ -16,21 +16,22 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip3 install -r requirements.txt
pip3 install -e .
pip3 install pip-licenses
pip install -r requirements.txt
pip install -e .
pip install pip-licenses
if pip-licenses | grep -v 'Artistic License' | grep -v LGPL | grep GNU; then echo 'Please avoid introducing *GPL dependencies'; false; fi
pip3 install coverage coveralls
pip install coverage coveralls
- name: Run tests
run: |
gunicorn --worker-class eventlet -w 1 g2p.app:APP --no-sendfile --bind 0.0.0.0:5000 --daemon
sleep 5
coverage run run_tests.py dev
coverage xml
if git status | grep -E 'static.*json|mapping.*pkl'; then echo 'g2p databases out of date, please run "g2p update" and commit the results.'; false; else echo OK; fi
- uses: codecov/codecov-action@v2
- uses: codecov/codecov-action@v3
with:
fail_ci_if_error: true # optional (default = false)
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: false # too many upload errors to keep "true"

test-on-windows:
runs-on: windows-latest
Expand All @@ -44,6 +45,6 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip3 install -e .
pip install -e .
- name: Run tests on Windows
run: python run_tests.py dev
12 changes: 7 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.3.0
rev: v4.4.0
hooks:
- id: check-yaml
- id: check-json
- id: end-of-file-fixer
- id: trailing-whitespace
exclude: \.svg$
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.3
hooks:
- id: flake8
- repo: local
# Using local repos because these won't work for me from remote repo -EJ
# They're also more convenient because we install them via requirements.dev.txt
Expand All @@ -33,3 +30,8 @@ repos:
language: system
types: [python]
stages: [commit]
- repo: https://github.com/pycqa/flake8
# do flake8 last to avoid duplicate reports
rev: 3.8.3
hooks:
- id: flake8
10 changes: 8 additions & 2 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -1,2 +1,8 @@
[MASTER]
disable=no-else-return # I find the "unecessary" else makes code more readable
[MAIN]
disable=
# We use isort for sorting our imports, so never mind what pylint thinks
wrong-import-order,
# I find the "unnecessary" else makes code more readable
no-else-return,
# We use single letter e for exception, f for file handles
invalid-name
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Gᵢ2Pᵢ

[![codecov](https://codecov.io/gh/roedoejet/g2p/branch/master/graph/badge.svg)](https://codecov.io/gh/roedoejet/g2p)
[![codecov](https://codecov.io/gh/roedoejet/g2p/branch/main/graph/badge.svg)](https://codecov.io/gh/roedoejet/g2p)
[![Documentation Status](https://readthedocs.org/projects/g2p/badge/?version=latest)](https://g2p.readthedocs.io/en/latest/?badge=latest)
[![Build Status](https://github.com/roedoejet/g2p/actions/workflows/tests.yml/badge.svg)](https://github.com/roedoejet/g2p/actions)
[![PyPI package](https://img.shields.io/pypi/v/g2p.svg)](https://pypi.org/project/g2p/)
Expand All @@ -11,7 +11,7 @@
This library is for handling arbitrary conversions between input and output segments while preserving indices.

![indices](https://raw.githubusercontent.com/roedoejet/g2p/master/g2p/static/assets/bonjour.png)
![indices](https://raw.githubusercontent.com/roedoejet/g2p/main/g2p/static/assets/bonjour.png)

## Table of Contents
- [Gᵢ2Pᵢ](#gᵢ2pᵢ)
Expand Down Expand Up @@ -225,7 +225,7 @@ Mappings are defined in either a CSV or json file. See [writing mapping files](#
5. After installing your local version (`pip3 install -e .`), update with `g2p update`
6. Add some tests in `g2p/tests/public/data/<YourIsoCode>.psv`. Each line in the file will run a test with the following structure: `<in_lang>|<out_lang>|<input_string>|<expected_output>`
7. Run `python3 run_tests.py langs` to make sure your tests pass.
8. Make sure you have [checked all the boxes](https://github.com/roedoejet/g2p/blob/master/.github/pull_request_template.md) and make a [pull request]((https://github.com/roedoejet/g2p/pulls)!
8. Make sure you have [checked all the boxes](https://github.com/roedoejet/g2p/blob/main/.github/pull_request_template.md) and make a [pull request](https://github.com/roedoejet/g2p/pulls)!

### Adding a new language for support with ReadAlongs

Expand Down
2 changes: 1 addition & 1 deletion docs/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ This repo follows the `Contributor Covenant <http://contributor-covenant.org/ver
Adding a new language/mapping
-----------------------------

In order to add a new mapping or language to be supported, please fill out a pull request with the `pull request template <https://github.com/roedoejet/g2p/blob/master/.github/pull_request_template.md>`__ provided.
In order to add a new mapping or language to be supported, please fill out a pull request with the `pull request template <https://github.com/roedoejet/g2p/blob/main/.github/pull_request_template.md>`__ provided.
2 changes: 1 addition & 1 deletion g2p/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# -*- coding: UTF-8 -*-

VERSION = "0.5"
VERSION = "1.0"
6 changes: 3 additions & 3 deletions g2p/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,11 +140,11 @@ def get(self):
def update_docs():
"""Update the swagger documentation with all nodes from the network"""
swagger_path = os.path.join(os.path.dirname(static_file), "swagger.json")
with open(swagger_path) as f:
with open(swagger_path, encoding="utf-8-sig") as f:
data = json.load(f)
data["components"]["schemas"]["Langs"]["enum"] = sorted(LANGS_NETWORK.nodes)
with open(swagger_path, "w") as f:
f.write(json.dumps(data))
with open(swagger_path, "w", encoding="utf-8", newline="\n") as f:
f.write(json.dumps(data) + "\n")
LOGGER.info("Updated API documentation")


Expand Down
5 changes: 4 additions & 1 deletion g2p/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@ def network_to_echart(write_to_file: bool = False, layout: bool = False):
edges.append({"source": edge[0], "target": edge[1]})
if write_to_file:
with open(
os.path.join(os.path.dirname(static_file), "languages-network.json"), "w"
os.path.join(os.path.dirname(static_file), "languages-network.json"),
"w",
encoding="utf-8",
newline="\n",
) as f:
f.write(json.dumps({"nodes": nodes, "edges": edges}) + "\n")
LOGGER.info("Wrote network nodes and edges to static file.")
Expand Down
5 changes: 4 additions & 1 deletion g2p/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Command line interface to the g2p system
"""
import json
import os
import pprint
Expand Down Expand Up @@ -314,7 +317,7 @@ def generate_mapping( # noqa: C901

if list_dummy:
# --list-dummy mode
print("Dummy phone inventory: {}".format(DUMMY_INVENTORY))
print(f"Dummy phone inventory: {DUMMY_INVENTORY}")

elif ipa or dummy:
# --ipa and --dummy modes
Expand Down
11 changes: 7 additions & 4 deletions g2p/mappings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ def find_mapping_by_id(map_id: str):

@staticmethod
def mapping_type(name):
"""Return the type of a mapping given its name"""
if is_ipa(name):
return "IPA"
elif is_xsampa(name):
Expand All @@ -211,6 +212,7 @@ def _string_to_pua(string: str, offset: int) -> str:
return intermediate_char * len(string)

def index(self, item):
"""Find the location of an item in self"""
return self.mapping.index(item)

def inventory(self, in_or_out: str = "in"):
Expand Down Expand Up @@ -259,7 +261,7 @@ def sorted_rule_items(rule: dict):
for io in self.mapping
]

def process_kwargs(self, mapping):
def process_kwargs(self, mapping): # noqa: C901
"""Apply kwargs in the order they are provided. kwargs are ordered as of python 3.6"""

if "as_is" in self.kwargs:
Expand Down Expand Up @@ -364,7 +366,8 @@ def rule_to_regex(self, rule: dict) -> Union[Pattern, None]:
# Prevent null input. See https://github.com/roedoejet/g2p/issues/24
if not rule["in"]:
LOGGER.warning(
f'Rule with input \'{rule["in"]}\' and output \'{rule["out"]}\' has no input. This is disallowed. Please check your mapping file for rules with null inputs.'
f'Rule with input \'{rule["in"]}\' and output \'{rule["out"]}\' has no input. '
"This is disallowed. Please check your mapping file for rules with null inputs."
)
return None
if "context_before" in rule and rule["context_before"]:
Expand Down Expand Up @@ -459,7 +462,7 @@ def mapping_to_file(self, output_path: str = GEN_DIR, file_type: str = "json"):
"""Write mapping to file"""

if not os.path.isdir(output_path):
raise Exception("Path %s is not a directory", output_path)
raise Exception(f"Path {output_path} is not a directory")
fn = os.path.join(
output_path,
self.kwargs.get("in_lang", "und")
Expand All @@ -468,7 +471,7 @@ def mapping_to_file(self, output_path: str = GEN_DIR, file_type: str = "json"):
+ "."
+ file_type,
)
with open(fn, "w", encoding="utf8") as f:
with open(fn, "w", encoding="utf8", newline="\n") as f:
self.mapping_to_stream(f, file_type)

def config_to_file(
Expand Down
Loading

0 comments on commit 927c818

Please sign in to comment.