Skip to content

39k Pfizer dataset (#188) (#189) #571

39k Pfizer dataset (#188) (#189)

39k Pfizer dataset (#188) (#189) #571

Workflow file for this run

# Runs validate_dataset.py on all files in the database.
name: Validation
on:
  # Run on every push to the main branch.
  push:
    branches: [main]
  # For pull requests, run only when this workflow file itself is modified.
  pull_request:
    paths:
      - '.github/workflows/validation.yml'
env:
  # Git ref of Open-Reaction-Database/ord-schema that the job checks out and
  # installs before validating.
  ORD_SCHEMA_TAG: v0.3.81
jobs:
  # Validates the datasets under data/ with ord-schema's validate_dataset.py.
  # The work is sharded into one matrix job per filter pattern.
  validate_database:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Each entry is handed to validate_dataset.py via --filter. The three
        # character classes ([0-4], [5-9], [a-f]) in both positions give all
        # nine combinations of a two-character lowercase-hex prefix.
        # NOTE(review): presumably the filter is matched against dataset
        # paths; confirm against validate_dataset.py.
        filter:
          - 'data/[0-4][0-4]'
          - 'data/[0-4][5-9]'
          - 'data/[0-4][a-f]'
          - 'data/[5-9][0-4]'
          - 'data/[5-9][5-9]'
          - 'data/[5-9][a-f]'
          - 'data/[a-f][0-4]'
          - 'data/[a-f][5-9]'
          - 'data/[a-f][a-f]'
    steps:
      # checkout@v2 targets the retired Node 12 runtime; v4 accepts the same
      # inputs used here.
      - name: Checkout ord-data
        uses: actions/checkout@v4
        with:
          # Dataset files are fetched through Git LFS.
          lfs: true
      - name: Checkout ord-schema
        uses: actions/checkout@v4
        with:
          repository: Open-Reaction-Database/ord-schema
          ref: ${{ env.ORD_SCHEMA_TAG }}
          # Checked out into a subdirectory of the workspace so it sits next
          # to the data/ tree.
          path: ord-schema
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML does not read the version as the float 3.1.
          python-version: '3.10'
      - name: Install ord_schema
        run: |
          cd "${GITHUB_WORKSPACE}/ord-schema"
          python -m pip install --upgrade pip
          python -m pip install wheel
          python -m pip install .
      - name: Validate datasets
        env:
          # Pass the matrix value through the environment rather than
          # interpolating it directly into the shell script.
          FILTER: ${{ matrix.filter }}
        run: |
          cd "${GITHUB_WORKSPACE}"
          python ./ord-schema/ord_schema/scripts/validate_dataset.py --input="data/*/*.pb*" --filter="${FILTER}"