Skip to content

Add lower_path to check_exclude_file() for exclude exact file path #1101

Add lower_path to check_exclude_file() for exclude exact file path

Add lower_path to check_exclude_file() for exclude exact file path #1101

Workflow file for this run

# This workflow runs the benchmark.
# Splitting it into separate jobs keeps the downloaded data cached even if the benchmark fails.
# Display name of the workflow.
name: Benchmark
# Run for pushes to main and for pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
jobs:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
download_data:
  # Restores the `data` directory of Samsung/CredData from the Actions cache.
  # The cache key is derived from snapshot.yaml; on a cache miss the remaining
  # steps regenerate `data` with download_data.py.
  runs-on: ubuntu-latest
  steps:
    - name: Checkout CredData
      uses: actions/checkout@v3
      with:
        repository: Samsung/CredData
    - name: Cache data
      id: cache-data
      uses: actions/cache@v3
      with:
        path: data
        key: cred-data-${{ hashFiles('snapshot.yaml') }}
    # Every step below is needed only to rebuild the data, so each one is
    # skipped when the cache was hit.
    - name: Set up Python 3.8
      if: steps.cache-data.outputs.cache-hit != 'true'
      uses: actions/setup-python@v3
      with:
        python-version: "3.8"
    - name: Update PIP
      # Guarded like its siblings: without the guard this step ran on a cache
      # hit against whatever `python` the runner image provides, because the
      # setup-python step above had been skipped.
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: python -m pip install --upgrade pip
    - name: Install requirements of CredData
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: python -m pip install --requirement requirements.txt
    - name: Generate Data Asset
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: python download_data.py --data_dir data
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
run_benchmark:
  # Runs the CredData benchmark with the credsweeper scanner, extracts the
  # score lines into benchmark.txt and compares them with cicd/benchmark.txt
  # from the CredSweeper checkout. Requires the data cached by download_data.
  needs: [download_data]
  runs-on: ubuntu-latest
  steps:
    - name: Checkout CredData
      uses: actions/checkout@v3
      with:
        repository: Samsung/CredData
    - name: Cache data
      id: cache-data
      uses: actions/cache@v3
      with:
        path: data
        key: cred-data-${{ hashFiles('snapshot.yaml') }}
    # This job does not regenerate the data itself, so a cache miss is fatal.
    - name: Failure in case when cache missed
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: exit 1
    - name: Check Data Asset - DEBUG
      if: steps.cache-data.outputs.cache-hit == 'true'
      run: ls -al . && ls -al data
    - name: Set up Python 3.8
      uses: actions/setup-python@v3
      with:
        python-version: "3.8"
    - name: Update PIP
      run: python -m pip install --upgrade pip
    - name: Install requirements of CredData
      run: python -m pip install --requirement requirements.txt
    # For pull requests the PR head commit is placed in temp/CredSweeper.
    - name: Checkout CredSweeper
      if: ${{ 'pull_request' == github.event_name }}
      uses: actions/checkout@v3
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        path: temp/CredSweeper
    # Replaces the CredSweeper repository URL in the benchmark constants with a
    # dummy:// one; the grep fails the step if the replacement did not happen.
    # NOTE(review): presumably this makes the benchmark use the checkout in
    # temp/CredSweeper instead of cloning - confirm against the benchmark code.
    - name: Patch benchmark for PR work
      if: ${{ 'pull_request' == github.event_name }}
      run: |
        sed -i 's|CREDSWEEPER = "https://github.com/Samsung/CredSweeper.git"|CREDSWEEPER = "dummy://github.com/Samsung/CredSweeper.git"|' benchmark/common/constants.py
        grep --with-filename --line-number 'dummy://github.com/Samsung/CredSweeper.git' benchmark/common/constants.py
    - name: Run Benchmark
      run: |
        # The default shell is `bash -e` without pipefail, so the step status
        # would be that of `tee` and a failed benchmark would pass unnoticed.
        set -o pipefail
        python -m benchmark --scanner credsweeper | tee credsweeper.log
    - name: Get only results
      run: |
        grep '^Detected Credentials: \|^result_cnt : \|^credsweeper ' credsweeper.log | LC_ALL=C sort | tee benchmark.txt
        cp -vf ./temp/CredSweeper/output.json report.json
    # Both artifacts are uploaded even when an earlier step failed.
    # upload-artifact v3 is retired on github.com, hence v4.
    - name: Upload report artifact
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: report
        path: report.json
    - name: Upload benchmark artifact
      if: always()
      uses: actions/upload-artifact@v4
      with:
        name: benchmark
        path: benchmark.txt
    - name: Verify benchmark scores of the PR
      if: ${{ 'pull_request' == github.event_name }}
      # update cicd/benchmark.txt with uploaded artifact if a difference is found
      run: |
        LC_ALL=C sort temp/CredSweeper/cicd/benchmark.txt | diff - benchmark.txt
    # On non-PR events there is no pull_request payload, so the previous
    # `ref: github.event.pull_request.head.sha` expanded to an empty string.
    # Omitting `ref` checks out the same default: the commit of the event.
    - name: Checkout CredSweeper on push event
      if: ${{ 'pull_request' != github.event_name }}
      uses: actions/checkout@v3
      with:
        path: CredSweeper
    - name: Verify benchmark scores on push event
      if: ${{ 'pull_request' != github.event_name }}
      # update cicd/benchmark.txt with uploaded artifact if a difference is found
      run: |
        LC_ALL=C sort CredSweeper/cicd/benchmark.txt | diff - benchmark.txt
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
performance_benchmark:
  # Times the released CredSweeper (from pip) and the checked-out CredSweeper
  # on the same cached data and fails when the checkout is significantly slower.
  # Both measurements are kept in a single job to keep a constant environment
  # during the test.
  needs: [download_data]
  runs-on: ubuntu-latest
  strategy:
    fail-fast: false
    matrix:
      python-version: ["3.8", "3.9", "3.10"]
  steps:
    - name: Checkout CredData
      uses: actions/checkout@v3
      with:
        repository: Samsung/CredData
    - name: Cache data
      id: cache-data
      uses: actions/cache@v3
      with:
        path: data
        key: cred-data-${{ hashFiles('snapshot.yaml') }}
    # This job does not regenerate the data itself, so a cache miss is fatal.
    - name: Failure in case when cache missed
      if: steps.cache-data.outputs.cache-hit != 'true'
      run: exit 1
    - name: Exclude very huge data
      if: steps.cache-data.outputs.cache-hit == 'true'
      run: rm -vrf data/81cd05d0
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v3
      with:
        python-version: ${{ matrix.python-version }}
    - name: Update PIP
      run: python -m pip install --upgrade pip
    - name: Install released CredSweeper
      run: |
        python -m pip install credsweeper
        # check the banner
        credsweeper --banner
    - name: Run performance benchmark of released CredSweeper
      run: |
        START_TIME=$(date +%s)
        credsweeper --path data --save-json /dev/null
        FINISH_TIME=$(date +%s)
        RELEASE_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
        # a non-positive duration is rejected; it is the divisor in "Compare results"
        if [ 0 -lt ${RELEASE_TIME} ]; then
          # the matrix has no code-type key, so the file is tagged "release" instead
          echo ${RELEASE_TIME}>stopwatch_release_${{ matrix.python-version }}_result.txt
          echo Elapsed $(date -ud "@${RELEASE_TIME}" +"%H:%M:%S")
        else
          echo "Wrong result '${RELEASE_TIME}'"
          exit 1
        fi
        # exported for the "Compare results" step
        echo "RELEASE_TIME=${RELEASE_TIME}" >> $GITHUB_ENV
    - name: Uninstall released CredSweeper
      run: |
        python -m pip uninstall -y credsweeper
    # The ref is the PR head commit; on push events the expression is empty
    # and checkout falls back to the commit that triggered the run.
    - name: Checkout CredSweeper
      uses: actions/checkout@v3
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        path: temp/CredSweeper
    - name: Install current CredSweeper
      run: |
        python -m pip install temp/CredSweeper
        # check the banner
        credsweeper --banner
    - name: Run performance benchmark of current CredSweeper
      run: |
        START_TIME=$(date +%s)
        credsweeper --path data --save-json /dev/null
        FINISH_TIME=$(date +%s)
        HEAD_TIME=$(( ${FINISH_TIME} - ${START_TIME} ))
        if [ 0 -lt ${HEAD_TIME} ]; then
          # separate file name, so the release measurement is not overwritten
          echo ${HEAD_TIME}>stopwatch_head_${{ matrix.python-version }}_result.txt
          echo Elapsed $(date -ud "@${HEAD_TIME}" +"%H:%M:%S")
        else
          echo "Wrong result '${HEAD_TIME}'"
          exit 1
        fi
        # exported for the "Compare results" step
        echo "HEAD_TIME=${HEAD_TIME}" >> $GITHUB_ENV
    - name: Compare results
      run: |
        # delta is the relative difference in tenths of a percent, so 250 means 25%
        THRESHOLD=250
        if [ ${RELEASE_TIME} -le ${HEAD_TIME} ]; then
          delta=$(( 1000 * ( ${HEAD_TIME} - ${RELEASE_TIME} ) / ${RELEASE_TIME} ))
          echo "delta=$delta"
          # only a slowdown above the threshold fails the job
          if [ $THRESHOLD -lt ${delta} ]; then
            echo "Significantly slowdown. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
            exit 1
          fi
          echo "Slowdown. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
        else
          delta=$(( 1000 * ( ${RELEASE_TIME} - ${HEAD_TIME} ) / ${RELEASE_TIME} ))
          echo "delta=$delta"
          if [ $THRESHOLD -lt ${delta} ]; then
            echo "Significantly speed-up. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
            exit 0
          fi
          echo "Speed-up. Was ${RELEASE_TIME}, now ${HEAD_TIME}. Delta(%*10)=${delta}"
        fi
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #