Skip to content

Commit

Permalink
VIN detection (#393)
Browse files Browse the repository at this point in the history
* vin dbg

* removed debug

* codestyle

* benchmark fix

* benchmark fix2

* RENAMING: value card number check

* proposed PR template

* doc

* rollback_benchmark
  • Loading branch information
babenek authored Aug 7, 2023
1 parent dde4fa1 commit cc25331
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 9 deletions.
10 changes: 5 additions & 5 deletions cicd/benchmark.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
Detected Credentials: 4689
result_cnt : 4178, lost_cnt : 94, true_cnt : 3721, false_cnt : 363
credsweeper -> TP : 3721, FP : 363, TN : 19429483, FN : 891, FPR : 0.0000186826, FNR : 0.1931916739, ACC : 0.9999354754, PRC : 0.9111165524, RCL : 0.8068083261, F1 : 0.8557957682
Detected Credentials: 4721
result_cnt : 4208, lost_cnt : 0, true_cnt : 3844, false_cnt : 364
credsweeper -> TP : 3844, FP : 364, TN : 19429359, FN : 891, FPR : 0.0000187342, FNR : 0.1881731785, ACC : 0.9999354240, PRC : 0.9134980989, RCL : 0.8118268215, F1 : 0.8596667785
credsweeper Private Key -> TP : 967, FP : 0, TN : 4, FN : 34, FPR : None, FNR : 0.0339660340, ACC : 0.9661691542, PRC : 1.0000000000, RCL : 0.9660339660, F1 : 0.9827235772
credsweeper Predefined Pattern -> TP : 309, FP : 2, TN : 40, FN : 17, FPR : 0.0476190476, FNR : 0.0521472393, ACC : 0.9483695652, PRC : 0.9935691318, RCL : 0.9478527607, F1 : 0.9701726845
credsweeper Password -> TP : 977, FP : 116, TN : 4164, FN : 422, FPR : 0.0271028037, FNR : 0.3016440315, ACC : 0.9052650114, PRC : 0.8938700823, RCL : 0.6983559685, F1 : 0.7841091493
credsweeper Password -> TP : 981, FP : 116, TN : 4164, FN : 422, FPR : 0.0271028037, FNR : 0.3007840342, ACC : 0.9053316910, PRC : 0.8942570647, RCL : 0.6992159658, F1 : 0.7848000000
credsweeper Generic Token -> TP : 284, FP : 6, TN : 597, FN : 49, FPR : 0.0099502488, FNR : 0.1471471471, ACC : 0.9412393162, PRC : 0.9793103448, RCL : 0.8528528529, F1 : 0.9117174960
credsweeper Other -> TP : 127, FP : 6, TN : 738, FN : 265, FPR : 0.0080645161, FNR : 0.6760204082, ACC : 0.7614436620, PRC : 0.9548872180, RCL : 0.3239795918, F1 : 0.4838095238
credsweeper Other -> TP : 246, FP : 7, TN : 738, FN : 265, FPR : 0.0093959732, FNR : 0.5185909980, ACC : 0.7834394904, PRC : 0.9723320158, RCL : 0.4814090020, F1 : 0.6439790576
credsweeper Generic Secret -> TP : 971, FP : 2, TN : 216, FN : 84, FPR : 0.0091743119, FNR : 0.0796208531, ACC : 0.9324430479, PRC : 0.9979445015, RCL : 0.9203791469, F1 : 0.9575936884
credsweeper Seed, Salt, Nonce -> TP : 35, FP : 2, TN : 6, FN : 4, FPR : 0.2500000000, FNR : 0.1025641026, ACC : 0.8723404255, PRC : 0.9459459459, RCL : 0.8974358974, F1 : 0.9210526316
credsweeper Authentication Key & Token -> TP : 51, FP : 4, TN : 28, FN : 16, FPR : 0.1250000000, FNR : 0.2388059701, ACC : 0.7979797980, PRC : 0.9272727273, RCL : 0.7611940299, F1 : 0.8360655738
1 change: 1 addition & 0 deletions credsweeper/filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@
from credsweeper.filters.value_token_base64_check import ValueTokenBase64Check
from credsweeper.filters.value_token_check import ValueTokenCheck
from credsweeper.filters.value_useless_word_check import ValueUselessWordCheck
from credsweeper.filters.value_vin_check import ValueVinCheck
from credsweeper.filters.variable_not_allowed_pattern_check import VariableNotAllowedPatternCheck
83 changes: 83 additions & 0 deletions credsweeper/filters/value_vin_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import contextlib

from credsweeper.config import Config
from credsweeper.credentials import LineData
from credsweeper.file_handler.analysis_target import AnalysisTarget
from credsweeper.filters import Filter


class ValueVinCheck(Filter):
    """Check that value is a VIN (Vehicle Identification Number) with a valid check digit."""

    # Weight applied to each of the 17 VIN positions when computing the checksum.
    # Index 8 (the 9th character) is the check digit itself, so its weight is 0.
    WEIGHTS = [8, 7, 6, 5, 4, 3, 2, 10, 0, 9, 8, 7, 6, 5, 4, 3, 2]
    # Numeric value of every character allowed in a VIN.
    # The letters I, O and Q are intentionally absent: looking them up raises KeyError.
    TRANSLITERATIONS = {
        "0": 0,
        "1": 1,
        "2": 2,
        "3": 3,
        "4": 4,
        "5": 5,
        "6": 6,
        "7": 7,
        "8": 8,
        "9": 9,
        "A": 1,
        "B": 2,
        "C": 3,
        "D": 4,
        "E": 5,
        "F": 6,
        "G": 7,
        "H": 8,
        "J": 1,
        "K": 2,
        "L": 3,
        "M": 4,
        "N": 5,
        "P": 7,
        "R": 9,
        "S": 2,
        "T": 3,
        "U": 4,
        "V": 5,
        "W": 6,
        "X": 7,
        "Y": 8,
        "Z": 9,
    }

    def __init__(self, config: Config = None) -> None:
        pass

    def run(self, line_data: LineData, target: AnalysisTarget) -> bool:
        """Run filter checks on received credential candidate data 'line_data'.

        Args:
            line_data: credential candidate data
            target: multiline target from which line data was obtained

        Return:
            False, if the value is a 17-character sequence with a correct VIN check digit.
            True otherwise: the value is missing, has another length, is a plain integer,
            contains a character that is not allowed in a VIN, or has a wrong check digit.

        """
        value = line_data.value
        if value is None or 17 != len(value):
            return True

        # A sequence that parses as a plain integer is not reported as a VIN
        with contextlib.suppress(ValueError, TypeError):
            int(value)
            return True

        # NHTSA (National Highway Traffic Safety Administration)
        # https://en.wikipedia.org/wiki/Vehicle_identification_number
        # KeyError is raised for characters outside of the VIN alphabet - the value is not a VIN then
        with contextlib.suppress(KeyError, TypeError):
            checksum = sum(weight * ValueVinCheck.TRANSLITERATIONS[char]
                           for weight, char in zip(ValueVinCheck.WEIGHTS, value))
            remainder = checksum % 11
            # The check digit is written as a decimal digit for remainders 0..9 and as "X" for 10.
            # Compare characters directly: other letters must not be transliterated here,
            # otherwise e.g. "A" (value 1) would be accepted as a check digit for remainder 1.
            expected = "X" if 10 == remainder else str(remainder)
            if value[8] == expected:
                return False

        # return True when the sequence has wrong check digit or is not a VIN at all
        return True
12 changes: 12 additions & 0 deletions credsweeper/rules/config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
# Rule for 17-character sequences that look like a Vehicle Identification Number.
- name: VIN
severity: info
type: pattern
values:
# The captured value is exactly 17 characters of digits and upper-case letters except I, O and Q.
# It must be preceded by line start or a non-alphanumeric character and followed by
# line end or a character that is neither alphanumeric nor '='.
- (^|[^0-9A-Za-z])(?P<value>[A-HJ-NPR-Z0-9]{17})([^=0-9A-Za-z]|$)
filter_type:
# ValueVinCheck is the filter class defined in credsweeper/filters/value_vin_check.py
- ValueVinCheck
- ValuePatternCheck
min_line_len: 16
required_regex: "[a-zA-Z0-9_/+-]{15,}"
doc_available: false

- name: Credit card number
severity: info
type: pattern
Expand Down
8 changes: 4 additions & 4 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT: int = 107
SAMPLES_FILES_COUNT: int = 108

# credentials count after scan
SAMPLES_CRED_COUNT: int = 104
SAMPLES_CRED_LINE_COUNT: int = 115
SAMPLES_CRED_COUNT: int = 105
SAMPLES_CRED_LINE_COUNT: int = 116

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 98
SAMPLES_POST_CRED_COUNT: int = 99

# with option --doc
SAMPLES_IN_DOC = 86
Expand Down
22 changes: 22 additions & 0 deletions tests/data/depth_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -3628,6 +3628,28 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "VIN",
"severity": "info",
"line_data_list": [
{
"line": "1M8GDM9AXKP042788",
"line_num": 6,
"path": "tests/samples/vin",
"info": "tests/samples/vin|RAW",
"value": "1M8GDM9AXKP042788",
"variable": null,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 3.6901165175936654,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
Expand Down
22 changes: 22 additions & 0 deletions tests/data/ml_threshold_0.json
Original file line number Diff line number Diff line change
Expand Up @@ -2364,6 +2364,28 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "VIN",
"severity": "info",
"line_data_list": [
{
"line": "1M8GDM9AXKP042788",
"line_num": 6,
"path": "tests/samples/vin",
"info": "",
"value": "1M8GDM9AXKP042788",
"variable": null,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 3.6901165175936654,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
Expand Down
22 changes: 22 additions & 0 deletions tests/data/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -2254,6 +2254,28 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "VIN",
"severity": "info",
"line_data_list": [
{
"line": "1M8GDM9AXKP042788",
"line_num": 6,
"path": "tests/samples/vin",
"info": "",
"value": "1M8GDM9AXKP042788",
"variable": null,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 3.6901165175936654,
"valid": false
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "VALIDATED_KEY",
Expand Down
6 changes: 6 additions & 0 deletions tests/samples/vin
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# fake or not NHTSA
00000000000000000
98374914723492346
M9AX4278X1MKP08GD
# valid
1M8GDM9AXKP042788

0 comments on commit cc25331

Please sign in to comment.