Skip to content

Commit

Permalink
Huggingface and Groq patterns (#609)
Browse files Browse the repository at this point in the history
* Groq API Key

* Hugging Face User Access Token

* testfix

* reorganize patterns ranges in common order 0-9A-Za-z
  • Loading branch information
babenek authored Sep 30, 2024
1 parent 754f893 commit 20c72e1
Show file tree
Hide file tree
Showing 8 changed files with 640 additions and 60 deletions.
142 changes: 87 additions & 55 deletions credsweeper/rules/config.yaml

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
from pathlib import Path

# total number of files in test samples
SAMPLES_FILES_COUNT: int = 134
SAMPLES_FILES_COUNT: int = 136

# the lowest value of ML threshold is used to display possible lowest values
NEGLIGIBLE_ML_THRESHOLD = 0.0001

# credentials count after scan
SAMPLES_CRED_COUNT: int = 391
SAMPLES_CRED_LINE_COUNT: int = 409
SAMPLES_CRED_COUNT: int = 396
SAMPLES_CRED_LINE_COUNT: int = 414

# credentials count after post-processing
SAMPLES_POST_CRED_COUNT: int = 344
SAMPLES_POST_CRED_COUNT: int = 349

# with option --doc
SAMPLES_IN_DOC = 425
SAMPLES_IN_DOC = 430

# archived credentials that are not found without --depth
SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 23
Expand Down
135 changes: 135 additions & 0 deletions tests/data/depth_3.json
Original file line number Diff line number Diff line change
Expand Up @@ -7226,6 +7226,60 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Groq API Key",
"severity": "high",
"confidence": "strong",
"line_data_list": [
{
"line": "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",
"line_num": 1,
"path": "./tests/samples/groq_api_key",
"info": "./tests/samples/groq_api_key|RAW",
"value": "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",
"value_start": 0,
"value_end": 56,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 5.096509298449433,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Groq API Key",
"severity": "high",
"confidence": "strong",
"line_data_list": [
{
"line": "url/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?part",
"line_num": 2,
"path": "./tests/samples/groq_api_key",
"info": "./tests/samples/groq_api_key|RAW",
"value": "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",
"value_start": 4,
"value_end": 60,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 5.096509298449433,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
Expand Down Expand Up @@ -7280,6 +7334,87 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Hugging Face User Access Token",
"severity": "high",
"confidence": "moderate",
"line_data_list": [
{
"line": "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",
"line_num": 1,
"path": "./tests/samples/huggingface",
"info": "./tests/samples/huggingface|RAW",
"value": "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",
"value_start": 0,
"value_end": 37,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.5617684773102365,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Hugging Face User Access Token",
"severity": "high",
"confidence": "moderate",
"line_data_list": [
{
"line": "url/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/part",
"line_num": 2,
"path": "./tests/samples/huggingface",
"info": "./tests/samples/huggingface|RAW",
"value": "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",
"value_start": 4,
"value_end": 41,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.636224896287629,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Hugging Face User Access Token",
"severity": "high",
"confidence": "moderate",
"line_data_list": [
{
"line": "hf_UdYjVqYvybBLEhIrwwEUYjOgkeyexample",
"line_num": 6,
"path": "./tests/samples/huggingface",
"info": "./tests/samples/huggingface|RAW",
"value": "hf_UdYjVqYvybBLEhIrwwEUYjOgkeyexample",
"value_start": 0,
"value_end": 37,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE36_CHARS",
"entropy": 3.034378196565712,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
Expand Down
135 changes: 135 additions & 0 deletions tests/data/doc.json
Original file line number Diff line number Diff line change
Expand Up @@ -11571,6 +11571,60 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Groq API Key",
"severity": "high",
"confidence": "strong",
"line_data_list": [
{
"line": "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",
"line_num": 1,
"path": "./tests/samples/groq_api_key",
"info": "./tests/samples/groq_api_key|RAW",
"value": "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",
"value_start": 0,
"value_end": 56,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 5.096509298449433,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Groq API Key",
"severity": "high",
"confidence": "strong",
"line_data_list": [
{
"line": "url/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?part",
"line_num": 2,
"path": "./tests/samples/groq_api_key",
"info": "./tests/samples/groq_api_key|RAW",
"value": "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",
"value_start": 4,
"value_end": 60,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 5.096509298449433,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
Expand Down Expand Up @@ -11625,6 +11679,87 @@
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Hugging Face User Access Token",
"severity": "high",
"confidence": "moderate",
"line_data_list": [
{
"line": "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",
"line_num": 1,
"path": "./tests/samples/huggingface",
"info": "./tests/samples/huggingface|RAW",
"value": "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",
"value_start": 0,
"value_end": 37,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.5617684773102365,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Hugging Face User Access Token",
"severity": "high",
"confidence": "moderate",
"line_data_list": [
{
"line": "url/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/part",
"line_num": 2,
"path": "./tests/samples/huggingface",
"info": "./tests/samples/huggingface|RAW",
"value": "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",
"value_start": 4,
"value_end": 41,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE64_CHARS",
"entropy": 4.636224896287629,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
"ml_probability": null,
"rule": "Hugging Face User Access Token",
"severity": "high",
"confidence": "moderate",
"line_data_list": [
{
"line": "hf_UdYjVqYvybBLEhIrwwEUYjOgkeyexample",
"line_num": 6,
"path": "./tests/samples/huggingface",
"info": "./tests/samples/huggingface|RAW",
"value": "hf_UdYjVqYvybBLEhIrwwEUYjOgkeyexample",
"value_start": 0,
"value_end": 37,
"variable": null,
"variable_start": -2,
"variable_end": -2,
"entropy_validation": {
"iterator": "BASE36_CHARS",
"entropy": 3.034378196565712,
"valid": true
}
}
]
},
{
"api_validation": "NOT_AVAILABLE",
"ml_validation": "NOT_AVAILABLE",
Expand Down
Loading

0 comments on commit 20c72e1

Please sign in to comment.