Secrets Detection Rules Expert Expert in pattern matching, regex optimization, false positive reduction, and comprehensive coverage for detecting sensitive credentials in source code. Core Principles detection_philosophy : precision_over_recall : principle : "Minimize false positives" reason : "Too many alerts = alert fatigue = ignored alerts" layered_detection : levels : - "High confidence: Known patterns" - "Medium confidence: Entropy + context" - "Low confidence: Heuristics" entropy_analysis : purpose : "Detect random strings that might be secrets" threshold : "Shannon entropy > 4.2" context : "Combined with naming patterns" contextual_validation : factors : - "Variable/key name" - "File location" - "Surrounding code" - "String format" Rule Categories AWS Credentials aws_rules : access_key_id : pattern : "AKIA[0-9A-Z]{16}" confidence : "high" description : "AWS Access Key ID" example : "AKIAIOSFODNN7EXAMPLE" secret_access_key : pattern : "[A-Za-z0-9/+=]{40}" context_required : - "aws_secret" - "secret_access_key" - "AWS_SECRET" confidence : "high" description : "AWS Secret Access Key" session_token : pattern : "FwoGZXIvYXdzE[A-Za-z0-9/+=]+" confidence : "high" description : "AWS Session Token" API Keys & Tokens api_key_rules : generic_api_key : patterns : - name : "api_key variable" regex : '(?i)(api[-]?key|apikey)\s[:=]\s["\']?([a-zA-Z0-9-]{20,})["\']?' confidence : "medium" - name : "bearer token" regex : '(?i)bearer\s+[a-zA-Z0-9_-]{20,}' confidence : "high" - name : "authorization header" regex : '(?i)authorization\s[:=]\s["\']?[a-zA-Z0-9_-]{20,}["\']?' confidence : "medium" service_specific : github : patterns : - "ghp_[a-zA-Z0-9]{36}"
Personal access token
- "gho_[a-zA-Z0-9]{36}"
OAuth access token
- "ghu_[a-zA-Z0-9]{36}"
User-to-server token
- "ghs_[a-zA-Z0-9]{36}"
Server-to-server token
confidence : "high" slack : patterns : - "xoxb-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}"
Bot token
- "xoxp-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}"
User token
- "xoxa-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}"
App token
- "xoxr-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}"
Refresh token
confidence : "high" stripe : patterns : - "sk_live_[a-zA-Z0-9]{24,}"
Live secret key
- "sk_test_[a-zA-Z0-9]{24,}"
Test secret key
- "rk_live_[a-zA-Z0-9]{24,}"
Restricted key
- "pk_live_[a-zA-Z0-9]{24,}"
Publishable key (lower risk)
confidence : "high" google : patterns : - "AIza[0-9A-Za-z_-]{35}"
API key
- "[0-9]+-[a-z0-9_]{32}\.apps\.googleusercontent\.com"
OAuth client
confidence : "high" twilio : patterns : - "SK[a-f0-9]{32}"
API key
- "AC[a-f0-9]{32}"
Account SID
confidence
:
"high"
sendgrid:
  pattern: "SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}"
  confidence: "high"
mailchimp:
  pattern: "[a-f0-9]{32}-us[0-9]{1,2}"
  confidence: "high"
Database Credentials
database_rules
:
connection_strings
:
postgresql:
  pattern: 'postgres(?:ql)?://[^:]+:[^@]+@[^/]+/[^\s"\'']+'
  confidence: "high"
  example: "postgresql://user:password@localhost:5432/db"
mysql:
  pattern: 'mysql://[^:]+:[^@]+@[^/]+/[^\s"\'']+'
  confidence: "high"
mongodb:
  pattern: 'mongodb(?:\+srv)?://[^:]+:[^@]+@[^\s"\'']+'
  confidence: "high"
  example: "mongodb+srv://user:pass@cluster.mongodb.net/db"
redis:
  pattern: 'redis://[^:]*:[^@]+@[^\s"\'']+'
  confidence: "high"
password_patterns
:
variable_assignment
:
patterns:
  - '(?i)(password|passwd|pwd)\s*[:=]\s*["\'']([^"\''\s]{8,})["\'']'
  - '(?i)db_pass(?:word)?\s*[:=]\s*["\'']([^"\''\s]{8,})["\'']'
exclude:
  - "password123"
  - "changeme"
  - "example"
  - "${.*}"
Private Keys
private_key_rules
:
rsa
:
pattern
:
"-----BEGIN RSA PRIVATE KEY-----"
confidence
:
"high"
multiline
:
true
openssh
:
pattern
:
"-----BEGIN OPENSSH PRIVATE KEY-----"
confidence
:
"high"
multiline
:
true
ec
:
pattern
:
"-----BEGIN EC PRIVATE KEY-----"
confidence
:
"high"
multiline
:
true
pgp
:
pattern
:
"-----BEGIN PGP PRIVATE KEY BLOCK-----"
confidence
:
"high"
multiline
:
true
generic
:
pattern
:
"-----BEGIN PRIVATE KEY-----"
confidence
:
"high"
multiline
:
true
JWT Tokens
jwt_rules
:
jwt_token
:
pattern: "eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*"
confidence
:
"medium"
validation
:
-
"Decode header to verify structure"
-
"Check payload for sensitive claims"
-
"Verify not expired test token"
jwt_context
:
high_confidence
:
-
"In Authorization header"
-
"Named as 'token' or 'jwt'"
-
"In API response"
low_confidence
:
-
"In test files"
-
"In documentation"
-
"Expired payload"
Entropy Analysis
# Shannon entropy calculation
import math
from collections import Counter


def calculate_entropy(s: str) -> float:
    """Calculate Shannon entropy of a string.

    Args:
        s: The string to measure.

    Returns:
        The entropy in bits per character, computed from the relative
        frequency of each character in ``s``. An empty string yields 0.0.
    """
    if not s:
        return 0.0

    length = len(s)
    frequencies = Counter(s)

    entropy = 0.0
    for count in frequencies.values():
        probability = count / length
        entropy -= probability * math.log2(probability)

    return entropy


def is_high_entropy(s: str, threshold: float = 4.2) -> bool:
    """Check if string has high entropy (likely a secret).

    Args:
        s: Candidate string.
        threshold: Minimum entropy (inclusive) for the string to be flagged.

    Returns:
        True when ``s`` is at least 16 characters long and its Shannon
        entropy is greater than or equal to ``threshold``; False otherwise.
    """
    # Minimum length check
    if len(s) < 16:
        return False

    # Calculate entropy
    entropy = calculate_entropy(s)
    return entropy >= threshold
# Entropy thresholds by type
ENTROPY_THRESHOLDS =
{ "api_key" : 4.2 , "password" : 3.5 , "token" : 4.5 , "hash" : 4.8 } False Positive Reduction whitelist_patterns : placeholders : patterns : - "YOUR_.HERE" - "REPLACE." - "INSERT_." - "xxx+" - "\+" - "<.>" - "\$\{.\}" - "\{\{.\}\}" action : "ignore" test_values : patterns : - "test." - "fake." - "dummy." - "example." - "sample." - "mock.*" action : "ignore" common_false_positives : patterns : - "0{16,}"
All zeros
- "1{16,}"
All ones
- "abcd.*"
Sequential
- "password123" - "changeme" - "secret123" action : "ignore" path_exclusions : directories : - "node_modules/" - "vendor/" - ".git/" - "pycache/" - "build/" - "dist/" - "coverage/" file_patterns : - ".min.js" - ".min.css" - ".map" - ".lock" - "package-lock.json" - "yarn.lock" documentation : - ".md" - ".rst" - ".txt" - "docs/" - "examples/" context_validation : safe_patterns : - "process.env." - "os.environ." - "System.getenv." - "ENV['.']" - "config.get." suspicious_patterns : - "hardcoded" - "= \"[^\"]{20,}\"" - "= '[^']{20,}'" Rule Configuration
.secrets-detection.yml
version : "1.0" rules : - id : "aws-access-key" pattern : "AKIA[0-9A-Z]{16}" severity : "critical" enabled : true - id : "generic-api-key" pattern : '(?i)(api[-]?key|apikey)\s[:=]\s["\']?([a-zA-Z0-9-]{20,})["\']?' severity : "high" enabled : true entropy_check : true entropy_threshold : 4.2 - id : "private-key" pattern : "-----BEGIN . PRIVATE KEY-----" severity : "critical" enabled : true multiline : true exclude : paths : - "test/" - "spec/" - ".test." - ".spec." - "fixtures/" - "mocks/" patterns : - "EXAMPLE_." - "._PLACEHOLDER" - "\$\{.\}" report : format : "json" output : "secrets-report.json" fail_on : "critical" performance : max_file_size : "10MB" timeout_per_file : "30s" parallel_files : 4 CI/CD Integration GitHub Actions
.github/workflows/secrets-scan.yml
name : Secrets Detection on : push : branches : [ main , develop ] pull_request : branches : [ main ] jobs : scan : runs-on : ubuntu - latest steps : - uses : actions/checkout@v4 with : fetch-depth : 0 - name : Detect secrets uses : gitleaks/gitleaks - action@v2 env : GITHUB_TOKEN : $ { { secrets.GITHUB_TOKEN } } - name : Trufflehog scan uses : trufflesecurity/trufflehog@main with : path : ./ base : $ { { github.event.repository.default_branch } } head : HEAD extra_args : - - only - verified - name : Upload results if : failure() uses : actions/upload - artifact@v4 with : name : secrets - report path : secrets - report.json Pre-commit Hook
.pre-commit-config.yaml
repos : - repo : https : //github.com/gitleaks/gitleaks rev : v8.18.0 hooks : - id : gitleaks name : Detect secrets entry : gitleaks protect - - verbose - - redact language : golang pass_filenames : false - repo : https : //github.com/Yelp/detect - secrets rev : v1.4.0 hooks : - id : detect - secrets args : [ '--baseline' , '.secrets.baseline' ] Performance Optimization optimization_strategies : regex : use_atomic_groups : true avoid_backtracking : true possessive_quantifiers : true example : bad : "(a+)+" good : "(?>a+)" scanning : progressive : - "Phase 1: High confidence patterns" - "Phase 2: Medium confidence + entropy" - "Phase 3: Low confidence heuristics" early_exit : - "Skip binary files" - "Skip files > 10MB" - "Skip whitelisted paths" caching : - "Cache compiled regexes" - "Cache file hashes" - "Incremental scanning" resource_limits : max_file_size : "10MB" timeout_per_file : "30s" max_line_length : "10000" parallel_workers : 4 Remediation remediation_steps : immediate : - "Revoke compromised credential" - "Rotate the secret" - "Remove from git history" - "Audit access logs" git_history_cleanup : commands : - "git filter-branch --force --index-filter" - "BFG Repo-Cleaner for large repos" - "git-filter-repo for complex cases" warning : "Requires force push, coordinate with team" prevention : - "Use environment variables" - "Use secrets management (Vault, AWS Secrets Manager)" - "Enable pre-commit hooks" - "Implement CI/CD scanning" - "Regular rotation schedule" Лучшие практики Precision over recall — меньше ложных срабатываний Layered detection — комбинируй паттерны и энтропию Context matters — учитывай окружение и naming Whitelist carefully — документируй исключения Scan early — pre-commit hooks + CI/CD Rotate on detection — compromised = revoked