diff --git a/.envguard.yml.example b/.envguard.yml.example index 8c02baa..3e6adca 100644 --- a/.envguard.yml.example +++ b/.envguard.yml.example @@ -10,7 +10,6 @@ exclude_extensions: - ".png" entropy_exclude_paths: - "testdata/**" - - "fixtures/**" custom_patterns: - name: "Internal Token" pattern: "MYCO_[A-Z0-9]{32}" diff --git a/README.md b/README.md index 88bbf23..dd19d81 100644 --- a/README.md +++ b/README.md @@ -67,9 +67,9 @@ The entropy engine tokenizes each scanned line, measures Shannon entropy, and fl | `entropy_threshold` | `float64` | `4.5` | Minimum Shannon entropy required to report a token. | | `min_length` | `int` | `20` | Minimum token length considered by entropy scanning. | | `max_file_size_kb` | `int` | `500` | Skip files larger than this limit with a warning. | -| `exclude_paths` | `[]string` | `["testdata/**","**/*.test.js","vendor/**"]` | Glob patterns excluded from scanning. | +| `exclude_paths` | `[]string` | `["**/*.test.js","vendor/**"]` | Glob patterns excluded from scanning. | | `exclude_extensions` | `[]string` | `[".lock",".svg",".png"]` | File extensions excluded from scanning. | -| `entropy_exclude_paths` | `[]string` | `[]` | Glob patterns that skip entropy scanning only while keeping pattern matching enabled for files that are still included by `exclude_paths`. | +| `entropy_exclude_paths` | `[]string` | `["testdata/**"]` | Glob patterns that skip entropy scanning only, while keeping pattern matching enabled, for files not already excluded by `exclude_paths`. | | `custom_patterns` | `[]pattern` | `[]` | Extra regex rules added to the built-in pattern library. | Example: @@ -87,7 +87,6 @@ exclude_extensions: - ".svg" entropy_exclude_paths: - "testdata/**" - - "fixtures/**" custom_patterns: - name: "Internal Token" pattern: "MYCO_[A-Z0-9]{32}" @@ -95,7 +94,7 @@ custom_patterns: ``` Note: -`exclude_paths` is applied before scanning starts. If a path is excluded there, `entropy_exclude_paths` will never see it. 
To keep pattern matching enabled for `testdata/` while suppressing entropy checks, remove `testdata/**` from `exclude_paths` and add it to `entropy_exclude_paths` instead. +`exclude_paths` is applied before scanning starts. If a path is excluded there, `entropy_exclude_paths` will never see it. By default, `testdata/**` stays included for pattern scanning and is excluded from entropy scanning through `entropy_exclude_paths`. ## CLI Reference diff --git a/config/schema.go b/config/schema.go index b524fa7..a731b89 100644 --- a/config/schema.go +++ b/config/schema.go @@ -35,7 +35,6 @@ func Default() Config { MinLength: 20, MaxFileSizeKB: 500, ExcludePaths: []string{ - "testdata/**", "**/*.test.js", "vendor/**", }, @@ -44,5 +43,8 @@ func Default() Config { ".svg", ".png", }, + EntropyExcludePaths: []string{ + "testdata/**", + }, } } diff --git a/scanner/scanner_test.go b/scanner/scanner_test.go index c13bb88..b7c8e49 100644 --- a/scanner/scanner_test.go +++ b/scanner/scanner_test.go @@ -15,6 +15,7 @@ import ( func TestScanDirtyFixture(t *testing.T) { cfg := config.Default() cfg.ExcludePaths = nil + cfg.EntropyExcludePaths = nil engine, err := NewEngine(cfg, allowlist.Set{}) require.NoError(t, err) @@ -128,6 +129,25 @@ func TestEntropyExcludePathsCanSkipTestdataEntropy(t *testing.T) { assert.Empty(t, findings) } +func TestDefaultConfigUsesPatternOnlyScanningForTestdata(t *testing.T) { + tempDir := t.TempDir() + testdataDir := filepath.Join(tempDir, "testdata") + require.NoError(t, os.MkdirAll(testdataDir, 0o755)) + + target := filepath.Join(testdataDir, "sample.txt") + require.NoError(t, os.WriteFile(target, []byte("const key = AKIA1234567890ABCDEF;\nconst token = abcd1234efgh5678ijkl9012mnop3456;\n"), 0o644)) + + cfg := config.Default() + engine, err := NewEngineWithRoot(cfg, allowlist.Set{}, tempDir) + require.NoError(t, err) + + findings, err := engine.ScanPaths([]string{target}) + require.NoError(t, err) + require.Len(t, findings, 1) + assert.Equal(t, "AWS Access 
Key", findings[0].RuleName) + assert.Equal(t, "pattern", findings[0].Source) +} + func chdirForTest(t *testing.T, dir string) { t.Helper() wd, err := os.Getwd()