Skip to content

Commit 29615be

Browse files
feat(python): add license support for requirement.txt files (#6782)
Signed-off-by: knqyf263 <knqyf263@gmail.com> Co-authored-by: knqyf263 <knqyf263@gmail.com>
1 parent 2f05418 commit 29615be

File tree

10 files changed

+948
-103
lines changed

10 files changed

+948
-103
lines changed

docs/docs/coverage/language/python.md

+24-11
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,20 @@
33
Trivy supports three types of Python package managers: `pip`, `Pipenv` and `Poetry`.
44
The following scanners are supported for package managers.
55

6-
| Package manager | SBOM | Vulnerability | License |
7-
| --------------- | :---: | :-----------: | :-----: |
8-
| pip | || - |
9-
| Pipenv | || - |
10-
| Poetry | || - |
6+
| Package manager | SBOM | Vulnerability | License |
7+
|-----------------|:----:|:-------------:|:-------:|
8+
| pip | ✓ | ✓ | ✓ |
9+
| Pipenv | ✓ | ✓ | - |
10+
| Poetry | ✓ | ✓ | - |
1111

1212
In addition, Trivy supports three formats of Python packages: `egg`, `wheel` and `conda`.
1313
The following scanners are supported for Python packages.
1414

15-
| Packaging | SBOM | Vulnerability | License |
16-
| --------- | :---: | :-----------: | :-----: |
17-
| Egg | |||
18-
| Wheel | |||
19-
| Conda | | - | - |
15+
| Packaging | SBOM | Vulnerability | License |
16+
|-----------|:----:|:-------------:|:-------:|
17+
| Egg | ✓ | ✓ | ✓ |
18+
| Wheel | ✓ | ✓ | ✓ |
19+
| Conda | ✓ | - | - |
2020

2121

2222
The following table provides an outline of the features Trivy offers.
@@ -40,6 +40,8 @@ See [here](./index.md) for the detail.
4040
Trivy parses your files generated by package managers in filesystem/repository scanning.
4141

4242
### pip
43+
44+
#### Dependency detection
4345
Trivy only parses [version specifiers](https://packaging.python.org/en/latest/specifications/version-specifiers/#id4) with `==` comparison operator and without `.*`.
4446
To convert unsupported version specifiers - use the `pip freeze` command.
4547

@@ -91,7 +93,16 @@ urllib3==1.26.15
9193
`requirements.txt` files don't contain information about dependencies used for development.
9294
As a result, Trivy may report vulnerabilities in development packages, which do not affect your production environment.
9395

94-
License detection is not supported for `pip`.
96+
#### License detection
97+
98+
`requirements.txt` files don't contain information about licenses.
99+
Therefore, Trivy checks `METADATA` files from `lib/site-packages` directory.
100+
101+
Trivy uses 3 ways to detect `site-packages` directory:
102+
103+
- Checks `VIRTUAL_ENV` environment variable.
104+
- Detects path to `python`[^1] binary and checks `../lib/pythonX.Y/site-packages` directory.
105+
- Detects path to `python`[^1] binary and checks `../../lib/site-packages` directory.
95106

96107
### Pipenv
97108
Trivy parses `Pipfile.lock`.
@@ -116,4 +127,6 @@ Trivy looks for `*.egg-info`, `*.egg-info/PKG-INFO`, `*.egg` and `EGG-INFO/PKG-I
116127
### Wheel
117128
Trivy looks for `.dist-info/METADATA` to identify Python packages.
118129

130+
[^1]: Trivy checks `python`, `python3`, `python2` and `python.exe` file names.
131+
119132
[dependency-graph]: ../../configuration/reporting.md#show-origins-of-vulnerable-dependencies

pkg/fanal/analyzer/language/python/pip/pip.go

+192-6
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,92 @@ package pip
22

33
import (
44
"context"
5+
"fmt"
6+
"io"
7+
"io/fs"
58
"os"
9+
"os/exec"
610
"path/filepath"
11+
"sort"
12+
"strings"
713

14+
"github.com/samber/lo"
815
"golang.org/x/xerrors"
916

17+
goversion "github.com/aquasecurity/go-version/pkg/version"
18+
"github.com/aquasecurity/trivy/pkg/dependency/parser/python/packaging"
1019
"github.com/aquasecurity/trivy/pkg/dependency/parser/python/pip"
1120
"github.com/aquasecurity/trivy/pkg/fanal/analyzer"
1221
"github.com/aquasecurity/trivy/pkg/fanal/analyzer/language"
1322
"github.com/aquasecurity/trivy/pkg/fanal/types"
23+
"github.com/aquasecurity/trivy/pkg/log"
24+
"github.com/aquasecurity/trivy/pkg/utils/fsutils"
1425
)
1526

1627
// init registers the pip analyzer with the analyzer package.
// In this diff the plain analyzer registration (`-` line) is replaced by a
// post-analyzer registration for analyzer.TypePip (`+` line).
func init() {
17-
analyzer.RegisterAnalyzer(&pipLibraryAnalyzer{})
28+
analyzer.RegisterPostAnalyzer(analyzer.TypePip, newPipLibraryAnalyzer)
1829
}
1930

2031
const version = 1
2132

22-
type pipLibraryAnalyzer struct{}
33+
// pythonExecNames lists the executable names that are tried, in this order,
// when looking for the Python interpreter.
var pythonExecNames = []string{"python3", "python", "python2", "python.exe"}
39+
40+
type pipLibraryAnalyzer struct {
41+
logger *log.Logger
42+
metadataParser packaging.Parser
43+
}
44+
45+
func newPipLibraryAnalyzer(_ analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error) {
46+
return pipLibraryAnalyzer{
47+
logger: log.WithPrefix("pip"),
48+
metadataParser: *packaging.NewParser(),
49+
}, nil
50+
}
51+
52+
func (a pipLibraryAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAnalysisInput) (*analyzer.AnalysisResult, error) {
53+
var apps []types.Application
2354

24-
func (a pipLibraryAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) {
25-
res, err := language.Analyze(types.Pip, input.FilePath, input.Content, pip.NewParser())
55+
sitePackagesDir, err := a.pythonSitePackagesDir()
2656
if err != nil {
27-
return nil, xerrors.Errorf("unable to parse requirements.txt: %w", err)
57+
a.logger.Warn("Unable to find python `site-packages` directory. License detection is skipped.", log.Err(err))
58+
}
59+
60+
// We only saved the `requirements.txt` files
61+
required := func(_ string, _ fs.DirEntry) bool {
62+
return true
63+
}
64+
65+
if err = fsutils.WalkDir(input.FS, ".", required, func(pathPath string, d fs.DirEntry, r io.Reader) error {
66+
app, err := language.Parse(types.Pip, pathPath, r, pip.NewParser())
67+
if err != nil {
68+
return xerrors.Errorf("unable to parse requirements.txt: %w", err)
69+
}
70+
71+
if app == nil {
72+
return nil
73+
}
74+
75+
// Fill licenses
76+
if sitePackagesDir != "" {
77+
for i := range app.Packages {
78+
app.Packages[i].Licenses = a.pkgLicense(app.Packages[i].Name, app.Packages[i].Version, sitePackagesDir)
79+
}
80+
}
81+
82+
apps = append(apps, *app)
83+
return nil
84+
}); err != nil {
85+
return nil, xerrors.Errorf("pip walt error: %w", err)
2886
}
29-
return res, nil
87+
88+
return &analyzer.AnalysisResult{
89+
Applications: apps,
90+
}, nil
3091
}
3192

3293
func (a pipLibraryAnalyzer) Required(filePath string, _ os.FileInfo) bool {
@@ -41,3 +102,128 @@ func (a pipLibraryAnalyzer) Type() analyzer.Type {
41102
// Version returns the analyzer version, i.e. the package-level `version` constant.
func (a pipLibraryAnalyzer) Version() int {
42103
return version
43104
}
105+
106+
// pkgLicense parses `METADATA` pkg file to look for licenses
107+
func (a pipLibraryAnalyzer) pkgLicense(pkgName, pkgVer, spDir string) []string {
108+
// METADATA path is `**/site-packages/<pkg_name>-<pkg_version>.dist-info/METADATA`
109+
pkgDir := fmt.Sprintf("%s-%s.dist-info", pkgName, pkgVer)
110+
metadataPath := filepath.Join(spDir, pkgDir, "METADATA")
111+
metadataFile, err := os.Open(metadataPath)
112+
if os.IsNotExist(err) {
113+
a.logger.Debug("No package metadata found", log.String("site-packages", pkgDir),
114+
log.String("name", pkgName), log.String("version", pkgVer))
115+
return nil
116+
}
117+
118+
metadataPkg, _, err := a.metadataParser.Parse(metadataFile)
119+
if err != nil {
120+
a.logger.Warn("Unable to parse METADATA file", log.String("path", metadataPath), log.Err(err))
121+
return nil
122+
}
123+
124+
// METADATA file contains info about only 1 package
125+
// cf. https://github.com/aquasecurity/trivy/blob/e66dbb935764908f0b2b9a55cbfe6c107f101a31/pkg/dependency/parser/python/packaging/parse.go#L86-L92
126+
return metadataPkg[0].Licenses
127+
}
128+
129+
// pythonSitePackagesDir returns path to site-packages dir
130+
func (a pipLibraryAnalyzer) pythonSitePackagesDir() (string, error) {
131+
// check VIRTUAL_ENV first
132+
if venv := os.Getenv("VIRTUAL_ENV"); venv != "" {
133+
libDir := filepath.Join(venv, "lib")
134+
if _, err := os.Stat(libDir); os.IsNotExist(err) {
135+
return "", xerrors.Errorf("unable to detect `lib` dir for %q venv: %w", venv, err)
136+
}
137+
138+
spDir, err := a.findSitePackagesDir(libDir)
139+
if err != nil {
140+
return "", xerrors.Errorf("unable to detect `site-packages` dir for %q venv: %w", spDir, err)
141+
} else if spDir != "" {
142+
return spDir, nil
143+
}
144+
}
145+
146+
// Find path to Python executable
147+
pythonExecPath, err := pythonExecutablePath()
148+
if err != nil {
149+
return "", err
150+
}
151+
pythonExecDir := filepath.Dir(pythonExecPath)
152+
153+
// Search for a directory starting with "python" in the lib directory
154+
libDir := filepath.Join(pythonExecDir, "..", "lib")
155+
spDir, err := a.findSitePackagesDir(libDir)
156+
if err != nil {
157+
return "", xerrors.Errorf("unable to detect `site-packages` dir for %q: %w", pythonExecPath, err)
158+
} else if spDir != "" {
159+
return spDir, nil
160+
}
161+
162+
// Try another common pattern if the Python library directory is not found
163+
spDir = filepath.Join(pythonExecDir, "..", "..", "lib", "site-packages")
164+
if fsutils.DirExists(spDir) {
165+
return spDir, nil
166+
}
167+
168+
return "", xerrors.Errorf("site-packages directory not found")
169+
}
170+
171+
// pythonExecutablePath returns path to Python executable
172+
func pythonExecutablePath() (string, error) {
173+
for _, execName := range pythonExecNames {
174+
// Get the absolute path of the python command
175+
pythonPath, err := exec.LookPath(execName)
176+
if err != nil {
177+
continue
178+
}
179+
return pythonPath, nil
180+
}
181+
return "", xerrors.Errorf("unable to find path to Python executable")
182+
}
183+
184+
// findSitePackagesDir finds `site-packages` dir in `lib` dir
185+
func (a pipLibraryAnalyzer) findSitePackagesDir(libDir string) (string, error) {
186+
entries, err := os.ReadDir(libDir)
187+
if err != nil {
188+
if !os.IsNotExist(err) {
189+
return "", xerrors.Errorf("failed to read lib directory: %w", err)
190+
}
191+
return "", nil
192+
}
193+
194+
// Find python dir which contains `site-packages` dir
195+
// First check for newer versions
196+
pythonDirs := a.sortPythonDirs(entries)
197+
for i := len(pythonDirs) - 1; i >= 0; i-- {
198+
dir := filepath.Join(libDir, pythonDirs[i], "site-packages")
199+
if fsutils.DirExists(dir) {
200+
return dir, nil
201+
}
202+
}
203+
return "", nil
204+
}
205+
206+
// sortPythonDirs finds dirs starting with `python` and sorts them
207+
// e.g. python2.7 => python3.9 => python3.11
208+
func (a pipLibraryAnalyzer) sortPythonDirs(entries []os.DirEntry) []string {
209+
var pythonVers []goversion.Version
210+
for _, entry := range entries {
211+
// Found a directory starting with "python", assume it's the Python library directory
212+
if entry.IsDir() && strings.HasPrefix(entry.Name(), "python") {
213+
ver := strings.TrimPrefix(entry.Name(), "python")
214+
v, err := goversion.Parse(ver)
215+
if err != nil {
216+
a.logger.Debug("Unable to parse version from Python dir name", log.String("dir", entry.Name()), log.Err(err))
217+
continue
218+
}
219+
pythonVers = append(pythonVers, v)
220+
}
221+
}
222+
223+
// Sort Python version
224+
sort.Sort(goversion.Collection(pythonVers))
225+
226+
return lo.Map(pythonVers, func(v goversion.Version, _ int) string {
227+
return "python" + v.String()
228+
})
229+
}

0 commit comments

Comments
 (0)