Commit 3b6dc35e authored by Daniel Eggert's avatar Daniel Eggert Committed by Maximilian Dolling
Browse files

npm package.json parsing and license requesting

parent 4c2fdc99
......@@ -13,7 +13,7 @@ stages:
variables:
DOCKER_DST_IMAGE_NAME: software-quality-assurance
SQA_VERSION: '0.6.0'
SQA_VERSION: '0.7.0'
GIT_SUBMODULE_STRATEGY: recursive
include:
......
......@@ -128,12 +128,8 @@ title = "gitleaks config"
description = "Generic Credential"
regex = '''(?i)(dbpasswd|dbuser|dbname|dbhost|api_key|apikey|secret|key|api|password|user|guid|hostname|pw|auth)(.{0,20})?['|"]([0-9a-zA-Z-_\/+!{}/=]{4,120})['|"]'''
tags = ["key", "API", "generic"]
[[rules.allowlist]]
regex = """key[12]?'?s? ?(in|=|:)? ?[{[]?('ignore'|'license[s1]'|'foo[12]': 'bar[12]'|settings|config|'ignore'|'unknown'|'submodules')"""
[[rules.allowlist]]
regex = """[Aa]uthors"""
[[rules.allowlist]]
regex = """keys.?.?.? .? .?'submodules'"""
[rules.allowlist]
regexes = ["""key[12]?'?s? ?(in|=|:)? ?[{[]?('ignore'|'license[s1]'|'foo[12]': 'bar[12]'|settings|config|'ignore'|'unknown'|'submodules')""", """[Aa]uthors""", """keys.?.?.? .? .?'submodules'"""]
[allowlist]
description = "Allowlisted files"
files = ['''.*gitleaks_config.toml$''', '''.*config.toml$''', '''.*QUALITY.md$''', '''.*software-quality-assurance-report.md$''']
......@@ -43,7 +43,7 @@
"sha256:01f715cd0ed7a622ec8b32322e715813f7574de531f09b70f6f3b2c10f682425",
"sha256:64e2a6d14de9c8e022cf40539ac8468ba7c4b99550a2b05fc87fd20e392e568f"
],
"markers": "python_full_version >= '3.6.0'",
"markers": "python_version >= '3.6'",
"version": "==2.1.1"
},
"certifi": {
......@@ -70,7 +70,7 @@
"sha256:8f1d055d1c5b793d9355d7368a2f48cbb20c342dabc3f46ebd69e468dcfa2ab9",
"sha256:e6bb2d990d24bc38ce57484a587a187f4237f118c347c87ba030f8321ee5a5c2"
],
"markers": "python_full_version >= '3.6.0'",
"markers": "python_version >= '3.6'",
"version": "==1.1.8"
},
"click": {
......@@ -333,7 +333,7 @@
"sha256:7e112f62e2482d8417bbfa8f7bb715e98841fd15132a46c0a657ceb59302d24d",
"sha256:efda751de15201b395b6d6e64e6ae3b6b03dc502a64c3c908aa5cad14c27eee5"
],
"markers": "python_full_version >= '3.6.0'",
"markers": "python_version >= '3.6'",
"version": "==8.1.2"
}
},
......@@ -347,11 +347,11 @@
},
"astroid": {
"hashes": [
"sha256:6b0ed1af831570e500e2437625979eaa3b36011f66ddfc4ce930128610258ca9",
"sha256:cd80bf957c49765dce6d92c43163ff9d2abc43132ce64d4b1b47717c6d2522df"
"sha256:ad63b8552c70939568966811a088ef0bc880f99a24a00834abd0e3681b514f91",
"sha256:bea3f32799fbb8581f58431c12591bc20ce11cbc90ad82e2ea5717d94f2080d5"
],
"markers": "python_version >= '3.6'",
"version": "==2.5.2"
"version": "==2.5.3"
},
"babel": {
"hashes": [
......@@ -534,11 +534,11 @@
},
"pylint": {
"hashes": [
"sha256:0e21d3b80b96740909d77206d741aa3ce0b06b41be375d92e1f3244a274c1f8a",
"sha256:d09b0b07ba06bcdff463958f53f23df25e740ecd81895f7d2699ec04bbd8dc3b"
"sha256:209d712ec870a0182df034ae19f347e725c1e615b2269519ab58a35b3fcbbe7a",
"sha256:bd38914c7731cdc518634a8d3c5585951302b6e2b6de60fbb3f7a0220e21eeee"
],
"index": "pypi",
"version": "==2.7.2"
"version": "==2.7.4"
},
"pyparsing": {
"hashes": [
......@@ -580,19 +580,19 @@
},
"sphinx": {
"hashes": [
"sha256:3f01732296465648da43dec8fb40dc451ba79eb3e2cc5c6d79005fd98197107d",
"sha256:ce9c228456131bab09a3d7d10ae58474de562a6f79abb3dc811ae401cf8c1abc"
"sha256:19010b7b9fa0dc7756a6e105b2aacd3a80f798af3c25c273be64d7beeb482cb1",
"sha256:2320d4e994a191f4b4be27da514e46b3d6b420f2ff895d064f52415d342461e8"
],
"index": "pypi",
"version": "==3.5.3"
"version": "==3.5.4"
},
"sphinx-rtd-theme": {
"hashes": [
"sha256:eda689eda0c7301a80cf122dad28b1861e5605cbf455558f3775e1e8200e83a5",
"sha256:fa6bebd5ab9a73da8e102509a86f3fcc36dec04a0b52ea80e5a033b2aba00113"
"sha256:32bd3b5d13dc8186d7a42fc816a23d32e83a4827d7d9882948e7b837c232da5a",
"sha256:4a05bdbe8b1446d77a01e20a23ebc6777c74f43237035e76be89699308987d6f"
],
"index": "pypi",
"version": "==0.5.1"
"version": "==0.5.2"
},
"sphinxcontrib-applehelp": {
"hashes": [
......
......@@ -18,11 +18,11 @@
**Software Location:** [git](git.gfz-potsdam.de/id2/software/services/fair/software-quality-assurance)
**Last Commit:** 8633de4b18323c3f5c906ac1d0a8ad342892e465
**Last Commit:** 72ddcf4d10994c04a08309efddd016ac68840cb4
**Report Time:** 30/03/2021 09:56:09 UTC
**Report Time:** 15/04/2021 12:46:23 UTC
**Report Version:** 0.6.0
**Report Version:** 0.7.0
## Completed Services
......@@ -36,17 +36,14 @@
> **Description: Prints the result of [gitleaks](https://github.com/zricethezav/gitleaks)**
**gitleaks had an error.**
**gitleaks log**
**No leaks detected.**
### List Authors
> **Description: Lists all people that committed to this config.**
* [Daniel Eggert](mailto:eggi@gfz-potsdam.de)
* [GitLab CI:hifis/software-services/fair/ci-services](mailto:ci-services@gitext.gfz-potsdam.de)
* [GitLab CI:id2/software/services/fair/software-quality-assurance](mailto:software-quality-assurance@gitext.gfz-potsdam.de)
* [GitLab CI:software/services/fair/ci-services](mailto:ci-services@gitext.gfz-potsdam.de)
......@@ -61,11 +58,11 @@
|Language|Percentage|
|---|---|
|Python|92.60|
|Shell|4.37|
|Dockerfile|1.60|
|HTML|1.34|
|Ruby|0.08|
|Python|93.22|
|Shell|4.04|
|Dockerfile|1.47|
|HTML|1.20|
|Ruby|0.07|
### List Used Licenses
......@@ -89,7 +86,7 @@
|MIT License|Python / bracex / 2.1.1 <br> Python / wcmatch / 8.1.2|
|MPL-2.0|Python / certifi / 2020.12.5|
|PSF License|Python / contextlib2 / 0.6.0.post1|
|Python-2.0|Python / abc / base lib <br> Python / datetime / base lib <br> Python / json / base lib <br> Python / os / base lib <br> Python / pathlib / base lib <br> Python / re / base lib <br> Python / shutil / base lib <br> Python / subprocess / base lib <br> Python / time / base lib|
|Python-2.0|Python / abc / base lib <br> Python / datetime / base lib <br> Python / json / base lib <br> Python / os / base lib <br> Python / pathlib / base lib <br> Python / re / base lib <br> Python / shutil / base lib <br> Python / subprocess / base lib <br> Python / time / base lib <br> Python / typing / base lib|
|apache-2.0|Python / license-expression / 1.2|
......@@ -97,6 +94,14 @@
> **Description: Prints the result of the [reuse](https://reuse.software/spec/) linter.**
#### MISSING COPYRIGHT AND LICENSING INFORMATION
The following files have no copyright and licensing information:
* /repo/tests/test_project/QUALITY.md
* /repo/tests/test_project/meta/check_credentials.json
* /repo/tests/test_project/test_report.md
#### SUMMARY
* Bad licenses:
......@@ -106,8 +111,8 @@
* Unused licenses:
* Used licenses: CC0-1.0, GPL-3.0-or-later
* Read errors: 0
* Files with copyright information: 72 / 72
* Files with license information: 72 / 72
* Files with copyright information: 92 / 95
* Files with license information: 92 / 95
Congratulations! Your project is compliant with version 3.0 of the REUSE Specification :-)
Unfortunately, your project is not compliant with version 3.0 of the REUSE Specification :-(
#
# These requirements were autogenerated by pipenv
# To regenerate from the project's Pipfile, run:
#
# pipenv lock --requirements
#
-i https://pypi.org/simple
attrs==20.3.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
binaryornot==0.4.4
boolean.py==3.8
bracex==2.1.1; python_full_version >= '3.6.0'
certifi==2020.12.5
chardet==4.0.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
cli-exit-tools==1.1.8; python_full_version >= '3.6.0'
click==7.1.2
contextlib2==0.6.0.post1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
gitdb==4.0.7; python_version >= '3.4'
gitpython==3.1.14
idna==2.10; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
igittigitt==2.0.4
jinja2==2.11.3; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'
license-expression==1.2
markupsafe==1.1.1; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
orderedset==2.0.3
python-debian==0.1.39
pyyaml==5.4.1
requests==2.25.1
reuse==0.12.1
schema==0.7.4
six==1.15.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
smmap==4.0.0; python_version >= '3.5'
spdx-license-list==0.5.2
stdlib-list==0.8.0
tabulate==0.8.9
toml==0.10.2
urllib3==1.26.4; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'
wcmatch==8.1.2; python_full_version >= '3.6.0'
SPDX-FileCopyrightText: 2020 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
SPDX-FileCopyrightText: 2021 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
SPDX-License-Identifier: CC0-1.0
\ No newline at end of file
......@@ -33,18 +33,23 @@ RUN apk upgrade
# copy dependencies
COPY ./build/dependencies /dependencies
COPY ./Pipfile /dependencies
COPY ./Pipfile.lock /dependencies
WORKDIR /dependencies
# install OS dependencies
RUN apk add --update --no-cache $(cat /dependencies/dependencies_OS.txt)
RUN apk add --update --no-cache $(cat dependencies_OS.txt)
# install ruby dependencies
RUN bundle install --gemfile=/dependencies/Gemfile
# install python dependencies
RUN pip install -r /dependencies/requirements.txt
RUN bundle install
# install go modules
RUN /dependencies/install_go_dependencies.sh
RUN ./install_go_dependencies.sh
# install python dependencies
RUN pip install pipenv
RUN pipenv install --system --deploy
# cleanup
RUN rm -rf /dependencies
......@@ -97,27 +97,39 @@ It generates a yaml file, which contains all detected languages and the bytes of
> **Note:** config optional
The service loads the output from `list-used-languages`, which will also run before the actual service.
It takes every files from the repository and assigns them to a detected language.
It takes every file from the repository and assigns it to a detected language.
Afterwards it parses all files for their imports and requests their license from a pkg manager API.
All files that are handled (see below) must be valid and contain no errors.
### implemented languages
#### Python
Currently parses the following files:
Currently, the following files are parsed:
* .py
* .py3
* requirements.txt
* Pipfile
* Pipfile.lock
* pyproject.toml (poetry settings)
* poetry.lock
Goes through the files line by line and searches for imports or libraries.
Cuts the line down and generates library:version pairs.
Afterwards requests the license from the PyPI API.
It will only handle libraries with specified versions '=='.
All other specifications (<>!~) are not supported.
It will only handle libraries with specified versions (`==`).
All other specifications (`<`,`>`,`!`,`~`) are not supported.
All packages that are not hosted on PyPI are not handled.
Local packages can be excluded in the configuration.
#### all others
#### Node
Currently, the following files are parsed:
* package.json
* package-lock.json
return will not be recognized
It will only handle libraries with specified versions (`x.y.z`).
All other specifications (`~`,`^`,`1.x`) are not supported.
All packages that are not hosted on NPM are not handled.
Local packages can be excluded in the configuration.
......@@ -78,5 +78,5 @@ setup(
"License :: OSI Approved :: GPL-3.0-or-later",
"Operating System :: OS Independent",
],
python_requires=">=3.8",
python_requires=">=3.9",
)
......@@ -6,6 +6,7 @@
# META
# valid names for programming languages
# ['Python',
# 'Node',
# 'submodules']
......
......@@ -4,5 +4,10 @@
#
# SPDX-License-Identifier: CC0-1.0
# plain text
# python files
- '.*(requirements){1,}[0-9a-z_-]*.txt$'
- '.*pyproject.toml$'
# npm files
- '.*package.json$'
- '.*package-lock.json$'
......@@ -10,6 +10,8 @@ Python:
- '.*requirements.*\.txt$'
- '.*Pipfile$'
- '.*Pipfile.lock$'
- '.*poetry.lock$'
- '.*pyproject.toml$'
Ruby:
- '.*\.rb$'
- '.*Gemfile.*$'
......@@ -31,4 +33,7 @@ PHP:
Batchfile:
- '.*\.bat$'
JavaScript:
- '.*\.js$'
\ No newline at end of file
- '.*\.js$'
Node:
- '.*package\.json$'
- '.*package-lock\.json$'
# software-quality-assurance
# This Software provides services to check and improve the source code quality of software-projects
#
# SPDX-FileCopyrightText: 2020 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
# SPDX-FileCopyrightText: 2021 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Parts of this program <(especially the code for whatever)>
# were developed within the context of the following publicly funded
# projects or measures:
# - Helmholtz Federated IT Services, Helmholtz Association of German Research Centres (https://software.hifis.net/)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, Version 3 or later.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You can find a copy of the license in the LICENSES folder.
# If not, see <http://www.gnu.org/licenses/>.
"""
Provides the abstract base class from which all current and future language handlers are derived.
It is used as a skeleton to improve the extendability of the handlers.
"""
from abc import ABC, abstractmethod
from orderedset import OrderedSet
class LanguageHandler(ABC):
    """
    Skeleton class for all language handlers.

    A handler collects the dependencies of one language in ``self.deps``, the
    problems it ran into in ``self.errors`` and development-only dependencies
    in ``self.dev_deps``. ``run`` publishes the results on the parent service.
    The class name of the concrete handler is used as the language key in the
    parent's dictionaries.
    """

    # error text recorded for a dependency that was found without a version
    _VERSION_NOT_SPECIFIED = "version not specified"

    def __init__(self, parent):
        """
        :param parent: the service instance whose ``files_per_language`` is read
            and whose result dictionaries are filled by ``run``
        """
        # imported locally; presumably this avoids a circular import with
        # list_used_licenses -- TODO confirm
        from .list_used_licenses import ListUsedLicenses

        self.parent: ListUsedLicenses = parent
        # dependency name -> OrderedSet of version specifier strings
        self.deps: dict = {}
        # dependency name -> OrderedSet of error texts
        self.errors: dict = {}
        # names of development-only dependencies
        self.dev_deps = OrderedSet()

    def __repr__(self):
        return f"(class={type(self).__name__})"

    @abstractmethod
    def parse_language(self) -> tuple[list, list]:
        """
        parses all files belonging to a language and all (implemented) dependencies files
        to generate a list of dependencies

        ``run`` ignores the return value and publishes ``self.deps`` and
        ``self.errors`` instead, so implementations have to fill those.
        """

    @abstractmethod
    def request_licenses(self) -> dict:
        """
        requests licenses of dependencies from an API

        :return: dictionary mapping a license string to the libraries using it;
            ``run`` merges it into the parent's ``libraries_per_license``
        """

    def run(self) -> None:
        """
        Main function of the language handler.

        Does nothing unless the parent lists files for this handler's language.
        Otherwise parses the language, requests the licenses, merges them into
        the parent's ``libraries_per_license`` and stores the collected
        dependencies and errors on the parent under the handler's class name.
        """
        language = type(self).__name__
        if language not in self.parent.files_per_language:
            return

        self.parse_language()
        licenses = self.request_licenses()

        per_license = self.parent.libraries_per_license
        for license_str, libraries in licenses.items():
            if license_str not in per_license:
                per_license[license_str] = OrderedSet()
            # union() builds a new set, so sets handed out earlier stay untouched
            per_license[license_str] = per_license[license_str].union(libraries)

        # NOTE(review): results are only published when the language has files;
        # confirm this nesting against the original indentation.
        self.parent.dependencies_per_language[language] = self.deps
        self.parent.errors[language] = self.errors

    def _clear_version_error(self, dependency_name: str) -> None:
        """
        Removes the "version not specified" error of a library, if present.
        Drops the library from the errors entirely once no error text is left.
        """
        dependency_errors = self.errors.get(dependency_name)
        if (
            dependency_errors is None
            or self._VERSION_NOT_SPECIFIED not in dependency_errors
        ):
            return
        dependency_errors.remove(self._VERSION_NOT_SPECIFIED)
        if len(dependency_errors) == 0:
            del self.errors[dependency_name]

    def _add_dep(self, dependency_name: str, versions_str: str):
        """
        Adds a library with a version to the dependencies.
        If the library is registered as error with unspecific version, it will be deleted there.

        :param dependency_name: name of the library
        :param versions_str: one or more version specifiers, separated by commas
        """
        if dependency_name not in self.deps:
            self.deps[dependency_name] = OrderedSet()
        self.deps[dependency_name].update(versions_str.split(","))
        self._clear_version_error(dependency_name)

    def _add_error(self, dependency_name: str, error_text: str):
        """
        Registers an error text for a library.
        A "version not specified" error is ignored when the library is already
        known as dependency or dev dependency.
        """
        if error_text == self._VERSION_NOT_SPECIFIED and (
            dependency_name in self.deps or dependency_name in self.dev_deps
        ):
            return
        if dependency_name not in self.errors:
            self.errors[dependency_name] = OrderedSet()
        self.errors[dependency_name].add(error_text)

    def _add_dev_dep(self, dependency_name: str):
        """
        Adds a library to the dev dependencies.
        For licensing, these dependencies are irrelevant, which is why the version does not matter.
        If the library is registered as error with unspecific version, it will be deleted there.
        """
        self.dev_deps.add(dependency_name)
        self._clear_version_error(dependency_name)
# software-quality-assurance
# This Software provides services to check and improve the source code quality of software-projects
#
# SPDX-FileCopyrightText: 2020 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
# SPDX-FileCopyrightText: 2021 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Parts of this program <(especially the code for whatever)>
# were developed within the context of the following publicly funded
# projects or measures:
# - Helmholtz Federated IT Services, Helmholtz Association of German Research Centres (https://software.hifis.net/)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, Version 3 or later.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You can find a copy of the license in the LICENSES folder.
# If not, see <http://www.gnu.org/licenses/>.
"""language_parser.py
Contains functions to parse files of certain programming languages.
Every language has its own function.
The functions parse all files that belong to a language:
* source code
* header files
* pkg-manager files
"""
import re
from orderedset import OrderedSet
from stdlib_list import stdlib_list
class DependencyParser:
"""
Parses all files from a list by language to get all imported libraries.
"""
from .list_used_licenses import ListUsedLicenses
parent: ListUsedLicenses
def __init__(self, parent: ListUsedLicenses):
    """
    Keeps a reference to the parent service.

    :param parent: service instance whose ``files_per_language`` is read and whose
        ``dependencies_per_language`` and ``errors`` are filled by ``parse``
    """
    self.parent = parent
def parse(self):
    """
    Entry point that runs the parser of every implemented language.

    Only Python is handled: when the parent lists Python files, their
    dependencies and the errors found while parsing are stored on the parent.
    """
    language = "Python"
    if language not in self.parent.files_per_language:
        return

    found_deps, found_errors = self._get_python_libs()
    self.parent.dependencies_per_language[language] = found_deps
    self.parent.errors[language] = found_errors
def _get_python_libs(self) -> (list, list):
"""
Parses '.py', 'requirements.txt' and Pipfile files to get all imports and their versions.
:return: a list of dictionaries; dictionaries {key, value} represents {import name, import version}
"""
deps = {}
errors = {}
std_lib_list = stdlib_list()
for file_path in self.parent.files_per_language["Python"]:
# parse requirements.txt
if file_path.endswith(".txt"):
with open(file_path) as file:
line = file.readline()
while line:
# skip comment lines, empty lines, links to other files, last line, index line
if (
line[0] != "#"
and line != ""
and line[0:2] != "-r"
and line != "\n"
and line[0:2] != "-i"
):
# cut away comments and python version specifiers
stripped_down_line = (
line.strip()
.split("#")[0]
.split(";")[0]
.replace(" ", "")
)
if ("/" or ":") in stripped_down_line:
errors[stripped_down_line] = {"url/path not supported"}
else:
match = re.compile("[=<>!~]").search(stripped_down_line)
if match is None:
errors[stripped_down_line.lower()] = {
"version not specified"
}
else:
libname = stripped_down_line[: match.start()]
if (
libname in errors
and "version not specified" in errors[libname]
):
errors[libname].remove("version not specified")
if len(errors[libname]) == 0:
del errors[libname]
if libname not in deps:
deps[libname.lower()] = OrderedSet()
deps[libname.lower()].update(
stripped_down_line[match.start() :].split(",")
)
line = file.readline()
# parse py files
if file_path.endswith(".py"):
lines_with_libs_list = []
with open(file_path) as file:
# get all lines with import statements
line = file.readline()