Commit c10def0e authored by Maximilian Dolling's avatar Maximilian Dolling
Browse files

Merge branch 'dev' into 'master'

added more license handling

See merge request !65
parents 562906f9 15ada1dc
......@@ -160,7 +160,7 @@ class SQA:
print("##############################")
print(
"Note: This report is automatically generated by [Software Quality Assurance]"
"(https://gitext.gfz-potsdam.de/software/services/fair/software-quality-assurance)"
"(https://git.gfz-potsdam.de/id2/software/services/fair/software-quality-assurance)"
)
print("##############################")
print("Meta Data")
......@@ -180,7 +180,7 @@ class SQA:
md_report.write("# Software Quality Report\n")
md_report.write(
"> **Note:** This report is automatically generated by [Software Quality Assurance]("
"https://gitext.gfz-potsdam.de/software/services/fair/software-quality-assurance)\n\n"
"https://git.gfz-potsdam.de/id2/software/services/fair/software-quality-assurance)\n\n"
)
# write table of content
......
......@@ -66,18 +66,24 @@ class CheckCredentials(Service):
if self.service_config is not None:
gitleaks_command = (
f'gitleaks --repo-path="{self.sqa.config.repository_dir}" '
f'--config="{self.service_config}" '
f'gitleaks --path="{self.sqa.config.repository_dir}" '
f'--config-path="{self.service_config}" '
f'--report="{self.sqa.config.sub_report_dir}/check_credentials.json" '
f"--leaks-exit-code=2 "
f'> "{self.sqa.config.sub_report_dir}/check_credentials.log"'
)
else:
gitleaks_command = (
f'gitleaks --repo-path="{self.sqa.config.repository_dir}" '
f'gitleaks --path="{self.sqa.config.repository_dir}" '
f'--report="{self.sqa.config.sub_report_dir}/check_credentials.json" '
f"--leaks-exit-code=2 "
f'> "{self.sqa.config.sub_report_dir}/check_credentials.log"'
)
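# On Unix, os.system returns the 16-bit wait status of the command, which carries the
# process exit code in its high byte (exit code * 256). That is why the exit statuses
# have the following meaning:
# 0 = OK, no leak (exit code 0)
# 256 = ERROR (exit code 1)
# 512 = OK, leak (exit code 2, as requested with --leaks-exit-code=2)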
self.result["exit status"] = os.system(gitleaks_command)
with open(
......@@ -86,46 +92,41 @@ class CheckCredentials(Service):
self.result["log"] = log.read().replace("\n", "")
if self.result["exit status"] == 0:
if os.path.isfile(
self.result["summary"] = "No leaks detected."
elif self.result["exit status"] == 256:
self.result["summary"] = "gitleaks had an error."
elif self.result["exit status"] == 512:
with open(
f"{self.sqa.config.sub_report_dir}/check_credentials.json"
):
with open(
f"{self.sqa.config.sub_report_dir}/check_credentials.json"
) as cc_json_file:
check_credentials_json = json.load(cc_json_file)
cc_json_file.close()
self.result[
"summary"
] = f"{len(check_credentials_json)} possible leaks detected."
for item in check_credentials_json:
if item["rule"] not in self.result["leaks"]:
self.result["leaks"][item["rule"]] = {}
if item["file"] not in self.result["leaks"][item["rule"]]:
self.result["leaks"][item["rule"]][item["file"]] = {}
index = 0
else:
index = (
list(
self.result["leaks"][item["rule"]][item["file"]].keys()
)[-1]
+ 1
)
) as cc_json_file:
check_credentials_json = json.load(cc_json_file)
self.result["leaks"][item["rule"]][item["file"]][index] = {
"offender": item["offender"],
"commit": item["commit"],
}
else:
self.result["summary"] = "No leaks detected."
else:
self.result[
"summary"
] = f"gitleaks had an error and exited with status code {self.result['exit status']}."
if self.sqa.config.test_mode:
self.result["summary"] = "999 vulnerabilities found"
] = f"{len(check_credentials_json)} possible leaks detected."
for item in check_credentials_json:
if item["rule"] not in self.result["leaks"]:
self.result["leaks"][item["rule"]] = {}
if item["file"] not in self.result["leaks"][item["rule"]]:
self.result["leaks"][item["rule"]][item["file"]] = {}
index = 0
else:
index = (
list(self.result["leaks"][item["rule"]][item["file"]].keys())[
-1
]
+ 1
)
self.result["leaks"][item["rule"]][item["file"]][index] = {
"offender": item["offender"],
"commit": item["commit"],
}
# if self.sqa.config.test_mode:
# self.result["summary"] = "999 vulnerabilities found"
self.result = order_dict(self.result)
......@@ -134,7 +135,7 @@ class CheckCredentials(Service):
md_result += f'**{self.result["summary"]}**\n\n'
if self.result["exit status"] != 0:
if self.result["exit status"] == 256:
md_result += "**gitleaks log**\n\n"
md_result += f'{self.result["log"]}\n\n'
......@@ -158,7 +159,7 @@ class CheckCredentials(Service):
cli_result += f'{self.result["summary"]}\n'
if self.result["exit status"] != 0:
if self.result["exit status"] == 256:
cli_result += "gitleaks log:\n\n"
cli_result += f'{self.result["log"]}\n\n'
......
......@@ -6,6 +6,7 @@
# META
# valid names for programming languages
# ['Python',
# 'Node',
# 'submodules']
......
......@@ -4,5 +4,10 @@
#
# SPDX-License-Identifier: CC0-1.0
# plain text
# python files
- '.*(requirements){1,}[0-9a-z_-]*.txt$'
- '.*pyproject.toml$'
# npm files
- '.*package.json$'
- '.*package-lock.json$'
......@@ -10,6 +10,8 @@ Python:
- '.*requirements.*\.txt$'
- '.*Pipfile$'
- '.*Pipfile.lock$'
- '.*poetry.lock$'
- '.*pyproject.toml$'
Ruby:
- '.*\.rb$'
- '.*Gemfile.*$'
......@@ -31,4 +33,7 @@ PHP:
Batchfile:
- '.*\.bat$'
JavaScript:
- '.*\.js$'
\ No newline at end of file
- '.*\.js$'
Node:
- '.*package\.json$'
- '.*package-lock\.json$'
# software-quality-assurance
# This Software provides services to check and improve the source code quality of software-projects
#
# SPDX-FileCopyrightText: 2020 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
# SPDX-FileCopyrightText: 2021 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Parts of this program <(especially the code for whatever)>
# were developed within the context of the following publicly funded
# projects or measures:
# - Helmholtz Federated IT Services, Helmholtz Association of German Research Centres (https://software.hifis.net/)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, Version 3 or later.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You can find a copy of the license in the LICENSES folder.
# If not, see <http://www.gnu.org/licenses/>.
"""
Provides an abstract class, all current and future language handlers are derived from.
Used as skeleton to improve extendability of the handlers.
"""
from abc import ABC, abstractmethod
from orderedset import OrderedSet
class LanguageHandler(ABC):
    """
    Skeleton class for all language handlers.

    A handler collects the dependencies of one programming language in ``deps``
    (library name -> set of version strings), the development-only dependencies in
    ``dev_deps`` and the problems it ran into in ``errors`` (library name -> set of
    error texts). ``run`` publishes these results on the parent service.
    """

    # error text used for a library whose version could not be determined
    _VERSION_NOT_SPECIFIED = "version not specified"

    def __init__(self, parent):
        """
        :param parent: the service instance that owns this handler and receives its results
        """
        # imported locally, only needed for the annotation below
        # (presumably to avoid a circular import - TODO confirm)
        from .list_used_licenses import ListUsedLicenses

        self.parent: ListUsedLicenses = parent
        self.deps: dict = {}
        self.errors: dict = {}
        self.dev_deps = OrderedSet()

    def __repr__(self):
        return f"(class={self.__class__.__name__})"

    @abstractmethod
    def parse_language(self) -> tuple:
        """
        parses all files belonging to a language and all (implemented) dependencies files
        to generate a list of dependencies
        """

    @abstractmethod
    def request_licenses(self) -> dict:
        """
        requests licenses of dependencies from an API

        :return: mapping of a license name to the libraries published under that license
        """

    def run(self) -> None:
        """
        main function of the language handler to do its task.

        Nothing happens when the parent has no files registered for this handler
        (handlers are looked up by their class name). Otherwise the files are parsed,
        the licenses are requested and merged into the parent's
        ``libraries_per_license``, and the collected dependencies and errors are
        stored on the parent under the class name.
        """
        language = self.__class__.__name__
        if language not in self.parent.files_per_language:
            return

        self.parse_language()

        per_license = self.parent.libraries_per_license
        for license_str, libraries in self.request_licenses().items():
            if license_str not in per_license:
                per_license[license_str] = OrderedSet()
            # union() creates a new set, the previously stored object is not mutated
            per_license[license_str] = per_license[license_str].union(libraries)

        self.parent.dependencies_per_language[language] = self.deps
        self.parent.errors[language] = self.errors

    def _clear_unspecified_version(self, dependency_name: str) -> None:
        """
        Removes the "version not specified" error of a library, if there is one.
        The library is dropped from the errors completely when no other error is left.
        """
        messages = self.errors.get(dependency_name)
        if not messages or self._VERSION_NOT_SPECIFIED not in messages:
            return
        messages.remove(self._VERSION_NOT_SPECIFIED)
        if not messages:
            del self.errors[dependency_name]

    def _add_dep(self, dependency_name: str, versions_str: str):
        """
        Adds a library with a version to the dependencies.
        If the library is registered as error with unspecific version, it will be deleted there.

        :param dependency_name: name of the library
        :param versions_str: one or more version statements, separated by commas
        """
        if dependency_name not in self.deps:
            self.deps[dependency_name] = OrderedSet()
        self.deps[dependency_name].update(versions_str.split(","))
        self._clear_unspecified_version(dependency_name)

    def _add_error(self, dependency_name: str, error_text: str):
        """
        Registers an error for a library.
        A "version not specified" error is ignored for libraries that are already known
        as dependency or dev dependency.

        :param dependency_name: name of the library
        :param error_text: description of the problem
        """
        if error_text == self._VERSION_NOT_SPECIFIED and (
            dependency_name in self.deps or dependency_name in self.dev_deps
        ):
            return
        if dependency_name not in self.errors:
            self.errors[dependency_name] = OrderedSet()
        self.errors[dependency_name].add(error_text)

    def _add_dev_dep(self, dependency_name: str):
        """
        Adds a library to the dev dependencies.
        For licensing, these dependencies are irrelevant, which is why the version does not matter.
        If the library is registered as error with unspecific version, it will be deleted there.

        :param dependency_name: name of the library
        """
        self.dev_deps.add(dependency_name)
        self._clear_unspecified_version(dependency_name)
# software-quality-assurance
# This Software provides services to check and improve the source code quality of software-projects
#
# SPDX-FileCopyrightText: 2020 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
# SPDX-FileCopyrightText: 2021 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Parts of this program <(especially the code for whatever)>
# were developed within the context of the following publicly funded
# projects or measures:
# - Helmholtz Federated IT Services, Helmholtz Association of German Research Centres (https://software.hifis.net/)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, Version 3 or later.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You can find a copy of the license in the LICENSES folder.
# If not, see <http://www.gnu.org/licenses/>.
"""language_parser.py
Contains functions to parse file of certain programming languages.
Every language has their own function.
The functions parse all files that belong to a language:
* source code
* header files
* pkg-manager files
"""
import re
from orderedset import OrderedSet
from stdlib_list import stdlib_list
class DependencyParser:
    """
    Parses all files from a list by language to get all imported libraries.

    The results are written to ``dependencies_per_language`` and ``errors`` of the
    parent service. Library names are used in lower case as keys throughout.
    """

    # kept inside the class body as before; this also makes ListUsedLicenses a class attribute
    from .list_used_licenses import ListUsedLicenses

    parent: ListUsedLicenses

    # texts stored in the dependency / error sets
    _NO_VERSION = "version not specified"
    _URL_OR_PATH = "url/path not supported"
    _BASE_LIB = "base lib"

    # first character of a version specifier in a requirements line
    _SPECIFIER_RE = re.compile("[=<>!~]")
    # "import a, b as c" statements, group 1 is everything after the keyword
    _IMPORT_RE = re.compile(r"^\s*import\s+(.+)$")
    # "from a.b import c" statements, group 1 is the module
    _FROM_RE = re.compile(r"^\s*from\s+(\S+)\s+import\b")

    def __init__(self, parent: ListUsedLicenses):
        """
        :param parent: the service instance providing the file lists and the config
        """
        self.parent = parent

    def parse(self):
        """
        Main function which runs all the parsers for every language (implemented).
        Only Python is implemented; nothing happens if the parent lists no Python files.
        """
        if "Python" in self.parent.files_per_language:
            deps, errors = self._get_python_libs()
            self.parent.dependencies_per_language["Python"] = deps
            self.parent.errors["Python"] = errors

    def _get_python_libs(self) -> tuple:
        """
        Parses '.py' and '.txt' (requirements) files to get all imports and their versions.

        :return: tuple ``(deps, errors)``; ``deps`` maps a library name to its set of
            specific versions (or "base lib"), ``errors`` maps a library name
            (or an unsupported requirements line) to a set of error texts
        """
        deps = {}
        errors = {}
        # a set makes the membership test per import cheap
        std_libs = frozenset(stdlib_list())

        for file_path in self.parent.files_per_language["Python"]:
            if file_path.endswith(".txt"):
                self._parse_requirements_file(file_path, deps, errors)
            if file_path.endswith(".py"):
                self._parse_source_file(file_path, std_libs, deps, errors)

        errors = self._apply_manual_config(deps, errors)
        return self._split_unspecific_versions(deps, errors)

    def _note_missing_version(self, name: str, deps: dict, errors: dict) -> None:
        """
        Registers a "version not specified" error, unless the library already has versions.
        """
        if name not in deps:
            errors[name] = {self._NO_VERSION}

    def _clear_missing_version(self, name: str, errors: dict) -> None:
        """
        Removes the "version not specified" error of a library once a version is known.
        """
        messages = errors.get(name)
        if not messages or self._NO_VERSION not in messages:
            return
        messages.remove(self._NO_VERSION)
        if not messages:
            del errors[name]

    def _parse_requirements_file(self, file_path: str, deps: dict, errors: dict) -> None:
        """
        Reads one requirements file and adds its entries to ``deps`` / ``errors``.
        """
        with open(file_path) as file:
            for raw_line in file:
                line = raw_line.strip()
                # skip empty lines, comment lines, links to other files and index lines
                if not line or line.startswith(("#", "-r", "-i")):
                    continue
                # cut away comments and python version specifiers
                requirement = line.split("#")[0].split(";")[0].replace(" ", "")
                if not requirement:
                    continue
                # both characters have to be tested on their own,
                # `("/" or ":") in requirement` would only look for "/"
                if "/" in requirement or ":" in requirement:
                    errors[requirement] = {self._URL_OR_PATH}
                    continue
                match = self._SPECIFIER_RE.search(requirement)
                if match is None:
                    self._note_missing_version(requirement.lower(), deps, errors)
                    continue
                # lower case once, so lookups and stored keys always agree
                libname = requirement[: match.start()].lower()
                self._clear_missing_version(libname, errors)
                if libname not in deps:
                    deps[libname] = OrderedSet()
                deps[libname].update(requirement[match.start() :].split(","))

    def _imported_modules(self, line: str) -> list:
        """
        Returns the top level packages imported by one line of source code.
        Relative imports (local packages / modules) are left out.
        """
        from_match = self._FROM_RE.match(line)
        if from_match is not None:
            candidates = [from_match.group(1)]
        else:
            import_match = self._IMPORT_RE.match(line)
            if import_match is None:
                return []
            candidates = []
            # drop a trailing comment, then handle every imported module on its own,
            # the first token of each part is the module, the rest is its alias
            for part in import_match.group(1).split("#")[0].split(","):
                tokens = part.split()
                if tokens:
                    candidates.append(tokens[0])
        return [name.split(".")[0] for name in candidates if not name.startswith(".")]

    def _parse_source_file(
        self, file_path: str, std_libs: frozenset, deps: dict, errors: dict
    ) -> None:
        """
        Reads one '.py' file and registers every imported package that is not known yet.
        """
        modules = []
        with open(file_path) as file:
            for line in file:
                modules.extend(self._imported_modules(line))

        for module in modules:
            key = module.lower()
            if key in deps:
                continue
            # put base libs to deps instead of error
            if module in std_libs:
                deps[key] = {self._BASE_LIB}
            else:
                errors[key] = {self._NO_VERSION}

    def _apply_manual_config(self, deps: dict, errors: dict) -> dict:
        """
        Applies the "manual dependency config" of the service config for Python.

        renaming: an import whose configured package name is a known dependency
        loses its "version not specified" error.
        ignoring: entries flagged with "ignore" are removed from ``deps`` and the errors.

        :return: the remaining errors
        """
        service_config = self.parent.service_config or {}
        manual_configs = (service_config.get("manual dependency config") or {}).get(
            "Python"
        )
        if not manual_configs:
            return errors

        # renaming
        remaining = {}
        for name, messages in errors.items():
            for manual_config in manual_configs:
                pkg_name = manual_config.get("pkg name") or ""
                if (
                    manual_config.get("import name", "").lower() == name
                    and pkg_name.lower() in deps
                ):
                    # discard() does not fail if the library has only other errors
                    messages.discard(self._NO_VERSION)
                    break
            if messages:
                remaining[name] = messages

        # ignoring
        for manual_config in manual_configs:
            if not manual_config.get("ignore"):
                continue
            import_name = manual_config.get("import name") or ""
            pkg_name = manual_config.get("pkg name") or ""
            # library names are stored in lower case, the exact spelling is tried as well
            for key in (import_name, import_name.lower()):
                remaining.pop(key, None)
            for key in (pkg_name, pkg_name.lower()):
                deps.pop(key, None)
        return remaining

    def _split_unspecific_versions(self, deps: dict, errors: dict) -> tuple:
        """
        Sorts out unspecific version statements.
        Only "==" versions and "base lib" stay in the dependencies, every other
        statement is reported as error.

        :return: tuple ``(deps, errors)``
        """
        specific_deps = {}
        for dep, versions in deps.items():
            specific_versions = OrderedSet()
            for version in versions:
                if "==" in version or version == self._BASE_LIB:
                    specific_versions.add(version)
                    continue
                if dep not in errors:
                    errors[dep] = OrderedSet()
                errors[dep].add(f'version "{version}" is not specific')
            if specific_versions:
                specific_deps[dep] = specific_versions
        return specific_deps, errors
# software-quality-assurance
# This Software provides services to check and improve the source code quality of software-projects
#
# SPDX-FileCopyrightText: 2021 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Parts of this program <(especially the code for whatever)>
# were developed within the context of the following publicly funded
# projects or measures:
# - Helmholtz Federated IT Services, Helmholtz Association of German Research Centres (https://software.hifis.net/)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, Version 3 or later.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You can find a copy of the license in the LICENSES folder.
# If not, see <http://www.gnu.org/licenses/>.
"""
This is the interface to the different language handlers within the list used licenses service.
"""
def get_language_handlers(parent) -> list:
    """
    Creates one instance of every implemented language handler.

    :param parent: passed on to every handler as its ``parent``
    :return: the handlers, the Node handler first, then the Python handler
    """
    # imported locally, as in the rest of this package
    from .node import Node
    from .python import Python

    # the Python handler is created first, the Node handler second
    python_handler = Python(parent=parent)
    node_handler = Node(parent=parent)

    handlers = [node_handler, python_handler]
    return handlers
# software-quality-assurance
# This Software provides services to check and improve the source code quality of software-projects
#
# SPDX-FileCopyrightText: 2021 Helmholtz Centre Potsdam - GFZ German Research Centre for Geosciences, Germany (https://www.gfz-potsdam.de/)
#
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Parts of this program <(especially the code for whatever)>
# were developed within the context of the following publicly funded
# projects or measures:
# - Helmholtz Federated IT Services, Helmholtz Association of German Research Centres (https://software.hifis.net/)
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, Version 3 or later.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You can find a copy of the license in the LICENSES folder.
# If not, see <http://www.gnu.org/licenses/>.
"""
provides a class for handling node.js dependencies
"""
import json
import os
import re
from typing import Optional
import requests
from orderedset import OrderedSet