diff options
author | Carsten Schoenert <c.schoenert@t-online.de> | 2023-11-05 10:38:33 +0100 |
---|---|---|
committer | Thomas Lange <lange@debian.org> | 2023-12-02 16:47:03 +0100 |
commit | a215df58b97423573c8e055d5017f9010a154aa7 (patch) | |
tree | c888499fdda4d2ab8754bbd022143a185992e29b /english/security | |
parent | 0b22b8f0d4e00a2cfc2b946d7e99015fbf2bea61 (diff) |
tracker.py: Adding new helper to read DSA data
Adding a new function parse_tracker_data() which reads the DSA data from
the file taken from this URL:
https://salsa.debian.org/security-tracker-team/security-tracker/-/raw/master/data/DSA/list
Diffstat (limited to 'english/security')
-rw-r--r-- | english/security/oval/oval/parser/tracker.py | 148 |
1 files changed, 148 insertions, 0 deletions
# File: english/security/oval/oval/parser/tracker.py
# (new file in commit a215df58b97423573c8e055d5017f9010a154aa7, index 58c65a2aedb)
"""
oval.parser.tracker

function around parsing data from the security-tracker data file

Copyright (c) 2023 Carsten Schoenert <c.schoenert@t-online.de>
              2023 Thomas Lange <lange@cs.uni-koeln.de>

SPDX-License-Identifier: GPL-2.0-or-later
"""

import re
from typing import Any
from typing import Optional

# Dictionary containing all the regular expressions for scanning the
# tracker data.
# Basic idea is taken from https://www.vipinajayakumar.com/parsing-text-with-python
regex_dicts = {
    "DSA-DATA": re.compile(
        r"^\[?(.+)\]\s(D[SL]A-\d+(?:-\d+)?)\s(\S+)\s(?:- )?(.+)$"
    ),
    # example string: [15 Sep 2023] DSA-5498-1 thunderbird - security update
    # match groups     ( group 1 )  ( group 2 )             (   group 4   )
    #                                           ( group 3 )
    # The leading '[' is optional because parse_tracker_data() splits the
    # file on '\n[' and thereby strips it from all but the first entry.
    # RegEx Visualization
    # https://regexper.com/#%5E%5C%5B%3F%28.%2B%29%5C%5D%5Cs%28D%5BSL%5DA-%5Cd%2B%28%3F%3A-%5Cd%2B%29%3F%29%5Cs%28%5CS%2B%29%5Cs%28%3F%3A-%20%29%3F%28.%2B%29%24
    # https://regex101.com/r/Vz8tx7/1

    "CVE-DATA": re.compile(r"\{([CVE0-9 -]+)}"),
    # example string: {CVE-2004-0835 CVE-2004-0836 CVE-2004-0837}
    # match group      (                group 1                )
    #
    # RegEx Visualization
    # https://regexper.com/#%5C%7B%28%5BCVE0-9%20-%5D%2B%29%7D
    # https://regex101.com/r/6MEGte/1

    "RELEASE-DATA": re.compile(r"^\s+\[(\S+)]\s-\s(.+)\s(\S+)$"),
    # example strings: [bullseye] - chromium 116.0.5845.140-1~deb11u1
    #                  [bookworm] - chromium 116.0.5845.140-1~deb12u1
    # match groups      (group 1)   (group 2)
    #                                        (       group 3        )
    # RegEx Visualization
    # https://regexper.com/#%5E%5Cs%2B%5C%5B%28%5CS%2B%29%5D%5Cs-%5Cs%28.%2B%29%5Cs%28%5CS%2B%29%24
    # https://regex101.com/r/S2yy8S/1
}


def _parse_entry(
        entry: str,
        debian_version: dict[str, str]
        ) -> Optional[tuple[str, list[Any]]]:
    """Parse the text block of a single D[L,S]A.

    Parameters:
        entry (str): All lines belonging to one advisory.
        debian_version (dict): Maps release code names to the keys used in
            the per-release result dictionary.

    Returns:
        tuple: The advisory number and its ``[dsa_row, wml_data_dict1,
        wml_data_dict2]`` list, or None if the block has no advisory header.

    Raises:
        KeyError: A release code name is missing from ``debian_version``.
    """
    header = None
    cve_list: list[str] = []
    wml_data_dict2: dict[str, dict[str, Any]] = {}

    for line in entry.split("\n"):
        # Parse every line by the dict using the Regex's.
        for key, regex in regex_dicts.items():
            match = regex.search(line)
            if not match:
                continue
            if key == "DSA-DATA":
                header = match.groups()
            elif key == "CVE-DATA":
                cve_list = match.group(1).split(" ")
            elif key == "RELEASE-DATA":
                code_name, package, version = match.groups()
                # Merge instead of replace: one advisory may fix several
                # packages within the same release.
                per_release = wml_data_dict2.setdefault(
                    debian_version[code_name], {"all": {}}
                )
                per_release["all"][package] = version

    if header is None:
        # Not an advisory (e.g. empty input); nothing to report.
        return None

    dsa_date, dsa_number, dsa_pkg, dsa_desc = header
    if not cve_list:
        # There might be no CVE assigned yet, catching these cases.
        cve_list = ["not yet available"]

    dsa_row = (
        f"{dsa_number} {dsa_pkg}", {
            "title": f"{dsa_number} {dsa_pkg}",
            "date": dsa_date,
            "packages": dsa_pkg,
            "secrefs": cve_list,
            "vulnerable": "yes",
            "fixed": "yes"}
    )
    wml_data_dict1 = {"description": f"{dsa_desc}", "moreinfo": "no info"}
    return dsa_number, [dsa_row, wml_data_dict1, wml_data_dict2]


def parse_tracker_data(
        file: str,
        debian_version: dict[str, str]
        ) -> dict[str, list[Any]]:
    """Parse data from the list the Security Team is collecting

    The list is provided on
    https://salsa.debian.org/security-tracker-team/security-tracker/-/raw/master/data/DSA/list
    and is maintained by the Security Team.

    Parameters:
        file (str): The file as source for the parsing.
        debian_version (dict): All Debian versions as a dict, keyed by the
            release code name found in the file (e.g. "bookworm").

    Returns:
        dict: All collected data as a dictionary. The key is the advisory
        number with a trailing "-1" removed, the value is the list
        ``[dsa_row, wml_data_dict1, wml_data_dict2]``. Text blocks without
        an advisory header line are skipped.

    Raises:
        OSError: The file cannot be opened or read.
        KeyError: A release code name is missing from ``debian_version``.
    """
    with open(file, "r", encoding="UTF-8") as data_source:
        raw_data = data_source.read()

    final_dict: dict[str, list[Any]] = {}
    # Split off the data on occurrences of '\n[' into a list.
    # By this we get a list which has all D[L,S]A as entries; release lines
    # are indented and therefore do not trigger a split.
    for entry in raw_data.split("\n["):
        parsed = _parse_entry(entry, debian_version)
        if parsed is None:
            continue
        dsa_number, dsa_values = parsed
        final_dict[dsa_number.removesuffix("-1")] = dsa_values
    return final_dict


def test(data: Optional[dict[str, list[Any]]] = None) -> None:
    """Print parsed tracker data for manual inspection.

    Parameters:
        data (dict): Result of parse_tracker_data(). Nothing is printed if
            it is omitted or empty.
    """
    for dsa_ref, value in (data or {}).items():
        dsa_result, wml_result1, wml_result2 = value[0], value[1], value[2]
        print(f"dsaResult\n {dsa_result}")
        print(f"wmlResult1\n {wml_result1}")
        print(f"wmlResult2\n {wml_result2}")
        print(f"dsaRef\n {dsa_ref}")