From 214f8c6844a2aad4de08ab1b7b37461fddeb64a6 Mon Sep 17 00:00:00 2001 From: Emilio Pozuelo Monfort Date: Tue, 11 Jul 2023 11:11:31 +0200 Subject: check-new-issues: load CVE 5 JSON files dynamically Pre-caching all of them takes quite some time, do it dynamically instead so that one can start processing issues quickly, since loading the next issue is not a problem, but loading 250k items is. --- bin/check-new-issues | 54 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 18 deletions(-) (limited to 'bin') diff --git a/bin/check-new-issues b/bin/check-new-issues index b1d8010af8..5b0fe48bed 100755 --- a/bin/check-new-issues +++ b/bin/check-new-issues @@ -35,6 +35,7 @@ import setup_paths # noqa from sectracker import parsers from bugs import temp_bug_name +logging.basicConfig(format='%(asctime)s %(levelname)-8s %(message)s') #logging.getLogger().setLevel("DEBUG") def debug(s): @@ -81,7 +82,7 @@ def read_wnpp_file(wnpp_file): return wnpp def print_urls(cve_id): - cve = cve5s[cve_id] if cve_id in cve5s else None + cve = get_cve5(cve_id) if cve: cna = cve['containers']['cna'] @@ -91,7 +92,7 @@ def print_urls(cve_id): print("") def get_cve5_description(cve_id): - cve = cve5s[cve_id] if cve_id in cve5s else None + cve = get_cve5(cve_id) desc = None if cve: @@ -119,17 +120,28 @@ def print_cve(cve): cvelist = [cve] parsers.writecvelist(cvelist, sys.stdout) +def get_cve5(cve_id): + global cve5_zip + + if cve_id not in cve5s: + return None + + fname = cve5s[cve_id] + + logging.info('loading file') + f = cve5_zip.open(fname) + logging.info('loading json') + return json.load(f) + def read_cve5_file(f): cve5s = {} z = zipfile.ZipFile(cve5_file) for fname in z.namelist(): if os.path.basename(fname).startswith('CVE-'): - f = z.open(fname) - debug("processing record " + fname) - record = json.load(f) - cve_id = record['cveMetadata']['cveId'] - cve5s[cve_id] = record + debug("found record " + fname) + cve_id = os.path.basename(fname)[:-5] + 
cve5s[cve_id] = fname return cve5s @@ -168,8 +180,12 @@ def parse_cves(): return cves def auto_nfu(name): + debug(f'checking nfu for {name}') desc = get_cve5_description(name) + if not desc: + return None + wordpress_re = re.compile(r".*in\s+the\s+(.+)\s+(plugin|theme)\s+(?:[\w\d.]+\s+)?(?:(?:(?:before|through)\s+)?[\w\d.]+\s+)?for\s+[Ww]ord[Pp]ress.*") m = wordpress_re.match(desc) if m: @@ -275,7 +291,7 @@ def wnpp_to_candidates(): def print_stats(): temp_cves = [e for e in cves.keys() if 'TEMP' in e] - print(f"{len(cve5s)} CVEs", end="") + print(f"{len(cves)} CVEs", end="") print(f", {len(temp_cves)} temp issues", end="") if num_todo > 0: print(f", {num_todo} todos", end="") @@ -335,6 +351,10 @@ ignore_bug_file = "data/packages/ignored-debian-bug-packages" wnppurl = "https://qa.debian.org/data/bts/wnpp_rm" wnppfile = "../wnpp_rm" +# used by get_cve5, kept as a global so that we don't have to open the +# file repeatedly, since we only read cve5s one by one on demand +cve5_zip = zipfile.ZipFile(cve5_file) + issue_re = re.compile(r'CVE-20(?:0[3-9]|[1-9][0-9])|TEMP') auto_display_limit = 10 #$auto_display_limit = $opts{a} if defined $opts{a} @@ -439,9 +459,8 @@ if args.list: if args.auto: # auto process for todo in todos: - if todo in cve5s: - if nfu_entry := auto_nfu(todo): - set_cve_fnu(todo, nfu_entry) + if nfu_entry := auto_nfu(todo): + set_cve_nfu(todo, nfu_entry) save_datafile(cves.values(), datafile) sys.exit(0) @@ -575,13 +594,12 @@ def present_issue(name): print_full_entry(name) - if name in cve5s: - if nfu_entry := auto_nfu(name): - set_cve_nfu(name, nfu_entry) - print("New entry automatically set to NFU:") - entry = cves[name] - print_cve(entry) - return True + if nfu_entry := auto_nfu(name): + set_cve_nfu(name, nfu_entry) + print("New entry automatically set to NFU:") + entry = cves[name] + print_cve(entry) + return True auto_search(name) -- cgit v1.2.3