report-vuln: get CVE descriptions from CVE JSON API

The old cve.mitre.org HTML pages will eventually go away, so switch to the MITRE CVE JSON API now that one exists; reading JSON should also be cleaner than parsing HTML. Fixes #23.
author: Emilio Pozuelo Monfort <pochu@debian.org> 2023-06-07 13:39:03 +0200
committer: Emilio Pozuelo Monfort <pochu@debian.org> 2023-06-07 16:08:57 +0200
commit: be2cc0425d51cc6cce1e7f4f0d4d97d90c014093 (patch)
tree: d448dfcac19a55c3636b4253af95e489e41bc46d /bin
parent: fe1b60b5d152e0d35d0898b6d2f57216093cedb3 (diff)
1 files changed, 37 insertions, 52 deletions
diff --git a/bin/report-vuln b/bin/report-vuln
index d44be5609b..eb7d441fe7 100755
--- a/bin/report-vuln
+++ b/bin/report-vuln
@@ -10,13 +10,14 @@
 # export http_proxy if you need to use an http proxy to report bugs
 
 import argparse
-from tempfile import NamedTemporaryFile
+import json
 import os
 import re
 import sys
+from tempfile import NamedTemporaryFile
+from textwrap import wrap
 from urllib.parse import urlencode
 from urllib.request import urlopen
-from textwrap import wrap
 
 temp_id = re.compile('(?:CVE|cve)\-[0-9]{4}-XXXX')
 
@@ -53,58 +54,42 @@ def gen_index(ids):
 
     return ret
 
-def http_get(id):
-    param = urlencode({'name' : id})
-    resp = ''
-    try:
-        f = urlopen('https://cve.mitre.org/cgi-bin/cvename.cgi?%s' % param)
-        resp = f.read()
-    except Exception as e:
-        error('on doing HTTP request' + str(e))
-
-    f.close()
-
-    return resp
-
-# this is a hack that parses the cve id description from mitre
-def get_cve(id):
-    desc = False
-    r = re.compile('.*<th\ colspan=.*>Description<.*')
-    tag = re.compile('.*</?tr>.*')
-    reserved = re.compile(r'\*+\s+(<A HREF=.*>)?RESERVED(</A>)?\s+\*+')
-    ret = ''
-    resp = http_get(id)
-
-    for line in resp.decode('utf-8').rsplit('\n'):
-        if r.match(line):
-            desc = True
-            continue
-
-        if desc and reserved.search(line):
-            break
-
-        if tag.match(line) and desc:
-            continue
-
-        if desc and '<td colspan="2">' in line:
-            line = re.sub('.*<td colspan="2">', '', line)
-            for line in wrap(line):
-                ret += '| ' + line + '\n'
-            continue
-
-        if desc and '</td>' in line:
-            break
-
-        if desc and line != '':
-            ret = ret + '\n| ' + line
+# read CVE description from MITRE CVE JSON API
+def get_cve_description(id):
+    desc = None
 
-    if ret == '':
-        ret = description_from_list(id)
+    try:
+        with urlopen('https://cveawg.mitre.org/api/cve/' + id) as f:
+            cve = json.loads(f.read())
+    except:
+        # we can get a 404 if the CVE is RESERVED
+        cve = None
+
+    if cve:
+        desc = [desc['value']
+                for desc in cve['containers']['cna']['descriptions']
+                if desc['lang'].startswith('en')]
+    if desc:
+        desc = desc[0]
+        # TODO: sanitize description like in bin/process-cve-records,
+        # move this to a common function
+
+        # for some reason descriptions may contain new lines
+        desc = desc.replace('\n', ' ')
+
+        # and some contain leading spaces
+        desc = desc.strip()
+
+        # wrap the description with a prefix
+        desc = "\n".join(wrap(desc, initial_indent="| ", subsequent_indent="| "))
+    else:
+        desc = description_from_list(id)
 
-    if not ret:
-        ret = 'No description was found (try on a search engine)'
+    if not desc:
+        desc = 'No description was found (try on a search engine)'
 
-    return ret + '\n'
+    # double newline between CVEs
+    return desc + '\n\n'
 
 def gen_text(pkg, cveid, blanks=False, severity=None, affected=None, cc=False, cclist=None, src=False, mh=False):
     vuln_suff = 'y'
@@ -148,7 +133,7 @@ The following vulnerabilit%s %s published for %s.\n
     for cnt, cve in enumerate(cveid):
         if not temp_id.match(cve):
             ret += cve + '[' + str(cnt) + ']:\n'
-            ret += get_cve(cve) + '\n'
+            ret += get_cve_description(cve) + '\n'
         else:
             ret += 'Issue without CVE id #%d [%d]:\n' % (temp_id_cnt, cnt)
             desc = description_from_list(cve, pkg, temp_id_cnt)
author	Emilio Pozuelo Monfort <pochu@debian.org>	2023-06-07 13:39:03 +0200
committer	Emilio Pozuelo Monfort <pochu@debian.org>	2023-06-07 16:08:57 +0200
commit	be2cc0425d51cc6cce1e7f4f0d4d97d90c014093 (patch)
tree	d448dfcac19a55c3636b4253af95e489e41bc46d /bin
parent	fe1b60b5d152e0d35d0898b6d2f57216093cedb3 (diff)