From be2cc0425d51cc6cce1e7f4f0d4d97d90c014093 Mon Sep 17 00:00:00 2001
From: Emilio Pozuelo Monfort <pochu@debian.org>
Date: Wed, 7 Jun 2023 13:39:03 +0200
Subject: report-vuln: get CVE descriptions from CVE JSON API

The old pages will eventually go away, so switch to the JSON
API now that there's one, as that should be cleaner than parsing
HTML.

Fixes #23.
---
 bin/report-vuln | 89 ++++++++++++++++++++++++---------------------------------
 1 file changed, 37 insertions(+), 52 deletions(-)

(limited to 'bin')
diff --git a/bin/report-vuln b/bin/report-vuln
index d44be5609b..eb7d441fe7 100755
--- a/bin/report-vuln
+++ b/bin/report-vuln
@@ -10,13 +10,14 @@
 # export http_proxy if you need to use an http proxy to report bugs
 
 import argparse
-from tempfile import NamedTemporaryFile
+import json
 import os
 import re
 import sys
+from tempfile import NamedTemporaryFile
+from textwrap import wrap
 from urllib.parse import urlencode
 from urllib.request import urlopen
-from textwrap import wrap
 
 temp_id = re.compile('(?:CVE|cve)\-[0-9]{4}-XXXX')
 
@@ -53,58 +54,42 @@ def gen_index(ids):
 
     return ret
 
-def http_get(id):
-    param = urlencode({'name' : id})
-    resp = ''
-    try:
-        f = urlopen('https://cve.mitre.org/cgi-bin/cvename.cgi?%s' % param)
-        resp = f.read()
-    except Exception as e:
-        error('on doing HTTP request' + str(e))
-
-    f.close()
-
-    return resp
-
-# this is a hack that parses the cve id description from mitre
-def get_cve(id):
-    desc = False
-    r = re.compile('.*<th\ colspan=.*>Description<.*')
-    tag = re.compile('.*</?tr>.*')
-    reserved = re.compile(r'\*+\s+(<A HREF=.*>)?RESERVED(</A>)?\s+\*+')
-    ret = ''
-    resp = http_get(id)
-
-    for line in resp.decode('utf-8').rsplit('\n'):
-        if r.match(line):
-            desc = True
-            continue
-
-        if desc and reserved.search(line):
-            break
-
-        if tag.match(line) and desc:
-            continue
-
-        if desc and '<td colspan="2">' in line:
-            line = re.sub('.*<td colspan="2">', '', line)
-            for line in wrap(line):
-                ret += '| ' + line + '\n'
-            continue
-
-        if desc and '</td>' in line:
-            break
-
-        if desc and line != '':
-            ret = ret + '\n| ' + line
+# read CVE description from MITRE CVE JSON API
+def get_cve_description(id):
+    """Return the '| '-quoted English description of CVE `id`.
+
+    Fetch or parse failures are deliberately non-fatal: fall back to
+    description_from_list(), then to a fixed notice.
+    """
+    text = ''
+    try:
+        with urlopen('https://cveawg.mitre.org/api/cve/' + id) as f:
+            cve = json.loads(f.read())
+    except Exception:
+        # best effort: we can get a 404 if the CVE is RESERVED
+        cve = None
 
-    if ret == '':
-        ret = description_from_list(id)
+    if isinstance(cve, dict):
+        # not every record has 'descriptions', so never index blindly
+        cna = cve.get('containers', {}).get('cna', {})
+        for entry in cna.get('descriptions', []):
+            if entry.get('lang', '').startswith('en'):
+                text = entry.get('value') or ''
+                break
+    # TODO: sanitize description like in bin/process-cve-records,
+    # move this to a common function
+    # descriptions may contain newlines and leading spaces
+    text = text.replace('\n', ' ').strip()
 
-    if not ret:
-        ret = 'No description was found (try on a search engine)'
+    # wrap the description with a prefix
+    desc = "\n".join(wrap(text, initial_indent="| ", subsequent_indent="| "))
+    if not desc:
+        desc = description_from_list(id)
+    if not desc:
+        desc = 'No description was found (try on a search engine)'
 
-    return ret + '\n'
+    # double newline between CVEs
+    return desc + '\n\n'
 
 def gen_text(pkg, cveid, blanks=False, severity=None, affected=None, cc=False, cclist=None, src=False, mh=False):
     vuln_suff = 'y'
@@ -148,7 +133,7 @@ The following vulnerabilit%s %s published for %s.\n
     for cnt, cve in enumerate(cveid):
         if not temp_id.match(cve):
             ret += cve + '[' + str(cnt) + ']:\n'
-            ret += get_cve(cve) + '\n'
+            ret += get_cve_description(cve) + '\n'
         else:
             ret += 'Issue without CVE id #%d [%d]:\n' % (temp_id_cnt, cnt)
             desc = description_from_list(cve, pkg, temp_id_cnt)
-- 
cgit v1.2.3