#!/usr/bin/env python3
"""Download the CVE database and store it as a sqlite3 database.

This tool does not take any parameters. It creates the following files in
the current directory:
 - data-2.db: The sqlite3 database itself.
 - nvdcve-2.0-*.xml.gz: The cached raw XML feeds from the CVE database.

Do not remove the nvdcve-2.0-*.xml.gz files unless you also remove
data-2.db. data-2.db stores the ETag of each file, so files that are
simply removed would not be downloaded again.

Files are not downloaded again if they have not been modified, but each
run still checks with the remote database that the local files are the
latest version.
"""

import datetime
import gzip
import itertools
import os
import shutil
import sqlite3
import sys
import urllib.error
import urllib.parse
import urllib.request
from contextlib import contextmanager

from lxml import etree as ET

# Prefix -> namespace URI map handed to every find()/iterfind() call so the
# XPath-like expressions can use the short "feed:", "vuln:" and "cvss:" forms.
namespaces = dict(
    feed="http://scap.nist.gov/schema/feed/vulnerability/2.0",
    vuln="http://scap.nist.gov/schema/vulnerability/0.4",
    cvss="http://scap.nist.gov/schema/cvss-v2/0.2",
)

def extract_vulns(tree):
    """Yield one ``(cve_id, summary, score)`` tuple per feed entry in *tree*.

    *tree* is searched for ``feed:entry`` elements. For each entry the text
    of its ``vuln:cve-id`` and ``vuln:summary`` children is returned; the
    score is the text of ``vuln:cvss/cvss:base_metrics/cvss:score``, or
    ``None`` when the entry carries no such element.
    """
    def lookup(node, path):
        # Every query in this function goes through the module-level prefix map.
        return node.find(path, namespaces=namespaces)

    for item in tree.iterfind("feed:entry", namespaces=namespaces):
        identifier = lookup(item, "vuln:cve-id").text
        description = lookup(item, "vuln:summary").text
        score_node = lookup(item, "vuln:cvss/cvss:base_metrics/cvss:score")
        if score_node is None:
            # Entry has no CVSS score element.
            yield identifier, description, None
        else:
            yield identifier, description, score_node.text

def extract_product_vulns(tree):
    """Yield ``(cve_id, vendor, name, version)`` for each affected product.

    For every ``feed:entry`` in *tree*, each ``vuln:product`` found under a
    ``vuln:vulnerable-software-list`` is split on ``':'``; fields 2, 3 and 4
    of the result are taken as vendor, name and version. Products whose text
    does not provide all three of those fields are skipped.
    """
    product_path = "vuln:vulnerable-software-list/vuln:product"
    for item in tree.iterfind("feed:entry", namespaces=namespaces):
        identifier = item.find("vuln:cve-id", namespaces=namespaces).text
        for node in item.iterfind(product_path, namespaces=namespaces):
            fields = node.text.split(':')[2:5]
            if len(fields) != 3:
                # Too few ':'-separated parts to name vendor, name and version.
                continue
            vendor, name, version = fields
            yield identifier, vendor, name, version

def ensure_tables(c):
    """Create the ``etags``, ``cve`` and ``product_vuln`` tables if missing.

    Args:
        c: Cursor on which the ``CREATE TABLE IF NOT EXISTS`` statements are
            executed. Calling this again on an initialised database changes
            nothing.
    """
    schema = (
        # ETag last recorded for each yearly (or 'Modified') feed.
        """CREATE TABLE IF NOT EXISTS etags
                 (year TEXT UNIQUE, etag TEXT)""",
        # One row per CVE entry.
        """CREATE TABLE IF NOT EXISTS cve
                 (id TEXT UNIQUE, summary TEXT, score TEXT)""",
        # One row per (CVE, affected product) pair.
        """CREATE TABLE IF NOT EXISTS product_vuln
                 (cve_id TEXT, name TEXT, vendor TEXT, version TEXT,
                  UNIQUE(cve_id, name, vendor, version))""",
    )
    for statement in schema:
        c.execute(statement)

def update_year(c, year):
    """Refresh the cached feed for *year* and load it into the database.

    A conditional request (``If-None-Match``) is sent when an ETag is
    recorded for *year* in the ``etags`` table. On a fresh response the
    archive is saved as ``nvdcve-2.0-<year>.xml.gz`` in the current directory
    and the new ETag, if the server sent one, is recorded. On HTTP 304 the
    file already on disk is reused. Either way the archive is then parsed and
    its entries are inserted (or replaced) in ``cve`` and ``product_vuln``.

    Args:
        c: sqlite3 cursor used for all reads and writes; nothing is committed
            here.
        year: Feed name as a string, e.g. ``'2015'`` or ``'Modified'``.

    Raises:
        urllib.error.HTTPError: For any HTTP error status other than 304.
    """
    filename = 'nvdcve-2.0-{}.xml.gz'.format(year)
    url = 'https://nvd.nist.gov/feeds/xml/cve/2.0/' + filename

    c.execute("SELECT etag FROM etags WHERE year=?", (year,))
    row = c.fetchone()
    etag = row[0] if row is not None else None

    request = urllib.request.Request(url)
    if etag is not None:
        request.add_header('If-None-Match', etag)

    # Download next to the final name and rename only once the transfer is
    # complete, so an interrupted download never truncates the cached archive
    # that a later 304 response would rely on.
    partial = filename + '.part'
    try:
        with urllib.request.urlopen(request) as resp:
            new_etag = resp.getheader('ETag')
            with open(partial, 'wb') as f:
                shutil.copyfileobj(resp, f)
    except urllib.error.HTTPError as error:
        # urllib reports "304 Not Modified" as an HTTPError.
        if error.code != 304:
            raise
        print("Cached {}".format(filename))
    else:
        os.replace(partial, filename)
        # A response without an ETag is still usable; there is just nothing
        # to record for the next conditional request.
        if new_etag is not None:
            c.execute("INSERT OR REPLACE INTO etags (year, etag) VALUES (?, ?)", (year, new_etag))
        print("Downloaded {}".format(filename))

    with gzip.open(filename) as f:
        tree = ET.parse(f)

    c.executemany(
        "INSERT OR REPLACE INTO cve (id, summary, score) VALUES (?, ?, ?)",
        extract_vulns(tree))
    # extract_product_vulns yields vendor before name; the column list below
    # has name before vendor, so reorder each row explicitly.
    c.executemany(
        "INSERT OR REPLACE INTO product_vuln (cve_id, name, vendor, version) VALUES (?, ?, ?, ?)",
        ((cve_id, name, vendor, version)
         for cve_id, vendor, name, version in extract_product_vulns(tree)))

def main():
    """Update ``data-2.db`` from every yearly feed plus the 'Modified' feed.

    Feeds are processed from 2002 up to the current year, followed by
    'Modified'. The transaction is committed after each feed so that a
    failure on a later feed does not discard the rows and ETags of feeds
    that were already downloaded and imported.
    """
    conn = sqlite3.connect('data-2.db')
    try:
        c = conn.cursor()
        ensure_tables(c)
        feeds = [str(year) for year in range(2002, datetime.datetime.now().year + 1)]
        feeds.append('Modified')
        for feed in feeds:
            update_year(c, feed)
            conn.commit()
    finally:
        conn.close()


if __name__ == '__main__':
    main()
