#!/usr/bin/python3

## Based on the perl code of Trustminer by CASED
## Nikos

import sys
import os
from pymongo import MongoClient  # mongodb, assumes database at default path
import logging, sys
import configparser
import json
import csv
import urllib.request
import datetime
import debian_advisory as da
import timeseries as ts
import cveparse as cv
import matplotlib.pyplot as plt
import numpy as np
from dateutil import parser
import plotly.plotly as py
import plotly.graph_objs as go
import machine_learning as ml
import lstm_reg as lstm
import metadata as meta
import deps
import psycopg2

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

## Increase the recursion limit by much to allow bs to parse large files
## This is not good practise
sys.setrecursionlimit(6000)

# load config file as library
config = configparser.ConfigParser()
config.read('config_test')
# FIX: the original tested `config.sections == []`, comparing the bound
# method object itself to a list (always False), so a missing config file
# was never detected. sections() must be called.
if config.sections() == []:
    print('configuration file not found\n')
    sys.exit(1)

# global variables
secperday = 60 * 60 * 24
now = datetime.datetime.now()
verbosity = 1

###############################################################################
## logging
# 1 fatal errors
# 2 errors
# 3 note
# 4 trace
# 5 debug
def msg(lvl, msg):
    """Print msg when lvl is within the configured [LOG] loglevel."""
    if lvl <= int(config['LOG']['loglevel']):
        print(msg)


def debug(text):
    """Log text at debug level (5)."""
    # FIX: the parameter was named `msg`, shadowing the msg() function above,
    # so the original body `msg(5, msg)` tried to call a string.
    msg(5, text)

# Need to see if this is necessary

###############################################################################
## load state, different from DBs in that we always need it
def load_state():
    """Load persisted crawler state from <cache_dir>/state.

    Returns (state, err): err is 0 when the cache was read, 1 when a fresh
    default state was created (e.g. on the first run).
    """
    cache = config['DIR']['cache_dir'] + 'state'
    err = 0
    state = dict()
    try:
        with open(cache) as json_data:
            state = json.load(json_data)
    except FileNotFoundError:
        # Load default state - start from the beginning
        state['cache_dir'] = cache
        state['next_adv'] = 0
        state['next_fsa'] = 0
        state['Packages'] = ''
        state['Sources'] = ''
        state['Sha1Sums'] = ''
        err += 1
    return (state, err)

###############################################################################
## save state, different from DBs in that we always need it
def save_state(state):
    """Persist state as JSON to <cache_dir>/state; a failed write is fatal."""
    cache = config['DIR']['cache_dir'] + 'state'
    try:
        with open(cache, 'w') as fp:
            json.dump(state, fp)
    except IOError:
        print('write cache state failed!! Fatal error')
        sys.exit(1)

###############################################################################
## load sha lists :TODO later
def load_sha1lists():
    cache = config['DIR']['cache_dir'] + 'state'

###############################################################################
## save sha lists :TODO later
def save_sha1lists():
    pass

###############################################################################
## helper for load_DBs: read one JSON cache file
def _load_json_cache(name):
    """Return the parsed JSON cache file <cache_dir>/<name>, or an empty dict
    when it is missing or corrupt (e.g. on the first run of the system)."""
    path = config['DIR']['cache_dir'] + name
    try:
        with open(path) as fp:
            return json.load(fp)
    except (IOError, ValueError):
        print('read cache ' + name + ' failed!! Maybe first run of the system?')
        return dict()

###############################################################################
## load from files
def load_DBs():
    """Load all cached databases; each missing cache comes back as {}.

    The original repeated the same try/open/except stanza eight times;
    the logic now lives once in _load_json_cache.
    """
    dsatable = _load_json_cache('dsatable')
    src2dsa = _load_json_cache('src2dsa')
    dsa2cve = _load_json_cache('dsa2cve')
    cvetable = _load_json_cache('cvetable')
    src2deps = _load_json_cache('src2deps')
    src2month = _load_json_cache('src2month')
    src2sloccount = _load_json_cache('src2sloccount')
    src2pop = _load_json_cache('src2pop')
    return (dsatable, src2dsa, dsa2cve, cvetable, src2month,
            src2sloccount, src2pop, src2deps)

###############################################################################
## help for save_DBs
def myconverter(o):
    """json.dump `default=` hook: stringify datetimes/timedeltas and turn
    numpy floats into plain ints (month counters are integral)."""
    if isinstance(o, (datetime.datetime, datetime.timedelta)):
        return str(o)
    # FIX: `np.float` is a long-deprecated (now removed) alias, and the old
    # `o.astype(int)` returned a numpy scalar that json cannot serialize
    # either; a builtin int is required here.
    if isinstance(o, np.floating):
        return int(o)

###############################################################################
## helper for save_DBs: write one JSON cache file
def _dump_json_cache(name, obj):
    """Serialize obj to <cache_dir>/<name>; a failed write is fatal."""
    path = config['DIR']['cache_dir'] + name
    try:
        with open(path, 'w') as fp:
            json.dump(obj, fp, default=myconverter)
    except IOError:
        print('write cache ' + name + ' failed!! Fatal error')
        sys.exit(1)

###############################################################################
## save to files
def save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month,
             src2sloccount, src2pop, src2deps):
    """Persist all databases to the cache directory; any failure exits."""
    _dump_json_cache('dsatable', dsatable)
    _dump_json_cache('src2dsa', src2dsa)
    _dump_json_cache('dsa2cve', dsa2cve)
    _dump_json_cache('cvetable', cvetable)
    _dump_json_cache('src2sloccount', src2sloccount)
    _dump_json_cache('src2pop', src2pop)
    _dump_json_cache('src2deps', src2deps)
    # src2month values may be numpy arrays; convert each series to a plain
    # list of ints before serializing (replaces the manual append loop).
    int_list = {pkg: [int(v) for v in vals]
                for pkg, vals in src2month.items()}
    _dump_json_cache('src2month', int_list)

###############################################################################
## Fetch current Packages, Sources and sha1sums files
## These are needed to find CVE stats by sha1sums/pkg-names
## Only Sha1Sums is custom generated, others are from Debian.
## FIXME: Server might do on-the-fly gzip (but should not for bzip2)
## Return: 1 on success, to signal that new parsing is needed.
def fetchMeta(filename):
    """Download <filename>.bz2 from the configured package mirror into the
    cache directory."""
    urlbase = config['URL']['pkg_base_url']
    mydir = config['DIR']['cache_dir']
    bzFile = filename + '.bz2'
    url = urlbase + bzFile
    logging.info('Checking meta file from ' + url + '\n')
    # Download file
    # FIX: the original referenced the undefined name `bzfile` (NameError).
    urllib.request.urlretrieve(url, mydir + bzFile)
    # TODO catch exceptions like file not found
    # TODO check if file has changed, if it is new unpack

###############################################################################
# Sources and Packages are not completely consistent, esp for debian-multimedia
# Here we store manual mappings for these..
def addOrphanPkgs(pkg2src):
    """Manually map binary packages to their source package where the
    Packages/Sources files disagree (esp. debian-multimedia)."""
    pkg2src['liblame-dev'] = "lame"
    pkg2src['lame-extras'] = "lame"
    pkg2src['moonlight'] = "moon"
    pkg2src['libmoon0'] = "moon"
    # FIX: 'xmms-mp4' was assigned twice with the same value; one suffices.
    pkg2src['xmms-mp4'] = "xmms2"
    pkg2src['lazarus-src-0.9.30'] = "lazarus"
    pkg2src['lazarus-ide-0.9.30'] = "lazarus"
    pkg2src['lcl-qt4-0.9.30'] = "lazarus"
    pkg2src['lazarus-ide-qt4-0.9.30'] = "lazarus"
    pkg2src['lcl-gtk2-0.9.30'] = "lazarus"
    pkg2src['lazarus-ide-gtk2-0.9.30'] = "lazarus"
    pkg2src['lcl-units-0.9.30'] = "lazarus"
    pkg2src['lazarus-0.9.30'] = "lazarus"
    pkg2src['lazarus-doc-0.9.30'] = "lazarus"
    pkg2src['lcl-0.9.30'] = "lazarus"
    pkg2src['lcl-utils-0.9.30'] = "lazarus"
    pkg2src['lcl-nogui-0.9.30'] = "lazarus"
    pkg2src['libx264-65'] = "x264"
    pkg2src['libx264-114'] = "x264"
    pkg2src['libx264-60'] = "x264"
    # Unresolved candidates, kept for reference:
    # pkg2src['libmlt3']
    # pkg2src['libgmerlin-avdec0']
    # pkg2src['libxul-dev']
    # pkg2src['libmyth-0.23.1-0']
    # pkg2src['libmpeg3hv']
    # pkg2src['libquicktimehv']
    # pkg2src['libxul0d']
    # pkg2src['acroread-fonts-kor']

###############################################################################
## Parse dpkg Packages file, create map deb-name->pkg-name
def parsePackages(pkgfile):
    """Parse the dpkg Packages file (stub - parsing still TODO)."""
    mydir = cache = config['DIR']['cache_dir']
    deb2pkg = dict()
    pkg2virt = dict()
    # FIX: virt2pkg was initialised as an empty tuple (); it is meant to be
    # a mapping like its siblings above.
    virt2pkg = dict()
    logging.info('Parsing Packages file...\n')
    pkgfile = mydir + pkgfile
    # TODO open and parse pkg file

###############################################################################
## Parse dpkg Sources file, create map pkg-name->src-name
def parseSources(srcfile):
    """Parse the dpkg Sources file (stub - parsing still TODO)."""
    mydir = cache = config['DIR']['cache_dir']
    checklinecont = 0
    pkg2src = dict()
    logging.info('Parsing Sources file...\n')
    srcfile = mydir + srcfile
    # TODO open and parse sources file

###############################################################################
def getSHA1(myhash, collection):
    """Look up a sha1 hash in the given mongo collection."""
    return collection.find({"hash": myhash})

###############################################################################
def addSHA1(myhash, deb, src, collection=None):
    """Record a (sha1, deb, src) triple (stub - DB insert still TODO).

    FIX: the original called getSHA1(myhash) without the mandatory
    collection argument, a guaranteed TypeError. The new optional
    `collection` parameter keeps the old signature working for callers.
    """
    dic = getSHA1(myhash, collection)
    # NOTE(review): getSHA1 returns a pymongo cursor; indexing it like a
    # dict below looks suspect - confirm intended usage.
    thash = dic["hash"]
    tdeb = dic["deb"]
    tsrc = dic["src"]
    # TODO insert SHA to database

###############################################################################
## Parse Sha1Sums file. Format: "sha1sum::deb-name::unix-file-path"
## Create 2 maps: sha1sum->file, file->deb-name
def parseSha1Sums(sha1file):
    pass

###############################################################################
## Parse local dpkg status, return list of debs
def parseStatus(stsfile):
    pass

###############################################################################
## Parse Advisory (only Debian supported atm
def parseAdvisory(adv):
    """Parse one advisory page; only Debian DSAs are supported.

    NOTE(review): reads the module-level `state` global rather than taking
    it as a parameter - confirm before refactoring callers.
    """
    if state['vendor'] == 'debian':
        return da.parseDSAhtml(adv)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        # FIX: was system.exit(1) - `system` is undefined (NameError).
        sys.exit(1)

###############################################################################
## Manually fix problems with Advisory entries
def fixAdvisoryQuirks(arg, state, dsastats):
    """Apply per-vendor manual corrections to a parsed advisory."""
    if state['vendor'] == 'debian':
        return da.fixDSAquirks(arg, dsastats)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        # FIX: was system.exit(1) - `system` is undefined (NameError).
        sys.exit(1)

###############################################################################
## Extract CVE ids from new advisories and print URL for mirror script
def printCVEs(myid, adv, state):
    """Print the CVE references of an advisory (mirror-script helper)."""
    logging.info('Looking for CVEs in advisory...\n')
    dsastats = parseAdvisory(adv)
    if dsastats == []:
        return
    ## fix DSAs that don't contain correct CVE refs
    dsastats = fixAdvisoryQuirks(myid, state, dsastats)
    # TODO Fix this part
    ## for cve_id in dsastats

###############################################################################
## Update internal vuln.
## DB with new Advisory info
## Creates CVEtable for MTBF computation:
## ( cve-id => (date, delay, score1, score2, score3))
def updateCVETables(myid, dsatable, state, src2dsa, dsa2cve, cvetable, client):
    """Parse advisory `myid`, record its src-pkg and CVE links, and add one
    (date, delay, scores...) row per CVE to cvetable."""
    logging.info('Updating vulnerability database with advisory '
                 + state['vendor'] + str(myid) + ' \n')

    adv = dsatable[myid]
    dsastats = parseAdvisory(adv)
    if dsastats == []:
        return

    dsastats = fixAdvisoryQuirks(myid, state, dsastats)

    # dsastats layout: [0]=source packages, [1]=advisory date, [2]=CVE ids
    # (assumed from usage below - TODO confirm against debian_advisory).
    for srcpkg in dsastats[0]:
        src2dsa.setdefault(srcpkg, []).append(myid)

    dsa2cve[str(myid)] = dsastats[2]

    for cve_id in dsastats[2]:
        # Now fetch CVE. We use mongodb and cve-search
        cve = cv.fetchCVE(cve_id, client)
        cvestats = cv.parseCVE(cve_id, cve)
        # Fall back to the advisory date when the CVE date is missing (0)
        # or later than the advisory itself.
        finaldate = cvestats[0]
        if cvestats[0] > dsastats[1] or cvestats[0] == 0:
            finaldate = dsastats[1]
        cvedata = (finaldate, dsastats[1] - finaldate,
                   cvestats[1], cvestats[2], cvestats[3])
        cvetable[cve_id] = cvedata

    return cvetable

###############################################################################
## Check for updates on Package information
def aptsec_update(state, config, dsatable, client, src2dsa, dsa2cve,
                  src2month, cvetable, pkg_with_cvss):
    """Fetch new advisories, fold them into the tables, then recompute all
    per-package statistics. Returns 0."""
    args = sys.argv
    # if not('--offline' in args):
    #     fetchMeta('Packages')
    #     fetchMeta('Sources')
    #     fetchMeta('Sha1Sums')
    now = datetime.datetime.now()
    if not ('--cves' in args):
        parsePackages('Packages')
        parseSources('Sources')
    # if not('--nosha1' in args):
    #     parseSha1sums('Sha1Sums')

    if state['vendor'] == 'debian':
        newAdv = da.checkDSAs(state, config)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        # FIX: was system.exit(1) - `system` is undefined (NameError).
        sys.exit(1)

    for myid in newAdv:
        if myid in dsatable:
            logging.info(state['vendor'] + ' advisory ' + myid
                         + ' already known.\n')
        elif '--cves' in args:
            ## scan for CVE urls only?
            # FIX: printCVEs takes (myid, adv, state); `state` was missing.
            printCVEs(myid, newAdv[myid], state)
        else:
            ## store advisory and parse it
            dsatable[myid] = newAdv[myid]
            updateCVETables(myid, dsatable, state, src2dsa, dsa2cve,
                            cvetable, client)

    # recompute all pkg statistics
    for srcpkg in src2dsa:
        processCVEs(srcpkg, now, src2dsa, dsa2cve, src2month, cvetable,
                    pkg_with_cvss, config)

    return 0

###############################################################################
## find list of src pkgs from bin pkgs based on pkg2src
def resolvePkg2Src(pkglist, pkg2src):
    """Map binary package names to source package names; unknown binaries
    are logged and skipped."""
    srclist = []
    for pkg in pkglist:
        if pkg in pkg2src:
            srclist.append(pkg2src[pkg])
        else:
            logging.info('Could not find source package for: ' + pkg + ' .\n')
    return srclist

###############################################################################
## compute and store MTBF, MTBR and Scores of each src pkg
## output: %src2mtbf:
## (srcpkg=> ())
def processCVEs(pkg, now, src2dsa, dsa2cve, src2month, cvetable,
                pkg_with_cvss, config):
    """Tally CVEs for one source package per disclosure date (total and by
    CVSS severity band) and push both series through format_data."""
    stats = [now, 0, 0, 0, 0, 0, 0]
    mylambda = config['TRUST']['lambda']
    cvestats = dict()    # date -> total CVE count
    with_cvss = dict()   # date -> [num low, num medium, num high]
    logging.info('Processing package: ' + pkg + '.\n')

    ## keep track of the number of low-medium-high severity vulnerabilities
    ## TODO see how cvss affects vulnerability prediction - if some packages show patterns
    # The original iterated the same dsa/cve pairs twice (once for totals,
    # once for severity bands); a single pass produces identical results.
    for dsa_id in src2dsa[pkg]:
        for cve_id in dsa2cve[str(dsa_id)]:
            tt = cvetable[cve_id][0]
            cvestats[tt] = cvestats.get(tt, 0) + 1
            stats[1] += 1
            score = float(cvetable[cve_id][2])
            bucket = with_cvss.setdefault(tt, [0, 0, 0])
            if score < 4.0:
                bucket[0] += 1      # low severity
            elif score < 7.0:
                bucket[1] += 1      # medium severity
            else:
                bucket[2] += 1      # high severity

    # Ignore pkgs with less than one incident, should not happen..
    if stats[1] < 1:
        return

    prev_date = 0
    weight = 0

    dates = sorted(cvestats, key=cvestats.get)
    # NOTE(review): this sorts dates by their CVE *count*, not
    # chronologically, before taking dates[0] - confirm intent.
    stats[0] = dates[0]
    count = sum(cvestats.values())
    print(pkg + ' ' + str(count))

    # pkg_with_cvss[pkg] = with_cvss
    format_data(pkg, with_cvss, pkg_with_cvss, True)
    format_data(pkg, cvestats, src2month, False)

###############################################################################
## format vulnerability data into monthly intervals, suitable for tensorflow
def format_data(pkg, cvestats, src2month, cvss):
    """Bucket per-date counts into monthly totals for years 2000-2017 and
    store the flattened 216-entry series in src2month[pkg].

    When cvss is True every bucket is [low, med, high]; otherwise it is a
    plain count and the stored series is a numpy float array.
    """
    x = []
    y = []
    monthyear = []
    items = []
    for when, num in cvestats.items():
        # Keys may be ISO date strings (from the JSON cache) or datetimes.
        tmpx = parser.parse(when) if isinstance(when, str) else when
        x.append(tmpx)
        try:
            tmpy = int(num)
        except TypeError:
            # cvss mode: num is a [low, med, high] list, keep it as-is
            tmpy = num
        y.append(tmpy)
        items.append((tmpx, tmpy))
    items.sort(key=lambda tup: tup[0])

    # NOTE(review): the year range is hard-coded; dates outside 2000-2017
    # would raise IndexError below - confirm upstream guarantees.
    for i in range(2000, 2018):
        temp = []
        for j in range(12):
            temp.append([0, 0, 0] if cvss else 0)
        monthyear.append(temp)

    for i in range(len(x)):
        cell = monthyear[x[i].year - 2000][x[i].month - 1]
        if cvss:
            cell[0] += y[i][0]
            cell[1] += y[i][1]
            cell[2] += y[i][2]
        else:
            monthyear[x[i].year - 2000][x[i].month - 1] += y[i]

    months_list = [item for sublist in monthyear for item in sublist]

    if not cvss:
        temp_months = np.zeros(len(months_list))
        for i, element in enumerate(months_list):
            temp_months[i] = np.float32(element)
        src2month[pkg] = temp_months
    else:
        src2month[pkg] = months_list
    return

###############################################################################
## plot vulnerability time distribution for a single package
def pkg_plot(pkg, cvestats):
    """Plot the monthly vulnerability histogram of one package. Returns 0.

    NOTE(review): kept logically identical to the original; half-year and
    yearly aggregates are computed but only the monthly bars are shown.
    """
    colors = list("rgbcmyk")
    items = list(cvestats.items())
    items.sort(key=lambda tup: tup[0])
    x = []
    y = []
    for data_dict in items:
        x.append(parser.parse(data_dict[0]))
        y.append(data_dict[1])

    monthyear = []
    year = []
    # initialize list (years 2000-2016 here, unlike format_data's 2000-2017)
    for i in range(2000, 2017):
        temp = []
        for j in range(12):
            temp.append(0)
        monthyear.append(temp)
    for i in range(len(x)):
        monthyear[x[i].year - 2000][x[i].month - 1] += y[i]

    newx = []
    yearsx = []
    year = []
    monthlabel = []
    month = []
    m1 = 0
    m2 = 0
    k = 0
    label_months = []
    months_list = [item for sublist in monthyear for item in sublist]
    for i in range(len(months_list)):
        label_months.append(i)
    plt.bar(label_months, months_list)

    # yearly and half-yearly totals (currently only used by the commented-out
    # alternative plots below)
    for i in range(len(monthyear)):
        year.append(0)
        cc = 0
        for j in range(len(monthyear[i])):
            cc += monthyear[i][j]
            if j == 5:
                m1 = cc
                month.append(m1)
            if j == 11:
                month.append(cc - m1)
                k += 1
            year[i] = cc
    for i in range(len(year)):
        yearsx.append(i + 2000)

    k = 2000
    datapoints = []
    for i in range(len(month)):
        datapoints.append(i + 1)
        if i % 2 == 0:
            monthlabel.append(str(k) + '-1')
        else:
            monthlabel.append('-2')
            k += 1

    # plt.xticks(datapoints, monthlabel)
    # plt.plot.hist(yearsx,year)
    # plt.bar(yearsx, year, 1, color='blue')
    # plt.bar(datapoints, month, 1, color='blue')
    # ts.predict(month)
    plt.legend([pkg], loc='upper left')
    plt.show()
    return 0

###############################################################################
## populate src2sloccount dictionary with number of source lines of code in
## format (total, [ansic, cpp, asm, java, python, perl, sh])
def getslocs(src2dsa, src2sloccount):
    """Fill src2sloccount from the local debsources postgres instance."""
    try:
        conn = psycopg2.connect("dbname = 'debsources' user = 'postgres' "
                                "host = 'localhost' password = 'nik'")
    except psycopg2.Error:
        # FIX: the original used a bare except that only printed and then
        # fell through to use the undefined `conn` (UnboundLocalError);
        # bail out instead, and catch only psycopg2 errors.
        print('I am unable to connect to the database')
        return

    cur = conn.cursor()
    for pkg in src2dsa:
        if pkg not in src2sloccount:
            print(pkg)
            src2sloccount[pkg] = meta.getsloccount(cur, pkg)
    return

###############################################################################
## get popularity contest data in format src_pkg -> (installed, vote, old, recent)
def getpop(src2dsa, src2pop):
    """Fill src2pop from the Debian_pop.csv popularity-contest dump."""
    with open('Debian_pop.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in reader:
            try:
                if row[1] in src2dsa and not (row[1] in src2pop):
                    src2pop[row[1]] = row[2:6]
            except IndexError:
                # short/malformed row: report it and keep going
                print(row)
                continue
    return

###############################################################################
## get dependencies of a given source
def getdeps(src2dsa, src2deps):
    """Fill src2deps for every source package with a recorded advisory."""
    for srcpkg in src2dsa:
        deps.getdeps(srcpkg, src2deps)

###############################################################################
## print some meta-info on internal data
def aptsec_about(dsatable, cvetable, pkg2src, src2dsa):
    """Print database size statistics."""
    num_dsa = len(dsatable)
    num_cve = len(cvetable)
    num_pkg = len(pkg2src)
    num_src = len(src2dsa)
    # FIX: print() does not do %-interpolation when given extra args; the
    # original printed the raw format string and a tuple. Also the first
    # message paired num_src with "DSAs" - num_dsa is the DSA count.
    print('\nThe current database records %d binary packages and %d DSAs.\n'
          % (num_pkg, num_dsa))
    print('%d CVEs are associated with %d source packages.\n'
          % (num_cve, num_src))
    return

###############################################################################
## use scores to suggest alternative packages
def aptsec_alternatives(pkg):
    pass

###############################################################################
## print overview for pkg high scores
def aptsec_hitlist():
    pass

###############################################################################
## evaluation helper
## compute stats until date given in $2, then compute stats
## for the next year to check accuracy of the prediction.
## @cvestats = (date base-score impact-score exploit-score)
def simulate_stats(pkg, year):
    pass

###############################################################################
##TODO Printing functions

###############################################################################
## show info on a single src pkg, resolv to src if needed
def aptsec_show(pkg, state, pkg2src, src2dsa, src2mtbf, cvetable):
    """Print binary packages, advisories and CVE scores for one src pkg."""
    # FIX: gmtime was used below but never imported anywhere in the file.
    from time import gmtime

    if state['vendor'] == 'debian':
        ADV = 'DSA-'
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        # FIX: was system.exit(1) - `system` is undefined (NameError).
        sys.exit(1)

    if (not (pkg in src2dsa)) and (pkg in pkg2src):
        print('\nResolving ' + pkg + ' to ' + pkg2src[pkg] + '\n')
        pkg = pkg2src[pkg]

    print('\nThe following binary packages are created from ' + pkg + ' :\n\n')
    lines = 0
    for i in pkg2src:
        if pkg2src[i] == pkg:
            print(i + '\n')
            lines += 1
    if lines < 1:
        print('-\n')

    if not (pkg in src2dsa and pkg in src2mtbf):
        print('\nNo vulnerabilities recorded for source package ' + pkg + '.\n')
        return

    print('\nAdvisories on package ' + pkg + ':\n\n')
    # FIX: src2dsa[pkg] is a list; the original key=src2dsa[pkg].get raised
    # AttributeError (lists have no .get). A plain sort orders the ids.
    for dsa_id in sorted(src2dsa[pkg]):
        print(ADV + dsa_id + '\n')
        # NOTE(review): dsa2cve is read from the module-level global here,
        # not from a parameter - confirm before refactoring.
        for cve_id in dsa2cve[dsa_id]:
            # FIX: the Perl-style 6-tuple unpack of gmtime() raised
            # ValueError (struct_time has 9 fields); use named fields.
            # tm_mon is already 1-based and tm_year a full year, so the
            # Perl-era "+1"/"+1900" corrections are dropped.
            # NOTE(review): cvetable dates may be datetimes rather than
            # epoch seconds - confirm gmtime() input type.
            tm = gmtime(cvetable[cve_id][0])
            # FIX: print() does not do %-interpolation; format explicitly.
            print('%s: Base Score: %04.1f, %02d.%02d.%04d\n'
                  % (cve_id, cvetable[cve_id][2],
                     tm.tm_mday, tm.tm_mon, tm.tm_year))

    stats = src2mtbf[pkg]
    print('Now we print various iformation \n')

###############################################################################
## print help text
def aptsec_help():
    print('See manual for correct usage\n')

###############################################################################
## Print system status report from component(files) measurements (sha1sums)
## Expected input format is Linux IMA. We assume input was validated.
##
## Note: aptsec_status(), considers *reportedly installed* packages, while this
## one looks at *actually loaded* software that influenced the CPU since bootup.
def aptsec_attest(sha1file):
    pass

## Main Program starts here!!
try: action = sys.argv[1] except IndexError: # print('No argument given') # aptsec_help() # sys.exit(0) action = '' client = MongoClient() dsatable = dict() cve_db = client.cvedb src2dsa = dict() dsa2cve = dict() cvetable = dict() src2month = dict() src2deps = dict() pkg_with_cvss = dict() src2sloccount = dict() src2pop = dict() (state, err) = load_state() state['vendor'] = 'debian' #detect_distribution() #d = state['cache_dir'] #if not os.path.exists(d): # os.makedirs(d) if action == 'update': (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps) = load_DBs() # loadsha1lists() # aptsec_update(state,config, dsatable, client, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss) # save_sha1lists() # getslocs(src2dsa, src2sloccount) # getpop(src2dsa, src2pop) # getdeps(src2dsa, src2deps) save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps) save_state(state) lstm.predict(src2month) # print(pkg_with_cvss['linux']) low = [] med = [] high = [] for item in pkg_with_cvss['firefox']: low.append(item[0]) med.append(item[1]) high.append(item[2]) # plt.plot(low, color = 'green') # plt.plot(med, color = 'orange') # plt.plot(high, color = 'red') # plt.show() elif action == 'status': load_DBs or exit(1) #handle errors more gracefully aptsec_status(sys.argv[2]) elif action == 'show': load_DBs or exit(1) #handle errors more gracefully aptsec_show(sys.argv[2]) else: aptsec_help() #print(state) save_state(state) #cve_db = client.cvedb #collection = db.cves #testcvss = collection.find_one({"cvss": 9.3}) #print(testcvssi