#!/usr/bin/python3
## Based on the perl code of Trustminer by CASED
## Nikos

import sys
import os
from pymongo import MongoClient #mongodb assumes database at default path
import logging
import configparser
import json
import csv
import urllib.request
import datetime
import debian_advisory as da
import cveparse as cv
import matplotlib.pyplot as plt
import numpy as np
from dateutil import parser
import plotly.plotly as py
import plotly.graph_objs as go
#import lstm_reg as lstm
import metadata as meta
import deps
import psycopg2
import powerlaw as pl
import DLAmine as dla
import pickle
import paper_plots as carlosplt
import stat_tests as stats
from matplotlib2tikz import save as tikz_save
import prediction as pred
# NOTE(review): this rebinds the name 'stats' and shadows 'stat_tests as stats'
# above; everywhere below 'stats' resolves to scipy.stats — confirm intended.
import scipy.stats as stats

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

## Increase the recursion limit by much to allow bs to parse large files
## This is not good practise
sys.setrecursionlimit(6000)

# Load config file as library
config = configparser.ConfigParser()
config.read('config_test')
# BUG FIX: config.sections is a method; the old test `config.sections == []`
# compared the bound method to a list and was always False, so a missing
# configuration file was silently ignored. Call it.
if config.sections() == []:
    print('configuration file not found\n')
    sys.exit(1)

# Global variables
secperday = 60 * 60 * 24
now = datetime.datetime.now()
verbosity = 1

###############################################################################
## logging levels:
# 1 fatal errors
# 2 errors
# 3 note
# 4 trace
# 5 debug
def msg(lvl, msg):
    """Print *msg* when *lvl* is within the loglevel configured in [LOG]."""
    if lvl <= int(config['LOG']['loglevel']):
        print(msg)


def debug(text):
    """Log *text* at debug level (5).

    BUG FIX: the parameter used to be named 'msg', shadowing the msg()
    function above, so every call raised TypeError ('str' is not callable).
    """
    msg(5, text)

# Need to see if this is necessary
## load state, different from DBs in that we always need it
def load_state():
    """Load persistent tool state from <cache_dir>/state.

    Returns (state, err): err is 0 when the cache was read, 1 when a
    default state was synthesized (first run).
    """
    cache = config['DIR']['cache_dir'] + 'state'
    err = 0
    state = dict()
    try:
        with open(cache) as json_data:
            state = json.load(json_data)
    except FileNotFoundError:
        # Load default state - start from the beginning.
        # NOTE(review): 'cache_dir' is set to the state *file* path here,
        # not the directory — confirm downstream consumers expect that.
        state['cache_dir'] = cache
        state['next_adv'] = 0
        state['next_fsa'] = 0
        state['Packages'] = ''
        state['Sources'] = ''
        state['Sha1Sums'] = ''
        err += 1
    return (state, err)
###############################################################################
## save state, different from DBs in that we always need it
def save_state(state):
    """Persist *state* as JSON to <cache_dir>/state; exit on failure."""
    cache = config['DIR']['cache_dir'] + 'state'
    try:
        with open(cache, 'w') as fp:
            json.dump(state, fp)
    except IOError:
        print('write cache state failed!! Fatal error')
        sys.exit(1)

###############################################################################
## load sha lists :TODO later
def load_sha1lists():
    # TODO: not implemented yet; only computes the cache path.
    cache = config['DIR']['cache_dir'] + 'state'

###############################################################################
## save sha lists :TODO later
def save_sha1lists():
    pass

###############################################################################
## helper for load_DBs: read one JSON cache file, empty dict on failure
def _load_json_cache(cache_dir, name):
    """Return the parsed JSON at cache_dir+name, or {} when missing/corrupt."""
    try:
        with open(cache_dir + name) as fp:
            return json.load(fp)
    except (IOError, ValueError):
        # First run of the system (or a corrupt cache): start empty.
        print('read cache ' + name + ' failed!! Maybe first run of the system?')
        return dict()

###############################################################################
## load from files
def load_DBs():
    """Load all cached tables from <cache_dir>/<name> JSON files.

    Returns the 9-tuple (dsatable, src2dsa, dsa2cve, cvetable, src2month,
    src2sloccount, src2pop, src2deps, pkg_with_cvss); any table whose cache
    is missing comes back as an empty dict.
    """
    cache = config['DIR']['cache_dir']
    dsatable = _load_json_cache(cache, 'dsatable')
    src2dsa = _load_json_cache(cache, 'src2dsa')
    dsa2cve = _load_json_cache(cache, 'dsa2cve')
    cvetable = _load_json_cache(cache, 'cvetable')
    src2deps = _load_json_cache(cache, 'src2deps')
    src2month = _load_json_cache(cache, 'src2month')
    pkg_with_cvss = _load_json_cache(cache, 'pkg_with_cvss')
    src2sloccount = _load_json_cache(cache, 'src2sloccount')
    src2pop = _load_json_cache(cache, 'src2pop')
    return (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount,
            src2pop, src2deps, pkg_with_cvss)

###############################################################################
## help for save_DBs
def myconverter(o):
    """json.dump default hook: stringify datetimes, truncate numpy floats.

    BUG FIX: the old code used np.float (a removed alias of builtin float)
    and called .astype() on it, which plain floats do not have. Use builtin
    int() over both builtin and numpy floating types instead.
    """
    if isinstance(o, (datetime.datetime, datetime.timedelta)):
        return str(o)
    if isinstance(o, (float, np.floating)):
        return int(o)

###############################################################################
## helper for save_DBs: write one JSON cache file, exit on failure
def _save_json_cache(obj, cache_dir, name):
    """Dump *obj* as JSON to cache_dir+name; a write failure is fatal."""
    try:
        with open(cache_dir + name, 'w') as fp:
            json.dump(obj, fp, default=myconverter)
    except IOError:
        print('write cache ' + name + ' failed!! Fatal error')
        sys.exit(1)

###############################################################################
## save to files
def save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount,
             src2pop, src2deps, src2sum, pkg_with_cvss):
    """Persist every table to its <cache_dir>/<name> JSON cache file.

    BUG FIX: the failure message for the src2sum file used to say
    'src2deps' (copy-paste error); each file now reports its own name.
    """
    cache = config['DIR']['cache_dir']
    _save_json_cache(dsatable, cache, 'dsatable')
    _save_json_cache(src2dsa, cache, 'src2dsa')
    _save_json_cache(dsa2cve, cache, 'dsa2cve')
    _save_json_cache(cvetable, cache, 'cvetable')
    _save_json_cache(src2sloccount, cache, 'src2sloccount')
    _save_json_cache(src2pop, cache, 'src2pop')
    _save_json_cache(src2deps, cache, 'src2deps')
    _save_json_cache(src2sum, cache, 'src2sum')

    # src2month values may be numpy arrays; convert to plain int lists so
    # they are JSON-serializable. Empty lists are skipped, matching the
    # original loop which never created a key for an empty value.
    int_list = {pkg: [int(v) for v in vals]
                for pkg, vals in src2month.items() if len(vals)}
    _save_json_cache(int_list, cache, 'src2month')

    # pkg_with_cvss values are lists of [low, med, high] triples; shallow
    # copy (again skipping empty values, as the original loop did).
    triples = {pkg: [v for v in vals]
               for pkg, vals in pkg_with_cvss.items() if len(vals)}
    _save_json_cache(triples, cache, 'pkg_with_cvss')

###############################################################################
## Fetch current Packages, Sources and sha1sums files
## These are needed to find CVE stats by sha1sums/pkg-names
## Only Sha1Sums is custom generated, others are from Debian.
## FIXME: Server might do on-the-fly gzip (but should not for bzip2)
## Return: 1 on success, to signal that new parsing is needed.
def fetchMeta(filename):
    """Download <pkg_base_url>/<filename>.bz2 into the cache directory."""
    urlbase = config['URL']['pkg_base_url']
    mydir = config['DIR']['cache_dir']
    bzFile = filename + '.bz2'
    url = urlbase + bzFile
    logging.info('Checking meta file from ' + url + '\n')
    # Download file.
    # BUG FIX: the destination used undefined name 'bzfile' (NameError).
    urllib.request.urlretrieve(url, mydir + bzFile)
    # TODO catch exceptions like file not found
    # TODO check if file has changed, if it is new unpack

###############################################################################
# Sources and Packages are not completely consistent, esp for debian-multimedia
# Here we store manual mappings for these..
def addOrphanPkgs(pkg2src):
    """Add manual binary-package -> source-package mappings to *pkg2src*.

    These packages (mostly from debian-multimedia) are missing or
    inconsistent between the Sources and Packages indices.
    """
    pkg2src['liblame-dev'] = "lame"
    pkg2src['lame-extras'] = "lame"
    pkg2src['moonlight'] = "moon"
    pkg2src['libmoon0'] = "moon"
    pkg2src['xmms-mp4'] = "xmms2"  # was listed twice; deduplicated
    pkg2src['lazarus-src-0.9.30'] = "lazarus"
    pkg2src['lazarus-ide-0.9.30'] = "lazarus"
    pkg2src['lcl-qt4-0.9.30'] = "lazarus"
    pkg2src['lazarus-ide-qt4-0.9.30'] = "lazarus"
    pkg2src['lcl-gtk2-0.9.30'] = "lazarus"
    pkg2src['lazarus-ide-gtk2-0.9.30'] = "lazarus"
    pkg2src['lcl-units-0.9.30'] = "lazarus"
    pkg2src['lazarus-0.9.30'] = "lazarus"
    pkg2src['lazarus-doc-0.9.30'] = "lazarus"
    pkg2src['lcl-0.9.30'] = "lazarus"
    pkg2src['lcl-utils-0.9.30'] = "lazarus"
    pkg2src['lcl-nogui-0.9.30'] = "lazarus"
    pkg2src['libx264-65'] = "x264"
    pkg2src['libx264-114'] = "x264"
    pkg2src['libx264-60'] = "x264"
    # Unresolved candidates kept for reference:
    # pkg2src['libmlt3']
    # pkg2src['libgmerlin-avdec0']
    # pkg2src['libxul-dev']
    # pkg2src['libmyth-0.23.1-0']
    # pkg2src['libmpeg3hv']
    # pkg2src['libquicktimehv']
    # pkg2src['libxul0d']
    # pkg2src['acroread-fonts-kor']

###############################################################################
## Parse dpkg Packages file, create map deb-name->pkg-name
def parsePackages(pkgfile):
    """Parse a dpkg Packages index from the cache directory (TODO stub)."""
    mydir = cache = config['DIR']['cache_dir']
    deb2pkg = dict()
    pkg2virt = dict()
    # BUG FIX: was initialized to an empty tuple; a mapping is intended,
    # consistent with the sibling dicts above.
    virt2pkg = dict()
    logging.info('Parsing Packages file...\n')
    pkgfile = mydir + pkgfile
    #TODO open and parse pkg file

###############################################################################
## Parse dpkg Sources file, create map pkg-name->src-name
def parseSources(srcfile):
    """Parse a dpkg Sources index from the cache directory (TODO stub)."""
    mydir = cache = config['DIR']['cache_dir']
    checklinecont = 0
    pkg2src = dict()
    logging.info('Parsing Sources file...\n')
    srcfile = mydir + srcfile
    #TODO open and parse sources file

###############################################################################
def getSHA1(myhash, collection):
    """Look up *myhash* in the Mongo *collection* (returns a cursor)."""
    return collection.find({"hash": myhash})

###############################################################################
def addSHA1(myhash, deb, src, collection=None):
    """Record a sha1sum -> (deb, src) association (TODO stub).

    BUG FIX: getSHA1 requires the collection argument; it used to be
    called with only the hash, raising TypeError. The new keyword-only
    default keeps the original 3-argument call signature working.
    """
    dic = getSHA1(myhash, collection)
    # NOTE(review): getSHA1 returns a cursor, not a document — indexing it
    # like a dict will fail; confirm intended once the TODO is implemented.
    thash = dic["hash"]
    tdeb = dic["deb"]
    tsrc = dic["src"]
    #TODO insert SHA to database

###############################################################################
## Parse Sha1Sums file. Format: "sha1sum::deb-name::unix-file-path"
## Create 2 maps: sha1sum->file, file->deb-name
def parseSha1Sums(sha1file):
    pass

###############################################################################
## Parse local dpkg status, return list of debs
def parseStatus(stsfile):
    pass

###############################################################################
## Parse Advisory (only Debian supported atm)
def parseAdvisory(adv):
    """Dispatch advisory parsing by vendor (module-level 'state')."""
    if state['vendor'] == 'debian':
        return da.parseDSAhtml(adv)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        # BUG FIX: was system.exit(1) — undefined name 'system'.
        sys.exit(1)

###############################################################################
## Manually fix problems with Advisory entries
def fixAdvisoryQuirks(arg, state, dsastats):
    """Apply vendor-specific manual corrections to parsed advisory data."""
    if state['vendor'] == 'debian':
        return da.fixDSAquirks(arg, dsastats)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        # BUG FIX: was system.exit(1) — undefined name 'system'.
        sys.exit(1)

###############################################################################
## Extract CVE ids from new advisories and print URL for mirror script
def printCVEs(myid, adv, state):
    """Parse *adv*, fix quirks, then (TODO) print its CVE URLs."""
    logging.info('Looking for CVEs in advisory...\n')
    dsastats = parseAdvisory(adv)
    if dsastats == []:
        return
    ## fix DSAs that don't contain correct CVE refs
    dsastats = fixAdvisoryQuirks(myid, state, dsastats)
    #TODO Fix this part
    ##for cve_id in dsastats

###############################################################################
## Update internal vuln.
## (cont.) DB with new Advisory info
## Creates CVEtable for MTBF computation:
## ( cve-id => (date, delay, score1, score2, score3))
def updateCVETables(myid, dsatable, state, src2dsa, dsa2cve, cvetable, client):
    """Merge advisory *myid* into src2dsa/dsa2cve and fetch its CVE data."""
    logging.info('Updating vulnerability database with advisory ' +
                 state['vendor'] + str(myid) + ' \n')
    adv = dsatable[myid]
    dsastats = parseAdvisory(adv)
    if dsastats == []:
        return
    dsastats = fixAdvisoryQuirks(myid, state, dsastats)
    print('Did you fix it?')

    # dsastats[0]: affected source packages; record advisory per package.
    for srcpkg in dsastats[0]:
        if srcpkg in src2dsa:
            src2dsa[srcpkg].append(myid)
        else:
            src2dsa[srcpkg] = [myid]

    # dsastats[2]: CVE ids referenced by the advisory.
    dsa2cve[str(myid)] = dsastats[2]
    for cve_id in dsastats[2]:
        # Fetch CVE via mongodb / cve-search.
        cve = cv.fetchCVE(cve_id, client)
        cvestats = cv.parseCVE(cve_id, cve)
        # Clamp missing/later CVE dates to the advisory date (dsastats[1]).
        finaldate = cvestats[0]
        if cvestats[0] > dsastats[1] or cvestats[0] == 0:
            finaldate = dsastats[1]
        cvedata = (finaldate, dsastats[1] - finaldate,
                   cvestats[1], cvestats[2], cvestats[3])
        cvetable[cve_id] = cvedata
    return cvetable

###############################################################################
## Check for updates on Package information
def aptsec_update(state, config, dsatable, client, src2dsa, dsa2cve,
                  src2month, cvetable, pkg_with_cvss):
    """Fetch new advisories, update all tables, recompute per-pkg stats."""
    args = sys.argv
    # if not('--offline' in args):
    #     fetchMeta('Packages')
    #     fetchMeta('Sources')
    #     fetchMeta('Sha1Sums')
    now = datetime.datetime.now()
    if not('--cves' in args):
        parsePackages('Packages')
        parseSources('Sources')
    # if not('--nosha1' in args):
    #     parseSha1Sums('Sha1Sums')

    if state['vendor'] == 'debian':
        newAdv = da.checkDSAs(state, config)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        # BUG FIX: was system.exit(1) — undefined name 'system'.
        sys.exit(1)

    for myid in newAdv:
        if myid in dsatable:
            logging.info(state['vendor'] + ' advisory ' + myid +
                         ' already known.\n')
        elif '--cves' in args:
            ## scan for CVE urls only?
            # BUG FIX: printCVEs takes (myid, adv, state); the state
            # argument was missing, raising TypeError on this path.
            printCVEs(myid, newAdv[myid], state)
        else:
            ## store advisory and parse it
            dsatable[myid] = newAdv[myid]
            updateCVETables(myid, dsatable, state, src2dsa, dsa2cve,
                            cvetable, client)

    # recompute all pkg statistics
    for srcpkg in src2dsa:
        processCVEs(srcpkg, now, src2dsa, dsa2cve, src2month, cvetable,
                    pkg_with_cvss, config)
    return 0

###############################################################################
## find list of src pkgs from bin pkgs based on pkg2src
def resolvePkg2Src(pkglist, pkg2src):
    """Map binary package names to source packages, skipping unknowns."""
    srclist = []
    for pkg in pkglist:
        if pkg in pkg2src:
            srclist.append(pkg2src[pkg])
        else:
            logging.info('Could not find source package for: ' + pkg + ' .\n')
    return srclist

###############################################################################
## compute and store MTBF, MTBR and Scores of each src pkg
## output: %src2mtbf:
## (srcpkg=> ())
def processCVEs(pkg, now, src2dsa, dsa2cve, src2month, cvetable,
                pkg_with_cvss, config):
    """Aggregate per-package CVE counts by date and CVSS bucket."""
    stats = [now, 0, 0, 0, 0, 0, 0]
    #mylambda = config['TRUST']['lambda']
    mylambda = 0
    cvestats = dict()
    logging.info('Processing package: ' + pkg + '.\n')

    ## keep track of the number of low-medium-high severity vulnerabilities
    ## TODO see how cvss affects vulnerability prediction - if some packages show patterns
    temp_cvss = 10.0
    with_cvss = dict()

    ## To eliminate duplicate cves
    haveseen = dict()

    ## cvestats = (date: number)
    for dsa_id in src2dsa[pkg]:
        for cve_id in dsa2cve[str(dsa_id)]:
            if cve_id in haveseen:
                continue
            haveseen[cve_id] = 1
            tt = cvetable[cve_id][0]
            if tt in cvestats:
                cvestats[tt] += 1
            else:
                cvestats[tt] = 1
            stats[1] += 1

    ## Date at the moment taken from CVE? - not sure.
    ## with_cvss = (date: number low, number med, number high)
    haveseen = dict()
    for dsa_id in src2dsa[pkg]:
        for cve_id in dsa2cve[str(dsa_id)]:
            if cve_id in haveseen:
                continue
            haveseen[cve_id] = 1
            tt = cvetable[cve_id][0]
            try:
                temp_cvss = float(cvetable[cve_id][2])
            except TypeError:
                print(cve_id)
                continue
            if pkg == 'linux':
                print(tt, temp_cvss)
            # Bucket: <4.0 low, <7.0 medium, else high.
            # (The old code duplicated these three branches for the
            # new-key and existing-key cases; behavior is unchanged.)
            if tt not in with_cvss:
                with_cvss[tt] = [0, 0, 0]
            if temp_cvss < 4.0:
                with_cvss[tt][0] += 1
            elif temp_cvss < 7.0:
                with_cvss[tt][1] += 1
            else:
                with_cvss[tt][2] += 1

    if pkg == 'linux':
        print(with_cvss)

    # Ignore pkgs with less than one incident, should not happen..
    if stats[1] < 1:
        return

    prev_date = 0
    weight = 0
    dates = sorted(cvestats, key=cvestats.get)
    try:
        stats[0] = dates[0]
    except IndexError:
        print(pkg + str(dates))
        stats[0] = 0

    count = sum(cvestats.values())
    print(pkg + ' ' + str(count))
    #pkg_with_cvss[pkg] = with_cvss
    format_data(pkg, with_cvss, pkg_with_cvss, True)
    format_data(pkg, cvestats, src2month, False)

###############################################################################
## format vulnerability data into monthly intervals, suitable for tensorflow
def format_data(pkg, cvestats, src2month, cvss):
    """Bucket {date: count-or-triple} into months from 2000-01 to 2018-12.

    When *cvss* is True the values are [low, med, high] triples and the
    result is a flat list of triples; otherwise a numpy array of counts.
    The 228-entry result is stored under src2month[pkg].
    """
    x = []
    y = []
    monthyear = []
    year = []
    items = []
    for when, cnt in list(cvestats.items()):
        # Keys may be ISO strings (from the JSON cache) or datetimes.
        tmpx = parser.parse(when) if isinstance(when, str) else when
        x.append(tmpx)
        try:
            tmpy = int(cnt)
        except TypeError:
            # cvss mode: cnt is a [low, med, high] list.
            tmpy = cnt
        y.append(tmpy)
        items.append((tmpx, tmpy))
    items.sort(key=lambda tup: tup[0])

    # 19 years x 12 months grid, zero-initialized.
    for i in range(2000, 2019):
        temp = []
        for j in range(12):
            if cvss:
                temp.append([0, 0, 0])
            else:
                temp.append(0)
        monthyear.append(temp)

    for i in range(len(x)):
        if cvss:
            monthyear[x[i].year - 2000][x[i].month - 1][0] += y[i][0]
            monthyear[x[i].year - 2000][x[i].month - 1][1] += y[i][1]
            monthyear[x[i].year - 2000][x[i].month - 1][2] += y[i][2]
        else:
            monthyear[x[i].year - 2000][x[i].month - 1] += y[i]

    months_list = [item for sublist in monthyear for item in sublist]
    if not cvss:
        temp_months = np.zeros(len(months_list))
        i = 0
        for element in months_list:
            temp_months[i] = np.float32(element)
            i += 1
        src2month[pkg] = temp_months
    else:
        src2month[pkg] = months_list
    return

###############################################################################
## plot vulnerability time distribution for a single package
def pkg_plot(pkg, cvestats):
    """Bar-plot monthly vulnerability counts for *pkg* (keys: date strings)."""
    colors = list("rgbcmyk")
    items = list(cvestats.items())
    items.sort(key=lambda tup: tup[0])
    x = []
    y = []
    for data_dict in items:
        x.append(parser.parse(data_dict[0]))
        y.append(data_dict[1])

    monthyear = []
    year = []
    # initialize list (2000..2016)
    for i in range(2000, 2017):
        temp = []
        for j in range(12):
            temp.append(0)
        monthyear.append(temp)
    for i in range(len(x)):
        monthyear[x[i].year - 2000][x[i].month - 1] += y[i]

    newx = []
    yearsx = []
    year = []
    monthlabel = []
    month = []
    m1 = 0
    m2 = 0
    k = 0
    label_months = []
    months_list = [item for sublist in monthyear for item in sublist]
    for i in range(len(months_list)):
        label_months.append(i)
    plt.bar(label_months, months_list)

    # Aggregate per half-year (month[]) and per year (year[]).
    for i in range(len(monthyear)):
        year.append(0)
        cc = 0
        for j in range(len(monthyear[i])):
            cc += monthyear[i][j]
            if j == 5:
                m1 = cc
                month.append(m1)
            if j == 11:
                month.append(cc - m1)
                k += 1
        year[i] = cc
    for i in range(len(year)):
        yearsx.append(i + 2000)

    k = 2000
    datapoints = []
    for i in range(len(month)):
        datapoints.append(i + 1)
        if i % 2 == 0:
            monthlabel.append(str(k) + '-1')
        else:
            monthlabel.append('-2')
            k += 1

    # plt.xticks(datapoints, monthlabel)
    # plt.plot.hist(yearsx,year)
    # plt.bar(yearsx, year, 1, color='blue')
    # plt.bar(datapoints, month, 1, color='blue')
    # ts.predict(month)
    plt.legend([pkg], loc='upper left')
    plt.show()
    return 0

###############################################################################
## populate
## (cont.) populate src2sloccount dictionary with number of source lines of code in
## format (total, [ansic, cpp, asm, java, python, perl, sh])
def getslocs(src2month, src2sloccount):
    """Parse ./sloc_report.txt lines 'total pkg lang=count ...' into src2sloccount."""
    with open('./sloc_report.txt') as f:
        content = f.readlines()
    for line in content:
        words = line.split()
        total = int(words[0])
        pkg = words[1]
        # Per-language counts default to 0 when absent from the report line.
        counts = {'ansic': 0, 'cpp': 0, 'asm': 0, 'java': 0,
                  'python': 0, 'perl': 0, 'sh': 0}
        for w in words[2:]:
            ww = w.split('=')
            if ww[0] in counts:
                counts[ww[0]] = int(ww[1])
        src2sloccount[pkg] = (total, [counts['ansic'], counts['cpp'],
                                      counts['asm'], counts['java'],
                                      counts['python'], counts['perl'],
                                      counts['sh']])

###############################################################################
## get popularity contest data in format src_pkg -> (installed, vote, old, recent)
def getpop(src2dsa, src2pop):
    """Read by_vote.csv and record popcon votes for packages with advisories."""
    with open('by_vote.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in reader:
            try:
                if row[1] in src2dsa:
                    src2pop[row[1]] = row[3]
            except IndexError:
                # Malformed row: report it and keep going.
                print(row)
                continue
    return

###############################################################################
## get dependencies of a given source
def getdeps(src2dsa, src2deps):
    """Populate src2deps for every source package that has an advisory."""
    for srcpkg in src2dsa:
        deps.getdeps(srcpkg, src2deps)

###############################################################################
## print some meta-info on internal data
def aptsec_about(dsatable, cvetable, pkg2src, src2dsa):
    """Print summary counts of the internal databases.

    BUG FIX: print() does not apply %-formatting to extra arguments, so the
    old calls printed the raw template plus a tuple; also the DSA count was
    taken from num_src instead of num_dsa. Both are fixed with explicit
    %-formatting.
    """
    num_dsa = len(dsatable)
    num_cve = len(cvetable)
    num_pkg = len(pkg2src)
    num_src = len(src2dsa)
    print('\nThe current database records %d binary packages and %d DSAs.\n'
          % (num_pkg, num_dsa))
    print('%d CVEs are associated with %d source packages.\n'
          % (num_cve, num_src))
    return

###############################################################################
## use scores to suggest alternative packages
def aptsec_alternatives(pkg):
    pass
###############################################################################
## print overview for pkg high scores
def aptsec_hitlist():
    # TODO: not implemented yet.
    pass

###############################################################################
## evaluation helper
## compute stats until date given in $2, then compute stats
## for the next year to check accuracy of the prediction.
## @cvestats = (date base-score impact-score exploit-score)
def simulate_stats(pkg, year):
    # TODO: not implemented yet.
    pass

###############################################################################
##TODO Printing functions
def plot_all(src2month, src2sloccount, pkg_with_cvss):
    """Produce the full set of statistics plots (counts, densities, SLoC,
    yearly/quarterly aggregates, Debian LTS comparison).

    NOTE(review): the caller assigns `src2sum = plot_all(...)` but the
    final `return(src2sum)` is commented out, so this returns None —
    confirm intended.
    """
    ## Sum of vulnerabilities by package
    src2sum = dict()
    src2year = dict()
    src2month_loc = dict()
    src2lastyears = dict()
    src2dens = dict()

    # Keep only the high-severity count ([2]) per month for each package.
    src2month_temp = dict()
    for i in pkg_with_cvss:
        src2month_temp[i] = []
        for j in range(len(src2month[i])):
            #src2month_temp[i].append(pkg_with_cvss[i][j][1]+pkg_with_cvss[i][j][2])
            src2month_temp[i].append(pkg_with_cvss[i][j][2])

    # NOTE(review): iterates src2month but indexes src2month_temp, which was
    # built from pkg_with_cvss keys — a package present in src2month only
    # would raise KeyError; confirm the two key sets always match.
    for i in src2month:
        src2month_loc[i] = src2month_temp[i][:-12]  #cut data for 2018

    years = 17  # 2001 - 2000 + years
    year_sum = [0] * years
    year_num = [0] * years
    for pkg in src2month_loc:
        for j in range(years):
            # Months 12..23 are year 2001, etc. (index 0..11 = 2000 skipped).
            temp = sum(src2month_loc[pkg][12*(1+j):12*(2+j)])
            if (temp > 0):
                year_num[j] += 1
            year_sum[j] += temp
        ## For last 2 years
        total = sum(src2month_loc[pkg][:])
        last_years = sum(src2month_loc[pkg][-24:])
        print(pkg + '; ' + str(last_years))
        if (total > 1):
            src2sum[pkg] = total
            src2lastyears[pkg] = last_years

    #calc total
    sum_total = 0
    one_only = 0
    one_plus = 0
    for p in src2month:
        sum_part = sum(src2month_loc[p][:])
        sum_total += sum_part
        if (sum_part == 1):
            one_only += 1
        elif (sum_part > 1):
            one_plus += 1
    print('Total last 2 years = ', sum_total)
    print('one_only = ', one_only)
    print('one_plus = ', one_plus)

    # Packages sorted by descending total vulnerability count.
    values = sorted(src2sum.values(), reverse=True)
    #print(values)
    keys = list(sorted(src2sum, key=src2sum.__getitem__, reverse=True))
    density = []
    density_keys = []
    size = []
    size_dens = []
    for pkg in keys:
        try:
            size.append(src2sloccount[pkg][0]/1000)
        except (KeyError):
            size.append(0)
    # Vulnerabilities per KSLoC, only for packages with known size > 0.
    j = 0
    for pkg in keys:
        try:
            if (src2sloccount[pkg][0]) > 0:
                density.append((values[j]/src2sloccount[pkg][0])*1000)
                density_keys.append(pkg)
                src2dens[pkg] = (values[j]/src2sloccount[pkg][0])*1000
                size_dens.append(src2sloccount[pkg][0])
        except(KeyError):
            pass
        j += 1

    # Keep every 10th key as an axis label.
    i = 0
    few_keys = []
    #print(keys)
    for k in keys:
        if (i == 0):
            few_keys.append(k)
        i += 1
        if (i == 10):
            i = 0

    print('package number =' + str(len(values)) + '... ' + str(len(keys)))
    carlosplt.pre_paper_plot(True)
    #plt.style.use('ggplot')
    # 'stats' resolves to scipy.stats here (see import shadowing at file top).
    print('Spearman correlation: ', stats.spearmanr(values, size))
    with open('sizes.txt', 'w') as thefile:
        for item in size:
            thefile.write("%.3f\n" % item)
    plt.figure(figsize=(10, 5))
    plt.plot(values, color='darkblue', lw = 2)
    #plt.plot(size, 'ro', color='darkred', lw = 2, label='Size in KSLoC')
    plt.xticks(np.arange(0, len(src2sum), 10.0), few_keys, rotation="vertical")
    plt.ylabel('Vulnerabilities')
    plt.yscale('log')
    plt.grid()
    #plt.xscale('log')
    plt.tight_layout()
    plt.legend()
    carlosplt.post_paper_plot(True, True, True)
    plt.show()
    print('Yearly vulnerabilites in total' + str(year_sum))

    src2sloc = dict()
    for pkg in src2sloccount:
        src2sloc[pkg] = src2sloccount[pkg][0]

    ## Density
    density = sorted(src2dens.values(), reverse=True)
    with open('densities.txt', 'w') as thefile:
        for item in density:
            thefile.write("%.3f\n" % item)
    density_keys = list(sorted(src2dens, key=src2dens.__getitem__, reverse=True))
    density_few_keys = []
    # NOTE(review): 'i' is NOT reset to 0 before this loop (it carries the
    # value left by the few_keys loop above), so the every-10th label
    # selection is offset — likely a bug; confirm.
    for k in density_keys:
        if (i == 0):
            density_few_keys.append(k)
        i += 1
        if (i == 10):
            i = 0
    plt.figure(figsize=(10, 5))
    # NOTE(review): 'density' was re-sorted above but 'size_dens' kept its
    # original order, so these x/y pairs are no longer aligned — confirm.
    plt.plot(size_dens, density, 'ro', color='darkblue', lw = 2)
    plt.xticks(np.arange(0, len(density), 10.0), density_few_keys, rotation="vertical")
    plt.ylabel('Vulnerability density')
    plt.yscale('log')
    plt.xscale('log')
    plt.tight_layout()
    carlosplt.post_paper_plot(True, True, True)
    plt.show()
    ## Spearman density size
    print('Spearman correlation: ', stats.spearmanr(density, size_dens))

    ## SLoCs
    values = sorted(src2sloc.values(), reverse=True)
    #print(values)
    keys = list(sorted(src2sloc, key=src2sloc.__getitem__, reverse=True))
    i = 0
    few_keys = []
    for k in keys:
        if (i == 0):
            few_keys.append(k)
        i += 1
        if (i == 10):
            i = 0
    carlosplt.pre_paper_plot(True)
    plt.figure(figsize=(10, 5))
    plt.plot(values, color='darkblue', lw = 2)
    plt.xticks(np.arange(0, len(src2sloc), 10.0), few_keys, rotation="vertical")
    plt.ylabel('SLoC')
    plt.yscale('log')
    plt.xscale('log')
    plt.tight_layout()
    carlosplt.post_paper_plot(True, True, True)
    plt.show()

    ## Number of affected packages
    n = len(year_sum)
    yearsx = []
    for i in range(1, years+1):
        yearsx.append('\''+str(i).zfill(2))
    x = range(years)
    width = 1/2
    plt.bar(x, year_num, width, color='darkblue', edgecolor='black')
    plt.xticks(np.arange(0, n), yearsx)
    plt.ylabel('Number of affected packages')
    plt.xlabel('Year')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()

    ## Average number of vulnerabilities per package per year
    average_per_year = [0] * years
    for j in range(years):
        # NOTE(review): divides by year_num[j]; a year with zero affected
        # packages would raise ZeroDivisionError — confirm data guarantees.
        average_per_year[j] = year_sum[j]/float(year_num[j])
    #print(average_per_year)
    x_values = list(range(1, years+1))
    #print(x_values)
    slope = np.polyfit(x_values, average_per_year, 1)
    #slope = np.polyval(slope,x_values)
    print('Slope: ' + str(slope))

    n = len(year_sum)
    x = range(years)
    width = 1/2
    #plt.bar(x, year_sum, width)
    plt.bar(x, average_per_year, width, color='darkblue', edgecolor='black')
    plt.xticks(np.arange(0, n), yearsx)
    plt.ylabel('Average vulnerabilities per package')
    plt.xlabel('Year')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()

    ## Work on selected packages (php7.0, openjdk8, wireshark, chromium-browser, icedove, linux)
    src2quarter = dict()
    quarter_num = years*4
    # Here for only up to 2016 - let's change that
    #return(src2sum)
    # (Commented-out per-package php5 quarterly before/after plot removed
    #  for clarity — see VCS history for the original experiment.)

    # Quarter axis labels: 'Q1'01', then blanks for Q2-Q4 of each year.
    quartersx = []
    for i in range(1, years+1):
        for j in range(1, 5):
            if j == 1:
                quartersx.append('Q' + str(j)+'\''+str(i).zfill(2))
            else:
                quartersx.append(' ')

    # (Commented-out openjdk-7/openjdk-8 quarterly before/after plot removed
    #  for clarity — see VCS history for the original experiment.)

    ###############################################################################################
    n = len(year_sum)
    x = range(years)
    width = 1/2
    plt.bar(x, year_sum, width, color='darkblue', edgecolor='black')
    #plt.bar(x, average_per_year, width)
    plt.xticks(np.arange(0, n), yearsx)
    plt.ylabel('Total vulnerabilities')
    plt.xlabel('Year')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()
    sum_all = sum(values)
    print("Total: ", sum_all)
    ###############################################################################################

    # Get LTS and plot: cached per-month DLA counts, mined on first run.
    try:
        with open("DLA_sum.txt", "rb") as fp:
            ltslist = pickle.load(fp)
    except IOError:
        ltslist = dla.getDLAs()
    print(ltslist)

    ## Plot for wheezy
    quarter_num += 1
    quarter_sum = [0] * quarter_num
    # LTS data starts 14 years in (2014); pad the front with zero months.
    totalLTS = [0] * (14 * 12) + ltslist

    for pkg in src2month_loc:
        for j in range(quarter_num):
            temp = sum(src2month_loc[pkg][12+(3*j):12+3*(j+1)])
            quarter_sum[j] += temp
    LTS_quarter = []
    for j in range(quarter_num):
        temp = sum(totalLTS[12+(3*j):12+3*(j+1)])
        LTS_quarter.append(temp)
    quartersx.append("Q1'18")

    ## Print all LTS
    cut = 12*4+1
    n = len(quarter_sum)
    x = range(quarter_num)
    width = 1/2
    plt.bar(x, LTS_quarter, width, color='brown', label='regular support', edgecolor='black')
    plt.xticks(np.arange(0, n), quartersx, rotation="vertical")
    plt.ylabel('Vulnerabilities per quarter of Debian LTS')
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()

    ## Filter only wheezy:
    # Wheezy regular support window, then the "errors" tail, then LTS only.
    quarter_sum_regular = [0] * (12*4+1) + quarter_sum[12*4+1:12*4+9] + [0] * 12
    quarter_sum_errors = [0] * (12*4 + 9) + quarter_sum[12*4+9:12*4+9+5] + [0] * 7
    LTS_quarter = [0] * (15*4+2) + LTS_quarter[15*4+2:]
    #print(quarter_sum_errors)
    cut = 12*4+1
    n = len(quarter_sum) - cut
    x = range(quarter_num-cut)
    width = 1/2
    #print(len(LTS_quarter))
    bar1 = plt.bar(x, quarter_sum_regular[cut:], width, color='darkblue', label='regular', edgecolor='black')
    bar12 = plt.bar(x, quarter_sum_errors[cut:], width, color='darkorange', label='regular*', edgecolor='black')
    bar2 = plt.bar(x, LTS_quarter[cut:], width, color='darkred', label ='long-term', edgecolor='black')
    plt.legend(handles=[bar1, bar12, bar2])
    plt.xticks(np.arange(0, n), quartersx[cut:], rotation="vertical")
    plt.ylabel('Vulnerabilities per quarter of Debian Wheezy')
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()

    ## power-law fit
    # (Commented-out powerlaw model fitting / distribution comparison
    #  experiments removed for clarity — see VCS history.)
    ###############################################################################################
    ## return(src2sum)

###############################################################################
## print help text
def aptsec_help():
    print('See manual for correct usage\n')

###############################################################################
## Print system status report from
## (cont.) component(files) measurements (sha1sums)
## Expected input format is Linux IMA. We assume input was validated.
##
## Note: aptsec_status(), considers *reportedly installed* packages, while this
## one looks at *actually loaded* software that influenced the CPU since bootup.

# ---- script entry point: dispatch on the first CLI argument ----
try:
    action = sys.argv[1]
except IndexError:
    # No argument given: default to a full update run.
    print('No argument given')
    action = 'update'
    #aptsec_help()
    #sys.exit(0)
    #action = ''

# MongoDB client for cve-search lookups (default localhost connection).
client = MongoClient()
dsatable = dict()
cve_db = client.cvedb
src2dsa = dict()
dsa2cve = dict()
cvetable = dict()
src2month = dict()
src2deps = dict()
pkg_with_cvss = dict()
src2sloccount = dict()
src2pop = dict()
src2sum = dict()

(state, err) = load_state()
state['vendor'] = 'debian' #detect_distribution()

#d = state['cache_dir']
#if not os.path.exists(d):
#    os.makedirs(d)

if action == 'update':
    # Refresh advisories/CVEs, recompute statistics, persist, then predict.
    (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, pkg_with_cvss) = load_DBs()
    # loadsha1lists()
    aptsec_update(state, config, dsatable, client, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss)
    # save_sha1lists()
    # getslocs(src2month, src2sloccount)
    # getpop(src2dsa, src2pop)
    # getdeps(src2dsa, src2deps)
    save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, src2sum, pkg_with_cvss)
    save_state(state)
    # stats.test(src2month, src2pop, src2sloccount)
    # lstm.predict(src2month, src2sloccount, src2pop, src2deps)
    pred.predict(src2month, 0)
    # print(pkg_with_cvss['linux'])
    low = []
    med = []
    high = []
elif action == 'status':
    (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, pkg_with_cvss) = load_DBs()
    # NOTE(review): aptsec_status is not defined anywhere in this file as
    # visible here — this path would raise NameError unless it is defined
    # elsewhere; confirm.
    aptsec_status(sys.argv[2])
elif action == 'show':
    (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, pkg_with_cvss) = load_DBs()
    # NOTE(review): plot_all currently returns None (its return is commented
    # out), so src2sum is cleared here before being re-saved — confirm.
    src2sum = plot_all(src2month, src2sloccount, pkg_with_cvss)
    save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, src2sum, pkg_with_cvss)
else:
    aptsec_help()

# Always persist state on exit, whichever action ran.
save_state(state)