12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377 |
- #!/usr/bin/python3
- ## Based on the perl code of Trustminer by CASED
- ## Nikos
- import sys
- import os
- from pymongo import MongoClient
- #mongodb assumes database at default path
- import logging
- import configparser
- import json
- import csv
- import urllib.request
- import datetime
- import debian_advisory as da
- import cveparse as cv
- import matplotlib.pyplot as plt
- import numpy as np
- from dateutil import parser
- import plotly.plotly as py
- import plotly.graph_objs as go
- #import lstm_reg as lstm
- import metadata as meta
- import deps
- import psycopg2
- import powerlaw as pl
- import DLAmine as dla
- import pickle
- import paper_plots as carlosplt
- import stat_tests as stats
- from matplotlib2tikz import save as tikz_save
- import prediction as pred
- import scipy.stats as stats
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
## Increase the recursion limit by much to allow bs to parse large files
## This is not good practice
sys.setrecursionlimit(6000)

# Load config file as library; abort early when it is missing.
config = configparser.ConfigParser()
config.read('config_test')
# fix: `config.sections` is a bound method; comparing it to [] was always
# False, so a missing config file was never detected. Call the method.
if config.sections() == []:
    print('configuration file not found\n')
    sys.exit(1)

# global variables
secperday = 60 * 60 * 24          # seconds per day
now = datetime.datetime.now()     # reference timestamp for this run
verbosity = 1
- ###############################################################################
- ## logging
- # 1 fatal errors
- # 2 errors
- # 3 note
- # 4 trace
- # 5 debug
def msg(lvl, msg):
    """Print *msg* when *lvl* is at or below the configured loglevel.

    Levels: 1 fatal, 2 error, 3 note, 4 trace, 5 debug.
    """
    threshold = int(config['LOG']['loglevel'])
    if lvl > threshold:
        return
    print(msg)
def debug(message):
    """Log *message* at debug level (5).

    fix: the original parameter was named ``msg``, shadowing the module's
    ``msg()`` helper, so ``msg(5, msg)`` tried to call the string argument
    and raised TypeError on every use. Renamed the parameter to fix.
    """
    msg(5, message)
- # Need to see if this is necessary
- ## load state, different from DBs in that we always need it
def load_state():
    """Load mining state from the cache, or build a fresh default state.

    Returns a ``(state, err)`` tuple where *err* is 0 when the cached state
    was read and 1 when the defaults were used (first run).
    """
    cache = config['DIR']['cache_dir'] + 'state'
    try:
        with open(cache) as json_data:
            return (json.load(json_data), 0)
    except FileNotFoundError:
        # No cache yet: start from the beginning with an empty state.
        fresh = {
            'cache_dir': cache,
            'next_adv': 0,
            'next_fsa': 0,
            'Packages': '',
            'Sources': '',
            'Sha1Sums': '',
        }
        return (fresh, 1)
- ###############################################################################
- ## save state, different from DBs in that we always need it
def save_state(state):
    """Persist the mining *state* dict to the cache as JSON.

    Exits the process on write failure, since nothing can be resumed
    without a saved state.
    """
    cache = config['DIR']['cache_dir'] + 'state'
    try:
        with open(cache, 'w') as out:
            json.dump(state, out)
    except IOError:
        print('write cache state failed!! Fatal error')
        sys.exit(1)
- ###############################################################################
- ## load sha lists :TODO later
def load_sha1lists():
    """Load SHA1 lists from the cache. TODO: not implemented yet."""
    # NOTE(review): the path is computed but nothing is loaded or returned.
    cache = config['DIR']['cache_dir'] + 'state'
- ###############################################################################
- ## save sha lists :TODO later
def save_sha1lists():
    """Persist SHA1 lists to the cache. TODO: not implemented yet."""
    return None
- ###############################################################################
- ## load from files
def load_DBs():
    """Load all cached JSON tables from the cache directory.

    Each table that cannot be read (missing or corrupt file — e.g. on the
    first run) is returned as an empty dict and a diagnostic is printed.

    Returns a tuple:
    (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount,
     src2pop, src2deps, pkg_with_cvss)
    """
    cache = config['DIR']['cache_dir']
    # Load order matches the original code so diagnostics appear in the
    # same sequence on a first run.
    names = ['dsatable', 'src2dsa', 'dsa2cve', 'cvetable', 'src2deps',
             'src2month', 'pkg_with_cvss', 'src2sloccount', 'src2pop']
    tables = {}
    for name in names:
        try:
            with open(cache + name) as fp:
                tables[name] = json.load(fp)
        except (IOError, ValueError):
            # First run (no cache) or corrupt JSON: fall back to empty.
            print('read cache ' + name + ' failed!! Maybe first run of the system?')
            tables[name] = dict()
    return (tables['dsatable'], tables['src2dsa'], tables['dsa2cve'],
            tables['cvetable'], tables['src2month'], tables['src2sloccount'],
            tables['src2pop'], tables['src2deps'], tables['pkg_with_cvss'])
- ###############################################################################
- ## help for save_DBs
def myconverter(o):
    """``json.dump(default=...)`` hook for non-JSON-native values.

    Converts datetimes/timedeltas to their string form and numpy floats
    to plain Python ints (truncating). Returns None for anything else,
    which json serializes as null.

    fix: ``np.float`` was deprecated in NumPy 1.20 and removed in 1.24;
    use the ``np.floating`` abstract base instead. Also return a builtin
    ``int`` rather than ``o.astype(int)`` (a numpy scalar), which json
    cannot encode.
    """
    if isinstance(o, (datetime.datetime, datetime.timedelta)):
        return str(o)
    if isinstance(o, np.floating):
        return int(o)
- ###############################################################################
- ## save to files
def save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, src2sum, pkg_with_cvss):
    """Persist all in-memory tables as JSON files under the cache dir.

    Exits the process on any write failure, consistent with save_state().

    fix: the failure message for the src2sum cache wrongly said
    'src2deps'; the per-table duplication is collapsed into one helper.
    """
    cache = config['DIR']['cache_dir']

    def _dump(name, data):
        # One cache file per table; abort hard on I/O errors like the
        # rest of the pipeline does.
        try:
            with open(cache + name, 'w') as fp:
                json.dump(data, fp, default=myconverter)
        except IOError:
            print('write cache ' + name + ' failed!! Fatal error')
            sys.exit(1)

    _dump('dsatable', dsatable)
    _dump('src2dsa', src2dsa)
    _dump('dsa2cve', dsa2cve)
    _dump('cvetable', cvetable)
    _dump('src2sloccount', src2sloccount)
    _dump('src2pop', src2pop)
    _dump('src2deps', src2deps)
    _dump('src2sum', src2sum)
    # src2month values may be numpy floats; coerce each entry to a plain
    # int so the JSON stays portable.
    _dump('src2month',
          {pkg: [int(v) for v in series] for pkg, series in src2month.items()})
    # pkg_with_cvss values are per-month [low, med, high] lists; shallow
    # copy preserves the original's copy-then-dump behavior.
    _dump('pkg_with_cvss',
          {pkg: list(series) for pkg, series in pkg_with_cvss.items()})
- ###############################################################################
- ## Fetch current Packages, Sources and sha1sums files
- ## These are needed to find CVE stats by sha1sums/pkg-names
- ## Only Sha1Sums is custom generated, others are from Debian.
- ## FIXME: Server might do on-the-fly gzip (but should not for bzip2)
- ## Return: 1 on success, to signal that new parsing is needed.
def fetchMeta(filename):
    """Download ``<filename>.bz2`` from the configured package base URL
    into the cache directory.

    fix: the original passed the undefined name ``bzfile`` (case typo for
    ``bzFile``) to urlretrieve, raising NameError on every call.
    """
    urlbase = config['URL']['pkg_base_url']
    mydir = config['DIR']['cache_dir']
    bzFile = filename + '.bz2'
    url = urlbase + bzFile
    logging.info('Checking meta file from ' + url + '\n')
    # Download file
    urllib.request.urlretrieve(url, mydir + bzFile)
    # TODO catch exceptions like file not found
    # TODO check if file has changed, if it is new unpack
- ###############################################################################
- # Sources and Packages are not completely consistent, esp for debian-multimedia
- # He we store manual mappings for these..
def addOrphanPkgs(pkg2src):
    """Add manual binary→source mappings for packages whose Sources and
    Packages entries are inconsistent (notably debian-multimedia)."""
    pkg2src.update({
        'liblame-dev': 'lame',
        'lame-extras': 'lame',
        'moonlight': 'moon',
        'libmoon0': 'moon',
        'xmms-mp4': 'xmms2',
        'lazarus-src-0.9.30': 'lazarus',
        'lazarus-ide-0.9.30': 'lazarus',
        'lcl-qt4-0.9.30': 'lazarus',
        'lazarus-ide-qt4-0.9.30': 'lazarus',
        'lcl-gtk2-0.9.30': 'lazarus',
        'lazarus-ide-gtk2-0.9.30': 'lazarus',
        'lcl-units-0.9.30': 'lazarus',
        'lazarus-0.9.30': 'lazarus',
        'lazarus-doc-0.9.30': 'lazarus',
        'lcl-0.9.30': 'lazarus',
        'lcl-utils-0.9.30': 'lazarus',
        'lcl-nogui-0.9.30': 'lazarus',
        'libx264-65': 'x264',
        'libx264-114': 'x264',
        'libx264-60': 'x264',
    })
    # Unresolved candidates kept for reference:
    # libmlt3, libgmerlin-avdec0, libxul-dev, libmyth-0.23.1-0,
    # libmpeg3hv, libquicktimehv, libxul0d, acroread-fonts-kor
- ###############################################################################
- ## Parse dpkg Packages file, create map deb-name->pkg-name
def parsePackages(pkgfile):
    """Parse a dpkg Packages file and build the deb-name→pkg-name maps.

    TODO: the actual file parsing is not implemented yet; this only sets
    up the (currently unused) result containers.

    fix: ``virt2pkg`` was initialized to an empty tuple instead of a dict
    like its siblings, which would break any future item assignment.
    """
    mydir = cache = config['DIR']['cache_dir']
    deb2pkg = dict()
    pkg2virt = dict()
    virt2pkg = dict()
    logging.info('Parsing Packages file...\n')
    pkgfile = mydir + pkgfile
    #TODO open and parse pkg file
- ###############################################################################
- ## Parse dpkg Sources file, create map pkg-name->src-name
def parseSources(srcfile):
    """Parse a dpkg Sources file to build the pkg-name→src-name map.

    TODO: the actual file parsing is not implemented yet.
    """
    cache_dir = config['DIR']['cache_dir']
    checklinecont = 0
    pkg2src = {}
    logging.info('Parsing Sources file...\n')
    srcfile = cache_dir + srcfile
    # TODO: open and parse the Sources file
- ###############################################################################
def getSHA1(myhash, collection):
    """Query *collection* (a MongoDB collection) for records matching *myhash*."""
    query = {"hash": myhash}
    return collection.find(query)
- ###############################################################################
def addSHA1(myhash, deb, src, collection=None):
    """Record a (sha1, deb, src) association. TODO: DB insert missing.

    fix: ``getSHA1`` requires the Mongo collection, but the original call
    omitted it and raised TypeError on every use. ``collection`` is
    appended with a default so existing positional callers keep working.
    """
    dic = getSHA1(myhash, collection)
    thash = dic["hash"]
    tdeb = dic["deb"]
    tsrc = dic["src"]
    #TODO insert SHA to database
- ###############################################################################
- ## Parse Sha1Sums file. Format: "sha1sum::deb-name::unix-file-path"
- ## Create 2 maps: sha1sum->file, file->deb-name
def parseSha1Sums(sha1file):
    """Parse a Sha1Sums file ("sha1sum::deb-name::unix-file-path") into
    sha1sum→file and file→deb-name maps. TODO: not implemented yet."""
    return None
- ###############################################################################
- ## Parse local dpkg status, return list of debs
def parseStatus(stsfile):
    """Parse the local dpkg status file into a list of debs.
    TODO: not implemented yet."""
    return None
- ###############################################################################
- ## Parse Advisory (only Debian supported atm
def parseAdvisory(adv):
    """Parse a vendor advisory; only Debian DSAs are supported.

    NOTE(review): relies on a module-global ``state`` being set by the
    caller before this runs — confirm against the main driver.

    fix: the unsupported-vendor branch called ``system.exit(1)`` — an
    undefined name — instead of ``sys.exit(1)``.
    """
    if state['vendor'] == 'debian':
        return da.parseDSAhtml(adv)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        sys.exit(1)
- ###############################################################################
- ## Manually fix problems with Advisory entries
def fixAdvisoryQuirks(arg, state, dsastats):
    """Manually repair known-broken advisory entries (Debian only).

    fix: the unsupported-vendor branch called ``system.exit(1)`` — an
    undefined name — instead of ``sys.exit(1)``.
    """
    if state['vendor'] == 'debian':
        return da.fixDSAquirks(arg, dsastats)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        sys.exit(1)
- ###############################################################################
- ## Extract CVE ids from new advisories and print URL for mirror script
def printCVEs(myid, adv, state):
    """Extract CVE ids from a new advisory and print URLs for the mirror
    script. TODO: the CVE printing loop itself is not implemented."""
    logging.info('Looking for CVEs in advisory...\n')
    advisory_stats = parseAdvisory(adv)
    if advisory_stats == []:
        return
    ## fix DSAs that don't contain correct CVE refs
    advisory_stats = fixAdvisoryQuirks(myid, state, advisory_stats)
    #TODO Fix this part
    ##for cve_id in advisory_stats
- ###############################################################################
- ## Update internal vuln. DB with new Advisory info
- ## Creates CVEtable for MTBF computation:
- ## ( cve-id => (date, delay, score1, score2, score3))
def updateCVETables(myid, dsatable, state, src2dsa, dsa2cve, cvetable, client):
    """Update the internal vulnerability DB with one advisory.

    Fills ``src2dsa`` (srcpkg → advisory ids), ``dsa2cve`` (advisory →
    CVE ids) and ``cvetable``:
    ``cve-id => (date, delay, score1, score2, score3)``.

    Returns ``cvetable``, or None when the advisory fails to parse.

    fix: removed a stray debug print ('Did you fix it?'); replaced the
    manual append-or-create on src2dsa with setdefault.
    """
    logging.info('Updating vulnerability database with advisory ' + state['vendor'] + str(myid) + ' \n')

    adv = dsatable[myid]
    dsastats = parseAdvisory(adv)
    if dsastats == []:
        return
    dsastats = fixAdvisoryQuirks(myid, state, dsastats)
    for srcpkg in dsastats[0]:
        src2dsa.setdefault(srcpkg, []).append(myid)
    dsa2cve[str(myid)] = dsastats[2]
    for cve_id in dsastats[2]:
        # CVEs are fetched locally via mongodb + cve-search, not over HTTP.
        cve = cv.fetchCVE(cve_id, client)
        cvestats = cv.parseCVE(cve_id, cve)
        # Clamp the CVE date to the advisory date when it is later than
        # the advisory or missing (0).
        finaldate = cvestats[0]
        if cvestats[0] > dsastats[1] or cvestats[0] == 0:
            finaldate = dsastats[1]
        cvedata = (finaldate, dsastats[1] - finaldate, cvestats[1], cvestats[2], cvestats[3])
        cvetable[cve_id] = cvedata
    return cvetable
- ###############################################################################
- ## Check for updates on Package information
def aptsec_update(state, config, dsatable, client, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss):
    """Check for new advisories and recompute all package statistics.

    Returns 0 on success.

    fixes: ``printCVEs`` was called without its required ``state``
    argument (TypeError); the unsupported-vendor branch called
    ``system.exit(1)`` (NameError) instead of ``sys.exit(1)``.
    """
    args = sys.argv
    # if not('--offline' in args):
    #     fetchMeta('Packages')
    #     fetchMeta('Sources')
    #     fetchMeta('Sha1Sums')
    now = datetime.datetime.now()

    if not ('--cves' in args):
        parsePackages('Packages')
        parseSources('Sources')

    # if not('--nosha1' in args):
    #     parseSha1sums('Sha1Sums')
    if state['vendor'] == 'debian':
        newAdv = da.checkDSAs(state, config)
    else:
        print('Unsupported distribution. We only support Debian at the moment')
        sys.exit(1)
    for myid in newAdv:
        if myid in dsatable:
            logging.info(state['vendor'] + ' advisory ' + myid + ' already known.\n')
        elif '--cves' in args:
            ## scan for CVE urls only?
            printCVEs(myid, newAdv[myid], state)
        else:
            ## store advisory and parse it
            dsatable[myid] = newAdv[myid]
            updateCVETables(myid, dsatable, state, src2dsa, dsa2cve, cvetable, client)

    # recompute all pkg statistics
    for srcpkg in src2dsa:
        processCVEs(srcpkg, now, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss, config)

    return 0
- ###############################################################################
- ## find list of src pkgs from bin pkgs based on pkg2src
def resolvePkg2Src(pkglist, pkg2src):
    """Map each binary package in *pkglist* to its source package.

    Packages without a known source are skipped (with an info log), so
    the result may be shorter than the input.
    """
    srclist = []
    for pkg in pkglist:
        try:
            srclist.append(pkg2src[pkg])
        except KeyError:
            logging.info('Could not find source package for: ' + pkg + ' .\n')
    return srclist
- ###############################################################################
- ## compute and store MTBF, MTBR and Scores of each src pkg
- ## output: %src2mtbf:
- ## (srcpkg=> ())
def processCVEs(pkg, now, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss, config):
    """Compute per-package CVE statistics and feed them to format_data().

    Builds two date-indexed maps for *pkg* from its advisories:
    - cvestats:  date -> number of (deduplicated) CVEs
    - with_cvss: date -> [num low (<4.0), num medium (<7.0), num high]
    and stores the monthly-bucketed results in src2month / pkg_with_cvss.
    """
    # stats[0] is later set to a date; stats[1] counts deduplicated CVEs.
    # NOTE(review): the local name `stats` shadows the stats module
    # imported at file top.
    stats = [now, 0, 0, 0, 0, 0, 0]
    #mylambda = config['TRUST']['lambda']
    mylambda = 0
    cvestats = dict()
    logging.info('Processing package: ' + pkg + '.\n')
    ## keep track of the number of low-medium-high severity vulnerabilities
    ## TODO see how cvss affects vulnerability prediction - if some packages show patterns
    temp_cvss = 10.0
    with_cvss = dict()
    ## To eliminate duplicate cves
    haveseen = dict()
    ## cvestats = (date: number)
    # First pass: count CVEs per date, deduplicating CVE ids across DSAs.
    for dsa_id in src2dsa[pkg]:
        for cve_id in dsa2cve[str(dsa_id)]:
            if cve_id in haveseen:
                continue
            else:
                haveseen[cve_id] = 1
                tt = cvetable[cve_id][0]
                if tt in cvestats:
                    cvestats[tt] += 1
                else:
                    cvestats[tt] = 1
                stats[1] += 1
    ## Date at the moment taken from CVE? - not sure.
    ## with_cvss = (date: number low, number med, number high)
    # Second pass: same walk, but bucket by CVSS severity band.
    haveseen = dict()
    for dsa_id in src2dsa[pkg]:
        for cve_id in dsa2cve[str(dsa_id)]:
            if cve_id in haveseen:
                continue
            else:
                haveseen[cve_id] = 1
                tt = cvetable[cve_id][0]
                # CVEs with a non-numeric score are reported and skipped.
                try: temp_cvss = float(cvetable[cve_id][2])
                except TypeError:
                    print(cve_id)
                    continue
                if pkg=='linux':
                    print(tt, temp_cvss)

                if tt in with_cvss:
                    if (temp_cvss<4.0):
                        with_cvss[tt][0] += 1
                    elif (temp_cvss<7.0):
                        with_cvss[tt][1] += 1
                    else:
                        with_cvss[tt][2] += 1
                else:
                    with_cvss[tt] = [0, 0, 0]
                    if (temp_cvss<4.0):
                        with_cvss[tt][0] += 1
                    elif (temp_cvss<7.0):
                        with_cvss[tt][1] += 1
                    else:
                        with_cvss[tt][2] += 1
    if pkg=='linux':
        print(with_cvss)
    # Ignore pkgs with less than one incident, should not happen..
    if stats[1] < 1:
        return
    prev_date = 0
    weight = 0
    # NOTE(review): this sorts dates by their CVE *count* (cvestats.get),
    # yet stats[0] = dates[0] reads like "earliest date". If the intent is
    # chronological order, the key should be omitted — confirm before use.
    dates = sorted(cvestats, key = cvestats.get)
    try:
        stats[0] = dates[0]
    except IndexError:
        print(pkg + str(dates))
        stats[0] = 0
    count = sum(cvestats.values())
    print(pkg + ' ' + str(count))
    #pkg_with_cvss[pkg] = with_cvss
    format_data(pkg, with_cvss, pkg_with_cvss, True)
    format_data(pkg, cvestats, src2month, False)
- ###############################################################################
- ## format vulnerability data into monthly intervals, suitable for tensorflow
def format_data(pkg, cvestats, src2month, cvss):
    """Bucket per-date counts into monthly totals for 2000-2018 (228 slots).

    When *cvss* is True each bucket is a [low, med, high] triple and the
    flat list is stored in src2month[pkg]; otherwise buckets are scalars
    and a numpy array (float64, filled from float32 values) is stored.
    String date keys are parsed with dateutil; others are used as-is.
    """
    dates = []
    counts = []
    for key, value in cvestats.items():
        when = parser.parse(key) if isinstance(key, str) else key
        try:
            amount = int(value)
        except TypeError:
            amount = value
        dates.append(when)
        counts.append(amount)

    # One row per year 2000..2018, twelve buckets per row.
    if cvss:
        monthyear = [[[0, 0, 0] for _ in range(12)] for _ in range(2000, 2019)]
    else:
        monthyear = [[0] * 12 for _ in range(2000, 2019)]

    for when, amount in zip(dates, counts):
        row = when.year - 2000
        col = when.month - 1
        if cvss:
            for severity in range(3):
                monthyear[row][col][severity] += amount[severity]
        else:
            monthyear[row][col] += amount

    months_list = [bucket for year_row in monthyear for bucket in year_row]
    if cvss:
        src2month[pkg] = months_list
    else:
        flat = np.zeros(len(months_list))
        for idx, bucket in enumerate(months_list):
            flat[idx] = np.float32(bucket)
        src2month[pkg] = flat
    return
-
- ###############################################################################
- ## plot vulnerability time distribution for a single package
def pkg_plot(pkg, cvestats):
    """Plot the monthly vulnerability counts of one package as a bar chart.

    *cvestats* maps date strings (dateutil-parsable) to counts. Buckets
    cover 2000-2016 (17 years x 12 months). Shows the plot interactively
    and returns 0.
    """
    colors = list("rgbcmyk")
    items = list(cvestats.items())
    #print(items)
    items.sort(key=lambda tup: tup[0])
    x = []
    y = []
    for data_dict in items:
        x.append(parser.parse(data_dict[0]))
        y.append(data_dict[1])

    monthyear = []
    year = []
    # initialize list: one row of 12 zeroed month buckets per year 2000-2016
    for i in range(2000,2017):
        temp = []
        for j in range(12):
            temp.append(0)
        monthyear.append(temp)
    # accumulate counts into their (year, month) bucket
    for i in range(len(x)):
        # print(str(x[i].year) + str(x[i].month))
        monthyear[x[i].year-2000][x[i].month-1] += y[i]
    newx = []
    yearsx = []
    year = []
    monthlabel = []
    month = []
    m1 = 0
    m2 = 0
    k = 0
    label_months = []
    # flatten to one bucket per month and draw the bar chart
    months_list = [item for sublist in monthyear for item in sublist]
    for i in range(len(months_list)):
        label_months.append(i)
    plt.bar(label_months, months_list)
    # per year: running total cc; emit half-year sums at June (j==5) and
    # December (j==11), and the yearly total into year[i]
    for i in range(len(monthyear)):
        year.append(0)
        cc = 0
        for j in range(len(monthyear[i])):
            cc += monthyear[i][j]
            if j == 5:
                m1 = cc
                month.append(m1)
            if j == 11:
                month.append(cc - m1)
                k += 1
        year[i] = cc
    for i in range(len(year)):
        yearsx.append(i + 2000)
    k = 2000
    datapoints = []
    # build half-year tick labels: "YYYY-1" then "-2", advancing the year
    # after each second half
    # NOTE(review): original indentation was ambiguous here; k += 1 is
    # placed in the else branch so k advances once per year — confirm.
    for i in range(len(month)):
        datapoints.append(i+1)
        if i%2 == 0:
            monthlabel.append(str(k) + '-1')
        else:
            monthlabel.append('-2')
            k += 1

    # plt.xticks(datapoints, monthlabel)
    # print(year)
    # plt.plot.hist(yearsx,year)
    # plt.bar(yearsx, year, 1, color='blue')
    # plt.bar(datapoints, month, 1, color='blue')
    # ts.predict(month)
    plt.legend([pkg], loc='upper left')
    plt.show()
    return 0
- ###############################################################################
- ## populate src2sloccount dictionary with number of source lines of code in
- ## format (total, [ansic, cpp, asm, java, python, perl, sh])
def getslocs(src2month, src2sloccount):
    """Populate *src2sloccount* from ./sloc_report.txt.

    Each report line is ``<total> <pkg> [lang=count ...]``; the result is
    ``pkg -> (total, [ansic, cpp, asm, java, python, perl, sh])``.
    *src2month* is unused but kept for interface compatibility.

    Improvements: the seven-branch if-chain is replaced with a language
    table, and blank lines no longer raise IndexError.
    """
    lang_order = ['ansic', 'cpp', 'asm', 'java', 'python', 'perl', 'sh']
    with open('./sloc_report.txt') as f:
        for line in f:
            words = line.split()
            if not words:
                continue  # robustness: skip blank lines
            total = int(words[0])
            pkg = words[1]
            counts = dict.fromkeys(lang_order, 0)
            for token in words[2:]:
                key, _, val = token.partition('=')
                if key in counts:  # unknown languages are ignored, as before
                    counts[key] = int(val)
            src2sloccount[pkg] = (total, [counts[k] for k in lang_order])
- ###############################################################################
- ## get popularity contest data in format src_pkg -> (installed, vote, old, recent)
def getpop(src2dsa, src2pop):
    """Read popularity-contest votes from by_vote.csv into *src2pop*.

    Only packages present in *src2dsa* are recorded; short rows are
    printed and skipped.
    """
    with open('by_vote.csv', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=',', quotechar='|'):
            try:
                name = row[1]
                if name in src2dsa:
                    src2pop[name] = row[3]
            except IndexError:
                print(row)
    return
- ###############################################################################
- ## get dependencies of a given source
def getdeps(src2dsa, src2deps):
    """Collect dependency info (via the deps module) for every source
    package that has at least one DSA."""
    for source_pkg in src2dsa:
        deps.getdeps(source_pkg, src2deps)
- ###############################################################################
- ## print some meta-info on internal data
def aptsec_about(dsatable, cvetable, pkg2src, src2dsa):
    """Print summary counts of the internal data tables.

    fixes: printf-style format strings were passed as extra print()
    arguments and never interpolated; the first message also used the
    source-package count where the DSA count was meant.
    """
    num_dsa = len(dsatable)
    num_cve = len(cvetable)
    num_pkg = len(pkg2src)
    num_src = len(src2dsa)
    print('\nThe current database records %d binary packages and %d DSAs.\n' % (num_pkg, num_dsa))
    print('%d CVEs are associated with %d source packages.\n' % (num_cve, num_src))
    return
- ###############################################################################
- ## use scores to suggest alternative packages
def aptsec_alternatives(pkg):
    """Suggest alternative packages based on scores.
    TODO: not implemented yet."""
    return None
- ###############################################################################
- ## print overview for pkg high scores
def aptsec_hitlist():
    """Print an overview of packages with high scores.
    TODO: not implemented yet."""
    return None
- ###############################################################################
- ## evaluation helper
- ## compute stats until date given in $2, then compute stats
- ## for the next year to check accuracy of the prediction.
- ## @cvestats = (date base-score impact-score exploit-score)
def simulate_stats(pkg, year):
    """Evaluation helper: compute stats up to *year*, then compare against
    the following year to check prediction accuracy.
    TODO: not implemented yet."""
    return None
- ###############################################################################
- ##TODO Printing functions
- def plot_all(src2month, src2sloccount, pkg_with_cvss):
- ## Sum of vulnerabilities by package
- src2sum = dict()
- src2year = dict()
- src2month_loc=dict()
- src2lastyears = dict()
- src2dens = dict()
- src2month_temp = dict()
- for i in pkg_with_cvss:
- src2month_temp[i]=[]
- for j in range(len(src2month[i])):
- #src2month_temp[i].append(pkg_with_cvss[i][j][1]+pkg_with_cvss[i][j][2])
- src2month_temp[i].append(pkg_with_cvss[i][j][2])
- for i in src2month:
- src2month_loc[i]=src2month_temp[i][:-12] #cut data for 2018
- years = 17 # 2001 - 2000 + years
- year_sum = [0] * years
- year_num = [0] * years
- for pkg in src2month_loc:
- for j in range(years):
- temp = sum(src2month_loc[pkg][12*(1+j):12*(2+j)])
- if (temp>0):
- year_num[j] += 1
- year_sum[j] += temp
- ## For last 2 years
- total = sum(src2month_loc[pkg][:])
- last_years = sum(src2month_loc[pkg][-24:])
- print(pkg + '; ' + str(last_years))
- if (total>1):
- src2sum[pkg] = total
- src2lastyears[pkg] = last_years
-
- #calc total
- sum_total = 0
- one_only=0
- one_plus=0
- for p in src2month:
- sum_part = sum(src2month_loc[p][:])
- sum_total += sum_part
- if (sum_part == 1):
- one_only += 1
- elif (sum_part>1):
- one_plus += 1
- print('Total last 2 years = ', sum_total)
- print('one_only = ', one_only)
- print('one_plus = ', one_plus)
- values = sorted(src2sum.values(),reverse=True)
- #print(values)
- keys = list(sorted(src2sum, key=src2sum.__getitem__, reverse=True))
-
- density = []
- density_keys=[]
- size = []
- size_dens = []
- for pkg in keys:
- try:
- size.append(src2sloccount[pkg][0]/1000)
- except (KeyError):
- size.append(0)
- j=0
- for pkg in keys:
- try:
- if (src2sloccount[pkg][0])>0:
- density.append((values[j]/src2sloccount[pkg][0])*1000)
- density_keys.append(pkg)
- src2dens[pkg] = (values[j]/src2sloccount[pkg][0])*1000
- size_dens.append(src2sloccount[pkg][0])
- except(KeyError):
- pass
- j += 1
-
- i = 0
- few_keys = []
- #print(keys)
- for k in keys:
- if (i==0):
- few_keys.append(k)
- i+=1
- if (i==10):
- i = 0
- print('package number =' + str(len(values)) + '... ' + str(len(keys)))
- carlosplt.pre_paper_plot(True)
- #plt.style.use('ggplot')
- print('Spearman correlation: ',stats.spearmanr(values,size))
- with open('sizes.txt', 'w') as thefile:
- for item in size:
- thefile.write("%.3f\n" % item)
- plt.figure(figsize=(10,5))
- plt.plot(values, color='darkblue', lw = 2)
- #plt.plot(size, 'ro', color='darkred', lw = 2, label='Size in KSLoC')
- plt.xticks(np.arange(0,len(src2sum),10.0),few_keys, rotation="vertical")
- plt.ylabel('Vulnerabilities')
- plt.yscale('log')
- plt.grid()
- #plt.xscale('log')
- plt.tight_layout()
- plt.legend()
- carlosplt.post_paper_plot(True,True,True)
- plt.show()
- print('Yearly vulnerabilites in total' + str(year_sum))
- src2sloc = dict()
- for pkg in src2sloccount:
- src2sloc[pkg] = src2sloccount[pkg][0]
- ## Density
- density = sorted(src2dens.values(),reverse=True)
- with open('densities.txt', 'w') as thefile:
- for item in density:
- thefile.write("%.3f\n" % item)
- density_keys = list(sorted(src2dens, key=src2dens.__getitem__, reverse=True))
- density_few_keys =[]
- for k in density_keys:
- if (i==0):
- density_few_keys.append(k)
- i+=1
- if (i==10):
- i = 0
- plt.figure(figsize=(10,5))
- plt.plot(size_dens, density, 'ro', color='darkblue', lw = 2)
- plt.xticks(np.arange(0,len(density),10.0),density_few_keys, rotation="vertical")
- plt.ylabel('Vulnerability density')
- plt.yscale('log')
- plt.xscale('log')
- plt.tight_layout()
- carlosplt.post_paper_plot(True,True,True)
- plt.show()
- ## Spearman density size
- print('Spearman correlation: ',stats.spearmanr(density,size_dens))
- ## SLoCs
- values = sorted(src2sloc.values(),reverse=True)
- #print(values)
- keys = list(sorted(src2sloc, key=src2sloc.__getitem__, reverse=True))
-
- i = 0
- few_keys = []
- for k in keys:
- if (i==0):
- few_keys.append(k)
- i+=1
- if (i==10):
- i = 0
- carlosplt.pre_paper_plot(True)
- plt.figure(figsize=(10,5))
- plt.plot(values, color='darkblue', lw = 2)
- plt.xticks(np.arange(0,len(src2sloc),10.0),few_keys, rotation="vertical")
- plt.ylabel('SLoC')
- plt.yscale('log')
- plt.xscale('log')
- plt.tight_layout()
- carlosplt.post_paper_plot(True,True,True)
- plt.show()
- ## Number of affected packages
- n = len(year_sum)
- yearsx = []
- for i in range(1,years+1):
- yearsx.append('\''+str(i).zfill(2))
- x = range(years)
- width = 1/2
- plt.bar(x, year_num, width, color='darkblue', edgecolor='black')
- plt.xticks(np.arange(0,n),yearsx)
- plt.ylabel('Number of affected packages')
- plt.xlabel('Year')
- carlosplt.post_paper_plot(True,True,True)
- plt.show()
-
-
- ## Average number of vulnerabilities per package per year
- average_per_year = [0] * years
- for j in range(years):
- average_per_year[j] = year_sum[j]/float(year_num[j])
-
- #print(average_per_year)
- x_values = list(range(1,years+1))
- #print(x_values)
- slope = np.polyfit(x_values,average_per_year,1)
- #slope = np.polyval(slope,x_values)
- print('Slope: ' + str(slope))
- n = len(year_sum)
- x = range(years)
- width = 1/2
- #plt.bar(x, year_sum, width)
- plt.bar(x, average_per_year, width, color='darkblue', edgecolor='black')
- plt.xticks(np.arange(0,n),yearsx)
- plt.ylabel('Average vulnerabilities per package')
- plt.xlabel('Year')
- carlosplt.post_paper_plot(True,True,True)
- plt.show()
- ## Work on selected packages (php7.0, openjdk8, wireshark, chromium-browser, icedove, linux)
- src2quarter = dict()
-
- quarter_num = years*4
-
- # Here for only up to 2016 - let's change that
- #return(src2sum)
- # pkg = 'php5'
- # quarter_sum = [0] * quarter_num
- # for j in range(quarter_num):
- # temp = sum(src2month_loc[pkg][12+3*j:12+3*(j+1)])
- # quarter_sum[j] = temp
- # src2quarter[pkg] = quarter_sum
- # for pkg in src2quarter:
- # n = len(src2quarter[pkg])
- quartersx = []
- for i in range(1,years+1):
- for j in range(1,5):
- if j==1:
- quartersx.append('Q' + str(j)+'\''+str(i).zfill(2))
- else:
- quartersx.append(' ')
- # x = range(quarter_num)
- # width = 1/2
- ## Plot different colors for php
- # before = src2quarter[pkg][:-8] + ([0] * 8)
- # after = ([0] * (len(before)-8)) + src2quarter[pkg][-8:]
- # print(len(src2quarter[pkg]))
- #
- # bar1 = plt.bar(x[:-26], before[24:-2], width, color='darkblue', label='before php7', edgecolor='black')
- # bar2 = plt.bar(x[:-26], after[24:-2], width, color='darkred', label='after php7', edgecolor='black')
- # plt.legend(handles=[bar1, bar2])
- #
- # print('PHP Sum before: ' + str(sum(before)))
- # print('PHP Sum after: ' + str(sum(after)))
-
- # plt.xticks(np.arange(0,n-26),quartersx[24:-2], rotation="vertical")
- # plt.ylabel('Vulnerabilities per quarter of package ' + pkg)
- # plt.xlabel('Quarter')
- # carlosplt.post_paper_plot(True,True,True)
- # plt.show()
- # ## Plot for openjdk-7
- #pkg = 'openjdk-8'
- #pkg = 'openjdk-7'
- #quarter_sum = [0] * quarter_num
- #for j in range(quarter_num):
- # temp = sum(src2month_loc[pkg][12+3*j:12+3*(j+1)])
- # quarter_sum[j] = temp
- #src2quarter[pkg] = quarter_sum
- #n = len(src2quarter[pkg])
- #x = range(quarter_num)
- #width = 1/2
- # ## Plot different colors for openjdk
- #before = src2quarter[pkg][:-10] + ([0] * 10)
- #after = ([0] * (len(before)-10)) + src2quarter[pkg][-10:]
- #print(len(src2quarter[pkg]))
- #bar1 = plt.bar(x[:-48], before[48:], width, color='darkblue', label='before openjdk-8', edgecolor='black')
- #bar2 = plt.bar(x[:-48], after[48:], width, color='darkred', label='after openjdk-8', edgecolor='black')
- #plt.legend(handles=[bar1, bar2])
-
- #print('OpenJDK Sum before: ' + str(sum(before)))
- #print('OpenJDK Sum after: ' + str(sum(after)))
-
- #plt.bar(x, src2quarter[pkg], width, color='red')
- #plt.xticks(np.arange(0,n-48),quartersx[48:], rotation="vertical")
- #plt.ylabel('Vulnerabilities per quarter of package ' + pkg)
- #plt.xlabel('Quarter')
- #carlosplt.post_paper_plot(True,True,True)
- #plt.show()
- ###############################################################################################
- n = len(year_sum)
- x = range(years)
- width = 1/2
- plt.bar(x, year_sum, width, color='darkblue', edgecolor='black')
- #plt.bar(x, average_per_year, width)
- plt.xticks(np.arange(0,n),yearsx)
- plt.ylabel('Total vulnerabilities')
- plt.xlabel('Year')
- carlosplt.post_paper_plot(True,True,True)
- plt.show()
- sum_all = sum(values)
- print("Total: ", sum_all)
- ###############################################################################################
-
- # Get LTS and plot
- try:
- with open("DLA_sum.txt","rb") as fp:
- ltslist = pickle.load(fp)
- except IOError:
- ltslist = dla.getDLAs()
- print(ltslist)
- ## Plot for wheezy
- quarter_num += 1
- quarter_sum = [0] * quarter_num
- totalLTS = [0] * (14 * 12) + ltslist
-
- for pkg in src2month_loc:
- for j in range(quarter_num):
- temp = sum(src2month_loc[pkg][12+(3*j):12+3*(j+1)])
- quarter_sum[j] += temp
- LTS_quarter = []
-
- for j in range(quarter_num):
- temp = sum(totalLTS[12+(3*j):12+3*(j+1)])
- LTS_quarter.append(temp)
- quartersx.append("Q1'18")
- ## Print all LTS
- cut = 12*4+1
- n = len(quarter_sum)
- x = range(quarter_num)
- width = 1/2
- plt.bar(x, LTS_quarter, width, color='brown', label='regular support', edgecolor='black')
-
- plt.xticks(np.arange(0,n),quartersx, rotation="vertical")
- plt.ylabel('Vulnerabilities per quarter of Debian LTS')
- plt.xlabel('Quarter')
- carlosplt.post_paper_plot(True,True,True)
- plt.show()
-
-
- ## Filter only wheezy:
- quarter_sum_regular = [0] * (12*4+1) + quarter_sum[12*4+1:12*4+9] + [0] * 12
- quarter_sum_errors = [0] * (12*4 + 9) + quarter_sum[12*4+9:12*4+9+5] + [0] * 7
- LTS_quarter = [0] * (15*4+2) + LTS_quarter[15*4+2:]
- #print(quarter_sum_errors)
- cut = 12*4+1
- n = len(quarter_sum) - cut
- x = range(quarter_num-cut)
- width = 1/2
-
- #print(len(LTS_quarter))
- bar1 = plt.bar(x, quarter_sum_regular[cut:], width, color='darkblue', label='regular', edgecolor='black')
- bar12 = plt.bar(x, quarter_sum_errors[cut:], width, color='darkorange', label='regular*', edgecolor='black')
- bar2 = plt.bar(x, LTS_quarter[cut:], width, color='darkred', label ='long-term', edgecolor='black')
- plt.legend(handles=[bar1, bar12, bar2])
-
- plt.xticks(np.arange(0,n),quartersx[cut:], rotation="vertical")
- plt.ylabel('Vulnerabilities per quarter of Debian Wheezy')
- plt.xlabel('Quarter')
- carlosplt.post_paper_plot(True,True,True)
- plt.show()
- ## power-law fit
- #print(values)
- #results=pl.Fit(values, discrete=True, xmin=1)
- #print(results.power_law.alpha)
- #print(results.truncated_power_law.alpha)
- #print(results.power_law.xmin)
- #print(results.truncated_power_law.xmin)
- #print(results.truncated_power_law.xmax)
- #print(results.power_law.discrete)
- #print(results.lognormal.mu)
- #results.plot_ccdf(color = 'blue')
- #myax = plt.gca()
- ##results.lognormal.plot_pdf(color = 'yellow')
- ##results.exponential.plot_pdf(color = 'purple')
- #results.stretched_exponential.plot_pdf(color = 'black')
- #results.power_law.plot_ccdf(color = 'green', ax=myax)
- #results.truncated_power_law.plot_ccdf(color = 'red', ax=myax)
- #results.lognormal.plot_ccdf(color = 'pink', ax=myax)
- ##results.exponential.plot_ccdf(color = 'pink', ax=myax)
- ##plt.plot(results.data)
- #plt.show()
- #R, p=results.distribution_compare('power_law','stretched_exponential')
- #print(R,p)
- #R, p=results.distribution_compare('power_law','lognormal')
- #print(R,p)
- #R, p=results.distribution_compare('power_law','exponential')
- #print(R,p)
- #R, p=results.distribution_compare('power_law','truncated_power_law')
- #print(R,p)
- #R, p=results.distribution_compare('power_law','lognormal_positive')
- #print(R,p)
- #R, p=results.distribution_compare('truncated_power_law','lognormal')
- #print(R,p)
- ###############################################################################################
- ##
- return(src2sum)
- ###############################################################################
- ## print help text
def aptsec_help():
    """Print a short usage hint pointing the user at the manual."""
    print('See manual for correct usage\n')
- ###############################################################################
## Print a system status report from component (file) measurements (sha1sums).
## The expected input format is Linux IMA; we assume the input has already
## been validated.
##
## Note: aptsec_status() considers *reportedly installed* packages, while this
## one looks at *actually loaded* software that influenced the CPU since bootup.
## --- Script entry point ------------------------------------------------------
## Dispatch on the first command-line argument:
##   update - refresh the vulnerability databases and run the predictor
##   status - report on the package named by the second argument
##   show   - plot the collected statistics and persist the summary
## Any other action prints the usage help. With no argument we fall back to
## 'update' after warning the user.
try:
    action = sys.argv[1]
except IndexError:
    print('No argument given')
    action = 'update'

client = MongoClient()
cve_db = client.cvedb

# Working tables; the chosen action fills them from the on-disk databases.
dsatable = {}
src2dsa = {}
dsa2cve = {}
cvetable = {}
src2month = {}
src2deps = {}
pkg_with_cvss = {}
src2sloccount = {}
src2pop = {}
src2sum = {}

state, err = load_state()
state['vendor'] = 'debian'

if action == 'update':
    (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, pkg_with_cvss) = load_DBs()
    aptsec_update(state,config, dsatable, client, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss)
    save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, src2sum, pkg_with_cvss)
    save_state(state)
    pred.predict(src2month, 0)
    # Severity buckets (currently unpopulated downstream).
    low = []
    med = []
    high = []
elif action == 'status':
    (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, pkg_with_cvss) = load_DBs()
    aptsec_status(sys.argv[2])
elif action == 'show':
    (dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, pkg_with_cvss) = load_DBs()
    src2sum = plot_all(src2month, src2sloccount, pkg_with_cvss)
    save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, src2sum, pkg_with_cvss)
else:
    aptsec_help()

save_state(state)
|