import configparser
import json
import datetime
import logging
import os

import numpy as np
from dateutil import parser
from pymongo import MongoClient

from .DebianAdvisory import DebianAdvisory
from .CVEParse import CVEParse


class DebianModel:
    """
    This class represents the M-Star Debian module. It is responsible for
    handling Debian package information.
    """

    module_path = os.path.dirname(__file__)

    def __init__(self, action, configfile=os.path.join(module_path, 'config_default.txt')):
        ## DBs to track
        self.dsatable = dict()
        self.src2dsa = dict()
        self.dsa2cve = dict()
        self.cvetable = dict()
        self.src2month = dict()
        self.src2sloccount = dict()
        self.src2pop = dict()
        self.src2deps = dict()
        self.pkg_with_cvss = dict()
        self.src2sum = dict()

        ## config
        self.configfile = configfile
        self.config = configparser.ConfigParser()
        if not self.config.read(configfile):
            raise IOError('Cannot open configuration file: ' + configfile)

        (self.state, self.err) = self.load_state()
        client = MongoClient()

        if action == 'update':
            (self.dsatable, self.src2dsa, self.dsa2cve, self.cvetable, self.src2month,
             self.src2sloccount, self.src2pop, self.src2deps) = self.load_dbs()
            self.update_dbs(self.dsatable, client, self.src2dsa, self.dsa2cve, self.src2month,
                            self.cvetable, self.pkg_with_cvss)
            self.save_DBs(self.dsatable, self.src2dsa, self.dsa2cve, self.cvetable, self.src2month,
                          self.src2sloccount, self.src2pop, self.src2deps, self.src2sum)
            self.save_state(self.state)
            # lstm.predict(src2month, src2sloccount, src2pop, src2deps)
        elif action == 'status':
            (dsatable, src2dsa, dsa2cve, cvetable, src2month,
             src2sloccount, src2pop, src2deps) = self.load_dbs()
            # aptsec_status(sys.argv[2])
        elif action == 'show':
            (dsatable, src2dsa, dsa2cve, cvetable, src2month,
             src2sloccount, src2pop, src2deps) = self.load_dbs()
            # src2sum = plot_all(src2month)
            # save_DBs(dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, src2sum)
        else:
            self.print_help()

    def load_dbs(self):
        """
        Loads the required databases into the model. Could be implemented as either
        read-from-file or read-from-DB; currently the tables are read from JSON
        files in the cache folder.
        """
        cache_dir = os.path.join(self.module_path, self.config['DIR']['cache_dir'])
        # The order must match the tuple unpacking done by the callers in __init__
        # (..., src2month, src2sloccount, src2pop, src2deps).
        tables = ['dsatable', 'src2dsa', 'dsa2cve', 'cvetable',
                  'src2month', 'src2sloccount', 'src2pop', 'src2deps']
        result = []
        for table in tables:
            try:
                with open(os.path.join(cache_dir, table)) as t:
                    result.append(json.load(t))
            except (IOError, ValueError):
                print('Read cache ' + table + ' failed!! Maybe first run of the system?')
                result.append(dict())
        return tuple(result)

    def save_DBs(self, dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount,
                 src2pop, src2deps, src2sum):
        """Writes each table to its own JSON file in the cache folder."""
        cache_dir = os.path.join(self.module_path, self.config['DIR']['cache_dir'])
        # Map each cache file name to the table it stores; src2month is handled
        # separately below because its values may be numpy arrays.
        tables = {
            'dsatable': dsatable,
            'src2dsa': src2dsa,
            'dsa2cve': dsa2cve,
            'cvetable': cvetable,
            'src2deps': src2deps,
            'src2sloccount': src2sloccount,
            'src2pop': src2pop,
        }
        for name, table in tables.items():
            try:
                with open(os.path.join(cache_dir, name), 'w') as t:
                    json.dump(table, t, default=self.converter)
            except IOError:
                print('Write cache ' + name + ' failed!! Fatal error')

        # Convert src2month values to plain int lists so they can be serialized as JSON.
        cache_src2month = os.path.join(cache_dir, 'src2month')
        int_list = dict()
        for element in src2month:
            int_list[element] = [int(value) for value in src2month[element]]
        try:
            with open(cache_src2month, 'w') as fp:
                json.dump(int_list, fp, default=self.converter)
        except IOError:
            print('Write cache src2month failed!! Fatal error')
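    # ------------------------------------------------------------------
    # Cache layout (inferred from load_dbs/save_DBs above): every table is
    # persisted as a separate JSON file in config['DIR']['cache_dir'],
    # named after the table (dsatable, src2dsa, dsa2cve, cvetable,
    # src2deps, src2month, src2sloccount, src2pop), plus a 'state' file
    # written by save_state(). A cached table can therefore be inspected
    # on its own; a minimal sketch, assuming the default 'cache/' directory:
    #
    #     with open(os.path.join('cache', 'src2month')) as fp:
    #         src2month = json.load(fp)   # {src_pkg: [monthly CVE counts]}
    #
    # ------------------------------------------------------------------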
    def save_state(self, state):
        """Save state; different from the DBs in that we always need it."""
        state_file = os.path.join(self.module_path, self.config['DIR']['cache_dir'], 'state')
        try:
            with open(state_file, 'w') as sf:
                json.dump(state, sf)
        except IOError:
            print('Write cache state failed!! Fatal error')

    def converter(self, o):
        """Helper for save_DBs: makes datetime and numpy values JSON-serializable."""
        if isinstance(o, (datetime.datetime, datetime.timedelta)):
            return str(o)
        if isinstance(o, np.floating):
            return int(o)

    def update_dbs(self, dsatable, client, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss):
        now = datetime.datetime.now()
        new_adv = DebianAdvisory.checkDSAs(self.state, self.config)

        for dsa_id in new_adv:
            if dsa_id in dsatable:
                logging.info(self.state['vendor'] + ' advisory ' + str(dsa_id) + ' already known.\n')
            else:
                ## store advisory and parse it
                dsatable[dsa_id] = new_adv[dsa_id]
                self.updateCVETables(dsa_id, dsatable, self.state, src2dsa, dsa2cve, cvetable, client)

        # recompute all pkg statistics
        for srcpkg in src2dsa:
            self.processCVEs(srcpkg, now, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss, self.config)

        return 0

    def updateCVETables(self, myid, dsatable, state, src2dsa, dsa2cve, cvetable, client):
        logging.info('Updating vulnerability database with advisory ' + state['vendor'] + str(myid) + '\n')

        adv = dsatable[myid]
        dsastats = DebianAdvisory.parseDSAhtml(adv)
        dsastats = DebianAdvisory.fixDSAquirks(myid, dsastats)

        for srcpkg in dsastats[0]:
            if srcpkg in src2dsa:
                src2dsa[srcpkg].append(myid)
            else:
                src2dsa[srcpkg] = [myid]

        dsa2cve[str(myid)] = dsastats[2]

        for cve_id in dsastats[2]:
            # No CVE fetching from the network here; we use mongodb and cve-search.
            cve = CVEParse.fetchCVE(cve_id, client)
            cvestats = CVEParse.parseCVE(cve_id, cve)
            finaldate = cvestats[0]
            if cvestats[0] > dsastats[1] or cvestats[0] == 0:
                finaldate = dsastats[1]
            cvedata = (finaldate, dsastats[1] - finaldate, cvestats[1], cvestats[2], cvestats[3])
            cvetable[cve_id] = cvedata

        return cvetable

    @staticmethod
    def print_help():
        """
        Prints the help message for this vendor model.
        """
        print('The Debian mstar model supports only the update, status and show actions.')

    def load_state(self):
        """
        Load state; different from the DBs in that we always need it.
        Retrieves the cached state for the current configuration.
        :return: state, error number
        """
        state_file = os.path.join(self.module_path, self.config['DIR']['cache_dir'], 'state')
        err = 0
        try:
            with open(state_file) as json_data:
                state = json.load(json_data)
        except FileNotFoundError:
            # Load the default state - start from the beginning.
            print('File not found in: ' + state_file)
            print('Loading default state.')
            state = dict()
            state['cache_dir'] = 'cache/'
            state['next_adv'] = 0
            state['next_fsa'] = 0
            state['Packages'] = ''
            state['Sources'] = ''
            state['Sha1Sums'] = ''
            err += 1
        return state, err
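    # ------------------------------------------------------------------
    # Note on the data consumed below: each cvetable entry is the tuple
    # built in updateCVETables(), i.e.
    #     (disclosure date, advisory date - disclosure date, ...)
    # processCVEs() relies on index 0 (the date) and index 2, which it
    # treats as the CVSS base score and buckets using the CVSS v2
    # severity boundaries:
    #     low:    score < 4.0
    #     medium: 4.0 <= score < 7.0
    #     high:   score >= 7.0
    # The meaning of the remaining fields depends on CVEParse.parseCVE()
    # and is not spelled out here.
    # ------------------------------------------------------------------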
    def processCVEs(self, pkg, now, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss, config):
        """
        Compute and store MTBF, MTBR and scores of each src pkg.
        output: %src2mtbf (srcpkg => ())
        """
        stats = [now, 0, 0, 0, 0, 0, 0]
        mylambda = config['TRUST']['lambda']
        cvestats = dict()
        logging.info('Processing package: ' + pkg + '.\n')

        ## keep track of the number of low-medium-high severity vulnerabilities
        ## TODO see how cvss affects vulnerability prediction - if some packages show patterns
        temp_cvss = 0.0
        with_cvss = dict()

        ## to eliminate duplicate CVEs
        haveseen = dict()

        ## cvestats = (date: number)
        for dsa_id in src2dsa[pkg]:
            for cve_id in dsa2cve[str(dsa_id)]:
                if cve_id in haveseen:
                    continue
                else:
                    haveseen[cve_id] = 1
                    tt = cvetable[cve_id][0]
                    if tt in cvestats:
                        cvestats[tt] += 1
                    else:
                        cvestats[tt] = 1
                    stats[1] += 1

        ## Date at the moment taken from CVE? - not sure.
        ## with_cvss = (date: [number low, number medium, number high])
        for dsa_id in src2dsa[pkg]:
            for cve_id in dsa2cve[str(dsa_id)]:
                tt = cvetable[cve_id][0]
                try:
                    temp_cvss = float(cvetable[cve_id][2])
                except TypeError:
                    print(cve_id)
                    continue

                if tt not in with_cvss:
                    with_cvss[tt] = [0, 0, 0]
                if temp_cvss < 4.0:
                    with_cvss[tt][0] += 1
                elif temp_cvss < 7.0:
                    with_cvss[tt][1] += 1
                else:
                    with_cvss[tt][2] += 1

        # Ignore pkgs with less than one incident; should not happen..
        if stats[1] < 1:
            return

        prev_date = 0
        weight = 0

        dates = sorted(cvestats, key=cvestats.get)
        try:
            stats[0] = dates[0]
        except IndexError:
            print(pkg + str(dates))
            stats[0] = 0

        count = sum(cvestats.values())
        print(pkg + ' ' + str(count))

        # pkg_with_cvss[pkg] = with_cvss
        self.format_data(pkg, with_cvss, pkg_with_cvss, True)
        self.format_data(pkg, cvestats, src2month, False)

    def format_data(self, pkg, cvestats, src2month, cvss):
        """Flatten per-date counts into a per-month time series for one package."""
        x = []
        y = []
        monthyear = []
        year = []

        temp_items = list(cvestats.items())
        items = []
        for data_dict in temp_items:
            # Keys may be datetime objects or date strings (when read back from the
            # JSON cache), so parse strings on the fly.
            if isinstance(data_dict[0], str):
                tmpx = parser.parse(data_dict[0])
            else:
                tmpx = data_dict[0]
            x.append(tmpx)

            try:
                tmpy = int(data_dict[1])
            except TypeError:
                tmpy = data_dict[1]
            y.append(tmpy)
            items.append((tmpx, tmpy))

        items.sort(key=lambda tup: tup[0])

        # One slot per month; note that the covered range is hard-coded to the
        # years 2000-2017.
        for i in range(2000, 2018):
            temp = []
            for j in range(12):
                if cvss:
                    temp.append([0, 0, 0])
                else:
                    temp.append(0)
            monthyear.append(temp)

        for i in range(len(x)):
            if cvss:
                monthyear[x[i].year - 2000][x[i].month - 1][0] += y[i][0]
                monthyear[x[i].year - 2000][x[i].month - 1][1] += y[i][1]
                monthyear[x[i].year - 2000][x[i].month - 1][2] += y[i][2]
            else:
                monthyear[x[i].year - 2000][x[i].month - 1] += y[i]

        months_list = [item for sublist in monthyear for item in sublist]

        if not cvss:
            temp_months = np.zeros(len(months_list))
            for i, element in enumerate(months_list):
                temp_months[i] = np.float32(element)
            src2month[pkg] = temp_months
        else:
            src2month[pkg] = months_list

        return
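
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). Because the imports
# above are relative, the module has to be run as part of its package, e.g.
# `python -m <package>.DebianModel update`; the package name and the
# command-line handling below are assumptions for illustration only.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import sys

    # DebianModel itself prints a help message for any action other than
    # 'update', 'status' or 'show'.
    chosen_action = sys.argv[1] if len(sys.argv) > 1 else 'update'
    DebianModel(chosen_action)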