123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376 |
- import configparser
- import json
- import datetime
- import logging
- from pymongo import MongoClient
- import numpy as np
- import os
- from dateutil import parser
- from .DebianAdvisory import DebianAdvisory
- from .CVEParse import CVEParse
class DebianModel:
    """
    This class represents the M-Star Debian module. It is responsible for
    handling Debian package infos: fetching security advisories (DSAs),
    mapping them to CVEs, and deriving per-package monthly statistics.
    """

    module_path = os.path.dirname(__file__)

    # Legacy class-level defaults kept for backward compatibility with any
    # code that reads them off the class; real per-instance tables are
    # (re)created in __init__ so instances do not share mutable state.
    dsatable = dict()
    src2dsa = dict()
    dsa2cve = dict()
    cvetable = dict()
    src2month = dict()
    src2sloccount = dict()
    src2pop = dict()
    src2deps = dict()
    pkg_with_cvss = dict()
    src2sum = dict()

    def __init__(self, action, configfile=os.path.join(module_path, 'config_default.txt')):
        """
        Load configuration and cached state, then perform *action*.

        :param action: one of 'update', 'status' or 'show'
        :param configfile: path to the INI configuration file
        :raises IOError: if the configuration file cannot be read
        """
        # Per-instance tables (avoid mutating shared class-level dicts).
        self.dsatable = dict()
        self.src2dsa = dict()
        self.dsa2cve = dict()
        self.cvetable = dict()
        self.src2month = dict()
        self.src2sloccount = dict()
        self.src2pop = dict()
        self.src2deps = dict()
        self.pkg_with_cvss = dict()
        self.src2sum = dict()

        self.configfile = configfile
        self.config = configparser.ConfigParser()
        if not self.config.read(configfile):
            # Include the offending path; the original message ended in ': '
            # but never appended the filename.
            raise IOError('Cannot open configuration file: ' + str(configfile))
        (self.state, self.err) = self.load_state()
        self.client = MongoClient()

        if action == 'update':
            self.load_dbs()
            self.update_dbs()
            self.store_dbs()
            self.save_state(self.state)
            # lstm.predict(src2month, src2sloccount, src2pop, src2deps)
        elif action == 'status':
            self.load_dbs()
            # aptsec_status(sys.argv[2])
        elif action == 'show':
            self.load_dbs()
            self.store_dbs()
        else:
            # print_help is a zero-argument staticmethod; the original
            # called it with an extra argument and raised TypeError.
            self.print_help()

    def load_dbs(self):
        """
        Loads the required databases into the model. Can either be implemented
        as read from file, or read from DB. Currently reading it from files in
        the cache folder.
        """
        self.dsatable = self.load_single_db_from_cache('dsatable')
        self.src2dsa = self.load_single_db_from_cache('src2dsa')
        self.dsa2cve = self.load_single_db_from_cache('dsa2cve')
        self.cvetable = self.load_single_db_from_cache('cvetable')
        self.src2deps = self.load_single_db_from_cache('src2deps')
        self.src2month = self.load_single_db_from_cache('src2month')
        self.src2sloccount = self.load_single_db_from_cache('src2sloccount')
        self.src2pop = self.load_single_db_from_cache('src2pop')

    def load_single_db_from_cache(self, file_name):
        """
        Read one JSON table from the cache directory.

        :param file_name: bare table name inside the cache dir
        :return: the decoded table, or an empty dict when the cache file is
                 missing/corrupt (first run). Returning {} instead of None
                 keeps update_dbs/processCVEs from crashing on iteration.
        """
        cache_dir = os.path.join(self.module_path, self.config['DIR']['cache_dir'])
        try:
            with open(os.path.join(cache_dir, file_name)) as f:
                return json.load(f)
        except (IOError, ValueError):
            print('Read cache ' + file_name + ' failed!! Maybe first run of the system?')
            return dict()

    def store_dbs(self):
        """Persist all in-memory tables to the JSON cache."""
        self.store_db_single('dsatable', self.dsatable)
        self.store_db_single('src2dsa', self.src2dsa)
        self.store_db_single('dsa2cve', self.dsa2cve)
        self.store_db_single('cvetable', self.cvetable)
        self.store_db_single('src2deps', self.src2deps)
        self.store_db_single('src2sloccount', self.src2sloccount)
        self.store_db_single('src2pop', self.src2pop)

        # src2month needs special handling: its values are numpy arrays,
        # so convert each series to a plain list of ints before dumping.
        cache_src2month = os.path.join(self.module_path, self.config['DIR']['cache_dir'], 'src2month')
        int_list = {pkg: [int(v) for v in series] for pkg, series in self.src2month.items()}
        try:
            with open(cache_src2month, 'w') as fp:
                json.dump(int_list, fp, default=self.converter)
        except IOError:
            print('write cache src2month failed!! Fatal error')

    def store_db_single(self, file_name, db):
        """Write one table as JSON into the cache directory."""
        cache_dir = os.path.join(self.module_path, self.config['DIR']['cache_dir'])
        try:
            with open(os.path.join(cache_dir, file_name), 'w') as f:
                json.dump(db, f, default=self.converter)
        except (IOError, ValueError):
            print('Read cache ' + file_name + ' failed!! Maybe first run of the system?')

    def save_state(self, state):
        """Save state, different from DBs in that we always need it."""
        state_file = os.path.join(self.module_path, self.config['DIR']['cache_dir'], 'state')
        try:
            with open(state_file, 'w') as sf:
                json.dump(state, sf)
        except IOError:
            print('Write cache state failed!! Fatal error')

    def converter(self, o):
        """
        ``default=`` hook for json.dump (help for save_DBs).

        Datetimes/timedeltas are stringified; numpy scalars are cast to a
        plain int (np.float was removed in NumPy >= 1.20, and the original
        ``o.astype(int)`` produced an np.int64 that json cannot serialize).

        :raises TypeError: for any other type, per the json contract, instead
                           of silently serializing it as null.
        """
        if isinstance(o, (datetime.datetime, datetime.timedelta)):
            return str(o)
        if isinstance(o, (np.floating, np.integer)):
            return int(o)
        raise TypeError('Object of type %s is not JSON serializable' % type(o).__name__)

    def update_dbs(self):
        """Fetch new advisories, then recompute all package statistics."""
        now = datetime.datetime.now()
        new_adv = DebianAdvisory.checkDSAs(self.state, self.config)
        for dsa_id in new_adv:  # renamed from `id` to avoid shadowing the builtin
            if dsa_id in self.dsatable:
                logging.info(self.state['vendor'] + ' advisory ' + dsa_id + ' already known.\n')
            else:
                ## store advisory and parse it
                self.dsatable[dsa_id] = new_adv[dsa_id]
                self.updateCVETables(dsa_id)
        # recompute all pkg statistics
        for srcpkg in self.src2dsa:
            self.processCVEs(srcpkg, now)

    def updateCVETables(self, myid):
        """
        Parse one stored advisory into src2dsa, dsa2cve and cvetable.

        :param myid: advisory identifier (key into dsatable)
        """
        logging.info('Updating vulnerability database with advisory ' + self.state['vendor'] + str(myid) + ' \n')
        adv = self.dsatable[myid]
        dsastats = DebianAdvisory.parseDSAhtml(adv)
        dsastats = DebianAdvisory.fixDSAquirks(myid, dsastats)

        for srcpkg in dsastats[0]:
            self.src2dsa.setdefault(srcpkg, []).append(myid)
        self.dsa2cve[str(myid)] = dsastats[2]

        for cve_id in dsastats[2]:
            # No fetch CVE We use mongodb and cve-search
            cve = CVEParse.fetchCVE(cve_id, self.client)
            cvestats = CVEParse.parseCVE(cve_id, cve)
            # Clamp the CVE date to the DSA release date when it is missing
            # (0) or later than the advisory itself.
            finaldate = cvestats[0]
            if cvestats[0] > dsastats[1] or cvestats[0] == 0:
                finaldate = dsastats[1]
            self.cvetable[cve_id] = (finaldate, dsastats[1] - finaldate, cvestats[1], cvestats[2], cvestats[3])

    @staticmethod
    def print_help():
        """
        Prints help message to this vendor model.
        """
        print("Debian mstar model supports only update status and show actions.")

    def load_state(self):
        """
        Load state, different from DBs in that we always need it.
        Retrieves the cached state for current configuration.
        :return: state , error number
        """
        state_file = os.path.join(self.module_path, self.config['DIR']['cache_dir'], 'state')
        err = 0
        try:
            with open(state_file) as json_data:
                state = json.load(json_data)
        except FileNotFoundError:
            # Load default state - start from the beginning
            print('File not found in: ' + state_file)
            print('Loading default state.')
            state = dict()
            state['cache_dir'] = 'cache/'
            state['next_adv'] = 0
            state['next_fsa'] = 0
            state['Packages'] = ''
            state['Sources'] = ''
            state['Sha1Sums'] = ''
            err += 1
        return state, err

    def processCVEs(self, srcpkg, now):
        """
        compute and store MTBF, MTBR and Scores of each src pkg
        output: %src2mtbf
        (srcpkg=> ())
        """
        stats = [now, 0, 0, 0, 0, 0, 0]
        cvestats = dict()
        logging.info('Processing package: ' + srcpkg + '.\n')

        ## keep track of the number of low-medium-high severity vulnerabilities
        ## TODO see how cvss affects vulnerability prediction - if some packages show patterns
        with_cvss = dict()
        ## To eliminate duplicate cves
        haveseen = dict()

        ## cvestats = (date: number)
        for dsa_id in self.src2dsa[srcpkg]:
            for cve_id in self.dsa2cve[str(dsa_id)]:
                if cve_id in haveseen:
                    continue
                haveseen[cve_id] = 1
                tt = self.cvetable[cve_id][0]
                cvestats[tt] = cvestats.get(tt, 0) + 1
                stats[1] += 1

        ## Date at the moment taken from CVE? - not sure.
        ## with_cvss = (date: number low, number med, number high)
        ## NOTE(review): this pass deliberately does NOT skip duplicate CVEs,
        ## matching the original behaviour.
        for dsa_id in self.src2dsa[srcpkg]:
            for cve_id in self.dsa2cve[str(dsa_id)]:
                tt = self.cvetable[cve_id][0]
                try:
                    temp_cvss = float(self.cvetable[cve_id][2])
                except TypeError:
                    print(cve_id)
                    continue
                bucket = with_cvss.setdefault(tt, [0, 0, 0])
                if temp_cvss < 4.0:
                    bucket[0] += 1
                elif temp_cvss < 7.0:
                    bucket[1] += 1
                else:
                    bucket[2] += 1

        # Ignore pkgs with less than one incident, should not happen..
        if stats[1] < 1:
            return

        dates = sorted(cvestats, key=cvestats.get)
        try:
            stats[0] = dates[0]
        except IndexError:
            print(srcpkg + str(dates))
            stats[0] = 0

        self.format_data(srcpkg, with_cvss, self.pkg_with_cvss, True)
        self.format_data(srcpkg, cvestats, self.src2month, False)

    def format_data(self, pkg, cvestats, src2month, cvss):
        """
        Bucket per-date counts into a flat monthly series covering the years
        2000-2017 (18 * 12 = 216 slots) and store it in src2month[pkg].

        :param pkg: source package name (dict key in *src2month*)
        :param cvestats: {date or ISO string: count or [low, med, high]}
        :param src2month: output dict to fill
        :param cvss: True -> values are [low, med, high] triples and the
                     result is a nested list; False -> values are ints and
                     the result is a numpy float array
        """
        items = []
        for key, value in cvestats.items():
            # Keys may be datetimes or their string form (after a JSON round-trip).
            date = parser.parse(key) if isinstance(key, str) else key
            try:
                value = int(value)
            except TypeError:
                pass  # cvss triples stay as lists
            items.append((date, value))
        items.sort(key=lambda tup: tup[0])

        # One bucket per month, 2000..2017 inclusive.
        monthyear = []
        for _ in range(2000, 2018):
            if cvss:
                monthyear.append([[0, 0, 0] for _ in range(12)])
            else:
                monthyear.append([0] * 12)

        for date, value in items:
            if not 2000 <= date.year < 2018:
                # The original silently wrapped years < 2000 to a negative
                # index and crashed on years >= 2018; skip them instead.
                continue
            month_bucket = monthyear[date.year - 2000]
            if cvss:
                for sev in range(3):
                    month_bucket[date.month - 1][sev] += value[sev]
            else:
                month_bucket[date.month - 1] += value

        months_list = [item for sublist in monthyear for item in sublist]
        if not cvss:
            src2month[pkg] = np.array(months_list, dtype=np.float64)
        else:
            src2month[pkg] = months_list
        return
|