DebianModel.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. import configparser
  2. import json
  3. import datetime
  4. import logging
  5. from pymongo import MongoClient
  6. import numpy as np
  7. import os
  8. from dateutil import parser
  9. from .DebianAdvisory import DebianAdvisory
  10. from .CVEParse import CVEParse
  11. from ..VendorModel import VendorModel
  12. from .CSVReader import CSVReader
  13. from .Tests import Tests
  14. class DebianModel(VendorModel):
  15. """
  16. This class represents M-Star debian module. It is responsible for handling debian package infos.
  17. """
  18. module_path = os.path.dirname(__file__)
  19. def __init__(self, action, configfile=os.path.join(module_path, 'config_default.txt')):
  20. ## DBs to track
  21. """
  22. TODO: Tables to manage.
  23. """
  24. self.dsatable = dict()
  25. self.src2dsa = dict()
  26. self.dsa2cve = dict()
  27. self.cvetable = dict()
  28. self.src2month = dict()
  29. self.src2sloccount = dict()
  30. self.src2pop = dict()
  31. self.src2deps = dict()
  32. self.pkg_with_cvss = dict()
  33. self.src2sum = dict()
  34. ## config
  35. self.configfile = configfile
  36. self.config = configparser.ConfigParser()
  37. if not self.config.read(configfile):
  38. raise IOError('Cannot open configuration file: ')
  39. (self.state, self.err) = self.load_state()
  40. self.client = MongoClient()
  41. if action == 'update':
  42. self.load_dbs()
  43. self.update_dbs()
  44. self.store_dbs()
  45. self.save_state(self.state)
  46. # lstm.predict(src2month, src2sloccount, src2pop, src2deps)
  47. """
  48. with open('dsatable.txt', 'w') as file:
  49. file.write(str(sorted(self.dsatable.keys(), key=lambda x: str(x).lower())))
  50. with open('src2dsa.txt', 'w') as file:
  51. file.write(str(sorted(self.src2dsa.keys(), key=lambda x: str(x).lower())))
  52. with open('dsa2cve.txt', 'w') as file:
  53. file.write(str(sorted(self.dsa2cve.keys(), key=lambda x: str(x).lower())))
  54. with open('cvetable.txt', 'w') as file:
  55. file.write(str(sorted(self.cvetable.keys(), key=lambda x: str(x).lower())))
  56. with open('src2month.txt', 'w') as file:
  57. file.write(str(sorted(self.src2month.keys(), key=lambda x: str(x).lower())))
  58. with open('src2sloccount.txt', 'w') as file:
  59. file.write(str(sorted(self.src2sloccount.keys(), key=lambda x: str(x).lower())))
  60. with open('src2pop.txt', 'w') as file:
  61. file.write(str(sorted(self.src2pop.keys(), key=lambda x: str(x).lower())))
  62. with open('src2deps.txt', 'w') as file:
  63. file.write(str(sorted(self.src2deps.keys(), key=lambda x: str(x).lower())))
  64. """
  65. elif action == 'status':
  66. self.load_dbs()
  67. # aptsec_status(sys.argv[2])
  68. elif action == 'show':
  69. self.load_dbs()
  70. self.store_dbs()
  71. else:
  72. self.print_help(self)
  73. def get_src2month(self):
  74. return self.src2month
  75. def get_vendor_dir(self):
  76. return self.module_path
  77. def load_dbs(self):
  78. """
  79. Loads the required databases into the model. Can either be implemented as read from file, or read from DB.
  80. Currently reading it from files in the cache folder.
  81. """
  82. self.dsatable = self.load_single_db_from_cache('dsatable')
  83. self.src2dsa = self.load_single_db_from_cache('src2dsa')
  84. self.dsa2cve = self.load_single_db_from_cache('dsa2cve')
  85. self.cvetable = self.load_single_db_from_cache('cvetable')
  86. self.src2deps = self.load_single_db_from_cache('src2deps')
  87. self.src2month = self.load_single_db_from_cache('src2month')
  88. self.src2sloccount = self.load_single_db_from_cache('src2sloccount')
  89. self.src2pop = self.load_single_db_from_cache('src2pop')
  90. self.pkg_with_cvss = self.load_single_db_from_cache('pkg_with_cvss')
  91. def load_single_db_from_cache(self, file_name):
  92. cache_dir = os.path.join(self.module_path, self.config['DIR']['cache_dir'])
  93. try:
  94. with open(os.path.join(cache_dir, file_name)) as f:
  95. return json.load(f)
  96. except (IOError, ValueError):
  97. print('Read cache ' + file_name + ' failed!! Maybe first run of the system?')
  98. return dict()
  99. def store_dbs(self):
  100. self.store_db_single('dsatable', self.dsatable)
  101. self.store_db_single('src2dsa', self.src2dsa)
  102. self.store_db_single('dsa2cve', self.dsa2cve)
  103. self.store_db_single('cvetable', self.cvetable)
  104. self.store_db_single('src2deps', self.src2deps)
  105. self.store_db_single('src2sloccount', self.src2sloccount)
  106. self.store_db_single('src2pop', self.src2pop)
  107. self.store_db_single('pkg_with_cvss', self.pkg_with_cvss)
  108. # src2month needs special handling
  109. cache_src2month = os.path.join(self.module_path, self.config['DIR']['cache_dir'], 'src2month')
  110. int_list = dict()
  111. for element in self.src2month:
  112. for i in range(len(self.src2month[element])):
  113. if element in int_list:
  114. int_list[element].append(int(self.src2month[element][i]))
  115. else:
  116. int_list[element] = []
  117. int_list[element].append(int(self.src2month[element][i]))
  118. try:
  119. with open(cache_src2month, 'w') as fp:
  120. json.dump(int_list, fp, default=self.converter)
  121. except IOError:
  122. print('write cache src2month failed!! Fatal error')
  123. def store_db_single(self, file_name, db):
  124. cache_dir = os.path.join(self.module_path, self.config['DIR']['cache_dir'])
  125. try:
  126. with open(os.path.join(cache_dir, file_name), 'w') as f:
  127. json.dump(db, f, default=self.converter)
  128. except (IOError, ValueError):
  129. print('Read cache ' + file_name + ' failed!! Maybe first run of the system?')
  130. def save_state(self, state):
  131. """Save state, different from DBs in that we always need it"""
  132. state_file = os.path.join(self.module_path, self.config['DIR']['cache_dir'], 'state')
  133. try:
  134. with open(state_file, 'w') as sf:
  135. json.dump(state, sf)
  136. except IOError:
  137. print('Write cache state failed!! Fatal error')
  138. def converter(self, o):
  139. """Help for save_DBs"""
  140. if isinstance(o, datetime.datetime) or isinstance(o, datetime.timedelta):
  141. return str(o)
  142. if isinstance(o, np.float):
  143. return o.astype(int)
  144. def update_dbs(self):
  145. now = datetime.datetime.now()
  146. new_adv = DebianAdvisory.checkDSAs(self.state, self.config)
  147. for id in new_adv:
  148. if id in self.dsatable:
  149. logging.info(self.state['vendor'] + ' advisory ' + id + ' already known.\n')
  150. else:
  151. ## store advisory and parse it
  152. self.dsatable[id] = new_adv[id]
  153. self.updateCVETables(id)
  154. # recompute all pkg statistics
  155. for srcpkg in self.src2dsa:
  156. self.processCVEs(srcpkg, now)
  157. def updateCVETables(self, myid):
  158. logging.info('Updating vulnerability database with advisory ' + self.state['vendor'] + str(myid) + ' \n')
  159. adv = self.dsatable[myid]
  160. dsastats = DebianAdvisory.parseDSAhtml(adv)
  161. dsastats = DebianAdvisory.fixDSAquirks(myid, dsastats)
  162. for srcpkg in dsastats[0]:
  163. if srcpkg in self.src2dsa:
  164. self.src2dsa[srcpkg].append(myid)
  165. else:
  166. self.src2dsa[srcpkg] = []
  167. self.src2dsa[srcpkg].append(myid)
  168. self.dsa2cve[str(myid)] = dsastats[2]
  169. for cve_id in dsastats[2]:
  170. # No fetch CVE We use mongodb and cve-search
  171. cve = CVEParse.fetchCVE(cve_id, self.client)
  172. cvestats = CVEParse.parseCVE(cve_id, cve)
  173. finaldate = cvestats[0]
  174. if cvestats[0] > dsastats[1] or cvestats[0] == 0:
  175. finaldate = dsastats[1]
  176. self.cvetable[cve_id] = (finaldate, dsastats[1] - finaldate, cvestats[1], cvestats[2], cvestats[3], cvestats[4])
  177. def load_state(self):
  178. """
  179. Load state, different from DBs in that we always need it.
  180. Retrieves the cached state for current configuration.
  181. :return: state , error number
  182. """
  183. state_file = os.path.join(self.module_path, self.config['DIR']['cache_dir'], 'state')
  184. err = 0
  185. try:
  186. with open(state_file) as json_data:
  187. state = json.load(json_data)
  188. except FileNotFoundError:
  189. # Load default state - start from the beginning
  190. print('File not found in: ' + state_file)
  191. print('Loading default state.')
  192. state = dict()
  193. state['cache_dir'] = 'cache/'
  194. state['vendor'] = 'debian'
  195. state['next_adv'] = 0
  196. state['next_fsa'] = 0
  197. state['Packages'] = ''
  198. state['Sources'] = ''
  199. state['Sha1Sums'] = ''
  200. err += 1
  201. return state, err
  202. def processCVEs(self, srcpkg, now):
  203. """
  204. compute and store MTBF, MTBR and Scores of each src pkg
  205. output: %src2mtbf
  206. (srcpkg=> ())
  207. """
  208. stats = [now, 0, 0, 0, 0, 0, 0]
  209. cvestats = dict()
  210. logging.info('Processing package: ' + srcpkg + '.\n')
  211. ## keep track of the number of low-medium-high severity vulnerabilities
  212. ## TODO see how cvss affects vulnerability prediction - if some packages show patterns
  213. with_cvss = dict()
  214. ## To eliminate duplicate cves
  215. haveseen = dict()
  216. ## cvestats = (date: number)
  217. for dsa_id in self.src2dsa[srcpkg]:
  218. for cve_id in self.dsa2cve[str(dsa_id)]:
  219. if cve_id in haveseen:
  220. continue
  221. else:
  222. haveseen[cve_id] = 1
  223. tt = self.cvetable[cve_id][0]
  224. if tt in cvestats:
  225. cvestats[tt] += 1
  226. else:
  227. cvestats[tt] = 1
  228. stats[1] += 1
  229. ## Date at the moment taken from CVE? - not sure.
  230. haveseen = dict()
  231. ## with_cvss = (date: number low, number med, number high, number undefined)
  232. for dsa_id in self.src2dsa[srcpkg]:
  233. for cve_id in self.dsa2cve[str(dsa_id)]:
  234. tt = self.cvetable[cve_id][0]
  235. try:
  236. temp_cvss = float(self.cvetable[cve_id][2])
  237. except TypeError:
  238. print(cve_id)
  239. continue
  240. if cve_id in haveseen:
  241. continue
  242. else:
  243. haveseen[cve_id] = 1
  244. if tt in with_cvss:
  245. if (temp_cvss < 0.0):
  246. with_cvss[tt][3] += 1
  247. elif (temp_cvss < 4.0):
  248. with_cvss[tt][0] += 1
  249. elif (temp_cvss < 7.0):
  250. with_cvss[tt][1] += 1
  251. else:
  252. with_cvss[tt][2] += 1
  253. else:
  254. with_cvss[tt] = [0, 0, 0, 0]
  255. if (temp_cvss < 0.0):
  256. with_cvss[tt][3] += 1
  257. elif (temp_cvss < 4.0):
  258. with_cvss[tt][0] += 1
  259. elif (temp_cvss < 7.0):
  260. with_cvss[tt][1] += 1
  261. else:
  262. with_cvss[tt][2] += 1
  263. # Ignore pkgs with less than one incident, should not happen..
  264. if stats[1] < 1:
  265. return
  266. dates = sorted(cvestats, key=cvestats.get)
  267. try:
  268. stats[0] = dates[0]
  269. except IndexError:
  270. print(srcpkg + str(dates))
  271. stats[0] = 0
  272. count = sum(cvestats.values())
  273. self.format_data(srcpkg, with_cvss, self.pkg_with_cvss, True)
  274. self.format_data(srcpkg, cvestats, self.src2month, False)
  275. def format_data(self, pkg, cvestats, src2temp, cvss):
  276. x = []
  277. y = []
  278. monthyear = []
  279. year = []
  280. temp_items = list(cvestats.items())
  281. items = []
  282. for data_dict in temp_items:
  283. if isinstance(data_dict[0], str):
  284. tmpx = (parser.parse(data_dict[0]))
  285. else:
  286. tmpx = data_dict[0]
  287. x.append(tmpx)
  288. try:
  289. tmpy = int(data_dict[1])
  290. except TypeError:
  291. tmpy = data_dict[1]
  292. y.append(tmpy)
  293. items.append((tmpx, tmpy))
  294. items.sort(key=lambda tup: tup[0])
  295. for i in range(2000, 2019):
  296. temp = []
  297. for j in range(12):
  298. if cvss:
  299. temp.append([0, 0, 0, 0])
  300. else:
  301. temp.append(0)
  302. monthyear.append(temp)
  303. if pkg=='linux':
  304. print(x)
  305. for i in range(len(x)):
  306. if cvss:
  307. tmp0 = y[i][0]
  308. tmp1 = y[i][1]
  309. tmp2 = y[i][2]
  310. tmp3 = y[i][3]
  311. monthyear[x[i].year - 2000][x[i].month - 1][0] += tmp0
  312. monthyear[x[i].year - 2000][x[i].month - 1][1] += tmp1
  313. monthyear[x[i].year - 2000][x[i].month - 1][2] += tmp2
  314. monthyear[x[i].year - 2000][x[i].month - 1][3] += tmp3
  315. else:
  316. monthyear[x[i].year - 2000][x[i].month - 1] += y[i]
  317. months_list = [item for sublist in monthyear for item in sublist]
  318. if not cvss:
  319. temp_months = np.zeros(len(months_list))
  320. i = 0
  321. for element in months_list:
  322. temp_months[i] = np.float32(element)
  323. i += 1
  324. src2temp[pkg] = temp_months
  325. else:
  326. src2temp[pkg] = months_list
  327. return
  328. def unifySrcName(self, name):
  329. return DebianAdvisory.unifySrcName(name)
  330. def performTests(self):
  331. #Tests.system_input_prediction_error_test(self)
  332. #Tests.random_input_prediction_error_test(self)
  333. Tests.relativity_of_expectations_test(self)
  334. def load_latest_prediction_model(self):
  335. return CSVReader.read_csv_prediction_errorcompl(os.path.join(self.module_path, 'models', 'latest_model.csv'), self, 9)
  336. def gen_model_opinion_set(self, filename, month, norm_param):
  337. """
  338. Generates opinion set from the model input.
  339. :param filename: model (package:prediction:errorcompl:f)
  340. :param month: month parameter of the model
  341. :param norm_param: normalization factor of the model
  342. :return: dictionary of opinions
  343. """
  344. res = CSVReader.read_csv_prediction_errorcompl(filename, self, month, norm_param=norm_param)
  345. # with open('vendors/debian/models/dummy_model_' + str(month) + '.csv', 'w') as file:
  346. # for key in res:
  347. # file.write(key + ":" + str(res[key].t) + ":" + str(res[key].c) + ":" + str(res[key].f) + "\n")
  348. return res
  349. @staticmethod
  350. def print_help():
  351. """
  352. Prints help message to this vendor model.
  353. """
  354. print("Debian mstar model supports only update status and show actions.")