# DebianModel.py
  # Standard library
import configparser
import datetime
import json
import logging
import os

# Third party
import numpy as np
from dateutil import parser
from pymongo import MongoClient

# Project local
from .DebianAdvisory import DebianAdvisory
from .CVEParse import CVEParse
  # M-Star vendor model for Debian: cache tables, advisory updates, per-month statistics.
class DebianModel:
    """
    This class represents the M-Star Debian module. It is responsible for
    handling Debian package information.

    Constructing an instance reads the configuration file, loads the cached
    state and immediately runs the requested action:

    * ``'update'`` -- load the cached tables, process new advisories, then
      write the tables and the state back to the cache directory.
    * ``'status'`` / ``'show'`` -- load the cached tables only (reporting and
      plotting are not implemented in this class).
    * anything else -- print a short help message.
    """

    module_path = os.path.dirname(__file__)

    # Cache file names, in exactly the order load_dbs() returns their contents.
    _TABLE_NAMES = ('dsatable', 'src2dsa', 'dsa2cve', 'cvetable',
                    'src2month', 'src2sloccount', 'src2pop', 'src2deps')

    def __init__(self, action, configfile=os.path.join(module_path, 'config_default.txt')):
        """
        :param action: one of ``'update'``, ``'status'``, ``'show'``; any other
            value prints the help text.
        :param configfile: path of the INI configuration file; it must provide
            ``[DIR] cache_dir``.
        :raises IOError: if the configuration file cannot be read.
        """
        ## DBs to track
        self.dsatable = dict()
        self.src2dsa = dict()
        self.dsa2cve = dict()
        self.cvetable = dict()
        self.src2month = dict()
        self.src2sloccount = dict()
        self.src2pop = dict()
        self.src2deps = dict()
        self.pkg_with_cvss = dict()
        self.src2sum = dict()

        ## config
        self.configfile = configfile
        self.config = configparser.ConfigParser()
        # ConfigParser.read() returns the list of files it managed to parse
        if not self.config.read(configfile):
            raise IOError('Cannot open configuration file: ' + str(configfile))
        (self.state, self.err) = self.load_state()

        if action == 'update':
            self._load_tables_from_cache()
            # The Mongo client is only needed while CVE details are fetched.
            client = MongoClient()
            try:
                self.update_dbs(self.dsatable, client, self.src2dsa, self.dsa2cve,
                                self.src2month, self.cvetable, self.pkg_with_cvss)
            finally:
                client.close()
            self.save_DBs(self.dsatable, self.src2dsa, self.dsa2cve, self.cvetable,
                          self.src2month, self.src2sloccount, self.src2pop,
                          self.src2deps, self.src2sum)
            self.save_state(self.state)
        elif action in ('status', 'show'):
            self._load_tables_from_cache()
        else:
            # print_help is a staticmethod without parameters
            self.print_help()

    def _cache_dir(self):
        """Return the cache directory configured under ``[DIR] cache_dir``."""
        return os.path.join(self.module_path, self.config['DIR']['cache_dir'])

    def _load_tables_from_cache(self):
        """Load every cached table into the instance attribute of the same name."""
        (self.dsatable, self.src2dsa, self.dsa2cve, self.cvetable,
         self.src2month, self.src2sloccount, self.src2pop, self.src2deps) = self.load_dbs()

    def load_dbs(self):
        """
        Loads the required databases into the model. Can either be implemented
        as read from file, or read from DB. Currently reading it from JSON
        files in the cache folder.

        :return: tuple ``(dsatable, src2dsa, dsa2cve, cvetable, src2month,
            src2sloccount, src2pop, src2deps)``; a table whose file is missing
            or is not valid JSON is returned as an empty dict.
        """
        cache_dir = self._cache_dir()
        result = []
        for name in self._TABLE_NAMES:
            try:
                with open(os.path.join(cache_dir, name)) as t:
                    result.append(json.load(t))
            except (IOError, ValueError):
                print('Read cache ' + name + ' failed!! Maybe first run of the system?')
                result.append(dict())
        return tuple(result)

    def save_DBs(self, dsatable, src2dsa, dsa2cve, cvetable, src2month, src2sloccount, src2pop, src2deps, src2sum):
        """
        Write each table as JSON into its own file in the cache folder.

        ``src2month`` values are converted to plain ``int`` lists first, because
        they may be numpy arrays. ``src2sum`` is accepted for interface
        compatibility but is not written. A failed write is reported on stdout
        and does not stop the remaining tables from being written.
        """
        cache_dir = self._cache_dir()
        tables = {
            'dsatable': dsatable,
            'src2dsa': src2dsa,
            'dsa2cve': dsa2cve,
            'cvetable': cvetable,
            'src2deps': src2deps,
            'src2sloccount': src2sloccount,
            'src2pop': src2pop,
            'src2month': {pkg: [int(v) for v in months] for pkg, months in src2month.items()},
        }
        for name, table in tables.items():
            try:
                with open(os.path.join(cache_dir, name), 'w') as t:
                    json.dump(table, t, default=self.converter)
            except IOError:
                print('write cache ' + name + ' failed!! Fatal error')

    def save_state(self, state):
        """Save state, different from DBs in that we always need it"""
        state_file = os.path.join(self._cache_dir(), 'state')
        try:
            with open(state_file, 'w') as sf:
                json.dump(state, sf)
        except IOError:
            print('Write cache state failed!! Fatal error')

    def converter(self, o):
        """
        Help for save_DBs: ``default`` hook for ``json.dump``.

        :param o: an object the JSON encoder cannot serialise by itself.
        :return: ``str(o)`` for datetime/timedelta, a truncated ``int`` for
            numpy floating scalars, the plain Python value for other numpy
            scalars, and ``None`` (written as JSON ``null``) for anything else.
        """
        if isinstance(o, (datetime.datetime, datetime.timedelta)):
            return str(o)
        if isinstance(o, np.floating):
            # plain int, so the encoder does not call this hook again
            return int(o)
        if isinstance(o, np.generic):
            return o.item()
        # Best effort: keep writing the cache, but make the data loss visible.
        logging.warning('Cannot serialise object of type %s, writing null', type(o).__name__)
        return None

    def update_dbs(self, dsatable, client, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss):
        """
        Add advisories that are not yet in ``dsatable`` and recompute the
        statistics of every source package. All table arguments are updated in
        place.

        :param client: database client handed through to ``CVEParse.fetchCVE``.
        :return: always 0.
        """
        now = datetime.datetime.now()
        # presumably a mapping advisory id -> raw advisory; it is iterated and indexed below
        new_adv = DebianAdvisory.checkDSAs(self.state, self.config)
        # the default state created by load_state() has no 'vendor' entry
        vendor = str(self.state.get('vendor', 'Debian'))
        for adv_id in new_adv:
            if adv_id in dsatable:
                logging.info('%s advisory %s already known.\n', vendor, adv_id)
            else:
                ## store advisory and parse it
                dsatable[adv_id] = new_adv[adv_id]
                self.updateCVETables(adv_id, dsatable, self.state, src2dsa, dsa2cve, cvetable, client)

        # recompute all pkg statistics
        for srcpkg in src2dsa:
            self.processCVEs(srcpkg, now, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss, self.config)
        return 0

    def updateCVETables(self, myid, dsatable, state, src2dsa, dsa2cve, cvetable, client):
        """
        Parse advisory ``myid`` and record it in ``src2dsa``, ``dsa2cve`` and
        ``cvetable`` (all updated in place).

        :return: the updated ``cvetable``.
        """
        logging.info('Updating vulnerability database with advisory '
                     + str(state.get('vendor', 'Debian')) + str(myid) + ' \n')
        adv = dsatable[myid]
        dsastats = DebianAdvisory.parseDSAhtml(adv)
        dsastats = DebianAdvisory.fixDSAquirks(myid, dsastats)
        # NOTE(review): from its use below dsastats looks like
        # (source packages, advisory date, CVE ids) -- confirm in DebianAdvisory.
        adv_date = dsastats[1]

        for srcpkg in dsastats[0]:
            src2dsa.setdefault(srcpkg, []).append(myid)

        dsa2cve[str(myid)] = dsastats[2]

        for cve_id in dsastats[2]:
            # No fetch CVE. We use mongodb and cve-search
            cve = CVEParse.fetchCVE(cve_id, client)
            cvestats = CVEParse.parseCVE(cve_id, cve)
            cve_date = cvestats[0]
            # Test the 0 sentinel first: ordering an int against a datetime raises TypeError.
            if cve_date == 0 or cve_date > adv_date:
                finaldate = adv_date
            else:
                finaldate = cve_date
            cvetable[cve_id] = (finaldate, adv_date - finaldate, cvestats[1], cvestats[2], cvestats[3])
        return cvetable

    @staticmethod
    def print_help():
        """
        Prints help message to this vendor model.
        """
        print("Debian mstar model supports only update status and show actions.")

    def load_state(self):
        """
        Load state, different from DBs in that we always need it.
        Retrieves the cached state for current configuration.

        :return: ``(state, err)``; ``err`` is 0 when the state file was read
            and 1 when it does not exist and the default state is returned.
        """
        state_file = os.path.join(self._cache_dir(), 'state')
        err = 0
        try:
            with open(state_file) as json_data:
                state = json.load(json_data)
        except FileNotFoundError:
            # Load default state - start from the beginning
            print('File not found in: ' + state_file)
            print('Loading default state.')
            state = dict()
            state['cache_dir'] = 'cache/'
            state['next_adv'] = 0
            state['next_fsa'] = 0
            state['Packages'] = ''
            state['Sources'] = ''
            state['Sha1Sums'] = ''
            err += 1
        return state, err

    def processCVEs(self, pkg, now, src2dsa, dsa2cve, src2month, cvetable, pkg_with_cvss, config):
        """
        Compute the per-month statistics of source package ``pkg``.

        Stores the monthly number of distinct CVEs in ``src2month[pkg]`` and
        the monthly ``[low, medium, high]`` severity counts in
        ``pkg_with_cvss[pkg]`` (score < 4.0 is low, < 7.0 is medium, otherwise
        high). Nothing is stored when the package has no CVE.

        :param now: unused; kept for interface compatibility.
        :param config: unused; kept for interface compatibility.
        """
        logging.info('Processing package: ' + pkg + '.\n')

        ## cvestats = (date: number)
        cvestats = dict()
        ## with_cvss = (date: number low, number med, number high)
        ## TODO see how cvss affects vulnerability prediction - if some packages show patterns
        with_cvss = dict()
        ## To eliminate duplicate cves
        haveseen = set()

        for dsa_id in src2dsa[pkg]:
            for cve_id in dsa2cve[str(dsa_id)]:
                ## Date at the moment taken from CVE? - not sure.
                tt = cvetable[cve_id][0]
                if cve_id not in haveseen:
                    haveseen.add(cve_id)
                    cvestats[tt] = cvestats.get(tt, 0) + 1

                # NOTE(review): a CVE listed by several advisories is counted once
                # per advisory in the severity table (unlike cvestats above) --
                # behaviour kept as it was, confirm whether this is intended.
                try:
                    score = float(cvetable[cve_id][2])
                except (TypeError, ValueError):
                    # missing or non-numeric score
                    print(cve_id)
                    continue
                if score < 4.0:
                    level = 0
                elif score < 7.0:
                    level = 1
                else:
                    level = 2
                with_cvss.setdefault(tt, [0, 0, 0])[level] += 1

        # Ignore pkgs with less than one incident, should not happen..
        if not cvestats:
            return

        count = sum(cvestats.values())
        print(pkg + ' ' + str(count))

        self.format_data(pkg, with_cvss, pkg_with_cvss, True)
        self.format_data(pkg, cvestats, src2month, False)

    def format_data(self, pkg, cvestats, src2month, cvss, start_year=2000, end_year=2018):
        """
        Aggregate dated values into one bucket per calendar month and store the
        result in ``src2month[pkg]``.

        :param cvestats: mapping date -> value. Keys are datetime objects or
            date strings (parsed with ``dateutil``). Values are counts, or
            3-element ``[low, medium, high]`` lists when ``cvss`` is true.
        :param src2month: output mapping, updated in place.
        :param cvss: true -> the result is a list of ``[low, medium, high]``
            lists; false -> the result is a numpy float array of counts.
        :param start_year: first year covered (inclusive).
        :param end_year: end of the covered range (exclusive).

        The result has ``12 * (end_year - start_year)`` entries, January of
        ``start_year`` first. Dates outside the range are skipped with a
        warning.
        """
        n_months = max(end_year - start_year, 0) * 12
        if cvss:
            buckets = [[0, 0, 0] for _ in range(n_months)]
        else:
            buckets = [0] * n_months

        for when, value in cvestats.items():
            if isinstance(when, str):
                when = parser.parse(when)
            if not start_year <= when.year < end_year:
                # A negative index would silently wrap into the wrong bucket.
                logging.warning('%s: date %s is outside %d-%d and was ignored',
                                pkg, when, start_year, end_year - 1)
                continue
            slot = (when.year - start_year) * 12 + (when.month - 1)
            if cvss:
                for level in range(3):
                    buckets[slot][level] += value[level]
            else:
                buckets[slot] += int(value)

        if cvss:
            src2month[pkg] = buckets
        else:
            # values pass through float32 precision and are stored as float64
            src2month[pkg] = np.array(buckets, dtype=np.float32).astype(np.float64)
        return