plot_functions.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. import paper_plots as carlosplt
  2. import stat_tests as stats
  3. import matplotlib.pyplot as plt
  4. import numpy as np
  5. import pickle
  6. import vendors.debian.DLAmine as dla
  7. import json
  8. import csv
  9. from matplotlib.font_manager import FontProperties
  10. import seaborn as sns
  11. import laplace_tests as lptests
  12. import statsmodels.api as sm
  13. class Plotter:
  14. def __init__(self, src2month, src2sloccount, pkg_with_cvss, years):
  15. self.src2month = src2month
  16. self.src2sloccount = src2sloccount
  17. self.pkg_with_cvss = pkg_with_cvss
  18. self.years = years
  19. self.src2month_temp = dict()
  20. self.src2month_loc = dict()
  21. self.l=self.m=self.h=self.udef=0
  22. def plot_severity_percentage(self):
  23. num_low = [0] * (self.years+1)
  24. num_med = [0] * (self.years+1)
  25. num_high = [0] * (self.years+1)
  26. num_udef = [0] * (self.years+1)
  27. total = [0] * (self.years+1)
  28. for i in self.pkg_with_cvss:
  29. for j in range(len(self.src2month[i])):
  30. try:
  31. num_low[j//12] += self.pkg_with_cvss[i][j][0]
  32. num_med[j//12] += self.pkg_with_cvss[i][j][1]
  33. num_high[j//12] += self.pkg_with_cvss[i][j][2]
  34. num_udef[j//12] += self.pkg_with_cvss[i][j][3]
  35. total[j//12] += self.pkg_with_cvss[i][j][3] + self.pkg_with_cvss[i][j][2] + self.pkg_with_cvss[i][j][1] + self.pkg_with_cvss[i][j][0]
  36. except IndexError:
  37. print(j//12)
  38. raise IndexError('List index out of bounds')
  39. ## Generate percentage
  40. for i in range(self.years + 1):
  41. try:
  42. num_low[i] = num_low[i]/total[i]
  43. num_med[i] = num_med[i]/total[i]
  44. num_high[i] = num_high[i]/total[i]
  45. num_udef[i] = num_udef[i]/total[i]
  46. except ZeroDivisionError:
  47. num_low[i] = 0
  48. num_med[i] = 0
  49. num_high[i] = 0
  50. num_udef[i] = 0
  51. print(num_low)
  52. print(num_high)
  53. carlosplt.pre_paper_plot()
  54. pal = ['#fee8c8', '#fdbb84', '#e34a33', 'grey']
  55. x = range(2001, 2001 + self.years)
  56. labels_cvss = ['low', 'medium', 'high', 'N/A']
  57. h = plt.stackplot(x, [num_low[1:], num_med[1:], num_high[1:], num_udef[1:]], colors = pal, alpha=0.9, labels = labels_cvss)
  58. plt.xticks(x)
  59. plt.legend(loc='upper left', handles = h[::-1])
  60. carlosplt.post_paper_plot(True,True,True)
  61. plt.show()
  62. ## Plot sum of vulnerabilities. Can filter out by severity using the booleans low, med, high, undefined
  63. def plot_all_severity(self, l, m, h, udef):
  64. carlosplt.pre_paper_plot()
  65. self.l = l
  66. self.m = m
  67. self.h = h
  68. self.udef = udef
  69. for i in self.pkg_with_cvss:
  70. self.src2month_temp[i]=[]
  71. for j in range(len(self.src2month[i])):
  72. num_low = self.pkg_with_cvss[i][j][0]
  73. num_med = self.pkg_with_cvss[i][j][1]
  74. num_high = self.pkg_with_cvss[i][j][2]
  75. num_udef = self.pkg_with_cvss[i][j][3]
  76. tempp = 0
  77. if l:
  78. tempp += num_low
  79. if m:
  80. tempp += num_med
  81. if h:
  82. tempp += num_high
  83. if udef:
  84. tempp += num_udef
  85. self.src2month_temp[i].append(tempp)
  86. for i in self.src2month:
  87. self.src2month_loc[i]=self.src2month_temp[i][:] # don't cut data for 2018
  88. self.severityPlotter = Temp_Plotter(self)
  89. self.severityPlotter.plot_total()
  90. # Plot total number per year
  91. self.pkgnumPlotter = NumPackages_Plotter(self.severityPlotter)
  92. # Plot number of affected packages per year
  93. self.pkgnumPlotter.plot_num_affected()
  94. # Plot average number of vulnerabilities per affected package per year
  95. self.pkgnumPlotter.plot_average_number()
  96. # Plot regular and LTS for Wheezy
  97. self.wheezy = WheezyPloter(self)
  98. self.wheezy.plot_wheezy_lts()
  99. class Temp_Plotter:
  100. def __init__(self, plotter):
  101. self.src2month = plotter.src2month
  102. self.src2sloccount = plotter.src2sloccount
  103. self.pkg_with_cvss = plotter.pkg_with_cvss
  104. self.years = plotter.years
  105. self.src2month_loc = plotter.src2month_loc
  106. self.src2sum = dict()
  107. self.src2year = dict()
  108. self.src2lastyears = dict()
  109. self.src2dens = dict()
  110. self.src2month_temp = dict()
  111. self.year_sum = []
  112. self.year_num = []
  113. def plot_total(self):
  114. self.year_sum = [0] * self.years
  115. self.year_num = [0] * self.years
  116. for pkg in self.src2month_loc:
  117. for j in range(self.years):
  118. temp = sum(self.src2month_loc[pkg][12*(1+j):12*(2+j)])
  119. if (temp>0):
  120. self.year_num[j] += 1
  121. self.year_sum[j] += temp
  122. ## For last 2 years
  123. total = sum(self.src2month_loc[pkg][:])
  124. last_years = sum(self.src2month_loc[pkg][-24:])
  125. #print(pkg + '; ' + str(last_years))
  126. if (total>1):
  127. self.src2sum[pkg] = total
  128. self.src2lastyears[pkg] = last_years
  129. #calc total
  130. sum_total = 0
  131. one_only=0
  132. one_plus=0
  133. for p in self.src2month:
  134. sum_part = sum(self.src2month_loc[p][:])
  135. sum_total += sum_part
  136. if (sum_part == 1):
  137. one_only += 1
  138. elif (sum_part>1):
  139. one_plus += 1
  140. print('Total = ', sum_total)
  141. print('one_only = ', one_only)
  142. print('one_plus = ', one_plus)
  143. values = sorted(self.src2sum.values(),reverse=True)
  144. #print(values)
  145. keys = list(sorted(self.src2sum, key=self.src2sum.__getitem__, reverse=True))
  146. n = len(self.year_sum)
  147. yearsx = []
  148. for i in range(1,self.years+1):
  149. if i%2==1:
  150. yearsx.append('\''+str(i).zfill(2))
  151. else:
  152. yearsx.append('')
  153. x = range(self.years)
  154. width = 1/2
  155. plt.bar(x, self.year_sum, width, color='darkblue', edgecolor='black')
  156. #plt.bar(x, average_per_year, width)
  157. plt.xticks(np.arange(0,n),yearsx)
  158. plt.ylabel('Total vulnerabilities')
  159. plt.xlabel('Year')
  160. carlosplt.post_paper_plot(True,True,True)
  161. sum_all = sum(values)
  162. print("Total: ", sum_all)
  163. ## Linear regression model
  164. X = sm.add_constant(x)
  165. y = self.year_sum
  166. model = sm.OLS(y,X).fit()
  167. predictions = model.predict(X)
  168. plt.plot(predictions)
  169. plt.show()
  170. print(model.summary())
  171. print(model.summary().as_latex())
  172. class NumPackages_Plotter:
  173. def __init__(self, plotter):
  174. self.plotter = plotter
  175. self.yearsx = []
  176. def plot_num_affected(self):
  177. ## Number of affected packages
  178. n = len(self.plotter.year_sum)
  179. for i in range(1,self.plotter.years+1):
  180. if i%2==1:
  181. self.yearsx.append('\''+str(i).zfill(2))
  182. else:
  183. self.yearsx.append('')
  184. x = range(self.plotter.years)
  185. width = 1/2
  186. plt.bar(x, self.plotter.year_num, width, color='darkblue', edgecolor='black')
  187. plt.xticks(np.arange(0,n),self.yearsx)
  188. plt.ylabel('Number of affected packages')
  189. plt.xlabel('Year')
  190. carlosplt.post_paper_plot(True,True,True)
  191. plt.show()
  192. def plot_average_number(self):
  193. average_per_year = [0] * self.plotter.years
  194. for j in range(self.plotter.years):
  195. average_per_year[j] = self.plotter.year_sum[j]/float(self.plotter.year_num[j])
  196. x_values = list(range(1,self.plotter.years+1))
  197. slope = np.polyfit(x_values,average_per_year,1)
  198. print('Slope: ' + str(slope))
  199. n = len(self.plotter.year_sum)
  200. x = range(self.plotter.years)
  201. width = 1/2
  202. #plt.bar(x, year_sum, width)
  203. plt.bar(x, average_per_year, width, color='darkblue', edgecolor='black')
  204. plt.xticks(np.arange(0,n),self.yearsx)
  205. plt.ylabel('Average vulnerabilities per package')
  206. plt.xlabel('Year')
  207. carlosplt.post_paper_plot(True,True,True)
  208. ## Linear regression
  209. X = sm.add_constant(x)
  210. y = average_per_year
  211. model = sm.OLS(y,X).fit()
  212. predictions = model.predict(X)
  213. plt.plot(predictions)
  214. plt.show()
  215. print(model.summary())
  216. print(model.summary().as_latex())
  217. class WheezyPloter:
  218. def __init__(self, plotter):
  219. self.plotter = plotter
  220. self.yearsx = []
  221. self.l = plotter.l
  222. self.m = plotter.m
  223. self.h = plotter.h
  224. self.udef = plotter.udef
  225. def plot_wheezy_lts(self):
  226. quarter_num = self.plotter.years*4
  227. # Get LTS and plot
  228. try:
  229. with open("DLA_sum.txt","rb") as fp:
  230. ltslist = pickle.load(fp)
  231. with open("src2month_DLA.txt","rb") as fp:
  232. src2monthDLAs = pickle.load(fp)
  233. with open("DLA_src2month.json","r") as fp:
  234. src2monthDLA = json.load(fp)
  235. with open("DLA_withcvss.json","r") as fp:
  236. self.src2monthDLA_cvss = json.load(fp)
  237. # Fix this so it can compute when required
  238. #dla.permonthDLA(src2monthDLAs)
  239. with open("1000.csv","r") as csvfile:
  240. spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
  241. except IOError:
  242. ltslist = dla.getDLAs()
  243. with open("src2month_DLA.txt","rb") as fp:
  244. src2monthDLAs = pickle.load(fp)
  245. dla.permonthDLA(src2monthDLAs)
  246. return self.plot_wheezy_lts()
  247. ## Plot for wheezy
  248. quarter_sum = [0] * quarter_num
  249. DLA_temp=dict()
  250. ## Fix src2monthDLA_cvss
  251. for i in self.src2monthDLA_cvss:
  252. temp_list = []
  253. for j in self.src2monthDLA_cvss[i]:
  254. temp_list += j
  255. self.src2monthDLA_cvss[i] = temp_list
  256. ## Fix ltslist according to severity
  257. for i in self.src2monthDLA_cvss:
  258. DLA_temp[i]=[]
  259. for j in range(len(self.src2monthDLA_cvss[i])):
  260. num_low = self.src2monthDLA_cvss[i][j][0]
  261. num_med = self.src2monthDLA_cvss[i][j][1]
  262. num_high = self.src2monthDLA_cvss[i][j][2]
  263. num_udef = self.src2monthDLA_cvss[i][j][3]
  264. tempp = 0
  265. if self.l:
  266. tempp += num_low
  267. if self.m:
  268. tempp += num_med
  269. if self.h:
  270. tempp += num_high
  271. if self.udef:
  272. tempp += num_udef
  273. DLA_temp[i].append(tempp)
  274. ltslist = []
  275. for m in range((self.plotter.years+1)*12):
  276. s = 0
  277. #print(m)
  278. for i in DLA_temp:
  279. s += DLA_temp[i][m]
  280. ltslist.append(s)
  281. totalLTS = ltslist
  282. plt.bar([i for i in range(len(ltslist))],ltslist)
  283. plt.show()
  284. quartersx = []
  285. for i in range(1,self.plotter.years+1):
  286. for j in range(1,5):
  287. if j==1:
  288. quartersx.append('Q' + str(j)+'\''+str(i).zfill(2))
  289. else:
  290. quartersx.append(' ')
  291. for pkg in self.plotter.src2month_loc:
  292. for j in range(quarter_num):
  293. temp = sum(self.plotter.src2month_loc[pkg][12+(3*j):12+3*(j+1)])
  294. quarter_sum[j] += temp
  295. LTS_quarter = []
  296. for j in range(quarter_num):
  297. temp = sum(totalLTS[12+(3*j):12+3*(j+1)])
  298. LTS_quarter.append(temp)
  299. ## Print all LTS
  300. cut = 12*4+1
  301. n = len(quarter_sum)
  302. x = range(quarter_num)
  303. width = 1/2
  304. plt.bar(x, LTS_quarter, width, color='brown', label='regular support', edgecolor='black')
  305. plt.xticks(np.arange(0,n),quartersx, rotation="vertical")
  306. plt.ylabel('Vulnerabilities per quarter of Debian LTS')
  307. plt.xlabel('Quarter')
  308. carlosplt.post_paper_plot(True,True,True)
  309. plt.show()
  310. ## Filter only wheezy:
  311. quarter_sum_regular = [0] * (12*4+1) + quarter_sum[12*4+1:12*4+9] + [0] * 12
  312. quarter_sum_errors = [0] * (12*4 + 9) + quarter_sum[12*4+9:12*4+9+5] + [0] * 7
  313. LTS_quarter = [0] * (15*4+2) + LTS_quarter[15*4+2:-3]
  314. whole_w = quarter_sum_regular[:-12] + quarter_sum_errors[12*4+9:-7] + LTS_quarter[15*4+2:]
  315. #print(quarter_sum_errors)
  316. cut = 12*4+1
  317. n = len(quarter_sum) - cut
  318. x = range(quarter_num-cut-3)
  319. width = 1/2
  320. #print(len(LTS_quarter))
  321. print(len(x))
  322. print(len(quarter_sum_regular[cut:]))
  323. print(len(quarter_sum_errors[cut:]))
  324. bar1 = plt.bar(x, quarter_sum_regular[cut:], width, color='darkblue', label='regular', edgecolor='black')
  325. bar12 = plt.bar(x, quarter_sum_errors[cut:], width, color='darkorange', label='regular*', edgecolor='black')
  326. bar2 = plt.bar(x, LTS_quarter[cut:], width, color='darkred', label ='long-term', edgecolor='black')
  327. plt.legend(handles=[bar1, bar12, bar2])
  328. plt.xticks(np.arange(0,n),quartersx[cut:], rotation="vertical")
  329. plt.ylabel('Vulnerabilities per quarter')
  330. plt.xlabel('Quarter')
  331. carlosplt.post_paper_plot(True,True,True)
  332. ## Linear Regression
  333. print(len(x))
  334. print(len(whole_w[cut:]))
  335. X = sm.add_constant(x)
  336. y = whole_w[cut:]
  337. model = sm.OLS(y,X).fit()
  338. predictions = model.predict(X)
  339. plt.plot(predictions)
  340. plt.show()
  341. print(model.summary())
  342. print(model.summary().as_latex())
  343. def plot_all(src2month, src2sloccount, pkg_with_cvss):
  344. years = 18
  345. # 2001-2000+years
  346. myplotter = Plotter(src2month, src2sloccount, pkg_with_cvss, years)
  347. # consider severity (low, medium, high, undefined)
  348. # Undefined is usual for newly added packages
  349. myplotter.plot_all_severity(True,True,True,True)
  350. myplotter.plot_severity_percentage()