plot_functions.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. import paper_plots as carlosplt
  2. import stat_tests as stats
  3. import matplotlib.pyplot as plt
  4. import numpy as np
  5. import pickle
  6. import vendors.debian.DLAmine as dla
  7. import json
  8. import csv
  9. class Plotter:
  10. def __init__(self, src2month, src2sloccount, pkg_with_cvss, years):
  11. self.src2month = src2month
  12. self.src2sloccount = src2sloccount
  13. self.pkg_with_cvss = pkg_with_cvss
  14. self.years = years
  15. self.src2month_temp = dict()
  16. self.src2month_loc = dict()
  17. self.l=self.m=self.h=self.udef=0
  18. ## Plot sum of vulnerabilities. Can filter out by severity using the booleans low, med, high, undefined
  19. def plot_all_severity(self, l, m, h, udef):
  20. self.l = l
  21. self.m = m
  22. self.h = h
  23. self.udef = udef
  24. for i in self.pkg_with_cvss:
  25. self.src2month_temp[i]=[]
  26. for j in range(len(self.src2month[i])):
  27. num_low = self.pkg_with_cvss[i][j][0]
  28. num_med = self.pkg_with_cvss[i][j][1]
  29. num_high = self.pkg_with_cvss[i][j][2]
  30. num_udef = self.pkg_with_cvss[i][j][3]
  31. tempp = 0
  32. if l:
  33. tempp += num_low
  34. if m:
  35. tempp += num_med
  36. if h:
  37. tempp += num_high
  38. if udef:
  39. tempp += num_udef
  40. self.src2month_temp[i].append(tempp)
  41. for i in self.src2month:
  42. self.src2month_loc[i]=self.src2month_temp[i][:] # don't cut data for 2018
  43. self.severityPlotter = Temp_Plotter(self)
  44. self.severityPlotter.plot_total()
  45. # Plot total number per year
  46. self.pkgnumPlotter = NumPackages_Plotter(self.severityPlotter)
  47. # Plot number of affected packages per year
  48. #self.pkgnumPlotter.plot_num_affected()
  49. # Plot average number of vulnerabilities per affected package per year
  50. #self.pkgnumPlotter.plot_average_number()
  51. # Plot regular and LTS for Wheezy
  52. self.wheezy = WheezyPloter(self)
  53. self.wheezy.plot_wheezy_lts()
  54. class Temp_Plotter:
  55. def __init__(self, plotter):
  56. self.src2month = plotter.src2month
  57. self.src2sloccount = plotter.src2sloccount
  58. self.pkg_with_cvss = plotter.pkg_with_cvss
  59. self.years = plotter.years
  60. self.src2month_loc = plotter.src2month_loc
  61. self.src2sum = dict()
  62. self.src2year = dict()
  63. self.src2lastyears = dict()
  64. self.src2dens = dict()
  65. self.src2month_temp = dict()
  66. self.year_sum = []
  67. self.year_num = []
  68. def plot_total(self):
  69. self.year_sum = [0] * self.years
  70. self.year_num = [0] * self.years
  71. for pkg in self.src2month_loc:
  72. for j in range(self.years):
  73. temp = sum(self.src2month_loc[pkg][12*(1+j):12*(2+j)])
  74. if (temp>0):
  75. self.year_num[j] += 1
  76. self.year_sum[j] += temp
  77. ## For last 2 years
  78. total = sum(self.src2month_loc[pkg][:])
  79. last_years = sum(self.src2month_loc[pkg][-24:])
  80. #print(pkg + '; ' + str(last_years))
  81. if (total>1):
  82. self.src2sum[pkg] = total
  83. self.src2lastyears[pkg] = last_years
  84. #calc total
  85. sum_total = 0
  86. one_only=0
  87. one_plus=0
  88. for p in self.src2month:
  89. sum_part = sum(self.src2month_loc[p][:])
  90. sum_total += sum_part
  91. if (sum_part == 1):
  92. one_only += 1
  93. elif (sum_part>1):
  94. one_plus += 1
  95. print('Total = ', sum_total)
  96. print('one_only = ', one_only)
  97. print('one_plus = ', one_plus)
  98. values = sorted(self.src2sum.values(),reverse=True)
  99. #print(values)
  100. keys = list(sorted(self.src2sum, key=self.src2sum.__getitem__, reverse=True))
  101. n = len(self.year_sum)
  102. yearsx = []
  103. for i in range(1,self.years+1):
  104. yearsx.append('\''+str(i).zfill(2))
  105. x = range(self.years)
  106. width = 1/2
  107. plt.bar(x, self.year_sum, width, color='darkblue', edgecolor='black')
  108. #plt.bar(x, average_per_year, width)
  109. plt.xticks(np.arange(0,n),yearsx)
  110. plt.ylabel('Total vulnerabilities')
  111. plt.xlabel('Year')
  112. carlosplt.post_paper_plot(True,True,True)
  113. plt.show()
  114. sum_all = sum(values)
  115. print("Total: ", sum_all)
  116. class NumPackages_Plotter:
  117. def __init__(self, plotter):
  118. self.plotter = plotter
  119. self.yearsx = []
  120. def plot_num_affected(self):
  121. ## Number of affected packages
  122. n = len(self.plotter.year_sum)
  123. for i in range(1,self.plotter.years+1):
  124. self.yearsx.append('\''+str(i).zfill(2))
  125. x = range(self.plotter.years)
  126. width = 1/2
  127. plt.bar(x, self.plotter.year_num, width, color='darkblue', edgecolor='black')
  128. plt.xticks(np.arange(0,n),self.yearsx)
  129. plt.ylabel('Number of affected packages')
  130. plt.xlabel('Year')
  131. carlosplt.post_paper_plot(True,True,True)
  132. plt.show()
  133. def plot_average_number(self):
  134. average_per_year = [0] * self.plotter.years
  135. for j in range(self.plotter.years):
  136. average_per_year[j] = self.plotter.year_sum[j]/float(self.plotter.year_num[j])
  137. x_values = list(range(1,self.plotter.years+1))
  138. slope = np.polyfit(x_values,average_per_year,1)
  139. print('Slope: ' + str(slope))
  140. n = len(self.plotter.year_sum)
  141. x = range(self.plotter.years)
  142. width = 1/2
  143. #plt.bar(x, year_sum, width)
  144. plt.bar(x, average_per_year, width, color='darkblue', edgecolor='black')
  145. plt.xticks(np.arange(0,n),self.yearsx)
  146. plt.ylabel('Average vulnerabilities per package')
  147. plt.xlabel('Year')
  148. carlosplt.post_paper_plot(True,True,True)
  149. plt.show()
  150. class WheezyPloter:
  151. def __init__(self, plotter):
  152. self.plotter = plotter
  153. self.yearsx = []
  154. self.l = plotter.l
  155. self.m = plotter.m
  156. self.h = plotter.h
  157. self.udef = plotter.udef
  158. def plot_wheezy_lts(self):
  159. quarter_num = self.plotter.years*4
  160. # Get LTS and plot
  161. try:
  162. with open("DLA_sum.txt","rb") as fp:
  163. ltslist = pickle.load(fp)
  164. with open("src2month_DLA.txt","rb") as fp:
  165. src2monthDLAs = pickle.load(fp)
  166. with open("DLA_src2month.json","r") as fp:
  167. src2monthDLA = json.load(fp)
  168. with open("DLA_withcvss.json","r") as fp:
  169. self.src2monthDLA_cvss = json.load(fp)
  170. # Fix this so it can compute when required
  171. #dla.permonthDLA(src2monthDLAs)
  172. with open("1000.csv","r") as csvfile:
  173. spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
  174. except IOError:
  175. ltslist = dla.getDLAs()
  176. with open("src2month_DLA.txt","rb") as fp:
  177. src2monthDLAs = pickle.load(fp)
  178. dla.permonthDLA(src2monthDLAs)
  179. return self.plot_wheezy_lts()
  180. ## Plot for wheezy
  181. quarter_sum = [0] * quarter_num
  182. DLA_temp=dict()
  183. ## Fix src2monthDLA_cvss
  184. for i in self.src2monthDLA_cvss:
  185. temp_list = []
  186. for j in self.src2monthDLA_cvss[i]:
  187. temp_list += j
  188. self.src2monthDLA_cvss[i] = temp_list
  189. ## Fix ltslist according to severity
  190. for i in self.src2monthDLA_cvss:
  191. DLA_temp[i]=[]
  192. for j in range(len(self.src2monthDLA_cvss[i])):
  193. num_low = self.src2monthDLA_cvss[i][j][0]
  194. num_med = self.src2monthDLA_cvss[i][j][1]
  195. num_high = self.src2monthDLA_cvss[i][j][2]
  196. num_udef = self.src2monthDLA_cvss[i][j][3]
  197. tempp = 0
  198. if self.l:
  199. tempp += num_low
  200. if self.m:
  201. tempp += num_med
  202. if self.h:
  203. tempp += num_high
  204. if self.udef:
  205. tempp += num_udef
  206. DLA_temp[i].append(tempp)
  207. ltslist = []
  208. for m in range((self.plotter.years+1)*12):
  209. s = 0
  210. #print(m)
  211. for i in DLA_temp:
  212. s += DLA_temp[i][m]
  213. ltslist.append(s)
  214. totalLTS = ltslist
  215. plt.bar([i for i in range(len(ltslist))],ltslist)
  216. plt.show()
  217. quartersx = []
  218. for i in range(1,self.plotter.years+1):
  219. for j in range(1,5):
  220. if j==1:
  221. quartersx.append('Q' + str(j)+'\''+str(i).zfill(2))
  222. else:
  223. quartersx.append(' ')
  224. for pkg in self.plotter.src2month_loc:
  225. for j in range(quarter_num):
  226. temp = sum(self.plotter.src2month_loc[pkg][12+(3*j):12+3*(j+1)])
  227. quarter_sum[j] += temp
  228. LTS_quarter = []
  229. for j in range(quarter_num):
  230. temp = sum(totalLTS[12+(3*j):12+3*(j+1)])
  231. LTS_quarter.append(temp)
  232. ## Print all LTS
  233. cut = 12*4+1
  234. n = len(quarter_sum)
  235. x = range(quarter_num)
  236. width = 1/2
  237. plt.bar(x, LTS_quarter, width, color='brown', label='regular support', edgecolor='black')
  238. plt.xticks(np.arange(0,n),quartersx, rotation="vertical")
  239. plt.ylabel('Vulnerabilities per quarter of Debian LTS')
  240. plt.xlabel('Quarter')
  241. carlosplt.post_paper_plot(True,True,True)
  242. plt.show()
  243. ## Filter only wheezy:
  244. quarter_sum_regular = [0] * (12*4+1) + quarter_sum[12*4+1:12*4+9] + [0] * 12
  245. quarter_sum_errors = [0] * (12*4 + 9) + quarter_sum[12*4+9:12*4+9+5] + [0] * 7
  246. LTS_quarter = [0] * (15*4+2) + LTS_quarter[15*4+2:-3]
  247. #print(quarter_sum_errors)
  248. cut = 12*4+1
  249. n = len(quarter_sum) - cut
  250. x = range(quarter_num-cut-3)
  251. width = 1/2
  252. #print(len(LTS_quarter))
  253. print(len(x))
  254. print(len(quarter_sum_regular[cut:]))
  255. print(len(quarter_sum_errors[cut:]))
  256. bar1 = plt.bar(x, quarter_sum_regular[cut:], width, color='darkblue', label='regular', edgecolor='black')
  257. bar12 = plt.bar(x, quarter_sum_errors[cut:], width, color='darkorange', label='regular*', edgecolor='black')
  258. bar2 = plt.bar(x, LTS_quarter[cut:], width, color='darkred', label ='long-term', edgecolor='black')
  259. plt.legend(handles=[bar1, bar12, bar2])
  260. plt.xticks(np.arange(0,n),quartersx[cut:], rotation="vertical")
  261. plt.ylabel('Vulnerabilities per quarter of Debian Wheezy')
  262. plt.xlabel('Quarter')
  263. carlosplt.post_paper_plot(True,True,True)
  264. plt.show()
  265. def plot_all(src2month, src2sloccount, pkg_with_cvss):
  266. years = 18
  267. # 2001-2000+years
  268. myplotter = Plotter(src2month, src2sloccount, pkg_with_cvss, years)
  269. # consider severity (low, medium, high, undefined)
  270. # Undefined is usual for newly added packages
  271. myplotter.plot_all_severity(True,True,True,True)