# bounties.py (9.4 KB)

  1. import json
  2. from datetime import datetime
  3. from dateutil import parser
  4. import matplotlib.pyplot as plt
  5. import numpy as np
  6. import paper_plots as carlosplt
  7. from scipy.stats import shapiro
  8. from scipy.stats import ks_2samp
  9. import seaborn as sns
  10. import powerlaw
  11. import statsmodels.api as sm
  12. from statistics import median
  13. def main():
  14. data = dict()
  15. with open("reports.json","r") as fp:
  16. data = json.load(fp)
  17. reports_id = dict()
  18. reports_team = dict()
  19. teams = []
  20. sum_team = dict()
  21. flag = True
  22. for chunk in data:
  23. for page_id in chunk:
  24. for report in chunk[page_id]:
  25. reports_id[report['id']] = report
  26. team = report['team']['handle']
  27. if team in reports_team:
  28. reports_team[team].append(report)
  29. else:
  30. teams.append(team)
  31. reports_team[team] = []
  32. reports_team[team].append(report)
  33. for team in reports_team:
  34. sum_team[team] = len(reports_team[team])
  35. with open("reports_team.json", "w") as fp:
  36. json.dump(reports_team, fp)
  37. with open("sum_team.json", "w") as fp:
  38. json.dump(sum_team, fp)
  39. def plot_bounties(ff):
  40. reports_team = dict()
  41. sum_team = dict()
  42. with open("reports_team.json", "r") as fp:
  43. reports_team = json.load(fp)
  44. with open("sum_team.json", "r") as fp:
  45. sum_team = json.load(fp)
  46. if ff < 2:
  47. ibb_list = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash', 'ibb-nginx', 'ibb-perl', 'internet', 'ibb-openssl', 'ibb-apache']
  48. print('list follows')
  49. for j in ibb_list:
  50. print(reports_team[j])
  51. else:
  52. ibb_list = [team for team in reports_team]
  53. most_team = dict()
  54. sum_bounty_team = dict()
  55. for team in ibb_list:
  56. old = 0.0
  57. old_sum = 0.0
  58. for report in reports_team[team]:
  59. try:
  60. new = float(report['total_awarded_bounty_amount'])
  61. old_sum += new
  62. except KeyError:
  63. print('#'*80)
  64. print(report)
  65. print('Report id ', report['id'], ' - bounty not found')
  66. continue
  67. if new > old:
  68. old = new
  69. most_team[team] = old
  70. sum_bounty_team[team] = old_sum
  71. print(most_team)
  72. print(sum_bounty_team)
  73. month2sum = []
  74. month2money = []
  75. month2bountylist = []
  76. #Years: 2001-2018
  77. for i in range(12*18):
  78. month2sum.append(0)
  79. month2money.append(0.0)
  80. month2bountylist.append([])
  81. for team in ibb_list:
  82. for report in reports_team[team]:
  83. datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
  84. print(str(datetime_obj))
  85. month2sum[(int(datetime_obj.year)-2001)*12 + datetime_obj.month] += 1
  86. try:
  87. #if report['severity_rating'] == "high":
  88. if (ff==0 or ff ==2) or (report['severity_rating'] == "high") or (report['severity_rating'] == "critical"):
  89. month2money[(int(datetime_obj.year)-2001)*12 + datetime_obj.month] += float(report['total_awarded_bounty_amount'])
  90. month2bountylist[(int(datetime_obj.year)-2001)*12 + datetime_obj.month] += [float(report['total_awarded_bounty_amount'])]
  91. except KeyError:
  92. continue
  93. print(month2bountylist)
  94. #plt.plot(month2sum[-12*5:])
  95. #plt.show()
  96. #plt.plot(month2money[-12*5:])
  97. #plt.show()
  98. years = 18
  99. quarter_num = years*4
  100. quarter_sum = []
  101. quarter_av = []
  102. carlosplt.pre_paper_plot()
  103. quarter2bountylist = []
  104. quartersx = []
  105. for i in range(1,years+1):
  106. for j in range(1,5):
  107. if j==1:
  108. quartersx.append('Q' + str(j)+'\''+str(i).zfill(2))
  109. else:
  110. quartersx.append(' ')
  111. for j in range(quarter_num):
  112. temp2 = sum(month2money[3*j:3*(j+1)])
  113. temp3 = [item for sublist in month2bountylist[3*j:3*(j+1)] for item in sublist]
  114. temp1 = len(temp3)
  115. print(temp3)
  116. quarter_sum.append(temp1)
  117. quarter2bountylist.append(temp3)
  118. try:
  119. quarter_av.append(temp2/temp1)
  120. except ZeroDivisionError:
  121. quarter_av.append(0)
  122. n = len(quarter_sum[-4*5:])
  123. x = range(len(quarter_sum[-4*5:]))
  124. width = 1/2
  125. #plt.bar(x[-4*5:], quarter_sum[-4*5:], width, color='brown', label='regular support', edgecolor='black')
  126. #plt.xticks(np.arange(0,n),quartersx[-4*5:], rotation="vertical")
  127. #plt.ylabel('Number of rewards')
  128. #plt.xlabel('Quarter')
  129. #carlosplt.post_paper_plot(True,True,True)
  130. #plt.show()
  131. #
  132. #plt.bar(x[-4*5:], quarter_av[-4*5:], width, color='darkblue', label='regular support', edgecolor='black')
  133. #
  134. # plt.xticks(np.arange(0,n),quartersx[-4*5:], rotation="vertical")
  135. # plt.ylabel('Average bug price of IBB projects (USD)')
  136. # plt.xlabel('Quarter')
  137. # carlosplt.post_paper_plot(True,True,True)
  138. # plt.show()
  139. #print(quarter2bountylist)
  140. if ff==0:
  141. labeltext = 'IBB-all'
  142. elif ff==1:
  143. labeltext = 'IBB-high'
  144. elif ff==2:
  145. labeltext = 'All-all'
  146. elif ff==3:
  147. labeltext = 'All-high'
  148. ## Shapiro normality test for each quarter
  149. ## Added powerlaw test
  150. reference = []
  151. for i in quarter2bountylist:
  152. reference+=i
  153. print(reference)
  154. for i in quarter2bountylist:
  155. print(i)
  156. data = i
  157. if len(i)>3:
  158. #sns.distplot(i)
  159. #plt.show()
  160. stat, p = shapiro(data)
  161. print('Statistics=%.3f, p=%.3f' % (stat, p))
  162. # interpret
  163. alpha = 0.01
  164. if p > alpha:
  165. print('Sample looks Gaussian (fail to reject H0)')
  166. else:
  167. print('Sample does not look Gaussian (reject H0)')
  168. w,p = ks_2samp(i,reference)
  169. if p > alpha:
  170. print('Samples look similar')
  171. else:
  172. print('Samples do not look similar')
  173. #mydata = i
  174. #results=powerlaw.Fit(mydata, discrete=False, xmax=5000)
  175. #print('alpha = ',results.power_law.alpha)
  176. #print(results.truncated_power_law.alpha)
  177. #print('xmin = ',results.power_law.xmin)
  178. #print('xmax = ',results.power_law.xmax)
  179. #print('sigma = ',results.power_law.sigma)
  180. #print('D = ',results.power_law.D)
  181. #print(results.truncated_power_law.xmin)
  182. #print('xmax = ', results.truncated_power_law.xmax)
  183. #print(results.power_law.discrete)
  184. #print('lognormal mu: ',results.lognormal.mu)
  185. #print('lognormal sigma: ',results.lognormal.sigma)
  186. #fig = results.plot_ccdf(color = 'darkblue', linestyle='-', label='data')
  187. #results.power_law.plot_ccdf(color = 'darkgreen', ax=fig, label='power-law fit')
  188. #results.truncated_power_law.plot_ccdf(color = 'red', ax=fig)
  189. #results.lognormal_positive.plot_ccdf(color = 'yellow', ax=fig)
  190. #results.lognormal.plot_ccdf(color = 'brown', ax=fig)
  191. #results.exponential.plot_ccdf(color = 'orange', ax=fig)
  192. #plt.ylabel('ccdf')
  193. #plt.xlabel('Vulnerabilities')
  194. #fig.legend()
  195. #carlosplt.post_paper_plot(True,True,True)
  196. #plt.show()
  197. #R, p=results.distribution_compare('power_law','exponential')
  198. #print('Exponential: ',R,p)
  199. #R, p=results.distribution_compare('power_law','stretched_exponential')
  200. #print('Stretched exponential: ',R,p)
  201. #R, p=results.distribution_compare('power_law','truncated_power_law')
  202. #print('Power law truncated: ',R,p)
  203. #R, p=results.distribution_compare('power_law','lognormal_positive')
  204. #print('Lognormal positive: ',R,p)
  205. #R, p=results.distribution_compare('power_law','lognormal')
  206. #print('Lognormal: ',R,p)
  207. ## Linear regression of average and median
  208. # Average
  209. xx = []
  210. yy = quarter_av[-4*5:]
  211. y = []
  212. counter=0
  213. for i in yy:
  214. if i!=0:
  215. y.append(i)
  216. xx.append(counter)
  217. counter+=1
  218. X = sm.add_constant(xx)
  219. model = sm.OLS(y,X).fit()
  220. predictions = model.predict(X)
  221. plt.plot(xx,predictions)
  222. print(model.summary())
  223. print(model.summary().as_latex())
  224. xx = []
  225. yy = quarter2bountylist[-4*5:]
  226. y = []
  227. counter=0
  228. for i in yy:
  229. if i!=[]:
  230. y.append(median(i))
  231. xx.append(counter)
  232. counter+=1
  233. X = sm.add_constant(xx)
  234. model = sm.OLS(y,X).fit()
  235. predictions = model.predict(X)
  236. plt.plot(xx,predictions, color='darkred')
  237. print(model.summary())
  238. print(model.summary().as_latex())
  239. ## Create box plot
  240. bp = plt.boxplot((quarter2bountylist[-4*5:]), whis = [5,95], patch_artist=True, positions = x)
  241. plt.setp(bp['boxes'], color='black')
  242. plt.setp(bp['whiskers'], color='darkred')
  243. plt.setp(bp['caps'], color='darkred')
  244. plt.setp(bp['fliers'], markersize = 3.0)
  245. plt.yscale('log')
  246. plt.ylim(top=50000)
  247. plt.ylim(bottom=1)
  248. plt.xticks(np.arange(0,n),quartersx[-4*5:], rotation="vertical")
  249. plt.ylabel(labeltext)
  250. plt.xlabel('Quarter')
  251. carlosplt.post_paper_plot(True,True,True)
  252. if __name__ == "__main__":
  253. main()
  254. fig = plt.figure()
  255. carlosplt.pre_paper_plot()
  256. for i in range(4):
  257. ax = fig.add_subplot(2,2,i+1)
  258. plot_bounties(i)
  259. #plot_bounties(1)
  260. plt.show()