123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297 |
- import json
- from datetime import datetime
- from dateutil import parser
- import matplotlib.pyplot as plt
- import numpy as np
- import paper_plots as carlosplt
- from scipy.stats import shapiro
- from scipy.stats import ks_2samp
- import seaborn as sns
- import powerlaw
- import statsmodels.api as sm
- from statistics import median
def main():
    """Group the downloaded reports by team and store the result as JSON.

    Reads ``reports.json`` from the current directory. The file is iterated
    as a sequence of dict-like chunks, each mapping a page id to a list of
    report dicts; every report must provide ``report['team']['handle']``.

    Writes two files to the current directory:

    * ``reports_team.json`` - team handle -> list of that team's reports,
      in the order the reports were encountered.
    * ``sum_team.json`` - team handle -> number of reports of that team.

    Raises:
        KeyError: if a report has no ``team``/``handle`` entry.
    """
    with open("reports.json", "r", encoding="utf-8") as fp:
        data = json.load(fp)

    # setdefault keeps the teams in first-seen order, exactly like the
    # original if/else grouping did.
    reports_team = {}
    for chunk in data:
        for page_reports in chunk.values():
            for report in page_reports:
                handle = report['team']['handle']
                reports_team.setdefault(handle, []).append(report)

    sum_team = {team: len(reports) for team, reports in reports_team.items()}

    with open("reports_team.json", "w", encoding="utf-8") as fp:
        json.dump(reports_team, fp)
    with open("sum_team.json", "w", encoding="utf-8") as fp:
        json.dump(sum_team, fp)
# Calendar window covered by the monthly/quarterly bins: 2001-2018.
_FIRST_YEAR = 2001
_NUM_YEARS = 18
# Only the trailing five years (20 quarters) are analysed and plotted.
_RECENT_QUARTERS = 4 * 5

# Team handles that are selected when ff < 2.
_IBB_TEAMS = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash', 'ibb-nginx',
              'ibb-perl', 'internet', 'ibb-openssl', 'ibb-apache']

# y-axis label for each supported value of ff.
_LABELS = {0: 'IBB-all', 1: 'IBB-high', 2: 'All-all', 3: 'All-high'}


def _month_index(moment):
    """Return the zero-based month bin of *moment*, or None if out of window.

    ``moment.month`` is 1-based, hence the ``- 1``; without it every report
    is filed one month late and the quarter boundaries are shifted.
    """
    index = (moment.year - _FIRST_YEAR) * 12 + (moment.month - 1)
    if 0 <= index < 12 * _NUM_YEARS:
        return index
    return None


def _plot_trend(positions, values, **line_style):
    """Fit values ~ const + positions with OLS, plot the fit, print summaries."""
    if not positions:
        # An empty design matrix cannot be fitted; skip instead of crashing.
        print('No data points for regression - skipped')
        return
    X = sm.add_constant(positions)
    model = sm.OLS(values, X).fit()
    plt.plot(positions, model.predict(X), **line_style)
    summary = model.summary()
    print(summary)
    print(summary.as_latex())


def plot_bounties(ff):
    """Analyse and plot per-quarter bounty amounts for one filter mode.

    Reads ``reports_team.json`` (team handle -> list of report dicts) from
    the current directory, bins the awarded bounties by the month of each
    report's ``latest_disclosable_activity_at``, aggregates them into
    quarters and, for the last 20 quarters:

    * prints a Shapiro-Wilk normality test and a two-sample
      Kolmogorov-Smirnov test (against all bounties pooled) for every
      quarter that has more than three bounties,
    * plots OLS trend lines of the quarterly average and quarterly median,
    * draws a log-scale box plot (whiskers at the 5th/95th percentile).

    All drawing goes through ``matplotlib.pyplot``, i.e. into the current
    axes. The function also prints a lot of intermediate data.

    Args:
        ff: filter mode.
            0 - IBB teams, all severities ('IBB-all')
            1 - IBB teams, only "high"/"critical" severity ('IBB-high')
            2 - all teams, all severities ('All-all')
            3 - all teams, only "high"/"critical" severity ('All-high')

    Raises:
        ValueError: if *ff* is not one of the modes above.
        KeyError: if ff < 2 and one of the IBB team handles is missing from
            ``reports_team.json``.
    """
    if ff not in _LABELS:
        raise ValueError('ff must be one of %s, got %r' % (sorted(_LABELS), ff))
    labeltext = _LABELS[ff]

    with open("reports_team.json", "r", encoding="utf-8") as fp:
        reports_team = json.load(fp)

    if ff < 2:
        ibb_list = list(_IBB_TEAMS)
        print('list follows')
        for handle in ibb_list:
            print(reports_team[handle])
    else:
        ibb_list = list(reports_team)

    # Largest single bounty and total bounty per selected team (printed only).
    most_team = {}
    sum_bounty_team = {}
    for team in ibb_list:
        highest = 0.0
        total = 0.0
        for report in reports_team[team]:
            try:
                amount = float(report['total_awarded_bounty_amount'])
            except KeyError:
                print('#'*80)
                print(report)
                print('Report id ', report['id'], ' - bounty not found')
                continue
            total += amount
            if amount > highest:
                highest = amount
        most_team[team] = highest
        sum_bounty_team[team] = total
    print(most_team)
    print(sum_bounty_team)

    # Monthly bins: summed money and the individual bounty amounts.
    num_months = 12 * _NUM_YEARS
    month2money = [0.0] * num_months
    month2bountylist = [[] for _ in range(num_months)]
    all_severities = ff in (0, 2)
    for team in ibb_list:
        for report in reports_team[team]:
            datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
            print(str(datetime_obj))
            index = _month_index(datetime_obj)
            if index is None:
                # Previously: IndexError for late dates, silent wrap-around
                # into the wrong month for early ones.
                print('Report id ', report.get('id'),
                      ' - date outside plotted window, skipped')
                continue
            try:
                if all_severities or report['severity_rating'] in ("high", "critical"):
                    amount = float(report['total_awarded_bounty_amount'])
                    month2money[index] += amount
                    month2bountylist[index].append(amount)
            except KeyError:
                # No severity rating or no bounty recorded: not counted.
                continue
    print(month2bountylist)

    carlosplt.pre_paper_plot()

    # Tick labels: only the first quarter of each year is labelled (Q1'YY).
    quartersx = []
    for offset in range(_NUM_YEARS):
        quartersx.append('Q1\'' + str((_FIRST_YEAR + offset) % 100).zfill(2))
        quartersx.extend([' '] * 3)

    # Aggregate three consecutive months into one quarter.
    quarter_av = []
    quarter2bountylist = []
    for quarter in range(4 * _NUM_YEARS):
        months = slice(3 * quarter, 3 * (quarter + 1))
        bounties = [amount for month in month2bountylist[months] for amount in month]
        print(bounties)
        quarter2bountylist.append(bounties)
        if bounties:
            quarter_av.append(sum(month2money[months]) / len(bounties))
        else:
            quarter_av.append(0)

    recent_bounties = quarter2bountylist[-_RECENT_QUARTERS:]
    recent_av = quarter_av[-_RECENT_QUARTERS:]
    n = len(recent_bounties)
    x = range(n)

    ## Shapiro normality test for each quarter, plus a KS test of the
    ## quarter against all bounties pooled together.
    # NOTE: an exploratory power-law fit (powerlaw.Fit) used to live here as
    # commented-out code; it was never executed and has been removed.
    reference = []
    for bounties in quarter2bountylist:
        reference += bounties
    print(reference)
    alpha = 0.01
    for bounties in quarter2bountylist:
        print(bounties)
        if len(bounties) > 3:
            stat, p = shapiro(bounties)
            print('Statistics=%.3f, p=%.3f' % (stat, p))
            if p > alpha:
                print('Sample looks Gaussian (fail to reject H0)')
            else:
                print('Sample does not look Gaussian (reject H0)')
            _, p = ks_2samp(bounties, reference)
            if p > alpha:
                print('Samples look similar')
            else:
                print('Samples do not look similar')

    ## Linear regression of average and median
    # Quarters whose average is 0 (no bounties, or only zero bounties) are
    # left out, but the remaining points keep their box-plot x position.
    positions = [pos for pos, avg in enumerate(recent_av) if avg != 0]
    averages = [avg for avg in recent_av if avg != 0]
    _plot_trend(positions, averages)

    positions = [pos for pos, bounties in enumerate(recent_bounties) if bounties]
    medians = [median(bounties) for bounties in recent_bounties if bounties]
    _plot_trend(positions, medians, color='darkred')

    ## Create box plot
    bp = plt.boxplot(recent_bounties, whis=[5, 95], patch_artist=True, positions=x)
    plt.setp(bp['boxes'], color='black')
    plt.setp(bp['whiskers'], color='darkred')
    plt.setp(bp['caps'], color='darkred')
    plt.setp(bp['fliers'], markersize=3.0)
    plt.yscale('log')
    plt.ylim(top=50000)
    plt.ylim(bottom=1)
    plt.xticks(np.arange(0, n), quartersx[-_RECENT_QUARTERS:], rotation="vertical")
    plt.ylabel(labeltext)
    plt.xlabel('Quarter')
    carlosplt.post_paper_plot(True, True, True)
if __name__ == "__main__":
    # Regenerate the per-team JSON files, then draw one panel per filter
    # mode (0-3) on a 2x2 grid and show the figure.
    main()
    fig = plt.figure()
    carlosplt.pre_paper_plot()
    for mode in range(4):
        # The freshly added subplot becomes the current axes, so the
        # pyplot calls inside plot_bounties draw into it.
        fig.add_subplot(2, 2, mode + 1)
        plot_bounties(mode)
    plt.show()
|