Browse Source

several changes

Nikolaos Alexopoulos 5 years ago
parent
commit
49460fbe17
25 changed files with 1001 additions and 2185 deletions
  1. 0 0
      DLA_src2month.json
  2. BIN
      DLA_sum.txt
  3. 0 0
      DLA_withcvss.json
  4. 0 0
      DLA_withcwe.json
  5. 262 0
      bounties.py
  6. 74 0
      cwecounts.json
  7. 4 1
      laplace.py
  8. 140 17
      laplace_tests.py
  9. 0 2154
      log.txt
  10. 28 0
      more_types.py
  11. 1 0
      openjdk.json
  12. 70 0
      openjdk.tex
  13. 115 0
      php.tex
  14. 1 0
      php5.json
  15. 97 8
      plot_functions.py
  16. 129 2
      plot_types.py
  17. 63 0
      powelaw.py
  18. 0 0
      reports.json
  19. 0 0
      reports_team.json
  20. 0 0
      src2month_DLA.json
  21. BIN
      src2month_DLA.txt
  22. 0 0
      sum_team.json
  23. 5 2
      types.py
  24. 6 0
      vendors/debian/DLAmine.py
  25. 6 1
      vendors/debian/src_name_unifications.txt

File diff suppressed because it is too large
+ 0 - 0
DLA_src2month.json


BIN
DLA_sum.txt


File diff suppressed because it is too large
+ 0 - 0
DLA_withcvss.json


File diff suppressed because it is too large
+ 0 - 0
DLA_withcwe.json


+ 262 - 0
bounties.py

@@ -1,4 +1,15 @@
 import json
+from datetime import datetime
+from dateutil import parser
+import matplotlib.pyplot as plt
+import numpy as np
+import paper_plots as carlosplt
+from scipy.stats import shapiro
+from scipy.stats import ks_2samp
+import seaborn as sns
+import powerlaw
+import statsmodels.api as sm
+from statistics import median
 
 def main():
     data = dict()
@@ -31,5 +42,256 @@ def main():
     with open("sum_team.json", "w") as fp:
         json.dump(sum_team, fp)
 
+def _month_slot(moment, first_year=2001):
+    """Return the zero-based month slot of *moment*, counted from January of *first_year*."""
+    # datetime months run 1..12, so subtract one to make January the first slot.
+    return (moment.year - first_year) * 12 + (moment.month - 1)
+
+
+def _plot_ols_trend(xs, ys, **plot_kwargs):
+    """Fit ``ys ~ const + xs`` by OLS, draw the fitted line and print the summaries."""
+    if not xs:
+        print('No data points for trend line - skipped')
+        return
+    design = sm.add_constant(xs)
+    model = sm.OLS(ys, design).fit()
+    plt.plot(xs, model.predict(design), **plot_kwargs)
+    print(model.summary())
+    print(model.summary().as_latex())
+
+
+def plot_bounties(ff):
+    """Draw per-quarter bounty box plots with two OLS trend lines on the current axes.
+
+    Reads ``reports_team.json`` from the working directory, buckets the
+    awarded bounties by month (2001-2018) and by quarter, prints normality
+    (Shapiro) and two-sample KS test results for every quarter with more
+    than three bounties, and plots the last 20 quarters.
+
+    ``ff`` selects the data set and the y-axis label:
+        0 -- teams in the hard-coded ``ibb_list``, every report ('IBB-all')
+        1 -- teams in ``ibb_list``, only "high"/"critical" reports ('IBB-high')
+        2 -- every team, every report ('All-all')
+        3 -- every team, only "high"/"critical" reports ('All-high')
+
+    Raises:
+        ValueError: if ``ff`` is not 0, 1, 2 or 3.
+    """
+    labels = {0: 'IBB-all', 1: 'IBB-high', 2: 'All-all', 3: 'All-high'}
+    if ff not in labels:
+        # Fail early instead of hitting an unbound label after all the work.
+        raise ValueError('ff must be 0, 1, 2 or 3, got %r' % (ff,))
+    labeltext = labels[ff]
+
+    with open("reports_team.json", "r") as fp:
+        reports_team = json.load(fp)
+
+    if ff < 2:
+        ibb_list = ['ibb-php', 'ibb-python', 'ibb-data', 'ibb-flash', 'ibb-nginx', 'ibb-perl', 'internet', 'ibb-openssl', 'ibb-apache']
+        print('list follows')
+        for j in ibb_list:
+            print(reports_team[j])
+    else:
+        ibb_list = list(reports_team)
+
+    # Largest single bounty and total bounty per team (printed for inspection).
+    most_team = dict()
+    sum_bounty_team = dict()
+    for team in ibb_list:
+        largest = 0.0
+        total = 0.0
+        for report in reports_team[team]:
+            try:
+                amount = float(report['total_awarded_bounty_amount'])
+            except KeyError:
+                print('#'*80)
+                print(report)
+                print('Report id ', report['id'], ' - bounty not found')
+                continue
+            total += amount
+            if amount > largest:
+                largest = amount
+        most_team[team] = largest
+        sum_bounty_team[team] = total
+
+    print(most_team)
+    print(sum_bounty_team)
+
+    # Years: 2001-2018, one slot per month.
+    years = 18
+    month2money = [0.0] * (12 * years)
+    month2bountylist = [[] for _ in range(12 * years)]
+
+    all_severities = (ff == 0 or ff == 2)
+    for team in ibb_list:
+        for report in reports_team[team]:
+            datetime_obj = parser.parse(report['latest_disclosable_activity_at'])
+            print(str(datetime_obj))
+            slot = _month_slot(datetime_obj)
+            if not 0 <= slot < len(month2money):
+                # A negative slot would silently wrap around, a large one would crash.
+                print('Report dated ', datetime_obj, ' is outside 2001-2018 - skipped')
+                continue
+            try:
+                if all_severities or report['severity_rating'] in ("high", "critical"):
+                    amount = float(report['total_awarded_bounty_amount'])
+                    month2money[slot] += amount
+                    month2bountylist[slot].append(amount)
+            except KeyError:
+                # Reports without a severity rating or bounty are left out.
+                continue
+
+    print(month2bountylist)
+
+    carlosplt.pre_paper_plot()
+
+    # Only the first quarter of each year gets a visible tick label.
+    quartersx = []
+    for i in range(1, years+1):
+        quartersx.append("Q1'" + str(i).zfill(2))
+        quartersx.extend([' ']*3)
+
+    quarter_av = []
+    quarter2bountylist = []
+    for j in range(years*4):
+        bounties = [item for sublist in month2bountylist[3*j:3*(j+1)] for item in sublist]
+        print(bounties)
+        quarter2bountylist.append(bounties)
+        if bounties:
+            quarter_av.append(sum(month2money[3*j:3*(j+1)])/len(bounties))
+        else:
+            quarter_av.append(0)
+
+    # Everything plotted below covers the last five years (20 quarters).
+    recent = quarter2bountylist[-4*5:]
+    n = len(recent)
+    x = range(n)
+
+    ## Shapiro normality test for each quarter, plus a two-sample KS test of
+    ## the quarter against the pooled bounties of all quarters.
+    reference = []
+    for i in quarter2bountylist:
+        reference += i
+    print(reference)
+
+    for i in quarter2bountylist:
+        print(i)
+        if len(i) > 3:
+            stat, p = shapiro(i)
+            print('Statistics=%.3f, p=%.3f' % (stat, p))
+            # interpret
+            alpha = 0.01
+            if p > alpha:
+                print('Sample looks Gaussian (fail to reject H0)')
+            else:
+                print('Sample does not look Gaussian (reject H0)')
+
+            _, p = ks_2samp(i, reference)
+            if p > alpha:
+                print('Samples look similar')
+            else:
+                print('Samples do not look similar')
+
+    ## Linear regression of average and median over the quarters that have data
+    # Average
+    recent_av = quarter_av[-4*5:]
+    xx = [k for k, value in enumerate(recent_av) if value != 0]
+    _plot_ols_trend(xx, [recent_av[k] for k in xx])
+
+    # Median
+    xx = [k for k, bounties in enumerate(recent) if bounties]
+    _plot_ols_trend(xx, [median(recent[k]) for k in xx], color='darkred')
+
+    ## Create box plot
+    bp = plt.boxplot(recent, whis = [5,95], patch_artist=True, positions = x)
+    plt.setp(bp['boxes'], color='black')
+    plt.setp(bp['whiskers'], color='darkred')
+    plt.setp(bp['caps'], color='darkred')
+    plt.setp(bp['fliers'], markersize = 3.0)
+    plt.yscale('log')
+    plt.ylim(top=50000)
+    plt.ylim(bottom=1)
+    plt.xticks(np.arange(0,n),quartersx[-4*5:], rotation="vertical")
+    plt.ylabel(labeltext)
+    plt.xlabel('Quarter')
+    carlosplt.post_paper_plot(True,True,True)
+
 if __name__ == "__main__":
     main()
+    # One figure with a 2x2 grid: one panel per plot_bounties mode (0-3).
+    fig = plt.figure()
+    carlosplt.pre_paper_plot()
+    for panel, mode in enumerate(range(4), start=1):
+        ax = fig.add_subplot(2, 2, panel)
+        plot_bounties(mode)
+    plt.show()

+ 74 - 0
cwecounts.json

@@ -0,0 +1,74 @@
+"0": 2954
+"CWE-310": 125
+"CWE-399": 659
+"CWE-119": 1651
+"Unknown": 423
+"CWE-264": 556
+"CWE-189": 493
+"CWE-20": 785
+"CWE-415": 24
+"CWE-200": 428
+"CWE-94": 119
+"CWE-89": 83
+"CWE-22": 120
+"CWE-352": 65
+"CWE-59": 71
+"CWE-79": 395
+"CWE-476": 104
+"CWE-134": 42
+"CWE-287": 88
+"CWE-362": 104
+"CWE-254": 89
+"CWE-19": 58
+"CWE-16": 39
+"CWE-78": 15
+"CWE-255": 20
+"CWE-190": 118
+"CWE-91": 1
+"CWE-295": 10
+"CWE-77": 28
+"CWE-284": 202
+"CWE-416": 164
+"CWE-17": 57
+"CWE-369": 19
+"CWE-125": 160
+"CWE-74": 19
+"CWE-601": 11
+"CWE-331": 1
+"CWE-345": 5
+"CWE-787": 82
+"CWE-704": 12
+"CWE-361": 3
+"CWE-18": 2
+"CWE-199": 1
+"CWE-400": 21
+"CWE-285": 11
+"CWE-502": 9
+"CWE-191": 9
+"CWE-417": 2
+"CWE-346": 1
+"CWE-93": 4
+"CWE-320": 2
+"CWE-388": 11
+"CWE-611": 8
+"CWE-485": 1
+"CWE-918": 2
+"CWE-682": 3
+"CWE-532": 3
+"CWE-129": 1
+"CWE-798": 1
+"CWE-123": 1
+"CWE-640": 1
+"CWE-444": 1
+"CWE-943": 1
+"CWE-754": 6
+"CWE-347": 9
+"CWE-338": 1
+"CWE-184": 2
+"CWE-275": 2
+"CWE-185": 2
+"CWE-326": 2
+"CWE-665": 1
+"CWE-113": 1
+"CWE-384": 1
+"CWE-613": 1

+ 4 - 1
laplace.py

@@ -6,7 +6,10 @@ def laplace_test(ttr, tend):
     s = 0
     for ti in ttr:
         s += ti - (tend/2)
-    z = (math.sqrt(12*r)*s)/(r*tend)
+    try:
+        z = (math.sqrt(12*r)*s)/(r*tend)
+    except ZeroDivisionError:
+        return(0)
     return(z)
 
 def main():

+ 140 - 17
laplace_tests.py

@@ -1,4 +1,13 @@
 from load_data import load_DBs
+from pymongo import MongoClient
+import datetime
+import random
+import laplace as lp
+import matplotlib.pyplot as plt
+import paper_plots as carlosplt
+import numpy as np
+import json
+
 class Mydata:
     def __init__(self, load):
         if load:
@@ -6,30 +15,144 @@ class Mydata:
         else:
             print('no load command given')
 
+
 def main():
-    data = Mydata(True)
-    print('Data loading done')
-    #
-    years = 18
-    # 2001-2018
-    
-    i = calc_laplace(data, years)
-    i.calculate()
+    # Draw the four Laplace plots on one 2x2 figure. Each laplace_* call
+    # plots through pyplot, i.e. onto the subplot created just before it.
+    carlosplt.pre_paper_plot()
+    fig = plt.figure()
+    tester = calc_laplace()
+    fig.add_subplot(2,2,1)
+    tester.laplace_php([])
+    fig.add_subplot(2,2,2)
+    tester.laplace_openjdk([])
+    fig.add_subplot(2,2,3)
+    tester.laplace_wheezy([], False)
+    fig.add_subplot(2,2,4)
+    tester.laplace_wheezy([], True)
+    carlosplt.post_paper_plot(True,True,True)
+    plt.show()
 
 class calc_laplace:
-    def __init__(self,data,years):
-        self. src2dsa = data.src2dsa
-        self.dsa2cve = data.dsa2cve
-        self.cvetable = data.cvetable
-        self.pkg_with_cvss = data.pkg_with_cvss
-        self.95percent = 1.96
-        self.90percent = 1.64
-
-    def laplace_wheezy(self):
+    def __init__(self):
+        """Create a tester; no instance state is set up."""
+
+    def getDLAdates(self):
+        """Placeholder: not implemented, does nothing."""
+        pass
 
+    def calculate(self):
+        """Placeholder: not implemented, does nothing."""
+        pass
+
+    def laplace_php(self, vlist):
+        """Plot the Laplace test for the monthly php5 counts.
+
+        The counts are read from ``php5.json``. If that file cannot be opened
+        or parsed, ``vlist`` is used instead and written to ``php5.json``.
+        The series passed on starts at month ``12*7`` and drops the last six
+        entries.
+        """
+        try:
+            with open("php5.json", "r") as fp:
+                new_vlist = json.load(fp)
+        except (OSError, ValueError):
+            # Missing/unreadable file (OSError) or invalid JSON (JSONDecodeError
+            # is a ValueError). Anything else, e.g. Ctrl-C, must propagate.
+            new_vlist = vlist
+            with open("php5.json", "w") as fp:
+                json.dump(new_vlist,fp)
+
+        print(len(new_vlist))
+        year_start = 7
+        self.laplace_process_list(new_vlist[12*year_start:-6], 'php5', year_start)
+
+    def laplace_openjdk(self, vlist):
+        """Plot the Laplace test for the monthly openjdk-7 counts.
+
+        The counts are read from ``openjdk.json``. If that file cannot be
+        opened or parsed, ``vlist`` is used instead and written to
+        ``openjdk.json``. The series passed on starts at month ``12*13+6``.
+        """
+        try:
+            with open("openjdk.json", "r") as fp:
+                new_vlist = json.load(fp)
+        except (OSError, ValueError):
+            # Missing/unreadable file (OSError) or invalid JSON (JSONDecodeError
+            # is a ValueError). Anything else, e.g. Ctrl-C, must propagate.
+            new_vlist = vlist
+            with open("openjdk.json", "w") as fp:
+                json.dump(new_vlist,fp)
+        print(len(new_vlist))
+        year_start = 13
+        self.laplace_process_list(new_vlist[12*year_start+6:], 'openjdk-7', year_start)
+
+    def laplace_wheezy(self, vlist, high):
+        """Plot the Laplace test for the combined wheezy DSA + DLA monthly totals.
+
+        ``vlist`` is accepted but not used. With ``high`` false, the plain
+        monthly counts are summed over all packages (``src2month`` from
+        ``load_DBs`` and ``DLA_src2month.json``). With ``high`` true, element
+        ``[2]`` of every monthly entry of ``pkg_with_cvss`` and
+        ``DLA_withcvss.json`` is summed instead (presumably the high-severity
+        count -- confirm against the loaders).
+        """
+        (_, _, _, _, src2month, _, _, _, pkg_with_cvss, _) = load_DBs()
+        print(len(src2month['linux']))
+
+        if high:
+            monthly, dla_file = pkg_with_cvss, "DLA_withcvss.json"
+        else:
+            monthly, dla_file = src2month, "DLA_src2month.json"
+        with open(dla_file, "r") as fp:
+            dlas = json.load(fp)
+
+        def counted(entry):
+            # Per-month entry -> the number that goes into the total.
+            return entry[2] if high else entry
+
+        # DSA side: one flat list of months per package.
+        total = [0]*len(monthly['linux'])
+        for pkg in monthly:
+            for slot, entry in enumerate(monthly[pkg]):
+                total[slot] += counted(entry)
+
+        # DLA side: per package a list of years, each a list of months.
+        total_dla = [0]*(len(dlas['linux'])*12)
+        print(total_dla)
+        for pkg in dlas:
+            for year_idx, year_entries in enumerate(dlas[pkg]):
+                for month_idx, entry in enumerate(year_entries):
+                    total_dla[year_idx*12 + month_idx] += counted(entry)
+        print(total)
+        print(total_dla)
+
+        dsa_wheezy = total[12*13 + 4: 12*16 + 3]
+        dla_wheezy = total_dla[-12*3 + 4:-7]
+        print(len(dsa_wheezy))
+        print(len(dla_wheezy))
+
+        wheezy = dsa_wheezy + dla_wheezy
+        print(len(wheezy))
+
+        self.laplace_process_list(wheezy, 'wheezy-high' if high else 'wheezy', 13)
+
     def laplace_package(self, pkg):
         pass
 
+    def laplace_process_list(self,vlist, pkg, year):
+        """Plot the running Laplace test value for a list of monthly counts.
+
+        Every month is treated as ``24*30`` hours. Each month's count is
+        spread over that many distinct, randomly sampled hours of the month,
+        and ``lp.laplace_test`` is evaluated on all instants seen so far at
+        the end of each month. The curve is drawn with pyplot together with
+        horizontal reference lines at +/-1.96 and +/-2.33.
+
+        Args:
+            vlist: number of events per month, in chronological order.
+            pkg: name used as the y-axis label. For ``'wheezy'`` the curve is
+                padded with 4 empty months before and 8 after.
+            year: two-digit year (offset from 2000) of the first tick label.
+        """
+        hours_per_month = 24*30
+        n = len(vlist)
+        instances = []
+        laplace_values = []
+        print(pkg)
+        print(vlist)
+        for i, month in enumerate(vlist, start=1):
+            instances += random.sample(range(hours_per_month*(i-1), hours_per_month*i), month)
+            laplace_values.append(lp.laplace_test(instances, hours_per_month*i))
+
+        final_laplace = lp.laplace_test(instances, hours_per_month*n+1)
+        print(final_laplace)
+
+        if pkg == 'wheezy':
+            x = range(n+12)
+        else:
+            x = range(n)
+        print(n)
+        carlosplt.post_paper_plot(True,True,True)
+
+        if pkg == 'wheezy':
+            plt.plot(x,[None]*4+laplace_values+[None]*8)
+        else:
+            plt.plot(x,laplace_values)
+        plt.axhline(y=1.96, linestyle = ':', color = 'orange')
+        plt.axhline(y=2.33, linestyle = ':', color = 'red')
+        plt.axhline(y=-1.96, linestyle = ':', color = 'orange')
+        plt.axhline(y=-2.33, linestyle = ':', color = 'red')
+        # Build exactly one label per tick: a fixed n//12+1 labels is one too
+        # many whenever n is a multiple of 12, which matplotlib rejects.
+        ticks = np.arange(0, n, step=12)
+        yearsx = ['\''+str(year+2000+k)[-2:] for k in range(len(ticks))]
+        plt.xticks(ticks, yearsx)
+        plt.ylabel(pkg)
+
+
+
 if __name__ == "__main__":
     main()

File diff suppressed because it is too large
+ 0 - 2154
log.txt


+ 28 - 0
more_types.py

@@ -0,0 +1,28 @@
+from load_data import load_DBs
+from plot_functions import plot_all
+from plot_types import TypePlotter
+import json
+
+class Mydata:
+    """Container for the tables returned by ``load_DBs``."""
+
+    def __init__(self, load):
+        """Load all tables when ``load`` is truthy; otherwise only print a notice."""
+        if not load:
+            print('no load command given')
+            return
+        (
+            self.dsatable,
+            self.src2dsa,
+            self.dsa2cve,
+            self.cvetable,
+            self.src2month,
+            self.src2sloccount,
+            self.src2pop,
+            self.src2deps,
+            self.pkg_with_cvss,
+            self.src2cwe,
+        ) = load_DBs()
+
+def main():
+    """Count how often each CWE value occurs in the CVE table and save it to ``cwecounts.json``."""
+    data = Mydata(True)
+    cwe_counts = {}
+    for entry in data.cvetable.values():
+        # Field 5 of a CVE table entry is used as the CWE key.
+        cwe = entry[5]
+        cwe_counts[cwe] = cwe_counts.get(cwe, 0) + 1
+    with open('cwecounts.json', 'w') as outfile:
+        json.dump(cwe_counts, outfile)
+
+
+
+# Run the CWE count export only when this file is executed as a script.
+if __name__ == "__main__":
+    main()

+ 1 - 0
openjdk.json

@@ -0,0 +1 @@
+[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 0, 0, 0, 0, 1, 0, 1, 1, 0, 22, 0, 0, 15, 0, 0, 9, 0, 0, 9, 0, 1, 7, 1, 0, 13, 0, 0, 18, 0, 0, 6, 0, 1, 5, 0, 0, 5, 0, 0, 5, 0, 0, 11, 0, 0, 4, 0, 0, 0, 21, 0, 12, 0, 0]

+ 70 - 0
openjdk.tex

@@ -0,0 +1,70 @@
+% This file was created by matplotlib2tikz v0.6.14.
+\begin{tikzpicture}
+
+\begin{axis}[
+xlabel={Quarter},
+ylabel={Vulnerabilities per quarter of package openjdk-7},
+xmin=-1.225, xmax=20.225,
+ymin=0, ymax=31.5,
+width=\figurewidth,
+height=\figureheight,
+xtick={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19},
+xticklabels={1/'13,2/'13,3/'13,4/'13,1/'14,2/'14,3/'14,4/'14,1/'15,2/'15,3/'15,4/'15,1/'16,2/'16,3/'16,4/'16,1/'17,2/'17,3/'17,4/'17},
+xticklabel style = {rotate=90},
+tick pos=left,
+x grid style={white!69.019607843137251!black},
+y grid style={white!69.019607843137251!black},
+legend cell align={left},
+legend entries={{before openjdk-8},{after openjdk-8}}
+]
+\addlegendimage{ybar,ybar legend,draw=black,fill=blue!54.509803921568626!black};
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:-0.25,0) rectangle (axis cs:0.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:0.75,0) rectangle (axis cs:1.25,30);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:1.75,0) rectangle (axis cs:2.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:2.75,0) rectangle (axis cs:3.25,1);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:3.75,0) rectangle (axis cs:4.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:4.75,0) rectangle (axis cs:5.25,23);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:5.75,0) rectangle (axis cs:6.25,15);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:6.75,0) rectangle (axis cs:7.25,11);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:7.75,0) rectangle (axis cs:8.25,12);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:8.75,0) rectangle (axis cs:9.25,8);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:9.75,0) rectangle (axis cs:10.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:10.75,0) rectangle (axis cs:11.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:11.75,0) rectangle (axis cs:12.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:12.75,0) rectangle (axis cs:13.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:13.75,0) rectangle (axis cs:14.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:14.75,0) rectangle (axis cs:15.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:15.75,0) rectangle (axis cs:16.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:16.75,0) rectangle (axis cs:17.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:17.75,0) rectangle (axis cs:18.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:18.75,0) rectangle (axis cs:19.25,0);
+\addlegendimage{ybar,ybar legend,draw=black,fill=red!54.509803921568626!black};
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:-0.25,0) rectangle (axis cs:0.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:0.75,0) rectangle (axis cs:1.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:1.75,0) rectangle (axis cs:2.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:2.75,0) rectangle (axis cs:3.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:3.75,0) rectangle (axis cs:4.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:4.75,0) rectangle (axis cs:5.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:5.75,0) rectangle (axis cs:6.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:6.75,0) rectangle (axis cs:7.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:7.75,0) rectangle (axis cs:8.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:8.75,0) rectangle (axis cs:9.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:9.75,0) rectangle (axis cs:10.25,14);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:10.75,0) rectangle (axis cs:11.25,18);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:11.75,0) rectangle (axis cs:12.25,7);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:12.75,0) rectangle (axis cs:13.25,6);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:13.75,0) rectangle (axis cs:14.25,5);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:14.75,0) rectangle (axis cs:15.25,5);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:15.75,0) rectangle (axis cs:16.25,12);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:16.75,0) rectangle (axis cs:17.25,6);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:17.75,0) rectangle (axis cs:18.25,22);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:18.75,0) rectangle (axis cs:19.25,14);
+\path [opacity=0] (axis cs:1,13)
+--(axis cs:1,13);
+
+\path [opacity=0] (axis cs:13,1)
+--(axis cs:13,1);
+
+\end{axis}
+
+\end{tikzpicture}

+ 115 - 0
php.tex

@@ -0,0 +1,115 @@
+% This file was created by matplotlib2tikz v0.6.14.
+\begin{tikzpicture}
+
+\begin{axis}[
+xlabel={Quarter},
+ylabel={Vulnerabilities per quarter of package php5},
+xmin=-2.325, xmax=43.325,
+ymin=0, ymax=33.6,
+width=\figurewidth,
+height=\figureheight,
+xtick={0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41},
+xticklabels={1/'07,2/'07,3/'07,4/'07,1/'08,2/'08,3/'08,4/'08,1/'09,2/'09,3/'09,4/'09,1/'10,2/'10,3/'10,4/'10,1/'11,2/'11,3/'11,4/'11,1/'12,2/'12,3/'12,4/'12,1/'13,2/'13,3/'13,4/'13,1/'14,2/'14,3/'14,4/'14,1/'15,2/'15,3/'15,4/'15,1/'16,2/'16,3/'16,4/'16,1/'17,2/'17},
+xticklabel style = {rotate=90},
+tick pos=left,
+x grid style={white!69.019607843137251!black},
+y grid style={white!69.019607843137251!black},
+legend cell align={left},
+legend entries={{before php7},{after php7}},
+legend style={at={(0.03,0.97)}, anchor=north west}
+]
+\addlegendimage{ybar,ybar legend,draw=black,fill=blue!54.509803921568626!black};
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:-0.25,0) rectangle (axis cs:0.25,13);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:0.75,0) rectangle (axis cs:1.25,7);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:1.75,0) rectangle (axis cs:2.25,7);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:2.75,0) rectangle (axis cs:3.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:3.75,0) rectangle (axis cs:4.25,1);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:4.75,0) rectangle (axis cs:5.25,4);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:5.75,0) rectangle (axis cs:6.25,3);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:6.75,0) rectangle (axis cs:7.25,3);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:7.75,0) rectangle (axis cs:8.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:8.75,0) rectangle (axis cs:9.25,1);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:9.75,0) rectangle (axis cs:10.25,3);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:10.75,0) rectangle (axis cs:11.25,3);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:11.75,0) rectangle (axis cs:12.25,1);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:12.75,0) rectangle (axis cs:13.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:13.75,0) rectangle (axis cs:14.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:14.75,0) rectangle (axis cs:15.25,4);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:15.75,0) rectangle (axis cs:16.25,16);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:16.75,0) rectangle (axis cs:17.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:17.75,0) rectangle (axis cs:18.25,4);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:18.75,0) rectangle (axis cs:19.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:19.75,0) rectangle (axis cs:20.25,6);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:20.75,0) rectangle (axis cs:21.25,4);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:21.75,0) rectangle (axis cs:22.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:22.75,0) rectangle (axis cs:23.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:23.75,0) rectangle (axis cs:24.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:24.75,0) rectangle (axis cs:25.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:25.75,0) rectangle (axis cs:26.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:26.75,0) rectangle (axis cs:27.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:27.75,0) rectangle (axis cs:28.25,3);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:28.75,0) rectangle (axis cs:29.25,4);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:29.75,0) rectangle (axis cs:30.25,11);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:30.75,0) rectangle (axis cs:31.25,5);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:31.75,0) rectangle (axis cs:32.25,9);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:32.75,0) rectangle (axis cs:33.25,7);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:33.75,0) rectangle (axis cs:34.25,13);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:34.75,0) rectangle (axis cs:35.25,2);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:35.75,0) rectangle (axis cs:36.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:36.75,0) rectangle (axis cs:37.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:37.75,0) rectangle (axis cs:38.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:38.75,0) rectangle (axis cs:39.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:39.75,0) rectangle (axis cs:40.25,0);
+\draw[draw=black,fill=blue!54.509803921568626!black] (axis cs:40.75,0) rectangle (axis cs:41.25,0);
+\addlegendimage{ybar,ybar legend,draw=black,fill=red!54.509803921568626!black};
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:-0.25,0) rectangle (axis cs:0.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:0.75,0) rectangle (axis cs:1.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:1.75,0) rectangle (axis cs:2.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:2.75,0) rectangle (axis cs:3.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:3.75,0) rectangle (axis cs:4.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:4.75,0) rectangle (axis cs:5.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:5.75,0) rectangle (axis cs:6.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:6.75,0) rectangle (axis cs:7.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:7.75,0) rectangle (axis cs:8.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:8.75,0) rectangle (axis cs:9.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:9.75,0) rectangle (axis cs:10.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:10.75,0) rectangle (axis cs:11.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:11.75,0) rectangle (axis cs:12.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:12.75,0) rectangle (axis cs:13.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:13.75,0) rectangle (axis cs:14.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:14.75,0) rectangle (axis cs:15.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:15.75,0) rectangle (axis cs:16.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:16.75,0) rectangle (axis cs:17.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:17.75,0) rectangle (axis cs:18.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:18.75,0) rectangle (axis cs:19.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:19.75,0) rectangle (axis cs:20.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:20.75,0) rectangle (axis cs:21.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:21.75,0) rectangle (axis cs:22.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:22.75,0) rectangle (axis cs:23.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:23.75,0) rectangle (axis cs:24.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:24.75,0) rectangle (axis cs:25.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:25.75,0) rectangle (axis cs:26.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:26.75,0) rectangle (axis cs:27.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:27.75,0) rectangle (axis cs:28.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:28.75,0) rectangle (axis cs:29.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:29.75,0) rectangle (axis cs:30.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:30.75,0) rectangle (axis cs:31.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:31.75,0) rectangle (axis cs:32.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:32.75,0) rectangle (axis cs:33.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:33.75,0) rectangle (axis cs:34.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:34.75,0) rectangle (axis cs:35.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:35.75,0) rectangle (axis cs:36.25,0);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:36.75,0) rectangle (axis cs:37.25,19);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:37.75,0) rectangle (axis cs:38.25,32);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:38.75,0) rectangle (axis cs:39.25,4);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:39.75,0) rectangle (axis cs:40.25,5);
+\draw[draw=black,fill=red!54.509803921568626!black] (axis cs:40.75,0) rectangle (axis cs:41.25,0);
+\path [opacity=0] (axis cs:1,13)
+--(axis cs:1,13);
+
+\path [opacity=0] (axis cs:13,1)
+--(axis cs:13,1);
+
+\end{axis}
+
+\end{tikzpicture}

+ 1 - 0
php5.json

@@ -0,0 +1 @@
+[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 4, 2, 0, 2, 0, 5, 0, 2, 0, 0, 0, 1, 0, 4, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 1, 2, 0, 1, 2, 0, 0, 1, 0, 1, 1, 0, 2, 0, 1, 2, 1, 1, 1, 13, 0, 1, 1, 0, 4, 0, 0, 1, 1, 3, 3, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 2, 0, 1, 3, 8, 2, 0, 3, 1, 1, 2, 0, 7, 0, 0, 7, 0, 8, 5, 2, 0, 0, 0, 0, 0, 6, 8, 5, 16, 0, 16, 1, 0, 3, 5, 0, 0, 0, 0, 0, 5, 1, 0, 0, 1, 0]

+ 97 - 8
plot_functions.py

@@ -6,6 +6,11 @@ import pickle
 import vendors.debian.DLAmine as dla
 import json
 import csv
+from matplotlib.font_manager import FontProperties
+import seaborn as sns
+import laplace_tests as lptests
+import statsmodels.api as sm
+
 
 class Plotter:
     def __init__(self, src2month, src2sloccount, pkg_with_cvss, years):
@@ -16,9 +21,58 @@ class Plotter:
         self.src2month_temp = dict()
         self.src2month_loc = dict()
         self.l=self.m=self.h=self.udef=0
+
+    def plot_severity_percentage(self):
+        """Plot the yearly share of vulnerabilities per CVSS severity class.
+
+        Monthly counts from ``self.pkg_with_cvss`` are summed into yearly
+        buckets (month index // 12), turned into fractions of that year's
+        total and drawn as a stacked area chart. A year without any
+        vulnerabilities is plotted as all-zero.
+
+        Raises:
+            IndexError: if a package has more months than fit into
+                ``self.years + 1`` yearly buckets, or its CVSS data is shorter
+                than its month list or lacks one of the four severity entries.
+        """
+        n_buckets = self.years + 1
+        # counts[s][y]: vulnerabilities of severity s in year bucket y,
+        # s ordered as in the CVSS entries: low, medium, high, undefined.
+        counts = [[0] * n_buckets for _ in range(4)]
+        for pkg in self.pkg_with_cvss:
+            for month in range(len(self.src2month[pkg])):
+                try:
+                    monthly = self.pkg_with_cvss[pkg][month]
+                    for sev in range(4):
+                        counts[sev][month//12] += monthly[sev]
+                except IndexError as err:
+                    # Keep the original traceback and say which entry overflowed.
+                    raise IndexError(
+                        'List index out of bounds (package {!r}, month {}, year bucket {})'.format(
+                            pkg, month, month//12)
+                    ) from err
+
+        ## Generate percentage
+        for year in range(n_buckets):
+            total = sum(row[year] for row in counts)
+            for row in counts:
+                # A year without any recorded vulnerability stays at 0
+                # instead of dividing by zero.
+                row[year] = row[year] / total if total else 0
+
+        num_low, num_med, num_high, num_udef = counts
+        print(num_low)
+        print(num_high)
+
+        carlosplt.pre_paper_plot()
+
+        pal = ['#fee8c8', '#fdbb84', '#e34a33', 'grey']
+        x = range(2001, 2001 + self.years)
+        labels_cvss = ['low', 'medium', 'high', 'N/A']
+
+        # Bucket 0 is dropped so the remaining self.years buckets line up
+        # with the x axis, which starts at 2001.
+        shares = [num_low[1:], num_med[1:], num_high[1:], num_udef[1:]]
+        h = plt.stackplot(x, shares, colors=pal, alpha=0.9, labels=labels_cvss)
+        plt.xticks(x)
+        # Reverse the handles so the legend order matches the stacking order top-down.
+        plt.legend(loc='upper left', handles=h[::-1])
+        carlosplt.post_paper_plot(True,True,True)
+        plt.show()
+
     
     ## Plot sum of vulnerabilities. Can filter out by severity using the booleans low, med, high, undefined
     def plot_all_severity(self, l, m, h, udef):
+        carlosplt.pre_paper_plot()
         self.l = l
         self.m = m
         self.h = h
@@ -51,10 +105,10 @@ class Plotter:
         self.pkgnumPlotter = NumPackages_Plotter(self.severityPlotter)
         
         # Plot number of affected packages per year
-        #self.pkgnumPlotter.plot_num_affected()
+        self.pkgnumPlotter.plot_num_affected()
         
         # Plot average number of vulnerabilities per affected package per year
-        #self.pkgnumPlotter.plot_average_number()
+        self.pkgnumPlotter.plot_average_number()
 
         # Plot regular and LTS for Wheezy
         self.wheezy = WheezyPloter(self)
@@ -117,7 +171,10 @@ class Temp_Plotter:
         n = len(self.year_sum)
         yearsx = []
         for i in range(1,self.years+1):
-            yearsx.append('\''+str(i).zfill(2))
+            if i%2==1:
+                yearsx.append('\''+str(i).zfill(2))
+            else:
+                yearsx.append('')
         x = range(self.years)
 
         width = 1/2
@@ -127,11 +184,20 @@ class Temp_Plotter:
         plt.ylabel('Total vulnerabilities')
         plt.xlabel('Year')
         carlosplt.post_paper_plot(True,True,True)
-        plt.show()
         
         sum_all = sum(values)
         print("Total: ", sum_all)
 
+        ## Linear regression model
+        X = sm.add_constant(x)
+        y = self.year_sum
+        model = sm.OLS(y,X).fit()
+        predictions = model.predict(X)
+        plt.plot(predictions)
+        plt.show()
+        print(model.summary())
+        print(model.summary().as_latex())
+
 class NumPackages_Plotter:
     def __init__(self, plotter):
         self.plotter = plotter
@@ -141,7 +207,10 @@ class NumPackages_Plotter:
         ## Number of affected packages
         n = len(self.plotter.year_sum)
         for i in range(1,self.plotter.years+1):
-            self.yearsx.append('\''+str(i).zfill(2))
+            if i%2==1:
+                self.yearsx.append('\''+str(i).zfill(2))
+            else:
+                self.yearsx.append('')
         x = range(self.plotter.years)
         width = 1/2
         plt.bar(x, self.plotter.year_num, width, color='darkblue', edgecolor='black')
@@ -170,7 +239,15 @@ class NumPackages_Plotter:
         plt.ylabel('Average vulnerabilities per package')
         plt.xlabel('Year')
         carlosplt.post_paper_plot(True,True,True)
+        ## Linear regression
+        X = sm.add_constant(x)
+        y = average_per_year
+        model = sm.OLS(y,X).fit()
+        predictions = model.predict(X)
+        plt.plot(predictions)
         plt.show()
+        print(model.summary())
+        print(model.summary().as_latex())
 
 class WheezyPloter:
     def __init__(self, plotter):
@@ -271,7 +348,6 @@ class WheezyPloter:
             temp = sum(totalLTS[12+(3*j):12+3*(j+1)])
             LTS_quarter.append(temp)
 
-
         ## Print all LTS
         cut = 12*4+1
         n = len(quarter_sum)
@@ -290,6 +366,8 @@ class WheezyPloter:
         quarter_sum_regular = [0] * (12*4+1) + quarter_sum[12*4+1:12*4+9] + [0] * 12
         quarter_sum_errors = [0] * (12*4 + 9) + quarter_sum[12*4+9:12*4+9+5] + [0] * 7
         LTS_quarter = [0] * (15*4+2) + LTS_quarter[15*4+2:-3]
+        
+        whole_w = quarter_sum_regular[:-12] + quarter_sum_errors[12*4+9:-7] + LTS_quarter[15*4+2:]
     
         #print(quarter_sum_errors)
         cut = 12*4+1
@@ -307,10 +385,20 @@ class WheezyPloter:
         plt.legend(handles=[bar1, bar12, bar2])
     
         plt.xticks(np.arange(0,n),quartersx[cut:], rotation="vertical")
-        plt.ylabel('Vulnerabilities per quarter of Debian Wheezy')
+        plt.ylabel('Vulnerabilities per quarter')
         plt.xlabel('Quarter')
         carlosplt.post_paper_plot(True,True,True)
+        ## Linear Regression
+        print(len(x))
+        print(len(whole_w[cut:]))
+        X = sm.add_constant(x)
+        y = whole_w[cut:]
+        model = sm.OLS(y,X).fit()
+        predictions = model.predict(X)
+        plt.plot(predictions)
         plt.show()
+        print(model.summary())
+        print(model.summary().as_latex())
 
 
 def plot_all(src2month, src2sloccount, pkg_with_cvss):
@@ -320,4 +408,5 @@ def plot_all(src2month, src2sloccount, pkg_with_cvss):
     
     # consider severity (low, medium, high, undefined)
     # Undefined is usual for newly added packages
-    myplotter.plot_all_severity(True,True,True,True)
+    myplotter.plot_all_severity(False,False,True,False)
+    myplotter.plot_severity_percentage()

+ 129 - 2
plot_types.py

@@ -1,4 +1,9 @@
 import matplotlib.pyplot as plt
+import json
+import numpy as np
+import paper_plots as carlosplt
+import seaborn as sns
+from matplotlib.font_manager import FontProperties
 
 class TypePlotter:
     def __init__(self, data, years):
@@ -8,6 +13,12 @@ class TypePlotter:
         self.pkg_with_cvss = data.pkg_with_cvss
         self.src2cwe = data.src2cwe
         self.years = years
+        self.src2monthDLA = dict()
+        self.DLA_withcwe = dict()
+        with open("src2month_DLA.json", "r") as fp:
+            self.src2monthDLA = json.load(fp)
+        with open("DLA_withcwe.json", "r") as fp:
+            self.DLA_withcwe = json.load(fp)
 
     def plot_types(self):
 
@@ -44,7 +55,123 @@ class TypePlotter:
             binned = []
             for j in range(self.years*3):
                 binned.append(sum(cwe2month[i][4*j:4*j+4]))
-            plt.plot(binned)
-            plt.show()
+            #plt.plot(binned)
+            #plt.show()
 
+        percent = []
+        for i in range(self.years):
+            temp = [0]*12
+            for j in range(12):
+                temp[j] = sum(cwe2month[j][12*i:12*i+12])
+            sum_temp = sum(temp)
+            try:
+                temp_percent = list(map(lambda x: x / sum_temp, temp))
+                percent.append(temp_percent)
+            except ZeroDivisionError:
+                percent.append(temp)
+                print('Year ', i+1, 'is the problem')
+
+        print(percent)
+        carlosplt.pre_paper_plot()
+        x = range(2000,2000+self.years)
+        y = []
+        labels_cwe = ['682', '118', '664', '691', '693', '707', '710', 'N/A']
+        for i in range(12):
+            if i not in [2,3,7,8]:
+                y.append([j[i] for j in percent[8:]])
+
+        pal = sns.color_palette("Paired", 12)
+
+        h = plt.stackplot(x[8:], y, colors = pal, alpha=0.9, labels = labels_cwe)
+        plt.xticks(x[8:])
+        fontP = FontProperties()
+        fontP.set_size('small')
+        plt.legend(loc='upper left', handles = h[::-1], prop=fontP)
+        carlosplt.post_paper_plot(True,True,True)
+        plt.show()
+
+        self.plot_wheezyTypes(src2cwe_sum)
         return 0
+
+    def plot_peryearstable(self):
+        """Placeholder: not implemented yet, calling it does nothing."""
+        ## Plot the proportion changes over the years (All 11 types? - top 5 types).
+        pass
+
+
+    def plot_wheezyTypes(self, cwe_sum):
+        """Plot per-quarter vulnerability counts in Wheezy for selected CWE types.
+
+        Draws a 2x2 grid of bar charts, one per selected type column. Each
+        chart has three series: 'regular' and 'regular*' taken from
+        ``cwe_sum`` and 'long-term' taken from ``self.DLA_withcwe``.
+
+        Args:
+            cwe_sum: monthly counts indexed as ``cwe_sum[month][type]``; needs
+                at least ``12 * self.years`` months.
+
+        Note:
+            The quarter offsets below are fixed indices. The three series and
+            the x axis only have matching lengths when ``self.years`` is 19
+            (76 quarters).
+        """
+        quarter_num = 4 * self.years
+
+        ## DSA Q2'13-Q2'16
+        ## DLA Q3'16-Q2'18
+        # Monthly DLA counts per type, summed over all packages.
+        cwe_sum_DLA = [[0] * 12 for _ in range(12 * self.years)]
+        for per_year in self.DLA_withcwe.values():
+            for year_idx, per_month in enumerate(per_year):
+                for month_idx, per_type in enumerate(per_month):
+                    for type_idx, count in enumerate(per_type):
+                        cwe_sum_DLA[year_idx*12 + month_idx][type_idx] += count
+
+        # Collapse every three consecutive months into one quarter, per type.
+        quarter_sum = dict()
+        quarter_sum_DLA = dict()
+        for cwe in range(len(cwe_sum[0])):
+            print(cwe)
+            quarter_sum[cwe] = [
+                cwe_sum[3*q][cwe] + cwe_sum[3*q+1][cwe] + cwe_sum[3*q+2][cwe]
+                for q in range(quarter_num)
+            ]
+            quarter_sum_DLA[cwe] = [
+                cwe_sum_DLA[3*q][cwe] + cwe_sum_DLA[3*q+1][cwe] + cwe_sum_DLA[3*q+2][cwe]
+                for q in range(quarter_num)
+            ]
+
+        print(quarter_sum)
+        print(quarter_sum_DLA)
+
+        # Tick labels: only the first quarter of each year is named.
+        quartersx = []
+        for year in range(1, self.years+1):
+            quartersx.append('Q1\'' + str(year).zfill(2))
+            quartersx.extend([' '] * 3)
+
+        ## Filter only wheezy. Do it for a selection of types:
+        root_list = ['682', '118', '330', '435', '664', '691', '693', '697', '703', '707', '710' ]
+
+        # Fixed quarter indices delimiting the three series.
+        cut = 12*4+1            # first quarter shown; start of 'regular'
+        regular_end = 12*4+9    # end of 'regular', start of 'regular*'
+        errors_end = regular_end + 5
+        lts_start = 15*4+2      # start of 'long-term'
+        tail = 3+4              # trailing quarters that are not plotted
+
+        fig = plt.figure()
+        width = 1/2
+        x = range(quarter_num - cut - tail)
+
+        for ii, j in enumerate([1, 4, 6, 11], start=1):
+            quarter_sum_regular = [0] * cut + quarter_sum[j][cut:regular_end] + [0] * 12
+            quarter_sum_errors = [0] * regular_end + quarter_sum[j][regular_end:errors_end] + [0] * 7
+            LTS_quarter = [0] * lts_start + quarter_sum_DLA[j][lts_start:-tail]
+            n = len(quarter_sum[j]) - cut - tail
+
+            print(len(x))
+            print(len(quarter_sum_regular[cut:]))
+            print(len(quarter_sum_errors[cut:]))
+            print(len(LTS_quarter[cut:]))
+
+            # The plt.* calls below rely on the new subplot being the current axes.
+            fig.add_subplot(2,2,ii)
+
+            bar1 = plt.bar(x, quarter_sum_regular[cut:], width, color='darkblue', label='regular', edgecolor='black')
+            bar12 = plt.bar(x, quarter_sum_errors[cut:], width, color='darkorange', label='regular*', edgecolor='black')
+            bar2 = plt.bar(x, LTS_quarter[cut:], width, color='darkred', label ='long-term', edgecolor='black')
+            if ii==2:
+                plt.legend(handles=[bar1, bar12, bar2])
+
+            # Pass exactly one label per tick; a longer label list is
+            # rejected by recent matplotlib releases.
+            plt.xticks(np.arange(0,n), quartersx[cut:cut+n], rotation="vertical")
+            # Column 11 has no entry in root_list and is labelled N/A.
+            if j < len(root_list):
+                plt.ylabel('CWE-' + root_list[j])
+            else:
+                plt.ylabel('N/A')
+            plt.xlabel('Quarter')
+            carlosplt.post_paper_plot(True,True,True)
+
+        plt.show()

+ 63 - 0
powelaw.py

@@ -0,0 +1,63 @@
+import powerlaw
+from numpy import genfromtxt
+import matplotlib.pyplot as plt
+import paper_plots as carlosplt
+
+
+mydata = genfromtxt('power_law.csv', delimiter=',', dtype='float')
+print(len(mydata))
+print(mydata)
+
+## Histogram of the samples in bins of width 10 (bin index -> sample count).
+## distr is not used further down in this script.
+distr = dict()
+for i in mydata:
+    bins = i // 10
+    distr[bins] = distr.get(bins, 0) + 1
+
+
+## Fit the data and report the parameters of the fitted distributions
+results = powerlaw.Fit(mydata, discrete=False, estimate_discrete=False)
+print('alpha = ', results.power_law.alpha)
+print(results.truncated_power_law.alpha)
+print('xmin = ', results.power_law.xmin)
+print('xmax = ', results.power_law.xmax)
+print('sigma = ', results.power_law.sigma)
+print('D = ', results.power_law.D)
+print(results.truncated_power_law.xmin)
+print('xmax = ', results.truncated_power_law.xmax)
+print(results.power_law.discrete)
+print('lognormal mu: ', results.lognormal.mu)
+print('lognormal sigma: ', results.lognormal.sigma)
+
+## Plot the ccdf of the data together with the power-law fit.
+## Other fitted distributions (truncated_power_law, lognormal_positive,
+## lognormal, exponential) can be overlaid with their plot_ccdf(ax=fig).
+carlosplt.pre_paper_plot(True)
+fig = results.plot_ccdf(color='darkblue', linestyle='-', label='data')
+results.power_law.plot_ccdf(color='darkgreen', ax=fig, label='power-law fit')
+plt.ylabel('ccdf')
+plt.xlabel('Vulnerabilities')
+fig.legend()
+carlosplt.post_paper_plot(True,True,True)
+plt.show()
+
+## Compare the power-law fit against the alternative distributions
+comparisons = [
+    ('Exponential: ', 'exponential'),
+    ('Stretched exponential: ', 'stretched_exponential'),
+    ('Power law truncated: ', 'truncated_power_law'),
+    ('Lognormal positive: ', 'lognormal_positive'),
+    ('Lognormal: ', 'lognormal'),
+]
+for label, alternative in comparisons:
+    R, p = results.distribution_compare('power_law', alternative)
+    print(label, R, p)

File diff suppressed because it is too large
+ 0 - 0
reports.json


File diff suppressed because it is too large
+ 0 - 0
reports_team.json


File diff suppressed because it is too large
+ 0 - 0
src2month_DLA.json


BIN
src2month_DLA.txt


File diff suppressed because it is too large
+ 0 - 0
sum_team.json


+ 5 - 2
types.py

@@ -1,6 +1,7 @@
 from load_data import load_DBs
 from plot_functions import plot_all
 from plot_types import TypePlotter
+from laplace_tests import calc_laplace as claplace
 
 class Mydata:
     def __init__(self, load):
@@ -12,13 +13,15 @@ class Mydata:
 def main():
     data = Mydata(True)
     print('Done')
-    i = plot_all(data.src2month, data.src2sloccount, data.pkg_with_cvss)
+    #i = plot_all(data.src2month, data.src2sloccount, data.pkg_with_cvss)
     #
     years = 19
-    # 2001-2018
+    # 2000-2018
     
     j = TypePlotter(data, years)
     j.plot_types()
 
+    #l = claplace(data,years)
+
 if __name__ == "__main__":
     main()

+ 6 - 0
vendors/debian/DLAmine.py

@@ -49,6 +49,12 @@ def getDLAs():
                         p2 = re.compile('CVE-[0-9]{4}-[0-9]*')
                         (pkg, cves) = parseDLAhtml(dla, p1, p2)
                         pkg = fixURL(url, pkg)
+                        try:
+                            pkg = da.unifySrcName(pkg)
+                        except AttributeError:
+                            print('Problem with')
+                            print(pkg)
+                            print('#'*80)
 
                         if pkg:
                             totalDLAs[str(year) + ',' + str(month)] += cves

+ 6 - 1
vendors/debian/src_name_unifications.txt

@@ -21,6 +21,8 @@ nagios3->nagios
 nagios2->nagios
 ^mysql-\d*->mysql-transitional
 linux-2.6*->linux
+linux-4*->linux
+linux-latest*->linux
 linux-kernel-alpha->linux
 linux-kernel-i386->linux
 libmusicbrainz*->libmusicbrainz
@@ -28,6 +30,8 @@ libgtop1->libgtop2
 libgd1->libgd2
 libast*->libast2
 libmozjs0d->libast
+tiff3->tiff
+tiff4->tiff
 ^kernel-source*->linux
 ^kernel-patch*->linux
 ^kernel-source-2.4.*->linux
@@ -109,6 +113,7 @@ libcgi-application-extra-plugin-bundle-perl->libcgi-application-plugins-perl
 ^python\d->python3.5
 ^postgresql-\d*->postgresql-9.6
 ^ruby\d.\d->ruby2.3
+^ruby\d.\d.\d->ruby2.3
 ^ruby->ruby2.3
 ^mariadb-10.*->mariadb-10.1
 ^ruby-actionpack*->rails
@@ -127,4 +132,4 @@ gnupg->gnupg2
 xine-lib*->xine-lib-1.2
 kfreebsd-\d*->kfreebsd-10
 pdfkit*->pdfkit
-gforge->fusionforge
+gforge->fusionforge

Some files were not shown because too many files changed in this diff