123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- import subprocess
- import glob
- import paper_plots as carlosplt
- import matplotlib.pyplot as plt
- from scipy.stats import spearmanr
def download_data(src2month):
    """Download the source package of every entry in ``src2month``.

    ``create_folders`` is called first to prepare the per-package
    directories; afterwards ``apt-get source --only-source <pkg>`` is run
    inside ``./source_files/<pkg>/`` for each package.

    Args:
        src2month: Iterable of package names (iterating a mapping yields its
            keys; the values are not used here).

    Returns:
        Always 0. The exit status of ``apt-get`` is not inspected, so a
        failed download does not stop the loop.
    """
    create_folders(src2month)
    for pkg in src2month:
        print(pkg)
        # Hand apt-get an explicit argument vector instead of splitting a
        # concatenated string, so the package name is always passed as
        # exactly one argument.
        subprocess.run(
            ["apt-get", "source", "--only-source", pkg],
            # The command's stdout was never used; discard it rather than
            # buffering it in memory.
            stdout=subprocess.DEVNULL,
            cwd="./source_files/" + pkg + "/",
        )
    return 0
def create_folders(src2month):
    """Create ``./source_files/<pkg>/`` for every package in ``src2month``.

    Directories are created directly from Python instead of spawning one
    ``mkdir`` process per package. Missing parents (including
    ``./source_files`` itself) are created, and directories that already
    exist are left untouched.

    Args:
        src2month: Iterable of package names (iterating a mapping yields its
            keys; the values are not used here).
    """
    # Local import: `os` is not among the file-level imports visible in this
    # module, and importing here keeps the function self-contained.
    import os

    for pkg in src2month:
        os.makedirs(os.path.join("source_files", pkg), exist_ok=True)
def test_pop(src2month, src2pop):
    """Print and plot vulnerability totals ordered by the ``src2pop`` value.

    For every package in ``src2month`` that also has an entry in ``src2pop``
    the monthly counts are summed; packages without an entry are skipped.
    The totals are then ordered by descending ``src2pop`` value, the sums of
    the upper and lower half of that ordering are printed, followed by the
    largest and smallest ``src2pop`` value, the whole ``src2pop`` mapping and
    the Spearman correlation between the two series. Finally the ordered
    totals are plotted.

    Args:
        src2month: Mapping from package name to a sequence of numbers that
            can be summed.
        src2pop: Mapping from package name to a value accepted by ``int()``;
            it is also concatenated into a printed line, so it is presumably
            a string -- confirm against the caller.

    Raises:
        IndexError: If no package has an entry in ``src2pop``.
    """
    popularity = []
    vuln_totals = []
    for pkg in src2month:
        try:
            popularity.append(int(src2pop[pkg]))
        except KeyError:
            # No entry for this package: leave it out of both series.
            continue
        n_vulns = sum(src2month[pkg])
        if n_vulns > 100:
            print(', '.join((pkg, str(n_vulns), src2pop[pkg])))
        vuln_totals.append(n_vulns)

    # One descending sort of the (popularity, total) pairs feeds both views.
    ranked = sorted(zip(popularity, vuln_totals), reverse=True)
    vulns_by_rank = [vulns for _, vulns in ranked]
    pop_by_rank = [pop for pop, _ in ranked]

    midpoint = len(vulns_by_rank) // 2
    print(sum(vulns_by_rank[:midpoint]))
    print(sum(vulns_by_rank[midpoint:]))

    print(pop_by_rank[0])
    print(pop_by_rank[-1])
    print(src2pop)
    print(spearmanr(popularity, vuln_totals))

    carlosplt.pre_paper_plot(True)
    plt.plot(vulns_by_rank)
    plt.ylabel('Number of vulnerabilities')
    plt.xlabel('Popularity ranking')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()
def test_slocs(src2month, src2sloccount):
    """Print and plot vulnerability totals ordered by total SLOC count.

    The first element of each ``src2sloccount`` entry is used as the
    package's total line count. Packages with no entry are reported and
    skipped; packages whose total equals 0 are skipped silently. The
    remaining vulnerability totals are ordered by descending line count, the
    sums of the upper and lower half are printed, followed by the largest
    and smallest line count and the Spearman correlation of the two series.
    Finally the ordered totals are plotted.

    Args:
        src2month: Mapping from package name to a sequence of numbers that
            can be summed. It must contain the key ``'linux'``, whose total
            is printed first.
        src2sloccount: Mapping from package name to an indexable value whose
            element 0 is the total. Per the original author's note the form
            is (total, [ansic, cpp, asm, java, python, perl, sh]).

    Raises:
        KeyError: If ``src2month`` has no ``'linux'`` entry.
        IndexError: If no package ends up with a usable SLOC total.
    """
    sloc_totals = []
    vuln_totals = []
    print(sum(src2month['linux']))
    for pkg in src2month:
        try:
            total_slocs = src2sloccount[pkg][0]
            if total_slocs == 0:
                continue
            sloc_totals.append(int(total_slocs))
        except KeyError:
            print(pkg + ": no sloccount data found!")
            continue
        n_vulns = sum(src2month[pkg])
        if n_vulns > 100:
            print(', '.join((pkg, str(n_vulns), str(total_slocs))))
        vuln_totals.append(n_vulns)

    # One descending sort of the (slocs, total) pairs feeds both views.
    ranked = sorted(zip(sloc_totals, vuln_totals), reverse=True)
    vulns_by_rank = [vulns for _, vulns in ranked]
    slocs_by_rank = [slocs for slocs, _ in ranked]

    midpoint = len(vulns_by_rank) // 2
    print(sum(vulns_by_rank[:midpoint]))
    print(sum(vulns_by_rank[midpoint:]))
    print(slocs_by_rank[0])
    print(slocs_by_rank[-1])
    print(spearmanr(sloc_totals, vuln_totals))

    carlosplt.pre_paper_plot(True)
    plt.plot(vulns_by_rank)
    plt.ylabel('Number of vulnerabilities')
    plt.xlabel('Number of SLOCS ranking')
    carlosplt.post_paper_plot(True, True, True)
    plt.show()
def download_old_data(src2month, year):
    """Run ``apt-get download`` for every entry in ``src2month``.

    ``create_old_folders`` is called first to prepare the per-package
    directories; afterwards ``apt-get download <pkg>`` is run inside
    ``./Old_sources/<year>/<pkg>/`` for each package.

    Args:
        src2month: Iterable of package names (iterating a mapping yields its
            keys; the values are not used here).
        year: Value whose ``str()`` form names the sub-directory of
            ``./Old_sources`` to download into.

    Returns:
        Always 0. The exit status of ``apt-get`` is not inspected, so a
        failed download does not stop the loop.
    """
    create_old_folders(src2month, year)
    for pkg in src2month:
        print(pkg)
        # Hand apt-get an explicit argument vector instead of splitting a
        # concatenated string, so the package name is always passed as
        # exactly one argument.
        subprocess.run(
            ["apt-get", "download", pkg],
            # The command's stdout was never used; discard it rather than
            # buffering it in memory.
            stdout=subprocess.DEVNULL,
            cwd="./Old_sources/" + str(year) + "/" + pkg + "/",
        )
    return 0
def create_old_folders(src2month, year):
    """Create ``./Old_sources/<year>/<pkg>/`` for every package.

    Directories are created directly from Python instead of spawning one
    ``mkdir`` process per package. Missing parents (``./Old_sources`` and
    the year directory) are created, and directories that already exist are
    left untouched.

    Args:
        src2month: Iterable of package names (iterating a mapping yields its
            keys; the values are not used here).
        year: Value whose ``str()`` form names the sub-directory of
            ``./Old_sources`` that receives the package directories.
    """
    # Local import: `os` is not among the file-level imports visible in this
    # module, and importing here keeps the function self-contained.
    import os

    year_dir = os.path.join("Old_sources", str(year))
    for pkg in src2month:
        os.makedirs(os.path.join(year_dir, pkg), exist_ok=True)
def test(src2month, src2pop, src2sloccount):
    """Ad-hoc driver: optional downloads plus a two-window comparison.

    Prints the number of packages in ``src2month``. For a fixed list of
    packages it then sums two consecutive 9-entry windows of the monthly
    series -- the window ending 24 entries before the end of the series and
    the window immediately preceding it -- and prints both sums and their
    difference.

    Args:
        src2month: Mapping from package name to a sequence of numbers; it
            must contain every package in the fixed list below.
        src2pop: Not used by the currently enabled code.
        src2sloccount: Not used by the currently enabled code.

    Returns:
        Always 0.
    """
    # Manual switches for the (slow) download steps; both are off.
    fetch_current = False
    fetch_old = False
    old_year = 2015

    print(len(src2month))
    if fetch_current:
        download_data(src2month)
    if fetch_old:
        download_old_data(src2month, old_year)

    # Earlier experiments that used to be invoked from here are disabled:
    # test_slocs, test_language, test_pop, test_history, and comparison
    # plots of apache2/lighttpd/nginx and of openssl/gnutls28.

    holdout = 24  # trailing entries excluded from both windows
    window = 9    # length of each compared window
    tracked = (
        'linux', 'firefox-esr', 'chromium-browser', 'openjdk-8', 'icedove',
        'php7.0', 'mysql-transitional', 'openssl', 'qemu',
    )
    for pkg in tracked:
        monthly = src2month[pkg]
        total_previous = sum(monthly[-(holdout + 2 * window):-(holdout + window)])
        total_validation = sum(monthly[-(holdout + window):-holdout])
        dif = total_validation - total_previous
        print(f"{pkg} previous: {total_previous}")
        print(f"{pkg} validation: {total_validation}")
        print(f"{pkg} dif: {dif}")
        # NOTE(review): the original indentation was lost; the separator is
        # assumed to be printed once per package -- confirm.
        print('#' * 80)
    return 0
|