stat_tests.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. import subprocess
  2. import glob
  3. import paper_plots as carlosplt
  4. import matplotlib.pyplot as plt
  5. from scipy.stats import spearmanr
  6. def download_data(src2month):
  7. create_folders(src2month)
  8. for pkg in src2month:
  9. print(pkg)
  10. bashCommand = "apt-get source --only-source " + pkg
  11. pwd = './source_files/'+pkg+'/'
  12. process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE, cwd=pwd)
  13. output, error = process.communicate()
  14. #print(output, error)
  15. return 0
  16. def create_folders(src2month):
  17. for pkg in src2month:
  18. bashCommand1 = "mkdir " + pkg
  19. process = subprocess.Popen(bashCommand1.split(), stdout=subprocess.PIPE, cwd='./source_files')
  20. output, error = process.communicate()
  21. def test_pop(src2month, src2pop):
  22. ar1 = []
  23. ar2 = []
  24. for pkg in src2month:
  25. try:
  26. ar1.append(int(src2pop[pkg]))
  27. except KeyError:
  28. #print(pkg + ": no popularity data found!")
  29. continue
  30. total = sum(src2month[pkg])
  31. if total>100:
  32. print(pkg + ', ' + str(total) + ', ' + src2pop[pkg])
  33. ar2.append(total)
  34. vulns_sorted_pop=[x for _,x in sorted(zip(ar1,ar2), reverse=True)]
  35. pop_xaxis=[y for y,_ in sorted(zip(ar1,ar2), reverse=True)]
  36. half_more_popular = sum(vulns_sorted_pop[:int(len(vulns_sorted_pop)/2)])
  37. half_less_popular = sum(vulns_sorted_pop[int(len(vulns_sorted_pop)/2):])
  38. print(half_more_popular)
  39. print(half_less_popular)
  40. print(pop_xaxis[0])
  41. print(pop_xaxis[len(pop_xaxis)-1])
  42. print(src2pop)
  43. print(spearmanr(ar1,ar2))
  44. carlosplt.pre_paper_plot(True)
  45. plt.plot(vulns_sorted_pop)
  46. plt.ylabel('Number of vulnerabilities')
  47. plt.xlabel('Popularity ranking')
  48. carlosplt.post_paper_plot(True,True,True)
  49. plt.show()
  50. def test_slocs(src2month, src2sloccount):
  51. # Remember sloccount is of the form (total, [ansic, cpp, asm, java, python, perl, sh])
  52. ar1 = []
  53. ar2 = []
  54. print(sum(src2month['linux']))
  55. for pkg in src2month:
  56. try:
  57. total_slocs = src2sloccount[pkg][0]
  58. if total_slocs == 0:
  59. continue
  60. else:
  61. ar1.append(int(total_slocs))
  62. except KeyError:
  63. print(pkg + ": no sloccount data found!")
  64. continue
  65. total = sum(src2month[pkg])
  66. if total>100:
  67. print(pkg + ', ' + str(total) + ', ' + str(total_slocs))
  68. ar2.append(total)
  69. vulns_sorted_slocs_total=[x for _,x in sorted(zip(ar1,ar2), reverse=True)]
  70. pop_xaxis=[y for y,_ in sorted(zip(ar1,ar2), reverse=True)]
  71. half_more_slocs = sum(vulns_sorted_slocs_total[:int(len(vulns_sorted_slocs_total)/2)])
  72. half_less_slocs = sum(vulns_sorted_slocs_total[int(len(vulns_sorted_slocs_total)/2):])
  73. print(half_more_slocs)
  74. print(half_less_slocs)
  75. print(pop_xaxis[0])
  76. print(pop_xaxis[len(pop_xaxis)-1])
  77. print(spearmanr(ar1,ar2))
  78. carlosplt.pre_paper_plot(True)
  79. plt.plot(vulns_sorted_slocs_total)
  80. plt.ylabel('Number of vulnerabilities')
  81. plt.xlabel('Number of SLOCS ranking')
  82. carlosplt.post_paper_plot(True,True,True)
  83. plt.show()
  84. def download_old_data(src2month,year):
  85. create_old_folders(src2month,year)
  86. for pkg in src2month:
  87. print(pkg)
  88. bashCommand = "apt-get download " + pkg
  89. pwd = './Old_sources/' + str(year) + '/' + pkg+'/'
  90. process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE, cwd=pwd)
  91. output, error = process.communicate()
  92. #print(output, error)
  93. return 0
  94. def create_old_folders(src2month,year):
  95. for pkg in src2month:
  96. bashCommand1 = "mkdir " + pkg
  97. process = subprocess.Popen(bashCommand1.split(), stdout=subprocess.PIPE, cwd='./Old_sources/' + str(year) + '/')
  98. output, error = process.communicate()
  99. def test(src2month, src2pop, src2sloccount):
  100. get_data= False
  101. year=2015
  102. get_old_data=False
  103. print(len(src2month))
  104. if(get_data):
  105. download_data(src2month)
  106. if(get_old_data):
  107. download_old_data(src2month,year)
  108. #test_slocs(src2month, src2sloccount)
  109. #p1, = plt.plot(src2month['apache2'], color = 'red', label='apache')
  110. #p2, = plt.plot(src2month['lighttpd'], color = 'blue', label='lighttpd')
  111. #p3, = plt.plot(src2month['nginx'], color = 'green', label='nginx')
  112. #plt.legend(handles=[p1, p2, p3])
  113. #plt.show()
  114. #p1, = plt.plot(src2month['openssl'], color = 'red', label='openssl')
  115. #p2, = plt.plot(src2month['gnutls28'], color = 'blue', label='gnutls')
  116. #plt.legend(handles=[p1, p2])
  117. #plt.show()
  118. # test_language(src2month)
  119. # test_pop(src2month, src2pop)
  120. # test_history(src2month)
  121. for pkg in ['linux', 'firefox-esr', 'chromium-browser', 'openjdk-8', 'icedove', 'php7.0', 'mysql-transitional', 'openssl', 'qemu']:
  122. total_previous=sum(src2month[pkg][-24-9-9:-24-9])
  123. total_validation=sum(src2month[pkg][-24-9:-24])
  124. dif=total_validation-total_previous
  125. print(pkg + ' previous: ' + str(total_previous))
  126. print(pkg + ' validation: ' + str(total_validation))
  127. print(pkg + ' dif: ' + str(dif))
  128. print('#'*80)
  129. return 0