# DLAmine.py — mine Debian LTS security announcements (DLAs)
  1. #!/usr/bin/python3
  2. from .DebianAdvisory import DebianAdvisory as da
  3. from .CVEParse import CVEParse as cveparse
  4. import re
  5. import datetime
  6. from html.parser import HTMLParser
  7. from bs4 import BeautifulSoup
  8. from bs4 import NavigableString
  9. from pymongo import MongoClient
  10. import urllib.request
  11. import logging, sys
  12. import pickle
  13. import json
  14. from fixcwes import ret_roots
  15. def getDLAs():
  16. src2monthDLAs = dict()
  17. totalDLAs = dict()
  18. totalDLAsList = []
  19. base_url = 'https://lists.debian.org/debian-lts-announce/'
  20. logging.info('Checking for DLAs...\n')
  21. dlas = []
  22. more = True
  23. i = 0
  24. j = 0
  25. for year in range(2014,2019):
  26. for month in range(1,13):
  27. totalDLAs[str(year) + ',' + str(month)] = []
  28. i = 0
  29. while more:
  30. try:
  31. url = base_url + str(year) + '/' + str(month).zfill(2) + '/msg' + str(i).zfill(5) + '.html'
  32. print('Opening url: ' + url + '\n')
  33. req = urllib.request.urlopen(url)
  34. charset = req.info().get_content_charset()
  35. if charset is None:
  36. charset = 'utf-8'
  37. dla = req.read().decode(charset)
  38. dlas.append([dla, year, month])
  39. p1 = re.compile('Package.*: .*')
  40. p2 = re.compile('CVE-[0-9]{4}-[0-9]*')
  41. (pkg, cves) = parseDLAhtml(dla, p1, p2)
  42. pkg = fixURL(url, pkg)
  43. try:
  44. pkg = da.unifySrcName(pkg)
  45. except AttributeError:
  46. print('Problem with')
  47. print(pkg)
  48. print('#'*80)
  49. if pkg:
  50. totalDLAs[str(year) + ',' + str(month)] += cves
  51. try:
  52. src2monthDLAs[pkg].append((cves, [year,month]))
  53. except KeyError:
  54. src2monthDLAs[pkg] = []
  55. src2monthDLAs[pkg].append((cves, [year,month]))
  56. except urllib.error.HTTPError as err:
  57. if (i>1):
  58. break
  59. i+=1
  60. print(totalDLAs[str(year) + ',' + str(month)])
  61. totalDLAs[str(year) + ',' + str(month)] = list(set(totalDLAs[str(year) + ',' + str(month)]))
  62. totalDLAsList.append(len(totalDLAs[str(year) + ',' + str(month)]))
  63. j += 1
  64. print(totalDLAs)
  65. print(totalDLAsList)
  66. with open("DLA_sum.txt","wb") as fp:
  67. pickle.dump(totalDLAsList,fp)
  68. with open("src2month_DLA.txt","wb") as fp:
  69. pickle.dump(src2monthDLAs,fp)
  70. with open("src2month_DLA.json","w") as fp:
  71. json.dump(src2monthDLAs,fp)
  72. return(totalDLAsList)
  73. def permonthDLA(src2monthDLAs):
  74. client = MongoClient()
  75. out = dict()
  76. out_cvss = dict()
  77. out_cwe = dict()
  78. for pkg in src2monthDLAs:
  79. (out[pkg], out_cvss[pkg], out_cwe[pkg]) = perPackage(pkg, src2monthDLAs[pkg], out, out_cvss, client)
  80. #out_cwe[pkg] = perPackage_cwe(pkg, src2monthDLAs[pkg])
  81. with open("DLA_src2month.json","w") as fp:
  82. json.dump(out,fp)
  83. with open("DLA_withcvss.json","w") as fp:
  84. json.dump(out_cvss,fp)
  85. #
  86. with open("DLA_withcwe.json","w") as fp:
  87. json.dump(out_cwe,fp)
  88. def perPackage(pkg, dlas, cvss, out, client):
  89. root_list = ['682', '118', '330', '435', '664', '691', '693', '697', '703', '707', '710' ]
  90. monthyear = []
  91. monthyear_cvss = []
  92. monthyear_cwe = []
  93. haveseen = dict()
  94. for i in range(2000,2019):
  95. temp = []
  96. temp_cvss = []
  97. temp_cwe = []
  98. for j in range(12):
  99. temp.append(0)
  100. temp_cvss.append([0,0,0,0])
  101. temp_cwe.append([0]*12)
  102. monthyear.append(temp)
  103. monthyear_cvss.append(temp_cvss)
  104. monthyear_cwe.append(temp_cwe)
  105. for dla in dlas:
  106. for cve_id in dla[0]:
  107. if cve_id in haveseen:
  108. continue
  109. else:
  110. haveseen[cve_id] = 1
  111. cve = cveparse.fetchCVE(cve_id, client)
  112. (cve_date, cve_base, cve_impact, cve_exploit, cwe) = cveparse.parseCVE(cve_id, cve)
  113. new_year = dla[1][0]
  114. new_month = dla[1][1]
  115. if (cve_date.year<new_year) or (cve_date.year==new_year and cve_date.month<new_month):
  116. new_year=cve_date.year
  117. new_month=cve_date.month
  118. try:
  119. cve_base = float(cve_base)
  120. except TypeError:
  121. cve_base = -1.0
  122. monthyear[new_year-2000][new_month-1] += 1
  123. if (cve_base < 0.0):
  124. monthyear_cvss[new_year-2000][new_month-1][3] += 1
  125. elif (cve_base < 4.0):
  126. monthyear_cvss[new_year-2000][new_month-1][0] += 1
  127. elif (cve_base < 7.0):
  128. monthyear_cvss[new_year-2000][new_month-1][1] += 1
  129. else:
  130. monthyear_cvss[new_year-2000][new_month-1][2] += 1
  131. for i in ret_roots(cwe):
  132. if i == 0:
  133. monthyear_cwe[new_year-2000][new_month-1][11] += 1
  134. print('Problem with cve: ', cve_id)
  135. continue
  136. for j in range(len(root_list)):
  137. if i == root_list[j]:
  138. monthyear_cwe[new_year-2000][new_month-1][j] += 1
  139. return(monthyear, monthyear_cvss, monthyear_cwe)
  140. def parseDLAhtml(dla, p1, p2):
  141. pkg = re.search(p1, dla)
  142. if pkg:
  143. print(pkg.group(0))
  144. pkg = pkg.group(0).split(':',1)[1].strip()
  145. # Deal witg the different versions also here...
  146. pkg=da.unifySrcName(pkg)
  147. print(pkg)
  148. else:
  149. print(dla)
  150. cves = re.findall(p2, dla)
  151. cves = list(set(cves))
  152. return (pkg, cves)
  153. def fixURL(url, pkg):
  154. if (url=='https://lists.debian.org/debian-lts-announce/2016/10/msg00011.html'):
  155. return 'mpg123'
  156. elif (url=='https://lists.debian.org/debian-lts-announce/2016/05/msg00037.html'):
  157. return 'graphicsmagick'
  158. else:
  159. return pkg
  160. if __name__== "__main__":
  161. dlas = getDLAs()
  162. #saveDLAs(dlas)