# DebianAdvisory.py

import logging
import datetime
import os
import re
import urllib.request
import urllib.error
from bs4 import BeautifulSoup
from bs4 import NavigableString


class DebianAdvisory:
    @staticmethod
    def checkDSAs(state, config):
        """Try to find new DSAs by iteration; return a table of DSAs to process."""
        dsatable = dict()
        # state is implemented as a dictionary
        next_dsa = int(state['next_adv'])
        base_url = config['URL']['dsa_base_url']
        logging.info('Checking for new DSAs..\n')
        if next_dsa < int(config['DSA']['first_dsa']):
            logging.debug('Cache was deleted, starting at DSA ' + str(next_dsa) + '\n')
            next_dsa = int(config['DSA']['first_dsa'])
        # materialize the blacklist: a bare map object is a one-shot iterator
        # and would be exhausted after the first membership test
        blacklist = list(map(str.strip, config['DSA']['blacklist'].split(',')))
        if DebianAdvisory.blacklistedDSA('DSA-%03d' % next_dsa, blacklist):
            next_dsa += 1
        dsa = DebianAdvisory.fetchDSA(next_dsa, base_url)
        while dsa != '':
            logging.debug('Got DSA-' + str(next_dsa) + '\n')
            soup = BeautifulSoup(dsa, 'html.parser')
            # crop the DSA to drop unnecessary weight
            dsa = soup.find(id="content")
            if dsa is None:
                raise NameError('html file format unexpected')
            dsatable[next_dsa] = str(dsa)
            next_dsa += 1
            if DebianAdvisory.blacklistedDSA('DSA-%03d' % next_dsa, blacklist):
                next_dsa += 1
            dsa = DebianAdvisory.fetchDSA(next_dsa, base_url)
        state['next_adv'] = next_dsa
        return dsatable
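    # Assumed shapes of the two arguments, inferred from the lookups above
    # rather than documented anywhere upstream:
    #   state  -- dict-like, with state['next_adv'] holding the next DSA number
    #   config -- configparser-style mapping providing
    #             config['URL']['dsa_base_url'],
    #             config['DSA']['first_dsa'], and config['DSA']['blacklist']
    #             (a comma-separated list of IDs such as 'DSA-123, DSA-456')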
    @staticmethod
    def blacklistedDSA(dsa_id, blacklist):
        """Should this advisory be skipped?"""
        return dsa_id in blacklist
    @staticmethod
    def fetchDSA(dsa_id, base_url):
        """Fetch a given DSA from the Debian security pages."""
        now = datetime.datetime.now()
        current_year = now.year
        logging.info('Fetching DSA-%d records\n', dsa_id)
        # First DSA id published in each year; picks the year directory to try first.
        year_starts = [
            (3751, 2017), (3431, 2016), (3118, 2015), (2832, 2014),
            (2597, 2013), (2377, 2012), (2140, 2011), (1965, 2010),
            (1694, 2009), (1443, 2008), (1245, 2007), (929, 2006),
            (622, 2005), (406, 2004), (220, 2003), (96, 2002), (11, 2001),
        ]
        year = 2000
        for first_id, first_year in year_starts:
            if dsa_id >= first_id:
                year = first_year
                break
        dsa_id2string = '%03d' % dsa_id
        flag = True
        while flag:
            try:
                flag = False
                logging.info('Opening url: ' + base_url + str(year) + '/dsa-' + dsa_id2string + '\n')
                req = urllib.request.urlopen(base_url + str(year) + '/dsa-' + dsa_id2string)
                charset = req.info().get_content_charset()
                if charset is None:
                    charset = 'utf-8'
                dsa = req.read().decode(charset)
                return dsa
            except urllib.error.HTTPError:
                # not found under this year; advisories near a year boundary may
                # be filed under the following year, so retry forward
                if year < current_year:
                    year += 1
                    flag = True
                else:
                    dsa = ''
        return dsa
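    # Example of the URL this builds (assuming dsa_base_url points at the
    # Debian security archive, e.g. 'https://www.debian.org/security/'):
    #   https://www.debian.org/security/2017/dsa-3751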
    @staticmethod
    def parseDSAhtml(dsa):
        dsa_names = []
        dsa_CVEs = []
        soup = BeautifulSoup(dsa, 'html.parser')
        # Date Reported -> dsa_date
        tmp = soup.find("dt", string=re.compile(".*Date Repo.*:"))
        tmp = str(tmp.find_next().contents[0])
        try:
            # date in python datetime format
            dsa_date = datetime.datetime.strptime(tmp, "%d %b %Y")
        except ValueError:
            logging.error('Unable to extract date. Raising exception...')
            raise NameError('DSA parsing problem!')
        # Affected Packages -> dsa_names
        tmp = soup.find("dt", string=re.compile("Affected Packages:"))
        tmp = tmp.find_next().contents
        # need to handle multiple vulnerable packages
        for i in tmp:
            # short-circuit 'and': has_attr is only reached for Tags
            if (not isinstance(i, NavigableString)) and i.has_attr('href'):
                unified = DebianAdvisory.unifySrcName(i.string)
                dsa_names.append(unified)
        if not dsa_names:
            logging.warning('Unable to find src package in DSA. unnamed package...')
            dsa_names.append('unnamed')
            logging.warning('Unnamed dsa: ' + str(dsa) + '\n')
        # Security database references (CVEs) -> dsa_CVEs
        tmp = soup.find("dt", string=re.compile("Security database references:"))
        tmp = tmp.find_next().descendants
        for i in tmp:
            if (not isinstance(i, NavigableString)) and i.has_attr('href'):
                # don't count bug database references
                if not re.compile("^Bug").match(i.string):
                    dsa_CVEs.append(i.string)
        return dsa_names, dsa_date, dsa_CVEs
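    # The parser above expects the definition-list layout of Debian DSA pages,
    # roughly (a sketch with hypothetical values, not a verbatim page):
    #   <dl>
    #     <dt>Date Reported:</dt><dd>21 Mar 2017</dd>
    #     <dt>Affected Packages:</dt><dd><a href="...">somepkg</a></dd>
    #     <dt>Security database references:</dt>
    #     <dd><a href="...">CVE-2017-1234</a>, <a href="...">Bug 123456</a></dd>
    #   </dl>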
    @staticmethod
    def unifySrcName(name):
        """Track renamed packages here; easy but manual. We should look into ways
        to automate this.
        TODO: it should map to the most recent version, not an unversioned name.
        TODO: we can partially automate this:
          make everything lower-case,
          replace -X.Y version suffixes by the highest encountered version(?),
          handle special cases like xfree86.
        """
        # compare and return the lower-cased name, as intended above
        lowername = name.lower()
        suf = os.path.join(os.path.dirname(__file__), 'src_name_unifications.txt')
        with open(suf) as su:
            for line in su:
                sp_line = line.strip().split("->")
                if re.compile(sp_line[0]).match(lowername):
                    return sp_line[1]
        return lowername
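    # Assumed format of src_name_unifications.txt, inferred from the parsing
    # above: one 'regex->replacement' rule per line, for example
    #   xfree86.*->xfree86
    #   linux-2\..*->linux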
    @staticmethod
    def fixDSAquirks(dsa_id, dsa_state):
        """
        TODO: static map to correct errors in DSAs.
        Return a fixed list of CVE IDs, or 0 to skip the DSA.
        This code is still experimental.
        """
        new_names = dsa_state[0]
        new_date = dsa_state[1]
        new_cves = dsa_state[2]
        if dsa_id == 85:
            new_cves = ["CVE-2001-1562", "LOCAL-03/04/05", "LOCAL-08/24/08"]
        elif dsa_id == 745:
            new_cves = ["CVE-2005-1921", "CVE-2005-2106", "CVE-2005-1921"]
        elif dsa_id == 1095:
            new_cves = ["CVE-2006-0747", "CVE-2006-1861", "CVE-2006-2661"]
        elif dsa_id == 1284:
            new_cves = ["CVE-2007-1320", "CVE-2007-1321", "CVE-2007-1322", "CVE-2007-2893", "CVE-2007-1366"]
        elif dsa_id == 1502:
            new_cves = ["CVE-2007-2821", "CVE-2007-3238", "CVE-2008-0193", "CVE-2008-0194"]
        elif dsa_id == 1706:
            new_cves = ["CVE-2009-0135", "CVE-2009-0136"]
        elif dsa_id == 1757:
            new_cves = ["CVE-2007-2383", "CVE-2008-7220", "CVE-2009-1208"]
        elif dsa_id == 1896:
            new_cves = ["CVE-2009-3474", "CVE-2009-3475", "CVE-2009-3476"]
        elif dsa_id == 1931:
            new_cves = ["CVE-2009-0689", "CVE-2009-2463"]
        elif dsa_id == 1989:
            new_cves = ["CVE-2010-0789"]
        elif dsa_id == 1941:
            new_cves = ["CVE-2009-0755", "CVE-2009-3903", "CVE-2009-3904", "CVE-2009-3905", "CVE-2009-3606",
                        "CVE-2009-3607", "CVE-2009-3608", "CVE-2009-3909", "CVE-2009-3938"]
        elif dsa_id == 2004:
            new_cves = ["CVE-2010-0787", "CVE-2010-0547"]
        elif dsa_id == 2008:
            new_cves = ["LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10"]
        elif dsa_id == 2043:
            new_cves = ["CVE-2010-2062"]
        elif dsa_id == 2044:
            new_cves = ["CVE-2010-2062"]
        elif dsa_id == 2056:
            new_cves = ["CVE-2010-2155", "CVE-2009-4882"]
        elif dsa_id == 2092:
            new_cves = ["CVE-2010-1625", "CVE-2010-1448", "CVE-2009-4497"]
        elif dsa_id == 2098:
            new_cves = ["CVE-2010-3659", "CVE-2010-3660", "CVE-2010-3661", "CVE-2010-3662", "CVE-2010-3663",
                        "CVE-2010-3664", "CVE-2010-3665", "CVE-2010-3666", "CVE-2010-3667", "CVE-2010-3668",
                        "CVE-2010-3669", "CVE-2010-3670", "CVE-2010-3671", "CVE-2010-3672", "CVE-2010-3673",
                        "CVE-2010-3674"]
        elif dsa_id == 2103:
            new_cves = ["CVE-2010-3076"]
        elif dsa_id == 2218:
            new_cves = ["CVE-2011-1684"]
        elif dsa_id == 2229:
            new_cves = ["CVE-2005-4494", "CVE-2006-0517", "CVE-2006-0518", "CVE-2006-0519", "CVE-2006-0625",
                        "CVE-2006-0626", "CVE-2006-1295", "CVE-2006-1702", "CVE-2007-4525", "CVE-2008-5812",
                        "CVE-2008-5813", "CVE-2009-3041"]
        elif dsa_id == 2261:
            new_cves = ["CVE-2009-4078", "CVE-2009-4079", "CVE-2009-4059", "LOCAL-12/30/10", "LOCAL-12/30/10"]
        elif dsa_id == 2262:
            new_cves = ["LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-05/18/11",
                        "LOCAL-05/18/11"]
        elif dsa_id == 2286:
            new_names = ["phpmyadmin"]
        elif dsa_id == 1977:
            new_names = ["python3.5"]
        elif dsa_id in (47, 479, 480, 482, 489, 491, 495):
            new_names = ["linux"]
            logging.debug('Substitution successful')
        elif dsa_id == 2289:
            new_cves = ["LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
                        "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
                        "LOCAL-07/27/11", "LOCAL-07/27/11"]
        return new_names, new_date, new_cves
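
# Minimal usage sketch (assumptions: a config file providing the sections read
# by checkDSAs, and network access to the Debian security pages; the file name
# 'config.ini' and its keys below are hypothetical, not part of this module):
#
#   import configparser
#   from DebianAdvisory import DebianAdvisory
#
#   config = configparser.ConfigParser()
#   config.read('config.ini')
#   state = {'next_adv': config['DSA']['first_dsa']}
#   dsatable = DebianAdvisory.checkDSAs(state, config)
#   for dsa_id, html in dsatable.items():
#       names, date, cves = DebianAdvisory.parseDSAhtml(html)
#       names, date, cves = DebianAdvisory.fixDSAquirks(dsa_id, (names, date, cves))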