# DebianAdvisory.py

import logging
import datetime
import os
import re
import urllib.request
import urllib.error
from bs4 import BeautifulSoup, NavigableString


class DebianAdvisory:
    @staticmethod
    def checkDSAs(state, config):
        """Try to find new DSAs by iteration; return a table of DSAs to process."""
        dsatable = dict()
        # state is implemented as a dictionary
        next_dsa = int(state['next_adv'])
        base_url = config['URL']['dsa_base_url']
        logging.info('Checking for new DSAs..\n')
        if next_dsa < int(config['DSA']['first_dsa']):
            logging.debug('Cache was deleted, starting at DSA ' + str(next_dsa) + '\n')
            next_dsa = int(config['DSA']['first_dsa'])
        blacklist = [entry.strip() for entry in config['DSA']['blacklist'].split(',')]
        if DebianAdvisory.blacklistedDSA('DSA-' + '%03d' % next_dsa, blacklist):
            next_dsa += 1
        dsa = DebianAdvisory.fetchDSA(next_dsa, base_url)
        while dsa != '':
            logging.debug('Got DSA-' + str(next_dsa) + '\n')
            soup = BeautifulSoup(dsa, 'html.parser')
            # crop the DSA to its payload, dropping unnecessary page weight
            dsa = soup.find(id="content")
            if dsa is None:
                raise NameError('html file format unexpected')
            dsatable[next_dsa] = str(dsa)
            next_dsa += 1
            if DebianAdvisory.blacklistedDSA('DSA-' + '%03d' % next_dsa, blacklist):
                next_dsa += 1
            dsa = DebianAdvisory.fetchDSA(next_dsa, base_url)
        state['next_adv'] = next_dsa
        return dsatable

    @staticmethod
    def blacklistedDSA(dsa_id, blacklist):
        """Should this advisory be skipped?"""
        return dsa_id in blacklist

    @staticmethod
    def fetchDSA(dsa_id, base_url):
        """Fetch a given DSA from the per-year archive under base_url."""
        now = datetime.datetime.now()
        current_year = now.year
        logging.info('Fetching DSA-%d records\n', dsa_id)
        # DSAs are archived in per-year directories; map the DSA id to the
        # year whose first advisory id it reaches.
        year_thresholds = [
            (4078, 2018), (3751, 2017), (3431, 2016), (3118, 2015),
            (2832, 2014), (2597, 2013), (2377, 2012), (2140, 2011),
            (1965, 2010), (1694, 2009), (1443, 2008), (1245, 2007),
            (929, 2006), (622, 2005), (406, 2004), (220, 2003),
            (96, 2002), (11, 2001),
        ]
        year = 2000
        for first_id, dsa_year in year_thresholds:
            if dsa_id >= first_id:
                year = dsa_year
                break
        dsa_id2string = '%03d' % dsa_id
        # If the guessed year yields an HTTP error, advance year by year up to
        # the current one; an empty string signals that the DSA was not found.
        while True:
            url = base_url + str(year) + '/dsa-' + dsa_id2string
            try:
                logging.info('Opening url: ' + url + '\n')
                req = urllib.request.urlopen(url)
                charset = req.info().get_content_charset()
                if charset is None:
                    charset = 'utf-8'
                return req.read().decode(charset)
            except urllib.error.HTTPError:
                if year < current_year:
                    year += 1
                else:
                    return ''
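
    # For example, with dsa_base_url set to 'https://www.debian.org/security/'
    # (the archive whose per-year layout the table above mirrors), DSA-3751
    # would be fetched from https://www.debian.org/security/2017/dsa-3751.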

    @staticmethod
    def parseDSAhtml(dsa):
        """Extract affected source package names, release date, and CVE ids from a DSA page."""
        dsa_names = []
        dsa_CVEs = []
        # Date Reported -> dsa_date
        soup = BeautifulSoup(dsa, 'html.parser')
        tmp = soup.find("dt", string=re.compile(".*Date Repo.*:"))
        tmp = str(tmp.find_next().contents[0])
        # date in python datetime format; strptime raises ValueError on
        # failure, so the check below is purely defensive
        dsa_date = datetime.datetime.strptime(tmp, "%d %b %Y")
        if not dsa_date:
            print('Unable to extract date. Raising exception...')
            raise NameError('DSA parsing problem!')
        # Affected Packages -> dsa_names
        tmp = soup.find("dt", string=re.compile("Affected Packages:"))
        tmp = tmp.find_next().contents
        # may contain multiple vulnerable packages
        for i in tmp:
            # relies on short-circuit 'and': the isinstance check guards has_attr
            if (not isinstance(i, NavigableString)) and i.has_attr('href'):
                unified = DebianAdvisory.unifySrcName(i.string)
                dsa_names.append(unified)
        if not dsa_names:
            print('Unable to find src package in DSA. unnamed package...')
            dsa_names.append('unnamed')
            print('Unnamed dsa:' + str(dsa) + '\n')
        # Security database references (CVEs) -> dsa_CVEs
        tmp = soup.find("dt", string=re.compile("Security database references:"))
        tmp = tmp.find_next().descendants
        for i in tmp:
            if (not isinstance(i, NavigableString)) and i.has_attr('href'):
                # don't count bug database references
                if not re.compile("^Bug").match(i.string):
                    dsa_CVEs.append(i.string)
        return dsa_names, dsa_date, dsa_CVEs
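
    # Illustrative shape of the returned tuple (values are made up for
    # illustration, not taken from a real DSA):
    #   (['openssl'], datetime.datetime(2016, 5, 3, 0, 0), ['CVE-2016-2105'])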

    @staticmethod
    def unifySrcName(name):
        """Track renamed packages here; easy but manual. We should look into ways to automate this.

        TODO: it should map to the most recent version, not unversioned.
        TODO: we can partially automate this:
          - make everything lower-case
          - replace -X.Y version numbers by the highest encountered(?)
          - handle special cases like xfree86
        """
        suf = os.path.join(os.path.dirname(__file__), 'src_name_unifications.txt')
        with open(suf) as su:
            for line in su:
                sp_line = line.strip().split("->")
                if re.compile(sp_line[0]).match(name):
                    return sp_line[1]
        return name
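
    # A hypothetical sketch of what src_name_unifications.txt could contain,
    # assuming one "regex->replacement" rule per line (first matching rule wins):
    #   xfree86.*->xorg
    #   linux-2\..*->linux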

    @staticmethod
    def fixDSAquirks(dsa_id, dsa_state):
        """
        Static map to correct errors in DSAs.
        Returns the fixed (names, date, CVE ids) state, or 0 to skip the DSA.
        TODO: this code is still experimental.
        """
        new_names = dsa_state[0]
        new_date = dsa_state[1]
        new_cves = dsa_state[2]
        if dsa_id == 85:
            new_cves = ["CVE-2001-1562", "LOCAL-03/04/05", "LOCAL-08/24/08"]
        elif dsa_id == 745:
            new_cves = ["CVE-2005-1921", "CVE-2005-2106", "CVE-2005-1921"]
        elif dsa_id == 1095:
            new_cves = ["CVE-2006-0747", "CVE-2006-1861", "CVE-2006-2661"]
        elif dsa_id == 1284:
            new_cves = ["CVE-2007-1320", "CVE-2007-1321", "CVE-2007-1322", "CVE-2007-2893", "CVE-2007-1366"]
        elif dsa_id == 1502:
            new_cves = ["CVE-2007-2821", "CVE-2007-3238", "CVE-2008-0193", "CVE-2008-0194"]
        elif dsa_id == 1706:
            new_cves = ["CVE-2009-0135", "CVE-2009-0136"]
        elif dsa_id == 1757:
            new_cves = ["CVE-2007-2383", "CVE-2008-7220", "CVE-2009-1208"]
        elif dsa_id == 1896:
            new_cves = ["CVE-2009-3474", "CVE-2009-3475", "CVE-2009-3476"]
        elif dsa_id == 1931:
            new_cves = ["CVE-2009-0689", "CVE-2009-2463"]
        elif dsa_id == 1989:
            new_cves = ["CVE-2010-0789"]
        elif dsa_id == 1941:
            new_cves = ["CVE-2009-0755", "CVE-2009-3903", "CVE-2009-3904", "CVE-2009-3905", "CVE-2009-3606",
                        "CVE-2009-3607", "CVE-2009-3608", "CVE-2009-3909", "CVE-2009-3938"]
        elif dsa_id == 2004:
            new_cves = ["CVE-2010-0787", "CVE-2010-0547"]
        elif dsa_id == 2008:
            new_cves = ["LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10"]
        elif dsa_id == 2043:
            new_cves = ["CVE-2010-2062"]
        elif dsa_id == 2044:
            new_cves = ["CVE-2010-2062"]
        elif dsa_id == 2056:
            new_cves = ["CVE-2010-2155", "CVE-2009-4882"]
        elif dsa_id == 2092:
            new_cves = ["CVE-2010-1625", "CVE-2010-1448", "CVE-2009-4497"]
        elif dsa_id == 2098:
            new_cves = ["CVE-2010-3659", "CVE-2010-3660", "CVE-2010-3661", "CVE-2010-3662", "CVE-2010-3663",
                        "CVE-2010-3664", "CVE-2010-3665", "CVE-2010-3666", "CVE-2010-3667", "CVE-2010-3668",
                        "CVE-2010-3669", "CVE-2010-3670", "CVE-2010-3671", "CVE-2010-3672", "CVE-2010-3673",
                        "CVE-2010-3674"]
        elif dsa_id == 2103:
            new_cves = ["CVE-2010-3076"]
        elif dsa_id == 2218:
            new_cves = ["CVE-2011-1684"]
        elif dsa_id == 2229:
            new_cves = ["CVE-2005-4494", "CVE-2006-0517", "CVE-2006-0518", "CVE-2006-0519", "CVE-2006-0625",
                        "CVE-2006-0626", "CVE-2006-1295", "CVE-2006-1702", "CVE-2007-4525", "CVE-2008-5812",
                        "CVE-2008-5813", "CVE-2009-3041"]
        elif dsa_id == 2261:
            new_cves = ["CVE-2009-4078", "CVE-2009-4079", "CVE-2009-4059", "LOCAL-12/30/10", "LOCAL-12/30/10"]
        elif dsa_id == 2262:
            new_cves = ["LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-05/18/11",
                        "LOCAL-05/18/11"]
        elif dsa_id == 2286:
            new_names = ["phpmyadmin"]
        elif dsa_id == 1977:
            new_names = ["python3.5"]
        elif dsa_id in (47, 479, 480, 482, 489, 491, 495):
            new_names = ["linux"]
            print('Substitution successful')
        elif dsa_id == 2289:
            new_cves = ["LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
                        "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
                        "LOCAL-07/27/11", "LOCAL-07/27/11"]
        return new_names, new_date, new_cves
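

# Minimal usage sketch (an illustration, not part of the original module):
# it assumes a configparser-style config exposing the [URL] and [DSA] options
# read by checkDSAs, and a dict-based state.
if __name__ == '__main__':
    import configparser

    config = configparser.ConfigParser()
    # 'config.ini' is a hypothetical file; it must provide
    # [URL] dsa_base_url and [DSA] first_dsa, blacklist.
    config.read('config.ini')
    state = {'next_adv': config['DSA']['first_dsa']}
    dsatable = DebianAdvisory.checkDSAs(state, config)
    for dsa_id, html in dsatable.items():
        names, date, cves = DebianAdvisory.parseDSAhtml(html)
        names, date, cves = DebianAdvisory.fixDSAquirks(dsa_id, (names, date, cves))
        print(dsa_id, names, date.date(), len(cves))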