DebianAdvisory.py

import datetime
import logging
import os
import re
import urllib.error
import urllib.request

from bs4 import BeautifulSoup
from bs4 import NavigableString

class DebianAdvisory:
    @staticmethod
    def checkDSAs(state, config):
        """Try to find new DSAs by iteration, return table of DSAs to process."""
        dsatable = dict()
        # state is implemented as a dictionary
        next_dsa = int(state['next_adv'])
        base_url = config['URL']['dsa_base_url']
        logging.info('Checking for new DSAs..')
        if next_dsa < int(config['DSA']['first_dsa']):
            logging.debug('Cache was deleted, starting at DSA %d', next_dsa)
            next_dsa = int(config['DSA']['first_dsa'])
        next_dsa2string = '%03d' % next_dsa
        # materialize the blacklist: a lazy map object would be exhausted
        # after the first membership test
        blacklist = [s.strip() for s in config['DSA']['blacklist'].split(',')]
        if DebianAdvisory.blacklistedDSA('DSA-' + next_dsa2string, blacklist):
            next_dsa += 1
        dsa = DebianAdvisory.fetchDSA(next_dsa, base_url)
        while dsa != '':
            logging.debug('Got DSA-%d', next_dsa)
            soup = BeautifulSoup(dsa, 'html.parser')
            # crop the DSA to its payload, dropping unnecessary weight
            dsa = soup.find(id='content')
            if dsa is None:
                # find() returns None on a miss, never ''
                raise NameError('html file format unexpected')
            dsatable[next_dsa] = str(dsa)
            next_dsa += 1
            if DebianAdvisory.blacklistedDSA('DSA-%03d' % next_dsa, blacklist):
                next_dsa += 1
            dsa = DebianAdvisory.fetchDSA(next_dsa, base_url)
        state['next_adv'] = next_dsa
        return dsatable
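
    # Expected inputs of checkDSAs (inferred from the lookups above; the
    # shapes shown here are illustrative):
    #   state:  dict with 'next_adv' -> id of the next advisory to fetch
    #   config: mapping providing config['URL']['dsa_base_url'],
    #           config['DSA']['first_dsa'] and config['DSA']['blacklist']
    #           (a comma-separated list of 'DSA-nnn' ids to skip)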
    @staticmethod
    def blacklistedDSA(dsa_id, blacklist):
        """Should this advisory be skipped?"""
        return dsa_id in blacklist
    @staticmethod
    def fetchDSA(dsa_id, base_url):
        """Fetch a given DSA from the url."""
        now = datetime.datetime.now()
        current_year = now.year
        logging.info('Fetching DSA-%d records', dsa_id)
        # map a DSA id to its publication year (first id published per year);
        # anything below id 11 falls back to the year 2000
        id_year_table = [
            (3751, 2017), (3431, 2016), (3118, 2015), (2832, 2014),
            (2597, 2013), (2377, 2012), (2140, 2011), (1965, 2010),
            (1694, 2009), (1443, 2008), (1245, 2007), (929, 2006),
            (622, 2005), (406, 2004), (220, 2003), (96, 2002), (11, 2001),
        ]
        year = 2000
        for first_id, dsa_year in id_year_table:
            if dsa_id >= first_id:
                year = dsa_year
                break
        dsa_id2string = '%03d' % dsa_id
        flag = True
        while flag:
            try:
                flag = False
                logging.info('Opening url: %s%d/dsa-%s', base_url, year, dsa_id2string)
                req = urllib.request.urlopen(base_url + str(year) + '/dsa-' + dsa_id2string)
                charset = req.info().get_content_charset()
                if charset is None:
                    charset = 'utf-8'
                dsa = req.read().decode(charset)
                return dsa
            except urllib.error.HTTPError:
                # the id/year table may be off by a few ids: retry in later
                # years before concluding that the DSA does not exist
                if year < current_year:
                    year += 1
                    flag = True
                else:
                    dsa = ''
        return dsa
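
    # Shape of the URL built above, assuming dsa_base_url points at the Debian
    # security archive (the actual base url comes from the config; the value
    # shown here is only an example):
    #   'https://www.debian.org/security/' + '2017/dsa-3751'
    # ids below 1000 are zero-padded to three digits, e.g. dsa-096.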
    @staticmethod
    def parseDSAhtml(dsa):
        """Extract (package names, report date, CVE ids) from a DSA page."""
        dsa_names = []
        dsa_CVEs = []
        soup = BeautifulSoup(dsa, 'html.parser')
        # Date Reported -> dsa_date, as a python datetime object
        tmp = soup.find('dt', string=re.compile('.*Date Repo.*:'))
        tmp = str(tmp.find_next().contents[0])
        try:
            dsa_date = datetime.datetime.strptime(tmp, '%d %b %Y')
        except ValueError:
            # strptime raises on a malformed date rather than returning None
            logging.error('Unable to extract date. Raising exception...')
            raise NameError('DSA parsing problem!')
        # Affected Packages -> dsa_names
        tmp = soup.find('dt', string=re.compile('Affected Packages:'))
        tmp = tmp.find_next().contents
        # there may be multiple vulnerable packages, one <a href> each
        for i in tmp:
            # short-circuit 'and': NavigableString has no has_attr()
            if (not isinstance(i, NavigableString)) and i.has_attr('href'):
                unified = DebianAdvisory.unifySrcName(i.string)
                dsa_names.append(unified)
        if not dsa_names:
            logging.warning('Unable to find src package in DSA. unnamed package...')
            dsa_names.append('unnamed')
            logging.warning('Unnamed dsa: %s', str(dsa))
        # Security database references (CVEs) -> dsa_CVEs
        tmp = soup.find('dt', string=re.compile('Security database references:'))
        tmp = tmp.find_next().descendants
        for i in tmp:
            if (not isinstance(i, NavigableString)) and i.has_attr('href'):
                # don't count links into the bug database
                if not re.compile('^Bug').match(i.string):
                    dsa_CVEs.append(i.string)
        return dsa_names, dsa_date, dsa_CVEs
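
    # parseDSAhtml() expects the definition-list layout of a DSA page; an
    # illustrative (not verbatim) fragment that the code above would accept:
    #   <dt>Date Reported:</dt><dd>21 Mar 2017</dd>
    #   <dt>Affected Packages:</dt><dd><a href="...">imagemagick</a></dd>
    #   <dt>Security database references:</dt>
    #   <dd><a href="...">Bug 12345</a>, <a href="...">CVE-2017-1000</a></dd>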
    @staticmethod
    def unifySrcName(name):
        """Track renamed packages here; easy but manual. We should look into ways to automate this.
        TODO: it should map to the most recent version, not unversioned
        TODO: we can partially automate this:
              make all lower-case,
              replace -X.Y version numbers by the highest encountered(?),
              handle special cases like xfree86
        """
        lowername = name.lower()
        suf = os.path.join(os.path.dirname(__file__), 'src_name_unifications.txt')
        with open(suf) as su:
            for line in su:
                # each line holds one 'regex->replacement' rule
                sp_line = line.strip().split('->')
                if re.compile(sp_line[0]).match(lowername):
                    return sp_line[1]
        return name
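
    # Hypothetical src_name_unifications.txt contents: one 'regex->replacement'
    # rule per line, tried top to bottom, e.g.
    #   xfree86.*->xfree86
    #   linux-source.*->linux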
    @staticmethod
    def fixDSAquirks(dsa_id, dsa_state):
        """
        TODO:
        Static map to correct errors in DSAs
        Return fixed list of CVE IDs or 0 to skip DSA
        This code is still experimental
        """
        new_names = dsa_state[0]
        new_date = dsa_state[1]
        new_cves = dsa_state[2]
        if dsa_id == 85:
            new_cves = ["CVE-2001-1562", "LOCAL-03/04/05", "LOCAL-08/24/08"]
        elif dsa_id == 745:
            new_cves = ["CVE-2005-1921", "CVE-2005-2106", "CVE-2005-1921"]
        elif dsa_id == 1095:
            new_cves = ["CVE-2006-0747", "CVE-2006-1861", "CVE-2006-2661"]
        elif dsa_id == 1284:
            new_cves = ["CVE-2007-1320", "CVE-2007-1321", "CVE-2007-1322", "CVE-2007-2893", "CVE-2007-1366"]
        elif dsa_id == 1502:
            new_cves = ["CVE-2007-2821", "CVE-2007-3238", "CVE-2008-0193", "CVE-2008-0194"]
        elif dsa_id == 1706:
            new_cves = ["CVE-2009-0135", "CVE-2009-0136"]
        elif dsa_id == 1757:
            new_cves = ["CVE-2007-2383", "CVE-2008-7220", "CVE-2009-1208"]
        elif dsa_id == 1896:
            new_cves = ["CVE-2009-3474", "CVE-2009-3475", "CVE-2009-3476"]
        elif dsa_id == 1931:
            new_cves = ["CVE-2009-0689", "CVE-2009-2463"]
        elif dsa_id == 1989:
            new_cves = ["CVE-2010-0789"]
        elif dsa_id == 1941:
            new_cves = ["CVE-2009-0755", "CVE-2009-3903", "CVE-2009-3904", "CVE-2009-3905", "CVE-2009-3606",
                        "CVE-2009-3607", "CVE-2009-3608", "CVE-2009-3909", "CVE-2009-3938"]
        elif dsa_id == 2004:
            new_cves = ["CVE-2010-0787", "CVE-2010-0547"]
        elif dsa_id == 2008:
            new_cves = ["LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10"]
        elif dsa_id == 2043:
            new_cves = ["CVE-2010-2062"]
        elif dsa_id == 2044:
            new_cves = ["CVE-2010-2062"]
        elif dsa_id == 2056:
            new_cves = ["CVE-2010-2155", "CVE-2009-4882"]
        elif dsa_id == 2092:
            new_cves = ["CVE-2010-1625", "CVE-2010-1448", "CVE-2009-4497"]
        elif dsa_id == 2098:
            new_cves = ["CVE-2010-3659", "CVE-2010-3660", "CVE-2010-3661", "CVE-2010-3662", "CVE-2010-3663",
                        "CVE-2010-3664", "CVE-2010-3665", "CVE-2010-3666", "CVE-2010-3667", "CVE-2010-3668",
                        "CVE-2010-3669", "CVE-2010-3670", "CVE-2010-3671", "CVE-2010-3672", "CVE-2010-3673",
                        "CVE-2010-3674"]
        elif dsa_id == 2103:
            new_cves = ["CVE-2010-3076"]
        elif dsa_id == 2218:
            new_cves = ["CVE-2011-1684"]
        elif dsa_id == 2229:
            new_cves = ["CVE-2005-4494", "CVE-2006-0517", "CVE-2006-0518", "CVE-2006-0519", "CVE-2006-0625",
                        "CVE-2006-0626", "CVE-2006-1295", "CVE-2006-1702", "CVE-2007-4525", "CVE-2008-5812",
                        "CVE-2008-5813", "CVE-2009-3041"]
        elif dsa_id == 2261:
            new_cves = ["CVE-2009-4078", "CVE-2009-4079", "CVE-2009-4059", "LOCAL-12/30/10", "LOCAL-12/30/10"]
        elif dsa_id == 2262:
            new_cves = ["LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-05/18/11",
                        "LOCAL-05/18/11"]
        elif dsa_id == 2286:
            new_names = ["phpmyadmin"]
            logging.debug('DSA-%d: corrected package name', dsa_id)
        elif dsa_id == 1977:
            new_names = ["python3.5"]
        elif dsa_id in (47, 479, 480, 482, 489, 491, 495):
            new_names = ["linux"]
            logging.debug('Substitution successful')
        elif dsa_id == 2289:
            new_cves = ["LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
                        "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
                        "LOCAL-07/27/11", "LOCAL-07/27/11"]
        return new_names, new_date, new_cves
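

# Minimal driver sketch, assuming a configparser-style config that supplies the
# keys used above and a state dict that is normally persisted between runs.
# The file name 'config.ini' and the loop below are illustrative, not part of
# the original module.
if __name__ == '__main__':
    import configparser

    config = configparser.ConfigParser()
    config.read('config.ini')  # must define [URL] dsa_base_url and [DSA] first_dsa, blacklist
    state = {'next_adv': 0}    # usually restored from a cache
    dsatable = DebianAdvisory.checkDSAs(state, config)
    for dsa_id, html in dsatable.items():
        names, date, cves = DebianAdvisory.parseDSAhtml(html)
        names, date, cves = DebianAdvisory.fixDSAquirks(dsa_id, (names, date, cves))
        print(dsa_id, names, date.date(), len(cves), 'CVEs')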