debian-security-advisory.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. #!/usr/bin/python3
  2. ###############################################################################
  3. ##
  4. ## Functions for downloading and parsing Debian Security Advisories (DSAs)
  5. ##
  6. ###############################################################################
  7. import re
  8. import datetime
  9. from html.parser import HTMLParser
  10. from bs4 import BeautifulSoup
  11. from bs4 import NavigableString
  12. import urllib.request
  13. import logging, sys
  14. logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
  15. #Testing global variables
  16. config = dict([('dsa_base_url','https://www.debian.org/security/')])
  17. state = dict([('next_adv',3496)])
  18. dsatable = dict()
  19. # Track renamed packages here, easy but manual. We should look into ways
  20. # to automate this
  21. def unifySrcName(name):
  22. ## TODO: it should map to the most recent version, not unversioned
  23. ## TODO: we can partially automate this..
  24. ## -> make all lower-case
  25. ## -> replace -X.Y version numbers by highest encounter(?)
  26. ## -> handle special cases like xfree86
  27. # Have to go over this again!
  28. # Maybe have a file with all these things??
  29. newname = name
  30. if re.compile("proftp-dfsg").match(name):
  31. newname = "proftp"
  32. elif re.compile("mozilla-firefox").match(name):
  33. newname = "iceweasel"
  34. elif re.compile("mozilla-thunderbird").match(name):
  35. newname = "icedove"
  36. elif re.compile("squid3").match(name):
  37. newname = "squid"
  38. elif re.compile("squid/squid3").match(name):
  39. newname = "squid"
  40. elif re.compile("tk8.3").match(name):
  41. newname = "tk8.4"
  42. elif re.compile("tk8.4").match(name):
  43. newname = "tk8.4"
  44. elif re.compile("xpdf-i").match(name):
  45. newname = "xpdf"
  46. elif re.compile("zope2.10/zope2.9").match(name):
  47. newname = "zope2.7"
  48. elif re.compile("zope-cmfplone").match(name):
  49. newname = "zope2.7"
  50. elif re.compile("zope-ldapuserfolder").match(name):
  51. newname = "zope2.7"
  52. elif re.compile("librmagick-ruby").match(name):
  53. newname = "ruby-rmagick"
  54. elif re.compile("libcompass-ruby").match(name):
  55. newname = "ruby-compass"
  56. elif re.compile("bio-ruby").match(name):
  57. newname = "ruby-bio"
  58. elif re.compile("request-tracker3.4").match(name):
  59. newname = "request-tracker3.8"
  60. elif re.compile("request-tracker3.6").match(name):
  61. newname = "request-tracker3.8"
  62. elif re.compile("perl-5.005").match(name):
  63. newname = "perl"
  64. elif re.compile("otrs2").match(name):
  65. newname = "otrs"
  66. elif re.compile("openldap2.3").match(name):
  67. newname = "openldap"
  68. elif re.compile("openldap2").match(name):
  69. newname = "openldap"
  70. elif re.compile("libreoffice").match(name):
  71. newname = "openoffice.org"
  72. elif re.compile("nsd3").match(name):
  73. newname = "nsd"
  74. elif re.compile("network-manager/network-manager-applet").match(name):
  75. newname = "network-manager"
  76. elif re.compile("nagios3").match(name):
  77. newname = "nagios"
  78. elif re.compile("nagios2").match(name):
  79. newname = "nagios"
  80. elif re.compile("mysql-dfsg-4.1").match(name):
  81. newname = "mysql"
  82. elif re.compile("mysql-dfsg-5.0").match(name):
  83. newname = "mysql"
  84. elif re.compile("mysql-dfsg").match(name):
  85. newname = "mysql"
  86. elif re.compile("linux-2.6.24").match(name):
  87. newname = "linux-2.6"
  88. elif re.compile("linux-kernel-alpha").match(name):
  89. newname = "linux-2.4"
  90. elif re.compile("linux-kernel-i386").match(name):
  91. newname = "linux-2.4"
  92. elif re.compile("libmusicbrainz-2.0").match(name):
  93. newname = "libmusicbrainz3"
  94. elif re.compile("libmusicbrainz-2.1").match(name):
  95. newname = "libmusicbrainz3"
  96. elif re.compile("libgtop1").match(name):
  97. newname = "libgtop2"
  98. elif re.compile("libgd1").match(name):
  99. newname = "libgd2"
  100. elif re.compile("libast1").match(name):
  101. newname = "libast"
  102. elif re.compile("libmozjs0d").match(name):
  103. newname = "libast"
  104. elif re.compile("^kernel-source-2.2.*").match(name):
  105. newname = "linux-2.2"
  106. elif re.compile("^kernel-patch-2.2.*").match(name):
  107. newname = "linux-2.2"
  108. elif re.compile("kernel").match(name):
  109. newname = "linux-2.4"
  110. elif re.compile("^kernel-source-2.4.*").match(name):
  111. newname = "linux-2.4"
  112. elif re.compile("^kernel-image-2.2.*").match(name):
  113. newname = "linux-2.2"
  114. elif re.compile("^kernel-image-2.4.*").match(name):
  115. newname = "linux-2.4"
  116. elif re.compile("^kernel-patch-2.4.*").match(name):
  117. newname = "linux-2.4"
  118. elif re.compile("kernel-patch-benh").match(name):
  119. newname = "linux-2.4"
  120. elif re.compile("kernel-patch-vserver").match(name):
  121. newname = "linux-2.4"
  122. elif re.compile("^kernel-source-2.6.*").match(name):
  123. newname = "linux-2.6"
  124. elif re.compile("gnutls11").match(name):
  125. newname = "gnutls26"
  126. elif re.compile("gnutls13").match(name):
  127. newname = "gnutls26"
  128. elif re.compile("gallery2").match(name):
  129. newname = "gallery"
  130. elif re.compile("firebird2").match(name):
  131. newname = "firebird2.5"
  132. elif re.compile("firebird2.0").match(name):
  133. newname = "firebird2.5"
  134. elif re.compile("firebird2.1").match(name):
  135. newname = "firebird2.5"
  136. elif re.compile("fltk1.1").match(name):
  137. newname = "fltk1.3"
  138. elif re.compile("fox1.4").match(name):
  139. newname = "fox1.6"
  140. elif re.compile("exim-tls").match(name):
  141. newname = "exim"
  142. elif re.compile("exim4").match(name):
  143. newname = "exim"
  144. elif re.compile("epic4").match(name):
  145. newname = "epic"
  146. elif re.compile("drupal6").match(name):
  147. newname = "drupal"
  148. elif re.compile("dhcp").match(name):
  149. newname = "dhcp3"
  150. elif re.compile("cyrus-sasl").match(name):
  151. newname = "cyrus-sasl2"
  152. elif re.compile("^cyrus-imapd.*").match(name):
  153. newname = "cyrus-imapd"
  154. elif re.compile("^kolab-cyrus-imapd.*").match(name):
  155. newname = "cyrus-imapd"
  156. elif re.compile("cfengine").match(name):
  157. newname = "cfengine2"
  158. elif re.compile("bind").match(name):
  159. newname = "bind9"
  160. elif re.compile("apache").match(name):
  161. newname = "apache2"
  162. elif re.compile("horde2").match(name):
  163. newname = "horde3"
  164. elif re.compile("mediawiki1.7").match(name):
  165. newname = "mediawiki"
  166. elif re.compile("ffmpeg-debian").match(name):
  167. newname = "ffmpeg"
  168. elif re.compile("xserver-xorg").match(name):
  169. newname = "xorg-server"
  170. elif re.compile("xfree86-1").match(name):
  171. newname = "xorg-server"
  172. elif re.compile("xfree86v3").match(name):
  173. newname = "xorg-server"
  174. elif re.compile("xfree86").match(name):
  175. newname = "xorg-server"
  176. elif re.compile("xfree86").match(name):
  177. newname = "xorg-server"
  178. elif re.compile("xorg").match(name):
  179. newname = "xorg-server"
  180. elif re.compile("typo3").match(name):
  181. newname = "typo3-src"
  182. elif re.compile("lvm10").match(name):
  183. newname = "lvm2"
  184. elif re.compile("cupsys").match(name):
  185. newname = "cups"
  186. elif re.compile("ethereal").match(name):
  187. newname = "wireshark"
  188. elif re.compile("libboost1.42").match(name):
  189. newname = "libboost1.46"
  190. elif re.compile("cinelerra-cv").match(name):
  191. newname = "cinelerra"
  192. elif re.compile("mplayer-dmo").match(name):
  193. newname = "mplayer"
  194. elif re.compile("libcap").match(name):
  195. newname = "libgda2"
  196. elif re.compile("xkb-data-legacy").match(name):
  197. newname = "xkeyboard-config"
  198. elif re.compile("boost-defaults").match(name):
  199. newname = "boost"
  200. elif re.compile("xen-3").match(name):
  201. newname = "xen"
  202. elif re.compile("kde-icons-gorilla").match(name):
  203. newname = "kde-icons-korilla"
  204. elif re.compile("kde4libs").match(name):
  205. newname = "kdelibs"
  206. elif re.compile("libcgi-application-extra-plugin-bundle-perl").match(name):
  207. newname = "libcgi-application-plugins-perl"
  208. elif re.compile("^openssl\d$").match(name):
  209. newname = "openssl"
  210. elif re.compile("^tomcat\d").match(name):
  211. newname = "tomcat7"
  212. elif re.compile("^tomcat\d.\d$").match(name):
  213. newname = "tomcat7"
  214. elif re.compile("^libgda\d").match(name):
  215. newname = "libgda4"
  216. elif re.compile("^readline\d").match(name):
  217. newname = "readline6"
  218. elif re.compile("^libwnck\d").match(name):
  219. newname = "libwnck"
  220. elif re.compile("^xerces-c\d").match(name):
  221. newname = "xerces-c"
  222. elif re.compile("^libticalcs\d").match(name):
  223. newname = "libticals"
  224. elif re.compile("^libtifiles\d").match(name):
  225. newname = "libtifiles"
  226. elif re.compile("^db\d.\d$").match(name):
  227. newname = "db4.8"
  228. elif re.compile("^gcc-.*").match(name):
  229. newname = "gcc"
  230. elif re.compile("^automake\d+.*").match(name):
  231. newname = "automake"
  232. elif re.compile("^sun-java\d").match(name):
  233. newname = "sun-java6"
  234. elif re.compile("^open-jdk\d").match(name):
  235. newname = "open-jdk7"
  236. elif re.compile("^mbrola-es\d").match(name):
  237. newname = "mbrola-es"
  238. elif re.compile("^mgltools-.*").match(name):
  239. newname = "mgltools"
  240. elif re.compile("^coin\d$").match(name):
  241. newname = "coin"
  242. elif re.compile("^adobereader-\.*").match(name):
  243. newname = "adobereader"
  244. elif re.compile("^picon-\.*").match(name):
  245. newname = "picon"
  246. elif re.compile("^nvidia-graphics-drivers\.*").match(name):
  247. newname = "nvidia-graphics-drivers"
  248. elif re.compile("^boost\d\.\d\d").match(name):
  249. newname = "boost"
  250. elif re.compile("^llvm-\d.\d").match(name):
  251. newname = "llvm"
  252. elif re.compile("^octave\d.\d").match(name):
  253. newname = "octave"
  254. elif re.compile("^libjibx\d.\d-java").match(name):
  255. newname = "libjibx-java"
  256. elif re.compile("^emacs2\d").match(name):
  257. newname = "emacs23"
  258. elif re.compile("^emacs2\d-non-dfsg").match(name):
  259. newname = "emacs23"
  260. elif re.compile("^libupnp\d").match(name):
  261. newname = "libupnp"
  262. elif re.compile("^python\d.\d").match(name):
  263. newname = "python3.2"
  264. elif re.compile("^postgresql-\d.\d").match(name):
  265. newname = "postgresql-9.0"
  266. elif re.compile("^ruby\d.\d").match(name):
  267. newname = "ruby1.9"
  268. elif re.compile("^php\d").match(name):
  269. newname = "php5"
  270. elif re.compile("^PHP\d").match(name):
  271. newname = "php5"
  272. return newname
  273. ###############################################################################
  274. ## Should this advisory be skipped?
  275. def blacklistedDSA(dsa_id):
  276. dsa_blacklist = ["DSA-1975", "DSA-2360"]
  277. if dsa_id in dsa_blacklist:
  278. return True
  279. else:
  280. return False
  281. ###############################################################################
  282. ## Static map to correct errors in DSAs
  283. ## Return the (names, date, CVE IDs) tuple with known errors corrected
  284. ## This code is still experimental
  285. def fixDSAquirks(dsa_id, dsa_state):
  286. new_names = dsa_state[0]
  287. new_date = dsa_state[1]
  288. new_cves = dsa_state[2]
  289. if dsa_id == "085":
  290. new_cves = ["CVE-2001-1562", "LOCAL-03/04/05", "LOCAL-08/24/08"]
  291. elif dsa_id == "745":
  292. newcves = ["CVE-2005-1921", "CVE-2005-2106", "CVE-2005-1921"]
  293. elif dsa_id == "1095":
  294. new_cves = ["CVE-2006-0747", "CVE-2006-1861", "CVE-2006-2661"]
  295. elif dsa_id == "1284":
  296. new_cves = ["CVE-2007-1320", "CVE-2007-1321", "CVE-2007-1322", "CVE-2007-2893", "CVE-2007-1366"]
  297. elif dsa_id == "1502":
  298. new_cves = ["CVE-2007-2821", "CVE-2007-3238", "CVE-2008-0193", "CVE-2008-0194"]
  299. elif dsa_id == "1706":
  300. new_cves = ["CVE-2009-0135", "CVE-2009-0136"]
  301. elif dsa_id == "1757":
  302. new_cves = ["CVE-2007-2383", "CVE-2008-7220", "CVE-2009-1208"]
  303. elif dsa_id == "1896":
  304. new_cves = ["CVE-2009-3474", "CVE-2009-3475", "CVE-2009-3476"]
  305. elif dsa_id == "1931":
  306. new_cves = ["CVE-2009-0689", "CVE-2009-2463"]
  307. elif dsa_id == "1989":
  308. new_cves = ["CVE-2010-0789"]
  309. elif dsa_id == "1941":
  310. new_cves = ["CVE-2009-0755", "CVE-2009-3903", "CVE-2009-3904", "CVE-2009-3905", "CVE-2009-3606", "CVE-2009-3607", "CVE-2009-3608", "CVE-2009-3909", "CVE-2009-3938"]
  311. elif dsa_id == "2004":
  312. new_cves = ["CVE-2010-0787", "CVE-2010-0547"]
  313. elif dsa_id == "2008":
  314. new_cves = ["LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10"]
  315. elif dsa_id == "2043":
  316. new_cves = ["CVE-2010-2062"]
  317. elif dsa_id == "2044":
  318. new_cves = ["CVE-2010-2062"]
  319. elif dsa_id == "2056":
  320. new_cves = ["CVE-2010-2155", "CVE-2009-4882"]
  321. elif dsa_id == "2092":
  322. new_cves = ["CVE-2010-1625", "CVE-2010-1448", "CVE-2009-4497"]
  323. elif dsa_id == "2098":
  324. new_cves = ["CVE-2010-3659", "CVE-2010-3660", "CVE-2010-3661", "CVE-2010-3662", "CVE-2010-3663", "CVE-2010-3664", "CVE-2010-3665", "CVE-2010-3666", "CVE-2010-3667", "CVE-2010-3668", "CVE-2010-3669", "CVE-2010-3670", "CVE-2010-3671", "CVE-2010-3672", "CVE-2010-3673", "CVE-2010-3674"]
  325. elif dsa_id == "2103":
  326. new_cves = ["CVE-2010-3076"]
  327. elif dsa_id == "2218":
  328. new_cves = ["CVE-2011-1684"]
  329. elif dsa_id == "2229":
  330. new_cves = ["CVE-2005-4494", "CVE-2006-0517", "CVE-2006-0518", "CVE-2006-0519", "CVE-2006-0625", "CVE-2006-0626", "CVE-2006-1295", "CVE-2006-1702", "CVE-2007-4525", "CVE-2008-5812", "CVE-2008-5813", "CVE-2009-3041"]
  331. elif dsa_id == "2261":
  332. new_cves = ["CVE-2009-4078", "CVE-2009-4079", "CVE-2009-4059", "LOCAL-12/30/10", "LOCAL-12/30/10"]
  333. elif dsa_id == "2262":
  334. new_cves = ["LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-05/18/11", "LOCAL-05/18/11"]
  335. elif dsa_id == "2286":
  336. new_names = ["phpmyadmin"]
  337. elif dsa_id == "2289":
  338. new_cves = ["LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11"]
  339. return (new_names, new_date, new_cves)
  340. ###############################################################################
  341. ## Fetch DSA from debian archive. Can't use tracker since dates are missing.
  342. ## DSA started counting in November 2000. We'll simply bruteforce which DSA
  343. ## was in which year and start in 2000 til current.
  344. def fetchDSA(dsa_id, base_url):
  345. year = 2000
  346. now = datetime.datetime.now()
  347. current_year = now.year
  348. logging.info('Fetching DSA-%d records\n', dsa_id)
  349. if dsa_id >= 3431:
  350. year = 2016
  351. elif dsa_id >= 3118:
  352. year = 2015
  353. elif dsa_id >= 2832:
  354. year = 2014
  355. elif dsa_id >= 2597:
  356. year = 2013
  357. elif dsa_id >= 2377:
  358. year = 2012
  359. elif dsa_id >= 2140:
  360. year = 2011
  361. elif dsa_id >= 1965:
  362. year = 2010
  363. elif dsa_id >= 1694:
  364. year = 2009
  365. elif dsa_id >= 1443:
  366. year = 2008
  367. elif dsa_id >= 1245:
  368. year = 2007
  369. elif dsa_id >= 929:
  370. year = 2006
  371. elif dsa_id >= 622:
  372. year = 2005
  373. elif dsa_id >= 406:
  374. year = 2004
  375. elif dsa_id >= 220:
  376. year = 2003
  377. elif dsa_id >= 96:
  378. year = 2002
  379. elif dsa_id >= 11:
  380. year = 2001
  381. dsa_id2string = '%03d' % dsa_id
  382. flag = True
  383. while flag:
  384. try:
  385. flag = False
  386. logging.info('Opening url: ' + base_url + str(year) + '/dsa-' + dsa_id2string + '\n')
  387. req = urllib.request.urlopen(base_url + str(year) + '/dsa-' + dsa_id2string)
  388. charset = req.info().get_content_charset()
  389. if charset is None:
  390. charset = 'utf-8'
  391. dsa = req.read().decode(charset)
  392. return dsa
  393. except urllib.error.HTTPError as err:
  394. if year < current_year:
  395. year += 1
  396. flag = True
  397. else:
  398. dsa = ''
  399. return dsa
  400. ###############################################################################
  401. ## Try to find new DSAs by iteration, return table of DSAs to process
  402. def checkDSAs(state, config):
  403. next_dsa = state['next_adv']
  404. #state implemented as dictionary
  405. logging.info('Checking for new DSAs.. \n')
  406. # if next_dsa < config['first_dsa']:
  407. # logging.debug('Cache was deleted, starting at DSA ' + str(next_dsa) + '\n')
  408. # next_dsa = config['first_dsa']
  409. next_dsa2string = '%03d' % next_dsa
  410. if blacklistedDSA('DSA-' + next_dsa2string):
  411. next_dsa += 1
  412. dsa = fetchDSA(next_dsa, config['dsa_base_url'])
  413. while dsa != '':
  414. logging.debug('Got DSA-' + str(next_dsa) + '\n')
  415. soup = BeautifulSoup(dsa,'html.parser')
  416. #crop the DSA from unecessary weight
  417. dsa = soup.find(id="content")
  418. if dsa == '':
  419. raise NameError('html file format unexpected')
  420. dsatable[next_dsa] = str(dsa)
  421. next_dsa += 1
  422. if blacklistedDSA('DSA-' + str(next_dsa)):
  423. next_dsa += 1
  424. dsa = fetchDSA(next_dsa, config['dsa_base_url'])
  425. state['next_dsa'] = next_dsa
  426. return dsatable
  427. ###############################################################################
  428. ## Parse DSA html data and return array
  429. ## (src-pkg-name date (CVE-id)*)
  430. def parseDSAhtml(dsa):
  431. dsa_date = []
  432. dsa_names = []
  433. dsa_CVEs = []
  434. # Date Reported -> dsa_date
  435. soup = BeautifulSoup(dsa, 'html.parser')
  436. tmp = soup.find("dt",string=re.compile(".*Date Repo.*:"))
  437. tmp = str(tmp.find_next().contents[0])
  438. dsa_date = tmp.split()
  439. if dsa_date == []:
  440. print('Unable to extract date. Raising exception...')
  441. raise NameError('DSA parsing problem')
  442. # Affected Packages -> dsa_names
  443. #print(dsa)
  444. tmp = soup.find("dt",string=re.compile("Affected Packages:"))
  445. tmp = tmp.find_next().contents
  446. #Need to check with multiple vulnerable packages
  447. for i in tmp:
  448. if (not isinstance(i, NavigableString)) and i.has_attr('href'):
  449. #greedy 'and' operation assumed
  450. unified = unifySrcName(i.string)
  451. dsa_names.append(unified)
  452. pass
  453. if dsa_names == []:
  454. print('Unable to find src package in DSA. Raising exception...')
  455. raise NameError('DSA parsing problem')
  456. # Security database references (CVEs) -> dsa_CVEs
  457. tmp = soup.find("dt", string=re.compile("Security database references:"))
  458. tmp = tmp.find_next().descendants
  459. for i in tmp:
  460. if (not isinstance(i, NavigableString)) and i.has_attr('href'):
  461. dsa_CVEs.append(i.string)
  462. return (dsa_names, dsa_date, dsa_CVEs)
  463. #dsa = fetchDSA(3200,'https://www.debian.org/security/')
  464. #dsatable = checkDSAs(state,config)
  465. #print(dsatable[3701])
  466. #parseDSAhtml(dsatable[3498])
  467. #checkDSAs(state,config)