# debian_advisory.py (20 KB)
  1. #!/usr/bin/python3
  2. ###############################################################################
  3. ##
  4. ## Functions for downloading and parsing Debian Security Advisories (DSAs)
  5. ##
  6. ###############################################################################
  7. import re
  8. import datetime
  9. from html.parser import HTMLParser
  10. from bs4 import BeautifulSoup
  11. from bs4 import NavigableString
  12. from pymongo import MongoClient
  13. import urllib.request
  14. import logging, sys
  15. logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
  16. #Testing global variables
  17. config = dict([('dsa_base_url','https://www.debian.org/security/')])
  18. state = dict([('next_adv',3496)])
  19. dsatable = dict()
  20. # Track renamed packages here, easy but manual. We should look into ways
  21. # to automate this
  22. def unifySrcName(name):
  23. ## TODO: it should map to the most recent version, not unversioned
  24. ## TODO: we can partially automate this..
  25. ## -> make all lower-case
  26. ## -> replace -X.Y version numbers by highest encounter(?)
  27. ## -> handle special cases like xfree86
  28. # Have to go over this again!
  29. # Maybe have a file with all these things??
  30. newname = name
  31. if re.compile("proftp-dfsg").match(name):
  32. newname = "proftp"
  33. elif re.compile("mozilla-firefox").match(name):
  34. newname = "firefox-esr"
  35. elif re.compile("iceweasel").match(name):
  36. newname = "firefox-esr"
  37. elif re.compile("firefox-esr").match(name):
  38. newname = "firefox-esr"
  39. elif re.compile("mozilla").match(name):
  40. newname = "firefox-esr"
  41. elif re.compile("mozilla-thunderbird").match(name):
  42. newname = "icedove"
  43. elif re.compile("squid3").match(name):
  44. newname = "squid"
  45. elif re.compile("squid/squid3").match(name):
  46. newname = "squid"
  47. elif re.compile("tk8.3").match(name):
  48. newname = "tk8.4"
  49. elif re.compile("tk8.4").match(name):
  50. newname = "tk8.4"
  51. elif re.compile("xpdf-i").match(name):
  52. newname = "xpdf"
  53. elif re.compile("zope2.10/zope2.9").match(name):
  54. newname = "zope2.7"
  55. elif re.compile("zope-cmfplone").match(name):
  56. newname = "zope2.7"
  57. elif re.compile("zope-ldapuserfolder").match(name):
  58. newname = "zope2.7"
  59. elif re.compile("librmagick-ruby").match(name):
  60. newname = "ruby-rmagick"
  61. elif re.compile("libcompass-ruby").match(name):
  62. newname = "ruby-compass"
  63. elif re.compile("bio-ruby").match(name):
  64. newname = "ruby-bio"
  65. elif re.compile("request-tracker3.4").match(name):
  66. newname = "request-tracker3.8"
  67. elif re.compile("request-tracker3.6").match(name):
  68. newname = "request-tracker3.8"
  69. elif re.compile("perl-5.005").match(name):
  70. newname = "perl"
  71. elif re.compile("otrs2").match(name):
  72. newname = "otrs"
  73. elif re.compile("openldap2.3").match(name):
  74. newname = "openldap"
  75. elif re.compile("openldap2").match(name):
  76. newname = "openldap"
  77. elif re.compile("libreoffice").match(name):
  78. newname = "openoffice.org"
  79. elif re.compile("nsd3").match(name):
  80. newname = "nsd"
  81. elif re.compile("network-manager/network-manager-applet").match(name):
  82. newname = "network-manager"
  83. elif re.compile("nagios3").match(name):
  84. newname = "nagios"
  85. elif re.compile("nagios2").match(name):
  86. newname = "nagios"
  87. elif re.compile("mysql*").match(name):
  88. newname = "mysql"
  89. elif re.compile("linux-2.6*").match(name):
  90. newname = "linux"
  91. elif re.compile("linux-kernel-alpha").match(name):
  92. newname = "linux"
  93. elif re.compile("linux-kernel-i386").match(name):
  94. newname = "linux"
  95. elif re.compile("libmusicbrainz-2.0").match(name):
  96. newname = "libmusicbrainz3"
  97. elif re.compile("libmusicbrainz-2.1").match(name):
  98. newname = "libmusicbrainz3"
  99. elif re.compile("libgtop1").match(name):
  100. newname = "libgtop2"
  101. elif re.compile("libgd1").match(name):
  102. newname = "libgd2"
  103. elif re.compile("libast1").match(name):
  104. newname = "libast"
  105. elif re.compile("libmozjs0d").match(name):
  106. newname = "libast"
  107. elif re.compile("^kernel-source*").match(name):
  108. newname = "linux"
  109. elif re.compile("^kernel-patch*").match(name):
  110. newname = "linux"
  111. # elif re.compile("kernel").match(name):
  112. # newname = "linux-2.4"
  113. elif re.compile("^kernel-source-2.4.*").match(name):
  114. newname = "linux"
  115. elif re.compile("^kernel-image-2.2.*").match(name):
  116. newname = "linux"
  117. elif re.compile("^kernel-image*").match(name):
  118. newname = "linux"
  119. elif re.compile("^kernel-patch-*").match(name):
  120. newname = "linux"
  121. elif re.compile("kernel-patch-benh").match(name):
  122. newname = "linux"
  123. elif re.compile("kernel-patch-vserver").match(name):
  124. newname = "linux"
  125. elif re.compile("^kernel-source*").match(name):
  126. newname = "linux"
  127. elif re.compile("gnutls11").match(name):
  128. newname = "gnutls26"
  129. elif re.compile("gnutls13").match(name):
  130. newname = "gnutls26"
  131. elif re.compile("gallery2").match(name):
  132. newname = "gallery"
  133. elif re.compile("firebird*").match(name):
  134. newname = "firebird"
  135. elif re.compile("fltk1.1").match(name):
  136. newname = "fltk1.3"
  137. elif re.compile("fox1.4").match(name):
  138. newname = "fox1.6"
  139. elif re.compile("exim-tls").match(name):
  140. newname = "exim"
  141. elif re.compile("exim4").match(name):
  142. newname = "exim"
  143. elif re.compile("epic4").match(name):
  144. newname = "epic"
  145. elif re.compile("drupal6").match(name):
  146. newname = "drupal"
  147. elif re.compile("dhcp").match(name):
  148. newname = "dhcp3"
  149. elif re.compile("cyrus-sasl").match(name):
  150. newname = "cyrus-sasl2"
  151. elif re.compile("^cyrus-imapd.*").match(name):
  152. newname = "cyrus-imapd"
  153. elif re.compile("^kolab-cyrus-imapd.*").match(name):
  154. newname = "cyrus-imapd"
  155. elif re.compile("cfengine").match(name):
  156. newname = "cfengine2"
  157. elif re.compile("bind").match(name):
  158. newname = "bind9"
  159. elif re.compile("apache").match(name):
  160. newname = "apache2"
  161. elif re.compile("horde2").match(name):
  162. newname = "horde3"
  163. elif re.compile("mediawiki1.7").match(name):
  164. newname = "mediawiki"
  165. elif re.compile("ffmpeg-debian").match(name):
  166. newname = "ffmpeg"
  167. elif re.compile("xserver-xorg").match(name):
  168. newname = "xorg-server"
  169. elif re.compile("xfree86-1").match(name):
  170. newname = "xorg-server"
  171. elif re.compile("xfree86v3").match(name):
  172. newname = "xorg-server"
  173. elif re.compile("xfree86").match(name):
  174. newname = "xorg-server"
  175. elif re.compile("xfree86").match(name):
  176. newname = "xorg-server"
  177. elif re.compile("xorg").match(name):
  178. newname = "xorg-server"
  179. elif re.compile("typo3").match(name):
  180. newname = "typo3-src"
  181. elif re.compile("lvm10").match(name):
  182. newname = "lvm2"
  183. elif re.compile("cupsys").match(name):
  184. newname = "cups"
  185. elif re.compile("ethereal").match(name):
  186. newname = "wireshark"
  187. elif re.compile("libboost1.42").match(name):
  188. newname = "libboost1.46"
  189. elif re.compile("cinelerra-cv").match(name):
  190. newname = "cinelerra"
  191. elif re.compile("mplayer-dmo").match(name):
  192. newname = "mplayer"
  193. elif re.compile("libcap").match(name):
  194. newname = "libgda2"
  195. elif re.compile("xkb-data-legacy").match(name):
  196. newname = "xkeyboard-config"
  197. elif re.compile("boost-defaults").match(name):
  198. newname = "boost"
  199. elif re.compile("xen-3").match(name):
  200. newname = "xen"
  201. elif re.compile("kde-icons-gorilla").match(name):
  202. newname = "kde-icons-korilla"
  203. elif re.compile("kde4libs").match(name):
  204. newname = "kdelibs"
  205. elif re.compile("libcgi-application-extra-plugin-bundle-perl").match(name):
  206. newname = "libcgi-application-plugins-perl"
  207. elif re.compile("^openssl*").match(name):
  208. newname = "openssl"
  209. elif re.compile("^tomcat\d").match(name):
  210. newname = "tomcat7"
  211. elif re.compile("^tomcat\d.\d$").match(name):
  212. newname = "tomcat7"
  213. elif re.compile("^libgda\d").match(name):
  214. newname = "libgda4"
  215. elif re.compile("^readline\d").match(name):
  216. newname = "readline6"
  217. elif re.compile("^libwnck\d").match(name):
  218. newname = "libwnck"
  219. elif re.compile("^xerces-c\d").match(name):
  220. newname = "xerces-c"
  221. elif re.compile("^libticalcs\d").match(name):
  222. newname = "libticals"
  223. elif re.compile("^libtifiles\d").match(name):
  224. newname = "libtifiles"
  225. elif re.compile("^db\d.\d$").match(name):
  226. newname = "db4.8"
  227. elif re.compile("^gcc-.*").match(name):
  228. newname = "gcc"
  229. elif re.compile("^automake\d+.*").match(name):
  230. newname = "automake"
  231. elif re.compile("^sun-java\d").match(name):
  232. newname = "sun-java6"
  233. elif re.compile("^open-jdk\d").match(name):
  234. newname = "open-jdk7"
  235. elif re.compile("^mbrola-es\d").match(name):
  236. newname = "mbrola-es"
  237. elif re.compile("^mgltools-.*").match(name):
  238. newname = "mgltools"
  239. elif re.compile("^coin\d$").match(name):
  240. newname = "coin"
  241. elif re.compile("^adobereader-\.*").match(name):
  242. newname = "adobereader"
  243. elif re.compile("^picon-\.*").match(name):
  244. newname = "picon"
  245. elif re.compile("^nvidia-graphics-drivers\.*").match(name):
  246. newname = "nvidia-graphics-drivers"
  247. elif re.compile("^boost\d\.\d\d").match(name):
  248. newname = "boost"
  249. elif re.compile("^llvm-\d.\d").match(name):
  250. newname = "llvm"
  251. elif re.compile("^octave\d.\d").match(name):
  252. newname = "octave"
  253. elif re.compile("^libjibx\d.\d-java").match(name):
  254. newname = "libjibx-java"
  255. elif re.compile("^emacs2\d").match(name):
  256. newname = "emacs23"
  257. elif re.compile("^emacs2\d-non-dfsg").match(name):
  258. newname = "emacs23"
  259. elif re.compile("^libupnp\d").match(name):
  260. newname = "libupnp"
  261. elif re.compile("^python\d.\d").match(name):
  262. newname = "python3.2"
  263. elif re.compile("^postgresql-\d.\d").match(name):
  264. newname = "postgresql-9.0"
  265. elif re.compile("^ruby\d.\d").match(name):
  266. newname = "ruby1.9"
  267. elif re.compile("^php\d").match(name):
  268. newname = "php5"
  269. elif re.compile("^PHP\d").match(name):
  270. newname = "php5"
  271. elif re.compile("^openjdk*").match(name):
  272. newname = "openjdk"
  273. return newname
  274. ###############################################################################
  275. ## Should this advisory be skipped?+
  276. def blacklistedDSA(dsa_id):
  277. dsa_blacklist = ["DSA-1975", "DSA-2360", "DSA-2134", "DSA-3043", "DSA-3156"]
  278. if dsa_id in dsa_blacklist:
  279. return True
  280. else:
  281. return False
  282. ###############################################################################
  283. ## Static map to correct errors in DSAs
  284. ## Return fixed list of CVE IDs or 0 to skip DSA
  285. ## This code is still experimental
  286. def fixDSAquirks(dsa_id, dsa_state):
  287. new_names = dsa_state[0]
  288. new_date = dsa_state[1]
  289. new_cves = dsa_state[2]
  290. if dsa_id == "085":
  291. new_cves = ["CVE-2001-1562", "LOCAL-03/04/05", "LOCAL-08/24/08"]
  292. elif dsa_id == "745":
  293. newcves = ["CVE-2005-1921", "CVE-2005-2106", "CVE-2005-1921"]
  294. elif dsa_id == "1095":
  295. new_cves = ["CVE-2006-0747", "CVE-2006-1861", "CVE-2006-2661"]
  296. elif dsa_id == "1284":
  297. new_cves = ["CVE-2007-1320", "CVE-2007-1321", "CVE-2007-1322", "CVE-2007-2893", "CVE-2007-1366"]
  298. elif dsa_id == "1502":
  299. new_cves = ["CVE-2007-2821", "CVE-2007-3238", "CVE-2008-0193", "CVE-2008-0194"]
  300. elif dsa_id == "1706":
  301. new_cves = ["CVE-2009-0135", "CVE-2009-0136"]
  302. elif dsa_id == "1757":
  303. new_cves = ["CVE-2007-2383", "CVE-2008-7220", "CVE-2009-1208"]
  304. elif dsa_id == "1896":
  305. new_cves = ["CVE-2009-3474", "CVE-2009-3475", "CVE-2009-3476"]
  306. elif dsa_id == "1931":
  307. new_cves = ["CVE-2009-0689", "CVE-2009-2463"]
  308. elif dsa_id == "1989":
  309. new_cves = ["CVE-2010-0789"]
  310. elif dsa_id == "1941":
  311. new_cves = ["CVE-2009-0755", "CVE-2009-3903", "CVE-2009-3904", "CVE-2009-3905", "CVE-2009-3606", "CVE-2009-3607", "CVE-2009-3608", "CVE-2009-3909", "CVE-2009-3938"]
  312. elif dsa_id == "2004":
  313. new_cves = ["CVE-2010-0787", "CVE-2010-0547"]
  314. elif dsa_id == "2008":
  315. new_cves = ["LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10"]
  316. elif dsa_id == "2043":
  317. new_cves = ["CVE-2010-2062"]
  318. elif dsa_id == "2044":
  319. new_cves = ["CVE-2010-2062"]
  320. elif dsa_id == "2056":
  321. new_cves = ["CVE-2010-2155", "CVE-2009-4882"]
  322. elif dsa_id == "2092":
  323. new_cves = ["CVE-2010-1625", "CVE-2010-1448", "CVE-2009-4497"]
  324. elif dsa_id == "2098":
  325. new_cves = ["CVE-2010-3659", "CVE-2010-3660", "CVE-2010-3661", "CVE-2010-3662", "CVE-2010-3663", "CVE-2010-3664", "CVE-2010-3665", "CVE-2010-3666", "CVE-2010-3667", "CVE-2010-3668", "CVE-2010-3669", "CVE-2010-3670", "CVE-2010-3671", "CVE-2010-3672", "CVE-2010-3673", "CVE-2010-3674"]
  326. elif dsa_id == "2103":
  327. new_cves = ["CVE-2010-3076"]
  328. elif dsa_id == "2218":
  329. new_cves = ["CVE-2011-1684"]
  330. elif dsa_id == "2229":
  331. new_cves = ["CVE-2005-4494", "CVE-2006-0517", "CVE-2006-0518", "CVE-2006-0519", "CVE-2006-0625", "CVE-2006-0626", "CVE-2006-1295", "CVE-2006-1702", "CVE-2007-4525", "CVE-2008-5812", "CVE-2008-5813", "CVE-2009-3041"]
  332. elif dsa_id == "2261":
  333. new_cves = ["CVE-2009-4078", "CVE-2009-4079", "CVE-2009-4059", "LOCAL-12/30/10", "LOCAL-12/30/10"]
  334. elif dsa_id == "2262":
  335. new_cves = ["LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-05/18/11", "LOCAL-05/18/11"]
  336. elif dsa_id == "2286":
  337. new_names = ["phpmyadmin"]
  338. elif dsa_id == "2289":
  339. new_cves = ["LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11"]
  340. return (new_names, new_date, new_cves)
  341. ###############################################################################
  342. ## Fetch DSA from debian archive. Can't use tracker since dates are missing.
  343. ## DSA started counting in November 2000. We'll simply bruteforce which DSA
  344. ## was in which year and start in 2000 til current.
  345. def fetchDSA(dsa_id, base_url):
  346. year = 2000
  347. now = datetime.datetime.now()
  348. current_year = now.year
  349. logging.info('Fetching DSA-%d records\n', dsa_id)
  350. if dsa_id >= 3431:
  351. year = 2016
  352. elif dsa_id >= 3118:
  353. year = 2015
  354. elif dsa_id >= 2832:
  355. year = 2014
  356. elif dsa_id >= 2597:
  357. year = 2013
  358. elif dsa_id >= 2377:
  359. year = 2012
  360. elif dsa_id >= 2140:
  361. year = 2011
  362. elif dsa_id >= 1965:
  363. year = 2010
  364. elif dsa_id >= 1694:
  365. year = 2009
  366. elif dsa_id >= 1443:
  367. year = 2008
  368. elif dsa_id >= 1245:
  369. year = 2007
  370. elif dsa_id >= 929:
  371. year = 2006
  372. elif dsa_id >= 622:
  373. year = 2005
  374. elif dsa_id >= 406:
  375. year = 2004
  376. elif dsa_id >= 220:
  377. year = 2003
  378. elif dsa_id >= 96:
  379. year = 2002
  380. elif dsa_id >= 11:
  381. year = 2001
  382. dsa_id2string = '%03d' % dsa_id
  383. flag = True
  384. while flag:
  385. try:
  386. flag = False
  387. logging.info('Opening url: ' + base_url + str(year) + '/dsa-' + dsa_id2string + '\n')
  388. req = urllib.request.urlopen(base_url + str(year) + '/dsa-' + dsa_id2string)
  389. charset = req.info().get_content_charset()
  390. if charset is None:
  391. charset = 'utf-8'
  392. dsa = req.read().decode(charset)
  393. return dsa
  394. except urllib.error.HTTPError as err:
  395. if year < current_year:
  396. year += 1
  397. flag = True
  398. else:
  399. dsa = ''
  400. return dsa
  401. ###############################################################################
  402. ## Try to find new DSAs by iteration, return table of DSAs to process
  403. def checkDSAs(state, config):
  404. next_dsa = int(state['next_adv'])
  405. #state implemented as dictionary
  406. base_url = config['URL']['dsa_base_url']
  407. logging.info('Checking for new DSAs.. \n')
  408. if next_dsa < int(config['DSA']['first_dsa']):
  409. logging.debug('Cache was deleted, starting at DSA ' + str(next_dsa) + '\n')
  410. next_dsa = int(config['DSA']['first_dsa'])
  411. next_dsa2string = '%03d' % next_dsa
  412. if blacklistedDSA('DSA-' + next_dsa2string):
  413. next_dsa += 1
  414. #print(config)
  415. dsa = fetchDSA(next_dsa, base_url)
  416. while dsa != '':
  417. logging.debug('Got DSA-' + str(next_dsa) + '\n')
  418. soup = BeautifulSoup(dsa,'html.parser')
  419. #crop the DSA from unecessary weight
  420. dsa = soup.find(id="content")
  421. if dsa == '':
  422. raise NameError('html file format unexpected')
  423. dsatable[next_dsa] = str(dsa)
  424. next_dsa += 1
  425. if blacklistedDSA('DSA-' + str(next_dsa)):
  426. next_dsa += 1
  427. dsa = fetchDSA(next_dsa, base_url)
  428. state['next_adv'] = next_dsa
  429. return dsatable
  430. ###############################################################################
  431. ## Parse DSA html data and return array
  432. ## (src-pkg-name date (CVE-id)*)
  433. def parseDSAhtml(dsa):
  434. dsa_date = []
  435. dsa_names = []
  436. dsa_CVEs = []
  437. # Date Reported -> dsa_date
  438. soup = BeautifulSoup(dsa, 'html.parser')
  439. tmp = soup.find("dt",string=re.compile(".*Date Repo.*:"))
  440. tmp = str(tmp.find_next().contents[0])
  441. # dsa_date = tmp.split()
  442. # date in datetime python format
  443. dsa_date = datetime.datetime.strptime(tmp, "%d %b %Y")
  444. if dsa_date == []:
  445. print('Unable to extract date. Raising exception...')
  446. raise NameError('DSA parsing problem')
  447. # Affected Packages -> dsa_names
  448. #print(dsa)
  449. tmp = soup.find("dt",string=re.compile("Affected Packages:"))
  450. tmp = tmp.find_next().contents
  451. #Need to check with multiple vulnerable packages
  452. for i in tmp:
  453. if (not isinstance(i, NavigableString)) and i.has_attr('href'):
  454. #greedy 'and' operation assumed
  455. unified = unifySrcName(i.string)
  456. dsa_names.append(unified)
  457. pass
  458. if dsa_names == []:
  459. print('Unable to find src package in DSA. unnamed package...')
  460. dsa_names.append('unnamed')
  461. print('Unnamed dsa:' + str(dsa) + '\n')
  462. # Security database references (CVEs) -> dsa_CVEs
  463. tmp = soup.find("dt", string=re.compile("Security database references:"))
  464. tmp = tmp.find_next().descendants
  465. for i in tmp:
  466. if (not isinstance(i, NavigableString)) and i.has_attr('href'):
  467. #don't count bug database
  468. if not re.compile("^Bug*").match(i.string):
  469. dsa_CVEs.append(i.string)
  470. return (dsa_names, dsa_date, dsa_CVEs)
  471. #dsa = fetchDSA(3200,'https://www.debian.org/security/')
  472. #dsatable = checkDSAs(state,config)
  473. #print(dsatable[3701])
  474. #parseDSAhtml(dsatable[3498])
  475. #checkDSAs(state,config)