debian_advisory.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. #!/usr/bin/python3
  2. ###############################################################################
  3. ##
  4. ## Functions for downloading and parsing Debian Security Advisories (DSAs)
  5. ##
  6. ###############################################################################
  7. import re
  8. import datetime
  9. from html.parser import HTMLParser
  10. from bs4 import BeautifulSoup
  11. from bs4 import NavigableString
  12. from pymongo import MongoClient
  13. import urllib.request
  14. import logging, sys
  # Send every log record of DEBUG level and above to stderr.
  logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

  # Testing global variables
  # NOTE(review): checkDSAs() reads config['URL']['dsa_base_url'] and
  # config['DSA']['first_dsa']; this flat test dict does not have that shape.
  config = {'dsa_base_url': 'https://www.debian.org/security/'}
  # Number of the next advisory to look for.
  state = {'next_adv': 3496}
  # Filled by checkDSAs(): DSA number -> HTML of the advisory's "content" element.
  dsatable = {}
  20. # Track renamed packages here, easy but manual. We should look into ways
  21. # to automate this
  # Ordered rename rules: (regex, unified source-package name).
  # The regex is applied with re.match(), i.e. anchored at the start of the
  # name only, so every rule is effectively a prefix test. The FIRST matching
  # rule wins, therefore a more specific pattern must come before any shorter
  # pattern that is a prefix of it.
  _SRC_NAME_RULES = tuple(
      (re.compile(pattern), unified)
      for pattern, unified in (
          (r"proftp-dfsg", "proftp"),
          (r"mozilla-firefox", "firefox"),
          (r"iceweasel", "firefox"),
          # Must precede the generic "mozilla" rule, otherwise it can never match.
          (r"mozilla-thunderbird", "icedove"),
          (r"mozilla", "firefox"),
          (r"squid3", "squid"),
          (r"squid/squid3", "squid"),
          (r"tk8.3", "tk8.4"),
          (r"tk8.4", "tk8.4"),
          (r"xpdf-i", "xpdf"),
          (r"zope2.10/zope2.9", "zope2.7"),
          (r"zope-cmfplone", "zope2.7"),
          (r"zope-ldapuserfolder", "zope2.7"),
          (r"librmagick-ruby", "ruby-rmagick"),
          (r"libcompass-ruby", "ruby-compass"),
          (r"bio-ruby", "ruby-bio"),
          (r"request-tracker3.4", "request-tracker3.8"),
          (r"request-tracker3.6", "request-tracker3.8"),
          (r"perl-5.005", "perl"),
          (r"otrs2", "otrs"),
          (r"openldap2.3", "openldap"),
          (r"openldap2", "openldap"),
          (r"libreoffice", "openoffice.org"),
          (r"nsd3", "nsd"),
          (r"network-manager/network-manager-applet", "network-manager"),
          (r"nagios3", "nagios"),
          (r"nagios2", "nagios"),
          (r"mysql*", "mysql"),
          (r"linux-2.6*", "linux"),
          (r"linux-kernel-alpha", "linux"),
          (r"linux-kernel-i386", "linux"),
          (r"libmusicbrainz-2.0", "libmusicbrainz3"),
          (r"libmusicbrainz-2.1", "libmusicbrainz3"),
          (r"libgtop1", "libgtop2"),
          (r"libgd1", "libgd2"),
          (r"libast1", "libast"),
          # NOTE(review): mapping libmozjs0d to "libast" looks like a
          # copy/paste slip from the rule above - confirm the intended target.
          (r"libmozjs0d", "libast"),
          (r"^kernel-source*", "linux"),
          (r"^kernel-patch*", "linux"),
          # (disabled in the original) "kernel" -> "linux-2.4"
          (r"^kernel-source-2.4.*", "linux"),
          (r"^kernel-image-2.2.*", "linux"),
          (r"^kernel-image*", "linux"),
          (r"^kernel-patch-*", "linux"),
          (r"kernel-patch-benh", "linux"),
          (r"kernel-patch-vserver", "linux"),
          (r"gnutls11", "gnutls26"),
          (r"gnutls13", "gnutls26"),
          (r"gallery2", "gallery"),
          (r"firebird2", "firebird2.5"),
          (r"firebird2.0", "firebird2.5"),
          (r"firebird2.1", "firebird2.5"),
          (r"fltk1.1", "fltk1.3"),
          (r"fox1.4", "fox1.6"),
          (r"exim-tls", "exim"),
          (r"exim4", "exim"),
          (r"epic4", "epic"),
          (r"drupal6", "drupal"),
          (r"dhcp", "dhcp3"),
          (r"cyrus-sasl", "cyrus-sasl2"),
          (r"^cyrus-imapd.*", "cyrus-imapd"),
          (r"^kolab-cyrus-imapd.*", "cyrus-imapd"),
          (r"cfengine", "cfengine2"),
          (r"bind", "bind9"),
          (r"apache", "apache2"),
          (r"horde2", "horde3"),
          (r"mediawiki1.7", "mediawiki"),
          (r"ffmpeg-debian", "ffmpeg"),
          (r"xserver-xorg", "xorg-server"),
          (r"xfree86-1", "xorg-server"),
          (r"xfree86v3", "xorg-server"),
          (r"xfree86", "xorg-server"),
          (r"xorg", "xorg-server"),
          (r"typo3", "typo3-src"),
          (r"lvm10", "lvm2"),
          (r"cupsys", "cups"),
          (r"ethereal", "wireshark"),
          (r"libboost1.42", "libboost1.46"),
          (r"cinelerra-cv", "cinelerra"),
          (r"mplayer-dmo", "mplayer"),
          # NOTE(review): "libcap" -> "libgda2" looks unintended - confirm.
          (r"libcap", "libgda2"),
          (r"xkb-data-legacy", "xkeyboard-config"),
          (r"boost-defaults", "boost"),
          (r"xen-3", "xen"),
          (r"kde-icons-gorilla", "kde-icons-korilla"),
          (r"kde4libs", "kdelibs"),
          (r"libcgi-application-extra-plugin-bundle-perl",
           "libcgi-application-plugins-perl"),
          (r"^openssl*", "openssl"),
          (r"^tomcat\d", "tomcat7"),
          (r"^tomcat\d.\d$", "tomcat7"),
          (r"^libgda\d", "libgda4"),
          (r"^readline\d", "readline6"),
          (r"^libwnck\d", "libwnck"),
          (r"^xerces-c\d", "xerces-c"),
          # NOTE(review): target is spelled "libticals" (no second "c") in
          # the original; kept as-is - confirm whether "libticalcs" was meant.
          (r"^libticalcs\d", "libticals"),
          (r"^libtifiles\d", "libtifiles"),
          (r"^db\d.\d$", "db4.8"),
          (r"^gcc-.*", "gcc"),
          (r"^automake\d+.*", "automake"),
          (r"^sun-java\d", "sun-java6"),
          (r"^open-jdk\d", "open-jdk7"),
          (r"^mbrola-es\d", "mbrola-es"),
          (r"^mgltools-.*", "mgltools"),
          (r"^coin\d$", "coin"),
          (r"^adobereader-\.*", "adobereader"),
          (r"^picon-\.*", "picon"),
          (r"^nvidia-graphics-drivers\.*", "nvidia-graphics-drivers"),
          (r"^boost\d\.\d\d", "boost"),
          (r"^llvm-\d.\d", "llvm"),
          (r"^octave\d.\d", "octave"),
          (r"^libjibx\d.\d-java", "libjibx-java"),
          (r"^emacs2\d", "emacs23"),
          (r"^emacs2\d-non-dfsg", "emacs23"),
          (r"^libupnp\d", "libupnp"),
          (r"^python\d.\d", "python3.2"),
          (r"^postgresql-\d.\d", "postgresql-9.0"),
          (r"^ruby\d.\d", "ruby1.9"),
          (r"^php\d", "php5"),
          (r"^PHP\d", "php5"),
          (r"^openjdk*", "openjdk"),
      )
  )


  def unifySrcName(name):
      """Map a (possibly renamed or versioned) source package name to one name.

      The name is tested against _SRC_NAME_RULES in order and the unified
      name of the first rule whose pattern matches at the start of *name* is
      returned. If no rule matches, *name* itself is returned unchanged.

      TODO: it should map to the most recent version, not unversioned
      TODO: we can partially automate this..
        -> make all lower-case
        -> replace -X.Y version numbers by highest encounter(?)
        -> handle special cases like xfree86
      Have to go over this again! Maybe have a file with all these things??
      """
      for pattern, unified in _SRC_NAME_RULES:
          if pattern.match(name):
              return unified
      return name
  276. ###############################################################################
  277. ## Should this advisory be skipped?
  def blacklistedDSA(dsa_id):
      """Return True if advisory *dsa_id* is on the skip list, else False.

      *dsa_id* is compared verbatim against identifiers of the form
      "DSA-<number>" (for example "DSA-1975").
      """
      dsa_blacklist = ("DSA-1975", "DSA-2360", "DSA-2134", "DSA-3043", "DSA-3156")
      return dsa_id in dsa_blacklist
  284. ###############################################################################
  285. ## Static map to correct errors in DSAs
  286. ## Return the (names, date, CVE IDs) tuple with known errors corrected
  287. ## This code is still experimental
  # Replacement CVE lists for advisories whose published references are known
  # to be wrong. Keys are the DSA number as a string, exactly as compared by
  # fixDSAquirks() (note the zero-padded "085").
  _DSA_CVE_FIXES = {
      "085": ("CVE-2001-1562", "LOCAL-03/04/05", "LOCAL-08/24/08"),
      "745": ("CVE-2005-1921", "CVE-2005-2106", "CVE-2005-1921"),
      "1095": ("CVE-2006-0747", "CVE-2006-1861", "CVE-2006-2661"),
      "1284": ("CVE-2007-1320", "CVE-2007-1321", "CVE-2007-1322",
               "CVE-2007-2893", "CVE-2007-1366"),
      "1502": ("CVE-2007-2821", "CVE-2007-3238", "CVE-2008-0193",
               "CVE-2008-0194"),
      "1706": ("CVE-2009-0135", "CVE-2009-0136"),
      "1757": ("CVE-2007-2383", "CVE-2008-7220", "CVE-2009-1208"),
      "1896": ("CVE-2009-3474", "CVE-2009-3475", "CVE-2009-3476"),
      "1931": ("CVE-2009-0689", "CVE-2009-2463"),
      "1989": ("CVE-2010-0789",),
      "1941": ("CVE-2009-0755", "CVE-2009-3903", "CVE-2009-3904",
               "CVE-2009-3905", "CVE-2009-3606", "CVE-2009-3607",
               "CVE-2009-3608", "CVE-2009-3909", "CVE-2009-3938"),
      "2004": ("CVE-2010-0787", "CVE-2010-0547"),
      "2008": ("LOCAL-02/23/10",) * 4,
      "2043": ("CVE-2010-2062",),
      "2044": ("CVE-2010-2062",),
      "2056": ("CVE-2010-2155", "CVE-2009-4882"),
      "2092": ("CVE-2010-1625", "CVE-2010-1448", "CVE-2009-4497"),
      "2098": ("CVE-2010-3659", "CVE-2010-3660", "CVE-2010-3661",
               "CVE-2010-3662", "CVE-2010-3663", "CVE-2010-3664",
               "CVE-2010-3665", "CVE-2010-3666", "CVE-2010-3667",
               "CVE-2010-3668", "CVE-2010-3669", "CVE-2010-3670",
               "CVE-2010-3671", "CVE-2010-3672", "CVE-2010-3673",
               "CVE-2010-3674"),
      "2103": ("CVE-2010-3076",),
      "2218": ("CVE-2011-1684",),
      "2229": ("CVE-2005-4494", "CVE-2006-0517", "CVE-2006-0518",
               "CVE-2006-0519", "CVE-2006-0625", "CVE-2006-0626",
               "CVE-2006-1295", "CVE-2006-1702", "CVE-2007-4525",
               "CVE-2008-5812", "CVE-2008-5813", "CVE-2009-3041"),
      "2261": ("CVE-2009-4078", "CVE-2009-4079", "CVE-2009-4059",
               "LOCAL-12/30/10", "LOCAL-12/30/10"),
      "2262": ("LOCAL-03/01/11",) * 4 + ("LOCAL-05/18/11",) * 2,
      "2289": ("LOCAL-07/27/11",) * 12,
  }

  # Replacement source-package name lists, keyed like _DSA_CVE_FIXES.
  _DSA_NAME_FIXES = {
      "2286": ("phpmyadmin",),
  }


  def fixDSAquirks(dsa_id, dsa_state):
      """Apply the static corrections known for advisory *dsa_id*.

      This code is still experimental.

      Args:
          dsa_id: DSA number as a string (e.g. "745", "085").
          dsa_state: sequence whose first three items are
              (package names, date, CVE ids).

      Returns:
          A tuple (new_names, new_date, new_cves). Items without a known
          correction are the very objects taken from *dsa_state*; corrected
          items are fresh lists, so callers may mutate them freely.
      """
      new_names = dsa_state[0]
      new_date = dsa_state[1]
      new_cves = dsa_state[2]

      fixed_cves = _DSA_CVE_FIXES.get(dsa_id)
      if fixed_cves is not None:
          new_cves = list(fixed_cves)

      fixed_names = _DSA_NAME_FIXES.get(dsa_id)
      if fixed_names is not None:
          new_names = list(fixed_names)

      return (new_names, new_date, new_cves)
  343. ###############################################################################
  344. ## Fetch DSA from debian archive. Can't use tracker since dates are missing.
  345. ## DSA started counting in November 2000. We'll simply bruteforce which DSA
  346. ## was in which year and start in 2000 til current.
  # (lowest DSA number, year in which the search for it starts), newest first.
  # Numbers below every threshold start in 2000.
  _DSA_YEAR_STARTS = (
      (3431, 2016),
      (3118, 2015),
      (2832, 2014),
      (2597, 2013),
      (2377, 2012),
      (2140, 2011),
      (1965, 2010),
      (1694, 2009),
      (1443, 2008),
      (1245, 2007),
      (929, 2006),
      (622, 2005),
      (406, 2004),
      (220, 2003),
      (96, 2002),
      (11, 2001),
  )


  def fetchDSA(dsa_id, base_url):
      """Download the page of advisory number *dsa_id* and return it as text.

      The URL is built as ``<base_url><year>/dsa-<id>`` with the id
      zero-padded to at least three digits. The year is not known in
      advance: the search starts at the year given by _DSA_YEAR_STARTS and,
      on every HTTP error, moves on to the following year until the current
      year has been tried.

      Args:
          dsa_id: advisory number as an int.
          base_url: URL prefix, expected to end with "/".

      Returns:
          The decoded page (charset from the response headers, falling back
          to UTF-8), or '' if every tried year answered with an HTTP error.

      Errors other than HTTPError (for example URLError on connection
      failure) are not handled here and propagate to the caller.
      """
      # Explicit import: the module only imports urllib.request at file level.
      from urllib.error import HTTPError

      current_year = datetime.datetime.now().year
      logging.info('Fetching DSA-%d records\n', dsa_id)

      year = next(
          (start for first_id, start in _DSA_YEAR_STARTS if dsa_id >= first_id),
          2000,
      )
      dsa_id2string = '%03d' % dsa_id

      while True:
          url = base_url + str(year) + '/dsa-' + dsa_id2string
          logging.info('Opening url: %s\n', url)
          try:
              # The context manager closes the response even if decoding fails.
              with urllib.request.urlopen(url) as req:
                  charset = req.info().get_content_charset() or 'utf-8'
                  return req.read().decode(charset)
          except HTTPError:
              if year >= current_year:
                  return ''
              year += 1
  403. ###############################################################################
  404. ## Try to find new DSAs by iteration, return table of DSAs to process
  def checkDSAs(state, config):
      """Fetch all advisories from state['next_adv'] onwards.

      Advisories are downloaded one by one with fetchDSA() until a number
      yields an empty page. Blacklisted numbers are skipped. For every page
      the element with id="content" is stored, as a string, in the
      module-level ``dsatable`` under the advisory number.

      Args:
          state: dict-like; 'next_adv' is read as the first number to try and
              is overwritten with the first number that could not be fetched.
          config: nested mapping providing config['URL']['dsa_base_url'] and
              config['DSA']['first_dsa'].

      Returns:
          The module-level ``dsatable`` dict (the same object on every call).

      Raises:
          NameError: if a fetched page has no element with id="content".
      """
      next_dsa = int(state['next_adv'])
      # state implemented as dictionary
      base_url = config['URL']['dsa_base_url']
      first_dsa = int(config['DSA']['first_dsa'])
      logging.info('Checking for new DSAs.. \n')

      if next_dsa < first_dsa:
          # Reset first so that the message reports the number we really use.
          next_dsa = first_dsa
          logging.debug('Cache was deleted, starting at DSA %s\n', next_dsa)

      # 'while' rather than 'if': also copes with consecutive blacklisted ids.
      while blacklistedDSA('DSA-%03d' % next_dsa):
          next_dsa += 1

      dsa = fetchDSA(next_dsa, base_url)
      while dsa != '':
          logging.debug('Got DSA-%s\n', next_dsa)
          # Crop the DSA from unnecessary weight: keep the content element only.
          content = BeautifulSoup(dsa, 'html.parser').find(id="content")
          # find() signals "not found" with None, never with ''.
          if content is None:
              raise NameError('html file format unexpected')
          dsatable[next_dsa] = str(content)

          next_dsa += 1
          while blacklistedDSA('DSA-%03d' % next_dsa):
              next_dsa += 1
          dsa = fetchDSA(next_dsa, base_url)

      state['next_adv'] = next_dsa
      return dsatable
  432. ###############################################################################
  433. ## Parse DSA html data and return array
  434. ## (src-pkg-name date (CVE-id)*)
  def _dsa_section(soup, label_pattern):
      """Return the element following the <dt> whose text matches, or None."""
      label = soup.find("dt", string=re.compile(label_pattern))
      if label is None:
          return None
      return label.find_next()


  def parseDSAhtml(dsa):
      """Extract package names, date and references from a DSA HTML fragment.

      Args:
          dsa: HTML of one advisory as a string.

      Returns:
          A tuple (dsa_names, dsa_date, dsa_CVEs):
            * dsa_names - list of unified source package names taken from the
              links of the "Affected Packages:" section; ['unnamed'] if none
              was found.
            * dsa_date  - datetime.datetime parsed from the "Date Reported"
              section with the format "%d %b %Y".
            * dsa_CVEs  - list of the link texts of the "Security database
              references:" section, without entries starting with "Bug".

      Raises:
          NameError: if the date section is missing or empty.
          ValueError: if the date text does not match "%d %b %Y".
      """
      soup = BeautifulSoup(dsa, 'html.parser')

      # Date Reported -> dsa_date
      date_value = _dsa_section(soup, ".*Date Repo.*:")
      if date_value is None or not date_value.contents:
          logging.error('Unable to extract date. Raising exception...')
          raise NameError('DSA parsing problem')
      # date in datetime python format
      dsa_date = datetime.datetime.strptime(
          str(date_value.contents[0]).strip(), "%d %b %Y")

      # Affected Packages -> dsa_names
      dsa_names = []
      pkg_value = _dsa_section(soup, "Affected Packages:")
      if pkg_value is not None:
          # Need to check with multiple vulnerable packages
          for node in pkg_value.contents:
              if isinstance(node, NavigableString) or not node.has_attr('href'):
                  continue
              # .string is None when the link wraps more than one child node.
              if node.string:
                  dsa_names.append(unifySrcName(node.string))
      if not dsa_names:
          logging.warning('Unable to find src package in DSA. unnamed package...')
          dsa_names.append('unnamed')
          logging.warning('Unnamed dsa:%s\n', dsa)

      # Security database references (CVEs) -> dsa_CVEs
      dsa_CVEs = []
      ref_value = _dsa_section(soup, "Security database references:")
      if ref_value is not None:
          for node in ref_value.descendants:
              if isinstance(node, NavigableString) or not node.has_attr('href'):
                  continue
              label = node.string
              # don't count bug database
              if label is None or label.startswith("Bug"):
                  continue
              dsa_CVEs.append(label)

      return (dsa_names, dsa_date, dsa_CVEs)
  473. #dsa = fetchDSA(3200,'https://www.debian.org/security/')
  474. #dsatable = checkDSAs(state,config)
  475. #print(dsatable[3701])
  476. #parseDSAhtml(dsatable[3498])
  477. #checkDSAs(state,config)