#!/usr/bin/python3
###############################################################################
##
## Functions for downloading and parsing Debian Security Advisories (DSAs)
##
###############################################################################
import re
import datetime
from html.parser import HTMLParser
from bs4 import BeautifulSoup
from bs4 import NavigableString
from pymongo import MongoClient
import urllib.request
import urllib.error
import logging, sys

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

# Testing global variables (note: checkDSAs() below expects the nested layout
# config['URL']['dsa_base_url'] / config['DSA']['first_dsa'] used by the real
# configuration; this flat dict is only a stand-in for quick tests).
config = dict([('dsa_base_url', 'https://www.debian.org/security/')])
state = dict([('next_adv', 3496)])
dsatable = dict()
# Track renamed packages here; easy but manual. We should look into ways
# to automate this.
# Ordered (regular expression, unified name) pairs used by unifySrcName().
# The first pattern whose start matches the name wins (re.match semantics,
# mirroring the original if/elif chain).
srcname_map = [
    ("proftp-dfsg", "proftp"),
    ("mozilla-firefox", "firefox-esr"),
    ("iceweasel", "firefox-esr"),
    ("firefox-esr", "firefox-esr"),
    ("mozilla", "firefox-esr"),
    ("mozilla-thunderbird", "icedove"),  # note: shadowed by the "mozilla" entry above
    ("squid3", "squid"),
    ("squid/squid3", "squid"),
    ("tk8.3", "tk8.4"),
    ("tk8.4", "tk8.4"),
    ("xpdf-i", "xpdf"),
    ("zope2.10/zope2.9", "zope2.7"),
    ("zope-cmfplone", "zope2.7"),
    ("zope-ldapuserfolder", "zope2.7"),
    ("librmagick-ruby", "ruby-rmagick"),
    ("libcompass-ruby", "ruby-compass"),
    ("bio-ruby", "ruby-bio"),
    ("request-tracker3.4", "request-tracker3.8"),
    ("request-tracker3.6", "request-tracker3.8"),
    ("perl-5.005", "perl"),
    ("otrs2", "otrs"),
    ("openldap2.3", "openldap"),
    ("openldap2", "openldap"),
    ("libreoffice", "openoffice.org"),
    ("nsd3", "nsd"),
    ("network-manager/network-manager-applet", "network-manager"),
    ("nagios3", "nagios"),
    ("nagios2", "nagios"),
    ("mysql*", "mysql"),
    ("linux-2.6*", "linux"),
    ("linux-kernel-alpha", "linux"),
    ("linux-kernel-i386", "linux"),
    ("libmusicbrainz-2.0", "libmusicbrainz3"),
    ("libmusicbrainz-2.1", "libmusicbrainz3"),
    ("libgtop1", "libgtop2"),
    ("libgd1", "libgd2"),
    ("libast1", "libast"),
    ("libmozjs0d", "libast"),
    ("^kernel-source*", "linux"),
    ("^kernel-patch*", "linux"),
    # ("kernel", "linux-2.4"),
    ("^kernel-source-2.4.*", "linux"),
    ("^kernel-image-2.2.*", "linux"),
    ("^kernel-image*", "linux"),
    ("^kernel-patch-*", "linux"),
    ("kernel-patch-benh", "linux"),
    ("kernel-patch-vserver", "linux"),
    ("gnutls11", "gnutls26"),
    ("gnutls13", "gnutls26"),
    ("gallery2", "gallery"),
    ("firebird*", "firebird"),
    ("fltk1.1", "fltk1.3"),
    ("fox1.4", "fox1.6"),
    ("exim-tls", "exim"),
    ("exim4", "exim"),
    ("epic4", "epic"),
    ("drupal6", "drupal"),
    ("dhcp", "dhcp3"),
    ("cyrus-sasl", "cyrus-sasl2"),
    ("^cyrus-imapd.*", "cyrus-imapd"),
    ("^kolab-cyrus-imapd.*", "cyrus-imapd"),
    ("cfengine", "cfengine2"),
    ("bind", "bind9"),
    ("apache", "apache2"),
    ("horde2", "horde3"),
    ("mediawiki1.7", "mediawiki"),
    ("ffmpeg-debian", "ffmpeg"),
    ("xserver-xorg", "xorg-server"),
    ("xfree86-1", "xorg-server"),
    ("xfree86v3", "xorg-server"),
    ("xfree86", "xorg-server"),
    ("xorg", "xorg-server"),
    ("typo3", "typo3-src"),
    ("lvm10", "lvm2"),
    ("cupsys", "cups"),
    ("ethereal", "wireshark"),
    ("libboost1.42", "libboost1.46"),
    ("cinelerra-cv", "cinelerra"),
    ("mplayer-dmo", "mplayer"),
    ("libcap", "libgda2"),
    ("xkb-data-legacy", "xkeyboard-config"),
    ("boost-defaults", "boost"),
    ("xen-3", "xen"),
    ("kde-icons-gorilla", "kde-icons-korilla"),
    ("kde4libs", "kdelibs"),
    ("libcgi-application-extra-plugin-bundle-perl", "libcgi-application-plugins-perl"),
    ("^openssl*", "openssl"),
    (r"^tomcat\d", "tomcat7"),
    (r"^tomcat\d.\d$", "tomcat7"),
    (r"^libgda\d", "libgda4"),
    (r"^readline\d", "readline6"),
    (r"^libwnck\d", "libwnck"),
    (r"^xerces-c\d", "xerces-c"),
    (r"^libticalcs\d", "libticals"),
    (r"^libtifiles\d", "libtifiles"),
    (r"^db\d.\d$", "db4.8"),
    ("^gcc-.*", "gcc"),
    (r"^automake\d+.*", "automake"),
    (r"^sun-java\d", "sun-java6"),
    (r"^open-jdk\d", "open-jdk7"),
    (r"^mbrola-es\d", "mbrola-es"),
    ("^mgltools-.*", "mgltools"),
    (r"^coin\d$", "coin"),
    (r"^adobereader-\.*", "adobereader"),
    (r"^picon-\.*", "picon"),
    (r"^nvidia-graphics-drivers\.*", "nvidia-graphics-drivers"),
    (r"^boost\d\.\d\d", "boost"),
    (r"^llvm-\d.\d", "llvm"),
    (r"^octave\d.\d", "octave"),
    (r"^libjibx\d.\d-java", "libjibx-java"),
    (r"^emacs2\d", "emacs23"),
    (r"^emacs2\d-non-dfsg", "emacs23"),
    (r"^libupnp\d", "libupnp"),
    (r"^python\d.\d", "python3.2"),
    (r"^postgresql-\d.\d", "postgresql-9.0"),
    (r"^ruby\d.\d", "ruby1.9"),
    (r"^php\d", "php5"),
    (r"^PHP\d", "php5"),
    ("^openjdk*", "openjdk"),
]

def unifySrcName(name):
    ## TODO: it should map to the most recent version, not the unversioned name
    ## TODO: we can partially automate this:
    ##  -> make all names lower-case
    ##  -> replace -X.Y version numbers by the highest version encountered(?)
    ##  -> handle special cases like xfree86
    # Have to go over this again!
    # Maybe keep all of these mappings in a separate data file??
    for pattern, newname in srcname_map:
        if re.match(pattern, name):
            return newname
    return name
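###############################################################################
## Illustrative sketch (not part of the original pipeline; the helper name and
## the suffix regex are assumptions): the TODOs above could be partially
## automated by lower-casing a name and stripping a trailing -X.Y style
## version suffix before consulting the manual mapping.
def normalizeSrcNameSketch(name):
    candidate = name.lower()
    # drop a trailing version suffix such as "-1.2", "2.4" or "8"
    candidate = re.sub(r'-?\d+(\.\d+)*$', '', candidate)
    return unifySrcName(candidate)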
###############################################################################
## Should this advisory be skipped?
def blacklistedDSA(dsa_id):
    dsa_blacklist = ["DSA-1975", "DSA-2360", "DSA-2134", "DSA-3043", "DSA-3156"]
    return dsa_id in dsa_blacklist
###############################################################################
## Static map to correct errors in DSAs.
## Returns a possibly corrected (names, date, CVE list) tuple.
## This code is still experimental.
def fixDSAquirks(dsa_id, dsa_state):
    new_names = dsa_state[0]
    new_date = dsa_state[1]
    new_cves = dsa_state[2]
    if dsa_id == "085":
        new_cves = ["CVE-2001-1562", "LOCAL-03/04/05", "LOCAL-08/24/08"]
    elif dsa_id == "745":
        new_cves = ["CVE-2005-1921", "CVE-2005-2106", "CVE-2005-1921"]
    elif dsa_id == "1095":
        new_cves = ["CVE-2006-0747", "CVE-2006-1861", "CVE-2006-2661"]
    elif dsa_id == "1284":
        new_cves = ["CVE-2007-1320", "CVE-2007-1321", "CVE-2007-1322", "CVE-2007-2893", "CVE-2007-1366"]
    elif dsa_id == "1502":
        new_cves = ["CVE-2007-2821", "CVE-2007-3238", "CVE-2008-0193", "CVE-2008-0194"]
    elif dsa_id == "1706":
        new_cves = ["CVE-2009-0135", "CVE-2009-0136"]
    elif dsa_id == "1757":
        new_cves = ["CVE-2007-2383", "CVE-2008-7220", "CVE-2009-1208"]
    elif dsa_id == "1896":
        new_cves = ["CVE-2009-3474", "CVE-2009-3475", "CVE-2009-3476"]
    elif dsa_id == "1931":
        new_cves = ["CVE-2009-0689", "CVE-2009-2463"]
    elif dsa_id == "1989":
        new_cves = ["CVE-2010-0789"]
    elif dsa_id == "1941":
        new_cves = ["CVE-2009-0755", "CVE-2009-3903", "CVE-2009-3904", "CVE-2009-3905", "CVE-2009-3606", "CVE-2009-3607", "CVE-2009-3608", "CVE-2009-3909", "CVE-2009-3938"]
    elif dsa_id == "2004":
        new_cves = ["CVE-2010-0787", "CVE-2010-0547"]
    elif dsa_id == "2008":
        new_cves = ["LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10"]
    elif dsa_id == "2043":
        new_cves = ["CVE-2010-2062"]
    elif dsa_id == "2044":
        new_cves = ["CVE-2010-2062"]
    elif dsa_id == "2056":
        new_cves = ["CVE-2010-2155", "CVE-2009-4882"]
    elif dsa_id == "2092":
        new_cves = ["CVE-2010-1625", "CVE-2010-1448", "CVE-2009-4497"]
    elif dsa_id == "2098":
        new_cves = ["CVE-2010-3659", "CVE-2010-3660", "CVE-2010-3661", "CVE-2010-3662", "CVE-2010-3663", "CVE-2010-3664", "CVE-2010-3665", "CVE-2010-3666", "CVE-2010-3667", "CVE-2010-3668", "CVE-2010-3669", "CVE-2010-3670", "CVE-2010-3671", "CVE-2010-3672", "CVE-2010-3673", "CVE-2010-3674"]
    elif dsa_id == "2103":
        new_cves = ["CVE-2010-3076"]
    elif dsa_id == "2218":
        new_cves = ["CVE-2011-1684"]
    elif dsa_id == "2229":
        new_cves = ["CVE-2005-4494", "CVE-2006-0517", "CVE-2006-0518", "CVE-2006-0519", "CVE-2006-0625", "CVE-2006-0626", "CVE-2006-1295", "CVE-2006-1702", "CVE-2007-4525", "CVE-2008-5812", "CVE-2008-5813", "CVE-2009-3041"]
    elif dsa_id == "2261":
        new_cves = ["CVE-2009-4078", "CVE-2009-4079", "CVE-2009-4059", "LOCAL-12/30/10", "LOCAL-12/30/10"]
    elif dsa_id == "2262":
        new_cves = ["LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-05/18/11", "LOCAL-05/18/11"]
    elif dsa_id == "2286":
        new_names = ["phpmyadmin"]
    elif dsa_id == "2289":
        new_cves = ["LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11"]
    return (new_names, new_date, new_cves)
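###############################################################################
## Illustrative usage sketch (assumption: the driver that ties these helpers
## together lives outside this file; the function name is hypothetical) --
## parse a DSA page, then apply the static corrections keyed by its id.
def parseAndFixSketch(dsa_id, dsa_html):
    parsed = parseDSAhtml(dsa_html)
    # fixDSAquirks() keys on the zero-padded DSA number, e.g. "085" or "1095"
    return fixDSAquirks('%03d' % dsa_id, parsed)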
###############################################################################
## Fetch a DSA from the Debian archive. We can't use the security tracker
## since dates are missing there. DSAs started being numbered in November
## 2000, so we simply brute-force which year a DSA was published in, starting
## at 2000 and going up to the current year.
def fetchDSA(dsa_id, base_url):
    year = 2000
    now = datetime.datetime.now()
    current_year = now.year

    logging.info('Fetching DSA-%d records\n', dsa_id)
    # Lower bounds of the DSA id range for each year, to get a good first guess.
    if dsa_id >= 3431:
        year = 2016
    elif dsa_id >= 3118:
        year = 2015
    elif dsa_id >= 2832:
        year = 2014
    elif dsa_id >= 2597:
        year = 2013
    elif dsa_id >= 2377:
        year = 2012
    elif dsa_id >= 2140:
        year = 2011
    elif dsa_id >= 1965:
        year = 2010
    elif dsa_id >= 1694:
        year = 2009
    elif dsa_id >= 1443:
        year = 2008
    elif dsa_id >= 1245:
        year = 2007
    elif dsa_id >= 929:
        year = 2006
    elif dsa_id >= 622:
        year = 2005
    elif dsa_id >= 406:
        year = 2004
    elif dsa_id >= 220:
        year = 2003
    elif dsa_id >= 96:
        year = 2002
    elif dsa_id >= 11:
        year = 2001
    dsa_id2string = '%03d' % dsa_id
    flag = True
    while flag:
        try:
            flag = False
            logging.info('Opening url: ' + base_url + str(year) + '/dsa-' + dsa_id2string + '\n')
            req = urllib.request.urlopen(base_url + str(year) + '/dsa-' + dsa_id2string)
            charset = req.info().get_content_charset()
            if charset is None:
                charset = 'utf-8'
            dsa = req.read().decode(charset)
            return dsa
        except urllib.error.HTTPError as err:
            # Not found under this year: try the next one until we run out.
            if year < current_year:
                year += 1
                flag = True
            else:
                dsa = ''
                return dsa
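###############################################################################
## Illustrative usage sketch (assumption: standalone example, not part of the
## original pipeline) -- fetch a single advisory, returning '' if it cannot be
## located under any year.
def fetchOneSketch(dsa_id, base_url='https://www.debian.org/security/'):
    html = fetchDSA(dsa_id, base_url)
    if html == '':
        logging.info('DSA-%d not found in the archive\n', dsa_id)
    return html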
###############################################################################
## Try to find new DSAs by iteration, return table of DSAs to process
def checkDSAs(state, config):
    # state is implemented as a dictionary
    next_dsa = int(state['next_adv'])
    base_url = config['URL']['dsa_base_url']
    logging.info('Checking for new DSAs.. \n')
    if next_dsa < int(config['DSA']['first_dsa']):
        logging.debug('Cache was deleted, starting at DSA ' + str(next_dsa) + '\n')
        next_dsa = int(config['DSA']['first_dsa'])
    next_dsa2string = '%03d' % next_dsa
    if blacklistedDSA('DSA-' + next_dsa2string):
        next_dsa += 1

    #print(config)
    dsa = fetchDSA(next_dsa, base_url)

    while dsa != '':
        logging.debug('Got DSA-' + str(next_dsa) + '\n')
        soup = BeautifulSoup(dsa, 'html.parser')
        # crop the DSA to the content div, dropping the unnecessary page weight
        dsa = soup.find(id="content")
        if dsa is None:
            raise NameError('html file format unexpected')
        dsatable[next_dsa] = str(dsa)
        next_dsa += 1
        if blacklistedDSA('DSA-' + str(next_dsa)):
            next_dsa += 1
        dsa = fetchDSA(next_dsa, base_url)
    state['next_adv'] = next_dsa
    return dsatable
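###############################################################################
## Illustrative sketch of the nested configuration layout that checkDSAs()
## expects (assumption: the real values come from an external config file;
## first_dsa is left as a parameter because its real value is not known from
## this module).
def makeConfigSketch(first_dsa, dsa_base_url='https://www.debian.org/security/'):
    return {'URL': {'dsa_base_url': dsa_base_url},
            'DSA': {'first_dsa': first_dsa}}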
###############################################################################
## Parse DSA html data and return a tuple
## (src-pkg-names, date, [CVE-ids])
def parseDSAhtml(dsa):
    dsa_date = []
    dsa_names = []
    dsa_CVEs = []
    soup = BeautifulSoup(dsa, 'html.parser')
    # Date Reported -> dsa_date
    tmp = soup.find("dt", string=re.compile(".*Date Repo.*:"))
    tmp = str(tmp.find_next().contents[0])
    # dsa_date = tmp.split()
    # date in python datetime format
    try:
        dsa_date = datetime.datetime.strptime(tmp, "%d %b %Y")
    except ValueError:
        print('Unable to extract date. Raising exception...')
        raise NameError('DSA parsing problem')
    # Affected Packages -> dsa_names
    #print(dsa)
    tmp = soup.find("dt", string=re.compile("Affected Packages:"))
    tmp = tmp.find_next().contents
    # Need to check this against DSAs listing multiple vulnerable packages
    for i in tmp:
        if (not isinstance(i, NavigableString)) and i.has_attr('href'):
            # relies on short-circuit 'and': has_attr() is only reached for Tags
            unified = unifySrcName(i.string)
            dsa_names.append(unified)
    if dsa_names == []:
        print('Unable to find src package in DSA. unnamed package...')
        dsa_names.append('unnamed')
        print('Unnamed dsa:' + str(dsa) + '\n')
    # Security database references (CVEs) -> dsa_CVEs
    tmp = soup.find("dt", string=re.compile("Security database references:"))
    tmp = tmp.find_next().descendants
    for i in tmp:
        if (not isinstance(i, NavigableString)) and i.has_attr('href'):
            # don't count references into the Debian bug database
            if not re.compile("^Bug*").match(i.string):
                dsa_CVEs.append(i.string)
    return (dsa_names, dsa_date, dsa_CVEs)
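###############################################################################
## Illustrative storage sketch (assumption: the database and collection names
## are hypothetical; pymongo/MongoClient is imported above, suggesting parsed
## advisories are persisted to MongoDB elsewhere in the pipeline).
def storeDSASketch(mongo_uri, dsa_id, dsa_html):
    names, date, cves = fixDSAquirks('%03d' % dsa_id, parseDSAhtml(dsa_html))
    client = MongoClient(mongo_uri)
    client.debian_advisories.dsas.insert_one(
        {'dsa_id': dsa_id, 'packages': names, 'date': date, 'cves': cves})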
#dsa = fetchDSA(3200,'https://www.debian.org/security/')
#dsatable = checkDSAs(state,config)
#print(dsatable[3701])
#parseDSAhtml(dsatable[3498])
#checkDSAs(state,config)