#!/usr/bin/python3
###############################################################################
##
## Functions for downloading and parsing Debian Security Advisories (DSAs)
##
###############################################################################

import re
import datetime
from html.parser import HTMLParser
from bs4 import BeautifulSoup
from bs4 import NavigableString
from pymongo import MongoClient
import urllib.error
import urllib.request
import logging, sys

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

# Testing global variables
# NOTE(review): checkDSAs() reads config['URL']['dsa_base_url'] and
# config['DSA']['first_dsa']; this flat test config does not have that shape.
config = dict([('dsa_base_url','https://www.debian.org/security/')])
state = dict([('next_adv',3496)])
dsatable = dict()


# Track renamed packages here, easy but manual. We should look into ways
# to automate this
#
# Ordered (pattern, canonical name) rules; the FIRST pattern that matches at
# the start of the name wins, so specific rules must precede general ones.
#
# Patterns that were written glob-style with a trailing '*' after a literal
# character (e.g. "^openssl*") are plain prefixes here: as a regex the '*'
# made the last character optional, so "^openssl*" also matched "openssh"
# and "libast*" also matched "libass".
# "linux-2.6*" is intentionally kept verbatim.
_SRC_NAME_RULES = tuple(
    (re.compile(_pattern), _newname)
    for _pattern, _newname in (
        (r"mozilla-firefox", "firefox-esr"),
        (r"iceweasel", "firefox-esr"),
        # Must come before the generic "mozilla" rule, which would
        # otherwise swallow it.
        (r"mozilla-thunderbird", "icedove"),
        (r"mozilla", "firefox-esr"),
        (r"thunderbird", "icedove"),
        (r"squid\d", "squid3"),
        (r"squid", "squid3"),
        (r"tk8.*", "tk8"),
        (r"xpdf-i", "xpdf"),
        (r"zope", "zope-common"),
        (r"librmagick-ruby", "ruby-rmagick"),
        (r"libcompass-ruby", "ruby-compass"),
        (r"bio-ruby", "ruby-bio"),
        (r"request-tracker", "request-tracker4"),
        (r"perl-5.005", "perl"),
        (r"openldap", "openldap"),
        (r"openoffice", "libreoffice"),
        (r"nsd3", "nsd"),
        (r"network-manager/network-manager-applet", "network-manager"),
        (r"nagios3", "nagios"),
        (r"nagios2", "nagios"),
        (r"^mysql-\d*", "mysql-transitional"),
        (r"linux-2.6*", "linux"),
        (r"linux-kernel-alpha", "linux"),
        (r"linux-kernel-i386", "linux"),
        (r"libmusicbrainz", "libmusicbrainz"),
        (r"libgtop1", "libgtop2"),
        (r"libgd1", "libgd2"),
        (r"libast", "libast2"),
        # NOTE(review): target name looks unrelated to the source name;
        # kept as found, please confirm.
        (r"libmozjs0d", "libast"),
        (r"^kernel-source", "linux"),
        (r"^kernel-patch", "linux"),
        # (r"kernel", "linux-2.4"),
        (r"^kernel-source-2.4.*", "linux"),
        (r"^kernel-image-2.2.*", "linux"),
        (r"^kernel-image", "linux"),
        (r"^kernel-patch-", "linux"),
        (r"kernel-patch-benh", "linux"),
        (r"kernel-patch-vserver", "linux"),
        (r"gnutls", "gnutls28"),
        (r"gallery2", "gallery"),
        (r"firebird", "firebird3.0"),
        (r"fltk1.1", "fltk1.3"),
        (r"fox1.4", "fox1.6"),
        (r"exim-tls", "exim4"),
        (r"epic4", "epic"),
        (r"drupal\d", "drupal7"),
        (r"dhcp", "dhcpcd5"),
        (r"cyrus-sasl", "cyrus-sasl2"),
        (r"^cyrus-imapd.*", "cyrus-imapd"),
        (r"^kolab-cyrus-imapd.*", "cyrus-imapd"),
        (r"cfengine", "cfengine2"),
        (r"bind", "bind9"),
        (r"apache", "apache2"),
        (r"horde\d", "php-horde"),
        (r"mediawiki", "mediawiki"),
        (r"ffmpeg-debian", "ffmpeg"),
        (r"xserver-xorg", "xorg-server"),
        (r"xfree86-1", "xorg-server"),
        (r"xfree86v3", "xorg-server"),
        (r"xfree86", "xorg-server"),
        (r"xorg", "xorg-server"),
        (r"typo3", "typo3-src"),
        (r"lvm10", "lvm2"),
        (r"cupsys", "cups"),
        (r"ethereal", "wireshark"),
        (r"libboost1.42", "libboost"),
        (r"cinelerra-cv", "cinelerra"),
        (r"mplayer-dmo", "mplayer"),
        # NOTE(review): target name looks unrelated to the source name;
        # kept as found, please confirm.
        (r"libcap", "libgda2"),
        (r"xkb-data-legacy", "xkeyboard-config"),
        (r"boost-defaults", "boost"),
        (r"xen-3", "xen"),
        (r"xen-utils", "xen"),
        (r"kde-icons-gorilla", "kde-icons-korilla"),
        (r"libcgi-application-extra-plugin-bundle-perl",
         "libcgi-application-plugins-perl"),
        (r"^openssl", "openssl"),
        (r"^tomcat\d", "tomcat8"),
        (r"^tomcat\d.\d$", "tomcat8"),
        (r"^libgda\d", "libgda"),
        (r"^readline\d", "readline6"),
        (r"^libwnck\d", "libwnck"),
        (r"^xerces-c\d", "xerces-c"),
        (r"kde4libs", "kdelibs"),
        # NOTE(review): "libticals" may be a typo for "libticalcs"; kept
        # as found because other data may already use this spelling.
        (r"^libticalcs\d", "libticals"),
        (r"^libtifiles\d", "libtifiles"),
        (r"^db\d.\d$", "db4.8"),
        (r"^gcc-.*", "gcc"),
        (r"^automake\d+.*", "automake"),
        (r"^sun-java\d", "sun-java6"),
        (r"^open-jdk\d", "open-jdk7"),
        (r"^mbrola-es\d", "mbrola-es"),
        (r"^mgltools-.*", "mgltools"),
        (r"^coin\d$", "coin"),
        (r"^adobereader-\.*", "adobereader"),
        (r"^picon-\.*", "picon"),
        (r"^nvidia-graphics-drivers\.*", "nvidia-graphics-drivers"),
        (r"^boost\d\.\d\d", "boost"),
        (r"^llvm-\d.\d", "llvm"),
        (r"^octave\d.\d", "octave"),
        (r"^libjibx\d.\d-java", "libjibx-java"),
        (r"^emacs2\d", "emacs2"),
        (r"^emacs2\d-non-dfsg", "emacs2"),
        (r"^libupnp\d", "libupnp"),
        (r"^python\d.\d", "python3.5"),
        (r"^python\d", "python3.5"),
        (r"^postgresql-\d*", "postgresql-9.6"),
        # The rails rules must come before the generic "^ruby" rule, which
        # would otherwise swallow them.
        (r"^ruby-actionpack", "rails"),
        (r"^ruby-activerecord", "rails"),
        (r"^ruby\d.\d", "ruby2.3"),
        (r"^ruby", "ruby2.3"),
        # (r"^php\d", "php7.0"),
        # (r"^PHP\d", "php7.0"),
        # (r"^openjdk", "openjdk-8"),
        (r"^mariadb-10.*", "mariadb-10.1"),
        (r"^librack-ruby", "ruby-rack"),
        (r"^libopenssl-ruby", "ruby-defaults"),
        (r"krb4", "krb5"),
        (r"ssh-krb5", "openssh"),
        (r"ssh", "openssh"),
        (r"qemu-kvm", "qemu"),
        (r"kvm", "qemu"),
        (r"phpbb2", "phpbb3"),
        (r"libpng", "libpng1.6"),
        (r"eglibc", "glibc"),
        (r"gnupg", "gnupg2"),
        (r"xine-lib", "xine-lib-1.2"),
        (r"kfreebsd-\d*", "kfreebsd-10"),
        (r"pdfkit", "pdfkit"),
        (r"gforge", "fusionforge"),
    )
)


def unifySrcName(name):
    """Map a (possibly renamed or versioned) source package name to one name.

    The rules in ``_SRC_NAME_RULES`` are tried in order with ``re.match``
    (anchored at the start of ``name``); the first hit decides the result.

    :param name: source package name as it appears in a DSA.
    :returns: the canonical name of the first matching rule, or ``name``
        unchanged when no rule matches.
    """
    ## TODO: it should map to the most recent version, not unversioned
    ## TODO: we can partially automate this..
    ## -> make all lower-case
    ## -> replace -X.Y version numbers by highest encounter(?)
    ## -> handle special cases like xfree86
    # Have to go over this again!
    # Maybe have a file with all these things??
    for pattern, newname in _SRC_NAME_RULES:
        if pattern.match(name):
            return newname
    return name


###############################################################################
## Should this advisory be skipped?
def blacklistedDSA(dsa_id):
    """Return True when ``dsa_id`` (a string such as "DSA-1975") is skipped."""
    dsa_blacklist = ["DSA-1975", "DSA-2360", "DSA-2134", "DSA-3043", "DSA-3156"]
    return dsa_id in dsa_blacklist


###############################################################################
## Static map to correct errors in DSAs
## This code is still experimental

# DSA number -> replacement list of CVE identifiers.
_DSA_CVE_FIXES = {
    85: ["CVE-2001-1562", "LOCAL-03/04/05", "LOCAL-08/24/08"],
    745: ["CVE-2005-1921", "CVE-2005-2106", "CVE-2005-1921"],
    1095: ["CVE-2006-0747", "CVE-2006-1861", "CVE-2006-2661"],
    1284: ["CVE-2007-1320", "CVE-2007-1321", "CVE-2007-1322",
           "CVE-2007-2893", "CVE-2007-1366"],
    1502: ["CVE-2007-2821", "CVE-2007-3238", "CVE-2008-0193",
           "CVE-2008-0194"],
    1706: ["CVE-2009-0135", "CVE-2009-0136"],
    1757: ["CVE-2007-2383", "CVE-2008-7220", "CVE-2009-1208"],
    1896: ["CVE-2009-3474", "CVE-2009-3475", "CVE-2009-3476"],
    1931: ["CVE-2009-0689", "CVE-2009-2463"],
    1989: ["CVE-2010-0789"],
    1941: ["CVE-2009-0755", "CVE-2009-3903", "CVE-2009-3904",
           "CVE-2009-3905", "CVE-2009-3606", "CVE-2009-3607",
           "CVE-2009-3608", "CVE-2009-3909", "CVE-2009-3938"],
    2004: ["CVE-2010-0787", "CVE-2010-0547"],
    2008: ["LOCAL-02/23/10", "LOCAL-02/23/10", "LOCAL-02/23/10",
           "LOCAL-02/23/10"],
    2043: ["CVE-2010-2062"],
    2044: ["CVE-2010-2062"],
    2056: ["CVE-2010-2155", "CVE-2009-4882"],
    2092: ["CVE-2010-1625", "CVE-2010-1448", "CVE-2009-4497"],
    2098: ["CVE-2010-3659", "CVE-2010-3660", "CVE-2010-3661",
           "CVE-2010-3662", "CVE-2010-3663", "CVE-2010-3664",
           "CVE-2010-3665", "CVE-2010-3666", "CVE-2010-3667",
           "CVE-2010-3668", "CVE-2010-3669", "CVE-2010-3670",
           "CVE-2010-3671", "CVE-2010-3672", "CVE-2010-3673",
           "CVE-2010-3674"],
    2103: ["CVE-2010-3076"],
    2218: ["CVE-2011-1684"],
    2229: ["CVE-2005-4494", "CVE-2006-0517", "CVE-2006-0518",
           "CVE-2006-0519", "CVE-2006-0625", "CVE-2006-0626",
           "CVE-2006-1295", "CVE-2006-1702", "CVE-2007-4525",
           "CVE-2008-5812", "CVE-2008-5813", "CVE-2009-3041"],
    2261: ["CVE-2009-4078", "CVE-2009-4079", "CVE-2009-4059",
           "LOCAL-12/30/10", "LOCAL-12/30/10"],
    2262: ["LOCAL-03/01/11", "LOCAL-03/01/11", "LOCAL-03/01/11",
           "LOCAL-03/01/11", "LOCAL-05/18/11", "LOCAL-05/18/11"],
    2289: ["LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
           "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
           "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11",
           "LOCAL-07/27/11", "LOCAL-07/27/11", "LOCAL-07/27/11"],
}

# DSA number -> replacement list of source package names.
_DSA_NAME_FIXES = {
    2286: ["phpmyadmin"],
    1977: ["python3.5"],
    47: ["linux"],
    479: ["linux"],
    480: ["linux"],
    482: ["linux"],
    489: ["linux"],
    491: ["linux"],
    495: ["linux"],
}


def fixDSAquirks(dsa_id, dsa_state):
    """Apply hand-maintained corrections to one parsed advisory.

    :param dsa_id: advisory number as an int (e.g. ``2286``).
    :param dsa_state: indexable ``(names, date, cves)`` as produced by
        ``parseDSAhtml``.
    :returns: tuple ``(names, date, cves)`` where names or CVEs are replaced
        when a correction exists for ``dsa_id``; the date is passed through
        untouched.
    """
    new_names = dsa_state[0]
    new_date = dsa_state[1]
    new_cves = dsa_state[2]

    if dsa_id in _DSA_CVE_FIXES:
        # Copy so callers can mutate the result without changing the table.
        new_cves = list(_DSA_CVE_FIXES[dsa_id])
        logging.debug('Replaced CVE list of DSA-%s', dsa_id)
    elif dsa_id in _DSA_NAME_FIXES:
        new_names = list(_DSA_NAME_FIXES[dsa_id])
        logging.debug('Replaced package names of DSA-%s', dsa_id)

    return (new_names, new_date, new_cves)


###############################################################################
## Fetch DSA from debian archive. Can't use tracker since dates are missing.
## DSA started counting in November 2000. We'll simply bruteforce which DSA
## was in which year and start in 2000 til current.

# (first DSA number, year) pairs, newest first; used as a starting guess only.
_DSA_FIRST_ID_BY_YEAR = (
    (3751, 2017),
    (3431, 2016),
    (3118, 2015),
    (2832, 2014),
    (2597, 2013),
    (2377, 2012),
    (2140, 2011),
    (1965, 2010),
    (1694, 2009),
    (1443, 2008),
    (1245, 2007),
    (929, 2006),
    (622, 2005),
    (406, 2004),
    (220, 2003),
    (96, 2002),
    (11, 2001),
)


def fetchDSA(dsa_id, base_url):
    """Download the HTML page of one advisory.

    The URL is ``<base_url><year>/dsa-<id>``. The year is guessed from
    ``_DSA_FIRST_ID_BY_YEAR`` and, on an HTTP error, advanced one year at a
    time up to the current year.

    :param dsa_id: advisory number as an int.
    :param base_url: URL prefix ending in '/'.
    :returns: decoded page text, or ``''`` when no year yields the page.
    :raises urllib.error.URLError: for non-HTTP failures (not handled here).
    """
    year = 2000
    for first_id, first_year in _DSA_FIRST_ID_BY_YEAR:
        if dsa_id >= first_id:
            year = first_year
            break
    current_year = datetime.datetime.now().year

    logging.info('Fetching DSA-%d records\n', dsa_id)
    dsa_id2string = '%03d' % dsa_id

    while True:
        url = base_url + str(year) + '/dsa-' + dsa_id2string
        logging.info('Opening url: ' + url + '\n')
        try:
            # Context manager closes the HTTP response once it has been read.
            with urllib.request.urlopen(url) as req:
                charset = req.info().get_content_charset()
                if charset is None:
                    charset = 'utf-8'
                return req.read().decode(charset)
        except urllib.error.HTTPError:
            if year >= current_year:
                return ''
            year += 1


###############################################################################
## Try to find new DSAs by iteration, return table of DSAs to process
def checkDSAs(state, config):
    """Fetch advisories from ``state['next_adv']`` on until one is missing.

    Each fetched page is cropped to its ``id="content"`` element and stored
    as a string in the module-level ``dsatable`` under its DSA number.
    ``state['next_adv']`` is updated to the first number that was not fetched.

    :param state: dict with key ``'next_adv'``.
    :param config: nested mapping providing ``config['URL']['dsa_base_url']``
        and ``config['DSA']['first_dsa']``.
    :returns: the module-level ``dsatable`` dict.
    :raises NameError: when a page has no ``id="content"`` element.
    """
    next_dsa = int(state['next_adv'])  # state implemented as dictionary
    base_url = config['URL']['dsa_base_url']
    first_dsa = int(config['DSA']['first_dsa'])

    logging.info('Checking for new DSAs.. \n')

    if next_dsa < first_dsa:
        next_dsa = first_dsa
        logging.debug('Cache was deleted, starting at DSA ' + str(next_dsa) + '\n')

    # Skip every blacklisted number, not just the first one.
    while blacklistedDSA('DSA-%03d' % next_dsa):
        next_dsa += 1

    dsa = fetchDSA(next_dsa, base_url)

    while dsa != '':
        logging.debug('Got DSA-' + str(next_dsa) + '\n')
        soup = BeautifulSoup(dsa, 'html.parser')
        # crop the DSA from unnecessary weight
        content = soup.find(id="content")
        # find() yields None (not '') when the element is absent.
        if content is None:
            raise NameError('html file format unexpected')
        dsatable[next_dsa] = str(content)

        next_dsa += 1
        while blacklistedDSA('DSA-%03d' % next_dsa):
            next_dsa += 1
        dsa = fetchDSA(next_dsa, base_url)

    state['next_adv'] = next_dsa
    return dsatable


###############################################################################
## Parse DSA html data and return array
## (src-pkg-name date (CVE-id)*)

# Link texts starting like "Bug..." point at the bug database, not at CVEs.
_BUG_REF_RE = re.compile("^Bug*")


def parseDSAhtml(dsa):
    """Extract package names, report date and CVE ids from one DSA page.

    :param dsa: HTML text of the advisory (as stored by ``checkDSAs``).
    :returns: tuple ``(dsa_names, dsa_date, dsa_CVEs)``: a list of unified
        source package names (``['unnamed']`` when none is found), a
        ``datetime.datetime`` parsed with ``"%d %b %Y"``, and a list of
        reference link texts that are not bug-database links.
    :raises NameError: when the "Date Reported" entry cannot be located.
    :raises ValueError: when the date text does not match ``"%d %b %Y"``.
    """
    soup = BeautifulSoup(dsa, 'html.parser')

    # Date Reported -> dsa_date
    date_dt = soup.find("dt", string=re.compile(".*Date Repo.*:"))
    date_dd = date_dt.find_next() if date_dt is not None else None
    if date_dd is None or not date_dd.contents:
        print('Unable to extract date. Raising exception...')
        raise NameError('DSA parsing problem')
    # date in datetime python format
    dsa_date = datetime.datetime.strptime(
        str(date_dd.contents[0]).strip(), "%d %b %Y")

    # Affected Packages -> dsa_names
    dsa_names = []
    pkg_dt = soup.find("dt", string=re.compile("Affected Packages:"))
    pkg_dd = pkg_dt.find_next() if pkg_dt is not None else None
    if pkg_dd is not None:
        # Need to check with multiple vulnerable packages
        for node in pkg_dd.contents:
            if isinstance(node, NavigableString) or not node.has_attr('href'):
                continue
            # .string is None for a link with more than one child node.
            if node.string is None:
                continue
            dsa_names.append(unifySrcName(str(node.string)))

    if dsa_names == []:
        print('Unable to find src package in DSA. unnamed package...')
        dsa_names.append('unnamed')
        print('Unnamed dsa:' + str(dsa) + '\n')

    # Security database references (CVEs) -> dsa_CVEs
    dsa_CVEs = []
    ref_dt = soup.find("dt", string=re.compile("Security database references:"))
    ref_dd = ref_dt.find_next() if ref_dt is not None else None
    if ref_dd is not None:
        for node in ref_dd.descendants:
            if isinstance(node, NavigableString) or not node.has_attr('href'):
                continue
            if node.string is None:
                continue
            # don't count bug database
            if not _BUG_REF_RE.match(node.string):
                dsa_CVEs.append(str(node.string))

    return (dsa_names, dsa_date, dsa_CVEs)


#dsa = fetchDSA(3200,'https://www.debian.org/security/')
#dsatable = checkDSAs(state,config)
#print(dsatable[3701])
#parseDSAhtml(dsatable[3498])
#checkDSAs(state,config)