#!/usr/bin/python3
"""Count the distinct CVE ids announced per month on debian-lts-announce.

Running this module walks the debian-lts-announce list archive message by
message for every month of 2014-2017, extracts the package name and the CVE
ids from each message, prints progress to stdout and pickles the list of
per-month distinct-CVE counts to ``DLA_sum.txt``.
"""
import datetime
import logging
import pickle
import re
import sys
import urllib.error
import urllib.request
from html.parser import HTMLParser

from bs4 import BeautifulSoup
from bs4 import NavigableString
from pymongo import MongoClient

import debian_advisory as da

BASE_URL = 'https://lists.debian.org/debian-lts-announce/'

# Compiled once at import time instead of once per downloaded message.
PACKAGE_PATTERN = re.compile('Package.*: .*')
# The sequence part of a CVE id has four OR MORE digits; matching exactly
# four would cut "CVE-2016-10001" down to "CVE-2016-1000" and merge
# unrelated ids when they are de-duplicated.
CVE_PATTERN = re.compile(r'CVE-[0-9]{4}-[0-9]{4,}')

# Messages for which the package name is hard-coded instead of being taken
# from the parsed text.
_PACKAGE_OVERRIDES = {
    'https://lists.debian.org/debian-lts-announce/2016/10/msg00011.html':
        'mpg123',
    'https://lists.debian.org/debian-lts-announce/2016/05/msg00037.html':
        'graphicsmagick',
}


def _fetch_message(url):
    """Download *url* and return its body decoded to ``str``.

    The charset declared by the server is used, falling back to UTF-8 when
    none is declared. The response is closed before returning.

    Raises:
        urllib.error.HTTPError: if the server answers with an error status.
    """
    with urllib.request.urlopen(url) as response:
        charset = response.info().get_content_charset()
        if charset is None:
            charset = 'utf-8'
        return response.read().decode(charset)


def getDLAs():
    """Scrape the list archive and return the per-month distinct CVE counts.

    For each month from 2014-01 through 2017-12 the messages
    ``msg00000.html``, ``msg00001.html``, ... are fetched in order. The CVE
    ids of every message for which a package name is known are collected,
    de-duplicated per month, and the number of distinct ids is recorded.

    Side effects: prints progress and intermediate results to stdout and
    writes the returned list, pickled, to ``DLA_sum.txt`` in the current
    working directory.

    Returns:
        A list of 48 ints, one per month in chronological order.
    """
    totalDLAs = dict()
    totalDLAsList = []
    logging.info('Checking for DLAs...\n')

    for year in range(2014, 2018):
        for month in range(1, 13):
            key = str(year) + ',' + str(month)
            month_cves = []
            i = 0
            while True:
                url = '{}{}/{:02d}/msg{:05d}.html'.format(
                    BASE_URL, year, month, i)
                print('Opening url: ' + url + '\n')
                try:
                    dla = _fetch_message(url)
                except urllib.error.HTTPError:
                    # An HTTP error only ends the month once the index is
                    # past 1; a failure at index 0 or 1 is skipped over.
                    # NOTE(review): presumably the first message numbers can
                    # be absent from the archive - confirm.
                    if i > 1:
                        break
                else:
                    (pkg, cves) = parseDLAhtml(
                        dla, PACKAGE_PATTERN, CVE_PATTERN)
                    pkg = fixURL(url, pkg)
                    # NOTE(review): CVEs are counted only when a package
                    # name is available; this nesting was reconstructed from
                    # whitespace-mangled source - confirm it is intended.
                    if pkg:
                        month_cves += cves
                i += 1

            print(month_cves)
            totalDLAs[key] = sorted(set(month_cves))
            totalDLAsList.append(len(totalDLAs[key]))

    print(totalDLAs)
    print(totalDLAsList)
    with open("DLA_sum.txt", "wb") as fp:
        pickle.dump(totalDLAsList, fp)
    return totalDLAsList


def parseDLAhtml(dla, p1, p2):
    """Extract the package name and the CVE ids from one message.

    Args:
        dla: text of the message.
        p1: pattern (compiled or string) matching the ``Package : name``
            line; the package name is whatever follows the first colon of
            the match, stripped of surrounding whitespace.
        p2: pattern (compiled or string) matching a single CVE id. ``None``
            selects the module default ``CVE_PATTERN``.

    Returns:
        ``(pkg, cves)``: ``pkg`` is the package name, or ``None`` when *p1*
        does not match (the whole message is then printed); ``cves`` is the
        sorted list of distinct ids found in *dla*.
    """
    match = re.search(p1, dla)
    if match:
        print(match.group(0))
        pkg = match.group(0).split(':', 1)[1].strip()
        print(pkg)
    else:
        pkg = None
        print(dla)

    cve_pattern = CVE_PATTERN if p2 is None else p2
    # Sorted so the result does not depend on set iteration order.
    cves = sorted(set(re.findall(cve_pattern, dla)))
    return (pkg, cves)


def fixURL(url, pkg):
    """Return the package name to use for the message at *url*.

    A small table of known message URLs overrides the parsed name; for any
    other URL *pkg* is returned unchanged.
    """
    return _PACKAGE_OVERRIDES.get(url, pkg)


if __name__ == "__main__":
    dlas = getDLAs()
    #saveDLAs(dlas)