|
@@ -9,11 +9,18 @@
|
|
|
|
|
|
import re
|
|
|
import datetime
|
|
|
+from html.parser import HTMLParser
|
|
|
+from bs4 import BeautifulSoup
|
|
|
import urllib.request
|
|
|
import logging, sys
|
|
|
|
|
|
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
|
|
|
|
|
|
+# Testing: module-level configuration and state (base URL, next advisory id, result table)
|
|
|
+config = dict([('dsa_base_url','https://www.debian.org/security/')])
|
|
|
+state = dict([('next_adv',3700)])
|
|
|
+dsatable = dict()
|
|
|
+
|
|
|
## Fetch DSA from debian archive. Can't use tracker since dates are missing.
|
|
|
## DSA started counting in November 2000. We'll simply bruteforce which DSA
|
|
|
## was in which year and start in 2000 til current.
|
|
@@ -64,14 +71,19 @@ def fetchDSA(dsa_id, base_url):
|
|
|
try:
|
|
|
flag = False
|
|
|
logging.info('Opening url: ' + base_url + str(year) + '/dsa-' + dsa_id2string + '\n')
|
|
|
- dsa = urllib.request.urlopen(base_url + str(year) + '/dsa-' + dsa_id2string).read()
|
|
|
+ req = urllib.request.urlopen(base_url + str(year) + '/dsa-' + dsa_id2string)
|
|
|
+ charset = req.info().get_content_charset()
|
|
|
+ if charset is None:
|
|
|
+ charset = 'utf-8'
|
|
|
+ dsa = req.read().decode(charset)
|
|
|
return dsa
|
|
|
except urllib.error.HTTPError as err:
|
|
|
if year < current_year:
|
|
|
year += 1
|
|
|
flag = True
|
|
|
else:
|
|
|
- pass
|
|
|
+ dsa = ''
|
|
|
+ return dsa
|
|
|
###############################################################################
|
|
|
|
|
|
## Try to find new DSAs by iteration, return table of DSAs to process
|
|
@@ -81,15 +93,69 @@ def checkDSAs(state, config):
|
|
|
|
|
|
logging.info('Checking for new DSAs.. \n')
|
|
|
|
|
|
- if next_dsa < config['first_dsa']:
|
|
|
- logging.debug('Cache was deleted, starting at DSA ' + str(next_dsa) + '\n')
|
|
|
- next_dsa = config['first_dsa']
|
|
|
+# if next_dsa < config['first_dsa']:
|
|
|
+# logging.debug('Cache was deleted, starting at DSA ' + str(next_dsa) + '\n')
|
|
|
+# next_dsa = config['first_dsa']
|
|
|
+
|
|
|
+# if blacklistedDSA('DSA-' + str(next_dsa)):
|
|
|
+# next_dsa += 1
|
|
|
|
|
|
- if blacklistedDSA('DSA-' + str(next_dsa)):
|
|
|
+ dsa = fetchDSA(next_dsa, config['dsa_base_url'])
|
|
|
+
|
|
|
+ while dsa != '':
|
|
|
+ logging.debug('Got DSA-' + str(next_dsa) + '\n')
|
|
|
+ soup = BeautifulSoup(dsa,'html.parser')
|
|
|
+ # crop the DSA page down to its content div, dropping unnecessary markup
|
|
|
+ dsa = soup.find(id="content")
|
|
|
+ if dsa == '':
|
|
|
+ raise NameError('html file format unexpected')
|
|
|
+ dsatable[next_dsa] = str(dsa)
|
|
|
next_dsa += 1
|
|
|
+# if blacklistedDSA('DSA-' + str(next_dsa)):
|
|
|
+# next_dsa += 1
|
|
|
+ dsa = fetchDSA(next_dsa, config['dsa_base_url'])
|
|
|
+
|
|
|
+ state['next_dsa'] = next_dsa
|
|
|
+ return dsatable
|
|
|
+
|
|
|
+###############################################################################
|
|
|
+
|
|
|
+
|
|
|
+## Parse DSA html data and return array
|
|
|
+## (src-pkg-name date (CVE-id)*)
|
|
|
+
|
|
|
+def parseDSAhtml(dsa):
|
|
|
+
|
|
|
+ # Date Reported -> dsa_date
|
|
|
+ soup = BeautifulSoup(dsa, 'html.parser')
|
|
|
+ tmp = soup.find("dt",string=re.compile(".*Date Repo.*:"))
|
|
|
+ tmp = str(tmp.find_next().contents[0])
|
|
|
+ dsa_date = tmp.split()
|
|
|
+ if dsa_date == []:
|
|
|
+ print('Unable to extract date. Returning...')
|
|
|
+ raise NameError('file format problem')
|
|
|
+
|
|
|
+ # Affected Packages -> dsa_names
|
|
|
+ #print(dsa)
|
|
|
+ tmp = soup.find("dt",string=re.compile("Affected Packages:"))
|
|
|
+ tmp = tmp.find_next().contents
|
|
|
+
|
|
|
+ for i in tmp:
|
|
|
+ print(i)
|
|
|
+
|
|
|
+# m = re.search('<dt>Affected\ Packages:<\/dt>(.*)<\/dd>.*<dt>Vulnerable:',dsa)
|
|
|
+# if m:
|
|
|
+# print(len(m.group))
|
|
|
+# tmpstring = m.group(1)
|
|
|
+# else:
|
|
|
+# print('Unable to extract affected packages. Returning...')
|
|
|
+# raise NameError('html file format unexpected')
|
|
|
+
|
|
|
|
|
|
- dsa = fetchDSA(next_dsa, config['dsa_base_url']
|
|
|
- while dsa
|
|
|
|
|
|
|
|
|
-fetchDSA(3200,'https://www.debian.org/security/')
|
|
|
+#dsa = fetchDSA(3200,'https://www.debian.org/security/')
|
|
|
+dsatable = checkDSAs(state,config)
|
|
|
+#print(dsatable[3701])
|
|
|
+parseDSAhtml(dsatable[3701])
|
|
|
+#checkDSAs(state,config)
|