DLAmine.py

#!/usr/bin/python3
# Mine Debian LTS Advisories (DLAs) from the debian-lts-announce list archive.
import debian_advisory as da
import re
import datetime
from html.parser import HTMLParser
from bs4 import BeautifulSoup
from bs4 import NavigableString
from pymongo import MongoClient
import urllib.request
import urllib.error
import logging, sys
import pickle


def getDLAs():
    """Crawl the debian-lts-announce archive (2014-2017) and count the unique CVEs announced per month."""
    src2monthDLAs = dict()  # source package -> (CVE list, [year, month])
    totalDLAs = dict()      # 'year,month' -> CVEs announced in that month
    totalDLAsList = []      # per-month unique CVE counts, in chronological order
    base_url = 'https://lists.debian.org/debian-lts-announce/'
    logging.info('Checking for DLAs...\n')
    dlas = []
    more = True
    i = 0
    j = 0
    for year in range(2014, 2018):
        for month in range(1, 13):
            totalDLAs[str(year) + ',' + str(month)] = []
            i = 0
            while more:
                try:
                    # Messages are archived as <base_url>/YYYY/MM/msgNNNNN.html
                    url = base_url + str(year) + '/' + str(month).zfill(2) + '/msg' + str(i).zfill(5) + '.html'
                    print('Opening url: ' + url + '\n')
                    req = urllib.request.urlopen(url)
                    charset = req.info().get_content_charset()
                    if charset is None:
                        charset = 'utf-8'
                    dla = req.read().decode(charset)
                    dlas.append([dla, year, month])
                    p1 = re.compile('Package.*: .*')
                    p2 = re.compile('CVE-[0-9]{4}-[0-9]{4,}')  # 4+ digits: CVE sequence numbers may exceed 9999
                    (pkg, cves) = parseDLAhtml(dla, p1, p2)
                    pkg = fixURL(url, pkg)
                    if pkg:
                        src2monthDLAs[pkg] = (cves, [year, month])
                        totalDLAs[str(year) + ',' + str(month)] += cves
                except urllib.error.HTTPError:
                    # An HTTP error (typically 404) beyond the first couple of message numbers
                    # means the month's archive is exhausted.
                    if i > 1:
                        break
                i += 1
            print(totalDLAs[str(year) + ',' + str(month)])
            # Deduplicate the month's CVEs and record how many there were.
            totalDLAs[str(year) + ',' + str(month)] = list(set(totalDLAs[str(year) + ',' + str(month)]))
            totalDLAsList.append(len(totalDLAs[str(year) + ',' + str(month)]))
            j += 1
    print(totalDLAs)
    print(totalDLAsList)
    with open("DLA_sum.txt", "wb") as fp:
        pickle.dump(totalDLAsList, fp)
    return totalDLAsList


def parseDLAhtml(dla, p1, p2):
    """Extract the source package name (pattern p1) and the set of CVE ids (pattern p2) from one advisory page."""
    pkg = re.search(p1, dla)
    if pkg:
        print(pkg.group(0))
        # Keep only the text after the first ':' of the "Package ...:" line.
        pkg = pkg.group(0).split(':', 1)[1].strip()
        print(pkg)
    else:
        # No package line found; dump the whole page for manual inspection.
        print(dla)
    cves = re.findall(p2, dla)
    cves = list(set(cves))
    return (pkg, cves)


def fixURL(url, pkg):
    """Hard-coded package-name overrides for two specific advisories."""
    if url == 'https://lists.debian.org/debian-lts-announce/2016/10/msg00011.html':
        return 'mpg123'
    elif url == 'https://lists.debian.org/debian-lts-announce/2016/05/msg00037.html':
        return 'graphicsmagick'
    else:
        return pkg


if __name__ == "__main__":
    dlas = getDLAs()
    #saveDLAs(dlas)
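
# Quick check of the output (a sketch, not part of the mining pipeline): after a run,
# DLA_sum.txt holds the pickled list of per-month unique-CVE counts, one entry per
# month from January 2014 through December 2017, and can be reloaded like this:
#
#   import pickle
#   with open("DLA_sum.txt", "rb") as fp:
#       counts = pickle.load(fp)
#   print(len(counts), counts)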