#!/usr/bin/python3
# DLAmine.py: mine Debian LTS advisories (DLAs) from the debian-lts-announce
# archive and record the CVEs fixed per source package and per month.
from DebianAdvisory import DebianAdvisory as da
import re
import datetime
from html.parser import HTMLParser
from bs4 import BeautifulSoup
from bs4 import NavigableString
from pymongo import MongoClient
import urllib.request
import urllib.error
import logging
import sys
import pickle
import json

def getDLAs():
    src2monthDLAs = dict()   # package -> (list of CVE ids, [year, month])
    totalDLAs = dict()       # 'year,month' -> list of CVE ids
    totalDLAsList = []       # distinct CVEs per month, in chronological order
    base_url = 'https://lists.debian.org/debian-lts-announce/'
    logging.info('Checking for DLAs...\n')
    dlas = []
    more = True
    i = 0
    j = 0

    for year in range(2014, 2019):
        for month in range(1, 13):
            totalDLAs[str(year) + ',' + str(month)] = []
            i = 0
            # Walk msg00000.html, msg00001.html, ... until the month's archive runs out.
            while more:
                try:
                    url = base_url + str(year) + '/' + str(month).zfill(2) + '/msg' + str(i).zfill(5) + '.html'
                    print('Opening url: ' + url + '\n')
                    req = urllib.request.urlopen(url)
                    charset = req.info().get_content_charset()
                    if charset is None:
                        charset = 'utf-8'
                    dla = req.read().decode(charset)
                    dlas.append([dla, year, month])
                    p1 = re.compile('Package.*: .*')
                    # CVE sequence numbers may have more than four digits, hence {4,}.
                    p2 = re.compile('CVE-[0-9]{4}-[0-9]{4,}')
                    (pkg, cves) = parseDLAhtml(dla, p1, p2)
                    pkg = fixURL(url, pkg)
                    if pkg:
                        src2monthDLAs[pkg] = (cves, [year, month])
                        totalDLAs[str(year) + ',' + str(month)] += cves
                except urllib.error.HTTPError:
                    # A missing message usually means the month is exhausted;
                    # tolerate gaps in the first couple of message numbers.
                    if i > 1:
                        break
                i += 1
            print(totalDLAs[str(year) + ',' + str(month)])
            # Deduplicate the month's CVEs and record the monthly count.
            totalDLAs[str(year) + ',' + str(month)] = list(set(totalDLAs[str(year) + ',' + str(month)]))
            totalDLAsList.append(len(totalDLAs[str(year) + ',' + str(month)]))
            j += 1

    print(totalDLAs)
    print(totalDLAsList)

    with open("DLA_sum.txt", "wb") as fp:
        pickle.dump(totalDLAsList, fp)

    with open("src2month_DLA.txt", "w") as fp:
        json.dump(src2monthDLAs, fp)

    return totalDLAsList

def parseDLAhtml(dla, p1, p2):
    # Extract the source package name from the advisory's "Package : <name>" line.
    pkg = re.search(p1, dla)
    if pkg:
        print(pkg.group(0))
        pkg = pkg.group(0).split(':', 1)[1].strip()
        print(pkg)
    else:
        print(dla)
    # Collect all distinct CVE identifiers mentioned in the advisory.
    cves = re.findall(p2, dla)
    cves = list(set(cves))
    return (pkg, cves)

def fixURL(url, pkg):
    # Manual overrides for two announcements whose package name is not parsed correctly.
    if url == 'https://lists.debian.org/debian-lts-announce/2016/10/msg00011.html':
        return 'mpg123'
    elif url == 'https://lists.debian.org/debian-lts-announce/2016/05/msg00037.html':
        return 'graphicsmagick'
    else:
        return pkg

if __name__ == "__main__":
    dlas = getDLAs()
    #saveDLAs(dlas)