# PcapFile.py
import hashlib
import os.path

import ID2TLib.libpcapreader as pr
import ID2TLib.Utility as Util
  5. class PcapFile(object):
  6. def __init__(self, pcap_file_path: str):
  7. """
  8. Creates a new PcapFile associated to the PCAP file at pcap_file_path.
  9. :param pcap_file_path: The path to the PCAP file
  10. """
  11. self.pcap_file_path = pcap_file_path
  12. def merge_attack(self, attack_pcap_path: str):
  13. """
  14. Merges the loaded PCAP with the PCAP at attack_pcap_path.
  15. :param attack_pcap_path: The path to the PCAP file to merge with the PCAP at pcap_file_path
  16. :return: The file path of the resulting PCAP file
  17. """
  18. pcap = pr.pcap_processor(self.pcap_file_path, "False")
  19. file_out_path = pcap.merge_pcaps(attack_pcap_path)
  20. return file_out_path
  21. def get_file_hash(self):
  22. """
  23. Returns the hash for the loaded PCAP file. The hash is calculated based on:
  24. - the file size in bytes
  25. - the first 224*40000 bytes of the file
  26. :return: The hash for the PCAP file as string.
  27. """
  28. # Blocksize in bytes
  29. const_blocksize = 224
  30. # Number of blocks to read at beginning of file
  31. const_max_blocks_read = 40000
  32. # Initialize required variables
  33. hasher = hashlib.sha224()
  34. blocks_read = 0
  35. # Hash calculation
  36. with open(self.pcap_file_path, 'rb') as afile:
  37. # Add filename -> makes trouble when renaming the PCAP
  38. # hasher.update(afile.name.encode('utf-8'))
  39. # Add file's last modification date -> makes trouble when copying the PCAP
  40. # hasher.update(str(time.ctime(os.path.getmtime(self.pcap_file_path))).encode('utf-8'))
  41. # Add file size
  42. hasher.update(str(os.path.getsize(self.pcap_file_path)).encode('utf-8'))
  43. # Add max. first 40000 * 224 bytes = 8,5 MB of file
  44. buf = afile.read(const_blocksize)
  45. blocks_read += 1
  46. while len(buf) > 0 and blocks_read < const_max_blocks_read:
  47. hasher.update(buf)
  48. buf = afile.read(const_blocksize)
  49. blocks_read += 1
  50. return hasher.hexdigest()
  51. def get_db_path(self, root_directory: str = os.path.join(Util.CACHE_DIR, 'db')):
  52. """
  53. Creates a path based on a hashed directory structure. Derives a hash code by the file's hash and derives
  54. thereof the database path.
  55. Code and idea based on:
  56. http://michaelandrews.typepad.com/the_technical_times/2009/10/creating-a-hashed-directory-structure.html
  57. :param root_directory: The root directory of the hashed directory structure (optional)
  58. :return: The full path to the database file
  59. """
  60. def hashcode(string_in: str):
  61. """
  62. Creates a hashcode of a string, based on Java's hashcode implementation.
  63. Code based on: http://garage.pimentech.net/libcommonPython_src_python_libcommon_javastringhashcode/
  64. :param string_in: The string the hashcode should be calculated from
  65. :return: The hashcode as string
  66. """
  67. h = 0
  68. for c in string_in:
  69. h = (31 * h + ord(c)) & 0xFFFFFFFF
  70. return ((h + 0x80000000) & 0xFFFFFFFF) - 0x80000000
  71. file_hash = self.get_file_hash()
  72. hashcode = hashcode(file_hash)
  73. mask = 255
  74. dir_first_level = hashcode & mask
  75. dir_second_level = (hashcode >> 8) & mask
  76. return os.path.join(root_directory, str(dir_first_level), str(dir_second_level), file_hash[0:12] + ".sqlite3")