LabelManager.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201
  1. import os.path
  2. from datetime import datetime
  3. from xml.dom.minidom import *
  4. from functools import total_ordering
  5. @total_ordering
  6. class Label:
  7. def __init__(self, attack_name, timestamp_start, timestamp_end, attack_note=""):
  8. """
  9. Creates a new attack label
  10. :param attack_name: The name of the associated attack
  11. :param timestamp_start: The timestamp as unix time of the first attack packet
  12. :param timestamp_end: The timestamp as unix time of the last attack packet
  13. :param attack_note: A note associated to the attack (optional)
  14. """
  15. self.attack_name = attack_name
  16. self.timestamp_start = timestamp_start
  17. self.timestamp_end = timestamp_end
  18. self.attack_note = attack_note
  19. def __eq__(self, other):
  20. return self.timestamp == other.timestamp
  21. def __lt__(self, other):
  22. return self.timestamp_start < other.timestamp_start
  23. def __gt__(self, other):
  24. return self.timestamp_start > other.timestamp_start
  25. def __str__(self):
  26. return ''.join(
  27. ['(', self.attack_name, ',', self.attack_note, ',', str(self.timestamp_start), ',', str(self.timestamp_end),
  28. ')'])
  29. class LabelManager:
  30. TAG_ROOT = 'LABELS'
  31. TAG_ATTACK = 'attack'
  32. TAG_ATTACK_NAME = 'attack_name'
  33. TAG_ATTACK_NOTE = 'attack_note'
  34. TAG_TIMESTAMP_START = 'timestamp_start'
  35. TAG_TIMESTAMP_END = 'timestamp_end'
  36. TAG_TIMESTAMP = 'timestamp'
  37. TAG_TIMESTAMP_HR = 'timestamp_hr'
  38. ATTR_VERSION = 'version_parser'
  39. # update this attribute if XML scheme was modified
  40. ATTR_VERSION_VALUE = '0.2'
  41. def __init__(self, filepath_pcap=None):
  42. """
  43. Creates a new LabelManager for managing the attack's labels.
  44. :param filepath_pcap: The path to the PCAP file associated to the labels.
  45. """
  46. self.labels = list()
  47. if filepath_pcap is not None:
  48. # splitext gives us the filename without extension
  49. self.label_file_path = os.path.splitext(filepath_pcap)[0] + '_labels.xml'
  50. # only load labels if label file is existing
  51. if os.path.exists(self.label_file_path):
  52. self.load_labels()
  53. def add_labels(self, labels):
  54. """
  55. Adds a label to the internal list of labels.
  56. :param labels: The labels to be added
  57. """
  58. if isinstance(labels, list):
  59. self.labels = self.labels + [labels]
  60. elif isinstance(labels, tuple):
  61. for l in labels:
  62. self.labels.append(l)
  63. else:
  64. self.labels.append(labels)
  65. # sorts the labels ascending by their timestamp
  66. self.labels.sort()
  67. def write_label_file(self, filepath=None):
  68. """
  69. Writes previously added/loaded labels to a XML file. Uses the given filepath as destination path, if no path is
  70. given, uses the path in label_file_path.
  71. :param filepath: The path where the label file should be written to.
  72. """
  73. def get_subtree_timestamp(xml_tag_root, timestamp_entry):
  74. """
  75. Creates the subtree for a given timestamp, consisting of the unix time format (seconds) and a human-readable
  76. output.
  77. :param xml_tag_root: The tag name for the root of the subtree
  78. :param timestamp_entry: The timestamp as unix time
  79. :return: The root node of the XML subtree
  80. """
  81. timestamp_root = doc.createElement(xml_tag_root)
  82. # add timestamp in unix format
  83. timestamp = doc.createElement(self.TAG_TIMESTAMP)
  84. timestamp.appendChild(doc.createTextNode(str(timestamp_entry)))
  85. timestamp_root.appendChild(timestamp)
  86. # add timestamp in human-readable format
  87. timestamp_hr = doc.createElement(self.TAG_TIMESTAMP_HR)
  88. timestamp_hr_text = datetime.fromtimestamp(timestamp_entry).strftime('%Y-%m-%d %H:%M:%S.%f')
  89. timestamp_hr.appendChild(doc.createTextNode(timestamp_hr_text))
  90. timestamp_root.appendChild(timestamp_hr)
  91. return timestamp_root
  92. if filepath is not None:
  93. self.label_file_path = os.path.splitext(filepath)[0] + '_labels.xml' # splitext removes the file extension
  94. # Generate XML
  95. doc = Document()
  96. node = doc.createElement(self.TAG_ROOT)
  97. node.setAttribute(self.ATTR_VERSION, self.ATTR_VERSION_VALUE)
  98. for label in self.labels:
  99. xml_tree = doc.createElement(self.TAG_ATTACK)
  100. # add attack to XML tree
  101. attack_name = doc.createElement(self.TAG_ATTACK_NAME)
  102. attack_name.appendChild(doc.createTextNode(str(label.attack_name)))
  103. xml_tree.appendChild(attack_name)
  104. attack_note = doc.createElement(self.TAG_ATTACK_NOTE)
  105. attack_note.appendChild(doc.createTextNode(str(label.attack_note)))
  106. xml_tree.appendChild(attack_note)
  107. # add timestamp_start to XML tree
  108. xml_tree.appendChild(get_subtree_timestamp(self.TAG_TIMESTAMP_START, label.timestamp_start))
  109. # add timestamp_end to XML tree
  110. xml_tree.appendChild(get_subtree_timestamp(self.TAG_TIMESTAMP_END, label.timestamp_end))
  111. node.appendChild(xml_tree)
  112. doc.appendChild(node)
  113. # Write XML to file
  114. file = open(self.label_file_path, 'w')
  115. file.write(doc.toprettyxml())
  116. file.close()
  117. def load_labels(self):
  118. """
  119. Loads the labels from an already existing label XML file located at label_file_path (set by constructor).
  120. """
  121. def get_value_from_node(node, tag_name, *child_number):
  122. """
  123. Returns the value located in the tag specified by tag_name from a given node. Walks therefor the
  124. node's children along as indicated by child_number, e.g., childNumber = (1,2,) first goes to the 1st child, and
  125. then to the 2nd child of the first child -> elem.childNodes[1].childNodes[2].
  126. """
  127. elem = node.getElementsByTagName(tag_name)
  128. if len(elem) == 1:
  129. elem = elem[0]
  130. for c in child_number:
  131. if len(elem.childNodes) > 0:
  132. elem = elem.childNodes[c]
  133. else:
  134. return ""
  135. return elem.data
  136. else:
  137. return ""
  138. print("Label file found. Loading labels...")
  139. try:
  140. dom = parse(self.label_file_path)
  141. except Exception:
  142. print('ERROR: Provided label file could not be parsed. Ignoring label file')
  143. return
  144. # Check if version of parser and version of file match
  145. version = dom.getElementsByTagName(self.TAG_ROOT)
  146. if len(version) > 0:
  147. version = version[0].getAttribute(self.ATTR_VERSION)
  148. if version == [] or not version == self.ATTR_VERSION_VALUE:
  149. print(
  150. "The file " + self.label_file_path + " was created by another version of ID2TLib.LabelManager. Ignoring label file.")
  151. # Parse attacks from XML file
  152. attacks = dom.getElementsByTagName(self.TAG_ATTACK)
  153. count_labels = 0
  154. for a in attacks:
  155. attack_name = get_value_from_node(a, self.TAG_ATTACK_NAME, 0)
  156. attack_note = get_value_from_node(a, self.TAG_ATTACK_NOTE, 0)
  157. timestamp_start = get_value_from_node(a, self.TAG_TIMESTAMP_START, 1, 0)
  158. timestamp_end = get_value_from_node(a, self.TAG_TIMESTAMP_END, 1, 0)
  159. label = Label(attack_name, float(timestamp_start), float(timestamp_end), attack_note)
  160. self.labels.append(label)
  161. count_labels += 1
  162. print("Read " + str(count_labels) + " label(s) successfully.")