# LabelManager.py
  1. import importlib
  2. import datetime as dt
  3. import os.path
  4. import xml.dom.minidom as minidom
  5. import pytz as pytz
  6. import ID2TLib.Label as Label
  7. import ID2TLib.TestLibrary as Lib
  8. class LabelManager:
  9. TAG_ROOT = 'labels'
  10. TAG_INPUT = 'input'
  11. TAG_OUTPUT = 'output'
  12. TAG_FILE_NAME = 'filename'
  13. TAG_FILE_HASH = 'sha256'
  14. TAG_ATTACK = 'attack'
  15. TAG_ATTACK_NAME = 'name'
  16. TAG_ATTACK_NOTE = 'note'
  17. TAG_ATTACK_SEED = 'seed'
  18. TAG_ATTACK_PACKETS = 'injected_packets'
  19. TAG_TIMESTAMP_START = 'timestamp_start'
  20. TAG_TIMESTAMP_END = 'timestamp_end'
  21. TAG_TIMESTAMP = 'timestamp'
  22. TAG_TIMESTAMP_HR = 'timestamp_hr'
  23. TAG_PARAMETERS = 'parameters'
  24. ATTR_VERSION = 'version_parser'
  25. ATTR_PARAM_USERSPECIFIED = 'user_specified'
  26. # update this attribute if XML scheme was modified
  27. ATTR_VERSION_VALUE = '0.3'
  28. def __init__(self, filepath_pcap=None):
  29. """
  30. Creates a new LabelManager for managing the attack's labels.
  31. :param filepath_pcap: The path to the PCAP file associated to the labels.
  32. """
  33. self.labels = list()
  34. self.filepath_input_pcap = filepath_pcap
  35. if filepath_pcap is not None:
  36. self.label_file_path = os.path.splitext(filepath_pcap)[0] + '_labels.xml'
  37. # only load labels if label file is existing
  38. if os.path.exists(self.label_file_path):
  39. self.load_labels()
  40. def add_labels(self, labels):
  41. """
  42. Adds a label to the internal list of labels.
  43. :param labels: The labels to be added
  44. """
  45. if isinstance(labels, list):
  46. self.labels = self.labels + [labels]
  47. elif isinstance(labels, tuple):
  48. for l in labels:
  49. self.labels.append(l)
  50. else:
  51. self.labels.append(labels)
  52. # sorts the labels ascending by their timestamp
  53. self.labels.sort()
  54. def write_label_file(self, filepath=None):
  55. """
  56. Writes previously added/loaded labels to a XML file. Uses the given filepath as destination path, if no path is
  57. given, uses the path in label_file_path.
  58. :param filepath: The path where the label file should be written to.
  59. """
  60. def get_subtree_fileinfo(xml_tag_root, filename) -> minidom.Element:
  61. """
  62. Creates the subtree for pcap file information (filename and hash).
  63. :return: The root node of the XML subtree
  64. """
  65. input_root = doc.createElement(xml_tag_root)
  66. file = doc.createElement(self.TAG_FILE_NAME)
  67. file.appendChild(doc.createTextNode(os.path.split(filename)[-1]))
  68. input_root.appendChild(file)
  69. hash_node = doc.createElement(self.TAG_FILE_HASH)
  70. hash_node.appendChild(doc.createTextNode(Lib.get_sha256(filename)))
  71. input_root.appendChild(hash_node)
  72. return input_root
  73. def get_subtree_timestamp(xml_tag_root, timestamp_entry):
  74. """
  75. Creates the subtree for a given timestamp, consisting of the unix time format (seconds) and a human-readable
  76. output.
  77. :param xml_tag_root: The tag name for the root of the subtree
  78. :param timestamp_entry: The timestamp as unix time
  79. :return: The root node of the XML subtree
  80. """
  81. timestamp_root = doc.createElement(xml_tag_root)
  82. # add timestamp in unix format
  83. timestamp = doc.createElement(self.TAG_TIMESTAMP)
  84. timestamp.appendChild(doc.createTextNode(str(timestamp_entry)))
  85. timestamp_root.appendChild(timestamp)
  86. # add timestamp in human-readable format
  87. timestamp_hr = doc.createElement(self.TAG_TIMESTAMP_HR)
  88. timestamp_hr_text = dt.datetime.fromtimestamp(timestamp_entry).astimezone(pytz.timezone('UTC')).strftime('%Y-%m-%d %H:%M:%S.%f')
  89. timestamp_hr.appendChild(doc.createTextNode(timestamp_hr_text))
  90. timestamp_root.appendChild(timestamp_hr)
  91. return timestamp_root
  92. def get_subtree_parameters(parameters):
  93. """
  94. Creates a subtree containing all parameters used to construct the attack
  95. :param parameters: The list of parameters used to run the attack
  96. :return: The root node of the XML subtree
  97. """
  98. parameters_root = doc.createElement(self.TAG_PARAMETERS)
  99. for param_key, param_value in parameters.items():
  100. param = doc.createElement(param_key.value)
  101. param.appendChild(doc.createTextNode(str(param_value.value)))
  102. param.setAttribute(self.ATTR_PARAM_USERSPECIFIED, str(param_value.user_specified))
  103. parameters_root.appendChild(param)
  104. return parameters_root
  105. if filepath is not None:
  106. self.label_file_path = os.path.splitext(filepath)[0] + '_labels.xml'
  107. # Generate XML
  108. doc = minidom.Document()
  109. node = doc.createElement(self.TAG_ROOT)
  110. node.setAttribute(self.ATTR_VERSION, self.ATTR_VERSION_VALUE)
  111. node.appendChild(get_subtree_fileinfo(self.TAG_INPUT, self.filepath_input_pcap))
  112. node.appendChild(get_subtree_fileinfo(self.TAG_OUTPUT, filepath))
  113. for label in self.labels:
  114. xml_tree = doc.createElement(self.TAG_ATTACK)
  115. # add attack to XML tree
  116. attack_name = doc.createElement(self.TAG_ATTACK_NAME)
  117. attack_name.appendChild(doc.createTextNode(str(label.attack_name)))
  118. xml_tree.appendChild(attack_name)
  119. attack_note = doc.createElement(self.TAG_ATTACK_NOTE)
  120. attack_note.appendChild(doc.createTextNode(str(label.attack_note)))
  121. xml_tree.appendChild(attack_note)
  122. attack_seed = doc.createElement(self.TAG_ATTACK_SEED)
  123. attack_seed.appendChild(doc.createTextNode(str(label.seed)))
  124. xml_tree.appendChild(attack_seed)
  125. injected_packets = doc.createElement(self.TAG_ATTACK_PACKETS)
  126. injected_packets.appendChild(doc.createTextNode(str(label.injected_packets)))
  127. xml_tree.appendChild(injected_packets)
  128. # add timestamp_start to XML tree
  129. xml_tree.appendChild(get_subtree_timestamp(self.TAG_TIMESTAMP_START, label.timestamp_start))
  130. # add timestamp_end to XML tree
  131. xml_tree.appendChild(get_subtree_timestamp(self.TAG_TIMESTAMP_END, label.timestamp_end))
  132. # add parameters to XML tree
  133. xml_tree.appendChild(get_subtree_parameters(label.parameters))
  134. node.appendChild(xml_tree)
  135. doc.appendChild(node)
  136. # Write XML to file
  137. file = open(self.label_file_path, 'w')
  138. file.write(doc.toprettyxml())
  139. file.close()
  140. def load_labels(self):
  141. """
  142. Loads the labels from an already existing label XML file located at label_file_path (set by constructor).
  143. """
  144. def get_value_from_node(node, tag_name, *child_number):
  145. """
  146. Returns the value located in the tag specified by tag_name from a given node. Walks therefor the
  147. node's children along as indicated by child_number, e.g., childNumber = (1,2,) first goes to the 1st child,
  148. and then to the 2nd child of the first child -> elem.childNodes[1].childNodes[2].
  149. """
  150. elem = node.getElementsByTagName(tag_name)
  151. if len(elem) == 1:
  152. elem = elem[0]
  153. for c in child_number:
  154. if len(elem.childNodes) > 0:
  155. elem = elem.childNodes[c]
  156. else:
  157. return ""
  158. return elem.data
  159. else:
  160. return ""
  161. print("Label file found. Loading labels...")
  162. try:
  163. dom = minidom.parse(self.label_file_path)
  164. except Exception:
  165. # TODO: more specific exception
  166. print('ERROR: Provided label file could not be parsed. Ignoring label file')
  167. return
  168. # Check if version of parser and version of file match
  169. version = dom.getElementsByTagName(self.TAG_ROOT)
  170. if len(version) > 0:
  171. version = version[0].getAttribute(self.ATTR_VERSION)
  172. if version == [] or not version == self.ATTR_VERSION_VALUE:
  173. print(
  174. "The file " + self.label_file_path + " was created by another version of ID2TLib.LabelManager. "
  175. "Ignoring label file.")
  176. self.input_filename = get_value_from_node(dom, self.TAG_INPUT, 1, 0)
  177. self.input_hash = get_value_from_node(dom, self.TAG_INPUT, 3, 0)
  178. self.output_filename = get_value_from_node(dom, self.TAG_OUTPUT, 1, 0)
  179. self.output_hash = get_value_from_node(dom, self.TAG_OUTPUT, 3, 0)
  180. # Parse attacks from XML file
  181. attacks = dom.getElementsByTagName(self.TAG_ATTACK)
  182. count_labels = 0
  183. for a in attacks:
  184. attack_name = get_value_from_node(a, self.TAG_ATTACK_NAME, 0)
  185. attack_note = get_value_from_node(a, self.TAG_ATTACK_NOTE, 0)
  186. timestamp_start = get_value_from_node(a, self.TAG_TIMESTAMP_START, 1, 0)
  187. timestamp_end = get_value_from_node(a, self.TAG_TIMESTAMP_END, 1, 0)
  188. attack_seed = get_value_from_node(a, self.TAG_ATTACK_SEED, 0)
  189. # Instantiate this attack to create a parameter list with the correct types
  190. attack_module = importlib.import_module("Attack." + attack_name)
  191. attack_class = getattr(attack_module, attack_name)
  192. attack = attack_class()
  193. # Loop through all parameters listed in the XML file
  194. param = a.getElementsByTagName(self.TAG_PARAMETERS)[0]
  195. for param in param.childNodes:
  196. # Skip empty text nodes returned by minidom
  197. if not isinstance(param, minidom.Text):
  198. import distutils.util
  199. param_name = param.tagName
  200. param_value = param.childNodes[0].nodeValue
  201. param_userspecified = bool(distutils.util.strtobool(param.getAttribute(self.ATTR_PARAM_USERSPECIFIED)))
  202. attack.add_param_value(param_name, param_value, param_userspecified)
  203. # Create the label from the data read
  204. label = Label.Label(attack_name, float(timestamp_start), float(timestamp_end), attack_seed, attack.params,
  205. attack_note)
  206. self.labels.append(label)
  207. count_labels += 1
  208. print("Read " + str(count_labels) + " label(s) successfully.")