UnsupervisedAnomalyDetectionExample.java 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. import java.util.ArrayList;
  2. import java.util.HashMap;
  3. import java.util.Iterator;
  4. import java.util.LinkedList;
  5. import java.util.Map.Entry;
  6. import de.tu_darmstadt.tk.SmartHomeNetworkSim.core.Link;
  7. import de.tu_darmstadt.tk.SmartHomeNetworkSim.core.Packet;
  8. import de.tu_darmstadt.tk.SmartHomeNetworkSim.core.PacketSniffer;
  9. import weka.clusterers.SimpleKMeans;
  10. import weka.core.Attribute;
  11. import weka.core.DenseInstance;
  12. import weka.core.Instance;
  13. import weka.core.Instances;
  14. /**
  15. * Unsupervised Example - maybe Clustering
  16. *
  17. * @author Andreas T. Meyer-Berg
  18. */
  19. public class UnsupervisedAnomalyDetectionExample implements PacketSniffer {
  20. /**
  21. * Clusterer
  22. */
  23. private SimpleKMeans clusterer;
  24. /**
  25. * True, if instances should be used for training
  26. */
  27. private boolean training = true;
  28. /**
  29. * Attributes which should be taken into account
  30. */
  31. private ArrayList<Attribute> atts = new ArrayList<Attribute>();
  32. /**
  33. * Collected Packets
  34. */
  35. private Instances dataset;
  36. /**
  37. * HashMap for calculating transmission delay
  38. */
  39. private HashMap<Link, LinkedList<Packet>> lastPackets = new HashMap<Link, LinkedList<Packet>>();
  40. /**
  41. * Number of Clusters
  42. */
  43. private int NUMBER_OF_CLUSTERS = 2;
  44. /**
  45. * Number of packets used for number of packets per second
  46. */
  47. private int NUMBER_OF_PACKETS = 30;
  48. /**
  49. *
  50. */
  51. private HashMap<String,Integer> link_mappings = new HashMap<String, Integer>();
  52. private HashMap<String,Integer> source_mappings = new HashMap<String, Integer>();
  53. private HashMap<String,Integer> destination_mappings = new HashMap<String, Integer>();
  54. private HashMap<String,Integer> protocol_mappings = new HashMap<String, Integer>();
  55. /**
  56. *
  57. */
  58. public UnsupervisedAnomalyDetectionExample() {
  59. // Initialize Attribute list
  60. link_mappings.put("unknown", 0);
  61. atts.add(new Attribute("Link-Name", false));//TODO:??
  62. source_mappings.put("unknown", 0);
  63. atts.add(new Attribute("Source-Device", false));
  64. atts.add(new Attribute("Source-Port-number", false));
  65. destination_mappings.put("unknown", 0);
  66. atts.add(new Attribute("Destination-Device", false));
  67. atts.add(new Attribute("Destination-Port-number", false));
  68. protocol_mappings.put("unknown", 0);
  69. atts.add(new Attribute("Protocol-name", false));
  70. atts.add(new Attribute("Packets-per-second", false));
  71. // Initialize data set
  72. dataset = new Instances("Packets", atts, 100000);
  73. // Initialize Clusterer
  74. clusterer = new SimpleKMeans();
  75. clusterer.setSeed(42);
  76. try {
  77. clusterer.setNumClusters(NUMBER_OF_CLUSTERS);
  78. } catch (Exception e) {
  79. System.out.println("Error while building cluster");
  80. e.printStackTrace();
  81. }
  82. }
  83. @Override
  84. public void processPackets(HashMap<Link, LinkedList<Packet>> packets) {
  85. if(!packets.entrySet().isEmpty() && packets.entrySet().iterator().next().getValue().getFirst().getTimestamp()>10000) {
  86. training = false;
  87. // Build Clusterer
  88. try {
  89. finishDataCollection();
  90. } catch (Exception e) {
  91. System.out.println("Clustering failed");
  92. e.printStackTrace();
  93. }
  94. }
  95. if(training)
  96. try {
  97. training(packets);
  98. } catch (Exception e) {
  99. e.printStackTrace();
  100. }
  101. else
  102. classify(packets);
  103. }
  104. /**
  105. * Estimates the current Packets per second (depending on the last 100 packets of the link)
  106. * @param link Link which should be checked
  107. * @param packet Packet which should investigated
  108. * @return estimated number of packets per second
  109. */
  110. private double getEstimatedPacketsPerSecond(Link link, Packet packet) {
  111. /**
  112. * Packets used to calculated the packets per second
  113. */
  114. LinkedList<Packet> list = lastPackets.get(link);
  115. if(list == null) {
  116. /**
  117. * Add list if not present
  118. */
  119. list = new LinkedList<Packet>();
  120. lastPackets.put(link, list);
  121. }
  122. if(list.isEmpty()) {
  123. list.addLast(packet);
  124. // Default 1 packet per second
  125. return 1.0;
  126. }
  127. if(list.size() == NUMBER_OF_PACKETS){
  128. list.removeFirst();
  129. }
  130. list.addLast(packet);
  131. /**
  132. * elapsed time in milliseconds since last packet
  133. */
  134. long elapsed_time = packet.getTimestamp()-list.getFirst().getTimestamp()/list.size();
  135. if(elapsed_time<=0)
  136. return Double.POSITIVE_INFINITY;
  137. /**
  138. * Return number of packets per second
  139. */
  140. return 1000.0/elapsed_time;
  141. }
  142. /**
  143. * Returns the instance representation of the given packet and link
  144. * @param link link the packet was sent on
  145. * @param packet packet which should be transformed
  146. * @param dataset distribution the packet is part of
  147. * @return instance representation
  148. */
  149. private Instance packet2Instance(Link link, Packet packet, Instances dataset) {
  150. /**
  151. * Instance for the given Packet
  152. */
  153. DenseInstance instance = new DenseInstance(dataset.numAttributes());
  154. instance.setDataset(dataset);
  155. // link
  156. instance.setValue(0, link == null ? 0 : stringToNumber(link_mappings, link.getName()));
  157. // source
  158. if(packet.getSource()==null) {
  159. instance.setValue(1, 0);
  160. instance.setValue(2, Double.NEGATIVE_INFINITY);
  161. }else if(packet.getSource().getOwner()==null){
  162. instance.setValue(1, 0);
  163. instance.setValue(2, packet.getSource().getPortNumber());
  164. }else {
  165. instance.setValue(1, stringToNumber(source_mappings, packet.getSource().getOwner().getName()));
  166. instance.setValue(2, packet.getSource().getPortNumber());
  167. }
  168. // Destination
  169. if(packet.getDestination()==null) {
  170. instance.setValue(3, 0);
  171. instance.setValue(4, Double.NEGATIVE_INFINITY);
  172. }else if(packet.getDestination().getOwner()==null){
  173. instance.setValue(3, 0);
  174. instance.setValue(4, packet.getDestination().getPortNumber());
  175. }else {
  176. instance.setValue(3, stringToNumber(destination_mappings, packet.getDestination().getOwner().getName()));
  177. instance.setValue(4, packet.getDestination().getPortNumber());
  178. }
  179. // Protocol name
  180. instance.setValue(5, stringToNumber(protocol_mappings, packet.getProtocolName()));
  181. // Packets per second
  182. instance.setValue(6, getEstimatedPacketsPerSecond(link, packet));
  183. return instance;
  184. }
  185. /**
  186. * Transforms the String into an Number
  187. * @param map
  188. * @param s
  189. * @return
  190. */
  191. double stringToNumber(HashMap<String, Integer> map, String s) {
  192. Integer i = map.get(s);
  193. if(i == null) {
  194. int size = map.size();
  195. map.put(s, size);
  196. return size;
  197. }else {
  198. return i;
  199. }
  200. }
  201. /**
  202. * Train the clusterer by collecting the packets
  203. *
  204. * @param packets packets to be learned
  205. */
  206. private void training(HashMap<Link, LinkedList<Packet>> packets) {
  207. for (Iterator<Entry<Link, LinkedList<Packet>>> it = packets.entrySet().iterator(); it.hasNext();) {
  208. Entry<Link, LinkedList<Packet>> entry = it.next();
  209. /**
  210. * Link the packet was captured on
  211. */
  212. Link l = entry.getKey();
  213. for (Iterator<Packet> itPacket = entry.getValue().iterator(); itPacket.hasNext();) {
  214. /**
  215. * Packets to be added to the dataset
  216. */
  217. Packet packet = (Packet) itPacket.next();
  218. dataset.add(packet2Instance(l, packet, dataset));
  219. }
  220. }
  221. }
  222. /**
  223. * Finishes the collection and trains the clusterer on the collected packets
  224. *
  225. * @throws Exception
  226. */
  227. private void finishDataCollection() throws Exception{
  228. /**
  229. * Build the clusterer for the given dataset
  230. */
  231. clusterer.buildClusterer(dataset);
  232. }
  233. /**
  234. * Try to classify the given packets and detect anomalies
  235. * @param packets packets to be classified
  236. */
  237. private void classify(HashMap<Link, LinkedList<Packet>> packets) {
  238. for (Iterator<Entry<Link, LinkedList<Packet>>> it = packets.entrySet().iterator(); it.hasNext();) {
  239. /**
  240. * Link & its packets
  241. */
  242. Entry<Link, LinkedList<Packet>> entry = it.next();
  243. /**
  244. * Link the packets were captured on
  245. */
  246. Link l = entry.getKey();
  247. for (Iterator<Packet> itPacket = entry.getValue().iterator(); itPacket.hasNext();) {
  248. /**
  249. * Packet which should be checked
  250. */
  251. Packet packet = (Packet) itPacket.next();
  252. /**
  253. * Instance Representation
  254. */
  255. Instance packet_instance = packet2Instance(l, packet, dataset);
  256. try {
  257. /**
  258. * Try to classify (find appropriate cluster)
  259. */
  260. clusterer.clusterInstance(packet_instance);
  261. } catch (Exception e) {
  262. /**
  263. * Anomaly found
  264. */
  265. System.out.println("Anomaly: "+packet.getTextualRepresentation());
  266. //e.printStackTrace();
  267. }
  268. }
  269. }
  270. }
  271. }