Browse Source

Adds abstract base classifier to simplify further development

Andreas T. Meyer-Berg 4 years ago
parent
commit
7476fc48f8

+ 2 - 1
examples/UnsupervisedAnomalyDetectionExample2.java

@@ -56,7 +56,7 @@ public class UnsupervisedAnomalyDetectionExample2 implements PacketSniffer {
 	/**
 	 * Number of Clusters
 	 */
-	private int NUMBER_OF_CLUSTERS = 30;
+	private int NUMBER_OF_CLUSTERS = 2;
 	
 	/**
 	 * Number of packets used for number of packets per second
@@ -85,6 +85,7 @@ public class UnsupervisedAnomalyDetectionExample2 implements PacketSniffer {
 		protocol_mappings.add("unknown");
 		// Initialize data set
 		// Initialize Clusterer
+
 		clusterer = new SimpleKMeans();
 		clusterer.setSeed(42);
 		try {

+ 326 - 0
examples/classifier/BasicPacketClassifier.java

@@ -0,0 +1,326 @@
+package classifier;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.Map.Entry;
+
+import de.tu_darmstadt.tk.SmartHomeNetworkSim.core.Link;
+import de.tu_darmstadt.tk.SmartHomeNetworkSim.core.Packet;
+import de.tu_darmstadt.tk.SmartHomeNetworkSim.core.PacketSniffer;
+import weka.core.Attribute;
+import weka.core.DenseInstance;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Unsupervised Classifier Basis, which contains methods for transforming {@link Packet}s into {@link Instance}s.
+ *
+ * @author Andreas T. Meyer-Berg
+ */
+public abstract class BasicPacketClassifier implements PacketSniffer {
+
+	/**
+	 * True, if instances should be used for training
+	 */
+	protected boolean training = true;
+	
+	/**
+	 * Attributes which should be taken into account
+	 */
+	protected ArrayList<Attribute> atts = new ArrayList<Attribute>();
+	
+	/**
+	 * Collected Packets
+	 */
+	protected Instances dataset;
+	
+	/**
+	 * CollectedPackets
+	 */
+	protected HashMap<Link, LinkedList<Packet>> collectedPackets = new HashMap<Link, LinkedList<Packet>>();
+	
+	/**
+	 * HashMap for calculating transmission delay
+	 */
+	protected HashMap<Link, LinkedList<Packet>> lastPackets = new HashMap<Link, LinkedList<Packet>>();
+	
+	/**
+	 * Map for the different Link names
+	 */
+	protected HashSet<String> link_mappings = new HashSet<String>();
+
+	/**
+	 * Map for the difference source device names
+	 */
+	protected HashSet<String> source_mappings = new HashSet<String>();
+	
+	/**
+	 * Map for the different destination device names
+	 */
+	protected HashSet<String> destination_mappings = new HashSet<String>();
+	
+	/**
+	 * Map for the protocol names
+	 */
+	protected HashSet<String> protocol_mappings = new HashSet<String>();
+	
+	protected int NUMBER_OF_PACKETS = 200;
+	/**
+	 * Initializes the different maps
+	 */
+	public BasicPacketClassifier() {
+		// Initialize Attribute list
+		source_mappings.add("unknown");
+		link_mappings.add("unknown");
+		destination_mappings.add("unknown");
+		protocol_mappings.add("unknown");
+	}
+	
+	/**
+	 * Initialize the Algorithm parameters
+	 */
+	protected abstract void initializeAlgorithm();
+	
+	@Override
+	public void processPackets(HashMap<Link, LinkedList<Packet>> packets) {
+		if(training && !packets.entrySet().isEmpty() && !packets.entrySet().iterator().next().getValue().isEmpty() && packets.entrySet().iterator().next().getValue().getFirst().getTimestamp()>getClassificationStart()) {
+			training = false;
+			// Build Clusterer
+			try {
+				finishDataCollection();
+			} catch (Exception e) {
+				System.out.println("Clustering failed");
+				e.printStackTrace();
+			}
+		}
+		
+		if(training)
+			try {
+				training(packets);
+			} catch (Exception e) {
+				e.printStackTrace();
+			}
+		else
+			classify(packets);
+	}
+	
+	/**
+	 * Estimates the current Packets per second (depending on the last 100 packets of the link)
+	 * @param link Link which should be checked
+	 * @param packet Packet which should investigated
+	 * @return estimated number of packets per second
+	 */
+	protected double getEstimatedPacketsPerSecond(Link link, Packet packet) {
+		/**
+		 * Packets used to calculated the packets per second
+		 */
+		LinkedList<Packet> list = lastPackets.get(link);
+		if(list == null) {
+			/**
+			 * Add list if not present
+			 */
+			list = new LinkedList<Packet>();
+			lastPackets.put(link, list);
+		}
+		if(list.isEmpty()) {
+			list.addLast(packet);
+			// Default 1 packet per second
+			return 1.0;
+		}
+		if(list.size() == NUMBER_OF_PACKETS){
+			list.removeFirst();	
+		}
+		list.addLast(packet);
+		/**
+		 * elapsed time in milliseconds since last packet
+		 */
+		long elapsed_time = packet.getTimestamp()-list.getFirst().getTimestamp()/list.size();
+		if(elapsed_time<=0)
+			return Double.POSITIVE_INFINITY;
+		/**
+		 * Return number of packets per second
+		 */
+		return 1000.0/elapsed_time;
+		
+	}
+	
+	/**
+	 * Returns the instance representation of the given packet and link
+	 * @param link link the packet was sent on
+	 * @param packet packet which should be transformed
+	 * @param dataset distribution the packet is part of
+	 * @return instance representation
+	 */
+	protected Instance packet2Instance(Link link, Packet packet, Instances dataset) {
+		/**
+		 * Instance for the given Packet
+		 */
+		DenseInstance instance = new DenseInstance(dataset.numAttributes());
+		instance.setDataset(dataset);
+		
+		// link
+		instance.setValue(0, stringToNominal(link_mappings, link.getName()));
+		
+		// source
+		if(packet.getSource()==null) {
+			instance.setValue(1, "unknown");
+			instance.setValue(2, Double.NEGATIVE_INFINITY);
+		}else if(packet.getSource().getOwner()==null){
+			instance.setValue(1, "unknown");
+			instance.setValue(2, packet.getSource().getPortNumber());
+		}else {
+			instance.setValue(1, stringToNominal(source_mappings, packet.getSource().getOwner().getName()));
+			instance.setValue(2, packet.getSource().getPortNumber());
+		}
+		
+		// Destination
+		if(packet.getDestination()==null) {
+			instance.setValue(3, "unknown");
+			instance.setValue(4, Double.NEGATIVE_INFINITY);
+		}else if(packet.getDestination().getOwner()==null){
+			instance.setValue(3, "unknown");
+			instance.setValue(4, packet.getDestination().getPortNumber());
+		}else {
+			instance.setValue(3, stringToNominal(destination_mappings, packet.getDestination().getOwner().getName()));
+			instance.setValue(4, packet.getDestination().getPortNumber());
+		}
+		
+		// Protocol name
+		instance.setValue(5, stringToNominal(protocol_mappings, packet.getProtocolName()));
+		
+		// Packets per second
+		instance.setValue(6, getEstimatedPacketsPerSecond(link, packet));
+		
+		return instance;
+	}
+	
+	/**
+	 * Transforms the String into an Number
+	 * @param map
+	 * @param s
+	 * @return
+	 */
+	protected String stringToNominal(HashSet<String> map, String s) {
+		return map.contains(s)?s:"unknown";
+	} 
+	
+	/**
+	 * Train the clusterer by collecting the packets
+	 * 
+	 * @param packets packets to be learned
+	 */
+	protected void training(HashMap<Link, LinkedList<Packet>> packets) {
+		for(Entry<Link, LinkedList<Packet>> e:packets.entrySet()) {
+			Link l = e.getKey();
+			LinkedList<Packet> p = collectedPackets.get(l);
+			if(p == null)
+				collectedPackets.put(l, new LinkedList<Packet>(e.getValue()));
+			else
+				p.addAll(e.getValue());
+		}
+	}
+	
+	/**
+	 * Finishes the collection and trains the clusterer on the collected packets
+	 * 
+	 * @throws Exception
+	 */
+	protected void finishDataCollection() throws Exception{
+		atts.add(new Attribute("Link-Name", new LinkedList<String>(link_mappings)));//TODO:??
+		atts.add(new Attribute("Source-Device", new LinkedList<String>(source_mappings)));
+		atts.add(new Attribute("Source-Port-number", false));
+		atts.add(new Attribute("Destination-Device", new LinkedList<String>(destination_mappings)));
+		atts.add(new Attribute("Destination-Port-number", false));
+		Attribute pn = new Attribute("Protocol-name", new LinkedList<String>(protocol_mappings));
+		//pn.setWeight(10);
+		atts.add(pn);
+		Attribute pps = new Attribute("Packets-per-second", false);
+		//pps.setWeight(20);
+		atts.add(pps);
+		//atts.add(new Attribute("Anomaly", false));
+
+		/*
+		atts = new ArrayList<Attribute>();
+		atts.add(new Attribute("LN", new LinkedList<String>(link_mappings)));//TODO:??
+		atts.add(new Attribute("SD", new LinkedList<String>(source_mappings)));
+		atts.add(new Attribute("SPN", false));
+		atts.add(new Attribute("DD", new LinkedList<String>(destination_mappings)));
+		atts.add(new Attribute("DPN", false));
+		atts.add(new Attribute("PN", new LinkedList<String>(protocol_mappings)));
+		atts.add(new Attribute("PPS", false));
+		atts.add(new Attribute("A", false));*/
+		dataset = new Instances("Packets", atts, 100000);
+		//dataset.setClassIndex(7);
+
+		/**
+		 * Add Instances to dataset
+		 */
+		for (Iterator<Entry<Link, LinkedList<Packet>>> it = collectedPackets.entrySet().iterator(); it.hasNext();) {
+			Entry<Link, LinkedList<Packet>> entry = it.next();
+			/**
+			 * Link the packet was captured on
+			 */
+			Link l = entry.getKey();
+			for (Iterator<Packet> itPacket = entry.getValue().iterator(); itPacket.hasNext();) {
+				/**
+				 * Packets to be added to the dataset
+				 */
+				Packet packet = (Packet) itPacket.next();
+				dataset.add(packet2Instance(l, packet, dataset));
+			}
+		}
+		
+		trainModel(dataset);
+	}
+	
+	/**
+	 * Try to classify the given packets and detect anomalies
+	 * @param packets packets to be classified
+	 */
+	protected void classify(HashMap<Link, LinkedList<Packet>> packets) {
+		for (Iterator<Entry<Link, LinkedList<Packet>>> it = packets.entrySet().iterator(); it.hasNext();) {
+			/**
+			 * Link & its packets
+			 */
+			Entry<Link, LinkedList<Packet>> entry = it.next();
+			/**
+			 * Link the packets were captured on
+			 */
+			Link l = entry.getKey();
+			for (Iterator<Packet> itPacket = entry.getValue().iterator(); itPacket.hasNext();) {
+				/**
+				 * Packet which should be checked
+				 */
+				Packet packet = (Packet) itPacket.next();
+				/**
+				 * Instance Representation
+				 */
+				Instance packet_instance = packet2Instance(l, packet, dataset);
+				
+				if(packet_instance == null)continue;
+				classifyInstance(null, packet);
+			}	
+		}
+	}
+	
+	/**
+	 * Train the model using the given instances
+	 * @param instances training set, which should be learned
+	 */
+	public abstract void trainModel(Instances instances);
+	
+	/**
+	 * classifies the given instance
+	 * @param instance instance which should be classified
+	 * @param origin original packet, which was transformed into the instance
+	 */
+	public abstract void classifyInstance(Instance instance, Packet origin);
+	
+	/**
+	 * Returns the timestep, after which the classifier should start classifying instead of training.
+	 * @return timestep of the testing begin.
+	 */
+	public abstract long getClassificationStart();
+}

+ 68 - 0
examples/classifier/KMeansClustering.java

@@ -0,0 +1,68 @@
+package classifier;
+
+import de.tu_darmstadt.tk.SmartHomeNetworkSim.core.Packet;
+import weka.clusterers.SimpleKMeans;
+import weka.core.Instance;
+import weka.core.Instances;
+
+/**
+ * Unsupervised Example: K Means Clustering
+ *
+ * @author Andreas T. Meyer-Berg
+ */
+public class KMeansClustering extends BasicPacketClassifier {
+
+	/**
+	 * Clusterer
+	 */
+	private SimpleKMeans clusterer;
+
+	/**
+	 * Number of Clusters
+	 */
+	protected int NUMBER_OF_CLUSTERS = 2;
+	
+	/**
+	 * 
+	 */
+	public KMeansClustering() {
+		super();
+		initializeAlgorithm();
+	}
+
+	@Override
+	protected void initializeAlgorithm() {
+		clusterer = new SimpleKMeans();
+		clusterer.setSeed(42);
+		try {
+			clusterer.setNumClusters(this.NUMBER_OF_CLUSTERS);
+		} catch (Exception e) {
+			System.out.println("Error while building cluster");
+			e.printStackTrace();
+		}
+	}
+
+	@Override
+	public void trainModel(Instances instances) {
+		try {
+			clusterer.buildClusterer(instances);
+		} catch (Exception e) {
+			System.out.println("Failed while training the classifier");
+			e.printStackTrace();
+		}
+	}
+
+	@Override
+	public void classifyInstance(Instance instance, Packet origin) {
+		try {
+			clusterer.clusterInstance(instance);
+		} catch (Exception e) {
+			System.out.println("Anomaly Detected on Packet: "+origin.getTextualRepresentation());
+		}
+	}
+
+	@Override
+	public long getClassificationStart() {
+		return 10000;
+	}
+}