Преглед изворни кода

Add more flexibility for communication interval selection

Because the C++ approach, while being vastly more efficient than
the previous approach, is still not fast enough for large input
CSVs/XMLs, three new attack parameters are added.

The first parameter determines the selection strategy of the
communication interval. The selection strategy that has been
implemented so far is called 'optimal'. The first new option is
'random', where a random start index is chosen and the interval
is spanned from there until the time exceeds the maximum interval
time or until there are no more packets beyond the start index.
The second new option is 'custom', where a start and end index can
be chosen by the user.

The second and third new parameters exist to let the user choose a
start and end index for the 'custom' strategy. If there is a start but
no end index, the end index is automatically determined by iterating over
the messages beyond the start index until the maximum time stretch is reached.
dustin.born пре 7 година
родитељ
комит
b78e05fe67

+ 5 - 0
code/Attack/AttackParameters.py

@@ -24,6 +24,8 @@ class Parameter(Enum):
     VICTIM_BUFFER = 'victim.buffer' # in packets
     TARGET_URI = 'target.uri'
     NUMBER_INITIATOR_BOTS = 'bots.count'
+    INTERVAL_SELECT_START = 'interval.select.start'
+    INTERVAL_SELECT_END = 'interval.select.end'
     # recommended type: domain -----------------------------------
     TARGET_HOST = 'target.host'
 
@@ -53,6 +55,8 @@ class Parameter(Enum):
     # calculate the destination port based on the hostname (like some botnets do)
     # otherwise the destination port is a normal ephemeral port
     BOTNET_DST_PORT_CALCULATION = "botnet.dstportcalculation"
+    #recommended type: interval selection strategy, i.e. 'random', 'optimal' or 'custom' ------------------------------------
+    INTERVAL_SELECT_STRATEGY = 'interval.select.strategy'
 
 class ParameterTypes(Enum):
     """
@@ -72,3 +76,4 @@ class ParameterTypes(Enum):
     TYPE_COMM_TYPE = 10
     TYPE_PERCENTAGE = 11
     TYPE_PADDING = 12
+    TYPE_INTERVAL_SELECT_STRAT = 13

+ 14 - 1
code/Attack/BaseAttack.py

@@ -285,6 +285,17 @@ class BaseAttack(metaclass=ABCMeta):
             return True
         return False
 
+    @staticmethod
+    def _is_inteval_select_strat(val: str):
+        """
+        Verifies that the given string is a valid interval selection strategy.
+        Valid strategies are: 'random', 'optimal' or 'custom'
+
+        :param val: the selection strategy to test for validity
+        :return: True if the given strategy is valid, False otherwise
+        """
+        return val in {"random", "optimal", "custom"}
+
     #########################################
     # HELPER METHODS
     #########################################
@@ -324,7 +335,7 @@ class BaseAttack(metaclass=ABCMeta):
             print('Parameter ' + str(param_name) + ' not available for chosen attack. Skipping parameter.')
 
         # If value is query -> get value from database
-        elif self.statistics.is_query(value):
+        elif param_name != AttackParameters.Parameter.INTERVAL_SELECT_STRATEGY and self.statistics.is_query(value):
             value = self.statistics.process_db_query(value, False)
             if value is not None and value is not "":
                 is_valid = True
@@ -392,6 +403,8 @@ class BaseAttack(metaclass=ABCMeta):
                 
             if is_valid:
                 is_valid = self._is_padding(value) 
+        elif param_type == ParameterTypes.TYPE_INTERVAL_SELECT_STRAT:
+            is_valid = self._is_inteval_select_strat(value)
 
         # add value iff validation was successful
         if is_valid:

+ 17 - 8
code/Attack/MembersMgmtCommAttack.py

@@ -109,7 +109,12 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
 
             # the base PCAP for the TTL distribution
             Param.TTL_FROM_CAIDA: ParameterTypes.TYPE_BOOLEAN,
-            Param.BOTNET_DST_PORT_CALCULATION: ParameterTypes.TYPE_BOOLEAN
+            Param.BOTNET_DST_PORT_CALCULATION: ParameterTypes.TYPE_BOOLEAN,
+
+            # information about the interval selection strategy
+            Param.INTERVAL_SELECT_STRATEGY: ParameterTypes.TYPE_INTERVAL_SELECT_STRAT,
+            Param.INTERVAL_SELECT_START: ParameterTypes.TYPE_INTEGER_POSITIVE,
+            Param.INTERVAL_SELECT_END: ParameterTypes.TYPE_INTEGER_POSITIVE
         }
 
         # create dict with MessageType values for fast name lookup
@@ -156,6 +161,8 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
         self.add_param_value(Param.TTL_FROM_CAIDA, False)
         self.add_param_value(Param.BOTNET_DST_PORT_CALCULATION, True)
 
+        # interval selection strategy
+        self.add_param_value(Param.INTERVAL_SELECT_STRATEGY, "optimal")
 
     def generate_attack_pcap(self, context):
         """
@@ -447,7 +454,6 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
                 else:
                     bot_configs[bot_id]["TTL"] = total_ttl_prob_dict.random()
 
-
         def move_xml_to_outdir(filepath_xml: str):
             """
             Moves the XML file at filepath_xml to the output directory of the PCAP
@@ -465,6 +471,7 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
             context.add_other_created_file(new_xml_path)
             return new_xml_path
 
+
         # parse input CSV or XML
         filepath_xml = self.get_param_value(Param.FILE_XML)
         filepath_csv = self.get_param_value(Param.FILE_CSV)
@@ -483,14 +490,19 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
             cpp_comm_proc.parse_xml(filepath_xml)
 
         # find a good communication mapping in the input file that matches the users parameters
+        nat = self.get_param_value(Param.NAT_PRESENT)
+        comm_proc = CommunicationProcessor(self.msg_types, nat)
         duration = self.get_param_value(Param.ATTACK_DURATION)
         number_init_bots = self.get_param_value(Param.NUMBER_INITIATOR_BOTS)
+        strategy = self.get_param_value(Param.INTERVAL_SELECT_STRATEGY)
+        start_idx = self.get_param_value(Param.INTERVAL_SELECT_START)
+        end_idx = self.get_param_value(Param.INTERVAL_SELECT_END)
+
+        comm_interval = comm_proc.get_comm_interval(cpp_comm_proc, strategy, number_init_bots, duration, start_idx, end_idx)
 
-        comm_intervals = cpp_comm_proc.find_interval(number_init_bots, duration)
-        if comm_intervals == []:
+        if not comm_interval:
             print("Error: There is no interval in the given CSV/XML that has enough communicating initiating bots.")
             return []
-        comm_interval = comm_intervals[randrange(0, len(comm_intervals))]
 
         # retrieve the mapping information
         mapped_ids, packet_start_idx, packet_end_idx = comm_interval["IDs"], comm_interval["Start"], comm_interval["End"]
@@ -501,9 +513,6 @@ class MembersMgmtCommAttack(BaseAttack.BaseAttack):
 
         # get the messages contained in the chosen interval
         abstract_packets = cpp_comm_proc.get_messages(packet_start_idx, packet_end_idx);
-        nat = self.get_param_value(Param.NAT_PRESENT)
-        # create a communication processor responsible for assigning roles and localities of IDs
-        comm_proc = CommunicationProcessor([], self.msg_types, nat)
         comm_proc.set_mapping(abstract_packets, mapped_ids)
         # determine ID roles and select the messages that are to be mapped into the PCAP
         messages = comm_proc.det_id_roles_and_msgs()

+ 55 - 2
code/ID2TLib/CommunicationProcessor.py

@@ -1,4 +1,5 @@
 from lea import Lea
+from random import randrange
 from Attack.MembersMgmtCommAttack import MessageType
 from Attack.MembersMgmtCommAttack import Message
 
@@ -17,14 +18,14 @@ class CommunicationProcessor():
     Class to process parsed input CSV/XML data and retrieve a mapping or other information.
     """
 
-    def __init__(self, packets:list, mtypes:dict, nat:bool):
+    def __init__(self, mtypes:dict, nat:bool):
         """
         Creates an instance of CommunicationProcessor.
         :param packets: the list of abstract packets
         :param mtypes: a dict containing an int to EnumType mapping of MessageTypes
         :param nat: whether NAT is present in this network
         """
-        self.packets = packets
+        self.packets = []
         self.mtypes = mtypes
         self.nat = nat
 
@@ -38,6 +39,58 @@ class CommunicationProcessor():
         self.packets = packets
         self.local_init_ids = set(mapped_ids)
 
+    def get_comm_interval(self, cpp_comm_proc, strategy: str, number_ids: int, max_int_time: int, start_idx: int, end_idx: int):
+        """
+        Finds a communication interval with respect to the given strategy. The interval is maximum of the given seconds 
+        and has at least number_ids communicating initiators in it.
+        
+        :param cpp_comm_proc: An instance of the C++ communication processor that stores all the input messages and 
+                              is responsible for retrieving the interval(s)
+        :param strategy: The selection strategy (i.e. random, optimal, custom)
+        :param number_ids: The number of initiator IDs that have to exist in the interval(s)
+        :param max_int_time: The maximum time period of the interval
+        :param start_idx: The message index the interval should start at (None if not specified)
+        :param end_idx: The message index the interval should stop at (inclusive) (None if not specified)
+        :return: A dict representing the communication interval. It contains the initiator IDs, 
+                 the start index and end index of the respective interval. The respective keys 
+                 are {IDs, Start, End}. If no interval is found, an empty dict is returned.
+        """
+
+        if strategy == "random":
+            # try finding not-empty interval 5 times
+            for i in range(5):
+                start_idx = randrange(0, cpp_comm_proc.get_message_count())
+                interval = cpp_comm_proc.find_interval_from_startidx(start_idx, number_ids, max_int_time)
+                if interval and interval["IDs"]:
+                    return interval
+            return {}
+        elif strategy == "optimal":
+            intervals = cpp_comm_proc.find_optimal_interval(number_ids, max_int_time)
+            if not intervals:
+                return {}
+            else:
+                for i in range(5):
+                    interval = intervals[randrange(0, len(intervals))]
+                    if interval and interval["IDs"]:
+                        return interval
+
+                return {}
+        elif strategy == "custom":
+            if start_idx is None:
+                print("Custom strategy was selected, but no (valid) start index was specified.")
+                print("Because of this, a random interval is selected.")
+                start_idx = randrange(0, cpp_comm_proc.get_message_count())
+            elif end_idx is not None:
+                ids = cpp_comm_proc.get_interval_init_ids(start_idx, end_idx)
+                if not ids:
+                    return {}
+                return {"IDs": ids, "Start": start_idx, "End": end_idx}
+
+            interval = cpp_comm_proc.find_interval_from_startidx(start_idx, number_ids, max_int_time)
+            if not interval or not interval["IDs"]:
+                return {}
+            return interval
+
     def det_id_roles_and_msgs(self):
         """
         Determine the role of every mapped ID. The role can be initiator, responder or both.

+ 140 - 16
code_boost/src/cxx/botnet_comm_processor.cpp

@@ -8,22 +8,40 @@
  *    represented as (python) list containing (python) dicts.
  */
 botnet_comm_processor::botnet_comm_processor(const py::list &messages_pyboost){
+    set_messages(messages_pyboost);
+}
+
+/**
+ * Creates a new and empty botnet_comm_processor object.
+ */
+botnet_comm_processor::botnet_comm_processor(){
+}
+
+
+void botnet_comm_processor::set_messages(const py::list &messages_pyboost){
+    messages.clear();
     for (int i = 0; i < len(messages_pyboost); i++){
         py::dict msg_pyboost = py::extract<py::dict>(messages_pyboost[i]);
         unsigned int src_id = std::stoi(py::extract<std::string>(msg_pyboost["Src"]));
         unsigned int dst_id = std::stoi(py::extract<std::string>(msg_pyboost["Dst"]));
         unsigned short type = (unsigned short) std::stoi(py::extract<std::string>(msg_pyboost["Type"]));
         double time = std::stod(py::extract<std::string>(msg_pyboost["Time"]));
-        int line_no = std::stoi(py::extract<std::string>(msg_pyboost["LineNumber"]));
+
+        int line_no = -1;
+        // try {
+        line_no = std::stoi(py::extract<std::string>(msg_pyboost["LineNumber"]));
+        // }
         abstract_msg msg = {src_id, dst_id, type, time, line_no};
-        messages.push_back(msg);
+        messages.push_back(std::move(msg));
     }
 }
 
 /**
- * Creates a new and empty botnet_comm_processor object.
+ * Retrieve input information about message count.
+ * @return the number of existing messages.
  */
-botnet_comm_processor::botnet_comm_processor(){
+int botnet_comm_processor::get_message_count(){
+    return messages.size();
 }
 
 /**
@@ -167,6 +185,8 @@ std::string botnet_comm_processor::write_xml(const std::string &filename){
 py::list botnet_comm_processor::get_messages(unsigned int start_idx, unsigned int end_idx){
     py::list py_messages;
     for (int i = start_idx; i <= end_idx; i++){
+        if (i >= messages.size())
+            break;
         py::dict py_msg;
         py_msg["Src"] = messages[i].src;
         py_msg["Dst"] = messages[i].dst;
@@ -179,14 +199,14 @@ py::list botnet_comm_processor::get_messages(unsigned int start_idx, unsigned in
 }
 
 /**
- * Finds the time interval(s) of the given seconds with the most overall communication
+ * Finds the time interval(s) of maximum the given seconds with the most overall communication
  * (i.e. requests and responses) that has at least number_ids communicating initiators in it. 
  * @param number_ids The number of initiator IDs that have to exist in the interval(s).
  * @param max_int_time The maximum time period of the interval.
- * @return A (python) list of (python) tuple, where each tuple represents an interval with a set of the initiator IDs, 
- * a start index and an end index in that order. The indices are with respect to the first abstract message.
+ * @return A (python) list of (python) dicts, where each dict (keys: 'IDs', Start', 'End') represents an interval with its
+ * list of initiator IDs, a start index and an end index. The indices are with respect to the first abstract message.
  */
-py::list botnet_comm_processor::find_interval(int number_ids, double max_int_time){
+py::list botnet_comm_processor::find_optimal_interval(int number_ids, double max_int_time){
     unsigned int logical_thread_count = std::thread::hardware_concurrency();
     std::vector<std::thread> threads;
     std::vector<std::future<std::vector<comm_interval> > > futures;
@@ -197,7 +217,7 @@ py::list botnet_comm_processor::find_interval(int number_ids, double max_int_tim
         unsigned int end_idx = (i + 1) * messages.size() / logical_thread_count;
         std::promise<std::vector<comm_interval> > p;  // use promises to retrieve return values
         futures.push_back(p.get_future());
-        threads.push_back(std::thread(&botnet_comm_processor::find_interval_helper, this, std::move(p), number_ids, max_int_time, start_idx, end_idx));
+        threads.push_back(std::thread(&botnet_comm_processor::find_optimal_interval_helper, this, std::move(p), number_ids, max_int_time, start_idx, end_idx));
     }
 
     // synchronize all threads
@@ -233,7 +253,7 @@ py::list botnet_comm_processor::find_interval(int number_ids, double max_int_tim
 }
 
 /**
- * Finds the time interval(s) of the given seconds within the given start and end index having the most 
+ * Finds the time interval(s) of maximum the given seconds within the given start and end index having the most 
  * overall communication (i.e. requests and responses) as well as at least number_ids communicating initiators in it. 
  * @param p An rvalue to a promise to return the found intervals.
  * @param number_ids The number of initiator IDs that have to exist in the interval(s).
@@ -241,7 +261,7 @@ py::list botnet_comm_processor::find_interval(int number_ids, double max_int_tim
  * @param start_idx The index of the first message to process with respect to the class member 'messages'.
  * @param end_idx The upper index boundary where the search is stopped at (i.e. exclusive index).
  */
-void botnet_comm_processor::find_interval_helper(std::promise<std::vector<comm_interval> > && p, int number_ids, double max_int_time, int start_idx, int end_idx){
+void botnet_comm_processor::find_optimal_interval_helper(std::promise<std::vector<comm_interval> > && p, int number_ids, double max_int_time, int start_idx, int end_idx){
     // setup initial variables
     unsigned int idx_low = start_idx, idx_high = start_idx;  // the indices spanning the interval
     unsigned int comm_sum = 0;  // the communication sum of the current interval
@@ -256,7 +276,8 @@ void botnet_comm_processor::find_interval_helper(std::promise<std::vector<comm_i
         if (idx_high < messages.size())
             cur_int_time = messages[idx_high].time - messages[idx_low].time;
  
-        // if current interval time exceeds maximum time period, process information of the current interval
+        // if current interval time exceeds maximum time period or all messages have been processed, 
+        // process information of the current interval
         if (greater_than(cur_int_time, max_int_time) || idx_high >= messages.size()){
             std::set<unsigned int> interval_ids;
 
@@ -318,6 +339,105 @@ void botnet_comm_processor::find_interval_helper(std::promise<std::vector<comm_i
     end: p.set_value(possible_intervals);
 }
 
+/**
+ * Finds the time interval of maximum the given seconds starting at the given index. If it does not have at least number_ids 
+ * communicating initiators in it or the index is out of bounds, an empty dict is returned.
+ * @param number_ids The number of initiator IDs that have to exist in the interval.
+ * @param max_int_time The maximum time period of the interval.
+ * @return A (python) dict (keys: 'IDs', Start', 'End'), which represents an interval with its list of initiator IDs, 
+ * a start index and an end index. The indices are with respect to the first abstract message.
+ */
+py::dict botnet_comm_processor::find_interval_from_startidx(int start_idx, int number_ids, double max_int_time){
+    // setup initial variables
+    unsigned int cur_idx = start_idx;  // the current iteration index
+    double cur_int_time = 0;  // the time of the current interval
+    std::deque<unsigned int> init_ids;  // the initiator IDs seen in the current interval in order of appearance
+    py::dict comm_interval_py;  // the communication interval that is returned
+
+    if (start_idx >= messages.size()){
+        return comm_interval_py;
+    }
+
+    // Iterate over all messages starting at start_idx until the duration or the current index exceeds a boundary
+    while (1){
+        if (cur_idx < messages.size())
+            cur_int_time = messages[cur_idx].time - messages[start_idx].time;
+ 
+        // if current interval time exceeds maximum time period or all messages have been processed, 
+        // process information of the current interval
+        if (greater_than(cur_int_time, max_int_time) || cur_idx >= messages.size()){
+            std::set<unsigned int> interval_ids;
+
+            for (int i = 0; i < init_ids.size(); i++) 
+                interval_ids.insert(init_ids[i]);
+
+            // if the interval contains enough initiator IDs, convert it to python representation and return it
+            if (interval_ids.size() >= number_ids){
+                py::list py_ids;
+                for (const auto &id : interval_ids){
+                    py_ids.append(id);
+                }
+                comm_interval_py["IDs"] = py_ids;
+                comm_interval_py["Start"] = start_idx;
+                comm_interval_py["End"] = cur_idx - 1;
+                return comm_interval_py;
+            }
+            else {
+                return comm_interval_py;
+            }
+        }
+
+        // consume the new message at cur_idx and process its information
+        abstract_msg &cur_msg = messages[cur_idx];
+        // if message is request, add src to initiator list
+        if (msgtype_is_request(cur_msg.type))
+            init_ids.push_back(cur_msg.src);
+        // if message is response, add dst to initiator list
+        else if (msgtype_is_response(cur_msg.type))
+            init_ids.push_back(cur_msg.dst);
+
+        cur_idx += 1;
+    }
+}
+
+/**
+ * Finds all initiator IDs contained in the interval spanned by the two indices.
+ * @param start_idx The start index of the interval.
+ * @param end_idx The last index of the interval (inclusive).
+ * @return A (python) list containing all initiator IDs of the interval.
+ */
+py::list botnet_comm_processor::get_interval_init_ids(int start_idx, int end_idx){
+    // setup initial variables
+    unsigned int cur_idx = start_idx;  // the current iteration index
+    std::set<unsigned int> interval_ids;
+    py::list py_ids;  // the communication interval that is returned
+
+    if (start_idx >= messages.size()){
+        return py_ids;
+    }
+
+    // Iterate over all messages starting at start_idx until the duration or the current index exceeds a boundary
+    while (1){
+        // if messages have been processed
+        if (cur_idx >= messages.size() || cur_idx > end_idx){
+            for (const auto &id : interval_ids)
+                py_ids.append(id);
+            return py_ids;
+        }
+
+        // consume the new message at cur_idx and process its information
+        abstract_msg &cur_msg = messages[cur_idx];
+        // if message is request, add src to initiator list
+        if (msgtype_is_request(cur_msg.type))
+            interval_ids.insert(cur_msg.src);
+        // if message is response, add dst to initiator list
+        else if (msgtype_is_response(cur_msg.type))
+            interval_ids.insert(cur_msg.dst);
+
+        cur_idx += 1;
+    }
+}
+
 /**
  * Checks whether the given message type corresponds to a request.
  * @param mtype The message type to check.
@@ -340,7 +460,7 @@ int botnet_comm_processor::msgtype_is_response(unsigned short mtype){
  * Converts the given vector of communication intervals to a python representation 
  * using (python) lists and (python) tuples.
  * @param intervals The communication intervals to convert.
- * @return A boost::python::list containing the same information using boost::python::dict for each interval.
+ * @return A boost::python::list containing the same interval information using boost::python::dict for each interval.
  */
 py::list botnet_comm_processor::convert_intervals_to_py_repr(const std::vector<comm_interval> &intervals){
     py::list py_intervals;
@@ -374,9 +494,13 @@ BOOST_PYTHON_MODULE (libbotnet) {
     class_<botnet_comm_processor>("botnet_comm_processor")
             .def(init<list>())
             .def(init<>())
-            .def("find_interval", &botnet_comm_processor::find_interval)
+            .def("find_interval_from_startidx", &botnet_comm_processor::find_interval_from_startidx)
+            .def("find_optimal_interval", &botnet_comm_processor::find_optimal_interval)
+            .def("get_interval_init_ids", &botnet_comm_processor::get_interval_init_ids)
+            .def("get_messages", &botnet_comm_processor::get_messages)
+            .def("get_message_count", &botnet_comm_processor::get_message_count)
             .def("parse_csv", &botnet_comm_processor::parse_csv)
             .def("parse_xml", &botnet_comm_processor::parse_xml)
-            .def("get_messages", &botnet_comm_processor::get_messages)
+            .def("set_messages", &botnet_comm_processor::set_messages)
             .def("write_xml", &botnet_comm_processor::write_xml);
-}
+}

+ 11 - 3
code_boost/src/cxx/botnet_comm_processor.h

@@ -17,6 +17,7 @@
 #include <fstream>
 #include <string>
 #include <istream>
+#include <iomanip>
 
 
 /*
@@ -37,7 +38,6 @@
  * For quick usage
  */
 namespace py = boost::python;
-namespace pt = boost::property_tree;
 
 /*
  * Definition of structs
@@ -103,14 +103,22 @@ public:
     /*
      * Methods
      */
-    py::list find_interval(int number_ids, double max_int_time);
+    py::dict find_interval_from_startidx(int start_idx, int number_ids, double max_int_time);
+
+    py::list find_optimal_interval(int number_ids, double max_int_time);
+
+    py::list get_interval_init_ids(int start_idx, int end_idx);
 
     py::list get_messages(unsigned int start_idx, unsigned int end_idx);
 
+    int get_message_count();
+
     unsigned int parse_csv(const std::string &);
 
     unsigned int parse_xml(const std::string &);
 
+    void set_messages(const py::list &messages_pyboost);
+
     std::string write_xml(const std::string &);
 
 private:
@@ -119,7 +127,7 @@ private:
      */
     py::list convert_intervals_to_py_repr(const std::vector<comm_interval>& intervals);
 
-    void find_interval_helper(std::promise<std::vector<comm_interval> > && p, int number_ids, double max_int_time, int start_idx, int end_idx);
+    void find_optimal_interval_helper(std::promise<std::vector<comm_interval> > && p, int number_ids, double max_int_time, int start_idx, int end_idx);
 
     int msgtype_is_request(unsigned short mtype);