Ver código fonte

Refactor interval selection indices and do some code cleanup there

If interval.selection.strategy=custom and only an ending index
is specified, an interval is found that ends at that index.

For the user, interval indices now start from one, i.e. the first
message has index 1 (not 0 as before).

In the C++ code, replace 'x += 1;' with 'x++;' (and 'x -= 1;' with 'x--;'),
where x is any variable.
dustin.born 6 anos atrás
pai
commit
88e2647e8e

+ 11 - 4
code/ID2TLib/CommunicationProcessor.py

@@ -76,17 +76,24 @@ class CommunicationProcessor():
 
                 return {}
         elif strategy == "custom":
-            if start_idx is None:
-                print("Custom strategy was selected, but no (valid) start index was specified.")
+            if (not start_idx) and (not end_idx):
+                print("Custom strategy was selected, but no (valid) start or end index was specified.")
                 print("Because of this, a random interval is selected.")
                 start_idx = randrange(0, cpp_comm_proc.get_message_count())
-            elif end_idx is not None:
+                interval = cpp_comm_proc.find_interval_from_startidx(start_idx, number_ids, max_int_time)
+            elif (not start_idx) and end_idx:
+                end_idx -= 1  # because message indices start with 1 (for the user)
+                interval = cpp_comm_proc.find_interval_from_endidx(end_idx, number_ids, max_int_time)
+            elif start_idx and (not end_idx):
+                start_idx -= 1  # because message indices start with 1 (for the user)
+                interval = cpp_comm_proc.find_interval_from_startidx(start_idx, number_ids, max_int_time)
+            elif start_idx and end_idx:
+                start_idx -= 1; end_idx -= 1
                 ids = cpp_comm_proc.get_interval_init_ids(start_idx, end_idx)
                 if not ids:
                     return {}
                 return {"IDs": ids, "Start": start_idx, "End": end_idx}
 
-            interval = cpp_comm_proc.find_interval_from_startidx(start_idx, number_ids, max_int_time)
             if not interval or not interval["IDs"]:
                 return {}
             return interval

+ 70 - 6
code_boost/src/cxx/botnet_comm_processor.cpp

@@ -313,7 +313,7 @@ void botnet_comm_processor::find_optimal_interval_helper(std::promise<std::vecto
             // if message was not a timeout, delete the first appearance of the initiator ID 
             // of this message from the initiator list and update comm_sum
             if (cur_msg.type != TIMEOUT){
-                comm_sum -= 1;
+                comm_sum--;
                 init_ids.pop_front();
             }
 
@@ -326,15 +326,15 @@ void botnet_comm_processor::find_optimal_interval_helper(std::promise<std::vecto
         // if message is request, add src to initiator list
         if (msgtype_is_request(cur_msg.type)){
             init_ids.push_back(cur_msg.src);
-            comm_sum += 1;
+            comm_sum++;
         }
         // if message is response, add dst to initiator list
         else if (msgtype_is_response(cur_msg.type)){
             init_ids.push_back(cur_msg.dst);
-            comm_sum += 1;
+            comm_sum++;
         }
 
-        idx_high += 1;
+        idx_high++;
     }
 
     end: p.set_value(possible_intervals);
@@ -343,6 +343,7 @@ void botnet_comm_processor::find_optimal_interval_helper(std::promise<std::vecto
 /**
  * Finds the time interval of maximum the given seconds starting at the given index. If it does not have at least number_ids 
  * communicating initiators in it or the index is out of bounds, an empty dict is returned.
+ * @param start_idx the starting index of the returned interval
  * @param number_ids The number of initiator IDs that have to exist in the interval.
  * @param max_int_time The maximum time period of the interval.
  * @return A (python) dict (keys: 'IDs', 'Start', 'End'), which represents an interval with its list of initiator IDs, 
@@ -397,7 +398,69 @@ py::dict botnet_comm_processor::find_interval_from_startidx(int start_idx, int n
         else if (msgtype_is_response(cur_msg.type))
             init_ids.push_back(cur_msg.dst);
 
-        cur_idx += 1;
+        cur_idx++;
+    }
+}
+
+/**
+ * Finds the time interval of at most max_int_time seconds that ends at the given index. If the interval does not
+ * contain at least number_ids communicating initiators or the index is out of bounds, an empty dict is returned.
+ * @param end_idx The ending index of the returned interval (inclusive, zero-based).
+ * @param number_ids The number of initiator IDs that have to exist in the interval.
+ * @param max_int_time The maximum time period of the interval.
+ * @return A (python) dict (keys: 'IDs', 'Start', 'End'), which represents an interval with its list of initiator IDs, 
+ * a start index and an end index. The indices are with respect to the first abstract message.
+ */
+py::dict botnet_comm_processor::find_interval_from_endidx(int end_idx, int number_ids, double max_int_time){
+    // setup initial variables
+    int cur_idx = end_idx;  // the current iteration index, moving backwards from end_idx
+    double cur_int_time = 0;  // the time of the current interval
+    std::set<unsigned int> interval_ids;  // the distinct initiator IDs seen in the current interval
+    py::dict comm_interval_py;  // the communication interval that is returned
+
+    // reject indices outside of the message list, because messages[end_idx] is read below
+    if (end_idx < 0 || end_idx >= static_cast<int>(messages.size())){
+        return comm_interval_py;
+    }
+
+    // Iterate backwards over the messages starting at end_idx until the duration or the current index exceeds a boundary
+    while (1){
+        if (cur_idx >= 0)
+            cur_int_time = messages[end_idx].time - messages[cur_idx].time;
+ 
+        // if current interval time exceeds maximum time period or all messages have been processed, 
+        // process information of the current interval
+        if (greater_than(cur_int_time, max_int_time) || cur_idx < 0){
+            // if the interval contains enough initiator IDs, convert it to python representation
+            if (interval_ids.size() >= number_ids){
+                py::list py_ids;
+                for (const auto &id : interval_ids){
+                    py_ids.append(id);
+                }
+                comm_interval_py["IDs"] = py_ids;
+                comm_interval_py["Start"] = cur_idx + 1;  // the message at cur_idx is not part of the interval
+                comm_interval_py["End"] = end_idx;
+            }
+            // with too few initiator IDs the dict is returned empty
+            return comm_interval_py;
+        }
+
+        // consume the new message at cur_idx and process its information
+        abstract_msg &cur_msg = messages[cur_idx];
+        // if message is request, add src to the initiator IDs
+        if (msgtype_is_request(cur_msg.type))
+            interval_ids.insert(cur_msg.src);
+        // if message is response, add dst to the initiator IDs
+        else if (msgtype_is_response(cur_msg.type))
+            interval_ids.insert(cur_msg.dst);
+
+        cur_idx--;
     }
 }
 
@@ -435,7 +498,7 @@ py::list botnet_comm_processor::get_interval_init_ids(int start_idx, int end_idx
         else if (msgtype_is_response(cur_msg.type))
             interval_ids.insert(cur_msg.dst);
 
-        cur_idx += 1;
+        cur_idx++;
     }
 }
 
@@ -496,6 +559,7 @@ BOOST_PYTHON_MODULE (libbotnetcomm) {
             .def(init<list>())
             .def(init<>())
             .def("find_interval_from_startidx", &botnet_comm_processor::find_interval_from_startidx)
+            .def("find_interval_from_endidx", &botnet_comm_processor::find_interval_from_endidx)
             .def("find_optimal_interval", &botnet_comm_processor::find_optimal_interval)
             .def("get_interval_init_ids", &botnet_comm_processor::get_interval_init_ids)
             .def("get_messages", &botnet_comm_processor::get_messages)

+ 2 - 0
code_boost/src/cxx/botnet_comm_processor.h

@@ -105,6 +105,8 @@ public:
      */
     py::dict find_interval_from_startidx(int start_idx, int number_ids, double max_int_time);
 
+    py::dict find_interval_from_endidx(int end_idx, int number_ids, double max_int_time);
+
     py::list find_optimal_interval(int number_ids, double max_int_time);
 
     py::list get_interval_init_ids(int start_idx, int end_idx);