|
@@ -8,22 +8,40 @@
|
|
|
* represented as (python) list containing (python) dicts.
|
|
|
*/
|
|
|
botnet_comm_processor::botnet_comm_processor(const py::list &messages_pyboost){
    // the whole conversion of the (python) list is delegated to set_messages()
    this->set_messages(messages_pyboost);
}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Creates a new and empty botnet_comm_processor object.
|
|
|
+ */
|
|
|
+botnet_comm_processor::botnet_comm_processor(){
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+void botnet_comm_processor::set_messages(const py::list &messages_pyboost){
|
|
|
+ messages.clear();
|
|
|
for (int i = 0; i < len(messages_pyboost); i++){
|
|
|
py::dict msg_pyboost = py::extract<py::dict>(messages_pyboost[i]);
|
|
|
unsigned int src_id = std::stoi(py::extract<std::string>(msg_pyboost["Src"]));
|
|
|
unsigned int dst_id = std::stoi(py::extract<std::string>(msg_pyboost["Dst"]));
|
|
|
unsigned short type = (unsigned short) std::stoi(py::extract<std::string>(msg_pyboost["Type"]));
|
|
|
double time = std::stod(py::extract<std::string>(msg_pyboost["Time"]));
|
|
|
- int line_no = std::stoi(py::extract<std::string>(msg_pyboost["LineNumber"]));
|
|
|
+
|
|
|
+ int line_no = -1;
|
|
|
+ // try {
|
|
|
+ line_no = std::stoi(py::extract<std::string>(msg_pyboost["LineNumber"]));
|
|
|
+ // }
|
|
|
abstract_msg msg = {src_id, dst_id, type, time, line_no};
|
|
|
- messages.push_back(msg);
|
|
|
+ messages.push_back(std::move(msg));
|
|
|
}
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Creates a new and empty botnet_comm_processor object.
|
|
|
+ * Retrieve input information about message count.
|
|
|
+ * @return the number of existing messages.
|
|
|
*/
|
|
|
-botnet_comm_processor::botnet_comm_processor(){
|
|
|
+int botnet_comm_processor::get_message_count(){
|
|
|
+ return messages.size();
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -167,6 +185,8 @@ std::string botnet_comm_processor::write_xml(const std::string &filename){
|
|
|
py::list botnet_comm_processor::get_messages(unsigned int start_idx, unsigned int end_idx){
|
|
|
py::list py_messages;
|
|
|
for (int i = start_idx; i <= end_idx; i++){
|
|
|
+ if (i >= messages.size())
|
|
|
+ break;
|
|
|
py::dict py_msg;
|
|
|
py_msg["Src"] = messages[i].src;
|
|
|
py_msg["Dst"] = messages[i].dst;
|
|
@@ -179,14 +199,14 @@ py::list botnet_comm_processor::get_messages(unsigned int start_idx, unsigned in
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Finds the time interval(s) of the given seconds with the most overall communication
|
|
|
+ * Finds the time interval(s) of at most the given number of seconds with the most overall communication
|
|
|
* (i.e. requests and responses) that has at least number_ids communicating initiators in it.
|
|
|
* @param number_ids The number of initiator IDs that have to exist in the interval(s).
|
|
|
* @param max_int_time The maximum time period of the interval.
|
|
|
- * @return A (python) list of (python) tuple, where each tuple represents an interval with a set of the initiator IDs,
|
|
|
- * a start index and an end index in that order. The indices are with respect to the first abstract message.
|
|
|
+ * @return A (python) list of (python) dicts, where each dict (keys: 'IDs', 'Start', 'End') represents an interval with its
|
|
|
+ * list of initiator IDs, a start index and an end index. The indices are with respect to the first abstract message.
|
|
|
*/
|
|
|
-py::list botnet_comm_processor::find_interval(int number_ids, double max_int_time){
|
|
|
+py::list botnet_comm_processor::find_optimal_interval(int number_ids, double max_int_time){
|
|
|
unsigned int logical_thread_count = std::thread::hardware_concurrency();
|
|
|
std::vector<std::thread> threads;
|
|
|
std::vector<std::future<std::vector<comm_interval> > > futures;
|
|
@@ -197,7 +217,7 @@ py::list botnet_comm_processor::find_interval(int number_ids, double max_int_tim
|
|
|
unsigned int end_idx = (i + 1) * messages.size() / logical_thread_count;
|
|
|
std::promise<std::vector<comm_interval> > p; // use promises to retrieve return values
|
|
|
futures.push_back(p.get_future());
|
|
|
- threads.push_back(std::thread(&botnet_comm_processor::find_interval_helper, this, std::move(p), number_ids, max_int_time, start_idx, end_idx));
|
|
|
+ threads.push_back(std::thread(&botnet_comm_processor::find_optimal_interval_helper, this, std::move(p), number_ids, max_int_time, start_idx, end_idx));
|
|
|
}
|
|
|
|
|
|
// synchronize all threads
|
|
@@ -233,7 +253,7 @@ py::list botnet_comm_processor::find_interval(int number_ids, double max_int_tim
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Finds the time interval(s) of the given seconds within the given start and end index having the most
|
|
|
+ * Finds the time interval(s) of at most the given number of seconds within the given start and end index having the most
|
|
|
* overall communication (i.e. requests and responses) as well as at least number_ids communicating initiators in it.
|
|
|
* @param p An rvalue to a promise to return the found intervals.
|
|
|
* @param number_ids The number of initiator IDs that have to exist in the interval(s).
|
|
@@ -241,7 +261,7 @@ py::list botnet_comm_processor::find_interval(int number_ids, double max_int_tim
|
|
|
* @param start_idx The index of the first message to process with respect to the class member 'messages'.
|
|
|
* @param end_idx The upper index boundary where the search is stopped at (i.e. exclusive index).
|
|
|
*/
|
|
|
-void botnet_comm_processor::find_interval_helper(std::promise<std::vector<comm_interval> > && p, int number_ids, double max_int_time, int start_idx, int end_idx){
|
|
|
+void botnet_comm_processor::find_optimal_interval_helper(std::promise<std::vector<comm_interval> > && p, int number_ids, double max_int_time, int start_idx, int end_idx){
|
|
|
// setup initial variables
|
|
|
unsigned int idx_low = start_idx, idx_high = start_idx; // the indices spanning the interval
|
|
|
unsigned int comm_sum = 0; // the communication sum of the current interval
|
|
@@ -256,7 +276,8 @@ void botnet_comm_processor::find_interval_helper(std::promise<std::vector<comm_i
|
|
|
if (idx_high < messages.size())
|
|
|
cur_int_time = messages[idx_high].time - messages[idx_low].time;
|
|
|
|
|
|
- // if current interval time exceeds maximum time period, process information of the current interval
|
|
|
+ // if current interval time exceeds maximum time period or all messages have been processed,
|
|
|
+ // process information of the current interval
|
|
|
if (greater_than(cur_int_time, max_int_time) || idx_high >= messages.size()){
|
|
|
std::set<unsigned int> interval_ids;
|
|
|
|
|
@@ -318,6 +339,105 @@ void botnet_comm_processor::find_interval_helper(std::promise<std::vector<comm_i
|
|
|
end: p.set_value(possible_intervals);
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * Finds the time interval of maximum the given seconds starting at the given index. If it does not have at least number_ids
|
|
|
+ * communicating initiators in it or the index is out of bounds, an empty dict is returned.
|
|
|
+ * @param number_ids The number of initiator IDs that have to exist in the interval.
|
|
|
+ * @param max_int_time The maximum time period of the interval.
|
|
|
+ * @return A (python) dict (keys: 'IDs', Start', 'End'), which represents an interval with its list of initiator IDs,
|
|
|
+ * a start index and an end index. The indices are with respect to the first abstract message.
|
|
|
+ */
|
|
|
+py::dict botnet_comm_processor::find_interval_from_startidx(int start_idx, int number_ids, double max_int_time){
|
|
|
+ // setup initial variables
|
|
|
+ unsigned int cur_idx = start_idx; // the current iteration index
|
|
|
+ double cur_int_time = 0; // the time of the current interval
|
|
|
+ std::deque<unsigned int> init_ids; // the initiator IDs seen in the current interval in order of appearance
|
|
|
+ py::dict comm_interval_py; // the communication interval that is returned
|
|
|
+
|
|
|
+ if (start_idx >= messages.size()){
|
|
|
+ return comm_interval_py;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Iterate over all messages starting at start_idx until the duration or the current index exceeds a boundary
|
|
|
+ while (1){
|
|
|
+ if (cur_idx < messages.size())
|
|
|
+ cur_int_time = messages[cur_idx].time - messages[start_idx].time;
|
|
|
+
|
|
|
+ // if current interval time exceeds maximum time period or all messages have been processed,
|
|
|
+ // process information of the current interval
|
|
|
+ if (greater_than(cur_int_time, max_int_time) || cur_idx >= messages.size()){
|
|
|
+ std::set<unsigned int> interval_ids;
|
|
|
+
|
|
|
+ for (int i = 0; i < init_ids.size(); i++)
|
|
|
+ interval_ids.insert(init_ids[i]);
|
|
|
+
|
|
|
+ // if the interval contains enough initiator IDs, convert it to python representation and return it
|
|
|
+ if (interval_ids.size() >= number_ids){
|
|
|
+ py::list py_ids;
|
|
|
+ for (const auto &id : interval_ids){
|
|
|
+ py_ids.append(id);
|
|
|
+ }
|
|
|
+ comm_interval_py["IDs"] = py_ids;
|
|
|
+ comm_interval_py["Start"] = start_idx;
|
|
|
+ comm_interval_py["End"] = cur_idx - 1;
|
|
|
+ return comm_interval_py;
|
|
|
+ }
|
|
|
+ else {
|
|
|
+ return comm_interval_py;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // consume the new message at cur_idx and process its information
|
|
|
+ abstract_msg &cur_msg = messages[cur_idx];
|
|
|
+ // if message is request, add src to initiator list
|
|
|
+ if (msgtype_is_request(cur_msg.type))
|
|
|
+ init_ids.push_back(cur_msg.src);
|
|
|
+ // if message is response, add dst to initiator list
|
|
|
+ else if (msgtype_is_response(cur_msg.type))
|
|
|
+ init_ids.push_back(cur_msg.dst);
|
|
|
+
|
|
|
+ cur_idx += 1;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Finds all initiator IDs contained in the interval spanned by the two indices.
|
|
|
+ * @param start_idx The start index of the interval.
|
|
|
+ * @param end_idx The last index of the interval (inclusive).
|
|
|
+ * @return A (python) list containing all initiator IDs of the interval.
|
|
|
+ */
|
|
|
+py::list botnet_comm_processor::get_interval_init_ids(int start_idx, int end_idx){
|
|
|
+ // setup initial variables
|
|
|
+ unsigned int cur_idx = start_idx; // the current iteration index
|
|
|
+ std::set<unsigned int> interval_ids;
|
|
|
+ py::list py_ids; // the communication interval that is returned
|
|
|
+
|
|
|
+ if (start_idx >= messages.size()){
|
|
|
+ return py_ids;
|
|
|
+ }
|
|
|
+
|
|
|
+ // Iterate over all messages starting at start_idx until the duration or the current index exceeds a boundary
|
|
|
+ while (1){
|
|
|
+ // if messages have been processed
|
|
|
+ if (cur_idx >= messages.size() || cur_idx > end_idx){
|
|
|
+ for (const auto &id : interval_ids)
|
|
|
+ py_ids.append(id);
|
|
|
+ return py_ids;
|
|
|
+ }
|
|
|
+
|
|
|
+ // consume the new message at cur_idx and process its information
|
|
|
+ abstract_msg &cur_msg = messages[cur_idx];
|
|
|
+ // if message is request, add src to initiator list
|
|
|
+ if (msgtype_is_request(cur_msg.type))
|
|
|
+ interval_ids.insert(cur_msg.src);
|
|
|
+ // if message is response, add dst to initiator list
|
|
|
+ else if (msgtype_is_response(cur_msg.type))
|
|
|
+ interval_ids.insert(cur_msg.dst);
|
|
|
+
|
|
|
+ cur_idx += 1;
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* Checks whether the given message type corresponds to a request.
|
|
|
* @param mtype The message type to check.
|
|
@@ -340,7 +460,7 @@ int botnet_comm_processor::msgtype_is_response(unsigned short mtype){
|
|
|
* Converts the given vector of communication intervals to a python representation
|
|
|
* using (python) lists and (python) dicts.
|
|
|
* @param intervals The communication intervals to convert.
|
|
|
- * @return A boost::python::list containing the same information using boost::python::dict for each interval.
|
|
|
+ * @return A boost::python::list containing the same interval information using boost::python::dict for each interval.
|
|
|
*/
|
|
|
py::list botnet_comm_processor::convert_intervals_to_py_repr(const std::vector<comm_interval> &intervals){
|
|
|
py::list py_intervals;
|
|
@@ -374,9 +494,13 @@ BOOST_PYTHON_MODULE (libbotnet) {
|
|
|
class_<botnet_comm_processor>("botnet_comm_processor")
|
|
|
.def(init<list>())
|
|
|
.def(init<>())
|
|
|
- .def("find_interval", &botnet_comm_processor::find_interval)
|
|
|
+ .def("find_interval_from_startidx", &botnet_comm_processor::find_interval_from_startidx)
|
|
|
+ .def("find_optimal_interval", &botnet_comm_processor::find_optimal_interval)
|
|
|
+ .def("get_interval_init_ids", &botnet_comm_processor::get_interval_init_ids)
|
|
|
+ .def("get_messages", &botnet_comm_processor::get_messages)
|
|
|
+ .def("get_message_count", &botnet_comm_processor::get_message_count)
|
|
|
.def("parse_csv", &botnet_comm_processor::parse_csv)
|
|
|
.def("parse_xml", &botnet_comm_processor::parse_xml)
|
|
|
- .def("get_messages", &botnet_comm_processor::get_messages)
|
|
|
+ .def("set_messages", &botnet_comm_processor::set_messages)
|
|
|
.def("write_xml", &botnet_comm_processor::write_xml);
|
|
|
-}
|
|
|
+}
|