|
@@ -1,6 +1,5 @@
|
|
|
#include "botnet_comm_processor.h"
|
|
|
|
|
|
-// Use references instead of values to save time?
|
|
|
|
|
|
/**
|
|
|
* Creates a new botnet_comm_processor object.
|
|
@@ -8,18 +7,177 @@
|
|
|
* @param messages_pyboost The abstract communication messages
|
|
|
* represented as (python) list containing (python) dicts.
|
|
|
*/
|
|
|
-botnet_comm_processor::botnet_comm_processor(py::list messages_pyboost){
|
|
|
+botnet_comm_processor::botnet_comm_processor(const py::list &messages_pyboost){
|
|
|
for (int i = 0; i < len(messages_pyboost); i++){
|
|
|
py::dict msg_pyboost = py::extract<py::dict>(messages_pyboost[i]);
|
|
|
unsigned int src_id = std::stoi(py::extract<std::string>(msg_pyboost["Src"]));
|
|
|
unsigned int dst_id = std::stoi(py::extract<std::string>(msg_pyboost["Dst"]));
|
|
|
unsigned short type = (unsigned short) std::stoi(py::extract<std::string>(msg_pyboost["Type"]));
|
|
|
double time = std::stod(py::extract<std::string>(msg_pyboost["Time"]));
|
|
|
- abstract_msg msg = {src_id, dst_id, type, time};
|
|
|
+ int line_no = std::stoi(py::extract<std::string>(msg_pyboost["LineNumber"]));
|
|
|
+ abstract_msg msg = {src_id, dst_id, type, time, line_no};
|
|
|
messages.push_back(msg);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * Creates a new and empty botnet_comm_processor object.
|
|
|
+ */
|
|
|
+botnet_comm_processor::botnet_comm_processor(){
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Processes an XML attribute assignment. The result is reflected in the respective change of the given message.
|
|
|
+ * @param msg The message this attribute refers to.
|
|
|
+ * @param assignment The XML attribute assignment in notation: attribute="value"
|
|
|
+ */
|
|
|
+void botnet_comm_processor::process_xml_attrib_assign(abstract_msg &msg, const std::string &assignment) {
|
|
|
+ int split_pos = assignment.find("=");
|
|
|
+ if (split_pos != std::string::npos){
|
|
|
+ std::string key = assignment.substr(0, split_pos);
|
|
|
+ std::string value = assignment.substr(split_pos + 2, assignment.length() - 1);
|
|
|
+ process_kv(msg, key, value);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Processes a key-value pair. The result is reflected in the respective change of the given message.
|
|
|
+ * @param msg The message this kv pair refers to.
|
|
|
+ * @param key The key of the attribute.
|
|
|
+ * @param value The value of the attribute.
|
|
|
+ */
|
|
|
+void botnet_comm_processor::process_kv(abstract_msg &msg, const std::string &key, const std::string &value){
|
|
|
+ if (key == "Src")
|
|
|
+ msg.src = std::stoi(value);
|
|
|
+ else if (key == "Dst")
|
|
|
+ msg.dst = std::stoi(value);
|
|
|
+ else if (key == "Type")
|
|
|
+ msg.type = (unsigned short) std::stoi(value);
|
|
|
+ else if (key == "Time")
|
|
|
+ msg.time = std::stod(value);
|
|
|
+ else if (key == "LineNumber")
|
|
|
+ msg.line_no = std::stoi(value);
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Parses the packets contained in the given CSV to program structure.
|
|
|
+ * @param filepath The filepath where the CSV is located.
|
|
|
+ * @return The number of messages (or lines) contained in the CSV file.
|
|
|
+ */
|
|
|
+unsigned int botnet_comm_processor::parse_csv(const std::string &filepath){
|
|
|
+ std::ifstream input(filepath);
|
|
|
+ int line_no = 0;
|
|
|
+
|
|
|
+ messages.clear();
|
|
|
+ // iterate over every line
|
|
|
+ for (std::string line; std::getline(input, line); ){
|
|
|
+ std::istringstream line_stream(line);
|
|
|
+ abstract_msg cur_msg;
|
|
|
+ cur_msg.line_no = line_no;
|
|
|
+ // iterate over every key:value entry
|
|
|
+ for (std::string pair; std::getline(line_stream, pair, ','); ){
|
|
|
+ boost::replace_all(pair, " ", "");
|
|
|
+ int split_pos = pair.find(":");
|
|
|
+ if (split_pos != std::string::npos){
|
|
|
+ std::string key = pair.substr(0, split_pos);
|
|
|
+ std::string value = pair.substr(split_pos + 1, pair.length());
|
|
|
+ process_kv(cur_msg, key, value);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ messages.push_back(std::move(cur_msg));
|
|
|
+ line_no++;
|
|
|
+ }
|
|
|
+ return messages.size();
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Parses the packets contained in the given XML to program structure.
|
|
|
+ * @param filepath The filepath where the XML is located.
|
|
|
+ * @return The number of messages contained in the XML file.
|
|
|
+ */
|
|
|
+unsigned int botnet_comm_processor::parse_xml(const std::string &filepath){
|
|
|
+ std::ifstream input(filepath);
|
|
|
+ std::string cur_word = "";
|
|
|
+ abstract_msg cur_msg;
|
|
|
+ char c;
|
|
|
+ int read_packet_open = 0, read_slash = 0;
|
|
|
+
|
|
|
+ messages.clear();
|
|
|
+ // iterate over every character
|
|
|
+ while (input.get(c)){
|
|
|
+ if(c == '/') // hints ending of tag
|
|
|
+ read_slash = 1;
|
|
|
+ else if (c == '>'){ // definitely closes tag
|
|
|
+ if (read_packet_open && read_slash){ // handle oustanding attribute
|
|
|
+ read_slash = 0;
|
|
|
+ process_xml_attrib_assign(cur_msg, cur_word);
|
|
|
+ messages.push_back(cur_msg);
|
|
|
+ read_packet_open = 0;
|
|
|
+ }
|
|
|
+ cur_word = "";
|
|
|
+ }
|
|
|
+ else if (c == ' '){
|
|
|
+ if (read_packet_open && cur_word != ""){ // handle new attribute
|
|
|
+ process_xml_attrib_assign(cur_msg, cur_word);
|
|
|
+ }
|
|
|
+ else if (cur_word == "<packet")
|
|
|
+ read_packet_open = 1;
|
|
|
+
|
|
|
+ cur_word = "";
|
|
|
+ }
|
|
|
+ else
|
|
|
+ cur_word += c;
|
|
|
+ }
|
|
|
+ return messages.size();
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Writes the communication messages contained in the class member messages into an XML file (with respective notation).
|
|
|
+ * @param filename The name the file should have (without extension).
|
|
|
+ * @return The filepath of the written XML file.
|
|
|
+ */
|
|
|
+std::string botnet_comm_processor::write_xml(const std::string &filename){
|
|
|
+ std::string filepath = filename + ".xml";
|
|
|
+
|
|
|
+ std::ofstream xml_file;
|
|
|
+ xml_file.open(filepath);
|
|
|
+
|
|
|
+ // set number of digits after dot to 11
|
|
|
+ xml_file << std::fixed << std::setprecision(11);
|
|
|
+
|
|
|
+ xml_file << "<trace name=\"" << filename << "\">";
|
|
|
+ for (const auto &msg : messages){
|
|
|
+ xml_file << "<packet ";
|
|
|
+ xml_file << "Src=\"" << msg.src << "\" Dst=\"" << msg.dst << "\" ";
|
|
|
+ xml_file << "Type=\"" << msg.type << "\" Time=\"" << msg.time << "\" ";
|
|
|
+ xml_file << "LineNumber=\"" << msg.line_no << "\" />";
|
|
|
+ }
|
|
|
+ xml_file << "</trace>";
|
|
|
+
|
|
|
+ xml_file.close();
|
|
|
+ return filepath;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Retrieves all messages contained in the interval between start_idx and end_idx in Python representation.
|
|
|
+ * @param start_idx The inclusive first index of the interval.
|
|
|
+ * @param end_idx The inclusive last index of the interval.
|
|
|
+ * @return A (Python) list of (Python) dicts containing the desired information.
|
|
|
+ */
|
|
|
+py::list botnet_comm_processor::get_messages(unsigned int start_idx, unsigned int end_idx){
|
|
|
+ py::list py_messages;
|
|
|
+ for (int i = start_idx; i <= end_idx; i++){
|
|
|
+ py::dict py_msg;
|
|
|
+ py_msg["Src"] = messages[i].src;
|
|
|
+ py_msg["Dst"] = messages[i].dst;
|
|
|
+ py_msg["Type"] = messages[i].type;
|
|
|
+ py_msg["Time"] = messages[i].time;
|
|
|
+ py_msg["LineNumber"] = messages[i].line_no;
|
|
|
+ py_messages.append(py_msg);
|
|
|
+ }
|
|
|
+ return py_messages;
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* Finds the time interval(s) of the given seconds with the most overall communication
|
|
|
* (i.e. requests and responses) that has at least number_ids communicating initiators in it.
|
|
@@ -71,7 +229,7 @@ py::list botnet_comm_processor::find_interval(int number_ids, double max_int_tim
|
|
|
}
|
|
|
|
|
|
// return the result converted into python data structures
|
|
|
- return convert_to_py_repr(possible_intervals);
|
|
|
+ return convert_intervals_to_py_repr(possible_intervals);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -81,7 +239,7 @@ py::list botnet_comm_processor::find_interval(int number_ids, double max_int_tim
|
|
|
* @param number_ids The number of initiator IDs that have to exist in the interval(s).
|
|
|
* @param max_int_time The maximum time period of the interval.
|
|
|
* @param start_idx The index of the first message to process with respect to the class member 'messages'.
|
|
|
- * @param end_idx The upper index boundary where the search is stopped at (i.e. idx_low does not cross this boundary).
|
|
|
+ * @param end_idx The upper index boundary where the search is stopped at (i.e. exclusive index).
|
|
|
*/
|
|
|
void botnet_comm_processor::find_interval_helper(std::promise<std::vector<comm_interval> > && p, int number_ids, double max_int_time, int start_idx, int end_idx){
|
|
|
// setup initial variables
|
|
@@ -178,41 +336,30 @@ int botnet_comm_processor::msgtype_is_response(unsigned short mtype){
|
|
|
return mtype == SALITY_HELLO_REPLY || mtype == SALITY_NL_REPLY;
|
|
|
}
|
|
|
|
|
|
-// py::list botnet_comm_processor::std_vector_to_py_list(const std::vector<comm_interval> &intervals){
|
|
|
-// py::object get_iter = py::iterator<std::vector<comm_interval> >();
|
|
|
-// py::object iter = get_iter(intervals);
|
|
|
-// py::list l(iter);
|
|
|
-// return l;
|
|
|
-// }
|
|
|
-
|
|
|
-// py::list botnet_comm_processor::st_unorderedmap_to_py_dict(const std::vector<comm_interval> &intervals){
|
|
|
-// py::object get_iter = py::iterator<std::vector<comm_interval> >();
|
|
|
-// py::object iter = get_iter(intervals);
|
|
|
-// py::list l(iter);
|
|
|
-// return l;
|
|
|
-// }
|
|
|
-
|
|
|
/**
|
|
|
* Converts the given vector of communication intervals to a python representation
|
|
|
* using (python) lists and (python) tuples.
|
|
|
* @param intervals The communication intervals to convert.
|
|
|
- * @return A boost::python::list containing the same information using boost::python::tuples for each interval.
|
|
|
+ * @return A boost::python::list containing the same information using boost::python::dict for each interval.
|
|
|
*/
|
|
|
-py::list botnet_comm_processor::convert_to_py_repr(const std::vector<comm_interval> &intervals){
|
|
|
+py::list botnet_comm_processor::convert_intervals_to_py_repr(const std::vector<comm_interval> &intervals){
|
|
|
py::list py_intervals;
|
|
|
for (const auto &interval : intervals){
|
|
|
py::list py_ids;
|
|
|
for (const auto &id : interval.ids){
|
|
|
py_ids.append(id);
|
|
|
}
|
|
|
- py::tuple py_interval = py::make_tuple(py_ids, interval.start_idx, interval.end_idx);
|
|
|
+ py::dict py_interval;
|
|
|
+ py_interval["IDs"] = py_ids;
|
|
|
+ py_interval["Start"] = interval.start_idx;
|
|
|
+ py_interval["End"] = interval.end_idx;
|
|
|
py_intervals.append(py_interval);
|
|
|
}
|
|
|
return py_intervals;
|
|
|
}
|
|
|
|
|
|
-// void botnet_comm_processor::print_message(abstract_msg message){
|
|
|
-// std::cout << "Src: " << message.src << " Dst: " << message.dst << " Type: " << message.type << " Time: " << message.time << std::endl;
|
|
|
+// void botnet_comm_processor::print_message(const abstract_msg &message){
|
|
|
+// std::cout << "Src: " << message.src << " Dst: " << message.dst << " Type: " << message.type << " Time: " << message.time << " LineNumber: " << message.line_no << std::endl;
|
|
|
// }
|
|
|
|
|
|
|
|
@@ -224,6 +371,12 @@ py::list botnet_comm_processor::convert_to_py_repr(const std::vector<comm_interv
|
|
|
using namespace boost::python;
|
|
|
|
|
|
BOOST_PYTHON_MODULE (libbotnet) {
    // Python-visible class; registrations below are kept in their original order.
    class_<botnet_comm_processor> processor_cls("botnet_comm_processor");

    // constructors: from a list of message dicts, and without arguments
    processor_cls.def(init<list>());
    processor_cls.def(init<>());

    // member functions exposed under their C++ names
    processor_cls.def("find_interval", &botnet_comm_processor::find_interval);
    processor_cls.def("parse_csv", &botnet_comm_processor::parse_csv);
    processor_cls.def("parse_xml", &botnet_comm_processor::parse_xml);
    processor_cls.def("get_messages", &botnet_comm_processor::get_messages);
    processor_cls.def("write_xml", &botnet_comm_processor::write_xml);
}
|