// statistics_db.cpp
  1. #include "statistics_db.h"
  2. #include <math.h>
  3. #include <iostream>
  4. #include <sstream>
  5. #include <fstream>
  6. #include <unistd.h>
  7. #include <stdio.h>
  8. #include <pybind11/pybind11.h>
  9. namespace py = pybind11;
  10. /**
  11. * Creates a new statistics_db object. Opens an existing database located at database_path. If not existing, creates
  12. * a new database at database_path.
  13. * @param database_path The file path of the database.
  14. */
  15. statistics_db::statistics_db(std::string database_path) {
  16. // Append file extension if not present
  17. if (database_path.find(".sqlite3") == database_path.npos) {
  18. database_path += ".sqlite3";
  19. }
  20. // creates the DB if not existing, opens the DB for read+write access
  21. db.reset(new SQLite::Database(database_path, SQLite::OPEN_CREATE | SQLite::OPEN_READWRITE));
  22. // Read ports and services into portServices vector
  23. readPortServicesFromNmap();
  24. }
  25. /**
  26. * Writes the IP statistics into the database.
  27. * @param ipStatistics The IP statistics from class statistics.
  28. */
  29. void statistics_db::writeStatisticsIP(const std::unordered_map<std::string, entry_ipStat> &ipStatistics) {
  30. try {
  31. db->exec("DROP TABLE IF EXISTS ip_statistics");
  32. SQLite::Transaction transaction(*db);
  33. const char *createTable = "CREATE TABLE ip_statistics ( "
  34. "ipAddress TEXT, "
  35. "pktsReceived INTEGER, "
  36. "pktsSent INTEGER, "
  37. "kbytesReceived REAL, "
  38. "kbytesSent REAL, "
  39. "maxPktRate REAL,"
  40. "minPktRate REAL,"
  41. "ipClass TEXT COLLATE NOCASE, "
  42. "PRIMARY KEY(ipAddress));";
  43. db->exec(createTable);
  44. SQLite::Statement query(*db, "INSERT INTO ip_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
  45. for (auto it = ipStatistics.begin(); it != ipStatistics.end(); ++it) {
  46. const entry_ipStat &e = it->second;
  47. query.bindNoCopy(1, it->first);
  48. query.bind(2, (int) e.pkts_received);
  49. query.bind(3, (int) e.pkts_sent);
  50. query.bind(4, e.kbytes_received);
  51. query.bind(5, e.kbytes_sent);
  52. query.bind(6, e.max_interval_pkt_rate);
  53. query.bind(7, e.min_interval_pkt_rate);
  54. query.bindNoCopy(8, e.ip_class);
  55. query.exec();
  56. query.reset();
  57. if (PyErr_CheckSignals()) throw py::error_already_set();
  58. }
  59. transaction.commit();
  60. }
  61. catch (std::exception &e) {
  62. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  63. }
  64. }
  65. /**
  66. * Writes the IP Degrees into the database.
  67. * @param ipStatistics The IP statistics from class statistics. Degree Statistics are supposed to be integrated into the ip_statistics table later on,
  68. * therefore they use the same parameter. But for now they are inserted into their own table.
  69. */
  70. void statistics_db::writeStatisticsDegree(const std::unordered_map<std::string, entry_ipStat> &ipStatistics){
  71. try {
  72. db->exec("DROP TABLE IF EXISTS ip_degrees");
  73. SQLite::Transaction transaction(*db);
  74. const char *createTable = "CREATE TABLE ip_degrees ( "
  75. "ipAddress TEXT, "
  76. "inDegree INTEGER, "
  77. "outDegree INTEGER, "
  78. "overallDegree INTEGER, "
  79. "PRIMARY KEY(ipAddress));";
  80. db->exec(createTable);
  81. SQLite::Statement query(*db, "INSERT INTO ip_degrees VALUES (?, ?, ?, ?)");
  82. for (auto it = ipStatistics.begin(); it != ipStatistics.end(); ++it) {
  83. const entry_ipStat &e = it->second;
  84. query.bindNoCopy(1, it->first);
  85. query.bind(2, e.in_degree);
  86. query.bind(3, e.out_degree);
  87. query.bind(4, e.overall_degree);
  88. query.exec();
  89. query.reset();
  90. if (PyErr_CheckSignals()) throw py::error_already_set();
  91. }
  92. transaction.commit();
  93. }
  94. catch (std::exception &e) {
  95. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  96. }
  97. }
  98. /**
  99. * Writes the TTL distribution into the database.
  100. * @param ttlDistribution The TTL distribution from class statistics.
  101. */
  102. void statistics_db::writeStatisticsTTL(const std::unordered_map<ipAddress_ttl, int> &ttlDistribution) {
  103. try {
  104. db->exec("DROP TABLE IF EXISTS ip_ttl");
  105. SQLite::Transaction transaction(*db);
  106. const char *createTable = "CREATE TABLE ip_ttl ("
  107. "ipAddress TEXT,"
  108. "ttlValue INTEGER,"
  109. "ttlCount INTEGER,"
  110. "PRIMARY KEY(ipAddress,ttlValue));"
  111. "CREATE INDEX ipAddressTTL ON ip_ttl(ipAddress);";
  112. db->exec(createTable);
  113. SQLite::Statement query(*db, "INSERT INTO ip_ttl VALUES (?, ?, ?)");
  114. for (auto it = ttlDistribution.begin(); it != ttlDistribution.end(); ++it) {
  115. const ipAddress_ttl &e = it->first;
  116. query.bindNoCopy(1, e.ipAddress);
  117. query.bind(2, e.ttlValue);
  118. query.bind(3, it->second);
  119. query.exec();
  120. query.reset();
  121. if (PyErr_CheckSignals()) throw py::error_already_set();
  122. }
  123. transaction.commit();
  124. }
  125. catch (std::exception &e) {
  126. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  127. }
  128. }
  129. /**
  130. * Writes the MSS distribution into the database.
  131. * @param mssDistribution The MSS distribution from class statistics.
  132. */
  133. void statistics_db::writeStatisticsMSS(const std::unordered_map<ipAddress_mss, int> &mssDistribution) {
  134. try {
  135. db->exec("DROP TABLE IF EXISTS tcp_mss");
  136. SQLite::Transaction transaction(*db);
  137. const char *createTable = "CREATE TABLE tcp_mss ("
  138. "ipAddress TEXT,"
  139. "mssValue INTEGER,"
  140. "mssCount INTEGER,"
  141. "PRIMARY KEY(ipAddress,mssValue));"
  142. "CREATE INDEX ipAddressMSS ON tcp_mss(ipAddress);";
  143. db->exec(createTable);
  144. SQLite::Statement query(*db, "INSERT INTO tcp_mss VALUES (?, ?, ?)");
  145. for (auto it = mssDistribution.begin(); it != mssDistribution.end(); ++it) {
  146. const ipAddress_mss &e = it->first;
  147. query.bindNoCopy(1, e.ipAddress);
  148. query.bind(2, e.mssValue);
  149. query.bind(3, it->second);
  150. query.exec();
  151. query.reset();
  152. if (PyErr_CheckSignals()) throw py::error_already_set();
  153. }
  154. transaction.commit();
  155. }
  156. catch (std::exception &e) {
  157. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  158. }
  159. }
  160. /**
  161. * Writes the ToS distribution into the database.
  162. * @param tosDistribution The ToS distribution from class statistics.
  163. */
  164. void statistics_db::writeStatisticsToS(const std::unordered_map<ipAddress_tos, int> &tosDistribution) {
  165. try {
  166. db->exec("DROP TABLE IF EXISTS ip_tos");
  167. SQLite::Transaction transaction(*db);
  168. const char *createTable = "CREATE TABLE ip_tos ("
  169. "ipAddress TEXT,"
  170. "tosValue INTEGER,"
  171. "tosCount INTEGER,"
  172. "PRIMARY KEY(ipAddress,tosValue));";
  173. db->exec(createTable);
  174. SQLite::Statement query(*db, "INSERT INTO ip_tos VALUES (?, ?, ?)");
  175. for (auto it = tosDistribution.begin(); it != tosDistribution.end(); ++it) {
  176. const ipAddress_tos &e = it->first;
  177. query.bindNoCopy(1, e.ipAddress);
  178. query.bind(2, e.tosValue);
  179. query.bind(3, it->second);
  180. query.exec();
  181. query.reset();
  182. if (PyErr_CheckSignals()) throw py::error_already_set();
  183. }
  184. transaction.commit();
  185. }
  186. catch (std::exception &e) {
  187. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  188. }
  189. }
  190. /**
  191. * Writes the window size distribution into the database.
  192. * @param winDistribution The window size distribution from class statistics.
  193. */
  194. void statistics_db::writeStatisticsWin(const std::unordered_map<ipAddress_win, int> &winDistribution) {
  195. try {
  196. db->exec("DROP TABLE IF EXISTS tcp_win");
  197. SQLite::Transaction transaction(*db);
  198. const char *createTable = "CREATE TABLE tcp_win ("
  199. "ipAddress TEXT,"
  200. "winSize INTEGER,"
  201. "winCount INTEGER,"
  202. "PRIMARY KEY(ipAddress,winSize));"
  203. "CREATE INDEX ipAddressWIN ON tcp_win(ipAddress);";
  204. db->exec(createTable);
  205. SQLite::Statement query(*db, "INSERT INTO tcp_win VALUES (?, ?, ?)");
  206. for (auto it = winDistribution.begin(); it != winDistribution.end(); ++it) {
  207. const ipAddress_win &e = it->first;
  208. query.bindNoCopy(1, e.ipAddress);
  209. query.bind(2, e.winSize);
  210. query.bind(3, it->second);
  211. query.exec();
  212. query.reset();
  213. if (PyErr_CheckSignals()) throw py::error_already_set();
  214. }
  215. transaction.commit();
  216. }
  217. catch (std::exception &e) {
  218. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  219. }
  220. }
  221. /**
  222. * Writes the protocol distribution into the database.
  223. * @param protocolDistribution The protocol distribution from class statistics.
  224. */
  225. void statistics_db::writeStatisticsProtocols(const std::unordered_map<ipAddress_protocol, entry_protocolStat> &protocolDistribution) {
  226. try {
  227. db->exec("DROP TABLE IF EXISTS ip_protocols");
  228. SQLite::Transaction transaction(*db);
  229. const char *createTable = "CREATE TABLE ip_protocols ("
  230. "ipAddress TEXT,"
  231. "protocolName TEXT COLLATE NOCASE,"
  232. "protocolCount INTEGER,"
  233. "byteCount REAL,"
  234. "PRIMARY KEY(ipAddress,protocolName));";
  235. db->exec(createTable);
  236. SQLite::Statement query(*db, "INSERT INTO ip_protocols VALUES (?, ?, ?, ?)");
  237. for (auto it = protocolDistribution.begin(); it != protocolDistribution.end(); ++it) {
  238. const ipAddress_protocol &e = it->first;
  239. query.bindNoCopy(1, e.ipAddress);
  240. query.bindNoCopy(2, e.protocol);
  241. query.bind(3, it->second.count);
  242. query.bind(4, it->second.byteCount);
  243. query.exec();
  244. query.reset();
  245. if (PyErr_CheckSignals()) throw py::error_already_set();
  246. }
  247. transaction.commit();
  248. }
  249. catch (std::exception &e) {
  250. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  251. }
  252. }
  253. /**
  254. * Writes the port statistics into the database.
  255. * @param portsStatistics The ports statistics from class statistics.
  256. */
  257. void statistics_db::writeStatisticsPorts(const std::unordered_map<ipAddress_inOut_port, entry_portStat> &portsStatistics) {
  258. try {
  259. db->exec("DROP TABLE IF EXISTS ip_ports");
  260. SQLite::Transaction transaction(*db);
  261. const char *createTable = "CREATE TABLE ip_ports ("
  262. "ipAddress TEXT,"
  263. "portDirection TEXT COLLATE NOCASE,"
  264. "portNumber INTEGER,"
  265. "portCount INTEGER,"
  266. "byteCount REAL,"
  267. "portProtocol TEXT COLLATE NOCASE,"
  268. "portService TEXT COLLATE NOCASE,"
  269. "PRIMARY KEY(ipAddress,portDirection,portNumber,portProtocol));";
  270. db->exec(createTable);
  271. SQLite::Statement query(*db, "INSERT INTO ip_ports VALUES (?, ?, ?, ?, ?, ?, ?)");
  272. for (auto it = portsStatistics.begin(); it != portsStatistics.end(); ++it) {
  273. const ipAddress_inOut_port &e = it->first;
  274. std::string portService = portServices[e.portNumber];
  275. if(portService.empty()) {
  276. if(portServices[{0}] == "unavailable") {portService = "unavailable";}
  277. else {portService = "unknown";}
  278. }
  279. query.bindNoCopy(1, e.ipAddress);
  280. query.bindNoCopy(2, e.trafficDirection);
  281. query.bind(3, e.portNumber);
  282. query.bind(4, it->second.count);
  283. query.bind(5, it->second.byteCount);
  284. query.bindNoCopy(6, e.protocol);
  285. query.bindNoCopy(7, portService);
  286. query.exec();
  287. query.reset();
  288. if (PyErr_CheckSignals()) throw py::error_already_set();
  289. }
  290. transaction.commit();
  291. }
  292. catch (std::exception &e) {
  293. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  294. }
  295. }
  296. /**
  297. * Writes the IP address -> MAC address mapping into the database.
  298. * @param IpMacStatistics The IP address -> MAC address mapping from class statistics.
  299. */
  300. void statistics_db::writeStatisticsIpMac(const std::unordered_map<std::string, std::string> &IpMacStatistics) {
  301. try {
  302. db->exec("DROP TABLE IF EXISTS ip_mac");
  303. SQLite::Transaction transaction(*db);
  304. const char *createTable = "CREATE TABLE ip_mac ("
  305. "ipAddress TEXT,"
  306. "macAddress TEXT COLLATE NOCASE,"
  307. "PRIMARY KEY(ipAddress));";
  308. db->exec(createTable);
  309. SQLite::Statement query(*db, "INSERT INTO ip_mac VALUES (?, ?)");
  310. for (auto it = IpMacStatistics.begin(); it != IpMacStatistics.end(); ++it) {
  311. query.bindNoCopy(1, it->first);
  312. query.bindNoCopy(2, it->second);
  313. query.exec();
  314. query.reset();
  315. if (PyErr_CheckSignals()) throw py::error_already_set();
  316. }
  317. transaction.commit();
  318. }
  319. catch (std::exception &e) {
  320. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  321. }
  322. }
  323. /**
  324. * Writes general file statistics into the database.
  325. * @param packetCount The number of packets in the PCAP file.
  326. * @param captureDuration The duration of the capture (format: SS.mmmmmm).
  327. * @param timestampFirstPkt The timestamp of the first packet in the PCAP file.
  328. * @param timestampLastPkt The timestamp of the last packet in the PCAP file.
  329. * @param avgPacketRate The average packet rate (#packets / capture duration).
  330. * @param avgPacketSize The average packet size.
  331. * @param avgPacketsSentPerHost The average packets sent per host.
  332. * @param avgBandwidthIn The average incoming bandwidth.
  333. * @param avgBandwidthOut The average outgoing bandwidth.
  334. */
  335. void statistics_db::writeStatisticsFile(int packetCount, float captureDuration, std::string timestampFirstPkt,
  336. std::string timestampLastPkt, float avgPacketRate, float avgPacketSize,
  337. float avgPacketsSentPerHost, float avgBandwidthIn, float avgBandwidthOut,
  338. bool doExtraTests) {
  339. try {
  340. db->exec("DROP TABLE IF EXISTS file_statistics");
  341. SQLite::Transaction transaction(*db);
  342. const char *createTable = "CREATE TABLE file_statistics ("
  343. "packetCount INTEGER,"
  344. "captureDuration TEXT,"
  345. "timestampFirstPacket TEXT,"
  346. "timestampLastPacket TEXT,"
  347. "avgPacketRate REAL,"
  348. "avgPacketSize REAL,"
  349. "avgPacketsSentPerHost REAL,"
  350. "avgBandwidthIn REAL,"
  351. "avgBandwidthOut REAL,"
  352. "doExtraTests INTEGER);";
  353. db->exec(createTable);
  354. SQLite::Statement query(*db, "INSERT INTO file_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
  355. query.bind(1, packetCount);
  356. query.bind(2, captureDuration);
  357. query.bind(3, timestampFirstPkt);
  358. query.bind(4, timestampLastPkt);
  359. query.bind(5, avgPacketRate);
  360. query.bind(6, avgPacketSize);
  361. query.bind(7, avgPacketsSentPerHost);
  362. query.bind(8, avgBandwidthIn);
  363. query.bind(9, avgBandwidthOut);
  364. query.bind(10, doExtraTests);
  365. query.exec();
  366. transaction.commit();
  367. }
  368. catch (std::exception &e) {
  369. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  370. }
  371. }
  372. /**
  373. * Writes the conversation statistics into the database.
  374. * @param convStatistics The conversation from class statistics.
  375. */
  376. void statistics_db::writeStatisticsConv(std::unordered_map<conv, entry_convStat> &convStatistics){
  377. try {
  378. db->exec("DROP TABLE IF EXISTS conv_statistics");
  379. SQLite::Transaction transaction(*db);
  380. const char *createTable = "CREATE TABLE conv_statistics ("
  381. "ipAddressA TEXT,"
  382. "portA INTEGER,"
  383. "ipAddressB TEXT,"
  384. "portB INTEGER,"
  385. "pktsCount INTEGER,"
  386. "avgPktRate REAL,"
  387. "avgDelay INTEGER,"
  388. "minDelay INTEGER,"
  389. "maxDelay INTEGER,"
  390. "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB));";
  391. db->exec(createTable);
  392. SQLite::Statement query(*db, "INSERT INTO conv_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)");
  393. // Calculate average of inter-arrival times and average packet rate
  394. for (auto it = convStatistics.begin(); it != convStatistics.end(); ++it) {
  395. const conv &f = it->first;
  396. entry_convStat &e = it->second;
  397. if (e.pkts_count > 1){
  398. int sumDelay = 0;
  399. int minDelay = -1;
  400. int maxDelay = -1;
  401. for (int i = 0; (unsigned) i < e.interarrival_time.size(); i++) {
  402. sumDelay += e.interarrival_time[i].count();
  403. if (maxDelay < e.interarrival_time[i].count())
  404. maxDelay = e.interarrival_time[i].count();
  405. if (minDelay > e.interarrival_time[i].count() || minDelay == -1)
  406. minDelay = e.interarrival_time[i].count();
  407. }
  408. if (e.interarrival_time.size() > 0)
  409. e.avg_interarrival_time = (std::chrono::microseconds) sumDelay / e.interarrival_time.size(); // average
  410. else e.avg_interarrival_time = (std::chrono::microseconds) 0;
  411. std::chrono::microseconds start_timesttamp = e.pkts_timestamp[0];
  412. std::chrono::microseconds end_timesttamp = e.pkts_timestamp.back();
  413. std::chrono::microseconds conn_duration = end_timesttamp - start_timesttamp;
  414. e.avg_pkt_rate = (float) e.pkts_count * 1000000 / conn_duration.count(); // pkt per sec
  415. query.bindNoCopy(1, f.ipAddressA);
  416. query.bind(2, f.portA);
  417. query.bindNoCopy(3, f.ipAddressB);
  418. query.bind(4, f.portB);
  419. query.bind(5, (int) e.pkts_count);
  420. query.bind(6, (float) e.avg_pkt_rate);
  421. query.bind(7, (int) e.avg_interarrival_time.count());
  422. query.bind(8, minDelay);
  423. query.bind(9, maxDelay);
  424. query.exec();
  425. query.reset();
  426. if (PyErr_CheckSignals()) throw py::error_already_set();
  427. }
  428. }
  429. transaction.commit();
  430. }
  431. catch (std::exception &e) {
  432. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  433. }
  434. }
  435. /**
  436. * Writes the extended statistics for every conversation into the database.
  437. * @param conv_statistics_extended The extended conversation statistics from class statistics.
  438. */
  439. void statistics_db::writeStatisticsConvExt(std::unordered_map<convWithProt, entry_convStatExt> &conv_statistics_extended){
  440. try {
  441. db->exec("DROP TABLE IF EXISTS conv_statistics_extended");
  442. SQLite::Transaction transaction(*db);
  443. const char *createTable = "CREATE TABLE conv_statistics_extended ("
  444. "ipAddressA TEXT,"
  445. "portA INTEGER,"
  446. "ipAddressB TEXT,"
  447. "portB INTEGER,"
  448. "protocol TEXT COLLATE NOCASE,"
  449. "pktsCount INTEGER,"
  450. "avgPktRate REAL,"
  451. "avgDelay INTEGER,"
  452. "minDelay INTEGER,"
  453. "maxDelay INTEGER,"
  454. "avgIntervalPktCount REAL,"
  455. "avgTimeBetweenIntervals REAL,"
  456. "avgIntervalTime REAL,"
  457. "totalConversationDuration REAL,"
  458. "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB,protocol));";
  459. db->exec(createTable);
  460. SQLite::Statement query(*db, "INSERT INTO conv_statistics_extended VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
  461. // iterate over every conversation and interval aggregation pair and store the respective values in the database
  462. for (auto it = conv_statistics_extended.begin(); it != conv_statistics_extended.end(); ++it) {
  463. const convWithProt &f = it->first;
  464. entry_convStatExt &e = it->second;
  465. int sumDelay = 0;
  466. int minDelay = -1;
  467. int maxDelay = -1;
  468. if (e.pkts_count > 1 && f.protocol == "TCP"){
  469. for (int i = 0; (unsigned) i < e.interarrival_time.size(); i++) {
  470. sumDelay += e.interarrival_time[i].count();
  471. if (maxDelay < e.interarrival_time[i].count())
  472. maxDelay = e.interarrival_time[i].count();
  473. if (minDelay > e.interarrival_time[i].count() || minDelay == -1)
  474. minDelay = e.interarrival_time[i].count();
  475. }
  476. if (e.interarrival_time.size() > 0)
  477. e.avg_interarrival_time = (std::chrono::microseconds) sumDelay / e.interarrival_time.size(); // average
  478. else
  479. e.avg_interarrival_time = (std::chrono::microseconds) 0;
  480. }
  481. if (e.total_comm_duration == 0)
  482. e.avg_pkt_rate = e.pkts_count; // pkt per sec
  483. else
  484. e.avg_pkt_rate = e.pkts_count / e.total_comm_duration;
  485. if (e.avg_int_pkts_count > 0){
  486. query.bindNoCopy(1, f.ipAddressA);
  487. query.bind(2, f.portA);
  488. query.bindNoCopy(3, f.ipAddressB);
  489. query.bind(4, f.portB);
  490. query.bindNoCopy(5, f.protocol);
  491. query.bind(6, (int) e.pkts_count);
  492. query.bind(7, (float) e.avg_pkt_rate);
  493. if (f.protocol == "UDP" || (f.protocol == "TCP" && e.pkts_count < 2))
  494. query.bind(8);
  495. else
  496. query.bind(8, (int) e.avg_interarrival_time.count());
  497. if (minDelay == -1)
  498. query.bind(9);
  499. else
  500. query.bind(9, minDelay);
  501. if (maxDelay == -1)
  502. query.bind(10);
  503. else
  504. query.bind(10, maxDelay);
  505. query.bind(11, e.avg_int_pkts_count);
  506. query.bind(12, e.avg_time_between_ints);
  507. query.bind(13, e.avg_interval_time);
  508. query.bind(14, e.total_comm_duration);
  509. query.exec();
  510. query.reset();
  511. if (PyErr_CheckSignals()) throw py::error_already_set();
  512. }
  513. }
  514. transaction.commit();
  515. }
  516. catch (std::exception &e) {
  517. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  518. }
  519. }
  520. /**
  521. * Writes the interval statistics into the database.
  522. * @param intervalStatistics The interval entries from class statistics.
  523. */
  524. void statistics_db::writeStatisticsInterval(const std::unordered_map<std::string, entry_intervalStat> &intervalStatistics){
  525. try {
  526. db->exec("DROP TABLE IF EXISTS interval_statistics");
  527. SQLite::Transaction transaction(*db);
  528. const char *createTable = "CREATE TABLE interval_statistics ("
  529. "lastPktTimestamp TEXT,"
  530. "pktsCount INTEGER,"
  531. "kBytes REAL,"
  532. "ipSrcEntropy REAL,"
  533. "ipDstEntropy REAL,"
  534. "ipSrcCumEntropy REAL,"
  535. "ipDstCumEntropy REAL,"
  536. "payloadCount INTEGER,"
  537. "incorrectTCPChecksumCount INTEGER,"
  538. "correctTCPChecksumCount INTEGER,"
  539. "newIPCount INTEGER,"
  540. "newPortCount INTEGER,"
  541. "newTTLCount INTEGER,"
  542. "newWinSizeCount INTEGER,"
  543. "newToSCount INTEGER,"
  544. "newMSSCount INTEGER,"
  545. "PRIMARY KEY(lastPktTimestamp));";
  546. db->exec(createTable);
  547. SQLite::Statement query(*db, "INSERT INTO interval_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
  548. for (auto it = intervalStatistics.begin(); it != intervalStatistics.end(); ++it) {
  549. const entry_intervalStat &e = it->second;
  550. query.bindNoCopy(1, it->first);
  551. query.bind(2, (int)e.pkts_count);
  552. query.bind(3, e.kbytes);
  553. query.bind(4, e.ip_src_entropy);
  554. query.bind(5, e.ip_dst_entropy);
  555. query.bind(6, e.ip_src_cum_entropy);
  556. query.bind(7, e.ip_dst_cum_entropy);
  557. query.bind(8, e.payload_count);
  558. query.bind(9, e.incorrect_tcp_checksum_count);
  559. query.bind(10, e.correct_tcp_checksum_count);
  560. query.bind(11, e.novel_ip_count);
  561. query.bind(12, e.novel_port_count);
  562. query.bind(13, e.novel_ttl_count);
  563. query.bind(14, e.novel_win_size_count);
  564. query.bind(15, e.novel_tos_count);
  565. query.bind(16, e.novel_mss_count);
  566. query.exec();
  567. query.reset();
  568. if (PyErr_CheckSignals()) throw py::error_already_set();
  569. }
  570. transaction.commit();
  571. }
  572. catch (std::exception &e) {
  573. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  574. }
  575. }
  576. void statistics_db::writeDbVersion(){
  577. try {
  578. SQLite::Transaction transaction(*db);
  579. SQLite::Statement query(*db, std::string("PRAGMA user_version = ") + std::to_string(DB_VERSION) + ";");
  580. query.exec();
  581. transaction.commit();
  582. }
  583. catch (std::exception &e) {
  584. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  585. }
  586. }
  587. /**
  588. * Reads all ports and their corresponding services from nmap-services-tcp.csv and stores them into portServices vector.
  589. */
  590. void statistics_db::readPortServicesFromNmap()
  591. {
  592. std::string portnumber;
  593. std::string service;
  594. std::string dump;
  595. std::string nmapPath = getNmapPath();
  596. std::ifstream reader;
  597. reader.open(nmapPath, std::ios::in);
  598. if(reader.is_open())
  599. {
  600. getline(reader, dump);
  601. while(!reader.eof())
  602. {
  603. getline(reader, portnumber, ',');
  604. getline(reader, service, ',');
  605. getline(reader, dump);
  606. if(!service.empty() && !portnumber.empty())
  607. {
  608. portServices.insert({std::stoi(portnumber), service});
  609. }
  610. }
  611. reader.close();
  612. }
  613. else
  614. {
  615. std::cerr << "WARNING: " << nmapPath << " could not be opened! PortServices can't be read!" << std::endl;
  616. portServices.insert({0, "unavailable"});
  617. }
  618. }
  619. /**
  620. * Gets the path to nmap-services-tcp.csv and makes sure the file is reached from any working directory within "/code"
  621. * because the working directory can be different when running tests. Checks if the file/path exists and warns the user.
  622. */
  623. std::string statistics_db::getNmapPath()
  624. {
  625. //The different working directory paths according to how the database is built:
  626. //<ID2T> stands for the directory id2t.sh is located in
  627. //From tests(e.g. pycharm) /<ID2T>/code/Test
  628. //From run_tests.sh /<ID2T>/code
  629. //From id2t.sh /<ID2T>
  630. std::string filename = "nmap-services-tcp.csv";
  631. std::string resourcesDir = "/resources/";
  632. std::string codeDir = "/code";
  633. std::string testDir = "/code/Test";
  634. char buff[FILENAME_MAX];
  635. // Working directory
  636. std::string dir(getcwd(buff, FILENAME_MAX));
  637. // Check if working directory is id2t.sh directory(try to reach file from working directory)
  638. if(pathExists(dir + resourcesDir + filename))
  639. {
  640. return dir + resourcesDir + filename;
  641. }
  642. // If working directory is test directory(happens if tests are called from pycharm for example)
  643. else if(dir.rfind(testDir) == (dir.size()-testDir.size()))
  644. {
  645. // Remove test directory from path
  646. dir = dir.substr(0, (dir.size()-testDir.size()));
  647. }
  648. // If working directory is code directory(happens if tests are called with testscript)
  649. else if(dir.rfind(codeDir) == (dir.size()-codeDir.size()))
  650. {
  651. // Remove code directory from path
  652. dir = dir.substr(0, (dir.size()-codeDir.size()));
  653. }
  654. dir = dir + resourcesDir + filename;
  655. return dir;
  656. }
  657. bool statistics_db::pathExists(std::string path)
  658. {
  659. std::ifstream file;
  660. file.open(path, std::ios::in);
  661. if(file.is_open())
  662. {
  663. file.close();
  664. return true;
  665. }
  666. else
  667. {
  668. return false;
  669. }
  670. }
  671. /**
  672. * Writes the unrecognized PDUs into the database.
  673. * @param unrecognized_PDUs The unrecognized PDUs from class statistics.
  674. */
  675. void statistics_db::writeStatisticsUnrecognizedPDUs(const std::unordered_map<unrecognized_PDU, unrecognized_PDU_stat>
  676. &unrecognized_PDUs) {
  677. try {
  678. db->exec("DROP TABLE IF EXISTS unrecognized_pdus");
  679. SQLite::Transaction transaction(*db);
  680. const char *createTable = "CREATE TABLE unrecognized_pdus ("
  681. "srcMac TEXT COLLATE NOCASE,"
  682. "dstMac TEXT COLLATE NOCASE,"
  683. "etherType INTEGER,"
  684. "pktCount INTEGER,"
  685. "timestampLastOccurrence TEXT,"
  686. "PRIMARY KEY(srcMac,dstMac,etherType));";
  687. db->exec(createTable);
  688. SQLite::Statement query(*db, "INSERT INTO unrecognized_pdus VALUES (?, ?, ?, ?, ?)");
  689. for (auto it = unrecognized_PDUs.begin(); it != unrecognized_PDUs.end(); ++it) {
  690. const unrecognized_PDU &e = it->first;
  691. query.bindNoCopy(1, e.srcMacAddress);
  692. query.bindNoCopy(2, e.dstMacAddress);
  693. query.bind(3, e.typeNumber);
  694. query.bind(4, it->second.count);
  695. query.bindNoCopy(5, it->second.timestamp_last_occurrence);
  696. query.exec();
  697. query.reset();
  698. if (PyErr_CheckSignals()) throw py::error_already_set();
  699. }
  700. transaction.commit();
  701. }
  702. catch (std::exception &e) {
  703. std::cerr << "Exception in statistics_db::" << __func__ << ": " << e.what() << std::endl;
  704. }
  705. }