statistics_db.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. #include "statistics_db.h"
  2. #include <math.h>
  3. #include <iostream>
  4. #include <sstream>
  5. #include <fstream>
  6. #include <unistd.h>
  7. #include <stdio.h>
  8. /**
  9. * Creates a new statistics_db object. Opens an existing database located at database_path. If not existing, creates
  10. * a new database at database_path.
  11. * @param database_path The file path of the database.
  12. */
  13. statistics_db::statistics_db(std::string database_path) {
  14. // Append file extension if not present
  15. if (database_path.find(".sqlite3") == database_path.npos) {
  16. database_path += ".sqlite3";
  17. }
  18. // creates the DB if not existing, opens the DB for read+write access
  19. db.reset(new SQLite::Database(database_path, SQLite::OPEN_CREATE | SQLite::OPEN_READWRITE));
  20. // Read ports and services into portServices vector
  21. readPortServicesFromNmap();
  22. }
  23. /**
  24. * Writes the IP statistics into the database.
  25. * @param ipStatistics The IP statistics from class statistics.
  26. */
  27. void statistics_db::writeStatisticsIP(std::unordered_map<std::string, entry_ipStat> ipStatistics) {
  28. try {
  29. db->exec("DROP TABLE IF EXISTS ip_statistics");
  30. SQLite::Transaction transaction(*db);
  31. const char *createTable = "CREATE TABLE ip_statistics ( "
  32. "ipAddress TEXT, "
  33. "pktsReceived INTEGER, "
  34. "pktsSent INTEGER, "
  35. "kbytesReceived REAL, "
  36. "kbytesSent REAL, "
  37. "maxPktRate REAL,"
  38. "minPktRate REAL,"
  39. "ipClass TEXT COLLATE NOCASE, "
  40. "PRIMARY KEY(ipAddress));";
  41. db->exec(createTable);
  42. SQLite::Statement query(*db, "INSERT INTO ip_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?)");
  43. for (auto it = ipStatistics.begin(); it != ipStatistics.end(); ++it) {
  44. entry_ipStat e = it->second;
  45. query.bind(1, it->first);
  46. query.bind(2, (int) e.pkts_received);
  47. query.bind(3, (int) e.pkts_sent);
  48. query.bind(4, e.kbytes_received);
  49. query.bind(5, e.kbytes_sent);
  50. query.bind(6, e.max_interval_pkt_rate);
  51. query.bind(7, e.min_interval_pkt_rate);
  52. query.bind(8, e.ip_class);
  53. query.exec();
  54. query.reset();
  55. }
  56. transaction.commit();
  57. }
  58. catch (std::exception &e) {
  59. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  60. }
  61. }
  62. /**
  63. * Writes the TTL distribution into the database.
  64. * @param ttlDistribution The TTL distribution from class statistics.
  65. */
  66. void statistics_db::writeStatisticsTTL(std::unordered_map<ipAddress_ttl, int> ttlDistribution) {
  67. try {
  68. db->exec("DROP TABLE IF EXISTS ip_ttl");
  69. SQLite::Transaction transaction(*db);
  70. const char *createTable = "CREATE TABLE ip_ttl ("
  71. "ipAddress TEXT,"
  72. "ttlValue INTEGER,"
  73. "ttlCount INTEGER,"
  74. "PRIMARY KEY(ipAddress,ttlValue));";
  75. db->exec(createTable);
  76. SQLite::Statement query(*db, "INSERT INTO ip_ttl VALUES (?, ?, ?)");
  77. for (auto it = ttlDistribution.begin(); it != ttlDistribution.end(); ++it) {
  78. ipAddress_ttl e = it->first;
  79. query.bind(1, e.ipAddress);
  80. query.bind(2, e.ttlValue);
  81. query.bind(3, it->second);
  82. query.exec();
  83. query.reset();
  84. }
  85. transaction.commit();
  86. }
  87. catch (std::exception &e) {
  88. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  89. }
  90. }
  91. /**
  92. * Writes the MSS distribution into the database.
  93. * @param mssDistribution The MSS distribution from class statistics.
  94. */
  95. void statistics_db::writeStatisticsMSS(std::unordered_map<ipAddress_mss, int> mssDistribution) {
  96. try {
  97. db->exec("DROP TABLE IF EXISTS tcp_mss");
  98. SQLite::Transaction transaction(*db);
  99. const char *createTable = "CREATE TABLE tcp_mss ("
  100. "ipAddress TEXT,"
  101. "mssValue INTEGER,"
  102. "mssCount INTEGER,"
  103. "PRIMARY KEY(ipAddress,mssValue));";
  104. db->exec(createTable);
  105. SQLite::Statement query(*db, "INSERT INTO tcp_mss VALUES (?, ?, ?)");
  106. for (auto it = mssDistribution.begin(); it != mssDistribution.end(); ++it) {
  107. ipAddress_mss e = it->first;
  108. query.bind(1, e.ipAddress);
  109. query.bind(2, e.mssValue);
  110. query.bind(3, it->second);
  111. query.exec();
  112. query.reset();
  113. }
  114. transaction.commit();
  115. }
  116. catch (std::exception &e) {
  117. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  118. }
  119. }
  120. /**
  121. * Writes the ToS distribution into the database.
  122. * @param tosDistribution The ToS distribution from class statistics.
  123. */
  124. void statistics_db::writeStatisticsToS(std::unordered_map<ipAddress_tos, int> tosDistribution) {
  125. try {
  126. db->exec("DROP TABLE IF EXISTS ip_tos");
  127. SQLite::Transaction transaction(*db);
  128. const char *createTable = "CREATE TABLE ip_tos ("
  129. "ipAddress TEXT,"
  130. "tosValue INTEGER,"
  131. "tosCount INTEGER,"
  132. "PRIMARY KEY(ipAddress,tosValue));";
  133. db->exec(createTable);
  134. SQLite::Statement query(*db, "INSERT INTO ip_tos VALUES (?, ?, ?)");
  135. for (auto it = tosDistribution.begin(); it != tosDistribution.end(); ++it) {
  136. ipAddress_tos e = it->first;
  137. query.bind(1, e.ipAddress);
  138. query.bind(2, e.tosValue);
  139. query.bind(3, it->second);
  140. query.exec();
  141. query.reset();
  142. }
  143. transaction.commit();
  144. }
  145. catch (std::exception &e) {
  146. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  147. }
  148. }
  149. /**
  150. * Writes the window size distribution into the database.
  151. * @param winDistribution The window size distribution from class statistics.
  152. */
  153. void statistics_db::writeStatisticsWin(std::unordered_map<ipAddress_win, int> winDistribution) {
  154. try {
  155. db->exec("DROP TABLE IF EXISTS tcp_win");
  156. SQLite::Transaction transaction(*db);
  157. const char *createTable = "CREATE TABLE tcp_win ("
  158. "ipAddress TEXT,"
  159. "winSize INTEGER,"
  160. "winCount INTEGER,"
  161. "PRIMARY KEY(ipAddress,winSize));";
  162. db->exec(createTable);
  163. SQLite::Statement query(*db, "INSERT INTO tcp_win VALUES (?, ?, ?)");
  164. for (auto it = winDistribution.begin(); it != winDistribution.end(); ++it) {
  165. ipAddress_win e = it->first;
  166. query.bind(1, e.ipAddress);
  167. query.bind(2, e.winSize);
  168. query.bind(3, it->second);
  169. query.exec();
  170. query.reset();
  171. }
  172. transaction.commit();
  173. }
  174. catch (std::exception &e) {
  175. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  176. }
  177. }
  178. /**
  179. * Writes the protocol distribution into the database.
  180. * @param protocolDistribution The protocol distribution from class statistics.
  181. */
  182. void statistics_db::writeStatisticsProtocols(std::unordered_map<ipAddress_protocol, entry_protocolStat> protocolDistribution) {
  183. try {
  184. db->exec("DROP TABLE IF EXISTS ip_protocols");
  185. SQLite::Transaction transaction(*db);
  186. const char *createTable = "CREATE TABLE ip_protocols ("
  187. "ipAddress TEXT,"
  188. "protocolName TEXT COLLATE NOCASE,"
  189. "protocolCount INTEGER,"
  190. "byteCount REAL,"
  191. "PRIMARY KEY(ipAddress,protocolName));";
  192. db->exec(createTable);
  193. SQLite::Statement query(*db, "INSERT INTO ip_protocols VALUES (?, ?, ?, ?)");
  194. for (auto it = protocolDistribution.begin(); it != protocolDistribution.end(); ++it) {
  195. ipAddress_protocol e = it->first;
  196. query.bind(1, e.ipAddress);
  197. query.bind(2, e.protocol);
  198. query.bind(3, it->second.count);
  199. query.bind(4, it->second.byteCount);
  200. query.exec();
  201. query.reset();
  202. }
  203. transaction.commit();
  204. }
  205. catch (std::exception &e) {
  206. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  207. }
  208. }
  209. /**
  210. * Writes the port statistics into the database.
  211. * @param portsStatistics The ports statistics from class statistics.
  212. */
  213. void statistics_db::writeStatisticsPorts(std::unordered_map<ipAddress_inOut_port, entry_portStat> portsStatistics) {
  214. try {
  215. db->exec("DROP TABLE IF EXISTS ip_ports");
  216. SQLite::Transaction transaction(*db);
  217. const char *createTable = "CREATE TABLE ip_ports ("
  218. "ipAddress TEXT,"
  219. "portDirection TEXT COLLATE NOCASE,"
  220. "portNumber INTEGER,"
  221. "portCount INTEGER,"
  222. "byteCount REAL,"
  223. "portProtocol TEXT COLLATE NOCASE,"
  224. "portService TEXT COLLATE NOCASE,"
  225. "PRIMARY KEY(ipAddress,portDirection,portNumber));";
  226. db->exec(createTable);
  227. SQLite::Statement query(*db, "INSERT INTO ip_ports VALUES (?, ?, ?, ?, ?, ?, ?)");
  228. for (auto it = portsStatistics.begin(); it != portsStatistics.end(); ++it) {
  229. ipAddress_inOut_port e = it->first;
  230. std::string portService = portServices[e.portNumber];
  231. if(portService.empty()) {portService = "unknown";}
  232. query.bind(1, e.ipAddress);
  233. query.bind(2, e.trafficDirection);
  234. query.bind(3, e.portNumber);
  235. query.bind(4, it->second.count);
  236. query.bind(5, it->second.byteCount);
  237. query.bind(6, e.protocol);
  238. query.bind(7, portService);
  239. query.exec();
  240. query.reset();
  241. }
  242. transaction.commit();
  243. }
  244. catch (std::exception &e) {
  245. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  246. }
  247. }
  248. /**
  249. * Writes the IP address -> MAC address mapping into the database.
  250. * @param IpMacStatistics The IP address -> MAC address mapping from class statistics.
  251. */
  252. void statistics_db::writeStatisticsIpMac(std::unordered_map<std::string, std::string> IpMacStatistics) {
  253. try {
  254. db->exec("DROP TABLE IF EXISTS ip_mac");
  255. SQLite::Transaction transaction(*db);
  256. const char *createTable = "CREATE TABLE ip_mac ("
  257. "ipAddress TEXT,"
  258. "macAddress TEXT COLLATE NOCASE,"
  259. "PRIMARY KEY(ipAddress));";
  260. db->exec(createTable);
  261. SQLite::Statement query(*db, "INSERT INTO ip_mac VALUES (?, ?)");
  262. for (auto it = IpMacStatistics.begin(); it != IpMacStatistics.end(); ++it) {
  263. query.bind(1, it->first);
  264. query.bind(2, it->second);
  265. query.exec();
  266. query.reset();
  267. }
  268. transaction.commit();
  269. }
  270. catch (std::exception &e) {
  271. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  272. }
  273. }
  274. /**
  275. * Writes general file statistics into the database.
  276. * @param packetCount The number of packets in the PCAP file.
  277. * @param captureDuration The duration of the capture (format: SS.mmmmmm).
  278. * @param timestampFirstPkt The timestamp of the first packet in the PCAP file.
  279. * @param timestampLastPkt The timestamp of the last packet in the PCAP file.
  280. * @param avgPacketRate The average packet rate (#packets / capture duration).
  281. * @param avgPacketSize The average packet size.
  282. * @param avgPacketsSentPerHost The average packets sent per host.
  283. * @param avgBandwidthIn The average incoming bandwidth.
  284. * @param avgBandwidthOut The average outgoing bandwidth.
  285. */
  286. void statistics_db::writeStatisticsFile(int packetCount, float captureDuration, std::string timestampFirstPkt,
  287. std::string timestampLastPkt, float avgPacketRate, float avgPacketSize,
  288. float avgPacketsSentPerHost, float avgBandwidthIn, float avgBandwidthOut) {
  289. try {
  290. db->exec("DROP TABLE IF EXISTS file_statistics");
  291. SQLite::Transaction transaction(*db);
  292. const char *createTable = "CREATE TABLE file_statistics ("
  293. "packetCount INTEGER,"
  294. "captureDuration TEXT,"
  295. "timestampFirstPacket TEXT,"
  296. "timestampLastPacket TEXT,"
  297. "avgPacketRate REAL,"
  298. "avgPacketSize REAL,"
  299. "avgPacketsSentPerHost REAL,"
  300. "avgBandwidthIn REAL,"
  301. "avgBandwidthOut REAL);";
  302. db->exec(createTable);
  303. SQLite::Statement query(*db, "INSERT INTO file_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)");
  304. query.bind(1, packetCount);
  305. query.bind(2, captureDuration);
  306. query.bind(3, timestampFirstPkt);
  307. query.bind(4, timestampLastPkt);
  308. query.bind(5, avgPacketRate);
  309. query.bind(6, avgPacketSize);
  310. query.bind(7, avgPacketsSentPerHost);
  311. query.bind(8, avgBandwidthIn);
  312. query.bind(9, avgBandwidthOut);
  313. query.exec();
  314. transaction.commit();
  315. }
  316. catch (std::exception &e) {
  317. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  318. }
  319. }
  320. /**
  321. * Writes the conversation statistics into the database.
  322. * @param convStatistics The conversation from class statistics.
  323. */
  324. void statistics_db::writeStatisticsConv(std::unordered_map<conv, entry_convStat> convStatistics){
  325. try {
  326. db->exec("DROP TABLE IF EXISTS conv_statistics");
  327. SQLite::Transaction transaction(*db);
  328. const char *createTable = "CREATE TABLE conv_statistics ("
  329. "ipAddressA TEXT,"
  330. "portA INTEGER,"
  331. "ipAddressB TEXT,"
  332. "portB INTEGER,"
  333. "pktsCount INTEGER,"
  334. "avgPktRate REAL,"
  335. "avgDelay INTEGER,"
  336. "minDelay INTEGER,"
  337. "maxDelay INTEGER,"
  338. "PRIMARY KEY(ipAddressA,portA,ipAddressB,portB));";
  339. db->exec(createTable);
  340. SQLite::Statement query(*db, "INSERT INTO conv_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)");
  341. // Calculate average of inter-arrival times and average packet rate
  342. for (auto it = convStatistics.begin(); it != convStatistics.end(); ++it) {
  343. conv f = it->first;
  344. entry_convStat e = it->second;
  345. if (e.pkts_count > 1){
  346. int sumDelay = 0;
  347. int minDelay = -1;
  348. int maxDelay = -1;
  349. for (int i = 0; (unsigned) i < e.interarrival_time.size(); i++) {
  350. sumDelay += e.interarrival_time[i].count();
  351. if (maxDelay < e.interarrival_time[i].count())
  352. maxDelay = e.interarrival_time[i].count();
  353. if (minDelay > e.interarrival_time[i].count() || minDelay == -1)
  354. minDelay = e.interarrival_time[i].count();
  355. }
  356. if (e.interarrival_time.size() > 0)
  357. e.avg_interarrival_time = (std::chrono::microseconds) sumDelay / e.interarrival_time.size(); // average
  358. else e.avg_interarrival_time = (std::chrono::microseconds) 0;
  359. std::chrono::microseconds start_timesttamp = e.pkts_timestamp[0];
  360. std::chrono::microseconds end_timesttamp = e.pkts_timestamp.back();
  361. std::chrono::microseconds conn_duration = end_timesttamp - start_timesttamp;
  362. e.avg_pkt_rate = (float) e.pkts_count * 1000000 / conn_duration.count(); // pkt per sec
  363. query.bind(1, f.ipAddressA);
  364. query.bind(2, f.portA);
  365. query.bind(3, f.ipAddressB);
  366. query.bind(4, f.portB);
  367. query.bind(5, (int) e.pkts_count);
  368. query.bind(6, (float) e.avg_pkt_rate);
  369. query.bind(7, (int) e.avg_interarrival_time.count());
  370. query.bind(8, minDelay);
  371. query.bind(9, maxDelay);
  372. query.exec();
  373. query.reset();
  374. }
  375. }
  376. transaction.commit();
  377. }
  378. catch (std::exception &e) {
  379. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  380. }
  381. }
  382. /**
  383. * Writes the interval statistics into the database.
  384. * @param intervalStatistics The interval entries from class statistics.
  385. */
  386. void statistics_db::writeStatisticsInterval(std::unordered_map<std::string, entry_intervalStat> intervalStatistics){
  387. try {
  388. db->exec("DROP TABLE IF EXISTS interval_statistics");
  389. SQLite::Transaction transaction(*db);
  390. const char *createTable = "CREATE TABLE interval_statistics ("
  391. "lastPktTimestamp TEXT,"
  392. "pktsCount INTEGER,"
  393. "kBytes REAL,"
  394. "ipSrcEntropy REAL,"
  395. "ipDstEntropy REAL,"
  396. "ipSrcCumEntropy REAL,"
  397. "ipDstCumEntropy REAL,"
  398. "payloadCount INTEGER,"
  399. "incorrectTCPChecksumCount INTEGER,"
  400. "correctTCPChecksumCount INTEGER,"
  401. "newIPCount INTEGER,"
  402. "newPortCount INTEGER,"
  403. "newTTLCount INTEGER,"
  404. "newWinSizeCount INTEGER,"
  405. "newToSCount INTEGER,"
  406. "newMSSCount INTEGER,"
  407. "PRIMARY KEY(lastPktTimestamp));";
  408. db->exec(createTable);
  409. SQLite::Statement query(*db, "INSERT INTO interval_statistics VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
  410. for (auto it = intervalStatistics.begin(); it != intervalStatistics.end(); ++it) {
  411. std::string t = it->first;
  412. entry_intervalStat e = it->second;
  413. query.bind(1, t);
  414. query.bind(2, (int)e.pkts_count);
  415. query.bind(3, e.kbytes);
  416. query.bind(4, e.ip_src_entropy);
  417. query.bind(5, e.ip_dst_entropy);
  418. query.bind(6, e.ip_src_cum_entropy);
  419. query.bind(7, e.ip_dst_cum_entropy);
  420. query.bind(8, e.payload_count);
  421. query.bind(9, e.incorrect_tcp_checksum_count);
  422. query.bind(10, e.correct_tcp_checksum_count);
  423. query.bind(11, e.novel_ip_count);
  424. query.bind(12, e.novel_port_count);
  425. query.bind(13, e.novel_ttl_count);
  426. query.bind(14, e.novel_win_size_count);
  427. query.bind(15, e.novel_tos_count);
  428. query.bind(16, e.novel_mss_count);
  429. query.exec();
  430. query.reset();
  431. }
  432. transaction.commit();
  433. }
  434. catch (std::exception &e) {
  435. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  436. }
  437. }
  438. void statistics_db::writeDbVersion(){
  439. try {
  440. SQLite::Transaction transaction(*db);
  441. SQLite::Statement query(*db, std::string("PRAGMA user_version = ") + std::to_string(DB_VERSION) + ";");
  442. query.exec();
  443. transaction.commit();
  444. }
  445. catch (std::exception &e) {
  446. std::cout << "Exception in statistics_db: " << e.what() << std::endl;
  447. }
  448. }
  449. /**
  450. * Reads all ports and their corresponding services from nmap-services-tcp.csv and stores them into portServices vector.
  451. */
  452. void statistics_db::readPortServicesFromNmap()
  453. {
  454. std::string portnumber;
  455. std::string service;
  456. std::string dump;
  457. std::string nmapPath = getNmapPath();
  458. std::ifstream reader;
  459. reader.open(nmapPath, std::ios::in);
  460. if(reader.is_open())
  461. {
  462. getline(reader, dump);
  463. while(!reader.eof())
  464. {
  465. getline(reader, portnumber, ',');
  466. getline(reader, service, ',');
  467. getline(reader, dump);
  468. if(!service.empty() && !portnumber.empty())
  469. {
  470. portServices.insert({std::stoi(portnumber), service});
  471. }
  472. }
  473. reader.close();
  474. }
  475. else
  476. {
  477. std::cerr << "WARNING: " << nmapPath << " could not be opened! PortServices can't be read!" << std::endl;
  478. portServices.insert({0, "unknown"});
  479. }
  480. }
  481. /**
  482. * Gets the path to nmap-services-tcp.csv and makes sure the file is reached from any working directory within "/code"
  483. * because the working directory can be different when running tests. Checks if the file/path exists and warns the user.
  484. */
  485. std::string statistics_db::getNmapPath()
  486. {
  487. char buff[FILENAME_MAX];
  488. std::string dir(getcwd(buff, FILENAME_MAX));
  489. dir = dir.substr(0, dir.rfind("/ID2T-toolkit")) + "/ID2T-toolkit/resources/nmap-services-tcp.csv";
  490. return dir;
  491. }