statistics.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. /*
  2. * Class providing containers and access methods for statistical data collection.
  3. */
  4. #ifndef CPP_PCAPREADER_STATISTICS_H
  5. #define CPP_PCAPREADER_STATISTICS_H
  6. // Aidmar
  7. #include <vector>
  8. #include <unordered_map>
  9. #include <list>
  10. #include <tuple>
  11. #include <tins/timestamp.h>
  12. #include <tins/ip_address.h>
  13. #include "utilities.h"
  14. using namespace Tins;
  15. /*
  16. * Definition of structs used in unordered_map fields
  17. */
  /*
   * Per-IP summary record; this is the return type of statistics::getStatsForIP.
   * All members are plain values without default initializers, so the struct
   * remains an aggregate and can be brace-initialized in declaration order.
   */
  struct ip_stats {
      // Incoming bandwidth in KBits
      float bandwidthKBitsIn;
      // Outgoing bandwidth in KBits
      float bandwidthKBitsOut;
      // Number of incoming packets per second
      float packetPerSecondIn;
      // Number of outgoing packets per second
      float packetPerSecondOut;
      // Average size of sent packets in kbytes
      float AvgPacketSizeSent;
      // Average size of received packets in kbytes
      float AvgPacketSizeRecv;
      // Average value of the TCP option Maximum Segment Size (MSS)
      long AvgMaxSegmentSizeTCP;
  };
  37. // Aidmar
  /*
   * Key type describing one conversation (used as key of an unordered_map):
   * - IP address A and port A
   * - IP address B and port B
   * Equality is member-wise, so (A, B) and (B, A) are different keys.
   */
  struct conv {
      std::string ipAddressA;
      int portA;
      std::string ipAddressB;
      int portB;

      // Two conversations are equal when both endpoints match field by field.
      bool operator==(const conv &other) const {
          return std::tie(ipAddressA, portA, ipAddressB, portB)
                 == std::tie(other.ipAddressA, other.portA, other.ipAddressB, other.portB);
      }
  };
  57. // Aidmar
  /*
   * Key type pairing:
   * - an IP address (IPv4 or IPv6), stored as text
   * - an MSS value
   */
  struct ipAddress_mss {
      std::string ipAddress;
      int mssValue;

      // Equal when both the address and the MSS value match.
      bool operator==(const ipAddress_mss &other) const {
          return std::tie(ipAddress, mssValue)
                 == std::tie(other.ipAddress, other.mssValue);
      }
  };
  71. // Aidmar
  /*
   * Key type pairing:
   * - an IP address (IPv4 or IPv6), stored as text
   * - a ToS value
   */
  struct ipAddress_tos {
      std::string ipAddress;
      int tosValue;

      // Equal when both the address and the ToS value match.
      bool operator==(const ipAddress_tos &other) const {
          return std::tie(ipAddress, tosValue)
                 == std::tie(other.ipAddress, other.tosValue);
      }
  };
  85. // Aidmar
  /*
   * Key type pairing:
   * - an IP address (IPv4 or IPv6), stored as text
   * - a window size
   */
  struct ipAddress_win {
      std::string ipAddress;
      int winSize;

      // Equal when both the address and the window size match.
      bool operator==(const ipAddress_win &other) const {
          return std::tie(ipAddress, winSize)
                 == std::tie(other.ipAddress, other.winSize);
      }
  };
  /*
   * Key type pairing:
   * - an IP address (IPv4 or IPv6), stored as text
   * - a TTL value
   */
  struct ipAddress_ttl {
      std::string ipAddress;
      int ttlValue;

      // Equal when both the address and the TTL value match.
      bool operator==(const ipAddress_ttl &other) const {
          return std::tie(ipAddress, ttlValue)
                 == std::tie(other.ipAddress, other.ttlValue);
      }
  };
  /*
   * Key type pairing:
   * - an IP address (IPv4 or IPv6), stored as text
   * - a protocol name (e.g. TCP, UDP, IPv4, IPv6)
   */
  struct ipAddress_protocol {
      std::string ipAddress;
      std::string protocol;

      // Equal when both the address and the protocol name match.
      bool operator==(const ipAddress_protocol &other) const {
          return std::tie(ipAddress, protocol)
                 == std::tie(other.ipAddress, other.protocol);
      }
  };
  /*
   * Per-IP statistics record (mapped value of the ip_statistics container):
   * - Number of received / sent packets
   * - Data received / sent in kbytes
   * - IP class, per-interval packet rates and their max / min
   * - Fields used to calculate the Mahoney anomaly score
   * - Timestamps of sent / received packets, to collect statistics over a time interval
   */
  struct entry_ipStat {
      long pkts_received;
      long pkts_sent;
      float kbytes_received;
      float kbytes_sent;

      std::string ip_class;
      std::vector<float> interval_pkt_rate;
      float max_pkt_rate;
      float min_pkt_rate;

      // Inputs and results of the Mahoney anomaly score calculation
      long firstAppearAsSenderPktCount;
      long firstAppearAsReceiverPktCount;
      float sourceAnomalyScore;
      float destinationAnomalyScore;

      // Used to collect statistics over a time interval
      std::vector<std::chrono::microseconds> pktsSentTimestamp;
      std::vector<std::chrono::microseconds> pktsReceivedTimestamp;

      // Member-wise equality over every field (floats are compared exactly).
      bool operator==(const entry_ipStat &other) const {
          return std::tie(pkts_received, pkts_sent, kbytes_sent, kbytes_received,
                          interval_pkt_rate, max_pkt_rate, min_pkt_rate, ip_class,
                          firstAppearAsSenderPktCount, firstAppearAsReceiverPktCount,
                          sourceAnomalyScore, destinationAnomalyScore,
                          pktsSentTimestamp, pktsReceivedTimestamp)
                 == std::tie(other.pkts_received, other.pkts_sent, other.kbytes_sent, other.kbytes_received,
                             other.interval_pkt_rate, other.max_pkt_rate, other.min_pkt_rate, other.ip_class,
                             other.firstAppearAsSenderPktCount, other.firstAppearAsReceiverPktCount,
                             other.sourceAnomalyScore, other.destinationAnomalyScore,
                             other.pktsSentTimestamp, other.pktsReceivedTimestamp);
      }
  };
  168. // Aidmar
  /*
   * Struct used to represent interval statistics:
   * - Number of packets and amount of data in kbytes
   * - IP source entropy
   * - IP destination entropy
   * - IP source cumulative entropy
   * - IP destination cumulative entropy
   * - Payload, checksum (correct / incorrect) and ToS (valid / invalid) counters
   * - Counters of new IPs, TTLs, window sizes, ToS values and MSS values
   */
  struct entry_intervalStat {
      int pkts_count;
      float kbytes;
      float ip_src_entropy;
      float ip_dst_entropy;
      float ip_src_cum_entropy;
      float ip_dst_cum_entropy;
      int payload_count;
      int incorrect_checksum_count;
      int correct_checksum_count;
      int invalid_tos_count;
      int valid_tos_count;
      int new_ip_count;
      int new_ttl_count;
      int new_win_size_count;
      int new_tos_count;
      int new_mss_count;
      // Predictability score (currently disabled)
      //float ip_src_pred_score;
      //float ip_dst_pred_score;

      // Member-wise equality over every field (floats are compared exactly).
      // correct_checksum_count is part of the comparison; it was previously
      // left out, so entries differing only in that counter compared equal.
      bool operator==(const entry_intervalStat &other) const {
          return std::tie(pkts_count, kbytes,
                          ip_src_entropy, ip_dst_entropy,
                          ip_src_cum_entropy, ip_dst_cum_entropy,
                          payload_count,
                          incorrect_checksum_count, correct_checksum_count,
                          invalid_tos_count, valid_tos_count,
                          new_ip_count, new_ttl_count, new_win_size_count,
                          new_tos_count, new_mss_count)
                 == std::tie(other.pkts_count, other.kbytes,
                             other.ip_src_entropy, other.ip_dst_entropy,
                             other.ip_src_cum_entropy, other.ip_dst_cum_entropy,
                             other.payload_count,
                             other.incorrect_checksum_count, other.correct_checksum_count,
                             other.invalid_tos_count, other.valid_tos_count,
                             other.new_ip_count, other.new_ttl_count, other.new_win_size_count,
                             other.new_tos_count, other.new_mss_count);
      }
  };
  215. // Aidmar
  /*
   * Per-conversation statistics record (mapped value of the conv_statistics container):
   * - Number of packets
   * - Average packet rate
   * - Timestamp of each packet and delays between packets
   * - Average delay and standard deviation of the delay
   * The earlier per-direction fields (A to B / B to A) are kept below as
   * disabled code only.
   */
  struct entry_convStat {
      // long pkts_A_B;
      // long pkts_B_A;
      // std::vector<std::chrono::microseconds> pkts_A_B_timestamp;
      // std::vector<std::chrono::microseconds> pkts_B_A_timestamp;
      long pkts_count;
      float avg_pkt_rate;
      std::vector<std::chrono::microseconds> pkts_timestamp;
      std::vector<std::chrono::microseconds> pkts_delay;
      std::chrono::microseconds avg_delay;
      std::chrono::microseconds standardDeviation_delay;

      // Member-wise equality over every field (avg_pkt_rate is compared exactly).
      // avg_pkt_rate is compared against other.avg_pkt_rate; it was previously
      // compared with itself, which made the field irrelevant for equality.
      bool operator==(const entry_convStat &other) const {
          return std::tie(pkts_count, avg_pkt_rate, pkts_timestamp, pkts_delay,
                          avg_delay, standardDeviation_delay)
                 == std::tie(other.pkts_count, other.avg_pkt_rate, other.pkts_timestamp, other.pkts_delay,
                             other.avg_delay, other.standardDeviation_delay);
      }
  };
  /*
   * Key type combining:
   * - an IP address (IPv4 or IPv6), stored as text
   * - the traffic direction (out: outgoing connection, in: incoming connection)
   * - a port number
   */
  struct ipAddress_inOut_port {
      std::string ipAddress;
      std::string trafficDirection;
      int portNumber;

      // Equal when address, direction and port all match.
      bool operator==(const ipAddress_inOut_port &other) const {
          return std::tie(ipAddress, trafficDirection, portNumber)
                 == std::tie(other.ipAddress, other.trafficDirection, other.portNumber);
      }
  };
  257. /*
  258. * Definition of hash functions for structs used as key in unordered_map
  259. */
  260. namespace std {
  261. template<>
  262. struct hash<ipAddress_ttl> {
  263. std::size_t operator()(const ipAddress_ttl &k) const {
  264. using std::size_t;
  265. using std::hash;
  266. using std::string;
  267. return ((hash<string>()(k.ipAddress)
  268. ^ (hash<int>()(k.ttlValue) << 1)) >> 1);
  269. }
  270. };
  271. // Aidmar
  272. template<>
  273. struct hash<ipAddress_mss> {
  274. std::size_t operator()(const ipAddress_mss &k) const {
  275. using std::size_t;
  276. using std::hash;
  277. using std::string;
  278. return ((hash<string>()(k.ipAddress)
  279. ^ (hash<int>()(k.mssValue) << 1)) >> 1);
  280. }
  281. };
  282. // Aidmar
  283. template<>
  284. struct hash<ipAddress_tos> {
  285. std::size_t operator()(const ipAddress_tos &k) const {
  286. using std::size_t;
  287. using std::hash;
  288. using std::string;
  289. return ((hash<string>()(k.ipAddress)
  290. ^ (hash<int>()(k.tosValue) << 1)) >> 1);
  291. }
  292. };
  293. // Aidmar
  294. template<>
  295. struct hash<ipAddress_win> {
  296. std::size_t operator()(const ipAddress_win &k) const {
  297. using std::size_t;
  298. using std::hash;
  299. using std::string;
  300. return ((hash<string>()(k.ipAddress)
  301. ^ (hash<int>()(k.winSize) << 1)) >> 1);
  302. }
  303. };
  304. // Aidmar: TO-DO:??
  305. template<>
  306. struct hash<conv> {
  307. std::size_t operator()(const conv &k) const {
  308. using std::size_t;
  309. using std::hash;
  310. using std::string;
  311. return ((hash<string>()(k.ipAddressA)
  312. ^ (hash<int>()(k.portA) << 1)) >> 1)
  313. ^ ((hash<string>()(k.ipAddressB)
  314. ^ (hash<int>()(k.portB) << 1)) >> 1);
  315. }
  316. };
  317. template<>
  318. struct hash<ipAddress_protocol> {
  319. std::size_t operator()(const ipAddress_protocol &k) const {
  320. using std::size_t;
  321. using std::hash;
  322. using std::string;
  323. return ((hash<string>()(k.ipAddress)
  324. ^ (hash<string>()(k.protocol) << 1)) >> 1);
  325. }
  326. };
  327. template<>
  328. struct hash<ipAddress_inOut_port> {
  329. std::size_t operator()(const ipAddress_inOut_port &k) const {
  330. using std::size_t;
  331. using std::hash;
  332. using std::string;
  333. return ((hash<string>()(k.ipAddress)
  334. ^ (hash<string>()(k.trafficDirection) << 1)) >> 1)
  335. ^ (hash<int>()(k.portNumber) << 1);
  336. }
  337. };
  338. }
  339. class statistics {
  340. public:
  341. /*
  342. * Constructor
  343. */
  344. statistics();
  345. /*
  346. * Methods
  347. */
  348. std::string getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const;
  349. /*
  350. * Access methods for containers
  351. */
  352. void incrementPacketCount();
  353. // Adimar
  354. void calculateIPIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp);
  355. void incrementMSScount(std::string ipAddress, int mssValue);
  356. void incrementWinCount(std::string ipAddress, int winSize);
  357. void addConvStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
  358. std::vector<float> calculateIPsCumEntropy();
  359. std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
  360. void addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, std::chrono::microseconds lastPktTimestamp);
  361. void checkPayload(const PDU *pdu_l4);
  362. void checkTCPChecksum(std::string ipAddressSender, std::string ipAddressReceiver, TCP tcpPkt);
  363. void checkToS(uint8_t ToS);
  364. void incrementToScount(std::string ipAddress, int tosValue);
  365. void incrementTTLcount(std::string ipAddress, int ttlValue);
  366. void incrementProtocolCount(std::string ipAddress, std::string protocol);
  367. void incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  368. int incomingPort);
  369. int getProtocolCount(std::string ipAddress, std::string protocol);
  370. void setTimestampFirstPacket(Tins::Timestamp ts);
  371. void setTimestampLastPacket(Tins::Timestamp ts);
  372. // Aidmar
  373. Tins::Timestamp getTimestampFirstPacket();
  374. Tins::Timestamp getTimestampLastPacket();
  375. void assignMacAddress(std::string ipAddress, std::string macAddress);
  376. // Aidmar
  377. void addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp);
  378. int getPacketCount();
  379. int getSumPacketSize();
  380. void addMSS(std::string ipAddress, int MSSvalue);
  381. void writeToDatabase(std::string database_path);
  382. void addPacketSize(uint32_t packetSize);
  383. std::string getCaptureDurationTimestamp() const;
  384. float getCaptureDurationSeconds() const;
  385. float getAvgPacketSize() const;
  386. void printStats(std::string ipAddress);
  387. /*
  388. * IP Address-specific statistics
  389. */
  390. ip_stats getStatsForIP(std::string ipAddress);
  391. // Aidmar
  392. bool getDoTests();
  393. void setDoTests(bool var);
  394. private:
  395. /*
  396. * Data fields
  397. */
  398. Tins::Timestamp timestamp_firstPacket;
  399. Tins::Timestamp timestamp_lastPacket;
  400. float sumPacketSize = 0;
  401. int packetCount = 0;
  402. // Aidmar
  403. bool doTests = false;
  404. int payloadCount = 0;
  405. int incorrectTCPChecksumCount = 0;
  406. int correctTCPChecksumCount = 0;
  407. int validToSCount = 0;
  408. int invalidToSCount = 0;
  409. int lastIntervalPayloadCount = 0;
  410. int lastIntervalIncorrectTCPChecksumCount = 0;
  411. int lastIntervalCorrectTCPChecksumCount = 0;
  412. int lastIntervalValidToSCount = 0;
  413. int lastIntervalInvalidToSCount = 0;
  414. int lastIntervalCumPktCount = 0;
  415. float lastIntervalCumSumPktSize = 0;
  416. int lastIntervalCumNewIPCount = 0;
  417. int lastIntervalCumNewTTLCount = 0;
  418. int lastIntervalCumNewWinSizeCount = 0;
  419. int lastIntervalCumNewToSCount = 0;
  420. int lastIntervalCumNewMSSCount = 0;
  421. /*
  422. * Data containers
  423. */
  424. // {IP Address, TTL value, count}
  425. std::unordered_map<ipAddress_ttl, int> ttl_distribution;
  426. // Aidmar
  427. // {IP Address, MSS value, count}
  428. std::unordered_map<ipAddress_mss, int> mss_distribution;
  429. // {IP Address, Win size, count}
  430. std::unordered_map<ipAddress_win, int> win_distribution;
  431. // {IP Address A, Port A, IP Address B, Port B, #packets_A_B, #packets_B_A}
  432. std::unordered_map<conv, entry_convStat> conv_statistics;
  433. std::unordered_map<std::string, entry_intervalStat> interval_statistics;
  434. std::unordered_map<ipAddress_tos, int> tos_distribution;
  435. // {TTL value, count}
  436. std::unordered_map<int, int> ttl_values;
  437. // {Win size, count}
  438. std::unordered_map<int, int> win_values;
  439. std::unordered_map<int, int> tos_values;
  440. std::unordered_map<int, int> mss_values;
  441. // {IP Address, Protocol, count}
  442. std::unordered_map<ipAddress_protocol, int> protocol_distribution;
  443. // {IP Address, #received packets, #sent packets, Data received in kbytes, Data sent in kbytes}
  444. std::unordered_map<std::string, entry_ipStat> ip_statistics;
  445. // {IP Address, in_out, Port Number, count}
  446. std::unordered_map<ipAddress_inOut_port, int> ip_ports;
  447. // {IP Address, MAC Address}
  448. std::unordered_map<std::string, std::string> ip_mac_mapping;
  449. // Aidmar
  450. // {DSCP value, count}
  451. std::unordered_map<int, int> dscp_distribution;
  452. // Aidmar - comment out
  453. // {IP Address, avg MSS}
  454. //std::unordered_map<std::string, int> ip_sumMss;
  455. };
  456. #endif //CPP_PCAPREADER_STATISTICS_H