statistics.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424
  1. /*
  2. * Class providing containers and access methods for statistical data collection.
  3. */
  4. #ifndef CPP_PCAPREADER_STATISTICS_H
  5. #define CPP_PCAPREADER_STATISTICS_H
  6. // Aidmar
  7. #include <vector>
  8. #include <unordered_map>
  9. #include <list>
  10. #include <tuple>
  11. #include <tins/timestamp.h>
  12. #include <tins/ip_address.h>
  13. /*
  14. * Definition of structs used in unordered_map fields
  15. */
  16. /*
  17. * Struct used as data structure for method get_stats_for_ip, represents:
  18. * - Incoming bandwidth in KBits
  19. * - Outgoing bandwidth in KBits
  20. * - Number of incoming packets per second
  21. * - Number of outgoing packets per second
  22. * - Average size of sent packets in kbytes
  23. * - Average size of received packets in kybtes
  24. * - Average value of TCP option Maximum Segment Size (MSS)
  25. */
  26. struct ip_stats {
  27. float bandwidthKBitsIn;
  28. float bandwidthKBitsOut;
  29. float packetPerSecondIn;
  30. float packetPerSecondOut;
  31. float AvgPacketSizeSent;
  32. float AvgPacketSizeRecv;
  33. long AvgMaxSegmentSizeTCP;
  34. };
  35. // Aidmar
  36. /*
  37. * Struct used to represent a conv by:
  38. * - IP address A
  39. * - Port A
  40. * - IP address B
  41. * - Port B
  42. */
  43. struct conv{
  44. std::string ipAddressA;
  45. int portA;
  46. std::string ipAddressB;
  47. int portB;
  48. bool operator==(const conv &other) const {
  49. return ipAddressA == other.ipAddressA
  50. && portA == other.portA
  51. &&ipAddressB == other.ipAddressB
  52. && portB == other.portB;
  53. }
  54. };
  55. // Aidmar
  56. /*
  57. * Struct used to represent:
  58. * - IP address (IPv4 or IPv6)
  59. * - MSS value
  60. */
  61. struct ipAddress_mss {
  62. std::string ipAddress;
  63. int mssValue;
  64. bool operator==(const ipAddress_mss &other) const {
  65. return ipAddress == other.ipAddress
  66. && mssValue == other.mssValue;
  67. }
  68. };
  69. // Aidmar
  70. /*
  71. * Struct used to represent:
  72. * - IP address (IPv4 or IPv6)
  73. * - Window size
  74. */
  75. struct ipAddress_win {
  76. std::string ipAddress;
  77. int winSize;
  78. bool operator==(const ipAddress_win &other) const {
  79. return ipAddress == other.ipAddress
  80. && winSize == other.winSize;
  81. }
  82. };
  83. /*
  84. * Struct used to represent:
  85. * - IP address (IPv4 or IPv6)
  86. * - TTL value
  87. */
  88. struct ipAddress_ttl {
  89. std::string ipAddress;
  90. int ttlValue;
  91. bool operator==(const ipAddress_ttl &other) const {
  92. return ipAddress == other.ipAddress
  93. && ttlValue == other.ttlValue;
  94. }
  95. };
  96. /*
  97. * Struct used to represent:
  98. * - IP address (IPv4 or IPv6)
  99. * - Protocol (e.g. TCP, UDP, IPv4, IPv6)
  100. */
  101. struct ipAddress_protocol {
  102. std::string ipAddress;
  103. std::string protocol;
  104. bool operator==(const ipAddress_protocol &other) const {
  105. return ipAddress == other.ipAddress
  106. && protocol == other.protocol;
  107. }
  108. };
  109. /*
  110. * Struct used to represent:
  111. * - Number of received packets
  112. * - Number of sent packets
  113. * - Data received in kbytes
  114. * - Data sent in kbytes
  115. */
  116. struct entry_ipStat {
  117. long pkts_received;
  118. long pkts_sent;
  119. float kbytes_received;
  120. float kbytes_sent;
  121. // Aidmar
  122. std::string ip_class;
  123. std::vector<float> interval_pkt_rate;
  124. float max_pkt_rate;
  125. float min_pkt_rate;
  126. // Aidmar - to calculate Mahoney anomaly score
  127. long firstAppearAsSenderPktCount;
  128. long firstAppearAsReceiverPktCount;
  129. long sourceAnomalyScore;
  130. long destinationAnomalyScore;
  131. // Aidmar- To collect statstics over time interval
  132. std::vector<std::chrono::microseconds> pktsSentTimestamp;
  133. std::vector<std::chrono::microseconds> pktsReceivedTimestamp;
  134. bool operator==(const entry_ipStat &other) const {
  135. return pkts_received == other.pkts_received
  136. && pkts_sent == other.pkts_sent
  137. && kbytes_sent == other.kbytes_sent
  138. && kbytes_received == other.kbytes_received
  139. // Aidmar
  140. && interval_pkt_rate == other.interval_pkt_rate
  141. && max_pkt_rate == other.max_pkt_rate
  142. && min_pkt_rate == other.min_pkt_rate
  143. && ip_class == other.ip_class
  144. && firstAppearAsSenderPktCount == other.firstAppearAsSenderPktCount
  145. && firstAppearAsReceiverPktCount == other.firstAppearAsReceiverPktCount
  146. && sourceAnomalyScore == other.sourceAnomalyScore
  147. && destinationAnomalyScore == other.destinationAnomalyScore
  148. && pktsSentTimestamp == other.pktsSentTimestamp
  149. && pktsReceivedTimestamp == other.pktsReceivedTimestamp;
  150. }
  151. };
  152. // Aidmar
  153. /*
  154. * Struct used to represent interval statistics:
  155. * - Number of packets
  156. * - IP source entropy
  157. * - IP destination entropy
  158. */
  159. struct entry_intervalStat {
  160. int pkts_count;
  161. float ip_src_entropy;
  162. float ip_dst_entropy;
  163. bool operator==(const entry_intervalStat &other) const {
  164. return pkts_count == other.pkts_count
  165. && ip_src_entropy == other.ip_src_entropy
  166. && ip_dst_entropy == other.ip_dst_entropy;
  167. }
  168. };
  169. // Aidmar
  170. /*
  171. * Struct used to represent:
  172. * - Number of packets from A to B
  173. * - Number of packets from B to A
  174. */
  175. struct entry_convStat {
  176. long pkts_A_B;
  177. long pkts_B_A;
  178. std::vector<std::chrono::microseconds> pkts_A_B_timestamp;
  179. std::vector<std::chrono::microseconds> pkts_B_A_timestamp;
  180. std::vector<std::chrono::microseconds> pkts_delay;
  181. //std::chrono::duration<double, std::micro> median_delay;
  182. std::chrono::microseconds avg_delay;
  183. bool operator==(const entry_convStat &other) const {
  184. return pkts_A_B == other.pkts_A_B
  185. && pkts_A_B_timestamp == other.pkts_A_B_timestamp
  186. && pkts_B_A_timestamp == other.pkts_B_A_timestamp
  187. && pkts_delay == other.pkts_delay
  188. && avg_delay == other.avg_delay;
  189. }
  190. };
  191. /*
  192. * Struct used to represent:
  193. * - IP address (IPv4 or IPv6)
  194. - Traffic direction (out: outgoing connection, in: incoming connection)
  195. * - Port number
  196. */
  197. struct ipAddress_inOut_port {
  198. std::string ipAddress;
  199. std::string trafficDirection;
  200. int portNumber;
  201. bool operator==(const ipAddress_inOut_port &other) const {
  202. return ipAddress == other.ipAddress
  203. && trafficDirection == other.trafficDirection
  204. && portNumber == other.portNumber;
  205. }
  206. };
  207. /*
  208. * Definition of hash functions for structs used as key in unordered_map
  209. */
  210. namespace std {
  211. template<>
  212. struct hash<ipAddress_ttl> {
  213. std::size_t operator()(const ipAddress_ttl &k) const {
  214. using std::size_t;
  215. using std::hash;
  216. using std::string;
  217. return ((hash<string>()(k.ipAddress)
  218. ^ (hash<int>()(k.ttlValue) << 1)) >> 1);
  219. }
  220. };
  221. // Aidmar
  222. template<>
  223. struct hash<ipAddress_mss> {
  224. std::size_t operator()(const ipAddress_mss &k) const {
  225. using std::size_t;
  226. using std::hash;
  227. using std::string;
  228. return ((hash<string>()(k.ipAddress)
  229. ^ (hash<int>()(k.mssValue) << 1)) >> 1);
  230. }
  231. };
  232. // Aidmar
  233. template<>
  234. struct hash<ipAddress_win> {
  235. std::size_t operator()(const ipAddress_win &k) const {
  236. using std::size_t;
  237. using std::hash;
  238. using std::string;
  239. return ((hash<string>()(k.ipAddress)
  240. ^ (hash<int>()(k.winSize) << 1)) >> 1);
  241. }
  242. };
  243. // Aidmar: TO-DO:??
  244. template<>
  245. struct hash<conv> {
  246. std::size_t operator()(const conv &k) const {
  247. using std::size_t;
  248. using std::hash;
  249. using std::string;
  250. return ((hash<string>()(k.ipAddressA)
  251. ^ (hash<int>()(k.portA) << 1)) >> 1)
  252. ^ ((hash<string>()(k.ipAddressB)
  253. ^ (hash<int>()(k.portB) << 1)) >> 1);
  254. }
  255. };
  256. template<>
  257. struct hash<ipAddress_protocol> {
  258. std::size_t operator()(const ipAddress_protocol &k) const {
  259. using std::size_t;
  260. using std::hash;
  261. using std::string;
  262. return ((hash<string>()(k.ipAddress)
  263. ^ (hash<string>()(k.protocol) << 1)) >> 1);
  264. }
  265. };
  266. template<>
  267. struct hash<ipAddress_inOut_port> {
  268. std::size_t operator()(const ipAddress_inOut_port &k) const {
  269. using std::size_t;
  270. using std::hash;
  271. using std::string;
  272. return ((hash<string>()(k.ipAddress)
  273. ^ (hash<string>()(k.trafficDirection) << 1)) >> 1)
  274. ^ (hash<int>()(k.portNumber) << 1);
  275. }
  276. };
  277. }
  278. class statistics {
  279. public:
  280. /*
  281. * Constructor
  282. */
  283. statistics();
  284. /*
  285. * Methods
  286. */
  287. std::string getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const;
  288. /*
  289. * Access methods for containers
  290. */
  291. void incrementPacketCount();
  292. // Adimar
  293. void incrementMSScount(std::string ipAddress, int mssValue);
  294. void incrementWinCount(std::string ipAddress, int winSize);
  295. void addIPEntropy(std::string filePath);
  296. void addFlowStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
  297. void calculateLastIntervalIPsEntropy(std::string filePath, std::chrono::microseconds intervalStartTimestamp);
  298. void calculateLastIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp);
  299. void addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, std::chrono::microseconds lastPktTimestamp, int previousPacketCount);
  300. void incrementTTLcount(std::string ipAddress, int ttlValue);
  301. void incrementProtocolCount(std::string ipAddress, std::string protocol);
  302. void incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  303. int incomingPort);
  304. int getProtocolCount(std::string ipAddress, std::string protocol);
  305. void setTimestampFirstPacket(Tins::Timestamp ts);
  306. void setTimestampLastPacket(Tins::Timestamp ts);
  307. // Aidmar
  308. Tins::Timestamp getTimestampFirstPacket();
  309. Tins::Timestamp getTimestampLastPacket();
  310. void assignMacAddress(std::string ipAddress, std::string macAddress);
  311. // Aidmar
  312. void addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp);
  313. int getPacketCount();
  314. void addMSS(std::string ipAddress, int MSSvalue);
  315. void writeToDatabase(std::string database_path);
  316. void addPacketSize(uint32_t packetSize);
  317. std::string getCaptureDurationTimestamp() const;
  318. float getCaptureDurationSeconds() const;
  319. float getAvgPacketSize() const;
  320. void printStats(std::string ipAddress);
  321. /*
  322. * IP Address-specific statistics
  323. */
  324. ip_stats getStatsForIP(std::string ipAddress);
  325. private:
  326. /*
  327. * Data fields
  328. */
  329. Tins::Timestamp timestamp_firstPacket;
  330. Tins::Timestamp timestamp_lastPacket;
  331. float sumPacketSize = 0;
  332. int packetCount = 0;
  333. /*
  334. * Data containers
  335. */
  336. // {IP Address, TTL value, count}
  337. std::unordered_map<ipAddress_ttl, int> ttl_distribution;
  338. // Aidmar
  339. // {IP Address, MSS value, count}
  340. std::unordered_map<ipAddress_mss, int> mss_distribution;
  341. // {IP Address, Win size, count}
  342. std::unordered_map<ipAddress_win, int> win_distribution;
  343. // {IP Address A, Port A, IP Address B, Port B, #packets_A_B, #packets_B_A}
  344. std::unordered_map<conv, entry_convStat> conv_statistics;
  345. std::unordered_map<std::string, entry_intervalStat> interval_statistics;
  346. // {IP Address, Protocol, count}
  347. std::unordered_map<ipAddress_protocol, int> protocol_distribution;
  348. // {IP Address, #received packets, #sent packets, Data received in kbytes, Data sent in kbytes}
  349. std::unordered_map<std::string, entry_ipStat> ip_statistics;
  350. // {IP Address, in_out, Port Number, count}
  351. std::unordered_map<ipAddress_inOut_port, int> ip_ports;
  352. // {IP Address, MAC Address}
  353. std::unordered_map<std::string, std::string> ip_mac_mapping;
  354. // {IP Address, avg MSS}
  355. std::unordered_map<std::string, int> ip_sumMss;
  356. };
  357. #endif //CPP_PCAPREADER_STATISTICS_H