statistics.h 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. /*
  2. * Class providing containers and access methods for statistical data collection.
  3. */
  4. #ifndef CPP_PCAPREADER_STATISTICS_H
  5. #define CPP_PCAPREADER_STATISTICS_H
  #include <chrono>
  #include <cstddef>
  #include <cstdint>
  #include <functional>
  #include <list>
  #include <string>
  #include <tuple>
  #include <unordered_map>
  #include <vector>
  #include <tins/timestamp.h>
  #include <tins/ip_address.h>
  #include "utilities.h"
  13. using namespace Tins;
  14. /*
  15. * Definition of structs used in unordered_map fields
  16. */
  /*
   * Plain value struct returned by statistics::getStatsForIP. It bundles the
   * per-IP figures listed on the members below.
   *
   * NOTE(review): an earlier description of this struct also listed an average
   * TCP MSS value, but no such member exists here.
   * The members carry no default initializers, so a default-constructed
   * instance holds indeterminate values until every field is assigned.
   */
  struct ip_stats {
      float bandwidthKBitsIn;     // Incoming bandwidth in KBits
      float bandwidthKBitsOut;    // Outgoing bandwidth in KBits
      float packetPerSecondIn;    // Number of incoming packets per second
      float packetPerSecondOut;   // Number of outgoing packets per second
      float AvgPacketSizeSent;    // Average size of sent packets in kbytes
      float AvgPacketSizeRecv;    // Average size of received packets in kbytes
  };
  /*
   * Key type describing one conversation as an ordered pair of endpoints:
   * (IP address A, port A) and (IP address B, port B).
   *
   * Equality is member-wise and direction-sensitive: swapping the A and B
   * endpoints yields a different key.
   */
  struct conv {
      std::string ipAddressA;
      int portA;
      std::string ipAddressB;
      int portB;

      bool operator==(const conv &other) const {
          return std::tie(ipAddressA, portA, ipAddressB, portB)
                 == std::tie(other.ipAddressA, other.portA, other.ipAddressB, other.portB);
      }
  };
  /*
   * Key type pairing an IP address (IPv4 or IPv6, stored as text) with an
   * MSS value. Two keys are equal only if both members match.
   */
  struct ipAddress_mss {
      std::string ipAddress;
      int mssValue;

      bool operator==(const ipAddress_mss &other) const {
          return std::tie(ipAddress, mssValue)
                 == std::tie(other.ipAddress, other.mssValue);
      }
  };
  /*
   * Key type pairing an IP address (IPv4 or IPv6, stored as text) with a
   * ToS value. Two keys are equal only if both members match.
   */
  struct ipAddress_tos {
      std::string ipAddress;
      int tosValue;

      bool operator==(const ipAddress_tos &other) const {
          return std::tie(ipAddress, tosValue)
                 == std::tie(other.ipAddress, other.tosValue);
      }
  };
  /*
   * Key type pairing an IP address (IPv4 or IPv6, stored as text) with a
   * window size. Two keys are equal only if both members match.
   */
  struct ipAddress_win {
      std::string ipAddress;
      int winSize;

      bool operator==(const ipAddress_win &other) const {
          return std::tie(ipAddress, winSize)
                 == std::tie(other.ipAddress, other.winSize);
      }
  };
  /*
   * Key type pairing an IP address (IPv4 or IPv6, stored as text) with a
   * TTL value. Two keys are equal only if both members match.
   */
  struct ipAddress_ttl {
      std::string ipAddress;
      int ttlValue;

      bool operator==(const ipAddress_ttl &other) const {
          return std::tie(ipAddress, ttlValue)
                 == std::tie(other.ipAddress, other.ttlValue);
      }
  };
  /*
   * Key type pairing an IP address (IPv4 or IPv6, stored as text) with a
   * protocol name (e.g. TCP, UDP, IPv4, IPv6). Two keys are equal only if
   * both members match.
   */
  struct ipAddress_protocol {
      std::string ipAddress;
      std::string protocol;

      bool operator==(const ipAddress_protocol &other) const {
          return std::tie(ipAddress, protocol)
                 == std::tie(other.ipAddress, other.protocol);
      }
  };
  /*
   * Per-IP counters and samples:
   * - Number of received / sent packets
   * - Data received / sent in kbytes
   * - IP class label
   * - Packet rates collected over time intervals, plus their max and min
   * - Timestamps of sent and of received packets
   *
   * operator== is an exact member-wise comparison over all of the above
   * (floating-point members are compared with ==).
   */
  struct entry_ipStat {
      long pkts_received;
      long pkts_sent;
      float kbytes_received;
      float kbytes_sent;
      std::string ip_class;
      // Statistics collected over time intervals
      std::vector<float> interval_pkt_rate;
      float max_interval_pkt_rate;
      float min_interval_pkt_rate;
      std::vector<std::chrono::microseconds> pkts_sent_timestamp;
      std::vector<std::chrono::microseconds> pkts_received_timestamp;

      bool operator==(const entry_ipStat &other) const {
          return std::tie(pkts_received, pkts_sent, kbytes_sent, kbytes_received,
                          interval_pkt_rate, max_interval_pkt_rate, min_interval_pkt_rate,
                          ip_class, pkts_sent_timestamp, pkts_received_timestamp)
                 == std::tie(other.pkts_received, other.pkts_sent, other.kbytes_sent,
                             other.kbytes_received, other.interval_pkt_rate,
                             other.max_interval_pkt_rate, other.min_interval_pkt_rate,
                             other.ip_class, other.pkts_sent_timestamp,
                             other.pkts_received_timestamp);
      }
  };
  /*
   * Per-port traffic totals, used as the mapped value for an
   * (IP address, direction, port, protocol) key.
   */
  struct entry_portStat {
      int count;         // Number of transmitted packets
      float byteCount;   // Number of transmitted bytes
  };
  /*
   * Per-protocol totals, used as the mapped value for an
   * (IP address, protocol) key.
   */
  struct entry_protocolStat {
      int count;         // Number of times the protocol is seen
      float byteCount;   // Amount of bytes transmitted with this protocol
  };
  /*
   * Struct used to represent interval statistics:
   * - # packets
   * - Data volume (member is named kbytes)
   * - IP source entropy
   * - IP destination entropy
   * - IP source cumulative entropy
   * - IP destination cumulative entropy
   * - # packets that have payload
   * - # incorrect TCP checksum
   * - # correct TCP checksum
   * - # novel IPs
   * - # novel TTL
   * - # novel Window Size
   * - # novel ToS
   * - # novel MSS
   * - # novel ports
   *
   * operator== is an exact member-wise comparison over EVERY member listed
   * above (floating-point members are compared with ==).
   */
  struct entry_intervalStat {
      int pkts_count;
      float kbytes;
      float ip_src_entropy;
      float ip_dst_entropy;
      float ip_src_cum_entropy;
      float ip_dst_cum_entropy;
      int payload_count;
      int incorrect_tcp_checksum_count;
      int correct_tcp_checksum_count;
      int novel_ip_count;
      int novel_ttl_count;
      int novel_win_size_count;
      int novel_tos_count;
      int novel_mss_count;
      int novel_port_count;

      bool operator==(const entry_intervalStat &other) const {
          return pkts_count == other.pkts_count
                 && kbytes == other.kbytes
                 && ip_src_entropy == other.ip_src_entropy
                 && ip_dst_entropy == other.ip_dst_entropy
                 && ip_src_cum_entropy == other.ip_src_cum_entropy
                 && ip_dst_cum_entropy == other.ip_dst_cum_entropy
                 && payload_count == other.payload_count
                 && incorrect_tcp_checksum_count == other.incorrect_tcp_checksum_count
                 // Previously left out, so entries differing only in this
                 // counter wrongly compared equal.
                 && correct_tcp_checksum_count == other.correct_tcp_checksum_count
                 && novel_ip_count == other.novel_ip_count
                 && novel_ttl_count == other.novel_ttl_count
                 && novel_win_size_count == other.novel_win_size_count
                 && novel_tos_count == other.novel_tos_count
                 && novel_mss_count == other.novel_mss_count
                 && novel_port_count == other.novel_port_count;
      }
  };
  /*
   * Struct used to represent conversation statistics:
   * - # packets
   * - Average packet rate
   * - Timestamps of packets
   * - Inter-arrival times
   * - Average inter-arrival time
   *
   * operator== is an exact member-wise comparison over all members
   * (the floating-point rate is compared with ==).
   */
  struct entry_convStat {
      long pkts_count;
      float avg_pkt_rate;
      std::vector<std::chrono::microseconds> pkts_timestamp;
      std::vector<std::chrono::microseconds> interarrival_time;
      std::chrono::microseconds avg_interarrival_time;

      bool operator==(const entry_convStat &other) const {
          return pkts_count == other.pkts_count
                 // Must compare against other's rate; comparing the member
                 // with itself made this term always true (except for NaN).
                 && avg_pkt_rate == other.avg_pkt_rate
                 && pkts_timestamp == other.pkts_timestamp
                 && interarrival_time == other.interarrival_time
                 && avg_interarrival_time == other.avg_interarrival_time;
      }
  };
  /*
   * Key type combining:
   * - IP address (IPv4 or IPv6, stored as text)
   * - Traffic direction (out: outgoing connection, in: incoming connection)
   * - Port number
   * - Protocol name
   *
   * Two keys are equal only if all four members match.
   */
  struct ipAddress_inOut_port {
      std::string ipAddress;
      std::string trafficDirection;
      int portNumber;
      std::string protocol;

      bool operator==(const ipAddress_inOut_port &other) const {
          return std::tie(ipAddress, trafficDirection, portNumber, protocol)
                 == std::tie(other.ipAddress, other.trafficDirection,
                             other.portNumber, other.protocol);
      }
  };
  /*
   * Key type identifying an unrecognized PDU by:
   * - Source MAC address
   * - Destination MAC address
   * - Payload type number
   *
   * Two keys are equal only if all three members match.
   */
  struct unrecognized_PDU {
      std::string srcMacAddress;
      std::string dstMacAddress;
      uint32_t typeNumber;

      bool operator==(const unrecognized_PDU &other) const {
          return std::tie(srcMacAddress, dstMacAddress, typeNumber)
                 == std::tie(other.srcMacAddress, other.dstMacAddress, other.typeNumber);
      }
  };
  275. /*
  276. * Definition of hash functions for structs used as key in unordered_map
  277. */
  278. namespace std {
  279. template<>
  280. struct hash<ipAddress_ttl> {
  281. std::size_t operator()(const ipAddress_ttl &k) const {
  282. using std::size_t;
  283. using std::hash;
  284. using std::string;
  285. return ((hash<string>()(k.ipAddress)
  286. ^ (hash<int>()(k.ttlValue) << 1)) >> 1);
  287. }
  288. };
  289. template<>
  290. struct hash<ipAddress_mss> {
  291. std::size_t operator()(const ipAddress_mss &k) const {
  292. using std::size_t;
  293. using std::hash;
  294. using std::string;
  295. return ((hash<string>()(k.ipAddress)
  296. ^ (hash<int>()(k.mssValue) << 1)) >> 1);
  297. }
  298. };
  299. template<>
  300. struct hash<ipAddress_tos> {
  301. std::size_t operator()(const ipAddress_tos &k) const {
  302. using std::size_t;
  303. using std::hash;
  304. using std::string;
  305. return ((hash<string>()(k.ipAddress)
  306. ^ (hash<int>()(k.tosValue) << 1)) >> 1);
  307. }
  308. };
  309. template<>
  310. struct hash<ipAddress_win> {
  311. std::size_t operator()(const ipAddress_win &k) const {
  312. using std::size_t;
  313. using std::hash;
  314. using std::string;
  315. return ((hash<string>()(k.ipAddress)
  316. ^ (hash<int>()(k.winSize) << 1)) >> 1);
  317. }
  318. };
  319. template<>
  320. struct hash<conv> {
  321. std::size_t operator()(const conv &k) const {
  322. using std::size_t;
  323. using std::hash;
  324. using std::string;
  325. return ((hash<string>()(k.ipAddressA)
  326. ^ (hash<int>()(k.portA) << 1)) >> 1)
  327. ^ ((hash<string>()(k.ipAddressB)
  328. ^ (hash<int>()(k.portB) << 1)) >> 1);
  329. }
  330. };
  331. template<>
  332. struct hash<ipAddress_protocol> {
  333. std::size_t operator()(const ipAddress_protocol &k) const {
  334. using std::size_t;
  335. using std::hash;
  336. using std::string;
  337. return ((hash<string>()(k.ipAddress)
  338. ^ (hash<string>()(k.protocol) << 1)) >> 1);
  339. }
  340. };
  341. template<>
  342. struct hash<ipAddress_inOut_port> {
  343. std::size_t operator()(const ipAddress_inOut_port &k) const {
  344. using std::size_t;
  345. using std::hash;
  346. using std::string;
  347. return ((hash<string>()(k.ipAddress)
  348. ^ (hash<string>()(k.trafficDirection) << 1)) >> 1)
  349. ^ (hash<int>()(k.portNumber) << 1);
  350. }
  351. };
  352. template<>
  353. struct hash<unrecognized_PDU> {
  354. std::size_t operator()(const unrecognized_PDU &k) const {
  355. using std::size_t;
  356. using std::hash;
  357. using std::string;
  358. return ((hash<string>()(k.srcMacAddress)
  359. ^ (hash<string>()(k.dstMacAddress) << 1)) >> 1)
  360. ^ (hash<uint32_t>()(k.typeNumber) << 1);
  361. }
  362. };
  363. }
  364. class statistics {
  365. public:
  366. /*
  367. * Constructor
  368. */
  369. statistics();
  370. /*
  371. * Methods
  372. */
  373. std::string getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const;
  374. /*
  375. * Access methods for containers
  376. */
  377. void incrementPacketCount();
  378. void calculateIPIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp);
  379. void incrementMSScount(std::string ipAddress, int mssValue);
  380. void incrementWinCount(std::string ipAddress, int winSize);
  381. void addConvStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
  382. std::vector<float> calculateIPsCumEntropy();
  383. std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
  384. void addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, std::chrono::microseconds lastPktTimestamp);
  385. void checkPayload(const PDU *pdu_l4);
  386. void checkTCPChecksum(std::string ipAddressSender, std::string ipAddressReceiver, TCP tcpPkt);
  387. void checkToS(uint8_t ToS);
  388. void incrementToScount(std::string ipAddress, int tosValue);
  389. void incrementTTLcount(std::string ipAddress, int ttlValue);
  390. void incrementProtocolCount(std::string ipAddress, std::string protocol);
  391. void increaseProtocolByteCount(std::string ipAddress, std::string protocol, long bytesSent);
  392. void incrementUnrecognizedPDUCount(std::string srcMac, std::string dstMac, uint32_t typeNumber);
  393. void incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  394. int incomingPort, std::string protocol);
  395. void increasePortByteCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  396. int incomingPort, long bytesSent, std::string protocol);
  397. int getProtocolCount(std::string ipAddress, std::string protocol);
  398. float getProtocolByteCount(std::string ipAddress, std::string protocol);
  399. void setTimestampFirstPacket(Tins::Timestamp ts);
  400. void setTimestampLastPacket(Tins::Timestamp ts);
  401. Tins::Timestamp getTimestampFirstPacket();
  402. Tins::Timestamp getTimestampLastPacket();
  403. void assignMacAddress(std::string ipAddress, std::string macAddress);
  404. void addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp);
  405. int getPacketCount();
  406. int getSumPacketSize();
  407. void addMSS(std::string ipAddress, int MSSvalue);
  408. void writeToDatabase(std::string database_path);
  409. void addPacketSize(uint32_t packetSize);
  410. std::string getCaptureDurationTimestamp() const;
  411. float getCaptureDurationSeconds() const;
  412. float getAvgPacketSize() const;
  413. void printStats(std::string ipAddress);
  414. bool getDoExtraTests();
  415. void setDoExtraTests(bool var);
  416. /*
  417. * IP Address-specific statistics
  418. */
  419. ip_stats getStatsForIP(std::string ipAddress);
  420. private:
  421. /*
  422. * Data fields
  423. */
  424. Tins::Timestamp timestamp_firstPacket;
  425. Tins::Timestamp timestamp_lastPacket;
  426. float sumPacketSize = 0;
  427. int packetCount = 0;
  428. /* Extra tests includes:
  429. * - calculate IPs entropies for intervals
  430. * - calculate IPs cumulative entropies interval-wise
  431. * - check payload availability
  432. * - chech TCP checksum correctness
  433. */
  434. bool doExtraTests = false;
  435. int payloadCount = 0;
  436. int incorrectTCPChecksumCount = 0;
  437. int correctTCPChecksumCount = 0;
  438. // Variables that are used for interval-wise statistics
  439. int intervalPayloadCount = 0;
  440. int intervalIncorrectTCPChecksumCount = 0;
  441. int intervalCorrectTCPChecksumCount = 0;
  442. int intervalCumPktCount = 0;
  443. float intervalCumSumPktSize = 0;
  444. int intervalCumNovelIPCount = 0;
  445. int intervalCumNovelTTLCount = 0;
  446. int intervalCumNovelWinSizeCount = 0;
  447. int intervalCumNovelToSCount = 0;
  448. int intervalCumNovelMSSCount = 0;
  449. int intervalCumNovelPortCount = 0;
  450. /*
  451. * Data containers
  452. */
  453. // {IP Address, TTL value, count}
  454. std::unordered_map<ipAddress_ttl, int> ttl_distribution;
  455. // {IP Address, MSS value, count}
  456. std::unordered_map<ipAddress_mss, int> mss_distribution;
  457. // {IP Address, Win size, count}
  458. std::unordered_map<ipAddress_win, int> win_distribution;
  459. // {IP Address, ToS value, count}
  460. std::unordered_map<ipAddress_tos, int> tos_distribution;
  461. // {IP Address A, Port A, IP Address B, Port B, #packets, packets timestamps, inter-arrival times,
  462. // average of inter-arrival times}
  463. std::unordered_map<conv, entry_convStat> conv_statistics;
  464. // {Last timestamp in the interval, #packets, #bytes, source IP entropy, destination IP entropy,
  465. // source IP cumulative entropy, destination IP cumulative entropy, #payload, #incorrect TCP checksum,
  466. // #correct TCP checksum, #novel IP, #novel TTL, #novel Window Size, #novel ToS,#novel MSS}
  467. std::unordered_map<std::string, entry_intervalStat> interval_statistics;
  468. // {TTL value, count}
  469. std::unordered_map<int, int> ttl_values;
  470. // {Win size, count}
  471. std::unordered_map<int, int> win_values;
  472. // {ToS, count}
  473. std::unordered_map<int, int> tos_values;
  474. // {MSS, count}
  475. std::unordered_map<int, int> mss_values;
  476. // {Port, count}
  477. std::unordered_map<int, int> port_values;
  478. // {IP Address, Protocol, #count, #Data transmitted in bytes}
  479. std::unordered_map<ipAddress_protocol, entry_protocolStat> protocol_distribution;
  480. // {IP Address, #received packets, #sent packets, Data received in kbytes, Data sent in kbytes}
  481. std::unordered_map<std::string, entry_ipStat> ip_statistics;
  482. // {IP Address, in_out, Port Number, #count, #Data transmitted in bytes}
  483. std::unordered_map<ipAddress_inOut_port, entry_portStat> ip_ports;
  484. // {IP Address, MAC Address}
  485. std::unordered_map<std::string, std::string> ip_mac_mapping;
  486. // {Source MAC, Destination MAC, typeNumber, #count}
  487. std::unordered_map<unrecognized_PDU, int> unrecognized_PDUs;
  488. };
  489. #endif //CPP_PCAPREADER_STATISTICS_H