statistics.h 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. /*
  2. * Class providing containers and access methods for statistical data collection.
  3. */
  4. #ifndef CPP_PCAPREADER_STATISTICS_H
  5. #define CPP_PCAPREADER_STATISTICS_H
  6. #include <vector>
  7. #include <unordered_map>
  8. #include <list>
  9. #include <tuple>
  10. #include <tins/timestamp.h>
  11. #include <tins/ip_address.h>
  12. #include "utilities.h"
  13. using namespace Tins;
  14. #define COMM_INTERVAL_THRESHOLD 10e6 // in microseconds; i.e. here 10s
  15. /*
  16. * Definition of structs used in unordered_map fields
  17. */
  /*
   * Per-IP summary record, the return type of statistics::getStatsForIP.
   * Field order matters for aggregate initialisation:
   * - Incoming bandwidth in KBits
   * - Outgoing bandwidth in KBits
   * - Number of incoming packets per second
   * - Number of outgoing packets per second
   * - Average size of sent packets in kbytes
   * - Average size of received packets in kbytes
   */
  struct ip_stats {
      float bandwidthKBitsIn;    // incoming bandwidth (KBits)
      float bandwidthKBitsOut;   // outgoing bandwidth (KBits)
      float packetPerSecondIn;   // incoming packets per second
      float packetPerSecondOut;  // outgoing packets per second
      float AvgPacketSizeSent;   // average size of sent packets (kbytes)
      float AvgPacketSizeRecv;   // average size of received packets (kbytes)
  };
  /*
   * Key type describing a conversation between two endpoints:
   * - IP address A
   * - Port A
   * - IP address B
   * - Port B
   */
  struct conv {
      std::string ipAddressA;
      int portA;
      std::string ipAddressB;
      int portB;

      // Member-wise equality. The comparison is positional: swapping the A and B
      // endpoints yields a different key.
      bool operator==(const conv &other) const {
          return std::tie(ipAddressA, portA, ipAddressB, portB)
                 == std::tie(other.ipAddressA, other.portA, other.ipAddressB, other.portB);
      }
  };
  /*
   * Key type describing a conversation between two endpoints over a protocol:
   * - IP address A
   * - Port A
   * - IP address B
   * - Port B
   * - Protocol
   */
  struct convWithProt {
      std::string ipAddressA;
      int portA;
      std::string ipAddressB;
      int portB;
      std::string protocol;

      // Member-wise, positional equality over both endpoints and the protocol.
      bool operator==(const convWithProt &other) const {
          return std::tie(ipAddressA, portA, ipAddressB, portB, protocol)
                 == std::tie(other.ipAddressA, other.portA, other.ipAddressB, other.portB,
                             other.protocol);
      }
  };
  /*
   * Key type pairing:
   * - IP address (IPv4 or IPv6)
   * - MSS value
   */
  struct ipAddress_mss {
      std::string ipAddress;
      int mssValue;

      // Equal only when both the address and the MSS value match.
      bool operator==(const ipAddress_mss &other) const {
          if (mssValue != other.mssValue) {
              return false;
          }
          return ipAddress == other.ipAddress;
      }
  };
  /*
   * Key type pairing:
   * - IP address (IPv4 or IPv6)
   * - ToS value
   */
  struct ipAddress_tos {
      std::string ipAddress;
      int tosValue;

      // Equal only when both the address and the ToS value match.
      bool operator==(const ipAddress_tos &other) const {
          return std::tie(ipAddress, tosValue) == std::tie(other.ipAddress, other.tosValue);
      }
  };
  /*
   * Key type pairing:
   * - IP address (IPv4 or IPv6)
   * - Window size
   */
  struct ipAddress_win {
      std::string ipAddress;
      int winSize;

      // Equal only when both the address and the window size match.
      bool operator==(const ipAddress_win &other) const {
          if (winSize != other.winSize) {
              return false;
          }
          return ipAddress == other.ipAddress;
      }
  };
  /*
   * Key type pairing:
   * - IP address (IPv4 or IPv6)
   * - TTL value
   */
  struct ipAddress_ttl {
      std::string ipAddress;
      int ttlValue;

      // Equal only when both the address and the TTL value match.
      bool operator==(const ipAddress_ttl &other) const {
          return std::tie(ipAddress, ttlValue) == std::tie(other.ipAddress, other.ttlValue);
      }
  };
  /*
   * Key type pairing:
   * - IP address (IPv4 or IPv6)
   * - Protocol (e.g. TCP, UDP, IPv4, IPv6)
   */
  struct ipAddress_protocol {
      std::string ipAddress;
      std::string protocol;

      // Equal only when both the address and the protocol name match.
      bool operator==(const ipAddress_protocol &other) const {
          return std::tie(ipAddress, protocol) == std::tie(other.ipAddress, other.protocol);
      }
  };
  /*
   * Struct used to represent the statistics kept for one IP address:
   * - Number of received packets
   * - Number of sent packets
   * - Data received in kbytes
   * - Data sent in kbytes
   * - IP class label
   * - In-degree, out-degree and overall degree
   * - Packet rates collected per time interval, with their maximum and minimum
   * - Timestamps of sent packets
   * - Timestamps of received packets
   */
  struct entry_ipStat {
      long pkts_received;
      long pkts_sent;
      float kbytes_received;
      float kbytes_sent;
      std::string ip_class;
      int in_degree;
      int out_degree;
      int overall_degree;
      // Collects statistics over time intervals
      std::vector<float> interval_pkt_rate;
      float max_interval_pkt_rate;
      float min_interval_pkt_rate;
      std::vector<std::chrono::microseconds> pkts_sent_timestamp;
      std::vector<std::chrono::microseconds> pkts_received_timestamp;

      // Member-wise equality over every field, including the three degree
      // counters. Scalar members are compared before the vectors so that a
      // mismatch is usually detected without walking the containers.
      bool operator==(const entry_ipStat &other) const {
          return pkts_received == other.pkts_received
                 && pkts_sent == other.pkts_sent
                 && kbytes_sent == other.kbytes_sent
                 && kbytes_received == other.kbytes_received
                 && in_degree == other.in_degree
                 && out_degree == other.out_degree
                 && overall_degree == other.overall_degree
                 && max_interval_pkt_rate == other.max_interval_pkt_rate
                 && min_interval_pkt_rate == other.min_interval_pkt_rate
                 && ip_class == other.ip_class
                 && interval_pkt_rate == other.interval_pkt_rate
                 && pkts_sent_timestamp == other.pkts_sent_timestamp
                 && pkts_received_timestamp == other.pkts_received_timestamp;
      }
  };
  /*
   * Struct used to represent interval statistics:
   * - # packets
   * - # kbytes
   * - IP source entropy
   * - IP destination entropy
   * - IP source cumulative entropy
   * - IP destination cumulative entropy
   * - # packets that have payload
   * - # incorrect TCP checksum
   * - # correct TCP checksum
   * - # novel IPs
   * - # novel TTL
   * - # novel Window Size
   * - # novel ToS
   * - # novel MSS
   * - # novel ports
   */
  struct entry_intervalStat {
      int pkts_count;
      float kbytes;
      float ip_src_entropy;
      float ip_dst_entropy;
      float ip_src_cum_entropy;
      float ip_dst_cum_entropy;
      int payload_count;
      int incorrect_tcp_checksum_count;
      int correct_tcp_checksum_count;
      int novel_ip_count;
      int novel_ttl_count;
      int novel_win_size_count;
      int novel_tos_count;
      int novel_mss_count;
      int novel_port_count;

      // Member-wise equality over every field, in declaration order. This
      // includes correct_tcp_checksum_count, so two intervals that differ only
      // in that counter compare unequal.
      bool operator==(const entry_intervalStat &other) const {
          return pkts_count == other.pkts_count
                 && kbytes == other.kbytes
                 && ip_src_entropy == other.ip_src_entropy
                 && ip_dst_entropy == other.ip_dst_entropy
                 && ip_src_cum_entropy == other.ip_src_cum_entropy
                 && ip_dst_cum_entropy == other.ip_dst_cum_entropy
                 && payload_count == other.payload_count
                 && incorrect_tcp_checksum_count == other.incorrect_tcp_checksum_count
                 && correct_tcp_checksum_count == other.correct_tcp_checksum_count
                 && novel_ip_count == other.novel_ip_count
                 && novel_ttl_count == other.novel_ttl_count
                 && novel_win_size_count == other.novel_win_size_count
                 && novel_tos_count == other.novel_tos_count
                 && novel_mss_count == other.novel_mss_count
                 && novel_port_count == other.novel_port_count;
      }
  };
  /*
   * Struct used to represent conversation statistics:
   * - # packets
   * - Average packet rate
   * - Timestamps of packets
   * - Inter-arrival time
   * - Average inter-arrival time
   */
  struct entry_convStat {
      long pkts_count;
      float avg_pkt_rate;
      std::vector<std::chrono::microseconds> pkts_timestamp;
      std::vector<std::chrono::microseconds> interarrival_time;
      std::chrono::microseconds avg_interarrival_time;

      // Member-wise equality. avg_pkt_rate is compared against the other
      // object's value; comparing it with itself would hide differing rates.
      bool operator==(const entry_convStat &other) const {
          return pkts_count == other.pkts_count
                 && avg_pkt_rate == other.avg_pkt_rate
                 && avg_interarrival_time == other.avg_interarrival_time
                 && pkts_timestamp == other.pkts_timestamp
                 && interarrival_time == other.interarrival_time;
      }
  };
  /*
   * Key type combining:
   * - IP address (IPv4 or IPv6)
   * - Traffic direction (out: outgoing connection, in: incoming connection)
   * - Port number
   */
  struct ipAddress_inOut_port {
      std::string ipAddress;
      std::string trafficDirection;
      int portNumber;

      // Equal only when address, direction and port number all match.
      bool operator==(const ipAddress_inOut_port &other) const {
          return std::tie(ipAddress, trafficDirection, portNumber)
                 == std::tie(other.ipAddress, other.trafficDirection, other.portNumber);
      }
  };
  /*
   * Struct used to represent a communication interval (for two hosts):
   * - Timestamp of the first packet in the interval
   * - Timestamp of the last packet in the interval
   * - The count of packets within the interval
   */
  struct commInterval {
      std::chrono::microseconds start;
      std::chrono::microseconds end;
      long pkts_count;

      // Equal only when both boundary timestamps and the packet count match.
      bool operator==(const commInterval &other) const {
          return std::tie(start, end, pkts_count)
                 == std::tie(other.start, other.end, other.pkts_count);
      }
  };
  281. /*
  282. * Struct used to represent converstaion statistics:
  283. * - commnication intervals
  284. * - # packets
  285. * - Average packet rate
  286. * - average # packets per communication interval
  287. * - Average time between intervals
  288. * - Average duration of a communication interval
  289. * - Overall communication duration
  290. * - Timestamps of packets
  291. * - Inter-arrival time
  292. * - Average inter-arrival time
  293. */
  294. struct entry_convStatExt {
  295. std::vector<commInterval> comm_intervals;
  296. long pkts_count;
  297. float avg_pkt_rate;
  298. double avg_int_pkts_count;
  299. double avg_time_between_ints;
  300. double avg_interval_time;
  301. double total_comm_duration;
  302. std::vector<std::chrono::microseconds> pkts_timestamp;
  303. std::vector<std::chrono::microseconds> interarrival_time;
  304. std::chrono::microseconds avg_interarrival_time;
  305. bool operator==(const entry_convStatExt &other) const {
  306. return comm_intervals == other.comm_intervals
  307. && pkts_count == other.pkts_count
  308. && avg_pkt_rate == avg_pkt_rate
  309. && avg_int_pkts_count == other.avg_int_pkts_count
  310. && avg_time_between_ints == other.avg_time_between_ints
  311. && avg_interval_time == other.avg_interval_time
  312. && total_comm_duration == other.total_comm_duration
  313. && pkts_timestamp == other.pkts_timestamp
  314. && interarrival_time == other.interarrival_time
  315. && avg_interarrival_time == other.avg_interarrival_time;
  316. }
  317. };
  318. /*
  319. * Definition of hash functions for structs used as key in unordered_map
  320. */
  321. namespace std {
  322. template<>
  323. struct hash<ipAddress_ttl> {
  324. std::size_t operator()(const ipAddress_ttl &k) const {
  325. using std::size_t;
  326. using std::hash;
  327. using std::string;
  328. return ((hash<string>()(k.ipAddress)
  329. ^ (hash<int>()(k.ttlValue) << 1)) >> 1);
  330. }
  331. };
  332. template<>
  333. struct hash<ipAddress_mss> {
  334. std::size_t operator()(const ipAddress_mss &k) const {
  335. using std::size_t;
  336. using std::hash;
  337. using std::string;
  338. return ((hash<string>()(k.ipAddress)
  339. ^ (hash<int>()(k.mssValue) << 1)) >> 1);
  340. }
  341. };
  342. template<>
  343. struct hash<ipAddress_tos> {
  344. std::size_t operator()(const ipAddress_tos &k) const {
  345. using std::size_t;
  346. using std::hash;
  347. using std::string;
  348. return ((hash<string>()(k.ipAddress)
  349. ^ (hash<int>()(k.tosValue) << 1)) >> 1);
  350. }
  351. };
  352. template<>
  353. struct hash<ipAddress_win> {
  354. std::size_t operator()(const ipAddress_win &k) const {
  355. using std::size_t;
  356. using std::hash;
  357. using std::string;
  358. return ((hash<string>()(k.ipAddress)
  359. ^ (hash<int>()(k.winSize) << 1)) >> 1);
  360. }
  361. };
  362. template<>
  363. struct hash<conv> {
  364. std::size_t operator()(const conv &k) const {
  365. using std::size_t;
  366. using std::hash;
  367. using std::string;
  368. return ((hash<string>()(k.ipAddressA)
  369. ^ (hash<int>()(k.portA) << 1)) >> 1)
  370. ^ ((hash<string>()(k.ipAddressB)
  371. ^ (hash<int>()(k.portB) << 1)) >> 1);
  372. }
  373. };
  374. template<>
  375. struct hash<convWithProt> {
  376. std::size_t operator()(const convWithProt &c) const {
  377. using std::size_t;
  378. using std::hash;
  379. using std::string;
  380. return ((hash<string>()(c.ipAddressA)
  381. ^ (hash<int>()(c.portA) << 1)) >> 1)
  382. ^ ((hash<string>()(c.ipAddressB)
  383. ^ (hash<int>()(c.portB) << 1)) >> 1)
  384. ^ (hash<string>()(c.protocol));
  385. }
  386. };
  387. template<>
  388. struct hash<ipAddress_protocol> {
  389. std::size_t operator()(const ipAddress_protocol &k) const {
  390. using std::size_t;
  391. using std::hash;
  392. using std::string;
  393. return ((hash<string>()(k.ipAddress)
  394. ^ (hash<string>()(k.protocol) << 1)) >> 1);
  395. }
  396. };
  397. template<>
  398. struct hash<ipAddress_inOut_port> {
  399. std::size_t operator()(const ipAddress_inOut_port &k) const {
  400. using std::size_t;
  401. using std::hash;
  402. using std::string;
  403. return ((hash<string>()(k.ipAddress)
  404. ^ (hash<string>()(k.trafficDirection) << 1)) >> 1)
  405. ^ (hash<int>()(k.portNumber) << 1);
  406. }
  407. };
  408. }
  409. class statistics {
  410. public:
  411. /*
  412. * Constructor
  413. */
  414. statistics();
  415. /*
  416. * Methods
  417. */
  418. std::string getFormattedTimestamp(time_t seconds, suseconds_t microseconds) const;
  419. /*
  420. * Access methods for containers
  421. */
  422. void incrementPacketCount();
  423. void calculateIPIntervalPacketRate(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp);
  424. void incrementMSScount(std::string ipAddress, int mssValue);
  425. void incrementWinCount(std::string ipAddress, int winSize);
  426. void addConvStat(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport, std::chrono::microseconds timestamp);
  427. void addConvStatExt(std::string ipAddressSender,int sport,std::string ipAddressReceiver,int dport,std::string protocol, std::chrono::microseconds timestamp);
  428. void createCommIntervalStats();
  429. std::vector<float> calculateIPsCumEntropy();
  430. std::vector<float> calculateLastIntervalIPsEntropy(std::chrono::microseconds intervalStartTimestamp);
  431. void addIntervalStat(std::chrono::duration<int, std::micro> interval, std::chrono::microseconds intervalStartTimestamp, std::chrono::microseconds lastPktTimestamp);
  432. void checkPayload(const PDU *pdu_l4);
  433. void checkTCPChecksum(std::string ipAddressSender, std::string ipAddressReceiver, TCP tcpPkt);
  434. void checkToS(uint8_t ToS);
  435. void incrementToScount(std::string ipAddress, int tosValue);
  436. void incrementTTLcount(std::string ipAddress, int ttlValue);
  437. void incrementProtocolCount(std::string ipAddress, std::string protocol);
  438. void incrementPortCount(std::string ipAddressSender, int outgoingPort, std::string ipAddressReceiver,
  439. int incomingPort);
  440. int getProtocolCount(std::string ipAddress, std::string protocol);
  441. void setTimestampFirstPacket(Tins::Timestamp ts);
  442. void setTimestampLastPacket(Tins::Timestamp ts);
  443. Tins::Timestamp getTimestampFirstPacket();
  444. Tins::Timestamp getTimestampLastPacket();
  445. void assignMacAddress(std::string ipAddress, std::string macAddress);
  446. void addIpStat_packetSent(std::string filePath, std::string ipAddressSender, std::string ipAddressReceiver, long bytesSent, std::chrono::microseconds timestamp);
  447. int getPacketCount();
  448. int getSumPacketSize();
  449. void addMSS(std::string ipAddress, int MSSvalue);
  450. void writeToDatabase(std::string database_path);
  451. void addPacketSize(uint32_t packetSize);
  452. std::string getCaptureDurationTimestamp() const;
  453. float getCaptureDurationSeconds() const;
  454. float getAvgPacketSize() const;
  455. void printStats(std::string ipAddress);
  456. bool getDoExtraTests();
  457. void setDoExtraTests(bool var);
  458. /*
  459. * IP Address-specific statistics
  460. */
  461. ip_stats getStatsForIP(std::string ipAddress);
  462. private:
  463. /*
  464. * Data fields
  465. */
  466. Tins::Timestamp timestamp_firstPacket;
  467. Tins::Timestamp timestamp_lastPacket;
  468. float sumPacketSize = 0;
  469. int packetCount = 0;
  470. /* Extra tests includes:
  471. * - calculate IPs entropies for intervals
  472. * - calculate IPs cumulative entropies interval-wise
  473. * - check payload availability
  474. * - chech TCP checksum correctness
  475. */
  476. bool doExtraTests = false;
  477. int payloadCount = 0;
  478. int incorrectTCPChecksumCount = 0;
  479. int correctTCPChecksumCount = 0;
  480. // Variables that are used for interval-wise statistics
  481. int intervalPayloadCount = 0;
  482. int intervalIncorrectTCPChecksumCount = 0;
  483. int intervalCorrectTCPChecksumCount = 0;
  484. int intervalCumPktCount = 0;
  485. float intervalCumSumPktSize = 0;
  486. int intervalCumNovelIPCount = 0;
  487. int intervalCumNovelTTLCount = 0;
  488. int intervalCumNovelWinSizeCount = 0;
  489. int intervalCumNovelToSCount = 0;
  490. int intervalCumNovelMSSCount = 0;
  491. int intervalCumNovelPortCount = 0;
  492. /*
  493. * Data containers
  494. */
  495. // {IP Address, TTL value, count}
  496. std::unordered_map<ipAddress_ttl, int> ttl_distribution;
  497. // {IP Address, MSS value, count}
  498. std::unordered_map<ipAddress_mss, int> mss_distribution;
  499. // {IP Address, Win size, count}
  500. std::unordered_map<ipAddress_win, int> win_distribution;
  501. // {IP Address, ToS value, count}
  502. std::unordered_map<ipAddress_tos, int> tos_distribution;
  503. // {IP Address A, Port A, IP Address B, Port B, #packets, packets timestamps, inter-arrival times,
  504. // average of inter-arrival times}
  505. std::unordered_map<conv, entry_convStat> conv_statistics;
  506. // {IP Address A, Port A, IP Address B, Port B, comm_intervals, #packets, avg. pkt rate, avg. #packets per interval,
  507. // avg. time between intervals, avg. interval time, duration, packets timestamps, inter-arrivtal times, average of inter-arrival times}
  508. // Also stores conversation with only one exchanged message. In this case avgPktRate, minDelay, maxDelay and avgDelay are -1
  509. std::unordered_map<convWithProt, entry_convStatExt> conv_statistics_extended;
  510. // {Last timestamp in the interval, #packets, #bytes, source IP entropy, destination IP entropy,
  511. // source IP cumulative entropy, destination IP cumulative entropy, #payload, #incorrect TCP checksum,
  512. // #correct TCP checksum, #novel IP, #novel TTL, #novel Window Size, #novel ToS,#novel MSS}
  513. std::unordered_map<std::string, entry_intervalStat> interval_statistics;
  514. // {TTL value, count}
  515. std::unordered_map<int, int> ttl_values;
  516. // {Win size, count}
  517. std::unordered_map<int, int> win_values;
  518. // {ToS, count}
  519. std::unordered_map<int, int> tos_values;
  520. // {MSS, count}
  521. std::unordered_map<int, int> mss_values;
  522. // {Port, count}
  523. std::unordered_map<int, int> port_values;
  524. // {IP Address, Protocol, count}
  525. std::unordered_map<ipAddress_protocol, int> protocol_distribution;
  526. //{IP Address, contacted IP Addresses}
  527. std::unordered_map<std::string, std::vector<std::string>> contacted_ips;
  528. // {IP Address, #received packets, #sent packets, Data received in kbytes, Data sent in kbytes}
  529. std::unordered_map<std::string, entry_ipStat> ip_statistics;
  530. // {IP Address, in_out, Port Number, count}
  531. std::unordered_map<ipAddress_inOut_port, int> ip_ports;
  532. // {IP Address, MAC Address}
  533. std::unordered_map<std::string, std::string> ip_mac_mapping;
  534. };
  535. #endif //CPP_PCAPREADER_STATISTICS_H