12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428 |
- from operator import itemgetter
- from math import sqrt, ceil, log
- import os
- import time
- import ID2TLib.libpcapreader as pr
- import matplotlib
- import numpy
- matplotlib.use('Agg')
- import matplotlib.pyplot as plt
- from ID2TLib.PcapFile import PcapFile
- from ID2TLib.StatsDatabase import StatsDatabase
- from ID2TLib.IPv4 import IPAddress
- class Statistics:
def __init__(self, pcap_file: PcapFile):
    """
    Creates a new Statistics object.

    :param pcap_file: A reference to the PcapFile object
    """
    # Fields
    self.pcap_filepath = pcap_file.pcap_file_path
    self.pcap_proc = None
    self.do_extra_tests = False

    # Create folder for statistics database if required.
    # An empty dirname means the database lives in the current directory, so
    # there is nothing to create (os.makedirs('') would raise).
    # exist_ok avoids a race between the existence check and the creation.
    self.path_db = pcap_file.get_db_path()
    path_dir = os.path.dirname(self.path_db)
    if path_dir:
        os.makedirs(path_dir, exist_ok=True)

    # Class instances
    self.stats_db = StatsDatabase(self.path_db)
def load_pcap_statistics(self, flag_write_file: bool, flag_recalculate_stats: bool, flag_print_statistics: bool):
    """
    Loads the PCAP statistics for the file specified by pcap_filepath. If the database is not existing yet, the
    statistics are calculated by the PCAP file processor and saved into the newly created database. Otherwise the
    statistics are gathered directly from the existing database.

    :param flag_write_file: Indicates whether the statistics should be written additionally into a text file (True)
    or not (False)
    :param flag_recalculate_stats: Indicates whether eventually existing statistics should be recalculated
    :param flag_print_statistics: Indicates whether the gathered basic statistics should be printed to the terminal
    """
    # Measure the loading time. time.clock() was removed in Python 3.8;
    # perf_counter() is the documented replacement for measuring durations.
    time_start = time.perf_counter()

    # Inform user about recalculation of statistics and its reason
    if flag_recalculate_stats:
        print("Flag -r/--recalculate found. Recalculating statistics.")

    # Recalculate statistics if database does not exist OR param -r/--recalculate is provided
    if (not self.stats_db.get_db_exists()) or flag_recalculate_stats:
        self.pcap_proc = pr.pcap_processor(self.pcap_filepath, str(self.do_extra_tests))
        self.pcap_proc.collect_statistics()
        self.pcap_proc.write_to_database(self.path_db)
        outstring_datasource = "by PCAP file processor."
    else:
        outstring_datasource = "from statistics database."

    # Load statistics from database
    self.file_info = self.stats_db.get_file_info()

    time_end = time.perf_counter()
    print("Loaded file statistics in " + str(time_end - time_start)[:4] + " sec " + outstring_datasource)

    # Write statistics if param -e/--export provided
    if flag_write_file:
        self.write_statistics_to_file()

    # Print statistics if param -s/--statistics provided
    if flag_print_statistics:
        self.print_statistics()
def get_file_information(self):
    """
    Collects the basic facts about the loaded capture file.

    :return: a list of tuples (description, value[, unit]); the unit is only present for some entries.
    """
    info = [("Pcap file", self.pcap_filepath)]
    info.append(("Packets", self.get_packet_count(), "packets"))
    info.append(("Capture length", self.get_capture_duration(), "seconds"))
    info.append(("Capture start", self.get_pcap_timestamp_start()))
    info.append(("Capture end", self.get_pcap_timestamp_end()))
    return info
def get_general_file_statistics(self):
    """
    Collects the general statistics stored in the file_info mapping.

    :return: a list of tuples, each consisting of (description, value, unit).
    """
    # (description, key in self.file_info, unit)
    layout = (
        ("Avg. packet rate", 'avgPacketRate', "packets/sec"),
        ("Avg. packet size", 'avgPacketSize', "kbytes"),
        ("Avg. packets sent", 'avgPacketsSentPerHost', "packets"),
        ("Avg. bandwidth in", 'avgBandwidthIn', "kbit/s"),
        ("Avg. bandwidth out", 'avgBandwidthOut', "kbit/s"),
    )
    return [(label, self.file_info[key], unit) for label, key, unit in layout]
@staticmethod
def write_list(desc_val_unit_list, func, line_ending="\n"):
    """
    Formats every (description, value[, unit]) tuple as one text line and hands that line to func.

    A string value that contains a dot and parses as a float is converted to a float first; float values
    are rounded to four decimal digits before formatting.

    :param desc_val_unit_list: The list of tuples consisting of (description, value, unit)
    :param func: The function to be applied to each generated string
    :param line_ending: The formatting string to be applied at the end of each string
    """
    for entry in desc_val_unit_list:
        description, value = entry[:2]

        # Text that looks like a float is turned into a real float
        if isinstance(value, str) and "." in value:
            try:
                value = float(value)
            except ValueError:
                pass  # not a float after all -> keep the text as it is

        if isinstance(value, float):
            value = round(value, 4)

        # The unit is only appended for three-element entries
        suffix = " " + entry[2] if len(entry) == 3 else ""
        func(description + ":\t" + str(value) + suffix + line_ending)
def print_statistics(self):
    """
    Prints the file information and the general file statistics to the terminal.
    """
    # (section banner, method delivering the entries of that section)
    sections = (
        ("\nPCAP FILE INFORMATION ------------------------------", self.get_file_information),
        ("\nGENERAL FILE STATISTICS ----------------------------", self.get_general_file_statistics),
    )
    for banner, collect in sections:
        print(banner)
        Statistics.write_list(collect(), print, "")
    print("\n")
def calculate_entropy(self, frequency: list, normalized: bool = False):
    """
    Calculates the entropy (base 2) and optionally the normalized entropy of a list of element frequencies.

    An empty list or a list whose frequencies do not sum to a positive number carries no information;
    in that case 0 is returned for both values instead of raising.

    :param frequency: The frequency of the elements.
    :param normalized: Calculate normalized entropy
    :return: entropy or (entropy, normalized entropy)
    """
    entropy, normalized_entropy, support = 0, 0, 0
    total = sum(frequency)

    if total > 0:
        for count in frequency:
            p_x = float(count / total)
            if p_x > 0:
                support += 1  # number of elements that actually occur
                entropy += - p_x * log(p_x, 2)

    if not normalized:
        return entropy

    # Normalisation divides by log2(support), which is only positive for support > 1
    if support > 1:
        normalized_entropy = entropy / log(support, 2)
    return entropy, normalized_entropy
def calculate_complement_packet_rates(self, pps):
    """
    Calculates the complement packet rates of the background traffic packet rates for each interval.
    Then normalize it to maximum boundary, which is the input parameter pps.

    Degenerate inputs no longer raise ZeroDivisionError:
    - with a single interval row (or intervals of zero length) the interval length cannot be derived, so the
      packet counts are taken as per-second rates (interval of 1 second);
    - if the background traffic rate is zero in every interval, the full pps is available everywhere.

    :param pps: The maximum packet rate the complement is normalized to
    :return: normalized packet rates for each time interval, as a list of (timestamp in seconds, rate)
    """
    result = self.process_db_query(
        "SELECT lastPktTimestamp,pktsCount FROM interval_statistics ORDER BY lastPktTimestamp")
    if not result:
        return []

    # Convert timestamps from microseconds to seconds
    timestamps = [int(row[0]) * 10 ** -6 for row in result]

    # Average interval length in seconds, derived from consecutive timestamps
    gaps = len(timestamps) - 1
    intervals_sum = 0
    for earlier, later in zip(timestamps, timestamps[1:]):
        intervals_sum += ceil(later - earlier)
    interval = intervals_sum / gaps if gaps > 0 else 0
    if interval <= 0:
        interval = 1

    # Convert packet rate "per interval" to "per second"
    bg_interval_pps = [(timestamp, int(row[1] / interval)) for timestamp, row in zip(timestamps, result)]

    max_pps = max(rate for _, rate in bg_interval_pps)
    if max_pps <= 0:
        # No background traffic at all -> the whole budget is free in every interval
        return [(timestamp, int(pps)) for timestamp, _ in bg_interval_pps]

    return [(timestamp, int(pps * (max_pps - rate) / max_pps)) for timestamp, rate in bg_interval_pps]
def get_tests_statistics(self):
    """
    Calculates the basic "defects tests" statistics of the dataset and the warnings derived from them.

    Missing data (empty tables, SUM() over no rows) no longer raises: ratios that cannot be computed are
    reported as -1, entropies over no data as 0 and counts as 0.

    :return: a list of tuples (description, value, unit); the statistics come first, followed by a
             separator entry and one entry per triggered warning
    """
    from collections import Counter

    # The query output is a list of tuples, thus [0][0] is needed to access a single value
    run_query = self.stats_db._process_user_defined_query

    def first_value(rows, default=None):
        """Returns the single value of a one-row/one-column result, or default if it is missing/NULL."""
        if rows and rows[0] and rows[0][0] is not None:
            return rows[0][0]
        return default

    def entropy_of(frequencies, normalized=False):
        """Entropy of the frequencies; 0 (or (0, 0)) when there is no data to measure."""
        if frequencies and sum(frequencies) > 0:
            return self.calculate_entropy(frequencies, normalized)
        return (0, 0) if normalized else 0

    def novelty_entropy(rows):
        """Entropy of how often each per-interval novelty count occurs."""
        return entropy_of(list(Counter(rows).values()))

    def field_stats(distribution_query, novelty_query):
        """Returns (frequencies, novelty rows, entropy, normalized entropy, novelty entropy) of one field."""
        frequencies = [row[1] for row in (run_query(distribution_query) or [])]
        novel_rows = run_query(novelty_query) or []
        entropy, norm_entropy = entropy_of(frequencies, True)
        return frequencies, novel_rows, entropy, norm_entropy, novelty_entropy(novel_rows)

    ####### Payload Tests #######
    payload_sum = first_value(run_query("SELECT sum(payloadCount) FROM interval_statistics"))
    packet_count = first_value(run_query("SELECT packetCount FROM file_statistics"))
    if payload_sum is None or packet_count is None:
        payloadRatio = -1
    elif packet_count != 0:
        payloadRatio = float(payload_sum / packet_count * 100)
    else:
        payloadRatio = 0

    ####### TCP checksum Tests #######
    # NOTE(review): the incorrect count is a SUM while the correct count is an AVG over the intervals;
    # kept as in the original queries -- confirm that this mix is intended.
    incorrect_checksums = first_value(
        run_query("SELECT sum(incorrectTCPChecksumCount) FROM interval_statistics"))
    correct_checksums = first_value(
        run_query("SELECT avg(correctTCPChecksumCount) FROM interval_statistics"))
    if incorrect_checksums is None or correct_checksums is None:
        incorrectChecksumRatio = -1
    elif (incorrect_checksums + correct_checksums) != 0:
        incorrectChecksumRatio = float(
            incorrect_checksums / (incorrect_checksums + correct_checksums) * 100)
    else:
        incorrectChecksumRatio = 0

    ####### IP Src & Dst Tests #######
    ip_rows = run_query("SELECT ipAddress,pktsSent,pktsReceived FROM ip_statistics") or []
    ipSrcEntropy, ipSrcNormEntropy = entropy_of([row[1] for row in ip_rows], True)
    ipDstEntropy, ipDstNormEntropy = entropy_of([row[2] for row in ip_rows], True)
    newIPCount = run_query("SELECT newIPCount FROM interval_statistics") or []
    ipNoveltyDistEntropy = novelty_entropy(newIPCount)

    ####### Ports Tests #######
    port0Count = first_value(
        run_query("SELECT SUM(portCount) FROM ip_ports WHERE portNumber = 0"), 0)
    reservedPortCount = first_value(run_query(
        "SELECT SUM(portCount) FROM ip_ports WHERE portNumber IN (100,114,1023,1024,49151,49152,65535)"),
        0)  # could be extended

    ####### TTL Tests #######
    _, newTTLCount, ttlEntropy, ttlNormEntropy, ttlNoveltyDistEntropy = field_stats(
        "SELECT ttlValue,SUM(ttlCount) FROM ip_ttl GROUP BY ttlValue",
        "SELECT newTTLCount FROM interval_statistics")

    ####### Window Size Tests #######
    _, newWinSizeCount, winEntropy, winNormEntropy, winNoveltyDistEntropy = field_stats(
        "SELECT winSize,SUM(winCount) FROM tcp_win GROUP BY winSize",
        "SELECT newWinSizeCount FROM interval_statistics")

    ####### ToS Tests #######
    _, newToSCount, tosEntropy, tosNormEntropy, tosNoveltyDistEntropy = field_stats(
        "SELECT tosValue,SUM(tosCount) FROM ip_tos GROUP BY tosValue",
        "SELECT newToSCount FROM interval_statistics")

    ####### MSS Tests #######
    mss_frequencies, newMSSCount, mssEntropy, mssNormEntropy, mssNoveltyDistEntropy = field_stats(
        "SELECT mssValue,SUM(mssCount) FROM tcp_mss GROUP BY mssValue",
        "SELECT newMSSCount FROM interval_statistics")
    # The most used MSS < 1460. Calculate the ratio of the values bigger than 1460.
    big_mss_count = first_value(
        run_query("SELECT SUM(mssCount) FROM tcp_mss WHERE mssValue > 1460"), 0)
    mss_total = sum(mss_frequencies)
    bigMSS = (big_mss_count / mss_total) * 100 if mss_total else 0

    output = []
    if self.do_extra_tests:
        output = [("Payload ratio", payloadRatio, "%"),
                  ("Incorrect TCP checksum ratio", incorrectChecksumRatio, "%")]

    output = output + [("# IP addresses", sum(x[0] for x in newIPCount), ""),
                       ("IP Src Entropy", ipSrcEntropy, ""),
                       ("IP Src Normalized Entropy", ipSrcNormEntropy, ""),
                       ("IP Dst Entropy", ipDstEntropy, ""),
                       ("IP Dst Normalized Entropy", ipDstNormEntropy, ""),
                       ("IP Novelty Distribution Entropy", ipNoveltyDistEntropy, ""),
                       ("# TTL values", sum(x[0] for x in newTTLCount), ""),
                       ("TTL Entropy", ttlEntropy, ""),
                       ("TTL Normalized Entropy", ttlNormEntropy, ""),
                       ("TTL Novelty Distribution Entropy", ttlNoveltyDistEntropy, ""),
                       ("# WinSize values", sum(x[0] for x in newWinSizeCount), ""),
                       ("WinSize Entropy", winEntropy, ""),
                       ("WinSize Normalized Entropy", winNormEntropy, ""),
                       ("WinSize Novelty Distribution Entropy", winNoveltyDistEntropy, ""),
                       ("# ToS values", sum(x[0] for x in newToSCount), ""),
                       ("ToS Entropy", tosEntropy, ""),
                       ("ToS Normalized Entropy", tosNormEntropy, ""),
                       ("ToS Novelty Distribution Entropy", tosNoveltyDistEntropy, ""),
                       ("# MSS values", sum(x[0] for x in newMSSCount), ""),
                       ("MSS Entropy", mssEntropy, ""),
                       ("MSS Normalized Entropy", mssNormEntropy, ""),
                       ("MSS Novelty Distribution Entropy", mssNoveltyDistEntropy, ""),
                       ("======================", "", "")]

    # Reasoning the statistics values
    # Payload and checksum values are only reported with extra tests enabled, so their warnings
    # are guarded the same way.
    # NOTE(review): the scope of this guard was inferred from that pairing -- confirm.
    if self.do_extra_tests:
        if payloadRatio > 80:
            output.append(("WARNING: Too high payload ratio", payloadRatio, "%."))
        if payloadRatio < 30:
            output.append(("WARNING: Too low payload ratio", payloadRatio, "% (Injecting attacks that are carried out in the packet payloads is not recommmanded)."))
        if incorrectChecksumRatio > 5:
            output.append(("WARNING: High incorrect TCP checksum ratio", incorrectChecksumRatio, "%."))

    if ipSrcNormEntropy > 0.65:
        output.append(("WARNING: High IP source normalized entropy", ipSrcNormEntropy, "."))
    if ipSrcNormEntropy < 0.2:
        output.append(("WARNING: Low IP source normalized entropy", ipSrcNormEntropy, "."))
    if ipDstNormEntropy > 0.65:
        output.append(("WARNING: High IP destination normalized entropy", ipDstNormEntropy, "."))
    if ipDstNormEntropy < 0.2:
        output.append(("WARNING: Low IP destination normalized entropy", ipDstNormEntropy, "."))

    if ttlNormEntropy > 0.65:
        output.append(("WARNING: High TTL normalized entropy", ttlNormEntropy, "."))
    if ttlNormEntropy < 0.2:
        output.append(("WARNING: Low TTL normalized entropy", ttlNormEntropy, "."))
    if ttlNoveltyDistEntropy < 1:
        output.append(("WARNING: Too low TTL novelty distribution entropy", ttlNoveltyDistEntropy,
                       "(The distribution of the novel TTL values is suspicious)."))

    if winNormEntropy > 0.6:
        output.append(("WARNING: High Window Size normalized entropy", winNormEntropy, "."))
    if winNormEntropy < 0.1:
        output.append(("WARNING: Low Window Size normalized entropy", winNormEntropy, "."))
    if winNoveltyDistEntropy < 4:
        output.append(("WARNING: Low Window Size novelty distribution entropy", winNoveltyDistEntropy,
                       "(The distribution of the novel Window Size values is suspicious)."))

    if tosNormEntropy > 0.4:
        output.append(("WARNING: High ToS normalized entropy", tosNormEntropy, "."))
    if tosNormEntropy < 0.1:
        output.append(("WARNING: Low ToS normalized entropy", tosNormEntropy, "."))
    if tosNoveltyDistEntropy < 0.5:
        output.append(("WARNING: Low ToS novelty distribution entropy", tosNoveltyDistEntropy,
                       "(The distribution of the novel ToS values is suspicious)."))

    if mssNormEntropy > 0.4:
        output.append(("WARNING: High MSS normalized entropy", mssNormEntropy, "."))
    if mssNormEntropy < 0.1:
        output.append(("WARNING: Low MSS normalized entropy", mssNormEntropy, "."))
    if mssNoveltyDistEntropy < 0.5:
        output.append(("WARNING: Low MSS novelty distribution entropy", mssNoveltyDistEntropy,
                       "(The distribution of the novel MSS values is suspicious)."))
    if bigMSS > 50:
        output.append(("WARNING: High ratio of MSS > 1460", bigMSS, "% (High fragmentation rate in Ethernet)."))

    if port0Count > 0:
        output.append(("WARNING: Port number 0 is used in ", port0Count, "packets (awkward-looking port)."))
    if reservedPortCount > 0:
        output.append(("WARNING: Reserved port numbers are used in ", reservedPortCount, "packets (uncommonly-used ports)."))

    return output
def write_statistics_to_file(self):
    """
    Writes the file information, the general statistics and the tests statistics into the text file
    "<pcap file path>.stat".
    """

    def _write_header(title: str):
        """
        Writes the section header into the open file.

        :param title: The section title
        """
        target.write("====================== \n")
        target.write(title + " \n")
        target.write("====================== \n")

    # Mode 'w' already truncates the file; the context manager closes it even if a section fails.
    with open(self.pcap_filepath + ".stat", 'w') as target:
        _write_header("PCAP file information")
        Statistics.write_list(self.get_file_information(), target.write)

        _write_header("General statistics")
        Statistics.write_list(self.get_general_file_statistics(), target.write)

        _write_header("Tests statistics")
        Statistics.write_list(self.get_tests_statistics(), target.write)
def get_capture_duration(self):
    """
    Looks up the capture duration in the loaded file information.

    :return: The duration of the capture in seconds
    """
    duration = self.file_info['captureDuration']
    return duration
def get_pcap_timestamp_start(self):
    """
    Looks up the timestamp of the first packet in the loaded file information.

    :return: The timestamp of the first packet in the PCAP file
    """
    first_timestamp = self.file_info['timestampFirstPacket']
    return first_timestamp
def get_pcap_timestamp_end(self):
    """
    Looks up the timestamp of the last packet in the loaded file information.

    :return: The timestamp of the last packet in the PCAP file
    """
    last_timestamp = self.file_info['timestampLastPacket']
    return last_timestamp
def get_pps_sent(self, ip_address: str):
    """
    Determines how many packets per second the given IP address sent, averaged over the whole capture.

    :param ip_address: The IP address whose packets per second should be calculated
    :return: The sent packets per seconds for the given IP address
    """
    query = "SELECT pktsSent from ip_statistics WHERE ipAddress=?"
    packets_sent = self.stats_db.process_db_query(query, False, (ip_address,))
    duration = float(self.get_capture_duration())
    rate = float(packets_sent) / duration
    return int(rate)
def get_pps_received(self, ip_address: str):
    """
    Determines how many packets per second the given IP address received, averaged over the whole capture.

    :param ip_address: The IP address used for the calculation
    :return: The number of packets per second received
    """
    query = "SELECT pktsReceived FROM ip_statistics WHERE ipAddress=?"
    packets_received = self.stats_db.process_db_query(query, False, (ip_address,))
    duration = float(self.get_capture_duration())
    rate = float(packets_received) / duration
    return int(rate)
def get_packet_count(self):
    """
    Looks up the packet count in the loaded file information.

    :return: The number of packets in the loaded PCAP file
    """
    packet_count = self.file_info['packetCount']
    return packet_count
def get_most_used_ip_address(self):
    """
    Runs the named query "most_used(ipAddress)".

    :return: The IP address/addresses with the highest sum of packets sent and received
    """
    named_query = "most_used(ipAddress)"
    return self.process_db_query(named_query)
def get_ttl_distribution(self, ipAddress: str):
    """
    Collects the TTL values used by the given IP address.

    The address is passed as a bound SQL parameter instead of being pasted into the query text, so a
    value containing quotes can neither break nor alter the statement.

    :param ipAddress: The IP address whose TTL distribution should be determined
    :return: a dict mapping each TTL value to its count
    """
    rows = self.stats_db.process_db_query(
        "SELECT ttlValue, ttlCount from ip_ttl WHERE ipAddress=?", False, (ipAddress,))
    return {key: value for (key, value) in rows}
def get_mss_distribution(self, ipAddress: str):
    """
    Collects the TCP MSS values used by the given IP address.

    The address is passed as a bound SQL parameter instead of being pasted into the query text, so a
    value containing quotes can neither break nor alter the statement.

    :param ipAddress: The IP address whose MSS distribution should be determined
    :return: a dict mapping each MSS value to its count
    """
    rows = self.stats_db.process_db_query(
        "SELECT mssValue, mssCount from tcp_mss WHERE ipAddress=?", False, (ipAddress,))
    return {key: value for (key, value) in rows}
def get_win_distribution(self, ipAddress: str):
    """
    Collects the TCP window sizes used by the given IP address.

    The address is passed as a bound SQL parameter instead of being pasted into the query text, so a
    value containing quotes can neither break nor alter the statement.

    :param ipAddress: The IP address whose window size distribution should be determined
    :return: a dict mapping each window size to its count
    """
    rows = self.stats_db.process_db_query(
        "SELECT winSize, winCount from tcp_win WHERE ipAddress=?", False, (ipAddress,))
    return {key: value for (key, value) in rows}
def get_tos_distribution(self, ipAddress: str):
    """
    Collects the ToS values used by the given IP address.

    The address is passed as a bound SQL parameter instead of being pasted into the query text, so a
    value containing quotes can neither break nor alter the statement.

    :param ipAddress: The IP address whose ToS distribution should be determined
    :return: a dict mapping each ToS value to its count
    """
    rows = self.stats_db.process_db_query(
        "SELECT tosValue, tosCount from ip_tos WHERE ipAddress=?", False, (ipAddress,))
    return {key: value for (key, value) in rows}
def get_random_ip_address(self, count: int = 1):
    """
    Draws IP addresses from the dataset via the named query "random(all(ipAddress))".

    :param count: The number of IP addresses to return
    :return: A randomly chosen IP address from the dataset or, if param count is not one, a list with one
             query result per requested address
    """
    named_query = "random(all(ipAddress))"
    if count == 1:
        return self.process_db_query(named_query)
    return [self.process_db_query(named_query) for _ in range(count)]
def get_mac_address(self, ipAddress: str):
    """
    Runs the named query "macAddress(ipAddress=<address>)" for the given IP address.

    :param ipAddress: The IP address whose MAC address should be looked up
    :return: The MAC address used in the dataset for the given IP address.
    """
    named_query = 'macAddress(ipAddress=' + ipAddress + ")"
    return self.process_db_query(named_query)
def get_most_used_mss(self, ipAddress: str):
    """
    Determines the MSS value the given IP address used most often.

    The address is passed as a bound SQL parameter instead of being pasted into the query text, so a
    value containing quotes can neither break nor alter the statement.

    :param ipAddress: The IP address whose used MSS should be determined
    :return: The TCP MSS value used by the IP address, or None if the query did not yield an integer
             (e.g. the IP address never specified a MSS)
    """
    mss_value = self.stats_db.process_db_query(
        "SELECT mssValue from tcp_mss WHERE ipAddress=? ORDER BY mssCount DESC LIMIT 1",
        False, (ipAddress,))
    return mss_value if isinstance(mss_value, int) else None
def get_most_used_ttl(self, ipAddress: str):
    """
    Determines the TTL value the given IP address used most often.

    The address is passed as a bound SQL parameter instead of being pasted into the query text, so a
    value containing quotes can neither break nor alter the statement.

    :param ipAddress: The IP address whose used TTL should be determined
    :return: The TTL value used by the IP address, or None if the query did not yield an integer
             (e.g. the IP address never specified a TTL)
    """
    ttl_value = self.stats_db.process_db_query(
        "SELECT ttlValue from ip_ttl WHERE ipAddress=? ORDER BY ttlCount DESC LIMIT 1",
        False, (ipAddress,))
    return ttl_value if isinstance(ttl_value, int) else None
def get_in_degree(self):
    """
    Determines the in-degree for each ipAddress, i.e. for every IP the count of ipAddresses it has
    received packets from.

    :return: a list, each entry consists of one IPAddress and its associated in-degree
    """
    # The same SELECT is needed once per side (A/B) of a stateless conversation
    side_query = ("SELECT ipAddress{own}, Count(DISTINCT ipAddress{peer}) FROM ip_ports "
                  "JOIN conv_statistics_stateless ON ipAddress = ipAddress{own} "
                  "WHERE portDirection='in' AND portNumber = port{own} GROUP BY ipAddress")
    query = " UNION ".join(side_query.format(own=own, peer=peer)
                           for own, peer in (("A", "B"), ("B", "A")))
    raw_degrees = self.stats_db._process_user_defined_query(query)

    # Because of the structure of the database, there could be 2 entries for the same IP address,
    # therefore accumulate their sums
    return self.filter_multiples(raw_degrees)
def get_out_degree(self):
    """
    Determines the out-degree for each ipAddress, i.e. for every IP the count of ipAddresses it has
    sent packets to.

    :return: a list, each entry consists of one IPAddress and its associated out-degree
    """
    # The same SELECT is needed once per side (A/B) of a stateless conversation
    side_query = ("SELECT ipAddress{own}, Count(DISTINCT ipAddress{peer}) FROM ip_ports "
                  "JOIN conv_statistics_stateless ON ipAddress = ipAddress{own} "
                  "WHERE portDirection='out' AND portNumber = port{own} GROUP BY ipAddress")
    query = " UNION ".join(side_query.format(own=own, peer=peer)
                           for own, peer in (("A", "B"), ("B", "A")))
    raw_degrees = self.stats_db._process_user_defined_query(query)

    # Because of the structure of the database, there could be 2 entries for the same IP address,
    # therefore accumulate their sums
    return self.filter_multiples(raw_degrees)
def get_overall_degree(self):
    """
    Determines the overall-degree for each ipAddress, i.e. the sum of its out-degree and its in-degree.

    IPs of the out-degree list come first (in that list's order), followed by the IPs that only appear in
    the in-degree list. Both lists are expected to contain each IP at most once.

    :return: a list, each entry consists of one IPAddress and its associated overall-degree
    """
    out_degrees = self.get_out_degree()
    in_degrees = self.get_in_degree()

    # Index the in-degrees by IP so every out-degree entry is matched with one lookup
    # instead of a scan over the whole in-degree list.
    in_by_ip = {entry[0]: entry for entry in in_degrees}

    overall_degrees = []
    ips_with_out_degree = set()
    for out_entry in out_degrees:
        ip_address = out_entry[0]
        ips_with_out_degree.add(ip_address)
        in_entry = in_by_ip.get(ip_address)
        if in_entry is None:
            # IP only appears in the out-degree list -> just take over the value
            overall_degrees.append(out_entry)
        else:
            # same IP address in both lists -> sum of degrees
            overall_degrees.append((ip_address, out_entry[1] + in_entry[1]))

    # add remaining IPs, which did not appear in the out-degree list
    overall_degrees.extend(entry for entry in in_degrees if entry[0] not in ips_with_out_degree)
    return overall_degrees
def filter_multiples(self, entries):
    """
    Helper function for get_out_degree and get_in_degree.
    Filters the given list for duplicate IP addresses and accumulates the values of all entries that share
    an IP address. Any number of entries per IP address is handled, including identical entries; the
    result keeps the order in which the IP addresses first appear.

    :param entries: list, each entry consists of an ipAddress and a numeric value
    :return: a filtered list of (ipAddress, accumulated value) tuples, without duplicate ipAddresses
    """
    # dicts keep insertion order, so the first occurrence of an IP fixes its position
    totals = {}
    for entry in entries:
        ip_address, value = entry[0], entry[1]
        if ip_address in totals:
            totals[ip_address] += value
        else:
            totals[ip_address] = value
    return list(totals.items())
def get_avg_delay_local_ext(self):
    """
    Calculates the average delay of the conversations stored in conv_statistics, separately for local and
    external communication. A conversation counts as local only if both of its IP addresses are private.
    If no conversation of a kind exists, a fixed fallback is used (0.06 local, 0.15 external).

    :return: tuple consisting of avg delay for local and external communication, (local, external)
    """

    def _mean_delay(conversations, fallback):
        """Average of the avgDelay column scaled by 0.001, or the fallback without conversations."""
        if not conversations:
            return fallback
        total = 0.0
        for conversation in conversations:
            total += conversation[2]
        # the original code labels this scaling "ms" -- presumably a unit conversion, TODO confirm
        return (total / len(conversations)) * 0.001

    conv_delays = self.stats_db._process_user_defined_query("SELECT ipAddressA, ipAddressB, avgDelay FROM conv_statistics")
    if not conv_delays:
        # no statistics were found -> use the fallback numbers
        return 0.06, 0.15

    # split into local and external conversations
    local_conv, external_conv = [], []
    for conv in conv_delays:
        ip_a = IPAddress.parse(conv[0])
        ip_b = IPAddress.parse(conv[1])
        if ip_a.is_private() and ip_b.is_private():
            local_conv.append(conv)
        else:
            external_conv.append(conv)

    avg_delay_local = _mean_delay(local_conv, 0.06)
    avg_delay_external = _mean_delay(external_conv, 0.15)
    return avg_delay_local, avg_delay_external
def get_statistics_database(self):
    """
    Gives access to the statistics database used by this object.

    :return: A reference to the statistics database object
    """
    database = self.stats_db
    return database
def process_db_query(self, query_string_in: str, print_results: bool = False):
    """
    Forwards a query to the statistics database. The query can be a standard SQL SELECT/INSERT query or a
    named query.

    :param query_string_in: The query to be processed
    :param print_results: Indicates whether the results should be printed to terminal
    :return: The result of the query
    """
    database = self.stats_db
    return database.process_db_query(query_string_in, print_results)
def is_query(self, value: str):
    """
    Decides whether the given value looks like a query, i.e. a standard SQL query (SELECT, INSERT) or a
    named query.

    The check is a case-insensitive substring test: the lower-cased, stripped value has to contain at least
    one of the keywords reported by the statistics database.

    :param value: The string to be checked
    :return: True if the string is recognized as a query, otherwise False. Non-string values are never a query.
    """
    if not isinstance(value, str):
        return False

    normalized = value.lower().strip()
    database = self.stats_db

    # named-query keywords are consulted first; the SQL keywords are only fetched if none of them matched
    if any(keyword in normalized for keyword in database.get_all_named_query_keywords()):
        return True
    return any(keyword in normalized for keyword in database.get_all_sql_query_keywords())
def calculate_standard_deviation(self, lst):
    """
    Calculates the standard deviation of a collection of numbers.

    The population form is used, i.e. the sum of squared differences is divided by the number of items
    (not by the number of items minus one).

    :param lst: The numbers to calculate the SD of. Any iterable is accepted, including one-shot iterators
                and generators; it is consumed exactly once.
    :return: The standard deviation as a float
    :raises ZeroDivisionError: if lst contains no items
    """
    # materialize once: the values are needed for the length, the mean and the squared differences
    values = list(lst)
    num_items = len(values)
    mean = sum(values) / num_items
    sq_differences = [(x - mean) ** 2 for x in values]
    variance = sum(sq_differences) / num_items
    return sqrt(variance)
def plot_statistics(self, format: str = 'pdf'):  # 'png'
    """
    Plots the statistics associated with the dataset.

    Every plot is saved next to the input PCAP: the output path is built by replacing '.pcap' in
    self.pcap_filepath with a plot specific suffix that ends in '.' + format. Plots whose underlying query
    yields no data are skipped.

    :param format: The format to be used to save the statistics diagrams, e.g. 'pdf' or 'png'. The name
                   shadows the builtin of the same name but is kept because it is part of the signature.
    :return: None; a confirmation is printed once all plots have been processed
    """
    file_ending = '.' + format
    # look shared by all bar plots that are drawn with a line width of 1
    bar_style = dict(align='center', linewidth=1, color='red', edgecolor='red')
    interval_query = "SELECT lastPktTimestamp, %s FROM interval_statistics ORDER BY lastPktTimestamp"

    def run_query(sql):
        """Runs the given SQL statement against the statistics database and returns its rows."""
        return self.stats_db._process_user_defined_query(sql)

    def split_rows(rows):
        """Splits two-column rows into the list of first and the list of second values."""
        return [row[0] for row in rows], [row[1] for row in rows]

    def save_plot(suffix):
        """Saves the current figure next to the PCAP and returns the path it was written to."""
        out = self.pcap_filepath.replace('.pcap', suffix)
        plt.savefig(out, dpi=500)
        return out

    def plot_distribution(sql, title, x_label, y_label, suffix=None):
        """
        Bar plot of a (value, count) query with the numeric values themselves on the x-axis.
        :return: A filepath to the created plot, or None if the query yielded no data
        """
        rows = run_query(sql)
        if not rows:
            return None
        plt.gcf().clear()
        graphx, graphy = split_rows(rows)
        plt.autoscale(enable=True, axis='both')
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.xlim([0, max(graphx)])
        plt.grid(True)
        plt.bar(graphx, graphy, 0.1, **bar_style)
        if suffix is None:
            suffix = '_plot-' + title + file_ending
        return save_plot(suffix)

    def plot_categories(sql, title, x_label, suffix, dense_labels=False):
        """
        Bar plot of a (label, count) query with one bar per label; the labels are used as x ticks.
        :param dense_labels: Draw small vertical tick labels and limit the number of ticks (for many labels)
        :return: A filepath to the created plot, or None if the query yielded no data
        """
        plt.gcf().clear()
        rows = run_query(sql)
        if not rows:
            return None
        labels, counts = split_rows(rows)
        plt.autoscale(enable=True, axis='both')
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel('Number of Packets')
        plt.xlim([0, len(labels)])
        plt.grid(True)
        # bars are placed at the label index, the labels become the tick texts
        x = range(0, len(labels))
        if dense_labels:
            plt.xticks(x, labels, rotation='vertical', fontsize=5)
            plt.tight_layout()
            # limit the number of xticks
            plt.locator_params(axis='x', nbins=20)
        else:
            plt.xticks(x, labels)
        plt.bar(x, counts, 0.1, **bar_style)
        return save_plot(suffix)

    def plot_protocol():
        out = plot_categories("SELECT protocolName, SUM(protocolCount) FROM ip_protocols GROUP BY protocolName",
                              "Protocols Distribution", 'Protocols', '_plot-protocol' + file_ending)
        if out is None:
            print("Error plot protocol: No protocol values found!")
        return out

    def plot_port():
        return plot_distribution("SELECT portNumber, SUM(portCount) FROM ip_ports GROUP BY portNumber",
                                 "Ports Distribution", 'Ports Numbers', 'Number of Packets',
                                 '_plot-port' + file_ending)

    # This distribution is not drawable for big datasets
    def plot_ip_src():
        return plot_categories("SELECT ipAddress, pktsSent FROM ip_statistics",
                               "Source IP Distribution", 'Source IP', '_plot-ip-src' + file_ending,
                               dense_labels=True)

    # This distribution is not drawable for big datasets
    def plot_ip_dst():
        return plot_categories("SELECT ipAddress, pktsReceived FROM ip_statistics",
                               "Destination IP Distribution", 'Destination IP', '_plot-ip-dst' + file_ending,
                               dense_labels=True)

    def plot_interval_statistics(column, title, y_label):
        """
        Bar plot of one interval_statistics column, one bar per time interval (ordered by timestamp).
        :return: A filepath to the created plot, or None if the query yielded no data
        """
        rows = run_query(interval_query % column)
        if not rows:
            return None
        plt.gcf().clear()
        _, graphy = split_rows(rows)
        plt.autoscale(enable=True, axis='both')
        plt.title(title)
        plt.xlabel("Time Interval")
        plt.ylabel(y_label)
        plt.xlim([0, len(graphy)])
        plt.grid(True)
        # the x-axis carries the index of the interval, not the timestamp itself
        x = range(0, len(graphy))
        # limit the number of xticks
        plt.locator_params(axis='x', nbins=20)
        plt.bar(x, graphy, 0.5, **bar_style)
        return save_plot('_plot-' + title + file_ending)

    def plot_interval_cum_entropy(column, title, suffix):
        """
        Line plot of a cumulative entropy column of interval_statistics over the time intervals.
        :return: A filepath to the created plot, or None if there is no data or no entropy was calculated
        """
        rows = run_query(interval_query % column)
        # nothing to draw without rows; a leading -1 means the entropy was not calculated
        if not rows or rows[0][1] == -1:
            return None
        plt.gcf().clear()
        _, graphy = split_rows(rows)
        plt.autoscale(enable=True, axis='both')
        plt.title(title)
        plt.xlabel('Time Interval')
        plt.ylabel('Entropy')
        plt.xlim([0, len(graphy)])
        plt.grid(True)
        # the x-axis carries the index of the interval, not the timestamp itself
        x = range(0, len(graphy))
        # limit the number of xticks
        plt.locator_params(axis='x', nbins=20)
        plt.plot(x, graphy, 'r')
        return save_plot(suffix)

    def plot_degree(degrees, degree_label, degree_name, suffix):
        """
        Creates a horizontal bar plot visualizing a degree value for every IP address.
        :param degrees: Entries whose first item is the IP address and whose second item is its degree
        :param degree_label: Name of the degree as used in the title and on the x-axis, e.g. "Indegree"
        :param degree_name: Name of the degree as used in the message printed when there is no data
        :param suffix: The suffix of the created file, including file extension
        :return: A filepath to the file containing the created plot, or None if there is no data
        """
        if not degrees:
            print("Error: No statistics Information for plotting %s found" % degree_name)
            return None
        labels = [entry[0] for entry in degrees]
        values = [entry[1] for entry in degrees]
        # The figure has to exist before title and labels are set: they are attached to the current
        # figure, so creating the figure afterwards would leave the saved plot without them.
        fig = plt.figure(figsize=(len(values) / 20 + 5, int(len(labels) / 5) + 5))  # these proportions just worked well
        plt.title("%s per IP Address" % degree_label)
        plt.ylabel('IpAddress')
        plt.xlabel(degree_label)
        # set limits of the axis
        plt.ylim([0, len(labels)])
        plt.xlim([0, max(values) + 10])
        # display numbers at each bar
        for i, v in enumerate(values):
            plt.text(v + 1, i + .1, str(v), color='blue', fontweight='bold')
        # display grid for better visuals
        plt.grid(True)
        # plot the bars at plain index positions, then label these positions with the IP addresses
        positions = list(range(len(values)))
        plt.barh(positions, values, 0.3, **bar_style)
        plt.yticks(positions, labels)
        plt.tight_layout()
        out = save_plot(suffix)
        # release the dedicated figure, otherwise figures accumulate with every plot
        plt.close(fig)
        return out

    def plot_big_comm_interval_stat(attr: str, table: str, title: str, xlabel: str, suffix: str):
        """
        Plots the desired statistc per connection as horizontal bar plot.
        Included are 'half-open' connections, where only one packet is exchanged.
        The given statistics table has to have at least the attributes 'ipAddressA', 'portA', 'ipAddressB',
        'portB' and the specified additional attribute.
        Note: there may be cutoff/scaling problems within the plot if there is too little data.
        :param attr: The desired statistic, named with respect to its attribute in the given statistics table
        :param table: The statistics table
        :param title: The title of the created plot
        :param xlabel: The name of the x-axis of the created plot
        :param suffix: The suffix of the created file, including file extension
        :return: A filepath to the file containing the created plot, or None if the query yielded no data
        """
        # attr and table are only ever the fixed names passed in below, never user input
        result = run_query("SELECT ipAddressA, portA, ipAddressB, portB, %s FROM %s" % (attr, table))
        if not result:
            return None
        # sort ascending by the statistic; the bars are drawn bottom-up in this order
        result = sorted(result, key=lambda row: row[4])
        # compute plot data
        graphy, graphx = [], []
        for row in result:
            addr1, addr2 = "%s:%d" % (row[0], row[1]), "%s:%d" % (row[2], row[3])
            # adjust the justification of strings to improve appearance
            len_max = max(len(addr1), len(addr2))
            graphy.append("%s\n%s" % (addr1.ljust(len_max), addr2.ljust(len_max)))
            graphx.append(row[4])
        # have x axis and its label appear at the top (instead of bottom)
        fig, ax = plt.subplots()
        ax.xaxis.tick_top()
        ax.xaxis.set_label_position("top")
        # multipliers for scaling the plot size in inches
        dist_mult_height, dist_mult_width = 0.55, 0.07  # these values turned out to work well
        # use static scale along the conversation axis, if there are too little entries to use dynamic scaling
        if len(graphy) < 10:
            plt_height = 7.5
        else:
            plt_height = len(graphy) * dist_mult_height
        # use static scale along the x axis, if the x values are small
        largest = max(graphx)
        if largest < 200:
            plt_width = 7.5  # 7.5 as static width worked well
            if largest == 0:
                ax.set_xlim(0, 10)
        else:
            plt_width = largest * dist_mult_width
        # orginally, a good title distance turned out to be 1.012 with a plot height of 52.8
        title_distance = 1 + 0.012 * 52.8 / plt_height
        fig.set_size_inches(plt_width, plt_height)  # set plot size
        # set additional plot parameters
        plt.title(title, y=title_distance)
        plt.xlabel(xlabel)
        plt.ylabel('Connection')
        plt.grid(True)
        plt.gca().margins(y=0)  # removes the space between data and x-axis within the plot
        # plot the above data, first use plain numbers as graphy to maintain sorting
        positions = range(len(graphy))
        plt.barh(positions, graphx, 0.5, align='center', linewidth=0.5, color='red', edgecolor='red')
        # now change the y numbers to the respective address labels
        plt.yticks(positions, graphy)
        # try to use tight layout to cut off unnecessary space; this is best effort only, the plot is
        # saved with the default layout if it fails
        try:
            plt.tight_layout(pad=4)
        except (ValueError, numpy.linalg.LinAlgError):
            pass
        # save created figure
        out = save_plot(suffix)
        # release the dedicated figure, otherwise figures accumulate with every plot
        plt.close(fig)
        return out

    # value distributions
    plot_distribution("SELECT ttlValue, SUM(ttlCount) FROM ip_ttl GROUP BY ttlValue",
                      "TTL Distribution", "TTL Value", "Number of Packets")
    plot_distribution("SELECT mssValue, SUM(mssCount) FROM tcp_mss GROUP BY mssValue",
                      "MSS Distribution", "MSS Value", "Number of Packets")
    plot_distribution("SELECT winSize, SUM(winCount) FROM tcp_win GROUP BY winSize",
                      "Window Size Distribution", "Window Size", "Number of Packets")
    plot_protocol()

    # interval statistics
    plot_interval_statistics("pktsCount", "Packet Rate", "Number of Packets")
    plot_interval_statistics("ipSrcEntropy", "Source IP Entropy", "Entropy")
    plot_interval_statistics("ipDstEntropy", "Destination IP Entropy", "Entropy")
    plot_interval_cum_entropy("ipSrcCumEntropy", "Source IP Cumulative Entropy",
                              '_plot-interval-ip-src-cum-ent' + file_ending)
    plot_interval_cum_entropy("ipDstCumEntropy", "Destination IP Cumulative Entropy",
                              '_plot-interval-ip-dst-cum-ent' + file_ending)
    plot_interval_statistics("newIPCount", "IP Novelty Distribution", "Novel values count")
    plot_interval_statistics("newPortCount", "Port Novelty Distribution", "Novel values count")
    plot_interval_statistics("newTTLCount", "TTL Novelty Distribution", "Novel values count")
    plot_interval_statistics("newToSCount", "ToS Novelty Distribution", "Novel values count")
    plot_interval_statistics("newWinSizeCount", "Window Size Novelty Distribution", "Novel values count")
    plot_interval_statistics("newMSSCount", "MSS Novelty Distribution", "Novel values count")

    # per connection and per IP address statistics
    plot_big_comm_interval_stat("pktsCount", "conv_statistics_stateless",
                                'Number of exchanged packets per connection', "Number of packets",
                                '_plot-PktCount per Connection Distribution' + file_ending)
    plot_degree(self.get_out_degree(), "Outdegree", "out-degrees", '_out_degree' + file_ending)
    plot_degree(self.get_in_degree(), "Indegree", "in-degrees", '_in_degree' + file_ending)
    plot_degree(self.get_overall_degree(), "Overalldegree", "overall-degrees", '_overall_degree' + file_ending)
    plot_big_comm_interval_stat("avgPktCount", "comm_interval_statistics",
                                'Average number of exchanged packets per communication interval',
                                "Number of packets",
                                '_plot-Avg PktCount Communication Interval Distribution' + file_ending)
    plot_big_comm_interval_stat("avgTimeBetweenIntervals", "comm_interval_statistics",
                                'Average time between communication intervals in seconds',
                                'Average time between intervals',
                                '_plot-Avg Time Between Communication Intervals Distribution' + file_ending)
    plot_big_comm_interval_stat("avgIntervalTime", "comm_interval_statistics",
                                'Average duration of a communication interval in seconds',
                                'Average interval time',
                                '_plot-Avg Duration Communication Interval Distribution' + file_ending)
    plot_big_comm_interval_stat("totalCommDuration", "comm_interval_statistics",
                                'Total communication duration in seconds', 'Duration',
                                '_plot-Total Communication Duration Distribution' + file_ending)

    # Deliberately not run: plot_port() is time consuming, plot_ip_src() and plot_ip_dst() are not
    # drawable for too many IPs. Call them here to enable these plots.

    print("Saved plots in the input PCAP directory.")
|