1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060 |
- # Aidmar
- from operator import itemgetter
- from math import sqrt, ceil, log
- import os
- import time
- import ID2TLib.libpcapreader as pr
- import matplotlib
- from numpy.random.mtrand import normal
- from scipy.linalg.misc import norm
- matplotlib.use('Agg')
- import matplotlib.pyplot as plt
- from ID2TLib.PcapFile import PcapFile
- from ID2TLib.StatsDatabase import StatsDatabase
- class Statistics:
- def __init__(self, pcap_file: PcapFile):
- """
- Creates a new Statistics object.
- :param pcap_file: A reference to the PcapFile object
- """
- # Fields
- self.pcap_filepath = pcap_file.pcap_file_path
- self.pcap_proc = None
- # Aidmar
- self.do_tests = False
- # Create folder for statistics database if required
- self.path_db = pcap_file.get_db_path()
- path_dir = os.path.dirname(self.path_db)
- if not os.path.isdir(path_dir):
- os.makedirs(path_dir)
- # Class instances
- self.stats_db = StatsDatabase(self.path_db)
- def load_pcap_statistics(self, flag_write_file: bool, flag_recalculate_stats: bool, flag_print_statistics: bool):
- """
- Loads the PCAP statistics for the file specified by pcap_filepath. If the database is not existing yet, the
- statistics are calculated by the PCAP file processor and saved into the newly created database. Otherwise the
- statistics are gathered directly from the existing database.
- :param flag_write_file: Indicates whether the statistics should be written addiotionally into a text file (True)
- or not (False)
- :param flag_recalculate_stats: Indicates whether eventually existing statistics should be recalculated
- :param flag_print_statistics: Indicates whether the gathered basic statistics should be printed to the terminal
- """
- # Load pcap and get loading time
- time_start = time.clock()
- # Inform user about recalculation of statistics and its reason
- if flag_recalculate_stats:
- print("Flag -r/--recalculate found. Recalculating statistics.")
- # Recalculate statistics if database not exists OR param -r/--recalculate was provided
- if (not self.stats_db.get_db_exists()) or flag_recalculate_stats:
- self.pcap_proc = pr.pcap_processor(self.pcap_filepath, str(self.do_tests)) # Aidmar - do_tests
- self.pcap_proc.collect_statistics()
- self.pcap_proc.write_to_database(self.path_db)
- outstring_datasource = "by PCAP file processor."
- else:
- outstring_datasource = "from statistics database."
- # Load statistics from database
- self.file_info = self.stats_db.get_file_info()
- time_end = time.clock()
- print("Loaded file statistics in " + str(time_end - time_start)[:4] + " sec " + outstring_datasource)
- # Write statistics if param -e/--export provided
- if flag_write_file:
- self.write_statistics_to_file()
- # Print statistics if param -s/--statistics provided
- if flag_print_statistics:
- self.print_statistics()
- def get_file_information(self):
- """
- Returns a list of tuples, each containing a information of the file.
- :return: a list of tuples, each consisting of (description, value, unit), where unit is optional.
- """
- return [("Pcap file", self.pcap_filepath),
- ("#Packets", self.get_packet_count(), "packets"),
- ("Capture length", self.get_capture_duration(), "seconds"),
- ("Capture start", self.get_pcap_timestamp_start()),
- ("Capture end", self.get_pcap_timestamp_end())]
- def get_general_file_statistics(self):
- """
- Returns a list of tuples, each containing a file statistic.
- :return: a list of tuples, each consisting of (description, value, unit).
- """
- return [("Avg. packet rate", self.file_info['avgPacketRate'], "packets/sec"),
- ("Avg. packet size", self.file_info['avgPacketSize'], "kbytes"),
- ("Avg. packets sent", self.file_info['avgPacketsSentPerHost'], "packets"),
- ("Avg. bandwidth in", self.file_info['avgBandwidthIn'], "kbit/s"),
- ("Avg. bandwidth out", self.file_info['avgBandwidthOut'], "kbit/s")]
- @staticmethod
- def write_list(desc_val_unit_list, func, line_ending="\n"):
- """
- Takes a list of tuples (statistic name, statistic value, unit) as input, generates a string of these three values
- and applies the function func on this string.
- Before generating the string, it identifies text containing a float number, casts the string to a
- float and rounds the value to two decimal digits.
- :param desc_val_unit_list: The list of tuples consisting of (description, value, unit)
- :param func: The function to be applied to each generated string
- :param line_ending: The formatting string to be applied at the end of each string
- """
- for entry in desc_val_unit_list:
- # Convert text containing float into float
- (description, value) = entry[0:2]
- if isinstance(value, str) and "." in value:
- try:
- value = float(value)
- except ValueError:
- pass # do nothing -> value was not a float
- # round float
- if isinstance(value, float):
- value = round(value, 4)
- # write into file
- if len(entry) == 3:
- unit = entry[2]
- func(description + ":\t" + str(value) + " " + unit + line_ending)
- else:
- func(description + ":\t" + str(value) + line_ending)
- def print_statistics(self):
- """
- Prints the basic file statistics to the terminal.
- """
- print("\nPCAP FILE INFORMATION ------------------------------")
- Statistics.write_list(self.get_file_information(), print, "")
- print("\nGENERAL FILE STATISTICS ----------------------------")
- Statistics.write_list(self.get_general_file_statistics(), print, "")
- print("\n")
- #Aidmar
- def calculate_entropy(self, frequency:list, normalized:bool = False):
- entropy = 0
- sumFreq = sum(frequency)
- for i, x in enumerate(frequency):
- p_x = float(frequency[i] / sumFreq)
- if p_x > 0:
- entropy += - p_x * log(p_x, 2)
- if normalized:
- normalizedEnt = entropy/log(len(frequency), 2)
- return entropy, normalizedEnt
- else:
- return entropy
- # Aidmar
- def get_tests_statistics(self):
- """
- Writes the calculated basic defects tests statistics into a file.
- """
- # self.stats_db._process_user_defined_query output is list of tuples, thus, we ned [0][0] to access data
- def count_frequncy(valuesList):
- values, frequency = [] , []
- for x in valuesList:
- if x in values:
- frequency[values.index(x)] += 1
- else:
- values.append(x)
- frequency.append(1)
- return values, frequency
- ####### Payload Tests #######
- sumPayloadCount = self.stats_db._process_user_defined_query("SELECT sum(payloadCount) FROM interval_statistics")
- pktCount = self.stats_db._process_user_defined_query("SELECT packetCount FROM file_statistics")
- payloadRatio=0
- if(pktCount[0][0]!=0):
- payloadRatio = float(sumPayloadCount[0][0] / pktCount[0][0] * 100)
- ####### TCP checksum Tests #######
- incorrectChecksumCount = self.stats_db._process_user_defined_query("SELECT sum(incorrectTCPChecksumCount) FROM interval_statistics")
- correctChecksumCount = self.stats_db._process_user_defined_query("SELECT avg(correctTCPChecksumCount) FROM interval_statistics")
- incorrectChecksumRatio=0
- if(incorrectChecksumCount[0][0] + correctChecksumCount[0][0])!=0:
- incorrectChecksumRatio = float(incorrectChecksumCount[0][0] / (incorrectChecksumCount[0][0] + correctChecksumCount[0][0] ) * 100)
- ####### IP Tests #######
- newIPCount = self.stats_db._process_user_defined_query("SELECT newIPCount FROM interval_statistics")
- # Retrieve the last cumulative entropy which is the entropy of the all IPs
- result = self.stats_db._process_user_defined_query("SELECT ipSrcCumEntropy FROM interval_statistics")
- ipSrcEntropy = result[-1][0]
- ipSrcCount = self.stats_db._process_user_defined_query(
- "SELECT COUNT(ipAddress) FROM ip_statistics WHERE pktsSent > 0")
- ipSrcNormEntropy = ipSrcEntropy / log(ipSrcCount[0][0],2)
- result = self.stats_db._process_user_defined_query("SELECT ipDstCumEntropy FROM interval_statistics")
- ipDstEntropy = result[-1][0]
- ipDstCount = self.stats_db._process_user_defined_query(
- "SELECT COUNT(ipAddress) FROM ip_statistics WHERE pktsReceived > 0")
- ipDstNormEntropy = ipDstEntropy / log(ipDstCount[0][0],2)
- ####### Ports Tests #######
- port0Count = self.stats_db._process_user_defined_query("SELECT SUM(portCount) FROM ip_ports WHERE portNumber = 0")
- if not port0Count[0][0]:
- port0Count = 0
- else:
- port0Count = port0Count[0][0]
- reservedPortCount = self.stats_db._process_user_defined_query(
- "SELECT SUM(portCount) FROM ip_ports WHERE portNumber IN (0,100,114,1023,1024,49151,49152,65535)")# could be extended
- if not reservedPortCount[0][0]:
- reservedPortCount = 0
- else:
- reservedPortCount = reservedPortCount[0][0]
- totalPortCount = self.stats_db._process_user_defined_query("SELECT SUM(portCount) FROM ip_ports")
- reservedPortRatio = (reservedPortCount/ totalPortCount[0][0]) * 100
- ####### TTL Tests #######
- newTTLCount = self.stats_db._process_user_defined_query("SELECT newTTLCount FROM interval_statistics")
- result = self.stats_db._process_user_defined_query("SELECT ttlValue,SUM(ttlCount) FROM ip_ttl GROUP BY ttlValue")
- data, frequency = [], []
- for row in result:
- frequency.append(row[1])
- ttlEntopy, ttlNormEntopy = self.calculate_entropy(frequency,True)
- ttlNovelsPerInterval, ttlNovelsPerIntervalFrequency = count_frequncy(newTTLCount)
- ttlNovelityDistEntropy = self.calculate_entropy(ttlNovelsPerIntervalFrequency)
- ####### Window Size Tests #######
- newWinSizeCount = self.stats_db._process_user_defined_query("SELECT newWinSizeCount FROM interval_statistics")
- result = self.stats_db._process_user_defined_query("SELECT winSize,SUM(winCount) FROM tcp_syn_win GROUP BY winSize")
- data, frequency = [], []
- for row in result:
- frequency.append(row[1])
- winEntopy, winNormEntopy = self.calculate_entropy(frequency, True)
- winNovelsPerInterval, winNovelsPerIntervalFrequency = count_frequncy(newWinSizeCount)
- winNovelityDistEntropy = self.calculate_entropy(winNovelsPerIntervalFrequency)
- ####### ToS Tests #######
- newToSCount = self.stats_db._process_user_defined_query("SELECT newToSCount FROM interval_statistics")
- result = self.stats_db._process_user_defined_query(
- "SELECT tosValue,SUM(tosCount) FROM ip_tos GROUP BY tosValue")
- data, frequency = [], []
- for row in result:
- frequency.append(row[1])
- tosEntopy, tosNormEntopy = self.calculate_entropy(frequency, True)
- tosNovelsPerInterval, tosNovelsPerIntervalFrequency = count_frequncy(newToSCount)
- tosNovelityDistEntropy = self.calculate_entropy(tosNovelsPerIntervalFrequency)
- ####### MSS Tests #######
- newMSSCount = self.stats_db._process_user_defined_query("SELECT newMSSCount FROM interval_statistics")
- result = self.stats_db._process_user_defined_query(
- "SELECT mssValue,SUM(mssCount) FROM tcp_mss_dist GROUP BY mssValue")
- data, frequency = [], []
- for row in result:
- frequency.append(row[1])
- mssEntopy, mssNormEntopy = self.calculate_entropy(frequency, True)
- mssNovelsPerInterval, mssNovelsPerIntervalFrequency = count_frequncy(newMSSCount)
- mssNovelityDistEntropy = self.calculate_entropy(mssNovelsPerIntervalFrequency)
- result = self.stats_db._process_user_defined_query("SELECT SUM(mssCount) FROM tcp_mss_dist WHERE mssValue > 536 AND mssValue < 1460")
- # The most used range of MSS: 536 < MSS < 1460. Calculate the ratio of the values in this range to total values.
- mss5361460 = (result[0][0] / sum(frequency)) * 100
- return [("Payload ratio", payloadRatio, "%"),
- ("Incorrect TCP checksum ratio", incorrectChecksumRatio, "%"),
- ("IP Src Entropy", ipSrcEntropy, ""),
- ("IP Src Normalized Entropy", ipSrcNormEntropy, ""),
- ("IP Dst Entropy", ipDstEntropy, ""),
- ("IP Dst Normalized Entropy", ipDstNormEntropy, ""),
- ("Port 0 count", port0Count, ""),
- ("Reserved ports", reservedPortRatio, "%"),
- ("TTL Entropy", ttlEntopy, ""),
- ("TTL Normalized Entropy", ttlNormEntopy, ""),
- ("TTL Distribution Entropy", ttlNovelityDistEntropy, ""),
- ("WinSize Entropy", winEntopy, ""),
- ("WinSize Normalized Entropy", winNormEntopy, ""),
- ("WinSize Distribution Entropy", winNovelityDistEntropy, ""),
- ("ToS Entropy", tosEntopy, ""),
- ("ToS Normalized Entropy", tosNormEntopy, ""),
- ("ToS Distribution Entropy", tosNovelityDistEntropy, ""),
- ("MSS Entropy", mssEntopy, ""),
- ("MSS Normalized Entropy", mssNormEntopy, ""),
- ("MSS Distribution Entropy", mssNovelityDistEntropy, ""),
- ("536 < MSS < 1460", mss5361460, "%")]
- def write_statistics_to_file(self):
- """
- Writes the calculated basic statistics into a file.
- """
- def _write_header(title: str):
- """
- Writes the section header into the open file.
- :param title: The section title
- """
- target.write("====================== \n")
- target.write(title + " \n")
- target.write("====================== \n")
- target = open(self.pcap_filepath + ".stat", 'w')
- target.truncate()
- _write_header("PCAP file information")
- Statistics.write_list(self.get_file_information(), target.write)
- _write_header("General statistics")
- Statistics.write_list(self.get_general_file_statistics(), target.write)
- _write_header("Tests statistics")
- Statistics.write_list(self.get_tests_statistics(), target.write)
- target.close()
- def get_capture_duration(self):
- """
- :return: The duration of the capture in seconds
- """
- return self.file_info['captureDuration']
- def get_pcap_timestamp_start(self):
- """
- :return: The timestamp of the first packet in the PCAP file
- """
- return self.file_info['timestampFirstPacket']
- def get_pcap_timestamp_end(self):
- """
- :return: The timestamp of the last packet in the PCAP file
- """
- return self.file_info['timestampLastPacket']
- def get_pps_sent(self, ip_address: str):
- """
- Calculates the sent packets per seconds for a given IP address.
- :param ip_address: The IP address whose packets per second should be calculated
- :return: The sent packets per seconds for the given IP address
- """
- packets_sent = self.stats_db.process_db_query("SELECT pktsSent from ip_statistics WHERE ipAddress=?", False,
- (ip_address,))
- capture_duration = float(self.get_capture_duration())
- return int(float(packets_sent) / capture_duration)
- def get_pps_received(self, ip_address: str):
- """
- Calculate the packets per second received for a given IP address.
- :param ip_address: The IP address used for the calculation
- :return: The number of packets per second received
- """
- packets_received = self.stats_db.process_db_query("SELECT pktsReceived FROM ip_statistics WHERE ipAddress=?",
- False,
- (ip_address,))
- capture_duration = float(self.get_capture_duration())
- return int(float(packets_received) / capture_duration)
- def get_packet_count(self):
- """
- :return: The number of packets in the loaded PCAP file
- """
- return self.file_info['packetCount']
- def get_most_used_ip_address(self):
- """
- :return: The IP address/addresses with the highest sum of packets sent and received
- """
- return self.process_db_query("most_used(ipAddress)")
- def get_ttl_distribution(self, ipAddress: str):
- result = self.process_db_query('SELECT ttlValue, ttlCount from ip_ttl WHERE ipAddress="' + ipAddress + '"')
- result_dict = {key: value for (key, value) in result}
- return result_dict
- # Aidmar
- def get_mss_distribution(self, ipAddress: str):
- result = self.process_db_query('SELECT mssValue, mssCount from tcp_mss_dist WHERE ipAddress="' + ipAddress + '"')
- result_dict = {key: value for (key, value) in result}
- return result_dict
- # Aidmar
- def get_win_distribution(self, ipAddress: str):
- result = self.process_db_query('SELECT winSize, winCount from tcp_syn_win WHERE ipAddress="' + ipAddress + '"')
- result_dict = {key: value for (key, value) in result}
- return result_dict
- # Aidmar
- def get_tos_distribution(self, ipAddress: str):
- result = self.process_db_query('SELECT tosValue, tosCount from ip_tos WHERE ipAddress="' + ipAddress + '"')
- result_dict = {key: value for (key, value) in result}
- return result_dict
- def get_random_ip_address(self, count: int = 1):
- """
- :param count: The number of IP addreses to return
- :return: A randomly chosen IP address from the dataset or iff param count is greater than one, a list of randomly
- chosen IP addresses
- """
- if count == 1:
- return self.process_db_query("random(all(ipAddress))")
- else:
- ip_address_list = []
- for i in range(0, count):
- ip_address_list.append(self.process_db_query("random(all(ipAddress))"))
- return ip_address_list
- def get_mac_address(self, ipAddress: str):
- """
- :return: The MAC address used in the dataset for the given IP address.
- """
- return self.process_db_query('macAddress(ipAddress=' + ipAddress + ")")
- # Aidmar
- def get_most_used_mss(self, ipAddress: str):
- """
- :param ipAddress: The IP address whose used MSS should be determined
- :return: The TCP MSS value used by the IP address, or if the IP addresses never specified a MSS,
- then None is returned
- """
- mss_value = self.process_db_query('SELECT mssValue from tcp_mss_dist WHERE ipAddress="' + ipAddress + '" ORDER BY mssCount DESC LIMIT 1')
- if isinstance(mss_value, int):
- return mss_value
- else:
- return None
- def get_statistics_database(self):
- """
- :return: A reference to the statistics database object
- """
- return self.stats_db
- def process_db_query(self, query_string_in: str, print_results: bool = False):
- """
- Executes a string identified previously as a query. This can be a standard SQL SELECT/INSERT query or a named
- query.
- :param query_string_in: The query to be processed
- :param print_results: Indicates whether the results should be printed to terminal
- :return: The result of the query
- """
- return self.stats_db.process_db_query(query_string_in, print_results)
- def is_query(self, value: str):
- """
- Checks whether the given string is a standard SQL query (SELECT, INSERT) or a named query.
- :param value: The string to be checked
- :return: True if the string is recognized as a query, otherwise False.
- """
- if not isinstance(value, str):
- return False
- else:
- return (any(x in value.lower().strip() for x in self.stats_db.get_all_named_query_keywords()) or
- any(x in value.lower().strip() for x in self.stats_db.get_all_sql_query_keywords()))
- # Aidmar
- def calculate_standard_deviation(self, lst):
- """Calculates the standard deviation for a list of numbers."""
- num_items = len(lst)
- mean = sum(lst) / num_items
- differences = [x - mean for x in lst]
- sq_differences = [d ** 2 for d in differences]
- ssd = sum(sq_differences)
- variance = ssd / num_items
- sd = sqrt(variance)
- #print('The mean of {} is {}.'.format(lst, mean))
- #print('The differences are {}.'.format(differences))
- #print('The sum of squared differences is {}.'.format(ssd))
- #print('The variance is {}.'.format(variance))
- print('The standard deviation is {}.'.format(sd))
- print('--------------------------')
- return sd
- def plot_statistics(self, format: str = 'pdf'): #'png'):
- """
- Plots the statistics associated with the dataset prior attack injection.
- :param format: The format to be used to save the statistics diagrams.
- """
- def plot_ttl(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT ttlValue, SUM(ttlCount) FROM ip_ttl GROUP BY ttlValue")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("TTL Distribution")
- plt.xlabel('TTL Value')
- plt.ylabel('Number of Packets')
- width = 0.1
- plt.xlim([0, max(graphx)])
- plt.grid(True)
- plt.bar(graphx, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-ttl' + file_ending)
- plt.savefig(out,dpi=500)
- return out
- # Aidmar
- def plot_mss(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT mssValue, SUM(mssCount) FROM tcp_mss_dist GROUP BY mssValue")
- if(result):
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("MSS Distribution")
- plt.xlabel('MSS Value')
- plt.ylabel('Number of Packets')
- width = 0.1
- plt.xlim([0, max(graphx)])
- plt.grid(True)
- plt.bar(graphx, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-mss' + file_ending)
- plt.savefig(out,dpi=500)
- return out
- else:
- print("Error plot MSS: No MSS values found!")
- # Aidmar
- def plot_win(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT winSize, SUM(winCount) FROM tcp_syn_win GROUP BY winSize")
- if (result):
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Window Size Distribution")
- plt.xlabel('Window Size')
- plt.ylabel('Number of Packets')
- width = 0.1
- plt.xlim([0, max(graphx)])
- plt.grid(True)
- plt.bar(graphx, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-win' + file_ending)
- plt.savefig(out,dpi=500)
- return out
- else:
- print("Error plot WinSize: No WinSize values found!")
- # Aidmar
- def plot_protocol(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT protocolName, SUM(protocolCount) FROM ip_protocols GROUP BY protocolName")
- if (result):
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Protocols Distribution")
- plt.xlabel('Protocols')
- plt.ylabel('Number of Packets')
- width = 0.1
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- # Protocols' names on x-axis
- x = range(0,len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-protocol' + file_ending)
- plt.savefig(out,dpi=500)
- return out
- else:
- print("Error plot protocol: No protocol values found!")
- # Aidmar
- def plot_port(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT portNumber, SUM(portCount) FROM ip_ports GROUP BY portNumber")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Ports Distribution")
- plt.xlabel('Ports Numbers')
- plt.ylabel('Number of Packets')
- width = 0.1
- plt.xlim([0, max(graphx)])
- plt.grid(True)
- plt.bar(graphx, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-port' + file_ending)
- plt.savefig(out,dpi=500)
- return out
- # Aidmar - This distribution is not drawable for big datasets
- def plot_ip_src(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT ipAddress, pktsSent FROM ip_statistics")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Source IP Distribution")
- plt.xlabel('Source IP')
- plt.ylabel('Number of Packets')
- width = 0.1
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- # IPs on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-ip-src' + file_ending)
- plt.savefig(out, dpi=500)
- return out
- # Aidmar - This distribution is not drawable for big datasets
- def plot_ip_dst(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT ipAddress, pktsReceived FROM ip_statistics")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Destination IP Distribution")
- plt.xlabel('Destination IP')
- plt.ylabel('Number of Packets')
- width = 0.1
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- # IPs on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-ip-dst' + file_ending)
- plt.savefig(out, dpi=500)
- return out
- # Aidmar
- def plot_interval_pktCount(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, pktsCount FROM interval_statistics ORDER BY lastPktTimestamp")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Packet Rate")
- plt.xlabel('Timestamp')
- plt.ylabel('Number of Packets')
- width = 0.1
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-pkt-count' + file_ending)
- plt.savefig(out, dpi=500)
- return out
- # Aidmar
- def plot_interval_ip_src_ent(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, ipSrcEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Source IP Entropy")
- plt.xlabel('Timestamp')
- plt.ylabel('Entropy')
- width = 0.1
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-ip-src-ent' + file_ending)
- plt.savefig(out, dpi=500)
- return out
- # Aidmar
- def plot_interval_ip_dst_ent(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, ipDstEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Destination IP Entropy")
- plt.xlabel('Timestamp')
- plt.ylabel('Entropy')
- width = 0.1
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-ip-dst-ent' + file_ending)
- plt.savefig(out, dpi=500)
- return out
- # Aidmar
- def plot_interval_ip_dst_cum_ent(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, ipDstCumEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Destination IP Cumulative Entropy")
- plt.xlabel('Timestamp')
- plt.ylabel('Entropy')
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.plot(x, graphy, 'r')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-ip-dst-cum-ent' + file_ending)
- plt.savefig(out, dpi=500)
- return out
- # Aidmar
- def plot_interval_ip_src_cum_ent(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, ipSrcCumEntropy FROM interval_statistics ORDER BY lastPktTimestamp")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Source IP Cumulative Entropy")
- plt.xlabel('Timestamp')
- plt.ylabel('Entropy')
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x',nbins=20)
- plt.plot(x, graphy, 'r')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-ip-src-cum-ent' + file_ending)
- plt.savefig(out, dpi=500)
- return out
- # Aidmar
- def plot_interval_new_ip(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, newIPCount FROM interval_statistics ORDER BY lastPktTimestamp")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("IP Novelity Distribution")
- plt.xlabel('Timestamp')
- plt.ylabel('Novel values count')
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- width = 0.1
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-novel-ip-dist' + file_ending)
- plt.savefig(out, dpi=500)
- print("IP Standard Deviation:")
- self.calculate_standard_deviation(graphy)
- return out
- # Aidmar
- def plot_interval_new_ttl(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, newTTLCount FROM interval_statistics ORDER BY lastPktTimestamp")
- if(result):
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("TTL Novelity Distribution")
- plt.xlabel('Timestamp')
- plt.ylabel('Novel values count')
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- width = 0.1
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-novel-ttl-dist' + file_ending)
- plt.savefig(out, dpi=500)
- print("TTL Standard Deviation:")
- self.calculate_standard_deviation(graphy)
- return out
- else:
- print("Error plot TTL: No TTL values found!")
- # Aidmar
- def plot_interval_new_tos(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, newToSCount FROM interval_statistics ORDER BY lastPktTimestamp")
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("ToS Novelity Distribution")
- plt.xlabel('Timestamp')
- plt.ylabel('Novel values count')
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- width = 0.1
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-novel-tos-dist' + file_ending)
- plt.savefig(out, dpi=500)
- print("ToS Standard Deviation:")
- self.calculate_standard_deviation(graphy)
- return out
- # Aidmar
- def plot_interval_new_win_size(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, newWinSizeCount FROM interval_statistics ORDER BY lastPktTimestamp")
- if(result):
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("Window Size Novelity Distribution")
- plt.xlabel('Timestamp')
- plt.ylabel('Novel values count')
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- width = 0.1
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-novel-win-size-dist' + file_ending)
- plt.savefig(out, dpi=500)
- # Calculate Standart Deviation
- print("Window Size Standard Deviation:")
- self.calculate_standard_deviation(graphy)
- return out
- else:
- print("Error plot new values WinSize: No WinSize values found!")
- # Aidmar
- def plot_interval_new_mss(file_ending: str):
- plt.gcf().clear()
- result = self.stats_db._process_user_defined_query(
- "SELECT lastPktTimestamp, newMSSCount FROM interval_statistics ORDER BY lastPktTimestamp")
- if(result):
- graphx, graphy = [], []
- for row in result:
- graphx.append(row[0])
- graphy.append(row[1])
- plt.autoscale(enable=True, axis='both')
- plt.title("MSS Novelity Distribution")
- plt.xlabel('Timestamp')
- plt.ylabel('Novel values count')
- plt.xlim([0, len(graphx)])
- plt.grid(True)
- width = 0.1
- # timestamp on x-axis
- x = range(0, len(graphx))
- my_xticks = graphx
- plt.xticks(x, my_xticks, rotation='vertical', fontsize=5)
- plt.tight_layout()
- # limit the number of xticks
- plt.locator_params(axis='x', nbins=20)
- plt.bar(x, graphy, width, align='center', linewidth=1, color='red', edgecolor='red')
- out = self.pcap_filepath.replace('.pcap', '_plot-interval-novel-mss-dist' + file_ending)
- plt.savefig(out, dpi=500)
- # Calculate Standart Deviation
- print("MSS Standard Deviation:")
- self.calculate_standard_deviation(graphy)
- return out
- else:
- print("Error plot new values MSS: No MSS values found!")
- ttl_out_path = plot_ttl('.' + format)
- mss_out_path = plot_mss('.' + format)
- win_out_path = plot_win('.' + format)
- protocol_out_path = plot_protocol('.' + format)
- port_out_path = plot_port('.' + format)
- #ip_src_out_path = plot_ip_src('.' + format)
- #ip_dst_out_path = plot_ip_dst('.' + format)
- plot_interval_pktCount = plot_interval_pktCount('.' + format)
- plot_interval_ip_src_ent = plot_interval_ip_src_ent('.' + format)
- plot_interval_ip_dst_ent = plot_interval_ip_dst_ent('.' + format)
- plot_interval_ip_src_cum_ent = plot_interval_ip_src_cum_ent('.' + format)
- plot_interval_ip_dst_cum_ent = plot_interval_ip_dst_cum_ent('.' + format)
- plot_interval_new_ip = plot_interval_new_ip('.' + format)
- plot_interval_new_ttl = plot_interval_new_ttl('.' + format)
- plot_interval_new_tos = plot_interval_new_tos('.' + format)
- plot_interval_new_win_size = plot_interval_new_win_size('.' + format)
- plot_interval_new_mss = plot_interval_new_mss('.' + format)
- #print("Saved distributions plots at: %s, %s, %s, %s, %s, %s, %s, %s %s" %(ttl_out_path,mss_out_path, win_out_path,
- #protocol_out_path, port_out_path,ip_src_out_path,ip_dst_out_path, plot_interval_pktCount))
- # Aidmar
- def calculate_complement_packet_rates(self, pps):
- """
- Calculates the complement packet rates of the background traffic packet rates for each interval.
- Then normalize it to maximum boundary, which is the input parameter pps
- :return: normalized packet rates for each time interval.
- """
- result = self.process_db_query(
- "SELECT lastPktTimestamp,pktsCount FROM interval_statistics ORDER BY lastPktTimestamp")
- # print(result)
- bg_interval_pps = []
- complement_interval_pps = []
- intervalsSum = 0
- if result:
- # Get the interval in seconds
- for i, row in enumerate(result):
- if i < len(result) - 1:
- intervalsSum += ceil((int(result[i + 1][0]) * 10 ** -6) - (int(row[0]) * 10 ** -6))
- interval = intervalsSum / (len(result) - 1)
- # Convert timestamp from micro to seconds, convert packet rate "per interval" to "per second"
- for row in result:
- bg_interval_pps.append((int(row[0]) * 10 ** -6, int(row[1] / interval)))
- # Find max PPS
- maxPPS = max(bg_interval_pps, key=itemgetter(1))[1]
- for row in bg_interval_pps:
- complement_interval_pps.append((row[0], int(pps * (maxPPS - row[1]) / maxPPS)))
- return complement_interval_pps
|