6 years ago · f37afa4a48
--- a/code/Core/Controller.py
+++ b/code/Core/Controller.py
@@ -8,6 +8,7 @@ import Core.LabelManager as LabelManager
 
				 import Core.Statistics as Statistics
			
 
				 import ID2TLib.PcapFile as PcapFile
			
 
				 import ID2TLib.Utility as Util
			
 
				+import Core.StatsDatabase as StatsDB
			
 
				 
			
 
				 
			
 
				 class Controller:
			
@@ -176,7 +177,7 @@ class Controller:
 
				             print()
			
 
				         elif param == "least_used":
			
 
				             print("least_used can be used as a selector for the following attributes:")
			
 
				-            print("ipAddress | macAddress | portNumber | protocolName | ttlValue")
			
 
				+            print("ipAddress | macAddress | portNumber | protocolName | ttlValue | mssValue | winSize | ipClass")
			
 
				             print()
			
 
				         elif param == "avg":
			
 
				             print("avg can be used as a selector for the following attributes:")
			
@@ -184,7 +185,7 @@ class Controller:
 
				             print()
			
 
				         elif param == "all":
			
 
				             print("all can be used as a selector for the following attributes:")
			
 
				-            print("ipAddress | ttlValue | mss | macAddress | portNumber | protocolName")
			
 
				+            print("ipAddress | ttlValue | mss | macAddress | portNumber | protocolName | winSize | ipClass")
			
 
				             print()
			
 
				         elif param in ["random", "first", "last"]:
			
 
				             print("No additional info available for this keyword.")
			
@@ -197,6 +198,14 @@ class Controller:
 
				                   "macAddress | ttlValue | ttlCount | portDirection | portNumber | portCount | protocolCount\n"
			
 
				                   "protocolName")
			
 
				             print()
			
 
				+            print("The following operators can be used:")
			
 
				+            print("<= | < | = | >= | > | in")
			
 
				+            print()
			
 
				+            print("A value can either be a simple value, a list of simple values separated by commas and enclosed "
			
 
				+                  "in [] brackets, or another query.")
			
 
				+            print()
			
 
				+            print("When VALUE is a list (or a query returning a list), the usage of the 'in' operator is mandatory!")
			
 
				+            print()
			
 
				             print("See 'help examples;' for usage examples.")
			
 
				             print()
			
 
				         elif param == "macaddress":
			
@@ -205,6 +214,8 @@ class Controller:
 
				             print("The following parameters can be specified:")
			
 
				             print("ipAddress")
			
 
				             print()
			
 
				+            print("See 'help ipAddress' for information on valid operators and values.")
			
 
				+            print()
			
 
				             print("See 'help examples;' for usage examples.")
			
 
				             print()
			
 
				         elif param == "examples":
			
@@ -218,6 +229,8 @@ class Controller:
 
				             print("\tSELECT avg(ttlValue) from ip_ttl;")
			
 
				             print("Get a random IP address from all addresses that sent and received at least 10 packets:")
			
 
				             print("\trandom(ipAddress(pktsSent > 10, pktsReceived > 10));")
			
 
				+            print("Get the IP addresses used with one of the MAC addresses in a list:")
			
 
				+            print("\tipAddress(macAddress in [08:00:27:a3:83:43, 52:54:00:12:35:02]);")
			
 
				             print()
			
 
				         else:
			
 
				             print("Unknown keyword '" + param + "', try 'help;' to get a list of allowed keywords'")
			
@@ -291,6 +304,9 @@ class Controller:
 
				                         for i in range(1, e.col):
			
 
				                             sys.stderr.write(" ")
			
 
				                         sys.stderr.write("^\n\n")
			
 
				+                    except StatsDB.QueryExecutionException as e:
			
 
				+                        sys.stderr.write("An error occurred: ")
			
 
				+                        sys.stderr.write(e.args[0] + "\n")
			
 
				                 buffer = ""
			
 
				 
			
 
				         readline.set_history_length(1000)
			
--- a/code/Core/QueryParser.py
+++ b/code/Core/QueryParser.py
@@ -3,24 +3,75 @@ import pyparsing as pp
 
				 
			
 
				 class QueryParser:
			
 
				     def __init__(self):
			
 
				+        """
			
 
				+        Constructs a parser for all named queries using PyParsing.
			
 
				+        """
			
 
				         extractor = pp.Keyword("random") ^ pp.Keyword("first") ^ pp.Keyword("last")
			
 
				-        selector = pp.Keyword("most_used") ^ pp.Keyword("least_used") ^ pp.Keyword("avg") ^ pp.Keyword("all")
			
 
				-        attribute = pp.Keyword("ipaddress") ^ pp.Keyword("macaddress") ^ pp.Keyword("portnumber") ^ pp.Keyword("protocolname") ^ pp.Keyword("ttlvalue") ^ pp.Keyword("mssvalue") ^ pp.Keyword("winsize") ^ pp.Keyword("ipclass") ^ pp.Keyword("pktssent") ^ pp.Keyword("pktsreceived") ^ pp.Keyword("mss") ^ pp.Keyword("kbytesreceived") ^ pp.Keyword("kbytessent")
			
 
				-        simple_selector_query = selector + pp.Suppress("(") + attribute + pp.Suppress(")")
			
 
				 
			
 
				-        param_selectors = pp.Keyword("ipaddress").setParseAction(pp.replaceWith("ipaddress_param")) ^ pp.Keyword("macaddress").setParseAction(pp.replaceWith("macaddress_param"))
			
 
				-        operators = pp.Literal("=") ^ pp.Literal("<=") ^ pp.Literal("<") ^ pp.Literal(">=") ^ pp.Literal(">")
			
 
				+        # Valid selectors - except "avg", because not all attributes can be combined with it
			
 
				+        selector_no_avg = pp.Keyword("most_used") ^ pp.Keyword("least_used") ^ pp.Keyword("all")
			
 
				+
			
 
				+        # All attributes that cannot be combined with "avg"
			
 
				+        attributes_no_avg = pp.Keyword("ipaddress") ^ pp.Keyword("macaddress") ^ pp.Keyword("portnumber") ^\
			
 
				+                            pp.Keyword("protocolname") ^ pp.Keyword("winsize") ^ pp.Keyword("ipclass")
			
 
				+
			
 
				+        # All attributes that can be combined with "avg"
			
 
				+        attributes_avg = pp.Keyword("ttlvalue") ^ pp.Keyword("mssvalue") ^\
			
 
				+                         pp.Keyword("pktssent") ^ pp.Keyword("pktsreceived") ^ pp.Keyword("mss") ^\
			
 
				+                         pp.Keyword("kbytesreceived") ^ pp.Keyword("kbytessent")
			
 
				+
			
 
				+        # Collection of all attributes for simpler specification
			
 
				+        attributes_all = attributes_no_avg ^ attributes_avg
			
 
				+
			
 
				+        # Simple selector + attribute query, only allowing "avg" with compatible attributes
			
 
				+        simple_selector_query = (selector_no_avg + pp.Suppress("(") + attributes_all + pp.Suppress(")")) ^\
			
 
				+                                (pp.Keyword("avg") + pp.Suppress("(") + attributes_avg + pp.Suppress(")"))
			
 
				+
			
 
				+        # Selectors for parameterized queries - they are replaced in the result to avoid ambiguity
			
 
				+        param_selectors = pp.Keyword("ipaddress").setParseAction(pp.replaceWith("ipaddress_param")) ^\
			
 
				+                          pp.Keyword("macaddress").setParseAction(pp.replaceWith("macaddress_param"))
			
 
				+
			
 
				+        # All operators allowed in parameterized queries
			
 
				+        operators = pp.Literal("<=") ^ pp.Literal("<") ^ pp.Literal("=") ^\
			
 
				+                    pp.Literal(">=") ^ pp.Literal(">") ^ pp.CaselessLiteral("in")
			
 
				+
			
 
				+        # Placeholder for nesting in parameterized queries
			
 
				         expr = pp.Forward()
			
 
				-        comparison = pp.Group(attribute + operators + (pp.Word(pp.alphanums + ".:") ^ expr))
			
 
				+
			
 
				+        # Simple values for comparisons inside a parameterized query can be alphanumeric plus dot and colon
			
 
				+        simple_value = pp.Word(pp.alphanums + ".:")
			
 
				+
			
 
				+        # Values in parameterized queries can either be simple values, or a list of them.
			
 
				+        # If it's a list, we insert a "list"-token to be able to distinguish it
			
 
				+        parameterized_value = simple_value ^\
			
 
				+                              (pp.Suppress("[") + pp.Group(pp.Empty().addParseAction(pp.replaceWith('list')) +
			
 
				+                               pp.delimitedList(simple_value)) + pp.Suppress("]"))
			
 
				+
			
 
				+        # One "attribute-operator-value" triplet for parameterized queries
			
 
				+        comparison = pp.Group(attributes_all + operators + (parameterized_value ^ expr))
			
 
				+
			
 
				+        # A full parameterized query, consisting of a parameterized selector and a comma-separated list of comparisons
			
 
				         parameterized_query = param_selectors + pp.Suppress("(") + pp.Group(pp.delimitedList(comparison)) + pp.Suppress(")")
			
 
				-        # parameterized_query = param_selectors + pp.Suppress("(") + comparison + pp.Suppress(")")
			
 
				 
			
 
				+        # Combination of simple and parameterized queries
			
 
				         all_selector_queries = (simple_selector_query ^ parameterized_query)
			
 
				+
			
 
				+        # All queries can be combined with an extractor
			
 
				         extractor_selector_query = extractor + pp.Suppress("(") + all_selector_queries + pp.Suppress(")")
			
 
				 
			
 
				+        # Queries can be used with an extractor or without
			
 
				         named_query = (extractor_selector_query ^ all_selector_queries)
			
 
				+
			
 
				+        # The placeholder can be replaced with any query
			
 
				         expr << pp.Group(named_query)
			
 
				+
			
 
				+        # Make sure all queries end with a semicolon, and we're done
			
 
				         self.full_query = named_query + pp.Suppress(";")
			
 
				 
			
 
				-    def parse_query(self, querystring):
			
 
				+    def parse_query(self, querystring: str) -> pp.ParseResults:
			
 
				+        """
			
 
				+        Parses the passed query with a pre-constructed parser.
			
 
				+        :param querystring: The named query to be executed
			
 
				+        :return: A ParseResults-object, which essentially is a list of tokens
			
 
				+        """
			
 
				         return self.full_query.parseString(querystring)
			
--- a/code/Core/StatsDatabase.py
+++ b/code/Core/StatsDatabase.py
@@ -25,6 +25,10 @@ def dict_gen(curs: sqlite3.Cursor):
 
				             yield dict(zip(field_names, row))
			
 
				 
			
 
				 
			
 
				+class QueryExecutionException(Exception):
			
 
				+    pass
			
 
				+
			
 
				+
			
 
				 class StatsDatabase:
			
 
				     def __init__(self, db_path: str):
			
 
				         """
			
@@ -168,18 +172,48 @@ class StatsDatabase:
 
				         field_types = self.get_field_types('ip_mac', 'ip_ttl', 'ip_ports', 'ip_protocols', 'ip_statistics', 'ip_mac')
			
 
				         conditions = []
			
 
				         for key, op, value in param_op_val:
			
 
				+            # Check whether the value is not a simple value, but another query (or list)
			
 
				             if isinstance(value, pp.ParseResults):
			
 
				-                # If we have another query instead of a direct value, execute and replace it
			
 
				-                value = self._execute_query_list(value)[0][0]
			
 
				+                if value[0] == "list":
			
 
				+                    # We have a list, cut the token off and use the remaining elements
			
 
				+                    value = value[1:]
			
 
				+
			
 
				+                    # Lists can only be used with "in"
			
 
				+                    if op != "in":
			
 
				+                        raise QueryExecutionException("List values require the usage of the 'in' operator!")
			
 
				+                else:
			
 
				+                    # If we have another query instead of a direct value, execute and replace it
			
 
				+                    rvalue = self._execute_query_list(value)
			
 
				+
			
 
				+                    # Do we have a comparison operator with a multiple-result query?
			
 
				+                    if op != "in" and value[0] in ['most_used', 'least_used', 'all', 'ipaddress_param',
			
 
				+                                                   'macaddress_param']:
			
 
				+                        raise QueryExecutionException("The extractor '" + value[0] +
			
 
				+                                                      "' may return more than one result!")
			
 
				+
			
 
				+                    # Make value contain a simple list with the results of the query
			
 
				+                    value = map(lambda x: str(x[0]), rvalue)
			
 
				+            else:
			
 
				+                # Make sure value is a list now to simplify handling
			
 
				+                value = [value]
			
 
				+
			
 
				             # this makes sure that TEXT fields are queried by strings,
			
 
				             # e.g. ipAddress=192.168.178.1 --is-converted-to--> ipAddress='192.168.178.1'
			
 
				             if field_types.get(key) == 'TEXT':
			
 
				-                if not str(value).startswith("'") and not str(value).startswith('"'):
			
 
				-                    value = "'" + value + "'"
			
 
				+                def ensure_string(x):
			
 
				+                    if not str(x).startswith("'") and not str(x).startswith('"'):
			
 
				+                        return "'" + x + "'"
			
 
				+                    else:
			
 
				+                        return x
			
 
				+                value = map(ensure_string, value)
			
 
				+
			
 
				+            # If we have more than one value, join them together, separated by commas
			
 
				+            value = ",".join(map(str, value))
			
 
				+
			
 
				             # this replacement is required to remove ambiguity in SQL query
			
 
				             if key == 'ipAddress':
			
 
				                 key = 'ip_mac.ipAddress'
			
 
				-            conditions.append(key + op + str(value))
			
 
				+            conditions.append(key + " " + op + " (" + str(value) + ")")
			
 
				 
			
 
				         where_clause = " AND ".join(conditions)
			
 
				         query += where_clause
			
@@ -230,6 +264,9 @@ class StatsDatabase:
 
				         "least_used.winsize": "SELECT winSize FROM (SELECT winSize, SUM(winCount) as occ FROM tcp_win GROUP BY "
			
 
				                               "winSize) WHERE occ=(SELECT SUM(winCount) as occ FROM tcp_win GROUP BY winSize "
			
 
				                               "ORDER BY occ ASC LIMIT 1) ORDER BY winSize ASC",
			
 
				+        "least_used.ipclass": "SELECT ipClass FROM (SELECT ipClass, COUNT(*) as occ from ip_statistics GROUP BY "
			
 
				+                             "ipClass ORDER BY occ DESC) WHERE occ=(SELECT COUNT(*) as occ from ip_statistics "
			
 
				+                             "GROUP BY ipClass ORDER BY occ ASC LIMIT 1) ORDER BY ipClass ASC",
			
 
				         "avg.pktsreceived": "SELECT avg(pktsReceived) from ip_statistics",
			
 
				         "avg.pktssent": "SELECT avg(pktsSent) from ip_statistics",
			
 
				         "avg.kbytesreceived": "SELECT avg(kbytesReceived) from ip_statistics",
			
@@ -241,27 +278,32 @@ class StatsDatabase:
 
				         "all.mss": "SELECT DISTINCT mssValue from tcp_mss ORDER BY mssValue ASC",
			
 
				         "all.macaddress": "SELECT DISTINCT macAddress from ip_mac ORDER BY macAddress ASC",
			
 
				         "all.portnumber": "SELECT DISTINCT portNumber from ip_ports ORDER BY portNumber ASC",
			
 
				-        "all.protocolname": "SELECT DISTINCT protocolName from ip_protocols ORDER BY protocolName ASC"}
			
 
				+        "all.protocolname": "SELECT DISTINCT protocolName from ip_protocols ORDER BY protocolName ASC",
			
 
				+        "all.winsize": "SELECT DISTINCT winSize FROM tcp_win ORDER BY winSize ASC",
			
 
				+        "all.ipclass": "SELECT DISTINCT ipClass FROM ip_statistics ORDER BY ipClass ASC"}
			
 
				 
			
 
				     def _execute_query_list(self, query_list):
			
 
				         """
			
 
				         Recursively executes a list of named queries. They are of the following form:
			
 
				-        ['macaddress_param', [['ipaddress', '=', ['most_used', 'ipaddress']]]]
			
 
				+        ['macaddress_param', [['ipaddress', 'in', ['most_used', 'ipaddress']]]]
			
 
				         :param query_list: The query statement list obtained from the query parser
			
 
				         :return: The result of the query (either a single result or a list).
			
 
				         """
			
 
				         if query_list[0] == "random":
			
 
				-            return rnd.choice(self._execute_query_list(query_list[1:]))
			
 
				+            return [rnd.choice(self._execute_query_list(query_list[1:]))]
			
 
				         elif query_list[0] == "first":
			
 
				-            return self._execute_query_list(query_list[1:])[0]
			
 
				+            return [self._execute_query_list(query_list[1:])[0]]
			
 
				         elif query_list[0] == "last":
			
 
				-            return self._execute_query_list(query_list[1:])[-1]
			
 
				+            return [self._execute_query_list(query_list[1:])[-1]]
			
 
				         elif query_list[0] == "macaddress_param":
			
 
				             return self.named_query_parameterized("macaddress", query_list[1])
			
 
				         elif query_list[0] == "ipaddress_param":
			
 
				             return self.named_query_parameterized("ipaddress", query_list[1])
			
 
				         else:
			
 
				             query = self.named_queries.get(query_list[0] + "." + query_list[1])
			
 
				+            if query is None:
			
 
				+                raise QueryExecutionException("The requested query '" + query_list[0] + "(" + query_list[1] +
			
 
				+                                              ")' was not found in the internal query list!")
			
 
				             self.cursor.execute(str(query))
			
 
				             last_result = self.cursor.fetchall()
			
 
				             return last_result
			
--- a/code/Test/test_Queries.py
+++ b/code/Test/test_Queries.py
@@ -235,4 +235,4 @@ class TestQueries(unittest.TestCase):
 
				         self.assertEqual(controller.statistics.process_db_query('all(protocolname)'), ['IPv4', 'TCP', 'UDP'])
			
 
				 
			
 
				     def test_nested_query(self):
			
 
				-        self.assertEqual(controller.statistics.process_db_query('macaddress(ipaddress=most_used(ipaddress))'), '08:00:27:a3:83:43')
			
 
				+        self.assertEqual(controller.statistics.process_db_query('macaddress(ipaddress in most_used(ipaddress))'), '08:00:27:a3:83:43')