# QueryParser.py

import pyparsing as pp
  2. class QueryParser:
  3. def __init__(self):
  4. """
  5. Constructs a parser for all named queries using PyParsing.
  6. """
  7. extractor = pp.Keyword("random") ^ pp.Keyword("first") ^ pp.Keyword("last")
  8. # Valid selectors - except "avg", because not all attributes can be combined with it
  9. selector_no_avg = pp.Keyword("most_used") ^ pp.Keyword("least_used") ^ pp.Keyword("all")
  10. # All attributes that cannot be combined with "avg"
  11. attributes_no_avg = pp.Keyword("ipaddress") ^ pp.Keyword("macaddress") ^ pp.Keyword("portnumber") ^\
  12. pp.Keyword("protocolname") ^ pp.Keyword("winsize") ^ pp.Keyword("ipclass")
  13. # All attributes that can be combined with "avg"
  14. attributes_avg = pp.Keyword("ttlvalue") ^ pp.Keyword("mssvalue") ^\
  15. pp.Keyword("pktssent") ^ pp.Keyword("pktsreceived") ^ pp.Keyword("mss") ^\
  16. pp.Keyword("kbytesreceived") ^ pp.Keyword("kbytessent")
  17. # Collection of all attributes for simpler specification
  18. attributes_all = attributes_no_avg ^ attributes_avg
  19. # Simple selector + attribute query, only allowing "avg" with compatible attributes
  20. simple_selector_query = (selector_no_avg + pp.Suppress("(") + attributes_all + pp.Suppress(")")) ^\
  21. (pp.Keyword("avg") + pp.Suppress("(") + attributes_avg + pp.Suppress(")"))
  22. # Selectors for parameterized queries - they are replaced in the result to avoid ambiguity
  23. param_selectors = pp.Keyword("ipaddress").setParseAction(pp.replaceWith("ipaddress_param")) ^\
  24. pp.Keyword("macaddress").setParseAction(pp.replaceWith("macaddress_param"))
  25. # All operators allowed in parameterized queries
  26. operators = pp.Literal("<=") ^ pp.Literal("<") ^ pp.Literal("=") ^\
  27. pp.Literal(">=") ^ pp.Literal(">") ^ pp.CaselessLiteral("in")
  28. # Placeholder for nesting in parameterized queries
  29. expr = pp.Forward()
  30. # Simple values for comparisons inside a parameterized query can be alphanumeric plus dot and colon
  31. simple_value = pp.Word(pp.alphanums + ".:")
  32. # Values in parameterized queries can either be simple values, or a list of them.
  33. # If it's a list, we insert a "list"-token to be able to distinguish it
  34. parameterized_value = simple_value ^\
  35. (pp.Suppress("[") + pp.Group(pp.Empty().addParseAction(pp.replaceWith('list')) +
  36. pp.delimitedList(simple_value)) + pp.Suppress("]"))
  37. # One "attribute-operator-value" triplet for parameterized queries
  38. comparison = pp.Group(attributes_all + operators + (parameterized_value ^ expr))
  39. # A full parameterized query, consisting of a parameterized selector and a comma-separated list of comparisons
  40. parameterized_query = param_selectors + pp.Suppress("(") + pp.Group(pp.delimitedList(comparison)) + pp.Suppress(")")
  41. # Combination of simple and parameterized queries
  42. all_selector_queries = (simple_selector_query ^ parameterized_query)
  43. # All queries can be combined with an extractor
  44. extractor_selector_query = extractor + pp.Suppress("(") + all_selector_queries + pp.Suppress(")")
  45. # Queries can be used with an extractor or without
  46. named_query = (extractor_selector_query ^ all_selector_queries)
  47. # The placeholder can be replaced with any query
  48. expr << pp.Group(named_query)
  49. # Make sure all queries end with a semicolon, and we're done
  50. self.full_query = named_query + pp.Suppress(";")
  51. def parse_query(self, querystring: str) -> pp.ParseResults:
  52. """
  53. Parses the passed query with a pre-constructed parser.
  54. :param querystring: The named query to be executed
  55. :return: A ParseResults-object, which essentially is a list of tokens
  56. """
  57. return self.full_query.parseString(querystring)