ppcap.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376
  1. """
  2. Packet read and write routines for pcap format.
  3. See http://wiki.wireshark.org/Development/LibpcapFileFormat
  4. """
  5. import sys
  6. import logging
  7. import struct
  8. from pypacker import pypacker
  9. # avoid unneeded references for performance reasons
  10. unpack = struct.unpack
  11. logger = logging.getLogger("pypacker")
  12. # File magic numbers
  13. # pcap using microseconds resolution
  14. TCPDUMP_MAGIC = 0xa1b2c3d4
  15. TCPDUMP_MAGIC_SWAPPED = 0xd4c3b2a1
  16. # pcap using nanoseconds resolution
  17. TCPDUMP_MAGIC_NANO = 0xa1b23c4d
  18. TCPDUMP_MAGIC_NANO_SWAPPED = 0x4d3cb2a1
  19. PCAP_VERSION_MAJOR = 2
  20. PCAP_VERSION_MINOR = 4
  21. DLT_NULL = 0
  22. DLT_EN10MB = 1
  23. DLT_EN3MB = 2
  24. DLT_AX25 = 3
  25. DLT_PRONET = 4
  26. DLT_CHAOS = 5
  27. DLT_IEEE802 = 6
  28. DLT_ARCNET = 7
  29. DLT_SLIP = 8
  30. DLT_PPP = 9
  31. DLT_FDDI = 10
  32. DLT_PFSYNC = 18
  33. DLT_IEEE802_11 = 105
  34. DLT_LINUX_SLL = 113
  35. DLT_PFLOG = 117
  36. DLT_IEEE802_11_RADIO = 127
  37. _MODE_BYTES = 0
  38. _MODE_PACKETS = 1
  39. if sys.platform.find("openbsd") != -1:
  40. DLT_LOOP = 12
  41. DLT_RAW = 14
  42. else:
  43. DLT_LOOP = 108
  44. DLT_RAW = 12
  45. # retrieve via: FileHdr.linktype
  46. dltoff = {
  47. DLT_NULL : 4,
  48. DLT_EN10MB : 14,
  49. DLT_IEEE802 : 22,
  50. DLT_ARCNET : 6,
  51. DLT_SLIP : 16,
  52. DLT_PPP : 4,
  53. DLT_FDDI : 21,
  54. DLT_PFLOG : 48,
  55. DLT_PFSYNC : 4,
  56. DLT_LOOP : 4,
  57. DLT_LINUX_SLL : 16
  58. }
class FileHdr(pypacker.Packet):
    """
    pcap file header: the 24-byte record at the very start of a pcap file.

    No __byte_order__ is declared here, so pypacker's default byte order
    applies. The Reader in this module pairs this class with big-endian
    parsing of the per-packet headers.
    """
    # header length = 24
    # Each entry appears to follow pypacker's
    # (field name, struct format code, default value) layout.
    __hdr__ = (
        # identifies byte order and timestamp resolution (see TCPDUMP_MAGIC*)
        ("magic", "I", TCPDUMP_MAGIC),
        ("v_major", "H", PCAP_VERSION_MAJOR),
        ("v_minor", "H", PCAP_VERSION_MINOR),
        # NOTE(review): the libpcap file format describes thiszone as a
        # signed 32-bit value; "I" reads it as unsigned -- confirm intent.
        ("thiszone", "I", 0),
        ("sigfigs", "I", 0),
        # default snapshot length of 1500 bytes
        ("snaplen", "I", 1500),
        # default 1 equals DLT_EN10MB
        ("linktype", "I", 1),
    )
class LEFileHdr(pypacker.Packet):
    """
    pcap file header, little-endian variant.

    Same fields as FileHdr, but parsed/packed with byte order "<". The Reader
    in this module re-parses the header with this class when the magic number
    matches one of the *_SWAPPED values.
    """
    # header length = 24
    # Each entry appears to follow pypacker's
    # (field name, struct format code, default value) layout.
    __hdr__ = (
        # identifies byte order and timestamp resolution (see TCPDUMP_MAGIC*)
        ("magic", "I", TCPDUMP_MAGIC),
        ("v_major", "H", PCAP_VERSION_MAJOR),
        ("v_minor", "H", PCAP_VERSION_MINOR),
        # NOTE(review): the libpcap file format describes thiszone as a
        # signed 32-bit value; "I" reads it as unsigned -- confirm intent.
        ("thiszone", "I", 0),
        ("sigfigs", "I", 0),
        # default snapshot length of 1500 bytes
        ("snaplen", "I", 1500),
        # default 1 equals DLT_EN10MB
        ("linktype", "I", 1),
    )
    # "<" selects little-endian in struct format strings
    __byte_order__ = "<"
class PktHdr(pypacker.Packet):
    """
    pcap packet header: the 16-byte record that precedes every packet's data.

    No __byte_order__ is declared here, so pypacker's default byte order
    applies. The Writer in this module emits one of these per packet.
    """
    # header length: 16
    # Each entry appears to follow pypacker's
    # (field name, struct format code, default value) layout.
    __hdr__ = (
        # whole seconds of the timestamp
        ("tv_sec", "I", 0),
        # this can be either microseconds or nanoseconds: check magic number
        ("tv_usec", "I", 0),
        # number of packet bytes that follow this header in the file
        ("caplen", "I", 0),
        # packet length; the Writer in this module sets it equal to caplen
        ("len", "I", 0),
    )
class LEPktHdr(pypacker.Packet):
    """
    pcap packet header, little-endian variant.

    Same fields as PktHdr, but parsed/packed with byte order "<".
    """
    # header length: 16
    # Each entry appears to follow pypacker's
    # (field name, struct format code, default value) layout.
    __hdr__ = (
        # whole seconds of the timestamp
        ("tv_sec", "I", 0),
        # this can be either microseconds or nanoseconds: check magic number
        ("tv_usec", "I", 0),
        # number of packet bytes that follow this header in the file
        ("caplen", "I", 0),
        # packet length as recorded in the file
        ("len", "I", 0),
    )
    # "<" selects little-endian in struct format strings
    __byte_order__ = "<"
  105. class Writer(object):
  106. """
  107. Simple pcap writer supporting pcap format.
  108. Note: this will use nanosecond timestamp resolution.
  109. """
  110. def __init__(self, fileobj=None, filename=None, snaplen=1500, linktype=DLT_EN10MB):
  111. """
  112. fileobj -- create a pcap-writer giving a file object retrieved by open(..., "wb")
  113. filename -- create a pcap-writer giving a file pcap filename
  114. """
  115. # handle source modes
  116. if fileobj is not None:
  117. self.__fh = fileobj
  118. elif filename is not None:
  119. self.__fh = open(filename, "wb")
  120. else:
  121. raise Exception("No fileobject and no filename given..nothing to read!!!")
  122. fh = FileHdr(magic=TCPDUMP_MAGIC_NANO, snaplen=snaplen, linktype=linktype)
  123. logger.debug("writing fileheader %r" % fh)
  124. self.__fh.write(fh.bin())
  125. self._timestamp = 0
  126. def write(self, bts, ts=None):
  127. """
  128. Write the given packet's bytes to file.
  129. bts -- bytes to be written
  130. ts -- timestamp in Nanoseconds
  131. """
  132. # split timestamp into seconds, nanoseconds
  133. if ts is None:
  134. sec = self._timestamp // 1000000000
  135. nsec = int(self._timestamp - (sec * 1000000000))
  136. self._timestamp += 1000000
  137. else:
  138. sec = int(ts / 1000000000)
  139. nsec = ts - (sec * 1000000000)
  140. # logger.debug("paket time sec/nsec: %d/%d" % (sec, nsec))
  141. n = len(bts)
  142. ph = PktHdr(tv_sec=sec, tv_usec=nsec, caplen=n, len=n)
  143. # logger.debug("writing packet header + packet data")
  144. self.__fh.write(ph.bin())
  145. self.__fh.write(bts)
  146. def close(self):
  147. self.__fh.close()
  148. _struct_preheader_be = struct.Struct(">IIII")
  149. _struct_preheader_le = struct.Struct("<IIII")
  150. def _filter_dummy(pkt):
  151. return True
  152. class Reader(object):
  153. """
  154. Simple pcap file reader supporting pcap format. Using iterators this will
  155. return (timestamp, bytes) on standard mode and (timestamp, packet) on packet mode.
  156. Default timestamp resolution ist nanoseconds.
  157. """
  158. def __init__(self, fileobj=None, filename=None, lowest_layer=None, filter=None, ts_conversion=True):
  159. """
  160. Create a pcap Reader.
  161. fileobj -- create a pcap-reader giving a file object retrieved by "open(..., 'rb')"
  162. filename -- create a pcap-reader giving a filename
  163. lowest_layer -- setting this to a non-None value will activate the auto-packeting
  164. mode using the given class as lowest layer to create packets.
  165. Note: __next__ and __iter__ will return (timestamp, packet) instead of raw (timestamp, raw_bytes)
  166. filter -- filter callback to be used for packeting mode.
  167. signature: callback(packet) [True|False], True = accept packet, False otherwise
  168. ts_conversion -- convert timestamps to nanoseconds. Setting this to False will return
  169. ((seconds, [microseconds|nanoseconds]), buf) for __next__ and __iter__ instead of (timestamp, packet)
  170. and saves ~2% computation time. Minor fraction type can be checked using "is_resolution_nano".
  171. Note: This is deprecated and will be removed in future; conversion to nanoseconds will become the only option
  172. """
  173. # handle source modes
  174. if fileobj is not None:
  175. self.__fh = fileobj
  176. elif filename is not None:
  177. self.__fh = open(filename, "rb")
  178. else:
  179. raise Exception("No fileobject and no filename given..nothing to read!!!")
  180. buf = self.__fh.read(24)
  181. # file header is skipped per default (needed for __next__)
  182. self.__fh.seek(24)
  183. # this is not needed anymore later on but we set it anyway
  184. self.__fhdr = FileHdr(buf)
  185. self._closed = False
  186. # handle file types
  187. if self.__fhdr.magic == TCPDUMP_MAGIC:
  188. self.__resolution_factor = 1000
  189. # Note: we could use PktHdr to parse pre-packetdata but calling unpack directly
  190. # greatly improves performance
  191. self.__callback_unpack_meta = lambda x: _struct_preheader_be.unpack(x)
  192. elif self.__fhdr.magic == TCPDUMP_MAGIC_NANO:
  193. self.__resolution_factor = 1
  194. self.__callback_unpack_meta = lambda x: _struct_preheader_be.unpack(x)
  195. elif self.__fhdr.magic == TCPDUMP_MAGIC_SWAPPED:
  196. self.__fhdr = LEFileHdr(buf)
  197. self.__resolution_factor = 1000
  198. self.__callback_unpack_meta = lambda x: _struct_preheader_le.unpack(x)
  199. elif self.__fhdr.magic == TCPDUMP_MAGIC_NANO_SWAPPED:
  200. self.__fhdr = LEFileHdr(buf)
  201. self.__resolution_factor = 1
  202. self.__callback_unpack_meta = lambda x: _struct_preheader_le.unpack(x)
  203. else:
  204. raise ValueError("invalid tcpdump header, magic value: %s" % self.__fhdr.magic)
  205. logger.info("pcap file header for reading: %r" % self.__fhdr)
  206. # logger.debug("timestamp factor: %s" % self.__resolution_factor)
  207. # check if timestamp converison to nanoseconds is needed
  208. if ts_conversion:
  209. # logger.debug("using _next_bytes_conversion")
  210. self._next_bytes = self._next_bytes_conversion
  211. else:
  212. # logger.debug("using _next_bytes_noconversion")
  213. self._next_bytes = self._next_bytes_noconversion
  214. if lowest_layer is None:
  215. # standard implementation (conversion or non-converison mode)
  216. logger.info("using plain bytes mode")
  217. self._mode = _MODE_BYTES
  218. self.__next__ = self._next_bytes
  219. else:
  220. # set up packeting mode
  221. logger.info("using packets mode")
  222. self._mode = _MODE_PACKETS
  223. self.__next__ = self._next_pmode
  224. self._lowest_layer = lowest_layer
  225. if filter is None:
  226. self._filter = _filter_dummy
  227. else:
  228. self._filter = filter
  229. def is_resolution_nano(self):
  230. return self.__resolution_factor == 1000
  231. def _next_bytes_conversion(self):
  232. """
  233. Standard __next__ implementation. Needs to be a sepearte method to be called by producer.
  234. return -- (timestamp_nanoseconds, bytes) for pcap-reader.
  235. """
  236. # read metadata before actual packet
  237. buf = self.__fh.read(16)
  238. if not buf:
  239. raise StopIteration
  240. d = self.__callback_unpack_meta(buf)
  241. # logger.debug("reading: input/pos/d[2] = %d/%d/%r" % (len(buf), self.__fh.tell(), d))
  242. buf = self.__fh.read(d[2])
  243. return (d[0] * 1000000000 + (d[1] * self.__resolution_factor), buf)
  244. def _next_bytes_noconversion(self):
  245. """
  246. Same as _next_bytes_conversion wihtout timestamp-conversion. (Duplicatet because of performance reasons.)
  247. return -- ((seconds, [microseconds|nanoseconds]), bytes) for pcap-reader.
  248. """
  249. # read metadata before actual packet
  250. buf = self.__fh.read(16)
  251. if not buf:
  252. raise StopIteration
  253. d = self.__callback_unpack_meta(buf)
  254. # logger.debug("reading: input/d[2] = %d/%d" % (len(buf), d[2]))
  255. buf = self.__fh.read(d[2])
  256. # return ((hdr.tv_sec, hdr.tv_usec), buf)
  257. return ((d[0], d[1]), buf)
  258. def _next_pmode(self):
  259. """
  260. return -- (timestamp_nanoseconds, packet) if packet can be created from bytes
  261. else (timestamp_nanoseconds, bytes)
  262. """
  263. while True:
  264. # until StopIteration
  265. ts_bts = self._next_bytes()
  266. try:
  267. pkt = self._lowest_layer(ts_bts[1])
  268. if self._filter(pkt):
  269. return (ts_bts[0], pkt)
  270. except Exception as ex:
  271. logger.exception(ex)
  272. return ts_bts
  273. def __iter__(self):
  274. """
  275. return -- (timestamp, [bytes|packet]) for pcap-reader depending on configuration.
  276. """
  277. if self._closed:
  278. raise StopIteration
  279. while True:
  280. # loop until EOF is reached (raises StopIteration)
  281. yield self.__next__()
  282. def reset(self):
  283. """
  284. Reset file pointer to beginning
  285. """
  286. self.__fh.seek(24)
  287. def get_by_indices(self, indices):
  288. """
  289. Return [(timestamp, [bytes|packets]), ...] for the specified indices in packet file
  290. starting at 0 for first packet. This method won't change the current read-pointer.
  291. indices -- set of indices like set([0, 1, 2]). Nonexistent indices will be ignored.
  292. return -- list of (timestamp, [bytes|packets]) at positions given by indices
  293. (ordered as in packet source)
  294. """
  295. if self._closed:
  296. return []
  297. if type(indices) is list:
  298. indices = set(indices)
  299. oldpos = self.__fh.tell()
  300. self.__fh.seek(24)
  301. data_ret = []
  302. pos = 0
  303. for data in self:
  304. if pos in indices:
  305. data_ret.append(data)
  306. pos += 1
  307. self.__fh.seek(oldpos)
  308. return data_ret
  309. def close(self):
  310. self._closed = True
  311. self.__fh.close()