_dtype.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. """
  2. A place for code to be called from the implementation of np.dtype
  3. String handling is much easier to do correctly in python.
  4. """
  5. import numpy as np
  6. _kind_to_stem = {
  7. 'u': 'uint',
  8. 'i': 'int',
  9. 'c': 'complex',
  10. 'f': 'float',
  11. 'b': 'bool',
  12. 'V': 'void',
  13. 'O': 'object',
  14. 'M': 'datetime',
  15. 'm': 'timedelta',
  16. 'S': 'bytes',
  17. 'U': 'str',
  18. }
  19. def _kind_name(dtype):
  20. try:
  21. return _kind_to_stem[dtype.kind]
  22. except KeyError:
  23. raise RuntimeError(
  24. "internal dtype error, unknown kind {!r}"
  25. .format(dtype.kind)
  26. )
  27. def __str__(dtype):
  28. if dtype.fields is not None:
  29. return _struct_str(dtype, include_align=True)
  30. elif dtype.subdtype:
  31. return _subarray_str(dtype)
  32. elif issubclass(dtype.type, np.flexible) or not dtype.isnative:
  33. return dtype.str
  34. else:
  35. return dtype.name
  36. def __repr__(dtype):
  37. arg_str = _construction_repr(dtype, include_align=False)
  38. if dtype.isalignedstruct:
  39. arg_str = arg_str + ", align=True"
  40. return "dtype({})".format(arg_str)
  41. def _unpack_field(dtype, offset, title=None):
  42. """
  43. Helper function to normalize the items in dtype.fields.
  44. Call as:
  45. dtype, offset, title = _unpack_field(*dtype.fields[name])
  46. """
  47. return dtype, offset, title
  48. def _isunsized(dtype):
  49. # PyDataType_ISUNSIZED
  50. return dtype.itemsize == 0
  51. def _construction_repr(dtype, include_align=False, short=False):
  52. """
  53. Creates a string repr of the dtype, excluding the 'dtype()' part
  54. surrounding the object. This object may be a string, a list, or
  55. a dict depending on the nature of the dtype. This
  56. is the object passed as the first parameter to the dtype
  57. constructor, and if no additional constructor parameters are
  58. given, will reproduce the exact memory layout.
  59. Parameters
  60. ----------
  61. short : bool
  62. If true, this creates a shorter repr using 'kind' and 'itemsize', instead
  63. of the longer type name.
  64. include_align : bool
  65. If true, this includes the 'align=True' parameter
  66. inside the struct dtype construction dict when needed. Use this flag
  67. if you want a proper repr string without the 'dtype()' part around it.
  68. If false, this does not preserve the
  69. 'align=True' parameter or sticky NPY_ALIGNED_STRUCT flag for
  70. struct arrays like the regular repr does, because the 'align'
  71. flag is not part of first dtype constructor parameter. This
  72. mode is intended for a full 'repr', where the 'align=True' is
  73. provided as the second parameter.
  74. """
  75. if dtype.fields is not None:
  76. return _struct_str(dtype, include_align=include_align)
  77. elif dtype.subdtype:
  78. return _subarray_str(dtype)
  79. else:
  80. return _scalar_str(dtype, short=short)
  81. def _scalar_str(dtype, short):
  82. byteorder = _byte_order_str(dtype)
  83. if dtype.type == np.bool_:
  84. if short:
  85. return "'?'"
  86. else:
  87. return "'bool'"
  88. elif dtype.type == np.object_:
  89. # The object reference may be different sizes on different
  90. # platforms, so it should never include the itemsize here.
  91. return "'O'"
  92. elif dtype.type == np.string_:
  93. if _isunsized(dtype):
  94. return "'S'"
  95. else:
  96. return "'S%d'" % dtype.itemsize
  97. elif dtype.type == np.unicode_:
  98. if _isunsized(dtype):
  99. return "'%sU'" % byteorder
  100. else:
  101. return "'%sU%d'" % (byteorder, dtype.itemsize / 4)
  102. # unlike the other types, subclasses of void are preserved - but
  103. # historically the repr does not actually reveal the subclass
  104. elif issubclass(dtype.type, np.void):
  105. if _isunsized(dtype):
  106. return "'V'"
  107. else:
  108. return "'V%d'" % dtype.itemsize
  109. elif dtype.type == np.datetime64:
  110. return "'%sM8%s'" % (byteorder, _datetime_metadata_str(dtype))
  111. elif dtype.type == np.timedelta64:
  112. return "'%sm8%s'" % (byteorder, _datetime_metadata_str(dtype))
  113. elif np.issubdtype(dtype, np.number):
  114. # Short repr with endianness, like '<f8'
  115. if short or dtype.byteorder not in ('=', '|'):
  116. return "'%s%c%d'" % (byteorder, dtype.kind, dtype.itemsize)
  117. # Longer repr, like 'float64'
  118. else:
  119. return "'%s%d'" % (_kind_name(dtype), 8*dtype.itemsize)
  120. elif dtype.isbuiltin == 2:
  121. return dtype.type.__name__
  122. else:
  123. raise RuntimeError(
  124. "Internal error: NumPy dtype unrecognized type number")
  125. def _byte_order_str(dtype):
  126. """ Normalize byteorder to '<' or '>' """
  127. # hack to obtain the native and swapped byte order characters
  128. swapped = np.dtype(int).newbyteorder('s')
  129. native = swapped.newbyteorder('s')
  130. byteorder = dtype.byteorder
  131. if byteorder == '=':
  132. return native.byteorder
  133. if byteorder == 's':
  134. # TODO: this path can never be reached
  135. return swapped.byteorder
  136. elif byteorder == '|':
  137. return ''
  138. else:
  139. return byteorder
  140. def _datetime_metadata_str(dtype):
  141. # TODO: this duplicates the C append_metastr_to_string
  142. unit, count = np.datetime_data(dtype)
  143. if unit == 'generic':
  144. return ''
  145. elif count == 1:
  146. return '[{}]'.format(unit)
  147. else:
  148. return '[{}{}]'.format(count, unit)
  149. def _struct_dict_str(dtype, includealignedflag):
  150. # unpack the fields dictionary into ls
  151. names = dtype.names
  152. fld_dtypes = []
  153. offsets = []
  154. titles = []
  155. for name in names:
  156. fld_dtype, offset, title = _unpack_field(*dtype.fields[name])
  157. fld_dtypes.append(fld_dtype)
  158. offsets.append(offset)
  159. titles.append(title)
  160. # Build up a string to make the dictionary
  161. # First, the names
  162. ret = "{'names':["
  163. ret += ",".join(repr(name) for name in names)
  164. # Second, the formats
  165. ret += "], 'formats':["
  166. ret += ",".join(
  167. _construction_repr(fld_dtype, short=True) for fld_dtype in fld_dtypes)
  168. # Third, the offsets
  169. ret += "], 'offsets':["
  170. ret += ",".join("%d" % offset for offset in offsets)
  171. # Fourth, the titles
  172. if any(title is not None for title in titles):
  173. ret += "], 'titles':["
  174. ret += ",".join(repr(title) for title in titles)
  175. # Fifth, the itemsize
  176. ret += "], 'itemsize':%d" % dtype.itemsize
  177. if (includealignedflag and dtype.isalignedstruct):
  178. # Finally, the aligned flag
  179. ret += ", 'aligned':True}"
  180. else:
  181. ret += "}"
  182. return ret
  183. def _is_packed(dtype):
  184. """
  185. Checks whether the structured data type in 'dtype'
  186. has a simple layout, where all the fields are in order,
  187. and follow each other with no alignment padding.
  188. When this returns true, the dtype can be reconstructed
  189. from a list of the field names and dtypes with no additional
  190. dtype parameters.
  191. Duplicates the C `is_dtype_struct_simple_unaligned_layout` function.
  192. """
  193. total_offset = 0
  194. for name in dtype.names:
  195. fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
  196. if fld_offset != total_offset:
  197. return False
  198. total_offset += fld_dtype.itemsize
  199. if total_offset != dtype.itemsize:
  200. return False
  201. return True
  202. def _struct_list_str(dtype):
  203. items = []
  204. for name in dtype.names:
  205. fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
  206. item = "("
  207. if title is not None:
  208. item += "({!r}, {!r}), ".format(title, name)
  209. else:
  210. item += "{!r}, ".format(name)
  211. # Special case subarray handling here
  212. if fld_dtype.subdtype is not None:
  213. base, shape = fld_dtype.subdtype
  214. item += "{}, {}".format(
  215. _construction_repr(base, short=True),
  216. shape
  217. )
  218. else:
  219. item += _construction_repr(fld_dtype, short=True)
  220. item += ")"
  221. items.append(item)
  222. return "[" + ", ".join(items) + "]"
  223. def _struct_str(dtype, include_align):
  224. # The list str representation can't include the 'align=' flag,
  225. # so if it is requested and the struct has the aligned flag set,
  226. # we must use the dict str instead.
  227. if not (include_align and dtype.isalignedstruct) and _is_packed(dtype):
  228. sub = _struct_list_str(dtype)
  229. else:
  230. sub = _struct_dict_str(dtype, include_align)
  231. # If the data type isn't the default, void, show it
  232. if dtype.type != np.void:
  233. return "({t.__module__}.{t.__name__}, {f})".format(t=dtype.type, f=sub)
  234. else:
  235. return sub
  236. def _subarray_str(dtype):
  237. base, shape = dtype.subdtype
  238. return "({}, {})".format(
  239. _construction_repr(base, short=True),
  240. shape
  241. )
  242. def _name_includes_bit_suffix(dtype):
  243. if dtype.type == np.object_:
  244. # pointer size varies by system, best to omit it
  245. return False
  246. elif dtype.type == np.bool_:
  247. # implied
  248. return False
  249. elif np.issubdtype(dtype, np.flexible) and _isunsized(dtype):
  250. # unspecified
  251. return False
  252. else:
  253. return True
  254. def _name_get(dtype):
  255. # provides dtype.name.__get__, documented as returning a "bit name"
  256. if dtype.isbuiltin == 2:
  257. # user dtypes don't promise to do anything special
  258. return dtype.type.__name__
  259. if issubclass(dtype.type, np.void):
  260. # historically, void subclasses preserve their name, eg `record64`
  261. name = dtype.type.__name__
  262. else:
  263. name = _kind_name(dtype)
  264. # append bit counts
  265. if _name_includes_bit_suffix(dtype):
  266. name += "{}".format(dtype.itemsize * 8)
  267. # append metadata to datetimes
  268. if dtype.type in (np.datetime64, np.timedelta64):
  269. name += _datetime_metadata_str(dtype)
  270. return name