123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342 |
- """
- A place for code to be called from the implementation of np.dtype
- String handling is much easier to do correctly in python.
- """
- import numpy as np
- _kind_to_stem = {
- 'u': 'uint',
- 'i': 'int',
- 'c': 'complex',
- 'f': 'float',
- 'b': 'bool',
- 'V': 'void',
- 'O': 'object',
- 'M': 'datetime',
- 'm': 'timedelta',
- 'S': 'bytes',
- 'U': 'str',
- }
- def _kind_name(dtype):
- try:
- return _kind_to_stem[dtype.kind]
- except KeyError:
- raise RuntimeError(
- "internal dtype error, unknown kind {!r}"
- .format(dtype.kind)
- )
- def __str__(dtype):
- if dtype.fields is not None:
- return _struct_str(dtype, include_align=True)
- elif dtype.subdtype:
- return _subarray_str(dtype)
- elif issubclass(dtype.type, np.flexible) or not dtype.isnative:
- return dtype.str
- else:
- return dtype.name
- def __repr__(dtype):
- arg_str = _construction_repr(dtype, include_align=False)
- if dtype.isalignedstruct:
- arg_str = arg_str + ", align=True"
- return "dtype({})".format(arg_str)
- def _unpack_field(dtype, offset, title=None):
- """
- Helper function to normalize the items in dtype.fields.
- Call as:
- dtype, offset, title = _unpack_field(*dtype.fields[name])
- """
- return dtype, offset, title
- def _isunsized(dtype):
- # PyDataType_ISUNSIZED
- return dtype.itemsize == 0
- def _construction_repr(dtype, include_align=False, short=False):
- """
- Creates a string repr of the dtype, excluding the 'dtype()' part
- surrounding the object. This object may be a string, a list, or
- a dict depending on the nature of the dtype. This
- is the object passed as the first parameter to the dtype
- constructor, and if no additional constructor parameters are
- given, will reproduce the exact memory layout.
- Parameters
- ----------
- short : bool
- If true, this creates a shorter repr using 'kind' and 'itemsize', instead
- of the longer type name.
- include_align : bool
- If true, this includes the 'align=True' parameter
- inside the struct dtype construction dict when needed. Use this flag
- if you want a proper repr string without the 'dtype()' part around it.
- If false, this does not preserve the
- 'align=True' parameter or sticky NPY_ALIGNED_STRUCT flag for
- struct arrays like the regular repr does, because the 'align'
- flag is not part of first dtype constructor parameter. This
- mode is intended for a full 'repr', where the 'align=True' is
- provided as the second parameter.
- """
- if dtype.fields is not None:
- return _struct_str(dtype, include_align=include_align)
- elif dtype.subdtype:
- return _subarray_str(dtype)
- else:
- return _scalar_str(dtype, short=short)
- def _scalar_str(dtype, short):
- byteorder = _byte_order_str(dtype)
- if dtype.type == np.bool_:
- if short:
- return "'?'"
- else:
- return "'bool'"
- elif dtype.type == np.object_:
- # The object reference may be different sizes on different
- # platforms, so it should never include the itemsize here.
- return "'O'"
- elif dtype.type == np.string_:
- if _isunsized(dtype):
- return "'S'"
- else:
- return "'S%d'" % dtype.itemsize
- elif dtype.type == np.unicode_:
- if _isunsized(dtype):
- return "'%sU'" % byteorder
- else:
- return "'%sU%d'" % (byteorder, dtype.itemsize / 4)
- # unlike the other types, subclasses of void are preserved - but
- # historically the repr does not actually reveal the subclass
- elif issubclass(dtype.type, np.void):
- if _isunsized(dtype):
- return "'V'"
- else:
- return "'V%d'" % dtype.itemsize
- elif dtype.type == np.datetime64:
- return "'%sM8%s'" % (byteorder, _datetime_metadata_str(dtype))
- elif dtype.type == np.timedelta64:
- return "'%sm8%s'" % (byteorder, _datetime_metadata_str(dtype))
- elif np.issubdtype(dtype, np.number):
- # Short repr with endianness, like '<f8'
- if short or dtype.byteorder not in ('=', '|'):
- return "'%s%c%d'" % (byteorder, dtype.kind, dtype.itemsize)
- # Longer repr, like 'float64'
- else:
- return "'%s%d'" % (_kind_name(dtype), 8*dtype.itemsize)
- elif dtype.isbuiltin == 2:
- return dtype.type.__name__
- else:
- raise RuntimeError(
- "Internal error: NumPy dtype unrecognized type number")
- def _byte_order_str(dtype):
- """ Normalize byteorder to '<' or '>' """
- # hack to obtain the native and swapped byte order characters
- swapped = np.dtype(int).newbyteorder('s')
- native = swapped.newbyteorder('s')
- byteorder = dtype.byteorder
- if byteorder == '=':
- return native.byteorder
- if byteorder == 's':
- # TODO: this path can never be reached
- return swapped.byteorder
- elif byteorder == '|':
- return ''
- else:
- return byteorder
- def _datetime_metadata_str(dtype):
- # TODO: this duplicates the C append_metastr_to_string
- unit, count = np.datetime_data(dtype)
- if unit == 'generic':
- return ''
- elif count == 1:
- return '[{}]'.format(unit)
- else:
- return '[{}{}]'.format(count, unit)
- def _struct_dict_str(dtype, includealignedflag):
- # unpack the fields dictionary into ls
- names = dtype.names
- fld_dtypes = []
- offsets = []
- titles = []
- for name in names:
- fld_dtype, offset, title = _unpack_field(*dtype.fields[name])
- fld_dtypes.append(fld_dtype)
- offsets.append(offset)
- titles.append(title)
- # Build up a string to make the dictionary
- # First, the names
- ret = "{'names':["
- ret += ",".join(repr(name) for name in names)
- # Second, the formats
- ret += "], 'formats':["
- ret += ",".join(
- _construction_repr(fld_dtype, short=True) for fld_dtype in fld_dtypes)
- # Third, the offsets
- ret += "], 'offsets':["
- ret += ",".join("%d" % offset for offset in offsets)
- # Fourth, the titles
- if any(title is not None for title in titles):
- ret += "], 'titles':["
- ret += ",".join(repr(title) for title in titles)
- # Fifth, the itemsize
- ret += "], 'itemsize':%d" % dtype.itemsize
- if (includealignedflag and dtype.isalignedstruct):
- # Finally, the aligned flag
- ret += ", 'aligned':True}"
- else:
- ret += "}"
- return ret
- def _is_packed(dtype):
- """
- Checks whether the structured data type in 'dtype'
- has a simple layout, where all the fields are in order,
- and follow each other with no alignment padding.
- When this returns true, the dtype can be reconstructed
- from a list of the field names and dtypes with no additional
- dtype parameters.
- Duplicates the C `is_dtype_struct_simple_unaligned_layout` function.
- """
- total_offset = 0
- for name in dtype.names:
- fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
- if fld_offset != total_offset:
- return False
- total_offset += fld_dtype.itemsize
- if total_offset != dtype.itemsize:
- return False
- return True
- def _struct_list_str(dtype):
- items = []
- for name in dtype.names:
- fld_dtype, fld_offset, title = _unpack_field(*dtype.fields[name])
- item = "("
- if title is not None:
- item += "({!r}, {!r}), ".format(title, name)
- else:
- item += "{!r}, ".format(name)
- # Special case subarray handling here
- if fld_dtype.subdtype is not None:
- base, shape = fld_dtype.subdtype
- item += "{}, {}".format(
- _construction_repr(base, short=True),
- shape
- )
- else:
- item += _construction_repr(fld_dtype, short=True)
- item += ")"
- items.append(item)
- return "[" + ", ".join(items) + "]"
- def _struct_str(dtype, include_align):
- # The list str representation can't include the 'align=' flag,
- # so if it is requested and the struct has the aligned flag set,
- # we must use the dict str instead.
- if not (include_align and dtype.isalignedstruct) and _is_packed(dtype):
- sub = _struct_list_str(dtype)
- else:
- sub = _struct_dict_str(dtype, include_align)
- # If the data type isn't the default, void, show it
- if dtype.type != np.void:
- return "({t.__module__}.{t.__name__}, {f})".format(t=dtype.type, f=sub)
- else:
- return sub
- def _subarray_str(dtype):
- base, shape = dtype.subdtype
- return "({}, {})".format(
- _construction_repr(base, short=True),
- shape
- )
- def _name_includes_bit_suffix(dtype):
- if dtype.type == np.object_:
- # pointer size varies by system, best to omit it
- return False
- elif dtype.type == np.bool_:
- # implied
- return False
- elif np.issubdtype(dtype, np.flexible) and _isunsized(dtype):
- # unspecified
- return False
- else:
- return True
- def _name_get(dtype):
- # provides dtype.name.__get__, documented as returning a "bit name"
- if dtype.isbuiltin == 2:
- # user dtypes don't promise to do anything special
- return dtype.type.__name__
- if issubclass(dtype.type, np.void):
- # historically, void subclasses preserve their name, eg `record64`
- name = dtype.type.__name__
- else:
- name = _kind_name(dtype)
- # append bit counts
- if _name_includes_bit_suffix(dtype):
- name += "{}".format(dtype.itemsize * 8)
- # append metadata to datetimes
- if dtype.type in (np.datetime64, np.timedelta64):
- name += _datetime_metadata_str(dtype)
- return name
|