records.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019
  1. """
  2. Record Arrays
  3. =============
  4. Record arrays expose the fields of structured arrays as properties.
  5. Most commonly, ndarrays contain elements of a single type, e.g. floats,
  6. integers, bools etc. However, it is possible for elements to be combinations
  7. of these using structured types, such as::
  8. >>> a = np.array([(1, 2.0), (1, 2.0)], dtype=[('x', np.int64), ('y', np.float64)])
  9. >>> a
  10. array([(1, 2.), (1, 2.)], dtype=[('x', '<i8'), ('y', '<f8')])
  11. Here, each element consists of two fields: x (an int), and y (a float).
  12. This is known as a structured array. The different fields are analogous
  13. to columns in a spread-sheet. The different fields can be accessed as
  14. one would a dictionary::
  15. >>> a['x']
  16. array([1, 1])
  17. >>> a['y']
  18. array([2., 2.])
  19. Record arrays allow us to access fields as properties::
  20. >>> ar = np.rec.array(a)
  21. >>> ar.x
  22. array([1, 1])
  23. >>> ar.y
  24. array([2., 2.])
  25. """
  26. import os
  27. import warnings
  28. from collections import Counter, OrderedDict
  29. from . import numeric as sb
  30. from . import numerictypes as nt
  31. from numpy.compat import (
  32. isfileobj, os_fspath, contextlib_nullcontext
  33. )
  34. from numpy.core.overrides import set_module
  35. from .arrayprint import get_printoptions
# All of the functions allow formats to be a dtype
__all__ = ['record', 'recarray', 'format_parser']


# Local alias for the base array class; `recarray` subclasses it and calls
# ``ndarray.__new__`` / ``ndarray.__getattribute__`` through this name.
ndarray = sb.ndarray

# Lookup table keyed by the FIRST character of a user-supplied byte-order
# string.  ``format_parser._createdtype`` indexes it with ``byteorder[0]``
# and hands the resulting one-character code to ``dtype.newbyteorder``.
_byteorderconv = {'b':'>',
                  'l':'<',
                  'n':'=',
                  'B':'>',
                  'L':'<',
                  'N':'=',
                  'S':'s',
                  's':'s',
                  '>':'>',
                  '<':'<',
                  '=':'=',
                  '|':'|',
                  'I':'|',
                  'i':'|'}

# formats regular expression
# allows multidimension spec with a tuple syntax in front
# of the letter code '(2,3)f4' and ' ( 2 , 3 ) f4 '
# are equally allowed
# NOTE(review): no regular expression is defined in the visible part of this
# file; the comment above looks like a leftover -- confirm before relying
# on it.

# Module-level alias of ``nt.typeDict``.
numfmt = nt.typeDict
  58. # taken from OrderedDict recipes in the Python documentation
  59. # https://docs.python.org/3.3/library/collections.html#ordereddict-examples-and-recipes
  60. class _OrderedCounter(Counter, OrderedDict):
  61. """Counter that remembers the order elements are first encountered"""
  62. def __repr__(self):
  63. return '%s(%r)' % (self.__class__.__name__, OrderedDict(self))
  64. def __reduce__(self):
  65. return self.__class__, (OrderedDict(self),)
  66. def find_duplicate(list):
  67. """Find duplication in a list, return a list of duplicated elements"""
  68. return [
  69. item
  70. for item, counts in _OrderedCounter(list).items()
  71. if counts > 1
  72. ]
  73. @set_module('numpy')
  74. class format_parser:
  75. """
  76. Class to convert formats, names, titles description to a dtype.
  77. After constructing the format_parser object, the dtype attribute is
  78. the converted data-type:
  79. ``dtype = format_parser(formats, names, titles).dtype``
  80. Attributes
  81. ----------
  82. dtype : dtype
  83. The converted data-type.
  84. Parameters
  85. ----------
  86. formats : str or list of str
  87. The format description, either specified as a string with
  88. comma-separated format descriptions in the form ``'f8, i4, a5'``, or
  89. a list of format description strings in the form
  90. ``['f8', 'i4', 'a5']``.
  91. names : str or list/tuple of str
  92. The field names, either specified as a comma-separated string in the
  93. form ``'col1, col2, col3'``, or as a list or tuple of strings in the
  94. form ``['col1', 'col2', 'col3']``.
  95. An empty list can be used, in that case default field names
  96. ('f0', 'f1', ...) are used.
  97. titles : sequence
  98. Sequence of title strings. An empty list can be used to leave titles
  99. out.
  100. aligned : bool, optional
  101. If True, align the fields by padding as the C-compiler would.
  102. Default is False.
  103. byteorder : str, optional
  104. If specified, all the fields will be changed to the
  105. provided byte-order. Otherwise, the default byte-order is
  106. used. For all available string specifiers, see `dtype.newbyteorder`.
  107. See Also
  108. --------
  109. dtype, typename, sctype2char
  110. Examples
  111. --------
  112. >>> np.format_parser(['<f8', '<i4', '<a5'], ['col1', 'col2', 'col3'],
  113. ... ['T1', 'T2', 'T3']).dtype
  114. dtype([(('T1', 'col1'), '<f8'), (('T2', 'col2'), '<i4'), (('T3', 'col3'), 'S5')])
  115. `names` and/or `titles` can be empty lists. If `titles` is an empty list,
  116. titles will simply not appear. If `names` is empty, default field names
  117. will be used.
  118. >>> np.format_parser(['f8', 'i4', 'a5'], ['col1', 'col2', 'col3'],
  119. ... []).dtype
  120. dtype([('col1', '<f8'), ('col2', '<i4'), ('col3', '<S5')])
  121. >>> np.format_parser(['<f8', '<i4', '<a5'], [], []).dtype
  122. dtype([('f0', '<f8'), ('f1', '<i4'), ('f2', 'S5')])
  123. """
  124. def __init__(self, formats, names, titles, aligned=False, byteorder=None):
  125. self._parseFormats(formats, aligned)
  126. self._setfieldnames(names, titles)
  127. self._createdtype(byteorder)
  128. def _parseFormats(self, formats, aligned=False):
  129. """ Parse the field formats """
  130. if formats is None:
  131. raise ValueError("Need formats argument")
  132. if isinstance(formats, list):
  133. dtype = sb.dtype(
  134. [('f{}'.format(i), format_) for i, format_ in enumerate(formats)],
  135. aligned,
  136. )
  137. else:
  138. dtype = sb.dtype(formats, aligned)
  139. fields = dtype.fields
  140. if fields is None:
  141. dtype = sb.dtype([('f1', dtype)], aligned)
  142. fields = dtype.fields
  143. keys = dtype.names
  144. self._f_formats = [fields[key][0] for key in keys]
  145. self._offsets = [fields[key][1] for key in keys]
  146. self._nfields = len(keys)
  147. def _setfieldnames(self, names, titles):
  148. """convert input field names into a list and assign to the _names
  149. attribute """
  150. if names:
  151. if type(names) in [list, tuple]:
  152. pass
  153. elif isinstance(names, str):
  154. names = names.split(',')
  155. else:
  156. raise NameError("illegal input names %s" % repr(names))
  157. self._names = [n.strip() for n in names[:self._nfields]]
  158. else:
  159. self._names = []
  160. # if the names are not specified, they will be assigned as
  161. # "f0, f1, f2,..."
  162. # if not enough names are specified, they will be assigned as "f[n],
  163. # f[n+1],..." etc. where n is the number of specified names..."
  164. self._names += ['f%d' % i for i in range(len(self._names),
  165. self._nfields)]
  166. # check for redundant names
  167. _dup = find_duplicate(self._names)
  168. if _dup:
  169. raise ValueError("Duplicate field names: %s" % _dup)
  170. if titles:
  171. self._titles = [n.strip() for n in titles[:self._nfields]]
  172. else:
  173. self._titles = []
  174. titles = []
  175. if self._nfields > len(titles):
  176. self._titles += [None] * (self._nfields - len(titles))
  177. def _createdtype(self, byteorder):
  178. dtype = sb.dtype({
  179. 'names': self._names,
  180. 'formats': self._f_formats,
  181. 'offsets': self._offsets,
  182. 'titles': self._titles,
  183. })
  184. if byteorder is not None:
  185. byteorder = _byteorderconv[byteorder[0]]
  186. dtype = dtype.newbyteorder(byteorder)
  187. self.dtype = dtype
  188. class record(nt.void):
  189. """A data-type scalar that allows field access as attribute lookup.
  190. """
  191. # manually set name and module so that this class's type shows up
  192. # as numpy.record when printed
  193. __name__ = 'record'
  194. __module__ = 'numpy'
  195. def __repr__(self):
  196. if get_printoptions()['legacy'] == '1.13':
  197. return self.__str__()
  198. return super(record, self).__repr__()
  199. def __str__(self):
  200. if get_printoptions()['legacy'] == '1.13':
  201. return str(self.item())
  202. return super(record, self).__str__()
  203. def __getattribute__(self, attr):
  204. if attr in ('setfield', 'getfield', 'dtype'):
  205. return nt.void.__getattribute__(self, attr)
  206. try:
  207. return nt.void.__getattribute__(self, attr)
  208. except AttributeError:
  209. pass
  210. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  211. res = fielddict.get(attr, None)
  212. if res:
  213. obj = self.getfield(*res[:2])
  214. # if it has fields return a record,
  215. # otherwise return the object
  216. try:
  217. dt = obj.dtype
  218. except AttributeError:
  219. #happens if field is Object type
  220. return obj
  221. if dt.names is not None:
  222. return obj.view((self.__class__, obj.dtype))
  223. return obj
  224. else:
  225. raise AttributeError("'record' object has no "
  226. "attribute '%s'" % attr)
  227. def __setattr__(self, attr, val):
  228. if attr in ('setfield', 'getfield', 'dtype'):
  229. raise AttributeError("Cannot set '%s' attribute" % attr)
  230. fielddict = nt.void.__getattribute__(self, 'dtype').fields
  231. res = fielddict.get(attr, None)
  232. if res:
  233. return self.setfield(val, *res[:2])
  234. else:
  235. if getattr(self, attr, None):
  236. return nt.void.__setattr__(self, attr, val)
  237. else:
  238. raise AttributeError("'record' object has no "
  239. "attribute '%s'" % attr)
  240. def __getitem__(self, indx):
  241. obj = nt.void.__getitem__(self, indx)
  242. # copy behavior of record.__getattribute__,
  243. if isinstance(obj, nt.void) and obj.dtype.names is not None:
  244. return obj.view((self.__class__, obj.dtype))
  245. else:
  246. # return a single element
  247. return obj
  248. def pprint(self):
  249. """Pretty-print all fields."""
  250. # pretty-print all fields
  251. names = self.dtype.names
  252. maxlen = max(len(name) for name in names)
  253. fmt = '%% %ds: %%s' % maxlen
  254. rows = [fmt % (name, getattr(self, name)) for name in names]
  255. return "\n".join(rows)
# The recarray is almost identical to a standard array (which supports
#   named fields already)  The biggest difference is that it can use
#   attribute-lookup to find the fields and it is constructed using
#   a record.

# If byteorder is given it forces a particular byteorder on all
#  the fields (and any subfields)

class recarray(ndarray):
    """Construct an ndarray that allows field access using attributes.

    Arrays may have data-types containing fields, analogous
    to columns in a spread sheet.  An example is ``[(x, int), (y, float)]``,
    where each entry in the array is a pair of ``(int, float)``.  Normally,
    these attributes are accessed using dictionary lookups such as ``arr['x']``
    and ``arr['y']``.  Record arrays allow the fields to be accessed as members
    of the array, using ``arr.x`` and ``arr.y``.

    Parameters
    ----------
    shape : tuple
        Shape of output array.
    dtype : data-type, optional
        The desired data-type.  By default, the data-type is determined
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    formats : list of data-types, optional
        A list containing the data-types for the different columns, e.g.
        ``['i4', 'f8', 'i4']``.  `formats` does *not* support the new
        convention of using types directly, i.e. ``(int, float, int)``.
        Note that `formats` must be a list, not a tuple.
        Given that `formats` is somewhat limited, we recommend specifying
        `dtype` instead.
    names : tuple of str, optional
        The name of each column, e.g. ``('x', 'y', 'z')``.
    buf : buffer, optional
        By default, a new array is created of the given shape and data-type.
        If `buf` is specified and is an object exposing the buffer interface,
        the array will use the memory from the existing buffer.  In this case,
        the `offset` and `strides` keywords are available.

    Other Parameters
    ----------------
    titles : tuple of str, optional
        Aliases for column names.  For example, if `names` were
        ``('x', 'y', 'z')`` and `titles` is
        ``('x_coordinate', 'y_coordinate', 'z_coordinate')``, then
        ``arr['x']`` is equivalent to both ``arr.x`` and ``arr.x_coordinate``.
    byteorder : {'<', '>', '='}, optional
        Byte-order for all fields.
    aligned : bool, optional
        Align the fields in memory as the C-compiler would.
    strides : tuple of ints, optional
        Buffer (`buf`) is interpreted according to these strides (strides
        define how many bytes each array element, row, column, etc.
        occupy in memory).
    offset : int, optional
        Start reading buffer (`buf`) from this offset onwards.
    order : {'C', 'F'}, optional
        Row-major (C-style) or column-major (Fortran-style) order.

    Returns
    -------
    rec : recarray
        Empty array of the given shape and type.

    See Also
    --------
    rec.fromrecords : Construct a record array from data.
    record : fundamental data-type for `recarray`.
    format_parser : determine a data-type from formats, names, titles.

    Notes
    -----
    This constructor can be compared to ``empty``: it creates a new record
    array but does not fill it with data.  To create a record array from data,
    use one of the following methods:

    1. Create a standard ndarray and convert it to a record array,
       using ``arr.view(np.recarray)``
    2. Use the `buf` keyword.
    3. Use `np.rec.fromrecords`.

    Examples
    --------
    Create an array with two fields, ``x`` and ``y``:

    >>> x = np.array([(1.0, 2), (3.0, 4)], dtype=[('x', '<f8'), ('y', '<i8')])
    >>> x
    array([(1., 2), (3., 4)], dtype=[('x', '<f8'), ('y', '<i8')])

    >>> x['x']
    array([1., 3.])

    View the array as a record array:

    >>> x = x.view(np.recarray)

    >>> x.x
    array([1., 3.])

    >>> x.y
    array([2, 4])

    Create a new, empty record array:

    >>> np.recarray((2,),
    ... dtype=[('x', int), ('y', float), ('z', int)]) #doctest: +SKIP
    rec.array([(-1073741821, 1.2249118382103472e-301, 24547520),
           (3471280, 1.2134086255804012e-316, 0)],
          dtype=[('x', '<i4'), ('y', '<f8'), ('z', '<i4')])

    """

    # manually set name and module so that this class's type shows
    # up as "numpy.recarray" when printed
    __name__ = 'recarray'
    __module__ = 'numpy'

    def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
                formats=None, names=None, titles=None,
                byteorder=None, aligned=False, order='C'):
        # An explicit `dtype` takes precedence; otherwise the dtype is built
        # from the formats/names/titles description.
        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(formats, names, titles, aligned, byteorder).dtype

        # `offset` and `strides` are only forwarded when a buffer is given.
        if buf is None:
            self = ndarray.__new__(subtype, shape, (record, descr), order=order)
        else:
            self = ndarray.__new__(subtype, shape, (record, descr),
                                      buffer=buf, offset=offset,
                                      strides=strides, order=order)
        return self

    def __array_finalize__(self, obj):
        if self.dtype.type is not record and self.dtype.names is not None:
            # if self.dtype is not np.record, invoke __setattr__ which will
            # convert it to a record if it is a void dtype.
            self.dtype = self.dtype

    def __getattribute__(self, attr):
        """Return the regular attribute `attr` if there is one, otherwise
        the field named `attr`.

        Raises AttributeError if `attr` is neither.
        """
        # See if ndarray has this attr, and return it if so. (note that this
        # means a field with the same name as an ndarray attr cannot be
        # accessed by attribute).
        try:
            return object.__getattribute__(self, attr)
        except AttributeError:  # attr must be a fieldname
            pass

        # look for a field with this name
        fielddict = ndarray.__getattribute__(self, 'dtype').fields
        try:
            # TypeError covers fielddict being None (dtype without fields).
            res = fielddict[attr][:2]
        except (TypeError, KeyError):
            raise AttributeError("recarray has no attribute %s" % attr)
        obj = self.getfield(*res)

        # At this point obj will always be a recarray, since (see
        # PyArray_GetField) the type of obj is inherited. Next, if obj.dtype is
        # non-structured, convert it to an ndarray. Then if obj is structured
        # with void type convert it to the same dtype.type (eg to preserve
        # numpy.record type if present), since nested structured fields do not
        # inherit type. Don't do this for non-void structures though.
        if obj.dtype.names is not None:
            if issubclass(obj.dtype.type, nt.void):
                return obj.view(dtype=(self.dtype.type, obj.dtype))
            return obj
        else:
            return obj.view(ndarray)

    # Save the dictionary.
    # If the attr is a field name and not in the saved dictionary
    # Undo any "setting" of the attribute and do a setfield
    # Thus, you can't create attributes on-the-fly that are field names.
    def __setattr__(self, attr, val):
        """Set the attribute `attr`, or the field `attr` if it names one.

        A name that is a field is always written through ``setfield``; an
        instance attribute created for it by the first attempt is removed
        again.
        """

        # Automatically convert (void) structured types to records
        # (but not non-void structures, subarrays, or non-structured voids)
        if attr == 'dtype' and issubclass(val.type, nt.void) and val.names is not None:
            val = sb.dtype((record, val))

        # Remember whether the instance attribute existed BEFORE the
        # assignment below, so a freshly created one can be undone.
        newattr = attr not in self.__dict__
        try:
            ret = object.__setattr__(self, attr, val)
        except Exception:
            # Plain assignment failed: only continue if `attr` is a field.
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                raise
        else:
            fielddict = ndarray.__getattribute__(self, 'dtype').fields or {}
            if attr not in fielddict:
                return ret
            if newattr:
                # We just added this one or this setattr worked on an
                # internal attribute.
                try:
                    object.__delattr__(self, attr)
                except Exception:
                    return ret
        try:
            res = fielddict[attr][:2]
        except (TypeError, KeyError):
            raise AttributeError("record array has no attribute %s" % attr)
        return self.setfield(val, *res)

    def __getitem__(self, indx):
        """Index like an ndarray, adjusting the type of the result in the
        same way attribute access does."""
        obj = super(recarray, self).__getitem__(indx)

        # copy behavior of getattr, except that here
        # we might also be returning a single element
        if isinstance(obj, ndarray):
            if obj.dtype.names is not None:
                obj = obj.view(type(self))
                if issubclass(obj.dtype.type, nt.void):
                    return obj.view(dtype=(self.dtype.type, obj.dtype))
                return obj
            else:
                return obj.view(type=ndarray)
        else:
            # return a single element
            return obj

    def __repr__(self):

        repr_dtype = self.dtype
        if self.dtype.type is record or not issubclass(self.dtype.type, nt.void):
            # If this is a full record array (has numpy.record dtype),
            # or if it has a scalar (non-void) dtype with no records,
            # represent it using the rec.array function. Since rec.array
            # converts dtype to a numpy.record for us, convert back
            # to non-record before printing
            if repr_dtype.type is record:
                repr_dtype = sb.dtype((nt.void, repr_dtype))
            prefix = "rec.array("
            fmt = 'rec.array(%s,%sdtype=%s)'
        else:
            # otherwise represent it using np.array plus a view
            # This should only happen if the user is playing
            # strange games with dtypes.
            prefix = "array("
            fmt = 'array(%s,%sdtype=%s).view(numpy.recarray)'

        # get data/shape string. logic taken from numeric.array_repr
        if self.size > 0 or self.shape == (0,):
            lst = sb.array2string(
                self, separator=', ', prefix=prefix, suffix=',')
        else:
            # show zero-length shape unless it is (0,)
            lst = "[], shape=%s" % (repr(self.shape),)

        # The dtype goes on its own line, indented to line up with the data.
        lf = '\n'+' '*len(prefix)
        if get_printoptions()['legacy'] == '1.13':
            lf = ' ' + lf  # trailing space
        return fmt % (lst, lf, repr_dtype)

    def field(self, attr, val=None):
        """Get or set a single field.

        Parameters
        ----------
        attr : int or str
            Field name, or the integer position of the field in
            ``dtype.names``.
        val : object, optional
            If None (the default) the field is returned; otherwise `val`
            is written into the field with ``setfield``.

        Returns
        -------
        The field data when `val` is None: the result of ``getfield`` if
        the field itself has named fields, otherwise that result viewed as
        a plain ndarray.  When `val` is given, the result of ``setfield``.
        """
        if isinstance(attr, int):
            names = ndarray.__getattribute__(self, 'dtype').names
            attr = names[attr]

        fielddict = ndarray.__getattribute__(self, 'dtype').fields

        res = fielddict[attr][:2]

        if val is None:
            obj = self.getfield(*res)
            if obj.dtype.names is not None:
                return obj
            return obj.view(ndarray)
        else:
            return self.setfield(val, *res)
  488. def _deprecate_shape_0_as_None(shape):
  489. if shape == 0:
  490. warnings.warn(
  491. "Passing `shape=0` to have the shape be inferred is deprecated, "
  492. "and in future will be equivalent to `shape=(0,)`. To infer "
  493. "the shape and suppress this warning, pass `shape=None` instead.",
  494. FutureWarning, stacklevel=3)
  495. return None
  496. else:
  497. return shape
  498. def fromarrays(arrayList, dtype=None, shape=None, formats=None,
  499. names=None, titles=None, aligned=False, byteorder=None):
  500. """Create a record array from a (flat) list of arrays
  501. Parameters
  502. ----------
  503. arrayList : list or tuple
  504. List of array-like objects (such as lists, tuples,
  505. and ndarrays).
  506. dtype : data-type, optional
  507. valid dtype for all arrays
  508. shape : int or tuple of ints, optional
  509. Shape of the resulting array. If not provided, inferred from
  510. ``arrayList[0]``.
  511. formats, names, titles, aligned, byteorder :
  512. If `dtype` is ``None``, these arguments are passed to
  513. `numpy.format_parser` to construct a dtype. See that function for
  514. detailed documentation.
  515. Returns
  516. -------
  517. np.recarray
  518. Record array consisting of given arrayList columns.
  519. Examples
  520. --------
  521. >>> x1=np.array([1,2,3,4])
  522. >>> x2=np.array(['a','dd','xyz','12'])
  523. >>> x3=np.array([1.1,2,3,4])
  524. >>> r = np.core.records.fromarrays([x1,x2,x3],names='a,b,c')
  525. >>> print(r[1])
  526. (2, 'dd', 2.0) # may vary
  527. >>> x1[1]=34
  528. >>> r.a
  529. array([1, 2, 3, 4])
  530. >>> x1 = np.array([1, 2, 3, 4])
  531. >>> x2 = np.array(['a', 'dd', 'xyz', '12'])
  532. >>> x3 = np.array([1.1, 2, 3,4])
  533. >>> r = np.core.records.fromarrays(
  534. ... [x1, x2, x3],
  535. ... dtype=np.dtype([('a', np.int32), ('b', 'S3'), ('c', np.float32)]))
  536. >>> r
  537. rec.array([(1, b'a', 1.1), (2, b'dd', 2. ), (3, b'xyz', 3. ),
  538. (4, b'12', 4. )],
  539. dtype=[('a', '<i4'), ('b', 'S3'), ('c', '<f4')])
  540. """
  541. arrayList = [sb.asarray(x) for x in arrayList]
  542. # NumPy 1.19.0, 2020-01-01
  543. shape = _deprecate_shape_0_as_None(shape)
  544. if shape is None:
  545. shape = arrayList[0].shape
  546. elif isinstance(shape, int):
  547. shape = (shape,)
  548. if formats is None and dtype is None:
  549. # go through each object in the list to see if it is an ndarray
  550. # and determine the formats.
  551. formats = [obj.dtype for obj in arrayList]
  552. if dtype is not None:
  553. descr = sb.dtype(dtype)
  554. else:
  555. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  556. _names = descr.names
  557. # Determine shape from data-type.
  558. if len(descr) != len(arrayList):
  559. raise ValueError("mismatch between the number of fields "
  560. "and the number of arrays")
  561. d0 = descr[0].shape
  562. nn = len(d0)
  563. if nn > 0:
  564. shape = shape[:-nn]
  565. for k, obj in enumerate(arrayList):
  566. nn = descr[k].ndim
  567. testshape = obj.shape[:obj.ndim - nn]
  568. if testshape != shape:
  569. raise ValueError("array-shape mismatch in array %d" % k)
  570. _array = recarray(shape, descr)
  571. # populate the record array (makes a copy)
  572. for i in range(len(arrayList)):
  573. _array[_names[i]] = arrayList[i]
  574. return _array
  575. def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
  576. titles=None, aligned=False, byteorder=None):
  577. """Create a recarray from a list of records in text form.
  578. Parameters
  579. ----------
  580. recList : sequence
  581. data in the same field may be heterogeneous - they will be promoted
  582. to the highest data type.
  583. dtype : data-type, optional
  584. valid dtype for all arrays
  585. shape : int or tuple of ints, optional
  586. shape of each array.
  587. formats, names, titles, aligned, byteorder :
  588. If `dtype` is ``None``, these arguments are passed to
  589. `numpy.format_parser` to construct a dtype. See that function for
  590. detailed documentation.
  591. If both `formats` and `dtype` are None, then this will auto-detect
  592. formats. Use list of tuples rather than list of lists for faster
  593. processing.
  594. Returns
  595. -------
  596. np.recarray
  597. record array consisting of given recList rows.
  598. Examples
  599. --------
  600. >>> r=np.core.records.fromrecords([(456,'dbe',1.2),(2,'de',1.3)],
  601. ... names='col1,col2,col3')
  602. >>> print(r[0])
  603. (456, 'dbe', 1.2)
  604. >>> r.col1
  605. array([456, 2])
  606. >>> r.col2
  607. array(['dbe', 'de'], dtype='<U3')
  608. >>> import pickle
  609. >>> pickle.loads(pickle.dumps(r))
  610. rec.array([(456, 'dbe', 1.2), ( 2, 'de', 1.3)],
  611. dtype=[('col1', '<i8'), ('col2', '<U3'), ('col3', '<f8')])
  612. """
  613. if formats is None and dtype is None: # slower
  614. obj = sb.array(recList, dtype=object)
  615. arrlist = [sb.array(obj[..., i].tolist()) for i in range(obj.shape[-1])]
  616. return fromarrays(arrlist, formats=formats, shape=shape, names=names,
  617. titles=titles, aligned=aligned, byteorder=byteorder)
  618. if dtype is not None:
  619. descr = sb.dtype((record, dtype))
  620. else:
  621. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  622. try:
  623. retval = sb.array(recList, dtype=descr)
  624. except (TypeError, ValueError):
  625. # NumPy 1.19.0, 2020-01-01
  626. shape = _deprecate_shape_0_as_None(shape)
  627. if shape is None:
  628. shape = len(recList)
  629. if isinstance(shape, int):
  630. shape = (shape,)
  631. if len(shape) > 1:
  632. raise ValueError("Can only deal with 1-d array.")
  633. _array = recarray(shape, descr)
  634. for k in range(_array.size):
  635. _array[k] = tuple(recList[k])
  636. # list of lists instead of list of tuples ?
  637. # 2018-02-07, 1.14.1
  638. warnings.warn(
  639. "fromrecords expected a list of tuples, may have received a list "
  640. "of lists instead. In the future that will raise an error",
  641. FutureWarning, stacklevel=2)
  642. return _array
  643. else:
  644. if shape is not None and retval.shape != shape:
  645. retval.shape = shape
  646. res = retval.view(recarray)
  647. return res
  648. def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
  649. names=None, titles=None, aligned=False, byteorder=None):
  650. r"""Create a record array from binary data
  651. Note that despite the name of this function it does not accept `str`
  652. instances.
  653. Parameters
  654. ----------
  655. datastring : bytes-like
  656. Buffer of binary data
  657. dtype : data-type, optional
  658. Valid dtype for all arrays
  659. shape : int or tuple of ints, optional
  660. Shape of each array.
  661. offset : int, optional
  662. Position in the buffer to start reading from.
  663. formats, names, titles, aligned, byteorder :
  664. If `dtype` is ``None``, these arguments are passed to
  665. `numpy.format_parser` to construct a dtype. See that function for
  666. detailed documentation.
  667. Returns
  668. -------
  669. np.recarray
  670. Record array view into the data in datastring. This will be readonly
  671. if `datastring` is readonly.
  672. See Also
  673. --------
  674. numpy.frombuffer
  675. Examples
  676. --------
  677. >>> a = b'\x01\x02\x03abc'
  678. >>> np.core.records.fromstring(a, dtype='u1,u1,u1,S3')
  679. rec.array([(1, 2, 3, b'abc')],
  680. dtype=[('f0', 'u1'), ('f1', 'u1'), ('f2', 'u1'), ('f3', 'S3')])
  681. >>> grades_dtype = [('Name', (np.str_, 10)), ('Marks', np.float64),
  682. ... ('GradeLevel', np.int32)]
  683. >>> grades_array = np.array([('Sam', 33.3, 3), ('Mike', 44.4, 5),
  684. ... ('Aadi', 66.6, 6)], dtype=grades_dtype)
  685. >>> np.core.records.fromstring(grades_array.tobytes(), dtype=grades_dtype)
  686. rec.array([('Sam', 33.3, 3), ('Mike', 44.4, 5), ('Aadi', 66.6, 6)],
  687. dtype=[('Name', '<U10'), ('Marks', '<f8'), ('GradeLevel', '<i4')])
  688. >>> s = '\x01\x02\x03abc'
  689. >>> np.core.records.fromstring(s, dtype='u1,u1,u1,S3')
  690. Traceback (most recent call last)
  691. ...
  692. TypeError: a bytes-like object is required, not 'str'
  693. """
  694. if dtype is None and formats is None:
  695. raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
  696. if dtype is not None:
  697. descr = sb.dtype(dtype)
  698. else:
  699. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  700. itemsize = descr.itemsize
  701. # NumPy 1.19.0, 2020-01-01
  702. shape = _deprecate_shape_0_as_None(shape)
  703. if shape in (None, -1):
  704. shape = (len(datastring) - offset) // itemsize
  705. _array = recarray(shape, descr, buf=datastring, offset=offset)
  706. return _array
  707. def get_remaining_size(fd):
  708. try:
  709. fn = fd.fileno()
  710. except AttributeError:
  711. return os.path.getsize(fd.name) - fd.tell()
  712. st = os.fstat(fn)
  713. size = st.st_size - fd.tell()
  714. return size
  715. def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
  716. names=None, titles=None, aligned=False, byteorder=None):
  717. """Create an array from binary file data
  718. Parameters
  719. ----------
  720. fd : str or file type
  721. If file is a string or a path-like object then that file is opened,
  722. else it is assumed to be a file object. The file object must
  723. support random access (i.e. it must have tell and seek methods).
  724. dtype : data-type, optional
  725. valid dtype for all arrays
  726. shape : int or tuple of ints, optional
  727. shape of each array.
  728. offset : int, optional
  729. Position in the file to start reading from.
  730. formats, names, titles, aligned, byteorder :
  731. If `dtype` is ``None``, these arguments are passed to
  732. `numpy.format_parser` to construct a dtype. See that function for
  733. detailed documentation
  734. Returns
  735. -------
  736. np.recarray
  737. record array consisting of data enclosed in file.
  738. Examples
  739. --------
  740. >>> from tempfile import TemporaryFile
  741. >>> a = np.empty(10,dtype='f8,i4,a5')
  742. >>> a[5] = (0.5,10,'abcde')
  743. >>>
  744. >>> fd=TemporaryFile()
  745. >>> a = a.newbyteorder('<')
  746. >>> a.tofile(fd)
  747. >>>
  748. >>> _ = fd.seek(0)
  749. >>> r=np.core.records.fromfile(fd, formats='f8,i4,a5', shape=10,
  750. ... byteorder='<')
  751. >>> print(r[5])
  752. (0.5, 10, 'abcde')
  753. >>> r.shape
  754. (10,)
  755. """
  756. if dtype is None and formats is None:
  757. raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
  758. # NumPy 1.19.0, 2020-01-01
  759. shape = _deprecate_shape_0_as_None(shape)
  760. if shape is None:
  761. shape = (-1,)
  762. elif isinstance(shape, int):
  763. shape = (shape,)
  764. if isfileobj(fd):
  765. # file already opened
  766. ctx = contextlib_nullcontext(fd)
  767. else:
  768. # open file
  769. ctx = open(os_fspath(fd), 'rb')
  770. with ctx as fd:
  771. if offset > 0:
  772. fd.seek(offset, 1)
  773. size = get_remaining_size(fd)
  774. if dtype is not None:
  775. descr = sb.dtype(dtype)
  776. else:
  777. descr = format_parser(formats, names, titles, aligned, byteorder).dtype
  778. itemsize = descr.itemsize
  779. shapeprod = sb.array(shape).prod(dtype=nt.intp)
  780. shapesize = shapeprod * itemsize
  781. if shapesize < 0:
  782. shape = list(shape)
  783. shape[shape.index(-1)] = size // -shapesize
  784. shape = tuple(shape)
  785. shapeprod = sb.array(shape).prod(dtype=nt.intp)
  786. nbytes = shapeprod * itemsize
  787. if nbytes > size:
  788. raise ValueError(
  789. "Not enough bytes left in file for specified shape and type")
  790. # create the array
  791. _array = recarray(shape, descr)
  792. nbytesread = fd.readinto(_array.data)
  793. if nbytesread != nbytes:
  794. raise IOError("Didn't read as many bytes as expected")
  795. return _array
  796. def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
  797. names=None, titles=None, aligned=False, byteorder=None, copy=True):
  798. """Construct a record array from a wide-variety of objects.
  799. """
  800. if ((isinstance(obj, (type(None), str)) or isfileobj(obj)) and
  801. formats is None and dtype is None):
  802. raise ValueError("Must define formats (or dtype) if object is "
  803. "None, string, or an open file")
  804. kwds = {}
  805. if dtype is not None:
  806. dtype = sb.dtype(dtype)
  807. elif formats is not None:
  808. dtype = format_parser(formats, names, titles,
  809. aligned, byteorder).dtype
  810. else:
  811. kwds = {'formats': formats,
  812. 'names': names,
  813. 'titles': titles,
  814. 'aligned': aligned,
  815. 'byteorder': byteorder
  816. }
  817. if obj is None:
  818. if shape is None:
  819. raise ValueError("Must define a shape if obj is None")
  820. return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)
  821. elif isinstance(obj, bytes):
  822. return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)
  823. elif isinstance(obj, (list, tuple)):
  824. if isinstance(obj[0], (tuple, list)):
  825. return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
  826. else:
  827. return fromarrays(obj, dtype=dtype, shape=shape, **kwds)
  828. elif isinstance(obj, recarray):
  829. if dtype is not None and (obj.dtype != dtype):
  830. new = obj.view(dtype)
  831. else:
  832. new = obj
  833. if copy:
  834. new = new.copy()
  835. return new
  836. elif isfileobj(obj):
  837. return fromfile(obj, dtype=dtype, shape=shape, offset=offset)
  838. elif isinstance(obj, ndarray):
  839. if dtype is not None and (obj.dtype != dtype):
  840. new = obj.view(dtype)
  841. else:
  842. new = obj
  843. if copy:
  844. new = new.copy()
  845. return new.view(recarray)
  846. else:
  847. interface = getattr(obj, "__array_interface__", None)
  848. if interface is None or not isinstance(interface, dict):
  849. raise ValueError("Unknown input type")
  850. obj = sb.array(obj)
  851. if dtype is not None and (obj.dtype != dtype):
  852. obj = obj.view(dtype)
  853. return obj.view(recarray)