# defchararray.py
  """
  This module contains a set of functions for vectorized string
  operations and methods.

  .. note::
     The `chararray` class exists for backwards compatibility with
     Numarray, it is not recommended for new development. Starting from numpy
     1.4, if one needs arrays of strings, it is recommended to use arrays of
     `dtype` `object_`, `string_` or `unicode_`, and use the free functions
     in the `numpy.char` module for fast vectorized string operations.

  Some methods will only be available if the corresponding string method is
  available in your version of Python.

  The preferred alias for `defchararray` is `numpy.char`.

  """
  import functools
  import sys
  from .numerictypes import (
      string_, unicode_, integer, int_, object_, bool_, character)
  from .numeric import ndarray, compare_chararrays
  from .numeric import array as narray
  from numpy.core.multiarray import _vec_string
  from numpy.core.overrides import set_module
  from numpy.core import overrides
  from numpy.compat import asbytes
  import numpy

  # Public names exported by this module.
  __all__ = [
      'equal', 'not_equal', 'greater_equal', 'less_equal',
      'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
      'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
      'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
      'istitle', 'isupper', 'join', 'ljust', 'lower', 'lstrip', 'partition',
      'replace', 'rfind', 'rindex', 'rjust', 'rpartition', 'rsplit',
      'rstrip', 'split', 'splitlines', 'startswith', 'strip', 'swapcase',
      'title', 'translate', 'upper', 'zfill', 'isnumeric', 'isdecimal',
      'array', 'asarray'
      ]


  _globalvar = 0

  # Every public function below registers its dispatcher through this
  # partial, so all of them are bound to the 'numpy.char' module.
  array_function_dispatch = functools.partial(
      overrides.array_function_dispatch, module='numpy.char')
  39. def _use_unicode(*args):
  40. """
  41. Helper function for determining the output type of some string
  42. operations.
  43. For an operation on two ndarrays, if at least one is unicode, the
  44. result should be unicode.
  45. """
  46. for x in args:
  47. if (isinstance(x, str) or
  48. issubclass(numpy.asarray(x).dtype.type, unicode_)):
  49. return unicode_
  50. return string_
  51. def _to_string_or_unicode_array(result):
  52. """
  53. Helper function to cast a result back into a string or unicode array
  54. if an object array must be used as an intermediary.
  55. """
  56. return numpy.asarray(result.tolist())
  57. def _clean_args(*args):
  58. """
  59. Helper function for delegating arguments to Python string
  60. functions.
  61. Many of the Python string operations that have optional arguments
  62. do not use 'None' to indicate a default value. In these cases,
  63. we need to remove all None arguments, and those following them.
  64. """
  65. newargs = []
  66. for chk in args:
  67. if chk is None:
  68. break
  69. newargs.append(chk)
  70. return newargs
  71. def _get_num_chars(a):
  72. """
  73. Helper function that returns the number of characters per field in
  74. a string or unicode array. This is to abstract out the fact that
  75. for a unicode array this is itemsize / 4.
  76. """
  77. if issubclass(a.dtype.type, unicode_):
  78. return a.itemsize // 4
  79. return a.itemsize
  80. def _binary_op_dispatcher(x1, x2):
  81. return (x1, x2)
  @array_function_dispatch(_binary_op_dispatcher)
  def equal(x1, x2):
      """
      Return (x1 == x2) element-wise.

      Unlike `numpy.equal`, this comparison is performed by first
      stripping whitespace characters from the end of the string.  This
      behavior is provided for backward-compatibility with numarray.

      Parameters
      ----------
      x1, x2 : array_like of str or unicode
          Input arrays of the same shape.

      Returns
      -------
      out : ndarray or bool
          Output array of bools, or a single bool if x1 and x2 are scalars.

      See Also
      --------
      not_equal, greater_equal, less_equal, greater, less
      """
      return compare_chararrays(x1, x2, '==', True)
  @array_function_dispatch(_binary_op_dispatcher)
  def not_equal(x1, x2):
      """
      Return (x1 != x2) element-wise.

      Unlike `numpy.not_equal`, this comparison is performed by first
      stripping whitespace characters from the end of the string.  This
      behavior is provided for backward-compatibility with numarray.

      Parameters
      ----------
      x1, x2 : array_like of str or unicode
          Input arrays of the same shape.

      Returns
      -------
      out : ndarray or bool
          Output array of bools, or a single bool if x1 and x2 are scalars.

      See Also
      --------
      equal, greater_equal, less_equal, greater, less
      """
      return compare_chararrays(x1, x2, '!=', True)
  @array_function_dispatch(_binary_op_dispatcher)
  def greater_equal(x1, x2):
      """
      Return (x1 >= x2) element-wise.

      Unlike `numpy.greater_equal`, this comparison is performed by
      first stripping whitespace characters from the end of the string.
      This behavior is provided for backward-compatibility with
      numarray.

      Parameters
      ----------
      x1, x2 : array_like of str or unicode
          Input arrays of the same shape.

      Returns
      -------
      out : ndarray or bool
          Output array of bools, or a single bool if x1 and x2 are scalars.

      See Also
      --------
      equal, not_equal, less_equal, greater, less
      """
      return compare_chararrays(x1, x2, '>=', True)
  @array_function_dispatch(_binary_op_dispatcher)
  def less_equal(x1, x2):
      """
      Return (x1 <= x2) element-wise.

      Unlike `numpy.less_equal`, this comparison is performed by first
      stripping whitespace characters from the end of the string.  This
      behavior is provided for backward-compatibility with numarray.

      Parameters
      ----------
      x1, x2 : array_like of str or unicode
          Input arrays of the same shape.

      Returns
      -------
      out : ndarray or bool
          Output array of bools, or a single bool if x1 and x2 are scalars.

      See Also
      --------
      equal, not_equal, greater_equal, greater, less
      """
      return compare_chararrays(x1, x2, '<=', True)
  @array_function_dispatch(_binary_op_dispatcher)
  def greater(x1, x2):
      """
      Return (x1 > x2) element-wise.

      Unlike `numpy.greater`, this comparison is performed by first
      stripping whitespace characters from the end of the string.  This
      behavior is provided for backward-compatibility with numarray.

      Parameters
      ----------
      x1, x2 : array_like of str or unicode
          Input arrays of the same shape.

      Returns
      -------
      out : ndarray or bool
          Output array of bools, or a single bool if x1 and x2 are scalars.

      See Also
      --------
      equal, not_equal, greater_equal, less_equal, less
      """
      return compare_chararrays(x1, x2, '>', True)
  @array_function_dispatch(_binary_op_dispatcher)
  def less(x1, x2):
      """
      Return (x1 < x2) element-wise.

      Unlike `numpy.less`, this comparison is performed by first
      stripping whitespace characters from the end of the string.  This
      behavior is provided for backward-compatibility with numarray.

      Parameters
      ----------
      x1, x2 : array_like of str or unicode
          Input arrays of the same shape.

      Returns
      -------
      out : ndarray or bool
          Output array of bools, or a single bool if x1 and x2 are scalars.

      See Also
      --------
      equal, not_equal, greater_equal, less_equal, greater
      """
      return compare_chararrays(x1, x2, '<', True)
  203. def _unary_op_dispatcher(a):
  204. return (a,)
  @array_function_dispatch(_unary_op_dispatcher)
  def str_len(a):
      """
      Return len(a) element-wise.

      Parameters
      ----------
      a : array_like of str or unicode

      Returns
      -------
      out : ndarray
          Output array of integers

      See also
      --------
      builtins.len
      """
      # Note: __len__, etc. currently return ints, which are not C-integers.
      # Generally intp would be expected for lengths, although int is
      # sufficient due to the dtype itemsize limitation.
      return _vec_string(a, int_, '__len__')
  @array_function_dispatch(_binary_op_dispatcher)
  def add(x1, x2):
      """
      Return element-wise string concatenation for two arrays of str or
      unicode.

      Arrays `x1` and `x2` must have the same shape.

      Parameters
      ----------
      x1 : array_like of str or unicode
          Input array.
      x2 : array_like of str or unicode
          Input array.

      Returns
      -------
      add : ndarray
          Output array of `string_` or `unicode_`, depending on input types
          of the same shape as `x1` and `x2`.
      """
      arr1 = numpy.asarray(x1)
      arr2 = numpy.asarray(x2)
      # The output field is sized as the sum of both operands' field widths.
      out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
      dtype = _use_unicode(arr1, arr2)
      return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
  246. def _multiply_dispatcher(a, i):
  247. return (a,)
  @array_function_dispatch(_multiply_dispatcher)
  def multiply(a, i):
      """
      Return (a * i), that is string multiple concatenation,
      element-wise.

      Values in `i` of less than 0 are treated as 0 (which yields an
      empty string).

      Parameters
      ----------
      a : array_like of str or unicode

      i : array_like of ints

      Returns
      -------
      out : ndarray
          Output array of str or unicode, depending on input types

      Raises
      ------
      ValueError
          If `i` does not have an integer dtype.
      """
      a_arr = numpy.asarray(a)
      i_arr = numpy.asarray(i)
      if not issubclass(i_arr.dtype.type, integer):
          raise ValueError("Can only multiply by integers")
      # Size the output field for the largest repeat count; a negative
      # maximum is clamped to 0.
      out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0)
      return _vec_string(
          a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
  271. def _mod_dispatcher(a, values):
  272. return (a, values)
  @array_function_dispatch(_mod_dispatcher)
  def mod(a, values):
      """
      Return (a % i), that is pre-Python 2.6 string formatting
      (interpolation), element-wise for a pair of array_likes of str
      or unicode.

      Parameters
      ----------
      a : array_like of str or unicode

      values : array_like of values
         These values will be element-wise interpolated into the string.

      Returns
      -------
      out : ndarray
          Output array of str or unicode, depending on input types

      See also
      --------
      str.__mod__

      """
      # The result is computed as an object array and then converted back
      # to a string or unicode array.
      return _to_string_or_unicode_array(
          _vec_string(a, object_, '__mod__', (values,)))
  @array_function_dispatch(_unary_op_dispatcher)
  def capitalize(a):
      """
      Return a copy of `a` with only the first character of each element
      capitalized.

      Calls `str.capitalize` element-wise.

      For 8-bit strings, this method is locale-dependent.

      Parameters
      ----------
      a : array_like of str or unicode
          Input array of strings to capitalize.

      Returns
      -------
      out : ndarray
          Output array of str or unicode, depending on input
          types

      See also
      --------
      str.capitalize

      Examples
      --------
      >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
      array(['a1b2', '1b2a', 'b2a1', '2a1b'],
          dtype='|S4')
      >>> np.char.capitalize(c)
      array(['A1b2', '1b2a', 'B2a1', '2a1b'],
          dtype='|S4')

      """
      a_arr = numpy.asarray(a)
      # The result keeps the input array's dtype.
      return _vec_string(a_arr, a_arr.dtype, 'capitalize')
  324. def _center_dispatcher(a, width, fillchar=None):
  325. return (a,)
  @array_function_dispatch(_center_dispatcher)
  def center(a, width, fillchar=' '):
      """
      Return a copy of `a` with its elements centered in a string of
      length `width`.

      Calls `str.center` element-wise.

      Parameters
      ----------
      a : array_like of str or unicode

      width : int
          The length of the resulting strings
      fillchar : str or unicode, optional
          The padding character to use (default is space).

      Returns
      -------
      out : ndarray
          Output array of str or unicode, depending on input
          types

      See also
      --------
      str.center

      """
      a_arr = numpy.asarray(a)
      width_arr = numpy.asarray(width)
      # The output field is sized for the largest requested width.
      size = int(numpy.max(width_arr.flat))
      if numpy.issubdtype(a_arr.dtype, numpy.string_):
          # Byte-string arrays get the fill character converted via asbytes.
          fillchar = asbytes(fillchar)
      return _vec_string(
          a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
  355. def _count_dispatcher(a, sub, start=None, end=None):
  356. return (a,)
  @array_function_dispatch(_count_dispatcher)
  def count(a, sub, start=0, end=None):
      """
      Returns an array with the number of non-overlapping occurrences of
      substring `sub` in the range [`start`, `end`].

      Calls `str.count` element-wise.

      Parameters
      ----------
      a : array_like of str or unicode

      sub : str or unicode
         The substring to search for.

      start, end : int, optional
         Optional arguments `start` and `end` are interpreted as slice
         notation to specify the range in which to count.

      Returns
      -------
      out : ndarray
          Output array of ints.

      See also
      --------
      str.count

      Examples
      --------
      >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
      >>> c
      array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
      >>> np.char.count(c, 'A')
      array([3, 1, 1])
      >>> np.char.count(c, 'aA')
      array([3, 1, 0])
      >>> np.char.count(c, 'A', start=1, end=4)
      array([2, 1, 1])
      >>> np.char.count(c, 'A', start=1, end=3)
      array([1, 0, 0])

      """
      # `end` is only forwarded when it is not None.
      return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end))
  393. def _code_dispatcher(a, encoding=None, errors=None):
  394. return (a,)
  @array_function_dispatch(_code_dispatcher)
  def decode(a, encoding=None, errors=None):
      """
      Calls `bytes.decode` element-wise.

      The set of available codecs comes from the Python standard library,
      and may be extended at runtime.  For more information, see the
      :mod:`codecs` module.

      Parameters
      ----------
      a : array_like of str or unicode

      encoding : str, optional
         The name of an encoding

      errors : str, optional
         Specifies how to handle encoding errors

      Returns
      -------
      out : ndarray

      See also
      --------
      bytes.decode

      Notes
      -----
      The type of the result will depend on the encoding specified.

      Examples
      --------
      >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
      >>> c
      array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
      >>> np.char.encode(c, encoding='cp037')
      array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
          '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
          dtype='|S7')

      """
      # Arguments from the first None onwards are not forwarded, so the
      # method's own defaults apply.
      return _to_string_or_unicode_array(
          _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
  @array_function_dispatch(_code_dispatcher)
  def encode(a, encoding=None, errors=None):
      """
      Calls `str.encode` element-wise.

      The set of available codecs comes from the Python standard library,
      and may be extended at runtime. For more information, see the codecs
      module.

      Parameters
      ----------
      a : array_like of str or unicode

      encoding : str, optional
         The name of an encoding

      errors : str, optional
         Specifies how to handle encoding errors

      Returns
      -------
      out : ndarray

      See also
      --------
      str.encode

      Notes
      -----
      The type of the result will depend on the encoding specified.

      """
      # Arguments from the first None onwards are not forwarded, so the
      # method's own defaults apply.
      return _to_string_or_unicode_array(
          _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
  456. def _endswith_dispatcher(a, suffix, start=None, end=None):
  457. return (a,)
  @array_function_dispatch(_endswith_dispatcher)
  def endswith(a, suffix, start=0, end=None):
      """
      Returns a boolean array which is `True` where the string element
      in `a` ends with `suffix`, otherwise `False`.

      Calls `str.endswith` element-wise.

      Parameters
      ----------
      a : array_like of str or unicode

      suffix : str

      start, end : int, optional
          With optional `start`, test beginning at that position. With
          optional `end`, stop comparing at that position.

      Returns
      -------
      out : ndarray
          Outputs an array of bools.

      See also
      --------
      str.endswith

      Examples
      --------
      >>> s = np.array(['foo', 'bar'])
      >>> s[0] = 'foo'
      >>> s[1] = 'bar'
      >>> s
      array(['foo', 'bar'], dtype='<U3')
      >>> np.char.endswith(s, 'ar')
      array([False,  True])
      >>> np.char.endswith(s, 'a', start=1, end=2)
      array([False,  True])

      """
      # `end` is only forwarded when it is not None.
      return _vec_string(
          a, bool_, 'endswith', [suffix, start] + _clean_args(end))
  492. def _expandtabs_dispatcher(a, tabsize=None):
  493. return (a,)
  @array_function_dispatch(_expandtabs_dispatcher)
  def expandtabs(a, tabsize=8):
      """
      Return a copy of each string element where all tab characters are
      replaced by one or more spaces.

      Calls `str.expandtabs` element-wise.

      The number of spaces inserted depends on the current column
      and the given `tabsize`. The column number is reset to zero after
      each newline occurring in the string. This doesn't understand other
      non-printing characters or escape sequences.

      Parameters
      ----------
      a : array_like of str or unicode
          Input array
      tabsize : int, optional
          Replace tabs with `tabsize` number of spaces.  If not given defaults
          to 8 spaces.

      Returns
      -------
      out : ndarray
          Output array of str or unicode, depending on input type

      See also
      --------
      str.expandtabs

      """
      # The result is computed as an object array and then converted back
      # to a string or unicode array.
      return _to_string_or_unicode_array(
          _vec_string(a, object_, 'expandtabs', (tabsize,)))
  @array_function_dispatch(_count_dispatcher)
  def find(a, sub, start=0, end=None):
      """
      For each element, return the lowest index in the string where
      substring `sub` is found.

      Calls `str.find` element-wise.

      The index returned is the lowest one such that `sub` is contained
      in the range [`start`, `end`].

      Parameters
      ----------
      a : array_like of str or unicode

      sub : str or unicode

      start, end : int, optional
          Optional arguments `start` and `end` are interpreted as in
          slice notation.

      Returns
      -------
      out : ndarray or int
          Output array of ints.  Returns -1 if `sub` is not found.

      See also
      --------
      str.find

      """
      # `end` is only forwarded when it is not None.
      return _vec_string(
          a, int_, 'find', [sub, start] + _clean_args(end))
  @array_function_dispatch(_count_dispatcher)
  def index(a, sub, start=0, end=None):
      """
      Like `find`, but raises `ValueError` when the substring is not found.

      Calls `str.index` element-wise.

      Parameters
      ----------
      a : array_like of str or unicode

      sub : str or unicode

      start, end : int, optional

      Returns
      -------
      out : ndarray
          Output array of ints.

      Raises
      ------
      ValueError
          If `sub` is not found in an element (unlike `find`, which
          returns -1).

      See also
      --------
      find, str.find

      """
      # `end` is only forwarded when it is not None.
      return _vec_string(
          a, int_, 'index', [sub, start] + _clean_args(end))
  @array_function_dispatch(_unary_op_dispatcher)
  def isalnum(a):
      """
      Returns true for each element if all characters in the string are
      alphanumeric and there is at least one character, false otherwise.

      Calls `str.isalnum` element-wise.

      For 8-bit strings, this method is locale-dependent.

      Parameters
      ----------
      a : array_like of str or unicode

      Returns
      -------
      out : ndarray
          Output array of bools

      See also
      --------
      str.isalnum
      """
      return _vec_string(a, bool_, 'isalnum')
  @array_function_dispatch(_unary_op_dispatcher)
  def isalpha(a):
      """
      Returns true for each element if all characters in the string are
      alphabetic and there is at least one character, false otherwise.

      Calls `str.isalpha` element-wise.

      For 8-bit strings, this method is locale-dependent.

      Parameters
      ----------
      a : array_like of str or unicode

      Returns
      -------
      out : ndarray
          Output array of bools

      See also
      --------
      str.isalpha
      """
      return _vec_string(a, bool_, 'isalpha')
  @array_function_dispatch(_unary_op_dispatcher)
  def isdigit(a):
      """
      Returns true for each element if all characters in the string are
      digits and there is at least one character, false otherwise.

      Calls `str.isdigit` element-wise.

      For 8-bit strings, this method is locale-dependent.

      Parameters
      ----------
      a : array_like of str or unicode

      Returns
      -------
      out : ndarray
          Output array of bools

      See also
      --------
      str.isdigit
      """
      return _vec_string(a, bool_, 'isdigit')
  625. @array_function_dispatch(_unary_op_dispatcher)
  626. def islower(a):
  627. """
  628. Returns true for each element if all cased characters in the
  629. string are lowercase and there is at least one cased character,
  630. false otherwise.
  631. Calls `str.islower` element-wise.
  632. For 8-bit strings, this method is locale-dependent.
  633. Parameters
  634. ----------
  635. a : array_like of str or unicode
  636. Returns
  637. -------
  638. out : ndarray
  639. Output array of bools
  640. See also
  641. --------
  642. str.islower
  643. """
  644. return _vec_string(a, bool_, 'islower')
  645. @array_function_dispatch(_unary_op_dispatcher)
  646. def isspace(a):
  647. """
  648. Returns true for each element if there are only whitespace
  649. characters in the string and there is at least one character,
  650. false otherwise.
  651. Calls `str.isspace` element-wise.
  652. For 8-bit strings, this method is locale-dependent.
  653. Parameters
  654. ----------
  655. a : array_like of str or unicode
  656. Returns
  657. -------
  658. out : ndarray
  659. Output array of bools
  660. See also
  661. --------
  662. str.isspace
  663. """
  664. return _vec_string(a, bool_, 'isspace')
  665. @array_function_dispatch(_unary_op_dispatcher)
  666. def istitle(a):
  667. """
  668. Returns true for each element if the element is a titlecased
  669. string and there is at least one character, false otherwise.
  670. Call `str.istitle` element-wise.
  671. For 8-bit strings, this method is locale-dependent.
  672. Parameters
  673. ----------
  674. a : array_like of str or unicode
  675. Returns
  676. -------
  677. out : ndarray
  678. Output array of bools
  679. See also
  680. --------
  681. str.istitle
  682. """
  683. return _vec_string(a, bool_, 'istitle')
  684. @array_function_dispatch(_unary_op_dispatcher)
  685. def isupper(a):
  686. """
  687. Returns true for each element if all cased characters in the
  688. string are uppercase and there is at least one character, false
  689. otherwise.
  690. Call `str.isupper` element-wise.
  691. For 8-bit strings, this method is locale-dependent.
  692. Parameters
  693. ----------
  694. a : array_like of str or unicode
  695. Returns
  696. -------
  697. out : ndarray
  698. Output array of bools
  699. See also
  700. --------
  701. str.isupper
  702. """
  703. return _vec_string(a, bool_, 'isupper')
  704. def _join_dispatcher(sep, seq):
  705. return (sep, seq)
  706. @array_function_dispatch(_join_dispatcher)
  707. def join(sep, seq):
  708. """
  709. Return a string which is the concatenation of the strings in the
  710. sequence `seq`.
  711. Calls `str.join` element-wise.
  712. Parameters
  713. ----------
  714. sep : array_like of str or unicode
  715. seq : array_like of str or unicode
  716. Returns
  717. -------
  718. out : ndarray
  719. Output array of str or unicode, depending on input types
  720. See also
  721. --------
  722. str.join
  723. """
  724. return _to_string_or_unicode_array(
  725. _vec_string(sep, object_, 'join', (seq,)))
  726. def _just_dispatcher(a, width, fillchar=None):
  727. return (a,)
  728. @array_function_dispatch(_just_dispatcher)
  729. def ljust(a, width, fillchar=' '):
  730. """
  731. Return an array with the elements of `a` left-justified in a
  732. string of length `width`.
  733. Calls `str.ljust` element-wise.
  734. Parameters
  735. ----------
  736. a : array_like of str or unicode
  737. width : int
  738. The length of the resulting strings
  739. fillchar : str or unicode, optional
  740. The character to use for padding
  741. Returns
  742. -------
  743. out : ndarray
  744. Output array of str or unicode, depending on input type
  745. See also
  746. --------
  747. str.ljust
  748. """
  749. a_arr = numpy.asarray(a)
  750. width_arr = numpy.asarray(width)
  751. size = int(numpy.max(width_arr.flat))
  752. if numpy.issubdtype(a_arr.dtype, numpy.string_):
  753. fillchar = asbytes(fillchar)
  754. return _vec_string(
  755. a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
  756. @array_function_dispatch(_unary_op_dispatcher)
  757. def lower(a):
  758. """
  759. Return an array with the elements converted to lowercase.
  760. Call `str.lower` element-wise.
  761. For 8-bit strings, this method is locale-dependent.
  762. Parameters
  763. ----------
  764. a : array_like, {str, unicode}
  765. Input array.
  766. Returns
  767. -------
  768. out : ndarray, {str, unicode}
  769. Output array of str or unicode, depending on input type
  770. See also
  771. --------
  772. str.lower
  773. Examples
  774. --------
  775. >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
  776. array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
  777. >>> np.char.lower(c)
  778. array(['a1b c', '1bca', 'bca1'], dtype='<U5')
  779. """
  780. a_arr = numpy.asarray(a)
  781. return _vec_string(a_arr, a_arr.dtype, 'lower')
  782. def _strip_dispatcher(a, chars=None):
  783. return (a,)
  784. @array_function_dispatch(_strip_dispatcher)
  785. def lstrip(a, chars=None):
  786. """
  787. For each element in `a`, return a copy with the leading characters
  788. removed.
  789. Calls `str.lstrip` element-wise.
  790. Parameters
  791. ----------
  792. a : array-like, {str, unicode}
  793. Input array.
  794. chars : {str, unicode}, optional
  795. The `chars` argument is a string specifying the set of
  796. characters to be removed. If omitted or None, the `chars`
  797. argument defaults to removing whitespace. The `chars` argument
  798. is not a prefix; rather, all combinations of its values are
  799. stripped.
  800. Returns
  801. -------
  802. out : ndarray, {str, unicode}
  803. Output array of str or unicode, depending on input type
  804. See also
  805. --------
  806. str.lstrip
  807. Examples
  808. --------
  809. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  810. >>> c
  811. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  812. The 'a' variable is unstripped from c[1] because whitespace leading.
  813. >>> np.char.lstrip(c, 'a')
  814. array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')
  815. >>> np.char.lstrip(c, 'A') # leaves c unchanged
  816. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  817. >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
  818. ... # XXX: is this a regression? This used to return True
  819. ... # np.char.lstrip(c,'') does not modify c at all.
  820. False
  821. >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
  822. True
  823. """
  824. a_arr = numpy.asarray(a)
  825. return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
  826. def _partition_dispatcher(a, sep):
  827. return (a,)
  828. @array_function_dispatch(_partition_dispatcher)
  829. def partition(a, sep):
  830. """
  831. Partition each element in `a` around `sep`.
  832. Calls `str.partition` element-wise.
  833. For each element in `a`, split the element as the first
  834. occurrence of `sep`, and return 3 strings containing the part
  835. before the separator, the separator itself, and the part after
  836. the separator. If the separator is not found, return 3 strings
  837. containing the string itself, followed by two empty strings.
  838. Parameters
  839. ----------
  840. a : array_like, {str, unicode}
  841. Input array
  842. sep : {str, unicode}
  843. Separator to split each string element in `a`.
  844. Returns
  845. -------
  846. out : ndarray, {str, unicode}
  847. Output array of str or unicode, depending on input type.
  848. The output array will have an extra dimension with 3
  849. elements per input element.
  850. See also
  851. --------
  852. str.partition
  853. """
  854. return _to_string_or_unicode_array(
  855. _vec_string(a, object_, 'partition', (sep,)))
  856. def _replace_dispatcher(a, old, new, count=None):
  857. return (a,)
  858. @array_function_dispatch(_replace_dispatcher)
  859. def replace(a, old, new, count=None):
  860. """
  861. For each element in `a`, return a copy of the string with all
  862. occurrences of substring `old` replaced by `new`.
  863. Calls `str.replace` element-wise.
  864. Parameters
  865. ----------
  866. a : array-like of str or unicode
  867. old, new : str or unicode
  868. count : int, optional
  869. If the optional argument `count` is given, only the first
  870. `count` occurrences are replaced.
  871. Returns
  872. -------
  873. out : ndarray
  874. Output array of str or unicode, depending on input type
  875. See also
  876. --------
  877. str.replace
  878. """
  879. return _to_string_or_unicode_array(
  880. _vec_string(
  881. a, object_, 'replace', [old, new] + _clean_args(count)))
  882. @array_function_dispatch(_count_dispatcher)
  883. def rfind(a, sub, start=0, end=None):
  884. """
  885. For each element in `a`, return the highest index in the string
  886. where substring `sub` is found, such that `sub` is contained
  887. within [`start`, `end`].
  888. Calls `str.rfind` element-wise.
  889. Parameters
  890. ----------
  891. a : array-like of str or unicode
  892. sub : str or unicode
  893. start, end : int, optional
  894. Optional arguments `start` and `end` are interpreted as in
  895. slice notation.
  896. Returns
  897. -------
  898. out : ndarray
  899. Output array of ints. Return -1 on failure.
  900. See also
  901. --------
  902. str.rfind
  903. """
  904. return _vec_string(
  905. a, int_, 'rfind', [sub, start] + _clean_args(end))
  906. @array_function_dispatch(_count_dispatcher)
  907. def rindex(a, sub, start=0, end=None):
  908. """
  909. Like `rfind`, but raises `ValueError` when the substring `sub` is
  910. not found.
  911. Calls `str.rindex` element-wise.
  912. Parameters
  913. ----------
  914. a : array-like of str or unicode
  915. sub : str or unicode
  916. start, end : int, optional
  917. Returns
  918. -------
  919. out : ndarray
  920. Output array of ints.
  921. See also
  922. --------
  923. rfind, str.rindex
  924. """
  925. return _vec_string(
  926. a, int_, 'rindex', [sub, start] + _clean_args(end))
  927. @array_function_dispatch(_just_dispatcher)
  928. def rjust(a, width, fillchar=' '):
  929. """
  930. Return an array with the elements of `a` right-justified in a
  931. string of length `width`.
  932. Calls `str.rjust` element-wise.
  933. Parameters
  934. ----------
  935. a : array_like of str or unicode
  936. width : int
  937. The length of the resulting strings
  938. fillchar : str or unicode, optional
  939. The character to use for padding
  940. Returns
  941. -------
  942. out : ndarray
  943. Output array of str or unicode, depending on input type
  944. See also
  945. --------
  946. str.rjust
  947. """
  948. a_arr = numpy.asarray(a)
  949. width_arr = numpy.asarray(width)
  950. size = int(numpy.max(width_arr.flat))
  951. if numpy.issubdtype(a_arr.dtype, numpy.string_):
  952. fillchar = asbytes(fillchar)
  953. return _vec_string(
  954. a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
  955. @array_function_dispatch(_partition_dispatcher)
  956. def rpartition(a, sep):
  957. """
  958. Partition (split) each element around the right-most separator.
  959. Calls `str.rpartition` element-wise.
  960. For each element in `a`, split the element as the last
  961. occurrence of `sep`, and return 3 strings containing the part
  962. before the separator, the separator itself, and the part after
  963. the separator. If the separator is not found, return 3 strings
  964. containing the string itself, followed by two empty strings.
  965. Parameters
  966. ----------
  967. a : array_like of str or unicode
  968. Input array
  969. sep : str or unicode
  970. Right-most separator to split each element in array.
  971. Returns
  972. -------
  973. out : ndarray
  974. Output array of string or unicode, depending on input
  975. type. The output array will have an extra dimension with
  976. 3 elements per input element.
  977. See also
  978. --------
  979. str.rpartition
  980. """
  981. return _to_string_or_unicode_array(
  982. _vec_string(a, object_, 'rpartition', (sep,)))
  983. def _split_dispatcher(a, sep=None, maxsplit=None):
  984. return (a,)
  985. @array_function_dispatch(_split_dispatcher)
  986. def rsplit(a, sep=None, maxsplit=None):
  987. """
  988. For each element in `a`, return a list of the words in the
  989. string, using `sep` as the delimiter string.
  990. Calls `str.rsplit` element-wise.
  991. Except for splitting from the right, `rsplit`
  992. behaves like `split`.
  993. Parameters
  994. ----------
  995. a : array_like of str or unicode
  996. sep : str or unicode, optional
  997. If `sep` is not specified or None, any whitespace string
  998. is a separator.
  999. maxsplit : int, optional
  1000. If `maxsplit` is given, at most `maxsplit` splits are done,
  1001. the rightmost ones.
  1002. Returns
  1003. -------
  1004. out : ndarray
  1005. Array of list objects
  1006. See also
  1007. --------
  1008. str.rsplit, split
  1009. """
  1010. # This will return an array of lists of different sizes, so we
  1011. # leave it as an object array
  1012. return _vec_string(
  1013. a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
  1014. def _strip_dispatcher(a, chars=None):
  1015. return (a,)
  1016. @array_function_dispatch(_strip_dispatcher)
  1017. def rstrip(a, chars=None):
  1018. """
  1019. For each element in `a`, return a copy with the trailing
  1020. characters removed.
  1021. Calls `str.rstrip` element-wise.
  1022. Parameters
  1023. ----------
  1024. a : array-like of str or unicode
  1025. chars : str or unicode, optional
  1026. The `chars` argument is a string specifying the set of
  1027. characters to be removed. If omitted or None, the `chars`
  1028. argument defaults to removing whitespace. The `chars` argument
  1029. is not a suffix; rather, all combinations of its values are
  1030. stripped.
  1031. Returns
  1032. -------
  1033. out : ndarray
  1034. Output array of str or unicode, depending on input type
  1035. See also
  1036. --------
  1037. str.rstrip
  1038. Examples
  1039. --------
  1040. >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
  1041. array(['aAaAaA', 'abBABba'],
  1042. dtype='|S7')
  1043. >>> np.char.rstrip(c, b'a')
  1044. array(['aAaAaA', 'abBABb'],
  1045. dtype='|S7')
  1046. >>> np.char.rstrip(c, b'A')
  1047. array(['aAaAa', 'abBABba'],
  1048. dtype='|S7')
  1049. """
  1050. a_arr = numpy.asarray(a)
  1051. return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
  1052. @array_function_dispatch(_split_dispatcher)
  1053. def split(a, sep=None, maxsplit=None):
  1054. """
  1055. For each element in `a`, return a list of the words in the
  1056. string, using `sep` as the delimiter string.
  1057. Calls `str.split` element-wise.
  1058. Parameters
  1059. ----------
  1060. a : array_like of str or unicode
  1061. sep : str or unicode, optional
  1062. If `sep` is not specified or None, any whitespace string is a
  1063. separator.
  1064. maxsplit : int, optional
  1065. If `maxsplit` is given, at most `maxsplit` splits are done.
  1066. Returns
  1067. -------
  1068. out : ndarray
  1069. Array of list objects
  1070. See also
  1071. --------
  1072. str.split, rsplit
  1073. """
  1074. # This will return an array of lists of different sizes, so we
  1075. # leave it as an object array
  1076. return _vec_string(
  1077. a, object_, 'split', [sep] + _clean_args(maxsplit))
  1078. def _splitlines_dispatcher(a, keepends=None):
  1079. return (a,)
  1080. @array_function_dispatch(_splitlines_dispatcher)
  1081. def splitlines(a, keepends=None):
  1082. """
  1083. For each element in `a`, return a list of the lines in the
  1084. element, breaking at line boundaries.
  1085. Calls `str.splitlines` element-wise.
  1086. Parameters
  1087. ----------
  1088. a : array_like of str or unicode
  1089. keepends : bool, optional
  1090. Line breaks are not included in the resulting list unless
  1091. keepends is given and true.
  1092. Returns
  1093. -------
  1094. out : ndarray
  1095. Array of list objects
  1096. See also
  1097. --------
  1098. str.splitlines
  1099. """
  1100. return _vec_string(
  1101. a, object_, 'splitlines', _clean_args(keepends))
  1102. def _startswith_dispatcher(a, prefix, start=None, end=None):
  1103. return (a,)
  1104. @array_function_dispatch(_startswith_dispatcher)
  1105. def startswith(a, prefix, start=0, end=None):
  1106. """
  1107. Returns a boolean array which is `True` where the string element
  1108. in `a` starts with `prefix`, otherwise `False`.
  1109. Calls `str.startswith` element-wise.
  1110. Parameters
  1111. ----------
  1112. a : array_like of str or unicode
  1113. prefix : str
  1114. start, end : int, optional
  1115. With optional `start`, test beginning at that position. With
  1116. optional `end`, stop comparing at that position.
  1117. Returns
  1118. -------
  1119. out : ndarray
  1120. Array of booleans
  1121. See also
  1122. --------
  1123. str.startswith
  1124. """
  1125. return _vec_string(
  1126. a, bool_, 'startswith', [prefix, start] + _clean_args(end))
  1127. @array_function_dispatch(_strip_dispatcher)
  1128. def strip(a, chars=None):
  1129. """
  1130. For each element in `a`, return a copy with the leading and
  1131. trailing characters removed.
  1132. Calls `str.strip` element-wise.
  1133. Parameters
  1134. ----------
  1135. a : array-like of str or unicode
  1136. chars : str or unicode, optional
  1137. The `chars` argument is a string specifying the set of
  1138. characters to be removed. If omitted or None, the `chars`
  1139. argument defaults to removing whitespace. The `chars` argument
  1140. is not a prefix or suffix; rather, all combinations of its
  1141. values are stripped.
  1142. Returns
  1143. -------
  1144. out : ndarray
  1145. Output array of str or unicode, depending on input type
  1146. See also
  1147. --------
  1148. str.strip
  1149. Examples
  1150. --------
  1151. >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
  1152. >>> c
  1153. array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
  1154. >>> np.char.strip(c)
  1155. array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
  1156. >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
  1157. array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
  1158. >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
  1159. array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')
  1160. """
  1161. a_arr = numpy.asarray(a)
  1162. return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
  1163. @array_function_dispatch(_unary_op_dispatcher)
  1164. def swapcase(a):
  1165. """
  1166. Return element-wise a copy of the string with
  1167. uppercase characters converted to lowercase and vice versa.
  1168. Calls `str.swapcase` element-wise.
  1169. For 8-bit strings, this method is locale-dependent.
  1170. Parameters
  1171. ----------
  1172. a : array_like, {str, unicode}
  1173. Input array.
  1174. Returns
  1175. -------
  1176. out : ndarray, {str, unicode}
  1177. Output array of str or unicode, depending on input type
  1178. See also
  1179. --------
  1180. str.swapcase
  1181. Examples
  1182. --------
  1183. >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
  1184. array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
  1185. dtype='|S5')
  1186. >>> np.char.swapcase(c)
  1187. array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
  1188. dtype='|S5')
  1189. """
  1190. a_arr = numpy.asarray(a)
  1191. return _vec_string(a_arr, a_arr.dtype, 'swapcase')
  1192. @array_function_dispatch(_unary_op_dispatcher)
  1193. def title(a):
  1194. """
  1195. Return element-wise title cased version of string or unicode.
  1196. Title case words start with uppercase characters, all remaining cased
  1197. characters are lowercase.
  1198. Calls `str.title` element-wise.
  1199. For 8-bit strings, this method is locale-dependent.
  1200. Parameters
  1201. ----------
  1202. a : array_like, {str, unicode}
  1203. Input array.
  1204. Returns
  1205. -------
  1206. out : ndarray
  1207. Output array of str or unicode, depending on input type
  1208. See also
  1209. --------
  1210. str.title
  1211. Examples
  1212. --------
  1213. >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
  1214. array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
  1215. dtype='|S5')
  1216. >>> np.char.title(c)
  1217. array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
  1218. dtype='|S5')
  1219. """
  1220. a_arr = numpy.asarray(a)
  1221. return _vec_string(a_arr, a_arr.dtype, 'title')
  1222. def _translate_dispatcher(a, table, deletechars=None):
  1223. return (a,)
  1224. @array_function_dispatch(_translate_dispatcher)
  1225. def translate(a, table, deletechars=None):
  1226. """
  1227. For each element in `a`, return a copy of the string where all
  1228. characters occurring in the optional argument `deletechars` are
  1229. removed, and the remaining characters have been mapped through the
  1230. given translation table.
  1231. Calls `str.translate` element-wise.
  1232. Parameters
  1233. ----------
  1234. a : array-like of str or unicode
  1235. table : str of length 256
  1236. deletechars : str
  1237. Returns
  1238. -------
  1239. out : ndarray
  1240. Output array of str or unicode, depending on input type
  1241. See also
  1242. --------
  1243. str.translate
  1244. """
  1245. a_arr = numpy.asarray(a)
  1246. if issubclass(a_arr.dtype.type, unicode_):
  1247. return _vec_string(
  1248. a_arr, a_arr.dtype, 'translate', (table,))
  1249. else:
  1250. return _vec_string(
  1251. a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
  1252. @array_function_dispatch(_unary_op_dispatcher)
  1253. def upper(a):
  1254. """
  1255. Return an array with the elements converted to uppercase.
  1256. Calls `str.upper` element-wise.
  1257. For 8-bit strings, this method is locale-dependent.
  1258. Parameters
  1259. ----------
  1260. a : array_like, {str, unicode}
  1261. Input array.
  1262. Returns
  1263. -------
  1264. out : ndarray, {str, unicode}
  1265. Output array of str or unicode, depending on input type
  1266. See also
  1267. --------
  1268. str.upper
  1269. Examples
  1270. --------
  1271. >>> c = np.array(['a1b c', '1bca', 'bca1']); c
  1272. array(['a1b c', '1bca', 'bca1'], dtype='<U5')
  1273. >>> np.char.upper(c)
  1274. array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
  1275. """
  1276. a_arr = numpy.asarray(a)
  1277. return _vec_string(a_arr, a_arr.dtype, 'upper')
  1278. def _zfill_dispatcher(a, width):
  1279. return (a,)
  1280. @array_function_dispatch(_zfill_dispatcher)
  1281. def zfill(a, width):
  1282. """
  1283. Return the numeric string left-filled with zeros
  1284. Calls `str.zfill` element-wise.
  1285. Parameters
  1286. ----------
  1287. a : array_like, {str, unicode}
  1288. Input array.
  1289. width : int
  1290. Width of string to left-fill elements in `a`.
  1291. Returns
  1292. -------
  1293. out : ndarray, {str, unicode}
  1294. Output array of str or unicode, depending on input type
  1295. See also
  1296. --------
  1297. str.zfill
  1298. """
  1299. a_arr = numpy.asarray(a)
  1300. width_arr = numpy.asarray(width)
  1301. size = int(numpy.max(width_arr.flat))
  1302. return _vec_string(
  1303. a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
  1304. @array_function_dispatch(_unary_op_dispatcher)
  1305. def isnumeric(a):
  1306. """
  1307. For each element, return True if there are only numeric
  1308. characters in the element.
  1309. Calls `unicode.isnumeric` element-wise.
  1310. Numeric characters include digit characters, and all characters
  1311. that have the Unicode numeric value property, e.g. ``U+2155,
  1312. VULGAR FRACTION ONE FIFTH``.
  1313. Parameters
  1314. ----------
  1315. a : array_like, unicode
  1316. Input array.
  1317. Returns
  1318. -------
  1319. out : ndarray, bool
  1320. Array of booleans of same shape as `a`.
  1321. See also
  1322. --------
  1323. unicode.isnumeric
  1324. """
  1325. if _use_unicode(a) != unicode_:
  1326. raise TypeError("isnumeric is only available for Unicode strings and arrays")
  1327. return _vec_string(a, bool_, 'isnumeric')
  1328. @array_function_dispatch(_unary_op_dispatcher)
  1329. def isdecimal(a):
  1330. """
  1331. For each element, return True if there are only decimal
  1332. characters in the element.
  1333. Calls `unicode.isdecimal` element-wise.
  1334. Decimal characters include digit characters, and all characters
  1335. that can be used to form decimal-radix numbers,
  1336. e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.
  1337. Parameters
  1338. ----------
  1339. a : array_like, unicode
  1340. Input array.
  1341. Returns
  1342. -------
  1343. out : ndarray, bool
  1344. Array of booleans identical in shape to `a`.
  1345. See also
  1346. --------
  1347. unicode.isdecimal
  1348. """
  1349. if _use_unicode(a) != unicode_:
  1350. raise TypeError("isnumeric is only available for Unicode strings and arrays")
  1351. return _vec_string(a, bool_, 'isdecimal')
  1352. @set_module('numpy')
  1353. class chararray(ndarray):
  1354. """
  1355. chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
  1356. strides=None, order=None)
  1357. Provides a convenient view on arrays of string and unicode values.
  1358. .. note::
  1359. The `chararray` class exists for backwards compatibility with
  1360. Numarray, it is not recommended for new development. Starting from numpy
  1361. 1.4, if one needs arrays of strings, it is recommended to use arrays of
  1362. `dtype` `object_`, `string_` or `unicode_`, and use the free functions
  1363. in the `numpy.char` module for fast vectorized string operations.
  1364. Versus a regular NumPy array of type `str` or `unicode`, this
  1365. class adds the following functionality:
  1366. 1) values automatically have whitespace removed from the end
  1367. when indexed
  1368. 2) comparison operators automatically remove whitespace from the
  1369. end when comparing values
  1370. 3) vectorized string operations are provided as methods
  1371. (e.g. `.endswith`) and infix operators (e.g. ``"+", "*", "%"``)
  1372. chararrays should be created using `numpy.char.array` or
  1373. `numpy.char.asarray`, rather than this constructor directly.
  1374. This constructor creates the array, using `buffer` (with `offset`
  1375. and `strides`) if it is not ``None``. If `buffer` is ``None``, then
  1376. constructs a new array with `strides` in "C order", unless both
  1377. ``len(shape) >= 2`` and ``order='F'``, in which case `strides`
  1378. is in "Fortran order".
  1379. Methods
  1380. -------
  1381. astype
  1382. argsort
  1383. copy
  1384. count
  1385. decode
  1386. dump
  1387. dumps
  1388. encode
  1389. endswith
  1390. expandtabs
  1391. fill
  1392. find
  1393. flatten
  1394. getfield
  1395. index
  1396. isalnum
  1397. isalpha
  1398. isdecimal
  1399. isdigit
  1400. islower
  1401. isnumeric
  1402. isspace
  1403. istitle
  1404. isupper
  1405. item
  1406. join
  1407. ljust
  1408. lower
  1409. lstrip
  1410. nonzero
  1411. put
  1412. ravel
  1413. repeat
  1414. replace
  1415. reshape
  1416. resize
  1417. rfind
  1418. rindex
  1419. rjust
  1420. rsplit
  1421. rstrip
  1422. searchsorted
  1423. setfield
  1424. setflags
  1425. sort
  1426. split
  1427. splitlines
  1428. squeeze
  1429. startswith
  1430. strip
  1431. swapaxes
  1432. swapcase
  1433. take
  1434. title
  1435. tofile
  1436. tolist
  1437. tostring
  1438. translate
  1439. transpose
  1440. upper
  1441. view
  1442. zfill
  1443. Parameters
  1444. ----------
  1445. shape : tuple
  1446. Shape of the array.
  1447. itemsize : int, optional
  1448. Length of each array element, in number of characters. Default is 1.
  1449. unicode : bool, optional
  1450. Are the array elements of type unicode (True) or string (False).
  1451. Default is False.
  1452. buffer : object exposing the buffer interface or str, optional
  1453. Memory address of the start of the array data. Default is None,
  1454. in which case a new array is created.
  1455. offset : int, optional
  1456. Fixed stride displacement from the beginning of an axis?
  1457. Default is 0. Needs to be >=0.
  1458. strides : array_like of ints, optional
  1459. Strides for the array (see `ndarray.strides` for full description).
  1460. Default is None.
  1461. order : {'C', 'F'}, optional
  1462. The order in which the array data is stored in memory: 'C' ->
  1463. "row major" order (the default), 'F' -> "column major"
  1464. (Fortran) order.
  1465. Examples
  1466. --------
  1467. >>> charar = np.chararray((3, 3))
  1468. >>> charar[:] = 'a'
  1469. >>> charar
  1470. chararray([[b'a', b'a', b'a'],
  1471. [b'a', b'a', b'a'],
  1472. [b'a', b'a', b'a']], dtype='|S1')
  1473. >>> charar = np.chararray(charar.shape, itemsize=5)
  1474. >>> charar[:] = 'abc'
  1475. >>> charar
  1476. chararray([[b'abc', b'abc', b'abc'],
  1477. [b'abc', b'abc', b'abc'],
  1478. [b'abc', b'abc', b'abc']], dtype='|S5')
  1479. """
  1480. def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
  1481. offset=0, strides=None, order='C'):
  1482. global _globalvar
  1483. if unicode:
  1484. dtype = unicode_
  1485. else:
  1486. dtype = string_
  1487. # force itemsize to be a Python int, since using NumPy integer
  1488. # types results in itemsize.itemsize being used as the size of
  1489. # strings in the new array.
  1490. itemsize = int(itemsize)
  1491. if isinstance(buffer, str):
  1492. # unicode objects do not have the buffer interface
  1493. filler = buffer
  1494. buffer = None
  1495. else:
  1496. filler = None
  1497. _globalvar = 1
  1498. if buffer is None:
  1499. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  1500. order=order)
  1501. else:
  1502. self = ndarray.__new__(subtype, shape, (dtype, itemsize),
  1503. buffer=buffer,
  1504. offset=offset, strides=strides,
  1505. order=order)
  1506. if filler is not None:
  1507. self[...] = filler
  1508. _globalvar = 0
  1509. return self
  1510. def __array_finalize__(self, obj):
  1511. # The b is a special case because it is used for reconstructing.
  1512. if not _globalvar and self.dtype.char not in 'SUbc':
  1513. raise ValueError("Can only create a chararray from string data.")
  1514. def __getitem__(self, obj):
  1515. val = ndarray.__getitem__(self, obj)
  1516. if isinstance(val, character):
  1517. temp = val.rstrip()
  1518. if len(temp) == 0:
  1519. val = ''
  1520. else:
  1521. val = temp
  1522. return val
  1523. # IMPLEMENTATION NOTE: Most of the methods of this class are
  1524. # direct delegations to the free functions in this module.
  1525. # However, those that return an array of strings should instead
  1526. # return a chararray, so some extra wrapping is required.
  1527. def __eq__(self, other):
  1528. """
  1529. Return (self == other) element-wise.
  1530. See also
  1531. --------
  1532. equal
  1533. """
  1534. return equal(self, other)
  1535. def __ne__(self, other):
  1536. """
  1537. Return (self != other) element-wise.
  1538. See also
  1539. --------
  1540. not_equal
  1541. """
  1542. return not_equal(self, other)
  1543. def __ge__(self, other):
  1544. """
  1545. Return (self >= other) element-wise.
  1546. See also
  1547. --------
  1548. greater_equal
  1549. """
  1550. return greater_equal(self, other)
  1551. def __le__(self, other):
  1552. """
  1553. Return (self <= other) element-wise.
  1554. See also
  1555. --------
  1556. less_equal
  1557. """
  1558. return less_equal(self, other)
  def __gt__(self, other):
      """
      Element-wise ``self > other``.

      Parameters
      ----------
      other : object
          Right-hand operand, forwarded unchanged to `greater`.

      See Also
      --------
      greater
      """
      outcome = greater(self, other)
      return outcome
  def __lt__(self, other):
      """
      Element-wise ``self < other``.

      Parameters
      ----------
      other : object
          Right-hand operand, forwarded unchanged to `less`.

      See Also
      --------
      less
      """
      outcome = less(self, other)
      return outcome
  def __add__(self, other):
      """
      Element-wise string concatenation ``self + other`` for a pair of
      array_likes of str or unicode.

      Parameters
      ----------
      other : array_like
          Right-hand operand, forwarded unchanged to `add`.

      Returns
      -------
      chararray
          The result of `add`, passed through `asarray`.

      See Also
      --------
      add
      """
      joined = add(self, other)
      return asarray(joined)
  def __radd__(self, other):
      """
      Reflected element-wise string concatenation ``other + self`` for a
      pair of array_likes of `string_` or `unicode_`.

      Parameters
      ----------
      other : array_like
          Left-hand operand; converted with ``numpy.asarray`` before being
          handed to `add`.

      Returns
      -------
      chararray
          The result of `add`, passed through `asarray`.

      See Also
      --------
      add
      """
      left = numpy.asarray(other)
      joined = add(left, self)
      return asarray(joined)
  def __mul__(self, i):
      """
      Element-wise string multiple concatenation ``self * i``.

      Parameters
      ----------
      i : object
          Repeat count(s), forwarded unchanged to `multiply`.

      Returns
      -------
      chararray
          The result of `multiply`, passed through `asarray`.

      See Also
      --------
      multiply
      """
      repeated = multiply(self, i)
      return asarray(repeated)
  def __rmul__(self, i):
      """
      Reflected element-wise string multiple concatenation ``i * self``.

      Evaluated exactly like ``self * i``: the call made is
      ``multiply(self, i)``.

      Parameters
      ----------
      i : object
          Repeat count(s), forwarded unchanged to `multiply`.

      Returns
      -------
      chararray
          The result of `multiply`, passed through `asarray`.

      See Also
      --------
      multiply
      """
      repeated = multiply(self, i)
      return asarray(repeated)
  def __mod__(self, i):
      """
      Element-wise ``self % i``, i.e. pre-Python 2.6 string formatting
      (interpolation), for a pair of array_likes of `string_` or
      `unicode_`.

      Parameters
      ----------
      i : object
          Values to interpolate, forwarded unchanged to `mod`.

      Returns
      -------
      chararray
          The result of `mod`, passed through `asarray`.

      See Also
      --------
      mod
      """
      formatted = mod(self, i)
      return asarray(formatted)
  1621. def __rmod__(self, other):
  1622. return NotImplemented
  def argsort(self, axis=-1, kind=None, order=None):
      """
      Return the indices that sort the array lexicographically.

      This is a thin wrapper: the call is forwarded to the ``argsort``
      method of the object returned by ``self.__array__()``.  For full
      documentation see `numpy.argsort`.

      Parameters
      ----------
      axis, kind, order
          Forwarded positionally, unchanged.

      Examples
      --------
      >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
      >>> c = c.view(np.chararray); c
      chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
            dtype='|S5')
      >>> c[c.argsort()]
      chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
            dtype='|S5')
      """
      base = self.__array__()
      return base.argsort(axis, kind, order)
  # The docstring above is replaced by ndarray.argsort's documentation.
  argsort.__doc__ = ndarray.argsort.__doc__
  def capitalize(self):
      """
      Return a copy of `self` in which only the first character of each
      element is capitalized.

      Returns
      -------
      chararray
          The result of the module-level `capitalize`, passed through
          `asarray`.

      See Also
      --------
      char.capitalize
      """
      converted = capitalize(self)
      return asarray(converted)
  def center(self, width, fillchar=' '):
      """
      Return a copy of `self` with each element centered in a string of
      length `width`.

      Parameters
      ----------
      width, fillchar
          Forwarded unchanged to the module-level `center`.

      Returns
      -------
      chararray
          The result of `center`, passed through `asarray`.

      See Also
      --------
      center
      """
      padded = center(self, width, fillchar)
      return asarray(padded)
  def count(self, sub, start=0, end=None):
      """
      Return an array with the number of non-overlapping occurrences of
      substring `sub` in the range [`start`, `end`].

      Parameters
      ----------
      sub, start, end
          Forwarded unchanged to the module-level `count`.

      See Also
      --------
      char.count
      """
      # Returned as-is: the result is not wrapped with `asarray`.
      tally = count(self, sub, start, end)
      return tally
  def decode(self, encoding=None, errors=None):
      """
      Call `str.decode` element-wise.

      Parameters
      ----------
      encoding, errors
          Forwarded unchanged to the module-level `decode`.

      See Also
      --------
      char.decode
      """
      # Returned as-is: the result is not wrapped with `asarray`.
      decoded = decode(self, encoding, errors)
      return decoded
  def encode(self, encoding=None, errors=None):
      """
      Call `str.encode` element-wise.

      Parameters
      ----------
      encoding, errors
          Forwarded unchanged to the module-level `encode`.

      See Also
      --------
      char.encode
      """
      # Returned as-is: the result is not wrapped with `asarray`.
      encoded = encode(self, encoding, errors)
      return encoded
  def endswith(self, suffix, start=0, end=None):
      """
      Return a boolean array that is `True` where the string element in
      `self` ends with `suffix`, and `False` otherwise.

      Parameters
      ----------
      suffix, start, end
          Forwarded unchanged to the module-level `endswith`.

      See Also
      --------
      char.endswith
      """
      matches = endswith(self, suffix, start, end)
      return matches
  def expandtabs(self, tabsize=8):
      """
      Return a copy of each string element in which every tab character
      is replaced by one or more spaces.

      Parameters
      ----------
      tabsize : optional
          Forwarded unchanged to the module-level `expandtabs`
          (default 8).

      Returns
      -------
      chararray
          The result of `expandtabs`, passed through `asarray`.

      See Also
      --------
      char.expandtabs
      """
      expanded = expandtabs(self, tabsize)
      return asarray(expanded)
  def find(self, sub, start=0, end=None):
      """
      For each element, return the lowest index in the string at which
      substring `sub` is found.

      Parameters
      ----------
      sub, start, end
          Forwarded unchanged to the module-level `find`.

      See Also
      --------
      char.find
      """
      positions = find(self, sub, start, end)
      return positions
  def index(self, sub, start=0, end=None):
      """
      Like `find`, but raise `ValueError` when the substring is not
      found.

      Parameters
      ----------
      sub, start, end
          Forwarded unchanged to the module-level `index`.

      See Also
      --------
      char.index
      """
      positions = index(self, sub, start, end)
      return positions
  def isalnum(self):
      """
      Return true for each element whose characters are all alphanumeric
      and which has at least one character, false otherwise.

      See Also
      --------
      char.isalnum
      """
      flags = isalnum(self)
      return flags
  def isalpha(self):
      """
      Return true for each element whose characters are all alphabetic
      and which has at least one character, false otherwise.

      See Also
      --------
      char.isalpha
      """
      flags = isalpha(self)
      return flags
  def isdigit(self):
      """
      Return true for each element whose characters are all digits and
      which has at least one character, false otherwise.

      See Also
      --------
      char.isdigit
      """
      flags = isdigit(self)
      return flags
  def islower(self):
      """
      Return true for each element whose cased characters are all
      lowercase and which has at least one cased character, false
      otherwise.

      See Also
      --------
      char.islower
      """
      flags = islower(self)
      return flags
  def isspace(self):
      """
      Return true for each element that contains only whitespace
      characters and has at least one character, false otherwise.

      See Also
      --------
      char.isspace
      """
      flags = isspace(self)
      return flags
  def istitle(self):
      """
      Return true for each element that is a titlecased string with at
      least one character, false otherwise.

      See Also
      --------
      char.istitle
      """
      flags = istitle(self)
      return flags
  def isupper(self):
      """
      Return true for each element whose cased characters are all
      uppercase and which has at least one character, false otherwise.

      See Also
      --------
      char.isupper
      """
      flags = isupper(self)
      return flags
  def join(self, seq):
      """
      Return a string which is the concatenation of the strings in the
      sequence `seq`.

      Parameters
      ----------
      seq : object
          Forwarded unchanged to the module-level `join`.

      See Also
      --------
      char.join
      """
      # Returned as-is: the result is not wrapped with `asarray`.
      combined = join(self, seq)
      return combined
  def ljust(self, width, fillchar=' '):
      """
      Return an array with the elements of `self` left-justified in a
      string of length `width`.

      Parameters
      ----------
      width, fillchar
          Forwarded unchanged to the module-level `ljust`.

      Returns
      -------
      chararray
          The result of `ljust`, passed through `asarray`.

      See Also
      --------
      char.ljust
      """
      padded = ljust(self, width, fillchar)
      return asarray(padded)
  def lower(self):
      """
      Return an array with the elements of `self` converted to lowercase.

      Returns
      -------
      chararray
          The result of the module-level `lower`, passed through
          `asarray`.

      See Also
      --------
      char.lower
      """
      converted = lower(self)
      return asarray(converted)
  def lstrip(self, chars=None):
      """
      For each element in `self`, return a copy with the leading
      characters removed.

      Parameters
      ----------
      chars : optional
          Forwarded unchanged to the module-level `lstrip`.

      Returns
      -------
      chararray
          The result of `lstrip`, passed through `asarray`.

      See Also
      --------
      char.lstrip
      """
      trimmed = lstrip(self, chars)
      return asarray(trimmed)
  def partition(self, sep):
      """
      Partition each element in `self` around `sep`.

      Parameters
      ----------
      sep : object
          Separator, forwarded unchanged to the module-level `partition`.

      Returns
      -------
      chararray
          The result of `partition`, passed through `asarray`.

      See Also
      --------
      partition
      """
      pieces = partition(self, sep)
      return asarray(pieces)
  def replace(self, old, new, count=None):
      """
      For each element in `self`, return a copy of the string with all
      occurrences of substring `old` replaced by `new`.

      Parameters
      ----------
      old, new, count
          Forwarded unchanged to the module-level `replace`.

      Returns
      -------
      chararray
          The result of `replace`, passed through `asarray`.

      See Also
      --------
      char.replace
      """
      substituted = replace(self, old, new, count)
      return asarray(substituted)
  def rfind(self, sub, start=0, end=None):
      """
      For each element in `self`, return the highest index in the string
      where substring `sub` is found, such that `sub` is contained within
      [`start`, `end`].

      Parameters
      ----------
      sub, start, end
          Forwarded unchanged to the module-level `rfind`.

      See Also
      --------
      char.rfind
      """
      positions = rfind(self, sub, start, end)
      return positions
  def rindex(self, sub, start=0, end=None):
      """
      Like `rfind`, but raise `ValueError` when the substring `sub` is
      not found.

      Parameters
      ----------
      sub, start, end
          Forwarded unchanged to the module-level `rindex`.

      See Also
      --------
      char.rindex
      """
      positions = rindex(self, sub, start, end)
      return positions
  def rjust(self, width, fillchar=' '):
      """
      Return an array with the elements of `self` right-justified in a
      string of length `width`.

      Parameters
      ----------
      width, fillchar
          Forwarded unchanged to the module-level `rjust`.

      Returns
      -------
      chararray
          The result of `rjust`, passed through `asarray`.

      See Also
      --------
      char.rjust
      """
      padded = rjust(self, width, fillchar)
      return asarray(padded)
  def rpartition(self, sep):
      """
      Partition each element in `self` around `sep`.

      Parameters
      ----------
      sep : object
          Separator, forwarded unchanged to the module-level
          `rpartition`.

      Returns
      -------
      chararray
          The result of `rpartition`, passed through `asarray`.

      See Also
      --------
      rpartition
      """
      pieces = rpartition(self, sep)
      return asarray(pieces)
  def rsplit(self, sep=None, maxsplit=None):
      """
      For each element in `self`, return a list of the words in the
      string, using `sep` as the delimiter string.

      Parameters
      ----------
      sep, maxsplit
          Forwarded unchanged to the module-level `rsplit`.

      See Also
      --------
      char.rsplit
      """
      # Returned as-is: the result is not wrapped with `asarray`.
      words = rsplit(self, sep, maxsplit)
      return words
  def rstrip(self, chars=None):
      """
      For each element in `self`, return a copy with the trailing
      characters removed.

      Parameters
      ----------
      chars : optional
          Forwarded unchanged to the module-level `rstrip`.

      Returns
      -------
      chararray
          The result of `rstrip`, passed through `asarray`.

      See Also
      --------
      char.rstrip
      """
      trimmed = rstrip(self, chars)
      return asarray(trimmed)
  def split(self, sep=None, maxsplit=None):
      """
      For each element in `self`, return a list of the words in the
      string, using `sep` as the delimiter string.

      Parameters
      ----------
      sep, maxsplit
          Forwarded unchanged to the module-level `split`.

      See Also
      --------
      char.split
      """
      # Returned as-is: the result is not wrapped with `asarray`.
      words = split(self, sep, maxsplit)
      return words
  def splitlines(self, keepends=None):
      """
      For each element in `self`, return a list of the lines in the
      element, breaking at line boundaries.

      Parameters
      ----------
      keepends : optional
          Forwarded unchanged to the module-level `splitlines`.

      See Also
      --------
      char.splitlines
      """
      # Returned as-is: the result is not wrapped with `asarray`.
      lines = splitlines(self, keepends)
      return lines
  def startswith(self, prefix, start=0, end=None):
      """
      Return a boolean array that is `True` where the string element in
      `self` starts with `prefix`, and `False` otherwise.

      Parameters
      ----------
      prefix, start, end
          Forwarded unchanged to the module-level `startswith`.

      See Also
      --------
      char.startswith
      """
      matches = startswith(self, prefix, start, end)
      return matches
  def strip(self, chars=None):
      """
      For each element in `self`, return a copy with the leading and
      trailing characters removed.

      Parameters
      ----------
      chars : optional
          Forwarded unchanged to the module-level `strip`.

      Returns
      -------
      chararray
          The result of `strip`, passed through `asarray`.

      See Also
      --------
      char.strip
      """
      trimmed = strip(self, chars)
      return asarray(trimmed)
  def swapcase(self):
      """
      For each element in `self`, return a copy of the string with
      uppercase characters converted to lowercase and vice versa.

      Returns
      -------
      chararray
          The result of the module-level `swapcase`, passed through
          `asarray`.

      See Also
      --------
      char.swapcase
      """
      converted = swapcase(self)
      return asarray(converted)
  def title(self):
      """
      For each element in `self`, return a titlecased version of the
      string: words start with uppercase characters and all remaining
      cased characters are lowercase.

      Returns
      -------
      chararray
          The result of the module-level `title`, passed through
          `asarray`.

      See Also
      --------
      char.title
      """
      converted = title(self)
      return asarray(converted)
  def translate(self, table, deletechars=None):
      """
      For each element in `self`, return a copy of the string where all
      characters occurring in the optional argument `deletechars` are
      removed, and the remaining characters have been mapped through the
      given translation table.

      Parameters
      ----------
      table, deletechars
          Forwarded unchanged to the module-level `translate`.

      Returns
      -------
      chararray
          The result of `translate`, passed through `asarray`.

      See Also
      --------
      char.translate
      """
      mapped = translate(self, table, deletechars)
      return asarray(mapped)
  def upper(self):
      """
      Return an array with the elements of `self` converted to uppercase.

      Returns
      -------
      chararray
          The result of the module-level `upper`, passed through
          `asarray`.

      See Also
      --------
      char.upper
      """
      converted = upper(self)
      return asarray(converted)
  def zfill(self, width):
      """
      Return the numeric string left-filled with zeros in a string of
      length `width`.

      Parameters
      ----------
      width : object
          Forwarded unchanged to the module-level `zfill`.

      Returns
      -------
      chararray
          The result of `zfill`, passed through `asarray`.

      See Also
      --------
      char.zfill
      """
      padded = zfill(self, width)
      return asarray(padded)
  def isnumeric(self):
      """
      For each element in `self`, return True if there are only numeric
      characters in the element.

      See Also
      --------
      char.isnumeric
      """
      flags = isnumeric(self)
      return flags
  def isdecimal(self):
      """
      For each element in `self`, return True if there are only decimal
      characters in the element.

      See Also
      --------
      char.isdecimal
      """
      flags = isdecimal(self)
      return flags
  def array(obj, itemsize=None, copy=True, unicode=None, order=None):
      """
      Create a `chararray`.

      .. note::
         This class is provided for numarray backward-compatibility.
         New code (not concerned with numarray compatibility) should use
         arrays of type `string_` or `unicode_` and use the free functions
         in :mod:`numpy.char <numpy.core.defchararray>` for fast
         vectorized string operations instead.

      Versus a regular NumPy array of type `str` or `unicode`, this
      class adds the following functionality:

        1) values automatically have whitespace removed from the end
           when indexed

        2) comparison operators automatically remove whitespace from the
           end when comparing values

        3) vectorized string operations are provided as methods
           (e.g. `str.endswith`) and infix operators (e.g. ``+, *, %``)

      Parameters
      ----------
      obj : array of str or unicode-like

      itemsize : int, optional
          `itemsize` is the number of characters per scalar in the
          resulting array.  If `itemsize` is None, and `obj` is an
          object array or a Python list, the `itemsize` will be
          automatically determined.  If `itemsize` is provided and `obj`
          is of type str or unicode, then the `obj` string will be
          chunked into `itemsize` pieces.

      copy : bool, optional
          If true (default), then the object is copied.  Otherwise, a copy
          will only be made if __array__ returns a copy, if obj is a
          nested sequence, or if a copy is needed to satisfy any of the other
          requirements (`itemsize`, unicode, `order`, etc.).

      unicode : bool, optional
          When true, the resulting `chararray` can contain Unicode
          characters, when false only 8-bit characters.  If unicode is
          None and `obj` is one of the following:

            - a `chararray`,
            - an ndarray of type `str` or `unicode`
            - a Python str or unicode object,

          then the unicode setting of the output array will be
          automatically determined.

      order : {'C', 'F', 'A'}, optional
          Specify the order of the array.  If order is 'C' (default), then the
          array will be in C-contiguous order (last-index varies the
          fastest).  If order is 'F', then the returned array
          will be in Fortran-contiguous order (first-index varies the
          fastest).  If order is 'A', then the returned array may
          be in any order (either C-, Fortran-contiguous, or even
          discontiguous).

      Returns
      -------
      chararray
          The converted array.
      """
      # Case 1: a single Python str/bytes is used directly as the buffer.
      if isinstance(obj, (bytes, str)):
          if unicode is None:
              unicode = isinstance(obj, str)

          if itemsize is None:
              itemsize = len(obj)
          # NOTE(review): an empty `obj` (or itemsize == 0) makes this
          # division raise ZeroDivisionError; left unchanged.
          shape = len(obj) // itemsize

          return chararray(shape, itemsize=itemsize, unicode=unicode,
                           buffer=obj, order=order)

      if isinstance(obj, (list, tuple)):
          obj = numpy.asarray(obj)

      # Case 2: an ndarray that already holds character data.
      if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
          # If we just have a vanilla ndarray, create a chararray
          # view around it.
          if not isinstance(obj, chararray):
              obj = obj.view(chararray)

          if itemsize is None:
              itemsize = obj.itemsize
              # itemsize is in 8-bit chars, so for Unicode, we need
              # to divide by the size of a single Unicode character,
              # which for NumPy is always 4
              if issubclass(obj.dtype.type, unicode_):
                  itemsize //= 4

          if unicode is None:
              unicode = issubclass(obj.dtype.type, unicode_)

          dtype = unicode_ if unicode else string_

          if order is not None:
              # asanyarray (unlike asarray) keeps the chararray subclass,
              # so the documented return type survives an explicit `order`.
              obj = numpy.asanyarray(obj, order=order)

          # `obj` is an array here, so the bytes/unicode mismatch has to be
          # detected from its dtype; isinstance() against the scalar types
          # is always False for an array and would skip the conversion.
          #
          # NOTE(review): when `itemsize` was derived above from unicode
          # input it is in characters while `obj.itemsize` is in bytes, so
          # the size comparison is true for any non-empty unicode array.
          # Left unchanged to preserve the existing copy behaviour.
          if (copy or
                  (itemsize != obj.itemsize) or
                  (not unicode and issubclass(obj.dtype.type, unicode_)) or
                  (unicode and issubclass(obj.dtype.type, string_))):
              obj = obj.astype((dtype, int(itemsize)))
          return obj

      if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
          if itemsize is None:
              # Since no itemsize was specified, convert the input array to
              # a list so the ndarray constructor will automatically
              # determine the itemsize for us.
              obj = obj.tolist()
          # Fall through to the default case

      # Default case: let the ndarray constructor do the conversion.
      dtype = unicode_ if unicode else string_

      if itemsize is None:
          val = narray(obj, dtype=dtype, order=order, subok=True)
      else:
          val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
      return val.view(chararray)
  def asarray(obj, itemsize=None, unicode=None, order=None):
      """
      Convert the input to a `chararray`, copying the data only if
      necessary.

      Equivalent to calling `array` with ``copy=False``.

      Versus a regular NumPy array of type `str` or `unicode`, this
      class adds the following functionality:

        1) values automatically have whitespace removed from the end
           when indexed

        2) comparison operators automatically remove whitespace from the
           end when comparing values

        3) vectorized string operations are provided as methods
           (e.g. `str.endswith`) and infix operators
           (e.g. ``+``, ``*``, ``%``)

      Parameters
      ----------
      obj : array of str or unicode-like

      itemsize : int, optional
          `itemsize` is the number of characters per scalar in the
          resulting array.  If `itemsize` is None, and `obj` is an
          object array or a Python list, the `itemsize` will be
          automatically determined.  If `itemsize` is provided and `obj`
          is of type str or unicode, then the `obj` string will be
          chunked into `itemsize` pieces.

      unicode : bool, optional
          When true, the resulting `chararray` can contain Unicode
          characters, when false only 8-bit characters.  If unicode is
          None and `obj` is one of the following:

            - a `chararray`,
            - an ndarray of type `str` or `unicode`
            - a Python str or unicode object,

          then the unicode setting of the output array will be
          automatically determined.

      order : {'C', 'F'}, optional
          Specify the order of the array.  If order is 'C' (default), then the
          array will be in C-contiguous order (last-index varies the
          fastest).  If order is 'F', then the returned array
          will be in Fortran-contiguous order (first-index varies the
          fastest).
      """
      converted = array(obj, itemsize=itemsize, copy=False,
                        unicode=unicode, order=order)
      return converted