from __future__ import division, absolute_import, print_function import sys import numpy as np from numpy.compat import unicode from numpy.testing import ( run_module_suite, assert_, assert_equal, assert_array_equal) # Guess the UCS length for this python interpreter if sys.version_info[:2] >= (3, 3): # Python 3.3 uses a flexible string representation ucs4 = False def buffer_length(arr): if isinstance(arr, unicode): arr = str(arr) if not arr: charmax = 0 else: charmax = max([ord(c) for c in arr]) if charmax < 256: size = 1 elif charmax < 65536: size = 2 else: size = 4 return size * len(arr) v = memoryview(arr) if v.shape is None: return len(v) * v.itemsize else: return np.prod(v.shape) * v.itemsize else: if len(buffer(u'u')) == 4: ucs4 = True else: ucs4 = False def buffer_length(arr): if isinstance(arr, np.ndarray): return len(arr.data) return len(buffer(arr)) # In both cases below we need to make sure that the byte swapped value (as # UCS4) is still a valid unicode: # Value that can be represented in UCS2 interpreters ucs2_value = u'\u0900' # Value that cannot be represented in UCS2 interpreters (but can in UCS4) ucs4_value = u'\U00100900' def test_string_cast(): str_arr = np.array(["1234", "1234\0\0"], dtype='S') uni_arr1 = str_arr.astype('>U') uni_arr2 = str_arr.astype('