summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEric Wieser <wieser.eric@gmail.com>2017-05-10 11:05:19 +0100
committerGitHub <noreply@github.com>2017-05-10 11:05:19 +0100
commit2c1470316604a3599538bd4d33509555b8652a6d (patch)
treeddd79c7e2c1260fd96bd24fae9174b4cd3372f7c
parent790dbcb6dde4259ec073444a3294975f304b6f49 (diff)
parent9177d0b5776550e2fbb3b1c9a922832a6553f3e2 (diff)
downloadpython-numpy-2c1470316604a3599538bd4d33509555b8652a6d.tar.gz
python-numpy-2c1470316604a3599538bd4d33509555b8652a6d.tar.bz2
python-numpy-2c1470316604a3599538bd4d33509555b8652a6d.zip
Merge pull request #9070 from ahaldane/silence_join_by
BUG: Preserve field order in join_by, avoids FutureWarning
-rw-r--r--numpy/lib/recfunctions.py40
-rw-r--r--numpy/lib/tests/test_recfunctions.py15
2 files changed, 49 insertions, 6 deletions
diff --git a/numpy/lib/recfunctions.py b/numpy/lib/recfunctions.py
index d3d58d1f2..b9542e848 100644
--- a/numpy/lib/recfunctions.py
+++ b/numpy/lib/recfunctions.py
@@ -495,7 +495,7 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
dtype=[('a', '<i4')])
"""
if _is_string_like(drop_names):
- drop_names = [drop_names, ]
+ drop_names = [drop_names]
else:
drop_names = set(drop_names)
@@ -523,6 +523,31 @@ def drop_fields(base, drop_names, usemask=True, asrecarray=False):
return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
+def _keep_fields(base, keep_names, usemask=True, asrecarray=False):
+ """
+ Return a new array keeping only the fields in `keep_names`,
+ and preserving the order of those fields.
+
+ Parameters
+ ----------
+ base : array
+ Input array
+ keep_names : string or sequence
+ String or sequence of strings corresponding to the names of the
+ fields to keep. Order of the names will be preserved.
+ usemask : {False, True}, optional
+ Whether to return a masked array or not.
+ asrecarray : string or sequence, optional
+ Whether to return a recarray or a mrecarray (`asrecarray=True`) or
+ a plain ndarray or masked array with flexible dtype. The default
+ is False.
+ """
+ newdtype = [(n, base.dtype[n]) for n in keep_names]
+ output = np.empty(base.shape, dtype=newdtype)
+ output = recursive_fill_fields(base, output)
+ return _fix_output(output, usemask=usemask, asrecarray=asrecarray)
+
+
def rec_drop_fields(base, drop_names):
"""
Returns a new numpy.recarray with fields in `drop_names` dropped.
@@ -877,11 +902,14 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
key = (key,)
# Check the keys
+ if len(set(key)) != len(key):
+ dup = next(x for n,x in enumerate(key) if x in key[n+1:])
+ raise ValueError("duplicate join key %r" % dup)
for name in key:
if name not in r1.dtype.names:
- raise ValueError('r1 does not have key field %s' % name)
+ raise ValueError('r1 does not have key field %r' % name)
if name not in r2.dtype.names:
- raise ValueError('r2 does not have key field %s' % name)
+ raise ValueError('r2 does not have key field %r' % name)
# Make sure we work with ravelled arrays
r1 = r1.ravel()
@@ -899,8 +927,10 @@ def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2',
raise ValueError(msg)
# Make temporary arrays of just the keys
- r1k = drop_fields(r1, [n for n in r1names if n not in key])
- r2k = drop_fields(r2, [n for n in r2names if n not in key])
+ # (use order of keys in `r1` for back-compatibility)
+ key1 = [ n for n in r1names if n in key ]
+ r1k = _keep_fields(r1, key1)
+ r2k = _keep_fields(r2, key1)
# Concatenate the two arrays for comparison
aux = ma.concatenate((r1k, r2k))
diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py
index 699a04716..0940d37b0 100644
--- a/numpy/lib/tests/test_recfunctions.py
+++ b/numpy/lib/tests/test_recfunctions.py
@@ -4,7 +4,7 @@ import numpy as np
import numpy.ma as ma
from numpy.ma.mrecords import MaskedRecords
from numpy.ma.testutils import assert_equal
-from numpy.testing import TestCase, run_module_suite, assert_
+from numpy.testing import TestCase, run_module_suite, assert_, assert_raises
from numpy.lib.recfunctions import (
drop_fields, rename_fields, get_fieldstructure, recursive_fill_fields,
find_duplicates, merge_arrays, append_fields, stack_arrays, join_by
@@ -633,6 +633,19 @@ class TestJoinBy(TestCase):
dtype=[('a', int), ('b', int), ('c', int), ('d', int)])
assert_equal(test, control)
+ def test_different_field_order(self):
+ # gh-8940
+ a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')])
+ b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')])
+ # this should not give a FutureWarning:
+ j = join_by(['c', 'b'], a, b, jointype='inner', usemask=False)
+ assert_equal(j.dtype.names, ['b', 'c', 'a1', 'a2'])
+
+ def test_duplicate_keys(self):
+ a = np.zeros(3, dtype=[('a', 'i4'), ('b', 'f4'), ('c', 'u1')])
+ b = np.ones(3, dtype=[('c', 'u1'), ('b', 'f4'), ('a', 'i4')])
+ assert_raises(ValueError, join_by, ['a', 'b', 'b'], a, b)
+
class TestJoinBy2(TestCase):
@classmethod