Merge pull request #8421 from seberg/isnat

ENH: Add isnat function and make comparison tests NAT specific
author: Charles Harris <charlesr.harris@gmail.com> 2017-05-07 09:40:15 -0600
committer: GitHub <noreply@github.com> 2017-05-07 09:40:15 -0600
commit: 39aaa2d2960e2e4bfb6d13b0f1a169deebf611e2 (patch)
tree: 046cc746d703551e105a6d0ea80f3740270df55b
parent: 25a6dfbbebb1d92e0e402aed7c8f11e4f5d3bc5b (diff)
parent: 868aa751ceb229465d626a7fd7083544afc96ed2 (diff)
download: python-numpy-39aaa2d2960e2e4bfb6d13b0f1a169deebf611e2.tar.gz
python-numpy-39aaa2d2960e2e4bfb6d13b0f1a169deebf611e2.tar.bz2
python-numpy-39aaa2d2960e2e4bfb6d13b0f1a169deebf611e2.zip
12 files changed, 212 insertions, 46 deletions
diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst
index 3deca6a8c..613ce0602 100644
--- a/doc/release/1.13.0-notes.rst
+++ b/doc/release/1.13.0-notes.rst
@@ -167,6 +167,11 @@ In an N-dimensional array, the user can now choose the axis along which to look
 for duplicate N-1-dimensional elements using ``numpy.unique``. The original
 behaviour is recovered if ``axis=None`` (default).
 
+``np.isnat`` function to test for NaT special datetime and timedelta values
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+``np.isnat`` can now be used to find the positions of special NaT values
+within datetime and timedelta arrays. This is analogous to ``np.isnan``.
+
 ``isin`` function, improving on ``in1d``
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 The new function ``isin`` tests whether each element of an N-dimensonal
@@ -467,6 +472,17 @@ is not the expected behavior both according to documentation and intuitively.
 Now, -inf < x < inf is considered ``True`` for any real number x and all
 other cases fail.
 
+``assert_array_`` and masked arrays ``assert_equal`` hide less warnings
+-----------------------------------------------------------------------
+Some warnings that were previously hidden by the ``assert_array_``
+functions are not hidden anymore. In most cases the warnings should be
+correct and, should they occur, will require changes to the tests using
+these functions.
+For the masked array ``assert_equal`` version, warnings may occur when
+comparing NaT. The function presently does not handle NaT or NaN
+specifically and it may be best to avoid it at this time should a warning
+show up due to this change.
+
 ``offset`` attribute value in ``memmap`` objects
 ------------------------------------------------
 The ``offset`` attribute in a ``memmap`` object is now set to the
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index dfba04c18..45476f931 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -799,6 +799,12 @@ defdict = {
           None,
           TD(inexact, out='?'),
           ),
+'isnat':
+    Ufunc(1, 1, None,
+          docstrings.get('numpy.core.umath.isnat'),
+          'PyUFunc_IsNaTTypeResolver',
+          TD(times, out='?'),
+          ),
 'isinf':
     Ufunc(1, 1, None,
           docstrings.get('numpy.core.umath.isinf'),
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index ed9e05b15..c783d4595 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -1690,7 +1690,7 @@ add_newdoc('numpy.core.umath', 'isnan',
 
     See Also
     --------
-    isinf, isneginf, isposinf, isfinite
+    isinf, isneginf, isposinf, isfinite, isnat
 
     Notes
     -----
@@ -1708,6 +1708,41 @@ add_newdoc('numpy.core.umath', 'isnan',
 
     """)
 
+add_newdoc('numpy.core.umath', 'isnat',
+    """
+    Test element-wise for NaT (not a time) and return result as a boolean array.
+
+    Parameters
+    ----------
+    x : array_like
+        Input array with datetime or timedelta data type.
+
+    Returns
+    -------
+    y : ndarray or bool
+        For scalar input, the result is a new boolean with value True if
+        the input is NaT; otherwise the value is False.
+
+        For array input, the result is a boolean array of the same
+        dimensions as the input and the values are True if the
+        corresponding element of the input is NaT; otherwise the values are
+        False.
+
+    See Also
+    --------
+    isnan, isinf, isneginf, isposinf, isfinite
+
+    Examples
+    --------
+    >>> np.isnat(np.datetime64("NaT"))
+    True
+    >>> np.isnat(np.datetime64("2016-01-01"))
+    False
+    >>> np.isnat(np.array(["NaT", "2016-01-01"], dtype="datetime64[ns]"))
+    array([ True, False], dtype=bool)
+
+    """)
+
 add_newdoc('numpy.core.umath', 'left_shift',
     """
     Shift the bits of an integer to the left.
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 47faaf180..e88b87b5c 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1228,6 +1228,15 @@ TIMEDELTA_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNU
  */
 
 NPY_NO_EXPORT void
+@TYPE@_isnat(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    UNARY_LOOP {
+        const @type@ in1 = *(@type@ *)ip1;
+        *((npy_bool *)op1) = (in1 == NPY_DATETIME_NAT);
+    }
+}
+
+NPY_NO_EXPORT void
 @TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data))
 {
     OUTPUT_LOOP {
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index d20b776a0..c1b451c5b 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -402,6 +402,9 @@ TIMEDELTA_sign(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNU
  */
 
 NPY_NO_EXPORT void
+@TYPE@_isnat(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
+NPY_NO_EXPORT void
 @TYPE@__ones_like(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(data));
 
 /**begin repeat1
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index 154dbfb2f..0fd3c45c5 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -537,6 +537,32 @@ PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc,
 }
 
 /*
+ * This function applies special type resolution rules for the isnat
+ * ufunc. This ufunc converts datetime/timedelta -> bool, and is not covered
+ * by the simple unary type resolution.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+NPY_NO_EXPORT int
+PyUFunc_IsNaTTypeResolver(PyUFuncObject *ufunc,
+                          NPY_CASTING casting,
+                          PyArrayObject **operands,
+                          PyObject *type_tup,
+                          PyArray_Descr **out_dtypes)
+{
+    if (!PyTypeNum_ISDATETIME(PyArray_DESCR(operands[0])->type_num)) {
+        PyErr_SetString(PyExc_ValueError,
+                "ufunc 'isnat' is only defined for datetime and timedelta.");
+        return -1;
+    }
+
+    out_dtypes[0] = ensure_dtype_nbo(PyArray_DESCR(operands[0]));
+    out_dtypes[1] = PyArray_DescrFromType(NPY_BOOL);
+
+    return 0;
+}
+
+/*
  * Creates a new NPY_TIMEDELTA dtype, copying the datetime metadata
  * from the given dtype.
  *
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index d20c1e85b..eaf5e91ce 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -42,6 +42,13 @@ PyUFunc_AbsoluteTypeResolver(PyUFuncObject *ufunc,
                              PyArrayObject **operands,
                              PyObject *type_tup,
                              PyArray_Descr **out_dtypes);
+                             
+NPY_NO_EXPORT int
+PyUFunc_IsNaTTypeResolver(PyUFuncObject *ufunc,
+                          NPY_CASTING casting,
+                          PyArrayObject **operands,
+                          PyObject *type_tup,
+                          PyArray_Descr **out_dtypes);
 
 NPY_NO_EXPORT int
 PyUFunc_AdditionTypeResolver(PyUFuncObject *ufunc,
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index 12ebd5ae9..48afa728d 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -1918,6 +1918,35 @@ class TestDateTime(TestCase):
         a = np.datetime64('2038-01-20T13:21:14')
         assert_equal(str(a), '2038-01-20T13:21:14')
 
+    def test_isnat(self):
+        assert_(np.isnat(np.datetime64('NaT', 'ms')))
+        assert_(np.isnat(np.datetime64('NaT', 'ns')))
+        assert_(not np.isnat(np.datetime64('2038-01-19T03:14:07')))
+
+        assert_(np.isnat(np.timedelta64('NaT', "ms")))
+        assert_(not np.isnat(np.timedelta64(34, "ms")))
+
+        res = np.array([False, False, True])
+        for unit in ['Y', 'M', 'W', 'D',
+                     'h', 'm', 's', 'ms', 'us',
+                     'ns', 'ps', 'fs', 'as']:
+            arr = np.array([123, -321, "NaT"], dtype='<datetime64[%s]' % unit)
+            assert_equal(np.isnat(arr), res)
+            arr = np.array([123, -321, "NaT"], dtype='>datetime64[%s]' % unit)
+            assert_equal(np.isnat(arr), res)
+            arr = np.array([123, -321, "NaT"], dtype='<timedelta64[%s]' % unit)
+            assert_equal(np.isnat(arr), res)
+            arr = np.array([123, -321, "NaT"], dtype='>timedelta64[%s]' % unit)
+            assert_equal(np.isnat(arr), res)
+
+    def test_isnat_error(self):
+        # Test that only datetime dtype arrays are accepted
+        for t in np.typecodes["All"]:
+            if t in np.typecodes["Datetime"]:
+                continue
+            assert_raises(ValueError, np.isnat, np.zeros(10, t))
+
+
 class TestDateTimeData(TestCase):
 
     def test_basic(self):
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index e72088692..d37a561c2 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -1841,11 +1841,11 @@ class TestFillingValues(TestCase):
                          "h", "D", "W", "M", "Y"):
             control = numpy.datetime64("NaT", timecode)
             test = default_fill_value(numpy.dtype("<M8[" + timecode + "]"))
-            assert_equal(test, control)
+            np.testing.utils.assert_equal(test, control)
 
             control = numpy.timedelta64("NaT", timecode)
             test = default_fill_value(numpy.dtype("<m8[" + timecode + "]"))
-            assert_equal(test, control)
+            np.testing.utils.assert_equal(test, control)
 
     def test_extremum_fill_value(self):
         # Tests extremum fill values for flexible type.
diff --git a/numpy/ma/testutils.py b/numpy/ma/testutils.py
index 866316c62..c19066d71 100644
--- a/numpy/ma/testutils.py
+++ b/numpy/ma/testutils.py
@@ -14,7 +14,7 @@ from numpy import ndarray, float_
 import numpy.core.umath as umath
 from numpy.testing import (
     TestCase, assert_, assert_allclose, assert_array_almost_equal_nulp,
-    assert_raises, build_err_msg, run_module_suite, suppress_warnings
+    assert_raises, build_err_msg, run_module_suite
     )
 import numpy.testing.utils as utils
 from .core import mask_or, getmask, masked_array, nomask, masked, filled
@@ -126,10 +126,8 @@ def assert_equal(actual, desired, err_msg=''):
         return _assert_equal_on_sequences(actual, desired, err_msg='')
     if not (isinstance(actual, ndarray) or isinstance(desired, ndarray)):
         msg = build_err_msg([actual, desired], err_msg,)
-        with suppress_warnings() as sup:
-            sup.filter(FutureWarning, ".*NAT ==")
-            if not desired == actual:
-                raise AssertionError(msg)
+        if not desired == actual:
+            raise AssertionError(msg)
         return
     # Case #4. arrays or equivalent
     if ((actual is masked) and not (desired is masked)) or \
diff --git a/numpy/testing/tests/test_utils.py b/numpy/testing/tests/test_utils.py
index 804f22b7f..e2c105245 100644
--- a/numpy/testing/tests/test_utils.py
+++ b/numpy/testing/tests/test_utils.py
@@ -3,6 +3,7 @@ from __future__ import division, absolute_import, print_function
 import warnings
 import sys
 import os
+import itertools
 
 import numpy as np
 from numpy.testing import (
@@ -144,7 +145,10 @@ class TestArrayEqual(_GenericTest, unittest.TestCase):
         c['floupipi'] = a['floupi'].copy()
         c['floupa'] = a['floupa'].copy()
 
-        self._test_not_equal(c, b)
+        with suppress_warnings() as sup:
+            l = sup.record(FutureWarning, message="elementwise == ")
+            self._test_not_equal(c, b)
+            assert_(len(l) == 1)
 
 
 class TestBuildErrorMessage(unittest.TestCase):
@@ -208,6 +212,37 @@ class TestEqual(TestArrayEqual):
         self._assert_func([np.inf], [np.inf])
         self._test_not_equal(np.inf, [np.inf])
 
+    def test_nat_items(self):
+        # not a datetime
+        nadt_no_unit = np.datetime64("NaT")
+        nadt_s = np.datetime64("NaT", "s")
+        nadt_d = np.datetime64("NaT", "ns")
+        # not a timedelta
+        natd_no_unit = np.timedelta64("NaT")
+        natd_s = np.timedelta64("NaT", "s")
+        natd_d = np.timedelta64("NaT", "ns")
+
+        dts = [nadt_no_unit, nadt_s, nadt_d]
+        tds = [natd_no_unit, natd_s, natd_d]
+        for a, b in itertools.product(dts, dts):
+            self._assert_func(a, b)
+            self._assert_func([a], [b])
+            self._test_not_equal([a], b)
+
+        for a, b in itertools.product(tds, tds):
+            self._assert_func(a, b)
+            self._assert_func([a], [b])
+            self._test_not_equal([a], b)
+
+        for a, b in itertools.product(tds, dts):
+            self._test_not_equal(a, b)
+            self._test_not_equal(a, [b])
+            self._test_not_equal([a], [b])
+            self._test_not_equal([a], np.datetime64("2017-01-01", "s"))
+            self._test_not_equal([b], np.datetime64("2017-01-01", "s"))
+            self._test_not_equal([a], np.timedelta64(123, "s"))
+            self._test_not_equal([b], np.timedelta64(123, "s"))
+
     def test_non_numeric(self):
         self._assert_func('ab', 'ab')
         self._test_not_equal('ab', 'abb')
diff --git a/numpy/testing/utils.py b/numpy/testing/utils.py
index b5a7e05c4..f54995870 100644
--- a/numpy/testing/utils.py
+++ b/numpy/testing/utils.py
@@ -15,7 +15,8 @@ import contextlib
 from tempfile import mkdtemp, mkstemp
 from unittest.case import SkipTest
 
-from numpy.core import float32, empty, arange, array_repr, ndarray
+from numpy.core import(
+     float32, empty, arange, array_repr, ndarray, isnat, array)
 from numpy.lib.utils import deprecate
 
 if sys.version_info[0] >= 3:
@@ -286,7 +287,7 @@ def build_err_msg(arrays, err_msg, header='Items are not equal:',
     return '\n'.join(msg)
 
 
-def assert_equal(actual,desired,err_msg='',verbose=True):
+def assert_equal(actual, desired, err_msg='', verbose=True):
     """
     Raises an AssertionError if two objects are not equal.
 
@@ -369,12 +370,12 @@ def assert_equal(actual,desired,err_msg='',verbose=True):
         except AssertionError:
             raise AssertionError(msg)
 
+    # isscalar test to check cases such as [np.nan] != np.nan
+    if isscalar(desired) != isscalar(actual):
+        raise AssertionError(msg)
+
     # Inf/nan/negative zero handling
     try:
-        # isscalar test to check cases such as [np.nan] != np.nan
-        if isscalar(desired) != isscalar(actual):
-            raise AssertionError(msg)
-
         # If one of desired/actual is not finite, handle it specially here:
         # check that both are nan if any is a nan, and test for equality
         # otherwise
@@ -396,14 +397,24 @@ def assert_equal(actual,desired,err_msg='',verbose=True):
     except (TypeError, ValueError, NotImplementedError):
         pass
 
-    # Explicitly use __eq__ for comparison, ticket #2552
-    with suppress_warnings() as sup:
-        # TODO: Better handling will to needed when change happens!
-        sup.filter(DeprecationWarning, ".*NAT ==")
-        sup.filter(FutureWarning, ".*NAT ==")
-        if not (desired == actual):
+    try:
+        # If both are NaT (and have the same dtype -- datetime or timedelta)
+        # they are considered equal.
+        if (isnat(desired) == isnat(actual) and
+                array(desired).dtype.type == array(actual).dtype.type):
+            return
+        else:
             raise AssertionError(msg)
 
+    # If TypeError or ValueError raised while using isnan and co, just handle
+    # as before
+    except (TypeError, ValueError, NotImplementedError):
+        pass
+
+    # Explicitly use __eq__ for comparison, ticket #2552
+    if not (desired == actual):
+        raise AssertionError(msg)
+
 
 def print_assert_equal(test_string, actual, desired):
     """
@@ -674,33 +685,12 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
     x = array(x, copy=False, subok=True)
     y = array(y, copy=False, subok=True)
 
-    def safe_comparison(*args, **kwargs):
-        # There are a number of cases where comparing two arrays hits special
-        # cases in array_richcompare, specifically around strings and void
-        # dtypes. Basically, we just can't do comparisons involving these
-        # types, unless both arrays have exactly the *same* type. So
-        # e.g. you can apply == to two string arrays, or two arrays with
-        # identical structured dtypes. But if you compare a non-string array
-        # to a string array, or two arrays with non-identical structured
-        # dtypes, or anything like that, then internally stuff blows up.
-        # Currently, when things blow up, we just return a scalar False or
-        # True. But we also emit a DeprecationWarning, b/c eventually we
-        # should raise an error here. (Ideally we might even make this work
-        # properly, but since that will require rewriting a bunch of how
-        # ufuncs work then we are not counting on that.)
-        #
-        # The point of this little function is to let the DeprecationWarning
-        # pass (or maybe eventually catch the errors and return False, I
-        # dunno, that's a little trickier and we can figure that out when the
-        # time comes).
-        with suppress_warnings() as sup:
-            sup.filter(DeprecationWarning, ".*==")
-            sup.filter(FutureWarning, ".*==")
-            return comparison(*args, **kwargs)
-
     def isnumber(x):
         return x.dtype.char in '?bhilqpBHILQPefdgFDG'
 
+    def istime(x):
+        return x.dtype.char in "Mm"
+
     def chk_same_position(x_id, y_id, hasval='nan'):
         """Handling nan/inf: check that x and y have the nan/inf at the same
         locations."""
@@ -756,7 +746,19 @@ def assert_array_compare(comparison, x, y, err_msg='', verbose=True,
             if x.size == 0:
                 return
 
-        val = safe_comparison(x, y)
+        elif istime(x) and istime(y):
+            # If one is datetime64 and the other timedelta64 there is no point
+            if equal_nan and x.dtype.type == y.dtype.type:
+                x_isnat, y_isnat = isnat(x), isnat(y)
+
+                if any(x_isnat) or any(y_isnat):
+                    chk_same_position(x_isnat, y_isnat, hasval="NaT")
+
+                if any(x_isnat) or any(y_isnat):
+                    x = x[~x_isnat]
+                    y = y[~y_isnat]
+
+        val = comparison(x, y)
 
         if isinstance(val, bool):
             cond = val
author	Charles Harris <charlesr.harris@gmail.com>	2017-05-07 09:40:15 -0600
committer	GitHub <noreply@github.com>	2017-05-07 09:40:15 -0600
commit	39aaa2d2960e2e4bfb6d13b0f1a169deebf611e2 (patch)
tree	046cc746d703551e105a6d0ea80f3740270df55b
parent	25a6dfbbebb1d92e0e402aed7c8f11e4f5d3bc5b (diff)
parent	868aa751ceb229465d626a7fd7083544afc96ed2 (diff)
download	python-numpy-39aaa2d2960e2e4bfb6d13b0f1a169deebf611e2.tar.gz python-numpy-39aaa2d2960e2e4bfb6d13b0f1a169deebf611e2.tar.bz2 python-numpy-39aaa2d2960e2e4bfb6d13b0f1a169deebf611e2.zip