Imported Upstream version 1.16.1 upstream/1.16.1

author: DongHun Kwak <dh0128.kwak@samsung.com> 2020-12-31 09:34:06 +0900
committer: DongHun Kwak <dh0128.kwak@samsung.com> 2020-12-31 09:34:06 +0900
commit: 4b17002dd27193e2eb87f4b3c7566d929a7ac788 (patch)
tree: c6c5142f99ce1479902e180d434f81d5d2f922a0 /numpy/core
parent: f14f97841aa140385b7fca2aeb1c7c96b2711560 (diff)
download: python-numpy-4b17002dd27193e2eb87f4b3c7566d929a7ac788.tar.gz
python-numpy-4b17002dd27193e2eb87f4b3c7566d929a7ac788.tar.bz2
python-numpy-4b17002dd27193e2eb87f4b3c7566d929a7ac788.zip
50 files changed, 1736 insertions, 353 deletions
diff --git a/numpy/core/__init__.py b/numpy/core/__init__.py
index 80ce84f00..6b76e63b7 100644
--- a/numpy/core/__init__.py
+++ b/numpy/core/__init__.py
@@ -53,7 +53,19 @@ del env_added
 del os
 
 from . import umath
-from . import _internal  # for freeze programs
+
+# Check that multiarray and umath are pure python modules wrapping
+# _multiarray_umath and not either of the old c-extension modules
+if not (hasattr(multiarray, '_multiarray_umath') and
+        hasattr(umath, '_multiarray_umath')):
+    import sys
+    path = sys.modules['numpy'].__path__
+    msg = ("Something is wrong with the numpy installation. "
+        "While importing we detected an older version of "
+        "numpy in {}. One method of fixing this is to repeatedly uninstall "
+        "numpy until none is found, then reinstall this version.")
+    raise ImportError(msg.format(path))
+
 from . import numerictypes as nt
 multiarray.set_typeDict(nt.sctypeDict)
 from . import numeric
@@ -83,6 +95,11 @@ from .numeric import absolute as abs
 # do this after everything else, to minimize the chance of this misleadingly
 # appearing in an import-time traceback
 from . import _add_newdocs
+# add these for module-freeze analysis (like PyInstaller)
+from . import _dtype_ctypes
+from . import _internal
+from . import _dtype
+from . import _methods
 
 __all__ = ['char', 'rec', 'memmap']
 __all__ += numeric.__all__
diff --git a/numpy/core/_internal.py b/numpy/core/_internal.py
index 27a3deeda..1d3bb5584 100644
--- a/numpy/core/_internal.py
+++ b/numpy/core/_internal.py
@@ -830,6 +830,13 @@ def array_ufunc_errmsg_formatter(dummy, ufunc, method, *inputs, **kwargs):
             .format(ufunc, method, args_string, types_string))
 
 
+def array_function_errmsg_formatter(public_api, types):
+    """ Format the error message for when __array_function__ gives up. """
+    func_name = '{}.{}'.format(public_api.__module__, public_api.__name__)
+    return ("no implementation found for '{}' on types that implement "
+            '__array_function__: {}'.format(func_name, list(types)))
+
+
 def _ufunc_doc_signature_formatter(ufunc):
     """
     Builds a signature string which resembles PEP 457
diff --git a/numpy/core/_methods.py b/numpy/core/_methods.py
index baeab6383..33f6d01a8 100644
--- a/numpy/core/_methods.py
+++ b/numpy/core/_methods.py
@@ -154,15 +154,3 @@ def _ptp(a, axis=None, out=None, keepdims=False):
         umr_minimum(a, axis, None, None, keepdims),
         out
     )
-
-_NDARRAY_ARRAY_FUNCTION = mu.ndarray.__array_function__
-
-def _array_function(self, func, types, args, kwargs):
-    # TODO: rewrite this in C
-    # Cannot handle items that have __array_function__ other than our own.
-    for t in types:
-        if not issubclass(t, mu.ndarray) and hasattr(t, '__array_function__'):
-            return NotImplemented
-
-    # The regular implementation can handle this, so we call it directly.
-    return func.__wrapped__(*args, **kwargs)
diff --git a/numpy/core/code_generators/genapi.py b/numpy/core/code_generators/genapi.py
index 1d2cd25c8..4aca2373c 100644
--- a/numpy/core/code_generators/genapi.py
+++ b/numpy/core/code_generators/genapi.py
@@ -19,6 +19,7 @@ __docformat__ = 'restructuredtext'
 
 # The files under src/ that are scanned for API functions
 API_FILES = [join('multiarray', 'alloc.c'),
+             join('multiarray', 'arrayfunction_override.c'),
              join('multiarray', 'array_assign_array.c'),
              join('multiarray', 'array_assign_scalar.c'),
              join('multiarray', 'arrayobject.c'),
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index f5ee02c42..0fac9b05e 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -315,7 +315,7 @@ defdict = {
           TD(intfltcmplx),
           [TypeDescription('m', FullTypeDescr, 'mq', 'm'),
            TypeDescription('m', FullTypeDescr, 'md', 'm'),
-           #TypeDescription('m', FullTypeDescr, 'mm', 'd'),
+           TypeDescription('m', FullTypeDescr, 'mm', 'q'),
           ],
           TD(O, f='PyNumber_FloorDivide'),
           ),
@@ -802,8 +802,9 @@ defdict = {
 'divmod':
     Ufunc(2, 2, None,
           docstrings.get('numpy.core.umath.divmod'),
-          None,
+          'PyUFunc_DivmodTypeResolver',
           TD(intflt),
+          [TypeDescription('m', FullTypeDescr, 'mm', 'qm')],
           # TD(O, f='PyNumber_Divmod'),  # gh-9730
           ),
 'hypot':
diff --git a/numpy/core/multiarray.py b/numpy/core/multiarray.py
index df0ed2df4..790896922 100644
--- a/numpy/core/multiarray.py
+++ b/numpy/core/multiarray.py
@@ -40,6 +40,10 @@ __all__ = [
     'tracemalloc_domain', 'typeinfo', 'unpackbits', 'unravel_index', 'vdot',
     'where', 'zeros']
 
+# For backward compatibility, make sure pickle imports these functions from here
+_reconstruct.__module__ = 'numpy.core.multiarray'
+scalar.__module__ = 'numpy.core.multiarray'
+
 
 arange.__module__ = 'numpy'
 array.__module__ = 'numpy'
@@ -211,9 +215,11 @@ def concatenate(arrays, axis=None, out=None):
            fill_value=999999)
 
     """
-    for array in arrays:
-        yield array
-    yield out
+    if out is not None:
+        # optimize for the typical case where only arrays is provided
+        arrays = list(arrays)
+        arrays.append(out)
+    return arrays
 
 
 @array_function_from_c_func_and_dispatcher(_multiarray_umath.inner)
diff --git a/numpy/core/overrides.py b/numpy/core/overrides.py
index 0979858a1..c55174ecd 100644
--- a/numpy/core/overrides.py
+++ b/numpy/core/overrides.py
@@ -1,73 +1,23 @@
-"""Preliminary implementation of NEP-18
-
-TODO: rewrite this in C for performance.
-"""
+"""Implementation of __array_function__ overrides from NEP-18."""
 import collections
 import functools
 import os
 
-from numpy.core._multiarray_umath import add_docstring, ndarray
+from numpy.core._multiarray_umath import (
+    add_docstring, implement_array_function, _get_implementing_args)
 from numpy.compat._inspect import getargspec
 
 
-_NDARRAY_ARRAY_FUNCTION = ndarray.__array_function__
-_NDARRAY_ONLY = [ndarray]
-
 ENABLE_ARRAY_FUNCTION = bool(
     int(os.environ.get('NUMPY_EXPERIMENTAL_ARRAY_FUNCTION', 0)))
 
 
-def get_overloaded_types_and_args(relevant_args):
-    """Returns a list of arguments on which to call __array_function__.
-
-    Parameters
-    ----------
-    relevant_args : iterable of array-like
-        Iterable of array-like arguments to check for __array_function__
-        methods.
-
-    Returns
-    -------
-    overloaded_types : collection of types
-        Types of arguments from relevant_args with __array_function__ methods.
-    overloaded_args : list
-        Arguments from relevant_args on which to call __array_function__
-        methods, in the order in which they should be called.
+add_docstring(
+    implement_array_function,
     """
-    # Runtime is O(num_arguments * num_unique_types)
-    overloaded_types = []
-    overloaded_args = []
-    for arg in relevant_args:
-        arg_type = type(arg)
-        # We only collect arguments if they have a unique type, which ensures
-        # reasonable performance even with a long list of possibly overloaded
-        # arguments.
-        if (arg_type not in overloaded_types and
-                hasattr(arg_type, '__array_function__')):
-
-            # Create lists explicitly for the first type (usually the only one
-            # done) to avoid setting up the iterator for overloaded_args.
-            if overloaded_types:
-                overloaded_types.append(arg_type)
-                # By default, insert argument at the end, but if it is
-                # subclass of another argument, insert it before that argument.
-                # This ensures "subclasses before superclasses".
-                index = len(overloaded_args)
-                for i, old_arg in enumerate(overloaded_args):
-                    if issubclass(arg_type, type(old_arg)):
-                        index = i
-                        break
-                overloaded_args.insert(index, arg)
-            else:
-                overloaded_types = [arg_type]
-                overloaded_args = [arg]
-
-    return overloaded_types, overloaded_args
-
-
-def array_function_implementation_or_override(
-        implementation, public_api, relevant_args, args, kwargs):
-    """Implement a function with checks for __array_function__ overrides.
+    Implement a function with checks for __array_function__ overrides.
+
+    All arguments are required, and can only be passed by position.
 
     Arguments
     ---------
@@ -82,41 +32,37 @@ def array_function_implementation_or_override(
         Iterable of arguments to check for __array_function__ methods.
     args : tuple
         Arbitrary positional arguments originally passed into ``public_api``.
-    kwargs : tuple
+    kwargs : dict
         Arbitrary keyword arguments originally passed into ``public_api``.
 
     Returns
     -------
-    Result from calling `implementation()` or an `__array_function__`
+    Result from calling ``implementation()`` or an ``__array_function__``
     method, as appropriate.
 
     Raises
     ------
     TypeError : if no implementation is found.
+    """)
+
+
+# exposed for testing purposes; used internally by implement_array_function
+add_docstring(
+    _get_implementing_args,
     """
-    # Check for __array_function__ methods.
-    types, overloaded_args = get_overloaded_types_and_args(relevant_args)
-    # Short-cut for common cases: no overload or only ndarray overload
-    # (directly or with subclasses that do not override __array_function__).
-    if (not overloaded_args or types == _NDARRAY_ONLY or
-            all(type(arg).__array_function__ is _NDARRAY_ARRAY_FUNCTION
-                for arg in overloaded_args)):
-        return implementation(*args, **kwargs)
-
-    # Call overrides
-    for overloaded_arg in overloaded_args:
-        # Use `public_api` instead of `implemenation` so __array_function__
-        # implementations can do equality/identity comparisons.
-        result = overloaded_arg.__array_function__(
-            public_api, types, args, kwargs)
-
-        if result is not NotImplemented:
-            return result
-
-    func_name = '{}.{}'.format(public_api.__module__, public_api.__name__)
-    raise TypeError("no implementation found for '{}' on types that implement "
-                    '__array_function__: {}'
-                    .format(func_name, list(map(type, overloaded_args))))
+    Collect arguments on which to call __array_function__.
+
+    Parameters
+    ----------
+    relevant_args : iterable of array-like
+        Iterable of possibly array-like arguments to check for
+        __array_function__ methods.
+
+    Returns
+    -------
+    Sequence of arguments with __array_function__ methods, in the order in
+    which they should be called.
+    """)
 
 
 ArgSpec = collections.namedtuple('ArgSpec', 'args varargs keywords defaults')
@@ -215,7 +161,7 @@ def array_function_dispatch(dispatcher, module=None, verify=True,
         @functools.wraps(implementation)
         def public_api(*args, **kwargs):
             relevant_args = dispatcher(*args, **kwargs)
-            return array_function_implementation_or_override(
+            return implement_array_function(
                 implementation, public_api, relevant_args, args, kwargs)
 
         if module is not None:
diff --git a/numpy/core/records.py b/numpy/core/records.py
index 86a43306a..5898bb163 100644
--- a/numpy/core/records.py
+++ b/numpy/core/records.py
@@ -711,7 +711,7 @@ def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
     a string"""
 
     if dtype is None and formats is None:
-        raise ValueError("Must have dtype= or formats=")
+        raise TypeError("fromstring() needs a 'dtype' or 'formats' argument")
 
     if dtype is not None:
         descr = sb.dtype(dtype)
@@ -758,6 +758,9 @@ def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
     >>> r.shape
     (10,)
     """
+    
+    if dtype is None and formats is None:
+        raise TypeError("fromfile() needs a 'dtype' or 'formats' argument")
 
     if (shape is None or shape == 0):
         shape = (-1,)
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index 467b590ac..00a9ffe69 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -775,6 +775,7 @@ def configuration(parent_package='',top_path=None):
     multiarray_deps = [
             join('src', 'multiarray', 'arrayobject.h'),
             join('src', 'multiarray', 'arraytypes.h'),
+            join('src', 'multiarray', 'arrayfunction_override.h'),
             join('src', 'multiarray', 'buffer.h'),
             join('src', 'multiarray', 'calculation.h'),
             join('src', 'multiarray', 'common.h'),
@@ -827,6 +828,7 @@ def configuration(parent_package='',top_path=None):
             join('src', 'multiarray', 'arraytypes.c.src'),
             join('src', 'multiarray', 'array_assign_scalar.c'),
             join('src', 'multiarray', 'array_assign_array.c'),
+            join('src', 'multiarray', 'arrayfunction_override.c'),
             join('src', 'multiarray', 'buffer.c'),
             join('src', 'multiarray', 'calculation.c'),
             join('src', 'multiarray', 'compiled_base.c'),
@@ -960,6 +962,14 @@ def configuration(parent_package='',top_path=None):
     config.add_extension('_operand_flag_tests',
                     sources=[join('src', 'umath', '_operand_flag_tests.c.src')])
 
+    #######################################################################
+    #                        _multiarray_module_test module               #
+    #######################################################################
+
+    config.add_extension('_multiarray_module_test',
+                    sources=[join('src', 'multiarray',
+                                         '_multiarray_module_test.c')])
+
     config.add_data_dir('tests')
     config.add_data_dir('tests/data')
 
diff --git a/numpy/core/shape_base.py b/numpy/core/shape_base.py
index a529d2ad7..d20afd8be 100644
--- a/numpy/core/shape_base.py
+++ b/numpy/core/shape_base.py
@@ -342,10 +342,11 @@ def hstack(tup):
 
 def _stack_dispatcher(arrays, axis=None, out=None):
     arrays = _arrays_for_stack_dispatcher(arrays, stacklevel=6)
-    for a in arrays:
-        yield a
     if out is not None:
-        yield out
+        # optimize for the typical case where only arrays is provided
+        arrays = list(arrays)
+        arrays.append(out)
+    return arrays
 
 
 @array_function_dispatch(_stack_dispatcher)
diff --git a/numpy/core/src/common/get_attr_string.h b/numpy/core/src/common/get_attr_string.h
index bec87c5ed..d458d9550 100644
--- a/numpy/core/src/common/get_attr_string.h
+++ b/numpy/core/src/common/get_attr_string.h
@@ -103,7 +103,6 @@ PyArray_LookupSpecial(PyObject *obj, char *name)
     if (_is_basic_python_type(tp)) {
         return NULL;
     }
-
     return maybe_get_attr((PyObject *)tp, name);
 }
 
diff --git a/numpy/core/src/multiarray/_multiarray_module_test.c b/numpy/core/src/multiarray/_multiarray_module_test.c
new file mode 100644
index 000000000..8dc017279
--- /dev/null
+++ b/numpy/core/src/multiarray/_multiarray_module_test.c
@@ -0,0 +1,129 @@
+#include "Python.h"
+
+/*
+ * This is a dummy module. It will be used to ruin the import of multiarray
+ * during testing. It exports two entry points, one to make the build happy,
+ * and a multiarray one for the actual test. The content of the module is
+ * irrelevant to the test.
+ *
+ * The code is from
+ * https://docs.python.org/3/howto/cporting.html
+ * or
+ * https://github.com/python/cpython/blob/v3.7.0/Doc/howto/cporting.rst
+ */
+
+#if defined _WIN32 || defined __CYGWIN__ || defined __MINGW32__
+  #if defined __GNUC__ || defined __clang__
+    #define DLL_PUBLIC __attribute__ ((dllexport))
+  #else
+    #define DLL_PUBLIC __declspec(dllexport)
+  #endif
+#elif defined __GNUC__  || defined __clang__
+  #define DLL_PUBLIC __attribute__ ((visibility ("default")))
+#else
+    /* Possible enhancement: emit a compile-time #error here instead? */
+    #define DLL_PUBLIC
+#endif
+
+struct module_state {
+    PyObject *error;
+};
+
+#if PY_MAJOR_VERSION >= 3
+#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))
+#else
+#define GETSTATE(m) (&_state)
+static struct module_state _state;
+#endif
+
+static PyObject *
+error_out(PyObject *m) {
+    struct module_state *st = GETSTATE(m);
+    PyErr_SetString(st->error, "something bad happened");
+    return NULL;
+}
+
+static PyMethodDef multiarray_methods[] = {
+    {"error_out", (PyCFunction)error_out, METH_NOARGS, NULL},
+    {NULL, NULL}
+};
+
+#if PY_MAJOR_VERSION >= 3
+
+static int multiarray_traverse(PyObject *m, visitproc visit, void *arg) {
+    Py_VISIT(GETSTATE(m)->error);
+    return 0;
+}
+
+static int multiarray_clear(PyObject *m) {
+    Py_CLEAR(GETSTATE(m)->error);
+    return 0;
+}
+
+
+static struct PyModuleDef moduledef = {
+        PyModuleDef_HEAD_INIT,
+        "multiarray",
+        NULL,
+        sizeof(struct module_state),
+        multiarray_methods,
+        NULL,
+        multiarray_traverse,
+        multiarray_clear,
+        NULL
+};
+
+#define INITERROR return NULL
+
+DLL_PUBLIC PyObject *
+PyInit_multiarray(void)
+
+#else
+#define INITERROR return
+
+void
+DLL_PUBLIC initmultiarray(void)
+#endif
+{
+#if PY_MAJOR_VERSION >= 3
+    PyObject *module = PyModule_Create(&moduledef);
+#else
+    PyObject *module = Py_InitModule("multiarray", multiarray_methods);
+#endif
+    struct module_state *st;
+    if (module == NULL)
+        INITERROR;
+    st = GETSTATE(module);
+
+    st->error = PyErr_NewException("multiarray.Error", NULL, NULL);
+    if (st->error == NULL) {
+        Py_DECREF(module);
+        INITERROR;
+    }
+
+#if PY_MAJOR_VERSION >= 3
+    return module;
+#endif
+}
+
+/*
+ * Define a dummy entry point to make MSVC happy
+ * Python's build system will export this function automatically
+ */
+#if PY_MAJOR_VERSION >= 3
+
+PyObject *
+PyInit__multiarray_module_test(void)
+{
+    return PyInit_multiarray();
+}
+
+#else
+
+void
+init_multiarray_module_test(void)
+{
+    initmultiarray();
+}
+
+#endif                                                    
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src
index 2a8275572..c26bd16ac 100644
--- a/numpy/core/src/multiarray/_multiarray_tests.c.src
+++ b/numpy/core/src/multiarray/_multiarray_tests.c.src
@@ -1871,11 +1871,14 @@ printf_float_g(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
 static PyObject *
 getset_numericops(PyObject* NPY_UNUSED(self), PyObject* NPY_UNUSED(args))
 {
-    PyObject * ops = PyArray_GetNumericOps();
+    PyObject *ret;
+    PyObject *ops = PyArray_GetNumericOps();
     if (ops == NULL) {
         return NULL;
     }
-    return PyLong_FromLong(PyArray_SetNumericOps(ops));
+    ret = PyLong_FromLong(PyArray_SetNumericOps(ops));
+    Py_DECREF(ops);
+    return ret;
 }
 
 static PyMethodDef Multiarray_TestsMethods[] = {
diff --git a/numpy/core/src/multiarray/arrayfunction_override.c b/numpy/core/src/multiarray/arrayfunction_override.c
new file mode 100644
index 000000000..e62b32ab2
--- /dev/null
+++ b/numpy/core/src/multiarray/arrayfunction_override.c
@@ -0,0 +1,376 @@
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+
+#include "npy_pycompat.h"
+#include "get_attr_string.h"
+#include "npy_import.h"
+#include "multiarraymodule.h"
+
+
+/* Return the ndarray.__array_function__ method. */
+static PyObject *
+get_ndarray_array_function(void)
+{
+    PyObject* method = PyObject_GetAttrString((PyObject *)&PyArray_Type,
+                                              "__array_function__");
+    assert(method != NULL);
+    return method;
+}
+
+
+/*
+ * Get an object's __array_function__ method in the fastest way possible.
+ * Never raises an exception. Returns NULL if the method doesn't exist.
+ */
+static PyObject *
+get_array_function(PyObject *obj)
+{
+    static PyObject *ndarray_array_function = NULL;
+
+    if (ndarray_array_function == NULL) {
+        ndarray_array_function = get_ndarray_array_function();
+    }
+
+    /* Fast return for ndarray */
+    if (PyArray_CheckExact(obj)) {
+        Py_INCREF(ndarray_array_function);
+        return ndarray_array_function;
+    }
+
+    return PyArray_LookupSpecial(obj, "__array_function__");
+}
+
+
+/*
+ * Like list.insert(), but for C arrays of PyObject*. Skips error checking.
+ */
+static void
+pyobject_array_insert(PyObject **array, int length, int index, PyObject *item)
+{
+    int j;
+
+    for (j = length; j > index; j--) {
+        array[j] = array[j - 1];
+    }
+    array[index] = item;
+}
+
+
+/*
+ * Collects arguments with __array_function__ and their corresponding methods
+ * in the order in which they should be tried (i.e., skipping redundant types).
+ * `relevant_args` is expected to have been produced by PySequence_Fast.
+ * Returns the number of arguments, or -1 on failure. 
+ */
+static int
+get_implementing_args_and_methods(PyObject *relevant_args,
+                                  PyObject **implementing_args,
+                                  PyObject **methods)
+{
+    int num_implementing_args = 0;
+    Py_ssize_t i;
+    int j;
+
+    PyObject **items = PySequence_Fast_ITEMS(relevant_args);
+    Py_ssize_t length = PySequence_Fast_GET_SIZE(relevant_args);
+
+    for (i = 0; i < length; i++) {
+        int new_class = 1;
+        PyObject *argument = items[i];
+
+        /* Have we seen this type before? */
+        for (j = 0; j < num_implementing_args; j++) {
+            if (Py_TYPE(argument) == Py_TYPE(implementing_args[j])) {
+                new_class = 0;
+                break;
+            }
+        }
+        if (new_class) {
+            PyObject *method = get_array_function(argument);
+
+            if (method != NULL) {
+                int arg_index;
+
+                if (num_implementing_args >= NPY_MAXARGS) {
+                    PyErr_Format(
+                        PyExc_TypeError,
+                        "maximum number (%d) of distinct argument types " \
+                        "implementing __array_function__ exceeded",
+                        NPY_MAXARGS);
+                    Py_DECREF(method);
+                    goto fail;
+                }
+
+                /* "subclasses before superclasses, otherwise left to right" */
+                arg_index = num_implementing_args;
+                for (j = 0; j < num_implementing_args; j++) {
+                    PyObject *other_type;
+                    other_type = (PyObject *)Py_TYPE(implementing_args[j]);
+                    if (PyObject_IsInstance(argument, other_type)) {
+                        arg_index = j;
+                        break;
+                    }
+                }
+                Py_INCREF(argument);
+                pyobject_array_insert(implementing_args, num_implementing_args,
+                                      arg_index, argument);
+                pyobject_array_insert(methods, num_implementing_args,
+                                      arg_index, method);
+                ++num_implementing_args;
+            }
+        }
+    }
+    return num_implementing_args;
+
+fail:
+    for (j = 0; j < num_implementing_args; j++) {
+        Py_DECREF(implementing_args[j]);
+        Py_DECREF(methods[j]);
+    }
+    return -1;
+}
+
+
+/*
+ * Is this object ndarray.__array_function__?
+ */
+static int
+is_default_array_function(PyObject *obj)
+{
+    static PyObject *ndarray_array_function = NULL;
+
+    if (ndarray_array_function == NULL) {
+        ndarray_array_function = get_ndarray_array_function();
+    }
+    return obj == ndarray_array_function;
+}
+
+
+/*
+ * Core implementation of ndarray.__array_function__. This is exposed
+ * separately so we can avoid the overhead of a Python method call from
+ * within `implement_array_function`.
+ */
+NPY_NO_EXPORT PyObject *
+array_function_method_impl(PyObject *func, PyObject *types, PyObject *args,
+                           PyObject *kwargs)
+{
+    Py_ssize_t j;
+    PyObject *implementation, *result;
+
+    PyObject **items = PySequence_Fast_ITEMS(types);
+    Py_ssize_t length = PySequence_Fast_GET_SIZE(types);
+
+    for (j = 0; j < length; j++) {
+        int is_subclass = PyObject_IsSubclass(
+            items[j], (PyObject *)&PyArray_Type);
+        if (is_subclass == -1) {
+            return NULL;
+        }
+        if (!is_subclass) {
+            Py_INCREF(Py_NotImplemented);
+            return Py_NotImplemented;
+        }
+    }
+
+    implementation = PyObject_GetAttr(func, npy_ma_str_wrapped);
+    if (implementation == NULL) {
+        return NULL;
+    }
+    result = PyObject_Call(implementation, args, kwargs);
+    Py_DECREF(implementation);
+    return result;
+}
+
+
+/*
+ * Calls __array_function__ on the provided argument, with a fast-path for
+ * ndarray.
+ */
+static PyObject *
+call_array_function(PyObject* argument, PyObject* method,
+                    PyObject* public_api, PyObject* types,
+                    PyObject* args, PyObject* kwargs)
+{
+    if (is_default_array_function(method)) {
+        return array_function_method_impl(public_api, types, args, kwargs);
+    }
+    else {
+        return PyObject_CallFunctionObjArgs(
+            method, argument, public_api, types, args, kwargs, NULL);
+    }
+}
+
+
+/*
+ * Implements the __array_function__ protocol for a function, as described in
+ * NEP-18. See numpy.core.overrides for a full docstring.
+ */
+NPY_NO_EXPORT PyObject *
+array_implement_array_function(
+    PyObject *NPY_UNUSED(dummy), PyObject *positional_args)
+{
+    PyObject *implementation, *public_api, *relevant_args, *args, *kwargs;
+
+    PyObject *types = NULL;
+    PyObject *implementing_args[NPY_MAXARGS];
+    PyObject *array_function_methods[NPY_MAXARGS];
+
+    int j, any_overrides;
+    int num_implementing_args = 0;
+    PyObject *result = NULL;
+
+    static PyObject *errmsg_formatter = NULL;
+
+    if (!PyArg_UnpackTuple(
+            positional_args, "implement_array_function", 5, 5,
+            &implementation, &public_api, &relevant_args, &args, &kwargs)) {
+        return NULL;
+    }
+
+    relevant_args = PySequence_Fast(
+        relevant_args,
+        "dispatcher for __array_function__ did not return an iterable");
+    if (relevant_args == NULL) {
+        return NULL;
+    }
+
+    /* Collect __array_function__ implementations */
+    num_implementing_args = get_implementing_args_and_methods(
+        relevant_args, implementing_args, array_function_methods);
+    if (num_implementing_args == -1) {
+        goto cleanup;
+    }
+
+    /*
+     * Handle the typical case of no overrides. This is merely an optimization
+     * if some arguments are ndarray objects, but is also necessary if no
+     * arguments implement __array_function__ at all (e.g., if they are all
+     * built-in types).
+     */
+    any_overrides = 0;
+    for (j = 0; j < num_implementing_args; j++) {
+        if (!is_default_array_function(array_function_methods[j])) {
+            any_overrides = 1;
+            break;
+        }
+    }
+    if (!any_overrides) {
+        result = PyObject_Call(implementation, args, kwargs);
+        goto cleanup;
+    }
+
+    /*
+     * Create a Python object for types.
+     * We use a tuple, because it's the fastest Python collection to create
+     * and has the bonus of being immutable.
+     */
+    types = PyTuple_New(num_implementing_args);
+    if (types == NULL) {
+        goto cleanup;
+    }
+    for (j = 0; j < num_implementing_args; j++) {
+        PyObject *arg_type = (PyObject *)Py_TYPE(implementing_args[j]);
+        Py_INCREF(arg_type);
+        PyTuple_SET_ITEM(types, j, arg_type);
+    }
+
+    /* Call __array_function__ methods */
+    for (j = 0; j < num_implementing_args; j++) {
+        PyObject *argument = implementing_args[j];
+        PyObject *method = array_function_methods[j];
+
+        /*
+         * We use `public_api` instead of `implementation` here so
+         * __array_function__ implementations can do equality/identity
+         * comparisons.
+         */
+        result = call_array_function(
+            argument, method, public_api, types, args, kwargs);
+
+        if (result == Py_NotImplemented) {
+            /* Try the next one */
+            Py_DECREF(result);
+            result = NULL;
+        }
+        else {
+            /* Either a good result, or an exception was raised. */
+            goto cleanup;
+        }
+    }
+
+    /* No acceptable override found, raise TypeError. */
+    npy_cache_import("numpy.core._internal",
+                     "array_function_errmsg_formatter",
+                     &errmsg_formatter);
+    if (errmsg_formatter != NULL) {
+        PyObject *errmsg = PyObject_CallFunctionObjArgs(
+            errmsg_formatter, public_api, types, NULL);
+        if (errmsg != NULL) {
+            PyErr_SetObject(PyExc_TypeError, errmsg);
+            Py_DECREF(errmsg);
+        }
+    }
+
+cleanup:
+    for (j = 0; j < num_implementing_args; j++) {
+        Py_DECREF(implementing_args[j]);
+        Py_DECREF(array_function_methods[j]);
+    }
+    Py_XDECREF(types);
+    Py_DECREF(relevant_args);
+    return result;
+}
+
+
+/*
+ * Python wrapper for get_implementing_args_and_methods, for testing purposes.
+ */
+NPY_NO_EXPORT PyObject *
+array__get_implementing_args(
+    PyObject *NPY_UNUSED(dummy), PyObject *positional_args)
+{
+    PyObject *relevant_args;
+    int j;
+    int num_implementing_args = 0;
+    PyObject *implementing_args[NPY_MAXARGS];
+    PyObject *array_function_methods[NPY_MAXARGS];
+    PyObject *result = NULL;
+
+    if (!PyArg_ParseTuple(positional_args, "O:array__get_implementing_args",
+                          &relevant_args)) {
+        return NULL;
+    }
+
+    relevant_args = PySequence_Fast(
+        relevant_args,
+        "dispatcher for __array_function__ did not return an iterable");
+    if (relevant_args == NULL) {
+        return NULL;
+    }
+
+    num_implementing_args = get_implementing_args_and_methods(
+        relevant_args, implementing_args, array_function_methods);
+    if (num_implementing_args == -1) {
+        goto cleanup;
+    }
+
+    /* create a Python object for implementing_args */
+    result = PyList_New(num_implementing_args);
+    if (result == NULL) {
+        goto cleanup;
+    }
+    for (j = 0; j < num_implementing_args; j++) {
+        PyObject *argument = implementing_args[j];
+        Py_INCREF(argument);
+        PyList_SET_ITEM(result, j, argument);
+    }
+
+cleanup:
+    for (j = 0; j < num_implementing_args; j++) {
+        Py_DECREF(implementing_args[j]);
+        Py_DECREF(array_function_methods[j]);
+    }
+    Py_DECREF(relevant_args);
+    return result;
+}
diff --git a/numpy/core/src/multiarray/arrayfunction_override.h b/numpy/core/src/multiarray/arrayfunction_override.h
new file mode 100644
index 000000000..0d224e2b6
--- /dev/null
+++ b/numpy/core/src/multiarray/arrayfunction_override.h
@@ -0,0 +1,16 @@
+#ifndef _NPY_PRIVATE__ARRAYFUNCTION_OVERRIDE_H
+#define _NPY_PRIVATE__ARRAYFUNCTION_OVERRIDE_H
+/* Registered in multiarraymodule.c as the "implement_array_function" method. */
+NPY_NO_EXPORT PyObject *
+array_implement_array_function(
+    PyObject *NPY_UNUSED(dummy), PyObject *positional_args);
+/* Test-only wrapper; registered as the "_get_implementing_args" method. */
+NPY_NO_EXPORT PyObject *
+array__get_implementing_args(
+    PyObject *NPY_UNUSED(dummy), PyObject *positional_args);
+/* Called from array_function() in methods.c (ndarray.__array_function__). */
+NPY_NO_EXPORT PyObject *
+array_function_method_impl(PyObject *func, PyObject *types, PyObject *args,
+                           PyObject *kwargs);
+
+#endif
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 823ee7115..ca5f5a47b 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -2205,15 +2205,19 @@ static void
 VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
                 npy_intp n, int swap, PyArrayObject *arr)
 {
+    PyArray_Descr *descr;
+
     if (arr == NULL) {
         return;
     }
+
+    descr = PyArray_DESCR(arr);
+
     if (PyArray_HASFIELDS(arr)) {
         PyObject *key, *value;
-        PyArray_Descr *descr;
+
         Py_ssize_t pos = 0;
 
-        descr = PyArray_DESCR(arr);
         while (PyDict_Next(descr->fields, &pos, &key, &value)) {
             npy_intp offset;
             PyArray_Descr * new;
@@ -2236,14 +2240,28 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
         ((PyArrayObject_fields *)arr)->descr = descr;
         return;
     }
-    if (swap && PyArray_DESCR(arr)->subarray != NULL) {
-        PyArray_Descr *descr, *new;
+    if (PyDataType_HASSUBARRAY(descr)) {
+        PyArray_Descr *new;
         npy_intp num;
         npy_intp i;
         int subitemsize;
         char *dstptr, *srcptr;
+        /*
+         * In certain cases subarray copy can be optimized. This is when
+         * swapping is unnecessary and the subarray's data type can certainly
+         * be simply copied (no object, fields, subarray, and not a user dtype).
+         */
+        npy_bool can_optimize_subarray = (!swap &&
+                !PyDataType_HASFIELDS(descr->subarray->base) &&
+                !PyDataType_HASSUBARRAY(descr->subarray->base) &&
+                !PyDataType_REFCHK(descr->subarray->base) &&
+                (descr->subarray->base->type_num < NPY_NTYPES));
+
+        if (can_optimize_subarray) {
+            _basic_copyn(dst, dstride, src, sstride, n, descr->elsize);
+            return;
+        }
 
-        descr = PyArray_DESCR(arr);
         new = descr->subarray->base;
         /*
          * TODO: temporarily modifying the array like this
@@ -2253,6 +2271,10 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
         dstptr = dst;
         srcptr = src;
         subitemsize = new->elsize;
+        if (subitemsize == 0) {
+            /* There cannot be any elements, so return */
+            return;
+        }
         num = descr->elsize / subitemsize;
         for (i = 0; i < n; i++) {
             new->f->copyswapn(dstptr, subitemsize, srcptr,
@@ -2265,22 +2287,26 @@ VOID_copyswapn (char *dst, npy_intp dstride, char *src, npy_intp sstride,
         ((PyArrayObject_fields *)arr)->descr = descr;
         return;
     }
-    _basic_copyn(dst, dstride, src, sstride, n, PyArray_DESCR(arr)->elsize);
+    /* Must be a naive Void type (e.g. a "V8") so simple copy is sufficient. */
+    _basic_copyn(dst, dstride, src, sstride, n, descr->elsize);
     return;
 }
 
 static void
 VOID_copyswap (char *dst, char *src, int swap, PyArrayObject *arr)
 {
+    PyArray_Descr *descr;
+
     if (arr == NULL) {
         return;
     }
+
+    descr = PyArray_DESCR(arr);
+
     if (PyArray_HASFIELDS(arr)) {
         PyObject *key, *value;
-        PyArray_Descr *descr;
         Py_ssize_t pos = 0;
 
-        descr = PyArray_DESCR(arr);
         while (PyDict_Next(descr->fields, &pos, &key, &value)) {
             npy_intp offset;
             PyArray_Descr * new;
@@ -2303,28 +2329,45 @@ VOID_copyswap (char *dst, char *src, int swap, PyArrayObject *arr)
         ((PyArrayObject_fields *)arr)->descr = descr;
         return;
     }
-    if (swap && PyArray_DESCR(arr)->subarray != NULL) {
-        PyArray_Descr *descr, *new;
+    if (PyDataType_HASSUBARRAY(descr)) {
+        PyArray_Descr *new;
         npy_intp num;
-        int itemsize;
+        int subitemsize;
+        /*
+         * In certain cases subarray copy can be optimized. This is when
+         * swapping is unnecessary and the subarray's data type can certainly
+         * be simply copied (no object, fields, subarray, and not a user dtype).
+         */
+        npy_bool can_optimize_subarray = (!swap &&
+                !PyDataType_HASFIELDS(descr->subarray->base) &&
+                !PyDataType_HASSUBARRAY(descr->subarray->base) &&
+                !PyDataType_REFCHK(descr->subarray->base) &&
+                (descr->subarray->base->type_num < NPY_NTYPES));
+
+        if (can_optimize_subarray) {
+            _basic_copy(dst, src, descr->elsize);
+            return;
+        }
 
-        descr = PyArray_DESCR(arr);
         new = descr->subarray->base;
         /*
          * TODO: temporarily modifying the array like this
          *       is bad coding style, should be changed.
          */
         ((PyArrayObject_fields *)arr)->descr = new;
-        itemsize = new->elsize;
-        num = descr->elsize / itemsize;
-        new->f->copyswapn(dst, itemsize, src,
-                itemsize, num, swap, arr);
+        subitemsize = new->elsize;
+        if (subitemsize == 0) {
+            /* There cannot be any elements, so return */
+            return;
+        }
+        num = descr->elsize / subitemsize;
+        new->f->copyswapn(dst, subitemsize, src,
+                subitemsize, num, swap, arr);
         ((PyArrayObject_fields *)arr)->descr = descr;
         return;
     }
-
-    /* copy first if needed */
-    _basic_copy(dst, src, PyArray_DESCR(arr)->elsize);
+    /* Must be a naive Void type (e.g. a "V8") so simple copy is sufficient. */
+    _basic_copy(dst, src, descr->elsize);
     return;
 }
 
diff --git a/numpy/core/src/multiarray/buffer.c b/numpy/core/src/multiarray/buffer.c
index 2f66d7f2f..d8ad80266 100644
--- a/numpy/core/src/multiarray/buffer.c
+++ b/numpy/core/src/multiarray/buffer.c
@@ -509,6 +509,10 @@ _buffer_info_new(PyObject *obj)
     PyArray_Descr *descr = NULL;
     int err = 0;
 
+    /*
+     * Note that the buffer info is cached as pyints making them appear like
+     * unreachable lost memory to valgrind.
+     */
     info = malloc(sizeof(_buffer_info_t));
     if (info == NULL) {
         PyErr_NoMemory();
@@ -579,9 +583,11 @@ _buffer_info_new(PyObject *obj)
     err = _buffer_format_string(descr, &fmt, obj, NULL, NULL);
     Py_DECREF(descr);
     if (err != 0) {
+        free(info->shape);
         goto fail;
     }
     if (_append_char(&fmt, '\0') < 0) {
+        free(info->shape);
         goto fail;
     }
     info->format = fmt.s;
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 2e51cee7e..addb67732 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -164,7 +164,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
 
             if (string_type == NPY_STRING) {
                 if ((temp = PyObject_Str(obj)) == NULL) {
-                    return -1;
+                    goto fail;
                 }
 #if defined(NPY_PY3K)
     #if PY_VERSION_HEX >= 0x03030000
@@ -182,7 +182,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
 #else
                 if ((temp = PyObject_Unicode(obj)) == NULL) {
 #endif
-                    return -1;
+                    goto fail;
                 }
                 itemsize = PyUnicode_GET_DATA_SIZE(temp);
 #ifndef Py_UNICODE_WIDE
@@ -216,7 +216,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
 
             if (string_type == NPY_STRING) {
                 if ((temp = PyObject_Str(obj)) == NULL) {
-                    return -1;
+                    goto fail;
                 }
 #if defined(NPY_PY3K)
     #if PY_VERSION_HEX >= 0x03030000
@@ -234,7 +234,7 @@ PyArray_DTypeFromObjectHelper(PyObject *obj, int maxdims,
 #else
                 if ((temp = PyObject_Unicode(obj)) == NULL) {
 #endif
-                    return -1;
+                    goto fail;
                 }
                 itemsize = PyUnicode_GET_DATA_SIZE(temp);
 #ifndef Py_UNICODE_WIDE
@@ -511,7 +511,7 @@ promote_types:
         PyArray_Descr *res_dtype = PyArray_PromoteTypes(dtype, *out_dtype);
         Py_DECREF(dtype);
         if (res_dtype == NULL) {
-            return -1;
+            goto fail;
         }
         if (!string_type &&
                 res_dtype->type_num == NPY_UNICODE &&
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index 10e3478e2..625028bfb 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -328,6 +328,7 @@ arr_insert(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwdict)
         } else {
             Py_XDECREF(values);
             Py_XDECREF(mask);
+            PyArray_ResolveWritebackIfCopy(array);
             Py_XDECREF(array);
             Py_RETURN_NONE;
         }
@@ -358,6 +359,7 @@ arr_insert(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwdict)
 
  fail:
     Py_XDECREF(mask);
+    PyArray_ResolveWritebackIfCopy(array);
     Py_XDECREF(array);
     Py_XDECREF(values);
     return NULL;
@@ -1575,6 +1577,7 @@ pack_bits(PyObject *input, int axis)
     if (!PyArray_ISBOOL(inp) && !PyArray_ISINTEGER(inp)) {
         PyErr_SetString(PyExc_TypeError,
                 "Expected an input array of integer or boolean data type");
+        Py_DECREF(inp);
         goto fail;
     }
 
@@ -1682,6 +1685,7 @@ unpack_bits(PyObject *input, int axis)
     if (PyArray_TYPE(inp) != NPY_UBYTE) {
         PyErr_SetString(PyExc_TypeError,
                 "Expected an input array of unsigned byte data type");
+        Py_DECREF(inp);
         goto fail;
     }
 
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index a17621946..b9059ba4d 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -1410,6 +1410,7 @@ _array_from_buffer_3118(PyObject *memoryview)
          * dimensions, so the array is now 0d.
          */
         nd = 0;
+        Py_DECREF(descr);
         descr = (PyArray_Descr *)PyObject_CallFunctionObjArgs(
                 (PyObject *)&PyArrayDescr_Type, Py_TYPE(view->obj), NULL);
         if (descr == NULL) {
@@ -2128,12 +2129,15 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
              */
 
             /* 2017-Nov-10 1.14 */
-            if (DEPRECATE("NPY_ARRAY_UPDATEIFCOPY, NPY_ARRAY_INOUT_ARRAY, and "
-                "NPY_ARRAY_INOUT_FARRAY are deprecated, use NPY_WRITEBACKIFCOPY, "
-                "NPY_ARRAY_INOUT_ARRAY2, or NPY_ARRAY_INOUT_FARRAY2 respectively "
-                "instead, and call PyArray_ResolveWritebackIfCopy before the "
-                "array is deallocated, i.e. before the last call to Py_DECREF.") < 0)
+            if (DEPRECATE(
+                    "NPY_ARRAY_UPDATEIFCOPY, NPY_ARRAY_INOUT_ARRAY, and "
+                    "NPY_ARRAY_INOUT_FARRAY are deprecated, use NPY_WRITEBACKIFCOPY, "
+                    "NPY_ARRAY_INOUT_ARRAY2, or NPY_ARRAY_INOUT_FARRAY2 respectively "
+                    "instead, and call PyArray_ResolveWritebackIfCopy before the "
+                    "array is deallocated, i.e. before the last call to Py_DECREF.") < 0) {
+                Py_DECREF(ret);
                 return NULL;
+            }
             Py_INCREF(arr);
             if (PyArray_SetWritebackIfCopyBase(ret, arr) < 0) {
                 Py_DECREF(ret);
@@ -2160,14 +2164,12 @@ PyArray_FromArray(PyArrayObject *arr, PyArray_Descr *newtype, int flags)
 
         Py_DECREF(newtype);
         if (needview) {
-            PyArray_Descr *dtype = PyArray_DESCR(arr);
             PyTypeObject *subtype = NULL;
 
             if (flags & NPY_ARRAY_ENSUREARRAY) {
                 subtype = &PyArray_Type;
             }
 
-            Py_INCREF(dtype);
             ret = (PyArrayObject *)PyArray_View(arr, NULL, subtype);
             if (ret == NULL) {
                 return NULL;
@@ -2495,6 +2497,11 @@ PyArray_FromInterface(PyObject *origin)
             &PyArray_Type, dtype,
             n, dims, NULL, data,
             dataflags, NULL, base);
+    /*
+     * Ref to dtype was stolen by PyArray_NewFromDescrAndBase
+     * Prevent DECREFing dtype in fail codepath by setting to NULL
+     */
+    dtype = NULL;
     if (ret == NULL) {
         goto fail;
     }
diff --git a/numpy/core/src/multiarray/datetime.c b/numpy/core/src/multiarray/datetime.c
index a8550d958..54d19d993 100644
--- a/numpy/core/src/multiarray/datetime.c
+++ b/numpy/core/src/multiarray/datetime.c
@@ -3822,18 +3822,26 @@ recursive_find_object_timedelta64_type(PyObject *obj,
                  * single object using [()], but not by using
                  * __getitem__(integer) approaches
                  */
-                PyObject *item, *meth, *args;
+                PyObject *item, *args;
 
-                meth = PyObject_GetAttrString(obj, "__getitem__");
-                args = Py_BuildValue("(())");
-                item = PyObject_CallObject(meth, args);
+                args = PyTuple_New(0);
+                if (args == NULL) {
+                    return 0;
+                }
+                item = PyObject_GetItem(obj, args);
+                Py_DECREF(args);
+                if (item == NULL) {
+                    return 0;
+                }
                 /*
                  * NOTE: may need other type checks here in the future
                  * for expanded 0 D datetime array conversions?
                  */
                 if (PyDelta_Check(item)) {
+                    Py_DECREF(item);
                     return delta_checker(meta);
                 }
+                Py_DECREF(item);
             }
         }
     }
diff --git a/numpy/core/src/multiarray/descriptor.c b/numpy/core/src/multiarray/descriptor.c
index 3038e4dea..0471a2a3e 100644
--- a/numpy/core/src/multiarray/descriptor.c
+++ b/numpy/core/src/multiarray/descriptor.c
@@ -515,6 +515,7 @@ _convert_from_array_descr(PyObject *obj, int align)
 #if defined(NPY_PY3K)
             Py_DECREF(name);
 #endif
+            Py_DECREF(conv);
             goto fail;
         }
         dtypeflags |= (conv->flags & NPY_FROM_FIELDS);
@@ -837,9 +838,11 @@ _use_inherit(PyArray_Descr *type, PyObject *newobj, int *errflag)
     else if (new->elsize != conv->elsize) {
         PyErr_SetString(PyExc_ValueError,
                 "mismatch in size of old and new data-descriptor");
+        Py_DECREF(new);
         goto fail;
     }
     else if (invalid_union_object_dtype(new, conv)) {
+        Py_DECREF(new);
         goto fail;
     }
 
@@ -1728,6 +1731,7 @@ PyArray_DescrNew(PyArray_Descr *base)
         newdescr->c_metadata = NPY_AUXDATA_CLONE(base->c_metadata);
         if (newdescr->c_metadata == NULL) {
             PyErr_NoMemory();
+            /* TODO: This seems wrong, as the old fields get decref'd? */
             Py_DECREF(newdescr);
             return NULL;
         }
@@ -3336,12 +3340,15 @@ static PyObject *
 _subscript_by_index(PyArray_Descr *self, Py_ssize_t i)
 {
     PyObject *name = PySequence_GetItem(self->names, i);
+    PyObject *ret;
     if (name == NULL) {
         PyErr_Format(PyExc_IndexError,
                      "Field index %zd out of range.", i);
         return NULL;
     }
-    return _subscript_by_name(self, name);
+    ret = _subscript_by_name(self, name);
+    Py_DECREF(name);
+    return ret;
 }
 
 static PyObject *
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 63b1ead25..6347d35eb 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -1572,12 +1572,30 @@ get_cast_transfer_function(int aligned,
                                 src_dtype,
                                 &tobuffer, &todata);
 
-
-        /* Get the copy/swap operation to dst */
-        PyArray_GetDTypeCopySwapFn(aligned,
-                                dst_itemsize, dst_stride,
-                                dst_dtype,
-                                &frombuffer, &fromdata);
+        if (!PyDataType_REFCHK(dst_dtype)) {
+            /* Copying from buffer is a simple copy/swap operation */
+            PyArray_GetDTypeCopySwapFn(aligned,
+                                    dst_itemsize, dst_stride,
+                                    dst_dtype,
+                                    &frombuffer, &fromdata);
+        }
+        else {
+            /*
+             * Since the buffer is initialized to NULL, need to move the
+             * references in order to DECREF the existing data.
+             */
+             /* Object types cannot be byte swapped */
+            assert(PyDataType_ISNOTSWAPPED(dst_dtype));
+            /* The loop already needs the python api if this is reached */
+            assert(*out_needs_api);
+
+            if (PyArray_GetDTypeTransferFunction(
+                    aligned, dst_itemsize, dst_stride,
+                    dst_dtype, dst_dtype, 1,
+                    &frombuffer, &fromdata, out_needs_api) != NPY_SUCCEED) {
+                return NPY_FAIL;
+            }
+        }
 
         if (frombuffer == NULL || tobuffer == NULL) {
             NPY_AUXDATA_FREE(castdata);
@@ -2001,6 +2019,7 @@ typedef struct {
     _subarray_broadcast_offsetrun offsetruns;
 } _subarray_broadcast_data;
 
+
 /* transfer data free function */
 static void _subarray_broadcast_data_free(NpyAuxData *data)
 {
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 7c814e6e6..2fcc2ec22 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -8,6 +8,7 @@
 #include "numpy/arrayobject.h"
 #include "numpy/arrayscalars.h"
 
+#include "arrayfunction_override.h"
 #include "npy_config.h"
 #include "npy_pycompat.h"
 #include "npy_import.h"
@@ -1088,13 +1089,29 @@ cleanup:
     return result;
 }
 
-
 static PyObject *
-array_function(PyArrayObject *self, PyObject *args, PyObject *kwds)
+array_function(PyArrayObject *self, PyObject *c_args, PyObject *c_kwds)
 {
-    NPY_FORWARD_NDARRAY_METHOD("_array_function");
-}
+    PyObject *func, *types, *args, *kwargs, *result;
+    static char *kwlist[] = {"func", "types", "args", "kwargs", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(
+            c_args, c_kwds, "OOOO:__array_function__", kwlist,
+            &func, &types, &args, &kwargs)) {
+        return NULL;
+    }
 
+    types = PySequence_Fast(
+        types,
+        "types argument to ndarray.__array_function__ must be iterable");
+    if (types == NULL) {
+        return NULL;
+    }
+
+    result = array_function_method_impl(func, types, args, kwargs);
+    Py_DECREF(types);
+    return result;
+}
 
 static PyObject *
 array_copy(PyArrayObject *self, PyObject *args, PyObject *kwds)
@@ -1364,6 +1381,7 @@ array_argsort(PyArrayObject *self, PyObject *args, PyObject *kwds)
             return NULL;
         }
         newd = PyArray_DescrNew(saved);
+        Py_DECREF(newd->names);
         newd->names = new_name;
         ((PyArrayObject_fields *)self)->descr = newd;
     }
@@ -1418,6 +1436,7 @@ array_argpartition(PyArrayObject *self, PyObject *args, PyObject *kwds)
             return NULL;
         }
         newd = PyArray_DescrNew(saved);
+        Py_DECREF(newd->names);
         newd->names = new_name;
         ((PyArrayObject_fields *)self)->descr = newd;
     }
@@ -1521,7 +1540,6 @@ array_deepcopy(PyArrayObject *self, PyObject *args)
         copy = PyImport_ImportModule("copy");
         if (copy == NULL) {
             Py_DECREF(copied_array);
-            Py_DECREF(copy);
             return NULL;
         }
         deepcopy = PyObject_GetAttrString(copy, "deepcopy");
@@ -1704,129 +1722,150 @@ array_reduce(PyArrayObject *self, PyObject *NPY_UNUSED(args))
 }
 
 static PyObject *
-array_reduce_ex(PyArrayObject *self, PyObject *args)
+array_reduce_ex_regular(PyArrayObject *self, int protocol)
 {
-    int protocol;
-    PyObject *ret = NULL, *numeric_mod = NULL, *from_buffer_func = NULL;
-    PyObject *buffer_tuple = NULL, *pickle_module = NULL, *pickle_class = NULL;
-    PyObject *class_args = NULL, *class_args_tuple = NULL, *unused = NULL;
     PyObject *subclass_array_reduce = NULL;
+    PyObject *ret;
+
+    /* We do not call array_reduce directly but instead lookup and call
+     * the __reduce__ method to make sure that it's possible to customize
+     * pickling in sub-classes. */
+    subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
+                                                   "__reduce__");
+    if (subclass_array_reduce == NULL) {
+        return NULL;
+    }
+    ret = PyObject_CallObject(subclass_array_reduce, NULL);
+    Py_DECREF(subclass_array_reduce);
+    return ret;
+}
+
+static PyObject *
+array_reduce_ex_picklebuffer(PyArrayObject *self, int protocol)
+{
+    PyObject *numeric_mod = NULL, *from_buffer_func = NULL;
+    PyObject *pickle_module = NULL, *picklebuf_class = NULL;
+    PyObject *picklebuf_args = NULL;
     PyObject *buffer = NULL, *transposed_array = NULL;
     PyArray_Descr *descr = NULL;
     char order;
 
-    if (PyArg_ParseTuple(args, "i", &protocol)){
-        descr = PyArray_DESCR(self);
-        if ((protocol < 5) ||
-            (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
-             !PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
-            PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
-            (PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
-             ((PyObject*)self)->ob_type != &PyArray_Type) ||
-            PyDataType_ISUNSIZED(descr)) {
-            /* The PickleBuffer class from version 5 of the pickle protocol
-             * can only be used for arrays backed by a contiguous data buffer.
-             * For all other cases we fallback to the generic array_reduce
-             * method that involves using a temporary bytes allocation. However
-             * we do not call array_reduce directly but instead lookup and call
-             * the __reduce__ method to make sure that it's possible customize
-             * pickling in sub-classes. */
-            subclass_array_reduce = PyObject_GetAttrString((PyObject *)self,
-                                                           "__reduce__");
-            return PyObject_CallObject(subclass_array_reduce, unused);
-        }
-        else if (protocol == 5){
-            ret = PyTuple_New(2);
-
-            if (ret == NULL) {
-                return NULL;
-            }
+    descr = PyArray_DESCR(self);
 
-            /* if the python version is below 3.8, the pickle module does not provide
-             * built-in support for protocol 5. We try importing the pickle5
-             * backport instead */
+    /* if the python version is below 3.8, the pickle module does not provide
+     * built-in support for protocol 5. We try importing the pickle5
+     * backport instead */
 #if PY_VERSION_HEX >= 0x03080000
-            pickle_module = PyImport_ImportModule("pickle");
-#elif PY_VERSION_HEX < 0x03080000 && PY_VERSION_HEX >= 0x03060000
-            pickle_module = PyImport_ImportModule("pickle5");
-            if (pickle_module == NULL){
-                /* for protocol 5, raise a clear ImportError if pickle5 is not found
-                 */
-                PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
-                        "requires the pickle5 module for python versions >=3.6 "
-                        "and <3.8");
-                return NULL;
-            }
+    /* we expect protocol 5 to be available in Python 3.8 */
+    pickle_module = PyImport_ImportModule("pickle");
+#elif PY_VERSION_HEX >= 0x03060000
+    pickle_module = PyImport_ImportModule("pickle5");
+    if (pickle_module == NULL) {
+        /* for protocol 5, raise a clear ImportError if pickle5 is not found
+         */
+        PyErr_SetString(PyExc_ImportError, "Using pickle protocol 5 "
+                "requires the pickle5 module for Python >=3.6 and <3.8");
+        return NULL;
+    }
 #else
-            PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
-                                               "for python versions < 3.6");
-            return NULL;
+    PyErr_SetString(PyExc_ValueError, "pickle protocol 5 is not available "
+                                      "for Python < 3.6");
+    return NULL;
 #endif
-            if (pickle_module == NULL){
-                return NULL;
-            }
-
-            pickle_class = PyObject_GetAttrString(pickle_module,
-                                                  "PickleBuffer");
+    if (pickle_module == NULL){
+        return NULL;
+    }
+    picklebuf_class = PyObject_GetAttrString(pickle_module, "PickleBuffer");
+    Py_DECREF(pickle_module);
+    if (picklebuf_class == NULL) {
+        return NULL;
+    }
 
-            class_args_tuple = PyTuple_New(1);
-            if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
-                PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)){
+    /* Construct a PickleBuffer of the array */
 
-                /* if the array if Fortran-contiguous and not C-contiguous,
-                 * the PickleBuffer instance will hold a view on the transpose
-                 * of the initial array, that is C-contiguous. */
-                order = 'F';
-                transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
-                PyTuple_SET_ITEM(class_args_tuple, 0, transposed_array);
-            }
-            else {
-                order = 'C';
-                PyTuple_SET_ITEM(class_args_tuple, 0, (PyObject *)self);
-                Py_INCREF(self);
-            }
+    if (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*) self) &&
+         PyArray_IS_F_CONTIGUOUS((PyArrayObject*) self)) {
+        /* if the array is Fortran-contiguous and not C-contiguous,
+         * the PickleBuffer instance will hold a view on the transpose
+         * of the initial array, that is C-contiguous. */
+        order = 'F';
+        transposed_array = PyArray_Transpose((PyArrayObject*)self, NULL);
+        picklebuf_args = Py_BuildValue("(N)", transposed_array);
+    }
+    else {
+        order = 'C';
+        picklebuf_args = Py_BuildValue("(O)", self);
+    }
+    if (picklebuf_args == NULL) {
+        Py_DECREF(picklebuf_class);
+        return NULL;
+    }
 
-            class_args = Py_BuildValue("O", class_args_tuple);
+    buffer = PyObject_CallObject(picklebuf_class, picklebuf_args);
+    Py_DECREF(picklebuf_class);
+    Py_DECREF(picklebuf_args);
+    if (buffer == NULL) {
+        /* Some arrays may refuse to export a buffer, in which case
+         * just fall back on regular __reduce_ex__ implementation
+         * (gh-12745).
+         */
+        PyErr_Clear();
+        return array_reduce_ex_regular(self, protocol);
+    }
 
-            buffer = PyObject_CallObject(pickle_class, class_args);
+    /* Get the _frombuffer() function for reconstruction */
 
-            numeric_mod = PyImport_ImportModule("numpy.core.numeric");
-            if (numeric_mod == NULL) {
-                Py_DECREF(ret);
-                return NULL;
-            }
-            from_buffer_func = PyObject_GetAttrString(numeric_mod,
-                                                      "_frombuffer");
-            Py_DECREF(numeric_mod);
+    numeric_mod = PyImport_ImportModule("numpy.core.numeric");
+    if (numeric_mod == NULL) {
+        Py_DECREF(buffer);
+        return NULL;
+    }
+    from_buffer_func = PyObject_GetAttrString(numeric_mod,
+                                              "_frombuffer");
+    Py_DECREF(numeric_mod);
+    if (from_buffer_func == NULL) {
+        Py_DECREF(buffer);
+        return NULL;
+    }
 
-            Py_INCREF(descr);
+    return Py_BuildValue("N(NONN)",
+                         from_buffer_func, buffer, (PyObject *)descr,
+                         PyObject_GetAttrString((PyObject *)self, "shape"),
+                         PyUnicode_FromStringAndSize(&order, 1));
+}
 
-            buffer_tuple = PyTuple_New(4);
-            PyTuple_SET_ITEM(buffer_tuple, 0, buffer);
-            PyTuple_SET_ITEM(buffer_tuple, 1, (PyObject *)descr);
-            PyTuple_SET_ITEM(buffer_tuple, 2,
-                             PyObject_GetAttrString((PyObject *)self,
-                                                    "shape"));
-            PyTuple_SET_ITEM(buffer_tuple, 3,
-                             PyUnicode_FromStringAndSize(&order,
-                                                         (Py_ssize_t)1));
+static PyObject *
+array_reduce_ex(PyArrayObject *self, PyObject *args)
+{
+    int protocol;
+    PyArray_Descr *descr = NULL;
 
-            PyTuple_SET_ITEM(ret, 0, from_buffer_func);
-            PyTuple_SET_ITEM(ret, 1, buffer_tuple);
+    if (!PyArg_ParseTuple(args, "i", &protocol)) {
+        return NULL;
+    }
 
-            return ret;
-        }
-        else {
-            PyErr_Format(PyExc_ValueError,
-                         "cannot call __reduce_ex__ with protocol >= %d",
-                         5);
-            return NULL;
-        }
+    descr = PyArray_DESCR(self);
+    if ((protocol < 5) ||
+        (!PyArray_IS_C_CONTIGUOUS((PyArrayObject*)self) &&
+         !PyArray_IS_F_CONTIGUOUS((PyArrayObject*)self)) ||
+        PyDataType_FLAGCHK(descr, NPY_ITEM_HASOBJECT) ||
+        (PyType_IsSubtype(((PyObject*)self)->ob_type, &PyArray_Type) &&
+         ((PyObject*)self)->ob_type != &PyArray_Type) ||
+        PyDataType_ISUNSIZED(descr)) {
+        /* The PickleBuffer class from version 5 of the pickle protocol
+         * can only be used for arrays backed by a contiguous data buffer.
+         * For all other cases we fallback to the generic array_reduce
+         * method that involves using a temporary bytes allocation. */
+        return array_reduce_ex_regular(self, protocol);
+    }
+    else if (protocol == 5) {
+        return array_reduce_ex_picklebuffer(self, protocol);
     }
     else {
+        PyErr_Format(PyExc_ValueError,
+                     "__reduce_ex__ called with protocol > 5");
         return NULL;
     }
-
 }
 
 static PyObject *
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 8135769d9..166533b3f 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -34,6 +34,7 @@
 NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
 
 /* Internal APIs */
+#include "arrayfunction_override.h"
 #include "arraytypes.h"
 #include "arrayobject.h"
 #include "hashdescr.h"
@@ -3254,6 +3255,7 @@ array_datetime_data(PyObject *NPY_UNUSED(dummy), PyObject *args)
     }
 
     meta = get_datetime_metadata_from_dtype(dtype);
+    Py_DECREF(dtype);    
     if (meta == NULL) {
         return NULL;
     }
@@ -3618,6 +3620,7 @@ _vec_string_with_args(PyArrayObject* char_array, PyArray_Descr* type,
     if (nargs == -1 || nargs > NPY_MAXARGS) {
         PyErr_Format(PyExc_ValueError,
                 "len(args) must be < %d", NPY_MAXARGS - 1);
+        Py_DECREF(type);
         goto err;
     }
 
@@ -3625,6 +3628,7 @@ _vec_string_with_args(PyArrayObject* char_array, PyArray_Descr* type,
     for (i = 1; i < nargs; i++) {
         PyObject* item = PySequence_GetItem(args, i-1);
         if (item == NULL) {
+            Py_DECREF(type);
             goto err;
         }
         broadcast_args[i] = item;
@@ -3633,6 +3637,7 @@ _vec_string_with_args(PyArrayObject* char_array, PyArray_Descr* type,
     in_iter = (PyArrayMultiIterObject*)PyArray_MultiIterFromObjects
         (broadcast_args, nargs, 0);
     if (in_iter == NULL) {
+        Py_DECREF(type);
         goto err;
     }
     n = in_iter->numiter;
@@ -3713,6 +3718,7 @@ _vec_string_no_args(PyArrayObject* char_array,
 
     in_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)char_array);
     if (in_iter == NULL) {
+        Py_DECREF(type);
         goto err;
     }
 
@@ -3769,7 +3775,7 @@ static PyObject *
 _vec_string(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
 {
     PyArrayObject* char_array = NULL;
-    PyArray_Descr *type = NULL;
+    PyArray_Descr *type;
     PyObject* method_name;
     PyObject* args_seq = NULL;
 
@@ -3806,6 +3812,7 @@ _vec_string(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
         result = _vec_string_with_args(char_array, type, method, args_seq);
     }
     else {
+        Py_DECREF(type);
         PyErr_SetString(PyExc_TypeError,
                 "'args' must be a sequence of arguments");
         goto err;
@@ -4062,6 +4069,9 @@ normalize_axis_index(PyObject *NPY_UNUSED(self), PyObject *args, PyObject *kwds)
 }
 
 static struct PyMethodDef array_module_methods[] = {
+    {"_get_implementing_args",
+        (PyCFunction)array__get_implementing_args,
+        METH_VARARGS, NULL},
     {"_get_ndarray_c_version",
         (PyCFunction)array__get_ndarray_c_version,
         METH_VARARGS|METH_KEYWORDS, NULL},
@@ -4224,6 +4234,9 @@ static struct PyMethodDef array_module_methods[] = {
         METH_VARARGS | METH_KEYWORDS, NULL},
     {"_monotonicity", (PyCFunction)arr__monotonicity,
         METH_VARARGS | METH_KEYWORDS, NULL},
+    {"implement_array_function",
+        (PyCFunction)array_implement_array_function,
+        METH_VARARGS, NULL},
     {"interp", (PyCFunction)arr_interp,
         METH_VARARGS | METH_KEYWORDS, NULL},
     {"interp_complex", (PyCFunction)arr_interp_complex,
@@ -4476,6 +4489,7 @@ NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_array_wrap = NULL;
 NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_array_finalize = NULL;
 NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_buffer = NULL;
 NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_ufunc = NULL;
+NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_wrapped = NULL;
 NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_order = NULL;
 NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_copy = NULL;
 NPY_VISIBILITY_HIDDEN PyObject * npy_ma_str_dtype = NULL;
@@ -4492,6 +4506,7 @@ intern_strings(void)
     npy_ma_str_array_finalize = PyUString_InternFromString("__array_finalize__");
     npy_ma_str_buffer = PyUString_InternFromString("__buffer__");
     npy_ma_str_ufunc = PyUString_InternFromString("__array_ufunc__");
+    npy_ma_str_wrapped = PyUString_InternFromString("__wrapped__");
     npy_ma_str_order = PyUString_InternFromString("order");
     npy_ma_str_copy = PyUString_InternFromString("copy");
     npy_ma_str_dtype = PyUString_InternFromString("dtype");
@@ -4501,7 +4516,7 @@ intern_strings(void)
 
     return npy_ma_str_array && npy_ma_str_array_prepare &&
            npy_ma_str_array_wrap && npy_ma_str_array_finalize &&
-           npy_ma_str_buffer && npy_ma_str_ufunc &&
+           npy_ma_str_buffer && npy_ma_str_ufunc && npy_ma_str_wrapped &&
            npy_ma_str_order && npy_ma_str_copy && npy_ma_str_dtype &&
            npy_ma_str_ndmin && npy_ma_str_axis1 && npy_ma_str_axis2;
 }
diff --git a/numpy/core/src/multiarray/multiarraymodule.h b/numpy/core/src/multiarray/multiarraymodule.h
index 3de68c549..60a3965c9 100644
--- a/numpy/core/src/multiarray/multiarraymodule.h
+++ b/numpy/core/src/multiarray/multiarraymodule.h
@@ -7,6 +7,7 @@ NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_array_wrap;
 NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_array_finalize;
 NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_buffer;
 NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_ufunc;
+NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_wrapped;
 NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_order;
 NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_copy;
 NPY_VISIBILITY_HIDDEN extern PyObject * npy_ma_str_dtype;
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index 90cff4077..18a2cc84f 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -1248,9 +1248,9 @@ npyiter_prepare_operands(int nop, PyArrayObject **op_in,
     return 1;
 
   fail_nop:
-    iop = nop;
+    iop = nop - 1;
   fail_iop:
-    for (i = 0; i < iop; ++i) {
+    for (i = 0; i < iop+1; ++i) {
         Py_XDECREF(op[i]);
         Py_XDECREF(op_dtype[i]);
     }
@@ -3175,6 +3175,7 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
                                         &stransfer,
                                         &transferdata,
                                         &needs_api) != NPY_SUCCEED) {
+                    iop -= 1;  /* This one cannot be cleaned up yet. */
                     goto fail;
                 }
                 readtransferfn[iop] = stransfer;
@@ -3268,7 +3269,7 @@ npyiter_allocate_transfer_functions(NpyIter *iter)
     return 1;
 
 fail:
-    for (i = 0; i < iop; ++i) {
+    for (i = 0; i < iop+1; ++i) {
         if (readtransferdata[iop] != NULL) {
             NPY_AUXDATA_FREE(readtransferdata[iop]);
             readtransferdata[iop] = NULL;
diff --git a/numpy/core/src/multiarray/nditer_pywrap.c b/numpy/core/src/multiarray/nditer_pywrap.c
index 5a9f3c5fa..30a81e0ca 100644
--- a/numpy/core/src/multiarray/nditer_pywrap.c
+++ b/numpy/core/src/multiarray/nditer_pywrap.c
@@ -2355,6 +2355,8 @@ npyiter_close(NewNpyArrayIterObject *self)
     }
     ret = NpyIter_Deallocate(iter);
     self->iter = NULL;
+    Py_XDECREF(self->nested_child);
+    self->nested_child = NULL;
     if (ret < 0) {
         return NULL;
     }
diff --git a/numpy/core/src/multiarray/number.c b/numpy/core/src/multiarray/number.c
index d153a8a64..420501ce2 100644
--- a/numpy/core/src/multiarray/number.c
+++ b/numpy/core/src/multiarray/number.c
@@ -599,15 +599,16 @@ array_positive(PyArrayObject *m1)
             PyErr_Restore(exc, val, tb);
             return NULL;
         }
+        Py_XDECREF(exc);
+        Py_XDECREF(val);
+        Py_XDECREF(tb);
+
         /* 2018-06-28, 1.16.0 */
         if (DEPRECATE("Applying '+' to a non-numerical array is "
                       "ill-defined. Returning a copy, but in the future "
                       "this will error.") < 0) {
             return NULL;
         }
-        Py_XDECREF(exc);
-        Py_XDECREF(val);
-        Py_XDECREF(tb);
         value = PyArray_Return((PyArrayObject *)PyArray_Copy(m1));
     }
     return value;
diff --git a/numpy/core/src/multiarray/refcount.c b/numpy/core/src/multiarray/refcount.c
index 4b018b056..b8230c81a 100644
--- a/numpy/core/src/multiarray/refcount.c
+++ b/numpy/core/src/multiarray/refcount.c
@@ -19,8 +19,12 @@
 static void
 _fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype);
 
-/* Incref all objects found at this record */
+
 /*NUMPY_API
+ * XINCREF all objects in a single array item. This is complicated for
+ * structured datatypes where the position of objects needs to be extracted.
+ * The function is executed recursively for each nested field or subarray dtype
+ * such as `np.dtype([("field1", "O"), ("field2", "f,O", (3,2))])`
  */
 NPY_NO_EXPORT void
 PyArray_Item_INCREF(char *data, PyArray_Descr *descr)
@@ -51,11 +55,37 @@ PyArray_Item_INCREF(char *data, PyArray_Descr *descr)
             PyArray_Item_INCREF(data + offset, new);
         }
     }
+    else if (PyDataType_HASSUBARRAY(descr)) {
+        int size, i, inner_elsize;
+
+        inner_elsize = descr->subarray->base->elsize;
+        if (inner_elsize == 0) {
+            /* There cannot be any elements, so return */
+            return;
+        }
+        /* Subarrays are always contiguous in memory */
+        size = descr->elsize / inner_elsize;
+
+        for (i = 0; i < size; i++){
+            /* Recursively increment the reference count of subarray elements */
+            PyArray_Item_INCREF(data + i * inner_elsize,
+                                descr->subarray->base);
+        }
+    }
+    else {
+        /* This path should not be reachable. */
+        assert(0);
+    }
     return;
 }
 
-/* XDECREF all objects found at this record */
+
 /*NUMPY_API
+ *
+ * XDECREF all objects in a single array item. This is complicated for
+ * structured datatypes where the position of objects needs to be extracted.
+ * The function is executed recursively for each nested field or subarray dtype
+ * such as `np.dtype([("field1", "O"), ("field2", "f,O", (3,2))])`
  */
 NPY_NO_EXPORT void
 PyArray_Item_XDECREF(char *data, PyArray_Descr *descr)
@@ -87,6 +117,27 @@ PyArray_Item_XDECREF(char *data, PyArray_Descr *descr)
                 PyArray_Item_XDECREF(data + offset, new);
             }
         }
+    else if (PyDataType_HASSUBARRAY(descr)) {
+        int size, i, inner_elsize;
+
+        inner_elsize = descr->subarray->base->elsize;
+        if (inner_elsize == 0) {
+            /* There cannot be any elements, so return */
+            return;
+        }
+        /* Subarrays are always contiguous in memory */
+        size = descr->elsize / inner_elsize;
+
+        for (i = 0; i < size; i++){
+            /* Recursively decrement the reference count of subarray elements */
+            PyArray_Item_XDECREF(data + i * inner_elsize,
+                                 descr->subarray->base);
+        }
+    }
+    else {
+        /* This path should not be reachable. */
+        assert(0);
+    }
     return;
 }
 
@@ -258,6 +309,10 @@ _fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype)
             Py_XDECREF(arr);
         }
     }
+    if (dtype->type_num == NPY_OBJECT) {
+        Py_XINCREF(obj);
+        NPY_COPY_PYOBJECT_PTR(optr, &obj);
+    }
     else if (PyDataType_HASFIELDS(dtype)) {
         PyObject *key, *value, *title = NULL;
         PyArray_Descr *new;
@@ -274,15 +329,26 @@ _fillobject(char *optr, PyObject *obj, PyArray_Descr *dtype)
             _fillobject(optr + offset, obj, new);
         }
     }
-    else {
-        npy_intp i;
-        npy_intp nsize = dtype->elsize / sizeof(obj);
+    else if (PyDataType_HASSUBARRAY(dtype)) {
+        int size, i, inner_elsize;
 
-        for (i = 0; i < nsize; i++) {
-            Py_XINCREF(obj);
-            NPY_COPY_PYOBJECT_PTR(optr, &obj);
-            optr += sizeof(obj);
+        inner_elsize = dtype->subarray->base->elsize;
+        if (inner_elsize == 0) {
+            /* There cannot be any elements, so return */
+            return;
+        }
+        /* Subarrays are always contiguous in memory */
+        size = dtype->elsize / inner_elsize;
+
+        /* Call _fillobject on each item recursively. */
+        for (i = 0; i < size; i++){
+            _fillobject(optr, obj, dtype->subarray->base);
+            optr += inner_elsize;
         }
-        return;
     }
+    else {
+        /* This path should not be reachable. */
+        assert(0);
+    }
+    return;
 }
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 2f71c8ae9..52de31289 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -2599,6 +2599,8 @@ NPY_NO_EXPORT PyTypeObject PyGenericArrType_Type = {
 static void
 void_dealloc(PyVoidScalarObject *v)
 {
+    _dealloc_cached_buffer_info((PyObject *)v);
+
     if (v->flags & NPY_ARRAY_OWNDATA) {
         npy_free_cache(v->obval, Py_SIZE(v));
     }
diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c
index 8e8090002..2e8fb514f 100644
--- a/numpy/core/src/multiarray/usertypes.c
+++ b/numpy/core/src/multiarray/usertypes.c
@@ -40,19 +40,27 @@ maintainer email:  oliphant.travis@ieee.org
 
 NPY_NO_EXPORT PyArray_Descr **userdescrs=NULL;
 
-static int *
-_append_new(int *types, int insert)
+static int
+_append_new(int **p_types, int insert)
 {
     int n = 0;
     int *newtypes;
+    int *types = *p_types;  /* list terminated by NPY_NOTYPE */
 
     while (types[n] != NPY_NOTYPE) {
         n++;
     }
     newtypes = (int *)realloc(types, (n + 2)*sizeof(int));
+    if (newtypes == NULL) {
+        PyErr_NoMemory();
+        return -1;  /* *p_types has not been modified */
+    }
     newtypes[n] = insert;
     newtypes[n + 1] = NPY_NOTYPE;
-    return newtypes;
+
+    /* Hand the grown list (insert + new terminator) back to the caller */
+    *p_types = newtypes;
+    return 0;
 }
 
 static npy_bool
@@ -247,10 +255,13 @@ PyArray_RegisterCanCast(PyArray_Descr *descr, int totype,
          */
         if (descr->f->cancastto == NULL) {
             descr->f->cancastto = (int *)malloc(1*sizeof(int));
+            if (descr->f->cancastto == NULL) {
+                PyErr_NoMemory();
+                return -1;
+            }
             descr->f->cancastto[0] = NPY_NOTYPE;
         }
-        descr->f->cancastto = _append_new(descr->f->cancastto,
-                                          totype);
+        return _append_new(&descr->f->cancastto, totype);
     }
     else {
         /* register with cancastscalarkindto */
@@ -258,6 +269,10 @@ PyArray_RegisterCanCast(PyArray_Descr *descr, int totype,
             int i;
             descr->f->cancastscalarkindto =
                 (int **)malloc(NPY_NSCALARKINDS* sizeof(int*));
+            if (descr->f->cancastscalarkindto == NULL) {
+                PyErr_NoMemory();
+                return -1;
+            }
             for (i = 0; i < NPY_NSCALARKINDS; i++) {
                 descr->f->cancastscalarkindto[i] = NULL;
             }
@@ -265,11 +280,13 @@ PyArray_RegisterCanCast(PyArray_Descr *descr, int totype,
         if (descr->f->cancastscalarkindto[scalar] == NULL) {
             descr->f->cancastscalarkindto[scalar] =
                 (int *)malloc(1*sizeof(int));
+            if (descr->f->cancastscalarkindto[scalar] == NULL) {
+                PyErr_NoMemory();
+                return -1;
+            }
             descr->f->cancastscalarkindto[scalar][0] =
                 NPY_NOTYPE;
         }
-        descr->f->cancastscalarkindto[scalar] =
-            _append_new(descr->f->cancastscalarkindto[scalar], totype);
+        return _append_new(&descr->f->cancastscalarkindto[scalar], totype);
     }
-    return 0;
 }
diff --git a/numpy/core/src/npymath/halffloat.c b/numpy/core/src/npymath/halffloat.c
index c2bd28d60..84af86009 100644
--- a/numpy/core/src/npymath/halffloat.c
+++ b/numpy/core/src/npymath/halffloat.c
@@ -301,15 +301,23 @@ npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f)
             npy_set_floatstatus_underflow();
         }
 #endif
+        /*
+         * Usually the significand is shifted by 13. For subnormals an
+         * additional shift needs to occur. This shift is one for the largest
+         * exponent giving a subnormal `f_exp = 0x38000000 >> 23 = 112`, which
+         * offsets the new first bit. At most the shift can be 1+10 bits.
+         */
         f_sig >>= (113 - f_exp);
         /* Handle rounding by adding 1 to the bit beyond half precision */
 #if NPY_HALF_ROUND_TIES_TO_EVEN
         /*
          * If the last bit in the half significand is 0 (already even), and
          * the remaining bit pattern is 1000...0, then we do not add one
-         * to the bit after the half significand.  In all other cases, we do.
+         * to the bit after the half significand. However, the (113 - f_exp)
+         * shift can lose up to 11 bits, so the || checks them in the original.
+         * In all other cases, we can just add one.
          */
-        if ((f_sig&0x00003fffu) != 0x00001000u) {
+        if (((f_sig&0x00003fffu) != 0x00001000u) || (f&0x000007ffu)) {
             f_sig += 0x00001000u;
         }
 #else
@@ -416,7 +424,16 @@ npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
             npy_set_floatstatus_underflow();
         }
 #endif
-        d_sig >>= (1009 - d_exp);
+        /*
+         * Unlike floats, doubles have enough room to shift left to align
+         * the subnormal significand leading to no loss of the last bits.
+         * The smallest possible exponent giving a subnormal is:
+         * `d_exp = 0x3e60000000000000 >> 52 = 998`. All larger subnormals are
+         * shifted with respect to it. This adds a shift of 10+1 bits to the
+         * final right shift compared to the one in the normal branch.
+         */
+        assert(d_exp - 998 >= 0);
+        d_sig <<= (d_exp - 998);
         /* Handle rounding by adding 1 to the bit beyond half precision */
 #if NPY_HALF_ROUND_TIES_TO_EVEN
         /*
@@ -424,13 +441,13 @@ npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d)
          * the remaining bit pattern is 1000...0, then we do not add one
          * to the bit after the half significand.  In all other cases, we do.
          */
-        if ((d_sig&0x000007ffffffffffULL) != 0x0000020000000000ULL) {
-            d_sig += 0x0000020000000000ULL;
+        if ((d_sig&0x003fffffffffffffULL) != 0x0010000000000000ULL) {
+            d_sig += 0x0010000000000000ULL;
         }
 #else
-        d_sig += 0x0000020000000000ULL;
+        d_sig += 0x0010000000000000ULL;
 #endif
-        h_sig = (npy_uint16) (d_sig >> 42);
+        h_sig = (npy_uint16) (d_sig >> 53);
         /*
          * If the rounding causes a bit to spill into h_exp, it will
          * increment h_exp from zero to one and h_sig will be zero.
diff --git a/numpy/core/src/umath/_struct_ufunc_tests.c.src b/numpy/core/src/umath/_struct_ufunc_tests.c.src
index b831d5c2a..5c6e235e0 100644
--- a/numpy/core/src/umath/_struct_ufunc_tests.c.src
+++ b/numpy/core/src/umath/_struct_ufunc_tests.c.src
@@ -114,6 +114,7 @@ PyMODINIT_FUNC init_struct_ufunc_tests(void)
                                 dtypes,
                                 NULL);
 
+    Py_DECREF(dtype);
     d = PyModule_GetDict(m);
 
     PyDict_SetItemString(d, "add_triplet", add_triplet);
diff --git a/numpy/core/src/umath/_umath_tests.c.src b/numpy/core/src/umath/_umath_tests.c.src
index 8cb74f177..6c3bcce71 100644
--- a/numpy/core/src/umath/_umath_tests.c.src
+++ b/numpy/core/src/umath/_umath_tests.c.src
@@ -564,7 +564,7 @@ UMath_Tests_test_signature(PyObject *NPY_UNUSED(dummy), PyObject *args)
         core_dim_sizes = Py_None;
     }
     Py_DECREF(f);
-    return Py_BuildValue("iOOOO", core_enabled, core_num_dims,
+    return Py_BuildValue("iNNNN", core_enabled, core_num_dims,
                          core_dim_ixs, core_dim_flags, core_dim_sizes);
 
 fail:
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index ae3ece77b..975a5e6b8 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1603,7 +1603,7 @@ TIMEDELTA_mm_m_remainder(char **args, npy_intp *dimensions, npy_intp *steps, voi
         else {
             if (in2 == 0) {
                 npy_set_floatstatus_divbyzero();
-                *((npy_timedelta *)op1) = 0;
+                *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
             }
             else {
                 /* handle mixed case the way Python does */
@@ -1619,6 +1619,85 @@ TIMEDELTA_mm_m_remainder(char **args, npy_intp *dimensions, npy_intp *steps, voi
     }
 }
 
+/*
+ * Inner loop for timedelta // timedelta -> int64.
+ *
+ * A NaT operand sets the "invalid" floating point status and a zero divisor
+ * sets "divide by zero"; either way 0 is written. All other quotients are
+ * rounded toward negative infinity.
+ */
+NPY_NO_EXPORT void
+TIMEDELTA_mm_q_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    BINARY_LOOP {
+        const npy_timedelta num = *(npy_timedelta *)ip1;
+        const npy_timedelta den = *(npy_timedelta *)ip2;
+        npy_int64 *quot_out = (npy_int64 *)op1;
+
+        if (num == NPY_DATETIME_NAT || den == NPY_DATETIME_NAT) {
+            npy_set_floatstatus_invalid();
+            *quot_out = 0;
+        }
+        else if (den == 0) {
+            npy_set_floatstatus_divbyzero();
+            *quot_out = 0;
+        }
+        else {
+            /*
+             * C division truncates toward zero: go one lower when exactly
+             * one operand is positive and the remainder is non-zero.
+             */
+            const int round_down = ((num > 0) != (den > 0)) &&
+                                   (num % den != 0);
+            *quot_out = round_down ? num/den - 1 : num/den;
+        }
+    }
+}
+
+/*
+ * Inner loop for divmod(timedelta, timedelta) -> (int64, timedelta).
+ *
+ * A NaT operand sets the "invalid" floating point status and a zero divisor
+ * sets "divide by zero"; either way the quotient is 0 and the remainder is
+ * NaT. Otherwise the quotient is floored and the remainder is adjusted to
+ * match it.
+ */
+NPY_NO_EXPORT void
+TIMEDELTA_mm_qm_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func))
+{
+    BINARY_LOOP_TWO_OUT {
+        const npy_timedelta num = *(npy_timedelta *)ip1;
+        const npy_timedelta den = *(npy_timedelta *)ip2;
+        npy_int64 *quot_out = (npy_int64 *)op1;
+        npy_timedelta *rem_out = (npy_timedelta *)op2;
+
+        if (num == NPY_DATETIME_NAT || den == NPY_DATETIME_NAT) {
+            npy_set_floatstatus_invalid();
+            *quot_out = 0;
+            *rem_out = NPY_DATETIME_NAT;
+        }
+        else if (den == 0) {
+            npy_set_floatstatus_divbyzero();
+            *quot_out = 0;
+            *rem_out = NPY_DATETIME_NAT;
+        }
+        else {
+            const npy_int64 trunc_quo = num / den;
+            const npy_timedelta trunc_rem = num % den;
+
+            if (trunc_rem != 0 && ((num > 0) != (den > 0))) {
+                /* convert the truncated result into a floored one */
+                *quot_out = trunc_quo - 1;
+                *rem_out = trunc_rem + den;
+            }
+            else {
+                *quot_out = trunc_quo;
+                *rem_out = trunc_rem;
+            }
+        }
+    }
+}
+
 /*
  *****************************************************************************
  **                             FLOAT LOOPS                                 **
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index 9b6327308..5264a6533 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -474,8 +474,14 @@ NPY_NO_EXPORT void
 TIMEDELTA_mm_d_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 
 NPY_NO_EXPORT void
+TIMEDELTA_mm_q_floor_divide(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
+NPY_NO_EXPORT void
 TIMEDELTA_mm_m_remainder(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
 
+NPY_NO_EXPORT void
+TIMEDELTA_mm_qm_divmod(char **args, npy_intp *dimensions, npy_intp *steps, void *NPY_UNUSED(func));
+
 /* Special case equivalents to above functions */
 
 #define TIMEDELTA_mq_m_true_divide TIMEDELTA_mq_m_divide
diff --git a/numpy/core/src/umath/reduction.c b/numpy/core/src/umath/reduction.c
index 6d04ce372..791d3693f 100644
--- a/numpy/core/src/umath/reduction.c
+++ b/numpy/core/src/umath/reduction.c
@@ -186,7 +186,6 @@ conform_reduce_result(int ndim, npy_bool *axis_flags,
             return NULL;
         }
 
-        Py_INCREF(ret);
         if (PyArray_SetWritebackIfCopyBase(ret_copy, (PyArrayObject *)ret) < 0) {
             Py_DECREF(ret);
             Py_DECREF(ret_copy);
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index a2df58698..1ab48bb90 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -3063,6 +3063,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     Py_XDECREF(axis);
     Py_XDECREF(full_args.in);
     Py_XDECREF(full_args.out);
+    PyArray_free(remap_axis_memory);
+    PyArray_free(remap_axis);
 
     NPY_UF_DBG_PRINT1("Returning code %d\n", retval);
 
diff --git a/numpy/core/src/umath/ufunc_type_resolution.c b/numpy/core/src/umath/ufunc_type_resolution.c
index ec60d9cfd..c2d81fc5d 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.c
+++ b/numpy/core/src/umath/ufunc_type_resolution.c
@@ -1114,7 +1114,16 @@ PyUFunc_DivisionTypeResolver(PyUFuncObject *ufunc,
             }
             out_dtypes[1] = out_dtypes[0];
             Py_INCREF(out_dtypes[1]);
+
+            /*
+             * TODO: split function into truediv and floordiv resolvers
+             */
+            if (strcmp(ufunc->name, "floor_divide") == 0) {
+                out_dtypes[2] = PyArray_DescrFromType(NPY_LONGLONG);
+            }
+            else {
             out_dtypes[2] = PyArray_DescrFromType(NPY_DOUBLE);
+            }
             if (out_dtypes[2] == NULL) {
                 Py_DECREF(out_dtypes[0]);
                 out_dtypes[0] = NULL;
@@ -2247,3 +2256,72 @@ type_tuple_type_resolver(PyUFuncObject *self,
 
     return -1;
 }
+
+/*
+ * Type resolver for divmod that adds support for timedelta operands.
+ *
+ * Without a datetime or timedelta operand the work is delegated to
+ * PyUFunc_DefaultTypeResolver. For m8 divmod m8 the dtypes are resolved as
+ * (m8, m8) -> (int64, m8), with m8 being the promoted dtype of both
+ * operands. Any other combination is reported through
+ * raise_binary_type_reso_error.
+ *
+ * Returns 0 on success. When the timedelta path fails, -1 is returned and
+ * every reference already stored in out_dtypes has been released.
+ */
+NPY_NO_EXPORT int
+PyUFunc_DivmodTypeResolver(PyUFuncObject *ufunc,
+                                NPY_CASTING casting,
+                                PyArrayObject **operands,
+                                PyObject *type_tup,
+                                PyArray_Descr **out_dtypes)
+{
+    int type_num1, type_num2;
+    int i;
+
+    type_num1 = PyArray_DESCR(operands[0])->type_num;
+    type_num2 = PyArray_DESCR(operands[1])->type_num;
+
+    /* Use the default when datetime and timedelta are not involved */
+    if (!PyTypeNum_ISDATETIME(type_num1) && !PyTypeNum_ISDATETIME(type_num2)) {
+        return PyUFunc_DefaultTypeResolver(ufunc, casting, operands,
+                    type_tup, out_dtypes);
+    }
+    /* Only timedelta divmod timedelta is supported beyond that */
+    if (type_num1 != NPY_TIMEDELTA || type_num2 != NPY_TIMEDELTA) {
+        return raise_binary_type_reso_error(ufunc, operands);
+    }
+
+    out_dtypes[0] = PyArray_PromoteTypes(PyArray_DESCR(operands[0]),
+                                         PyArray_DESCR(operands[1]));
+    if (out_dtypes[0] == NULL) {
+        return -1;
+    }
+    out_dtypes[1] = out_dtypes[0];
+    Py_INCREF(out_dtypes[1]);
+    /*
+     * PyArray_DescrFromType hands back a new reference, so no additional
+     * Py_INCREF is needed (one would leak the descriptor).
+     */
+    out_dtypes[2] = PyArray_DescrFromType(NPY_LONGLONG);
+    if (out_dtypes[2] == NULL) {
+        Py_DECREF(out_dtypes[0]);
+        out_dtypes[0] = NULL;
+        Py_DECREF(out_dtypes[1]);
+        out_dtypes[1] = NULL;
+        return -1;
+    }
+    out_dtypes[3] = out_dtypes[0];
+    Py_INCREF(out_dtypes[3]);
+
+    /* Check against the casting rules */
+    if (PyUFunc_ValidateCasting(ufunc, casting, operands, out_dtypes) < 0) {
+        for (i = 0; i < 4; ++i) {
+            Py_DECREF(out_dtypes[i]);
+            out_dtypes[i] = NULL;
+        }
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/numpy/core/src/umath/ufunc_type_resolution.h b/numpy/core/src/umath/ufunc_type_resolution.h
index 2f37af753..78313b1ef 100644
--- a/numpy/core/src/umath/ufunc_type_resolution.h
+++ b/numpy/core/src/umath/ufunc_type_resolution.h
@@ -99,6 +99,13 @@ PyUFunc_RemainderTypeResolver(PyUFuncObject *ufunc,
                               PyObject *type_tup,
                               PyArray_Descr **out_dtypes);
 
+NPY_NO_EXPORT int
+PyUFunc_DivmodTypeResolver(PyUFuncObject *ufunc,
+                              NPY_CASTING casting,
+                              PyArrayObject **operands,
+                              PyObject *type_tup,
+                              PyArray_Descr **out_dtypes);
+
 /*
  * Does a linear search for the best inner loop of the ufunc.
  *
diff --git a/numpy/core/src/umath/umathmodule.c b/numpy/core/src/umath/umathmodule.c
index 5de19fec2..b334a89da 100644
--- a/numpy/core/src/umath/umathmodule.c
+++ b/numpy/core/src/umath/umathmodule.c
@@ -174,11 +174,17 @@ add_newdoc_ufunc(PyObject *NPY_UNUSED(dummy), PyObject *args)
     char *docstr, *newdocstr;
 
 #if defined(NPY_PY3K)
+    PyObject *tmp;
+
     if (!PyArg_ParseTuple(args, "O!O!:_add_newdoc_ufunc", &PyUFunc_Type, &ufunc,
                                         &PyUnicode_Type, &str)) {
         return NULL;
     }
-    docstr = PyBytes_AS_STRING(PyUnicode_AsUTF8String(str));
+    tmp = PyUnicode_AsUTF8String(str);
+    if (tmp == NULL) {
+        return NULL;
+    }
+    docstr = PyBytes_AS_STRING(tmp);
 #else
     if (!PyArg_ParseTuple(args, "O!O!:_add_newdoc_ufunc", &PyUFunc_Type, &ufunc,
                                          &PyString_Type, &str)) {
@@ -190,6 +196,9 @@ add_newdoc_ufunc(PyObject *NPY_UNUSED(dummy), PyObject *args)
     if (NULL != ufunc->doc) {
         PyErr_SetString(PyExc_ValueError,
                 "Cannot change docstring of ufunc with non-NULL docstring");
+#if defined(NPY_PY3K)
+        Py_DECREF(tmp);
+#endif
         return NULL;
     }
 
@@ -203,6 +212,9 @@ add_newdoc_ufunc(PyObject *NPY_UNUSED(dummy), PyObject *args)
     strcpy(newdocstr, docstr);
     ufunc->doc = newdocstr;
 
+#if defined(NPY_PY3K)
+    Py_DECREF(tmp);
+#endif
     Py_RETURN_NONE;
 }
 
diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py
index 7a858d2e2..f2b8fdca7 100644
--- a/numpy/core/tests/test_arrayprint.py
+++ b/numpy/core/tests/test_arrayprint.py
@@ -90,6 +90,7 @@ class TestArrayRepr(object):
         assert_equal(repr(x),
             'sub(sub(sub(..., dtype=object), dtype=object), dtype=object)')
         assert_equal(str(x), '...')
+        x[()] = 0  # resolve circular references for garbage collector
 
         # nested 0d-subclass-object
         x = sub(None)
@@ -124,11 +125,13 @@ class TestArrayRepr(object):
         arr0d[()] = arr0d
         assert_equal(repr(arr0d),
             'array(array(..., dtype=object), dtype=object)')
+        arr0d[()] = 0  # resolve recursion for garbage collector
 
         arr1d = np.array([None, None])
         arr1d[1] = arr1d
         assert_equal(repr(arr1d),
             'array([None, array(..., dtype=object)], dtype=object)')
+        arr1d[1] = 0  # resolve recursion for garbage collector
 
         first = np.array(None)
         second = np.array(None)
@@ -136,6 +139,7 @@ class TestArrayRepr(object):
         second[()] = first
         assert_equal(repr(first),
             'array(array(array(..., dtype=object), dtype=object), dtype=object)')
+        first[()] = 0  # resolve circular references for garbage collector
 
     def test_containing_list(self):
         # printing square brackets directly would be ambiguuous
diff --git a/numpy/core/tests/test_datetime.py b/numpy/core/tests/test_datetime.py
index b2ce0402a..9832b4275 100644
--- a/numpy/core/tests/test_datetime.py
+++ b/numpy/core/tests/test_datetime.py
@@ -1081,6 +1081,133 @@ class TestDateTime(object):
                 check(np.timedelta64(0), f, nat)
                 check(nat, f, nat)
 
+    @pytest.mark.parametrize("op1, op2, exp", [
+        # m8 same units round down
+        (np.timedelta64(7, 's'),
+         np.timedelta64(4, 's'),
+         1),
+        # m8 same units round down with negative
+        (np.timedelta64(7, 's'),
+         np.timedelta64(-4, 's'),
+         -2),
+        # m8 same units negative no round down
+        (np.timedelta64(8, 's'),
+         np.timedelta64(-4, 's'),
+         -2),
+        # m8 different units
+        (np.timedelta64(1, 'm'),
+         np.timedelta64(31, 's'),
+         1),
+        # m8 generic units
+        (np.timedelta64(1890),
+         np.timedelta64(31),
+         60),
+        # Y // M works
+        (np.timedelta64(2, 'Y'),
+         np.timedelta64('13', 'M'),
+         1),
+        # handle 1D arrays
+        (np.array([1, 2, 3], dtype='m8'),
+         np.array([2], dtype='m8'),
+         np.array([0, 1, 1], dtype=np.int64)),
+        ])
+    def test_timedelta_floor_divide(self, op1, op2, exp):
+        assert_equal(op1 // op2, exp)
+
+    @pytest.mark.parametrize("op1, op2", [
+        # div by 0
+        (np.timedelta64(10, 'us'),
+         np.timedelta64(0, 'us')),
+        # div with NaT
+        (np.timedelta64('NaT'),
+         np.timedelta64(50, 'us')),
+        # special case for int64 min
+        # in integer floor division
+        (np.timedelta64(np.iinfo(np.int64).min),
+         np.timedelta64(-1)),
+        ])
+    def test_timedelta_floor_div_warnings(self, op1, op2):
+        with assert_warns(RuntimeWarning):
+            actual = op1 // op2
+            assert_equal(actual, 0)
+            assert_equal(actual.dtype, np.int64)
+
+    @pytest.mark.parametrize("val1, val2", [
+        # the smallest integer that can't be represented
+        # exactly in a double should be preserved if we avoid
+        # casting to double in floordiv operation
+        (9007199254740993, 1),
+        # stress the alternate floordiv code path where
+        # operand signs don't match and remainder isn't 0
+        (9007199254740999, -2),
+        ])
+    def test_timedelta_floor_div_precision(self, val1, val2):
+        op1 = np.timedelta64(val1)
+        op2 = np.timedelta64(val2)
+        actual = op1 // op2
+        # Python reference integer floor
+        expected = val1 // val2
+        assert_equal(actual, expected)
+
+    @pytest.mark.parametrize("val1, val2", [
+        # years and months sometimes can't be unambiguously
+        # divided for floor division operation
+        (np.timedelta64(7, 'Y'),
+         np.timedelta64(3, 's')),
+        (np.timedelta64(7, 'M'),
+         np.timedelta64(1, 'D')),
+        ])
+    def test_timedelta_floor_div_error(self, val1, val2):
+        with assert_raises_regex(TypeError, "common metadata divisor"):
+            val1 // val2
+
+    @pytest.mark.parametrize("op1, op2", [
+        # reuse the test cases from floordiv
+        (np.timedelta64(7, 's'),
+         np.timedelta64(4, 's')),
+        # m8 same units round down with negative
+        (np.timedelta64(7, 's'),
+         np.timedelta64(-4, 's')),
+        # m8 same units negative no round down
+        (np.timedelta64(8, 's'),
+         np.timedelta64(-4, 's')),
+        # m8 different units
+        (np.timedelta64(1, 'm'),
+         np.timedelta64(31, 's')),
+        # m8 generic units
+        (np.timedelta64(1890),
+         np.timedelta64(31)),
+        # Y // M works
+        (np.timedelta64(2, 'Y'),
+         np.timedelta64('13', 'M')),
+        # handle 1D arrays
+        (np.array([1, 2, 3], dtype='m8'),
+         np.array([2], dtype='m8')),
+        ])
+    def test_timedelta_divmod(self, op1, op2):
+        expected = (op1 // op2, op1 % op2)
+        assert_equal(divmod(op1, op2), expected)
+
+    @pytest.mark.parametrize("op1, op2", [
+        # reuse cases from floordiv
+        # div by 0
+        (np.timedelta64(10, 'us'),
+         np.timedelta64(0, 'us')),
+        # div with NaT
+        (np.timedelta64('NaT'),
+         np.timedelta64(50, 'us')),
+        # special case for int64 min
+        # in integer floor division
+        (np.timedelta64(np.iinfo(np.int64).min),
+         np.timedelta64(-1)),
+        ])
+    def test_timedelta_divmod_warnings(self, op1, op2):
+        with assert_warns(RuntimeWarning):
+            expected = (op1 // op2, op1 % op2)
+        with assert_warns(RuntimeWarning):
+            actual = divmod(op1, op2)
+        assert_equal(actual, expected)
+
     def test_datetime_divide(self):
         for dta, tda, tdb, tdc, tdd in \
                     [
@@ -1111,8 +1238,6 @@ class TestDateTime(object):
             assert_equal(tda / tdd, 60.0)
             assert_equal(tdd / tda, 1.0 / 60.0)
 
-            # m8 // m8
-            assert_raises(TypeError, np.floor_divide, tda, tdb)
             # int / m8
             assert_raises(TypeError, np.divide, 2, tdb)
             # float / m8
@@ -1680,7 +1805,7 @@ class TestDateTime(object):
     def test_timedelta_modulus_div_by_zero(self):
         with assert_warns(RuntimeWarning):
             actual = np.timedelta64(10, 's') % np.timedelta64(0, 's')
-            assert_equal(actual, np.timedelta64(0, 's'))
+            assert_equal(actual, np.timedelta64('NaT'))
 
     @pytest.mark.parametrize("val1, val2", [
         # cases where one operand is not
diff --git a/numpy/core/tests/test_dtype.py b/numpy/core/tests/test_dtype.py
index c55751e3c..8f371197c 100644
--- a/numpy/core/tests/test_dtype.py
+++ b/numpy/core/tests/test_dtype.py
@@ -4,10 +4,12 @@ import sys
 import operator
 import pytest
 import ctypes
+import gc
 
 import numpy as np
 from numpy.core._rational_tests import rational
-from numpy.testing import assert_, assert_equal, assert_raises
+from numpy.testing import (
+    assert_, assert_equal, assert_array_equal, assert_raises, HAS_REFCOUNT)
 from numpy.core.numeric import pickle
 
 def assert_dtype_equal(a, b):
@@ -446,6 +448,173 @@ class TestSubarray(object):
         assert_equal(t1.alignment, t2.alignment)
 
 
+def iter_struct_object_dtypes():
+    """
+    Iterates over a few complex dtypes and object pattern which
+    fill the array with a given object (defaults to a singleton).
+
+    Yields
+    ------
+    dtype : dtype
+    pattern : tuple
+        Structured tuple for use with `np.array`.
+    count : int
+        Number of objects stored in the dtype.
+    singleton : object
+        A singleton object. The returned pattern is constructed so that
+        all objects inside the datatype are set to the singleton.
+    """
+    obj = object()
+
+    dt = np.dtype([('b', 'O', (2, 3))])
+    p = ([[obj] * 3] * 2,)
+    yield pytest.param(dt, p, 6, obj, id="<subarray>")
+
+    dt = np.dtype([('a', 'i4'), ('b', 'O', (2, 3))])
+    p = (0, [[obj] * 3] * 2)
+    yield pytest.param(dt, p, 6, obj, id="<subarray in field>")
+
+    dt = np.dtype([('a', 'i4'),
+                   ('b', [('ba', 'O'), ('bb', 'i1')], (2, 3))])
+    p = (0, [[(obj, 0)] * 3] * 2)
+    yield pytest.param(dt, p, 6, obj, id="<structured subarray 1>")
+
+    dt = np.dtype([('a', 'i4'),
+                   ('b', [('ba', 'O'), ('bb', 'O')], (2, 3))])
+    p = (0, [[(obj, obj)] * 3] * 2)
+    yield pytest.param(dt, p, 12, obj, id="<structured subarray 2>")
+
+
+@pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
+class TestStructuredObjectRefcounting:
+    """These tests cover various uses of complicated structured types which
+    include objects and thus require reference counting.
+    """
+    @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'],
+                             iter_struct_object_dtypes())
+    @pytest.mark.parametrize(["creation_func", "creation_obj"], [
+        pytest.param(np.empty, None,
+             # None is probably used for too many things
+             marks=pytest.mark.skip("unreliable due to python's behaviour")),
+        (np.ones, 1),
+        (np.zeros, 0)])
+    def test_structured_object_create_delete(self, dt, pat, count, singleton,
+                                             creation_func, creation_obj):
+        """Structured object reference counting in creation and deletion"""
+        # The test assumes that 0, 1, and None are singletons.
+        gc.collect()
+        before = sys.getrefcount(creation_obj)
+        arr = creation_func(3, dt)
+
+        now = sys.getrefcount(creation_obj)
+        assert now - before == count * 3
+        del arr
+        now = sys.getrefcount(creation_obj)
+        assert now == before
+
+    @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'],
+                             iter_struct_object_dtypes())
+    def test_structured_object_item_setting(self, dt, pat, count, singleton):
+        """Structured object reference counting for simple item setting"""
+        one = 1
+
+        gc.collect()
+        before = sys.getrefcount(singleton)
+        arr = np.array([pat] * 3, dt)
+        assert sys.getrefcount(singleton) - before == count * 3
+        # Fill with `1` and check that it was replaced correctly:
+        before2 = sys.getrefcount(one)
+        arr[...] = one
+        after2 = sys.getrefcount(one)
+        assert after2 - before2 == count * 3
+        del arr
+        gc.collect()
+        assert sys.getrefcount(one) == before2
+        assert sys.getrefcount(singleton) == before
+
+    @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'],
+                             iter_struct_object_dtypes())
+    @pytest.mark.parametrize(
+        ['shape', 'index', 'items_changed'],
+        [((3,), ([0, 2],), 2),
+         ((3, 2), ([0, 2], slice(None)), 4),
+         ((3, 2), ([0, 2], [1]), 2),
+         ((3,), ([True, False, True]), 2)])
+    def test_structured_object_indexing(self, shape, index, items_changed,
+                                        dt, pat, count, singleton):
+        """Structured object reference counting for advanced indexing."""
+        zero = 0
+        one = 1
+
+        arr = np.zeros(shape, dt)
+
+        gc.collect()
+        before_zero = sys.getrefcount(zero)
+        before_one = sys.getrefcount(one)
+        # Test item getting:
+        part = arr[index]
+        after_zero = sys.getrefcount(zero)
+        assert after_zero - before_zero == count * items_changed
+        del part
+        # Test item setting:
+        arr[index] = one
+        gc.collect()
+        after_zero = sys.getrefcount(zero)
+        after_one = sys.getrefcount(one)
+        assert before_zero - after_zero == count * items_changed
+        assert after_one - before_one == count * items_changed
+
+    @pytest.mark.parametrize(['dt', 'pat', 'count', 'singleton'],
+                             iter_struct_object_dtypes())
+    def test_structured_object_take_and_repeat(self, dt, pat, count, singleton):
+        """Structured object reference counting for specialized functions.
+        The older functions such as take and repeat use different code paths
+        than item setting (when writing this).
+        """
+        indices = [0, 1]
+
+        arr = np.array([pat] * 3, dt)
+        gc.collect()
+        before = sys.getrefcount(singleton)
+        res = arr.take(indices)
+        after = sys.getrefcount(singleton)
+        assert after - before == count * 2
+        new = res.repeat(10)
+        gc.collect()
+        after_repeat = sys.getrefcount(singleton)
+        assert after_repeat - after == count * 2 * 10
+
+
+class TestStructuredDtypeSparseFields(object):
+    """Tests subarray fields which contain sparse dtypes so that
+    not all memory is used by the dtype work. Such dtypes should
+    leave the underlying memory unchanged.
+    """
+    dtype = np.dtype([('a', {'names':['aa', 'ab'], 'formats':['f', 'f'],
+                             'offsets':[0, 4]}, (2, 3))])
+    sparse_dtype = np.dtype([('a', {'names':['ab'], 'formats':['f'],
+                                    'offsets':[4]}, (2, 3))])
+
+    @pytest.mark.xfail(reason="inaccessible data is changed see gh-12686.")
+    @pytest.mark.valgrind_error(reason="reads from unitialized buffers.")
+    def test_sparse_field_assignment(self):
+        arr = np.zeros(3, self.dtype)
+        sparse_arr = arr.view(self.sparse_dtype)
+
+        sparse_arr[...] = np.finfo(np.float32).max
+        # dtype is reduced when accessing the field, so shape is (3, 2, 3):
+        assert_array_equal(arr["a"]["aa"], np.zeros((3, 2, 3)))
+
+    def test_sparse_field_assignment_fancy(self):
+        # Fancy assignment goes to the copyswap function for complex types:
+        arr = np.zeros(3, self.dtype)
+        sparse_arr = arr.view(self.sparse_dtype)
+
+        sparse_arr[[0, 1, 2]] = np.finfo(np.float32).max
+        # dtype is reduced when accessing the field, so shape is (3, 2, 3):
+        assert_array_equal(arr["a"]["aa"], np.zeros((3, 2, 3)))
+
+
 class TestMonsterType(object):
     """Test deeply nested subtypes."""
 
diff --git a/numpy/core/tests/test_half.py b/numpy/core/tests/test_half.py
index b28c933db..770712501 100644
--- a/numpy/core/tests/test_half.py
+++ b/numpy/core/tests/test_half.py
@@ -69,6 +69,85 @@ class TestHalf(object):
         j = np.array(i_f16, dtype=int)
         assert_equal(i_int, j)
 
+    @pytest.mark.parametrize("offset", [None, "up", "down"])
+    @pytest.mark.parametrize("shift", [None, "up", "down"])
+    @pytest.mark.parametrize("float_t", [np.float32, np.float64])
+    def test_half_conversion_rounding(self, float_t, shift, offset):
+        # Assumes that round to even is used during casting.
+        max_pattern = np.float16(np.finfo(np.float16).max).view(np.uint16)
+
+        # Test all (positive) finite numbers, denormals are most interesting
+        # however:
+        f16s_patterns = np.arange(0, max_pattern+1, dtype=np.uint16)
+        f16s_float = f16s_patterns.view(np.float16).astype(float_t)
+
+        # Shift the values by half a bit up or down (or do not shift),
+        if shift == "up":
+            f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[1:]
+        elif shift == "down":
+            f16s_float = 0.5 * (f16s_float[:-1] + f16s_float[1:])[:-1]
+        else:
+            f16s_float = f16s_float[1:-1]
+
+        # Increase the float by a minimal value:
+        if offset == "up":
+            f16s_float = np.nextafter(f16s_float, float_t(1e50))
+        elif offset == "down":
+            f16s_float = np.nextafter(f16s_float, float_t(-1e50))
+
+        # Convert back to float16 and its bit pattern:
+        res_patterns = f16s_float.astype(np.float16).view(np.uint16)
+
+        # The above calculation tries the original values, or the exact
+        # mid points between the float16 values. It then further offsets them
+        # by as little as possible. If no offset occurs, "round to even"
+        # logic will be necessary, an arbitrarily small offset should cause
+        # normal up/down rounding always.
+
+        # Calculate the expected pattern:
+        cmp_patterns = f16s_patterns[1:-1].copy()
+
+        if shift == "down" and offset != "up":
+            shift_pattern = -1
+        elif shift == "up" and offset != "down":
+            shift_pattern = 1
+        else:
+            # There cannot be a shift, either shift is None, so all rounding
+            # will go back to original, or shift is reduced by offset too much.
+            shift_pattern = 0
+
+        # If rounding occurs, is it normal rounding or round to even?
+        if offset is None:
+            # Round to even occurs, modify only non-even, cast to allow + (-1)
+            cmp_patterns[0::2].view(np.int16)[...] += shift_pattern
+        else:
+            cmp_patterns.view(np.int16)[...] += shift_pattern
+
+        assert_equal(res_patterns, cmp_patterns)
+
+    @pytest.mark.parametrize(["float_t", "uint_t", "bits"],
+                             [(np.float32, np.uint32, 23),
+                              (np.float64, np.uint64, 52)])
+    def test_half_conversion_denormal_round_even(self, float_t, uint_t, bits):
+        # Test specifically that all bits are considered when deciding
+        # whether round to even should occur (i.e. no bits are lost at the
+        # end). Compare also gh-12721. The most bits can get lost for the
+        # smallest denormal:
+        smallest_value = np.uint16(1).view(np.float16).astype(float_t)
+        assert smallest_value == 2**-24
+
+        # Will be rounded to zero based on round to even rule:
+        rounded_to_zero = smallest_value / float_t(2)
+        assert rounded_to_zero.astype(np.float16) == 0
+
+        # The significand will be all 0 for the float_t, test that we do not
+        # lose the lower ones of these:
+        for i in range(bits):
+            # slightly increasing the value should make it round up:
+            larger_pattern = rounded_to_zero.view(uint_t) | uint_t(1 << i)
+            larger_value = larger_pattern.view(float_t)
+            assert larger_value.astype(np.float16) == smallest_value
+
     def test_nans_infs(self):
         with np.errstate(all='ignore'):
             # Check some of the ufuncs
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 06cabe2cb..8a196308c 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -3769,10 +3769,16 @@ class TestPickling(object):
                                                    ('c', float)])
             ]
 
+            refs = [weakref.ref(a) for a in DATA]
             for a in DATA:
                 assert_equal(
                         a, pickle.loads(pickle.dumps(a, protocol=proto)),
                         err_msg="%r" % a)
+            del a, DATA, carray
+            gc.collect()
+            # check for reference leaks (gh-12793)
+            for ref in refs:
+                assert ref() is None
 
     def _loads(self, obj):
         if sys.version_info[0] >= 3:
@@ -7009,12 +7015,11 @@ class TestArrayAttributeDeletion(object):
             assert_raises(AttributeError, delattr, a, s)
 
 
-def test_array_interface():
-    # Test scalar coercion within the array interface
+class TestArrayInterface():
     class Foo(object):
         def __init__(self, value):
             self.value = value
-            self.iface = {'typestr': '=f8'}
+            self.iface = {'typestr': 'f8'}
 
         def __float__(self):
             return float(self.value)
@@ -7023,22 +7028,39 @@ def test_array_interface():
         def __array_interface__(self):
             return self.iface
 
+
     f = Foo(0.5)
-    assert_equal(np.array(f), 0.5)
-    assert_equal(np.array([f]), [0.5])
-    assert_equal(np.array([f, f]), [0.5, 0.5])
-    assert_equal(np.array(f).dtype, np.dtype('=f8'))
-    # Test various shape definitions
-    f.iface['shape'] = ()
-    assert_equal(np.array(f), 0.5)
-    f.iface['shape'] = None
-    assert_raises(TypeError, np.array, f)
-    f.iface['shape'] = (1, 1)
-    assert_equal(np.array(f), [[0.5]])
-    f.iface['shape'] = (2,)
-    assert_raises(ValueError, np.array, f)
-
-    # test scalar with no shape
+
+    @pytest.mark.parametrize('val, iface, expected', [
+        (f, {}, 0.5),
+        ([f], {}, [0.5]),
+        ([f, f], {}, [0.5, 0.5]),
+        (f, {'shape': ()}, 0.5),
+        (f, {'shape': None}, TypeError),
+        (f, {'shape': (1, 1)}, [[0.5]]),
+        (f, {'shape': (2,)}, ValueError),
+        (f, {'strides': ()}, 0.5),
+        (f, {'strides': (2,)}, ValueError),
+        (f, {'strides': 16}, TypeError),
+        ])
+    def test_scalar_interface(self, val, iface, expected):
+        # Test scalar coercion within the array interface
+        self.f.iface = {'typestr': 'f8'}
+        self.f.iface.update(iface)
+        if HAS_REFCOUNT:
+            pre_cnt = sys.getrefcount(np.dtype('f8'))
+        if isinstance(expected, type):
+            assert_raises(expected, np.array, val)
+        else:
+            result = np.array(val)
+            assert_equal(np.array(val), expected)
+            assert result.dtype == 'f8'
+            del result
+        if HAS_REFCOUNT:
+            post_cnt = sys.getrefcount(np.dtype('f8'))
+            assert_equal(pre_cnt, post_cnt)
+
+def test_interface_no_shape():
     class ArrayLike(object):
         array = np.array(1)
         __array_interface__ = array.__array_interface__
@@ -7211,6 +7233,7 @@ class TestConversion(object):
         except NameError:
             Error = RuntimeError  # python < 3.5
         assert_raises(Error, bool, self_containing)  # previously stack overflow
+        self_containing[0] = None  # resolve circular reference
 
     def test_to_int_scalar(self):
         # gh-9972 means that these aren't always the same
@@ -7712,6 +7735,8 @@ class TestWritebackIfCopy(object):
         # uses arr_insert
         np.place(a, a>2, [44, 55])
         assert_equal(a, np.array([[0, 44], [1, 55], [2, 44]]))
+        # hit one of the failing paths
+        assert_raises(ValueError, np.place, a, a>20, [])
 
     def test_put_noncontiguous(self):
         a = np.arange(6).reshape(2,3).T # force non-c-contiguous
@@ -8068,3 +8093,43 @@ def test_getfield():
     pytest.raises(ValueError, a.getfield, 'uint8', -1)
     pytest.raises(ValueError, a.getfield, 'uint8', 16)
     pytest.raises(ValueError, a.getfield, 'uint64', 0)
+
+def test_multiarray_module():
+    # gh-12736
+    # numpy 1.16 replaced the multiarray and umath c-extension modules with
+    # a single _multiarray_umath one. For backward compatibility, it added a
+    # pure-python multiarray.py and umath.py shim so people can still do
+    # from numpy.core.multiarray import something-public-api
+    # It turns out pip can leave old pieces of previous versions of numpy
+    # around when installing a newer version. If the old c-extension modules
+    # are found, they will be given precedence over the new pure-python ones.
+    #
+    # This test copies a multiarray c-extension in parallel with the pure-
+    # python one, and starts another python interpreter to load multiarray.
+    # The expectation is that import will fail.
+    import subprocess, shutil
+    core_dir = os.path.dirname(np.core.multiarray.__file__)
+    cextension = np.core._multiarray_umath.__file__
+    testfile = cextension.replace('_multiarray_umath', '_multiarray_module_test')
+    badfile = cextension.replace('_multiarray_umath', 'multiarray')
+    assert not os.path.exists(badfile), '%s exists, this numpy ' \
+                                    'installation is faulty' % badfile
+    try:
+        shutil.copy(testfile, badfile)
+        p = subprocess.Popen([sys.executable, '-c', 'import numpy' ],
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                env=os.environ.copy())
+        stdout, stderr = p.communicate()
+        r = p.wait()
+        #print(stdout.decode())
+        #print(stderr.decode())
+        assert r != 0
+        assert b'ImportError' in stderr
+    finally:
+        if os.path.exists(badfile):
+            try:
+                # can this fail?
+                os.remove(badfile)
+            except:
+                print("Could not remove %s, remove it by hand" % badfile)
+                raise
diff --git a/numpy/core/tests/test_overrides.py b/numpy/core/tests/test_overrides.py
index 62b2a3e53..8f1c16539 100644
--- a/numpy/core/tests/test_overrides.py
+++ b/numpy/core/tests/test_overrides.py
@@ -7,7 +7,7 @@ import numpy as np
 from numpy.testing import (
     assert_, assert_equal, assert_raises, assert_raises_regex)
 from numpy.core.overrides import (
-    get_overloaded_types_and_args, array_function_dispatch,
+    _get_implementing_args, array_function_dispatch,
     verify_matching_signatures, ENABLE_ARRAY_FUNCTION)
 from numpy.core.numeric import pickle
 import pytest
@@ -18,11 +18,6 @@ requires_array_function = pytest.mark.skipif(
     reason="__array_function__ dispatch not enabled.")
 
 
-def _get_overloaded_args(relevant_args):
-    types, args = get_overloaded_types_and_args(relevant_args)
-    return args
-
-
 def _return_not_implemented(self, *args, **kwargs):
     return NotImplemented
 
@@ -41,26 +36,21 @@ def dispatched_two_arg(array1, array2):
 
 
 @requires_array_function
-class TestGetOverloadedTypesAndArgs(object):
+class TestGetImplementingArgs(object):
 
     def test_ndarray(self):
         array = np.array(1)
 
-        types, args = get_overloaded_types_and_args([array])
-        assert_equal(set(types), {np.ndarray})
+        args = _get_implementing_args([array])
         assert_equal(list(args), [array])
 
-        types, args = get_overloaded_types_and_args([array, array])
-        assert_equal(len(types), 1)
-        assert_equal(set(types), {np.ndarray})
+        args = _get_implementing_args([array, array])
         assert_equal(list(args), [array])
 
-        types, args = get_overloaded_types_and_args([array, 1])
-        assert_equal(set(types), {np.ndarray})
+        args = _get_implementing_args([array, 1])
         assert_equal(list(args), [array])
 
-        types, args = get_overloaded_types_and_args([1, array])
-        assert_equal(set(types), {np.ndarray})
+        args = _get_implementing_args([1, array])
         assert_equal(list(args), [array])
 
     def test_ndarray_subclasses(self):
@@ -75,17 +65,14 @@ class TestGetOverloadedTypesAndArgs(object):
         override_sub = np.array(1).view(OverrideSub)
         no_override_sub = np.array(1).view(NoOverrideSub)
 
-        types, args = get_overloaded_types_and_args([array, override_sub])
-        assert_equal(set(types), {np.ndarray, OverrideSub})
+        args = _get_implementing_args([array, override_sub])
         assert_equal(list(args), [override_sub, array])
 
-        types, args = get_overloaded_types_and_args([array, no_override_sub])
-        assert_equal(set(types), {np.ndarray, NoOverrideSub})
+        args = _get_implementing_args([array, no_override_sub])
         assert_equal(list(args), [no_override_sub, array])
 
-        types, args = get_overloaded_types_and_args(
+        args = _get_implementing_args(
             [override_sub, no_override_sub])
-        assert_equal(set(types), {OverrideSub, NoOverrideSub})
         assert_equal(list(args), [override_sub, no_override_sub])
 
     def test_ndarray_and_duck_array(self):
@@ -96,12 +83,10 @@ class TestGetOverloadedTypesAndArgs(object):
         array = np.array(1)
         other = Other()
 
-        types, args = get_overloaded_types_and_args([other, array])
-        assert_equal(set(types), {np.ndarray, Other})
+        args = _get_implementing_args([other, array])
         assert_equal(list(args), [other, array])
 
-        types, args = get_overloaded_types_and_args([array, other])
-        assert_equal(set(types), {np.ndarray, Other})
+        args = _get_implementing_args([array, other])
         assert_equal(list(args), [array, other])
 
     def test_ndarray_subclass_and_duck_array(self):
@@ -116,9 +101,9 @@ class TestGetOverloadedTypesAndArgs(object):
         subarray = np.array(1).view(OverrideSub)
         other = Other()
 
-        assert_equal(_get_overloaded_args([array, subarray, other]),
+        assert_equal(_get_implementing_args([array, subarray, other]),
                      [subarray, array, other])
-        assert_equal(_get_overloaded_args([array, other, subarray]),
+        assert_equal(_get_implementing_args([array, other, subarray]),
                      [subarray, array, other])
 
     def test_many_duck_arrays(self):
@@ -140,15 +125,26 @@ class TestGetOverloadedTypesAndArgs(object):
         c = C()
         d = D()
 
-        assert_equal(_get_overloaded_args([1]), [])
-        assert_equal(_get_overloaded_args([a]), [a])
-        assert_equal(_get_overloaded_args([a, 1]), [a])
-        assert_equal(_get_overloaded_args([a, a, a]), [a])
-        assert_equal(_get_overloaded_args([a, d, a]), [a, d])
-        assert_equal(_get_overloaded_args([a, b]), [b, a])
-        assert_equal(_get_overloaded_args([b, a]), [b, a])
-        assert_equal(_get_overloaded_args([a, b, c]), [b, c, a])
-        assert_equal(_get_overloaded_args([a, c, b]), [c, b, a])
+        assert_equal(_get_implementing_args([1]), [])
+        assert_equal(_get_implementing_args([a]), [a])
+        assert_equal(_get_implementing_args([a, 1]), [a])
+        assert_equal(_get_implementing_args([a, a, a]), [a])
+        assert_equal(_get_implementing_args([a, d, a]), [a, d])
+        assert_equal(_get_implementing_args([a, b]), [b, a])
+        assert_equal(_get_implementing_args([b, a]), [b, a])
+        assert_equal(_get_implementing_args([a, b, c]), [b, c, a])
+        assert_equal(_get_implementing_args([a, c, b]), [c, b, a])
+
+    def test_too_many_duck_arrays(self):
+        namespace = dict(__array_function__=_return_not_implemented)
+        types = [type('A' + str(i), (object,), namespace) for i in range(33)]
+        relevant_args = [t() for t in types]
+
+        actual = _get_implementing_args(relevant_args[:32])
+        assert_equal(actual, relevant_args[:32])
+
+        with assert_raises_regex(TypeError, 'distinct argument types'):
+            _get_implementing_args(relevant_args)
 
 
 @requires_array_function
@@ -201,6 +197,14 @@ class TestNDArrayArrayFunction(object):
         result = np.concatenate((array, override_sub))
         assert_equal(result, expected.view(OverrideSub))
 
+    def test_no_wrapper(self):
+        array = np.array(1)
+        func = dispatched_one_arg.__wrapped__
+        with assert_raises_regex(AttributeError, '__wrapped__'):
+            array.__array_function__(func=func,
+                                     types=(np.ndarray,),
+                                     args=(array,), kwargs={})
+
 
 @requires_array_function
 class TestArrayFunctionDispatch(object):
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index 2421a1161..472a83696 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -46,7 +46,7 @@ class TestRegression(object):
             assert_array_equal(a, b)
 
     def test_typeNA(self):
-        # Issue gh-515 
+        # Issue gh-515
         with suppress_warnings() as sup:
             sup.filter(np.VisibleDeprecationWarning)
             assert_equal(np.typeNA[np.int64], 'Int64')
@@ -2411,7 +2411,41 @@ class TestRegression(object):
             if HAS_REFCOUNT:
                 assert_(base <= sys.getrefcount(s))
 
+    @pytest.mark.parametrize('val', [
+        # arrays and scalars
+        np.ones((10, 10), dtype='int32'),
+        np.uint64(10),
+        ])
+    @pytest.mark.parametrize('protocol',
+        range(2, pickle.HIGHEST_PROTOCOL + 1)
+        )
+    def test_pickle_module(self, protocol, val):
+        # gh-12837
+        s = pickle.dumps(val, protocol)
+        assert b'_multiarray_umath' not in s
+        if protocol == 5 and len(val.shape) > 0:
+            # unpickling ndarray goes through _frombuffer for protocol 5
+            assert b'numpy.core.numeric' in s
+        else:
+            assert b'numpy.core.multiarray' in s
+
     def test_object_casting_errors(self):
         # gh-11993
         arr = np.array(['AAAAA', 18465886.0, 18465886.0], dtype=object)
         assert_raises(TypeError, arr.astype, 'c8')
+
+    def test_eff1d_casting(self):
+        # gh-12711
+        x = np.array([1, 2, 4, 7, 0], dtype=np.int16)
+        res = np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
+        assert_equal(res, [-99,   1,   2,   3,  -7,  88,  99])
+        assert_raises(ValueError, np.ediff1d, x, to_begin=(1<<20))
+        assert_raises(ValueError, np.ediff1d, x, to_end=(1<<20))
+
+    def test_pickle_datetime64_array(self):
+        # gh-12745 (would fail with pickle5 installed)
+        d = np.datetime64('2015-07-04 12:59:59.50', 'ns')
+        arr = np.array([d])
+        for proto in range(2, pickle.HIGHEST_PROTOCOL + 1):
+            dumped = pickle.dumps(arr, protocol=proto)
+            assert_equal(pickle.loads(dumped), arr)
diff --git a/numpy/core/tests/test_shape_base.py b/numpy/core/tests/test_shape_base.py
index ef5c118ec..b996321c2 100644
--- a/numpy/core/tests/test_shape_base.py
+++ b/numpy/core/tests/test_shape_base.py
@@ -373,6 +373,10 @@ def test_stack():
     # empty arrays
     assert_(stack([[], [], []]).shape == (3, 0))
     assert_(stack([[], [], []], axis=1).shape == (0, 3))
+    # out
+    out = np.zeros_like(r1)
+    np.stack((a, b), out=out)
+    assert_array_equal(out, r1)
     # edge cases
     assert_raises_regex(ValueError, 'need at least one array', stack, [])
     assert_raises_regex(ValueError, 'must have the same shape',
author	DongHun Kwak <dh0128.kwak@samsung.com>	2020-12-31 09:34:06 +0900
committer	DongHun Kwak <dh0128.kwak@samsung.com>	2020-12-31 09:34:06 +0900
commit	4b17002dd27193e2eb87f4b3c7566d929a7ac788 (patch)
tree	c6c5142f99ce1479902e180d434f81d5d2f922a0 /numpy/core
parent	f14f97841aa140385b7fca2aeb1c7c96b2711560 (diff)
download	python-numpy-4b17002dd27193e2eb87f4b3c7566d929a7ac788.tar.gz python-numpy-4b17002dd27193e2eb87f4b3c7566d929a7ac788.tar.bz2 python-numpy-4b17002dd27193e2eb87f4b3c7566d929a7ac788.zip