summaryrefslogtreecommitdiff
path: root/numpy/random/src/distributions/random_mvhg_marginals.c
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/random/src/distributions/random_mvhg_marginals.c')
-rw-r--r--numpy/random/src/distributions/random_mvhg_marginals.c138
1 files changed, 138 insertions, 0 deletions
diff --git a/numpy/random/src/distributions/random_mvhg_marginals.c b/numpy/random/src/distributions/random_mvhg_marginals.c
new file mode 100644
index 000000000..809d129de
--- /dev/null
+++ b/numpy/random/src/distributions/random_mvhg_marginals.c
@@ -0,0 +1,138 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <math.h>
+
+#include "numpy/random/distributions.h"
+#include "logfactorial.h"
+
+
+/*
+ * random_multivariate_hypergeometric_marginals
+ *
+ * Draw samples from the multivariate hypergeometric distribution--
+ * the "marginals" algorithm.
+ *
+ * This version generates the sample by iteratively calling
+ * hypergeometric() (the univariate hypergeometric distribution).
+ *
+ * Parameters
+ * ----------
+ * bitgen_t *bitgen_state
+ * Pointer to a `bitgen_t` instance.
+ * int64_t total
+ * The sum of the values in the array `colors`. (This is redundant
+ * information, but we know the caller has already computed it, so
+ * we might as well use it.)
+ * size_t num_colors
+ * The length of the `colors` array. The functions assumes
+ * num_colors > 0.
+ * int64_t *colors
+ * The array of colors (i.e. the number of each type in the collection
+ * from which the random variate is drawn).
+ * int64_t nsample
+ * The number of objects drawn without replacement for each variate.
+ * `nsample` must not exceed sum(colors). This condition is not checked;
+ * it is assumed that the caller has already validated the value.
+ * size_t num_variates
+ * The number of variates to be produced and put in the array
+ * pointed to by `variates`. One variate is a vector of length
+ * `num_colors`, so the array pointed to by `variates` must have length
+ * `num_variates * num_colors`.
+ * int64_t *variates
+ * The array that will hold the result. It must have length
+ * `num_variates * num_colors`.
+ * The array is not initialized in the function; it is expected that the
+ * array has been initialized with zeros when the function is called.
+ *
+ * Notes
+ * -----
+ * Here's an example that demonstrates the idea of this algorithm.
+ *
+ * Suppose the urn contains red, green, blue and yellow marbles.
+ * Let nred be the number of red marbles, and define the quantities for
+ * the other colors similarly. The total number of marbles is
+ *
+ * total = nred + ngreen + nblue + nyellow.
+ *
+ * To generate a sample using rk_hypergeometric:
+ *
+ * red_sample = hypergeometric(ngood=nred, nbad=total - nred,
+ * nsample=nsample)
+ *
+ * This gives us the number of red marbles in the sample. The number of
+ * marbles in the sample that are *not* red is nsample - red_sample.
+ * To figure out the distribution of those marbles, we again use
+ * rk_hypergeometric:
+ *
+ * green_sample = hypergeometric(ngood=ngreen,
+ * nbad=total - nred - ngreen,
+ * nsample=nsample - red_sample)
+ *
+ * Similarly,
+ *
+ * blue_sample = hypergeometric(
+ * ngood=nblue,
+ * nbad=total - nred - ngreen - nblue,
+ * nsample=nsample - red_sample - green_sample)
+ *
+ * Finally,
+ *
+ * yellow_sample = total - (red_sample + green_sample + blue_sample).
+ *
+ * The above sequence of steps is implemented as a loop for an arbitrary
+ * number of colors in the innermost loop in the code below. `remaining`
+ * is the value passed to `nbad`; it is `total - colors[0]` in the first
+ * call to random_hypergeometric(), and then decreases by `colors[j]` in
+ * each iteration. `num_to_sample` is the `nsample` argument. It
+ * starts at this function's `nsample` input, and is decreased by the
+ * result of the call to random_hypergeometric() in each iteration.
+ *
+ * Assumptions on the arguments (not checked in the function):
+ * * colors[k] >= 0 for k in range(num_colors)
+ * * total = sum(colors)
+ * * 0 <= nsample <= total
+ * * the product num_variates * num_colors does not overflow
+ */
+
+void random_multivariate_hypergeometric_marginals(bitgen_t *bitgen_state,
+ int64_t total,
+ size_t num_colors, int64_t *colors,
+ int64_t nsample,
+ size_t num_variates, int64_t *variates)
+{
+ bool more_than_half;
+
+ if ((total == 0) || (nsample == 0) || (num_variates == 0)) {
+ // Nothing to do.
+ return;
+ }
+
+ more_than_half = nsample > (total / 2);
+ if (more_than_half) {
+ nsample = total - nsample;
+ }
+
+ for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {
+ int64_t num_to_sample = nsample;
+ int64_t remaining = total;
+ for (size_t j = 0; (num_to_sample > 0) && (j + 1 < num_colors); ++j) {
+ int64_t r;
+ remaining -= colors[j];
+ r = random_hypergeometric(bitgen_state,
+ colors[j], remaining, num_to_sample);
+ variates[i + j] = r;
+ num_to_sample -= r;
+ }
+
+ if (num_to_sample > 0) {
+ variates[i + num_colors - 1] = num_to_sample;
+ }
+
+ if (more_than_half) {
+ for (size_t k = 0; k < num_colors; ++k) {
+ variates[i + k] = colors[k] - variates[i + k];
+ }
+ }
+ }
+}