summaryrefslogtreecommitdiff
path: root/caffe2/operators
diff options
context:
space:
mode:
author Ellie Wen <dwen@fb.com> 2018-03-30 15:36:53 -0700
committer Orion Reblitz-Richardson <orionr@gmail.com> 2018-03-30 21:00:44 -0700
commit 363a227d1928b9427fad5624785128f026cf74db (patch)
tree 8d927e988cc855ef951a88eb57bfd2ebc26038d0 /caffe2/operators
parent 551d5fbf9a56769529307e075c17f0f9d5a80fda (diff)
download pytorch-363a227d1928b9427fad5624785128f026cf74db.tar.gz
pytorch-363a227d1928b9427fad5624785128f026cf74db.tar.bz2
pytorch-363a227d1928b9427fad5624785128f026cf74db.zip
extend bucketize op to support duplicated boundaries
upgrade bucketize op to support duplicated boundaries
Diffstat (limited to 'caffe2/operators')
-rw-r--r-- caffe2/operators/one_hot_ops.cc 24
1 files changed, 17 insertions, 7 deletions
diff --git a/caffe2/operators/one_hot_ops.cc b/caffe2/operators/one_hot_ops.cc
index fdea2279be..2347c9ed66 100644
--- a/caffe2/operators/one_hot_ops.cc
+++ b/caffe2/operators/one_hot_ops.cc
@@ -125,11 +125,19 @@ bool BatchBucketOneHotOp<CPUContext>::RunOnDevice() {
for (TIndex j = 0; j < D; j++) {
// here we assume the boundary values for each feature are sorted
- TIndex bucket_idx = std::lower_bound(
- boundaries_offset,
- boundaries_offset + lens_data[j],
- input_data[pos]) -
+ TIndex lower_bucket_idx = std::lower_bound(
+ boundaries_offset,
+ boundaries_offset + lens_data[j],
+ input_data[pos]) -
boundaries_offset;
+
+ TIndex upper_bucket_idx = std::upper_bound(
+ boundaries_offset,
+ boundaries_offset + lens_data[j],
+ input_data[pos]) -
+ boundaries_offset;
+
+ TIndex bucket_idx = (lower_bucket_idx + upper_bucket_idx) / 2;
output_data[i * output_dim + output_offset + bucket_idx] = 1.0;
boundaries_offset += lens_data[j];
output_offset += (lens_data[j] + 1);
@@ -196,11 +204,13 @@ Note that each bucket is right-inclusive. That is, given boundary values
[b1, b2, b3], the buckets are defined as (-inf, b1], (b1, b2], (b2, b3], (b3, inf).
For example
- If data = [[2, 3], [4, 1], [2, 5]], lengths = [2, 3],
- and boundaries = [0.1, 2.5, 1, 3.1, 4.5], then
-
+ data = [[2, 3], [4, 1], [2, 5]], lengths = [2, 3],
+ If boundaries = [0.1, 2.5, 1, 3.1, 4.5], then
output = [[0, 1, 0, 0, 1, 0, 0], [0, 0, 1, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0, 1]]
+ If boundaries = [0.1, 2.5, 1, 1, 3.1], then
+ output = [[0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 1, 0, 0], [0, 1, 0, 0, 0, 0, 1]]
+
)DOC")
.Input(0, "data", "input tensor matrix")
.Input(1, "lengths", "the size is the same as the width of the `data`")