diff options
author | Ellie Wen <dwen@fb.com> | 2018-03-30 15:36:53 -0700 |
---|---|---|
committer | Orion Reblitz-Richardson <orionr@gmail.com> | 2018-03-30 21:00:44 -0700 |
commit | 363a227d1928b9427fad5624785128f026cf74db (patch) | |
tree | 8d927e988cc855ef951a88eb57bfd2ebc26038d0 /caffe2/operators | |
parent | 551d5fbf9a56769529307e075c17f0f9d5a80fda (diff) | |
download | pytorch-363a227d1928b9427fad5624785128f026cf74db.tar.gz pytorch-363a227d1928b9427fad5624785128f026cf74db.tar.bz2 pytorch-363a227d1928b9427fad5624785128f026cf74db.zip |
extend bucketize op to support duplicated boundaries
upgrade bucketize op to support duplicated boundaries
Diffstat (limited to 'caffe2/operators')
-rw-r--r-- | caffe2/operators/one_hot_ops.cc | 24 |
1 files changed, 17 insertions, 7 deletions
diff --git a/caffe2/operators/one_hot_ops.cc b/caffe2/operators/one_hot_ops.cc index fdea2279be..2347c9ed66 100644 --- a/caffe2/operators/one_hot_ops.cc +++ b/caffe2/operators/one_hot_ops.cc @@ -125,11 +125,19 @@ bool BatchBucketOneHotOp<CPUContext>::RunOnDevice() { for (TIndex j = 0; j < D; j++) { // here we assume the boundary values for each feature are sorted - TIndex bucket_idx = std::lower_bound( - boundaries_offset, - boundaries_offset + lens_data[j], - input_data[pos]) - + TIndex lower_bucket_idx = std::lower_bound( + boundaries_offset, + boundaries_offset + lens_data[j], + input_data[pos]) - boundaries_offset; + + TIndex upper_bucket_idx = std::upper_bound( + boundaries_offset, + boundaries_offset + lens_data[j], + input_data[pos]) - + boundaries_offset; + + TIndex bucket_idx = (lower_bucket_idx + upper_bucket_idx) / 2; output_data[i * output_dim + output_offset + bucket_idx] = 1.0; boundaries_offset += lens_data[j]; output_offset += (lens_data[j] + 1); @@ -196,11 +204,13 @@ Note that each bucket is right-inclusive. That is, given boundary values [b1, b2, b3], the buckets are defined as (-int, b1], (b1, b2], (b2, b3], (b3, inf). For example - If data = [[2, 3], [4, 1], [2, 5]], lengths = [2, 3], - and boundaries = [0.1, 2.5, 1, 3.1, 4.5], then - + data = [[2, 3], [4, 1], [2, 5]], lengths = [2, 3], + If boundaries = [0.1, 2.5, 1, 3.1, 4.5], then output = [[0, 1, 0, 0, 1, 0, 0], [0, 0, 1, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0, 1]] + If boundaries = [0.1, 2.5, 1, 1, 3.1], then + output = [[0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 1, 0, 0], [0, 1, 0, 0, 0, 0, 1]] + )DOC") .Input(0, "data", "input tensor matrix") .Input(1, "lengths", "the size is the same as the width of the `data`") |