summary refs log tree commit diff
path: root/runtimes/nn/depend/external/gemmlowp/internal/compute.h
diff options
context:
space:
mode:
Diffstat (limited to 'runtimes/nn/depend/external/gemmlowp/internal/compute.h')
-rw-r--r-- runtimes/nn/depend/external/gemmlowp/internal/compute.h 104
1 files changed, 104 insertions, 0 deletions
diff --git a/runtimes/nn/depend/external/gemmlowp/internal/compute.h b/runtimes/nn/depend/external/gemmlowp/internal/compute.h
new file mode 100644
index 000000000..bbc9e2a0e
--- /dev/null
+++ b/runtimes/nn/depend/external/gemmlowp/internal/compute.h
@@ -0,0 +1,104 @@
+// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// compute.h: the central stage of the Gemm computation. It operates
+// on already-packed LHS and RHS blocks and calls the Gemm kernel
+// to compute a block of the product.
+
+#ifndef GEMMLOWP_INTERNAL_COMPUTE_H_
+#define GEMMLOWP_INTERNAL_COMPUTE_H_
+
+#include "block_params.h"
+#include "kernel.h"
+#include "pack.h"
+
+namespace gemmlowp {
+
+// Drives the Gemm kernel over one block of the product, given LHS and RHS
+// blocks that have already been packed. The block is walked in slices whose
+// sizes are read from BlockParams, and each kernel invocation is handed one
+// Format::kRows x Format::kCols sub-block of the packed result.
+template <typename PackedLhs, typename PackedRhs, typename PackedResult>
+class ComputeImpl {
+  using KernelLhsFormat = typename PackedLhs::KernelSideFormat;
+  using KernelRhsFormat = typename PackedRhs::KernelSideFormat;
+  using Format = KernelFormat<KernelLhsFormat, KernelRhsFormat>;
+
+  // Everything below is held by reference or pointer, never copied, so the
+  // referenced objects have to stay alive for as long as this object is used.
+  const KernelBase& kernel_;
+  const BlockParams& block_params_;
+
+  PackedResult* const packed_result_;
+  const PackedLhs& packed_lhs_;
+  const PackedRhs& packed_rhs_;
+
+ public:
+  // Binds the kernel, the block parameters, the destination and both packed
+  // operands. No computation happens here.
+  ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params,
+              PackedResult* _packed_result, const PackedLhs& _packed_lhs,
+              const PackedRhs& _packed_rhs)
+      : kernel_(_kernel),
+        block_params_(_block_params),
+        packed_result_(_packed_result),
+        packed_lhs_(_packed_lhs),
+        packed_rhs_(_packed_rhs) {}
+
+  // Runs the kernel over the whole block. |depth| is first rounded up with
+  // RoundUp<Format::kDepth>; the rounded value must not exceed
+  // block_params_.l2_depth.
+  void Compute(int depth) {
+    depth = RoundUp<Format::kDepth>(depth);
+    assert(depth <= block_params_.l2_depth);
+
+    const int depth_step = block_params_.l1_depth;
+    const int row_step = block_params_.l1_rows;
+    const int total_rows = block_params_.l2_rows;
+
+    for (int depth_begin = 0; depth_begin < depth;
+         depth_begin += depth_step) {
+      // The trailing slice may be shorter than a full step.
+      const int depth_count = std::min(depth_step, depth - depth_begin);
+
+      for (int row_begin = 0; row_begin < total_rows;
+           row_begin += row_step) {
+        const int row_count = std::min(row_step, total_rows - row_begin);
+
+        // Each slice covers all l2_cols columns, starting at column 0.
+        ComputeL1(row_begin, row_count, 0, block_params_.l2_cols,
+                  depth_begin, depth_count);
+      }
+    }
+  }
+
+ private:
+  // Performs a single kernel invocation: positions both packed operands at
+  // the requested run, maps the matching Format::kRows x Format::kCols
+  // sub-block of the result, and forwards everything to kernel_.Run().
+  void ComputeRun(int start_row, int start_col, int start_depth,
+                  int depth) GEMMLOWP_NOINLINE {
+    packed_lhs_.seek_run(start_row, start_depth);
+    packed_rhs_.seek_run(start_col, start_depth);
+
+    auto dst_block = packed_result_->Map().block(start_row, start_col,
+                                                 Format::kRows, Format::kCols);
+
+    kernel_.Run(dst_block.data(), dst_block.rows_stride(),
+                dst_block.cols_stride(), packed_lhs_.current_data(),
+                packed_rhs_.current_data(), start_depth, depth);
+  }
+
+  // Covers a rows x cols x depth slice with kernel runs, one per
+  // Format::kRows x Format::kCols cell. Columns form the outer loop and rows
+  // the inner loop. All three extents must be multiples of the corresponding
+  // kernel format dimension.
+  void ComputeL1(int start_row, int rows, int start_col, int cols,
+                 int start_depth, int depth) {
+    assert(rows % Format::kRows == 0);
+    assert(cols % Format::kCols == 0);
+    assert(depth % Format::kDepth == 0);
+
+    for (int col_offset = 0; col_offset < cols; col_offset += Format::kCols) {
+      const int cell_col = start_col + col_offset;
+      for (int row_offset = 0; row_offset < rows;
+           row_offset += Format::kRows) {
+        ComputeRun(start_row + row_offset, cell_col, start_depth, depth);
+      }
+    }
+  }
+};
+
+// Convenience entry point: builds a ComputeImpl over the given kernel, block
+// parameters, result and packed operands, then runs it for |depth|. The
+// whole call is wrapped in a "compute" profiling label.
+template <typename PackedLhs, typename PackedRhs, typename PackedResult>
+void Compute(const KernelBase& kernel, const BlockParams& block_params,
+             PackedResult* packed_result, const PackedLhs& packed_lhs,
+             const PackedRhs& packed_rhs, int depth) {
+  // Declared first so the label stays in scope until the function returns.
+  ScopedProfilingLabel label("compute");
+
+  using Impl = ComputeImpl<PackedLhs, PackedRhs, PackedResult>;
+  Impl compute_impl(kernel, block_params, packed_result, packed_lhs,
+                    packed_rhs);
+  compute_impl.Compute(depth);
+}
+
+} // namespace gemmlowp
+
+#endif // GEMMLOWP_INTERNAL_COMPUTE_H_