author    Kaizen <kaizen@arm.com>                      2017-09-28 14:38:23 +0100
committer Anthony Barbier <anthony.barbier@arm.com>    2017-09-28 16:31:13 +0100
commit    8938bd3f40ea62ff56d6ed4e2db0a8aee34dd64a (patch)
tree      c234331232f227e0cdfb567a54ecaa5460aaa064 /docs/01_library.dox
parent    f4a254c2745aeaab6f7276a675147d707002fe7a (diff)
arm_compute v17.09
Change-Id: I4bf8f4e6e5f84ce0d5b6f5ba570d276879f42a81
Diffstat (limited to 'docs/01_library.dox')
-rw-r--r--    docs/01_library.dox    124
1 file changed, 122 insertions, 2 deletions
diff --git a/docs/01_library.dox b/docs/01_library.dox
index 738579e7c..c7903baa6 100644
--- a/docs/01_library.dox
+++ b/docs/01_library.dox
@@ -1,6 +1,6 @@
namespace arm_compute
{
-/**
+/**
@page architecture Library architecture
@tableofcontents
@@ -83,7 +83,7 @@ This is the very basic implementation used in the NEON runtime library by all th
@sa CPPScheduler.
-@note Some kernels like for example @ref NEHistogramKernel need some local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads``` and each subwindow must be initialized by calling @ref Window::set_thread_id() with a unique thread_id between 0 and num_threads.
+@note Some kernels, for example @ref NEHistogramKernel, need a local temporary buffer to perform their calculations. In order to avoid memory corruption between threads, the local buffer must be of size: ```memory_needed_per_thread * num_threads```, and a unique thread_id between 0 and num_threads must be assigned to the @ref ThreadInfo object passed to the ```run``` function.
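+
+For illustration, a simplified sketch of how such a kernel could be dispatched (the window splitting and the sequential loop below are illustrative only, not the actual @ref CPPScheduler implementation):
+@code{.cpp}
+ThreadInfo info;
+info.num_threads = num_threads;                               // Total number of workers that will share the temporary buffer
+for(int t = 0; t < num_threads; ++t)
+{
+    info.thread_id = t;                                       // Unique id in [0, num_threads) selects this worker's slice of the buffer
+    Window win = max_window.split_window(0, t, num_threads);  // Give each worker its own subwindow
+    kernel->run(win, info);                                   // In a real scheduler each call would be submitted to a different thread
+}
+@endcode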
@subsection S4_2_4 Functions
@@ -246,5 +246,125 @@ It will iterate through every element of the execution window and for each eleme
Here are a couple of examples of how to use the iterators to fill / read tensors:
@snippet examples/neon_copy_objects.cpp Copy objects example
+
+@section S4_7_memory_manager MemoryManager
+
+@ref IMemoryManager is a memory management interface that can be used to reduce the memory requirements of a given pipeline by recycling temporary buffers.
+
+@subsection S4_7_1_memory_manager_components MemoryGroup, MemoryPool and MemoryManager Components
+
+@subsubsection S4_7_1_1_memory_group MemoryGroup
+
+@ref IMemoryGroup defines the granularity of memory management.
+
+MemoryGroup binds a number of objects to a bucket of memory requirements that need to be fulfilled in order for an operation or list of operations to be executed.
+
+Requesting backing memory for a specific group can be done using @ref IMemoryGroup::acquire and releasing the memory back using @ref IMemoryGroup::release.
+
+@note Two types of memory groups are currently implemented:
+- @ref MemoryGroup that manages @ref Tensor objects
+- @ref CLMemoryGroup that manages @ref CLTensor objects.
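+
+As an illustration of this granularity, here is a simplified sketch of how a function's ```run``` method typically brackets its kernel execution with the group's acquire/release (the member names ```_memory_group``` and ```_kernel``` are illustrative):
+@code{.cpp}
+void ExampleFunction::run() // Hypothetical function that owns a memory group for its temporary tensors
+{
+    _memory_group.acquire();                              // Obtain backing memory for the managed temporaries
+    NEScheduler::get().schedule(&_kernel, Window::DimY);  // Execute the kernel that uses the temporaries
+    _memory_group.release();                              // Hand the memory back so other groups can reuse it
+}
+@endcode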
+
+@subsubsection S4_7_1_2_memory_pool MemoryPool
+
+@ref IMemoryPool defines a pool of memory that can be used to provide backing memory to a memory group.
+
+@note Currently only @ref BlobMemoryPool is implemented, which models the memory requirements as a vector of distinct memory blobs.
+
+@subsubsection S4_7_1_3_memory_manager_components MemoryManager Components
+
+@ref IMemoryManager consists of two components:
+- @ref ILifetimeManager that keeps track of the lifetime of the objects registered by the memory groups and, given an @ref IAllocator, creates an appropriate memory pool that fulfils the memory requirements of all the registered memory groups.
+- @ref IPoolManager that safely manages the registered memory pools.
+
+@note @ref IMemoryManager::finalize should be called once the configuration of all the memory groups, kernels and functions is done, so that the memory manager can allocate the appropriate backing memory.
+
+@note Currently only @ref BlobLifetimeManager is implemented, which models the memory requirements as a vector of distinct memory blobs.
+
+@subsection S4_7_2_working_with_memory_manager Working with the Memory Manager
+Using a memory manager to reduce the memory requirements of a pipeline can be summarized in the following steps:
+
+Initially, a memory manager must be set up:
+@code{.cpp}
+Allocator allocator{}; // Create an allocator to use for the backing memory allocation
+auto lifetime_mgr = std::make_shared<BlobLifetimeManager>(); // Create Lifetime Manager
+auto pool_mgr = std::make_shared<PoolManager>(); // Create Pool Manager
+auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr); // Create Memory Manager
+@endcode
+
+Once done, memory groups can be registered to use the memory manager:
+@code{.cpp}
+MemoryGroup memory_group(mm); // Create a memory group and set the memory manager to use
+@endcode
+
+@note If a memory manager is not specified, all allocations will be immediate instead of being deferred through the memory manager.
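+
+For contrast, a minimal sketch of the unmanaged path (the shape and data type are purely illustrative):
+@code{.cpp}
+Tensor standalone;
+standalone.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32)); // Describe the tensor
+standalone.allocator()->allocate();                                                // Without a memory manager the backing memory is allocated right here
+@endcode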
+
+The next step is to set up the objects to be managed by the memory group. Note that the lifetime of an object is tracked between the @ref MemoryGroup::manage() and @ref TensorAllocator::allocate calls:
+@ref MemoryGroup::manage flags that the object will be needed from this point on, and the call to @ref TensorAllocator::allocate signals the end of the object's lifetime.
+@code{.cpp}
+Tensor tmp1, tmp2, tmp3; // Create example tensors
+memory_group.manage(&tmp1); // Start managing object tmp1 and start its lifetime
+memory_group.manage(&tmp2); // Start managing object tmp2 and start its lifetime
+
+operation1.configure(&tmp1, &tmp2); // Configure a function/kernel using tmp1 and tmp2
+
+tmp1.allocator()->allocate(); // Flag that the lifetime of object tmp1 has ended
+
+memory_group.manage(&tmp3); // Start managing object tmp3 and start its lifetime
+
+operation2.configure(&tmp2, &tmp3); // Configure a function/kernel using tmp2 and tmp3
+
+tmp2.allocator()->allocate(); // Flag that the lifetime of object tmp2 has ended
+tmp3.allocator()->allocate(); // Flag that the lifetime of object tmp3 has ended
+@endcode
+
+@warning The configuration step should be done sequentially by a single thread so that all the lifetimes are captured correctly.
+
+When the configuration of all the operations is finished, the memory manager has to be finalized:
+@code{.cpp}
+mm->set_allocator(&allocator); // Set allocator to use
+mm->set_num_pools(2); // Set number of pools to create in case parallel operations can be run
+mm->finalize(); // Finalize memory manager (Object lifetime check, Memory pool creation etc)
+@endcode
+
+Finally, during execution of the pipeline the memory of the appropriate memory group should be requested before running:
+@code{.cpp}
+memory_group.acquire(); // Request memory for the group
+
+operation1.run(); // Run operation1
+operation2.run(); // Run operation2
+
+memory_group.release(); // Release memory so that it can be reused
+@endcode
+@note A pipeline can be executed in a multi-threaded environment, as memory acquisition/release is thread safe.
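+
+For example, two independent pipelines can acquire memory from the same manager concurrently, provided enough pools were created during finalization. A possible sketch (```pipeline_a```, ```pipeline_b``` and the two memory groups are illustrative and assumed to have been configured as shown above):
+@code{.cpp}
+// Assumes mm->set_num_pools(2) was used so that two pools are available
+std::thread worker_a([&]()
+{
+    memory_group_a.acquire(); // Each thread acquires backing memory for its own group (thread safe)
+    pipeline_a.run();
+    memory_group_a.release(); // Return the memory so it can be reused
+});
+std::thread worker_b([&]()
+{
+    memory_group_b.acquire();
+    pipeline_b.run();
+    memory_group_b.release();
+});
+worker_a.join();
+worker_b.join();
+@endcode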
+
+@subsection S4_7_3_memory_manager_function_support Function support
+
+Most of the library's functions have been ported to use @ref IMemoryManager for their internal temporary buffers.
+
+If that is the case, a memory manager can be passed to them during construction to reuse memory among these functions.
+@code{.cpp}
+// Setup Memory Manager
+CLBufferAllocator allocator{}; // Create an allocator to use for the backing memory allocation
+auto lifetime_mgr = std::make_shared<BlobLifetimeManager>(); // Create Lifetime Manager
+auto pool_mgr = std::make_shared<PoolManager>(); // Create Pool Manager
+auto mm = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr); // Create Memory Manager
+
+// Create two convolution layers and use the memory manager to manage their internal temporary buffers
+CLConvolutionLayer conv1(mm), conv2(mm);
+
+// Configure layers
+conv1.configure(...);
+conv2.configure(...);
+
+// Finalize memory manager
+mm->set_allocator(&allocator); // Set allocator to use
+mm->set_num_pools(1); // Set number of pools to create (one pool is enough as the layers run sequentially here)
+mm->finalize(); // Finalize memory manager (Object lifetime check, Memory pool creation etc)
+
+// Run layers (memory will be recycled for the internal buffers of conv1 and conv2)
+conv1.run();
+conv2.run();
+@endcode
*/
} // namespace arm_compute