summaryrefslogtreecommitdiff
path: root/Documentation/gpu/rfc
diff options
context:
space:
mode:
authorMatthew Auld <matthew.auld@intel.com>2021-04-29 11:30:48 +0100
committerMatthew Auld <matthew.auld@intel.com>2021-05-04 10:48:59 +0100
commit2bc9c04ea7020df3a98682f12c50e38870d3104a (patch)
treed025c39c54696041b79856926ef230feeffbc0cd /Documentation/gpu/rfc
parent0333ec8806dfe7b8766ad025821bf58dc49dc970 (diff)
downloadlinux-rpi-2bc9c04ea7020df3a98682f12c50e38870d3104a.tar.gz
linux-rpi-2bc9c04ea7020df3a98682f12c50e38870d3104a.tar.bz2
linux-rpi-2bc9c04ea7020df3a98682f12c50e38870d3104a.zip
drm/doc/rfc: i915 DG1 uAPI
Add an entry for the new uAPI needed for DG1. Also add the overall upstream plan, including some notes for the TTM conversion. v2(Daniel): - include the overall upstreaming plan - add a note for mmap, there are differences here for TTM vs i915 - bunch of other suggestions from Daniel v3: (Daniel) - add a note for set/get caching stuff - add some more docs for existing query and extensions stuff - add an actual code example for regions query - bunch of other stuff (Jason) - uAPI change(!): - try a simpler design with the placements extension - rather than have a generic setparam which can cover multiple use cases, have each extension be responsible for one thing only v4: (Daniel) - add some more notes for ttm conversion - bunch of other stuff (Jason) - uAPI change(!): - drop all the extra rsvd members for the region_query and region_info, just keep the bare minimum needed for padding v5: (Jason) - for the upstream plan, add a requirement that we send the uAPI bits again for final sign off before turning it on for real - document how we intend to extend the rsvd bits for the region query (Kenneth) - improve the comment for the smem+lmem mmap mode and caching Signed-off-by: Matthew Auld <matthew.auld@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com> Cc: Jon Bloomfield <jon.bloomfield@intel.com> Cc: Jordan Justen <jordan.l.justen@intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Kenneth Graunke <kenneth@whitecape.org> Cc: Jason Ekstrand <jason@jlekstrand.net> Cc: Dave Airlie <airlied@gmail.com> Cc: dri-devel@lists.freedesktop.org Cc: mesa-dev@lists.freedesktop.org Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> Acked-by: Dave Airlie <airlied@redhat.com> Acked-by: Kenneth Graunke <kenneth@whitecape.org> Acked-by: Jon Bloomfield <jon.bloomfield@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210429103056.407067-1-matthew.auld@intel.com
Diffstat (limited to 'Documentation/gpu/rfc')
-rw-r--r--Documentation/gpu/rfc/i915_gem_lmem.h237
-rw-r--r--Documentation/gpu/rfc/i915_gem_lmem.rst131
-rw-r--r--Documentation/gpu/rfc/index.rst4
3 files changed, 372 insertions, 0 deletions
diff --git a/Documentation/gpu/rfc/i915_gem_lmem.h b/Documentation/gpu/rfc/i915_gem_lmem.h
new file mode 100644
index 000000000000..d9c61bea0556
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_gem_lmem.h
@@ -0,0 +1,237 @@
+/**
+ * enum drm_i915_gem_memory_class - Supported memory classes
+ */
+enum drm_i915_gem_memory_class {
+ /** @I915_MEMORY_CLASS_SYSTEM: System memory */
+ I915_MEMORY_CLASS_SYSTEM = 0,
+ /** @I915_MEMORY_CLASS_DEVICE: Device local-memory */
+ I915_MEMORY_CLASS_DEVICE,
+};
+
+/**
+ * struct drm_i915_gem_memory_class_instance - Identify particular memory region
+ */
+struct drm_i915_gem_memory_class_instance {
+ /** @memory_class: See enum drm_i915_gem_memory_class */
+ __u16 memory_class;
+
+ /** @memory_instance: Which instance */
+ __u16 memory_instance;
+};
+
+/**
+ * struct drm_i915_memory_region_info - Describes one region as known to the
+ * driver.
+ *
+ * Note that we reserve some stuff here for potential future work. As an example
+ * we might want expose the capabilities for a given region, which could include
+ * things like if the region is CPU mappable/accessible, what are the supported
+ * mapping types etc.
+ *
+ * Note that to extend struct drm_i915_memory_region_info and struct
+ * drm_i915_query_memory_regions in the future the plan is to do the following:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_memory_region_info {
+ * struct drm_i915_gem_memory_class_instance region;
+ * union {
+ * __u32 rsvd0;
+ * __u32 new_thing1;
+ * };
+ * ...
+ * union {
+ * __u64 rsvd1[8];
+ * struct {
+ * __u64 new_thing2;
+ * __u64 new_thing3;
+ * ...
+ * };
+ * };
+ * };
+ *
+ * With this things should remain source compatible between versions for
+ * userspace, even as we add new fields.
+ *
+ * Note this is using both struct drm_i915_query_item and struct drm_i915_query.
+ * For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS
+ * at &drm_i915_query_item.query_id.
+ */
+struct drm_i915_memory_region_info {
+ /** @region: The class:instance pair encoding */
+ struct drm_i915_gem_memory_class_instance region;
+
+ /** @rsvd0: MBZ */
+ __u32 rsvd0;
+
+ /** @probed_size: Memory probed by the driver (-1 = unknown) */
+ __u64 probed_size;
+
+ /** @unallocated_size: Estimate of memory remaining (-1 = unknown) */
+ __u64 unallocated_size;
+
+ /** @rsvd1: MBZ */
+ __u64 rsvd1[8];
+};
+
+/**
+ * struct drm_i915_query_memory_regions
+ *
+ * The region info query enumerates all regions known to the driver by filling
+ * in an array of struct drm_i915_memory_region_info structures.
+ *
+ * Example for getting the list of supported regions:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_query_memory_regions *info;
+ * struct drm_i915_query_item item = {
+ * .query_id = DRM_I915_QUERY_MEMORY_REGIONS;
+ * };
+ * struct drm_i915_query query = {
+ * .num_items = 1,
+ * .items_ptr = (uintptr_t)&item,
+ * };
+ * int err, i;
+ *
+ * // First query the size of the blob we need, this needs to be large
+ * // enough to hold our array of regions. The kernel will fill out the
+ * // item.length for us, which is the number of bytes we need.
+ * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ * if (err) ...
+ *
+ * info = calloc(1, item.length);
+ * // Now that we allocated the required number of bytes, we call the ioctl
+ * // again, this time with the data_ptr pointing to our newly allocated
+ * // blob, which the kernel can then populate with the all the region info.
+ * item.data_ptr = (uintptr_t)&info,
+ *
+ * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query);
+ * if (err) ...
+ *
+ * // We can now access each region in the array
+ * for (i = 0; i < info->num_regions; i++) {
+ * struct drm_i915_memory_region_info mr = info->regions[i];
+ * u16 class = mr.region.class;
+ * u16 instance = mr.region.instance;
+ *
+ * ....
+ * }
+ *
+ * free(info);
+ */
+struct drm_i915_query_memory_regions {
+ /** @num_regions: Number of supported regions */
+ __u32 num_regions;
+
+ /** @rsvd: MBZ */
+ __u32 rsvd[3];
+
+ /** @regions: Info about each supported region */
+ struct drm_i915_memory_region_info regions[];
+};
+
+#define DRM_I915_GEM_CREATE_EXT 0xdeadbeaf
+#define DRM_IOCTL_I915_GEM_CREATE_EXT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE_EXT, struct drm_i915_gem_create_ext)
+
+/**
+ * struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added
+ * extension support using struct i915_user_extension.
+ *
+ * Note that in the future we want to have our buffer flags here, at least for
+ * the stuff that is immutable. Previously we would have two ioctls, one to
+ * create the object with gem_create, and another to apply various parameters,
+ * however this creates some ambiguity for the params which are considered
+ * immutable. Also in general we're phasing out the various SET/GET ioctls.
+ */
+struct drm_i915_gem_create_ext {
+ /**
+ * @size: Requested size for the object.
+ *
+ * The (page-aligned) allocated size for the object will be returned.
+ *
+ * Note that for some devices we have might have further minimum
+ * page-size restrictions(larger than 4K), like for device local-memory.
+ * However in general the final size here should always reflect any
+ * rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS
+ * extension to place the object in device local-memory.
+ */
+ __u64 size;
+ /**
+ * @handle: Returned handle for the object.
+ *
+ * Object handles are nonzero.
+ */
+ __u32 handle;
+ /** @flags: MBZ */
+ __u32 flags;
+ /**
+ * @extensions: The chain of extensions to apply to this object.
+ *
+ * This will be useful in the future when we need to support several
+ * different extensions, and we need to apply more than one when
+ * creating the object. See struct i915_user_extension.
+ *
+ * If we don't supply any extensions then we get the same old gem_create
+ * behaviour.
+ *
+ * For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
+ * struct drm_i915_gem_create_ext_memory_regions.
+ */
+#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
+ __u64 extensions;
+};
+
+/**
+ * struct drm_i915_gem_create_ext_memory_regions - The
+ * I915_GEM_CREATE_EXT_MEMORY_REGIONS extension.
+ *
+ * Set the object with the desired set of placements/regions in priority
+ * order. Each entry must be unique and supported by the device.
+ *
+ * This is provided as an array of struct drm_i915_gem_memory_class_instance, or
+ * an equivalent layout of class:instance pair encodings. See struct
+ * drm_i915_query_memory_regions and DRM_I915_QUERY_MEMORY_REGIONS for how to
+ * query the supported regions for a device.
+ *
+ * As an example, on discrete devices, if we wish to set the placement as
+ * device local-memory we can do something like:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_memory_class_instance region_lmem = {
+ * .memory_class = I915_MEMORY_CLASS_DEVICE,
+ * .memory_instance = 0,
+ * };
+ * struct drm_i915_gem_create_ext_memory_regions regions = {
+ * .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
+ * .regions = (uintptr_t)&region_lmem,
+ * .num_regions = 1,
+ * };
+ * struct drm_i915_gem_create_ext create_ext = {
+ * .size = 16 * PAGE_SIZE,
+ * .extensions = (uintptr_t)&regions,
+ * };
+ *
+ * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ * if (err) ...
+ *
+ * At which point we get the object handle in &drm_i915_gem_create_ext.handle,
+ * along with the final object size in &drm_i915_gem_create_ext.size, which
+ * should account for any rounding up, if required.
+ */
+struct drm_i915_gem_create_ext_memory_regions {
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+
+ /** @pad: MBZ */
+ __u32 pad;
+ /** @num_regions: Number of elements in the @regions array. */
+ __u32 num_regions;
+ /**
+ * @regions: The regions/placements array.
+ *
+ * An array of struct drm_i915_gem_memory_class_instance.
+ */
+ __u64 regions;
+};
diff --git a/Documentation/gpu/rfc/i915_gem_lmem.rst b/Documentation/gpu/rfc/i915_gem_lmem.rst
new file mode 100644
index 000000000000..1d344c593018
--- /dev/null
+++ b/Documentation/gpu/rfc/i915_gem_lmem.rst
@@ -0,0 +1,131 @@
+=========================
+I915 DG1/LMEM RFC Section
+=========================
+
+Upstream plan
+=============
+For upstream the overall plan for landing all the DG1 stuff and turning it for
+real, with all the uAPI bits is:
+
+* Merge basic HW enabling of DG1(still without pciid)
+* Merge the uAPI bits behind special CONFIG_BROKEN(or so) flag
+ * At this point we can still make changes, but importantly this lets us
+ start running IGTs which can utilize local-memory in CI
+* Convert over to TTM, make sure it all keeps working. Some of the work items:
+ * TTM shrinker for discrete
+ * dma_resv_lockitem for full dma_resv_lock, i.e not just trylock
+ * Use TTM CPU pagefault handler
+ * Route shmem backend over to TTM SYSTEM for discrete
+ * TTM purgeable object support
+ * Move i915 buddy allocator over to TTM
+ * MMAP ioctl mode(see `I915 MMAP`_)
+ * SET/GET ioctl caching(see `I915 SET/GET CACHING`_)
+* Send RFC(with mesa-dev on cc) for final sign off on the uAPI
+* Add pciid for DG1 and turn on uAPI for real
+
+New object placement and region query uAPI
+==========================================
+Starting from DG1 we need to give userspace the ability to allocate buffers from
+device local-memory. Currently the driver supports gem_create, which can place
+buffers in system memory via shmem, and the usual assortment of other
+interfaces, like dumb buffers and userptr.
+
+To support this new capability, while also providing a uAPI which will work
+beyond just DG1, we propose to offer three new bits of uAPI:
+
+DRM_I915_QUERY_MEMORY_REGIONS
+-----------------------------
+New query ID which allows userspace to discover the list of supported memory
+regions(like system-memory and local-memory) for a given device. We identify
+each region with a class and instance pair, which should be unique. The class
+here would be DEVICE or SYSTEM, and the instance would be zero, on platforms
+like DG1.
+
+Side note: The class/instance design is borrowed from our existing engine uAPI,
+where we describe every physical engine in terms of its class, and the
+particular instance, since we can have more than one per class.
+
+In the future we also want to expose more information which can further
+describe the capabilities of a region.
+
+.. kernel-doc:: Documentation/gpu/rfc/i915_gem_lmem.h
+ :functions: drm_i915_gem_memory_class drm_i915_gem_memory_class_instance drm_i915_memory_region_info drm_i915_query_memory_regions
+
+GEM_CREATE_EXT
+--------------
+New ioctl which is basically just gem_create but now allows userspace to provide
+a chain of possible extensions. Note that if we don't provide any extensions and
+set flags=0 then we get the exact same behaviour as gem_create.
+
+Side note: We also need to support PXP[1] in the near future, which is also
+applicable to integrated platforms, and adds its own gem_create_ext extension,
+which basically lets userspace mark a buffer as "protected".
+
+.. kernel-doc:: Documentation/gpu/rfc/i915_gem_lmem.h
+ :functions: drm_i915_gem_create_ext
+
+I915_GEM_CREATE_EXT_MEMORY_REGIONS
+----------------------------------
+Implemented as an extension for gem_create_ext, we would now allow userspace to
+optionally provide an immutable list of preferred placements at creation time,
+in priority order, for a given buffer object. For the placements we expect
+them each to use the class/instance encoding, as per the output of the regions
+query. Having the list in priority order will be useful in the future when
+placing an object, say during eviction.
+
+.. kernel-doc:: Documentation/gpu/rfc/i915_gem_lmem.h
+ :functions: drm_i915_gem_create_ext_memory_regions
+
+One fair criticism here is that this seems a little over-engineered[2]. If we
+just consider DG1 then yes, a simple gem_create.flags or something is totally
+all that's needed to tell the kernel to allocate the buffer in local-memory or
+whatever. However looking to the future we need uAPI which can also support
+upcoming Xe HP multi-tile architecture in a sane way, where there can be
+multiple local-memory instances for a given device, and so using both class and
+instance in our uAPI to describe regions is desirable, although specifically
+for DG1 it's uninteresting, since we only have a single local-memory instance.
+
+Existing uAPI issues
+====================
+Some potential issues we still need to resolve.
+
+I915 MMAP
+---------
+In i915 there are multiple ways to MMAP GEM object, including mapping the same
+object using different mapping types(WC vs WB), i.e multiple active mmaps per
+object. TTM expects one MMAP at most for the lifetime of the object. If it
+turns out that we have to backpedal here, there might be some potential
+userspace fallout.
+
+I915 SET/GET CACHING
+--------------------
+In i915 we have set/get_caching ioctl. TTM doesn't let us to change this, but
+DG1 doesn't support non-snooped pcie transactions, so we can just always
+allocate as WB for smem-only buffers. If/when our hw gains support for
+non-snooped pcie transactions then we must fix this mode at allocation time as
+a new GEM extension.
+
+This is related to the mmap problem, because in general (meaning, when we're
+not running on intel cpus) the cpu mmap must not, ever, be inconsistent with
+allocation mode.
+
+Possible idea is to let the kernel picks the mmap mode for userspace from the
+following table:
+
+smem-only: WB. Userspace does not need to call clflush.
+
+smem+lmem: We only ever allow a single mode, so simply allocate this as uncached
+memory, and always give userspace a WC mapping. GPU still does snooped access
+here(assuming we can't turn it off like on DG1), which is a bit inefficient.
+
+lmem only: always WC
+
+This means on discrete you only get a single mmap mode, all others must be
+rejected. That's probably going to be a new default mode or something like
+that.
+
+Links
+=====
+[1] https://patchwork.freedesktop.org/series/86798/
+
+[2] https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5599#note_553791
diff --git a/Documentation/gpu/rfc/index.rst b/Documentation/gpu/rfc/index.rst
index a8621f7dab8b..05670442ca1b 100644
--- a/Documentation/gpu/rfc/index.rst
+++ b/Documentation/gpu/rfc/index.rst
@@ -15,3 +15,7 @@ host such documentation:
* Once the code has landed move all the documentation to the right places in
the main core, helper or driver sections.
+
+.. toctree::
+
+ i915_gem_lmem.rst