diff options
Diffstat (limited to 'libgomp/target.c')
-rw-r--r-- | libgomp/target.c | 634 |
1 files changed, 434 insertions, 200 deletions
diff --git a/libgomp/target.c b/libgomp/target.c index ec097de9ca2..83ad51108d0 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -27,11 +27,14 @@ #include "config.h" #include "libgomp.h" -#include "libgomp_target.h" +#include "oacc-plugin.h" +#include "oacc-int.h" +#include "gomp-constants.h" #include <limits.h> #include <stdbool.h> #include <stdlib.h> #include <string.h> +#include <assert.h> #ifdef PLUGIN_SUPPORT #include <dlfcn.h> @@ -39,52 +42,9 @@ static void gomp_target_init (void); +/* The whole initialization code for offloading plugins is only run one. */ static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; -/* Forward declaration for a node in the tree. */ -typedef struct splay_tree_node_s *splay_tree_node; -typedef struct splay_tree_s *splay_tree; -typedef struct splay_tree_key_s *splay_tree_key; - -struct target_mem_desc { - /* Reference count. */ - uintptr_t refcount; - /* All the splay nodes allocated together. */ - splay_tree_node array; - /* Start of the target region. */ - uintptr_t tgt_start; - /* End of the targer region. */ - uintptr_t tgt_end; - /* Handle to free. */ - void *to_free; - /* Previous target_mem_desc. */ - struct target_mem_desc *prev; - /* Number of items in following list. */ - size_t list_count; - - /* Corresponding target device descriptor. */ - struct gomp_device_descr *device_descr; - - /* List of splay keys to remove (or decrease refcount) - at the end of region. */ - splay_tree_key list[]; -}; - -struct splay_tree_key_s { - /* Address of the host object. */ - uintptr_t host_start; - /* Address immediately after the host object. */ - uintptr_t host_end; - /* Descriptor of the target memory. */ - struct target_mem_desc *tgt; - /* Offset from tgt->tgt_start to the start of the target object. */ - uintptr_t tgt_offset; - /* Reference count. */ - uintptr_t refcount; - /* True if data should be copied from device to host at the end. */ - bool copy_from; -}; - /* This structure describes an offload image. It contains type of the target device, pointer to host table descriptor, and pointer to target data. */ @@ -106,9 +66,12 @@ static struct gomp_device_descr *devices; /* Total number of available devices. */ static int num_devices; +/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ +static int num_devices_openmp; + /* The comparison function. */ -static int +attribute_hidden int splay_compare (splay_tree_key x, splay_tree_key y) { if (x->host_start == x->host_end @@ -123,54 +86,23 @@ splay_compare (splay_tree_key x, splay_tree_key y) #include "splay-tree.h" -/* This structure describes accelerator device. - It contains ID-number of the device, its type, function handlers for - interaction with the device, and information about mapped memory. */ -struct gomp_device_descr +attribute_hidden void +gomp_init_targets_once (void) { - /* This is the ID number of device. It could be specified in DEVICE-clause of - TARGET construct. */ - int id; - - /* This is the ID number of device among devices of the same type. */ - int target_id; - - /* This is the TYPE of device. */ - enum offload_target_type type; - - /* Set to true when device is initialized. */ - bool is_initialized; - - /* Function handlers. */ - int (*get_type_func) (void); - int (*get_num_devices_func) (void); - void (*register_image_func) (void *, void *); - void (*init_device_func) (int); - int (*get_table_func) (int, void *); - void *(*alloc_func) (int, size_t); - void (*free_func) (int, void *); - void *(*host2dev_func) (int, void *, const void *, size_t); - void *(*dev2host_func) (int, void *, const void *, size_t); - void (*run_func) (int, void *, void *); - - /* Splay tree containing information about mapped memory regions. */ - struct splay_tree_s dev_splay_tree; - - /* Mutex for operating with the splay tree and other shared structures. */ - gomp_mutex_t dev_env_lock; -}; + (void) pthread_once (&gomp_is_initialized, gomp_target_init); +} attribute_hidden int gomp_get_num_devices (void) { - (void) pthread_once (&gomp_is_initialized, gomp_target_init); - return num_devices; + gomp_init_targets_once (); + return num_devices_openmp; } static struct gomp_device_descr * resolve_device (int device_id) { - if (device_id == -1) + if (device_id == GOMP_DEVICE_ICV) { struct gomp_task_icv *icv = gomp_icv (false); device_id = icv->default_device_var; @@ -190,27 +122,39 @@ static inline void gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn, unsigned char kind) { - if (oldn->host_start > newn->host_start + if ((kind & GOMP_MAP_FLAG_FORCE) + || oldn->host_start > newn->host_start || oldn->host_end < newn->host_end) - gomp_fatal ("Trying to map into device [%p..%p) object when" + gomp_fatal ("Trying to map into device [%p..%p) object when " "[%p..%p) is already mapped", (void *) newn->host_start, (void *) newn->host_end, (void *) oldn->host_start, (void *) oldn->host_end); oldn->refcount++; } -static struct target_mem_desc * +static int +get_kind (bool is_openacc, void *kinds, int idx) +{ + return is_openacc ? ((unsigned short *) kinds)[idx] + : ((unsigned char *) kinds)[idx]; +} + +attribute_hidden struct target_mem_desc * gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned char *kinds, - bool is_target) + void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, + bool is_openacc, bool is_target) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; + const int rshift = is_openacc ? 8 : 3; + const int typemask = is_openacc ? 0xff : 0x7; + struct gomp_memory_mapping *mm = &devicep->mem_map; struct splay_tree_key_s cur_node; struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); tgt->list_count = mapnum; tgt->refcount = 1; tgt->device_descr = devicep; + tgt->mem_map = mm; if (mapnum == 0) return tgt; @@ -224,40 +168,43 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, tgt_size = mapnum * sizeof (void *); } - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&mm->lock); + for (i = 0; i < mapnum; i++) { + int kind = get_kind (is_openacc, kinds, i); if (hostaddrs[i] == NULL) { tgt->list[i] = NULL; continue; } cur_node.host_start = (uintptr_t) hostaddrs[i]; - if ((kinds[i] & 7) != 4) + if (!GOMP_MAP_POINTER_P (kind & typemask)) cur_node.host_end = cur_node.host_start + sizes[i]; else cur_node.host_end = cur_node.host_start + sizeof (void *); - splay_tree_key n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n) { tgt->list[i] = n; - gomp_map_vars_existing (n, &cur_node, kinds[i]); + gomp_map_vars_existing (n, &cur_node, kind & typemask); } else { - size_t align = (size_t) 1 << (kinds[i] >> 3); tgt->list[i] = NULL; + + size_t align = (size_t) 1 << (kind >> rshift); not_found_cnt++; if (tgt_align < align) tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); tgt_size += cur_node.host_end - cur_node.host_start; - if ((kinds[i] & 7) == 5) + if ((kind & typemask) == GOMP_MAP_TO_PSET) { size_t j; for (j = i + 1; j < mapnum; j++) - if ((kinds[j] & 7) != 4) + if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + & typemask)) break; else if ((uintptr_t) hostaddrs[j] < cur_node.host_start || ((uintptr_t) hostaddrs[j] + sizeof (void *) @@ -272,7 +219,15 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } - if (not_found_cnt || is_target) + if (devaddrs) + { + if (mapnum != 1) + gomp_fatal ("unexpected aggregation"); + tgt->to_free = devaddrs[0]; + tgt->tgt_start = (uintptr_t) tgt->to_free; + tgt->tgt_end = tgt->tgt_start + sizes[0]; + } + else if (not_found_cnt || is_target) { /* Allocate tgt_align aligned tgt_size block of memory. */ /* FIXME: Perhaps change interface to allocate properly aligned @@ -304,44 +259,47 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, for (i = 0; i < mapnum; i++) if (tgt->list[i] == NULL) { + int kind = get_kind (is_openacc, kinds, i); if (hostaddrs[i] == NULL) continue; splay_tree_key k = &array->key; k->host_start = (uintptr_t) hostaddrs[i]; - if ((kinds[i] & 7) != 4) + if (!GOMP_MAP_POINTER_P (kind & typemask)) k->host_end = k->host_start + sizes[i]; else k->host_end = k->host_start + sizeof (void *); - splay_tree_key n - = splay_tree_lookup (&devicep->dev_splay_tree, k); + splay_tree_key n = splay_tree_lookup (&mm->splay_tree, k); if (n) { tgt->list[i] = n; - gomp_map_vars_existing (n, k, kinds[i]); + gomp_map_vars_existing (n, k, kind & typemask); } else { - size_t align = (size_t) 1 << (kinds[i] >> 3); + size_t align = (size_t) 1 << (kind >> rshift); tgt->list[i] = k; tgt_size = (tgt_size + align - 1) & ~(align - 1); k->tgt = tgt; k->tgt_offset = tgt_size; tgt_size += k->host_end - k->host_start; - k->copy_from = false; - if ((kinds[i] & 7) == 2 || (kinds[i] & 7) == 3) - k->copy_from = true; + k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); k->refcount = 1; + k->async_refcount = 0; tgt->refcount++; array->left = NULL; array->right = NULL; - splay_tree_insert (&devicep->dev_splay_tree, array); - switch (kinds[i] & 7) + splay_tree_insert (&mm->splay_tree, array); + switch (kind & typemask) { - case 0: /* ALLOC */ - case 2: /* FROM */ + case GOMP_MAP_ALLOC: + case GOMP_MAP_FROM: + case GOMP_MAP_FORCE_ALLOC: + case GOMP_MAP_FORCE_FROM: break; - case 1: /* TO */ - case 3: /* TOFROM */ + case GOMP_MAP_TO: + case GOMP_MAP_TOFROM: + case GOMP_MAP_FORCE_TO: + case GOMP_MAP_FORCE_TOFROM: /* FIXME: Perhaps add some smarts, like if copying several adjacent fields from host to target, use some host buffer to avoid sending each var individually. */ @@ -351,12 +309,13 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, (void *) k->host_start, k->host_end - k->host_start); break; - case 4: /* POINTER */ + case GOMP_MAP_POINTER: cur_node.host_start = (uintptr_t) *(void **) k->host_start; if (cur_node.host_start == (uintptr_t) NULL) { cur_node.tgt_offset = (uintptr_t) NULL; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset), @@ -367,19 +326,16 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, /* Add bias to the pointer value. */ cur_node.host_start += sizes[i]; cur_node.host_end = cur_node.host_start + 1; - n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n == NULL) { /* Could be possibly zero size array section. */ cur_node.host_end--; - n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n == NULL) { cur_node.host_start--; - n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + n = splay_tree_lookup (&mm->splay_tree, &cur_node); cur_node.host_start++; } } @@ -393,20 +349,24 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, array section. Now subtract bias to get what we want to initialize the pointer with. */ cur_node.tgt_offset -= sizes[i]; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset), (void *) &cur_node.tgt_offset, sizeof (void *)); break; - case 5: /* TO_PSET */ + case GOMP_MAP_TO_PSET: + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset), (void *) k->host_start, k->host_end - k->host_start); + for (j = i + 1; j < mapnum; j++) - if ((kinds[j] & 7) != 4) + if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j) + & typemask)) break; else if ((uintptr_t) hostaddrs[j] < k->host_start || ((uintptr_t) hostaddrs[j] + sizeof (void *) @@ -421,6 +381,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, if (cur_node.host_start == (uintptr_t) NULL) { cur_node.tgt_offset = (uintptr_t) NULL; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset + ((uintptr_t) hostaddrs[j] @@ -433,19 +394,18 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, /* Add bias to the pointer value. */ cur_node.host_start += sizes[j]; cur_node.host_end = cur_node.host_start + 1; - n = splay_tree_lookup (&devicep->dev_splay_tree, - &cur_node); + n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n == NULL) { /* Could be possibly zero size array section. */ cur_node.host_end--; - n = splay_tree_lookup (&devicep->dev_splay_tree, + n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n == NULL) { cur_node.host_start--; - n = splay_tree_lookup - (&devicep->dev_splay_tree, &cur_node); + n = splay_tree_lookup (&mm->splay_tree, + &cur_node); cur_node.host_start++; } } @@ -460,6 +420,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, array section. Now subtract bias to get what we want to initialize the pointer with. */ cur_node.tgt_offset -= sizes[j]; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + k->tgt_offset + ((uintptr_t) hostaddrs[j] @@ -468,12 +429,35 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, sizeof (void *)); i++; } - break; + break; + case GOMP_MAP_FORCE_PRESENT: + { + /* We already looked up the memory region above and it + was missing. */ + size_t size = k->host_end - k->host_start; + gomp_fatal ("present clause: !acc_is_present (%p, " + "%zd (0x%zx))", (void *) k->host_start, + size, size); + } + break; + case GOMP_MAP_FORCE_DEVICEPTR: + assert (k->host_end - k->host_start == sizeof (void *)); + + devicep->host2dev_func (devicep->target_id, + (void *) (tgt->tgt_start + + k->tgt_offset), + (void *) k->host_start, + sizeof (void *)); + break; + default: + gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, + kind); } array++; } } } + if (is_target) { for (i = 0; i < mapnum; i++) @@ -483,6 +467,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, else cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start + tgt->list[i]->tgt_offset; + /* FIXME: see above FIXME comment. */ devicep->host2dev_func (devicep->target_id, (void *) (tgt->tgt_start + i * sizeof (void *)), @@ -491,7 +476,7 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, } } - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&mm->lock); return tgt; } @@ -506,10 +491,50 @@ gomp_unmap_tgt (struct target_mem_desc *tgt) free (tgt); } -static void -gomp_unmap_vars (struct target_mem_desc *tgt) +/* Decrease the refcount for a set of mapped variables, and queue asychronous + copies from the device back to the host after any work that has been issued. + Because the regions are still "live", increment an asynchronous reference + count to indicate that they should not be unmapped from host-side data + structures until the asynchronous copy has completed. */ + +attribute_hidden void +gomp_copy_from_async (struct target_mem_desc *tgt) +{ + struct gomp_device_descr *devicep = tgt->device_descr; + struct gomp_memory_mapping *mm = tgt->mem_map; + size_t i; + + gomp_mutex_lock (&mm->lock); + + for (i = 0; i < tgt->list_count; i++) + if (tgt->list[i] == NULL) + ; + else if (tgt->list[i]->refcount > 1) + { + tgt->list[i]->refcount--; + tgt->list[i]->async_refcount++; + } + else + { + splay_tree_key k = tgt->list[i]; + if (k->copy_from) + devicep->dev2host_func (devicep->target_id, (void *) k->host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset), + k->host_end - k->host_start); + } + + gomp_mutex_unlock (&mm->lock); +} + +/* Unmap variables described by TGT. If DO_COPYFROM is true, copy relevant + variables back from device to host: if it is false, it is assumed that this + has been done already, i.e. by gomp_copy_from_async above. */ + +attribute_hidden void +gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom) { struct gomp_device_descr *devicep = tgt->device_descr; + struct gomp_memory_mapping *mm = tgt->mem_map; if (tgt->list_count == 0) { @@ -517,21 +542,24 @@ gomp_unmap_vars (struct target_mem_desc *tgt) return; } + gomp_mutex_lock (&mm->lock); + size_t i; - gomp_mutex_lock (&devicep->dev_env_lock); for (i = 0; i < tgt->list_count; i++) if (tgt->list[i] == NULL) ; else if (tgt->list[i]->refcount > 1) tgt->list[i]->refcount--; + else if (tgt->list[i]->async_refcount > 0) + tgt->list[i]->async_refcount--; else { splay_tree_key k = tgt->list[i]; - if (k->copy_from) + if (k->copy_from && do_copyfrom) devicep->dev2host_func (devicep->target_id, (void *) k->host_start, (void *) (k->tgt->tgt_start + k->tgt_offset), k->host_end - k->host_start); - splay_tree_remove (&devicep->dev_splay_tree, k); + splay_tree_remove (&mm->splay_tree, k); if (k->tgt->refcount > 1) k->tgt->refcount--; else @@ -542,15 +570,18 @@ gomp_unmap_vars (struct target_mem_desc *tgt) tgt->refcount--; else gomp_unmap_tgt (tgt); - gomp_mutex_unlock (&devicep->dev_env_lock); + + gomp_mutex_unlock (&mm->lock); } static void -gomp_update (struct gomp_device_descr *devicep, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned char *kinds) +gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm, + size_t mapnum, void **hostaddrs, size_t *sizes, void *kinds, + bool is_openacc) { size_t i; struct splay_tree_key_s cur_node; + const int typemask = is_openacc ? 0xff : 0x7; if (!devicep) return; @@ -558,16 +589,17 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, if (mapnum == 0) return; - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&mm->lock); for (i = 0; i < mapnum; i++) if (sizes[i]) { cur_node.host_start = (uintptr_t) hostaddrs[i]; cur_node.host_end = cur_node.host_start + sizes[i]; - splay_tree_key n = splay_tree_lookup (&devicep->dev_splay_tree, + splay_tree_key n = splay_tree_lookup (&mm->splay_tree, &cur_node); if (n) { + int kind = get_kind (is_openacc, kinds, i); if (n->host_start > cur_node.host_start || n->host_end < cur_node.host_end) gomp_fatal ("Trying to update [%p..%p) object when" @@ -576,7 +608,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, (void *) cur_node.host_end, (void *) n->host_start, (void *) n->host_end); - if ((kinds[i] & 7) == 1) + if (GOMP_MAP_COPY_TO_P (kind & typemask)) devicep->host2dev_func (devicep->target_id, (void *) (n->tgt->tgt_start + n->tgt_offset @@ -584,7 +616,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, - n->host_start), (void *) cur_node.host_start, cur_node.host_end - cur_node.host_start); - else if ((kinds[i] & 7) == 2) + if (GOMP_MAP_COPY_FROM_P (kind & typemask)) devicep->dev2host_func (devicep->target_id, (void *) cur_node.host_start, (void *) (n->tgt->tgt_start @@ -598,7 +630,7 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, (void *) cur_node.host_start, (void *) cur_node.host_end); } - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&mm->lock); } /* This function should be called from every offload image. @@ -620,13 +652,23 @@ GOMP_offload_register (void *host_table, enum offload_target_type target_type, num_offload_images++; } -/* This function initializes the target device, specified by DEVICEP. */ +/* This function initializes the target device, specified by DEVICEP. DEVICEP + must be locked on entry, and remains locked on return. */ -static void +attribute_hidden void gomp_init_device (struct gomp_device_descr *devicep) { devicep->init_device_func (devicep->target_id); + devicep->is_initialized = true; +} +/* Initialize address mapping tables. MM must be locked on entry, and remains + locked on return. */ + +attribute_hidden void +gomp_init_tables (struct gomp_device_descr *devicep, + struct gomp_memory_mapping *mm) +{ /* Get address mapping table for device. */ struct mapping_table *table = NULL; int num_entries = devicep->get_table_func (devicep->target_id, &table); @@ -653,30 +695,63 @@ gomp_init_device (struct gomp_device_descr *devicep) k->tgt = tgt; node->left = NULL; node->right = NULL; - splay_tree_insert (&devicep->dev_splay_tree, node); + splay_tree_insert (&mm->splay_tree, node); } free (table); - devicep->is_initialized = true; + mm->is_initialized = true; +} + +/* Free address mapping tables. MM must be locked on entry, and remains locked + on return. */ + +attribute_hidden void +gomp_free_memmap (struct gomp_memory_mapping *mm) +{ + while (mm->splay_tree.root) + { + struct target_mem_desc *tgt = mm->splay_tree.root->key.tgt; + + splay_tree_remove (&mm->splay_tree, &mm->splay_tree.root->key); + free (tgt->array); + free (tgt); + } + + mm->is_initialized = false; +} + +/* This function de-initializes the target device, specified by DEVICEP. + DEVICEP must be locked on entry, and remains locked on return. */ + +attribute_hidden void +gomp_fini_device (struct gomp_device_descr *devicep) +{ + if (devicep->is_initialized) + devicep->fini_device_func (devicep->target_id); + + devicep->is_initialized = false; } /* Called when encountering a target directive. If DEVICE - is -1, it means use device-var ICV. If it is -2 (or any other value - larger than last available hw device, use host fallback. - FN is address of host code, OPENMP_TARGET contains value of the - __OPENMP_TARGET__ symbol in the shared library or binary that invokes + is GOMP_DEVICE_ICV, it means use device-var ICV. If it is + GOMP_DEVICE_HOST_FALLBACK (or any value + larger than last available hw device), use host fallback. + FN is address of host code, OFFLOAD_TABLE contains value of the + __OFFLOAD_TABLE__ symbol in the shared library or binary that invokes GOMP_target. HOSTADDRS, SIZES and KINDS are arrays with MAPNUM entries, with addresses of the host objects, sizes of the host objects (resp. for pointer kind pointer bias and assumed sizeof (void *) size) and kinds. */ void -GOMP_target (int device, void (*fn) (void *), const void *openmp_target, +GOMP_target (int device, void (*fn) (void *), const void *offload_table, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { struct gomp_device_descr *devicep = resolve_device (device); - if (devicep == NULL) + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) { /* Host fallback. */ struct gomp_thread old_thr, *thr = gomp_thread (); @@ -693,20 +768,38 @@ GOMP_target (int device, void (*fn) (void *), const void *openmp_target, return; } - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&devicep->lock); if (!devicep->is_initialized) gomp_init_device (devicep); + gomp_mutex_unlock (&devicep->lock); + + void *fn_addr; + + if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) + fn_addr = (void *) fn; + else + { + struct gomp_memory_mapping *mm = &devicep->mem_map; + gomp_mutex_lock (&mm->lock); + + if (!mm->is_initialized) + gomp_init_tables (devicep, mm); + + struct splay_tree_key_s k; + k.host_start = (uintptr_t) fn; + k.host_end = k.host_start + 1; + splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k); + if (tgt_fn == NULL) + gomp_fatal ("Target function wasn't mapped"); - struct splay_tree_key_s k; - k.host_start = (uintptr_t) fn; - k.host_end = k.host_start + 1; - splay_tree_key tgt_fn = splay_tree_lookup (&devicep->dev_splay_tree, &k); - if (tgt_fn == NULL) - gomp_fatal ("Target function wasn't mapped"); - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&mm->lock); + + fn_addr = (void *) tgt_fn->tgt->tgt_start; + } struct target_mem_desc *tgt_vars - = gomp_map_vars (devicep, mapnum, hostaddrs, sizes, kinds, true); + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + true); struct gomp_thread old_thr, *thr = gomp_thread (); old_thr = *thr; memset (thr, '\0', sizeof (*thr)); @@ -715,19 +808,20 @@ GOMP_target (int device, void (*fn) (void *), const void *openmp_target, thr->place = old_thr.place; thr->ts.place_partition_len = gomp_places_list_len; } - devicep->run_func (devicep->target_id, (void *) tgt_fn->tgt->tgt_start, - (void *) tgt_vars->tgt_start); + devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); gomp_free_thread (thr); *thr = old_thr; - gomp_unmap_vars (tgt_vars); + gomp_unmap_vars (tgt_vars, true); } void -GOMP_target_data (int device, const void *openmp_target, size_t mapnum, +GOMP_target_data (int device, const void *offload_table, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { struct gomp_device_descr *devicep = resolve_device (device); - if (devicep == NULL) + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) { /* Host fallback. */ struct gomp_task_icv *icv = gomp_icv (false); @@ -738,20 +832,27 @@ GOMP_target_data (int device, const void *openmp_target, size_t mapnum, new #pragma omp target data, otherwise GOMP_target_end_data would get out of sync. */ struct target_mem_desc *tgt - = gomp_map_vars (NULL, 0, NULL, NULL, NULL, false); + = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false); tgt->prev = icv->target_data; icv->target_data = tgt; } return; } - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&devicep->lock); if (!devicep->is_initialized) gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&devicep->lock); + + struct gomp_memory_mapping *mm = &devicep->mem_map; + gomp_mutex_lock (&mm->lock); + if (!mm->is_initialized) + gomp_init_tables (devicep, mm); + gomp_mutex_unlock (&mm->lock); struct target_mem_desc *tgt - = gomp_map_vars (devicep, mapnum, hostaddrs, sizes, kinds, false); + = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, + false); struct gomp_task_icv *icv = gomp_icv (true); tgt->prev = icv->target_data; icv->target_data = tgt; @@ -765,24 +866,32 @@ GOMP_target_end_data (void) { struct target_mem_desc *tgt = icv->target_data; icv->target_data = tgt->prev; - gomp_unmap_vars (tgt); + gomp_unmap_vars (tgt, true); } } void -GOMP_target_update (int device, const void *openmp_target, size_t mapnum, +GOMP_target_update (int device, const void *offload_table, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { struct gomp_device_descr *devicep = resolve_device (device); - if (devicep == NULL) + + if (devicep == NULL + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) return; - gomp_mutex_lock (&devicep->dev_env_lock); + gomp_mutex_lock (&devicep->lock); if (!devicep->is_initialized) gomp_init_device (devicep); - gomp_mutex_unlock (&devicep->dev_env_lock); + gomp_mutex_unlock (&devicep->lock); + + struct gomp_memory_mapping *mm = &devicep->mem_map; + gomp_mutex_lock (&mm->lock); + if (!mm->is_initialized) + gomp_init_tables (devicep, mm); + gomp_mutex_unlock (&mm->lock); - gomp_update (devicep, mapnum, hostaddrs, sizes, kinds); + gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false); } void @@ -808,54 +917,137 @@ static bool gomp_load_plugin_for_device (struct gomp_device_descr *device, const char *plugin_name) { + char *err = NULL, *last_missing = NULL; + int optional_present, optional_total; + + /* Clear any existing error. */ + dlerror (); + void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); if (!plugin_handle) - return false; + { + err = dlerror (); + goto out; + } /* Check if all required functions are available in the plugin and store their handlers. */ -#define DLSYM(f) \ - do \ - { \ - device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_"#f); \ - if (!device->f##_func) \ - return false; \ - } \ +#define DLSYM(f) \ + do \ + { \ + device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f); \ + err = dlerror (); \ + if (err != NULL) \ + goto out; \ + } \ while (0) + /* Similar, but missing functions are not an error. */ +#define DLSYM_OPT(f, n) \ + do \ + { \ + char *tmp_err; \ + device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \ + tmp_err = dlerror (); \ + if (tmp_err == NULL) \ + optional_present++; \ + else \ + last_missing = #n; \ + optional_total++; \ + } \ + while (0) + + DLSYM (get_name); + DLSYM (get_caps); DLSYM (get_type); DLSYM (get_num_devices); DLSYM (register_image); DLSYM (init_device); + DLSYM (fini_device); DLSYM (get_table); DLSYM (alloc); DLSYM (free); DLSYM (dev2host); DLSYM (host2dev); - DLSYM (run); + device->capabilities = device->get_caps_func (); + if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + DLSYM (run); + if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) + { + optional_present = optional_total = 0; + DLSYM_OPT (openacc.exec, openacc_parallel); + DLSYM_OPT (openacc.open_device, openacc_open_device); + DLSYM_OPT (openacc.close_device, openacc_close_device); + DLSYM_OPT (openacc.get_device_num, openacc_get_device_num); + DLSYM_OPT (openacc.set_device_num, openacc_set_device_num); + DLSYM_OPT (openacc.register_async_cleanup, + openacc_register_async_cleanup); + DLSYM_OPT (openacc.async_test, openacc_async_test); + DLSYM_OPT (openacc.async_test_all, openacc_async_test_all); + DLSYM_OPT (openacc.async_wait, openacc_async_wait); + DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async); + DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all); + DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async); + DLSYM_OPT (openacc.async_set_async, openacc_async_set_async); + DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data); + DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data); + /* Require all the OpenACC handlers if we have + GOMP_OFFLOAD_CAP_OPENACC_200. */ + if (optional_present != optional_total) + { + err = "plugin missing OpenACC handler function"; + goto out; + } + optional_present = optional_total = 0; + DLSYM_OPT (openacc.cuda.get_current_device, + openacc_get_current_cuda_device); + DLSYM_OPT (openacc.cuda.get_current_context, + openacc_get_current_cuda_context); + DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream); + DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream); + /* Make sure all the CUDA functions are there if any of them are. */ + if (optional_present && optional_present != optional_total) + { + err = "plugin missing OpenACC CUDA handler function"; + goto out; + } + } #undef DLSYM +#undef DLSYM_OPT - return true; + out: + if (err != NULL) + { + gomp_error ("while loading %s: %s", plugin_name, err); + if (last_missing) + gomp_error ("missing function was %s", last_missing); + if (plugin_handle) + dlclose (plugin_handle); + } + return err == NULL; } -/* This function finds OFFLOAD_IMAGES corresponding to DEVICE type, and - registers them in the plugin. */ +/* This function adds a compatible offload image IMAGE to an accelerator device + DEVICE. DEVICE must be locked on entry, and remains locked on return. */ static void -gomp_register_images_for_device (struct gomp_device_descr *device) +gomp_register_image_for_device (struct gomp_device_descr *device, + struct offload_image_descr *image) { - int i; - for (i = 0; i < num_offload_images; i++) + if (!device->offload_regions_registered + && (device->type == image->type + || device->type == OFFLOAD_TARGET_TYPE_HOST)) { - struct offload_image_descr *image = &offload_images[i]; - if (image->type == device->type) - device->register_image_func (image->host_table, image->target_data); + device->register_image_func (image->host_table, image->target_data); + device->offload_regions_registered = true; } } /* This function initializes the runtime needed for offloading. - It parses the list of offload targets and tries to load the plugins for these - targets. Result of the function is properly initialized variable NUM_DEVICES - and array DEVICES, containing descriptors for corresponding devices. */ + It parses the list of offload targets and tries to load the plugins for + these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP + will be set, and the array DEVICES initialized, containing descriptors for + corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows + by the others. */ static void gomp_target_init (void) @@ -894,6 +1086,8 @@ gomp_target_init (void) new_num_devices = current_device.get_num_devices_func (); if (new_num_devices >= 1) { + /* Augment DEVICES and NUM_DEVICES. */ + devices = realloc (devices, (num_devices + new_num_devices) * sizeof (struct gomp_device_descr)); if (!devices) @@ -903,16 +1097,21 @@ gomp_target_init (void) break; } + current_device.name = current_device.get_name_func (); + /* current_device.capabilities has already been set. */ current_device.type = current_device.get_type_func (); + current_device.mem_map.is_initialized = false; + current_device.mem_map.splay_tree.root = NULL; current_device.is_initialized = false; - current_device.dev_splay_tree.root = NULL; - gomp_register_images_for_device (¤t_device); + current_device.offload_regions_registered = false; + current_device.openacc.data_environ = NULL; + current_device.openacc.target_data = NULL; for (i = 0; i < new_num_devices; i++) { - current_device.id = num_devices + 1; current_device.target_id = i; devices[num_devices] = current_device; - gomp_mutex_init (&devices[num_devices].dev_env_lock); + gomp_mutex_init (&devices[num_devices].mem_map.lock); + gomp_mutex_init (&devices[num_devices].lock); num_devices++; } } @@ -923,6 +1122,41 @@ gomp_target_init (void) } while (next); + /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set + NUM_DEVICES_OPENMP. */ + struct gomp_device_descr *devices_s + = malloc (num_devices * sizeof (struct gomp_device_descr)); + if (!devices_s) + { + num_devices = 0; + free (devices); + devices = NULL; + } + num_devices_openmp = 0; + for (i = 0; i < num_devices; i++) + if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + devices_s[num_devices_openmp++] = devices[i]; + int num_devices_after_openmp = num_devices_openmp; + for (i = 0; i < num_devices; i++) + if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + devices_s[num_devices_after_openmp++] = devices[i]; + free (devices); + devices = devices_s; + + for (i = 0; i < num_devices; i++) + { + int j; + + for (j = 0; j < num_offload_images; j++) + gomp_register_image_for_device (&devices[i], &offload_images[j]); + + /* The 'devices' array can be moved (by the realloc call) until we have + found all the plugins, so registering with the OpenACC runtime (which + takes a copy of the pointer argument) must be delayed until now. */ + if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) + goacc_register (&devices[i]); + } + free (offload_images); offload_images = NULL; num_offload_images = 0; |