diff options
Diffstat (limited to 'gcc/hsa-brig.c')
-rw-r--r-- | gcc/hsa-brig.c | 2560 |
1 files changed, 2560 insertions, 0 deletions
diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c new file mode 100644 index 00000000000..cfbac581a30 --- /dev/null +++ b/gcc/hsa-brig.c @@ -0,0 +1,2560 @@ +/* Producing binary form of HSA BRIG from our internal representation. + Copyright (C) 2013-2016 Free Software Foundation, Inc. + Contributed by Martin Jambor <mjambor@suse.cz> and + Martin Liska <mliska@suse.cz>. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "target.h" +#include "tm_p.h" +#include "is-a.h" +#include "vec.h" +#include "hash-table.h" +#include "hash-map.h" +#include "tree.h" +#include "tree-iterator.h" +#include "stor-layout.h" +#include "output.h" +#include "cfg.h" +#include "function.h" +#include "fold-const.h" +#include "stringpool.h" +#include "gimple-pretty-print.h" +#include "diagnostic-core.h" +#include "cgraph.h" +#include "dumpfile.h" +#include "print-tree.h" +#include "symbol-summary.h" +#include "hsa.h" +#include "gomp-constants.h" + +/* Convert VAL to little endian form, if necessary. 
*/ + +static uint16_t +lendian16 (uint16_t val) +{ +#if GCC_VERSION >= 4006 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return val; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return __builtin_bswap16 (val); +#else /* __ORDER_PDP_ENDIAN__ */ + return val; +#endif +#else +// provide a safe slower default, with shifts and masking +#ifndef WORDS_BIGENDIAN + return val; +#else + return (val >> 8) | (val << 8); +#endif +#endif +} + +/* Convert VAL to little endian form, if necessary. */ + +static uint32_t +lendian32 (uint32_t val) +{ +#if GCC_VERSION >= 4006 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return val; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return __builtin_bswap32 (val); +#else /* __ORDER_PDP_ENDIAN__ */ + return (val >> 16) | (val << 16); +#endif +#else +// provide a safe slower default, with shifts and masking +#ifndef WORDS_BIGENDIAN + return val; +#else + val = ((val & 0xff00ff00) >> 8) | ((val & 0xff00ff) << 8); + return (val >> 16) | (val << 16); +#endif +#endif +} + +/* Convert VAL to little endian form, if necessary. 
*/ + +static uint64_t +lendian64 (uint64_t val) +{ +#if GCC_VERSION >= 4006 +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return val; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return __builtin_bswap64 (val); +#else /* __ORDER_PDP_ENDIAN__ */ + return (((val & 0xffffll) << 48) + | ((val & 0xffff0000ll) << 16) + | ((val & 0xffff00000000ll) >> 16) + | ((val & 0xffff000000000000ll) >> 48)); +#endif +#else +// provide a safe slower default, with shifts and masking +#ifndef WORDS_BIGENDIAN + return val; +#else + val = (((val & 0xff00ff00ff00ff00ll) >> 8) + | ((val & 0x00ff00ff00ff00ffll) << 8)); + val = ((( val & 0xffff0000ffff0000ll) >> 16) + | (( val & 0x0000ffff0000ffffll) << 16)); + return (val >> 32) | (val << 32); +#endif +#endif +} + +#define BRIG_ELF_SECTION_NAME ".brig" +#define BRIG_LABEL_STRING "hsa_brig" +#define BRIG_SECTION_DATA_NAME "hsa_data" +#define BRIG_SECTION_CODE_NAME "hsa_code" +#define BRIG_SECTION_OPERAND_NAME "hsa_operand" + +#define BRIG_CHUNK_MAX_SIZE (64 * 1024) + +/* Required HSA section alignment. */ + +#define HSA_SECTION_ALIGNMENT 16 + +/* Chunks of BRIG binary data. */ + +struct hsa_brig_data_chunk +{ + /* Size of the data already stored into a chunk. */ + unsigned size; + + /* Pointer to the data. */ + char *data; +}; + +/* Structure representing a BRIG section, holding and writing its data. */ + +class hsa_brig_section +{ +public: + /* Section name that will be output to the BRIG. */ + const char *section_name; + /* Size in bytes of all data stored in the section. */ + unsigned total_size; + /* The size of the header of the section including padding. */ + unsigned header_byte_count; + /* The size of the header of the section without any padding. */ + unsigned header_byte_delta; + + /* Buffers of binary data, each containing BRIG_CHUNK_MAX_SIZE bytes. */ + vec <struct hsa_brig_data_chunk> chunks; + + /* More convenient access to the last chunk from the vector above. 
*/ + struct hsa_brig_data_chunk *cur_chunk; + + void allocate_new_chunk (); + void init (const char *name); + void release (); + void output (); + unsigned add (const void *data, unsigned len); + void round_size_up (int factor); + void *get_ptr_by_offset (unsigned int offset); +}; + +static struct hsa_brig_section brig_data, brig_code, brig_operand; +static uint32_t brig_insn_count; +static bool brig_initialized = false; + +/* Mapping between emitted HSA functions and their offset in code segment. */ +static hash_map<tree, BrigCodeOffset32_t> *function_offsets; + +/* Hash map of emitted function declarations. */ +static hash_map <tree, BrigDirectiveExecutable *> *emitted_declarations; + +/* Hash table of emitted internal function declaration offsets. */ +hash_table <hsa_internal_fn_hasher> *hsa_emitted_internal_decls; + +/* List of sbr instructions. */ +static vec <hsa_insn_sbr *> *switch_instructions; + +struct function_linkage_pair +{ + function_linkage_pair (tree decl, unsigned int off) + : function_decl (decl), offset (off) {} + + /* Declaration of called function. */ + tree function_decl; + + /* Offset in operand section. */ + unsigned int offset; +}; + +/* Vector of function calls where we need to resolve function offsets. */ +static auto_vec <function_linkage_pair> function_call_linkage; + +/* Add a new chunk, allocate data for it and initialize it. */ + +void +hsa_brig_section::allocate_new_chunk () +{ + struct hsa_brig_data_chunk new_chunk; + + new_chunk.data = XCNEWVEC (char, BRIG_CHUNK_MAX_SIZE); + new_chunk.size = 0; + cur_chunk = chunks.safe_push (new_chunk); +} + +/* Initialize the brig section. */ + +void +hsa_brig_section::init (const char *name) +{ + section_name = name; + /* While the following computation is basically wrong, because the intent + certainly wasn't to have the first character of name and padding, which + are a part of sizeof (BrigSectionHeader), included in the first addend, + this is what the disassembler expects. 
*/ + total_size = sizeof (BrigSectionHeader) + strlen (section_name); + chunks.create (1); + allocate_new_chunk (); + header_byte_delta = total_size; + round_size_up (4); + header_byte_count = total_size; +} + +/* Free all data in the section. */ + +void +hsa_brig_section::release () +{ + for (unsigned i = 0; i < chunks.length (); i++) + free (chunks[i].data); + chunks.release (); + cur_chunk = NULL; +} + +/* Write the section to the output file to a section with the name given at + initialization. Switches the output section and does not restore it. */ + +void +hsa_brig_section::output () +{ + struct BrigSectionHeader section_header; + char padding[8]; + + section_header.byteCount = lendian64 (total_size); + section_header.headerByteCount = lendian32 (header_byte_count); + section_header.nameLength = lendian32 (strlen (section_name)); + assemble_string ((const char *) §ion_header, 16); + assemble_string (section_name, (section_header.nameLength)); + memset (&padding, 0, sizeof (padding)); + /* This is also a consequence of the wrong header size computation described + in a comment in hsa_brig_section::init. */ + assemble_string (padding, 8); + for (unsigned i = 0; i < chunks.length (); i++) + assemble_string (chunks[i].data, chunks[i].size); +} + +/* Add to the stream LEN bytes of opaque binary DATA. Return the offset at + which it was stored. */ + +unsigned +hsa_brig_section::add (const void *data, unsigned len) +{ + unsigned offset = total_size; + + gcc_assert (len <= BRIG_CHUNK_MAX_SIZE); + if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - len)) + allocate_new_chunk (); + + memcpy (cur_chunk->data + cur_chunk->size, data, len); + cur_chunk->size += len; + total_size += len; + + return offset; +} + +/* Add padding to section so that its size is divisible by FACTOR. 
*/ + +void +hsa_brig_section::round_size_up (int factor) +{ + unsigned padding, res = total_size % factor; + + if (res == 0) + return; + + padding = factor - res; + total_size += padding; + if (cur_chunk->size > (BRIG_CHUNK_MAX_SIZE - padding)) + { + padding -= BRIG_CHUNK_MAX_SIZE - cur_chunk->size; + cur_chunk->size = BRIG_CHUNK_MAX_SIZE; + allocate_new_chunk (); + } + + cur_chunk->size += padding; +} + +/* Return pointer to data by global OFFSET in the section. */ + +void * +hsa_brig_section::get_ptr_by_offset (unsigned int offset) +{ + gcc_assert (offset < total_size); + offset -= header_byte_delta; + + unsigned i; + for (i = 0; offset >= chunks[i].size; i++) + offset -= chunks[i].size; + + return chunks[i].data + offset; +} + +/* BRIG string data hashing. */ + +struct brig_string_slot +{ + const char *s; + char prefix; + int len; + uint32_t offset; +}; + +/* Hash table helpers. */ + +struct brig_string_slot_hasher : pointer_hash <brig_string_slot> +{ + static inline hashval_t hash (const value_type); + static inline bool equal (const value_type, const compare_type); + static inline void remove (value_type); +}; + +/* Returns a hash code for DS. Adapted from libiberty's htab_hash_string + to support strings that may not end in '\0'. */ + +inline hashval_t +brig_string_slot_hasher::hash (const value_type ds) +{ + hashval_t r = ds->len; + int i; + + for (i = 0; i < ds->len; i++) + r = r * 67 + (unsigned) ds->s[i] - 113; + r = r * 67 + (unsigned) ds->prefix - 113; + return r; +} + +/* Returns nonzero if DS1 and DS2 are equal. */ + +inline bool +brig_string_slot_hasher::equal (const value_type ds1, const compare_type ds2) +{ + if (ds1->len == ds2->len) + return ds1->prefix == ds2->prefix + && memcmp (ds1->s, ds2->s, ds1->len) == 0; + + return 0; +} + +/* Deallocate memory for DS upon its removal. 
*/ + +inline void +brig_string_slot_hasher::remove (value_type ds) +{ + free (const_cast<char *> (ds->s)); + free (ds); +} + +/* Hash for strings we output in order not to duplicate them needlessly. */ + +static hash_table<brig_string_slot_hasher> *brig_string_htab; + +/* Emit a null terminated string STR to the data section and return its + offset in it. If PREFIX is non-zero, output it just before STR too. + Sanitize the string if SANITIZE option is set to true. */ + +static unsigned +brig_emit_string (const char *str, char prefix = 0, bool sanitize = true) +{ + unsigned slen = strlen (str); + unsigned offset, len = slen + (prefix ? 1 : 0); + uint32_t hdr_len = lendian32 (len); + brig_string_slot s_slot; + brig_string_slot **slot; + char *str2; + + str2 = xstrdup (str); + + if (sanitize) + hsa_sanitize_name (str2); + s_slot.s = str2; + s_slot.len = slen; + s_slot.prefix = prefix; + s_slot.offset = 0; + + slot = brig_string_htab->find_slot (&s_slot, INSERT); + if (*slot == NULL) + { + brig_string_slot *new_slot = XCNEW (brig_string_slot); + + /* In theory we should fill in BrigData but that would mean copying + the string to a buffer for no reason, so we just emulate it. */ + offset = brig_data.add (&hdr_len, sizeof (hdr_len)); + if (prefix) + brig_data.add (&prefix, 1); + + brig_data.add (str2, slen); + brig_data.round_size_up (4); + + /* TODO: could use the string we just copied into + brig_string->cur_chunk */ + new_slot->s = str2; + new_slot->len = slen; + new_slot->prefix = prefix; + new_slot->offset = offset; + *slot = new_slot; + } + else + { + offset = (*slot)->offset; + free (str2); + } + + return offset; +} + +/* Linked list of queued operands. */ + +static struct operand_queue +{ + /* First from the chain of queued operands. */ + hsa_op_base *first_op, *last_op; + + /* The offset at which the next operand will be enqueued. */ + unsigned projected_size; + +} op_queue; + +/* Unless already initialized, initialize infrastructure to produce BRIG. 
*/ + +static void +brig_init (void) +{ + brig_insn_count = 0; + + if (brig_initialized) + return; + + brig_string_htab = new hash_table<brig_string_slot_hasher> (37); + brig_data.init (BRIG_SECTION_DATA_NAME); + brig_code.init (BRIG_SECTION_CODE_NAME); + brig_operand.init (BRIG_SECTION_OPERAND_NAME); + brig_initialized = true; + + struct BrigDirectiveModule moddir; + memset (&moddir, 0, sizeof (moddir)); + moddir.base.byteCount = lendian16 (sizeof (moddir)); + + char *modname; + if (main_input_filename && *main_input_filename != '\0') + { + const char *part = strrchr (main_input_filename, '/'); + if (!part) + part = main_input_filename; + else + part++; + modname = concat ("&__hsa_module_", part, NULL); + char *extension = strchr (modname, '.'); + if (extension) + *extension = '\0'; + + /* As in LTO mode, we have to emit a different module names. */ + if (flag_ltrans) + { + part = strrchr (asm_file_name, '/'); + if (!part) + part = asm_file_name; + else + part++; + char *modname2; + asprintf (&modname2, "%s_%s", modname, part); + free (modname); + modname = modname2; + } + + hsa_sanitize_name (modname); + moddir.name = brig_emit_string (modname); + free (modname); + } + else + moddir.name = brig_emit_string ("__hsa_module_unnamed", '&'); + moddir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_MODULE); + moddir.hsailMajor = lendian32 (BRIG_VERSION_HSAIL_MAJOR); + moddir.hsailMinor = lendian32 (BRIG_VERSION_HSAIL_MINOR); + moddir.profile = hsa_full_profile_p () ? BRIG_PROFILE_FULL: BRIG_PROFILE_BASE; + if (hsa_machine_large_p ()) + moddir.machineModel = BRIG_MACHINE_LARGE; + else + moddir.machineModel = BRIG_MACHINE_SMALL; + moddir.defaultFloatRound = BRIG_ROUND_FLOAT_DEFAULT; + brig_code.add (&moddir, sizeof (moddir)); +} + +/* Free all BRIG data. */ + +static void +brig_release_data (void) +{ + delete brig_string_htab; + brig_data.release (); + brig_code.release (); + brig_operand.release (); + + brig_initialized = 0; +} + +/* Enqueue operation OP. 
Return the offset at which it will be stored. */ + +static unsigned int +enqueue_op (hsa_op_base *op) +{ + unsigned ret; + + if (op->m_brig_op_offset) + return op->m_brig_op_offset; + + ret = op_queue.projected_size; + op->m_brig_op_offset = op_queue.projected_size; + + if (!op_queue.first_op) + op_queue.first_op = op; + else + op_queue.last_op->m_next = op; + op_queue.last_op = op; + + if (is_a <hsa_op_immed *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandConstantBytes); + else if (is_a <hsa_op_reg *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandRegister); + else if (is_a <hsa_op_address *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandAddress); + else if (is_a <hsa_op_code_ref *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandCodeRef); + else if (is_a <hsa_op_code_list *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandCodeList); + else if (is_a <hsa_op_operand_list *> (op)) + op_queue.projected_size += sizeof (struct BrigOperandOperandList); + else + gcc_unreachable (); + return ret; +} + + +/* Emit directive describing a symbol if it has not been emitted already. + Return the offset of the directive. */ + +static unsigned +emit_directive_variable (struct hsa_symbol *symbol) +{ + struct BrigDirectiveVariable dirvar; + unsigned name_offset; + static unsigned res_name_offset; + + if (symbol->m_directive_offset) + return symbol->m_directive_offset; + + memset (&dirvar, 0, sizeof (dirvar)); + dirvar.base.byteCount = lendian16 (sizeof (dirvar)); + dirvar.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_VARIABLE); + dirvar.allocation = symbol->m_allocation; + + char prefix = symbol->m_global_scope_p ? 
'&' : '%'; + + if (symbol->m_decl && TREE_CODE (symbol->m_decl) == RESULT_DECL) + { + if (res_name_offset == 0) + res_name_offset = brig_emit_string (symbol->m_name, '%'); + name_offset = res_name_offset; + } + else if (symbol->m_name) + name_offset = brig_emit_string (symbol->m_name, prefix); + else + { + char buf[64]; + snprintf (buf, 64, "__%s_%i", hsa_seg_name (symbol->m_segment), + symbol->m_name_number); + name_offset = brig_emit_string (buf, prefix); + } + + dirvar.name = lendian32 (name_offset); + dirvar.init = 0; + dirvar.type = lendian16 (symbol->m_type); + dirvar.segment = symbol->m_segment; + /* TODO: Once we are able to access global variables, we must copy their + alignment. */ + dirvar.align = MAX (hsa_natural_alignment (dirvar.type), + (BrigAlignment8_t) BRIG_ALIGNMENT_4); + dirvar.linkage = symbol->m_linkage; + dirvar.dim.lo = symbol->m_dim; + dirvar.dim.hi = symbol->m_dim >> 32; + + /* Global variables are just declared and linked via HSA runtime. */ + if (symbol->m_linkage != BRIG_ALLOCATION_PROGRAM) + dirvar.modifier |= BRIG_VARIABLE_DEFINITION; + dirvar.reserved = 0; + + if (symbol->m_cst_value) + { + dirvar.modifier |= BRIG_VARIABLE_CONST; + dirvar.init = lendian32 (enqueue_op (symbol->m_cst_value)); + } + + symbol->m_directive_offset = brig_code.add (&dirvar, sizeof (dirvar)); + return symbol->m_directive_offset; +} + +/* Emit directives describing either a function declaration or + definition F. 
*/

static BrigDirectiveExecutable *
emit_function_directives (hsa_function_representation *f, bool is_declaration)
{
  struct BrigDirectiveExecutable fndir;
  const bool is_definition = !f->m_declaration_p;
  hsa_symbol *sym;

  /* Global symbols of a definition are emitted ahead of the directive of the
     function itself.  */
  if (is_definition)
    for (int i = 0; f->m_global_symbols.iterate (i, &sym); i++)
      {
        emit_directive_variable (sym);
        brig_insn_count++;
      }

  unsigned name_offset = brig_emit_string (f->m_name, '&');

  /* Compute where the directives following the executable one are going to
     land in the code section: the optional output argument comes first, then
     the input arguments, then the variables scoped to the function.  */
  unsigned inarg_off = brig_code.total_size + sizeof (fndir)
    + (f->m_output_arg ? sizeof (struct BrigDirectiveVariable) : 0);
  unsigned scoped_off = inarg_off
    + f->m_input_args.length () * sizeof (struct BrigDirectiveVariable);

  int scoped_count = 0;
  if (is_definition)
    {
      scoped_count += f->m_spill_symbols.length ();
      scoped_count += f->m_private_variables.length ();
    }

  unsigned next_toplev_off
    = scoped_off + scoped_count * sizeof (struct BrigDirectiveVariable);

  memset (&fndir, 0, sizeof (fndir));
  fndir.base.byteCount = lendian16 (sizeof (fndir));
  fndir.base.kind = lendian16 (f->m_kern_p ? BRIG_KIND_DIRECTIVE_KERNEL
                               : BRIG_KIND_DIRECTIVE_FUNCTION);
  fndir.name = lendian32 (name_offset);
  fndir.inArgCount = lendian16 (f->m_input_args.length ());
  fndir.outArgCount = lendian16 (f->m_output_arg ? 1 : 0);
  fndir.firstInArg = lendian32 (inarg_off);
  fndir.firstCodeBlockEntry = lendian32 (scoped_off);
  fndir.nextModuleEntry = lendian32 (next_toplev_off);
  fndir.linkage = f->get_linkage ();
  if (is_definition)
    fndir.modifier |= BRIG_EXECUTABLE_DEFINITION;
  memset (&fndir.reserved, 0, sizeof (fndir.reserved));

  if (f->m_internal_fn == NULL)
    {
      /* Once we put a definition of function_offsets, we should not overwrite
         it with a declaration of the function.  */
      if (!function_offsets->get (f->m_decl) || !is_declaration)
        function_offsets->put (f->m_decl, brig_code.total_size);
    }
  else
    {
      /* Internal function: record a copy carrying the offset of the
         directive.  */
      hsa_internal_fn **slot
        = hsa_emitted_internal_decls->find_slot (f->m_internal_fn, INSERT);
      hsa_internal_fn *int_fn = new hsa_internal_fn (f->m_internal_fn);
      int_fn->m_offset = brig_code.total_size;
      *slot = int_fn;
    }

  brig_code.add (&fndir, sizeof (fndir));

  /* terrible hack: we need to set instCount after we emit all
     insns, but we need to emit directive in order, and we emit directives
     during insn emitting.  So we need to emit the FUNCTION directive
     early, then the insns, and then we need to set instCount, so remember
     a pointer to it, in some horrible way.  cur_chunk.data+size points
     directly to after fndir here.  */
  char *after_fndir = brig_code.cur_chunk->data + brig_code.cur_chunk->size;
  BrigDirectiveExecutable *ptr_to_fndir
    = (BrigDirectiveExecutable *)(after_fndir - sizeof (fndir));

  /* Arguments go out in the order assumed by the offsets computed above.  */
  if (f->m_output_arg)
    emit_directive_variable (f->m_output_arg);
  for (unsigned i = 0; i < f->m_input_args.length (); i++)
    emit_directive_variable (f->m_input_args[i]);

  if (is_definition)
    {
      for (int i = 0; f->m_spill_symbols.iterate (i, &sym); i++)
        {
          emit_directive_variable (sym);
          brig_insn_count++;
        }
      for (unsigned i = 0; i < f->m_private_variables.length (); i++)
        {
          emit_directive_variable (f->m_private_variables[i]);
          brig_insn_count++;
        }
    }

  return ptr_to_fndir;
}

/* Emit a label directive for the given HBB.  We assume it is about to start on
   the current offset in the code section. 
*/ + +static void +emit_bb_label_directive (hsa_bb *hbb) +{ + struct BrigDirectiveLabel lbldir; + + lbldir.base.byteCount = lendian16 (sizeof (lbldir)); + lbldir.base.kind = lendian16 (BRIG_KIND_DIRECTIVE_LABEL); + char buf[32]; + snprintf (buf, 32, "BB_%u_%i", DECL_UID (current_function_decl), + hbb->m_index); + lbldir.name = lendian32 (brig_emit_string (buf, '@')); + + hbb->m_label_ref.m_directive_offset = brig_code.add (&lbldir, + sizeof (lbldir)); + brig_insn_count++; +} + +/* Map a normal HSAIL type to the type of the equivalent BRIG operand + holding such, for constants and registers. */ + +static BrigType16_t +regtype_for_type (BrigType16_t t) +{ + switch (t) + { + case BRIG_TYPE_B1: + return BRIG_TYPE_B1; + + case BRIG_TYPE_U8: + case BRIG_TYPE_U16: + case BRIG_TYPE_U32: + case BRIG_TYPE_S8: + case BRIG_TYPE_S16: + case BRIG_TYPE_S32: + case BRIG_TYPE_B8: + case BRIG_TYPE_B16: + case BRIG_TYPE_B32: + case BRIG_TYPE_F16: + case BRIG_TYPE_F32: + case BRIG_TYPE_U8X4: + case BRIG_TYPE_U16X2: + case BRIG_TYPE_S8X4: + case BRIG_TYPE_S16X2: + case BRIG_TYPE_F16X2: + return BRIG_TYPE_B32; + + case BRIG_TYPE_U64: + case BRIG_TYPE_S64: + case BRIG_TYPE_F64: + case BRIG_TYPE_B64: + case BRIG_TYPE_U8X8: + case BRIG_TYPE_U16X4: + case BRIG_TYPE_U32X2: + case BRIG_TYPE_S8X8: + case BRIG_TYPE_S16X4: + case BRIG_TYPE_S32X2: + case BRIG_TYPE_F16X4: + case BRIG_TYPE_F32X2: + return BRIG_TYPE_B64; + + case BRIG_TYPE_B128: + case BRIG_TYPE_U8X16: + case BRIG_TYPE_U16X8: + case BRIG_TYPE_U32X4: + case BRIG_TYPE_U64X2: + case BRIG_TYPE_S8X16: + case BRIG_TYPE_S16X8: + case BRIG_TYPE_S32X4: + case BRIG_TYPE_S64X2: + case BRIG_TYPE_F16X8: + case BRIG_TYPE_F32X4: + case BRIG_TYPE_F64X2: + return BRIG_TYPE_B128; + + default: + gcc_unreachable (); + } +} + +/* Return the length of the BRIG type TYPE that is going to be streamed out as + an immediate constant (so it must not be B1). 
*/

unsigned
hsa_get_imm_brig_type_len (BrigType16_t type)
{
  BrigType16_t pack_type = type & BRIG_TYPE_PACK_MASK;

  /* For packed types the length follows from the packing alone.  */
  if (pack_type == BRIG_TYPE_PACK_32)
    return 4;
  else if (pack_type == BRIG_TYPE_PACK_64)
    return 8;
  else if (pack_type == BRIG_TYPE_PACK_128)
    return 16;
  else if (pack_type != BRIG_TYPE_PACK_NONE)
    gcc_unreachable ();

  /* Not packed: the length is given by the base type.  */
  BrigType16_t base_type = type & BRIG_TYPE_BASE_MASK;
  switch (base_type)
    {
    case BRIG_TYPE_U8:
    case BRIG_TYPE_S8:
    case BRIG_TYPE_B8:
      return 1;

    case BRIG_TYPE_U16:
    case BRIG_TYPE_S16:
    case BRIG_TYPE_F16:
    case BRIG_TYPE_B16:
      return 2;

    case BRIG_TYPE_U32:
    case BRIG_TYPE_S32:
    case BRIG_TYPE_F32:
    case BRIG_TYPE_B32:
      return 4;

    case BRIG_TYPE_U64:
    case BRIG_TYPE_S64:
    case BRIG_TYPE_F64:
    case BRIG_TYPE_B64:
      return 8;

    case BRIG_TYPE_B128:
      return 16;

    default:
      gcc_unreachable ();
    }
}

/* Emit one scalar VALUE to the buffer DATA intended for BRIG emission.
   If NEED_LEN is not equal to zero, shrink or extend the value
   to NEED_LEN bytes.  Return how many bytes were written. 
*/ + +static int +emit_immediate_scalar_to_buffer (tree value, char *data, unsigned need_len) +{ + union hsa_bytes bytes; + + memset (&bytes, 0, sizeof (bytes)); + tree type = TREE_TYPE (value); + gcc_checking_assert (TREE_CODE (type) != VECTOR_TYPE); + + unsigned data_len = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT; + if (INTEGRAL_TYPE_P (type) + || (POINTER_TYPE_P (type) && TREE_CODE (value) == INTEGER_CST)) + switch (data_len) + { + case 1: + bytes.b8 = (uint8_t) TREE_INT_CST_LOW (value); + break; + case 2: + bytes.b16 = (uint16_t) TREE_INT_CST_LOW (value); + break; + case 4: + bytes.b32 = (uint32_t) TREE_INT_CST_LOW (value); + break; + case 8: + bytes.b64 = (uint64_t) TREE_INT_CST_LOW (value); + break; + default: + gcc_unreachable (); + } + else if (SCALAR_FLOAT_TYPE_P (type)) + { + if (data_len == 2) + { + sorry ("Support for HSA does not implement immediate 16 bit FPU " + "operands"); + return 2; + } + unsigned int_len = GET_MODE_SIZE (TYPE_MODE (type)); + /* There are always 32 bits in each long, no matter the size of + the hosts long. */ + long tmp[6]; + + real_to_target (tmp, TREE_REAL_CST_PTR (value), TYPE_MODE (type)); + + if (int_len == 4) + bytes.b32 = (uint32_t) tmp[0]; + else + { + bytes.b64 = (uint64_t)(uint32_t) tmp[1]; + bytes.b64 <<= 32; + bytes.b64 |= (uint32_t) tmp[0]; + } + } + else + gcc_unreachable (); + + int len; + if (need_len == 0) + len = data_len; + else + len = need_len; + + memcpy (data, &bytes, len); + return len; +} + +void +hsa_op_immed::emit_to_buffer (tree value) +{ + unsigned total_len = m_brig_repr_size; + + /* As we can have a constructor with fewer elements, fill the memory + with zeros. 
*/ + m_brig_repr = XCNEWVEC (char, total_len); + char *p = m_brig_repr; + + if (TREE_CODE (value) == VECTOR_CST) + { + int i, num = VECTOR_CST_NELTS (value); + for (i = 0; i < num; i++) + { + unsigned actual; + actual + = emit_immediate_scalar_to_buffer (VECTOR_CST_ELT (value, i), p, 0); + total_len -= actual; + p += actual; + } + /* Vectors should have the exact size. */ + gcc_assert (total_len == 0); + } + else if (TREE_CODE (value) == STRING_CST) + memcpy (m_brig_repr, TREE_STRING_POINTER (value), + TREE_STRING_LENGTH (value)); + else if (TREE_CODE (value) == COMPLEX_CST) + { + gcc_assert (total_len % 2 == 0); + unsigned actual; + actual + = emit_immediate_scalar_to_buffer (TREE_REALPART (value), p, + total_len / 2); + + gcc_assert (actual == total_len / 2); + p += actual; + + actual + = emit_immediate_scalar_to_buffer (TREE_IMAGPART (value), p, + total_len / 2); + gcc_assert (actual == total_len / 2); + } + else if (TREE_CODE (value) == CONSTRUCTOR) + { + unsigned len = vec_safe_length (CONSTRUCTOR_ELTS (value)); + for (unsigned i = 0; i < len; i++) + { + tree v = CONSTRUCTOR_ELT (value, i)->value; + unsigned actual = emit_immediate_scalar_to_buffer (v, p, 0); + total_len -= actual; + p += actual; + } + } + else + emit_immediate_scalar_to_buffer (value, p, total_len); +} + +/* Emit an immediate BRIG operand IMM. The BRIG type of the immediate might + have been massaged to comply with various HSA/BRIG type requirements, so the + only important aspect of that is the length (because HSAIL might expect + smaller constants or become bit-data). The data should be represented + according to what is in the tree representation. 
*/ + +static void +emit_immediate_operand (hsa_op_immed *imm) +{ + struct BrigOperandConstantBytes out; + + memset (&out, 0, sizeof (out)); + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_CONSTANT_BYTES); + uint32_t byteCount = lendian32 (imm->m_brig_repr_size); + out.type = lendian16 (imm->m_type); + out.bytes = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); + brig_operand.add (&out, sizeof (out)); + brig_data.add (imm->m_brig_repr, imm->m_brig_repr_size); + brig_data.round_size_up (4); +} + +/* Emit a register BRIG operand REG. */ + +static void +emit_register_operand (hsa_op_reg *reg) +{ + struct BrigOperandRegister out; + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_REGISTER); + out.regNum = lendian32 (reg->m_hard_num); + + switch (regtype_for_type (reg->m_type)) + { + case BRIG_TYPE_B32: + out.regKind = BRIG_REGISTER_KIND_SINGLE; + break; + case BRIG_TYPE_B64: + out.regKind = BRIG_REGISTER_KIND_DOUBLE; + break; + case BRIG_TYPE_B128: + out.regKind = BRIG_REGISTER_KIND_QUAD; + break; + case BRIG_TYPE_B1: + out.regKind = BRIG_REGISTER_KIND_CONTROL; + break; + default: + gcc_unreachable (); + } + + brig_operand.add (&out, sizeof (out)); +} + +/* Emit an address BRIG operand ADDR. */ + +static void +emit_address_operand (hsa_op_address *addr) +{ + struct BrigOperandAddress out; + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_ADDRESS); + out.symbol = addr->m_symbol + ? lendian32 (emit_directive_variable (addr->m_symbol)) : 0; + out.reg = addr->m_reg ? 
lendian32 (enqueue_op (addr->m_reg)) : 0; + + if (sizeof (addr->m_imm_offset) == 8) + { + out.offset.lo = lendian32 (addr->m_imm_offset); + out.offset.hi = lendian32 (addr->m_imm_offset >> 32); + } + else + { + gcc_assert (sizeof (addr->m_imm_offset) == 4); + out.offset.lo = lendian32 (addr->m_imm_offset); + out.offset.hi = 0; + } + + brig_operand.add (&out, sizeof (out)); +} + +/* Emit a code reference operand REF. */ + +static void +emit_code_ref_operand (hsa_op_code_ref *ref) +{ + struct BrigOperandCodeRef out; + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_REF); + out.ref = lendian32 (ref->m_directive_offset); + brig_operand.add (&out, sizeof (out)); +} + +/* Emit a code list operand CODE_LIST. */ + +static void +emit_code_list_operand (hsa_op_code_list *code_list) +{ + struct BrigOperandCodeList out; + unsigned args = code_list->m_offsets.length (); + + for (unsigned i = 0; i < args; i++) + gcc_assert (code_list->m_offsets[i]); + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_CODE_LIST); + + uint32_t byteCount = lendian32 (4 * args); + + out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); + brig_data.add (code_list->m_offsets.address (), args * sizeof (uint32_t)); + brig_data.round_size_up (4); + brig_operand.add (&out, sizeof (out)); +} + +/* Emit an operand list operand OPERAND_LIST. 
*/ + +static void +emit_operand_list_operand (hsa_op_operand_list *operand_list) +{ + struct BrigOperandOperandList out; + unsigned args = operand_list->m_offsets.length (); + + for (unsigned i = 0; i < args; i++) + gcc_assert (operand_list->m_offsets[i]); + + out.base.byteCount = lendian16 (sizeof (out)); + out.base.kind = lendian16 (BRIG_KIND_OPERAND_OPERAND_LIST); + + uint32_t byteCount = lendian32 (4 * args); + + out.elements = lendian32 (brig_data.add (&byteCount, sizeof (byteCount))); + brig_data.add (operand_list->m_offsets.address (), args * sizeof (uint32_t)); + brig_data.round_size_up (4); + brig_operand.add (&out, sizeof (out)); +} + +/* Emit all operands queued for writing. */ + +static void +emit_queued_operands (void) +{ + for (hsa_op_base *op = op_queue.first_op; op; op = op->m_next) + { + gcc_assert (op->m_brig_op_offset == brig_operand.total_size); + if (hsa_op_immed *imm = dyn_cast <hsa_op_immed *> (op)) + emit_immediate_operand (imm); + else if (hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op)) + emit_register_operand (reg); + else if (hsa_op_address *addr = dyn_cast <hsa_op_address *> (op)) + emit_address_operand (addr); + else if (hsa_op_code_ref *ref = dyn_cast <hsa_op_code_ref *> (op)) + emit_code_ref_operand (ref); + else if (hsa_op_code_list *code_list = dyn_cast <hsa_op_code_list *> (op)) + emit_code_list_operand (code_list); + else if (hsa_op_operand_list *l = dyn_cast <hsa_op_operand_list *> (op)) + emit_operand_list_operand (l); + else + gcc_unreachable (); + } +} + +/* Emit directives describing the function that is used for + a function declaration. */ + +static BrigDirectiveExecutable * +emit_function_declaration (tree decl) +{ + hsa_function_representation *f = hsa_generate_function_declaration (decl); + + BrigDirectiveExecutable *e = emit_function_directives (f, true); + emit_queued_operands (); + + delete f; + + return e; +} + +/* Emit directives describing the function that is used for + an internal function declaration. 
*/ + +static BrigDirectiveExecutable * +emit_internal_fn_decl (hsa_internal_fn *fn) +{ + hsa_function_representation *f = hsa_generate_internal_fn_decl (fn); + + BrigDirectiveExecutable *e = emit_function_directives (f, true); + emit_queued_operands (); + + delete f; + + return e; +} + +/* Enqueue all operands of INSN and return offset to BRIG data section + to list of operand offsets. */ + +static unsigned +emit_insn_operands (hsa_insn_basic *insn) +{ + auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> + operand_offsets; + + unsigned l = insn->operand_count (); + operand_offsets.safe_grow (l); + + for (unsigned i = 0; i < l; i++) + operand_offsets[i] = lendian32 (enqueue_op (insn->get_op (i))); + + /* We have N operands so use 4 * N for the byte_count. */ + uint32_t byte_count = lendian32 (4 * l); + + unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); + brig_data.add (operand_offsets.address (), + l * sizeof (BrigOperandOffset32_t)); + + brig_data.round_size_up (4); + + return offset; +} + +/* Enqueue operand OP0, OP1, OP2 (if different from NULL) and return offset + to BRIG data section to list of operand offsets. */ + +static unsigned +emit_operands (hsa_op_base *op0, hsa_op_base *op1 = NULL, + hsa_op_base *op2 = NULL) +{ + auto_vec<BrigOperandOffset32_t, HSA_BRIG_INT_STORAGE_OPERANDS> + operand_offsets; + + gcc_checking_assert (op0 != NULL); + operand_offsets.safe_push (enqueue_op (op0)); + + if (op1 != NULL) + { + operand_offsets.safe_push (enqueue_op (op1)); + if (op2 != NULL) + operand_offsets.safe_push (enqueue_op (op2)); + } + + unsigned l = operand_offsets.length (); + + /* We have N operands so use 4 * N for the byte_count. 
*/ + uint32_t byte_count = lendian32 (4 * l); + + unsigned offset = brig_data.add (&byte_count, sizeof (byte_count)); + brig_data.add (operand_offsets.address (), + l * sizeof (BrigOperandOffset32_t)); + + brig_data.round_size_up (4); + + return offset; +} + +/* Emit an HSA memory instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_memory_insn (hsa_insn_mem *mem) +{ + struct BrigInstMem repr; + gcc_checking_assert (mem->operand_count () == 2); + + hsa_op_address *addr = as_a <hsa_op_address *> (mem->get_op (1)); + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). */ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); + repr.base.opcode = lendian16 (mem->m_opcode); + repr.base.type = lendian16 (mem->m_type); + repr.base.operands = lendian32 (emit_insn_operands (mem)); + + if (addr->m_symbol) + repr.segment = addr->m_symbol->m_segment; + else + repr.segment = BRIG_SEGMENT_FLAT; + repr.modifier = 0; + repr.equivClass = mem->m_equiv_class; + repr.align = mem->m_align; + if (mem->m_opcode == BRIG_OPCODE_LD) + repr.width = BRIG_WIDTH_1; + else + repr.width = BRIG_WIDTH_NONE; + memset (&repr.reserved, 0, sizeof (repr.reserved)); + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA signal memory instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_signal_insn (hsa_insn_signal *mem) +{ + struct BrigInstSignal repr; + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). 
*/ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_SIGNAL); + repr.base.opcode = lendian16 (mem->m_opcode); + repr.base.type = lendian16 (mem->m_type); + repr.base.operands = lendian32 (emit_insn_operands (mem)); + + repr.memoryOrder = mem->m_memoryorder; + repr.signalOperation = mem->m_atomicop; + repr.signalType = BRIG_TYPE_SIG64; + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA atomic memory instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_atomic_insn (hsa_insn_atomic *mem) +{ + struct BrigInstAtomic repr; + + /* Either operand[0] or operand[1] must be an address operand. */ + hsa_op_address *addr = NULL; + if (is_a <hsa_op_address *> (mem->get_op (0))) + addr = as_a <hsa_op_address *> (mem->get_op (0)); + else + addr = as_a <hsa_op_address *> (mem->get_op (1)); + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). */ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_ATOMIC); + repr.base.opcode = lendian16 (mem->m_opcode); + repr.base.type = lendian16 (mem->m_type); + repr.base.operands = lendian32 (emit_insn_operands (mem)); + + if (addr->m_symbol) + repr.segment = addr->m_symbol->m_segment; + else + repr.segment = BRIG_SEGMENT_FLAT; + repr.memoryOrder = mem->m_memoryorder; + repr.memoryScope = mem->m_memoryscope; + repr.atomicOperation = mem->m_atomicop; + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA LDA instruction and all necessary directives, schedule + necessary operands for writing. 
*/ + +static void +emit_addr_insn (hsa_insn_basic *insn) +{ + struct BrigInstAddr repr; + + hsa_op_address *addr = as_a <hsa_op_address *> (insn->get_op (1)); + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_ADDR); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + repr.base.operands = lendian32 (emit_insn_operands (insn)); + + if (addr->m_symbol) + repr.segment = addr->m_symbol->m_segment; + else + repr.segment = BRIG_SEGMENT_FLAT; + memset (&repr.reserved, 0, sizeof (repr.reserved)); + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA segment conversion instruction and all necessary directives, + schedule necessary operands for writing. */ + +static void +emit_segment_insn (hsa_insn_seg *seg) +{ + struct BrigInstSegCvt repr; + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_SEG_CVT); + repr.base.opcode = lendian16 (seg->m_opcode); + repr.base.type = lendian16 (seg->m_type); + repr.base.operands = lendian32 (emit_insn_operands (seg)); + repr.sourceType = lendian16 (as_a <hsa_op_reg *> (seg->get_op (1))->m_type); + repr.segment = seg->m_segment; + repr.modifier = 0; + + brig_code.add (&repr, sizeof (repr)); + + brig_insn_count++; +} + +/* Emit an HSA alloca instruction and all necessary directives, + schedule necessary operands for writing. */ + +static void +emit_alloca_insn (hsa_insn_alloca *alloca) +{ + struct BrigInstMem repr; + gcc_checking_assert (alloca->operand_count () == 2); + + /* This is necessary because of the erroneous typedef of + BrigMemoryModifier8_t which introduces padding which may then contain + random stuff (which we do not want so that we can test things don't + change). 
*/ + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_MEM); + repr.base.opcode = lendian16 (alloca->m_opcode); + repr.base.type = lendian16 (alloca->m_type); + repr.base.operands = lendian32 (emit_insn_operands (alloca)); + repr.segment = BRIG_SEGMENT_PRIVATE; + repr.modifier = 0; + repr.equivClass = 0; + repr.align = alloca->m_align; + repr.width = BRIG_WIDTH_NONE; + memset (&repr.reserved, 0, sizeof (repr.reserved)); + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA comparison instruction and all necessary directives, + schedule necessary operands for writing. */ + +static void +emit_cmp_insn (hsa_insn_cmp *cmp) +{ + struct BrigInstCmp repr; + + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_CMP); + repr.base.opcode = lendian16 (cmp->m_opcode); + repr.base.type = lendian16 (cmp->m_type); + repr.base.operands = lendian32 (emit_insn_operands (cmp)); + + if (is_a <hsa_op_reg *> (cmp->get_op (1))) + repr.sourceType + = lendian16 (as_a <hsa_op_reg *> (cmp->get_op (1))->m_type); + else + repr.sourceType + = lendian16 (as_a <hsa_op_immed *> (cmp->get_op (1))->m_type); + repr.modifier = 0; + repr.compare = cmp->m_compare; + repr.pack = 0; + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA branching instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_branch_insn (hsa_insn_br *br) +{ + struct BrigInstBr repr; + + basic_block target = NULL; + edge_iterator ei; + edge e; + + /* At the moment we only handle direct conditional jumps. 
*/ + gcc_assert (br->m_opcode == BRIG_OPCODE_CBR); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); + repr.base.opcode = lendian16 (br->m_opcode); + repr.width = BRIG_WIDTH_1; + /* For Conditional jumps the type is always B1. */ + repr.base.type = lendian16 (BRIG_TYPE_B1); + + FOR_EACH_EDGE (e, ei, br->m_bb->succs) + if (e->flags & EDGE_TRUE_VALUE) + { + target = e->dest; + break; + } + gcc_assert (target); + + repr.base.operands + = lendian32 (emit_operands (br->get_op (0), + &hsa_bb_for_bb (target)->m_label_ref)); + memset (&repr.reserved, 0, sizeof (repr.reserved)); + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA unconditional jump branching instruction that points to + a label REFERENCE. */ + +static void +emit_unconditional_jump (hsa_op_code_ref *reference) +{ + struct BrigInstBr repr; + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); + repr.base.opcode = lendian16 (BRIG_OPCODE_BR); + repr.base.type = lendian16 (BRIG_TYPE_NONE); + /* Direct branches to labels must be width(all). */ + repr.width = BRIG_WIDTH_ALL; + + repr.base.operands = lendian32 (emit_operands (reference)); + memset (&repr.reserved, 0, sizeof (repr.reserved)); + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit an HSA switch jump instruction that uses a jump table to + jump to a destination label. */ + +static void +emit_switch_insn (hsa_insn_sbr *sbr) +{ + struct BrigInstBr repr; + + gcc_assert (sbr->m_opcode == BRIG_OPCODE_SBR); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BR); + repr.base.opcode = lendian16 (sbr->m_opcode); + repr.width = BRIG_WIDTH_1; + /* For Conditional jumps the type is always B1. 
*/ + hsa_op_reg *index = as_a <hsa_op_reg *> (sbr->get_op (0)); + repr.base.type = lendian16 (index->m_type); + repr.base.operands + = lendian32 (emit_operands (sbr->get_op (0), sbr->m_label_code_list)); + memset (&repr.reserved, 0, sizeof (repr.reserved)); + + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; + + /* Emit jump to default label. */ + hsa_bb *hbb = hsa_bb_for_bb (sbr->m_default_bb); + emit_unconditional_jump (&hbb->m_label_ref); +} + +/* Emit a HSA convert instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_cvt_insn (hsa_insn_cvt *insn) +{ + struct BrigInstCvt repr; + BrigType16_t srctype; + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_CVT); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + repr.base.operands = lendian32 (emit_insn_operands (insn)); + + if (is_a <hsa_op_reg *> (insn->get_op (1))) + srctype = as_a <hsa_op_reg *> (insn->get_op (1))->m_type; + else + srctype = as_a <hsa_op_immed *> (insn->get_op (1))->m_type; + repr.sourceType = lendian16 (srctype); + repr.modifier = 0; + /* float to smaller float requires a rounding setting (we default + to 'near'. */ + if (hsa_type_float_p (insn->m_type) + && (!hsa_type_float_p (srctype) + || ((insn->m_type & BRIG_TYPE_BASE_MASK) + < (srctype & BRIG_TYPE_BASE_MASK)))) + repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; + else if (hsa_type_integer_p (insn->m_type) && + hsa_type_float_p (srctype)) + repr.round = BRIG_ROUND_INTEGER_ZERO; + else + repr.round = BRIG_ROUND_NONE; + brig_code.add (&repr, sizeof (repr)); + brig_insn_count++; +} + +/* Emit call instruction INSN, where this instruction must be closed + within a call block instruction. 
*/

static void
emit_call_insn (hsa_insn_call *call)
{
  struct BrigInstBr inst;

  inst.base.base.byteCount = lendian16 (sizeof (inst));
  inst.base.base.kind = lendian16 (BRIG_KIND_INST_BR);
  inst.base.opcode = lendian16 (BRIG_OPCODE_CALL);
  inst.base.type = lendian16 (BRIG_TYPE_NONE);

  /* The operands are, in order: the result code list, the called function
     and the argument code list.  */
  unsigned operands = emit_operands (call->m_result_code_list, &call->m_func,
                                     call->m_args_code_list);
  inst.base.operands = lendian32 (operands);

  if (!call->m_called_function)
    {
      /* Internal functions have not set m_called_function; their
         declaration must have been emitted already, so the directive
         offset can be filled in right away.  */
      hsa_internal_fn *emitted
        = hsa_emitted_internal_decls->find (call->m_called_internal_fn);
      gcc_assert (emitted);
      gcc_assert (emitted->m_offset > 0);
      call->m_func.m_directive_offset = emitted->m_offset;
    }
  else
    {
      /* Record the callee together with the offset of the operand that
         refers to it.  */
      function_linkage_pair linkage (call->m_called_function,
                                     call->m_func.m_brig_op_offset);
      function_call_linkage.safe_push (linkage);
    }

  inst.width = BRIG_WIDTH_ALL;
  memset (&inst.reserved, 0, sizeof (inst.reserved));

  brig_code.add (&inst, sizeof (inst));
  brig_insn_count++;
}

/* Emit argument block directive.  
*/ + +static void +emit_arg_block_insn (hsa_insn_arg_block *insn) +{ + switch (insn->m_kind) + { + case BRIG_KIND_DIRECTIVE_ARG_BLOCK_START: + { + struct BrigDirectiveArgBlock repr; + repr.base.byteCount = lendian16 (sizeof (repr)); + repr.base.kind = lendian16 (insn->m_kind); + brig_code.add (&repr, sizeof (repr)); + + for (unsigned i = 0; i < insn->m_call_insn->m_input_args.length (); i++) + { + insn->m_call_insn->m_args_code_list->m_offsets[i] + = lendian32 (emit_directive_variable + (insn->m_call_insn->m_input_args[i])); + brig_insn_count++; + } + + if (insn->m_call_insn->m_output_arg) + { + insn->m_call_insn->m_result_code_list->m_offsets[0] + = lendian32 (emit_directive_variable + (insn->m_call_insn->m_output_arg)); + brig_insn_count++; + } + + break; + } + case BRIG_KIND_DIRECTIVE_ARG_BLOCK_END: + { + struct BrigDirectiveArgBlock repr; + repr.base.byteCount = lendian16 (sizeof (repr)); + repr.base.kind = lendian16 (insn->m_kind); + brig_code.add (&repr, sizeof (repr)); + break; + } + default: + gcc_unreachable (); + } + + brig_insn_count++; +} + +/* Emit comment directive. */ + +static void +emit_comment_insn (hsa_insn_comment *insn) +{ + struct BrigDirectiveComment repr; + memset (&repr, 0, sizeof (repr)); + + repr.base.byteCount = lendian16 (sizeof (repr)); + repr.base.kind = lendian16 (insn->m_opcode); + repr.name = brig_emit_string (insn->m_comment, '\0', false); + brig_code.add (&repr, sizeof (repr)); +} + +/* Emit queue instruction INSN. 
*/ + +static void +emit_queue_insn (hsa_insn_queue *insn) +{ + BrigInstQueue repr; + memset (&repr, 0, sizeof (repr)); + + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_QUEUE); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + repr.segment = BRIG_SEGMENT_GLOBAL; + repr.memoryOrder = BRIG_MEMORY_ORDER_SC_RELEASE; + repr.base.operands = lendian32 (emit_insn_operands (insn)); + brig_data.round_size_up (4); + brig_code.add (&repr, sizeof (repr)); + + brig_insn_count++; +} + +/* Emit source type instruction INSN. */ + +static void +emit_srctype_insn (hsa_insn_srctype *insn) +{ + /* We assume that BrigInstMod has a BrigInstBasic prefix. */ + struct BrigInstSourceType repr; + unsigned operand_count = insn->operand_count (); + gcc_checking_assert (operand_count >= 2); + + memset (&repr, 0, sizeof (repr)); + repr.sourceType = lendian16 (insn->m_source_type); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + + repr.base.operands = lendian32 (emit_insn_operands (insn)); + brig_code.add (&repr, sizeof (struct BrigInstSourceType)); + brig_insn_count++; +} + +/* Emit packed instruction INSN. */ + +static void +emit_packed_insn (hsa_insn_packed *insn) +{ + /* We assume that BrigInstMod has a BrigInstBasic prefix. 
*/ + struct BrigInstSourceType repr; + unsigned operand_count = insn->operand_count (); + gcc_checking_assert (operand_count >= 2); + + memset (&repr, 0, sizeof (repr)); + repr.sourceType = lendian16 (insn->m_source_type); + repr.base.base.byteCount = lendian16 (sizeof (repr)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_SOURCE_TYPE); + repr.base.opcode = lendian16 (insn->m_opcode); + repr.base.type = lendian16 (insn->m_type); + + if (insn->m_opcode == BRIG_OPCODE_COMBINE) + { + /* Create operand list for packed type. */ + for (unsigned i = 1; i < operand_count; i++) + { + gcc_checking_assert (insn->get_op (i)); + insn->m_operand_list->m_offsets[i - 1] + = lendian32 (enqueue_op (insn->get_op (i))); + } + + repr.base.operands = lendian32 (emit_operands (insn->get_op (0), + insn->m_operand_list)); + } + else if (insn->m_opcode == BRIG_OPCODE_EXPAND) + { + /* Create operand list for packed type. */ + for (unsigned i = 0; i < operand_count - 1; i++) + { + gcc_checking_assert (insn->get_op (i)); + insn->m_operand_list->m_offsets[i] + = lendian32 (enqueue_op (insn->get_op (i))); + } + + unsigned ops = emit_operands (insn->m_operand_list, + insn->get_op (insn->operand_count () - 1)); + repr.base.operands = lendian32 (ops); + } + + + brig_code.add (&repr, sizeof (struct BrigInstSourceType)); + brig_insn_count++; +} + +/* Emit a basic HSA instruction and all necessary directives, schedule + necessary operands for writing. */ + +static void +emit_basic_insn (hsa_insn_basic *insn) +{ + /* We assume that BrigInstMod has a BrigInstBasic prefix. 
*/ + struct BrigInstMod repr; + BrigType16_t type; + + memset (&repr, 0, sizeof (repr)); + repr.base.base.byteCount = lendian16 (sizeof (BrigInstBasic)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_BASIC); + repr.base.opcode = lendian16 (insn->m_opcode); + switch (insn->m_opcode) + { + /* And the bit-logical operations need bit types and whine about + arithmetic types :-/ */ + case BRIG_OPCODE_AND: + case BRIG_OPCODE_OR: + case BRIG_OPCODE_XOR: + case BRIG_OPCODE_NOT: + type = regtype_for_type (insn->m_type); + break; + default: + type = insn->m_type; + break; + } + repr.base.type = lendian16 (type); + repr.base.operands = lendian32 (emit_insn_operands (insn)); + + if ((type & BRIG_TYPE_PACK_MASK) != BRIG_TYPE_PACK_NONE) + { + if (hsa_type_float_p (type) + && !hsa_opcode_floating_bit_insn_p (insn->m_opcode)) + repr.round = BRIG_ROUND_FLOAT_NEAR_EVEN; + else + repr.round = 0; + /* We assume that destination and sources agree in packing layout. */ + if (insn->num_used_ops () >= 2) + repr.pack = BRIG_PACK_PP; + else + repr.pack = BRIG_PACK_P; + repr.reserved = 0; + repr.base.base.byteCount = lendian16 (sizeof (BrigInstMod)); + repr.base.base.kind = lendian16 (BRIG_KIND_INST_MOD); + brig_code.add (&repr, sizeof (struct BrigInstMod)); + } + else + brig_code.add (&repr, sizeof (struct BrigInstBasic)); + brig_insn_count++; +} + +/* Emit an HSA instruction and all necessary directives, schedule necessary + operands for writing. 
*/

static void
emit_insn (hsa_insn_basic *insn)
{
  gcc_assert (!is_a <hsa_insn_phi *> (insn));

  /* Remember where in the code section this instruction starts.  */
  insn->m_brig_offset = brig_code.total_size;

  /* Dispatch on the dynamic kind of INSN.  The tests run in a fixed order
     and the first one that matches wins.  */
  if (hsa_insn_signal *signal_insn = dyn_cast <hsa_insn_signal *> (insn))
    {
      emit_signal_insn (signal_insn);
      return;
    }
  if (hsa_insn_atomic *atomic_insn = dyn_cast <hsa_insn_atomic *> (insn))
    {
      emit_atomic_insn (atomic_insn);
      return;
    }
  if (hsa_insn_mem *mem_insn = dyn_cast <hsa_insn_mem *> (insn))
    {
      emit_memory_insn (mem_insn);
      return;
    }
  if (insn->m_opcode == BRIG_OPCODE_LDA)
    {
      emit_addr_insn (insn);
      return;
    }
  if (hsa_insn_seg *seg_insn = dyn_cast <hsa_insn_seg *> (insn))
    {
      emit_segment_insn (seg_insn);
      return;
    }
  if (hsa_insn_cmp *cmp_insn = dyn_cast <hsa_insn_cmp *> (insn))
    {
      emit_cmp_insn (cmp_insn);
      return;
    }
  if (hsa_insn_br *br_insn = dyn_cast <hsa_insn_br *> (insn))
    {
      emit_branch_insn (br_insn);
      return;
    }
  if (hsa_insn_sbr *sbr_insn = dyn_cast <hsa_insn_sbr *> (insn))
    {
      /* Record the switch in the switch_instructions vector, which is
         created on first use, before emitting it.  */
      if (switch_instructions == NULL)
        switch_instructions = new vec <hsa_insn_sbr *> ();

      switch_instructions->safe_push (sbr_insn);
      emit_switch_insn (sbr_insn);
      return;
    }
  if (hsa_insn_arg_block *arg_block = dyn_cast <hsa_insn_arg_block *> (insn))
    {
      emit_arg_block_insn (arg_block);
      return;
    }
  if (hsa_insn_call *call_insn = dyn_cast <hsa_insn_call *> (insn))
    {
      emit_call_insn (call_insn);
      return;
    }
  if (hsa_insn_comment *comment_insn = dyn_cast <hsa_insn_comment *> (insn))
    {
      emit_comment_insn (comment_insn);
      return;
    }
  if (hsa_insn_queue *queue_insn = dyn_cast <hsa_insn_queue *> (insn))
    {
      emit_queue_insn (queue_insn);
      return;
    }
  if (hsa_insn_srctype *srctype_insn = dyn_cast <hsa_insn_srctype *> (insn))
    {
      emit_srctype_insn (srctype_insn);
      return;
    }
  if (hsa_insn_packed *packed_insn = dyn_cast <hsa_insn_packed *> (insn))
    {
      emit_packed_insn (packed_insn);
      return;
    }
  if (hsa_insn_cvt *cvt_insn = dyn_cast <hsa_insn_cvt *> (insn))
    {
      emit_cvt_insn (cvt_insn);
      return;
    }
  if (hsa_insn_alloca *alloca_insn = dyn_cast <hsa_insn_alloca *> (insn))
    {
      emit_alloca_insn (alloca_insn);
      return;
    }

  /* Everything else is a plain basic instruction.  */
  emit_basic_insn (insn);
}

/* We have just finished emitting BB and are about to emit NEXT_BB if non-NULL,
   or we are about to finish emitting code, if it is NULL.  
If the fall through + edge from BB does not lead to NEXT_BB, emit an unconditional jump. */ + +static void +perhaps_emit_branch (basic_block bb, basic_block next_bb) +{ + basic_block t_bb = NULL, ff = NULL; + + edge_iterator ei; + edge e; + + /* If the last instruction of BB is a switch, ignore emission of all + edges. */ + if (hsa_bb_for_bb (bb)->m_last_insn + && is_a <hsa_insn_sbr *> (hsa_bb_for_bb (bb)->m_last_insn)) + return; + + FOR_EACH_EDGE (e, ei, bb->succs) + if (e->flags & EDGE_TRUE_VALUE) + { + gcc_assert (!t_bb); + t_bb = e->dest; + } + else + { + gcc_assert (!ff); + ff = e->dest; + } + + if (!ff || ff == next_bb || ff == EXIT_BLOCK_PTR_FOR_FN (cfun)) + return; + + emit_unconditional_jump (&hsa_bb_for_bb (ff)->m_label_ref); +} + +/* Emit the a function with name NAME to the various brig sections. */ + +void +hsa_brig_emit_function (void) +{ + basic_block bb, prev_bb; + hsa_insn_basic *insn; + BrigDirectiveExecutable *ptr_to_fndir; + + brig_init (); + + brig_insn_count = 0; + memset (&op_queue, 0, sizeof (op_queue)); + op_queue.projected_size = brig_operand.total_size; + + if (!function_offsets) + function_offsets = new hash_map<tree, BrigCodeOffset32_t> (); + + if (!emitted_declarations) + emitted_declarations = new hash_map <tree, BrigDirectiveExecutable *> (); + + for (unsigned i = 0; i < hsa_cfun->m_called_functions.length (); i++) + { + tree called = hsa_cfun->m_called_functions[i]; + + /* If the function has no definition, emit a declaration. 
*/ + if (!emitted_declarations->get (called)) + { + BrigDirectiveExecutable *e = emit_function_declaration (called); + emitted_declarations->put (called, e); + } + } + + for (unsigned i = 0; i < hsa_cfun->m_called_internal_fns.length (); i++) + { + hsa_internal_fn *called = hsa_cfun->m_called_internal_fns[i]; + emit_internal_fn_decl (called); + } + + ptr_to_fndir = emit_function_directives (hsa_cfun, false); + for (insn = hsa_bb_for_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun))->m_first_insn; + insn; + insn = insn->m_next) + emit_insn (insn); + prev_bb = ENTRY_BLOCK_PTR_FOR_FN (cfun); + FOR_EACH_BB_FN (bb, cfun) + { + perhaps_emit_branch (prev_bb, bb); + emit_bb_label_directive (hsa_bb_for_bb (bb)); + for (insn = hsa_bb_for_bb (bb)->m_first_insn; insn; insn = insn->m_next) + emit_insn (insn); + prev_bb = bb; + } + perhaps_emit_branch (prev_bb, NULL); + ptr_to_fndir->nextModuleEntry = brig_code.total_size; + + /* Fill up label references for all sbr instructions. */ + if (switch_instructions) + { + for (unsigned i = 0; i < switch_instructions->length (); i++) + { + hsa_insn_sbr *sbr = (*switch_instructions)[i]; + for (unsigned j = 0; j < sbr->m_jump_table.length (); j++) + { + hsa_bb *hbb = hsa_bb_for_bb (sbr->m_jump_table[j]); + sbr->m_label_code_list->m_offsets[j] + = hbb->m_label_ref.m_directive_offset; + } + } + + switch_instructions->release (); + delete switch_instructions; + switch_instructions = NULL; + } + + if (dump_file) + { + fprintf (dump_file, "------- After BRIG emission: -------\n"); + dump_hsa_cfun (dump_file); + } + + emit_queued_operands (); +} + +/* Emit all OMP symbols related to OMP. */ + +void +hsa_brig_emit_omp_symbols (void) +{ + brig_init (); + emit_directive_variable (hsa_num_threads); +} + +static GTY(()) tree hsa_cdtor_statements[2]; + +/* Create and return __hsa_global_variables symbol that contains + all informations consumed by libgomp to link global variables + with their string names used by an HSA kernel. 
*/ + +static tree +hsa_output_global_variables () +{ + unsigned l = hsa_global_variable_symbols->elements (); + + tree variable_info_type = make_node (RECORD_TYPE); + tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("name"), ptr_type_node); + DECL_CHAIN (id_f1) = NULL_TREE; + tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("omp_data_size"), + ptr_type_node); + DECL_CHAIN (id_f2) = id_f1; + finish_builtin_struct (variable_info_type, "__hsa_variable_info", id_f2, + NULL_TREE); + + tree int_num_of_global_vars; + int_num_of_global_vars = build_int_cst (uint32_type_node, l); + tree global_vars_num_index_type = build_index_type (int_num_of_global_vars); + tree global_vars_array_type = build_array_type (variable_info_type, + global_vars_num_index_type); + TYPE_ARTIFICIAL (global_vars_array_type) = 1; + + vec<constructor_elt, va_gc> *global_vars_vec = NULL; + + for (hash_table <hsa_noop_symbol_hasher>::iterator it + = hsa_global_variable_symbols->begin (); + it != hsa_global_variable_symbols->end (); ++it) + { + unsigned len = strlen ((*it)->m_name); + char *copy = XNEWVEC (char, len + 2); + copy[0] = '&'; + memcpy (copy + 1, (*it)->m_name, len); + copy[len + 1] = '\0'; + len++; + hsa_sanitize_name (copy); + + tree var_name = build_string (len, copy); + TREE_TYPE (var_name) + = build_array_type (char_type_node, build_index_type (size_int (len))); + free (copy); + + vec<constructor_elt, va_gc> *variable_info_vec = NULL; + CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE (var_name)), + var_name)); + CONSTRUCTOR_APPEND_ELT (variable_info_vec, NULL_TREE, + build_fold_addr_expr ((*it)->m_decl)); + + tree variable_info_ctor = build_constructor (variable_info_type, + variable_info_vec); + + CONSTRUCTOR_APPEND_ELT (global_vars_vec, NULL_TREE, + variable_info_ctor); + } + + tree global_vars_ctor = build_constructor (global_vars_array_type, + global_vars_vec); + + char 
tmp_name[64]; + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_global_variables", 1); + tree global_vars_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + global_vars_array_type); + TREE_STATIC (global_vars_table) = 1; + TREE_READONLY (global_vars_table) = 1; + TREE_PUBLIC (global_vars_table) = 0; + DECL_ARTIFICIAL (global_vars_table) = 1; + DECL_IGNORED_P (global_vars_table) = 1; + DECL_EXTERNAL (global_vars_table) = 0; + TREE_CONSTANT (global_vars_table) = 1; + DECL_INITIAL (global_vars_table) = global_vars_ctor; + varpool_node::finalize_decl (global_vars_table); + + return global_vars_table; +} + +/* Create __hsa_host_functions and __hsa_kernels that contain + all informations consumed by libgomp to register all kernels + in the BRIG binary. */ + +static void +hsa_output_kernels (tree *host_func_table, tree *kernels) +{ + unsigned map_count = hsa_get_number_decl_kernel_mappings (); + + tree int_num_of_kernels; + int_num_of_kernels = build_int_cst (uint32_type_node, map_count); + tree kernel_num_index_type = build_index_type (int_num_of_kernels); + tree host_functions_array_type = build_array_type (ptr_type_node, + kernel_num_index_type); + TYPE_ARTIFICIAL (host_functions_array_type) = 1; + + vec<constructor_elt, va_gc> *host_functions_vec = NULL; + for (unsigned i = 0; i < map_count; ++i) + { + tree decl = hsa_get_decl_kernel_mapping_decl (i); + tree host_fn = build_fold_addr_expr (hsa_get_host_function (decl)); + CONSTRUCTOR_APPEND_ELT (host_functions_vec, NULL_TREE, host_fn); + } + tree host_functions_ctor = build_constructor (host_functions_array_type, + host_functions_vec); + char tmp_name[64]; + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_host_functions", 1); + tree hsa_host_func_table = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + host_functions_array_type); + TREE_STATIC (hsa_host_func_table) = 1; + TREE_READONLY (hsa_host_func_table) = 1; + TREE_PUBLIC (hsa_host_func_table) = 0; + DECL_ARTIFICIAL 
(hsa_host_func_table) = 1; + DECL_IGNORED_P (hsa_host_func_table) = 1; + DECL_EXTERNAL (hsa_host_func_table) = 0; + TREE_CONSTANT (hsa_host_func_table) = 1; + DECL_INITIAL (hsa_host_func_table) = host_functions_ctor; + varpool_node::finalize_decl (hsa_host_func_table); + *host_func_table = hsa_host_func_table; + + /* Following code emits list of kernel_info structures. */ + + tree kernel_info_type = make_node (RECORD_TYPE); + tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("name"), ptr_type_node); + DECL_CHAIN (id_f1) = NULL_TREE; + tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("omp_data_size"), + unsigned_type_node); + DECL_CHAIN (id_f2) = id_f1; + tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("gridified_kernel_p"), + boolean_type_node); + DECL_CHAIN (id_f3) = id_f2; + tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernel_dependencies_count"), + unsigned_type_node); + DECL_CHAIN (id_f4) = id_f3; + tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernel_dependencies"), + build_pointer_type (build_pointer_type + (char_type_node))); + DECL_CHAIN (id_f5) = id_f4; + finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5, + NULL_TREE); + + int_num_of_kernels = build_int_cstu (uint32_type_node, map_count); + tree kernel_info_vector_type + = build_array_type (kernel_info_type, + build_index_type (int_num_of_kernels)); + TYPE_ARTIFICIAL (kernel_info_vector_type) = 1; + + vec<constructor_elt, va_gc> *kernel_info_vector_vec = NULL; + tree kernel_dependencies_vector_type = NULL; + + for (unsigned i = 0; i < map_count; ++i) + { + tree kernel = hsa_get_decl_kernel_mapping_decl (i); + char *name = hsa_get_decl_kernel_mapping_name (i); + unsigned len = strlen (name); + char *copy = XNEWVEC (char, len + 2); + copy[0] = '&'; + memcpy (copy + 1, name, len); + copy[len + 1] = '\0'; + len++; + + tree kern_name = build_string (len, copy); 
+ TREE_TYPE (kern_name) + = build_array_type (char_type_node, build_index_type (size_int (len))); + free (copy); + + unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i); + tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size); + bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i); + tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node, + gridified_kernel_p); + unsigned count = 0; + + kernel_dependencies_vector_type + = build_array_type (build_pointer_type (char_type_node), + build_index_type (size_int (0))); + + vec<constructor_elt, va_gc> *kernel_dependencies_vec = NULL; + if (hsa_decl_kernel_dependencies) + { + vec<const char *> **slot; + slot = hsa_decl_kernel_dependencies->get (kernel); + if (slot) + { + vec <const char *> *dependencies = *slot; + count = dependencies->length (); + + kernel_dependencies_vector_type + = build_array_type (build_pointer_type (char_type_node), + build_index_type (size_int (count))); + TYPE_ARTIFICIAL (kernel_dependencies_vector_type) = 1; + + for (unsigned j = 0; j < count; j++) + { + const char *d = (*dependencies)[j]; + len = strlen (d); + tree dependency_name = build_string (len, d); + TREE_TYPE (dependency_name) + = build_array_type (char_type_node, + build_index_type (size_int (len))); + + CONSTRUCTOR_APPEND_ELT + (kernel_dependencies_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE (dependency_name)), + dependency_name)); + } + } + } + + tree dependencies_count = build_int_cstu (unsigned_type_node, count); + + vec<constructor_elt, va_gc> *kernel_info_vec = NULL; + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE + (kern_name)), + kern_name)); + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size); + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + gridified_kernel_p_tree); + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count); + + if (count > 0) + { + 
ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i); + tree dependencies_list = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + kernel_dependencies_vector_type); + + TREE_STATIC (dependencies_list) = 1; + TREE_READONLY (dependencies_list) = 1; + TREE_PUBLIC (dependencies_list) = 0; + DECL_ARTIFICIAL (dependencies_list) = 1; + DECL_IGNORED_P (dependencies_list) = 1; + DECL_EXTERNAL (dependencies_list) = 0; + TREE_CONSTANT (dependencies_list) = 1; + DECL_INITIAL (dependencies_list) + = build_constructor (kernel_dependencies_vector_type, + kernel_dependencies_vec); + varpool_node::finalize_decl (dependencies_list); + + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + build1 (ADDR_EXPR, + build_pointer_type + (TREE_TYPE (dependencies_list)), + dependencies_list)); + } + else + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node); + + tree kernel_info_ctor = build_constructor (kernel_info_type, + kernel_info_vec); + + CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE, + kernel_info_ctor); + } + + ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_kernels", 1); + tree hsa_kernels = build_decl (UNKNOWN_LOCATION, VAR_DECL, + get_identifier (tmp_name), + kernel_info_vector_type); + + TREE_STATIC (hsa_kernels) = 1; + TREE_READONLY (hsa_kernels) = 1; + TREE_PUBLIC (hsa_kernels) = 0; + DECL_ARTIFICIAL (hsa_kernels) = 1; + DECL_IGNORED_P (hsa_kernels) = 1; + DECL_EXTERNAL (hsa_kernels) = 0; + TREE_CONSTANT (hsa_kernels) = 1; + DECL_INITIAL (hsa_kernels) = build_constructor (kernel_info_vector_type, + kernel_info_vector_vec); + varpool_node::finalize_decl (hsa_kernels); + *kernels = hsa_kernels; +} + +/* Create a static constructor that will register out brig stuff with + libgomp. 
*/

/* BRIG_DECL is the declaration whose address becomes the "brig_module" field
   of the image descriptor.  The function builds two static read-only
   variables, the HSA image descriptor and the host table handed to libgomp,
   and then emits a static constructor calling the GOMP offload register
   builtin and a static destructor calling the unregister builtin, both with
   the same four arguments.  */

static void
hsa_output_libgomp_mapping (tree brig_decl)
{
  unsigned kernel_count = hsa_get_number_decl_kernel_mappings ();
  unsigned global_variable_count = hsa_global_variable_symbols->elements ();

  tree kernels;
  tree host_func_table;

  hsa_output_kernels (&host_func_table, &kernels);
  tree global_vars = hsa_output_global_variables ();

  /* Describe the record type of the image descriptor.  Each new field is
     chained to the previously created one and the last field created is the
     one passed to finish_builtin_struct.  */
  tree hsa_image_desc_type = make_node (RECORD_TYPE);
  tree id_f1 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
			   get_identifier ("brig_module"), ptr_type_node);
  DECL_CHAIN (id_f1) = NULL_TREE;
  tree id_f2 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
			   get_identifier ("kernel_count"),
			   unsigned_type_node);

  DECL_CHAIN (id_f2) = id_f1;
  tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
			   get_identifier ("hsa_kernel_infos"),
			   ptr_type_node);
  DECL_CHAIN (id_f3) = id_f2;
  tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
			   get_identifier ("global_variable_count"),
			   unsigned_type_node);
  DECL_CHAIN (id_f4) = id_f3;
  tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL,
			   get_identifier ("hsa_global_variable_infos"),
			   ptr_type_node);
  DECL_CHAIN (id_f5) = id_f4;
  finish_builtin_struct (hsa_image_desc_type, "__hsa_image_desc", id_f5,
			 NULL_TREE);
  TYPE_ARTIFICIAL (hsa_image_desc_type) = 1;

  /* Initializer of the descriptor; the values are appended in the order in
     which the fields were created above.  */
  vec<constructor_elt, va_gc> *img_desc_vec = NULL;
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
			  build_fold_addr_expr (brig_decl));
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
			  build_int_cstu (unsigned_type_node, kernel_count));
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
			  build1 (ADDR_EXPR,
				  build_pointer_type (TREE_TYPE (kernels)),
				  kernels));
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
			  build_int_cstu (unsigned_type_node,
					  global_variable_count));
  CONSTRUCTOR_APPEND_ELT (img_desc_vec, NULL_TREE,
			  build1 (ADDR_EXPR,
				  build_pointer_type (TREE_TYPE (global_vars)),
				  global_vars));

  tree img_desc_ctor = build_constructor (hsa_image_desc_type, img_desc_vec);

  char tmp_name[64];
  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_img_descriptor", 1);
  tree hsa_img_descriptor = build_decl (UNKNOWN_LOCATION, VAR_DECL,
					get_identifier (tmp_name),
					hsa_image_desc_type);
  TREE_STATIC (hsa_img_descriptor) = 1;
  TREE_READONLY (hsa_img_descriptor) = 1;
  TREE_PUBLIC (hsa_img_descriptor) = 0;
  DECL_ARTIFICIAL (hsa_img_descriptor) = 1;
  DECL_IGNORED_P (hsa_img_descriptor) = 1;
  DECL_EXTERNAL (hsa_img_descriptor) = 0;
  TREE_CONSTANT (hsa_img_descriptor) = 1;
  DECL_INITIAL (hsa_img_descriptor) = img_desc_ctor;
  varpool_node::finalize_decl (hsa_img_descriptor);

  /* Construct the "host_table" libgomp expects.  It has exactly the four
     pointer entries appended below.  build_index_type takes the maximum
     index rather than the element count, hence 3.  */
  tree index_type = build_index_type (build_int_cst (integer_type_node, 3));
  tree libgomp_host_table_type = build_array_type (ptr_type_node, index_type);
  TYPE_ARTIFICIAL (libgomp_host_table_type) = 1;
  vec<constructor_elt, va_gc> *libgomp_host_table_vec = NULL;

  /* Entry 0: address of the host function table.  */
  tree host_func_table_addr = build_fold_addr_expr (host_func_table);
  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
			  host_func_table_addr);

  /* Entry 1: that address advanced by one pointer size per kernel.  */
  offset_int func_table_size
    = wi::to_offset (TYPE_SIZE_UNIT (ptr_type_node)) * kernel_count;
  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE,
			  fold_build2 (POINTER_PLUS_EXPR,
				       TREE_TYPE (host_func_table_addr),
				       host_func_table_addr,
				       build_int_cst (size_type_node,
						      func_table_size.to_uhwi
						      ())));

  /* Entries 2 and 3 are null pointers.  */
  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
  CONSTRUCTOR_APPEND_ELT (libgomp_host_table_vec, NULL_TREE, null_pointer_node);
  tree libgomp_host_table_ctor = build_constructor (libgomp_host_table_type,
						    libgomp_host_table_vec);
  ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_libgomp_host_table", 1);
  tree hsa_libgomp_host_table = build_decl (UNKNOWN_LOCATION, VAR_DECL,
					    get_identifier (tmp_name),
					    libgomp_host_table_type);

  TREE_STATIC (hsa_libgomp_host_table) = 1;
  TREE_READONLY (hsa_libgomp_host_table) = 1;
  TREE_PUBLIC (hsa_libgomp_host_table) = 0;
  DECL_ARTIFICIAL (hsa_libgomp_host_table) = 1;
  DECL_IGNORED_P (hsa_libgomp_host_table) = 1;
  DECL_EXTERNAL (hsa_libgomp_host_table) = 0;
  TREE_CONSTANT (hsa_libgomp_host_table) = 1;
  DECL_INITIAL (hsa_libgomp_host_table) = libgomp_host_table_ctor;
  varpool_node::finalize_decl (hsa_libgomp_host_table);

  /* Generate an initializer with a call to the registration routine.  */

  tree offload_register
    = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_REGISTER);
  gcc_checking_assert (offload_register);

  append_to_statement_list
    (build_call_expr (offload_register, 4,
		      build_int_cstu (unsigned_type_node,
				      GOMP_VERSION_PACK (GOMP_VERSION,
							 GOMP_VERSION_HSA)),
		      build_fold_addr_expr (hsa_libgomp_host_table),
		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
		      build_fold_addr_expr (hsa_img_descriptor)),
     &hsa_cdtor_statements[0]);

  cgraph_build_static_cdtor ('I', hsa_cdtor_statements[0],
			     DEFAULT_INIT_PRIORITY);

  /* Likewise generate a finalizer calling the unregistration routine with
     the same arguments.  */

  tree offload_unregister
    = builtin_decl_explicit (BUILT_IN_GOMP_OFFLOAD_UNREGISTER);
  gcc_checking_assert (offload_unregister);

  append_to_statement_list
    (build_call_expr (offload_unregister, 4,
		      build_int_cstu (unsigned_type_node,
				      GOMP_VERSION_PACK (GOMP_VERSION,
							 GOMP_VERSION_HSA)),
		      build_fold_addr_expr (hsa_libgomp_host_table),
		      build_int_cst (integer_type_node, GOMP_DEVICE_HSA),
		      build_fold_addr_expr (hsa_img_descriptor)),
     &hsa_cdtor_statements[1]);
  cgraph_build_static_cdtor ('D', hsa_cdtor_statements[1],
			     DEFAULT_INIT_PRIORITY);
}

/* Emit the brig module we have compiled to a section in the final assembly and
   also create a compile unit static constructor that will register the brig
   module with libgomp.
*/

/* The function does nothing unless brig_initialized is set.  Otherwise it
   patches recorded call operands with the final function offsets, adjusts
   linkage of declarations of functions that failed to compile, writes the
   module header, the section index and the data, code and operand sections
   (each followed by zero padding) and finally releases the per-unit data.  */

void
hsa_output_brig (void)
{
  section *saved_section;

  if (!brig_initialized)
    return;

  /* Store the offset of each called function into the code reference
     operand recorded for the call.  */
  for (unsigned i = 0; i < function_call_linkage.length (); i++)
    {
      function_linkage_pair p = function_call_linkage[i];

      BrigCodeOffset32_t *func_offset = function_offsets->get (p.function_decl);
      /* Check the lookup result itself before looking at the offset so that
	 a missing entry is reported by the assert.  */
      gcc_assert (func_offset && *func_offset);
      BrigOperandCodeRef *code_ref
	= (BrigOperandCodeRef *) (brig_operand.get_ptr_by_offset (p.offset));
      gcc_assert (code_ref->base.kind == BRIG_KIND_OPERAND_CODE_REF);
      code_ref->ref = lendian32 (*func_offset);
    }

  /* Iterate all function declarations and if we meet a function that should
     have module linkage and we are unable to emit HSAIL for the function,
     then change the linkage to program linkage.  Doing so, we will emit
     a valid BRIG image.  */
  if (hsa_failed_functions != NULL && emitted_declarations != NULL)
    for (hash_map <tree, BrigDirectiveExecutable *>::iterator it
	 = emitted_declarations->begin ();
	 it != emitted_declarations->end ();
	 ++it)
      {
	if (hsa_failed_functions->contains ((*it).first))
	  (*it).second->linkage = BRIG_LINKAGE_PROGRAM;
      }

  /* Remember the current section; it is switched back to once the module
     has been written.  */
  saved_section = in_section;

  switch_to_section (get_section (BRIG_ELF_SECTION_NAME, SECTION_NOTYPE, NULL));
  char tmp_name[64];
  ASM_GENERATE_INTERNAL_LABEL (tmp_name, BRIG_LABEL_STRING, 1);
  ASM_OUTPUT_LABEL (asm_out_file, tmp_name);

  /* Build a declaration named after the label just emitted; it is what
     hsa_output_libgomp_mapping receives below.  */
  tree brig_id = get_identifier (tmp_name);
  tree brig_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, brig_id,
			       char_type_node);
  SET_DECL_ASSEMBLER_NAME (brig_decl, brig_id);
  TREE_ADDRESSABLE (brig_decl) = 1;
  TREE_READONLY (brig_decl) = 1;
  DECL_ARTIFICIAL (brig_decl) = 1;
  DECL_IGNORED_P (brig_decl) = 1;
  TREE_STATIC (brig_decl) = 1;
  TREE_PUBLIC (brig_decl) = 0;
  TREE_USED (brig_decl) = 1;
  /* NOTE(review): the declaration is its own DECL_INITIAL; presumably only a
     non-null placeholder because the contents are written out by hand in
     this function -- confirm.  */
  DECL_INITIAL (brig_decl) = brig_decl;
  TREE_ASM_WRITTEN (brig_decl) = 1;

  BrigModuleHeader module_header;
  memcpy (&module_header.identification, "HSA BRIG",
	  sizeof (module_header.identification));
  module_header.brigMajor = lendian32 (BRIG_VERSION_BRIG_MAJOR);
  module_header.brigMinor = lendian32 (BRIG_VERSION_BRIG_MINOR);
  uint64_t section_index[3];

  /* Padding emitted after each section.  Note that a section whose size is
     already a multiple of HSA_SECTION_ALIGNMENT gets a full
     HSA_SECTION_ALIGNMENT bytes of padding, not zero.  */
  int data_padding, code_padding, operand_padding;
  data_padding = HSA_SECTION_ALIGNMENT
    - brig_data.total_size % HSA_SECTION_ALIGNMENT;
  code_padding = HSA_SECTION_ALIGNMENT
    - brig_code.total_size % HSA_SECTION_ALIGNMENT;
  operand_padding = HSA_SECTION_ALIGNMENT
    - brig_operand.total_size % HSA_SECTION_ALIGNMENT;

  uint64_t module_size = sizeof (module_header)
    + sizeof (section_index)
    + brig_data.total_size
    + data_padding
    + brig_code.total_size
    + code_padding
    + brig_operand.total_size
    + operand_padding;
  gcc_assert ((module_size % 16) == 0);
  module_header.byteCount = lendian64 (module_size);
  memset (&module_header.hash, 0, sizeof (module_header.hash));
  module_header.reserved = 0;
  module_header.sectionCount = lendian32 (3);
  /* The section index is emitted immediately after the header.  */
  module_header.sectionIndex = lendian64 (sizeof (module_header));
  assemble_string ((const char *) &module_header, sizeof (module_header));

  /* Offsets of the data, code and operand sections from the module start,
     in the order in which the sections are output below.  */
  uint64_t off = sizeof (module_header) + sizeof (section_index);
  section_index[0] = lendian64 (off);
  off += brig_data.total_size + data_padding;
  section_index[1] = lendian64 (off);
  off += brig_code.total_size + code_padding;
  section_index[2] = lendian64 (off);
  assemble_string ((const char *) &section_index, sizeof (section_index));

  /* Zero bytes used as padding; no padding value computed above exceeds
     HSA_SECTION_ALIGNMENT.  */
  char padding[HSA_SECTION_ALIGNMENT];
  memset (padding, 0, sizeof (padding));

  brig_data.output ();
  assemble_string (padding, data_padding);
  brig_code.output ();
  assemble_string (padding, code_padding);
  brig_operand.output ();
  assemble_string (padding, operand_padding);

  if (saved_section)
    switch_to_section (saved_section);

  hsa_output_libgomp_mapping (brig_decl);

  hsa_free_decl_kernel_mapping ();
  brig_release_data ();
  hsa_deinit_compilation_unit_data ();

  delete emitted_declarations;
  emitted_declarations = NULL;
  delete function_offsets;
  function_offsets = NULL;
}