diff options
Diffstat (limited to 'dyngen.c')
-rw-r--r-- | dyngen.c | 271 |
1 files changed, 212 insertions, 59 deletions
@@ -1255,90 +1255,149 @@ int arm_emit_ldr_info(const char *name, unsigned long start_offset, { uint8_t *p; uint32_t insn; - int offset, min_offset, pc_offset, data_size; + int offset, min_offset, pc_offset, data_size, spare, max_pool; uint8_t data_allocated[1024]; unsigned int data_index; + int type; memset(data_allocated, 0, sizeof(data_allocated)); p = p_start; min_offset = p_end - p_start; + spare = 0x7fffffff; while (p < p_start + min_offset) { insn = get32((uint32_t *)p); + /* TODO: Armv5e ldrd. */ + /* TODO: VFP load. */ if ((insn & 0x0d5f0000) == 0x051f0000) { /* ldr reg, [pc, #im] */ offset = insn & 0xfff; if (!(insn & 0x00800000)) - offset = -offset; + offset = -offset; + max_pool = 4096; + type = 0; + } else if ((insn & 0x0e5f0f00) == 0x0c1f0100) { + /* FPA ldf. */ + offset = (insn & 0xff) << 2; + if (!(insn & 0x00800000)) + offset = -offset; + max_pool = 1024; + type = 1; + } else if ((insn & 0x0fff0000) == 0x028f0000) { + /* Some gcc load a doubleword immediate with + add regN, pc, #imm + ldmia regN, {regN, regM} + Hope and pray the compiler never generates somethin like + add reg, pc, #imm1; ldr reg, [reg, #-imm2]; */ + int r; + + r = (insn & 0xf00) >> 7; + offset = ((insn & 0xff) >> r) | ((insn & 0xff) << (32 - r)); + max_pool = 1024; + type = 2; + } else { + max_pool = 0; + type = -1; + } + if (type >= 0) { + /* PC-relative load needs fixing up. */ + if (spare > max_pool - offset) + spare = max_pool - offset; if ((offset & 3) !=0) - error("%s:%04x: ldr pc offset must be 32 bit aligned", + error("%s:%04x: pc offset must be 32 bit aligned", + name, start_offset + p - p_start); + if (offset < 0) + error("%s:%04x: Embedded literal value", name, start_offset + p - p_start); pc_offset = p - p_start + offset + 8; if (pc_offset <= (p - p_start) || pc_offset >= (p_end - p_start)) - error("%s:%04x: ldr pc offset must point inside the function code", + error("%s:%04x: pc offset must point inside the function code", name, start_offset + p - p_start); if (pc_offset < min_offset) min_offset = pc_offset; if (outfile) { - /* ldr position */ + /* The intruction position */ fprintf(outfile, " arm_ldr_ptr->ptr = gen_code_ptr + %d;\n", p - p_start); - /* ldr data index */ - data_index = ((p_end - p_start) - pc_offset - 4) >> 2; - fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr + %d;\n", + /* The position of the constant pool data. */ + data_index = ((p_end - p_start) - pc_offset) >> 2; + fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr - %d;\n", data_index); + fprintf(outfile, " arm_ldr_ptr->type = %d;\n", type); fprintf(outfile, " arm_ldr_ptr++;\n"); - if (data_index >= sizeof(data_allocated)) - error("%s: too many data", name); - if (!data_allocated[data_index]) { - ELF_RELOC *rel; - int i, addend, type; - const char *sym_name, *p; - char relname[1024]; - - data_allocated[data_index] = 1; - - /* data value */ - addend = get32((uint32_t *)(p_start + pc_offset)); - relname[0] = '\0'; - for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { - if (rel->r_offset == (pc_offset + start_offset)) { - sym_name = get_rel_sym_name(rel); - /* the compiler leave some unnecessary references to the code */ - get_reloc_expr(relname, sizeof(relname), sym_name); - type = ELF32_R_TYPE(rel->r_info); - if (type != R_ARM_ABS32) - error("%s: unsupported data relocation", name); - break; - } - } - fprintf(outfile, " arm_data_ptr[%d] = 0x%x", - data_index, addend); - if (relname[0] != '\0') - fprintf(outfile, " + %s", relname); - fprintf(outfile, ";\n"); - } } } p += 4; } + + /* Copy and relocate the constant pool data. */ data_size = (p_end - p_start) - min_offset; if (data_size > 0 && outfile) { - fprintf(outfile, " arm_data_ptr += %d;\n", data_size >> 2); + spare += min_offset; + fprintf(outfile, " arm_data_ptr -= %d;\n", data_size >> 2); + fprintf(outfile, " arm_pool_ptr -= %d;\n", data_size); + fprintf(outfile, " if (arm_pool_ptr > gen_code_ptr + %d)\n" + " arm_pool_ptr = gen_code_ptr + %d;\n", + spare, spare); + + data_index = 0; + for (pc_offset = min_offset; + pc_offset < p_end - p_start; + pc_offset += 4) { + + ELF_RELOC *rel; + int i, addend, type; + const char *sym_name; + char relname[1024]; + + /* data value */ + addend = get32((uint32_t *)(p_start + pc_offset)); + relname[0] = '\0'; + for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { + if (rel->r_offset == (pc_offset + start_offset)) { + sym_name = get_rel_sym_name(rel); + /* the compiler leave some unnecessary references to the code */ + get_reloc_expr(relname, sizeof(relname), sym_name); + type = ELF32_R_TYPE(rel->r_info); + if (type != R_ARM_ABS32) + error("%s: unsupported data relocation", name); + break; + } + } + fprintf(outfile, " arm_data_ptr[%d] = 0x%x", + data_index, addend); + if (relname[0] != '\0') + fprintf(outfile, " + %s", relname); + fprintf(outfile, ";\n"); + + data_index++; + } } - /* the last instruction must be a mov pc, lr */ if (p == p_start) goto arm_ret_error; p -= 4; insn = get32((uint32_t *)p); - if ((insn & 0xffff0000) != 0xe91b0000) { + /* The last instruction must be an ldm instruction. There are several + forms generated by gcc: + ldmib sp, {..., pc} (implies a sp adjustment of +4) + ldmia sp, {..., pc} + ldmea fp, {..., pc} */ + if ((insn & 0xffff8000) == 0xe99d8000) { + if (outfile) { + fprintf(outfile, + " *(uint32_t *)(gen_code_ptr + %d) = 0xe28dd004;\n", + p - p_start); + } + p += 4; + } else if ((insn & 0xffff8000) != 0xe89d8000 + && (insn & 0xffff8000) != 0xe91b8000) { arm_ret_error: if (!outfile) printf("%s: invalid epilog\n", name); } - return p - p_start; + return p - p_start; } #endif @@ -1537,6 +1596,8 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, } #elif defined(HOST_ARM) { + uint32_t insn; + if ((p_end - p_start) <= 16) error("%s: function too small", name); if (get32((uint32_t *)p_start) != 0xe1a0c00d || @@ -1545,6 +1606,12 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, error("%s: invalid prolog", name); p_start += 12; start_offset += 12; + insn = get32((uint32_t *)p_start); + if ((insn & 0xffffff00) == 0xe24dd000) { + /* Stack adjustment. Assume op uses the frame pointer. */ + p_start -= 4; + start_offset -= 4; + } copy_size = arm_emit_ldr_info(name, start_offset, NULL, p_start, p_end, relocs, nb_relocs); } @@ -2282,7 +2349,37 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, int type; int addend; int reloc_offset; - + uint32_t insn; + + insn = get32((uint32_t *)(p_start + 4)); + /* If prologue ends in sub sp, sp, #const then assume + op has a stack frame and needs the frame pointer. */ + if ((insn & 0xffffff00) == 0xe24dd000) { + int i; + uint32_t opcode; + opcode = 0xe28db000; /* add fp, sp, #0. */ +#if 0 +/* ??? Need to undo the extra stack adjustment at the end of the op. + For now just leave the stack misaligned and hope it doesn't break anything + too important. */ + if ((insn & 4) != 0) { + /* Preserve doubleword stack alignment. */ + fprintf(outfile, + " *(uint32_t *)(gen_code_ptr + 4)= 0x%x;\n", + insn + 4); + opcode -= 4; + } +#endif + insn = get32((uint32_t *)(p_start - 4)); + /* Calculate the size of the saved registers, + excluding pc. */ + for (i = 0; i < 15; i++) { + if (insn & (1 << i)) + opcode += 4; + } + fprintf(outfile, + " *(uint32_t *)gen_code_ptr = 0x%x;\n", opcode); + } arm_emit_ldr_info(name, start_offset, outfile, p_start, p_end, relocs, nb_relocs); @@ -2303,6 +2400,8 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, reloc_offset, name, addend); break; case R_ARM_PC24: + case R_ARM_JUMP24: + case R_ARM_CALL: fprintf(outfile, " arm_reloc_pc24((uint32_t *)(gen_code_ptr + %d), 0x%x, %s);\n", reloc_offset, addend, name); break; @@ -2407,6 +2506,28 @@ int gen_file(FILE *outfile, int out_type) } else { /* generate big code generation switch */ + +#ifdef HOST_ARM + /* We need to know the size of all the ops so we can figure out when + to emit constant pools. This must be consistent with opc.h. */ +fprintf(outfile, +"static const uint32_t arm_opc_size[] = {\n" +" 0,\n" /* end */ +" 0,\n" /* nop */ +" 0,\n" /* nop1 */ +" 0,\n" /* nop2 */ +" 0,\n"); /* nop3 */ + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { + const char *name; + name = get_sym_name(sym); + if (strstart(name, OP_PREFIX, NULL)) { + fprintf(outfile, " %d,\n", sym->st_size); + } + } +fprintf(outfile, +"};\n"); +#endif + fprintf(outfile, "int dyngen_code(uint8_t *gen_code_buf,\n" " uint16_t *label_offsets, uint16_t *jmp_offsets,\n" @@ -2417,10 +2538,36 @@ fprintf(outfile, " const uint32_t *opparam_ptr;\n"); #ifdef HOST_ARM +/* Arm is tricky because it uses constant pools for loading immediate values. + We assume (and require) each function is code followed by a constant pool. + All the ops are small so this should be ok. For each op we figure + out how much "spare" range we have in the load instructions. This allows + us to insert subsequent ops in between the op and the constant pool, + eliminating the neeed to jump around the pool. + + We currently generate: + + [ For this example we assume merging would move op1_pool out of range. + In practice we should be able to combine many ops before the offset + limits are reached. ] + op1_code; + op2_code; + goto op3; + op2_pool; + op1_pool; +op3: + op3_code; + ret; + op3_pool; + + Ideally we'd put op1_pool before op2_pool, but that requires two passes. + */ fprintf(outfile, " uint8_t *last_gen_code_ptr = gen_code_buf;\n" " LDREntry *arm_ldr_ptr = arm_ldr_table;\n" -" uint32_t *arm_data_ptr = arm_data_table;\n"); +" uint32_t *arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n" +/* Initialise the parmissible pool offset to an arbitary large value. */ +" uint8_t *arm_pool_ptr = gen_code_buf + 0x1000000;\n"); #endif #ifdef HOST_IA64 { @@ -2489,9 +2636,23 @@ fprintf(outfile, /* Generate prologue, if needed. */ fprintf(outfile, -" for(;;) {\n" -" switch(*opc_ptr++) {\n" -); +" for(;;) {\n"); + +#ifdef HOST_ARM +/* Generate constant pool if needed */ +fprintf(outfile, +" if (gen_code_ptr + arm_opc_size[*opc_ptr] >= arm_pool_ptr) {\n" +" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, " +"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 1);\n" +" last_gen_code_ptr = gen_code_ptr;\n" +" arm_ldr_ptr = arm_ldr_table;\n" +" arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n" +" arm_pool_ptr = gen_code_ptr + 0x1000000;\n" +" }\n"); +#endif + +fprintf(outfile, +" switch(*opc_ptr++) {\n"); for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { const char *name; @@ -2525,17 +2686,6 @@ fprintf(outfile, " goto the_end;\n" " }\n"); -#ifdef HOST_ARM -/* generate constant table if needed */ -fprintf(outfile, -" if ((gen_code_ptr - last_gen_code_ptr) >= (MAX_FRAG_SIZE - MAX_OP_SIZE)) {\n" -" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 1);\n" -" last_gen_code_ptr = gen_code_ptr;\n" -" arm_ldr_ptr = arm_ldr_table;\n" -" arm_data_ptr = arm_data_table;\n" -" }\n"); -#endif - fprintf(outfile, " }\n" @@ -2553,7 +2703,10 @@ fprintf(outfile, /* generate some code patching */ #ifdef HOST_ARM -fprintf(outfile, "gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 0);\n"); +fprintf(outfile, +"if (arm_data_ptr != arm_data_table + ARM_LDR_TABLE_SIZE)\n" +" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, " +"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 0);\n"); #endif /* flush instruction cache */ fprintf(outfile, "flush_icache_range((unsigned long)gen_code_buf, (unsigned long)gen_code_ptr);\n"); |