diff options
author | Chris Metcalf <cmetcalf@tilera.com> | 2010-08-13 08:52:19 -0400 |
---|---|---|
committer | Chris Metcalf <cmetcalf@tilera.com> | 2010-08-13 08:52:19 -0400 |
commit | c745a8a11fa1df6078bfc61fc29492ed43f71c2b (patch) | |
tree | 2db1cdf9cd0d0e892f4f92de1fd2700ac319f04a /arch/tile/lib | |
parent | 1fcbe027b5d29ec9cd0eeb753c14fb366ae852ac (diff) | |
download | linux-3.10-c745a8a11fa1df6078bfc61fc29492ed43f71c2b.tar.gz linux-3.10-c745a8a11fa1df6078bfc61fc29492ed43f71c2b.tar.bz2 linux-3.10-c745a8a11fa1df6078bfc61fc29492ed43f71c2b.zip |
arch/tile: Various cleanups.
This change rolls up miscellaneous cleanups, none of which fixes an actual bug.
- Remove a stale CONFIG_ value from the default tile_defconfig
- Remove unused tns_atomic_xxx() family of methods from <asm/atomic.h>
- Optimize get_order() using Tile's "clz" instruction
- Fix a bad hypervisor upcall name (not currently used in Linux anyway)
- Use __copy_in_user_inatomic() name for consistency, and export it
- Export some additional hypervisor driver I/O upcalls and some homecache calls
- Remove the obfuscating MEMCPY_TEST_WH64 support code
- Other stray comment cleanups, #if 0 removal, etc.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/lib')
-rw-r--r-- | arch/tile/lib/Makefile | 4 | ||||
-rw-r--r-- | arch/tile/lib/exports.c | 16 | ||||
-rw-r--r-- | arch/tile/lib/memcpy_32.S | 20 | ||||
-rw-r--r-- | arch/tile/lib/memset_32.c | 25 |
4 files changed, 18 insertions, 47 deletions
diff --git a/arch/tile/lib/Makefile b/arch/tile/lib/Makefile index 438af38bc9e..746dc81ed3c 100644 --- a/arch/tile/lib/Makefile +++ b/arch/tile/lib/Makefile @@ -7,7 +7,9 @@ lib-y = cacheflush.o checksum.o cpumask.o delay.o \ memcpy_$(BITS).o memchr_$(BITS).o memmove_$(BITS).o memset_$(BITS).o \ strchr_$(BITS).o strlen_$(BITS).o -ifneq ($(CONFIG_TILEGX),y) +ifeq ($(CONFIG_TILEGX),y) +lib-y += memcpy_user_64.o +else lib-y += atomic_32.o atomic_asm_32.o memcpy_tile64.o endif diff --git a/arch/tile/lib/exports.c b/arch/tile/lib/exports.c index 6bc7b52b4aa..ce5dbf56578 100644 --- a/arch/tile/lib/exports.c +++ b/arch/tile/lib/exports.c @@ -36,21 +36,29 @@ EXPORT_SYMBOL(clear_user_asm); EXPORT_SYMBOL(current_text_addr); EXPORT_SYMBOL(dump_stack); -/* arch/tile/lib/__memcpy.S */ -/* NOTE: on TILE64, these symbols appear in arch/tile/lib/memcpy_tile64.c */ +/* arch/tile/lib/, various memcpy files */ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__copy_to_user_inatomic); EXPORT_SYMBOL(__copy_from_user_inatomic); EXPORT_SYMBOL(__copy_from_user_zeroing); +#ifdef __tilegx__ +EXPORT_SYMBOL(__copy_in_user_inatomic); +#endif /* hypervisor glue */ #include <hv/hypervisor.h> EXPORT_SYMBOL(hv_dev_open); EXPORT_SYMBOL(hv_dev_pread); EXPORT_SYMBOL(hv_dev_pwrite); +EXPORT_SYMBOL(hv_dev_preada); +EXPORT_SYMBOL(hv_dev_pwritea); +EXPORT_SYMBOL(hv_dev_poll); +EXPORT_SYMBOL(hv_dev_poll_cancel); EXPORT_SYMBOL(hv_dev_close); +EXPORT_SYMBOL(hv_sysconf); +EXPORT_SYMBOL(hv_confstr); -/* -ltile-cc */ +/* libgcc.a */ uint32_t __udivsi3(uint32_t dividend, uint32_t divisor); EXPORT_SYMBOL(__udivsi3); int32_t __divsi3(int32_t dividend, int32_t divisor); @@ -70,8 +78,6 @@ EXPORT_SYMBOL(__moddi3); #ifndef __tilegx__ uint64_t __ll_mul(uint64_t n0, uint64_t n1); EXPORT_SYMBOL(__ll_mul); -#endif -#ifndef __tilegx__ int64_t __muldi3(int64_t, int64_t); EXPORT_SYMBOL(__muldi3); uint64_t __lshrdi3(uint64_t, unsigned int); diff --git a/arch/tile/lib/memcpy_32.S b/arch/tile/lib/memcpy_32.S index f92984bf60e..30c3b7ebb55 
100644 --- a/arch/tile/lib/memcpy_32.S +++ b/arch/tile/lib/memcpy_32.S @@ -17,10 +17,6 @@ #include <arch/chip.h> -#if CHIP_HAS_WH64() || defined(MEMCPY_TEST_WH64) -#define MEMCPY_USE_WH64 -#endif - #include <linux/linkage.h> @@ -160,7 +156,7 @@ EX: { sw r0, r3; addi r0, r0, 4; addi r2, r2, -4 } { addi r3, r1, 60; andi r9, r9, -64 } -#ifdef MEMCPY_USE_WH64 +#if CHIP_HAS_WH64() /* No need to prefetch dst, we'll just do the wh64 * right before we copy a line. */ @@ -173,7 +169,7 @@ EX: { lw r6, r3; addi r3, r3, 64 } /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bnzt zero, . } EX: { lw r7, r3; addi r3, r3, 64 } -#ifndef MEMCPY_USE_WH64 +#if !CHIP_HAS_WH64() /* Prefetch the dest */ /* Intentionally stall for a few cycles to leave L2 cache alone. */ { bnzt zero, . } @@ -288,15 +284,7 @@ EX: { lw r7, r3; addi r3, r3, 64 } /* Fill second L1D line. */ EX: { lw r17, r17; addi r1, r1, 48; mvz r3, r13, r1 } /* r17 = WORD_4 */ -#ifdef MEMCPY_TEST_WH64 - /* Issue a fake wh64 that clobbers the destination words - * with random garbage, for testing. - */ - { movei r19, 64; crc32_32 r10, r2, r9 } -.Lwh64_test_loop: -EX: { sw r9, r10; addi r9, r9, 4; addi r19, r19, -4 } - { bnzt r19, .Lwh64_test_loop; crc32_32 r10, r10, r19 } -#elif CHIP_HAS_WH64() +#if CHIP_HAS_WH64() /* Prepare destination line for writing. 
*/ EX: { wh64 r9; addi r9, r9, 64 } #else @@ -340,7 +328,7 @@ EX: { lw r18, r1; addi r1, r1, 4 } /* r18 = WORD_8 */ EX: { sw r0, r16; addi r0, r0, 4; add r16, r0, r2 } /* store(WORD_0) */ EX: { sw r0, r13; addi r0, r0, 4; andi r16, r16, -64 } /* store(WORD_1) */ EX: { sw r0, r14; addi r0, r0, 4; slt_u r16, r9, r16 } /* store(WORD_2) */ -#ifdef MEMCPY_USE_WH64 +#if CHIP_HAS_WH64() EX: { sw r0, r15; addi r0, r0, 4; addi r13, sp, -64 } /* store(WORD_3) */ #else /* Back up the r9 to a cache line we are already storing to diff --git a/arch/tile/lib/memset_32.c b/arch/tile/lib/memset_32.c index bfde5d864df..d014c1fbcbc 100644 --- a/arch/tile/lib/memset_32.c +++ b/arch/tile/lib/memset_32.c @@ -141,7 +141,6 @@ void *memset(void *s, int c, size_t n) */ __insn_prefetch(&out32[ahead32]); -#if 1 #if CACHE_LINE_SIZE_IN_WORDS % 4 != 0 #error "Unhandled CACHE_LINE_SIZE_IN_WORDS" #endif @@ -157,30 +156,6 @@ void *memset(void *s, int c, size_t n) *out32++ = v32; *out32++ = v32; } -#else - /* Unfortunately, due to a code generator flaw this - * allocates a separate register for each of these - * stores, which requires a large number of spills, - * which makes this procedure enormously bigger - * (something like 70%) - */ - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - *out32++ = v32; - n32 -= 16; -#endif /* To save compiled code size, reuse this loop even * when we run out of prefetching to do by dropping |