diff options
author | Min Chen <chenm003@gmail.com> | 2016-06-24 20:56:28 -0500 |
---|---|---|
committer | Johann Koenig <johannkoenig@google.com> | 2016-06-27 17:50:45 +0000 |
commit | b2fb48cfcff0bfe1fc8ac699786557e34bc1019d (patch) | |
tree | dfe21b6772dd27c15ff81381fb4a7db59e73a72a /vpx_dsp | |
parent | b9ec759bc23fdbafaf8266badb72a65d201ad315 (diff) | |
download | libvpx-b2fb48cfcff0bfe1fc8ac699786557e34bc1019d.tar.gz libvpx-b2fb48cfcff0bfe1fc8ac699786557e34bc1019d.tar.bz2 libvpx-b2fb48cfcff0bfe1fc8ac699786557e34bc1019d.zip |
improve vpx_filter_block1d* by replacing paddsw+psraw with pmulhrsw
Change-Id: I14c0c2e54d0b0584df88e9a3f0a256ec096bea6e
Diffstat (limited to 'vpx_dsp')
-rw-r--r-- | vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm | 20 |
1 file changed, 8 insertions, 12 deletions
diff --git a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm index 3c8cfd225..538b2129d 100644 --- a/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm +++ b/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm @@ -14,14 +14,14 @@ mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 + mov ecx, 0x01000100 movdqa xmm3, [rdx] ;load filters psrldq xmm3, 6 packsswb xmm3, xmm3 pshuflw xmm3, xmm3, 0b ;k3_k4 - movq xmm2, rcx ;rounding + movd xmm2, ecx ;rounding_shift pshufd xmm2, xmm2, 0 movsxd rax, DWORD PTR arg(1) ;pixels_per_line @@ -33,8 +33,7 @@ punpcklbw xmm0, xmm1 pmaddubsw xmm0, xmm3 - paddsw xmm0, xmm2 ;rounding - psraw xmm0, 7 ;shift + pmulhrsw xmm0, xmm2 ;rounding(+64)+shift(>>7) packuswb xmm0, xmm0 ;pack to byte %if %1 @@ -51,7 +50,7 @@ mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr - mov rcx, 0x0400040 + mov ecx, 0x01000100 movdqa xmm7, [rdx] ;load filters psrldq xmm7, 6 @@ -59,7 +58,7 @@ pshuflw xmm7, xmm7, 0b ;k3_k4 punpcklwd xmm7, xmm7 - movq xmm6, rcx ;rounding + movd xmm6, ecx ;rounding_shift pshufd xmm6, xmm6, 0 movsxd rax, DWORD PTR arg(1) ;pixels_per_line @@ -71,8 +70,7 @@ punpcklbw xmm0, xmm1 pmaddubsw xmm0, xmm7 - paddsw xmm0, xmm6 ;rounding - psraw xmm0, 7 ;shift + pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7) packuswb xmm0, xmm0 ;pack back to byte %if %1 @@ -92,10 +90,8 @@ pmaddubsw xmm0, xmm7 pmaddubsw xmm2, xmm7 - paddsw xmm0, xmm6 ;rounding - paddsw xmm2, xmm6 - psraw xmm0, 7 ;shift - psraw xmm2, 7 + pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7) + pmulhrsw xmm2, xmm6 packuswb xmm0, xmm2 ;pack back to byte %if %1 |