From c6b9039fd94aede59ac1086a379955137fc8e1b8 Mon Sep 17 00:00:00 2001 From: John Koleszar Date: Fri, 13 Jul 2012 15:21:29 -0700 Subject: Restyle code Approximate the Google style guide[1] so that that there's a written document to follow and tools to check compliance[2]. [1]: http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml [2]: http://google-styleguide.googlecode.com/svn/trunk/cpplint/cpplint.py Change-Id: Idf40e3d8dddcc72150f6af127b13e5dab838685f --- vpx_scale/arm/neon/yv12extend_arm.c | 9 +- vpx_scale/arm/scalesystemdependent.c | 73 +- vpx_scale/generic/bicubic_scaler.c | 769 +++++----- vpx_scale/generic/gen_scalers.c | 865 +++++------ vpx_scale/generic/scalesystemdependent.c | 9 +- vpx_scale/generic/vpxscale.c | 1305 ++++++++-------- vpx_scale/generic/yv12config.c | 134 +- vpx_scale/generic/yv12extend.c | 430 +++--- vpx_scale/include/generic/vpxscale_arbitrary.h | 53 +- vpx_scale/scale_mode.h | 11 +- vpx_scale/vpxscale.h | 36 +- vpx_scale/win32/scaleopt.c | 1922 ++++++++++++------------ vpx_scale/win32/scalesystemdependent.c | 70 +- vpx_scale/yv12config.h | 75 +- vpx_scale/yv12extend.h | 8 +- 15 files changed, 2789 insertions(+), 2980 deletions(-) (limited to 'vpx_scale') diff --git a/vpx_scale/arm/neon/yv12extend_arm.c b/vpx_scale/arm/neon/yv12extend_arm.c index d7a8289a9..94f499cf7 100644 --- a/vpx_scale/arm/neon/yv12extend_arm.c +++ b/vpx_scale/arm/neon/yv12extend_arm.c @@ -16,10 +16,9 @@ void vp8_yv12_copy_frame_func_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); void -vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ - vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); - //printf("Border:%d; plane_stride:%d; plane_height:%d; plane_width:%d\n",dst_ybc->border,dst_ybc->y_stride,dst_ybc->y_height,dst_ybc->y_width); +vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) { + vp8_yv12_copy_frame_func_neon(src_ybc, dst_ybc); + // printf("Border:%d; plane_stride:%d; plane_height:%d; plane_width:%d\n",dst_ybc->border,dst_ybc->y_stride,dst_ybc->y_height,dst_ybc->y_width); - vp8_yv12_extend_frame_borders_ptr(dst_ybc); + vp8_yv12_extend_frame_borders_ptr(dst_ybc); } diff --git a/vpx_scale/arm/scalesystemdependent.c b/vpx_scale/arm/scalesystemdependent.c index fee76fff7..5c19d61d9 100644 --- a/vpx_scale/arm/scalesystemdependent.c +++ b/vpx_scale/arm/scalesystemdependent.c @@ -46,51 +46,50 @@ extern void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CO * SPECIAL NOTES : None. * ****************************************************************************/ -void vp8_scale_machine_specific_config() -{ +void vp8_scale_machine_specific_config() { #if HAVE_ARMV7 && CONFIG_RUNTIME_CPU_DETECT - int flags; + int flags; #endif - /* - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_armv4; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_armv4; - vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_armv4; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_armv4; - vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; - vp8_horizontal_line_3_4_scale = horizontal_line_3_4_scale_armv4; - vp8_vertical_band_3_4_scale = vertical_band_3_4_scale_armv4; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = horizontal_line_2_3_scale_armv4; - vp8_vertical_band_2_3_scale = vertical_band_2_3_scale_armv4; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_armv4; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_armv4; - vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; + /* + vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_armv4; + vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_armv4; + vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; + vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_armv4; + vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_armv4; + vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; + vp8_horizontal_line_3_4_scale = horizontal_line_3_4_scale_armv4; + vp8_vertical_band_3_4_scale = vertical_band_3_4_scale_armv4; + vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; + vp8_horizontal_line_2_3_scale = horizontal_line_2_3_scale_armv4; + vp8_vertical_band_2_3_scale = vertical_band_2_3_scale_armv4; + vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; + vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_armv4; + vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_armv4; + vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; - vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; - vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; - vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; - vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; - vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; - vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; - */ + vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; + vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; + vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; + vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; + vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; + vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; + vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; + */ #if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT - vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders; - vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly; - vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame; + vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders; + vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly; + vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame; #endif #if HAVE_ARMV7 #if CONFIG_RUNTIME_CPU_DETECT - flags = arm_cpu_caps(); - if (flags & HAS_NEON) + flags = arm_cpu_caps(); + if (flags & HAS_NEON) #endif - { - vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon; - vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly_neon; - vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame_neon; - } + { + vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon; + vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly_neon; + vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame_neon; + } #endif } diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c index 4468e9d0a..c116740da 100644 --- a/vpx_scale/generic/bicubic_scaler.c +++ b/vpx_scale/generic/bicubic_scaler.c @@ -46,245 +46,229 @@ static float a = -0.6; // 3 2 // C0 = a*t - a*t // -static short c0_fixed(unsigned int t) -{ - // put t in Q16 notation - unsigned short v1, v2; - - // Q16 - v1 = (a_i * t) >> 16; - v1 = (v1 * t) >> 16; - - // Q16 - v2 = (a_i * t) >> 16; - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q12 - return -((v1 - v2) >> 4); +static short c0_fixed(unsigned int t) { + // put t in Q16 notation + unsigned short v1, v2; + + // Q16 + v1 = (a_i * t) >> 16; + v1 = (v1 * t) >> 16; + + // Q16 + v2 = (a_i * t) >> 16; + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; + + // Q12 + return -((v1 - v2) >> 4); } // 2 3 // C1 = a*t + (3-2*a)*t - (2-a)*t // -static short c1_fixed(unsigned int t) -{ - unsigned short v1, v2, v3; - unsigned short two, three; - - // Q16 - v1 = (a_i * t) >> 16; - - // Q13 - two = 2 << 13; - v2 = two - (a_i >> 3); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q13 - three = 3 << 13; - v3 = three - (2 * (a_i >> 3)); - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - - // Q12 - return (((v1 >> 3) - v2 + v3) >> 1); +static short c1_fixed(unsigned int t) { + unsigned short v1, v2, v3; + unsigned short two, three; + + // Q16 + v1 = (a_i * t) >> 16; + + // Q13 + two = 2 << 13; + v2 = two - (a_i >> 3); + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; + + // Q13 + three = 3 << 13; + v3 = three - (2 * (a_i >> 3)); + v3 = (v3 * t) >> 16; + v3 = (v3 * t) >> 16; + + // Q12 + return (((v1 >> 3) - v2 + v3) >> 1); } // 2 3 // C2 = 1 - (3-a)*t + (2-a)*t // -static short c2_fixed(unsigned int t) -{ - unsigned short v1, v2, v3; - unsigned short two, three; - - // Q13 - v1 = 1 << 13; - - // Q13 - three = 3 << 13; - v2 = three - (a_i >> 3); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; - - // Q13 - two = 2 << 13; - v3 = two - (a_i >> 3); - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; - - // Q12 - return (v1 - v2 + v3) >> 1; +static short c2_fixed(unsigned int t) { + unsigned short v1, v2, v3; + unsigned short two, three; + + // Q13 + v1 = 1 << 13; + + // Q13 + three = 3 << 13; + v2 = three - (a_i >> 3); + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; + + // Q13 + two = 2 << 13; + v3 = two - (a_i >> 3); + v3 = (v3 * t) >> 16; + v3 = (v3 * t) >> 16; + v3 = (v3 * t) >> 16; + + // Q12 + return (v1 - v2 + v3) >> 1; } // 2 3 // C3 = a*t - 2*a*t + a*t // -static short c3_fixed(unsigned int t) -{ - int v1, v2, v3; +static short c3_fixed(unsigned int t) { + int v1, v2, v3; - // Q16 - v1 = (a_i * t) >> 16; + // Q16 + v1 = (a_i * t) >> 16; - // Q15 - v2 = 2 * (a_i >> 1); - v2 = (v2 * t) >> 16; - v2 = (v2 * t) >> 16; + // Q15 + v2 = 2 * (a_i >> 1); + v2 = (v2 * t) >> 16; + v2 = (v2 * t) >> 16; - // Q16 - v3 = (a_i * t) >> 16; - v3 = (v3 * t) >> 16; - v3 = (v3 * t) >> 16; + // Q16 + v3 = (a_i * t) >> 16; + v3 = (v3 * t) >> 16; + v3 = (v3 * t) >> 16; - // Q12 - return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); + // Q12 + return ((v2 - (v1 >> 1) - (v3 >> 1)) >> 3); } #else // 3 2 // C0 = -a*t + a*t // -float C0(float t) -{ - return -a * t * t * t + a * t * t; +float C0(float t) { + return -a * t * t * t + a * t * t; } // 2 3 // C1 = -a*t + (2*a+3)*t - (a+2)*t // -float C1(float t) -{ - return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; +float C1(float t) { + return -(a + 2.0f) * t * t * t + (2.0f * a + 3.0f) * t * t - a * t; } // 2 3 // C2 = 1 - (a+3)*t + (a+2)*t // -float C2(float t) -{ - return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; +float C2(float t) { + return (a + 2.0f) * t * t * t - (a + 3.0f) * t * t + 1.0f; } // 2 3 // C3 = a*t - 2*a*t + a*t // -float C3(float t) -{ - return a * t * t * t - 2.0f * a * t * t + a * t; +float C3(float t) { + return a * t * t * t - 2.0f * a * t * t + a * t; } #endif #if 0 -int compare_real_fixed() -{ - int i, errors = 0; - float mult = 1.0 / 10000.0; - unsigned int fixed_mult = mult * 4294967296;//65536; - unsigned int phase_offset_int; - float phase_offset_real; - - for (i = 0; i < 10000; i++) - { - int fixed0, fixed1, fixed2, fixed3, fixed_total; - int real0, real1, real2, real3, real_total; - - phase_offset_real = (float)i * mult; - phase_offset_int = (fixed_mult * i) >> 16; +int compare_real_fixed() { + int i, errors = 0; + float mult = 1.0 / 10000.0; + unsigned int fixed_mult = mult * 4294967296;// 65536; + unsigned int phase_offset_int; + float phase_offset_real; + + for (i = 0; i < 10000; i++) { + int fixed0, fixed1, fixed2, fixed3, fixed_total; + int real0, real1, real2, real3, real_total; + + phase_offset_real = (float)i * mult; + phase_offset_int = (fixed_mult * i) >> 16; // phase_offset_int = phase_offset_real * 65536; - fixed0 = c0_fixed(phase_offset_int); - real0 = C0(phase_offset_real) * 4096.0; + fixed0 = c0_fixed(phase_offset_int); + real0 = C0(phase_offset_real) * 4096.0; - if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) - errors++; + if ((abs(fixed0) > (abs(real0) + 1)) || (abs(fixed0) < (abs(real0) - 1))) + errors++; - fixed1 = c1_fixed(phase_offset_int); - real1 = C1(phase_offset_real) * 4096.0; + fixed1 = c1_fixed(phase_offset_int); + real1 = C1(phase_offset_real) * 4096.0; - if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) - errors++; + if ((abs(fixed1) > (abs(real1) + 1)) || (abs(fixed1) < (abs(real1) - 1))) + errors++; - fixed2 = c2_fixed(phase_offset_int); - real2 = C2(phase_offset_real) * 4096.0; + fixed2 = c2_fixed(phase_offset_int); + real2 = C2(phase_offset_real) * 4096.0; - if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) - errors++; + if ((abs(fixed2) > (abs(real2) + 1)) || (abs(fixed2) < (abs(real2) - 1))) + errors++; - fixed3 = c3_fixed(phase_offset_int); - real3 = C3(phase_offset_real) * 4096.0; + fixed3 = c3_fixed(phase_offset_int); + real3 = C3(phase_offset_real) * 4096.0; - if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) - errors++; + if ((abs(fixed3) > (abs(real3) + 1)) || (abs(fixed3) < (abs(real3) - 1))) + errors++; - fixed_total = fixed0 + fixed1 + fixed2 + fixed3; - real_total = real0 + real1 + real2 + real3; + fixed_total = fixed0 + fixed1 + fixed2 + fixed3; + real_total = real0 + real1 + real2 + real3; - if ((fixed_total > 4097) || (fixed_total < 4094)) - errors ++; + if ((fixed_total > 4097) || (fixed_total < 4094)) + errors++; - if ((real_total > 4097) || (real_total < 4095)) - errors ++; - } + if ((real_total > 4097) || (real_total < 4095)) + errors++; + } - return errors; + return errors; } #endif // Find greatest common denominator between two integers. Method used here is // slow compared to Euclid's algorithm, but does not require any division. -int gcd(int a, int b) -{ - // Problem with this algorithm is that if a or b = 0 this function - // will never exit. Don't want to return 0 because any computation - // that was based on a common denoninator and tried to reduce by - // dividing by 0 would fail. Best solution that could be thought of - // would to be fail by returing a 1; - if (a <= 0 || b <= 0) - return 1; - - while (a != b) - { - if (b > a) - b = b - a; - else - { - int tmp = a;//swap large and - a = b; //small - b = tmp; - } +int gcd(int a, int b) { + // Problem with this algorithm is that if a or b = 0 this function + // will never exit. Don't want to return 0 because any computation + // that was based on a common denoninator and tried to reduce by + // dividing by 0 would fail. Best solution that could be thought of + // would to be fail by returing a 1; + if (a <= 0 || b <= 0) + return 1; + + while (a != b) { + if (b > a) + b = b - a; + else { + int tmp = a;// swap large and + a = b; // small + b = tmp; } + } - return b; + return b; } -void bicubic_coefficient_init() -{ - vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); - g_first_time = 0; +void bicubic_coefficient_init() { + vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); + g_first_time = 0; } -void bicubic_coefficient_destroy() -{ - if (!g_first_time) - { - vpx_free(g_b_scaler.l_w); +void bicubic_coefficient_destroy() { + if (!g_first_time) { + vpx_free(g_b_scaler.l_w); - vpx_free(g_b_scaler.l_h); + vpx_free(g_b_scaler.l_h); - vpx_free(g_b_scaler.l_h_uv); + vpx_free(g_b_scaler.l_h_uv); - vpx_free(g_b_scaler.c_w); + vpx_free(g_b_scaler.c_w); - vpx_free(g_b_scaler.c_h); + vpx_free(g_b_scaler.c_h); - vpx_free(g_b_scaler.c_h_uv); + vpx_free(g_b_scaler.c_h_uv); - vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); - } + vpx_memset(&g_b_scaler, 0, sizeof(BICUBIC_SCALER_STRUCT)); + } } // Create the coeffients that will be used for the cubic interpolation. @@ -292,311 +276,294 @@ void bicubic_coefficient_destroy() // regimes the phase offsets will be different. There are 4 coefficents // for each point, two on each side. The layout is that there are the // 4 coefficents for each phase in the array and then the next phase. -int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) -{ - int i; +int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height) { + int i; #ifdef FIXED_POINT - int phase_offset_int; - unsigned int fixed_mult; - int product_val = 0; + int phase_offset_int; + unsigned int fixed_mult; + int product_val = 0; #else - float phase_offset; + float phase_offset; #endif - int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; + int gcd_w, gcd_h, gcd_h_uv, d_w, d_h, d_h_uv; - if (g_first_time) - bicubic_coefficient_init(); + if (g_first_time) + bicubic_coefficient_init(); - // check to see if the coefficents have already been set up correctly - if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) - && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) - return 0; + // check to see if the coefficents have already been set up correctly + if ((in_width == g_b_scaler.in_width) && (in_height == g_b_scaler.in_height) + && (out_width == g_b_scaler.out_width) && (out_height == g_b_scaler.out_height)) + return 0; - g_b_scaler.in_width = in_width; - g_b_scaler.in_height = in_height; - g_b_scaler.out_width = out_width; - g_b_scaler.out_height = out_height; + g_b_scaler.in_width = in_width; + g_b_scaler.in_height = in_height; + g_b_scaler.out_width = out_width; + g_b_scaler.out_height = out_height; - // Don't want to allow crazy scaling, just try and prevent a catastrophic - // failure here. Want to fail after setting the member functions so if - // if the scaler is called the member functions will not scale. - if (out_width <= 0 || out_height <= 0) - return -1; + // Don't want to allow crazy scaling, just try and prevent a catastrophic + // failure here. Want to fail after setting the member functions so if + // if the scaler is called the member functions will not scale. + if (out_width <= 0 || out_height <= 0) + return -1; - // reduce in/out width and height ratios using the gcd - gcd_w = gcd(out_width, in_width); - gcd_h = gcd(out_height, in_height); - gcd_h_uv = gcd(out_height, in_height / 2); + // reduce in/out width and height ratios using the gcd + gcd_w = gcd(out_width, in_width); + gcd_h = gcd(out_height, in_height); + gcd_h_uv = gcd(out_height, in_height / 2); - // the numerator width and height are to be saved in - // globals so they can be used during the scaling process - // without having to be recalculated. - g_b_scaler.nw = out_width / gcd_w; - d_w = in_width / gcd_w; + // the numerator width and height are to be saved in + // globals so they can be used during the scaling process + // without having to be recalculated. + g_b_scaler.nw = out_width / gcd_w; + d_w = in_width / gcd_w; - g_b_scaler.nh = out_height / gcd_h; - d_h = in_height / gcd_h; + g_b_scaler.nh = out_height / gcd_h; + d_h = in_height / gcd_h; - g_b_scaler.nh_uv = out_height / gcd_h_uv; - d_h_uv = (in_height / 2) / gcd_h_uv; + g_b_scaler.nh_uv = out_height / gcd_h_uv; + d_h_uv = (in_height / 2) / gcd_h_uv; - // allocate memory for the coefficents - vpx_free(g_b_scaler.l_w); + // allocate memory for the coefficents + vpx_free(g_b_scaler.l_w); - vpx_free(g_b_scaler.l_h); + vpx_free(g_b_scaler.l_h); - vpx_free(g_b_scaler.l_h_uv); + vpx_free(g_b_scaler.l_h_uv); - g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); - g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); - g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); + g_b_scaler.l_w = (short *)vpx_memalign(32, out_width * 2); + g_b_scaler.l_h = (short *)vpx_memalign(32, out_height * 2); + g_b_scaler.l_h_uv = (short *)vpx_memalign(32, out_height * 2); - vpx_free(g_b_scaler.c_w); + vpx_free(g_b_scaler.c_w); - vpx_free(g_b_scaler.c_h); + vpx_free(g_b_scaler.c_h); - vpx_free(g_b_scaler.c_h_uv); + vpx_free(g_b_scaler.c_h_uv); - g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); - g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); - g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); + g_b_scaler.c_w = (short *)vpx_memalign(32, g_b_scaler.nw * 4 * 2); + g_b_scaler.c_h = (short *)vpx_memalign(32, g_b_scaler.nh * 4 * 2); + g_b_scaler.c_h_uv = (short *)vpx_memalign(32, g_b_scaler.nh_uv * 4 * 2); - g_b_scaler.hbuf = g_hbuf; - g_b_scaler.hbuf_uv = g_hbuf_uv; + g_b_scaler.hbuf = g_hbuf; + g_b_scaler.hbuf_uv = g_hbuf_uv; - // Set up polyphase filter taps. This needs to be done before - // the scaling because of the floating point math required. The - // coefficients are multiplied by 2^12 so that fixed point math - // can be used in the main scaling loop. + // Set up polyphase filter taps. This needs to be done before + // the scaling because of the floating point math required. The + // coefficients are multiplied by 2^12 so that fixed point math + // can be used in the main scaling loop. #ifdef FIXED_POINT - fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; + fixed_mult = (1.0 / (float)g_b_scaler.nw) * 4294967296; - product_val = 0; + product_val = 0; - for (i = 0; i < g_b_scaler.nw; i++) - { - if (product_val > g_b_scaler.nw) - product_val -= g_b_scaler.nw; + for (i = 0; i < g_b_scaler.nw; i++) { + if (product_val > g_b_scaler.nw) + product_val -= g_b_scaler.nw; - phase_offset_int = (fixed_mult * product_val) >> 16; + phase_offset_int = (fixed_mult * product_val) >> 16; - g_b_scaler.c_w[i*4] = c3_fixed(phase_offset_int); - g_b_scaler.c_w[i*4+1] = c2_fixed(phase_offset_int); - g_b_scaler.c_w[i*4+2] = c1_fixed(phase_offset_int); - g_b_scaler.c_w[i*4+3] = c0_fixed(phase_offset_int); + g_b_scaler.c_w[i * 4] = c3_fixed(phase_offset_int); + g_b_scaler.c_w[i * 4 + 1] = c2_fixed(phase_offset_int); + g_b_scaler.c_w[i * 4 + 2] = c1_fixed(phase_offset_int); + g_b_scaler.c_w[i * 4 + 3] = c0_fixed(phase_offset_int); - product_val += d_w; - } + product_val += d_w; + } - fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; + fixed_mult = (1.0 / (float)g_b_scaler.nh) * 4294967296; - product_val = 0; + product_val = 0; - for (i = 0; i < g_b_scaler.nh; i++) - { - if (product_val > g_b_scaler.nh) - product_val -= g_b_scaler.nh; + for (i = 0; i < g_b_scaler.nh; i++) { + if (product_val > g_b_scaler.nh) + product_val -= g_b_scaler.nh; - phase_offset_int = (fixed_mult * product_val) >> 16; + phase_offset_int = (fixed_mult * product_val) >> 16; - g_b_scaler.c_h[i*4] = c0_fixed(phase_offset_int); - g_b_scaler.c_h[i*4+1] = c1_fixed(phase_offset_int); - g_b_scaler.c_h[i*4+2] = c2_fixed(phase_offset_int); - g_b_scaler.c_h[i*4+3] = c3_fixed(phase_offset_int); + g_b_scaler.c_h[i * 4] = c0_fixed(phase_offset_int); + g_b_scaler.c_h[i * 4 + 1] = c1_fixed(phase_offset_int); + g_b_scaler.c_h[i * 4 + 2] = c2_fixed(phase_offset_int); + g_b_scaler.c_h[i * 4 + 3] = c3_fixed(phase_offset_int); - product_val += d_h; - } + product_val += d_h; + } - fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; + fixed_mult = (1.0 / (float)g_b_scaler.nh_uv) * 4294967296; - product_val = 0; + product_val = 0; - for (i = 0; i < g_b_scaler.nh_uv; i++) - { - if (product_val > g_b_scaler.nh_uv) - product_val -= g_b_scaler.nh_uv; + for (i = 0; i < g_b_scaler.nh_uv; i++) { + if (product_val > g_b_scaler.nh_uv) + product_val -= g_b_scaler.nh_uv; - phase_offset_int = (fixed_mult * product_val) >> 16; + phase_offset_int = (fixed_mult * product_val) >> 16; - g_b_scaler.c_h_uv[i*4] = c0_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i*4+1] = c1_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i*4+2] = c2_fixed(phase_offset_int); - g_b_scaler.c_h_uv[i*4+3] = c3_fixed(phase_offset_int); + g_b_scaler.c_h_uv[i * 4] = c0_fixed(phase_offset_int); + g_b_scaler.c_h_uv[i * 4 + 1] = c1_fixed(phase_offset_int); + g_b_scaler.c_h_uv[i * 4 + 2] = c2_fixed(phase_offset_int); + g_b_scaler.c_h_uv[i * 4 + 3] = c3_fixed(phase_offset_int); - product_val += d_h_uv; - } + product_val += d_h_uv; + } #else - for (i = 0; i < g_nw; i++) - { - phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; - g_c_w[i*4] = (C3(phase_offset) * 4096.0); - g_c_w[i*4+1] = (C2(phase_offset) * 4096.0); - g_c_w[i*4+2] = (C1(phase_offset) * 4096.0); - g_c_w[i*4+3] = (C0(phase_offset) * 4096.0); - } - - for (i = 0; i < g_nh; i++) - { - phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; - g_c_h[i*4] = (C0(phase_offset) * 4096.0); - g_c_h[i*4+1] = (C1(phase_offset) * 4096.0); - g_c_h[i*4+2] = (C2(phase_offset) * 4096.0); - g_c_h[i*4+3] = (C3(phase_offset) * 4096.0); - } - - for (i = 0; i < g_nh_uv; i++) - { - phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; - g_c_h_uv[i*4] = (C0(phase_offset) * 4096.0); - g_c_h_uv[i*4+1] = (C1(phase_offset) * 4096.0); - g_c_h_uv[i*4+2] = (C2(phase_offset) * 4096.0); - g_c_h_uv[i*4+3] = (C3(phase_offset) * 4096.0); - } + for (i = 0; i < g_nw; i++) { + phase_offset = (float)((i * d_w) % g_nw) / (float)g_nw; + g_c_w[i * 4] = (C3(phase_offset) * 4096.0); + g_c_w[i * 4 + 1] = (C2(phase_offset) * 4096.0); + g_c_w[i * 4 + 2] = (C1(phase_offset) * 4096.0); + g_c_w[i * 4 + 3] = (C0(phase_offset) * 4096.0); + } + + for (i = 0; i < g_nh; i++) { + phase_offset = (float)((i * d_h) % g_nh) / (float)g_nh; + g_c_h[i * 4] = (C0(phase_offset) * 4096.0); + g_c_h[i * 4 + 1] = (C1(phase_offset) * 4096.0); + g_c_h[i * 4 + 2] = (C2(phase_offset) * 4096.0); + g_c_h[i * 4 + 3] = (C3(phase_offset) * 4096.0); + } + + for (i = 0; i < g_nh_uv; i++) { + phase_offset = (float)((i * d_h_uv) % g_nh_uv) / (float)g_nh_uv; + g_c_h_uv[i * 4] = (C0(phase_offset) * 4096.0); + g_c_h_uv[i * 4 + 1] = (C1(phase_offset) * 4096.0); + g_c_h_uv[i * 4 + 2] = (C2(phase_offset) * 4096.0); + g_c_h_uv[i * 4 + 3] = (C3(phase_offset) * 4096.0); + } #endif - // Create an array that corresponds input lines to output lines. - // This doesn't require floating point math, but it does require - // a division and because hardware division is not present that - // is a call. - for (i = 0; i < out_width; i++) - { - g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; + // Create an array that corresponds input lines to output lines. + // This doesn't require floating point math, but it does require + // a division and because hardware division is not present that + // is a call. + for (i = 0; i < out_width; i++) { + g_b_scaler.l_w[i] = (i * d_w) / g_b_scaler.nw; - if ((g_b_scaler.l_w[i] + 2) <= in_width) - g_b_scaler.max_usable_out_width = i; + if ((g_b_scaler.l_w[i] + 2) <= in_width) + g_b_scaler.max_usable_out_width = i; - } + } - for (i = 0; i < out_height + 1; i++) - { - g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; - g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; - } + for (i = 0; i < out_height + 1; i++) { + g_b_scaler.l_h[i] = (i * d_h) / g_b_scaler.nh; + g_b_scaler.l_h_uv[i] = (i * d_h_uv) / g_b_scaler.nh_uv; + } - return 0; + return 0; } int bicubic_scale(int in_width, int in_height, int in_stride, int out_width, int out_height, int out_stride, - unsigned char *input_image, unsigned char *output_image) -{ - short *RESTRICT l_w, * RESTRICT l_h; - short *RESTRICT c_w, * RESTRICT c_h; - unsigned char *RESTRICT ip, * RESTRICT op; - unsigned char *RESTRICT hbuf; - int h, w, lw, lh; - int temp_sum; - int phase_offset_w, phase_offset_h; - - c_w = g_b_scaler.c_w; - c_h = g_b_scaler.c_h; - - op = output_image; - - l_w = g_b_scaler.l_w; - l_h = g_b_scaler.l_h; - - phase_offset_h = 0; - - for (h = 0; h < out_height; h++) - { - // select the row to work on - lh = l_h[h]; - ip = input_image + (in_stride * lh); - - // vp8_filter the row vertically into an temporary buffer. - // If the phase offset == 0 then all the multiplication - // is going to result in the output equalling the input. - // So instead point the temporary buffer to the input. - // Also handle the boundry condition of not being able to - // filter that last lines. - if (phase_offset_h && (lh < in_height - 2)) - { - hbuf = g_b_scaler.hbuf; - - for (w = 0; w < in_width; w++) - { - temp_sum = c_h[phase_offset_h*4+3] * ip[w - in_stride]; - temp_sum += c_h[phase_offset_h*4+2] * ip[w]; - temp_sum += c_h[phase_offset_h*4+1] * ip[w + in_stride]; - temp_sum += c_h[phase_offset_h*4] * ip[w + 2*in_stride]; - - hbuf[w] = temp_sum >> 12; - } - } - else - hbuf = ip; - - // increase the phase offset for the next time around. - if (++phase_offset_h >= g_b_scaler.nh) - phase_offset_h = 0; - - // now filter and expand it horizontally into the final - // output buffer + unsigned char *input_image, unsigned char *output_image) { + short *RESTRICT l_w, * RESTRICT l_h; + short *RESTRICT c_w, * RESTRICT c_h; + unsigned char *RESTRICT ip, * RESTRICT op; + unsigned char *RESTRICT hbuf; + int h, w, lw, lh; + int temp_sum; + int phase_offset_w, phase_offset_h; + + c_w = g_b_scaler.c_w; + c_h = g_b_scaler.c_h; + + op = output_image; + + l_w = g_b_scaler.l_w; + l_h = g_b_scaler.l_h; + + phase_offset_h = 0; + + for (h = 0; h < out_height; h++) { + // select the row to work on + lh = l_h[h]; + ip = input_image + (in_stride * lh); + + // vp8_filter the row vertically into an temporary buffer. + // If the phase offset == 0 then all the multiplication + // is going to result in the output equalling the input. + // So instead point the temporary buffer to the input. + // Also handle the boundry condition of not being able to + // filter that last lines. + if (phase_offset_h && (lh < in_height - 2)) { + hbuf = g_b_scaler.hbuf; + + for (w = 0; w < in_width; w++) { + temp_sum = c_h[phase_offset_h * 4 + 3] * ip[w - in_stride]; + temp_sum += c_h[phase_offset_h * 4 + 2] * ip[w]; + temp_sum += c_h[phase_offset_h * 4 + 1] * ip[w + in_stride]; + temp_sum += c_h[phase_offset_h * 4] * ip[w + 2 * in_stride]; + + hbuf[w] = temp_sum >> 12; + } + } else + hbuf = ip; + + // increase the phase offset for the next time around. + if (++phase_offset_h >= g_b_scaler.nh) + phase_offset_h = 0; + + // now filter and expand it horizontally into the final + // output buffer + phase_offset_w = 0; + + for (w = 0; w < out_width; w++) { + // get the index to use to expand the image + lw = l_w[w]; + + temp_sum = c_w[phase_offset_w * 4] * hbuf[lw - 1]; + temp_sum += c_w[phase_offset_w * 4 + 1] * hbuf[lw]; + temp_sum += c_w[phase_offset_w * 4 + 2] * hbuf[lw + 1]; + temp_sum += c_w[phase_offset_w * 4 + 3] * hbuf[lw + 2]; + temp_sum = temp_sum >> 12; + + if (++phase_offset_w >= g_b_scaler.nw) phase_offset_w = 0; - for (w = 0; w < out_width; w++) - { - // get the index to use to expand the image - lw = l_w[w]; - - temp_sum = c_w[phase_offset_w*4] * hbuf[lw - 1]; - temp_sum += c_w[phase_offset_w*4+1] * hbuf[lw]; - temp_sum += c_w[phase_offset_w*4+2] * hbuf[lw + 1]; - temp_sum += c_w[phase_offset_w*4+3] * hbuf[lw + 2]; - temp_sum = temp_sum >> 12; + // boundry conditions + if ((lw + 2) >= in_width) + temp_sum = hbuf[lw]; - if (++phase_offset_w >= g_b_scaler.nw) - phase_offset_w = 0; + if (lw == 0) + temp_sum = hbuf[0]; - // boundry conditions - if ((lw + 2) >= in_width) - temp_sum = hbuf[lw]; - - if (lw == 0) - temp_sum = hbuf[0]; - - op[w] = temp_sum; - } - - op += out_stride; + op[w] = temp_sum; } - return 0; + op += out_stride; + } + + return 0; } -void bicubic_scale_frame_reset() -{ - g_b_scaler.out_width = 0; - g_b_scaler.out_height = 0; +void bicubic_scale_frame_reset() { + g_b_scaler.out_width = 0; + g_b_scaler.out_height = 0; } void bicubic_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, - int new_width, int new_height) -{ + int new_width, int new_height) { - dst->y_width = new_width; - dst->y_height = new_height; - dst->uv_width = new_width / 2; - dst->uv_height = new_height / 2; + dst->y_width = new_width; + dst->y_height = new_height; + dst->uv_width = new_width / 2; + dst->uv_height = new_height / 2; - dst->y_stride = dst->y_width; - dst->uv_stride = dst->uv_width; + dst->y_stride = dst->y_width; + dst->uv_stride = dst->uv_width; - bicubic_scale(src->y_width, src->y_height, src->y_stride, - new_width, new_height, dst->y_stride, - src->y_buffer, dst->y_buffer); + bicubic_scale(src->y_width, src->y_height, src->y_stride, + new_width, new_height, dst->y_stride, + src->y_buffer, dst->y_buffer); - bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, - new_width / 2, new_height / 2, dst->uv_stride, - src->u_buffer, dst->u_buffer); + bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, + new_width / 2, new_height / 2, dst->uv_stride, + src->u_buffer, dst->u_buffer); - bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, - new_width / 2, new_height / 2, dst->uv_stride, - src->v_buffer, dst->v_buffer); + bicubic_scale(src->uv_width, src->uv_height, src->uv_stride, + new_width / 2, new_height / 2, dst->uv_stride, + src->v_buffer, dst->v_buffer); } diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c index b54e334cb..bb1178669 100644 --- a/vpx_scale/generic/gen_scalers.c +++ b/vpx_scale/generic/gen_scalers.c @@ -36,45 +36,43 @@ ****************************************************************************/ void vp8cx_horizontal_line_4_5_scale_c ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 4; i += 4) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char) a; - des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = src[2] * 154; - a = src[3]; - des [2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [3] = (unsigned char)((c + 102 * a + 128) >> 8); - b = src[4]; - des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); - - src += 4; - des += 5; - } - + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + unsigned i; + unsigned int a, b, c; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 4; i += 4) { a = src[0]; b = src[1]; - des [0] = (unsigned char)(a); + des [0] = (unsigned char) a; des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); c = src[2] * 154; a = src[3]; des [2] = (unsigned char)((b * 102 + c + 128) >> 8); des [3] = (unsigned char)((c + 102 * a + 128) >> 8); - des [4] = (unsigned char)(a); + b = src[4]; + des [4] = (unsigned char)((a * 205 + 51 * b + 128) >> 8); + + src += 4; + des += 5; + } + + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + c = src[2] * 154; + a = src[3]; + des [2] = (unsigned char)((b * 102 + c + 128) >> 8); + des [3] = (unsigned char)((c + 102 * a + 128) >> 8); + des [4] = (unsigned char)(a); } @@ -97,31 +95,29 @@ void vp8cx_horizontal_line_4_5_scale_c * the current band. * ****************************************************************************/ -void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d; - unsigned char *des = dest; +void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d; + unsigned char *des = dest; - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch]; - des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = des[dest_pitch*2] * 154; - d = des[dest_pitch*3]; + c = des[dest_pitch * 2] * 154; + d = des[dest_pitch * 3]; - des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); + des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); + des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - /* First line in next band */ - a = des [dest_pitch * 5]; - des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); + /* First line in next band */ + a = des [dest_pitch * 5]; + des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8); - des ++; - } + des++; + } } /**************************************************************************** @@ -144,30 +140,28 @@ void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitc * last band. * ****************************************************************************/ -void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d; - unsigned char *des = dest; +void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d; + unsigned char *des = dest; - for (i = 0; i < dest_width; ++i) - { - a = des[0]; - b = des[dest_pitch]; + for (i = 0; i < dest_width; ++i) { + a = des[0]; + b = des[dest_pitch]; - des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); + des[dest_pitch] = (unsigned char)((a * 51 + 205 * b + 128) >> 8); - c = des[dest_pitch*2] * 154; - d = des[dest_pitch*3]; + c = des[dest_pitch * 2] * 154; + d = des[dest_pitch * 3]; - des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8); + des [dest_pitch * 2] = (unsigned char)((b * 102 + c + 128) >> 8); + des [dest_pitch * 3] = (unsigned char)((c + 102 * d + 128) >> 8); - /* No other line for interplation of this line, so .. */ - des[dest_pitch*4] = (unsigned char) d; + /* No other line for interplation of this line, so .. */ + des[dest_pitch * 4] = (unsigned char) d; - des++; - } + des++; + } } /**************************************************************************** @@ -192,38 +186,36 @@ void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest ****************************************************************************/ void vp8cx_horizontal_line_2_3_scale_c ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 2; i += 2) - { - a = src[0]; - b = src[1]; - c = src[2]; - - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); - - src += 2; - des += 3; - } - + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 2; i += 2) { a = src[0]; b = src[1]; + c = src[2]; + des [0] = (unsigned char)(a); des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [2] = (unsigned char)(b); + des [2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); + + src += 2; + des += 3; + } + + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); + des [2] = (unsigned char)(b); } @@ -246,22 +238,20 @@ void vp8cx_horizontal_line_2_3_scale_c * the current band. * ****************************************************************************/ -void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; - c = des[dest_pitch*3]; - des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [dest_pitch*2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); - - des++; - } +void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch]; + c = des[dest_pitch * 3]; + des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); + des [dest_pitch * 2] = (unsigned char)((b * 171 + 85 * c + 128) >> 8); + + des++; + } } /**************************************************************************** @@ -284,21 +274,19 @@ void vp8cx_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitc * last band. * ****************************************************************************/ -void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) - { - a = des [0]; - b = des [dest_pitch]; - - des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); - des [dest_pitch*2] = (unsigned char)(b); - des++; - } +void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b; + unsigned char *des = dest; + + for (i = 0; i < dest_width; ++i) { + a = des [0]; + b = des [dest_pitch]; + + des [dest_pitch ] = (unsigned char)((a * 85 + 171 * b + 128) >> 8); + des [dest_pitch * 2] = (unsigned char)(b); + des++; + } } /**************************************************************************** @@ -323,47 +311,45 @@ void vp8cx_last_vertical_band_2_3_scale_c(unsigned char *dest, unsigned int dest ****************************************************************************/ void vp8cx_horizontal_line_3_5_scale_c ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 3; i += 3) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = src[2] ; - des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - a = src[3]; - des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); - - src += 3; - des += 5; - } - + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 3; i += 3) { a = src[0]; b = src[1]; des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - c = src[2] ; + + c = src[2]; des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - des [4] = (unsigned char)(c); + a = src[3]; + des [4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); + + src += 3; + des += 5; + } + + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + + des [1] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); + c = src[2]; + des [2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); + des [3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); + + des [4] = (unsigned char)(c); } /**************************************************************************** @@ -385,28 +371,26 @@ void vp8cx_horizontal_line_3_5_scale_c * the current band. * ****************************************************************************/ -void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - - /* First line in next band... */ - a = des [dest_pitch * 5]; - des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); - - des++; - } +void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch]; + des [dest_pitch] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); + + c = des[dest_pitch * 2]; + des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); + des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); + + /* First line in next band... */ + a = des [dest_pitch * 5]; + des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8); + + des++; + } } /**************************************************************************** @@ -429,28 +413,26 @@ void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitc * last band. * ****************************************************************************/ -void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; +void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; - for (i = 0; i < dest_width; ++i) - { - a = des [0]; - b = des [dest_pitch]; + for (i = 0; i < dest_width; ++i) { + a = des [0]; + b = des [dest_pitch]; - des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); + des [ dest_pitch ] = (unsigned char)((a * 102 + 154 * b + 128) >> 8); - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); - des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); + c = des[dest_pitch * 2]; + des [dest_pitch * 2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8); + des [dest_pitch * 3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8); - /* No other line for interplation of this line, so .. */ - des [ dest_pitch * 4 ] = (unsigned char)(c) ; + /* No other line for interplation of this line, so .. */ + des [ dest_pitch * 4 ] = (unsigned char)(c); - des++; - } + des++; + } } /**************************************************************************** @@ -475,44 +457,42 @@ void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest ****************************************************************************/ void vp8cx_horizontal_line_3_4_scale_c ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 3; i += 3) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - - c = src[2]; - des [2] = (unsigned char)((b + c + 1) >> 1); - - a = src[3]; - des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); - - src += 3; - des += 4; - } - + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 3; i += 3) { a = src[0]; b = src[1]; des [0] = (unsigned char)(a); des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - c = src[2] ; + c = src[2]; des [2] = (unsigned char)((b + c + 1) >> 1); - des [3] = (unsigned char)(c); + + a = src[3]; + des [3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); + + src += 3; + des += 4; + } + + a = src[0]; + b = src[1]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); + + c = src[2]; + des [2] = (unsigned char)((b + c + 1) >> 1); + des [3] = (unsigned char)(c); } /**************************************************************************** @@ -534,27 +514,25 @@ void vp8cx_horizontal_line_3_4_scale_c * the current band. * ****************************************************************************/ -void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; - - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); - - /* First line in next band... */ - a = des [dest_pitch*4]; - des [dest_pitch*3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); - - des++; - } +void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; + + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch]; + des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); + + c = des[dest_pitch * 2]; + des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); + + /* First line in next band... */ + a = des [dest_pitch * 4]; + des [dest_pitch * 3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8); + + des++; + } } /**************************************************************************** @@ -577,27 +555,25 @@ void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitc * last band. * ****************************************************************************/ -void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c; - unsigned char *des = dest; +void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c; + unsigned char *des = dest; - for (i = 0; i < dest_width; ++i) - { - a = des [0]; - b = des [dest_pitch]; + for (i = 0; i < dest_width; ++i) { + a = des [0]; + b = des [dest_pitch]; - des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); + des [dest_pitch] = (unsigned char)((a * 64 + b * 192 + 128) >> 8); - c = des[dest_pitch*2]; - des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1); + c = des[dest_pitch * 2]; + des [dest_pitch * 2] = (unsigned char)((b + c + 1) >> 1); - /* No other line for interplation of this line, so .. */ - des [dest_pitch*3] = (unsigned char)(c); + /* No other line for interplation of this line, so .. */ + des [dest_pitch * 3] = (unsigned char)(c); - des++; - } + des++; + } } /**************************************************************************** @@ -621,32 +597,30 @@ void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest ****************************************************************************/ void vp8cx_horizontal_line_1_2_scale_c ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width - 1; i += 1) - { - a = src[0]; - b = src[1]; - des [0] = (unsigned char)(a); - des [1] = (unsigned char)((a + b + 1) >> 1); - src += 1; - des += 2; - } - + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + unsigned int i; + unsigned int a, b; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width - 1; i += 1) { a = src[0]; + b = src[1]; des [0] = (unsigned char)(a); - des [1] = (unsigned char)(a); + des [1] = (unsigned char)((a + b + 1) >> 1); + src += 1; + des += 2; + } + + a = src[0]; + des [0] = (unsigned char)(a); + des [1] = (unsigned char)(a); } /**************************************************************************** @@ -668,21 +642,19 @@ void vp8cx_horizontal_line_1_2_scale_c * the current band. * ****************************************************************************/ -void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b; - unsigned char *des = dest; +void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b; + unsigned char *des = dest; - for (i = 0; i < dest_width; i++) - { - a = des [0]; - b = des [dest_pitch * 2]; + for (i = 0; i < dest_width; i++) { + a = des [0]; + b = des [dest_pitch * 2]; - des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); + des[dest_pitch] = (unsigned char)((a + b + 1) >> 1); - des++; - } + des++; + } } /**************************************************************************** @@ -705,16 +677,14 @@ void vp8cx_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitc * last band. * ****************************************************************************/ -void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned char *des = dest; - - for (i = 0; i < dest_width; ++i) - { - des[dest_pitch] = des[0]; - des++; - } +void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned char *des = dest; + + for (i = 0; i < dest_width; ++i) { + des[dest_pitch] = des[0]; + des++; + } } @@ -742,65 +712,61 @@ void vp8cx_last_vertical_band_1_2_scale_c(unsigned char *dest, unsigned int dest ****************************************************************************/ void vp8cx_horizontal_line_5_4_scale_c ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width; i += 5) - { - a = src[0]; - b = src[1]; - c = src[2]; - d = src[3]; - e = src[4]; - - des[0] = (unsigned char) a; - des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); - des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); - des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); - - src += 5; - des += 4; - } + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + unsigned i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width; i += 5) { + a = src[0]; + b = src[1]; + c = src[2]; + d = src[3]; + e = src[4]; + + des[0] = (unsigned char) a; + des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); + des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); + des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); + + src += 5; + des += 4; + } } -void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - unsigned char *src = source; +void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + unsigned char *src = source; - for (i = 0; i < dest_width; i++) - { + for (i = 0; i < dest_width; i++) { - a = src[0 * src_pitch]; - b = src[1 * src_pitch]; - c = src[2 * src_pitch]; - d = src[3 * src_pitch]; - e = src[4 * src_pitch]; + a = src[0 * src_pitch]; + b = src[1 * src_pitch]; + c = src[2 * src_pitch]; + d = src[3 * src_pitch]; + e = src[4 * src_pitch]; - des[0 * dest_pitch] = (unsigned char) a; - des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); - des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); - des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); + des[0 * dest_pitch] = (unsigned char) a; + des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); + des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); + des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); - src ++; - des ++; + src++; + des++; - } + } } @@ -826,61 +792,57 @@ void vp8cx_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pit ****************************************************************************/ void vp8cx_horizontal_line_5_3_scale_c ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a, b, c, d , e; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width; i += 5) - { - a = src[0]; - b = src[1]; - c = src[2]; - d = src[3]; - e = src[4]; - - des[0] = (unsigned char) a; - des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); - des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); - - src += 5; - des += 3; - } + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + unsigned int i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width; i += 5) { + a = src[0]; + b = src[1]; + c = src[2]; + d = src[3]; + e = src[4]; + + des[0] = (unsigned char) a; + des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); + des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); + + src += 5; + des += 3; + } } -void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - unsigned int i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - unsigned char *src = source; +void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + unsigned int i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + unsigned char *src = source; - for (i = 0; i < dest_width; i++) - { + for (i = 0; i < dest_width; i++) { - a = src[0 * src_pitch]; - b = src[1 * src_pitch]; - c = src[2 * src_pitch]; - d = src[3 * src_pitch]; - e = src[4 * src_pitch]; + a = src[0 * src_pitch]; + b = src[1 * src_pitch]; + c = src[2 * src_pitch]; + d = src[3 * src_pitch]; + e = src[4 * src_pitch]; - des[0 * dest_pitch] = (unsigned char) a; - des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); - des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); + des[0 * dest_pitch] = (unsigned char) a; + des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); + des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); - src ++; - des ++; + src++; + des++; - } + } } /**************************************************************************** @@ -904,53 +866,48 @@ void vp8cx_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pit ****************************************************************************/ void vp8cx_horizontal_line_2_1_scale_c ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - unsigned int i; - unsigned int a; - unsigned char *des = dest; - const unsigned char *src = source; - - (void) dest_width; - - for (i = 0; i < source_width; i += 2) - { - a = src[0]; - des [0] = (unsigned char)(a); - src += 2; - des += 1; - } + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + unsigned int i; + unsigned int a; + unsigned char *des = dest; + const unsigned char *src = source; + + (void) dest_width; + + for (i = 0; i < source_width; i += 2) { + a = src[0]; + des [0] = (unsigned char)(a); + src += 2; + des += 1; + } } -void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - (void) dest_pitch; - (void) src_pitch; - vpx_memcpy(dest, source, dest_width); +void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + (void) dest_pitch; + (void) src_pitch; + vpx_memcpy(dest, source, dest_width); } -void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - int i; - int temp; - int width = dest_width; - - (void) dest_pitch; - - for (i = 0; i < width; i++) - { - temp = 8; - temp += source[i-(int)src_pitch] * 3; - temp += source[i] * 10; - temp += source[i+src_pitch] * 3; - temp >>= 4 ; - dest[i] = (unsigned char)(temp); - } +void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + int i; + int temp; + int width = dest_width; + + (void) dest_pitch; + + for (i = 0; i < width; i++) { + temp = 8; + temp += source[i - (int)src_pitch] * 3; + temp += source[i] * 10; + temp += source[i + src_pitch] * 3; + temp >>= 4; + dest[i] = (unsigned char)(temp); + } } diff --git a/vpx_scale/generic/scalesystemdependent.c b/vpx_scale/generic/scalesystemdependent.c index 92cebc4f4..b9d6de4b1 100644 --- a/vpx_scale/generic/scalesystemdependent.c +++ b/vpx_scale/generic/scalesystemdependent.c @@ -42,10 +42,9 @@ extern void vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG * SPECIAL NOTES : None. * ****************************************************************************/ -void vp8_scale_machine_specific_config() -{ - vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders; - vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly; - vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame; +void vp8_scale_machine_specific_config() { + vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders; + vp8_yv12_copy_frame_yonly_ptr = vp8_yv12_copy_frame_yonly; + vp8_yv12_copy_frame_ptr = vp8_yv12_copy_frame; } diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c index 13c9122f0..82485c73b 100644 --- a/vpx_scale/generic/vpxscale.c +++ b/vpx_scale/generic/vpxscale.c @@ -55,18 +55,17 @@ void (*vp8_horizontal_line_5_4_scale)(const unsigned char *source, unsigned int # include "vpxscale_nofp.h" #endif -typedef struct -{ - int expanded_frame_width; - int expanded_frame_height; +typedef struct { + int expanded_frame_width; + int expanded_frame_height; - int HScale; - int HRatio; - int VScale; - int VRatio; + int HScale; + int HRatio; + int VScale; + int VRatio; - YV12_BUFFER_CONFIG *src_yuv_config; - YV12_BUFFER_CONFIG *dst_yuv_config; + YV12_BUFFER_CONFIG *src_yuv_config; + YV12_BUFFER_CONFIG *dst_yuv_config; } SCALE_VARS; @@ -90,15 +89,14 @@ typedef struct ****************************************************************************/ static void horizontal_line_copy( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - - duck_memcpy(dest, source, source_width); + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; + + duck_memcpy(dest, source, source_width); } /**************************************************************************** * @@ -120,16 +118,15 @@ void horizontal_line_copy( ****************************************************************************/ static void null_scale( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - (void) dest; - (void) dest_pitch; - (void) dest_width; - - return; + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + (void) dest; + (void) dest_pitch; + (void) dest_width; + + return; } /**************************************************************************** @@ -157,35 +154,33 @@ void null_scale( static void scale1d_2t1_i ( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i, j; - unsigned int temp; - int source_pitch = source_step; - (void) source_length; - (void) source_scale; - (void) dest_scale; - - source_step *= 2; - dest[0] = source[0]; - - for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) - { - temp = 8; - temp += 3 * source[j-source_pitch]; - temp += 10 * source[j]; - temp += 3 * source[j+source_pitch]; - temp >>= 4; - dest[i] = (char)(temp); - } + const unsigned char *source, + int source_step, + unsigned int source_scale, + unsigned int source_length, + unsigned char *dest, + int dest_step, + unsigned int dest_scale, + unsigned int dest_length +) { + unsigned int i, j; + unsigned int temp; + int source_pitch = source_step; + (void) source_length; + (void) source_scale; + (void) dest_scale; + + source_step *= 2; + dest[0] = source[0]; + + for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) { + temp = 8; + temp += 3 * source[j - source_pitch]; + temp += 10 * source[j]; + temp += 3 * source[j + source_pitch]; + temp >>= 4; + dest[i] = (char)(temp); + } } /**************************************************************************** @@ -213,27 +208,26 @@ void scale1d_2t1_i static void scale1d_2t1_ps ( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i, j; - - (void) source_length; - (void) source_scale; - (void) dest_scale; - - source_step *= 2; - j = 0; - - for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) - dest[i] = source[j]; + const unsigned char *source, + int source_step, + unsigned int source_scale, + unsigned int source_length, + unsigned char *dest, + int dest_step, + unsigned int dest_scale, + unsigned int dest_length +) { + unsigned int i, j; + + (void) source_length; + (void) source_scale; + (void) dest_scale; + + source_step *= 2; + j = 0; + + for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) + dest[i] = source[j]; } /**************************************************************************** * @@ -260,45 +254,42 @@ void scale1d_2t1_ps static void scale1d_c ( - const unsigned char *source, - int source_step, - unsigned int source_scale, - unsigned int source_length, - unsigned char *dest, - int dest_step, - unsigned int dest_scale, - unsigned int dest_length -) -{ - unsigned int i; - unsigned int round_value = dest_scale / 2; - unsigned int left_modifier = dest_scale; - unsigned int right_modifier = 0; - unsigned char left_pixel = *source; - unsigned char right_pixel = *(source + source_step); - - (void) source_length; - - /* These asserts are needed if there are boundary issues... */ - /*assert ( dest_scale > source_scale );*/ - /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ - - for (i = 0; i < dest_length * dest_step; i += dest_step) - { - dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); - - right_modifier += source_scale; - - while (right_modifier > dest_scale) - { - right_modifier -= dest_scale; - source += source_step; - left_pixel = *source; - right_pixel = *(source + source_step); - } - - left_modifier = dest_scale - right_modifier; + const unsigned char *source, + int source_step, + unsigned int source_scale, + unsigned int source_length, + unsigned char *dest, + int dest_step, + unsigned int dest_scale, + unsigned int dest_length +) { + unsigned int i; + unsigned int round_value = dest_scale / 2; + unsigned int left_modifier = dest_scale; + unsigned int right_modifier = 0; + unsigned char left_pixel = *source; + unsigned char right_pixel = *(source + source_step); + + (void) source_length; + + /* These asserts are needed if there are boundary issues... */ + /*assert ( dest_scale > source_scale );*/ + /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/ + + for (i = 0; i < dest_length * dest_step; i += dest_step) { + dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); + + right_modifier += source_scale; + + while (right_modifier > dest_scale) { + right_modifier -= dest_scale; + source += source_step; + left_pixel = *source; + right_pixel = *(source + source_step); } + + left_modifier = dest_scale - right_modifier; + } } /**************************************************************************** @@ -334,246 +325,221 @@ void scale1d_c static void Scale2D ( - /*const*/ - unsigned char *source, - int source_pitch, - unsigned int source_width, - unsigned int source_height, - unsigned char *dest, - int dest_pitch, - unsigned int dest_width, - unsigned int dest_height, - unsigned char *temp_area, - unsigned char temp_area_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced -) -{ - /*unsigned*/ - int i, j, k; - int bands; - int dest_band_height; - int source_band_height; - - typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, - unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); - - Scale1D Scale1Dv = scale1d_c; - Scale1D Scale1Dh = scale1d_c; - - void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; - void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; - - int ratio_scalable = 1; - int interpolation = 0; - - unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */ - unsigned char *line_src; - - - source_base = (unsigned char *)source; - - if (source_pitch < 0) - { - int offset; - - offset = (source_height - 1); - offset *= source_pitch; - - source_base += offset; - } - - /* find out the ratio for each direction */ - switch (hratio * 10 / hscale) - { + /*const*/ + unsigned char *source, + int source_pitch, + unsigned int source_width, + unsigned int source_height, + unsigned char *dest, + int dest_pitch, + unsigned int dest_width, + unsigned int dest_height, + unsigned char *temp_area, + unsigned char temp_area_height, + unsigned int hscale, + unsigned int hratio, + unsigned int vscale, + unsigned int vratio, + unsigned int interlaced +) { + /*unsigned*/ + int i, j, k; + int bands; + int dest_band_height; + int source_band_height; + + typedef void (*Scale1D)(const unsigned char * source, int source_step, unsigned int source_scale, unsigned int source_length, + unsigned char * dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); + + Scale1D Scale1Dv = scale1d_c; + Scale1D Scale1Dh = scale1d_c; + + void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; + void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; + + int ratio_scalable = 1; + int interpolation = 0; + + unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */ + unsigned char *line_src; + + + source_base = (unsigned char *)source; + + if (source_pitch < 0) { + int offset; + + offset = (source_height - 1); + offset *= source_pitch; + + source_base += offset; + } + + /* find out the ratio for each direction */ + switch (hratio * 10 / hscale) { case 8: - /* 4-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_5_4_scale; - break; + /* 4-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_5_4_scale; + break; case 6: - /* 3-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_5_3_scale; - break; + /* 3-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_5_3_scale; + break; case 5: - /* 1-2 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_2_1_scale; - break; + /* 1-2 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_2_1_scale; + break; default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; - } + /* The ratio is not acceptable now */ + /* throw("The ratio is not acceptable for now!"); */ + ratio_scalable = 0; + break; + } - switch (vratio * 10 / vscale) - { + switch (vratio * 10 / vscale) { case 8: - /* 4-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_5_4_scale; - source_band_height = 5; - dest_band_height = 4; - break; + /* 4-5 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_5_4_scale; + source_band_height = 5; + dest_band_height = 4; + break; case 6: - /* 3-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_5_3_scale; - source_band_height = 5; - dest_band_height = 3; - break; + /* 3-5 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_5_3_scale; + source_band_height = 5; + dest_band_height = 3; + break; case 5: - /* 1-2 Scale in vertical direction */ + /* 1-2 Scale in vertical direction */ - if (interlaced) - { - /* if the content is interlaced, point sampling is used */ - vert_band_scale = vp8_vertical_band_2_1_scale; - } - else - { + if (interlaced) { + /* if the content is interlaced, point sampling is used */ + vert_band_scale = vp8_vertical_band_2_1_scale; + } else { - interpolation = 1; - /* if the content is progressive, interplo */ - vert_band_scale = vp8_vertical_band_2_1_scale_i; + interpolation = 1; + /* if the content is progressive, interplo */ + vert_band_scale = vp8_vertical_band_2_1_scale_i; - } + } - source_band_height = 2; - dest_band_height = 1; - break; + source_band_height = 2; + dest_band_height = 1; + break; default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; + /* The ratio is not acceptable now */ + /* throw("The ratio is not acceptable for now!"); */ + ratio_scalable = 0; + break; + } + + if (ratio_scalable) { + if (source_height == dest_height) { + /* for each band of the image */ + for (k = 0; k < (int)dest_height; k++) { + horiz_line_scale(source, source_width, dest, dest_width); + source += source_pitch; + dest += dest_pitch; + } + + return; } - if (ratio_scalable) - { - if (source_height == dest_height) - { - /* for each band of the image */ - for (k = 0; k < (int)dest_height; k++) - { - horiz_line_scale(source, source_width, dest, dest_width); - source += source_pitch; - dest += dest_pitch; - } - - return; - } - - if (interpolation) - { - if (source < source_base) - source = source_base; - - horiz_line_scale(source, source_width, temp_area, dest_width); - } - - for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) - { - /* scale one band horizontally */ - for (i = 0; i < source_band_height; i++) - { - /* Trap case where we could read off the base of the source buffer */ - - line_src = (unsigned char *)source + i * source_pitch; - - if (line_src < source_base) - line_src = source_base; - - horiz_line_scale(line_src, source_width, - temp_area + (i + 1)*dest_pitch, dest_width); - } - - /* Vertical scaling is in place */ - vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); - - if (interpolation) - vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); - - /* Next band... */ - source += (unsigned long) source_band_height * source_pitch; - dest += (unsigned long) dest_band_height * dest_pitch; - } - - return; + if (interpolation) { + if (source < source_base) + source = source_base; + + horiz_line_scale(source, source_width, temp_area, dest_width); } - if (hscale == 2 && hratio == 1) - Scale1Dh = scale1d_2t1_ps; + for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) { + /* scale one band horizontally */ + for (i = 0; i < source_band_height; i++) { + /* Trap case where we could read off the base of the source buffer */ - if (vscale == 2 && vratio == 1) - { - if (interlaced) - Scale1Dv = scale1d_2t1_ps; - else - Scale1Dv = scale1d_2t1_i; - } + line_src = (unsigned char *)source + i * source_pitch; - if (source_height == dest_height) - { - /* for each band of the image */ - for (k = 0; k < (int)dest_height; k++) - { - Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); - source += source_pitch; - dest += dest_pitch; - } - - return; - } + if (line_src < source_base) + line_src = source_base; + + horiz_line_scale(line_src, source_width, + temp_area + (i + 1)*dest_pitch, dest_width); + } + + /* Vertical scaling is in place */ + vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); - if (dest_height > source_height) - { - dest_band_height = temp_area_height - 1; - source_band_height = dest_band_height * source_height / dest_height; + if (interpolation) + vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width); + + /* Next band... */ + source += (unsigned long) source_band_height * source_pitch; + dest += (unsigned long) dest_band_height * dest_pitch; } + + return; + } + + if (hscale == 2 && hratio == 1) + Scale1Dh = scale1d_2t1_ps; + + if (vscale == 2 && vratio == 1) { + if (interlaced) + Scale1Dv = scale1d_2t1_ps; else - { - source_band_height = temp_area_height - 1; - dest_band_height = source_band_height * vratio / vscale; + Scale1Dv = scale1d_2t1_i; + } + + if (source_height == dest_height) { + /* for each band of the image */ + for (k = 0; k < (int)dest_height; k++) { + Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); + source += source_pitch; + dest += dest_pitch; } - /* first row needs to be done so that we can stay one row ahead for vertical zoom */ - Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); + return; + } + + if (dest_height > source_height) { + dest_band_height = temp_area_height - 1; + source_band_height = dest_band_height * source_height / dest_height; + } else { + source_band_height = temp_area_height - 1; + dest_band_height = source_band_height * vratio / vscale; + } - /* for each band of the image */ - bands = (dest_height + dest_band_height - 1) / dest_band_height; - - for (k = 0; k < bands; k++) - { - /* scale one band horizontally */ - for (i = 1; i < source_band_height + 1; i++) - { - if (k * source_band_height + i < (int) source_height) - { - Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, - temp_area + i * dest_pitch, 1, hratio, dest_width); - } - else /* Duplicate the last row */ - { - /* copy temp_area row 0 over from last row in the past */ - duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); - } - } - - /* scale one band vertically */ - for (j = 0; j < (int)dest_width; j++) - { - Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, - &dest[j], dest_pitch, vratio, dest_band_height); - } + /* first row needs to be done so that we can stay one row ahead for vertical zoom */ + Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); + /* for each band of the image */ + bands = (dest_height + dest_band_height - 1) / dest_band_height; + + for (k = 0; k < bands; k++) { + /* scale one band horizontally */ + for (i = 1; i < source_band_height + 1; i++) { + if (k * source_band_height + i < (int) source_height) { + Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, + temp_area + i * dest_pitch, 1, hratio, dest_width); + } else { /* Duplicate the last row */ /* copy temp_area row 0 over from last row in the past */ - duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); + duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch); + } + } - /* move to the next band */ - source += source_band_height * source_pitch; - dest += dest_band_height * dest_pitch; + /* scale one band vertically */ + for (j = 0; j < (int)dest_width; j++) { + Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, + &dest[j], dest_pitch, vratio, dest_band_height); } + + /* copy temp_area row 0 over from last row in the past */ + duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); + + /* move to the next band */ + source += source_band_height * source_pitch; + dest += dest_band_height * dest_pitch; + } } /**************************************************************************** @@ -602,57 +568,56 @@ void Scale2D ****************************************************************************/ void vp8_scale_frame ( - YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - unsigned char *temp_area, - unsigned char temp_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced -) -{ - int i; - int dw = (hscale - 1 + src->y_width * hratio) / hscale; - int dh = (vscale - 1 + src->y_height * vratio) / vscale; - - /* call our internal scaling routines!! */ - Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, - (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw < (int)dst->y_width) - for (i = 0; i < dh; i++) - duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i*dst->y_stride+dw-2], dst->y_width - dw + 1); - - if (dh < (int)dst->y_height) - for (i = dh - 1; i < (int)dst->y_height; i++) - duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); - - Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, - (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw / 2 < (int)dst->uv_width) - for (i = 0; i < dst->uv_height; i++) - duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); - - if (dh / 2 < (int)dst->uv_height) - for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) - duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); - - Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, - (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, - temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); - - if (dw / 2 < (int)dst->uv_width) - for (i = 0; i < dst->uv_height; i++) - duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i*dst->uv_stride+dw/2-2], dst->uv_width - dw / 2 + 1); - - if (dh / 2 < (int) dst->uv_height) - for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) - duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, + unsigned char *temp_area, + unsigned char temp_height, + unsigned int hscale, + unsigned int hratio, + unsigned int vscale, + unsigned int vratio, + unsigned int interlaced +) { + int i; + int dw = (hscale - 1 + src->y_width * hratio) / hscale; + int dh = (vscale - 1 + src->y_height * vratio) / vscale; + + /* call our internal scaling routines!! */ + Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height, + (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh, + temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); + + if (dw < (int)dst->y_width) + for (i = 0; i < dh; i++) + duck_memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); + + if (dh < (int)dst->y_height) + for (i = dh - 1; i < (int)dst->y_height; i++) + duck_memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); + + Scale2D((unsigned char *) src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, + (unsigned char *) dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, + temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); + + if (dw / 2 < (int)dst->uv_width) + for (i = 0; i < dst->uv_height; i++) + duck_memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); + + if (dh / 2 < (int)dst->uv_height) + for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) + duck_memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); + + Scale2D((unsigned char *) src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, + (unsigned char *) dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, + temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); + + if (dw / 2 < (int)dst->uv_width) + for (i = 0; i < dst->uv_height; i++) + duck_memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); + + if (dh / 2 < (int) dst->uv_height) + for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) + duck_memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2)*dst->uv_stride, dst->uv_width); } /**************************************************************************** * @@ -681,183 +646,177 @@ void vp8_scale_frame static int any_ratio_2d_scale ( - SCALE_VARS *si, - const unsigned char *source, - int source_pitch, - unsigned int source_width, - unsigned int source_height, - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width, - unsigned int dest_height -) -{ - unsigned int i, k; - unsigned int src_band_height = 0; - unsigned int dest_band_height = 0; - - /* suggested scale factors */ - int hs = si->HScale; - int hr = si->HRatio; - int vs = si->VScale; - int vr = si->VRatio; - - /* assume the ratios are scalable instead of should be centered */ - int ratio_scalable = 1; - - const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch))); - const unsigned char *line_src; - - void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; - void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; - void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; - - (void) si; - - /* find out the ratio for each direction */ - switch (hr * 30 / hs) - { + SCALE_VARS *si, + const unsigned char *source, + int source_pitch, + unsigned int source_width, + unsigned int source_height, + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width, + unsigned int dest_height +) { + unsigned int i, k; + unsigned int src_band_height = 0; + unsigned int dest_band_height = 0; + + /* suggested scale factors */ + int hs = si->HScale; + int hr = si->HRatio; + int vs = si->VScale; + int vr = si->VRatio; + + /* assume the ratios are scalable instead of should be centered */ + int ratio_scalable = 1; + + const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch))); + const unsigned char *line_src; + + void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; + void (*vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; + void (*last_vert_band_scale)(unsigned char *, unsigned int, unsigned int) = NULL; + + (void) si; + + /* find out the ratio for each direction */ + switch (hr * 30 / hs) { case 24: - /* 4-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_4_5_scale; - break; + /* 4-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_4_5_scale; + break; case 22: - /* 3-4 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_3_4_scale; - break; + /* 3-4 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_3_4_scale; + break; case 20: - /* 4-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_2_3_scale; - break; + /* 4-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_2_3_scale; + break; case 18: - /* 3-5 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_3_5_scale; - break; + /* 3-5 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_3_5_scale; + break; case 15: - /* 1-2 Scale in Width direction */ - horiz_line_scale = vp8_horizontal_line_1_2_scale; - break; + /* 1-2 Scale in Width direction */ + horiz_line_scale = vp8_horizontal_line_1_2_scale; + break; case 30: - /* no scale in Width direction */ - horiz_line_scale = horizontal_line_copy; - break; + /* no scale in Width direction */ + horiz_line_scale = horizontal_line_copy; + break; default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; - } + /* The ratio is not acceptable now */ + /* throw("The ratio is not acceptable for now!"); */ + ratio_scalable = 0; + break; + } - switch (vr * 30 / vs) - { + switch (vr * 30 / vs) { case 24: - /* 4-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_4_5_scale; - last_vert_band_scale = vp8_last_vertical_band_4_5_scale; - src_band_height = 4; - dest_band_height = 5; - break; + /* 4-5 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_4_5_scale; + last_vert_band_scale = vp8_last_vertical_band_4_5_scale; + src_band_height = 4; + dest_band_height = 5; + break; case 22: - /* 3-4 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_3_4_scale; - last_vert_band_scale = vp8_last_vertical_band_3_4_scale; - src_band_height = 3; - dest_band_height = 4; - break; + /* 3-4 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_3_4_scale; + last_vert_band_scale = vp8_last_vertical_band_3_4_scale; + src_band_height = 3; + dest_band_height = 4; + break; case 20: - /* 2-3 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_2_3_scale; - last_vert_band_scale = vp8_last_vertical_band_2_3_scale; - src_band_height = 2; - dest_band_height = 3; - break; + /* 2-3 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_2_3_scale; + last_vert_band_scale = vp8_last_vertical_band_2_3_scale; + src_band_height = 2; + dest_band_height = 3; + break; case 18: - /* 3-5 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_3_5_scale; - last_vert_band_scale = vp8_last_vertical_band_3_5_scale; - src_band_height = 3; - dest_band_height = 5; - break; + /* 3-5 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_3_5_scale; + last_vert_band_scale = vp8_last_vertical_band_3_5_scale; + src_band_height = 3; + dest_band_height = 5; + break; case 15: - /* 1-2 Scale in vertical direction */ - vert_band_scale = vp8_vertical_band_1_2_scale; - last_vert_band_scale = vp8_last_vertical_band_1_2_scale; - src_band_height = 1; - dest_band_height = 2; - break; + /* 1-2 Scale in vertical direction */ + vert_band_scale = vp8_vertical_band_1_2_scale; + last_vert_band_scale = vp8_last_vertical_band_1_2_scale; + src_band_height = 1; + dest_band_height = 2; + break; case 30: - /* no scale in Width direction */ - vert_band_scale = null_scale; - last_vert_band_scale = null_scale; - src_band_height = 4; - dest_band_height = 4; - break; + /* no scale in Width direction */ + vert_band_scale = null_scale; + last_vert_band_scale = null_scale; + src_band_height = 4; + dest_band_height = 4; + break; default: - /* The ratio is not acceptable now */ - /* throw("The ratio is not acceptable for now!"); */ - ratio_scalable = 0; - break; - } + /* The ratio is not acceptable now */ + /* throw("The ratio is not acceptable for now!"); */ + ratio_scalable = 0; + break; + } - if (ratio_scalable == 0) - return ratio_scalable; + if (ratio_scalable == 0) + return ratio_scalable; - horiz_line_scale(source, source_width, dest, dest_width); + horiz_line_scale(source, source_width, dest, dest_width); - /* except last band */ - for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) - { - /* scale one band horizontally */ - for (i = 1; i < src_band_height; i++) - { - /* Trap case where we could read off the base of the source buffer */ - line_src = source + i * source_pitch; + /* except last band */ + for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++) { + /* scale one band horizontally */ + for (i = 1; i < src_band_height; i++) { + /* Trap case where we could read off the base of the source buffer */ + line_src = source + i * source_pitch; - if (line_src < source_base) - line_src = source_base; + if (line_src < source_base) + line_src = source_base; - horiz_line_scale(line_src, source_width, - dest + i * dest_pitch, dest_width); - } + horiz_line_scale(line_src, source_width, + dest + i * dest_pitch, dest_width); + } - /* first line of next band */ - /* Trap case where we could read off the base of the source buffer */ - line_src = source + src_band_height * source_pitch; + /* first line of next band */ + /* Trap case where we could read off the base of the source buffer */ + line_src = source + src_band_height * source_pitch; - if (line_src < source_base) - line_src = source_base; + if (line_src < source_base) + line_src = source_base; - horiz_line_scale(line_src, source_width, - dest + dest_band_height * dest_pitch, - dest_width); + horiz_line_scale(line_src, source_width, + dest + dest_band_height * dest_pitch, + dest_width); - /* Vertical scaling is in place */ - vert_band_scale(dest, dest_pitch, dest_width); + /* Vertical scaling is in place */ + vert_band_scale(dest, dest_pitch, dest_width); - /* Next band... */ - source += src_band_height * source_pitch; - dest += dest_band_height * dest_pitch; - } + /* Next band... */ + source += src_band_height * source_pitch; + dest += dest_band_height * dest_pitch; + } - /* scale one band horizontally */ - for (i = 1; i < src_band_height; i++) - { - /* Trap case where we could read off the base of the source buffer */ - line_src = source + i * source_pitch; + /* scale one band horizontally */ + for (i = 1; i < src_band_height; i++) { + /* Trap case where we could read off the base of the source buffer */ + line_src = source + i * source_pitch; - if (line_src < source_base) - line_src = source_base; + if (line_src < source_base) + line_src = source_base; - horiz_line_scale(line_src, source_width, - dest + i * dest_pitch, - dest_width); - } + horiz_line_scale(line_src, source_width, + dest + i * dest_pitch, + dest_width); + } - /* Vertical scaling is in place */ - last_vert_band_scale(dest, dest_pitch, dest_width); + /* Vertical scaling is in place */ + last_vert_band_scale(dest, dest_pitch, dest_width); - return ratio_scalable; + return ratio_scalable; } /**************************************************************************** @@ -879,70 +838,69 @@ int any_ratio_2d_scale * ****************************************************************************/ static -int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) -{ - int i; - int ew; - int eh; - - /* suggested scale factors */ - int hs = scale_vars->HScale; - int hr = scale_vars->HRatio; - int vs = scale_vars->VScale; - int vr = scale_vars->VRatio; - - int ratio_scalable = 1; - - int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; - int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; - int dw = scale_vars->expanded_frame_width; - int dh = scale_vars->expanded_frame_height; - YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; - YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; - - if (hr == 3) - ew = (sw + 2) / 3 * 3 * hs / hr; - else - ew = (sw + 7) / 8 * 8 * hs / hr; - - if (vr == 3) - eh = (sh + 2) / 3 * 3 * vs / vr; - else - eh = (sh + 7) / 8 * 8 * vs / vr; - - ratio_scalable = any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->y_buffer, - src_yuv_config->y_stride, sw, sh, - (unsigned char *) dst_yuv_config->y_buffer + YOffset, - dst_yuv_config->y_stride, dw, dh); - - for (i = 0; i < eh; i++) - duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); - - for (i = dh; i < eh; i++) - duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); - - if (ratio_scalable == 0) - return ratio_scalable; +int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) { + int i; + int ew; + int eh; + + /* suggested scale factors */ + int hs = scale_vars->HScale; + int hr = scale_vars->HRatio; + int vs = scale_vars->VScale; + int vr = scale_vars->VRatio; + + int ratio_scalable = 1; + + int sw = (scale_vars->expanded_frame_width * hr + hs - 1) / hs; + int sh = (scale_vars->expanded_frame_height * vr + vs - 1) / vs; + int dw = scale_vars->expanded_frame_width; + int dh = scale_vars->expanded_frame_height; + YV12_BUFFER_CONFIG *src_yuv_config = scale_vars->src_yuv_config; + YV12_BUFFER_CONFIG *dst_yuv_config = scale_vars->dst_yuv_config; + + if (hr == 3) + ew = (sw + 2) / 3 * 3 * hs / hr; + else + ew = (sw + 7) / 8 * 8 * hs / hr; + + if (vr == 3) + eh = (sh + 2) / 3 * 3 * vs / vr; + else + eh = (sh + 7) / 8 * 8 * vs / vr; + + ratio_scalable = any_ratio_2d_scale(scale_vars, + (const unsigned char *)src_yuv_config->y_buffer, + src_yuv_config->y_stride, sw, sh, + (unsigned char *) dst_yuv_config->y_buffer + YOffset, + dst_yuv_config->y_stride, dw, dh); + + for (i = 0; i < eh; i++) + duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride + dw, 0, ew - dw); + + for (i = dh; i < eh; i++) + duck_memset(dst_yuv_config->y_buffer + YOffset + i * dst_yuv_config->y_stride, 0, ew); + + if (ratio_scalable == 0) + return ratio_scalable; - sw = (sw + 1) >> 1; - sh = (sh + 1) >> 1; - dw = (dw + 1) >> 1; - dh = (dh + 1) >> 1; + sw = (sw + 1) >> 1; + sh = (sh + 1) >> 1; + dw = (dw + 1) >> 1; + dh = (dh + 1) >> 1; - any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->u_buffer, - src_yuv_config->y_stride / 2, sw, sh, - (unsigned char *)dst_yuv_config->u_buffer + UVOffset, - dst_yuv_config->uv_stride, dw, dh); + any_ratio_2d_scale(scale_vars, + (const unsigned char *)src_yuv_config->u_buffer, + src_yuv_config->y_stride / 2, sw, sh, + (unsigned char *)dst_yuv_config->u_buffer + UVOffset, + dst_yuv_config->uv_stride, dw, dh); - any_ratio_2d_scale(scale_vars, - (const unsigned char *)src_yuv_config->v_buffer, - src_yuv_config->y_stride / 2, sw, sh, - (unsigned char *)dst_yuv_config->v_buffer + UVOffset, - dst_yuv_config->uv_stride, dw, dh); + any_ratio_2d_scale(scale_vars, + (const unsigned char *)src_yuv_config->v_buffer, + src_yuv_config->y_stride / 2, sw, sh, + (unsigned char *)dst_yuv_config->v_buffer + UVOffset, + dst_yuv_config->uv_stride, dw, dh); - return ratio_scalable; + return ratio_scalable; } /**************************************************************************** @@ -961,52 +919,48 @@ int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset) * ****************************************************************************/ static void -center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) -{ - int i; - int row_offset, col_offset; - unsigned char *src_data_pointer; - unsigned char *dst_data_pointer; - - /* center values */ - row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; - col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; - - /* Y's */ - src_data_pointer = src_yuv_config->y_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; - - for (i = 0; i < src_yuv_config->y_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); - dst_data_pointer += dst_yuv_config->y_stride; - src_data_pointer += src_yuv_config->y_stride; - } - - row_offset /= 2; - col_offset /= 2; - - /* U's */ - src_data_pointer = src_yuv_config->u_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; - - for (i = 0; i < src_yuv_config->uv_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); - dst_data_pointer += dst_yuv_config->uv_stride; - src_data_pointer += src_yuv_config->uv_stride; - } - - /* V's */ - src_data_pointer = src_yuv_config->v_buffer; - dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; - - for (i = 0; i < src_yuv_config->uv_height; i++) - { - duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); - dst_data_pointer += dst_yuv_config->uv_stride; - src_data_pointer += src_yuv_config->uv_stride; - } +center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_config) { + int i; + int row_offset, col_offset; + unsigned char *src_data_pointer; + unsigned char *dst_data_pointer; + + /* center values */ + row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2; + col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2; + + /* Y's */ + src_data_pointer = src_yuv_config->y_buffer; + dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset; + + for (i = 0; i < src_yuv_config->y_height; i++) { + duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->y_width); + dst_data_pointer += dst_yuv_config->y_stride; + src_data_pointer += src_yuv_config->y_stride; + } + + row_offset /= 2; + col_offset /= 2; + + /* U's */ + src_data_pointer = src_yuv_config->u_buffer; + dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; + + for (i = 0; i < src_yuv_config->uv_height; i++) { + duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); + dst_data_pointer += dst_yuv_config->uv_stride; + src_data_pointer += src_yuv_config->uv_stride; + } + + /* V's */ + src_data_pointer = src_yuv_config->v_buffer; + dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset; + + for (i = 0; i < src_yuv_config->uv_height; i++) { + duck_memcpy(dst_data_pointer, src_data_pointer, src_yuv_config->uv_width); + dst_data_pointer += dst_yuv_config->uv_stride; + src_data_pointer += src_yuv_config->uv_stride; + } } /**************************************************************************** @@ -1029,61 +983,58 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con void vp8_yv12_scale_or_center ( - YV12_BUFFER_CONFIG *src_yuv_config, - YV12_BUFFER_CONFIG *dst_yuv_config, - int expanded_frame_width, - int expanded_frame_height, - int scaling_mode, - int HScale, - int HRatio, - int VScale, - int VRatio -) -{ - /*if ( ppi->post_processing_level ) - update_umvborder ( ppi, frame_buffer );*/ - - - switch (scaling_mode) - { + YV12_BUFFER_CONFIG *src_yuv_config, + YV12_BUFFER_CONFIG *dst_yuv_config, + int expanded_frame_width, + int expanded_frame_height, + int scaling_mode, + int HScale, + int HRatio, + int VScale, + int VRatio +) { + /*if ( ppi->post_processing_level ) + update_umvborder ( ppi, frame_buffer );*/ + + + switch (scaling_mode) { case SCALE_TO_FIT: - case MAINTAIN_ASPECT_RATIO: - { - SCALE_VARS scale_vars; - /* center values */ + case MAINTAIN_ASPECT_RATIO: { + SCALE_VARS scale_vars; + /* center values */ #if 1 - int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; - int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; - /*int YOffset = row * dst_yuv_config->y_width + col; - int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ - int YOffset = row * dst_yuv_config->y_stride + col; - int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); + int row = (dst_yuv_config->y_height - expanded_frame_height) / 2; + int col = (dst_yuv_config->y_width - expanded_frame_width) / 2; + /*int YOffset = row * dst_yuv_config->y_width + col; + int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/ + int YOffset = row * dst_yuv_config->y_stride + col; + int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1); #else - int row = (src_yuv_config->y_height - expanded_frame_height) / 2; - int col = (src_yuv_config->y_width - expanded_frame_width) / 2; - int YOffset = row * src_yuv_config->y_width + col; - int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); + int row = (src_yuv_config->y_height - expanded_frame_height) / 2; + int col = (src_yuv_config->y_width - expanded_frame_width) / 2; + int YOffset = row * src_yuv_config->y_width + col; + int UVOffset = (row >> 1) * src_yuv_config->uv_width + (col >> 1); #endif - scale_vars.dst_yuv_config = dst_yuv_config; - scale_vars.src_yuv_config = src_yuv_config; - scale_vars.HScale = HScale; - scale_vars.HRatio = HRatio; - scale_vars.VScale = VScale; - scale_vars.VRatio = VRatio; - scale_vars.expanded_frame_width = expanded_frame_width; - scale_vars.expanded_frame_height = expanded_frame_height; + scale_vars.dst_yuv_config = dst_yuv_config; + scale_vars.src_yuv_config = src_yuv_config; + scale_vars.HScale = HScale; + scale_vars.HRatio = HRatio; + scale_vars.VScale = VScale; + scale_vars.VRatio = VRatio; + scale_vars.expanded_frame_width = expanded_frame_width; + scale_vars.expanded_frame_height = expanded_frame_height; - /* perform center and scale */ - any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); + /* perform center and scale */ + any_ratio_frame_scale(&scale_vars, YOffset, UVOffset); - break; + break; } case CENTER: - center_image(src_yuv_config, dst_yuv_config); - break; + center_image(src_yuv_config, dst_yuv_config); + break; default: - break; - } + break; + } } diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c index eff594e2d..4cb2a4190 100644 --- a/vpx_scale/generic/yv12config.c +++ b/vpx_scale/generic/yv12config.c @@ -20,81 +20,73 @@ * ****************************************************************************/ int -vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) -{ - if (ybf) - { - vpx_free(ybf->buffer_alloc); - - /* buffer_alloc isn't accessed by most functions. Rather y_buffer, - u_buffer and v_buffer point to buffer_alloc and are used. Clear out - all of this so that a freed pointer isn't inadvertently used */ - vpx_memset (ybf, 0, sizeof (YV12_BUFFER_CONFIG)); - } - else - { - return -1; - } - - return 0; +vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { + if (ybf) { + vpx_free(ybf->buffer_alloc); + + /* buffer_alloc isn't accessed by most functions. Rather y_buffer, + u_buffer and v_buffer point to buffer_alloc and are used. Clear out + all of this so that a freed pointer isn't inadvertently used */ + vpx_memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); + } else { + return -1; + } + + return 0; } /**************************************************************************** * ****************************************************************************/ int -vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) -{ -/*NOTE:*/ - - if (ybf) - { - int y_stride = ((width + 2 * border) + 31) & ~31; - int yplane_size = (height + 2 * border) * y_stride; - int uv_width = width >> 1; - int uv_height = height >> 1; - /** There is currently a bunch of code which assumes - * uv_stride == y_stride/2, so enforce this here. */ - int uv_stride = y_stride >> 1; - int uvplane_size = (uv_height + border) * uv_stride; - - vp8_yv12_de_alloc_frame_buffer(ybf); - - /** Only support allocating buffers that have a height and width that - * are multiples of 16, and a border that's a multiple of 32. - * The border restriction is required to get 16-byte alignment of the - * start of the chroma rows without intoducing an arbitrary gap - * between planes, which would break the semantics of things like - * vpx_img_set_rect(). */ - if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) - return -3; - - ybf->y_width = width; - ybf->y_height = height; - ybf->y_stride = y_stride; - - ybf->uv_width = uv_width; - ybf->uv_height = uv_height; - ybf->uv_stride = uv_stride; - - ybf->border = border; - ybf->frame_size = yplane_size + 2 * uvplane_size; - - ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); - - if (ybf->buffer_alloc == NULL) - return -1; - - ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; - ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; - ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; - - ybf->corrupted = 0; /* assume not currupted by errors */ - } - else - { - return -2; - } - - return 0; +vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) { + /*NOTE:*/ + + if (ybf) { + int y_stride = ((width + 2 * border) + 31) & ~31; + int yplane_size = (height + 2 * border) * y_stride; + int uv_width = width >> 1; + int uv_height = height >> 1; + /** There is currently a bunch of code which assumes + * uv_stride == y_stride/2, so enforce this here. */ + int uv_stride = y_stride >> 1; + int uvplane_size = (uv_height + border) * uv_stride; + + vp8_yv12_de_alloc_frame_buffer(ybf); + + /** Only support allocating buffers that have a height and width that + * are multiples of 16, and a border that's a multiple of 32. + * The border restriction is required to get 16-byte alignment of the + * start of the chroma rows without intoducing an arbitrary gap + * between planes, which would break the semantics of things like + * vpx_img_set_rect(). */ + if ((width & 0xf) | (height & 0xf) | (border & 0x1f)) + return -3; + + ybf->y_width = width; + ybf->y_height = height; + ybf->y_stride = y_stride; + + ybf->uv_width = uv_width; + ybf->uv_height = uv_height; + ybf->uv_stride = uv_stride; + + ybf->border = border; + ybf->frame_size = yplane_size + 2 * uvplane_size; + + ybf->buffer_alloc = (unsigned char *) vpx_memalign(32, ybf->frame_size); + + if (ybf->buffer_alloc == NULL) + return -1; + + ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; + ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; + ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; + + ybf->corrupted = 0; /* assume not currupted by errors */ + } else { + return -2; + } + + return 0; } diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c index c087bdd78..cac8ae9b4 100644 --- a/vpx_scale/generic/yv12extend.c +++ b/vpx_scale/generic/yv12extend.c @@ -21,184 +21,174 @@ * ****************************************************************************/ void -vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - - /* copy the left and right most columns out */ - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - - /***********/ - /* U Plane */ - /***********/ - plane_stride = ybf->uv_stride; - plane_height = ybf->uv_height; - plane_width = ybf->uv_width; - Border /= 2; - - /* copy the left and right most columns out */ - src_ptr1 = ybf->u_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->u_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /***********/ - /* V Plane */ - /***********/ - - /* copy the left and right most columns out */ - src_ptr1 = ybf->v_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->v_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)(Border); i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } +vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf) { + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr1, *dest_ptr2; + + unsigned int Border; + int plane_stride; + int plane_height; + int plane_width; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + plane_height = ybf->y_height; + plane_width = ybf->y_width; + + /* copy the left and right most columns out */ + src_ptr1 = ybf->y_buffer; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /* Now copy the top and bottom source lines into each line of the respective borders */ + src_ptr1 = ybf->y_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)Border; i++) { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + + /***********/ + /* U Plane */ + /***********/ + plane_stride = ybf->uv_stride; + plane_height = ybf->uv_height; + plane_width = ybf->uv_width; + Border /= 2; + + /* copy the left and right most columns out */ + src_ptr1 = ybf->u_buffer; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /* Now copy the top and bottom source lines into each line of the respective borders */ + src_ptr1 = ybf->u_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)(Border); i++) { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /***********/ + /* V Plane */ + /***********/ + + /* copy the left and right most columns out */ + src_ptr1 = ybf->v_buffer; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /* Now copy the top and bottom source lines into each line of the respective borders */ + src_ptr1 = ybf->v_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)(Border); i++) { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } } static void -extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf) -{ - int i; - unsigned char *src_ptr1, *src_ptr2; - unsigned char *dest_ptr1, *dest_ptr2; - - unsigned int Border; - int plane_stride; - int plane_height; - int plane_width; - - /***********/ - /* Y Plane */ - /***********/ - Border = ybf->border; - plane_stride = ybf->y_stride; - plane_height = ybf->y_height; - plane_width = ybf->y_width; - - /* copy the left and right most columns out */ - src_ptr1 = ybf->y_buffer; - src_ptr2 = src_ptr1 + plane_width - 1; - dest_ptr1 = src_ptr1 - Border; - dest_ptr2 = src_ptr2 + 1; - - for (i = 0; i < plane_height; i++) - { - vpx_memset(dest_ptr1, src_ptr1[0], Border); - vpx_memset(dest_ptr2, src_ptr2[0], Border); - src_ptr1 += plane_stride; - src_ptr2 += plane_stride; - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - /* Now copy the top and bottom source lines into each line of the respective borders */ - src_ptr1 = ybf->y_buffer - Border; - src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; - dest_ptr1 = src_ptr1 - (Border * plane_stride); - dest_ptr2 = src_ptr2 + plane_stride; - - for (i = 0; i < (int)Border; i++) - { - vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); - vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); - dest_ptr1 += plane_stride; - dest_ptr2 += plane_stride; - } - - plane_stride /= 2; - plane_height /= 2; - plane_width /= 2; - Border /= 2; +extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf) { + int i; + unsigned char *src_ptr1, *src_ptr2; + unsigned char *dest_ptr1, *dest_ptr2; + + unsigned int Border; + int plane_stride; + int plane_height; + int plane_width; + + /***********/ + /* Y Plane */ + /***********/ + Border = ybf->border; + plane_stride = ybf->y_stride; + plane_height = ybf->y_height; + plane_width = ybf->y_width; + + /* copy the left and right most columns out */ + src_ptr1 = ybf->y_buffer; + src_ptr2 = src_ptr1 + plane_width - 1; + dest_ptr1 = src_ptr1 - Border; + dest_ptr2 = src_ptr2 + 1; + + for (i = 0; i < plane_height; i++) { + vpx_memset(dest_ptr1, src_ptr1[0], Border); + vpx_memset(dest_ptr2, src_ptr2[0], Border); + src_ptr1 += plane_stride; + src_ptr2 += plane_stride; + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + /* Now copy the top and bottom source lines into each line of the respective borders */ + src_ptr1 = ybf->y_buffer - Border; + src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; + dest_ptr1 = src_ptr1 - (Border * plane_stride); + dest_ptr2 = src_ptr2 + plane_stride; + + for (i = 0; i < (int)Border; i++) { + vpx_memcpy(dest_ptr1, src_ptr1, plane_stride); + vpx_memcpy(dest_ptr2, src_ptr2, plane_stride); + dest_ptr1 += plane_stride; + dest_ptr2 += plane_stride; + } + + plane_stride /= 2; + plane_height /= 2; + plane_width /= 2; + Border /= 2; } @@ -221,60 +211,54 @@ extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf) * ****************************************************************************/ void -vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ - int row; - unsigned char *source, *dest; - - source = src_ybc->y_buffer; - dest = dst_ybc->y_buffer; - - for (row = 0; row < src_ybc->y_height; row++) - { - vpx_memcpy(dest, source, src_ybc->y_width); - source += src_ybc->y_stride; - dest += dst_ybc->y_stride; - } - - source = src_ybc->u_buffer; - dest = dst_ybc->u_buffer; - - for (row = 0; row < src_ybc->uv_height; row++) - { - vpx_memcpy(dest, source, src_ybc->uv_width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - source = src_ybc->v_buffer; - dest = dst_ybc->v_buffer; - - for (row = 0; row < src_ybc->uv_height; row++) - { - vpx_memcpy(dest, source, src_ybc->uv_width); - source += src_ybc->uv_stride; - dest += dst_ybc->uv_stride; - } - - vp8_yv12_extend_frame_borders_ptr(dst_ybc); +vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) { + int row; + unsigned char *source, *dest; + + source = src_ybc->y_buffer; + dest = dst_ybc->y_buffer; + + for (row = 0; row < src_ybc->y_height; row++) { + vpx_memcpy(dest, source, src_ybc->y_width); + source += src_ybc->y_stride; + dest += dst_ybc->y_stride; + } + + source = src_ybc->u_buffer; + dest = dst_ybc->u_buffer; + + for (row = 0; row < src_ybc->uv_height; row++) { + vpx_memcpy(dest, source, src_ybc->uv_width); + source += src_ybc->uv_stride; + dest += dst_ybc->uv_stride; + } + + source = src_ybc->v_buffer; + dest = dst_ybc->v_buffer; + + for (row = 0; row < src_ybc->uv_height; row++) { + vpx_memcpy(dest, source, src_ybc->uv_width); + source += src_ybc->uv_stride; + dest += dst_ybc->uv_stride; + } + + vp8_yv12_extend_frame_borders_ptr(dst_ybc); } void -vp8_yv12_copy_frame_yonly(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) -{ - int row; - unsigned char *source, *dest; +vp8_yv12_copy_frame_yonly(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) { + int row; + unsigned char *source, *dest; - source = src_ybc->y_buffer; - dest = dst_ybc->y_buffer; + source = src_ybc->y_buffer; + dest = dst_ybc->y_buffer; - for (row = 0; row < src_ybc->y_height; row++) - { - vpx_memcpy(dest, source, src_ybc->y_width); - source += src_ybc->y_stride; - dest += dst_ybc->y_stride; - } + for (row = 0; row < src_ybc->y_height; row++) { + vpx_memcpy(dest, source, src_ybc->y_width); + source += src_ybc->y_stride; + dest += dst_ybc->y_stride; + } - extend_frame_borders_yonly(dst_ybc); + extend_frame_borders_yonly(dst_ybc); } diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h index 39de1816b..c53525271 100644 --- a/vpx_scale/include/generic/vpxscale_arbitrary.h +++ b/vpx_scale/include/generic/vpxscale_arbitrary.h @@ -14,33 +14,32 @@ #include "vpx_scale/yv12config.h" -typedef struct -{ - int in_width; - int in_height; - - int out_width; - int out_height; - int max_usable_out_width; - - // numerator for the width and height - int nw; - int nh; - int nh_uv; - - // output to input correspondance array - short *l_w; - short *l_h; - short *l_h_uv; - - // polyphase coefficients - short *c_w; - short *c_h; - short *c_h_uv; - - // buffer for horizontal filtering. - unsigned char *hbuf; - unsigned char *hbuf_uv; +typedef struct { + int in_width; + int in_height; + + int out_width; + int out_height; + int max_usable_out_width; + + // numerator for the width and height + int nw; + int nh; + int nh_uv; + + // output to input correspondance array + short *l_w; + short *l_h; + short *l_h_uv; + + // polyphase coefficients + short *c_w; + short *c_h; + short *c_h_uv; + + // buffer for horizontal filtering. + unsigned char *hbuf; + unsigned char *hbuf_uv; } BICUBIC_SCALER_STRUCT; int bicubic_coefficient_setup(int in_width, int in_height, int out_width, int out_height); diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h index 1476e641b..5581385b6 100644 --- a/vpx_scale/scale_mode.h +++ b/vpx_scale/scale_mode.h @@ -17,12 +17,11 @@ #ifndef SCALE_MODE_H #define SCALE_MODE_H -typedef enum -{ - MAINTAIN_ASPECT_RATIO = 0x0, - SCALE_TO_FIT = 0x1, - CENTER = 0x2, - OTHER = 0x3 +typedef enum { + MAINTAIN_ASPECT_RATIO = 0x0, + SCALE_TO_FIT = 0x1, + CENTER = 0x2, + OTHER = 0x3 } SCALE_MODE; diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h index 86fc128da..899a50a89 100644 --- a/vpx_scale/vpxscale.h +++ b/vpx_scale/vpxscale.h @@ -75,27 +75,27 @@ void vertical_band_1_2_scale_armv4(unsigned char *dest, unsigned int dest_pitch, extern void dmachine_specific_config(int mmx_enabled, int xmm_enabled, int wmt_enabled); extern void vp8_yv12_scale_or_center ( - YV12_BUFFER_CONFIG *src_yuv_config, - YV12_BUFFER_CONFIG *dst_yuv_config, - int expanded_frame_width, - int expanded_frame_height, - int scaling_mode, - int HScale, - int HRatio, - int VScale, - int VRatio + YV12_BUFFER_CONFIG *src_yuv_config, + YV12_BUFFER_CONFIG *dst_yuv_config, + int expanded_frame_width, + int expanded_frame_height, + int scaling_mode, + int HScale, + int HRatio, + int VScale, + int VRatio ); extern void vp8_scale_frame ( - YV12_BUFFER_CONFIG *src, - YV12_BUFFER_CONFIG *dst, - unsigned char *temp_area, - unsigned char temp_height, - unsigned int hscale, - unsigned int hratio, - unsigned int vscale, - unsigned int vratio, - unsigned int interlaced + YV12_BUFFER_CONFIG *src, + YV12_BUFFER_CONFIG *dst, + unsigned char *temp_area, + unsigned char temp_height, + unsigned int hscale, + unsigned int hratio, + unsigned int vscale, + unsigned int vratio, + unsigned int interlaced ); extern void vp8_scale_machine_specific_config(void); diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c index 3711fe5eb..2d96cc7c1 100644 --- a/vpx_scale/win32/scaleopt.c +++ b/vpx_scale/win32/scaleopt.c @@ -61,114 +61,112 @@ __declspec(align(16)) const static unsigned short const35_1[] = { 102, 205, 51, static void horizontal_line_3_5_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; - __asm - { + __asm { - push ebx + push ebx - mov esi, source - mov edi, dest + mov esi, source + mov edi, dest - mov ecx, source_width - lea edx, [esi+ecx-3]; + mov ecx, source_width + lea edx, [esi+ecx-3]; - movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx - movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx + movq mm5, const35_1 // mm5 = 66 xx cd xx 33 xx 9a xx + movq mm6, const35_2 // mm6 = 9a xx 33 xx cd xx 66 xx - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 + movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx + pxor mm7, mm7 // clear mm7 - horiz_line_3_5_loop: + horiz_line_3_5_loop: - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax + mov eax, DWORD PTR [esi] // eax = 00 01 02 03 + mov ebx, eax - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 + and ebx, 0xffff00 // ebx = xx 01 02 xx + mov ecx, eax // ecx = 00 01 02 03 - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx + and eax, 0xffff0000 // eax = xx xx 02 03 + xor ecx, eax // ecx = 00 01 xx xx - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 + shr ebx, 8 // ebx = 01 02 xx xx + or eax, ebx // eax = 01 02 02 03 - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx + shl ebx, 16 // ebx = xx xx 01 02 + movd mm1, eax // mm1 = 01 02 02 03 xx xx xx xx - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx + or ebx, ecx // ebx = 00 01 01 02 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 03 xx - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // + movd mm0, ebx // mm0 = 00 01 01 02 + pmullw mm1, mm6 // - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx + pmullw mm0, mm5 // - mov [edi], ebx // writeoutput 00 xx xx xx - add esi, 3 + mov [edi], ebx // writeoutput 00 xx xx xx + add esi, 3 - add edi, 5 - paddw mm0, mm1 + add edi, 5 + paddw mm0, mm1 - paddw mm0, mm4 - psrlw mm0, 8 + paddw mm0, mm4 + psrlw mm0, 8 - cmp esi, edx - packuswb mm0, mm7 + cmp esi, edx + packuswb mm0, mm7 - movd DWORD Ptr [edi-4], mm0 - jl horiz_line_3_5_loop + movd DWORD Ptr [edi-4], mm0 + jl horiz_line_3_5_loop -//Exit: - mov eax, DWORD PTR [esi] // eax = 00 01 02 03 - mov ebx, eax +// Exit: + mov eax, DWORD PTR [esi] // eax = 00 01 02 03 + mov ebx, eax - and ebx, 0xffff00 // ebx = xx 01 02 xx - mov ecx, eax // ecx = 00 01 02 03 + and ebx, 0xffff00 // ebx = xx 01 02 xx + mov ecx, eax // ecx = 00 01 02 03 - and eax, 0xffff0000 // eax = xx xx 02 03 - xor ecx, eax // ecx = 00 01 xx xx + and eax, 0xffff0000 // eax = xx xx 02 03 + xor ecx, eax // ecx = 00 01 xx xx - shr ebx, 8 // ebx = 01 02 xx xx - or eax, ebx // eax = 01 02 02 03 + shr ebx, 8 // ebx = 01 02 xx xx + or eax, ebx // eax = 01 02 02 03 - shl eax, 8 // eax = xx 01 02 02 - and eax, 0xffff0000 // eax = xx xx 02 02 + shl eax, 8 // eax = xx 01 02 02 + and eax, 0xffff0000 // eax = xx xx 02 02 - or eax, ebx // eax = 01 02 02 02 + or eax, ebx // eax = 01 02 02 02 - shl ebx, 16 // ebx = xx xx 01 02 - movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx + shl ebx, 16 // ebx = xx xx 01 02 + movd mm1, eax // mm1 = 01 02 02 02 xx xx xx xx - or ebx, ecx // ebx = 00 01 01 02 - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx + or ebx, ecx // ebx = 00 01 01 02 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 02 xx 02 xx - movd mm0, ebx // mm0 = 00 01 01 02 - pmullw mm1, mm6 // + movd mm0, ebx // mm0 = 00 01 01 02 + pmullw mm1, mm6 // - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx - pmullw mm0, mm5 // + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 01 xx 02 xx + pmullw mm0, mm5 // - mov [edi], ebx // writeoutput 00 xx xx xx - paddw mm0, mm1 + mov [edi], ebx // writeoutput 00 xx xx xx + paddw mm0, mm1 - paddw mm0, mm4 - psrlw mm0, 8 + paddw mm0, mm4 + psrlw mm0, 8 - packuswb mm0, mm7 - movd DWORD Ptr [edi+1], mm0 + packuswb mm0, mm7 + movd DWORD Ptr [edi+1], mm0 - pop ebx + pop ebx - } + } } @@ -194,120 +192,118 @@ void horizontal_line_3_5_scale_mmx static void horizontal_line_4_5_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void)dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void)dest_width; - __asm - { + __asm { - mov esi, source - mov edi, dest + mov esi, source + mov edi, dest - mov ecx, source_width - lea edx, [esi+ecx-8]; + mov ecx, source_width + lea edx, [esi+ecx-8]; - movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx - movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx + movq mm5, const45_1 // mm5 = 33 xx 66 xx 9a xx cd xx + movq mm6, const45_2 // mm6 = cd xx 9a xx 66 xx 33 xx - movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx - pxor mm7, mm7 // clear mm7 + movq mm4, round_values // mm4 = 80 xx 80 xx 80 xx 80 xx + pxor mm7, mm7 // clear mm7 - horiz_line_4_5_loop: + horiz_line_4_5_loop: - movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 + movq mm0, QWORD PTR [esi] // mm0 = 00 01 02 03 04 05 06 07 + movq mm1, QWORD PTR [esi+1]; // mm1 = 01 02 03 04 05 06 07 08 - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 + movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 + movq mm3, mm1 // mm3 = 01 02 03 04 05 06 07 08 - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx + movd DWORD PTR [edi], mm0 // write output 00 xx xx xx + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx + pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx + pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 + punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 + movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx + pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 + punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx + pmullw mm3, mm6 // 05*205 06*154 07*102 08* 51 - paddw mm0, mm1 // added round values - paddw mm0, mm4 + paddw mm0, mm1 // added round values + paddw mm0, mm4 - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 + psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx + packuswb mm0, mm7 - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - add edi, 10 + movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 + add edi, 10 - add esi, 8 - paddw mm2, mm3 // + add esi, 8 + paddw mm2, mm3 // - paddw mm2, mm4 // added round values - cmp esi, edx + paddw mm2, mm4 // added round values + cmp esi, edx - psrlw mm2, 8 - packuswb mm2, mm7 + psrlw mm2, 8 + packuswb mm2, mm7 - movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 - jl horiz_line_4_5_loop + movd DWORD PTR [edi-4], mm2 // writeoutput 06 07 08 09 + jl horiz_line_4_5_loop -//Exit: - movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 - movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 +// Exit: + movq mm0, [esi] // mm0 = 00 01 02 03 04 05 06 07 + movq mm1, mm0 // mm1 = 00 01 02 03 04 05 06 07 - movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 - psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 + movq mm2, mm0 // mm2 = 00 01 02 03 04 05 06 07 + psrlq mm1, 8 // mm1 = 01 02 03 04 05 06 07 00 - movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 - pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 + movq mm3, mask45 // mm3 = 00 00 00 00 00 00 ff 00 + pand mm3, mm1 // mm3 = 00 00 00 00 00 00 07 00 - psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 - por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 + psllq mm3, 8 // mm3 = 00 00 00 00 00 00 00 07 + por mm1, mm3 // mm1 = 01 02 03 04 05 06 07 07 - movq mm3, mm1 + movq mm3, mm1 - movd DWORD PTR [edi], mm0 // write output 00 xx xx xx - punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx + movd DWORD PTR [edi], mm0 // write output 00 xx xx xx + punpcklbw mm0, mm7 // mm0 = 00 xx 01 xx 02 xx 03 xx - punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx - pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 + punpcklbw mm1, mm7 // mm1 = 01 xx 02 xx 03 xx 04 xx + pmullw mm0, mm5 // 00* 51 01*102 02*154 03*205 - pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 - punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx + pmullw mm1, mm6 // 01*205 02*154 03*102 04* 51 + punpckhbw mm2, mm7 // mm2 = 04 xx 05 xx 06 xx 07 xx - movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx - pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 + movd DWORD PTR [edi+5], mm2 // write ouput 05 xx xx xx + pmullw mm2, mm5 // 04* 51 05*102 06*154 07*205 - punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx - pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 + punpckhbw mm3, mm7 // mm3 = 05 xx 06 xx 07 xx 08 xx + pmullw mm3, mm6 // 05*205 06*154 07*102 07* 51 - paddw mm0, mm1 // added round values - paddw mm0, mm4 + paddw mm0, mm1 // added round values + paddw mm0, mm4 - psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx - packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx + psrlw mm0, 8 // output: 01 xx 02 xx 03 xx 04 xx + packuswb mm0, mm7 // 01 02 03 04 xx xx xx xx - movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 - paddw mm2, mm3 // + movd DWORD PTR [edi+1], mm0 // write output 01 02 03 04 + paddw mm2, mm3 // - paddw mm2, mm4 // added round values - psrlw mm2, 8 + paddw mm2, mm4 // added round values + psrlw mm2, 8 - packuswb mm2, mm7 - movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 + packuswb mm2, mm7 + movd DWORD PTR [edi+6], mm2 // writeoutput 06 07 08 09 - } + } } /**************************************************************************** @@ -332,167 +328,165 @@ void horizontal_line_4_5_scale_mmx static void vertical_band_4_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - vs_4_5_loop: + vs_4_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word + movq mm5, one_fifth + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 1/5 + pmullw mm0, mm5 // a * 1/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan + pmullw mm2, mm5 // a * 1/5 + movq mm6, four_fifths // constan - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 4/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 + pmullw mm5, mm6 // b * 4/5 + paddw mm0, mm4 // a * 1/5 + b * 4/5 - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 1/5 + b * 4/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm5, two_fifths - movq mm2, mm0 // make a copy + movq mm5, two_fifths + movq mm2, mm0 // make a copy - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths + pmullw mm1, mm5 // b * 2/5 + movq mm6, three_fifths - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 + punpcklbw mm0, mm7 // unpack low to word + pmullw mm3, mm5 // b * 2/5 - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word + movq mm4, mm0 // make copy of c + punpckhbw mm2, mm7 // unpack high to word - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 + pmullw mm4, mm6 // c * 3/5 + movq mm5, mm2 - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 + pmullw mm5, mm6 // c * 3/5 + paddw mm1, mm4 // b * 2/5 + c * 3/5 - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 + paddw mm3, mm5 // b * 2/5 + c * 3/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] + psrlw mm3, 8 + packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + movq mm1, [edi] // mm1=Src[3]; - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking + // mm0, mm2 --- Src[2] + // mm1 --- Src[3] + // mm6 --- 3/5 + // mm7 for unpacking - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 + pmullw mm0, mm6 // c * 3/5 + movq mm5, two_fifths // mm5 = 2/5 - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 + movq mm3, mm1 // make a copy + pmullw mm2, mm6 // c * 3/5 - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low + movq mm4, mm1 // make a copy - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 + punpckhbw mm3, mm7 // unpack high + pmullw mm4, mm5 // d * 2/5 - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 + movq mm6, mm3 // make a copy + pmullw mm6, mm5 // d * 2/5 - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 + paddw mm0, mm4 // c * 3/5 + d * 2/5 + paddw mm2, mm6 // c * 3/5 + d * 2/5 - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 + paddw mm0, round_values // + 128 + paddw mm2, round_values // + 128 - psrlw mm0, 8 - psrlw mm2, 8 + psrlw mm0, 8 + psrlw mm2, 8 - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] + packuswb mm0, mm2 // des[3] + movq QWORD ptr [edi], mm0 // write des[3] - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking + // mm1, mm3 --- Src[3] + // mm7 -- cleared for unpacking - movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group + movq mm0, [edi+ecx*2] // mm0, Src[0] of the next group - movq mm5, four_fifths // mm5 = 4/5 - pmullw mm1, mm5 // d * 4/5 + movq mm5, four_fifths // mm5 = 4/5 + pmullw mm1, mm5 // d * 4/5 - movq mm6, one_fifth // mm6 = 1/5 - movq mm2, mm0 // make a copy + movq mm6, one_fifth // mm6 = 1/5 + movq mm2, mm0 // make a copy - pmullw mm3, mm5 // d * 4/5 - punpcklbw mm0, mm7 // unpack low + pmullw mm3, mm5 // d * 4/5 + punpcklbw mm0, mm7 // unpack low - pmullw mm0, mm6 // an * 1/5 - punpckhbw mm2, mm7 // unpack high + pmullw mm0, mm6 // an * 1/5 + punpckhbw mm2, mm7 // unpack high - paddw mm1, mm0 // d * 4/5 + an * 1/5 - pmullw mm2, mm6 // an * 1/5 + paddw mm1, mm0 // d * 4/5 + an * 1/5 + pmullw mm2, mm6 // an * 1/5 - paddw mm3, mm2 // d * 4/5 + an * 1/5 - paddw mm1, round_values // + 128 + paddw mm3, mm2 // d * 4/5 + an * 1/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[4] + psrlw mm3, 8 + packuswb mm1, mm3 // des[4] - movq QWORD ptr [edi+ecx], mm1 // write des[4] + movq QWORD ptr [edi+ecx], mm1 // write des[4] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg vs_4_5_loop - } + sub edx, 8 + jg vs_4_5_loop + } } /**************************************************************************** @@ -517,139 +511,137 @@ void vertical_band_4_5_scale_mmx static void last_vertical_band_4_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - last_vs_4_5_loop: + last_vs_4_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, one_fifth - punpckhbw mm2, mm7 // unpack high to word + movq mm5, one_fifth + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 1/5 + pmullw mm0, mm5 // a * 1/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 1/5 - movq mm6, four_fifths // constan + pmullw mm2, mm5 // a * 1/5 + movq mm6, four_fifths // constan - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 4/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 4/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 4/5 - paddw mm0, mm4 // a * 1/5 + b * 4/5 + pmullw mm5, mm6 // b * 4/5 + paddw mm0, mm4 // a * 1/5 + b * 4/5 - paddw mm2, mm5 // a * 1/5 + b * 4/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 1/5 + b * 4/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm5, two_fifths - movq mm2, mm0 // make a copy + movq mm5, two_fifths + movq mm2, mm0 // make a copy - pmullw mm1, mm5 // b * 2/5 - movq mm6, three_fifths + pmullw mm1, mm5 // b * 2/5 + movq mm6, three_fifths - punpcklbw mm0, mm7 // unpack low to word - pmullw mm3, mm5 // b * 2/5 + punpcklbw mm0, mm7 // unpack low to word + pmullw mm3, mm5 // b * 2/5 - movq mm4, mm0 // make copy of c - punpckhbw mm2, mm7 // unpack high to word + movq mm4, mm0 // make copy of c + punpckhbw mm2, mm7 // unpack high to word - pmullw mm4, mm6 // c * 3/5 - movq mm5, mm2 + pmullw mm4, mm6 // c * 3/5 + movq mm5, mm2 - pmullw mm5, mm6 // c * 3/5 - paddw mm1, mm4 // b * 2/5 + c * 3/5 + pmullw mm5, mm6 // c * 3/5 + paddw mm1, mm4 // b * 2/5 + c * 3/5 - paddw mm3, mm5 // b * 2/5 + c * 3/5 - paddw mm1, round_values // + 128 + paddw mm3, mm5 // b * 2/5 + c * 3/5 + paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 - psrlw mm1, 8 + paddw mm3, round_values // + 128 + psrlw mm1, 8 - psrlw mm3, 8 - packuswb mm1, mm3 // des[2] + psrlw mm3, 8 + packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - movq mm1, [edi] // mm1=Src[3]; + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + movq mm1, [edi] // mm1=Src[3]; - movq QWORD ptr [edi+ecx], mm1 // write des[4]; + movq QWORD ptr [edi+ecx], mm1 // write des[4]; - // mm0, mm2 --- Src[2] - // mm1 --- Src[3] - // mm6 --- 3/5 - // mm7 for unpacking + // mm0, mm2 --- Src[2] + // mm1 --- Src[3] + // mm6 --- 3/5 + // mm7 for unpacking - pmullw mm0, mm6 // c * 3/5 - movq mm5, two_fifths // mm5 = 2/5 + pmullw mm0, mm6 // c * 3/5 + movq mm5, two_fifths // mm5 = 2/5 - movq mm3, mm1 // make a copy - pmullw mm2, mm6 // c * 3/5 + movq mm3, mm1 // make a copy + pmullw mm2, mm6 // c * 3/5 - punpcklbw mm1, mm7 // unpack low - movq mm4, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low + movq mm4, mm1 // make a copy - punpckhbw mm3, mm7 // unpack high - pmullw mm4, mm5 // d * 2/5 + punpckhbw mm3, mm7 // unpack high + pmullw mm4, mm5 // d * 2/5 - movq mm6, mm3 // make a copy - pmullw mm6, mm5 // d * 2/5 + movq mm6, mm3 // make a copy + pmullw mm6, mm5 // d * 2/5 - paddw mm0, mm4 // c * 3/5 + d * 2/5 - paddw mm2, mm6 // c * 3/5 + d * 2/5 + paddw mm0, mm4 // c * 3/5 + d * 2/5 + paddw mm2, mm6 // c * 3/5 + d * 2/5 - paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 + paddw mm0, round_values // + 128 + paddw mm2, round_values // + 128 - psrlw mm0, 8 - psrlw mm2, 8 + psrlw mm0, 8 + psrlw mm2, 8 - packuswb mm0, mm2 // des[3] - movq QWORD ptr [edi], mm0 // write des[3] + packuswb mm0, mm2 // des[3] + movq QWORD ptr [edi], mm0 // write des[3] - // mm1, mm3 --- Src[3] - // mm7 -- cleared for unpacking - add edi, 8 - add esi, 8 + // mm1, mm3 --- Src[3] + // mm7 -- cleared for unpacking + add edi, 8 + add esi, 8 - sub edx, 8 - jg last_vs_4_5_loop - } + sub edx, 8 + jg last_vs_4_5_loop + } } /**************************************************************************** @@ -674,153 +666,151 @@ void last_vertical_band_4_5_scale_mmx static void vertical_band_3_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - vs_3_5_loop: + vs_3_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word + movq mm5, two_fifths // mm5 = 2/5 + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 2/5 + pmullw mm0, mm5 // a * 2/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 + pmullw mm2, mm5 // a * 2/5 + movq mm6, three_fifths // mm6 = 3/5 - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 3/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 + pmullw mm5, mm6 // b * 3/5 + paddw mm0, mm4 // a * 2/5 + b * 3/5 - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 2/5 + b * 3/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low + movq mm4, mm1 // b low + pmullw mm1, four_fifths // b * 4/5 low - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high + movq mm5, mm3 // b high + pmullw mm3, four_fifths // b * 4/5 high - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 + movq mm2, mm0 // c + pmullw mm4, one_fifth // b * 1/5 - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 + punpcklbw mm0, mm7 // c low + pmullw mm5, one_fifth // b * 1/5 - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high + movq mm6, mm0 // make copy of c low + punpckhbw mm2, mm7 // c high - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, one_fifth // c * 1/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low + pmullw mm7, one_fifth // c * 1/5 high + paddw mm1, mm6 // b * 4/5 + c * 1/5 low - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low + paddw mm3, mm7 // b * 4/5 + c * 1/5 high + movq mm6, mm0 // make copy of c low - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, four_fifths // c * 4/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, four_fifths // c * 4/5 high + pmullw mm7, four_fifths // c * 4/5 high - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high + paddw mm4, mm6 // b * 1/5 + c * 4/5 low + paddw mm5, mm7 // b * 1/5 + c * 4/5 high - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 + paddw mm1, round_values // + 128 + paddw mm3, round_values // + 128 - psrlw mm1, 8 - psrlw mm3, 8 + psrlw mm1, 8 + psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + packuswb mm1, mm3 // des[2] + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 + paddw mm4, round_values // + 128 + paddw mm5, round_values // + 128 - psrlw mm4, 8 - psrlw mm5, 8 + psrlw mm4, 8 + psrlw mm5, 8 - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] + packuswb mm4, mm5 // des[3] + movq QWORD ptr [edi], mm4 // write des[3] - // mm0, mm2 --- Src[3] + // mm0, mm2 --- Src[3] - pxor mm7, mm7 // clear mm7 for unpacking - movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group + pxor mm7, mm7 // clear mm7 for unpacking + movq mm1, [edi+ecx*2] // mm1 = Src[0] of the next group - movq mm5, three_fifths // mm5 = 3/5 - pmullw mm0, mm5 // d * 3/5 + movq mm5, three_fifths // mm5 = 3/5 + pmullw mm0, mm5 // d * 3/5 - movq mm6, two_fifths // mm6 = 2/5 - movq mm3, mm1 // make a copy + movq mm6, two_fifths // mm6 = 2/5 + movq mm3, mm1 // make a copy - pmullw mm2, mm5 // d * 3/5 - punpcklbw mm1, mm7 // unpack low + pmullw mm2, mm5 // d * 3/5 + punpcklbw mm1, mm7 // unpack low - pmullw mm1, mm6 // an * 2/5 - punpckhbw mm3, mm7 // unpack high + pmullw mm1, mm6 // an * 2/5 + punpckhbw mm3, mm7 // unpack high - paddw mm0, mm1 // d * 3/5 + an * 2/5 - pmullw mm3, mm6 // an * 2/5 + paddw mm0, mm1 // d * 3/5 + an * 2/5 + pmullw mm3, mm6 // an * 2/5 - paddw mm2, mm3 // d * 3/5 + an * 2/5 - paddw mm0, round_values // + 128 + paddw mm2, mm3 // d * 3/5 + an * 2/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des[4] + psrlw mm2, 8 + packuswb mm0, mm2 // des[4] - movq QWORD ptr [edi+ecx], mm0 // write des[4] + movq QWORD ptr [edi+ecx], mm0 // write des[4] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg vs_3_5_loop - } + sub edx, 8 + jg vs_3_5_loop + } } /**************************************************************************** @@ -845,129 +835,127 @@ void vertical_band_3_5_scale_mmx static void last_vertical_band_3_5_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - lea edi, [esi+ecx*2] // tow lines below - add edi, ecx // three lines below + lea edi, [esi+ecx*2] // tow lines below + add edi, ecx // three lines below - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - last_vs_3_5_loop: + last_vs_3_5_loop: - movq mm0, QWORD ptr [esi] // src[0]; - movq mm1, QWORD ptr [esi+ecx] // src[1]; + movq mm0, QWORD ptr [esi] // src[0]; + movq mm1, QWORD ptr [esi+ecx] // src[1]; - movq mm2, mm0 // Make a copy - punpcklbw mm0, mm7 // unpack low to word + movq mm2, mm0 // Make a copy + punpcklbw mm0, mm7 // unpack low to word - movq mm5, two_fifths // mm5 = 2/5 - punpckhbw mm2, mm7 // unpack high to word + movq mm5, two_fifths // mm5 = 2/5 + punpckhbw mm2, mm7 // unpack high to word - pmullw mm0, mm5 // a * 2/5 + pmullw mm0, mm5 // a * 2/5 - movq mm3, mm1 // make a copy - punpcklbw mm1, mm7 // unpack low to word + movq mm3, mm1 // make a copy + punpcklbw mm1, mm7 // unpack low to word - pmullw mm2, mm5 // a * 2/5 - movq mm6, three_fifths // mm6 = 3/5 + pmullw mm2, mm5 // a * 2/5 + movq mm6, three_fifths // mm6 = 3/5 - movq mm4, mm1 // copy of low b - pmullw mm4, mm6 // b * 3/5 + movq mm4, mm1 // copy of low b + pmullw mm4, mm6 // b * 3/5 - punpckhbw mm3, mm7 // unpack high to word - movq mm5, mm3 // copy of high b + punpckhbw mm3, mm7 // unpack high to word + movq mm5, mm3 // copy of high b - pmullw mm5, mm6 // b * 3/5 - paddw mm0, mm4 // a * 2/5 + b * 3/5 + pmullw mm5, mm6 // b * 3/5 + paddw mm0, mm4 // a * 2/5 + b * 3/5 - paddw mm2, mm5 // a * 2/5 + b * 3/5 - paddw mm0, round_values // + 128 + paddw mm2, mm5 // a * 2/5 + b * 3/5 + paddw mm0, round_values // + 128 - paddw mm2, round_values // + 128 - psrlw mm0, 8 + paddw mm2, round_values // + 128 + psrlw mm0, 8 - psrlw mm2, 8 - packuswb mm0, mm2 // des [1] + psrlw mm2, 8 + packuswb mm0, mm2 // des [1] - movq QWORD ptr [esi+ecx], mm0 // write des[1] - movq mm0, [esi+ecx*2] // mm0 = src[2] + movq QWORD ptr [esi+ecx], mm0 // write des[1] + movq mm0, [esi+ecx*2] // mm0 = src[2] - // mm1, mm3 --- Src[1] - // mm0 --- Src[2] - // mm7 for unpacking + // mm1, mm3 --- Src[1] + // mm0 --- Src[2] + // mm7 for unpacking - movq mm4, mm1 // b low - pmullw mm1, four_fifths // b * 4/5 low + movq mm4, mm1 // b low + pmullw mm1, four_fifths // b * 4/5 low - movq QWORD ptr [edi+ecx], mm0 // write des[4] + movq QWORD ptr [edi+ecx], mm0 // write des[4] - movq mm5, mm3 // b high - pmullw mm3, four_fifths // b * 4/5 high + movq mm5, mm3 // b high + pmullw mm3, four_fifths // b * 4/5 high - movq mm2, mm0 // c - pmullw mm4, one_fifth // b * 1/5 + movq mm2, mm0 // c + pmullw mm4, one_fifth // b * 1/5 - punpcklbw mm0, mm7 // c low - pmullw mm5, one_fifth // b * 1/5 + punpcklbw mm0, mm7 // c low + pmullw mm5, one_fifth // b * 1/5 - movq mm6, mm0 // make copy of c low - punpckhbw mm2, mm7 // c high + movq mm6, mm0 // make copy of c low + punpckhbw mm2, mm7 // c high - pmullw mm6, one_fifth // c * 1/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, one_fifth // c * 1/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, one_fifth // c * 1/5 high - paddw mm1, mm6 // b * 4/5 + c * 1/5 low + pmullw mm7, one_fifth // c * 1/5 high + paddw mm1, mm6 // b * 4/5 + c * 1/5 low - paddw mm3, mm7 // b * 4/5 + c * 1/5 high - movq mm6, mm0 // make copy of c low + paddw mm3, mm7 // b * 4/5 + c * 1/5 high + movq mm6, mm0 // make copy of c low - pmullw mm6, four_fifths // c * 4/5 low - movq mm7, mm2 // make copy of c high + pmullw mm6, four_fifths // c * 4/5 low + movq mm7, mm2 // make copy of c high - pmullw mm7, four_fifths // c * 4/5 high + pmullw mm7, four_fifths // c * 4/5 high - paddw mm4, mm6 // b * 1/5 + c * 4/5 low - paddw mm5, mm7 // b * 1/5 + c * 4/5 high + paddw mm4, mm6 // b * 1/5 + c * 4/5 low + paddw mm5, mm7 // b * 1/5 + c * 4/5 high - paddw mm1, round_values // + 128 - paddw mm3, round_values // + 128 + paddw mm1, round_values // + 128 + paddw mm3, round_values // + 128 - psrlw mm1, 8 - psrlw mm3, 8 + psrlw mm1, 8 + psrlw mm3, 8 - packuswb mm1, mm3 // des[2] - movq QWORD ptr [esi+ecx*2], mm1 // write des[2] + packuswb mm1, mm3 // des[2] + movq QWORD ptr [esi+ecx*2], mm1 // write des[2] - paddw mm4, round_values // + 128 - paddw mm5, round_values // + 128 + paddw mm4, round_values // + 128 + paddw mm5, round_values // + 128 - psrlw mm4, 8 - psrlw mm5, 8 + psrlw mm4, 8 + psrlw mm5, 8 - packuswb mm4, mm5 // des[3] - movq QWORD ptr [edi], mm4 // write des[3] + packuswb mm4, mm5 // des[3] + movq QWORD ptr [edi], mm4 // write des[3] - // mm0, mm2 --- Src[3] + // mm0, mm2 --- Src[3] - add edi, 8 - add esi, 8 + add edi, 8 + add esi, 8 - sub edx, 8 - jg last_vs_3_5_loop - } + sub edx, 8 + jg last_vs_3_5_loop + } } /**************************************************************************** @@ -992,52 +980,50 @@ void last_vertical_band_3_5_scale_mmx static void vertical_band_1_2_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - pxor mm7, mm7 // clear out mm7 - mov edx, dest_width // Loop counter + pxor mm7, mm7 // clear out mm7 + mov edx, dest_width // Loop counter - vs_1_2_loop: + vs_1_2_loop: - movq mm0, [esi] // get Src[0] - movq mm1, [esi + ecx * 2] // get Src[1] + movq mm0, [esi] // get Src[0] + movq mm1, [esi + ecx * 2] // get Src[1] - movq mm2, mm0 // make copy before unpack - movq mm3, mm1 // make copy before unpack + movq mm2, mm0 // make copy before unpack + movq mm3, mm1 // make copy before unpack - punpcklbw mm0, mm7 // low Src[0] - movq mm6, four_ones // mm6= 1, 1, 1, 1 + punpcklbw mm0, mm7 // low Src[0] + movq mm6, four_ones // mm6= 1, 1, 1, 1 - punpcklbw mm1, mm7 // low Src[1] - paddw mm0, mm1 // low (a + b) + punpcklbw mm1, mm7 // low Src[1] + paddw mm0, mm1 // low (a + b) - punpckhbw mm2, mm7 // high Src[0] - paddw mm0, mm6 // low (a + b + 1) + punpckhbw mm2, mm7 // high Src[0] + paddw mm0, mm6 // low (a + b + 1) - punpckhbw mm3, mm7 - paddw mm2, mm3 // high (a + b ) + punpckhbw mm3, mm7 + paddw mm2, mm3 // high (a + b ) - psraw mm0, 1 // low (a + b +1 )/2 - paddw mm2, mm6 // high (a + b + 1) + psraw mm0, 1 // low (a + b +1 )/2 + paddw mm2, mm6 // high (a + b + 1) - psraw mm2, 1 // high (a + b + 1)/2 - packuswb mm0, mm2 // pack results + psraw mm2, 1 // high (a + b + 1)/2 + packuswb mm0, mm2 // pack results - movq [esi+ecx], mm0 // write out eight bytes - add esi, 8 + movq [esi+ecx], mm0 // write out eight bytes + add esi, 8 - sub edx, 8 - jg vs_1_2_loop - } + sub edx, 8 + jg vs_1_2_loop + } } @@ -1063,28 +1049,26 @@ void vertical_band_1_2_scale_mmx static void last_vertical_band_1_2_scale_mmx ( - unsigned char *dest, - unsigned int dest_pitch, - unsigned int dest_width -) -{ - __asm - { - mov esi, dest // Get the source and destination pointer - mov ecx, dest_pitch // Get the pitch size + unsigned char *dest, + unsigned int dest_pitch, + unsigned int dest_width +) { + __asm { + mov esi, dest // Get the source and destination pointer + mov ecx, dest_pitch // Get the pitch size - mov edx, dest_width // Loop counter + mov edx, dest_width // Loop counter - last_vs_1_2_loop: + last_vs_1_2_loop: - movq mm0, [esi] // get Src[0] - movq [esi+ecx], mm0 // write out eight bytes + movq mm0, [esi] // get Src[0] + movq [esi+ecx], mm0 // write out eight bytes - add esi, 8 - sub edx, 8 + add esi, 8 + sub edx, 8 - jg last_vs_1_2_loop - } + jg last_vs_1_2_loop + } } /**************************************************************************** @@ -1108,106 +1092,104 @@ void last_vertical_band_1_2_scale_mmx static void horizontal_line_1_2_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; - __asm - { - mov esi, source - mov edi, dest + __asm { + mov esi, source + mov edi, dest - pxor mm7, mm7 - movq mm6, four_ones + pxor mm7, mm7 + movq mm6, four_ones - mov ecx, source_width + mov ecx, source_width - hs_1_2_loop: + hs_1_2_loop: - movq mm0, [esi] - movq mm1, [esi+1] + movq mm0, [esi] + movq mm1, [esi+1] - movq mm2, mm0 - movq mm3, mm1 + movq mm2, mm0 + movq mm3, mm1 - movq mm4, mm0 - punpcklbw mm0, mm7 + movq mm4, mm0 + punpcklbw mm0, mm7 - punpcklbw mm1, mm7 - paddw mm0, mm1 + punpcklbw mm1, mm7 + paddw mm0, mm1 - paddw mm0, mm6 - punpckhbw mm2, mm7 + paddw mm0, mm6 + punpckhbw mm2, mm7 - punpckhbw mm3, mm7 - paddw mm2, mm3 + punpckhbw mm3, mm7 + paddw mm2, mm3 - paddw mm2, mm6 - psraw mm0, 1 + paddw mm2, mm6 + psraw mm0, 1 - psraw mm2, 1 - packuswb mm0, mm2 + psraw mm2, 1 + packuswb mm0, mm2 - movq mm2, mm4 - punpcklbw mm2, mm0 + movq mm2, mm4 + punpcklbw mm2, mm0 - movq [edi], mm2 - punpckhbw mm4, mm0 + movq [edi], mm2 + punpckhbw mm4, mm0 - movq [edi+8], mm4 - add esi, 8 + movq [edi+8], mm4 + add esi, 8 - add edi, 16 - sub ecx, 8 + add edi, 16 + sub ecx, 8 - cmp ecx, 8 - jg hs_1_2_loop + cmp ecx, 8 + jg hs_1_2_loop // last eight pixel - movq mm0, [esi] - movq mm1, mm0 + movq mm0, [esi] + movq mm1, mm0 - movq mm2, mm0 - movq mm3, mm1 + movq mm2, mm0 + movq mm3, mm1 - psrlq mm1, 8 - psrlq mm3, 56 + psrlq mm1, 8 + psrlq mm3, 56 - psllq mm3, 56 - por mm1, mm3 + psllq mm3, 56 + por mm1, mm3 - movq mm3, mm1 - movq mm4, mm0 + movq mm3, mm1 + movq mm4, mm0 - punpcklbw mm0, mm7 - punpcklbw mm1, mm7 + punpcklbw mm0, mm7 + punpcklbw mm1, mm7 - paddw mm0, mm1 - paddw mm0, mm6 + paddw mm0, mm1 + paddw mm0, mm6 - punpckhbw mm2, mm7 - punpckhbw mm3, mm7 + punpckhbw mm2, mm7 + punpckhbw mm3, mm7 - paddw mm2, mm3 - paddw mm2, mm6 + paddw mm2, mm3 + paddw mm2, mm6 - psraw mm0, 1 - psraw mm2, 1 + psraw mm0, 1 + psraw mm2, 1 - packuswb mm0, mm2 - movq mm2, mm4 + packuswb mm0, mm2 + movq mm2, mm4 - punpcklbw mm2, mm0 - movq [edi], mm2 + punpcklbw mm2, mm0 + movq [edi], mm2 - punpckhbw mm4, mm0 - movq [edi+8], mm4 - } + punpckhbw mm4, mm0 + movq [edi+8], mm4 + } } @@ -1240,86 +1222,84 @@ __declspec(align(16)) const static unsigned short const54_1[] = {256, 192, 128, static void horizontal_line_5_4_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - /* - unsigned i; - unsigned int a, b, c, d, e; - unsigned char *des = dest; - const unsigned char *src = source; + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + /* + unsigned i; + unsigned int a, b, c, d, e; + unsigned char *des = dest; + const unsigned char *src = source; - (void) dest_width; + (void) dest_width; - for ( i=0; i>8); - des[2] = ((c*128 + d*128 + 128)>>8); - des[3] = ((d* 64 + e*192 + 128)>>8); + des[0] = a; + des[1] = ((b*192 + c* 64 + 128)>>8); + des[2] = ((c*128 + d*128 + 128)>>8); + des[3] = ((d* 64 + e*192 + 128)>>8); - src += 5; - des += 4; - } - */ - (void) dest_width; + src += 5; + des += 4; + } + */ + (void) dest_width; - __asm - { + __asm { - mov esi, source ; - mov edi, dest ; + mov esi, source; + mov edi, dest; - mov ecx, source_width ; - movq mm5, const54_1 ; + mov ecx, source_width; + movq mm5, const54_1; - pxor mm7, mm7 ; - movq mm6, const54_2 ; + pxor mm7, mm7; + movq mm6, const54_2; - movq mm4, round_values ; - lea edx, [esi+ecx] ; - horizontal_line_5_4_loop: + movq mm4, round_values; + lea edx, [esi+ecx]; + horizontal_line_5_4_loop: - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psrlq mm0, 8 ; - 01 02 03 04 05 06 07 xx - punpcklbw mm1, mm7 ; - xx 00 xx 01 xx 02 xx 03 + psrlq mm0, 8; + 01 02 03 04 05 06 07 xx + punpcklbw mm1, mm7; + xx 00 xx 01 xx 02 xx 03 - punpcklbw mm0, mm7 ; - xx 01 xx 02 xx 03 xx 04 - pmullw mm1, mm5 + punpcklbw mm0, mm7; + xx 01 xx 02 xx 03 xx 04 + pmullw mm1, mm5 - pmullw mm0, mm6 - add esi, 5 + pmullw mm0, mm6 + add esi, 5 - add edi, 4 - paddw mm1, mm0 + add edi, 4 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - cmp esi, edx - packuswb mm1, mm7 + cmp esi, edx + packuswb mm1, mm7 - movd DWORD PTR [edi-4], mm1 + movd DWORD PTR [edi-4], mm1 - jl horizontal_line_5_4_loop + jl horizontal_line_5_4_loop - } + } } __declspec(align(16)) const static unsigned short one_fourths[] = { 64, 64, 64, 64 }; @@ -1327,86 +1307,84 @@ __declspec(align(16)) const static unsigned short two_fourths[] = { 128, 128, __declspec(align(16)) const static unsigned short three_fourths[] = { 192, 192, 192, 192 }; static -void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_5_4_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - __asm - { - push ebx + __asm { + push ebx - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size + mov esi, source // Get the source and destination pointer + mov ecx, src_pitch // Get the pitch size - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 + mov edi, dest // tow lines below + pxor mm7, mm7 // clear out mm7 - mov edx, dest_pitch // Loop counter - mov ebx, dest_width + mov edx, dest_pitch // Loop counter + mov ebx, dest_width - vs_5_4_loop: + vs_5_4_loop: - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; + movd mm0, DWORD ptr [esi] // src[0]; + movd mm1, DWORD ptr [esi+ecx] // src[1]; - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // + movd mm2, DWORD ptr [esi+ecx*2] + lea eax, [esi+ecx*2] // - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 + punpcklbw mm1, mm7 + punpcklbw mm2, mm7 - movq mm3, mm2 - pmullw mm1, three_fourths + movq mm3, mm2 + pmullw mm1, three_fourths - pmullw mm2, one_fourths - movd mm4, [eax+ecx] + pmullw mm2, one_fourths + movd mm4, [eax+ecx] - pmullw mm3, two_fourths - punpcklbw mm4, mm7 + pmullw mm3, two_fourths + punpcklbw mm4, mm7 - movq mm5, mm4 - pmullw mm4, two_fourths + movq mm5, mm4 + pmullw mm4, two_fourths - paddw mm1, mm2 - movd mm6, [eax+ecx*2] + paddw mm1, mm2 + movd mm6, [eax+ecx*2] - pmullw mm5, one_fourths - paddw mm1, round_values; + pmullw mm5, one_fourths + paddw mm1, round_values; - paddw mm3, mm4 - psrlw mm1, 8 + paddw mm3, mm4 + psrlw mm1, 8 - punpcklbw mm6, mm7 - paddw mm3, round_values + punpcklbw mm6, mm7 + paddw mm3, round_values - pmullw mm6, three_fourths - psrlw mm3, 8 + pmullw mm6, three_fourths + psrlw mm3, 8 - packuswb mm1, mm7 - packuswb mm3, mm7 + packuswb mm1, mm7 + packuswb mm3, mm7 - movd DWORD PTR [edi], mm0 - movd DWORD PTR [edi+edx], mm1 + movd DWORD PTR [edi], mm0 + movd DWORD PTR [edi+edx], mm1 - paddw mm5, mm6 - movd DWORD PTR [edi+edx*2], mm3 + paddw mm5, mm6 + movd DWORD PTR [edi+edx*2], mm3 - lea eax, [edi+edx*2] - paddw mm5, round_values + lea eax, [edi+edx*2] + paddw mm5, round_values - psrlw mm5, 8 - add edi, 4 + psrlw mm5, 8 + add edi, 4 - packuswb mm5, mm7 - movd DWORD PTR [eax+edx], mm5 + packuswb mm5, mm7 + movd DWORD PTR [eax+edx], mm5 - add esi, 4 - sub ebx, 4 + add esi, 4 + sub ebx, 4 - jg vs_5_4_loop + jg vs_5_4_loop - pop ebx - } + pop ebx + } } @@ -1417,96 +1395,94 @@ __declspec(align(16)) const static unsigned short const53_2[] = {256, 171, 85, static void horizontal_line_5_3_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { - (void) dest_width; - __asm - { + (void) dest_width; + __asm { - mov esi, source ; - mov edi, dest ; + mov esi, source; + mov edi, dest; - mov ecx, source_width ; - movq mm5, const53_1 ; + mov ecx, source_width; + movq mm5, const53_1; - pxor mm7, mm7 ; - movq mm6, const53_2 ; + pxor mm7, mm7; + movq mm6, const53_2; - movq mm4, round_values ; - lea edx, [esi+ecx-5] ; - horizontal_line_5_3_loop: + movq mm4, round_values; + lea edx, [esi+ecx-5]; + horizontal_line_5_3_loop: - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx + psllw mm0, 8; + xx 00 xx 02 xx 04 xx 06 + psrlw mm1, 8; + 01 xx 03 xx 05 xx 07 xx - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx + psrlw mm0, 8; + 00 xx 02 xx 04 xx 06 xx + psllq mm1, 16; + xx xx 01 xx 03 xx 05 xx - pmullw mm0, mm6 + pmullw mm0, mm6 - pmullw mm1, mm5 - add esi, 5 + pmullw mm1, mm5 + add esi, 5 - add edi, 3 - paddw mm1, mm0 + add edi, 3 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - cmp esi, edx - packuswb mm1, mm7 + cmp esi, edx + packuswb mm1, mm7 - movd DWORD PTR [edi-3], mm1 - jl horizontal_line_5_3_loop + movd DWORD PTR [edi-3], mm1 + jl horizontal_line_5_3_loop -//exit condition - movq mm0, QWORD PTR [esi] ; - 00 01 02 03 04 05 06 07 - movq mm1, mm0 ; - 00 01 02 03 04 05 06 07 +// exit condition + movq mm0, QWORD PTR [esi]; + 00 01 02 03 04 05 06 07 + movq mm1, mm0; + 00 01 02 03 04 05 06 07 - psllw mm0, 8 ; - xx 00 xx 02 xx 04 xx 06 - psrlw mm1, 8 ; - 01 xx 03 xx 05 xx 07 xx + psllw mm0, 8; + xx 00 xx 02 xx 04 xx 06 + psrlw mm1, 8; + 01 xx 03 xx 05 xx 07 xx - psrlw mm0, 8 ; - 00 xx 02 xx 04 xx 06 xx - psllq mm1, 16 ; - xx xx 01 xx 03 xx 05 xx + psrlw mm0, 8; + 00 xx 02 xx 04 xx 06 xx + psllq mm1, 16; + xx xx 01 xx 03 xx 05 xx - pmullw mm0, mm6 + pmullw mm0, mm6 - pmullw mm1, mm5 - paddw mm1, mm0 + pmullw mm1, mm5 + paddw mm1, mm0 - paddw mm1, mm4 - psrlw mm1, 8 + paddw mm1, mm4 + psrlw mm1, 8 - packuswb mm1, mm7 - movd eax, mm1 + packuswb mm1, mm7 + movd eax, mm1 - mov edx, eax - shr edx, 16 + mov edx, eax + shr edx, 16 - mov WORD PTR[edi], ax - mov BYTE PTR[edi+2], dl + mov WORD PTR[edi], ax + mov BYTE PTR[edi+2], dl - } + } } @@ -1514,75 +1490,73 @@ __declspec(align(16)) const static unsigned short one_thirds[] = { 85, 85, 85 __declspec(align(16)) const static unsigned short two_thirds[] = { 171, 171, 171, 171 }; static -void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - __asm - { - push ebx + __asm { + push ebx - mov esi, source // Get the source and destination pointer - mov ecx, src_pitch // Get the pitch size + mov esi, source // Get the source and destination pointer + mov ecx, src_pitch // Get the pitch size - mov edi, dest // tow lines below - pxor mm7, mm7 // clear out mm7 + mov edi, dest // tow lines below + pxor mm7, mm7 // clear out mm7 - mov edx, dest_pitch // Loop counter - movq mm5, one_thirds + mov edx, dest_pitch // Loop counter + movq mm5, one_thirds - movq mm6, two_thirds - mov ebx, dest_width; + movq mm6, two_thirds + mov ebx, dest_width; - vs_5_3_loop: + vs_5_3_loop: - movd mm0, DWORD ptr [esi] // src[0]; - movd mm1, DWORD ptr [esi+ecx] // src[1]; + movd mm0, DWORD ptr [esi] // src[0]; + movd mm1, DWORD ptr [esi+ecx] // src[1]; - movd mm2, DWORD ptr [esi+ecx*2] - lea eax, [esi+ecx*2] // + movd mm2, DWORD ptr [esi+ecx*2] + lea eax, [esi+ecx*2] // - punpcklbw mm1, mm7 - punpcklbw mm2, mm7 + punpcklbw mm1, mm7 + punpcklbw mm2, mm7 - pmullw mm1, mm5 - pmullw mm2, mm6 + pmullw mm1, mm5 + pmullw mm2, mm6 - movd mm3, DWORD ptr [eax+ecx] - movd mm4, DWORD ptr [eax+ecx*2] + movd mm3, DWORD ptr [eax+ecx] + movd mm4, DWORD ptr [eax+ecx*2] - punpcklbw mm3, mm7 - punpcklbw mm4, mm7 + punpcklbw mm3, mm7 + punpcklbw mm4, mm7 - pmullw mm3, mm6 - pmullw mm4, mm5 + pmullw mm3, mm6 + pmullw mm4, mm5 - movd DWORD PTR [edi], mm0 - paddw mm1, mm2 + movd DWORD PTR [edi], mm0 + paddw mm1, mm2 - paddw mm1, round_values - psrlw mm1, 8 + paddw mm1, round_values + psrlw mm1, 8 - packuswb mm1, mm7 - paddw mm3, mm4 + packuswb mm1, mm7 + paddw mm3, mm4 - paddw mm3, round_values - movd DWORD PTR [edi+edx], mm1 + paddw mm3, round_values + movd DWORD PTR [edi+edx], mm1 - psrlw mm3, 8 - packuswb mm3, mm7 + psrlw mm3, 8 + packuswb mm3, mm7 - movd DWORD PTR [edi+edx*2], mm3 + movd DWORD PTR [edi+edx*2], mm3 - add edi, 4 - add esi, 4 + add edi, 4 + add esi, 4 - sub ebx, 4 - jg vs_5_3_loop + sub ebx, 4 + jg vs_5_3_loop - pop ebx - } + pop ebx + } } @@ -1609,48 +1583,45 @@ void vertical_band_5_3_scale_mmx(unsigned char *source, unsigned int src_pitch, static void horizontal_line_2_1_scale_mmx ( - const unsigned char *source, - unsigned int source_width, - unsigned char *dest, - unsigned int dest_width -) -{ - (void) dest_width; - (void) source_width; - __asm - { - mov esi, source - mov edi, dest - - pxor mm7, mm7 - mov ecx, dest_width - - xor edx, edx - hs_2_1_loop: - - movq mm0, [esi+edx*2] - psllw mm0, 8 - - psrlw mm0, 8 - packuswb mm0, mm7 - - movd DWORD Ptr [edi+edx], mm0; - add edx, 4 - - cmp edx, ecx - jl hs_2_1_loop - - } + const unsigned char *source, + unsigned int source_width, + unsigned char *dest, + unsigned int dest_width +) { + (void) dest_width; + (void) source_width; + __asm { + mov esi, source + mov edi, dest + + pxor mm7, mm7 + mov ecx, dest_width + + xor edx, edx + hs_2_1_loop: + + movq mm0, [esi+edx*2] + psllw mm0, 8 + + psrlw mm0, 8 + packuswb mm0, mm7 + + movd DWORD Ptr [edi+edx], mm0; + add edx, 4 + + cmp edx, ecx + jl hs_2_1_loop + + } } static -void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ - (void) dest_pitch; - (void) src_pitch; - vpx_memcpy(dest, source, dest_width); +void vertical_band_2_1_scale_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { + (void) dest_pitch; + (void) src_pitch; + vpx_memcpy(dest, source, dest_width); } @@ -1658,91 +1629,88 @@ __declspec(align(16)) const static unsigned short three_sixteenths[] = { 48, 4 __declspec(align(16)) const static unsigned short ten_sixteenths[] = { 160, 160, 160, 160 }; static -void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) -{ +void vertical_band_2_1_scale_i_mmx(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { - (void) dest_pitch; - __asm - { - mov esi, source - mov edi, dest + (void) dest_pitch; + __asm { + mov esi, source + mov edi, dest - mov eax, src_pitch - mov edx, dest_width + mov eax, src_pitch + mov edx, dest_width - pxor mm7, mm7 - sub esi, eax //back one line + pxor mm7, mm7 + sub esi, eax // back one line - lea ecx, [esi+edx]; - movq mm6, round_values; + lea ecx, [esi+edx]; + movq mm6, round_values; - movq mm5, three_sixteenths; - movq mm4, ten_sixteenths; + movq mm5, three_sixteenths; + movq mm4, ten_sixteenths; - vs_2_1_i_loop: - movd mm0, [esi] // - movd mm1, [esi+eax] // + vs_2_1_i_loop: + movd mm0, [esi] // + movd mm1, [esi+eax] // - movd mm2, [esi+eax*2] // - punpcklbw mm0, mm7 + movd mm2, [esi+eax*2] // + punpcklbw mm0, mm7 - pmullw mm0, mm5 - punpcklbw mm1, mm7 + pmullw mm0, mm5 + punpcklbw mm1, mm7 - pmullw mm1, mm4 - punpcklbw mm2, mm7 + pmullw mm1, mm4 + punpcklbw mm2, mm7 - pmullw mm2, mm5 - paddw mm0, round_values + pmullw mm2, mm5 + paddw mm0, round_values - paddw mm1, mm2 - paddw mm0, mm1 + paddw mm1, mm2 + paddw mm0, mm1 - psrlw mm0, 8 - packuswb mm0, mm7 + psrlw mm0, 8 + packuswb mm0, mm7 - movd DWORD PTR [edi], mm0 - add esi, 4 + movd DWORD PTR [edi], mm0 + add esi, 4 - add edi, 4; - cmp esi, ecx - jl vs_2_1_i_loop + add edi, 4; + cmp esi, ecx + jl vs_2_1_i_loop - } + } } void -register_mmxscalers(void) -{ - vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; - vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; - vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; - vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; - vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; - vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; - vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; - vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; - vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; - - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - - - - vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; - vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; - vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; - vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; - vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; - vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; - vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; +register_mmxscalers(void) { + vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_mmx; + vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_mmx; + vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_mmx; + vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_mmx; + vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_mmx; + vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_mmx; + vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_mmx; + vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_mmx; + vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_mmx; + + vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; + vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; + vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; + vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; + vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; + vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; + + + + vp8_vertical_band_5_4_scale = vertical_band_5_4_scale_mmx; + vp8_vertical_band_5_3_scale = vertical_band_5_3_scale_mmx; + vp8_vertical_band_2_1_scale = vertical_band_2_1_scale_mmx; + vp8_vertical_band_2_1_scale_i = vertical_band_2_1_scale_i_mmx; + vp8_horizontal_line_2_1_scale = horizontal_line_2_1_scale_mmx; + vp8_horizontal_line_5_3_scale = horizontal_line_5_3_scale_mmx; + vp8_horizontal_line_5_4_scale = horizontal_line_5_4_scale_mmx; diff --git a/vpx_scale/win32/scalesystemdependent.c b/vpx_scale/win32/scalesystemdependent.c index 19e61c3a5..98913d19a 100644 --- a/vpx_scale/win32/scalesystemdependent.c +++ b/vpx_scale/win32/scalesystemdependent.c @@ -46,46 +46,42 @@ extern void register_mmxscalers(void); * ****************************************************************************/ void -vp8_scale_machine_specific_config(void) -{ - // If MMX supported then set to use MMX versions of functions else - // use original 'C' versions. - int mmx_enabled; - int xmm_enabled; - int wmt_enabled; +vp8_scale_machine_specific_config(void) { + // If MMX supported then set to use MMX versions of functions else + // use original 'C' versions. + int mmx_enabled; + int xmm_enabled; + int wmt_enabled; - vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); + vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled); - if (mmx_enabled || xmm_enabled || wmt_enabled) - { - register_mmxscalers(); - } - else - { - vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; - vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; - vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; - vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; - vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; - vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; - vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; - vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; - vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; - vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; - vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; - vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; - vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; - vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; - vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; + if (mmx_enabled || xmm_enabled || wmt_enabled) { + register_mmxscalers(); + } else { + vp8_horizontal_line_1_2_scale = vp8cx_horizontal_line_1_2_scale_c; + vp8_vertical_band_1_2_scale = vp8cx_vertical_band_1_2_scale_c; + vp8_last_vertical_band_1_2_scale = vp8cx_last_vertical_band_1_2_scale_c; + vp8_horizontal_line_3_5_scale = vp8cx_horizontal_line_3_5_scale_c; + vp8_vertical_band_3_5_scale = vp8cx_vertical_band_3_5_scale_c; + vp8_last_vertical_band_3_5_scale = vp8cx_last_vertical_band_3_5_scale_c; + vp8_horizontal_line_3_4_scale = vp8cx_horizontal_line_3_4_scale_c; + vp8_vertical_band_3_4_scale = vp8cx_vertical_band_3_4_scale_c; + vp8_last_vertical_band_3_4_scale = vp8cx_last_vertical_band_3_4_scale_c; + vp8_horizontal_line_2_3_scale = vp8cx_horizontal_line_2_3_scale_c; + vp8_vertical_band_2_3_scale = vp8cx_vertical_band_2_3_scale_c; + vp8_last_vertical_band_2_3_scale = vp8cx_last_vertical_band_2_3_scale_c; + vp8_horizontal_line_4_5_scale = vp8cx_horizontal_line_4_5_scale_c; + vp8_vertical_band_4_5_scale = vp8cx_vertical_band_4_5_scale_c; + vp8_last_vertical_band_4_5_scale = vp8cx_last_vertical_band_4_5_scale_c; - vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; - vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; - vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; - vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; - vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; - vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; - vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; + vp8_vertical_band_5_4_scale = vp8cx_vertical_band_5_4_scale_c; + vp8_vertical_band_5_3_scale = vp8cx_vertical_band_5_3_scale_c; + vp8_vertical_band_2_1_scale = vp8cx_vertical_band_2_1_scale_c; + vp8_vertical_band_2_1_scale_i = vp8cx_vertical_band_2_1_scale_i_c; + vp8_horizontal_line_2_1_scale = vp8cx_horizontal_line_2_1_scale_c; + vp8_horizontal_line_5_3_scale = vp8cx_horizontal_line_5_3_scale_c; + vp8_horizontal_line_5_4_scale = vp8cx_horizontal_line_5_4_scale_c; - } + } } diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h index e78046720..1c18b098a 100644 --- a/vpx_scale/yv12config.h +++ b/vpx_scale/yv12config.h @@ -26,51 +26,50 @@ extern "C" #define INTERP_EXTEND 3 #endif - /************************************* - For INT_YUV: + /************************************* + For INT_YUV: - Y = (R+G*2+B)/4; - U = (R-B)/2; - V = (G*2 - R - B)/4; - And - R = Y+U-V; - G = Y+V; - B = Y-U-V; - ************************************/ - typedef enum - { - REG_YUV = 0, /* Regular yuv */ - INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ - } - YUV_TYPE; + Y = (R+G*2+B)/4; + U = (R-B)/2; + V = (G*2 - R - B)/4; + And + R = Y+U-V; + G = Y+V; + B = Y-U-V; + ************************************/ + typedef enum + { + REG_YUV = 0, /* Regular yuv */ + INT_YUV = 1 /* The type of yuv that can be tranfer to and from RGB through integer transform */ + } + YUV_TYPE; - typedef struct - { - int y_width; - int y_height; - int y_stride; -/* int yinternal_width; */ + typedef struct { + int y_width; + int y_height; + int y_stride; + /* int yinternal_width; */ - int uv_width; - int uv_height; - int uv_stride; -/* int uvinternal_width; */ + int uv_width; + int uv_height; + int uv_stride; + /* int uvinternal_width; */ - unsigned char *y_buffer; - unsigned char *u_buffer; - unsigned char *v_buffer; + unsigned char *y_buffer; + unsigned char *u_buffer; + unsigned char *v_buffer; - unsigned char *buffer_alloc; - int border; - int frame_size; - YUV_TYPE clrtype; + unsigned char *buffer_alloc; + int border; + int frame_size; + YUV_TYPE clrtype; - int corrupted; - int flags; - } YV12_BUFFER_CONFIG; + int corrupted; + int flags; + } YV12_BUFFER_CONFIG; - int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); - int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); + int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border); + int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf); #ifdef __cplusplus } diff --git a/vpx_scale/yv12extend.h b/vpx_scale/yv12extend.h index 0c239b9d1..c7fcedf21 100644 --- a/vpx_scale/yv12extend.h +++ b/vpx_scale/yv12extend.h @@ -19,12 +19,12 @@ extern "C" { #endif - void vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf); + void vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf); - /* Copy Y,U,V buffer data from src to dst, filling border of dst as well. */ + /* Copy Y,U,V buffer data from src to dst, filling border of dst as well. */ - void vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); - void vp8_yv12_copy_frame_yonly(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); + void vp8_yv12_copy_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); + void vp8_yv12_copy_frame_yonly(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc); #ifdef __cplusplus } -- cgit v1.2.3