summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Mathis Rosenhauer <rosenhauer@dkrz.de> 2013-07-25 17:20:43 +0200
committer Mathis Rosenhauer <rosenhauer@dkrz.de> 2013-07-31 16:33:07 +0200
commit 4157870c61f5427e32f14673f2ea92138387b1f3 (patch)
tree 68107bae32322025ebc002c2db01504e848f574e /src
parent 747465e8d2242ffee9eb7ea003994b0369b9cea6 (diff)
download libaec-4157870c61f5427e32f14673f2ea92138387b1f3.tar.gz
libaec-4157870c61f5427e32f14673f2ea92138387b1f3.tar.bz2
libaec-4157870c61f5427e32f14673f2ea92138387b1f3.zip
additional vectorization
Diffstat (limited to 'src')
-rw-r--r-- src/encode.c 63
-rw-r--r-- src/encode_accessors.c 26
2 files changed, 32 insertions, 57 deletions
diff --git a/src/encode.c b/src/encode.c
index 6c7e9ab..c6682f3 100644
--- a/src/encode.c
+++ b/src/encode.c
@@ -82,7 +82,7 @@ static inline void emit(struct internal_state *state,
bits -= state->bits;
*state->cds++ += (uint64_t)data >> bits;
- while (bits & ~7) {
+ while (bits > 8) {
bits -= 8;
*state->cds++ = data >> bits;
}
@@ -176,7 +176,7 @@ static inline void emitblock(struct aec_stream *strm, int k, int ref)
a += ((uint64_t)(*in++) & mask) << p;
}
- switch (p & ~ 7) {
+ switch (p & ~7) {
case 0:
o[0] = a >> 56;
o[1] = a >> 48;
@@ -253,26 +253,26 @@ static void preprocess_unsigned(struct aec_stream *strm)
uint32_t D;
struct internal_state *state = strm->state;
- const uint32_t *x = state->data_raw;
- uint32_t *d = state->data_pp;
+ const uint32_t *restrict x = state->data_raw;
+ uint32_t *restrict d = state->data_pp;
uint32_t xmax = state->xmax;
uint32_t rsi = strm->rsi * strm->block_size - 1;
int i;
d[0] = x[0];
for (i = 0; i < rsi; i++) {
- if (x[i+1] >= x[i]) {
- D = x[i+1] - x[i];
+ if (x[i + 1] >= x[i]) {
+ D = x[i + 1] - x[i];
if (D <= x[i])
- d[i+1] = 2 * D;
+ d[i + 1] = 2 * D;
else
- d[i+1] = x[i+1];
+ d[i + 1] = x[i + 1];
} else {
- D = x[i] - x[i+1];
+ D = x[i] - x[i + 1];
if (D <= xmax - x[i])
- d[i+1] = 2 * D - 1;
+ d[i + 1] = 2 * D - 1;
else
- d[i+1] = xmax - x[i+1];
+ d[i + 1] = xmax - x[i + 1];
}
}
state->ref = 1;
@@ -287,8 +287,8 @@ static void preprocess_signed(struct aec_stream *strm)
int64_t D;
struct internal_state *state = strm->state;
- uint32_t *d = state->data_pp;
- int32_t *x = (int32_t *)state->data_raw;
+ uint32_t *restrict d = state->data_pp;
+ int32_t *restrict x = (int32_t *)state->data_raw;
uint64_t m = 1ULL << (strm->bits_per_sample - 1);
int64_t xmax = state->xmax;
int64_t xmin = state->xmin;
@@ -299,26 +299,26 @@ static void preprocess_signed(struct aec_stream *strm)
x[0] = (x[0] ^ m) - m;
for (i = 0; i < rsi; i++) {
- x[i+1] = (x[i+1] ^ m) - m;
- if (x[i+1] < x[i]) {
- D = (int64_t)x[i] - x[i+1];
+ x[i + 1] = (x[i + 1] ^ m) - m;
+ if (x[i + 1] < x[i]) {
+ D = (int64_t)x[i] - x[i + 1];
if (D <= xmax - x[i])
d[i + 1] = 2 * D - 1;
else
- d[i + 1] = xmax - x[i+1];
+ d[i + 1] = xmax - x[i + 1];
} else {
- D = (int64_t)x[i+1] - x[i];
+ D = (int64_t)x[i + 1] - x[i];
if (D <= x[i] - xmin)
d[i + 1] = 2 * D;
else
- d[i + 1] = x[i+1] - xmin;
+ d[i + 1] = x[i + 1] - xmin;
}
}
state->ref = 1;
state->uncomp_len = (strm->block_size - 1) * strm->bits_per_sample;
}
-static uint64_t block_fs(struct aec_stream *strm, int k)
+static inline uint64_t block_fs(struct aec_stream *strm, int k)
{
/**
Sum FS of all samples in block for given splitting position.
@@ -328,16 +328,8 @@ static uint64_t block_fs(struct aec_stream *strm, int k)
uint64_t fs = 0;
struct internal_state *state = strm->state;
- for (i = 0; i < strm->block_size; i += 8)
- fs +=
- (uint64_t)(state->block[i + 0] >> k)
- + (uint64_t)(state->block[i + 1] >> k)
- + (uint64_t)(state->block[i + 2] >> k)
- + (uint64_t)(state->block[i + 3] >> k)
- + (uint64_t)(state->block[i + 4] >> k)
- + (uint64_t)(state->block[i + 5] >> k)
- + (uint64_t)(state->block[i + 6] >> k)
- + (uint64_t)(state->block[i + 7] >> k);
+ for (i = 0; i < strm->block_size; i++)
+ fs += (uint64_t)(state->block[i] >> k);
if (state->ref)
fs -= (uint64_t)(state->block[0] >> k);
@@ -632,14 +624,15 @@ static int m_check_zero_block(struct aec_stream *strm)
end of a segment or RSI.
*/
+ int i;
struct internal_state *state = strm->state;
- uint32_t *p = state->block + state->ref;
- uint32_t *end = state->block + strm->block_size;
+ uint32_t *p = state->block;
- while(p < end && *p == 0)
- p++;
+ for (i = state->ref; i < strm->block_size; i++)
+ if (p[i] != 0)
+ break;
- if (p < end) {
+ if (i < strm->block_size) {
if (state->zero_blocks) {
/* The current block isn't zero but we have to emit a
* previous zero block first. The current block will be
diff --git a/src/encode_accessors.c b/src/encode_accessors.c
index 2b8ccd5..c827d83 100644
--- a/src/encode_accessors.c
+++ b/src/encode_accessors.c
@@ -171,34 +171,16 @@ void aec_get_rsi_8(struct aec_stream *strm)
void aec_get_rsi_lsb_16(struct aec_stream *strm)
{
+ int i;
uint32_t *out = strm->state->data_raw;
const unsigned char *in = strm->next_in;
int rsi = strm->rsi * strm->block_size;
+ for (i = 0; i < rsi; i++)
+ out[i] = (uint32_t)in[2 * i] | ((uint32_t)in[2 * i + 1] << 8);
+
strm->next_in += 2 * rsi;
strm->avail_in -= 2 * rsi;
-
- while (rsi) {
- out[0] = (uint32_t)in[0]
- | ((uint32_t)in[1] << 8);
- out[1] = (uint32_t)in[2]
- | ((uint32_t)in[3] << 8);
- out[2] = (uint32_t)in[4]
- | ((uint32_t)in[5] << 8);
- out[3] = (uint32_t)in[6]
- | ((uint32_t)in[7] << 8);
- out[4] = (uint32_t)in[8]
- | ((uint32_t)in[9] << 8);
- out[5] = (uint32_t)in[10]
- | ((uint32_t)in[11] << 8);
- out[6] = (uint32_t)in[12]
- | ((uint32_t)in[13] << 8);
- out[7] = (uint32_t)in[14]
- | ((uint32_t)in[15] << 8);
- in += 16;
- out += 8;
- rsi -= 8;
- }
}
void aec_get_rsi_msb_16(struct aec_stream *strm)