Merge remote-tracking branch 'qatar/master'

* qatar/master:
  avplay: use libavresample for sample format conversion and channel mixing
  Fix compilation with YASM/NASM without AVX support.
  WMAL: do not output last frame again if nothing was decoded in current packet
  WMAL: do not start decoding if frame does not end in current packet
  adpcm-thp: fix invalid array indexing
  ppc: add const where needed in scalarproduct_int16_altivec()
  ppc: remove shift parameter from scalarproduct_int16_altivec()
  ppc: dsputil: do unaligned block accesses correctly
  dvenc: do not call dsputil functions with stride not a multiple of 16
  APIchanges: fill in some dates and commit hashes

Conflicts:
	doc/APIchanges
	ffplay.c
	libavcodec/adpcm.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2012-05-01 22:06:55 +02:00
commit 75f847aa6b
7 changed files with 131 additions and 139 deletions

View File

@ -2,14 +2,14 @@ Never assume the API of libav* to be stable unless at least 1 month has passed
since the last major version increase. since the last major version increase.
The last version increases were: The last version increases were:
libavcodec: 2012-01-27 libavcodec: 2012-01-27
libavdevice: 2011-04-18 libavdevice: 2011-04-18
libavfilter: 2011-04-18 libavfilter: 2011-04-18
libavformat: 2012-01-27 libavformat: 2012-01-27
libavresample: 2012-xx-xx libavresample: 2012-04-24
libpostproc: 2011-04-18 libpostproc: 2011-04-18
libswscale: 2011-06-20 libswscale: 2011-06-20
libavutil: 2011-04-18 libavutil: 2011-04-18
API changes, most recent first: API changes, most recent first:
@ -23,13 +23,13 @@ API changes, most recent first:
2012-03-26 - a67d9cf - lavfi 2.66.100 2012-03-26 - a67d9cf - lavfi 2.66.100
Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions. Add avfilter_fill_frame_from_{audio_,}buffer_ref() functions.
2012-04-25 - xxxxxxx - lavu 51.29.0 - cpu.h 2012-04-25 - 3527a73 - lavu 51.29.0 - cpu.h
Add av_parse_cpu_flags() Add av_parse_cpu_flags()
2012-xx-xx - xxxxxxx - lavr 0.0.0 2012-04-24 - c8af852 - lavr 0.0.0
Add libavresample audio conversion library Add libavresample audio conversion library
2012-xx-xx - xxxxxxx - lavu 51.28.0 - audio_fifo.h 2012-04-20 - 0c0d1bc - lavu 51.28.0 - audio_fifo.h
Add audio FIFO functions: Add audio FIFO functions:
av_audio_fifo_free() av_audio_fifo_free()
av_audio_fifo_alloc() av_audio_fifo_alloc()
@ -41,10 +41,10 @@ API changes, most recent first:
av_audio_fifo_size() av_audio_fifo_size()
av_audio_fifo_space() av_audio_fifo_space()
2012-xx-xx - xxxxxxx - lavfi 2.16.0 - avfiltergraph.h 2012-04-14 - lavfi 2.16.0 - avfiltergraph.h
Add avfilter_graph_parse2() d7bcc71 Add avfilter_graph_parse2().
2012-xx-xx - xxxxxxx - lavu 51.27.0 - samplefmt.h 2012-04-08 - 4d693b0 - lavu 51.27.0 - samplefmt.h
Add av_get_packed_sample_fmt() and av_get_planar_sample_fmt() Add av_get_packed_sample_fmt() and av_get_planar_sample_fmt()
2012-03-21 - b75c67d - lavu 51.43.100 2012-03-21 - b75c67d - lavu 51.43.100

View File

@ -40,7 +40,6 @@
#include "libavformat/avformat.h" #include "libavformat/avformat.h"
#include "libavdevice/avdevice.h" #include "libavdevice/avdevice.h"
#include "libswscale/swscale.h" #include "libswscale/swscale.h"
#include "libavcodec/audioconvert.h"
#include "libavutil/opt.h" #include "libavutil/opt.h"
#include "libavcodec/avfft.h" #include "libavcodec/avfft.h"
#include "libswresample/swresample.h" #include "libswresample/swresample.h"
@ -2130,7 +2129,8 @@ static int audio_decode_frame(VideoState *is, double *pts_ptr)
/* if no pts, then compute it */ /* if no pts, then compute it */
pts = is->audio_clock; pts = is->audio_clock;
*pts_ptr = pts; *pts_ptr = pts;
is->audio_clock += (double)data_size / (dec->channels * dec->sample_rate * av_get_bytes_per_sample(dec->sample_fmt)); is->audio_clock += (double)data_size /
(dec->channels * dec->sample_rate * av_get_bytes_per_sample(dec->sample_fmt));
#ifdef DEBUG #ifdef DEBUG
{ {
static double last_clock; static double last_clock;
@ -2373,9 +2373,9 @@ static void stream_component_close(VideoState *is, int stream_index)
SDL_CloseAudio(); SDL_CloseAudio();
packet_queue_end(&is->audioq); packet_queue_end(&is->audioq);
av_free_packet(&is->audio_pkt);
if (is->swr_ctx) if (is->swr_ctx)
swr_free(&is->swr_ctx); swr_free(&is->swr_ctx);
av_free_packet(&is->audio_pkt);
av_freep(&is->audio_buf1); av_freep(&is->audio_buf1);
is->audio_buf = NULL; is->audio_buf = NULL;
av_freep(&is->frame); av_freep(&is->frame);

View File

@ -1212,12 +1212,14 @@ static int adpcm_decode_frame(AVCodecContext *avctx, void *data,
int prev[2][2]; int prev[2][2];
int ch; int ch;
for (i = 0; i < 32; i++) for (i = 0; i < 2; i++)
table[0][i] = sign_extend(bytestream2_get_be16u(&gb), 16); for (n = 0; n < 16; n++)
table[i][n] = sign_extend(bytestream2_get_be16u(&gb), 16);
/* Initialize the previous sample. */ /* Initialize the previous sample. */
for (i = 0; i < 4; i++) for (i = 0; i < 2; i++)
prev[i>>1][i&1] = sign_extend(bytestream2_get_be16u(&gb), 16); for (n = 0; n < 2; n++)
prev[i][n] = sign_extend(bytestream2_get_be16u(&gb), 16);
for (ch = 0; ch <= st; ch++) { for (ch = 0; ch <= st; ch++) {
samples = (short *)c->frame.data[0] + ch; samples = (short *)c->frame.data[0] + ch;

View File

@ -668,7 +668,7 @@ static int dv_encode_video_segment(AVCodecContext *avctx, void *arg)
int mb_x, mb_y, c_offset, linesize, y_stride; int mb_x, mb_y, c_offset, linesize, y_stride;
uint8_t* y_ptr; uint8_t* y_ptr;
uint8_t* dif; uint8_t* dif;
LOCAL_ALIGNED_8(uint8_t, scratch, [64]); LOCAL_ALIGNED_8(uint8_t, scratch, [128]);
EncBlockInfo enc_blks[5*DV_MAX_BPM]; EncBlockInfo enc_blks[5*DV_MAX_BPM];
PutBitContext pbs[5*DV_MAX_BPM]; PutBitContext pbs[5*DV_MAX_BPM];
PutBitContext* pb; PutBitContext* pb;
@ -723,10 +723,10 @@ static int dv_encode_video_segment(AVCodecContext *avctx, void *arg)
b[0] = c_ptr[0]; b[1] = c_ptr[1]; b[2] = c_ptr[2]; b[3] = c_ptr[3]; b[0] = c_ptr[0]; b[1] = c_ptr[1]; b[2] = c_ptr[2]; b[3] = c_ptr[3];
b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3]; b[4] = d[0]; b[5] = d[1]; b[6] = d[2]; b[7] = d[3];
c_ptr += linesize; c_ptr += linesize;
b += 8; b += 16;
} }
c_ptr = scratch; c_ptr = scratch;
linesize = 8; linesize = 16;
} }
vs_bit_size += dv_init_enc_block( enc_blk++, c_ptr , linesize, s, 1); vs_bit_size += dv_init_enc_block( enc_blk++, c_ptr , linesize, s, 1);

View File

@ -34,7 +34,9 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
int i; int i;
int s; int s;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
vector unsigned char *tv; vector unsigned char perm1 = vec_lvsl(0, pix2);
vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
vector unsigned char pix2l, pix2r;
vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
vector unsigned int sad; vector unsigned int sad;
vector signed int sumdiffs; vector signed int sumdiffs;
@ -45,14 +47,11 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
/* Read unaligned pixels into our vectors. The vectors are as follows: /* Read unaligned pixels into our vectors. The vectors are as follows:
pix1v: pix1[0]-pix1[15] pix1v: pix1[0]-pix1[15]
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] */ pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] */
tv = (vector unsigned char *) pix1; pix1v = vec_ld( 0, pix1);
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); pix2l = vec_ld( 0, pix2);
pix2r = vec_ld(16, pix2);
tv = (vector unsigned char *) &pix2[0]; pix2v = vec_perm(pix2l, pix2r, perm1);
pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0])); pix2iv = vec_perm(pix2l, pix2r, perm2);
tv = (vector unsigned char *) &pix2[1];
pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));
/* Calculate the average vector */ /* Calculate the average vector */
avgv = vec_avg(pix2v, pix2iv); avgv = vec_avg(pix2v, pix2iv);
@ -79,7 +78,8 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
int i; int i;
int s; int s;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
vector unsigned char *tv; vector unsigned char perm = vec_lvsl(0, pix2);
vector unsigned char pix2l, pix2r;
vector unsigned char pix1v, pix2v, pix3v, avgv, t5; vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
vector unsigned int sad; vector unsigned int sad;
vector signed int sumdiffs; vector signed int sumdiffs;
@ -95,18 +95,19 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
Read unaligned pixels into our vectors. The vectors are as follows: Read unaligned pixels into our vectors. The vectors are as follows:
pix2v: pix2[0]-pix2[15] pix2v: pix2[0]-pix2[15]
Split the pixel vectors into shorts */ Split the pixel vectors into shorts */
tv = (vector unsigned char *) &pix2[0]; pix2l = vec_ld( 0, pix2);
pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0])); pix2r = vec_ld(15, pix2);
pix2v = vec_perm(pix2l, pix2r, perm);
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read unaligned pixels into our vectors. The vectors are as follows: /* Read unaligned pixels into our vectors. The vectors are as follows:
pix1v: pix1[0]-pix1[15] pix1v: pix1[0]-pix1[15]
pix3v: pix3[0]-pix3[15] */ pix3v: pix3[0]-pix3[15] */
tv = (vector unsigned char *) pix1; pix1v = vec_ld(0, pix1);
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
tv = (vector unsigned char *) &pix3[0]; pix2l = vec_ld( 0, pix3);
pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0])); pix2r = vec_ld(15, pix3);
pix3v = vec_perm(pix2l, pix2r, perm);
/* Calculate the average vector */ /* Calculate the average vector */
avgv = vec_avg(pix2v, pix3v); avgv = vec_avg(pix2v, pix3v);
@ -137,7 +138,10 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
uint8_t *pix3 = pix2 + line_size; uint8_t *pix3 = pix2 + line_size;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2);
vector unsigned char *tv, avgv, t5; vector unsigned char avgv, t5;
vector unsigned char perm1 = vec_lvsl(0, pix2);
vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
vector unsigned char pix2l, pix2r;
vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
@ -157,11 +161,10 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
Read unaligned pixels into our vectors. The vectors are as follows: Read unaligned pixels into our vectors. The vectors are as follows:
pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16]
Split the pixel vectors into shorts */ Split the pixel vectors into shorts */
tv = (vector unsigned char *) &pix2[0]; pix2l = vec_ld( 0, pix2);
pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0])); pix2r = vec_ld(16, pix2);
pix2v = vec_perm(pix2l, pix2r, perm1);
tv = (vector unsigned char *) &pix2[1]; pix2iv = vec_perm(pix2l, pix2r, perm2);
pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1]));
pix2hv = (vector unsigned short) vec_mergeh(zero, pix2v); pix2hv = (vector unsigned short) vec_mergeh(zero, pix2v);
pix2lv = (vector unsigned short) vec_mergel(zero, pix2v); pix2lv = (vector unsigned short) vec_mergel(zero, pix2v);
@ -174,14 +177,12 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
/* Read unaligned pixels into our vectors. The vectors are as follows: /* Read unaligned pixels into our vectors. The vectors are as follows:
pix1v: pix1[0]-pix1[15] pix1v: pix1[0]-pix1[15]
pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16] */ pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16] */
tv = (vector unsigned char *) pix1; pix1v = vec_ld(0, pix1);
pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
tv = (vector unsigned char *) &pix3[0]; pix2l = vec_ld( 0, pix3);
pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0])); pix2r = vec_ld(16, pix3);
pix3v = vec_perm(pix2l, pix2r, perm1);
tv = (vector unsigned char *) &pix3[1]; pix3iv = vec_perm(pix2l, pix2r, perm2);
pix3iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[1]));
/* Note that AltiVec does have vec_avg, but this works on vector pairs /* Note that AltiVec does have vec_avg, but this works on vector pairs
and rounds up. We could do avg(avg(a,b),avg(c,d)), but the rounding and rounds up. We could do avg(avg(a,b),avg(c,d)), but the rounding
@ -230,7 +231,7 @@ static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char perm1, perm2, pix1v_low, pix1v_high, pix2v_low, pix2v_high; vector unsigned char perm = vec_lvsl(0, pix2);
vector unsigned char t1, t2, t3,t4, t5; vector unsigned char t1, t2, t3,t4, t5;
vector unsigned int sad; vector unsigned int sad;
vector signed int sumdiffs; vector signed int sumdiffs;
@ -240,14 +241,10 @@ static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2 */ /* Read potentially unaligned pixels into t1 and t2 */
perm1 = vec_lvsl(0, pix1); vector unsigned char pix2l = vec_ld( 0, pix2);
pix1v_high = vec_ld( 0, pix1); vector unsigned char pix2r = vec_ld(15, pix2);
pix1v_low = vec_ld(15, pix1); t1 = vec_ld(0, pix1);
perm2 = vec_lvsl(0, pix2); t2 = vec_perm(pix2l, pix2r, perm);
pix2v_high = vec_ld( 0, pix2);
pix2v_low = vec_ld(15, pix2);
t1 = vec_perm(pix1v_high, pix1v_low, perm1);
t2 = vec_perm(pix2v_high, pix2v_low, perm2);
/* Calculate a sum of abs differences vector */ /* Calculate a sum of abs differences vector */
t3 = vec_max(t1, t2); t3 = vec_max(t1, t2);
@ -274,25 +271,25 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; const vector unsigned char permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
vector unsigned char perm1 = vec_lvsl(0, pix1);
vector unsigned char perm2 = vec_lvsl(0, pix2);
vector unsigned char t1, t2, t3,t4, t5; vector unsigned char t1, t2, t3,t4, t5;
vector unsigned int sad; vector unsigned int sad;
vector signed int sumdiffs; vector signed int sumdiffs;
sad = (vector unsigned int)vec_splat_u32(0); sad = (vector unsigned int)vec_splat_u32(0);
permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2 /* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8, Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */ mask out the last 8 pixels. The 0s don't change the sum. */
perm1 = vec_lvsl(0, pix1); vector unsigned char pix1l = vec_ld( 0, pix1);
pix1v = (vector unsigned char *) pix1; vector unsigned char pix1r = vec_ld(15, pix1);
perm2 = vec_lvsl(0, pix2); vector unsigned char pix2l = vec_ld( 0, pix2);
pix2v = (vector unsigned char *) pix2; vector unsigned char pix2r = vec_ld(15, pix2);
t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear); t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear);
t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear); t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear);
/* Calculate a sum of abs differences vector */ /* Calculate a sum of abs differences vector */
t3 = vec_max(t1, t2); t3 = vec_max(t1, t2);
@ -319,7 +316,7 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size)
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char *tv; vector unsigned char perm = vec_lvsl(0, pix);
vector unsigned char pixv; vector unsigned char pixv;
vector unsigned int sv; vector unsigned int sv;
vector signed int sum; vector signed int sum;
@ -329,8 +326,9 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size)
s = 0; s = 0;
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
/* Read in the potentially unaligned pixels */ /* Read in the potentially unaligned pixels */
tv = (vector unsigned char *) pix; vector unsigned char pixl = vec_ld( 0, pix);
pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix)); vector unsigned char pixr = vec_ld(15, pix);
pixv = vec_perm(pixl, pixr, perm);
/* Square the values, and add them to our sum */ /* Square the values, and add them to our sum */
sv = vec_msum(pixv, pixv, sv); sv = vec_msum(pixv, pixv, sv);
@ -355,26 +353,25 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char perm1, perm2, permclear, *pix1v, *pix2v; const vector unsigned char permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
vector unsigned char perm1 = vec_lvsl(0, pix1);
vector unsigned char perm2 = vec_lvsl(0, pix2);
vector unsigned char t1, t2, t3,t4, t5; vector unsigned char t1, t2, t3,t4, t5;
vector unsigned int sum; vector unsigned int sum;
vector signed int sumsqr; vector signed int sumsqr;
sum = (vector unsigned int)vec_splat_u32(0); sum = (vector unsigned int)vec_splat_u32(0);
permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0};
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2 /* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8, Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */ mask out the last 8 pixels. The 0s don't change the sum. */
perm1 = vec_lvsl(0, pix1); vector unsigned char pix1l = vec_ld( 0, pix1);
pix1v = (vector unsigned char *) pix1; vector unsigned char pix1r = vec_ld(15, pix1);
perm2 = vec_lvsl(0, pix2); vector unsigned char pix2l = vec_ld( 0, pix2);
pix2v = (vector unsigned char *) pix2; vector unsigned char pix2r = vec_ld(15, pix2);
t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear); t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear);
t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear); t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear);
/* Since we want to use unsigned chars, we can take advantage /* Since we want to use unsigned chars, we can take advantage
of the fact that abs(a-b)^2 = (a-b)^2. */ of the fact that abs(a-b)^2 = (a-b)^2. */
@ -409,7 +406,7 @@ static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char perm1, perm2, *pix1v, *pix2v; vector unsigned char perm = vec_lvsl(0, pix2);
vector unsigned char t1, t2, t3,t4, t5; vector unsigned char t1, t2, t3,t4, t5;
vector unsigned int sum; vector unsigned int sum;
vector signed int sumsqr; vector signed int sumsqr;
@ -418,12 +415,10 @@ static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2 */ /* Read potentially unaligned pixels into t1 and t2 */
perm1 = vec_lvsl(0, pix1); vector unsigned char pix2l = vec_ld( 0, pix2);
pix1v = (vector unsigned char *) pix1; vector unsigned char pix2r = vec_ld(15, pix2);
perm2 = vec_lvsl(0, pix2); t1 = vec_ld(0, pix1);
pix2v = (vector unsigned char *) pix2; t2 = vec_perm(pix2l, pix2r, perm);
t1 = vec_perm(pix1v[0], pix1v[1], perm1);
t2 = vec_perm(pix2v[0], pix2v[1], perm2);
/* Since we want to use unsigned chars, we can take advantage /* Since we want to use unsigned chars, we can take advantage
of the fact that abs(a-b)^2 = (a-b)^2. */ of the fact that abs(a-b)^2 = (a-b)^2. */
@ -451,7 +446,7 @@ static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
static int pix_sum_altivec(uint8_t * pix, int line_size) static int pix_sum_altivec(uint8_t * pix, int line_size)
{ {
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0);
vector unsigned char perm, *pixv; vector unsigned char perm = vec_lvsl(0, pix);
vector unsigned char t1; vector unsigned char t1;
vector unsigned int sad; vector unsigned int sad;
vector signed int sumdiffs; vector signed int sumdiffs;
@ -463,9 +458,9 @@ static int pix_sum_altivec(uint8_t * pix, int line_size)
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
/* Read the potentially unaligned 16 pixels into t1 */ /* Read the potentially unaligned 16 pixels into t1 */
perm = vec_lvsl(0, pix); vector unsigned char pixl = vec_ld( 0, pix);
pixv = (vector unsigned char *) pix; vector unsigned char pixr = vec_ld(15, pix);
t1 = vec_perm(pixv[0], pixv[1], perm); t1 = vec_perm(pixl, pixr, perm);
/* Add each 4 pixel group together and put 4 results into sad */ /* Add each 4 pixel group together and put 4 results into sad */
sad = vec_sum4s(t1, sad); sad = vec_sum4s(t1, sad);
@ -484,7 +479,8 @@ static int pix_sum_altivec(uint8_t * pix, int line_size)
static void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size) static void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{ {
int i; int i;
vector unsigned char perm, bytes, *pixv; vector unsigned char perm = vec_lvsl(0, pixels);
vector unsigned char bytes;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
vector signed short shorts; vector signed short shorts;
@ -492,9 +488,9 @@ static void get_pixels_altivec(DCTELEM *restrict block, const uint8_t *pixels, i
// Read potentially unaligned pixels. // Read potentially unaligned pixels.
// We're reading 16 pixels, and actually only want 8, // We're reading 16 pixels, and actually only want 8,
// but we simply ignore the extras. // but we simply ignore the extras.
perm = vec_lvsl(0, pixels); vector unsigned char pixl = vec_ld( 0, pixels);
pixv = (vector unsigned char *) pixels; vector unsigned char pixr = vec_ld(15, pixels);
bytes = vec_perm(pixv[0], pixv[1], perm); bytes = vec_perm(pixl, pixr, perm);
// convert the bytes into shorts // convert the bytes into shorts
shorts = (vector signed short)vec_mergeh(zero, bytes); shorts = (vector signed short)vec_mergeh(zero, bytes);
@ -510,7 +506,9 @@ static void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
const uint8_t *s2, int stride) const uint8_t *s2, int stride)
{ {
int i; int i;
vector unsigned char perm, bytes, *pixv; vector unsigned char perm1 = vec_lvsl(0, s1);
vector unsigned char perm2 = vec_lvsl(0, s2);
vector unsigned char bytes, pixl, pixr;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0);
vector signed short shorts1, shorts2; vector signed short shorts1, shorts2;
@ -518,17 +516,17 @@ static void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
// Read potentially unaligned pixels // Read potentially unaligned pixels
// We're reading 16 pixels, and actually only want 8, // We're reading 16 pixels, and actually only want 8,
// but we simply ignore the extras. // but we simply ignore the extras.
perm = vec_lvsl(0, s1); pixl = vec_ld( 0, s1);
pixv = (vector unsigned char *) s1; pixr = vec_ld(15, s1);
bytes = vec_perm(pixv[0], pixv[1], perm); bytes = vec_perm(pixl, pixr, perm1);
// convert the bytes into shorts // convert the bytes into shorts
shorts1 = (vector signed short)vec_mergeh(zero, bytes); shorts1 = (vector signed short)vec_mergeh(zero, bytes);
// Do the same for the second block of pixels // Do the same for the second block of pixels
perm = vec_lvsl(0, s2); pixl = vec_ld( 0, s2);
pixv = (vector unsigned char *) s2; pixr = vec_ld(15, s2);
bytes = vec_perm(pixv[0], pixv[1], perm); bytes = vec_perm(pixl, pixr, perm2);
// convert the bytes into shorts // convert the bytes into shorts
shorts2 = (vector signed short)vec_mergeh(zero, bytes); shorts2 = (vector signed short)vec_mergeh(zero, bytes);
@ -550,17 +548,17 @@ static void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
// Read potentially unaligned pixels // Read potentially unaligned pixels
// We're reading 16 pixels, and actually only want 8, // We're reading 16 pixels, and actually only want 8,
// but we simply ignore the extras. // but we simply ignore the extras.
perm = vec_lvsl(0, s1); pixl = vec_ld( 0, s1);
pixv = (vector unsigned char *) s1; pixr = vec_ld(15, s1);
bytes = vec_perm(pixv[0], pixv[1], perm); bytes = vec_perm(pixl, pixr, perm1);
// convert the bytes into shorts // convert the bytes into shorts
shorts1 = (vector signed short)vec_mergeh(zero, bytes); shorts1 = (vector signed short)vec_mergeh(zero, bytes);
// Do the same for the second block of pixels // Do the same for the second block of pixels
perm = vec_lvsl(0, s2); pixl = vec_ld( 0, s2);
pixv = (vector unsigned char *) s2; pixr = vec_ld(15, s2);
bytes = vec_perm(pixv[0], pixv[1], perm); bytes = vec_perm(pixl, pixr, perm2);
// convert the bytes into shorts // convert the bytes into shorts
shorts2 = (vector signed short)vec_mergeh(zero, bytes); shorts2 = (vector signed short)vec_mergeh(zero, bytes);

View File

@ -79,28 +79,20 @@ static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
return u.score[3]; return u.score[3];
} }
static int32_t scalarproduct_int16_altivec(int16_t *v1, const int16_t *v2, static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
int order, const int shift) int order)
{ {
int i; int i;
LOAD_ZERO; LOAD_ZERO;
register vec_s16 vec1, *pv; const vec_s16 *pv;
register vec_s16 vec1;
register vec_s32 res = vec_splat_s32(0), t; register vec_s32 res = vec_splat_s32(0), t;
register vec_u32 shifts;
int32_t ires; int32_t ires;
shifts = zero_u32v;
if(shift & 0x10) shifts = vec_add(shifts, vec_sl(vec_splat_u32(0x08), vec_splat_u32(0x1)));
if(shift & 0x08) shifts = vec_add(shifts, vec_splat_u32(0x08));
if(shift & 0x04) shifts = vec_add(shifts, vec_splat_u32(0x04));
if(shift & 0x02) shifts = vec_add(shifts, vec_splat_u32(0x02));
if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));
for(i = 0; i < order; i += 8){ for(i = 0; i < order; i += 8){
pv = (vec_s16*)v1; pv = (const vec_s16*)v1;
vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1)); vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));
t = vec_msum(vec1, vec_ld(0, v2), zero_s32v); t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
t = vec_sr(t, shifts);
res = vec_sums(t, res); res = vec_sums(t, res);
v1 += 8; v1 += 8;
v2 += 8; v2 += 8;
@ -114,31 +106,31 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1, const int16_t *
{ {
LOAD_ZERO; LOAD_ZERO;
vec_s16 *pv1 = (vec_s16*)v1; vec_s16 *pv1 = (vec_s16*)v1;
vec_s16 *pv2 = (vec_s16*)v2;
vec_s16 *pv3 = (vec_s16*)v3;
register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul}; register vec_s16 muls = {mul,mul,mul,mul,mul,mul,mul,mul};
register vec_s16 t0, t1, i0, i1; register vec_s16 t0, t1, i0, i1, i4;
register vec_s16 i2 = pv2[0], i3 = pv3[0]; register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3);
register vec_s32 res = zero_s32v; register vec_s32 res = zero_s32v;
register vec_u8 align = vec_lvsl(0, v2); register vec_u8 align = vec_lvsl(0, v2);
int32_t ires; int32_t ires;
order >>= 4; order >>= 4;
do { do {
t0 = vec_perm(i2, pv2[1], align); i1 = vec_ld(16, v2);
i2 = pv2[2]; t0 = vec_perm(i2, i1, align);
t1 = vec_perm(pv2[1], i2, align); i2 = vec_ld(32, v2);
t1 = vec_perm(i1, i2, align);
i0 = pv1[0]; i0 = pv1[0];
i1 = pv1[1]; i1 = pv1[1];
res = vec_msum(t0, i0, res); res = vec_msum(t0, i0, res);
res = vec_msum(t1, i1, res); res = vec_msum(t1, i1, res);
t0 = vec_perm(i3, pv3[1], align); i4 = vec_ld(16, v3);
i3 = pv3[2]; t0 = vec_perm(i3, i4, align);
t1 = vec_perm(pv3[1], i3, align); i3 = vec_ld(32, v3);
t1 = vec_perm(i4, i3, align);
pv1[0] = vec_mladd(t0, muls, i0); pv1[0] = vec_mladd(t0, muls, i0);
pv1[1] = vec_mladd(t1, muls, i1); pv1[1] = vec_mladd(t1, muls, i1);
pv1 += 2; pv1 += 2;
pv2 += 2; v2 += 8;
pv3 += 2; v3 += 8;
} while(--order); } while(--order);
res = vec_splat(vec_sums(res, zero_s32v), 3); res = vec_splat(vec_sums(res, zero_s32v), 3);
vec_ste(res, 0, &ires); vec_ste(res, 0, &ires);

View File

@ -1216,8 +1216,8 @@ static int decode_packet(AVCodecContext *avctx, void *data, int *got_frame_ptr,
save_bits(s, gb, num_bits_prev_frame, 1); save_bits(s, gb, num_bits_prev_frame, 1);
/* decode the cross packet frame if it is valid */ /* decode the cross packet frame if it is valid */
if (!s->packet_loss) if (num_bits_prev_frame < remaining_packet_bits && !s->packet_loss)
decode_frame(s); decode_frame(s);
} else if (s->num_saved_bits - s->frame_offset) { } else if (s->num_saved_bits - s->frame_offset) {
av_dlog(avctx, "ignoring %x previously saved bits\n", av_dlog(avctx, "ignoring %x previously saved bits\n",
s->num_saved_bits - s->frame_offset); s->num_saved_bits - s->frame_offset);