ffmpeg / libavcodec / ppc / float_altivec.c @ 12802ec0
History  View  Annotate  Download (4.23 KB)
1 
/*


2 
* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>

3 
*

4 
* This file is part of FFmpeg.

5 
*

6 
* FFmpeg is free software; you can redistribute it and/or

7 
* modify it under the terms of the GNU Lesser General Public

8 
* License as published by the Free Software Foundation; either

9 
* version 2.1 of the License, or (at your option) any later version.

10 
*

11 
* FFmpeg is distributed in the hope that it will be useful,

12 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

13 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

14 
* Lesser General Public License for more details.

15 
*

16 
* You should have received a copy of the GNU Lesser General Public

17 
* License along with FFmpeg; if not, write to the Free Software

18 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

19 
*/

20  
21 
#include "libavcodec/dsputil.h" 
22  
23 
#include "dsputil_altivec.h" 
24 
#include "util_altivec.h" 
25  
26 
static void vector_fmul_altivec(float *dst, const float *src0, const float *src1, int len) 
27 
{ 
28 
int i;

29 
vector float d0, d1, s, zero = (vector float)vec_splat_u32(0); 
30 
for(i=0; i<len7; i+=8) { 
31 
d0 = vec_ld(0, src0+i);

32 
s = vec_ld(0, src1+i);

33 
d1 = vec_ld(16, src0+i);

34 
d0 = vec_madd(d0, s, zero); 
35 
d1 = vec_madd(d1, vec_ld(16,src1+i), zero);

36 
vec_st(d0, 0, dst+i);

37 
vec_st(d1, 16, dst+i);

38 
} 
39 
} 
40  
41 
static void vector_fmul_reverse_altivec(float *dst, const float *src0, 
42 
const float *src1, int len) 
43 
{ 
44 
int i;

45 
vector float d, s0, s1, h0, l0,

46 
s2, s3, zero = (vector float)vec_splat_u32(0); 
47 
src1 += len4;

48 
for(i=0; i<len7; i+=8) { 
49 
s1 = vec_ld(0, src1i); // [a,b,c,d] 
50 
s0 = vec_ld(0, src0+i);

51 
l0 = vec_mergel(s1, s1); // [c,c,d,d]

52 
s3 = vec_ld(16, src1i);

53 
h0 = vec_mergeh(s1, s1); // [a,a,b,b]

54 
s2 = vec_ld(16, src0+i);

55 
s1 = vec_mergeh(vec_mergel(l0,h0), // [d,b,d,b]

56 
vec_mergeh(l0,h0)); // [c,a,c,a]

57 
// [d,c,b,a]

58 
l0 = vec_mergel(s3, s3); 
59 
d = vec_madd(s0, s1, zero); 
60 
h0 = vec_mergeh(s3, s3); 
61 
vec_st(d, 0, dst+i);

62 
s3 = vec_mergeh(vec_mergel(l0,h0), 
63 
vec_mergeh(l0,h0)); 
64 
d = vec_madd(s2, s3, zero); 
65 
vec_st(d, 16, dst+i);

66 
} 
67 
} 
68  
69 
static void vector_fmul_add_altivec(float *dst, const float *src0, 
70 
const float *src1, const float *src2, 
71 
int len)

72 
{ 
73 
int i;

74 
vector float d, s0, s1, s2, t0, t1, edges;

75 
vector unsigned char align = vec_lvsr(0,dst), 
76 
mask = vec_lvsl(0, dst);

77  
78 
for (i=0; i<len3; i+=4) { 
79 
t0 = vec_ld(0, dst+i);

80 
t1 = vec_ld(15, dst+i);

81 
s0 = vec_ld(0, src0+i);

82 
s1 = vec_ld(0, src1+i);

83 
s2 = vec_ld(0, src2+i);

84 
edges = vec_perm(t1 ,t0, mask); 
85 
d = vec_madd(s0,s1,s2); 
86 
t1 = vec_perm(d, edges, align); 
87 
t0 = vec_perm(edges, d, align); 
88 
vec_st(t1, 15, dst+i);

89 
vec_st(t0, 0, dst+i);

90 
} 
91 
} 
92  
93 
static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, int len) 
94 
{ 
95 
vector float zero, t0, t1, s0, s1, wi, wj;

96 
const vector unsigned char reverse = vcprm(3,2,1,0); 
97 
int i,j;

98  
99 
dst += len; 
100 
win += len; 
101 
src0+= len; 
102  
103 
zero = (vector float)vec_splat_u32(0); 
104  
105 
for(i=len*4, j=len*416; i<0; i+=16, j=16) { 
106 
s0 = vec_ld(i, src0); 
107 
s1 = vec_ld(j, src1); 
108 
wi = vec_ld(i, win); 
109 
wj = vec_ld(j, win); 
110  
111 
s1 = vec_perm(s1, s1, reverse); 
112 
wj = vec_perm(wj, wj, reverse); 
113  
114 
t0 = vec_madd(s0, wj, zero); 
115 
t0 = vec_nmsub(s1, wi, t0); 
116 
t1 = vec_madd(s0, wi, zero); 
117 
t1 = vec_madd(s1, wj, t1); 
118 
t1 = vec_perm(t1, t1, reverse); 
119  
120 
vec_st(t0, i, dst); 
121 
vec_st(t1, j, dst); 
122 
} 
123 
} 
124  
125 
/**
 * Install the AltiVec float routines of this file into a DSPContext.
 *
 * vector_fmul, vector_fmul_reverse and vector_fmul_add are always set;
 * vector_fmul_window is set only when CODEC_FLAG_BITEXACT is not present
 * in avctx->flags.
 *
 * @param c     DSP context whose function pointers are filled in
 * @param avctx codec context, only its flags are read
 */
void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
{
    c->vector_fmul = vector_fmul_altivec;
    c->vector_fmul_reverse = vector_fmul_reverse_altivec;
    c->vector_fmul_add = vector_fmul_add_altivec;
    if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
        c->vector_fmul_window = vector_fmul_window_altivec;
    }
}