00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "libavutil/common.h"
00025
00026 #define PIXOP2(OPNAME, OP) \
00027 \
00028 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00029 {\
00030 do {\
00031 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
00032 src1+=src_stride1; \
00033 src2+=src_stride2; \
00034 dst+=dst_stride; \
00035 } while(--h); \
00036 }\
00037 \
00038 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00039 {\
00040 do {\
00041 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
00042 src1+=src_stride1; \
00043 src2+=src_stride2; \
00044 dst+=dst_stride; \
00045 } while(--h); \
00046 }\
00047 \
00048 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00049 {\
00050 do {\
00051 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
00052 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
00053 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \
00054 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \
00055 src1+=src_stride1; \
00056 src2+=src_stride2; \
00057 dst+=dst_stride; \
00058 } while(--h); \
00059 }\
00060 \
00061 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00062 {\
00063 do {\
00064 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
00065 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
00066 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LPC(src2+8)) ); \
00067 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LPC(src2+12)) ); \
00068 src1+=src_stride1; \
00069 src2+=src_stride2; \
00070 dst+=dst_stride; \
00071 } while(--h); \
00072 }\
00073 \
00074 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00075 {\
00076 do { \
00077 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
00078 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
00079 src1+=src_stride1; \
00080 src2+=src_stride2; \
00081 dst+=dst_stride; \
00082 } while(--h); \
00083 }\
00084 \
00085 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00086 {\
00087 do {\
00088 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LPC(src2 )) ); \
00089 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LPC(src2+4)) ); \
00090 src1+=src_stride1; \
00091 src2+=src_stride2; \
00092 dst+=dst_stride; \
00093 } while(--h); \
00094 }\
00095 \
00096 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00097 {\
00098 do {\
00099 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
00100 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
00101 src1+=src_stride1; \
00102 src2+=src_stride2; \
00103 dst+=dst_stride; \
00104 } while(--h); \
00105 }\
00106 \
00107 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00108 {\
00109 do {\
00110 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
00111 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
00112 src1+=src_stride1; \
00113 src2+=src_stride2; \
00114 dst+=dst_stride; \
00115 } while(--h); \
00116 }\
00117 \
00118 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00119 {\
00120 do {\
00121 OP(LP(dst ),no_rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
00122 OP(LP(dst+4),no_rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
00123 OP(LP(dst+8),no_rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \
00124 OP(LP(dst+12),no_rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \
00125 src1+=src_stride1; \
00126 src2+=src_stride2; \
00127 dst+=dst_stride; \
00128 } while(--h); \
00129 }\
00130 \
00131 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00132 {\
00133 do {\
00134 OP(LP(dst ),rnd_avg32(LPC(src1 ),LPC(src2 )) ); \
00135 OP(LP(dst+4),rnd_avg32(LPC(src1+4),LPC(src2+4)) ); \
00136 OP(LP(dst+8),rnd_avg32(LPC(src1+8),LPC(src2+8)) ); \
00137 OP(LP(dst+12),rnd_avg32(LPC(src1+12),LPC(src2+12)) ); \
00138 src1+=src_stride1; \
00139 src2+=src_stride2; \
00140 dst+=dst_stride; \
00141 } while(--h); \
00142 }\
00143 \
00144 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00145 { OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
00146 \
00147 static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00148 { OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
00149 \
00150 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00151 { OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
00152 \
00153 static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
00154 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
00155 \
00156 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00157 do { \
00158 uint32_t a0,a1,a2,a3; \
00159 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
00160 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
00161 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
00162 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
00163 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
00164 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
00165 src1+=src_stride1;\
00166 src2+=src_stride2;\
00167 src3+=src_stride3;\
00168 src4+=src_stride4;\
00169 dst+=dst_stride;\
00170 } while(--h); \
00171 } \
00172 \
00173 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00174 do { \
00175 uint32_t a0,a1,a2,a3; \
00176 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
00177 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
00178 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
00179 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
00180 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
00181 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
00182 src1+=src_stride1;\
00183 src2+=src_stride2;\
00184 src3+=src_stride3;\
00185 src4+=src_stride4;\
00186 dst+=dst_stride;\
00187 } while(--h); \
00188 } \
00189 \
00190 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00191 do { \
00192 uint32_t a0,a1,a2,a3; \
00193 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
00194 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
00195 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
00196 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
00197 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
00198 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
00199 src1+=src_stride1;\
00200 src2+=src_stride2;\
00201 src3+=src_stride3;\
00202 src4+=src_stride4;\
00203 dst+=dst_stride;\
00204 } while(--h); \
00205 } \
00206 \
00207 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00208 do { \
00209 uint32_t a0,a1,a2,a3; \
00210 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
00211 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
00212 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
00213 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
00214 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
00215 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
00216 src1+=src_stride1;\
00217 src2+=src_stride2;\
00218 src3+=src_stride3;\
00219 src4+=src_stride4;\
00220 dst+=dst_stride;\
00221 } while(--h); \
00222 } \
00223 \
00224 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00225 do { \
00226 uint32_t a0,a1,a2,a3; \
00227 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
00228 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
00229 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
00230 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
00231 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
00232 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
00233 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \
00234 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
00235 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
00236 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \
00237 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
00238 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
00239 src1+=src_stride1;\
00240 src2+=src_stride2;\
00241 src3+=src_stride3;\
00242 src4+=src_stride4;\
00243 dst+=dst_stride;\
00244 } while(--h); \
00245 } \
00246 \
00247 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00248 do { \
00249 uint32_t a0,a1,a2,a3; \
00250 UNPACK(a0,a1,LPC(src1),LPC(src2)); \
00251 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
00252 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
00253 UNPACK(a0,a1,LPC(src1+4),LPC(src2+4)); \
00254 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
00255 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
00256 UNPACK(a0,a1,LPC(src1+8),LPC(src2+8)); \
00257 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
00258 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
00259 UNPACK(a0,a1,LPC(src1+12),LPC(src2+12)); \
00260 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
00261 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
00262 src1+=src_stride1;\
00263 src2+=src_stride2;\
00264 src3+=src_stride3;\
00265 src4+=src_stride4;\
00266 dst+=dst_stride;\
00267 } while(--h); \
00268 } \
00269 \
00270 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00271 do { \
00272 uint32_t a0,a1,a2,a3; \
00273 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
00274 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
00275 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
00276 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
00277 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
00278 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
00279 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \
00280 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
00281 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
00282 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \
00283 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
00284 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
00285 src1+=src_stride1;\
00286 src2+=src_stride2;\
00287 src3+=src_stride3;\
00288 src4+=src_stride4;\
00289 dst+=dst_stride;\
00290 } while(--h); \
00291 } \
00292 \
00293 static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
00294 do { \
00295 uint32_t a0,a1,a2,a3; \
00296 UNPACK(a0,a1,AV_RN32(src1),LPC(src2)); \
00297 UNPACK(a2,a3,LPC(src3),LPC(src4)); \
00298 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
00299 UNPACK(a0,a1,AV_RN32(src1+4),LPC(src2+4)); \
00300 UNPACK(a2,a3,LPC(src3+4),LPC(src4+4)); \
00301 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
00302 UNPACK(a0,a1,AV_RN32(src1+8),LPC(src2+8)); \
00303 UNPACK(a2,a3,LPC(src3+8),LPC(src4+8)); \
00304 OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
00305 UNPACK(a0,a1,AV_RN32(src1+12),LPC(src2+12)); \
00306 UNPACK(a2,a3,LPC(src3+12),LPC(src4+12)); \
00307 OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
00308 src1+=src_stride1;\
00309 src2+=src_stride2;\
00310 src3+=src_stride3;\
00311 src4+=src_stride4;\
00312 dst+=dst_stride;\
00313 } while(--h); \
00314 } \
00315 \
00316
00317 #define op_avg(a, b) a = rnd_avg32(a,b)
00318 #define op_put(a, b) a = b
00319
00320 PIXOP2(avg, op_avg)
00321 PIXOP2(put, op_put)
00322 #undef op_avg
00323 #undef op_put
00324
/* Rounded-up averages of two and of four values.
 * Every argument is parenthesised so that an expression argument
 * (e.g. a | b, or c ? x : y) keeps its own grouping inside the sum. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)
00327
00328
/**
 * Bilinear blend of an 8-pixel-wide block.
 *
 * Each output byte is
 *   (w_tl*tl + w_tr*tr + w_bl*bl + w_br*br + rounder) >> 8
 * where tl/tr are two neighbouring bytes of the current source row and
 * bl/br the bytes directly below them. The four weights always sum to 256.
 *
 * @param dst      output; 8 bytes are written per row
 * @param src      input; 9 bytes are read from the current row and 9 from the next
 * @param stride   byte distance between rows, applied to both dst and src
 * @param h        row count; the loop body runs before h is tested, so h must be >= 1
 * @param x16      horizontal weight of the right-hand samples
 * @param y16      vertical weight of the lower samples
 * @param rounder  value added before the final >> 8
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int w_tl = (16 - x16) * (16 - y16);
    const int w_tr = x16 * (16 - y16);
    const int w_bl = (16 - x16) * y16;
    const int w_br = x16 * y16;

    do {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;
        /* Sliding 2x2 window: the right column becomes the left column
         * of the next output pixel. */
        int tl = top[0];
        int bl = bot[0];
        int i;

        for (i = 0; i < 8; i++) {
            const int tr = top[i + 1];
            const int br = bot[i + 1];

            dst[i] = (w_tl * tl + w_tr * tr + w_bl * bl + w_br * br + rounder) >> 8;
            tl = tr;
            bl = br;
        }
        dst += stride;
        src += stride;
    } while (--h);
}
00361
/*
 * H264_CHROMA_MC(OPNAME, OP) generates 2-, 4- and 8-pixel-wide bilinear
 * blends. For every output pixel the weighted sum
 *   w_tl*tl + w_tr*tr + w_bl*bl + w_br*br      (weights sum to 64)
 * of a 2x2 source neighbourhood is handed to OP(dst[i], sum); scaling and
 * rounding are done by OP.
 *
 * stride is applied to both dst and src. Each row reads width+1 bytes from
 * the current source row and from the row below it. The row loop is
 * do { } while (--h), so h must be >= 1.
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int w_tl = (8 - x) * (8 - y);\
    const int w_tr = x * (8 - y);\
    const int w_bl = (8 - x) * y;\
    const int w_br = x * y;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    do {\
        const uint8_t *top = src;\
        const uint8_t *bot = src + stride;\
        int tl = top[0];\
        int bl = bot[0];\
        int i;\
        for (i = 0; i < 2; i++) {\
            const int tr = top[i + 1];\
            const int br = bot[i + 1];\
            OP(dst[i], (w_tl*tl + w_tr*tr + w_bl*bl + w_br*br));\
            tl = tr;\
            bl = br;\
        }\
        dst += stride;\
        src += stride;\
    } while (--h);\
}\
\
static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int w_tl = (8 - x) * (8 - y);\
    const int w_tr = x * (8 - y);\
    const int w_bl = (8 - x) * y;\
    const int w_br = x * y;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    do {\
        const uint8_t *top = src;\
        const uint8_t *bot = src + stride;\
        int tl = top[0];\
        int bl = bot[0];\
        int i;\
        for (i = 0; i < 4; i++) {\
            const int tr = top[i + 1];\
            const int br = bot[i + 1];\
            OP(dst[i], (w_tl*tl + w_tr*tr + w_bl*bl + w_br*br));\
            tl = tr;\
            bl = br;\
        }\
        dst += stride;\
        src += stride;\
    } while (--h);\
}\
\
static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){\
    const int w_tl = (8 - x) * (8 - y);\
    const int w_tr = x * (8 - y);\
    const int w_bl = (8 - x) * y;\
    const int w_br = x * y;\
\
    assert(x<8 && y<8 && x>=0 && y>=0);\
\
    do {\
        const uint8_t *top = src;\
        const uint8_t *bot = src + stride;\
        int tl = top[0];\
        int bl = bot[0];\
        int i;\
        for (i = 0; i < 8; i++) {\
            const int tr = top[i + 1];\
            const int br = bot[i + 1];\
            OP(dst[i], (w_tl*tl + w_tr*tr + w_bl*bl + w_br*br));\
            tl = tr;\
            bl = br;\
        }\
        dst += stride;\
        src += stride;\
    } while (--h);\
}

/* Both operators scale the weighted sum back with (b + 32) >> 6;
 * op_avg then takes the rounded-up mean with the existing destination byte. */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
00452
00453 #define QPEL_MC(r, OPNAME, RND, OP) \
00454 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00455 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00456 do {\
00457 uint8_t *s = src; \
00458 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
00459 src0= *s++;\
00460 src1= *s++;\
00461 src2= *s++;\
00462 src3= *s++;\
00463 src4= *s++;\
00464 OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00465 src5= *s++;\
00466 OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00467 src6= *s++;\
00468 OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00469 src7= *s++;\
00470 OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00471 src8= *s++;\
00472 OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00473 OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00474 OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00475 OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00476 dst+=dstStride;\
00477 src+=srcStride;\
00478 }while(--h);\
00479 }\
00480 \
00481 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00482 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00483 int w=8;\
00484 do{\
00485 uint8_t *s = src, *d=dst;\
00486 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
00487 src0 = *s; s+=srcStride; \
00488 src1 = *s; s+=srcStride; \
00489 src2 = *s; s+=srcStride; \
00490 src3 = *s; s+=srcStride; \
00491 src4 = *s; s+=srcStride; \
00492 OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\
00493 src5 = *s; s+=srcStride; \
00494 OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\
00495 src6 = *s; s+=srcStride; \
00496 OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\
00497 src7 = *s; s+=srcStride; \
00498 OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\
00499 src8 = *s; \
00500 OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\
00501 OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\
00502 OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\
00503 OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00504 dst++;\
00505 src++;\
00506 }while(--w);\
00507 }\
00508 \
00509 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00510 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00511 do {\
00512 uint8_t *s = src;\
00513 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
00514 int src9,src10,src11,src12,src13,src14,src15,src16;\
00515 src0= *s++;\
00516 src1= *s++;\
00517 src2= *s++;\
00518 src3= *s++;\
00519 src4= *s++;\
00520 OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00521 src5= *s++;\
00522 OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00523 src6= *s++;\
00524 OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00525 src7= *s++;\
00526 OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00527 src8= *s++;\
00528 OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00529 src9= *s++;\
00530 OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00531 src10= *s++;\
00532 OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00533 src11= *s++;\
00534 OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00535 src12= *s++;\
00536 OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00537 src13= *s++;\
00538 OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00539 src14= *s++;\
00540 OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00541 src15= *s++;\
00542 OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00543 src16= *s++;\
00544 OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00545 OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00546 OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00547 OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00548 dst+=dstStride;\
00549 src+=srcStride;\
00550 }while(--h);\
00551 }\
00552 \
00553 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00554 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00555 int w=16;\
00556 do {\
00557 uint8_t *s = src, *d=dst;\
00558 int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
00559 int src9,src10,src11,src12,src13,src14,src15,src16;\
00560 src0 = *s; s+=srcStride; \
00561 src1 = *s; s+=srcStride; \
00562 src2 = *s; s+=srcStride; \
00563 src3 = *s; s+=srcStride; \
00564 src4 = *s; s+=srcStride; \
00565 OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\
00566 src5 = *s; s+=srcStride; \
00567 OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\
00568 src6 = *s; s+=srcStride; \
00569 OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\
00570 src7 = *s; s+=srcStride; \
00571 OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\
00572 src8 = *s; s+=srcStride; \
00573 OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\
00574 src9 = *s; s+=srcStride; \
00575 OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\
00576 src10 = *s; s+=srcStride; \
00577 OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\
00578 src11 = *s; s+=srcStride; \
00579 OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\
00580 src12 = *s; s+=srcStride; \
00581 OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\
00582 src13 = *s; s+=srcStride; \
00583 OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\
00584 src14 = *s; s+=srcStride; \
00585 OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\
00586 src15 = *s; s+=srcStride; \
00587 OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\
00588 src16 = *s; \
00589 OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\
00590 OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\
00591 OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\
00592 OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00593 dst++;\
00594 src++;\
00595 }while(--w);\
00596 }\
00597 \
00598 static void OPNAME ## qpel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
00599 OPNAME ## pixels8_c(dst, src, stride, 8);\
00600 }\
00601 \
00602 static void OPNAME ## qpel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
00603 uint8_t half[64];\
00604 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00605 OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\
00606 }\
00607 \
00608 static void OPNAME ## qpel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
00609 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00610 }\
00611 \
00612 static void OPNAME ## qpel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
00613 uint8_t half[64];\
00614 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00615 OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\
00616 }\
00617 \
00618 static void OPNAME ## qpel8_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
00619 uint8_t full[16*9];\
00620 uint8_t half[64];\
00621 copy_block9(full, src, 16, stride, 9);\
00622 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00623 OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\
00624 }\
00625 \
00626 static void OPNAME ## qpel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
00627 uint8_t full[16*9];\
00628 copy_block9(full, src, 16, stride, 9);\
00629 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00630 }\
00631 \
00632 static void OPNAME ## qpel8_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
00633 uint8_t full[16*9];\
00634 uint8_t half[64];\
00635 copy_block9(full, src, 16, stride, 9);\
00636 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00637 OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\
00638 }\
00639 static void OPNAME ## qpel8_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
00640 uint8_t full[16*9];\
00641 uint8_t halfH[72];\
00642 uint8_t halfHV[64];\
00643 copy_block9(full, src, 16, stride, 9);\
00644 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00645 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
00646 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00647 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
00648 }\
00649 static void OPNAME ## qpel8_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
00650 uint8_t full[16*9];\
00651 uint8_t halfH[72];\
00652 uint8_t halfHV[64];\
00653 copy_block9(full, src, 16, stride, 9);\
00654 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00655 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
00656 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00657 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
00658 }\
00659 static void OPNAME ## qpel8_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
00660 uint8_t full[16*9];\
00661 uint8_t halfH[72];\
00662 uint8_t halfHV[64];\
00663 copy_block9(full, src, 16, stride, 9);\
00664 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00665 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
00666 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00667 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
00668 }\
00669 static void OPNAME ## qpel8_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
00670 uint8_t full[16*9];\
00671 uint8_t halfH[72];\
00672 uint8_t halfHV[64];\
00673 copy_block9(full, src, 16, stride, 9);\
00674 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00675 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
00676 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00677 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
00678 }\
00679 static void OPNAME ## qpel8_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
00680 uint8_t halfH[72];\
00681 uint8_t halfHV[64];\
00682 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
00683 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00684 OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
00685 }\
00686 static void OPNAME ## qpel8_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
00687 uint8_t halfH[72];\
00688 uint8_t halfHV[64];\
00689 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
00690 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
00691 OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
00692 }\
00693 static void OPNAME ## qpel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
00694 uint8_t full[16*9];\
00695 uint8_t halfH[72];\
00696 copy_block9(full, src, 16, stride, 9);\
00697 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00698 put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
00699 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
00700 }\
00701 static void OPNAME ## qpel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
00702 uint8_t full[16*9];\
00703 uint8_t halfH[72];\
00704 copy_block9(full, src, 16, stride, 9);\
00705 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
00706 put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
00707 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
00708 }\
00709 static void OPNAME ## qpel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
00710 uint8_t halfH[72];\
00711 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
00712 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
00713 }\
00714 static void OPNAME ## qpel16_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
00715 OPNAME ## pixels16_c(dst, src, stride, 16);\
00716 }\
00717 \
00718 static void OPNAME ## qpel16_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
00719 uint8_t half[256];\
00720 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
00721 OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\
00722 }\
00723 \
00724 static void OPNAME ## qpel16_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
00725 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
00726 }\
00727 \
00728 static void OPNAME ## qpel16_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
00729 uint8_t half[256];\
00730 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
00731 OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\
00732 }\
00733 \
00734 static void OPNAME ## qpel16_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
00735 uint8_t full[24*17];\
00736 uint8_t half[256];\
00737 copy_block17(full, src, 24, stride, 17);\
00738 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
00739 OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\
00740 }\
00741 \
00742 static void OPNAME ## qpel16_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
00743 uint8_t full[24*17];\
00744 copy_block17(full, src, 24, stride, 17);\
00745 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
00746 }\
00747 \
00748 static void OPNAME ## qpel16_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
00749 uint8_t full[24*17];\
00750 uint8_t half[256];\
00751 copy_block17(full, src, 24, stride, 17);\
00752 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
00753 OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\
00754 }\
00755 static void OPNAME ## qpel16_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
00756 uint8_t full[24*17];\
00757 uint8_t halfH[272];\
00758 uint8_t halfHV[256];\
00759 copy_block17(full, src, 24, stride, 17);\
00760 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
00761 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
00762 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
00763 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
00764 }\
00765 static void OPNAME ## qpel16_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
00766 uint8_t full[24*17];\
00767 uint8_t halfH[272];\
00768 uint8_t halfHV[256];\
00769 copy_block17(full, src, 24, stride, 17);\
00770 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
00771 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
00772 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
00773 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
00774 }\
00775 static void OPNAME ## qpel16_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
00776 uint8_t full[24*17];\
00777 uint8_t halfH[272];\
00778 uint8_t halfHV[256];\
00779 copy_block17(full, src, 24, stride, 17);\
00780 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
00781 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
00782 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
00783 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
00784 }\
00785 static void OPNAME ## qpel16_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
00786 uint8_t full[24*17];\
00787 uint8_t halfH[272];\
00788 uint8_t halfHV[256];\
00789 copy_block17(full, src, 24, stride, 17);\
00790 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
00791 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
00792 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
00793 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
00794 }\
00795 static void OPNAME ## qpel16_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
00796 uint8_t halfH[272];\
00797 uint8_t halfHV[256];\
00798 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
00799 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
00800 OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
00801 }\
00802 static void OPNAME ## qpel16_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
00803 uint8_t halfH[272];\
00804 uint8_t halfHV[256];\
00805 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
00806 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
00807 OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
00808 }\
00809 static void OPNAME ## qpel16_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
00810 uint8_t full[24*17];\
00811 uint8_t halfH[272];\
00812 copy_block17(full, src, 24, stride, 17);\
00813 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
00814 put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
00815 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
00816 }\
00817 static void OPNAME ## qpel16_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
00818 uint8_t full[24*17];\
00819 uint8_t halfH[272];\
00820 copy_block17(full, src, 24, stride, 17);\
00821 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
00822 put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
00823 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
00824 }\
00825 static void OPNAME ## qpel16_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
00826 uint8_t halfH[272];\
00827 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
00828 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
00829 }
00830
00831 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
00832 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
00833 #define op_put(a, b) a = cm[((b) + 16)>>5]
00834 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
00835
00836 QPEL_MC(0, put_ , _ , op_put)
00837 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
00838 QPEL_MC(0, avg_ , _ , op_avg)
00839
00840 #undef op_avg
00841 #undef op_avg_no_rnd
00842 #undef op_put
00843 #undef op_put_no_rnd
00844
/*
 * H264_LOWPASS(OPNAME, OP, OP2)
 *
 * Emits the six-tap (1, -5, 20, 20, -5, 1) interpolation kernels for one
 * store flavour:
 *   OPNAME  function-name prefix (pasted in front of every kernel name);
 *   OP      OP(dst, sum)  stores a sum that went through one filter pass;
 *   OP2     OP2(dst, sum) stores a sum that went through two filter passes.
 * The sums are handed over unscaled; OP and OP2 may refer to the local table
 * pointer `cm` that every kernel sets up from ff_cropTbl + MAX_NEG_CROP.
 *
 * The generic kernels produce 4, 8 or 16 samples along a filtered direction:
 * a size of at most 4 yields 4 samples, 5..8 yields 8, anything larger 16.
 * Each kernel keeps the five most recent inputs in locals and fetches exactly
 * one new input per output, in ascending order.
 *
 * All outer loops are do/while loops that decrement before testing, so the
 * corresponding count (h, w, or h + 5) must be at least 1.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
/* Horizontal pass: h rows, reading src[-2 .. n+2] of every row. */\
static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w, int h)\
{\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    const int cols = w > 8 ? 16 : (w > 4 ? 8 : 4);\
\
    do {\
        int tB = src[-2];\
        int tA = src[-1];\
        int t0 = src[0];\
        int t1 = src[1];\
        int t2 = src[2];\
        int x;\
\
        for (x = 0; x < cols; x++) {\
            const int t3 = src[x + 3];\
\
            OP(dst[x], (t0 + t1)*20 - (tA + t2)*5 + (tB + t3));\
            tB = tA; tA = t0; t0 = t1; t1 = t2; t2 = t3;\
        }\
        dst += dstStride;\
        src += srcStride;\
    } while (--h);\
}\
\
/* Vertical pass: w columns, reading rows -2 .. n+2 of every column. */\
static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w, int h)\
{\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    const int rows = h > 8 ? 16 : (h > 4 ? 8 : 4);\
\
    do {\
        int tB = src[-2*srcStride];\
        int tA = src[-srcStride];\
        int t0 = src[0];\
        int t1 = src[srcStride];\
        int t2 = src[2*srcStride];\
        int y;\
\
        for (y = 0; y < rows; y++) {\
            const int t3 = src[(y + 3)*srcStride];\
\
            OP(dst[y*dstStride], (t0 + t1)*20 - (tA + t2)*5 + (tB + t3));\
            tB = tA; tA = t0; t0 = t1; t1 = t2; t2 = t3;\
        }\
        dst++;\
        src++;\
    } while (--w);\
}\
\
/* Two-pass kernel: unscaled horizontal sums of h + 5 rows go to tmp, then */\
/* tmp is filtered vertically and stored through OP2.                      */\
static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int w, int h)\
{\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    const int cols = w > 8 ? 16 : (w > 4 ? 8 : 4);\
    const int rows = h > 8 ? 16 : (h > 4 ? 8 : 4);\
    int i;\
\
    /* first pass starts two rows above the block */\
    src -= 2*srcStride;\
    i = h + 5;\
    do {\
        int tB = src[-2];\
        int tA = src[-1];\
        int t0 = src[0];\
        int t1 = src[1];\
        int t2 = src[2];\
        int x;\
\
        for (x = 0; x < cols; x++) {\
            const int t3 = src[x + 3];\
\
            tmp[x] = ((t0 + t1)*20 - (tA + t2)*5 + (tB + t3));\
            tB = tA; tA = t0; t0 = t1; t1 = t2; t2 = t3;\
        }\
        tmp += tmpStride;\
        src += srcStride;\
    } while (--i);\
\
    /* rewind tmp to the row that corresponds to output row 0 */\
    tmp -= tmpStride*(h + 5 - 2);\
    i = w;\
    do {\
        int uB = tmp[-2*tmpStride];\
        int uA = tmp[-tmpStride];\
        int u0 = tmp[0];\
        int u1 = tmp[tmpStride];\
        int u2 = tmp[2*tmpStride];\
        int y;\
\
        for (y = 0; y < rows; y++) {\
            const int u3 = tmp[(y + 3)*tmpStride];\
\
            OP2(dst[y*dstStride], (u0 + u1)*20 - (uA + u2)*5 + (uB + u3));\
            uB = uA; uA = u0; u0 = u1; u1 = u2; u2 = u3;\
        }\
        dst++;\
        tmp++;\
    } while (--i);\
}\
\
/* Fixed-size entry points: square 4x4, 8x8 and 16x16 blocks. */\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    OPNAME ## h264_qpel_h_lowpass(dst, src, dstStride, srcStride, 4, 4);\
}\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    OPNAME ## h264_qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 8);\
}\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    OPNAME ## h264_qpel_h_lowpass(dst, src, dstStride, srcStride, 16, 16);\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    OPNAME ## h264_qpel_v_lowpass(dst, src, dstStride, srcStride, 4, 4);\
}\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    OPNAME ## h264_qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 8);\
}\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride)\
{\
    OPNAME ## h264_qpel_v_lowpass(dst, src, dstStride, srcStride, 16, 16);\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride)\
{\
    OPNAME ## h264_qpel_hv_lowpass(dst, tmp, src, dstStride, tmpStride, srcStride, 4, 4);\
}\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride)\
{\
    OPNAME ## h264_qpel_hv_lowpass(dst, tmp, src, dstStride, tmpStride, srcStride, 8, 8);\
}\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride)\
{\
    OPNAME ## h264_qpel_hv_lowpass(dst, tmp, src, dstStride, tmpStride, srcStride, 16, 16);\
}
01088
/*
 * H264_MC(OPNAME, SIZE)
 *
 * Emits the sixteen OPNAME##h264_qpel##SIZE##_mcXY_sh4 entry points for one
 * block size.  In the code below the first digit selects what is done
 * horizontally and the second what is done vertically:
 *   mc00            plain OPNAME##pixels##SIZE##_c call;
 *   mc20/mc02/mc22  a single h / v / hv low-pass written straight to dst;
 *   every other     two intermediate SIZE x SIZE planes (or one plane and the
 *                   source itself) are built with the put_ kernels and merged
 *                   into dst by OPNAME##pixels##SIZE##_l2_aligned[2].
 *
 * Positions that differ only in which neighbour they reference share a
 * private helper; the helper receives the differing source pointer or
 * column/row offset as a parameter.
 *
 * Vertical filtering works on a stack copy of SIZE columns by SIZE + 5 rows
 * that starts two rows above src; `centre` points at the row matching src.
 *
 * NOTE(review): the stack planes are handed to *_l2_aligned / *_l2_aligned2,
 * whose 4-pixel variants at the top of the file access memory through
 * LP()/LPC(); presumably those need 4-byte aligned buffers -- confirm that
 * the plain uint8_t arrays used here are sufficiently aligned on the target.
 */
#define H264_MC(OPNAME, SIZE) \
/* h low-pass of src merged with the integer samples at ref (mc10, mc30) */\
static inline void OPNAME ## h264_qpel ## SIZE ## _mix_h_sh4(uint8_t *dst, uint8_t *src, uint8_t *ref, int stride)\
{\
    uint8_t filtered[SIZE*SIZE];\
\
    put_h264_qpel ## SIZE ## _h_lowpass(filtered, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, ref, filtered, stride, stride, SIZE, SIZE);\
}\
\
/* v low-pass merged with the copied rows, `below` rows under src (mc01, mc03) */\
static inline void OPNAME ## h264_qpel ## SIZE ## _mix_v_sh4(uint8_t *dst, uint8_t *src, int stride, int below)\
{\
    uint8_t column[SIZE*(SIZE+5)];\
    uint8_t * const centre = column + SIZE*2;\
    uint8_t filtered[SIZE*SIZE];\
\
    copy_block ## SIZE (column, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(filtered, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, centre + below*SIZE, filtered, stride, SIZE, SIZE, SIZE);\
}\
\
/* h low-pass of h_src merged with v low-pass of column `col` (mc11, mc31, mc13, mc33) */\
static inline void OPNAME ## h264_qpel ## SIZE ## _mix_diag_sh4(uint8_t *dst, uint8_t *src, uint8_t *h_src, int col, int stride)\
{\
    uint8_t column[SIZE*(SIZE+5)];\
    uint8_t * const centre = column + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, h_src, SIZE, stride);\
    copy_block ## SIZE (column, src - stride*2 + col, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, centre, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
/* h low-pass of h_src merged with the hv low-pass of src (mc21, mc23) */\
static inline void OPNAME ## h264_qpel ## SIZE ## _mix_h_centre_sh4(uint8_t *dst, uint8_t *src, uint8_t *h_src, int stride)\
{\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, h_src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
/* v low-pass of column `col` merged with the hv low-pass of src (mc12, mc32) */\
static inline void OPNAME ## h264_qpel ## SIZE ## _mix_v_centre_sh4(uint8_t *dst, uint8_t *src, int col, int stride)\
{\
    uint8_t column[SIZE*(SIZE+5)];\
    uint8_t * const centre = column + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
\
    copy_block ## SIZE (column, src - stride*2 + col, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, centre, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc00_sh4 (uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_h_sh4(dst, src, src, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_h_sh4(dst, src, src+1, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_v_sh4(dst, src, stride, 0);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    uint8_t column[SIZE*(SIZE+5)];\
    uint8_t * const centre = column + SIZE*2;\
\
    copy_block ## SIZE (column, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, centre, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_v_sh4(dst, src, stride, 1);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_diag_sh4(dst, src, src, 0, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_diag_sh4(dst, src, src, 1, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_diag_sh4(dst, src, src + stride, 0, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_diag_sh4(dst, src, src + stride, 1, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    int16_t tmp[SIZE*(SIZE+5)];\
\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_h_centre_sh4(dst, src, src, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_h_centre_sh4(dst, src, src + stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_v_centre_sh4(dst, src, 0, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_sh4(uint8_t *dst, uint8_t *src, int stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _mix_v_centre_sh4(dst, src, 1, stride);\
}
01225
01226 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
01227
01228 #define op_put(a, b) a = cm[((b) + 16)>>5]
01229 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
01230 #define op2_put(a, b) a = cm[((b) + 512)>>10]
01231
01232 H264_LOWPASS(put_ , op_put, op2_put)
01233 H264_LOWPASS(avg_ , op_avg, op2_avg)
01234 H264_MC(put_, 4)
01235 H264_MC(put_, 8)
01236 H264_MC(put_, 16)
01237 H264_MC(avg_, 4)
01238 H264_MC(avg_, 8)
01239 H264_MC(avg_, 16)
01240
01241 #undef op_avg
01242 #undef op_put
01243 #undef op2_avg
01244 #undef op2_put
01245
01246 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01247 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01248
01249 do{
01250 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
01251 uint8_t *s = src;
01252 src_1 = s[-1];
01253 src0 = *s++;
01254 src1 = *s++;
01255 src2 = *s++;
01256 dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01257 src3 = *s++;
01258 dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01259 src4 = *s++;
01260 dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01261 src5 = *s++;
01262 dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01263 src6 = *s++;
01264 dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01265 src7 = *s++;
01266 dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01267 src8 = *s++;
01268 dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01269 src9 = *s++;
01270 dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01271 dst+=dstStride;
01272 src+=srcStride;
01273 }while(--h);
01274 }
01275
01276 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01277 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01278
01279 do{
01280 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
01281 uint8_t *s = src,*d = dst;
01282 src_1 = *(s-srcStride);
01283 src0 = *s; s+=srcStride;
01284 src1 = *s; s+=srcStride;
01285 src2 = *s; s+=srcStride;
01286 *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride;
01287 src3 = *s; s+=srcStride;
01288 *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride;
01289 src4 = *s; s+=srcStride;
01290 *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride;
01291 src5 = *s; s+=srcStride;
01292 *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride;
01293 src6 = *s; s+=srcStride;
01294 *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride;
01295 src7 = *s; s+=srcStride;
01296 *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride;
01297 src8 = *s; s+=srcStride;
01298 *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride;
01299 src9 = *s;
01300 *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride;
01301 src++;
01302 dst++;
01303 }while(--w);
01304 }
01305
/**
 * mspel position (0,0): no filtering, the eight rows are handed to
 * put_pixels8_c unchanged.
 */
static void put_mspel8_mc00_sh4(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 8;

    put_pixels8_c(dst, src, stride, rows);
}
01309
/**
 * mspel position (1,0): the horizontally filtered 8x8 plane is combined
 * with the source samples at src by put_pixels8_l2_aligned2.
 */
static void put_mspel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2_aligned2(dst, src, filtered, stride, stride, 8, 8);
}
01315
/**
 * mspel position (2,0): eight rows of the horizontal filter written
 * directly to dst; source and destination share the same stride.
 */
static void put_mspel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride)
{
    const int rows = 8;

    wmv2_mspel8_h_lowpass(dst, src, stride, stride, rows);
}
01319
/**
 * mspel position (3,0): the horizontally filtered 8x8 plane is combined
 * with the source samples one column to the right (src + 1) by
 * put_pixels8_l2_aligned2.
 */
static void put_mspel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t filtered[8*8];

    wmv2_mspel8_h_lowpass(filtered, src, 8, stride, 8);
    put_pixels8_l2_aligned2(dst, src + 1, filtered, stride, stride, 8, 8);
}
01325
/**
 * mspel position (0,2): eight columns of the vertical filter written
 * directly to dst; source and destination share the same stride.
 */
static void put_mspel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride)
{
    const int columns = 8;

    wmv2_mspel8_v_lowpass(dst, src, stride, stride, columns);
}
01329
/**
 * mspel position (1,2).
 *
 * Builds two 8x8 planes and combines them with put_pixels8_l2_aligned:
 *   - v_plane:  vertical filter applied to the source at src;
 *   - hv_plane: vertical filter applied to the horizontally filtered rows.
 * The horizontal pass covers 11 rows starting one row above src, because
 * the vertical filter reads one row above and two rows below its eight
 * outputs; h_rows + 8 is the row that lines up with src.
 */
static void put_mspel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8*11];
    uint8_t v_plane[8*8];
    uint8_t hv_plane[8*8];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_plane, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hv_plane, h_rows + 8, 8, 8, 8);
    put_pixels8_l2_aligned(dst, v_plane, hv_plane, stride, 8, 8, 8);
}
/**
 * mspel position (3,2).
 *
 * Same construction as position (1,2), except that the purely vertical
 * plane is filtered from the column to the right (src + 1):
 *   - v_plane:  vertical filter applied to the source at src + 1;
 *   - hv_plane: vertical filter applied to 11 horizontally filtered rows
 *               that start one row above src (h_rows + 8 lines up with src).
 * The two planes are combined by put_pixels8_l2_aligned.
 */
static void put_mspel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8*11];
    uint8_t v_plane[8*8];
    uint8_t hv_plane[8*8];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(v_plane, src + 1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(hv_plane, h_rows + 8, 8, 8, 8);
    put_pixels8_l2_aligned(dst, v_plane, hv_plane, stride, 8, 8, 8);
}
/**
 * mspel position (2,2): horizontal filter over 11 rows (starting one row
 * above src) into a scratch buffer, then the vertical filter over that
 * buffer written directly to dst.  h_rows + 8 is the scratch row that lines
 * up with src.
 */
static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride)
{
    uint8_t h_rows[8*11];

    wmv2_mspel8_h_lowpass(h_rows, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, h_rows + 8, stride, 8, 8);
}