## ffmpeg / libavcodec / arm / simple_idct_armv5te.S @ aa05f212

History | View | Annotate | Download (18.8 KB)

1 |
/* |
---|---|

2 |
* Simple IDCT |

3 |
* |

4 |
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> |

5 |
* Copyright (c) 2006 Mans Rullgard <mans@mansr.com> |

6 |
* |

7 |
* This file is part of Libav. |

8 |
* |

9 |
* Libav is free software; you can redistribute it and/or |

10 |
* modify it under the terms of the GNU Lesser General Public |

11 |
* License as published by the Free Software Foundation; either |

12 |
* version 2.1 of the License, or (at your option) any later version. |

13 |
* |

14 |
* Libav is distributed in the hope that it will be useful, |

15 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |

16 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

17 |
* Lesser General Public License for more details. |

18 |
* |

19 |
* You should have received a copy of the GNU Lesser General Public |

20 |
* License along with Libav; if not, write to the Free Software |

21 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |

22 |
*/ |

23 | |

24 |
#include "asm.S" |

25 | |

26 |
/*
 * Fixed-point IDCT coefficients: Wi = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5,
 * truncated to an integer (W4 is the exception, see its comment).
 */
#define W1 22725 /* cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |

27 |
#define W2 21407 /* cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |

28 |
#define W3 19266 /* cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |

29 |
#define W4 16383 /* (1<<14) - 1; note cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 would give 16384 */ |

30 |
#define W5 12873 /* cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |

31 |
#define W6 8867 /* cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |

32 |
#define W7 4520 /* cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5 */ |

33 |
#define ROW_SHIFT 11 /* row pass scaling; used below for the rounding constant, the shifts are written as literal #11 */ |

34 |
#define COL_SHIFT 20 /* column pass scaling; used below for the rounding constant, the shifts are written as literal #20 */ |

35 | |

/*
 * Coefficient pairs packed into one 32-bit word: first coefficient in the
 * low halfword, second in the high halfword, so the halfword multiplies
 * (smulxy / smlaxy) can select either one with their b/t operand suffix.
 */
36 |
#define W13 (W1 | (W3 << 16)) |

37 |
#define W26 (W2 | (W6 << 16)) |

38 |
#define W57 (W5 | (W7 << 16)) |

39 | |

/*
 * Literal words holding the packed pairs. They live in .text next to the
 * code, which loads them by label (e.g. "ldr ip, w26").
 */
40 |
.text |

41 |
.align |

42 |
w13: .long W13 |

43 |
w26: .long W26 |

44 |
w57: .long W57 |

45 | |

46 |
/*
 * idct_row_armv5te: one-dimensional IDCT of a single row, in place.
 *
 * In:    a1 = address of a row of eight 16-bit coefficients (16 bytes,
 *             accessed with ldrd/strd).
 * Out:   the same 16 bytes overwritten with the transformed row, each
 *        value scaled down by 11 bits (ROW_SHIFT) after rounding.
 * Keeps: a1 (the callers in this file step it by 16 between calls).
 * Uses:  a2-a4, v1-v7, fp, ip, lr and the flags without saving them;
 *        only lr is pushed, because it is reused as a scratch register.
 *
 * Register roles in the general path:
 *   v1..v4      even part: sums of W4*row[0], W2/W6*row[2], W4*row[4],
 *               W2/W6*row[6] (plus the rounding constant)
 *   v5, v7, fp  odd-part sums that are added for out[0], out[2], out[3]
 *   v6          odd-part sum kept with inverted sign, so out[1] = v2 - v6
 */
function idct_row_armv5te |

47 |
str lr, [sp, #-4]! /* push return address */ |

48 | |

49 |
ldrd v1, [a1, #8] /* v1 = row[5:4], v2 = row[7:6] */ |

50 |
ldrd a3, [a1] /* a3 = row[1:0], a4 = row[3:2] */ |

51 |
orrs v1, v1, v2 /* Z set iff row[4..7] are all zero (v1 is then 0) */ |

52 |
cmpeq v1, a4 /* ... and row[3:2] == 0 */ |

53 |
cmpeq v1, a3, lsr #16 /* ... and row[1] == 0 */ |

54 |
beq row_dc_only /* nothing but row[0] can be non-zero */ |

55 | |

56 |
mov v1, #(1<<(ROW_SHIFT-1)) /* rounding constant */ |

57 |
mov ip, #16384 |

58 |
sub ip, ip, #1 /* ip = W4 */ |

59 |
smlabb v1, ip, a3, v1 /* v1 = W4*row[0]+(1<<(RS-1)) */ |

60 |
ldr ip, w26 /* ip = W2 | (W6 << 16) */ |

61 |
smultb a2, ip, a4 /* a2 = W6*row[2] */ |

62 |
smulbb lr, ip, a4 /* lr = W2*row[2] */ |

63 |
add v2, v1, a2 /* v2 = base + W6*row[2] */ |

64 |
sub v3, v1, a2 /* v3 = base - W6*row[2] */ |

65 |
sub v4, v1, lr /* v4 = base - W2*row[2] */ |

66 |
add v1, v1, lr /* v1 = base + W2*row[2] */ |

67 | |

68 |
ldr ip, w13 /* ip = W1 | (W3 << 16) */ |

69 |
ldr lr, w57 /* lr = W5 | (W7 << 16) */ |

70 |
smulbt v5, ip, a3 /* v5 = W1*row[1] */ |

71 |
smultt v6, lr, a4 /* v6 = W7*row[3] */ |

72 |
smlatt v5, ip, a4, v5 /* v5 += W3*row[3] */ |

73 |
smultt a2, ip, a3 /* a2 = W3*row[1] */ |

74 |
smulbt v7, lr, a3 /* v7 = W5*row[1] */ |

75 |
sub v6, v6, a2 /* v6 = W7*row[3] - W3*row[1] */ |

76 |
smulbt a2, ip, a4 /* a2 = W1*row[3] */ |

77 |
smultt fp, lr, a3 /* fp = W7*row[1] */ |

78 |
sub v7, v7, a2 /* v7 = W5*row[1] - W1*row[3] */ |

79 |
smulbt a2, lr, a4 /* a2 = W5*row[3] */ |

80 |
ldrd a3, [a1, #8] /* a3=row[5:4] a4=row[7:6] */ |

81 |
sub fp, fp, a2 /* fp = W7*row[1] - W5*row[3] */ |

82 | |

83 |
orrs a2, a3, a4 /* row[4..7] all zero? */ |

84 |
beq 1f /* then their contributions can be skipped */ |

85 | |

86 |
smlabt v5, lr, a3, v5 /* v5 += W5*row[5] */ |

87 |
smlabt v6, ip, a3, v6 /* v6 += W1*row[5] */ |

88 |
smlatt v5, lr, a4, v5 /* v5 += W7*row[7] */ |

89 |
smlabt v6, lr, a4, v6 /* v6 += W5*row[7] */ |

90 |
smlatt v7, lr, a3, v7 /* v7 += W7*row[5] */ |

91 |
smlatt fp, ip, a3, fp /* fp += W3*row[5] */ |

92 |
smulbt a2, ip, a4 /* a2 = W1*row[7] */ |

93 |
smlatt v7, ip, a4, v7 /* v7 += W3*row[7] */ |

94 |
sub fp, fp, a2 /* fp -= W1*row[7] */ |

95 | |

96 |
ldr ip, w26 /* ip = W2 | (W6 << 16) */ |

97 |
mov a2, #16384 |

98 |
sub a2, a2, #1 /* a2 = W4 */ |

99 |
smulbb a2, a2, a3 /* a2 = W4*row[4] */ |

100 |
smultb lr, ip, a4 /* lr = W6*row[6] */ |

101 |
add v1, v1, a2 /* v1 += W4*row[4] */ |

102 |
add v1, v1, lr /* v1 += W6*row[6] */ |

103 |
add v4, v4, a2 /* v4 += W4*row[4] */ |

104 |
sub v4, v4, lr /* v4 -= W6*row[6] */ |

105 |
smulbb lr, ip, a4 /* lr = W2*row[6] */ |

106 |
sub v2, v2, a2 /* v2 -= W4*row[4] */ |

107 |
sub v2, v2, lr /* v2 -= W2*row[6] */ |

108 |
sub v3, v3, a2 /* v3 -= W4*row[4] */ |

109 |
add v3, v3, lr /* v3 += W2*row[6] */ |

110 | |

/*
 * Combine even and odd parts. Each result is shifted right by 11 and two
 * results are packed per register: after "lsr #11" bits 16..20 still hold
 * leftover high bits, so they are cleared (bic #0x1f0000) before the next
 * result is added in shifted left by 16.
 */
111 |
1: add a2, v1, v5 /* out[0] = (v1 + v5) >> 11 */ |

112 |
mov a3, a2, lsr #11 |

113 |
bic a3, a3, #0x1f0000 |

114 |
sub a2, v2, v6 /* out[1] = (v2 - v6) >> 11 */ |

115 |
mov a2, a2, lsr #11 |

116 |
add a3, a3, a2, lsl #16 |

117 |
add a2, v3, v7 /* out[2] = (v3 + v7) >> 11 */ |

118 |
mov a4, a2, lsr #11 |

119 |
bic a4, a4, #0x1f0000 |

120 |
add a2, v4, fp /* out[3] = (v4 + fp) >> 11 */ |

121 |
mov a2, a2, lsr #11 |

122 |
add a4, a4, a2, lsl #16 |

123 |
strd a3, [a1] /* store out[0..3] */ |

124 | |

125 |
sub a2, v4, fp /* out[4] = (v4 - fp) >> 11 */ |

126 |
mov a3, a2, lsr #11 |

127 |
bic a3, a3, #0x1f0000 |

128 |
sub a2, v3, v7 /* out[5] = (v3 - v7) >> 11 */ |

129 |
mov a2, a2, lsr #11 |

130 |
add a3, a3, a2, lsl #16 |

131 |
add a2, v2, v6 /* out[6] = (v2 + v6) >> 11 */ |

132 |
mov a4, a2, lsr #11 |

133 |
bic a4, a4, #0x1f0000 |

134 |
sub a2, v1, v5 /* out[7] = (v1 - v5) >> 11 */ |

135 |
mov a2, a2, lsr #11 |

136 |
add a4, a4, a2, lsl #16 |

137 |
strd a3, [a1, #8] /* store out[4..7] */ |

138 | |

139 |
ldr pc, [sp], #4 /* pop return address into pc */ |

140 | |

/*
 * Shortcut taken when row[1..7] are all zero: every output is row[0] << 3.
 * Here a3 = row[1:0] with a zero high halfword.
 */
141 |
row_dc_only: |

142 |
orr a3, a3, a3, lsl #16 /* copy row[0] into both halfwords */ |

143 |
bic a3, a3, #0xe000 /* drop bits 13..15 of the low half so the shift cannot spill into the high half */ |

144 |
mov a3, a3, lsl #3 /* both halfwords = row[0] << 3 */ |

145 |
mov a4, a3 |

146 |
strd a3, [a1] /* out[0..3] */ |

147 |
strd a3, [a1, #8] /* out[4..7] */ |

148 | |

149 |
ldr pc, [sp], #4 /* pop return address into pc */ |

150 |
endfunc |

151 | |

152 |
/*
 * idct_col: shared front half of the column pass, for two adjacent
 * columns at once.
 *
 * In:   a1 = address of the first of eight 32-bit words, 16 bytes apart.
 *            Each word holds one row of both columns: low halfword =
 *            column 0, high halfword = column 1.
 * Out:  even-part sums pushed on the stack (32 bytes, lowest address
 *       first): e0 col0, e0 col1, e1 col0, e1 col1, e2 col0, e2 col1,
 *       e3 col0, e3 col1. Users of the macro pop them two at a time.
 *       Odd-part sums left in registers:
 *         v1/v2 = o0 (col0/col1)
 *         v3/v4 = o1 with inverted sign (col0/col1)
 *         v5/v6 = o2 (col0/col1)
 *         v7/fp = o3 (col0/col1)
 * Uses: a2, a3, a4, ip, lr as scratch; a1 is not modified.
 * The macro does not rescale; the code after it applies the 20-bit shift.
 */
.macro idct_col |

153 |
ldr a4, [a1] /* a4 = col[1:0] */ |

154 |
mov ip, #16384 |

155 |
sub ip, ip, #1 /* ip = W4 */ |

/* Disabled alternative: add the rounding constant after the multiply. */
156 |
#if 0 |

157 |
mov v1, #(1<<(COL_SHIFT-1)) |

158 |
smlabt v2, ip, a4, v1 /* v2 = W4*col[1] + (1<<(COL_SHIFT-1)) */ |

159 |
smlabb v1, ip, a4, v1 /* v1 = W4*col[0] + (1<<(COL_SHIFT-1)) */ |

160 |
ldr a4, [a1, #(16*4)] |

/* Active version: add the rounding term first, then multiply by W4 as x*16384 - x. */
161 |
#else |

162 |
mov v1, #((1<<(COL_SHIFT-1))/W4) /* this matches the C version; evaluates to 32 under integer division */ |

163 |
add v2, v1, a4, asr #16 /* v2 = round + row 0 of column 1 */ |

164 |
rsb v2, v2, v2, lsl #14 /* v2 *= 16383 (W4) */ |

165 |
mov a4, a4, lsl #16 /* move column 0 to the top so asr sign-extends it */ |

166 |
add v1, v1, a4, asr #16 /* v1 = round + row 0 of column 0 */ |

167 |
ldr a4, [a1, #(16*4)] /* a4 = row 4 of both columns */ |

168 |
rsb v1, v1, v1, lsl #14 /* v1 *= 16383 (W4) */ |

169 |
#endif |

170 | |

/* Row 4: e0 and e3 get +W4*row4, e1 and e2 get -W4*row4. */
171 |
smulbb lr, ip, a4 /* lr = W4*row4 (col0) */ |

172 |
smulbt a3, ip, a4 /* a3 = W4*row4 (col1) */ |

173 |
sub v3, v1, lr /* e1 col0 */ |

174 |
sub v5, v1, lr /* e2 col0 */ |

175 |
add v7, v1, lr /* e3 col0 */ |

176 |
add v1, v1, lr /* e0 col0 */ |

177 |
sub v4, v2, a3 /* e1 col1 */ |

178 |
sub v6, v2, a3 /* e2 col1 */ |

179 |
add fp, v2, a3 /* e3 col1 */ |

180 |
ldr ip, w26 /* ip = W2 | (W6 << 16) */ |

181 |
ldr a4, [a1, #(16*2)] /* a4 = row 2 of both columns */ |

182 |
add v2, v2, a3 /* e0 col1 */ |

183 | |

/* Row 2: e0 += W2, e3 -= W2, e1 += W6, e2 -= W6 (times row2). */
184 |
smulbb lr, ip, a4 /* lr = W2*row2 (col0) */ |

185 |
smultb a3, ip, a4 /* a3 = W6*row2 (col0) */ |

186 |
add v1, v1, lr |

187 |
sub v7, v7, lr |

188 |
add v3, v3, a3 |

189 |
sub v5, v5, a3 |

190 |
smulbt lr, ip, a4 /* lr = W2*row2 (col1) */ |

191 |
smultt a3, ip, a4 /* a3 = W6*row2 (col1) */ |

192 |
add v2, v2, lr |

193 |
sub fp, fp, lr |

194 |
add v4, v4, a3 |

195 |
ldr a4, [a1, #(16*6)] /* a4 = row 6 of both columns */ |

196 |
sub v6, v6, a3 |

197 | |

/* Row 6: e0 += W6, e3 -= W6, e1 -= W2, e2 += W2 (times row6). */
198 |
smultb lr, ip, a4 /* lr = W6*row6 (col0) */ |

199 |
smulbb a3, ip, a4 /* a3 = W2*row6 (col0) */ |

200 |
add v1, v1, lr |

201 |
sub v7, v7, lr |

202 |
sub v3, v3, a3 |

203 |
add v5, v5, a3 |

204 |
smultt lr, ip, a4 /* lr = W6*row6 (col1) */ |

205 |
smulbt a3, ip, a4 /* a3 = W2*row6 (col1) */ |

206 |
add v2, v2, lr |

207 |
sub fp, fp, lr |

208 |
sub v4, v4, a3 |

209 |
add v6, v6, a3 |

210 | |

211 |
stmfd sp!, {v1, v2, v3, v4, v5, v6, v7, fp} /* park the even part; the registers are reused for the odd part */ |

212 | |

/* Row 1 starts the odd-part sums. */
213 |
ldr ip, w13 /* ip = W1 | (W3 << 16) */ |

214 |
ldr a4, [a1, #(16*1)] /* a4 = row 1 of both columns */ |

215 |
ldr lr, w57 /* lr = W5 | (W7 << 16) */ |

216 |
smulbb v1, ip, a4 /* v1 = W1*row1 (col0) */ |

217 |
smultb v3, ip, a4 /* v3 = W3*row1 (col0) */ |

218 |
smulbb v5, lr, a4 /* v5 = W5*row1 (col0) */ |

219 |
smultb v7, lr, a4 /* v7 = W7*row1 (col0) */ |

220 |
smulbt v2, ip, a4 /* v2 = W1*row1 (col1) */ |

221 |
smultt v4, ip, a4 /* v4 = W3*row1 (col1) */ |

222 |
smulbt v6, lr, a4 /* v6 = W5*row1 (col1) */ |

223 |
smultt fp, lr, a4 /* fp = W7*row1 (col1) */ |

224 |
rsb v4, v4, #0 /* from here v4 holds o1 with inverted sign */ |

225 |
ldr a4, [a1, #(16*3)] /* a4 = row 3 of both columns */ |

226 |
rsb v3, v3, #0 /* from here v3 holds o1 with inverted sign */ |

227 | |

/* Row 3. */
228 |
smlatb v1, ip, a4, v1 /* v1 += W3*row3 */ |

229 |
smlatb v3, lr, a4, v3 /* v3 += W7*row3 */ |

230 |
smulbb a3, ip, a4 /* a3 = W1*row3 */ |

231 |
smulbb a2, lr, a4 /* a2 = W5*row3 */ |

232 |
sub v5, v5, a3 /* v5 -= W1*row3 */ |

233 |
sub v7, v7, a2 /* v7 -= W5*row3 */ |

234 |
smlatt v2, ip, a4, v2 /* same four updates for column 1 */ |

235 |
smlatt v4, lr, a4, v4 |

236 |
smulbt a3, ip, a4 |

237 |
smulbt a2, lr, a4 |

238 |
sub v6, v6, a3 |

239 |
ldr a4, [a1, #(16*5)] /* a4 = row 5 of both columns */ |

240 |
sub fp, fp, a2 |

241 | |

/* Row 5. */
242 |
smlabb v1, lr, a4, v1 /* v1 += W5*row5 */ |

243 |
smlabb v3, ip, a4, v3 /* v3 += W1*row5 */ |

244 |
smlatb v5, lr, a4, v5 /* v5 += W7*row5 */ |

245 |
smlatb v7, ip, a4, v7 /* v7 += W3*row5 */ |

246 |
smlabt v2, lr, a4, v2 /* same four updates for column 1 */ |

247 |
smlabt v4, ip, a4, v4 |

248 |
smlatt v6, lr, a4, v6 |

249 |
ldr a3, [a1, #(16*7)] /* a3 = row 7 of both columns */ |

250 |
smlatt fp, ip, a4, fp |

251 | |

/* Row 7. */
252 |
smlatb v1, lr, a3, v1 /* v1 += W7*row7 */ |

253 |
smlabb v3, lr, a3, v3 /* v3 += W5*row7 */ |

254 |
smlatb v5, ip, a3, v5 /* v5 += W3*row7 */ |

255 |
smulbb a4, ip, a3 /* a4 = W1*row7 (col0) */ |

256 |
smlatt v2, lr, a3, v2 /* v2 += W7*row7 (col1) */ |

257 |
sub v7, v7, a4 /* v7 -= W1*row7 */ |

258 |
smlabt v4, lr, a3, v4 /* v4 += W5*row7 (col1) */ |

259 |
smulbt a4, ip, a3 /* a4 = W1*row7 (col1) */ |

260 |
smlatt v6, ip, a3, v6 /* v6 += W3*row7 (col1) */ |

261 |
sub fp, fp, a4 /* fp -= W1*row7 (col1) */ |

262 |
.endm |

263 | |

264 |
/*
 * idct_col_armv5te: column pass for two adjacent columns, result written
 * back into the coefficient block as 16-bit values.
 *
 * In:    a1 = address of row 0 of the column pair (see the idct_col macro:
 *             one 32-bit word per row, rows 16 bytes apart).
 * Out:   the eight words at a1 + 16*k overwritten; low halfword = column 0,
 *        high halfword = column 1, each (even +/- odd) >> 20.
 * Keeps: a1.
 * Uses:  a2-a4, v1-v7, fp, ip, lr and the flags without saving them.
 *
 * The macro leaves the even-part sums on the stack; each "ldmfd sp!,
 * {a3, a4}" pops the next pair (column 0 in a3, column 1 in a4).
 *
 * Packing idiom for column 0: the add/sub sets the flags, "mov ... lsr #20"
 * (which does not touch the flags) leaves a 12-bit value, and orrmi fills
 * bits 12..15 when the sum was negative, giving a sign-extended 16-bit
 * value with a clean upper halfword. Column 1 is shifted arithmetically
 * and moved into the upper halfword with "lsl #16".
 */
function idct_col_armv5te |

265 |
str lr, [sp, #-4]! /* push return address; the macro uses lr as scratch */ |

266 | |

267 |
idct_col |

268 | |

269 |
ldmfd sp!, {a3, a4} /* a3/a4 = e0 (col0/col1) */ |

270 |
adds a2, a3, v1 /* row 0, col0: e0 + o0 */ |

271 |
mov a2, a2, lsr #20 |

272 |
orrmi a2, a2, #0xf000 |

273 |
add ip, a4, v2 /* row 0, col1 */ |

274 |
mov ip, ip, asr #20 |

275 |
orr a2, a2, ip, lsl #16 |

276 |
str a2, [a1] /* store row 0 */ |

277 |
subs a3, a3, v1 /* row 7, col0: e0 - o0 */ |

278 |
mov a2, a3, lsr #20 |

279 |
orrmi a2, a2, #0xf000 |

280 |
sub a4, a4, v2 /* row 7, col1 */ |

281 |
mov a4, a4, asr #20 |

282 |
orr a2, a2, a4, lsl #16 |

283 |
ldmfd sp!, {a3, a4} /* a3/a4 = e1 (col0/col1) */ |

284 |
str a2, [a1, #(16*7)] /* store row 7 */ |

285 | |

286 |
subs a2, a3, v3 /* row 1, col0: e1 - v3, v3 being o1 with inverted sign */ |

287 |
mov a2, a2, lsr #20 |

288 |
orrmi a2, a2, #0xf000 |

289 |
sub ip, a4, v4 /* row 1, col1 */ |

290 |
mov ip, ip, asr #20 |

291 |
orr a2, a2, ip, lsl #16 |

292 |
str a2, [a1, #(16*1)] /* store row 1 */ |

293 |
adds a3, a3, v3 /* row 6, col0: e1 + v3 */ |

294 |
mov a2, a3, lsr #20 |

295 |
orrmi a2, a2, #0xf000 |

296 |
add a4, a4, v4 /* row 6, col1 */ |

297 |
mov a4, a4, asr #20 |

298 |
orr a2, a2, a4, lsl #16 |

299 |
ldmfd sp!, {a3, a4} /* a3/a4 = e2 (col0/col1) */ |

300 |
str a2, [a1, #(16*6)] /* store row 6 */ |

301 | |

302 |
adds a2, a3, v5 /* row 2, col0: e2 + o2 */ |

303 |
mov a2, a2, lsr #20 |

304 |
orrmi a2, a2, #0xf000 |

305 |
add ip, a4, v6 /* row 2, col1 */ |

306 |
mov ip, ip, asr #20 |

307 |
orr a2, a2, ip, lsl #16 |

308 |
str a2, [a1, #(16*2)] /* store row 2 */ |

309 |
subs a3, a3, v5 /* row 5, col0: e2 - o2 */ |

310 |
mov a2, a3, lsr #20 |

311 |
orrmi a2, a2, #0xf000 |

312 |
sub a4, a4, v6 /* row 5, col1 */ |

313 |
mov a4, a4, asr #20 |

314 |
orr a2, a2, a4, lsl #16 |

315 |
ldmfd sp!, {a3, a4} /* a3/a4 = e3 (col0/col1) */ |

316 |
str a2, [a1, #(16*5)] /* store row 5 */ |

317 | |

318 |
adds a2, a3, v7 /* row 3, col0: e3 + o3 */ |

319 |
mov a2, a2, lsr #20 |

320 |
orrmi a2, a2, #0xf000 |

321 |
add ip, a4, fp /* row 3, col1 */ |

322 |
mov ip, ip, asr #20 |

323 |
orr a2, a2, ip, lsl #16 |

324 |
str a2, [a1, #(16*3)] /* store row 3 */ |

325 |
subs a3, a3, v7 /* row 4, col0: e3 - o3 */ |

326 |
mov a2, a3, lsr #20 |

327 |
orrmi a2, a2, #0xf000 |

328 |
sub a4, a4, fp /* row 4, col1 */ |

329 |
mov a4, a4, asr #20 |

330 |
orr a2, a2, a4, lsl #16 |

331 |
str a2, [a1, #(16*4)] /* store row 4 */ |

332 | |

333 |
ldr pc, [sp], #4 /* pop return address into pc */ |

334 |
endfunc |

335 | |

336 |
/*
 * idct_col_put_armv5te: column pass for two adjacent columns; each result
 * is clamped to 0..255 and stored as a byte pair in a separate output
 * buffer.
 *
 * In:    a1    = address of row 0 of the column pair (see idct_col macro).
 *        stack = the two words directly above the return address pushed
 *                here must be the output pointer (lower address) and the
 *                byte distance between output rows (next word).
 *                ff_simple_idct_put_armv5te arranges this by pushing
 *                a1 and a2 first.
 * Out:   eight halfword stores, one per output row: low byte = column 0,
 *        high byte = column 1. The output pointer slot on the stack is
 *        advanced by 2 so the next call writes the next column pair.
 * Keeps: a1.
 * Uses:  a2-a4, v1-v7, fp, ip, lr and the flags without saving them.
 *
 * Clamp idiom: "movs x, x, asr #20" scales and sets N; movmi forces
 * negative values to 0; cmp/movgt caps at 255.
 * v1 walks down from row 0 and v2 walks up from row 7, both stepped by
 * the row distance through address writeback.
 */
function idct_col_put_armv5te |

337 |
str lr, [sp, #-4]! /* push return address; lr is reused below */ |

338 | |

339 |
idct_col |

340 | |

341 |
ldmfd sp!, {a3, a4} /* a3/a4 = e0 (col0/col1) */ |

342 |
ldr lr, [sp, #32] /* lr = output row distance (second caller word) */ |

343 |
add a2, a3, v1 /* row 0, col0: e0 + o0 */ |

344 |
movs a2, a2, asr #20 |

345 |
movmi a2, #0 |

346 |
cmp a2, #255 |

347 |
movgt a2, #255 |

348 |
add ip, a4, v2 /* row 0, col1 */ |

349 |
movs ip, ip, asr #20 |

350 |
movmi ip, #0 |

351 |
cmp ip, #255 |

352 |
movgt ip, #255 |

353 |
orr a2, a2, ip, lsl #8 /* a2 = row 0 byte pair */ |

354 |
sub a3, a3, v1 /* row 7, col0: e0 - o0 */ |

355 |
movs a3, a3, asr #20 |

356 |
movmi a3, #0 |

357 |
cmp a3, #255 |

358 |
movgt a3, #255 |

359 |
sub a4, a4, v2 /* row 7, col1 */ |

360 |
movs a4, a4, asr #20 |

361 |
movmi a4, #0 |

362 |
cmp a4, #255 |

363 |
ldr v1, [sp, #28] /* v1 = output pointer (o0 is no longer needed); ldr leaves the flags alone */ |

364 |
movgt a4, #255 |

365 |
strh a2, [v1] /* store row 0 */ |

366 |
add a2, v1, #2 |

367 |
str a2, [sp, #28] /* advance the stored output pointer by two bytes */ |

368 |
orr a2, a3, a4, lsl #8 /* a2 = row 7 byte pair */ |

369 |
rsb v2, lr, lr, lsl #3 /* v2 = 7 * row distance */ |

370 |
ldmfd sp!, {a3, a4} /* a3/a4 = e1 (col0/col1) */ |

371 |
strh a2, [v2, v1]! /* store row 7; v2 = address of output row 7 */ |

372 | |

373 |
sub a2, a3, v3 /* row 1, col0: e1 - v3, v3 being o1 with inverted sign */ |

374 |
movs a2, a2, asr #20 |

375 |
movmi a2, #0 |

376 |
cmp a2, #255 |

377 |
movgt a2, #255 |

378 |
sub ip, a4, v4 /* row 1, col1 */ |

379 |
movs ip, ip, asr #20 |

380 |
movmi ip, #0 |

381 |
cmp ip, #255 |

382 |
movgt ip, #255 |

383 |
orr a2, a2, ip, lsl #8 |

384 |
strh a2, [v1, lr]! /* store row 1; v1 moves down one row */ |

385 |
add a3, a3, v3 /* row 6, col0: e1 + v3 */ |

386 |
movs a2, a3, asr #20 |

387 |
movmi a2, #0 |

388 |
cmp a2, #255 |

389 |
movgt a2, #255 |

390 |
add a4, a4, v4 /* row 6, col1 */ |

391 |
movs a4, a4, asr #20 |

392 |
movmi a4, #0 |

393 |
cmp a4, #255 |

394 |
movgt a4, #255 |

395 |
orr a2, a2, a4, lsl #8 |

396 |
ldmfd sp!, {a3, a4} /* a3/a4 = e2 (col0/col1) */ |

397 |
strh a2, [v2, -lr]! /* store row 6; v2 moves up one row */ |

398 | |

399 |
add a2, a3, v5 /* row 2, col0: e2 + o2 */ |

400 |
movs a2, a2, asr #20 |

401 |
movmi a2, #0 |

402 |
cmp a2, #255 |

403 |
movgt a2, #255 |

404 |
add ip, a4, v6 /* row 2, col1 */ |

405 |
movs ip, ip, asr #20 |

406 |
movmi ip, #0 |

407 |
cmp ip, #255 |

408 |
movgt ip, #255 |

409 |
orr a2, a2, ip, lsl #8 |

410 |
strh a2, [v1, lr]! /* store row 2 */ |

411 |
sub a3, a3, v5 /* row 5, col0: e2 - o2 */ |

412 |
movs a2, a3, asr #20 |

413 |
movmi a2, #0 |

414 |
cmp a2, #255 |

415 |
movgt a2, #255 |

416 |
sub a4, a4, v6 /* row 5, col1 */ |

417 |
movs a4, a4, asr #20 |

418 |
movmi a4, #0 |

419 |
cmp a4, #255 |

420 |
movgt a4, #255 |

421 |
orr a2, a2, a4, lsl #8 |

422 |
ldmfd sp!, {a3, a4} /* a3/a4 = e3 (col0/col1) */ |

423 |
strh a2, [v2, -lr]! /* store row 5 */ |

424 | |

425 |
add a2, a3, v7 /* row 3, col0: e3 + o3 */ |

426 |
movs a2, a2, asr #20 |

427 |
movmi a2, #0 |

428 |
cmp a2, #255 |

429 |
movgt a2, #255 |

430 |
add ip, a4, fp /* row 3, col1 */ |

431 |
movs ip, ip, asr #20 |

432 |
movmi ip, #0 |

433 |
cmp ip, #255 |

434 |
movgt ip, #255 |

435 |
orr a2, a2, ip, lsl #8 |

436 |
strh a2, [v1, lr] /* store row 3 (no writeback needed any more) */ |

437 |
sub a3, a3, v7 /* row 4, col0: e3 - o3 */ |

438 |
movs a2, a3, asr #20 |

439 |
movmi a2, #0 |

440 |
cmp a2, #255 |

441 |
movgt a2, #255 |

442 |
sub a4, a4, fp /* row 4, col1 */ |

443 |
movs a4, a4, asr #20 |

444 |
movmi a4, #0 |

445 |
cmp a4, #255 |

446 |
movgt a4, #255 |

447 |
orr a2, a2, a4, lsl #8 |

448 |
strh a2, [v2, -lr] /* store row 4 */ |

449 | |

450 |
ldr pc, [sp], #4 /* pop return address into pc */ |

451 |
endfunc |

452 | |

453 |
/*
 * idct_col_add_armv5te: column pass for two adjacent columns; each result
 * is added to the byte already present in the output buffer, clamped to
 * 0..255 and stored back.
 *
 * In:    a1    = address of row 0 of the column pair (see idct_col macro).
 *        stack = the two words directly above the return address pushed
 *                here must be the output pointer (lower address) and the
 *                byte distance between output rows (next word).
 *                ff_simple_idct_add_armv5te arranges this by pushing
 *                a1 and a2 first.
 * Out:   eight halfword read-modify-write accesses, one per output row:
 *        low byte = column 0, high byte = column 1. The output pointer
 *        slot on the stack is advanced by 2 for the next column pair.
 * Keeps: a1.
 * Uses:  a2-a4, v1-v7, fp, ip, lr and the flags without saving them.
 *
 * Per value: shift right by 20, add the existing byte with "adds" (sets
 * N), movmi forces negative sums to 0, cmp/movgt caps at 255.
 * lr walks down from row 0 and v2 walks up from row 7 through address
 * writeback on the ldrh; v1 holds the row distance after the first group.
 * v1 and v3 are reused as scratch once their odd-part value is consumed.
 */
function idct_col_add_armv5te |

454 |
str lr, [sp, #-4]! /* push return address; lr is reused below */ |

455 | |

456 |
idct_col |

457 | |

458 |
ldr lr, [sp, #36] /* lr = output pointer (first caller word) */ |

459 | |

460 |
ldmfd sp!, {a3, a4} /* a3/a4 = e0 (col0/col1) */ |

461 |
ldrh ip, [lr] /* ip = existing byte pair of row 0 */ |

462 |
add a2, a3, v1 /* row 0, col0: e0 + o0 */ |

463 |
mov a2, a2, asr #20 |

464 |
sub a3, a3, v1 /* row 7, col0: e0 - o0 (kept for later) */ |

465 |
and v1, ip, #255 /* v1 = existing col0 byte */ |

466 |
adds a2, a2, v1 |

467 |
movmi a2, #0 |

468 |
cmp a2, #255 |

469 |
movgt a2, #255 |

470 |
add v1, a4, v2 /* row 0, col1 */ |

471 |
mov v1, v1, asr #20 |

472 |
adds v1, v1, ip, lsr #8 /* add existing col1 byte */ |

473 |
movmi v1, #0 |

474 |
cmp v1, #255 |

475 |
movgt v1, #255 |

476 |
orr a2, a2, v1, lsl #8 /* a2 = new row 0 byte pair */ |

477 |
ldr v1, [sp, #32] /* v1 = output row distance (second caller word) */ |

478 |
sub a4, a4, v2 /* row 7, col1: e0 - o0 */ |

479 |
rsb v2, v1, v1, lsl #3 /* v2 = 7 * row distance */ |

480 |
ldrh ip, [v2, lr]! /* ip = existing row 7 pair; v2 = address of output row 7 */ |

481 |
strh a2, [lr] /* store row 0 */ |

482 |
mov a3, a3, asr #20 |

483 |
and a2, ip, #255 |

484 |
adds a3, a3, a2 |

485 |
movmi a3, #0 |

486 |
cmp a3, #255 |

487 |
movgt a3, #255 |

488 |
mov a4, a4, asr #20 |

489 |
adds a4, a4, ip, lsr #8 |

490 |
movmi a4, #0 |

491 |
cmp a4, #255 |

492 |
movgt a4, #255 |

493 |
add a2, lr, #2 |

494 |
str a2, [sp, #28] /* advance the stored output pointer by two bytes */ |

495 |
orr a2, a3, a4, lsl #8 |

496 |
strh a2, [v2] /* store row 7 */ |

497 | |

498 |
ldmfd sp!, {a3, a4} /* a3/a4 = e1 (col0/col1) */ |

499 |
ldrh ip, [lr, v1]! /* lr moves down to row 1; ip = existing pair */ |

500 |
sub a2, a3, v3 /* row 1, col0: e1 - v3, v3 being o1 with inverted sign */ |

501 |
mov a2, a2, asr #20 |

502 |
add a3, a3, v3 /* row 6, col0: e1 + v3 (kept for later) */ |

503 |
and v3, ip, #255 /* v3 becomes scratch from here on */ |

504 |
adds a2, a2, v3 |

505 |
movmi a2, #0 |

506 |
cmp a2, #255 |

507 |
movgt a2, #255 |

508 |
sub v3, a4, v4 /* row 1, col1 */ |

509 |
mov v3, v3, asr #20 |

510 |
adds v3, v3, ip, lsr #8 |

511 |
movmi v3, #0 |

512 |
cmp v3, #255 |

513 |
movgt v3, #255 |

514 |
orr a2, a2, v3, lsl #8 |

515 |
add a4, a4, v4 /* row 6, col1 */ |

516 |
ldrh ip, [v2, -v1]! /* v2 moves up to row 6; ip = existing pair */ |

517 |
strh a2, [lr] /* store row 1 */ |

518 |
mov a3, a3, asr #20 |

519 |
and a2, ip, #255 |

520 |
adds a3, a3, a2 |

521 |
movmi a3, #0 |

522 |
cmp a3, #255 |

523 |
movgt a3, #255 |

524 |
mov a4, a4, asr #20 |

525 |
adds a4, a4, ip, lsr #8 |

526 |
movmi a4, #0 |

527 |
cmp a4, #255 |

528 |
movgt a4, #255 |

529 |
orr a2, a3, a4, lsl #8 |

530 |
strh a2, [v2] /* store row 6 */ |

531 | |

532 |
ldmfd sp!, {a3, a4} /* a3/a4 = e2 (col0/col1) */ |

533 |
ldrh ip, [lr, v1]! /* lr moves down to row 2 */ |

534 |
add a2, a3, v5 /* row 2, col0: e2 + o2 */ |

535 |
mov a2, a2, asr #20 |

536 |
sub a3, a3, v5 /* row 5, col0: e2 - o2 (kept for later) */ |

537 |
and v3, ip, #255 |

538 |
adds a2, a2, v3 |

539 |
movmi a2, #0 |

540 |
cmp a2, #255 |

541 |
movgt a2, #255 |

542 |
add v3, a4, v6 /* row 2, col1 */ |

543 |
mov v3, v3, asr #20 |

544 |
adds v3, v3, ip, lsr #8 |

545 |
movmi v3, #0 |

546 |
cmp v3, #255 |

547 |
movgt v3, #255 |

548 |
orr a2, a2, v3, lsl #8 |

549 |
sub a4, a4, v6 /* row 5, col1 */ |

550 |
ldrh ip, [v2, -v1]! /* v2 moves up to row 5 */ |

551 |
strh a2, [lr] /* store row 2 */ |

552 |
mov a3, a3, asr #20 |

553 |
and a2, ip, #255 |

554 |
adds a3, a3, a2 |

555 |
movmi a3, #0 |

556 |
cmp a3, #255 |

557 |
movgt a3, #255 |

558 |
mov a4, a4, asr #20 |

559 |
adds a4, a4, ip, lsr #8 |

560 |
movmi a4, #0 |

561 |
cmp a4, #255 |

562 |
movgt a4, #255 |

563 |
orr a2, a3, a4, lsl #8 |

564 |
strh a2, [v2] /* store row 5 */ |

565 | |

566 |
ldmfd sp!, {a3, a4} /* a3/a4 = e3 (col0/col1) */ |

567 |
ldrh ip, [lr, v1]! /* lr moves down to row 3 */ |

568 |
add a2, a3, v7 /* row 3, col0: e3 + o3 */ |

569 |
mov a2, a2, asr #20 |

570 |
sub a3, a3, v7 /* row 4, col0: e3 - o3 (kept for later) */ |

571 |
and v3, ip, #255 |

572 |
adds a2, a2, v3 |

573 |
movmi a2, #0 |

574 |
cmp a2, #255 |

575 |
movgt a2, #255 |

576 |
add v3, a4, fp /* row 3, col1 */ |

577 |
mov v3, v3, asr #20 |

578 |
adds v3, v3, ip, lsr #8 |

579 |
movmi v3, #0 |

580 |
cmp v3, #255 |

581 |
movgt v3, #255 |

582 |
orr a2, a2, v3, lsl #8 |

583 |
sub a4, a4, fp /* row 4, col1 */ |

584 |
ldrh ip, [v2, -v1]! /* v2 moves up to row 4 */ |

585 |
strh a2, [lr] /* store row 3 */ |

586 |
mov a3, a3, asr #20 |

587 |
and a2, ip, #255 |

588 |
adds a3, a3, a2 |

589 |
movmi a3, #0 |

590 |
cmp a3, #255 |

591 |
movgt a3, #255 |

592 |
mov a4, a4, asr #20 |

593 |
adds a4, a4, ip, lsr #8 |

594 |
movmi a4, #0 |

595 |
cmp a4, #255 |

596 |
movgt a4, #255 |

597 |
orr a2, a3, a4, lsl #8 |

598 |
strh a2, [v2] /* store row 4 */ |

599 | |

600 |
ldr pc, [sp], #4 /* pop return address into pc */ |

601 |
endfunc |

602 | |

603 |
/*
 * ff_simple_idct_armv5te: in-place 8x8 inverse DCT.
 *
 * In:    a1 = address of the block: 8 rows of 16 bytes each
 *             (eight 16-bit coefficients per row).
 * Out:   the block is overwritten with the transformed values.
 * Saves: v1-v7, fp (restored on return); lr is pushed and popped into pc.
 *
 * The row helper is run once per row, then the column helper once per
 * pair of adjacent columns (4 bytes apart). Both helpers leave a1 intact,
 * so it is stepped here between calls.
 */
function ff_simple_idct_armv5te, export=1
        stmdb   sp!, {v1-v7, fp, lr}

        /* Row pass: rows 0..6 with a pointer step, then row 7. */
        .rept   7
        bl      idct_row_armv5te
        add     a1, a1, #16
        .endr
        bl      idct_row_armv5te

        sub     a1, a1, #(16*7)         /* back to row 0 */

        /* Column pass: column pairs 0..2 with a pointer step, then pair 3. */
        .rept   3
        bl      idct_col_armv5te
        add     a1, a1, #4
        .endr
        bl      idct_col_armv5te

        ldmia   sp!, {v1-v7, fp, pc}
endfunc

634 | |

635 |
/*
 * ff_simple_idct_add_armv5te: 8x8 inverse DCT whose result is added to an
 * existing byte buffer.
 *
 * In:    a1 = output pointer, a2 = byte distance between output rows,
 *        a3 = address of the coefficient block (8 rows of 16 bytes).
 * Out:   the row pass rewrites the block in place; the column helper
 *        updates eight output rows per call.
 * Saves: v1-v7, fp (restored on return); lr is pushed and popped into pc.
 *
 * a1 and a2 are pushed at the lowest addresses of the frame because
 * idct_col_add_armv5te reads them from the stack and advances the stored
 * output pointer after each column pair. They are dropped again with
 * "add sp, sp, #8" instead of being reloaded.
 */
function ff_simple_idct_add_armv5te, export=1
        stmdb   sp!, {a1, a2, v1-v7, fp, lr}

        mov     a1, a3                  /* helpers take the block in a1 */

        /* Row pass: rows 0..6 with a pointer step, then row 7. */
        .rept   7
        bl      idct_row_armv5te
        add     a1, a1, #16
        .endr
        bl      idct_row_armv5te

        sub     a1, a1, #(16*7)         /* back to row 0 */

        /* Column pass: column pairs 0..2 with a pointer step, then pair 3. */
        .rept   3
        bl      idct_col_add_armv5te
        add     a1, a1, #4
        .endr
        bl      idct_col_add_armv5te

        add     sp, sp, #8              /* discard the two stacked arguments */
        ldmia   sp!, {v1-v7, fp, pc}
endfunc

669 | |

670 |
/*
 * ff_simple_idct_put_armv5te: 8x8 inverse DCT whose clamped result is
 * written to a byte buffer.
 *
 * In:    a1 = output pointer, a2 = byte distance between output rows,
 *        a3 = address of the coefficient block (8 rows of 16 bytes).
 * Out:   the row pass rewrites the block in place; the column helper
 *        writes eight output rows per call.
 * Saves: v1-v7, fp (restored on return); lr is pushed and popped into pc.
 *
 * a1 and a2 are pushed at the lowest addresses of the frame because
 * idct_col_put_armv5te reads them from the stack and advances the stored
 * output pointer after each column pair. They are dropped again with
 * "add sp, sp, #8" instead of being reloaded.
 */
function ff_simple_idct_put_armv5te, export=1
        stmdb   sp!, {a1, a2, v1-v7, fp, lr}

        mov     a1, a3                  /* helpers take the block in a1 */

        /* Row pass: rows 0..6 with a pointer step, then row 7. */
        .rept   7
        bl      idct_row_armv5te
        add     a1, a1, #16
        .endr
        bl      idct_row_armv5te

        sub     a1, a1, #(16*7)         /* back to row 0 */

        /* Column pass: column pairs 0..2 with a pointer step, then pair 3. */
        .rept   3
        bl      idct_col_put_armv5te
        add     a1, a1, #4
        .endr
        bl      idct_col_put_armv5te

        add     sp, sp, #8              /* discard the two stacked arguments */
        ldmia   sp!, {v1-v7, fp, pc}
endfunc