Revision e9497946

View differences:

libavcodec/arm/simple_idct_armv6.S
60 60
  Output in registers r4--r11
61 61
*/
62 62
        .macro idct_row shift
63
        ldr    lr, w46               /* lr = W4 | (W6 << 16) */
63
        ldr    lr, w46               /* lr  = W4 | (W6 << 16) */
64 64
        mov    r1, #(1<<(\shift-1))
65 65
        smlad  r4, r2, ip, r1
66 66
        smlsd  r7, r2, ip, r1
67
        ldr    ip, w13               /* ip = W1 | (W3 << 16) */
67
        ldr    ip, w13               /* ip  = W1 | (W3 << 16) */
68 68
        ldr    r10,w57               /* r10 = W5 | (W7 << 16) */
69 69
        smlad  r5, r2, lr, r1
70 70
        smlsd  r6, r2, lr, r1
71 71

  
72
        smuad  r8, r3, ip            /* r8 = B0 = W1*row[1] + W3*row[3] */
73
        smusdx r11,r3, r10           /* r11 = B3 = W7*row[1] - W5*row[3] */
74
        ldr    lr, [r0, #12]         /* lr = row[7,5] */
75
        pkhtb  r2, ip, r10,asr #16   /* r2 = W7 | (W3 << 16) */
76
        pkhbt  r1, ip, r10,lsl #16   /* r1 = W1 | (W5 << 16) */
77
        smusdx r9, r2, r3            /* r9 = -B1 = W7*row[3] - W3*row[1] */
78
        smlad  r8, lr, r10,r8        /* B0 += W5*row[5] + W7*row[7] */
79
        smusdx r10,r3, r1            /* r10 = B2 = W5*row[1] - W1*row[3] */
80

  
81
        ldr    r3, w42n              /* r3 = -W4 | (-W2 << 16) */
82
        smlad  r10,lr, r2, r10       /* B2 += W7*row[5] + W3*row[7] */
83
        ldr    r2, [r0, #4]          /* r2 = row[6,4] */
84
        smlsdx r11,lr, ip, r11       /* B3 += W3*row[5] - W1*row[7] */
85
        ldr    ip, w46               /* ip = W4 | (W6 << 16) */
86
        smlad  r9, lr, r1, r9        /* B1 -= W1*row[5] + W5*row[7] */
72
        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
73
        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
74
        ldr    lr, [r0, #12]         /* lr  =  row[7,5] */
75
        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
76
        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
77
        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
78
        smlad  r8, lr, r10,r8        /* B0  +=      W5*row[5] + W7*row[7] */
79
        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */
80

  
81
        ldr    r3, w42n              /* r3 =  -W4 | (-W2 << 16) */
82
        smlad  r10,lr, r2, r10       /* B2 +=  W7*row[5] + W3*row[7] */
83
        ldr    r2, [r0, #4]          /* r2 =   row[6,4] */
84
        smlsdx r11,lr, ip, r11       /* B3 +=  W3*row[5] - W1*row[7] */
85
        ldr    ip, w46               /* ip =   W4 | (W6 << 16) */
86
        smlad  r9, lr, r1, r9        /* B1 -=  W1*row[5] + W5*row[7] */
87 87

  
88 88
        smlad  r5, r2, r3, r5        /* A1 += -W4*row[4] - W2*row[6] */
89 89
        smlsd  r6, r2, r3, r6        /* A2 += -W4*row[4] + W2*row[6] */
90
        smlad  r4, r2, ip, r4        /* A0 += W4*row[4] + W6*row[6] */
91
        smlsd  r7, r2, ip, r7        /* A3 += W4*row[4] - W6*row[6] */
90
        smlad  r4, r2, ip, r4        /* A0 +=  W4*row[4] + W6*row[6] */
91
        smlsd  r7, r2, ip, r7        /* A3 +=  W4*row[4] - W6*row[6] */
92 92
        .endm
93 93

  
94 94
/*
......
101 101
  Output in registers r4--r11
102 102
*/
103 103
        .macro idct_row4 shift
104
        ldr    lr, w46               /* lr = W4 | (W6 << 16) */
104
        ldr    lr, w46               /* lr =  W4 | (W6 << 16) */
105 105
        ldr    r10,w57               /* r10 = W5 | (W7 << 16) */
106 106
        mov    r1, #(1<<(\shift-1))
107 107
        smlad  r4, r2, ip, r1
108 108
        smlsd  r7, r2, ip, r1
109
        ldr    ip, w13               /* ip = W1 | (W3 << 16) */
109
        ldr    ip, w13               /* ip =  W1 | (W3 << 16) */
110 110
        smlad  r5, r2, lr, r1
111 111
        smlsd  r6, r2, lr, r1
112
        smusdx r11,r3, r10           /* r11 = B3 = W7*row[1] - W5*row[3] */
113
        smuad  r8, r3, ip            /* r8 = B0 = W1*row[1] + W3*row[3] */
114
        pkhtb  r2, ip, r10,asr #16   /* r2 = W7 | (W3 << 16) */
115
        pkhbt  r1, ip, r10,lsl #16   /* r1 = W1 | (W5 << 16) */
116
        smusdx r9, r2, r3            /* r9 = -B1 = W7*row[3] - W3*row[1] */
117
        smusdx r10,r3, r1            /* r10 = B2 = W5*row[1] - W1*row[3] */
112
        smusdx r11,r3, r10           /* r11 =  B3 = W7*row[1] - W5*row[3] */
113
        smuad  r8, r3, ip            /* r8  =  B0 = W1*row[1] + W3*row[3] */
114
        pkhtb  r2, ip, r10,asr #16   /* r2  =  W7 | (W3 << 16) */
115
        pkhbt  r1, ip, r10,lsl #16   /* r1  =  W1 | (W5 << 16) */
116
        smusdx r9, r2, r3            /* r9  = -B1 = W7*row[3] - W3*row[1] */
117
        smusdx r10,r3, r1            /* r10 =  B2 = W5*row[1] - W1*row[3] */
118 118
        .endm
119 119

  
120 120
/*

Also available in: Unified diff