1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// align 0: %[buf] is already on a word boundary, so whole words are
// passed straight through.
// Transfers 512 bytes: every pass reads four words from %[data] and
// writes them with a single stmia.
// Writes r1-r4, r6 and the condition flags; %[buf] is advanced by the
// stmia writeback.
// NOTE(review): %[data] is read repeatedly at the same operand --
// presumably a data port/FIFO; confirm against the operand constraints.
        mov     r6, #512                // r6 = bytes still to transfer
1:
        ldr     r1, %[data]
        ldr     r2, %[data]
        ldr     r3, %[data]
        ldr     r4, %[data]
        subs    r6, r6, #16             // 16 bytes per pass; Z is tested by bne
        stmia   %[buf]!, {r1-r4}        // stmia does not touch the flags
        bne     1b

--------------------------------
// align 1: %[buf] starts one byte past a word boundary.
// Transfers 512 bytes in total: 3 lead-in bytes to reach a word
// boundary, 31 loop passes of 16 bytes, then 3 words and 1 closing byte.
// Between stores r1 carries the single byte of the previous input word
// that has not been written yet; each new word fills the upper 3 bytes.
// Writes r1-r6 and the condition flags; %[buf] advances via writeback.
// NOTE(review): the shifts reproduce the input byte order only on a
// little-endian target -- confirm.
        mov     r6, #512
        sub     r6, r6, #16             // the final 16 bytes are handled after the loop

        // lead-in: low 3 bytes of the first word bring %[buf] to a word boundary
        ldr     r1, %[data]
        strb    r1, [%[buf]], #1
        mov     r1, r1, lsr #8
        strh    r1, [%[buf]], #2
        mov     r1, r1, lsr #16         // r1 = top byte of the first word

1:
        ldr     r2, %[data]
        orr     r1, r1, r2, lsl #8      // output word 1
        ldr     r3, %[data]
        mov     r2, r2, lsr #24
        orr     r2, r2, r3, lsl #8      // output word 2
        ldr     r4, %[data]
        mov     r3, r3, lsr #24
        orr     r3, r3, r4, lsl #8      // output word 3
        ldr     r5, %[data]
        mov     r4, r4, lsr #24
        orr     r4, r4, r5, lsl #8      // output word 4

        stmia   %[buf]!, {r1-r4}
        mov     r1, r5, lsr #24         // carry the leftover byte into the next pass
        subs    r6, r6, #16
        bne     1b

        // tail: last 3 words + 1 trailing byte
        ldr     r2, %[data]
        orr     r1, r1, r2, lsl #8      // output word 1
        ldr     r3, %[data]
        mov     r2, r2, lsr #24
        orr     r2, r2, r3, lsl #8      // output word 2
        ldr     r4, %[data]
        mov     r3, r3, lsr #24
        orr     r3, r3, r4, lsl #8      // output word 3

        stmia   %[buf]!, {r1-r3}
        mov     r4, r4, lsr #24         // top byte of the last input word
        strb    r4, [%[buf]], #1

---------------------------------
// align 2: %[buf] starts two bytes past a word boundary.
// Transfers 512 bytes in total: 1 lead-in halfword to reach a word
// boundary, 31 loop passes of 16 bytes, then 3 words and 1 closing
// halfword.
// Between stores r1 carries the upper halfword of the previous input
// word; each new word supplies the upper half of the output word.
// Writes r1-r6 and the condition flags; %[buf] advances via writeback.
// NOTE(review): the shifts reproduce the input byte order only on a
// little-endian target -- confirm.
        mov     r6, #512
        sub     r6, r6, #16             // the final 16 bytes are handled after the loop

        // lead-in: low halfword of the first word brings %[buf] to a word boundary
        ldr     r1, %[data]
        strh    r1, [%[buf]], #2
        mov     r1, r1, lsr #16         // r1 = upper halfword of the first word

1:
        ldr     r2, %[data]
        orr     r1, r1, r2, lsl #16     // output word 1
        ldr     r3, %[data]
        mov     r2, r2, lsr #16
        orr     r2, r2, r3, lsl #16     // output word 2
        ldr     r4, %[data]
        mov     r3, r3, lsr #16
        orr     r3, r3, r4, lsl #16     // output word 3
        ldr     r5, %[data]
        mov     r4, r4, lsr #16
        orr     r4, r4, r5, lsl #16     // output word 4

        stmia   %[buf]!, {r1-r4}
        mov     r1, r5, lsr #16         // carry the leftover halfword into the next pass
        subs    r6, r6, #16
        bne     1b

        // tail: last 3 words + trailing halfword
        ldr     r2, %[data]             // was misspelled "dr", which does not assemble
        orr     r1, r1, r2, lsl #16     // output word 1
        ldr     r3, %[data]
        mov     r2, r2, lsr #16
        orr     r2, r2, r3, lsl #16     // output word 2
        ldr     r4, %[data]
        mov     r3, r3, lsr #16
        orr     r3, r3, r4, lsl #16     // output word 3

        stmia   %[buf]!, {r1-r3}

        // trailing halfword: upper half of the last input word
        mov     r4, r4, lsr #16
        strh    r4, [%[buf]], #2

-----------------------------------------------
// align 3: %[buf] starts three bytes past a word boundary.
// Transfers 512 bytes in total: 1 lead-in byte to reach a word boundary,
// 31 loop passes of 16 bytes, then 3 words and 3 closing bytes.
// Between stores r1 carries the upper 3 bytes of the previous input
// word; each new word supplies only the top byte of the output word.
// Writes r1-r6 and the condition flags; %[buf] advances via writeback.
// NOTE(review): the shifts reproduce the input byte order only on a
// little-endian target -- confirm.
        mov     r6, #512
        sub     r6, r6, #16             // the final 16 bytes are handled after the loop

        // lead-in: low byte of the first word brings %[buf] to a word boundary
        ldr     r1, %[data]
        strb    r1, [%[buf]], #1
        mov     r1, r1, lsr #8          // r1 = upper 3 bytes of the first word

1:
        ldr     r2, %[data]
        orr     r1, r1, r2, lsl #24     // output word 1
        ldr     r3, %[data]
        mov     r2, r2, lsr #8
        orr     r2, r2, r3, lsl #24     // output word 2
        ldr     r4, %[data]
        mov     r3, r3, lsr #8
        orr     r3, r3, r4, lsl #24     // output word 3
        ldr     r5, %[data]
        mov     r4, r4, lsr #8
        orr     r4, r4, r5, lsl #24     // output word 4

        stmia   %[buf]!, {r1-r4}        // store 4 words
        mov     r1, r5, lsr #8          // carry the leftover 3 bytes into the next pass
        subs    r6, r6, #16
        bne     1b

        // tail: last 3 words + trailing 3 bytes
        ldr     r2, %[data]
        orr     r1, r1, r2, lsl #24     // output word 1
        ldr     r3, %[data]
        mov     r2, r2, lsr #8
        orr     r2, r2, r3, lsl #24     // output word 2
        ldr     r4, %[data]
        mov     r3, r3, lsr #8
        orr     r3, r3, r4, lsl #24     // output word 3
        stmia   %[buf]!, {r1-r3}

        // trailing 3 bytes: the upper 3 bytes of the last input word (r4).
        // %[buf] is word-aligned here, so the halfword store is aligned.
        mov     r4, r4, lsr #8
        strh    r4, [%[buf]], #2        // operand was malformed as %[[buf]]
        mov     r4, r4, lsr #16         // last byte comes from r4; r1 holds a word already stored
        strb    r4, [%[buf]], #1