/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2004-2006 by Jens Arnold
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

/* Read a number of words from the ATA data port
 *
 * Every 16-bit word is read from the port whose address is held in the
 * .ata_data literal and is written to the buffer with its two bytes
 * exchanged:
 *
 *  - buffer 16-bit aligned: each port word is swap.b'ed and stored whole.
 *  - buffer at an odd address: the low byte of the first port word is
 *    stored on its own, then 16-bit values built from (high byte of word N)
 *    combined with (low byte of word N+1) are stored at the following even
 *    addresses, and finally the high byte of the last word is stored on
 *    its own.
 *
 * Assumes wordcount to be a multiple of 4 (the loops are unrolled 4x and
 * always run at least one full round, so a count of 0 is not supported).
 *
 * Arguments:
 *   r4 - buffer address
 *   r5 - word count
 *
 * Register usage (all of r0-r6 and the T bit are clobbered):
 *   r0    - scratch / value being stored
 *   r1/r2 - read buffers (r2: 1st and 3rd word of a round, r1: 2nd and 4th)
 *   r3    - mask 0xFFFFFF00 (if unaligned)
 *   r4    - current address, biased so that the displacements 6, 0, 2, 4
 *           used inside one round address consecutive buffer words
 *   r5    - end address minus 12, compared against the biased r4
 *   r6    - ata port
 *
 * Notes for maintainers:
 *   - bra and rts execute the instruction that follows them (delay slot)
 *     before the branch takes effect; those instructions are part of the
 *     algorithm, not dead code.
 *   - cmp/hi sets T several instructions ahead of the bt that consumes it;
 *     only instructions that leave T untouched may sit in between.
 *   - The .align directives and the nop keep the loop bodies at the
 *     alignment the cycle counts below were measured with; do not reorder
 *     or remove them without re-measuring.
 */

    .section .icode,"ax",@progbits

    .align  2
    .global _copy_read_sectors
    .type   _copy_read_sectors,@function

_copy_read_sectors:
    add     r5, r5          /* words -> bytes */
    add     r4, r5          /* bytes -> end address */
    add     #-12, r5        /* adjust for offsets */
    mov.l   .ata_data, r6   /* r6 = ATA data port address */

    mov     r4, r0          /* tst #imm works on r0 only */
    tst     #1, r0          /* 16-bit aligned ? */
    bt      .r_aligned      /* yes, do word copy */

    /* not 16-bit aligned */
    mov     #-1, r3         /* prepare a bit mask for high byte */
    shll8   r3              /* r3 = 0xFFFFFF00 */

    mov.w   @r6, r2         /* read first word (1st round) */
    mov.b   r2, @r4         /* store low byte of first word */
    bra     .r_start_b      /* jump into loop after next instr. */
    add     #-5, r4         /* (delay slot) adjust for dest. offsets; now even */

    .align  2
.r_loop_b:                  /* main loop: copy 4 words in a row */
    mov.w   @r6, r2         /* read first word (2+ round) */
    and     r3, r1          /* get high byte of fourth word (2+ round) */
    extu.b  r2, r0          /* get low byte of first word (2+ round) */
    or      r1, r0          /* combine with high byte of fourth word */
    mov.w   r0, @(4, r4)    /* store at buf[4] */
    nop                     /* maintain alignment */
.r_start_b:
    mov.w   @r6, r1         /* read second word */
    and     r3, r2          /* get high byte of first word */
    extu.b  r1, r0          /* get low byte of second word */
    or      r2, r0          /* combine with high byte of first word */
    mov.w   r0, @(6, r4)    /* store at buf[6] */
    add     #8, r4          /* buf += 8 */
    mov.w   @r6, r2         /* read third word */
    and     r3, r1          /* get high byte of second word */
    extu.b  r2, r0          /* get low byte of third word */
    or      r1, r0          /* combine with high byte of second word */
    mov.w   r0, @r4         /* store at buf[0] */
    cmp/hi  r4, r5          /* check for end: T = (r5 > r4), unsigned */
    mov.w   @r6, r1         /* read fourth word */
    and     r3, r2          /* get high byte of third word */
    extu.b  r1, r0          /* get low byte of fourth word */
    or      r2, r0          /* combine with high byte of third word */
    mov.w   r0, @(2, r4)    /* store at buf[2] */
    bt      .r_loop_b       /* T still holds the cmp/hi result */
    /* 24 instructions for 4 copies, takes 30 clock cycles (4 wait) */
    /* avg. 7.5 cycles per word */

    swap.b  r1, r0          /* get high byte of last word */
    rts
    mov.b   r0, @(4, r4)    /* (delay slot) and store it */

    /* 16-bit aligned, loop(read and store word) */
.r_aligned:
    mov.w   @r6, r2         /* read first word (1st round) */
    bra     .r_start_w      /* jump into loop after next instr. */
    add     #-6, r4         /* (delay slot) adjust for destination offsets */

    .align  2
.r_loop_w:                  /* main loop: copy 4 words in a row */
    mov.w   @r6, r2         /* read first word (2+ round) */
    swap.b  r1, r0          /* swap fourth word (2+ round) */
    mov.w   r0, @(4, r4)    /* store fourth word (2+ round) */
    nop                     /* maintain alignment */
.r_start_w:
    mov.w   @r6, r1         /* read second word */
    swap.b  r2, r0          /* swap first word */
    mov.w   r0, @(6, r4)    /* store first word in buf[6] */
    add     #8, r4          /* buf += 8 */
    mov.w   @r6, r2         /* read third word */
    swap.b  r1, r0          /* swap second word */
    mov.w   r0, @r4         /* store second word in buf[0] */
    cmp/hi  r4, r5          /* check for end: T = (r5 > r4), unsigned */
    mov.w   @r6, r1         /* read fourth word */
    swap.b  r2, r0          /* swap third word */
    mov.w   r0, @(2, r4)    /* store third word */
    bt      .r_loop_w       /* T still holds the cmp/hi result */
    /* 16 instructions for 4 copies, takes 22 clock cycles (4 wait) */
    /* avg. 5.5 cycles per word */

    swap.b  r1, r0          /* swap fourth word (last round) */
    rts
    mov.w   r0, @(4, r4)    /* (delay slot) and store it */

.r_end:
    .size   _copy_read_sectors,.r_end-_copy_read_sectors
132
/* Write a number of words to the ATA data port
 *
 * Every 16-bit value written to the port whose address is held in the
 * .ata_data literal has its two bytes exchanged relative to the buffer:
 *
 *  - buffer 16-bit aligned: each buffer word is loaded, swap.b'ed and
 *    written.
 *  - buffer at an odd address: the first byte is loaded on its own, then
 *    words are loaded from the following even addresses; each value
 *    written is (high byte of the word just loaded) combined with (low
 *    byte of the previously loaded word, or the first byte in the first
 *    round).
 *
 * Assumes wordcount to be a multiple of 2 (the loops are unrolled 2x and
 * always run at least one full round, so a count of 0 is not supported).
 * Writing is not unrolled as much as reading, for several reasons:
 *
 * - a similar instruction sequence is faster for writing than for reading
 *   because the auto-incrementing load instructions can be used
 * - writing profits from warp mode
 *
 * Both of these add up to have writing faster than the more unrolled reading.
 *
 * Arguments:
 *   r4 - buffer address
 *   r5 - word count
 *
 * Register usage (all of r0-r7 and the T bit are clobbered):
 *   r0/r1 - scratch (unaligned: low bytes carried over to the next write)
 *   r2/r3 - write buffers
 *   r4    - current address (post-incremented by the loads)
 *   r5    - end address minus 4, compared against r4
 *   r6    - mask 0xFFFFFF00 (if unaligned)
 *   r7    - ata port
 *
 * Notes for maintainers:
 *   - bra and rts execute the instruction that follows them (delay slot)
 *     before the branch takes effect; those instructions are part of the
 *     algorithm, not dead code.
 *   - cmp/hi sets T several instructions ahead of the bt that consumes it;
 *     only instructions that leave T untouched may sit in between.
 *   - The unaligned path reads one byte beyond the end of the buffer (see
 *     the comment after the loop); that byte is never written to the port.
 */

    .align  2
    .global _copy_write_sectors
    .type   _copy_write_sectors,@function

_copy_write_sectors:
    add     r5, r5          /* words -> bytes */
    add     r4, r5          /* bytes -> end address */
    add     #-4, r5         /* adjust for offsets */
    mov.l   .ata_data, r7   /* r7 = ATA data port address */

    mov     r4, r0          /* tst #imm works on r0 only */
    tst     #1, r0          /* 16-bit aligned ? */
    bt      .w_aligned      /* yes, do word copy */

    /* not 16-bit aligned */
    mov     #-1, r6         /* prepare a bit mask for high byte */
    shll8   r6              /* r6 = 0xFFFFFF00 */

    mov.b   @r4+, r2        /* load first byte (stands in for the low byte */
                            /* of an "old second word" in the 1st round) */
    mov.w   @r4+, r3        /* load (initial) first word */
    bra     .w_start_b      /* jump into loop after next instr. */
    extu.b  r2, r0          /* (delay slot) extend unsigned */

    .align  2
.w_loop_b:                  /* main loop: copy 2 words in a row */
    mov.w   @r4+, r3        /* load first word (2+ round) */
    extu.b  r2, r0          /* put away low byte of second word (2+ round) */
    and     r6, r2          /* get high byte of second word (2+ round) */
    or      r1, r2          /* combine with low byte of old first word */
    mov.w   r2, @r7         /* write that */
.w_start_b:
    cmp/hi  r4, r5          /* check for end: T = (r5 > r4), unsigned */
    mov.w   @r4+, r2        /* load second word */
    extu.b  r3, r1          /* put away low byte of first word */
    and     r6, r3          /* get high byte of first word */
    or      r0, r3          /* combine with low byte of old second word */
    mov.w   r3, @r7         /* write that */
    bt      .w_loop_b       /* T still holds the cmp/hi result */
    /* 12 instructions for 2 copies, takes 14 clock cycles */
    /* avg. 7 cycles per word */

    /* the loop "overreads" 1 byte past the buffer end, however, the last */
    /* byte is not written to disk */
    and     r6, r2          /* get high byte of last word */
    or      r1, r2          /* combine with low byte of old first word */
    rts
    mov.w   r2, @r7         /* (delay slot) write last word */

    /* 16-bit aligned, loop(load and write word) */
.w_aligned:
    bra     .w_start_w      /* jump into loop after next instr. */
    mov.w   @r4+, r2        /* (delay slot) load first word (1st round) */

    .align  2
.w_loop_w:                  /* main loop: copy 2 words in a row */
    mov.w   @r4+, r2        /* load first word (2+ round) */
    swap.b  r1, r0          /* swap second word (2+ round) */
    mov.w   r0, @r7         /* write second word (2+ round) */
.w_start_w:
    cmp/hi  r4, r5          /* check for end: T = (r5 > r4), unsigned */
    mov.w   @r4+, r1        /* load second word */
    swap.b  r2, r0          /* swap first word */
    mov.w   r0, @r7         /* write first word */
    bt      .w_loop_w       /* T still holds the cmp/hi result */
    /* 8 instructions for 2 copies, takes 10 clock cycles */
    /* avg. 5 cycles per word */

    swap.b  r1, r0          /* swap second word (last round) */
    rts
    mov.w   r0, @r7         /* (delay slot) and write it */

.w_end:
    .size   _copy_write_sectors,.w_end-_copy_write_sectors
230
/* Literal holding the ATA data port address. Both _copy_read_sectors and
 * _copy_write_sectors fetch it with a PC-relative "mov.l .ata_data, rN",
 * so it has to stay in this section, after both routines and within
 * reach of them, and the .align below must be kept so the longword load
 * stays aligned. */
    .align  2
.ata_data:
    .long   0x06104100      /* ATA data port */