| /*************************************************************************** |
| * __________ __ ___. |
| * Open \______ \ ____ ____ | | _\_ |__ _______ ___ |
| * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / |
| * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < |
| * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ |
| * \/ \/ \/ \/ \/ |
| * $Id$ |
| * |
| * Copyright (C) 2004 by Jens Arnold |
| * |
| * All files in this archive are subject to the GNU General Public License. |
| * See the file COPYING in the source tree root for full license agreement. |
| * |
| * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
| * KIND, either express or implied. |
| * |
| ****************************************************************************/ |
| #include "config.h" |
| |
| .section .icode,"ax",@progbits /* place the routine in section .icode: allocatable, executable, with contents */ |
| |
| .align 2 |
| .global _memset /* export the entry point so other objects can link against it */ |
| .type _memset,@function /* mark the symbol as a function in the object file */ |
| |
| /* Fills a memory region with specified byte value |
| * This version is optimized for speed |
| * |
| * arguments: |
| * r4 - start address |
| * r5 - data |
| * r6 - length |
| * |
| * return value: |
| * r0 - start address (like ANSI version) |
| * |
| * register usage: |
| * r0 - temporary |
| * r1 - start address +11 for main loop |
| * r4 - start address |
| * r5 - data (spread to all 4 bytes when using long stores) |
| * r6 - length on entry, then current address (runs down from end to start) |
| * |
| * The instruction order below is devised in a way to utilize the pipelining |
| * of the SH1 to the max. The routine fills memory from end to start in |
| * order to utilize the auto-decrementing store instructions. |
| */ |
| |
| _memset: /* entry: r4 = start address, r5 = data, r6 = length */ |
| neg r4,r0 /* r0 = -start_address */ |
| and #3,r0 /* r0 = (4 - align_offset) % 4, i.e. bytes from start up to the first long bound */ |
| add #4,r0 /* ... + 4 = shortest length that still contains one aligned longword */ |
| cmp/hs r0,r6 /* at least one aligned longword to fill? (T = length >= r0, unsigned) */ |
| add r4,r6 /* r6 = end_address; the T bit from the compare above is still used by the branch below */ |
| bf .no_longs /* no, jump directly to byte loop (r5 is not spread there, only its low byte is stored) */ |
| |
| extu.b r5,r5 /* start: spread data to all 4 bytes; first clear everything above the low byte */ |
| swap.b r5,r0 /* r0 = data moved into byte 1 */ |
| or r0,r5 /* data now in 2 lower bytes of r5 */ |
| swap.w r5,r0 /* r0 = those 2 bytes moved into the upper half */ |
| or r0,r5 /* data now in all 4 bytes of r5 */ |
| |
| mov r6,r0 /* r0 = end_address, working copy for the alignment test and byte stores */ |
| tst #3,r0 /* r0 already long aligned? */ |
| bt .end_b1 /* yes: skip loop */ |
| |
| /* leading byte loop (runs first, but fills the END of the region): sets 0..3 bytes above the last long bound */ |
| .loop_b1: |
| mov.b r5,@-r0 /* pre-decrement r0, then store byte */ |
| tst #3,r0 /* r0 long aligned? */ |
| bf .loop_b1 /* runs r0 down until long aligned */ |
| |
| mov r0,r6 /* r6 = last long bound */ |
| nop /* padding only: keep alignment of the code that follows */ |
| |
| .end_b1: /* here r0 = r6 = long-aligned end of the longword area */ |
| mov r4,r1 /* r1 = start_address... */ |
| add #11,r1 /* ... + 11, combined for rounding and offset: main loop bound, see cmp/hi below */ |
| xor r1,r0 /* r1 has the same bit 2 as the first long bound, so bit 2 of r0 is now the parity of the longword count */ |
| tst #4,r0 /* bit 2 tells whether an even or odd number of */ |
| bf .loop_odd /* longwords to set; odd: enter the loop at its second store */ |
| |
| /* main loop: set 2 longs per pass (only 1 in the first pass when entered at .loop_odd) */ |
| .loop_2l: |
| mov.l r5,@-r6 /* store first long */ |
| .loop_odd: |
| cmp/hi r1,r6 /* runs r6 down to first long bound; T = more than one long still to store (r6 > start + 11) */ |
| mov.l r5,@-r6 /* store second long; placed between compare and branch, the branch still uses T from cmp/hi */ |
| bt .loop_2l |
| |
| .no_longs: /* reached with r6 = first long bound after the main loop, or r6 = end_address for short fills */ |
| cmp/hi r4,r6 /* any bytes left? (also false for length 0) */ |
| bf .end_b2 /* no: skip loop */ |
| |
| /* trailing byte loop (runs last, fills the START of the region, or all of a short region) */ |
| .loop_b2: |
| mov.b r5,@-r6 /* pre-decrement r6, then store byte */ |
| cmp/hi r4,r6 /* runs r6 down to the start address */ |
| bt .loop_b2 |
| |
| .end_b2: |
| rts /* delayed branch: the next instruction executes before control returns */ |
| mov r4,r0 /* return start address (in the rts delay slot) */ |
| |
| .end: /* end marker used by the .size expression below */ |
| .size _memset,.end-_memset /* record the byte size of _memset for the symbol table */ |