H300: (1) Use DMA for LCD updates, with auto-aligned line reads. Speeds up LCD updates by ~ 75% at 11MHz and 45MHz. Only ~ 11% speedup at 124MHz due to (2). (2) Less aggressive LCD transfer timing at 124MHz. With the previous timing, slightly corrupted display contents were reported, and with DMA transfers, at least 4 waitstates are needed to make updates work at all. * A table in system-iriver.c shows settings for all integer multiples of the base clock frequency (info for developers, not yet complete).


git-svn-id: svn://svn.rockbox.org/rockbox/trunk@11418 a1c6a512-1295-4272-9138-f99709370657
diff --git a/firmware/drivers/lcd-h300.c b/firmware/drivers/lcd-h300.c
index 41d2d11..b7865fa 100644
--- a/firmware/drivers/lcd-h300.c
+++ b/firmware/drivers/lcd-h300.c
@@ -368,7 +368,16 @@
         /* Copy display bitmap to hardware */
         lcd_write_reg(R_RAM_ADDR_SET, xoffset << 8);
         lcd_begin_write_gram();
-        lcd_write_data((unsigned short *)lcd_framebuffer, LCD_WIDTH*LCD_HEIGHT);
+
+        DAR3 = 0xf0000002;
+        SAR3 = (unsigned long)lcd_framebuffer;
+        BCR3 = LCD_WIDTH*LCD_HEIGHT*2;
+        DCR3 = DMA_AA | DMA_BWC(1)
+             | DMA_SINC | DMA_SSIZE(DMA_SIZE_LINE) 
+             | DMA_DSIZE(DMA_SIZE_WORD) | DMA_START;
+
+        while (!(DSR3 & 1));
+        DSR3 = 1;
     }
 }
 
@@ -376,26 +385,39 @@
 void lcd_update_rect(int, int, int, int) ICODE_ATTR;
 void lcd_update_rect(int x, int y, int width, int height)
 {
+    unsigned long dma_addr; /* DMA source: start of the current row */
+
     if(display_on) {
-        int ymax = y + height - 1;

         if(x + width > LCD_WIDTH)
             width = LCD_WIDTH - x;
-        if (width <= 0)
-            return; /* nothing left to do, 0 is harmful to lcd_write_data() */
-        if(ymax >= LCD_HEIGHT)
-            ymax = LCD_HEIGHT-1;
+        if(width <= 0) /* nothing to do */
+            return;
+        if(y + height > LCD_HEIGHT) /* clip the rectangle at the bottom */
+            height = LCD_HEIGHT - y;

-        /* set update window */ 
+        /* set update window */

         lcd_write_reg(R_VERT_RAM_ADDR_POS,((x+xoffset+width-1) << 8) | (x+xoffset));
         lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y);
-        lcd_begin_write_gram(); 
+        lcd_begin_write_gram();
+        
+        DAR3 = 0xf0000002; /* DMA destination: LCD data port */
+        dma_addr = (unsigned long)&lcd_framebuffer[y][x];
+        width *= 2; /* row length in bytes (2 bytes per pixel) */

-        /* Copy specified rectangle bitmap to hardware */
-        for (; y <= ymax; y++) 
-        { 
-            lcd_write_data ((unsigned short *)&lcd_framebuffer[y][x], width); 
-        } 
+        for (; height > 0; height--) /* one DMA transfer per row */
+        {
+            SAR3 = dma_addr;
+            BCR3 = width; /* bytes to send for this row */
+            DCR3 = DMA_AA | DMA_BWC(1) /* auto-align source reads */
+                 | DMA_SINC | DMA_SSIZE(DMA_SIZE_LINE)
+                 | DMA_DSIZE(DMA_SIZE_WORD) | DMA_START; /* kick off */
+
+            dma_addr += LCD_WIDTH*2; /* next row; done while DMA runs */
+
+            while (!(DSR3 & 1)); /* spin until DSR3 bit 0 (DONE) is set */
+            DSR3 = 1; /* write 1 to clear it for the next run */
+        }
     }
 }
diff --git a/firmware/export/mcf5249.h b/firmware/export/mcf5249.h
index 406e0b1..87c4ae7 100644
--- a/firmware/export/mcf5249.h
+++ b/firmware/export/mcf5249.h
@@ -251,6 +251,7 @@
 #define DMA_EEXT        (1 << 30)       /* Enable peripherial request   */
 #define DMA_CS          (1 << 29)       /* Cycle Steal                  */
 #define DMA_AA          (1 << 28)       /* Auto-Align                   */
+#define DMA_BWC(x)      (((x)&7) << 25) /* Bandwidth control            */
 #define DMA_SINC        (1 << 22)       /* Source Increment             */
 #define DMA_SSIZE(x)    (((x)&3) << 20) /* Size of source data          */
 #define DMA_DINC        (1 << 19)       /* Destination Increment        */
diff --git a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
index c6c1c76..ae55dfb 100755
--- a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
+++ b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
@@ -22,102 +22,6 @@
 
     .section    .icode, "ax", @progbits
 
-    .align      2
-    .global     lcd_write_data
-    .type       lcd_write_data, @function
-
-lcd_write_data:
-    move.l  (4, %sp), %a0   /* data pointer */
-    move.l  (8, %sp), %d0   /* length in words */
-    add.l   %d0, %d0        /* words -> bytes */
-    add.l   %a0, %d0        /* -> end address */
-    lea.l   0xf0000002, %a1 /* LCD data port */
-    
-    move.l  %a0, %d1
-    btst.l  #1, %d1         /* already longword aligned? */
-    beq.s   .word1_end      /* yes: skip initial word copy */
-    
-    move.w  (%a0)+, (%a1)   /* transfer initial word */
-
-.word1_end:                 /* now longword aligned */
-    moveq.l #28, %d1
-    add.l   %a0, %d1
-    and.l   #0xFFFFFFF0,%d1 /* %d1 = first line bound + 16 */
-    cmp.l   %d1, %d0        /* at least one full line to send? */
-    blo.s   .long2_start    /* no: skip to trailing longword handling */
-
-    lea.l   (-16, %sp), %sp /* free up some registers */
-    movem.l %d2-%d4/%a2, (%sp)
-
-    subq.l  #8, %d1
-    subq.l  #8, %d1         /* %d1 = first line bound */
-
-    cmp.l   %a0, %d1        /* any leading longwords? */
-    bls.s   .long1_end      /* no: skip leading long loop */
-    
-.long1_loop:
-    move.l  (%a0)+, %d2     /* read longword */
-    swap    %d2             /* send data to LCD in correct order...*/
-    move.w  %d2, (%a1)
-    swap    %d2
-    move.w  %d2, (%a1)
-    cmp.l   %a0, %d1        /* run %a0 up to first line bound */
-    bhi.s   .long1_loop
-    
-.long1_end:
-    move.l  %d0, %a2
-    lea.l   (-14, %a2), %a2 /* %a2 = end address - 14 (one line/pass) */
-
-    /* burst-optimised line transfers */
-.line_loop:
-    movem.l (%a0), %d1-%d4  /* burst-read line */
-    lea.l   (16, %a0), %a0  /* increment address */
-    swap    %d1             /* send data to LCD in correct order... */
-    move.w  %d1, (%a1)
-    swap    %d1
-    move.w  %d1, (%a1)
-    swap    %d2
-    move.w  %d2, (%a1)
-    swap    %d2
-    move.w  %d2, (%a1)
-    swap    %d3
-    move.w  %d3, (%a1)
-    swap    %d3
-    move.w  %d3, (%a1)
-    swap    %d4
-    move.w  %d4, (%a1)
-    swap    %d4
-    move.w  %d4, (%a1)
-    cmp.l   %a0, %a2        /* run %a0 up to last line bound */
-    bhi.s   .line_loop
-    
-    movem.l (%sp), %d2-%d4/%a2
-    lea.l   (16, %sp), %sp  /* restore registers */
-
-.long2_start:
-    subq.l  #2, %d0         /* account for handling 2 words per loop */
-    cmp.l   %a0, %d0        /* any (trailing longwords? */
-    bls.s   .long2_end      /* no: skip trailing longword loop */
-
-.long2_loop:
-    move.l  (%a0)+, %d1     /* read longword */
-    swap    %d1             /* send data to LCD in correct order */
-    move.w  %d1, (%a1)
-    swap    %d1
-    move.w  %d1, (%a1)
-    cmp.l   %a0, %d0        /* run %a0 up to last long bound */
-    bhi.s   .long2_loop
-    
-.long2_end:
-    blo.s   .word2_end      /* no final word: skip */
-    move.w  (%a0)+, (%a1)   /* transfer final word */
-
-.word2_end:
-    rts
-.lcd_write_data_end:
-    .size   lcd_write_data, .lcd_write_data_end - lcd_write_data
-
-
 /* lcd_write_yuv420_lines(), based on lcd-as-x5.S
  *
  * See http://en.wikipedia.org/wiki/YCbCr
diff --git a/firmware/target/coldfire/iriver/system-iriver.c b/firmware/target/coldfire/iriver/system-iriver.c
index 5f404fb..3517788 100644
--- a/firmware/target/coldfire/iriver/system-iriver.c
+++ b/firmware/target/coldfire/iriver/system-iriver.c
@@ -24,6 +24,28 @@
 #include "timer.h"
 #include "pcf50606.h"
 
+/* Settings for all possible clock frequencies (with properly working timers)
+ *
+ *                        xxx_REFRESH_TIMER below
+ * system.h, CPUFREQ_xxx_MULT        |
+ *              |                    |
+ *              V                    V
+ *                              Refreshtim.                         IDECONFIG1/IDECONFIG2
+ * CPUCLK/Hz  MULT    PLLCR     16MB  32MB  CSCR0   CSCR1   CSCR3   CS2Pre CS2Post CS2Wait
+ * ---------------------------------------------------------------------------------------
+ *  11289600    1   0x10c00200    4     1   0x0180  0x0180  0x0180     1      0       0
+ *  22579200    2   0x15c4e005   10     4   0x0180  0x0180  0x0180     1      0       0
+ *  33868800    3   0x13c46005   15     7   0x0180  0x0180  0x0180     1      0       0
+ *  45158400    4   0x15c4e001   21    10   0x0580  0x0180  0x0580     1      0       0
+ *  56448000    5   0x12c4e005   26    12   0x0580          0x0980
+ *  67737600    6   0x13c46001   32    15   0x0980          0x0d80
+ *  79027200    7   0x13c52001   37    18   0x0980          0x1180
+ *  90316800    8   0x13c5e001   43    21   0x0d80          0x1580
+ * 101606400    9   0x11c48005   48    23   0x0d80          0x1980
+ * 112896000   10   0x11c4e005   54    26   0x1180          0x1d80
+ * 124185600   11   0x11c56005   59    29   0x1180  0x1180  0x2180     2      1       2
+ */
+
 #if MEM < 32
 #define MAX_REFRESH_TIMER     59
 #define NORMAL_REFRESH_TIMER  21
@@ -61,7 +83,7 @@
         RECALC_DELAYS(CPUFREQ_MAX);
         PLLCR = 0x11c56005;
         CSCR0 = 0x00001180; /* Flash: 4 wait states */
-        CSCR1 = 0x00000980; /* LCD: 2 wait states */
+        CSCR1 = 0x00001580; /* LCD: 5 wait states */
 #if CONFIG_USBOTG == USBOTG_ISP1362
         CSCR3 = 0x00002180; /* USBOTG: 8 wait states */
 #endif