| /*************************************************************************** |
| * __________ __ ___. |
| * Open \______ \ ____ ____ | | _\_ |__ _______ ___ |
| * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / |
| * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < |
| * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ |
| * \/ \/ \/ \/ \/ |
| * $Id$ |
| * |
| * Copyright (C) 2005 by Gadi Cohen |
| * |
| * Largely based on php_hebrev by Zeev Suraski <zeev@php.net> |
| * Heavily modified by Gadi Cohen aka Kinslayer <dragon@wastelands.net> |
| * |
| * All files in this archive are subject to the GNU General Public License. |
| * See the file COPYING in the source tree root for full license agreement. |
| * |
| * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
| * KIND, either express or implied. |
| * |
| ****************************************************************************/ |
| #include <stdio.h> |
| #include <string.h> |
| #include <ctype.h> |
| #include "file.h" |
| #include "lcd.h" |
| #include "rbunicode.h" |
| #include "arabjoin.h" |
| |
| //#define _HEB_BUFFER_LENGTH (MAX_PATH + LCD_WIDTH/2 + 3 + 2 + 2) * 2 |
/* Block types used while splitting a line into directional runs. */
#define _HEB_BLOCK_TYPE_ENG 1
#define _HEB_BLOCK_TYPE_HEB 0
/* Values accepted for the `orientation` argument of bidi_l2v(). */
#define _HEB_ORIENTATION_LTR 1
#define _HEB_ORIENTATION_RTL 0

/*
 * Character-class helpers. Every argument is parenthesized so that a
 * compound expression (e.g. a conditional) can be passed safely; note that
 * the argument is still evaluated more than once, so it must be free of
 * side effects.
 */

/* 1 if c lies in U+058A..U+06FF (which contains the Hebrew and Arabic
 * blocks) or in U+FB50..U+FEFC (which spans the Arabic Presentation
 * Forms-A and -B blocks), else 0. */
#define ischar(c) ((((c) > 0x0589 && (c) < 0x0700) || ((c) >= 0xfb50 && (c) <= 0xfefc)) ? 1 : 0)
/* 1 if c is a space or a horizontal tab, else 0. */
#define _isblank(c) (((c) == ' ' || (c) == '\t') ? 1 : 0)
/* 1 if c is a line feed or a carriage return, else 0. */
#define _isnewline(c) (((c) == '\n' || (c) == '\r') ? 1 : 0)
/* Logical exclusive-or: 1 if exactly one of a, b is non-zero, else 0. */
#define XOR(a,b) (((a) || (b)) && !((a) && (b)))
| |
| arab_t * arab_lookup(unsigned short uchar) |
| { |
| if (uchar >= 0x621 && uchar <= 0x63a) |
| return &(jointable[uchar - 0x621]); |
| if (uchar >= 0x640 && uchar <= 0x64a) |
| return &(jointable[uchar - 0x621 - 5]); |
| if (uchar >= 0x671 && uchar <= 0x6d5) |
| return &(jointable[uchar - 0x621 - 5 - 38]); |
| if (uchar == 0x200D) /* Support for the zero-width joiner */ |
| return ‍ |
| return 0; |
| } |
| |
| void arabjoin(unsigned short * stringprt, int length){ |
| |
| bool connected = false; |
| unsigned short * writeprt = stringprt; |
| |
| arab_t * prev = 0; |
| arab_t * cur; |
| arab_t * ligature = 0; |
| short uchar; |
| |
| int i; |
| for (i = 0; i <= length; i++) { |
| cur = arab_lookup(uchar = *stringprt++); |
| |
| /* Skip non-arabic chars */ |
| if (cur == 0) { |
| if (prev) { |
| /* Finish the last char */ |
| if (connected) { |
| *writeprt++ = prev->final; |
| connected = false; |
| } else |
| *writeprt++ = prev->isolated; |
| prev = 0; |
| *writeprt++ = uchar; |
| } else { |
| *writeprt++ = uchar; |
| } |
| continue; |
| } |
| |
| /* nothing to do for arabic char if the previous was non-arabic */ |
| if (prev == 0) { |
| prev = cur; |
| continue; |
| } |
| |
| /* if it's LAM, check for LAM+ALEPH ligatures */ |
| if (prev->isolated == 0xfedd) { |
| switch (cur->isolated) { |
| case 0xfe8d: |
| ligature = &(lamaleph[0]); |
| break; |
| case 0xfe87: |
| ligature = &(lamaleph[1]); |
| break; |
| case 0xfe83: |
| ligature = &(lamaleph[2]); |
| break; |
| case 0xfe81: |
| ligature = &(lamaleph[3]); |
| } |
| } |
| |
| if (ligature) { /* replace the 2 glyphs by their ligature */ |
| prev = ligature; |
| ligature = 0; |
| } else { |
| if (connected) { /* previous char has something connected to it */ |
| if (prev->medial && cur->final) /* Can we connect to it? */ |
| *writeprt++ = prev->medial; |
| else { |
| *writeprt++ = prev->final; |
| connected = false; |
| } |
| } else { |
| if (prev->initial && cur->final) { /* Can we connect to it? */ |
| *writeprt++ = prev->initial; |
| connected = true; |
| } else |
| *writeprt++ = prev->isolated; |
| } |
| prev = cur; |
| } |
| } |
| } |
| |
| unsigned short *bidi_l2v(const unsigned char *str, int orientation) |
| { |
| int length = utf8length(str); |
| static unsigned short utf16_buf[MAX_PATH+1]; |
| static unsigned short bidi_buf[MAX_PATH+1]; |
| unsigned short *heb_str, *target, *tmp; // *broken_str |
| int block_start, block_end, block_type, block_length, i; |
| //long max_chars=0; |
| //int begin, end, char_count, orig_begin; |
| |
| if (!str || !*str) |
| return &(unsigned short){0}; |
| |
| //tmp = str; |
| target = tmp = utf16_buf; |
| while (*str) |
| str = utf8decode(str, target++); |
| *target = 0; |
| |
| /* properly join any arabic chars */ |
| arabjoin(utf16_buf, length); |
| |
| block_start=block_end=block_length=0; |
| |
| heb_str = bidi_buf; |
| if (orientation) { |
| target = heb_str; |
| } else { |
| target = heb_str + length; |
| *target = 0; |
| target--; |
| } |
| |
| if (ischar(*tmp)) |
| block_type = _HEB_BLOCK_TYPE_HEB; |
| else |
| block_type = _HEB_BLOCK_TYPE_ENG; |
| |
| do { |
| while((XOR(ischar(*(tmp+1)),block_type) |
| || _isblank(*(tmp+1)) || ispunct((int)*(tmp+1)) |
| || *(tmp+1)=='\n') |
| && block_end < length-1) { |
| tmp++; |
| block_end++; |
| block_length++; |
| } |
| |
| if (block_type != orientation) { |
| while ((_isblank(*tmp) || ispunct((int)*tmp)) |
| && *tmp!='/' && *tmp!='-' && block_end>block_start) { |
| tmp--; |
| block_end--; |
| } |
| } |
| |
| for (i=block_start; i<=block_end; i++) { |
| *target = (block_type == orientation) ? *(utf16_buf+i) : *(utf16_buf+block_end-i+block_start); |
| if (block_type!=orientation) { |
| switch (*target) { |
| case '(': |
| *target = ')'; |
| break; |
| case ')': |
| *target = '('; |
| break; |
| default: |
| break; |
| } |
| } |
| target += orientation ? 1 : -1; |
| } |
| block_type = !block_type; |
| block_start=block_end+1; |
| } while(block_end<length-1); |
| |
| *target = 0; |
| |
| #if 0 /* Is this code really necessary? */ |
| broken_str = utf16_buf; |
| begin=end=length-1; |
| target = broken_str; |
| |
| while (1) { |
| char_count=0; |
| while ((!max_chars || char_count<max_chars) && begin>0) { |
| char_count++; |
| begin--; |
| if (begin<=0 || _isnewline(heb_str[begin])) { |
| while(begin>0 && _isnewline(heb_str[begin-1])) { |
| begin--; |
| char_count++; |
| } |
| break; |
| } |
| } |
| if (char_count==max_chars) { /* try to avoid breaking words */ |
| int new_char_count = char_count; |
| int new_begin = begin; |
| |
| while (new_char_count>0) { |
| if (_isblank(heb_str[new_begin]) || |
| _isnewline(heb_str[new_begin])) { |
| break; |
| } |
| new_begin++; |
| new_char_count--; |
| } |
| if (new_char_count>0) { |
| char_count=new_char_count; |
| begin=new_begin; |
| } |
| } |
| orig_begin=begin; |
| |
| /* if (_isblank(heb_str[begin])) { |
| heb_str[begin]='\n'; |
| } */ |
| |
| /* skip leading newlines */ |
| while (begin<=end && _isnewline(heb_str[begin])) { |
| begin++; |
| } |
| |
| /* copy content */ |
| for (i=begin; i<=end; i++) { |
| *target = heb_str[i]; |
| target++; |
| } |
| |
| for (i=orig_begin; i<=end && _isnewline(heb_str[i]); i++) { |
| *target = heb_str[i]; |
| target++; |
| } |
| begin=orig_begin; |
| |
| if (begin<=0) { |
| *target = 0; |
| break; |
| } |
| begin--; |
| end=begin; |
| } |
| return broken_str; |
| #endif |
| return heb_str; |
| } |
| |