[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: d-i work -- slang
- To: Development Discussions <developer at arabeyes dot org>
- Subject: Re: d-i work -- slang
- From: Mohammed Yousif <mhdyousif at gmx dot net>
- Date: Tue, 8 Jun 2004 14:25:37 +0300
- User-agent: KMail/1.6.1
On ثلاثاء 08 يونيو 2004 08:02, Nadim Shaikli wrote:
> --- Nadim Shaikli <shaikli at yahoo dot com> wrote:
>
> Vorlon (ie. Steve Langasek) has already fixed the bugs (so no need for
> any special attention just yet - although composing characters still
> remain an issue),
>
> http://tennyson.netexpress.net/~vorlon/ar-utf8.png
>
This is wonderful, but harakat really need to be fixed.
> His files and fixes have been uploaded to the URL noted earlier.
>
> It would be ideal to see some d-i screenshot of strings with harakat
> (ie. composing characters) in them to get that resolved while the
> topic is still relevant; Anmar, can you push for that and keep us
> posted ?
>
Nadim, your Perl code already handles composing characters; why didn't
he add it to the patch, then?
If he doesn't have the time to port it then I think he can get the necessary
code to get this resolved from the attached file.
--
Mohammed Yousif
Egypt
/************************************************************************
* $Id$
*
* ------------
* Description:
* ------------
* shape_arabic.c
* A port of Nadim Shaikli's Perl Arabic shaping code
*
* (C) Copyright 2004 Arabeyes, Mohammed Yousif
*
* -----------------
* Revision Details: (Updated by Revision Control System)
* -----------------
* $Date$
* $Author$
* $Revision$
* $Source$
*
* (www.arabeyes.org - under GPL license)
*
************************************************************************/
#include <stdlib.h>

#include <fribidi/fribidi.h>
/* Sentinel stored in the tables for "this form does not exist"; also written
   as the terminator of the buffer returned by shape_arabic. */
#define NIL 0x0000
/* Number of entries in charsMap. */
#define MAP_LENGTH 37
/* Number of entries in combCharsMap. */
#define COMB_MAP_LENGTH 4
/* Number of entries in transChars. */
#define TRANS_CHARS_LENGTH 39
/*
 * Contextual forms of a single character.  A form field holding NIL means
 * the character has no such form.
 */
typedef struct {
FriBidiChar code; /* base code point; the lookup key */
FriBidiChar mIsolated; /* form used when joined on neither side */
FriBidiChar mInitial; /* form used when joined only to the following letter */
FriBidiChar mMedial; /* form used when joined on both sides */
FriBidiChar mFinal; /* form used when joined only to the preceding letter */
} CharRep;
/*
 * Contextual forms of a two-character combination (ligature).  A form field
 * holding NIL means the combination has no such form.
 */
typedef struct {
FriBidiChar code[2]; /* the two base code points, in logical order; the lookup key */
FriBidiChar mIsolated; /* ligature form when not joined to a preceding letter */
FriBidiChar mInitial; /* initial form (NIL for every entry in combCharsMap) */
FriBidiChar mMedial; /* medial form (NIL for every entry in combCharsMap) */
FriBidiChar mFinal; /* ligature form when joined to a preceding letter */
} CombCharRep;
/*
 * Shaping table for single characters, searched linearly by
 * CharacterMapContains and GetCharRep.
 * Columns: code, isolated, initial, medial, final (CharRep field order).
 * NIL marks a form the character does not have; shape_arabic uses the
 * presence or absence of these forms to decide whether neighbouring
 * characters join.
 */
CharRep charsMap[MAP_LENGTH] =
{
{ 0x0621, 0xFE80, NIL, NIL, NIL }, /* HAMZA */
{ 0x0622, 0xFE81, NIL, NIL, 0xFE82 }, /* ALEF_MADDA */
{ 0x0623, 0xFE83, NIL, NIL, 0xFE84 }, /* ALEF_HAMZA_ABOVE */
{ 0x0624, 0xFE85, NIL, NIL, 0xFE86 }, /* WAW_HAMZA */
{ 0x0625, 0xFE87, NIL, NIL, 0xFE88 }, /* ALEF_HAMZA_BELOW */
{ 0x0626, 0xFE89, 0xFE8B, 0xFE8C, 0xFE8A }, /* YEH_HAMZA */
{ 0x0627, 0xFE8D, NIL, NIL, 0xFE8E }, /* ALEF */
{ 0x0628, 0xFE8F, 0xFE91, 0xFE92, 0xFE90 }, /* BEH */
{ 0x0629, 0xFE93, NIL , NIL, 0xFE94 }, /* TEH_MARBUTA */
{ 0x062A, 0xFE95, 0xFE97, 0xFE98, 0xFE96 }, /* TEH */
{ 0x062B, 0xFE99, 0xFE9B, 0xFE9C, 0xFE9A }, /* THEH */
{ 0x062C, 0xFE9D, 0xFE9F, 0xFEA0, 0xFE9E }, /* JEEM */
{ 0x062D, 0xFEA1, 0xFEA3, 0xFEA4, 0xFEA2 }, /* HAH */
{ 0x062E, 0xFEA5, 0xFEA7, 0xFEA8, 0xFEA6 }, /* KHAH */
{ 0x062F, 0xFEA9, NIL, NIL, 0xFEAA }, /* DAL */
{ 0x0630, 0xFEAB, NIL, NIL, 0xFEAC }, /* THAL */
{ 0x0631, 0xFEAD, NIL, NIL, 0xFEAE }, /* REH */
{ 0x0632, 0xFEAF, NIL, NIL, 0xFEB0 }, /* ZAIN */
{ 0x0633, 0xFEB1, 0xFEB3, 0xFEB4, 0xFEB2 }, /* SEEN */
{ 0x0634, 0xFEB5, 0xFEB7, 0xFEB8, 0xFEB6 }, /* SHEEN */
{ 0x0635, 0xFEB9, 0xFEBB, 0xFEBC, 0xFEBA }, /* SAD */
{ 0x0636, 0xFEBD, 0xFEBF, 0xFEC0, 0xFEBE }, /* DAD */
{ 0x0637, 0xFEC1, 0xFEC3, 0xFEC4, 0xFEC2 }, /* TAH */
{ 0x0638, 0xFEC5, 0xFEC7, 0xFEC8, 0xFEC6 }, /* ZAH */
{ 0x0639, 0xFEC9, 0xFECB, 0xFECC, 0xFECA }, /* AIN */
{ 0x063A, 0xFECD, 0xFECF, 0xFED0, 0xFECE }, /* GHAIN */
/* TATWEEL maps to itself and lists no joined forms; shape_arabic
   special-cases 0x0640 when it is the following character. */
{ 0x0640, 0x0640, NIL, NIL, NIL }, /* TATWEEL */
{ 0x0641, 0xFED1, 0xFED3, 0xFED4, 0xFED2 }, /* FEH */
{ 0x0642, 0xFED5, 0xFED7, 0xFED8, 0xFED6 }, /* QAF */
{ 0x0643, 0xFED9, 0xFEDB, 0xFEDC, 0xFEDA }, /* KAF */
{ 0x0644, 0xFEDD, 0xFEDF, 0xFEE0, 0xFEDE }, /* LAM */
{ 0x0645, 0xFEE1, 0xFEE3, 0xFEE4, 0xFEE2 }, /* MEEM */
{ 0x0646, 0xFEE5, 0xFEE7, 0xFEE8, 0xFEE6 }, /* NOON */
{ 0x0647, 0xFEE9, 0xFEEB, 0xFEEC, 0xFEEA }, /* HEH */
{ 0x0648, 0xFEED, NIL, NIL, 0xFEEE }, /* WAW */
/* Disabled alternative row that gave ALEF_MAKSURA initial/medial forms
   (0xFBE8/0xFBE9).  The active row below leaves them NIL, so ALEF_MAKSURA
   only ever takes its isolated or final form. */
//{ 0x0649, 0xFEEF, 0xFBE8, 0xFBE9, 0xFEF0 }, /* ALEF_MAKSURA */
{ 0x0649, 0xFEEF, NIL, NIL, 0xFEF0 }, /* ALEF_MAKSURA */
{ 0x064A, 0xFEF1, 0xFEF3, 0xFEF4, 0xFEF2 } /* YEH */
};
/*
 * Shaping table for two-character combinations, searched linearly by
 * GetCombCharRep.  Every entry is LAM (0x0644) followed by an ALEF variant.
 * Columns: {first, second}, isolated, initial, medial, final
 * (CombCharRep field order).  Only isolated and final forms are populated.
 */
CombCharRep combCharsMap[COMB_MAP_LENGTH] =
{
{ {0x0644, 0x0622}, 0xFEF5, NIL, NIL, 0xFEF6 }, /* LAM_ALEF_MADDA */
{ {0x0644, 0x0623}, 0xFEF7, NIL, NIL, 0xFEF8 }, /* LAM_ALEF_HAMZA_ABOVE */
{ {0x0644, 0x0625}, 0xFEF9, NIL, NIL, 0xFEFA }, /* LAM_ALEF_HAMZA_BELOW */
{ {0x0644, 0x0627}, 0xFEFB, NIL, NIL, 0xFEFC } /* LAM_ALEF */
};
/*
 * Code points reported as transparent by IsTransparent.  shape_arabic steps
 * over these when it looks for the previous and next character that
 * influence the shape of the current one.
 *
 * NOTE(review): 0x0611 is absent although 0x0610 and 0x0612-0x0615 are
 * listed -- confirm whether that omission is intentional.  Adding an entry
 * also requires updating TRANS_CHARS_LENGTH.
 */
FriBidiChar transChars[TRANS_CHARS_LENGTH] =
{
0x0610, /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM */
0x0612, /* ARABIC SIGN ALAYHE ASSALLAM */
0x0613, /* ARABIC SIGN RADI ALLAHOU ANHU */
0x0614, /* ARABIC SIGN TAKHALLUS */
0x0615, /* ARABIC SMALL HIGH TAH */
0x064B, /* ARABIC FATHATAN */
0x064C, /* ARABIC DAMMATAN */
0x064D, /* ARABIC KASRATAN */
0x064E, /* ARABIC FATHA */
0x064F, /* ARABIC DAMMA */
0x0650, /* ARABIC KASRA */
0x0651, /* ARABIC SHADDA */
0x0652, /* ARABIC SUKUN */
0x0653, /* ARABIC MADDAH ABOVE */
0x0654, /* ARABIC HAMZA ABOVE */
0x0655, /* ARABIC HAMZA BELOW */
0x0656, /* ARABIC SUBSCRIPT ALEF */
0x0657, /* ARABIC INVERTED DAMMA */
0x0658, /* ARABIC MARK NOON GHUNNA */
0x0670, /* ARABIC LETTER SUPERSCRIPT ALEF */
0x06D6, /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA */
0x06D7, /* ARABIC SMALL HIGH LIGATURE QAF WITH LAM WITH ALEF MAKSURA */
0x06D8, /* ARABIC SMALL HIGH MEEM INITIAL FORM */
0x06D9, /* ARABIC SMALL HIGH LAM ALEF */
0x06DA, /* ARABIC SMALL HIGH JEEM */
0x06DB, /* ARABIC SMALL HIGH THREE DOTS */
0x06DC, /* ARABIC SMALL HIGH SEEN */
0x06DF, /* ARABIC SMALL HIGH ROUNDED ZERO */
0x06E0, /* ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO */
0x06E1, /* ARABIC SMALL HIGH DOTLESS HEAD OF KHAH */
0x06E2, /* ARABIC SMALL HIGH MEEM ISOLATED FORM */
0x06E3, /* ARABIC SMALL LOW SEEN */
0x06E4, /* ARABIC SMALL HIGH MADDA */
0x06E7, /* ARABIC SMALL HIGH YEH */
0x06E8, /* ARABIC SMALL HIGH NOON */
0x06EA, /* ARABIC EMPTY CENTRE LOW STOP */
0x06EB, /* ARABIC EMPTY CENTRE HIGH STOP */
0x06EC, /* ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE */
0x06ED /* ARABIC SMALL LOW MEEM */
};
/*
 * Report whether `c` appears as a base code point in charsMap.
 *
 * Returns FRIBIDI_TRUE when some entry's `code` equals `c`,
 * FRIBIDI_FALSE otherwise.
 */
fribidi_boolean CharacterMapContains(FriBidiChar c)
{
    int pos = 0;

    while (pos < MAP_LENGTH) {
        if (c == charsMap[pos].code) {
            return FRIBIDI_TRUE;
        }
        ++pos;
    }

    return FRIBIDI_FALSE;
}
/*
 * Look up the contextual forms of `c` in charsMap.
 *
 * Returns a copy of the first entry whose `code` equals `c`.  When no entry
 * matches, every field of the returned structure (including `code`) is NIL.
 */
CharRep GetCharRep(FriBidiChar c)
{
    CharRep result = {NIL, NIL, NIL, NIL, NIL};
    int pos = 0;

    while (pos < MAP_LENGTH) {
        if (c == charsMap[pos].code) {
            result = charsMap[pos];
            break;
        }
        ++pos;
    }

    return result;
}
/*
 * Look up the ligature forms for the pair (`c1`, `c2`) in combCharsMap.
 *
 * Returns a copy of the first entry whose two code points equal `c1` and
 * `c2` in that order.  When no entry matches, every field of the returned
 * structure is NIL.
 */
CombCharRep GetCombCharRep(FriBidiChar c1, FriBidiChar c2)
{
    CombCharRep result = {{NIL, NIL}, NIL, NIL, NIL, NIL};
    int pos = 0;

    while (pos < COMB_MAP_LENGTH) {
        if (c1 == combCharsMap[pos].code[0] && c2 == combCharsMap[pos].code[1]) {
            result = combCharsMap[pos];
            break;
        }
        ++pos;
    }

    return result;
}
/*
 * Report whether `c` is listed in transChars.
 *
 * Returns FRIBIDI_TRUE when `c` equals one of the table entries,
 * FRIBIDI_FALSE otherwise.
 */
fribidi_boolean IsTransparent(FriBidiChar c)
{
    int pos = 0;

    while (pos < TRANS_CHARS_LENGTH) {
        if (c == transChars[pos]) {
            return FRIBIDI_TRUE;
        }
        ++pos;
    }

    return FRIBIDI_FALSE;
}
FriBidiChar * shape_arabic (FriBidiChar * str, int len)
{
CharRep crep;
CombCharRep combcrep;
FriBidiChar * shaped = (FriBidiChar *)malloc (sizeof (FriBidiChar) * (len + 1));
int writeCount = 0;
int i = 0;
for (i=0; i<len; i++) {
FriBidiChar current = str[i];
if (CharacterMapContains(current)) {
FriBidiChar prev = NIL;
FriBidiChar next = NIL;
int prevID = i - 1;
int nextID = i + 1;
/*
Transparent characters have no effect in the shaping process.
So, ignore all the transparent characters that are BEFORE the
current character.
*/
for (; prevID >= 0; prevID--)
if (!IsTransparent(str[prevID]))
break;
if ( (prevID < 0) || !CharacterMapContains(prev = str[prevID]) ||
(
!((crep = GetCharRep(prev)).mInitial != NIL) &&
!(crep.mMedial != NIL)
)
)
prev = NIL;
/*
Transparent characters have no effect in the shaping process.
So, ignore all the transparent characters that are AFTER the
current character.
*/
for (; nextID < len; nextID++)
if (!IsTransparent(str[nextID]))
break;
if ( (nextID >= len) || !CharacterMapContains(next = str[nextID]) ||
( !((crep = GetCharRep(next)).mMedial != NIL) &&
!((crep = GetCharRep(next)).mFinal != NIL) &&
(next != 0x0640)
)
)
next = NIL;
/* Combinations */
if (current == 0x0644 && next != NIL && (next==0x0622 || next==0x0623 || next==0x0625 || next==0x0627)) {
combcrep = GetCombCharRep(current, next);
if (prev != NIL) {
shaped[writeCount++] = combcrep.mFinal;
} else {
shaped[writeCount++] = combcrep.mIsolated;
}
i++;
continue;
}
crep = GetCharRep(current);
/* Medial */
if ( prev != NIL && next != NIL && crep.mMedial != NIL ) {
shaped[writeCount++] = crep.mMedial;
continue;
/* Final */
} else if ( prev != NIL && crep.mFinal != NIL ) {
shaped[writeCount++] = crep.mFinal;
continue;
/* Initial */
} else if (next != NIL && crep.mInitial != NIL ) {
shaped[writeCount++] = crep.mInitial;
continue;
}
/* Isolated */
shaped[writeCount++] = crep.mIsolated;
} else {
shaped[writeCount++] = current;
}
}
shaped[writeCount] = NIL;
return shaped;
}