[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Arabic Shaping Patch



Hi all
  I am Isam Bayazidi, from the Arabeyes team ( www.arabeyes.org ). We work on 
Arabic language support issues in Open Source programs and systems. We have had 
our eye on LyX's Arabic support for some time. We are glad that LyX supports 
Arabic; nevertheless, the faulty Arabic shaping has been a disappointment.

 There is some faulty shaping, and there are some special cases for letters 
that should be considered when doing the shaping for Arabic. I have been 
working on a small patch to fix this issue. The patch does the following:
- fix the shape values in src/encoding.C
- add a new function, is_arabic_special, to identify special characters (those 
that cannot be connected from the left) (function in src/encoding.C, prototype 
in encoding.h)
- use is_arabic_special in src/text.C to fix shaping faults
- fix lib/kbd/arabic.kmap to make it compatible with the most widely used 
keyboard layout in the Arab world
- fix the Arabic language code in lib/languages 

I really hope that the developers check this patch; no code that is not 
Arabic-related has been touched. We would be glad to have this patch 
merged into the main LyX tree.

It was made against 1.2.1. I can make it against the CVS tree if needed.
Please CC " developer at arabeyes dot com ", which is the mailing list of the 
developers in the Arabeyes project.

Yours
Isam Bayazidi
--- lyx-1.2.1/src/encoding.C	Mon Jun 17 13:35:12 2002
+++ lyx-1.2.1-Arabic/src/encoding.C	Sat Nov 16 11:55:11 2002
@@ -102,24 +102,24 @@
 
 unsigned char arabic_table2[63][4] = {
 	{0x41, 0x41, 0x41, 0x41}, // 0xc1 = hamza
-	{0x42, 0xa1, 0x42, 0x42}, // 0xc2 = ligature madda on alef
-	{0x43, 0xa2, 0x43, 0x43}, // 0xc3 = ligature hamza on alef
-	{0x44, 0xa3, 0x44, 0x44}, // 0xc4 = ligature hamza on waw
-	{0x45, 0xa4, 0x45, 0x45}, // 0xc5 = ligature hamza under alef
-	{0xf9, 0xf9, 0xf8, 0xa0}, // 0xc6 = ligature hamza on ya
-	{0x47, 0xa5, 0xa5, 0xa5}, // 0xc7 = alef
+	{0x42, 0xa1, 0x42, 0xa1}, // 0xc2 = ligature madda on alef
+	{0x43, 0xa2, 0x43, 0xa2}, // 0xc3 = ligature hamza on alef
+	{0x44, 0xa3, 0x44, 0xa3}, // 0xc4 = ligature hamza on waw
+	{0x45, 0xa4, 0x45, 0xa4}, // 0xc5 = ligature hamza under alef
+	{0x46, 0xf9, 0xf8, 0xa0}, // 0xc6 = ligature hamza on ya
+	{0x47, 0xa5, 0x47, 0xa5}, // 0xc7 = alef
 	{0x48, 0xae, 0xac, 0xad}, // 0xc8 = baa
-	{0x49, 0xb1, 0xaf, 0xb0}, // 0xc9 = taa marbuta
+	{0x49, 0xb1, 0x49, 0xb1}, // 0xc9 = taa marbuta
 	{0x4a, 0xb4, 0xb2, 0xb3}, // 0xca = taa
 	{0x4b, 0xb7, 0xb5, 0xb6}, // 0xcb = thaa
 	{0x4c, 0xba, 0xb8, 0xb9}, // 0xcc = jeem
 	{0x4d, 0xbd, 0xbb, 0xbc}, // 0xcd = haa
 	{0x4e, 0xc0, 0xbe, 0xbf}, // 0xce = khaa
-	{0x4f, 0xa6, 0xa6, 0xa6}, // 0xcf = dal
+	{0x4f, 0xa6, 0x4f, 0xa6}, // 0xcf = dal
 
-	{0x50, 0xa7, 0xa7, 0xa7}, // 0xd0 = thal
-	{0x51, 0xa8, 0xa8, 0xa8}, // 0xd1 = ra
-	{0x52, 0xa9, 0xa9, 0xa9}, // 0xd2 = zain
+	{0x50, 0xa7, 0x50, 0xa7}, // 0xd0 = thal
+	{0x51, 0xa8, 0x51, 0xa8}, // 0xd1 = ra
+	{0x52, 0xa9, 0x52, 0xa9}, // 0xd2 = zain
 	{0x53, 0xc3, 0xc1, 0xc2}, // 0xd3 = seen
 	{0x54, 0xc6, 0xc4, 0xc5}, // 0xd4 = sheen
 	{0x55, 0xc9, 0xc7, 0xc8}, // 0xd5 = sad
@@ -142,8 +142,8 @@
 	{0x65, 0xe7, 0xe5, 0xe6}, // 0xe5 = meem
 	{0x66, 0xea, 0xe8, 0xe9}, // 0xe6 = noon
 	{0x67, 0xed, 0xeb, 0xec}, // 0xe7 = ha
-	{0x68, 0xaa, 0xaa, 0xaa}, // 0xe8 = waw
-	{0x69, 0xab, 0xab, 0xab}, // 0xe9 = alef maksura
+	{0x68, 0xaa, 0x68, 0xaa}, // 0xe8 = waw
+	{0x69, 0xab, 0x69, 0xab}, // 0xe9 = alef maksura
 	{0x6a, 0xf0, 0xee, 0xef}, // 0xea = ya
 	{0x6b, 0x6b, 0x6b, 0x6b}, // 0xeb = fathatan
 	{0x6c, 0x6c, 0x6c, 0x6c}, // 0xec = dammatan
@@ -252,6 +252,19 @@
 }
 
 
+// Special Arabic letters are ones that do not get connected from left
+// they are hamza, alef_madda, alef_hamza, waw_hamza, alef_hamza_under, 
+// alef, tah_marbota, dal, thal, rah, zai, wow, alef_maksoura
+
+bool Encodings::is_arabic_special(unsigned char c)
+{
+	return 	(c >= 0xc1 && c <= 0xc5) ||
+		 c == 0xc7 || c  == 0xc9  ||
+		 c == 0xcf || c  == 0xe8  ||
+		(c >= 0xd0 && c <= 0xd2) ||
+		 c == 0xe9;
+}
+
 bool Encodings::IsComposeChar_arabic(unsigned char c)
 {
 	return c >= 0xeb && c <= 0xf2;
--- lyx-1.2.1/src/encoding.h	Thu Mar 21 19:25:09 2002
+++ lyx-1.2.1-Arabic/src/encoding.h	Sat Nov 16 11:28:21 2002
@@ -90,6 +90,9 @@
 	bool IsComposeChar_arabic(unsigned char c);
 	///
 	static
+	bool is_arabic_special(unsigned char c);
+	///
+	static
 	bool is_arabic(unsigned char c);
 	///
 	static
--- lyx-1.2.1/src/text.C	Fri Jul 19 19:46:47 2002
+++ lyx-1.2.1-Arabic/src/text.C	Sat Nov 16 11:51:08 2002
@@ -150,12 +150,14 @@
 		}
 
 	if (Encodings::is_arabic(next_char)) {
-		if (Encodings::is_arabic(prev_char))
+		if (Encodings::is_arabic(prev_char) &&
+			!Encodings::is_arabic_special(prev_char))
 			return Encodings::TransformChar(c, Encodings::FORM_MEDIAL);
 		else
 			return Encodings::TransformChar(c, Encodings::FORM_INITIAL);
 	} else {
-		if (Encodings::is_arabic(prev_char))
+		if (Encodings::is_arabic(prev_char) &&
+			!Encodings::is_arabic_special(prev_char))
 			return Encodings::TransformChar(c, Encodings::FORM_FINAL);
 		else
 			return Encodings::TransformChar(c, Encodings::FORM_ISOLATED);
--- lyx-1.2.1/lib/languages	Wed Jul  3 15:52:51 2002
+++ lyx-1.2.1-Arabic/lib/languages	Sat Nov 16 11:36:41 2002
@@ -1,7 +1,7 @@
 # name      babel name	GUI name	RTL?   encoding	  code	latex options
 afrikaans   afrikaans	"Afrikaans"	false  iso8859-1  af_ZA	 ""
 american    american	"American"	false  iso8859-1  en_US	 ""
-arabic      arabic	"Arabic"	true   iso8859-6  ar_SA	 ""
+arabic      arabic	"Arabic"	true   iso8859-6  ar	 ""
 austrian    austrian	"Austrian"	false  iso8859-1  de_AU	 ""
 bahasa      bahasa	"Bahasa"	false  iso8859-1  in_ID	 ""
 belarusian  belarusian	"Belarusian"	false  cp1251     be	 ""
--- lyx-1.2.1/lib/kbd/arabic.kmap	Mon Jul 17 16:41:20 2000
+++ lyx-1.2.1-Arabic/lib/kbd/arabic.kmap	Mon Apr  1 14:24:35 2002
@@ -3,6 +3,7 @@
 #
 # Generated automatically from kikbd map by Adil Alsaid <alsaid at bigfoot dot com>
 #
+# reviewed and fixed by Isam Bayazidi <bayazidi at arabeyes dot org>, Mohamed Kebdani <kebdani1 at iam dot net dot ma>
 
 \kmap q 
 \kmap w 
@@ -27,7 +28,7 @@
 \kmap x 
 \kmap c 
 \kmap v 
-\kmap b 
+\kmap b 
 \kmap n 
 \kmap m 
 \kmap ; 
@@ -35,44 +36,41 @@
 \kmap "," 
 \kmap . 
 \kmap / 
-\kmap ` ;
+\kmap ` 
 \kmap [ 
 \kmap ] 
 
-\kmap Q 
-\kmap W 
-\kmap E 
-\kmap R 
-#\kmap T 
-\kmap T ~
+# shifted keyboard
+
+\kmap Q ?
+\kmap W ?
+\kmap E ?
+\kmap R ?
+\kmap T 
 \kmap Y 
-#\kmap U ~
-\kmap U 
+\kmap U `
 \kmap I 
-\kmap O 
+\kmap O ?
 \kmap P 
-\kmap A 
-\kmap S 
+\kmap A ?
+\kmap S ?
 \kmap D [
 \kmap F ]
-#\kmap G 
-\kmap G ~
+\kmap G 
 \kmap H 
 \kmap J 
-#\kmap K 
-\kmap K ~
+\kmap K 
 \kmap L /
-\kmap Z 
-\kmap X 
+\kmap Z ~
+\kmap X ?
 \kmap C {
 \kmap V }
-#\kmap B 
-\kmap B ~
+\kmap B 
 \kmap N 
-#\kmap M 
-\kmap M ~
+\kmap M '
 \kmap < ","
 \kmap > .
 \kmap ? 
 \kmap { <
 \kmap } >
+\kmap ~ ?