[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] MPlayer Subtitles Arabic Shaping Support



Hello,
  Attached is a patch that adds Arabic shaping support for subtitles.
  Currently, FriBidi handles BiDi reordering, but Arabic also needs shaping.
  Shaping is the process of "connecting" letters: each Arabic letter takes
  a different shape depending on its position within the word.

   + A screenshot of MPlayer without the patch:
        http://mhdyousif.host.sk/files/mplayer_without_shaping.jpg

   + A screenshot of MPlayer with the patch:
        http://mhdyousif.host.sk/files/mplayer_with_shaping.jpg

  The patch is, FYI, here too:
    http://mhdyousif.host.sk/files/mplayer_arabic_shaping.patch

  Thanks,
-- 
Mohammed Yousif
Egypt
? main/shape_arabic.c
? main/shape_arabic.h
Index: main/Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/Makefile,v
retrieving revision 1.287
diff -u -r1.287 Makefile
--- main/Makefile	8 May 2004 17:52:24 -0000	1.287
+++ main/Makefile	2 Jun 2004 22:41:47 -0000
@@ -21,7 +21,7 @@
 DO_MAKE = @ for i in $(SUBDIRS); do $(MAKE) -C $$i $@; done
 endif
 
-SRCS_COMMON = cpudetect.c codec-cfg.c spudec.c playtree.c playtreeparser.c asxparser.c vobsub.c subreader.c sub_cc.c find_sub.c m_config.c m_option.c parser-cfg.c m_struct.c
+SRCS_COMMON = cpudetect.c codec-cfg.c spudec.c playtree.c playtreeparser.c asxparser.c vobsub.c subreader.c sub_cc.c find_sub.c m_config.c m_option.c parser-cfg.c m_struct.c shape_arabic.c
 SRCS_MENCODER = mencoder.c mp_msg-mencoder.c $(SRCS_COMMON) libao2/afmt.c divx4_vbr.c libvo/aclib.c libvo/osd.c libvo/sub.c libvo/font_load.c libvo/font_load_ft.c xvid_vbr.c parser-mecmd.c
 SRCS_MPLAYER = mplayer.c mp_msg.c $(SRCS_COMMON) mixer.c parser-mpcmd.c
 
Index: main/configure
===================================================================
RCS file: /cvsroot/mplayer/main/configure,v
retrieving revision 1.866
diff -u -r1.866 configure
--- main/configure	31 May 2004 15:07:20 -0000	1.866
+++ main/configure	2 Jun 2004 22:42:07 -0000
@@ -178,6 +178,7 @@
   --enable-menu          Enable OSD menu support (NOT DVD MENU) [disabled]
   --disable-sortsub      Disable subtitles sorting [enabled]
   --enable-fribidi       Enable using the FriBiDi libs [disabled]
+  --enable-arabic-shaping Enable Arabic text shaping [disabled]
   --disable-enca         Disable using ENCA charset oracle library [autodetect]
   --disable-macosx       Disable Mac OS X specific features [autodetect]
   --disable-inet6        Disable IPv6 support [autodetect]
@@ -1249,6 +1250,7 @@
 _freetypeconfig='freetype-config'
 _fribidi=no
 _fribidiconfig='fribidi-config'
+_arabic_shaping=no
 _enca=auto
 _inet6=auto
 _gethostbyname2=auto
@@ -1474,6 +1476,9 @@
   --enable-fribidi)     _fribidi=yes    ;;
   --disable-fribidi)    _fribidi=no     ;;
 
+  --enable-arabic-shaping)     _arabic_shaping=yes    ;;
+  --disable-arabic-shaping)    _arabic_shaping=no     ;;
+
   --enable-enca)        _enca=yes    ;;
   --disable-enca)       _enca=no     ;;
 
@@ -4444,6 +4449,13 @@
 fi
 echores "$_fribidi"
 
+echocheck "Arabic shaping"
+if test "$_arabic_shaping" = yes ; then
+    _def_arabic_shaping='#define ENABLE_ARABIC_SHAPING 1'
+else
+_def_arabic_shaping='#undef ENABLE_ARABIC_SHAPING'
+fi
+echores "$_arabic_shaping"
 
 echocheck "ENCA"
 if test "$_enca" = auto ; then
@@ -6546,6 +6558,9 @@
 /* enable FriBiDi usage */
 $_def_fribidi
 
+/* enable Arabic shaping */
+$_def_arabic_shaping
+
 /* enable ENCA usage */
 $_def_enca
 
Index: main/subreader.c
===================================================================
RCS file: /cvsroot/mplayer/main/subreader.c,v
retrieving revision 1.134
diff -u -r1.134 subreader.c
--- main/subreader.c	21 May 2004 16:02:09 -0000	1.134
+++ main/subreader.c	2 Jun 2004 22:42:15 -0000
@@ -33,6 +33,13 @@
 #include <fribidi/fribidi.h>
 char *fribidi_charset = NULL;
 int flip_hebrew = 1;
+
+/* Since shaping is almost useless without RTL support, it only makes
+   sense to enable shaping when FriBidi is enabled */
+#ifdef ENABLE_ARABIC_SHAPING
+#include "shape_arabic.h"
+#endif /* ENABLE_ARABIC_SHAPING */
+
 #endif
 
 extern char* dvdsub_lang;
@@ -1161,6 +1168,12 @@
   int l=sub->lines;
   int char_set_num;
   fribidi_boolean log2vis;
+  
+#ifdef ENABLE_ARABIC_SHAPING
+    FriBidiChar * shaped_text = 0;
+    int i;
+#endif /* ENABLE_ARABIC_SHAPING */
+  
   if(flip_hebrew) { // Please fix the indentation someday
   fribidi_set_mirroring (FRIBIDI_TRUE);
   fribidi_set_reorder_nsm (FRIBIDI_FALSE);
@@ -1179,6 +1192,14 @@
       break;
     }
     len = fribidi_charset_to_unicode (char_set_num, ip, len, logical);
+
+#ifdef ENABLE_ARABIC_SHAPING
+    shaped_text = shape_arabic(logical, len);
+    for (i=0; i<len; i++)
+      logical[i] = shaped_text[i];
+    free(shaped_text);
+#endif /* ENABLE_ARABIC_SHAPING */
+
     base = FRIBIDI_TYPE_ON;
     log2vis = fribidi_log2vis (logical, len, &base,
 			       /* output */
--- /dev/null	1994-07-18 02:46:18.000000000 +0300
+++ main/shape_arabic.h	2004-06-02 23:02:33.000000000 +0300
@@ -0,0 +1,37 @@
+/************************************************************************
+ * $Id$
+ *
+ * ------------
+ * Description:
+ * ------------
+ * shape_arabic.h
+ * A port of Nadim Shaikli's Perl Arabic shaping code
+ *
+ * (C) Copyright 2004 Arabeyes, Mohammed Yousif
+ *
+ * -----------------
+ * Revision Details:    (Updated by Revision Control System)
+ * -----------------
+ *  $Date$
+ *  $Author$
+ *  $Revision$
+ *  $Source$
+ *
+ *  (www.arabeyes.org - under GPL license)
+ *
+ ************************************************************************/
+
+#ifndef SHAPE_ARABIC_H
+#define SHAPE_ARABIC_H
+
+#include <fribidi/fribidi.h>
+
+/**
+ * Shapes an Arabic text chunk.
+ * @param FriBidiChar * (The Arabic text chunk to be shaped, in Unicode)
+ * @param int (The number of characters in the chunk)
+ * @return FriBidiChar * (The shaped chunk in Unicode, terminated by 0x0000; the caller is responsible for freeing it)
+ */
+FriBidiChar * shape_arabic(FriBidiChar *, int);
+
+#endif /* SHAPE_ARABIC_H */
--- /dev/null	1994-07-18 02:46:18.000000000 +0300
+++ main/shape_arabic.c	2004-06-02 23:51:05.000000000 +0300
@@ -0,0 +1,277 @@
+/************************************************************************
+ * $Id$
+ *
+ * ------------
+ * Description:
+ * ------------
+ * shape_arabic.c
+ * A port of Nadim Shaikli's Perl Arabic shaping code
+ *
+ * (C) Copyright 2004 Arabeyes, Mohammed Yousif
+ *
+ * -----------------
+ * Revision Details:    (Updated by Revision Control System)
+ * -----------------
+ *  $Date$
+ *  $Author$
+ *  $Revision$
+ *  $Source$
+ *
+ *  (www.arabeyes.org - under GPL license)
+ *
+ ************************************************************************/
+
+#include "config.h"
+
+#ifdef ENABLE_ARABIC_SHAPING
+#include <stdlib.h>
+#include <fribidi/fribidi.h>
+
+#define NIL 0x0000                /* "no such form" marker used in the tables */
+
+#define MAP_LENGTH 37             /* number of entries in charsMap */
+#define COMB_MAP_LENGTH 4         /* number of entries in combCharsMap */
+#define TRANS_CHARS_LENGTH 39     /* number of entries in transChars */
+
+typedef struct {            /* shaping forms of one letter (one charsMap row) */
+  FriBidiChar code;         /* code point matched against the input text */
+  FriBidiChar mIsolated;    /* isolated form */
+  FriBidiChar mInitial;     /* initial form, NIL if the letter has none */
+  FriBidiChar mMedial;      /* medial form, NIL if the letter has none */
+  FriBidiChar mFinal;       /* final form, NIL if the letter has none */
+} CharRep;
+
+typedef struct {            /* shaping forms of a two-letter ligature (one combCharsMap row) */
+  FriBidiChar code[2];      /* the two consecutive code points that form the ligature */
+  FriBidiChar mIsolated;    /* isolated form */
+  FriBidiChar mInitial;     /* initial form, NIL if the ligature has none */
+  FriBidiChar mMedial;      /* medial form, NIL if the ligature has none */
+  FriBidiChar mFinal;       /* final form, NIL if the ligature has none */
+} CombCharRep;
+
+
+CharRep charsMap[MAP_LENGTH] =    /* columns: code, isolated, initial, medial, final */
+  {
+    { 0x0621, 0xFE80, NIL,    NIL,    NIL    },    /* HAMZA */
+    { 0x0622, 0xFE81, NIL,    NIL,    0xFE82 },    /* ALEF_MADDA */
+    { 0x0623, 0xFE83, NIL,    NIL,    0xFE84 },    /* ALEF_HAMZA_ABOVE */
+    { 0x0624, 0xFE85, NIL,    NIL,    0xFE86 },    /* WAW_HAMZA */
+    { 0x0625, 0xFE87, NIL,    NIL,    0xFE88 },    /* ALEF_HAMZA_BELOW */
+    { 0x0626, 0xFE89, 0xFE8B, 0xFE8C, 0xFE8A },    /* YEH_HAMZA */
+    { 0x0627, 0xFE8D, NIL,    NIL,    0xFE8E },    /* ALEF */
+    { 0x0628, 0xFE8F, 0xFE91, 0xFE92, 0xFE90 },    /* BEH */
+    { 0x0629, 0xFE93, NIL   , NIL,    0xFE94 },    /* TEH_MARBUTA */
+    { 0x062A, 0xFE95, 0xFE97, 0xFE98, 0xFE96 },    /* TEH */
+    { 0x062B, 0xFE99, 0xFE9B, 0xFE9C, 0xFE9A },    /* THEH */
+    { 0x062C, 0xFE9D, 0xFE9F, 0xFEA0, 0xFE9E },    /* JEEM */
+    { 0x062D, 0xFEA1, 0xFEA3, 0xFEA4, 0xFEA2 },    /* HAH */
+    { 0x062E, 0xFEA5, 0xFEA7, 0xFEA8, 0xFEA6 },    /* KHAH */
+    { 0x062F, 0xFEA9, NIL,    NIL,    0xFEAA },    /* DAL */
+    { 0x0630, 0xFEAB, NIL,    NIL,    0xFEAC },    /* THAL */
+    { 0x0631, 0xFEAD, NIL,    NIL,    0xFEAE },    /* REH */
+    { 0x0632, 0xFEAF, NIL,    NIL,    0xFEB0 },    /* ZAIN */
+    { 0x0633, 0xFEB1, 0xFEB3, 0xFEB4, 0xFEB2 },    /* SEEN */
+    { 0x0634, 0xFEB5, 0xFEB7, 0xFEB8, 0xFEB6 },    /* SHEEN */
+    { 0x0635, 0xFEB9, 0xFEBB, 0xFEBC, 0xFEBA },    /* SAD */
+    { 0x0636, 0xFEBD, 0xFEBF, 0xFEC0, 0xFEBE },    /* DAD */
+    { 0x0637, 0xFEC1, 0xFEC3, 0xFEC4, 0xFEC2 },    /* TAH */
+    { 0x0638, 0xFEC5, 0xFEC7, 0xFEC8, 0xFEC6 },    /* ZAH */
+    { 0x0639, 0xFEC9, 0xFECB, 0xFECC, 0xFECA },    /* AIN */
+    { 0x063A, 0xFECD, 0xFECF, 0xFED0, 0xFECE },    /* GHAIN */
+    { 0x0640, 0x0640, NIL,    NIL,    NIL },       /* TATWEEL */
+    { 0x0641, 0xFED1, 0xFED3, 0xFED4, 0xFED2 },    /* FEH */
+    { 0x0642, 0xFED5, 0xFED7, 0xFED8, 0xFED6 },    /* QAF */
+    { 0x0643, 0xFED9, 0xFEDB, 0xFEDC, 0xFEDA },    /* KAF */
+    { 0x0644, 0xFEDD, 0xFEDF, 0xFEE0, 0xFEDE },    /* LAM */
+    { 0x0645, 0xFEE1, 0xFEE3, 0xFEE4, 0xFEE2 },    /* MEEM */
+    { 0x0646, 0xFEE5, 0xFEE7, 0xFEE8, 0xFEE6 },    /* NOON */
+    { 0x0647, 0xFEE9, 0xFEEB, 0xFEEC, 0xFEEA },    /* HEH */
+    { 0x0648, 0xFEED, NIL,    NIL,    0xFEEE },    /* WAW */
+    /* NOTE(review): a disabled variant of the next entry used 0xFBE8/0xFBE9 as initial/medial forms - confirm it should stay off */
+    { 0x0649, 0xFEEF, NIL,    NIL, 0xFEF0 },    /* ALEF_MAKSURA */
+    { 0x064A, 0xFEF1, 0xFEF3, 0xFEF4, 0xFEF2 }     /* YEH */
+  };
+
+CombCharRep combCharsMap[COMB_MAP_LENGTH] =    /* columns: {first, second} letter, isolated, initial, medial, final */
+  {
+    { {0x0644, 0x0622}, 0xFEF5, NIL,    NIL,    0xFEF6 },     /* LAM_ALEF_MADDA */
+    { {0x0644, 0x0623}, 0xFEF7, NIL,    NIL,    0xFEF8 },     /* LAM_ALEF_HAMZA_ABOVE */
+    { {0x0644, 0x0625}, 0xFEF9, NIL,    NIL,    0xFEFA },     /* LAM_ALEF_HAMZA_BELOW */
+    { {0x0644, 0x0627}, 0xFEFB, NIL,    NIL,    0xFEFC }      /* LAM_ALEF */
+  };
+    
+
+FriBidiChar transChars[TRANS_CHARS_LENGTH] =    /* characters skipped when looking for a letter's neighbours */
+  {
+    0x0610,  /* ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM */
+    0x0612,  /* ARABIC SIGN ALAYHE ASSALLAM */
+    0x0613,  /* ARABIC SIGN RADI ALLAHOU ANHU */
+    0x0614,  /* ARABIC SIGN TAKHALLUS */
+    0x0615,  /* ARABIC SMALL HIGH TAH */
+    0x064B,  /* ARABIC FATHATAN */
+    0x064C,  /* ARABIC DAMMATAN */
+    0x064D,  /* ARABIC KASRATAN */
+    0x064E,  /* ARABIC FATHA */
+    0x064F,  /* ARABIC DAMMA */
+    0x0650,  /* ARABIC KASRA */
+    0x0651,  /* ARABIC SHADDA */
+    0x0652,  /* ARABIC SUKUN */
+    0x0653,  /* ARABIC MADDAH ABOVE */
+    0x0654,  /* ARABIC HAMZA ABOVE */
+    0x0655,  /* ARABIC HAMZA BELOW */
+    0x0656,  /* ARABIC SUBSCRIPT ALEF */
+    0x0657,  /* ARABIC INVERTED DAMMA */
+    0x0658,  /* ARABIC MARK NOON GHUNNA */
+    0x0670,  /* ARABIC LETTER SUPERSCRIPT ALEF */
+    0x06D6,  /* ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA */
+    0x06D7,  /* ARABIC SMALL HIGH LIGATURE QAF WITH LAM WITH ALEF MAKSURA */
+    0x06D8,  /* ARABIC SMALL HIGH MEEM INITIAL FORM */
+    0x06D9,  /* ARABIC SMALL HIGH LAM ALEF */
+    0x06DA,  /* ARABIC SMALL HIGH JEEM */
+    0x06DB,  /* ARABIC SMALL HIGH THREE DOTS */
+    0x06DC,  /* ARABIC SMALL HIGH SEEN */
+    0x06DF,  /* ARABIC SMALL HIGH ROUNDED ZERO */
+    0x06E0,  /* ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO */
+    0x06E1,  /* ARABIC SMALL HIGH DOTLESS HEAD OF KHAH */
+    0x06E2,  /* ARABIC SMALL HIGH MEEM ISOLATED FORM */
+    0x06E3,  /* ARABIC SMALL LOW SEEN */
+    0x06E4,  /* ARABIC SMALL HIGH MADDA */
+    0x06E7,  /* ARABIC SMALL HIGH YEH */
+    0x06E8,  /* ARABIC SMALL HIGH NOON */
+    0x06EA,  /* ARABIC EMPTY CENTRE LOW STOP */
+    0x06EB,  /* ARABIC EMPTY CENTRE HIGH STOP */
+    0x06EC,  /* ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE */
+    0x06ED   /* ARABIC SMALL LOW MEEM */
+  };
+    
+/* Tells whether c is one of the letters listed in charsMap. */
+fribidi_boolean CharacterMapContains(FriBidiChar c)
+{
+  int idx = MAP_LENGTH;
+  while (idx-- > 0)
+    if (c == charsMap[idx].code)
+      return FRIBIDI_TRUE;
+  return FRIBIDI_FALSE;
+}
+
+/* Looks c up in charsMap; yields an all-NIL entry when c is not listed. */
+CharRep GetCharRep(FriBidiChar c)
+{
+  CharRep noMatch = {NIL, NIL, NIL, NIL, NIL};
+  const CharRep *entry = charsMap;
+  const CharRep *end = charsMap + MAP_LENGTH;
+  for (; entry != end; entry++)
+    if (entry->code == c) return *entry;
+  return noMatch;
+}
+
+/* Finds the combCharsMap entry for the pair (c1, c2); all-NIL if there is none. */
+CombCharRep GetCombCharRep(FriBidiChar c1, FriBidiChar c2)
+{
+  CombCharRep noMatch = {{NIL, NIL}, NIL, NIL, NIL, NIL};
+  int idx;
+  for (idx = 0; idx < COMB_MAP_LENGTH; idx++)
+    if (c1 == combCharsMap[idx].code[0] && c2 == combCharsMap[idx].code[1])
+      return combCharsMap[idx];
+  return noMatch;
+}
+
+/* Tells whether c is listed in transChars (characters skipped during shaping). */
+fribidi_boolean IsTransparent(FriBidiChar c)
+{
+  const FriBidiChar *mark = transChars;
+  const FriBidiChar *end = transChars + TRANS_CHARS_LENGTH;
+  while (mark != end)
+    if (*mark++ == c) return FRIBIDI_TRUE;
+  return FRIBIDI_FALSE;
+}
+
+/*
+ * Returns a newly allocated, NIL-terminated copy of str[0..len-1] in which
+ * every letter listed in charsMap is replaced by its contextual presentation
+ * form and each LAM + ALEF pair is merged into a single ligature, so the
+ * result may be shorter than len.  Other characters are copied unchanged.
+ * The buffer is allocated zero-filled with len + 1 elements, so a caller that
+ * copies len elements back never reads indeterminate memory.  A negative len
+ * is treated as 0.  The caller frees the result; NULL means out of memory.
+ */
+FriBidiChar * shape_arabic (FriBidiChar * str, int len)
+{
+  FriBidiChar * shaped;
+  int writeCount = 0;
+  int i;
+
+  if (len < 0)
+    len = 0;
+  /* One slot per input character plus the NIL terminator. */
+  shaped = calloc ((size_t) len + 1, sizeof (FriBidiChar));
+  if (shaped == NULL)
+    return NULL;
+
+  for (i = 0; i < len; i++) {
+    FriBidiChar current = str[i];
+    FriBidiChar prev = NIL;
+    FriBidiChar next = NIL;
+    CharRep crep;
+    int prevID = i - 1;
+    int nextID = i + 1;
+
+    /* Characters without a charsMap entry pass through untouched. */
+    if (!CharacterMapContains(current)) {
+      shaped[writeCount++] = current;
+      continue;
+    }
+
+    /* Transparent characters have no effect on shaping, so look past them
+       to find the nearest real neighbour on each side. */
+    while (prevID >= 0 && IsTransparent(str[prevID]))
+      prevID--;
+    while (nextID < len && IsTransparent(str[nextID]))
+      nextID++;
+
+    /* prev only counts if it can connect forward: it has an initial or
+       medial form, or it is TATWEEL (0x0640), which joins on both sides. */
+    if (prevID >= 0) {
+      crep = GetCharRep(str[prevID]);
+      if (crep.mInitial != NIL || crep.mMedial != NIL || str[prevID] == 0x0640)
+        prev = str[prevID];
+    }
+
+    /* next only counts if it can connect backward: it has a medial or
+       final form, or it is TATWEEL. */
+    if (nextID < len) {
+      crep = GetCharRep(str[nextID]);
+      if (crep.mMedial != NIL || crep.mFinal != NIL || str[nextID] == 0x0640)
+        next = str[nextID];
+    }
+
+    /* Combinations: LAM followed by an ALEF variant becomes one ligature. */
+    if (current == 0x0644 &&
+        (next == 0x0622 || next == 0x0623 || next == 0x0625 || next == 0x0627)) {
+      CombCharRep combcrep = GetCombCharRep(current, next);
+      shaped[writeCount++] = (prev != NIL) ? combcrep.mFinal : combcrep.mIsolated;
+      /* Keep the marks that sat between LAM and ALEF, and resume after the
+         ALEF itself; stepping by one would shape that ALEF a second time. */
+      for (i++; i < nextID; i++)
+        shaped[writeCount++] = str[i];
+      continue;
+    }
+
+    /* Pick the richest form the neighbours allow; a form the letter lacks
+       (NIL) falls through to the next candidate. */
+    crep = GetCharRep(current);
+    if (prev != NIL && next != NIL && crep.mMedial != NIL)
+      shaped[writeCount++] = crep.mMedial;       /* Medial */
+    else if (prev != NIL && crep.mFinal != NIL)
+      shaped[writeCount++] = crep.mFinal;        /* Final */
+    else if (next != NIL && crep.mInitial != NIL)
+      shaped[writeCount++] = crep.mInitial;      /* Initial */
+    else
+      shaped[writeCount++] = crep.mIsolated;     /* Isolated */
+  }
+
+  shaped[writeCount] = NIL;
+  return shaped;
+}
+
+#endif /* ENABLE_ARABIC_SHAPING */