[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Nadim's shaping code in C



Salam all,
This is a *port* of Nadim's shaping code to C
I wrote this a while ago, promissed Nadim to clean it, But i didn't find 
time till now.

Maybe it's not perfect or required, But maybe anyone'll make use of it.

I know that glib is required but i'll try and get rid of this 
dependency.

-- 
----------------
-- Katoob Main Developer
Linux registered user # 224950
ICQ # 58475622
FIRST make it run, THEN make it run fast "Brian Kernighan".
--
Don't send me any attachment in Micro$oft (.DOC, .PPT) format please
Read http://www.fsf.org/philosophy/no-word-attachments.html
Preferable attachments: .PDF, .HTML, .TXT
Thanx for adding this text to Your signature
/* Arabic shaping code.
 * Copyright (c) 2003 Arabeyes, Mohammed Sameer.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * I can say that this is a direct transltion for Nadim's shape_arabic.pl script
 * All thanks to be directed to him, All blames to be directed to me.
 */

/* $ gcc -o do_shaping do_shaping.c `pkg-config glib-2.0 --cflags --libs` */

/*
 * TODO:
 * Use a static buffer somehow to avoid malloc/free.
 * Try to improve it a bit.
 * Any ideas ?
 */

#include <stdio.h>
#include <glib.h>

#define DEBUG(x) fprintf(stderr, "%s\n", x)

typedef struct
{
  gboolean junk;
  gunichar isolated;
  gunichar initial;
  gunichar medial;
  gunichar final;
}
char_node;

/* *INDENT-OFF* */
char_node shaping_table[] = {
/* 0x621 */  { FALSE, 0xFE80, 0x0000, 0x0000, 0x0000},
/* 0x622 */  { FALSE, 0xFE81, 0x0000, 0x0000, 0xFE82},
/* 0x623 */  { FALSE, 0xFE83, 0x0000, 0x0000, 0xFE84},
/* 0x624 */  { FALSE, 0xFE85, 0x0000, 0x0000, 0xFE86},
/* 0x625 */  { FALSE, 0xFE87, 0x0000, 0x0000, 0xFE88},
/* 0x626 */  { FALSE, 0xFE89, 0xFE8B, 0xFE8C, 0xFE8A},
/* 0x627 */  { FALSE, 0xFE8D, 0x0000, 0x0000, 0xFE8E},
/* 0x628 */  { FALSE, 0xFE8F, 0xFE91, 0xFE92, 0xFE90},
/* 0x629 */  { FALSE, 0xFE93, 0x0000, 0x0000, 0xFE94},
/* 0x62A */  { FALSE, 0xFE95, 0xFE97, 0xFE98, 0xFE96},
/* 0x62B */  { FALSE, 0xFE99, 0xFE9B, 0xFE9C, 0xFE9A},
/* 0x62C */  { FALSE, 0xFE9D, 0xFE9F, 0xFEA0, 0xFE9E},
/* 0x62D */  { FALSE, 0xFEA1, 0xFEA3, 0xFEA4, 0xFEA2},
/* 0x62E */  { FALSE, 0xFEA5, 0xFEA7, 0xFEA8, 0xFEA6},
/* 0x62F */  { FALSE, 0xFEA9, 0x0000, 0x0000, 0xFEAA},
/* 0x630 */  { FALSE, 0xFEAB, 0x0000, 0x0000, 0xFEAC},
/* 0x631 */  { FALSE, 0xFEAD, 0x0000, 0x0000, 0xFEAE},
/* 0x632 */  { FALSE, 0xFEAF, 0x0000, 0x0000, 0xFEB0},
/* 0x633 */  { FALSE, 0xFEB1, 0xFEB3, 0xFEB4, 0xFEB2},
/* 0x634 */  { FALSE, 0xFEB5, 0xFEB7, 0xFEB8, 0xFEB6},
/* 0x635 */  { FALSE, 0xFEB9, 0xFEBB, 0xFEBC, 0xFEBA},
/* 0x636 */  { FALSE, 0xFEBD, 0xFEBF, 0xFEC0, 0xFEBE},
/* 0x637 */  { FALSE, 0xFEC1, 0xFEC3, 0xFEC4, 0xFEC2},
/* 0x638 */  { FALSE, 0xFEC5, 0xFEC7, 0xFEC8, 0xFEC6},
/* 0x639 */  { FALSE, 0xFEC9, 0xFECB, 0xFECC, 0xFECA},
/* 0x63A */  { FALSE, 0xFECD, 0xFECF, 0xFED0, 0xFECE},
/* 0x63B */  { TRUE , 0x0000, 0x0000, 0x0000, 0x0000},
/* 0x63C */  { TRUE , 0x0000, 0x0000, 0x0000, 0x0000},
/* 0x63D */  { TRUE , 0x0000, 0x0000, 0x0000, 0x0000},
/* 0x63E */  { TRUE , 0x0000, 0x0000, 0x0000, 0x0000},
/* 0x63F */  { TRUE , 0x0000, 0x0000, 0x0000, 0x0000},
/* 0x640 */  { FALSE, 0x0640, 0x0000, 0x0000, 0x0000},
/* 0x641 */  { FALSE, 0xFED1, 0xFED3, 0xFED4, 0xFED2},
/* 0x642 */  { FALSE, 0xFED5, 0xFED7, 0xFED8, 0xFED6},
/* 0x643 */  { FALSE, 0xFED9, 0xFEDB, 0xFEDC, 0xFEDA},
/* 0x644 */  { FALSE, 0xFEDD, 0xFEDF, 0xFEE0, 0xFEDE},
/* 0x645 */  { FALSE, 0xFEE1, 0xFEE3, 0xFEE4, 0xFEE2},
/* 0x646 */  { FALSE, 0xFEE5, 0xFEE7, 0xFEE8, 0xFEE6},
/* 0x647 */  { FALSE, 0xFEE9, 0xFEEB, 0xFEEC, 0xFEEA},
/* 0x648 */  { FALSE, 0xFEED, 0x0000, 0x0000, 0xFEEE},
/* 0x649 */  { FALSE, 0xFEEF, 0xFBE8, 0xFBE9, 0xFEF0},
/* 0x64A */  { FALSE, 0xFEF1, 0xFEF3, 0xFEF4, 0xFEF2}
};
/* *INDENT-ON* */

int
main (int argc, char *argv[])
{
  FILE *fl;
  gchar buff[1024];
  glong len;

  if (!argv[1])
    {
      fprintf (stderr, "You didn't supply a file\n");
      return 1;
    }

  fl = fopen (argv[1], "r");
  if (!fl)
    {
      fprintf (stderr, "Can't open file\n");
      return 1;
    }
  while (fgets (buff, 1024, fl))
    {
      gint y;
      gunichar *ucs = NULL, *_ucs = NULL;
      gchar *utf = NULL;
      gint x = 0;		//strlen (buff);

      ucs = g_utf8_to_ucs4_fast (buff, -1, &len);
      _ucs = g_malloc (sizeof (gunichar) * (len + 1));
      _ucs[len] = 0;

      for (y = 0; y < len; y++)
	{
	  gboolean have_previous = TRUE, have_next = TRUE;
/* If it's not in our range, skip it. */
	  if ((ucs[y] < 0x621) || (ucs[y] > 0x64A))
	    {
	      DEBUG ("Character not in range");
	      _ucs[x++] = ucs[y];
	      continue;
	    }

/* The character wasn't included in the unicode shaping table. */
	  if (shaping_table[(ucs[y] - 0x621)].junk)
	    {
	      DEBUG ("Junk character");
	      _ucs[x++] = ucs[y];
	      continue;
	    }

	  if (((ucs[y - 1] < 0x621) || (ucs[y - 1] > 0x64A)) ||
	      (!(shaping_table[(ucs[y - 1] - 0x621)].initial) &&
	       !(shaping_table[(ucs[y - 1] - 0x621)].medial)))
	    {
	      DEBUG ("No previous");
	      have_previous = FALSE;
	    }

	  if (((ucs[y + 1] < 0x621) || (ucs[y + 1] > 0x64A)) ||
	      (!(shaping_table[(ucs[y + 1] - 0x621)].medial) &&
	       !(shaping_table[(ucs[y + 1] - 0x621)].final)
	       && (ucs[y + 1] != 0x640)))
	    {
	      DEBUG ("No next\n");
	      have_next = FALSE;
	    }

	  if (ucs[y] == 0x644)
	    {
	      if (have_next)
		{
		  if ((ucs[y + 1] == 0x622) ||
		      (ucs[y + 1] == 0x623) ||
		      (ucs[y + 1] == 0x625) || (ucs[y + 1] == 0x627))
		    {
		      if (have_previous)
			{
			  if (ucs[y + 1] == 0x622)
			    {
			      _ucs[x++] = 0xFEF6;
			    }
			  else if (ucs[y + 1] == 0x623)
			    {
			      _ucs[x++] = 0xFEF8;
			    }
			  else if (ucs[y + 1] == 0x625)
			    {
			      _ucs[x++] = 0xFEFA;
			    }
			  else
			    {
/* ucs[y+1] = 0x627 */
			      _ucs[x++] = 0xFEFC;
			    }
			}

		      else
			{
			  if (ucs[y + 1] == 0x622)
			    {
			      _ucs[x++] = 0xFEF5;
			    }
			  else if (ucs[y + 1] == 0x623)
			    {
			      _ucs[x++] = 0xFEF7;
			    }
			  else if (ucs[y + 1] == 0x625)
			    {
			      _ucs[x++] = 0xFEF9;
			    }
			  else
			    {
/* ucs[y+1] = 0x627 */
			      _ucs[x++] = 0xFEFB;
			    }
			}
		      y++;
		      continue;
		    }
		}
	    }

/* Medial */
	  if ((have_previous) && (have_next)
	      && (shaping_table[(ucs[y] - 0x621)].medial))
	    {
	      DEBUG ("Medial condition");
/*            if (shaping_table[(ucs[y] - 0x621)].medial)
              { */
	      _ucs[x++] = shaping_table[(ucs[y] - 0x621)].medial;
/*              }
            else
              {
                _ucs[y] = ucs[y];
              } */
	      continue;
	    }

/* Final */
	  else if ((have_previous) && (shaping_table[(ucs[y] - 0x621)].final))
	    {
	      DEBUG ("Previous condition");
	      _ucs[x++] = shaping_table[(ucs[y] - 0x621)].final;
	      continue;
	    }

/* Initial */
	  else if ((have_next) && (shaping_table[(ucs[y] - 0x621)].initial))
	    {
	      DEBUG ("Next condition");
	      _ucs[x++] = shaping_table[(ucs[y] - 0x621)].initial;
	      continue;
	    }

/* Isolated */
	  else
	    {
	      if (shaping_table[(ucs[y] - 0x621)].isolated)
		{
		  _ucs[x++] = shaping_table[(ucs[y] - 0x621)].isolated;
		}
	      else
		{
		  _ucs[x++] = ucs[y];
		}
	      continue;
	    }
	}
/*
{
int x = 0;
char sz[32];
for (x = 0; x < len; x++)
{
printf("%i\n", x);
sprintf(sz, "0x%.4X", ucs[x]);
fprintf(stderr, "%s\t", sz);
sprintf(sz, "0x%.4X", _ucs[x]);
fprintf(stderr, "%s\n", sz);
}
}
*/
      _ucs[x] = 0x0;
      g_free (ucs);
      utf = g_ucs4_to_utf8 (_ucs, len, NULL, NULL, NULL);

      printf ("%s", utf);
      g_free (_ucs);
      g_free (utf);
    }

  fclose (fl);

  return 0;
}

Attachment: pgp00000.pgp
Description: PGP signature