ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x623, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA, AE_MORPH_REST, AE_MORPH_END);
where the 0xXXX values are the Unicode code points of letters (Ahrof al ziadaa, the augmentation letters)
and the AE_MORPH_XXX values are directives, such as "get the next letter", "get the previous letter" or "get all but the last letters", with or without diacritics
#define _GNU_SOURCE /* 1st to get rid of non-GNU, 2nd to have strndup and alloca */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdarg.h>
#include<assert.h>
#include<glib.h>
/* Number of bytes the code point x occupies in UTF-8: g_unichar_to_utf8()
 * is called with a NULL output buffer, so only the length is computed. */
#define g_unichar_len(x) g_unichar_to_utf8((x),NULL)
/* Byte length of the UTF-8 character that starts at pointer x.
 * Note: x is evaluated twice, so it must not have side effects. */
#define g_utf8_len(x) (g_utf8_next_char(x)-(x))
/*
 * Bit flags describing the combining marks attached to one base character.
 * Bits 1..8 follow the code point order U+064B..U+0652, which is what
 * ae_diacritics() relies on when it computes 1 << (ch - 0x064B + 1).
 */
enum AE_DIACRITICS {
    AE_DIAC_OTHER = 1 << 0,   /* any combining mark outside U+064B..U+0652 */
    AE_FATHATAN   = 1 << 1,   /* U+064B */
    AE_DAMMATAN   = 1 << 2,   /* U+064C */
    AE_KASRATAN   = 1 << 3,   /* U+064D */
    AE_FATHA      = 1 << 4,   /* U+064E */
    AE_DAMMA      = 1 << 5,   /* U+064F */
    AE_KASRA      = 1 << 6,   /* U+0650 */
    AE_SHADDA     = 1 << 7,   /* U+0651 */
    AE_SUKUN      = 1 << 8    /* U+0652 */
};
/**
 * Read one grapheme cluster (a base character plus the combining marks that
 * follow it) starting at t.
 *
 * @param t       UTF-8 text positioned on the base character.
 * @param c       out: the base character (0 when t is the empty string).
 * @param result  out: OR of AE_DIACRITICS flags for the marks found; marks
 *                outside U+064B..U+0652 are reported as AE_DIAC_OTHER.
 * @return pointer to the first byte after the cluster. For an empty string
 *         the input pointer itself is returned, so the terminating NUL is
 *         never stepped over.
 */
gchar *ae_diacritics(const gchar *t,gunichar *c, gint *result) {
    gchar *txt = (gchar *)t;
    gunichar ch;
    int r = 0;

    ch = g_utf8_get_char(txt);
    *c = ch;
    if (*txt == '\0') {
        /* g_utf8_find_next_char(p, NULL) does not stop at the terminator,
         * so advancing from here would read past the end of the string. */
        *result = 0;
        return txt;
    }
    txt = g_utf8_find_next_char(txt, NULL);
    ch = g_utf8_get_char(txt);
    while (*txt && g_unichar_break_type(ch) == G_UNICODE_BREAK_COMBINING_MARK) {
        switch (ch) {
        case 0x064B: case 0x064C: case 0x064D:
        case 0x064E: case 0x064F: case 0x0650:
        case 0x0651: case 0x0652:
            /* U+064B maps to bit 1 (AE_FATHATAN) ... U+0652 to bit 8 (AE_SUKUN) */
            r |= 1 << (ch - 0x064B + 1);
            break;
        default:
            r |= AE_DIAC_OTHER;
            break;
        }
        txt = g_utf8_find_next_char(txt, NULL);
        ch = g_utf8_get_char(txt);
    }
    *result = r;
    return txt;
}
/**
 * Step backwards from t to the base character of the preceding grapheme
 * cluster, skipping over combining marks.
 *
 * @param t  pointer just past the cluster of interest.
 * @param c  out: the character found at the returned position.
 * @return pointer to that character.
 *
 * No lower bound is known here: the caller must guarantee that a
 * non-combining character (or a NUL byte) exists before t.
 */
gchar *ae_back_gc(const gchar *t,gunichar *c) {
    gchar *cursor = (gchar *)t;
    gunichar cp;

    do {
        cursor = g_utf8_prev_char(cursor);
        cp = g_utf8_get_char(cursor);
    } while (*cursor && g_unichar_break_type(cp) == G_UNICODE_BREAK_COMBINING_MARK);

    *c = cp;
    return cursor;
}
/**
 * Convenience wrapper around ae_diacritics(): return only the AE_DIACRITICS
 * flags of the grapheme cluster that starts at txt.
 */
int ae_get_diacritics(gchar *txt) {
    gunichar base;   /* required by ae_diacritics(), not used here */
    int marks;

    (void)ae_diacritics(txt, &base, &marks);
    return marks;
}
/**
 * Count the grapheme clusters in t, i.e. the code points whose break type is
 * not G_UNICODE_BREAK_COMBINING_MARK. Combining marks, including any at the
 * very start of the string, do not add to the count.
 */
gint ae_gc_len(const gchar *t) {
    const gchar *p;
    gint count = 0;

    for (p = t; *p; p = g_utf8_find_next_char(p, NULL)) {
        if (g_unichar_break_type(g_utf8_get_char(p)) != G_UNICODE_BREAK_COMBINING_MARK)
            count += 1;
    }
    return count;
}
/**
 * Expand a shadda (U+0651) carried by the LAST grapheme cluster of txt.
 *
 * When the last cluster has a shadda, the result is: everything up to and
 * including the last base character, a fatha (U+064E), the base character
 * again, then the cluster's remaining marks with the shadda removed.
 * Otherwise the result is a plain copy of txt.
 *
 * @param txt  NUL-terminated UTF-8 text.
 * @return newly allocated string owned by the caller (release with free()).
 *         The copy paths may return NULL if strdup/strndup fail; failure to
 *         allocate the expanded buffer aborts, as the original assert did.
 */
gchar *ae_last_stress_expand(const gchar *txt) {
    gunichar ch;
    gint ch_len, gc_len, l, before, after;
    int diac = 0;
    gchar *str, *sh, *ptr, *tmp;

    l = strlen(txt);
    if (l == 0)
        return strdup(txt);   /* no cluster to step back over */

    tmp = ae_back_gc(txt + l, &ch);
    gc_len = ae_diacritics(tmp, &ch, &diac) - tmp;
    ch_len = g_unichar_len(ch);
    if (!(diac & AE_SHADDA))
        return strndup(txt, l);

    /* The allocation must not live inside assert(): with NDEBUG the whole
     * expression, malloc included, would be compiled out. */
    str = malloc(l + ch_len + 1);
    if (str == NULL)
        abort();

    /* prefix up to and including the last base character */
    memcpy(str, txt, l - gc_len + ch_len);
    ptr = str + l - gc_len + ch_len;
    ptr += g_unichar_to_utf8(0x64E, ptr);
    ptr += g_unichar_to_utf8(ch, ptr);

    /* add the remaining diacritics without the shadda (2 bytes in UTF-8) */
    sh = g_utf8_strchr(txt + l - gc_len + ch_len, -1, 0x0651);
    before = sh - tmp - ch_len;
    if (before > 0) {
        memcpy(ptr, tmp + ch_len, before);
        ptr += before;
    }
    after = tmp + gc_len - sh - 2;
    if (after > 0) {
        memcpy(ptr, sh + 2, after);
        ptr += after;
    }
    *ptr = 0;
    return str;
}
/* the next function expands the last stress even if the stress is not on the last char */
/*
gchar *ae_last_stress_expand(const gchar *txt) {
gunichar ch;
gint ch_len,gc_len,l,tmp;
int diac=0;
gchar *r,*s , *sh, *ptr;
l=strlen(txt);
sh=g_utf8_strrchr(txt,l,0x651);
if (sh) {
ptr=ae_back_gc(sh,&ch);
gc_len=ae_diacritics(ptr,&ch, &diac)-ptr;
ch_len=g_unichar_len(ch);
assert((s=malloc(l+ch_len+1))!=NULL);
memcpy(s, txt, l);
r=s; s+=ptr-txt+ch_len;
s+=g_unichar_to_utf8(0x64E,s);
s+=g_unichar_to_utf8(ch,s);
if (sh-ptr-ch_len>0) {
memcpy(s, ptr+ch_len, sh-ptr-ch_len);
s+=sh-ptr-ch_len;
}
if (txt+l-sh-2>0) {
memcpy(s, sh+2, txt+l-sh-2);
s+=txt+l-sh-2;
}
*s=0;
return r;
} else {
return strndup(txt,l);
}
}
*/
/**
 * Return a copy of txt in which every cluster carrying a shadda (U+0651) is
 * written out as two letters: base char + sukun (U+0652), then the base char
 * again followed by the cluster's other marks (shadda removed).
 * Clusters without a shadda are copied unchanged.
 *
 * @param txt  NUL-terminated UTF-8 text.
 * @return newly allocated string owned by the caller (release with free()).
 *         Allocation failure aborts the process.
 */
gchar *ae_destress(const gchar *txt) {
    gunichar ch;
    gint ch_len, gc_len, s, used, before, after;
    int diac = 0;
    gchar *str, *ptr, *sh, *grown;

    str = strdup(txt);
    if (str == NULL)
        abort();
    ptr = str;
    s = strlen(txt) + 1;

    while (*txt) {
        gc_len = ae_diacritics(txt, &ch, &diac) - txt;
        ch_len = g_unichar_len(ch);
        if (diac & AE_SHADDA) {
            /* Each expanded cluster grows by one extra copy of the base char.
             * realloc is kept out of assert() so it survives NDEBUG builds. */
            used = ptr - str;
            s += ch_len;
            grown = realloc(str, s);
            if (grown == NULL)
                abort();
            str = grown;
            ptr = str + used;

            ptr += g_unichar_to_utf8(ch, ptr);
            *ptr++ = '\331'; *ptr++ = '\222';   /* U+0652 sukun */
            ptr += g_unichar_to_utf8(ch, ptr);

            /* add the remaining diacritics without the shadda (2 bytes) */
            sh = g_utf8_strchr(txt, -1, 0x0651);
            before = sh - txt - ch_len;
            if (before > 0) {
                memcpy(ptr, txt + ch_len, before);
                ptr += before;
            }
            after = txt + gc_len - sh - 2;
            if (after > 0) {
                memcpy(ptr, sh + 2, after);
                ptr += after;
            }
            txt += gc_len;
        } else {
            if (ptr != txt) memcpy(ptr, txt, gc_len);
            txt += gc_len;
            ptr += gc_len;
        }
    }
    *ptr = 0;
    return str;
}
/**
 * Backup variant of ae_destress(): same output, but the base character is
 * copied as raw bytes from the input instead of being re-encoded.
 *
 * Every cluster carrying a shadda (U+0651) becomes base char + sukun
 * (U+0652), then the base char again followed by the cluster's other marks.
 *
 * @param txt  NUL-terminated UTF-8 text.
 * @return newly allocated string owned by the caller (release with free()).
 *         Allocation failure aborts the process.
 */
gchar *ae_destress_working(const gchar *txt) {
    gunichar ch;
    gint ch_len, gc_len, s, used, before, after;
    int diac = 0;
    gchar *str, *ptr, *sh, *grown;

    str = strdup(txt);
    if (str == NULL)
        abort();
    ptr = str;
    s = strlen(txt) + 1;

    while (*txt) {
        ch_len = g_utf8_find_next_char(txt, NULL) - txt;
        gc_len = ae_diacritics(txt, &ch, &diac) - txt;
        if (diac & AE_SHADDA) {
            /* realloc is kept out of assert() so it survives NDEBUG builds */
            used = ptr - str;
            s += ch_len;
            grown = realloc(str, s);
            if (grown == NULL)
                abort();
            str = grown;
            ptr = str + used;

            memcpy(ptr, txt, ch_len); ptr += ch_len;
            *ptr++ = '\331'; *ptr++ = '\222';   /* U+0652 sukun */
            memcpy(ptr, txt, ch_len); ptr += ch_len;

            /* add the remaining diacritics without the shadda (2 bytes) */
            sh = g_utf8_strchr(txt, -1, 0x0651);
            before = sh - txt - ch_len;
            if (before > 0) {
                memcpy(ptr, txt + ch_len, before);
                ptr += before;
            }
            after = txt + gc_len - sh - 2;
            if (after > 0) {
                memcpy(ptr, sh + 2, after);
                ptr += after;
            }
            txt += gc_len;
        } else {
            if (ptr != txt) memcpy(ptr, txt, gc_len);
            txt += gc_len;
            ptr += gc_len;
        }
    }
    *ptr = 0;
    return str;
}
/*
 * In-place inverse of the destress step: whenever a cluster carries a sukun
 * and the next base character is the same letter, the pair is collapsed into
 * that letter + shadda (U+0651) + the second cluster's marks.
 * The text is rewritten inside the caller's buffer (txt is read ahead of the
 * write cursor ptr) and the original pointer is returned.
 */
gchar *ae_restress_(gchar *txt) {
gunichar ch;
gint ch_len,gc_len; /* length of char and grapheme cluster */
int diac=0;
/* str: start of buffer (return value); ptr: write cursor; txt: read cursor */
gchar *str=txt,*ptr=txt,*nxt;
while(*txt) {
ch_len=g_utf8_len(txt);
gc_len=ae_diacritics(txt,&ch, &diac)-txt;
/* sukun on this letter and the very next base character is the same letter */
if ((diac&AE_SUKUN) && (g_utf8_get_char(txt+gc_len)==ch) ) {
/* only the base char of the first cluster is kept; its marks are dropped */
if (ptr!=txt) memmove(ptr, txt, ch_len); /* copy char alone */
txt+=gc_len;
ptr+=ch_len;
/* the two bytes below are U+0651 (shadda) in UTF-8 */
*ptr++='\331'; *ptr++='\221'; /* safe because sukun len == shadda len */
// ptr+=2;
/* measure the second (identical) letter's cluster; ch_len is reused for it */
nxt=ae_diacritics(txt,&ch, &diac);
gc_len=nxt-txt;
/* *ptr=0;
printf("/%s:ـ%s/\n",str,txt+ch_len); */
/* append the second cluster's marks, skipping its base character */
if (ptr!=txt+ch_len && gc_len-ch_len) memmove(ptr, txt+ch_len, gc_len-ch_len); /* copy after shadda */
txt=nxt;
ptr+=gc_len-ch_len;
} else {
/* ordinary cluster: move it down to the write cursor if they have diverged */
if (ptr!=txt) memmove(ptr, txt, gc_len);
txt+=gc_len;
ptr+=gc_len;
}
}
*ptr=0;
return str;
}
/* TODO: rewrite destress and restress using recursion, then benchmark */
/*
 * Opcodes accepted by ae_morph() in its variadic list. Positive values are
 * literal code points to insert; zero ends the list; negative values are the
 * directives below.
 */
enum AE_MORPH_MODES {
    AE_MORPH_END        =   0,  /* terminates the argument list */
    AE_MORPH_DIAC_ON    =  -1,  /* copy root letters together with their marks */
    AE_MORPH_DIAC_OFF   =  -2,  /* copy root letters without marks */
    AE_MORPH_DIAC_KASRA =  -3,  /* replace marks of copied letters with kasra */
    AE_MORPH_DIAC_DAMMA =  -4,  /* replace marks of copied letters with damma */
    AE_MORPH_DIAC_FATHA =  -5,  /* replace marks of copied letters with fatha */
    AE_MORPH_SKIP       =  -6,  /* consume the next root cluster, emit nothing */
    AE_MORPH_NEXT       =  -7,  /* emit the next root cluster and consume it */
    AE_MORPH_GET_NEXT   =  -8,  /* emit the next root cluster, do not consume */
    /* AE_MORPH_BACK=-6, AE_MORPH_PREV=-7, AE_MORPH_GET_PREV=-8, */
    AE_MORPH_REST       =  -9,  /* emit everything left in the root */
    AE_MORPH_REST_1     = -10,  /* rest but 1: all but the last cluster */
    AE_MORPH_REST_2     = -11,  /* rest but 2: all but the last two clusters */
    AE_MORPH_MAX        = -12
};
/* Encode a number x as a value below AE_MORPH_MAX ... */
#define AE_MORPH_NUM(x) (AE_MORPH_MAX-(x))
/* ... and decode it again: AE_MORPH_GET_NUM(AE_MORPH_NUM(n)) == n.
 * NOTE(review): neither macro is used by the code visible in this file. */
#define AE_MORPH_GET_NUM(x) (AE_MORPH_NUM(0)-(x))
/**
 * Build a derived word in buff from root by interpreting a variadic list of
 * int opcodes terminated by AE_MORPH_END.
 *
 *  - op > 0 : a literal Unicode code point, appended as UTF-8.
 *  - AE_MORPH_DIAC_ON / _OFF / _FATHA / _DAMMA / _KASRA : select how the
 *    marks of subsequently copied root letters are handled (keep them, drop
 *    them, or replace them with that single vowel).
 *  - AE_MORPH_SKIP : consume one root cluster without emitting it.
 *  - AE_MORPH_NEXT / AE_MORPH_GET_NEXT : emit the next root cluster, with
 *    (NEXT) or without (GET_NEXT) consuming it.
 *  - AE_MORPH_REST / _REST_1 / _REST_2 : emit the whole remainder of the
 *    root, then step back over the last one / two clusters in both the root
 *    and the output.
 *
 * @param root  NUL-terminated UTF-8 source word (not modified).
 * @param buff  output buffer.
 * @param size  capacity of buff in bytes, including the terminating NUL.
 *              Output that would not fit is dropped at a cluster boundary
 *              and processing stops; nothing is written when size <= 0.
 * @param ...   int opcodes, last one AE_MORPH_END.
 * @return buff, always NUL-terminated when size > 0.
 */
gchar *ae_morph(gchar *root,gchar *buff,gint size,...) {
    gchar *ptr = buff, *txt = root, *nxt, *limit;
    int op, diac, mark, diac_st = 1;   /* diac_st: 0 no marks, 1 keep marks, <0 forced vowel */
    gint l, need;
    size_t rest;
    gunichar ch;
    va_list ap;

    if (buff == NULL || size <= 0)
        return buff;
    limit = buff + size - 1;   /* last byte, reserved for the terminator */

    va_start(ap, size);
    /* Callers pass plain ints (hex literals and negative enum constants), so
     * the arguments must be fetched as int rather than as unsigned gunichar. */
    while ((op = va_arg(ap, int)) != AE_MORPH_END) {
        /* printf("<%d>\n",op); */
        if (op > 0) {
            /* if (op==0x671 && (ch==0x671 || ch==0x622 || ch==0x623 || ch==0x625 || 0x672 || 0x673) ) continue; */
            if (limit - ptr < g_unichar_to_utf8(op, NULL))
                goto done;
            ptr += g_unichar_to_utf8(op, ptr);
            continue;
        }
        switch (op) {
        case AE_MORPH_DIAC_ON:
            diac_st = 1;
            break;
        case AE_MORPH_DIAC_OFF:
            diac_st = 0;
            break;
        case AE_MORPH_DIAC_FATHA:
        case AE_MORPH_DIAC_DAMMA:
        case AE_MORPH_DIAC_KASRA:
            diac_st = op;
            break;
        case AE_MORPH_SKIP:
            if (*txt)   /* nothing to skip once the root is exhausted */
                txt = ae_diacritics(txt, &ch, &diac);
            break;
        case AE_MORPH_NEXT:
        case AE_MORPH_GET_NEXT:
            if (!*txt)  /* root exhausted: do not read past its terminator */
                break;
            nxt = ae_diacritics(txt, &ch, &diac);
            if (diac_st == 1) {
                l = nxt - txt;
                if (limit - ptr < l)
                    goto done;
                memcpy(ptr, txt, l);
                ptr += l;
            } else {
                l = g_unichar_to_utf8(ch, NULL);
                /* FATHA -> U+064E, DAMMA -> U+064F, KASRA -> U+0650 */
                mark = 0x64E + diac_st - AE_MORPH_DIAC_FATHA;
                need = l + (diac_st ? g_unichar_to_utf8(mark, NULL) : 0);
                if (limit - ptr < need)
                    goto done;
                memcpy(ptr, txt, l);
                ptr += l;
                if (diac_st)
                    ptr += g_unichar_to_utf8(mark, ptr);
            }
            if (op == AE_MORPH_NEXT)
                txt = nxt;
            break;
        case AE_MORPH_REST_1:
        case AE_MORPH_REST_2:
        case AE_MORPH_REST:
            if (diac_st == 1) {
                rest = strlen(txt);
                if ((size_t)(limit - ptr) < rest)
                    goto done;
                memcpy(ptr, txt, rest);
                txt += rest;
                ptr += rest;
                *ptr = 0;
            } else {
                mark = 0x64E + diac_st - AE_MORPH_DIAC_FATHA;
                while (*txt) {
                    nxt = ae_diacritics(txt, &ch, &diac);
                    need = g_unichar_to_utf8(ch, NULL)
                         + (diac_st ? g_unichar_to_utf8(mark, NULL) : 0);
                    if (limit - ptr < need)
                        goto done;
                    txt = nxt;
                    ptr += g_unichar_to_utf8(ch, ptr);
                    if (diac_st)
                        ptr += g_unichar_to_utf8(mark, ptr);
                }
            }
            /* "rest but 1/2": un-emit the trailing cluster(s) and rewind the
             * root so that a following NEXT re-reads them. */
            if (op != AE_MORPH_REST) {
                if (txt > root) txt = ae_back_gc(txt, &ch);
                if (ptr > buff) *(ptr = ae_back_gc(ptr, &ch)) = 0;
            }
            if (op == AE_MORPH_REST_2) {
                if (txt > root) txt = ae_back_gc(txt, &ch);
                if (ptr > buff) *(ptr = ae_back_gc(ptr, &ch)) = 0;
            }
            break;
        default:
            /* TODO: */
            printf("not implemented-d\n");
            break;
        } /* END switch */
        /* printf(":\n"); */
    } /* END WHILE */
done:
    *ptr = 0;
    va_end(ap);
    return buff;
}
/*
 * ae_filter_: convert its input, in place, to spell-format (mainly deals
 * with alef and hamza). Returns the pointer it was given.
 *
 * Rules noted by the original author (translated):
 *  + no diacritics are drawn on the soft alef
 *  + if it carries anything other than the implicit fatha (shadda included)
 *    it goes back to its origin, either waw or yeh
 *  + a soft alef followed by a soft alef: the second one goes back to its
 *    origin
 *  + if its origin is neither waw nor yeh then it is a hamza
 *  + a hamza followed by a soft alef is spelled as a madda
 *  + hamzat al-wasl is added to avoid starting with a vowelless letter
 *
 * What the code below does: U+0672 (alef whose origin is waw) and U+0673
 * (alef whose origin is yeh) become a plain alef U+0627 when their marks are
 * exactly AE_FATHA and the previous base char is not U+0627; otherwise they
 * become U+0648 (waw) / U+064A (yeh). All other clusters are left untouched.
 * Every replacement is 2 bytes wide, as the replaced characters are.
 */
gchar *ae_filter_(gchar *txt) {
    gchar *const start = txt;
    gchar *out = txt;
    gunichar ch, prev_ch = 0;
    gint ch_len, gc_len;   /* byte length of the base char / whole cluster */
    int diac = 0;

    while (*txt) {
        ch_len = g_utf8_len(txt);
        gc_len = ae_diacritics(txt, &ch, &diac) - txt;

        if (ch == 0x672 || ch == 0x673) {
            if (diac != AE_FATHA || prev_ch == 0x627) {
                /* back to the origin letter: waw (D9 88) or yeh (D9 8A) */
                out[0] = '\331';
                out[1] = (ch == 0x672) ? '\210' : '\212';
                out += 2;
                if (out != txt) memmove(out, txt + ch_len, gc_len - ch_len);
            } else {
                /* plain alef U+0627 (D8 A7) */
                out[0] = '\330';
                out[1] = '\247';
                out += 2;
            }
            out += gc_len - ch_len;
        } else {
            out += gc_len;
        }

        txt += gc_len;
        prev_ch = ch;
    }
    return start;
}
/*
 * sub_roots_sp: derive the augmented verb forms of root_str.
 * The returned list is in almost spell-format and still needs to be
 * filtered (see ae_filter_) to be in spell-format.
 *
 * @param root_str  NUL-terminated UTF-8 base verb.
 * @return a GList of malloc'ed strings (each entry is released with free(),
 *         the list itself with g_list_free()); the first entry is the base
 *         form. NULL (the empty list) is returned for a NULL/empty input or
 *         when the scratch buffer cannot be allocated.
 */

/* Run one ae_morph() pattern on root, restress it and append a copy to ls. */
#define SUB_ROOTS_SP_FORM(...) \
    (ls = g_list_append(ls, strndup(ae_restress_(ae_morph(root, buff, size, \
        __VA_ARGS__, AE_MORPH_END)), size - 1)))

GList *sub_roots_sp(const gchar *root_str) {
    /* TODO: set the vowel of the verb's middle radical */
    /* TODO: handle hamzas, weak letters and orthography */
    GList *ls = NULL;
    gchar *base_root, *root, *buff;
    gint s, size;
    gunichar ch;
    gint diac;

    if (root_str == NULL || *root_str == '\0')
        return NULL;   /* the patterns below need at least one letter */

    root = ae_destress(root_str);
    s = strlen(root) + 1; size = s + 16;   /* 16 bytes of room for affixes */
    buff = malloc(size);
    if (buff == NULL) {
        free(root);
        return NULL;
    }

    base_root = strndup(root_str, size - 1);
    ae_restress_(base_root);
    ls = g_list_append(ls, base_root);

    /* af`ala */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x623, AE_MORPH_NEXT, 0x652,
                      AE_MORPH_DIAC_FATHA, AE_MORPH_REST);
    /* faa`ala */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_ON, AE_MORPH_NEXT, AE_MORPH_DIAC_FATHA,
                      0x627, AE_MORPH_REST);
    /* fa``ala */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_ON, AE_MORPH_NEXT, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,
                      AE_MORPH_DIAC_ON, 0x651, 0x64E, AE_MORPH_REST);
    /* infa`ala */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_FATHA, 0x671, 0x646, 0x652, AE_MORPH_REST);
    /* if`alla */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA,
                      AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x651, 0x64E);
    /* tafa``ala */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_ON, 0x62A, 0x64E, AE_MORPH_NEXT, AE_MORPH_DIAC_OFF,
                      AE_MORPH_NEXT, AE_MORPH_DIAC_ON, 0x651, 0x64E, AE_MORPH_REST);
    /* tafaa`ala */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT, 0x64E,
                      0x627, AE_MORPH_DIAC_FATHA, AE_MORPH_REST);
    /* istaf`ala */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_ON, 0x671, 0x633, 0x652, 0x62A, 0x64E,
                      AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, AE_MORPH_NEXT, 0x64E,
                      AE_MORPH_DIAC_ON, AE_MORPH_REST);
    /* if`aw`ala */
    SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, AE_MORPH_GET_NEXT, 0x64E,
                      0x648, 0x652, AE_MORPH_NEXT, 0x64E, AE_MORPH_DIAC_ON, AE_MORPH_REST);

    ae_diacritics(root, &ch, &diac);
    /* TODO: set the vowel of the verb's middle radical */
    /* ifta`ala: the shape depends on the first letter of the root */
    switch (ch) {
    case 0x648: case 0x64A:
        /* first letter waw/yeh: it is dropped and teh U+062A inserted */
        SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_SKIP, 0x62A, 0x64E,
                          AE_MORPH_DIAC_ON, AE_MORPH_REST);
        break;
    case 0x630:
        /* first letter thal: it is dropped and dal + sukun + dal inserted */
        SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_SKIP, 0x62F, 0x652, 0x62F, 0x64E,
                          AE_MORPH_DIAC_ON, AE_MORPH_REST);
        /* fallthrough */
        /* NOTE(review): the original has no break here, so a thal root also
         * gets the form below; kept as is - confirm it is intentional. */
    case 0x62F: case 0x632:
        /* first letter kept, dal U+062F inserted in place of teh */
        SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x62F, 0x64E,
                          AE_MORPH_DIAC_ON, AE_MORPH_REST);
        break;
    case 0x635: case 0x636: case 0x637: case 0x638:
        /* first letter kept, tah U+0637 inserted in place of teh */
        SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x637, 0x64E,
                          AE_MORPH_DIAC_ON, AE_MORPH_REST);
        break;
    default:
        /* regular ifta`ala with teh U+062A */
        SUB_ROOTS_SP_FORM(AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x62A, 0x64E,
                          AE_MORPH_DIAC_FATHA, AE_MORPH_REST);
        break;
    }

    free(buff);
    free(root);
    return ls;
}
#undef SUB_ROOTS_SP_FORM
/* Run one ae_morph() pattern on src, restress it and append a copy to ls. */
#define AE_PRONOUN_FORM(src, ...) \
    (ls = g_list_append(ls, strndup(ae_restress_(ae_morph((src), buff, size, \
        __VA_ARGS__, AE_MORPH_END)), size - 1)))
/* Whole word with marks, except that the last letter is emitted bare. */
#define AE_PAST_STEM \
    AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT
/* prefix + fatha, first letter + sukun, middle with marks, last letter bare. */
#define AE_IMPERFECT_STEM(prefix) \
    AE_MORPH_DIAC_OFF, (prefix), 0x64E, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_ON, \
    AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT
/* hamzat al-wasl U+0671, then the word with its last letter bare. */
#define AE_IMPERATIVE_STEM \
    AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT

/**
 * Attach subject-pronoun affixes to the verb root_str (past, imperfect and
 * imperative forms, per the original author's section comments).
 *
 * @param root_str  NUL-terminated UTF-8 verb.
 * @return a GList of malloc'ed strings (each entry is released with free(),
 *         the list itself with g_list_free()); the first entry is the verb
 *         itself. NULL (the empty list) is returned for a NULL/empty input
 *         or when a working buffer cannot be allocated.
 */
GList *sub_roots_attach_pronoun(const gchar *root_str) {
    GList *ls = NULL;
    gchar *base_root, *root, *expanded, *buff;
    gint s, size;

    if (root_str == NULL || *root_str == '\0')
        return NULL;   /* the patterns below need at least one letter */

    root = ae_destress(root_str);
    expanded = ae_last_stress_expand(root_str); /* use restress */
    s = strlen(root) + 1; size = s + 16;   /* 16 bytes of room for affixes */
    buff = malloc(size);
    if (buff == NULL || expanded == NULL) {
        free(buff);
        free(expanded);
        free(root);
        return NULL;
    }

    /* he: the verb as given */
    base_root = strndup(root_str, size - 1);
    ae_restress_(base_root);
    ls = g_list_append(ls, base_root);

    /* past, last letter + sukun, then: tu | naa | ta | tumaa | tum | ti | tunna | na */
    AE_PRONOUN_FORM(expanded, AE_PAST_STEM, 0x652, 0x62A, 0x64F);
    AE_PRONOUN_FORM(expanded, AE_PAST_STEM, 0x652, 0x646, 0x64E, 0x627);
    AE_PRONOUN_FORM(expanded, AE_PAST_STEM, 0x652, 0x62A, 0x64E);
    AE_PRONOUN_FORM(expanded, AE_PAST_STEM, 0x652, 0x62A, 0x64F, 0x645, 0x64E, 0x627);
    AE_PRONOUN_FORM(expanded, AE_PAST_STEM, 0x652, 0x62A, 0x64F, 0x645);
    AE_PRONOUN_FORM(expanded, AE_PAST_STEM, 0x652, 0x62A, 0x650);
    AE_PRONOUN_FORM(expanded, AE_PAST_STEM, 0x652, 0x62A, 0x64F, 0x646, 0x651, 0x64E);
    AE_PRONOUN_FORM(expanded, AE_PAST_STEM, 0x652, 0x646, 0x64E);

    /* past, last letter + fatha, then: alef | teh fatha alef | teh sukun */
    AE_PRONOUN_FORM(root, AE_PAST_STEM, 0x64E, 0x627);
    AE_PRONOUN_FORM(root, AE_PAST_STEM, 0x64E, 0x62A, 0x64E, 0x627);
    AE_PRONOUN_FORM(root, AE_PAST_STEM, 0x64E, 0x62A, 0x652);

    /* they: last letter + damma, then waw sukun alef */
    AE_PRONOUN_FORM(root, AE_PAST_STEM, 0x64f, 0x648, 0x652, 0x627);

    /* imperfect */
    /* prefix yeh | teh, last letter + damma */
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x64A), 0x64F);
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x62A), 0x64F);
    /* prefix hamza-alef | noon, last letter + damma */
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x623), 0x64F);
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x646), 0x64F);
    /* prefix yeh | teh, last letter + sukun, then alef noon kasra | waw noon fatha */
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x64A), 0x652, 0x627, 0x646, 0x650);
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x62A), 0x652, 0x627, 0x646, 0x650);
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x64A), 0x652, 0x648, 0x646, 0x64E);
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x62A), 0x652, 0x648, 0x646, 0x64E);
    /* prefix teh, last letter + kasra, then yeh sukun noon fatha */
    AE_PRONOUN_FORM(root, AE_IMPERFECT_STEM(0x62A), 0x650, 0x64A, 0x652, 0x646, 0x64E);
    /* prefix yeh | teh, last letter + sukun, then noon fatha */
    AE_PRONOUN_FORM(expanded, AE_IMPERFECT_STEM(0x64A), 0x652, 0x646, 0x64E);
    AE_PRONOUN_FORM(expanded, AE_IMPERFECT_STEM(0x62A), 0x652, 0x646, 0x64E);

    /* imperative */
    /* last letter + sukun, then: nothing | alef | waw damma alef */
    AE_PRONOUN_FORM(expanded, AE_IMPERATIVE_STEM, 0x652);
    AE_PRONOUN_FORM(expanded, AE_IMPERATIVE_STEM, 0x652, 0x627);
    AE_PRONOUN_FORM(expanded, AE_IMPERATIVE_STEM, 0x652, 0x648, 0x64F, 0x627);

    free(buff);
    free(expanded);
    free(root);
    return ls;
}
#undef AE_IMPERATIVE_STEM
#undef AE_IMPERFECT_STEM
#undef AE_PAST_STEM
#undef AE_PRONOUN_FORM
/* test */
int main() {
gchar str[]="جدَّ";
gchar *s;
GList *ls,*l2;
ls=sub_roots_sp("جدَّ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("دَرَسَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("سَهُلَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("قَبُحَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("قَبِلَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("وَصَلَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("قَبِلَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ذَكَرَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("ضَرَبَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("دَحْرَجَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("زَلْزَلَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("بَعْثَرَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("وَعَدَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("نَسِيَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("قَٲَلَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",
ae_filter_((gchar *)ls->data));
ls=g_list_next(ls);
}
ls=sub_roots_sp("سَٳَرَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",
ae_filter_((gchar *)ls->data));
ls=g_list_next(ls);
}
ls=sub_roots_sp("وَشَى");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("أَمَرَ");
ls=g_list_first(ls);
while(ls) {
printf("[%s]\n",(gchar *)ls->data);
ls=g_list_next(ls);
}
ls=sub_roots_sp("جَدَّ");
ls=g_list_first(ls);
printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data);
while(ls) {
printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data);
l2=sub_roots_attach_pronoun((gchar *)ls->data);
l2=g_list_first(l2);
while(l2) {
printf("[%s]\n",(gchar *)l2->data);
l2=g_list_next(l2);
}
ls=g_list_next(ls);
}
ls=sub_roots_sp("دَرَسَ");
ls=g_list_first(ls);
printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data);
while(ls) {
printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data);
l2=sub_roots_attach_pronoun((gchar *)ls->data);
l2=g_list_first(l2);
while(l2) {
printf("[%s]\n",(gchar *)l2->data);
l2=g_list_next(l2);
}
ls=g_list_next(ls);
}
}