/*
 * Usage example:
 *   ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x623, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA, AE_MORPH_REST, AE_MORPH_END);
 * where the 0xXXX values are the Unicode code points of the letters to insert (ahruf al-ziyada, the augmentation letters),
 * and the AE_MORPH_XXX values are directives such as "take the next letter", "take the previous letter" or "take all but the last letters", each with or without diacritics.
 */
#define _GNU_SOURCE /* 1st to get rid of non-GNU, 2nd to have strndup and alloca */ #include<stdio.h> #include<stdlib.h> #include<string.h> #include<stdarg.h> #include<assert.h> #include<glib.h> #define g_unichar_len(x) g_unichar_to_utf8((x),NULL) #define g_utf8_len(x) (g_utf8_next_char(x)-(x)) enum AE_DIACRITICS { AE_DIAC_OTHER=1, AE_FATHATAN=1<<1,AE_DAMMATAN=1<<2, AE_KASRATAN=1<<3, AE_FATHA=1<<4,AE_DAMMA=1<<5, AE_KASRA=1<<6, AE_SHADDA=1<<7, AE_SUKUN=1<<8 }; gchar *ae_diacritics(const gchar *t,gunichar *c, gint *result) { gunichar ch; int r=0; gchar *txt=(gchar *)t; ch=g_utf8_get_char(txt); *c=ch; txt=g_utf8_find_next_char(txt,NULL); ch=g_utf8_get_char(txt); while(*txt && g_unichar_break_type(ch)==G_UNICODE_BREAK_COMBINING_MARK) { switch(ch) { case 0x064B: case 0x064C: case 0x064D: case 0x064E: case 0x064F: case 0x0650: case 0x0651: case 0x0652: r|=1<<(ch-0x064B+1); break; default: r|=AE_DIAC_OTHER; } txt=g_utf8_find_next_char(txt,NULL); ch=g_utf8_get_char(txt); } *result=r; return txt; } gchar *ae_back_gc(const gchar *t,gunichar *c) { gunichar ch; gchar *txt=(gchar *)t; txt=g_utf8_prev_char(txt); ch=g_utf8_get_char(txt); while(*txt && g_unichar_break_type(ch)==G_UNICODE_BREAK_COMBINING_MARK) { txt=g_utf8_prev_char(txt); ch=g_utf8_get_char(txt); } *c=ch; return txt; } int ae_get_diacritics(gchar *txt) { gunichar c; int r; ae_diacritics(txt,&c, &r); return r; } gint ae_gc_len(const gchar *t) { gunichar ch; gint r=0; gchar *txt=(gchar *)t; ch=g_utf8_get_char(txt); while(*txt) { while (*txt && g_unichar_break_type(ch)==G_UNICODE_BREAK_COMBINING_MARK) { txt=g_utf8_find_next_char(txt,NULL); ch=g_utf8_get_char(txt); } if (!*txt) return r; r+=1; txt=g_utf8_find_next_char(txt,NULL); ch=g_utf8_get_char(txt); } return r; } /* expand stress on last char if any */ gchar *ae_last_stress_expand(const gchar *txt) { gunichar ch; gint ch_len,gc_len,l; int diac=0; gchar *str , *sh, *ptr,*tmp; l=strlen(txt); tmp=ae_back_gc(txt+l,&ch); gc_len=ae_diacritics(tmp,&ch, &diac)-tmp; 
ch_len=g_unichar_len(ch); if (diac&AE_SHADDA) { assert((str=malloc(l+ch_len+1))!=NULL); memcpy(str, txt, l-gc_len+ch_len); ptr=str+l-gc_len+ch_len; ptr+=g_unichar_to_utf8(0x64E,ptr); ptr+=g_unichar_to_utf8(ch,ptr); /* add rest diact without shadda */ sh=g_utf8_strchr(txt+l-gc_len+ch_len,-1,0x0651); if (sh-tmp-ch_len>0) { memcpy(ptr, tmp+ch_len, sh-tmp-ch_len); ptr+=sh-tmp-ch_len; } if (tmp+gc_len-sh-2>0) { memcpy(ptr, sh+2, tmp+gc_len-sh-2); ptr+=tmp+gc_len-sh-2; } *ptr=0; return str; } else { return strndup(txt,l); } } /* the next function expand last stress even if the stress is not on last char */ /* gchar *ae_last_stress_expand(const gchar *txt) { gunichar ch; gint ch_len,gc_len,l,tmp; int diac=0; gchar *r,*s , *sh, *ptr; l=strlen(txt); sh=g_utf8_strrchr(txt,l,0x651); if (sh) { ptr=ae_back_gc(sh,&ch); gc_len=ae_diacritics(ptr,&ch, &diac)-ptr; ch_len=g_unichar_len(ch); assert((s=malloc(l+ch_len+1))!=NULL); memcpy(s, txt, l); r=s; s+=ptr-txt+ch_len; s+=g_unichar_to_utf8(0x64E,s); s+=g_unichar_to_utf8(ch,s); if (sh-ptr-ch_len>0) { memcpy(s, ptr+ch_len, sh-ptr-ch_len); s+=sh-ptr-ch_len; } if (txt+l-sh-2>0) { memcpy(s, sh+2, txt+l-sh-2); s+=txt+l-sh-2; } *s=0; return r; } else { return strndup(txt,l); } } */ gchar *ae_destress(const gchar *txt) { gunichar ch; gint ch_len,gc_len,s,tmp; /* length of char and grapheme cluster */ int diac=0; gchar *str=strdup(txt); gchar *ptr=str,*sh; s=strlen(txt)+1; while(*txt) { gc_len=ae_diacritics(txt,&ch, &diac)-txt; ch_len=g_unichar_len(ch); if (diac&AE_SHADDA) { tmp=ptr-str; s+=ch_len; assert((str=realloc(str,s))!=NULL); ptr=str+tmp; ptr+=g_unichar_to_utf8(ch,ptr); *ptr++='\331'; *ptr++='\222'; ptr+=g_unichar_to_utf8(ch,ptr); /* add rest diact without shadda */ sh=g_utf8_strchr(txt,-1,0x0651); if (sh-txt-ch_len>0) { memcpy(ptr, txt+ch_len, sh-txt-ch_len); ptr+=sh-txt-ch_len; } if (txt+gc_len-sh-2>0) { memcpy(ptr, sh+2, txt+gc_len-sh-2); ptr+=txt+gc_len-sh-2; } txt+=gc_len; } else { if (ptr!=txt) memcpy(ptr, txt, gc_len); 
txt+=gc_len; ptr+=gc_len; } } *ptr=0; return str; } /* backup */ gchar *ae_destress_working(const gchar *txt) { gunichar ch; gint ch_len,gc_len,s,tmp; /* length of char and grapheme cluster */ int diac=0; gchar *str=strdup(txt); gchar *ptr=str,*sh; s=strlen(txt)+1; while(*txt) { ch_len=g_utf8_find_next_char(txt,NULL)-txt; gc_len=ae_diacritics(txt,&ch, &diac)-txt; if (diac&AE_SHADDA) { tmp=ptr-str; s+=ch_len; assert((str=realloc(str,s))!=NULL); ptr=str+tmp; memcpy(ptr, txt, ch_len); ptr+=ch_len; *ptr++='\331'; *ptr++='\222'; memcpy(ptr, txt, ch_len); ptr+=ch_len; /* add rest diact without shadda */ sh=g_utf8_strchr(txt,-1,0x0651); if (sh-txt-ch_len>0) { memcpy(ptr, txt+ch_len, sh-txt-ch_len); ptr+=sh-txt-ch_len; } if (txt+gc_len-sh-2>0) { memcpy(ptr, sh+2, txt+gc_len-sh-2); ptr+=txt+gc_len-sh-2; } txt+=gc_len; } else { if (ptr!=txt) memcpy(ptr, txt, gc_len); txt+=gc_len; ptr+=gc_len; } } *ptr=0; return str; } gchar *ae_restress_(gchar *txt) { gunichar ch; gint ch_len,gc_len; /* length of char and grapheme cluster */ int diac=0; gchar *str=txt,*ptr=txt,*nxt; while(*txt) { ch_len=g_utf8_len(txt); gc_len=ae_diacritics(txt,&ch, &diac)-txt; if ((diac&AE_SUKUN) && (g_utf8_get_char(txt+gc_len)==ch) ) { if (ptr!=txt) memmove(ptr, txt, ch_len); /* copy char alone */ txt+=gc_len; ptr+=ch_len; *ptr++='\331'; *ptr++='\221'; /* safe because sukun len == shadda len */ // ptr+=2; nxt=ae_diacritics(txt,&ch, &diac); gc_len=nxt-txt; /* *ptr=0; printf("/%s:ـ%s/\n",str,txt+ch_len); */ if (ptr!=txt+ch_len && gc_len-ch_len) memmove(ptr, txt+ch_len, gc_len-ch_len); /* copy after shadda */ txt=nxt; ptr+=gc_len-ch_len; } else { if (ptr!=txt) memmove(ptr, txt, gc_len); txt+=gc_len; ptr+=gc_len; } } *ptr=0; return str; } /* TODO: rewrite destress and restress as recarsion and benckmark */ enum AE_MORPH_MODES { AE_MORPH_END=0, AE_MORPH_DIAC_ON=-1 , AE_MORPH_DIAC_OFF=-2, AE_MORPH_DIAC_KASRA=-3, AE_MORPH_DIAC_DAMMA=-4, AE_MORPH_DIAC_FATHA=-5, AE_MORPH_SKIP=-6, AE_MORPH_NEXT=-7, 
AE_MORPH_GET_NEXT=-8, /* AE_MORPH_BACK=-6, AE_MORPH_PREV=-7, AE_MORPH_GET_PREV=-8, */ AE_MORPH_REST=-9, AE_MORPH_REST_1=-10, AE_MORPH_REST_2=-11, /* rest but 1, rest but 2*/ AE_MORPH_MAX=-12 }; #define AE_MORPH_NUM(x) (AE_MORPH_MAX-(x)) #define AE_MORPH_GET_NUM(x) (AE_MORPH_NUM(0)-(x)) /* TODO: size is not used!! */ gchar *ae_morph(gchar *root,gchar *buff,gint size,...) { gchar *ptr=buff,*txt=root,*s1,*s2,s3; int op,l,ll,diac,diac_st=1; /* 0:no diac,1:diac on*/ gunichar ch; va_list ap; va_start(ap, size); while((op = va_arg(ap, gunichar))!=AE_MORPH_END) { /* printf("<%d>\n",op); */ if (op>0) { ae_diacritics(txt,&ch, &diac); /* if (op==0x671 && (ch==0x671 || ch==0x622 || ch==0x623 || ch==0x625 || 0x672 || 0x673) ) continue; */ ptr+=g_unichar_to_utf8(op,ptr); } else switch(op) { case AE_MORPH_DIAC_ON: diac_st=1; break; case AE_MORPH_DIAC_OFF: diac_st=0; break; case AE_MORPH_DIAC_FATHA: case AE_MORPH_DIAC_DAMMA: case AE_MORPH_DIAC_KASRA: diac_st=op; break; case AE_MORPH_SKIP: txt=ae_diacritics(txt,&ch, &diac); break; case AE_MORPH_NEXT: case AE_MORPH_GET_NEXT: if (diac_st==1) { l=ae_diacritics(txt,&ch, &diac)-txt; memcpy(ptr,txt,l); ptr+=l; if (op==AE_MORPH_NEXT) txt+=l; } else { ll=ae_diacritics(txt,&ch, &diac)-txt; l=g_unichar_to_utf8(ch,NULL); memcpy(ptr,txt,l); ptr+=l; if (op==AE_MORPH_NEXT) txt+=ll; if (diac_st) ptr+= g_unichar_to_utf8( 0x64E + diac_st - AE_MORPH_DIAC_FATHA , ptr); } break; case AE_MORPH_REST_1: case AE_MORPH_REST_2: case AE_MORPH_REST: if (diac_st==1) { *ptr=0; l=strlen(txt); strcpy(ptr,txt); txt+=l; ptr+=l; } else { while(*txt) { txt=ae_diacritics(txt,&ch, &diac); ptr+=g_unichar_to_utf8(ch,ptr); if (diac_st) ptr+= g_unichar_to_utf8( 0x64E + diac_st - AE_MORPH_DIAC_FATHA , ptr); } } if (op!=AE_MORPH_REST) { txt=ae_back_gc(txt,&ch); *(ptr=ae_back_gc(ptr,&ch))=0; } if (op==AE_MORPH_REST_2) { txt=ae_back_gc(txt,&ch); *(ptr=ae_back_gc(ptr,&ch))=0; } break; default: /* TODO: */ printf("not implemented-d\n"); break; } /* END switch */ /* 
printf(":\n"); */ } /* END WHILE */ *ptr=0; va_end(ap); return buff; } /* ae_filter: convert it's input to spell-format (mainely deal with alef and hamza) */ gchar *ae_filter_(gchar *txt) { /* + الألف اللينة لا يرسم عليها حركات + فإن تحركت بغير الفتح الضمني (بما في ذلك الشدة) تعود إلى أصلها إما واو أو ياء + ألف لينة متبوعة بألف لينة تعاد الثانية لأصلها (سار - فاعل - ساير) + إذا لم يكن أصلها واو أو ياء فهي همزة + همزة متبوعة بألف لينة تصبح مدة إملائياً + همزة الوصل توضع لمنع الابتداء بساكن */ gunichar ch,old_ch=0; gint ch_len,gc_len; /* length of char and grapheme cluster */ int diac=0; gchar *str=txt,*ptr=txt,*nxt; while(*txt) { ch_len=g_utf8_len(txt); gc_len=ae_diacritics(txt,&ch, &diac)-txt; switch(ch) { case 0x672: /* ألف أصلها واو */ if (diac!=AE_FATHA || old_ch==0x627) { *ptr++='\331'; *ptr++='\210'; /* safe because ...etc*/ if (ptr!=txt) memmove(ptr, txt+ch_len, gc_len-ch_len); } else {*ptr++='\330'; *ptr++='\247';} ptr+=gc_len-ch_len; break; case 0x673: /* ألف أصلها ياء */ if (diac!=AE_FATHA || old_ch==0x627) { *ptr++='\331'; *ptr++='\212'; /* safe because ...etc*/ if (ptr!=txt) memmove(ptr, txt+ch_len, gc_len-ch_len); } else {*ptr++='\330'; *ptr++='\247';} ptr+=gc_len-ch_len; break; default: ptr+=gc_len; } txt+=gc_len; old_ch=ch; } return str; } /* * sub_roots_sp: the returned list in almost spell-format * which needs to be filtered to be in spell-format */ GList *sub_roots_sp(const gchar *root_str) { /* TODO: ضبط عين الفعل */ /* TODO: التعامل مع الهمزات وحروف العلة والإملاء */ GList *ls = NULL; gchar *base_root, *root,*buff,*str,*s1,*s2,*s3; gint s,size; gunichar ch; gint diac; root=ae_destress(root_str); s=strlen(root)+1; size=s+16; buff=(gchar *)malloc(size); base_root=strndup(root_str,size-1); ae_restress_(base_root); ls = g_list_append (ls, base_root); /* أفعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x623, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA, AE_MORPH_REST, AE_MORPH_END)),size-1)); /* فاعل */ ls 
= g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_NEXT, AE_MORPH_DIAC_FATHA, 0x627, AE_MORPH_REST, AE_MORPH_END)),size-1)); /* فعّل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_NEXT, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, AE_MORPH_DIAC_ON, 0x651, 0x64E, AE_MORPH_REST, AE_MORPH_END)),size-1)); /* انفعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_FATHA, 0x671, 0x646, 0x652,AE_MORPH_REST, AE_MORPH_END)),size-1)); /* افعلّ */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x651,0x64E, AE_MORPH_END)),size-1)); /* تفعّل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_ON, 0x62A, 0x64E, AE_MORPH_NEXT, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, AE_MORPH_DIAC_ON, 0x651, 0x64E, AE_MORPH_REST, AE_MORPH_END)),size-1)); /* تفاعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT, 0x64E, 0x627, AE_MORPH_DIAC_FATHA, AE_MORPH_REST, AE_MORPH_END)),size-1)); /* استفعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_ON, 0x671, 0x633, 0x652, 0x62A, 0x64E, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652 , AE_MORPH_NEXT, 0x64E, AE_MORPH_DIAC_ON, AE_MORPH_REST, AE_MORPH_END)),size-1)); /* افعوعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, AE_MORPH_GET_NEXT, 0x64E, 0x648, 0x652, AE_MORPH_NEXT, 0x64E, AE_MORPH_DIAC_ON, AE_MORPH_REST, AE_MORPH_END)),size-1)); ae_diacritics(root,&ch, &diac); /* TODO: ضبط عين الفعل */ /* افتعل */ switch(ch) { case 0x648: case 0x64A: /* اتعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_SKIP, 0x62A, 0x64E, 
AE_MORPH_DIAC_ON, AE_MORPH_REST, AE_MORPH_END)),size-1)); break; case 0x630: /* افدعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_SKIP, 0x62F , 0x652, 0x62F, 0x64E, AE_MORPH_DIAC_ON, AE_MORPH_REST, AE_MORPH_END)),size-1)); case 0x62F: case 0x632: /* افدعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x62F, 0x64E, AE_MORPH_DIAC_ON, AE_MORPH_REST, AE_MORPH_END)),size-1)); break; case 0x635: case 0x636: case 0x637: case 0x638: /* افطعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x637, 0x64E, AE_MORPH_DIAC_ON, AE_MORPH_REST, AE_MORPH_END)),size-1)); break; default: /* افتعل */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x62A, 0x64E, AE_MORPH_DIAC_FATHA, AE_MORPH_REST, AE_MORPH_END)),size-1)); } free(buff); free(root); return ls; } GList *sub_roots_attach_pronoun(const gchar *root_str) { GList *ls = NULL; gchar *base_root, *root, *expanded; gchar *buff,*str,*s1,*s2,*s3; gint s,size; gunichar ch; gint diac; root=ae_destress(root_str); expanded=ae_last_stress_expand(root_str); /* use restress */ s=strlen(root)+1; size=s+16; buff=(gchar *)malloc(size); /* هو فعل */ base_root=strndup(root_str,size-1); ae_restress_(base_root); ls = g_list_append (ls, base_root); /* فعلْ ((تُ)|(نا)|(تَ)|(تما)|(تم)|(تِ)|(تنّ)|(ن)) */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x646, 0x64E, 0x627, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, 
AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x62A, 0x64E, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x645,0x64E, 0x627, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x645, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x62A, 0x650, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x646, 0x651, 0x64E, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x646, 0x64E, AE_MORPH_END)),size-1)); /* هما/هما/هن فعلَ ( (ا)|(تْ)|(تا) ) */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64E, 0x627, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64E, 0x62A, 0x64E, 0x627, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64E, 0x62A, 0x652, AE_MORPH_END)),size-1)); /* هم فعلُ (وا) */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64f, 0x648, 0x652,0x627, AE_MORPH_END)),size-1)); /* المضارع */ /* (ي|ت)فعلُ */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, 
AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F, AE_MORPH_END)),size-1)); /* (أ|ن)فعلُ */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x623, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x646, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F, AE_MORPH_END)),size-1)); /* (ي|ت)فعلْ ( (ان)|(ون) ) */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x627,0x646,0x650, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x627,0x646,0x650, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x648,0x646,0x64E, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x648,0x646,0x64E, AE_MORPH_END)),size-1)); /* تفعلِ (ين) */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, 
AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x650, 0x64A,0x652, 0x646,0x64E, AE_MORPH_END)),size-1)); /* ((ت|(ي))فعلْ (ن) */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x646,0x64E, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON, AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x646,0x64E, AE_MORPH_END)),size-1)); /* الأمر */ /* افعل ((ا)|(وا)|(ي)|(ن))؟ */ ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x627, AE_MORPH_END)),size-1)); ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size, AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652, 0x648, 0x64F,0x627, AE_MORPH_END)),size-1)); free(buff); free(expanded); free(root); return ls; } /* test */ int main() { gchar str[]="جدَّ"; gchar *s; GList *ls,*l2; ls=sub_roots_sp("جدَّ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("دَرَسَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("سَهُلَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("قَبُحَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("قَبِلَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("وَصَلَ"); 
ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("قَبِلَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("ذَكَرَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("ضَرَبَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("دَحْرَجَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("زَلْزَلَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("بَعْثَرَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("وَعَدَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("نَسِيَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("قَٲَلَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n", ae_filter_((gchar *)ls->data)); ls=g_list_next(ls); } ls=sub_roots_sp("سَٳَرَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n", ae_filter_((gchar *)ls->data)); ls=g_list_next(ls); } ls=sub_roots_sp("وَشَى"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("أَمَرَ"); ls=g_list_first(ls); while(ls) { printf("[%s]\n",(gchar *)ls->data); ls=g_list_next(ls); } ls=sub_roots_sp("جَدَّ"); ls=g_list_first(ls); printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data); while(ls) { printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data); l2=sub_roots_attach_pronoun((gchar *)ls->data); l2=g_list_first(l2); while(l2) { printf("[%s]\n",(gchar *)l2->data); l2=g_list_next(l2); } ls=g_list_next(ls); } ls=sub_roots_sp("دَرَسَ"); ls=g_list_first(ls); printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data); 
while(ls) { printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data); l2=sub_roots_attach_pronoun((gchar *)ls->data); l2=g_list_first(l2); while(l2) { printf("[%s]\n",(gchar *)l2->data); l2=g_list_next(l2); } ls=g_list_next(ls); } }