[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Arabic morphology test



Arabic complex morphology! I have written a small function which forms a word
given the MEZAN AL SARFEE (the morphological pattern) and its root,
by calling something like

ae_morph(root,buff,size, AE_MORPH_DIAC_OFF, 0x623, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA, AE_MORPH_REST, AE_MORPH_END);

where the 0xXXX are Unicode letters (Ahrof al ziadaa)

and AE_MORPH_XXX are directives such as: get the next letter, get the previous letter, or get all but the last letters, each with or without diacritics

 

#define _GNU_SOURCE /* 1st to get rid of non-GNU, 2nd to have strndup and alloca */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<stdarg.h>
#include<assert.h>

#include<glib.h>
/* Byte count needed to encode code point x as UTF-8: g_unichar_to_utf8() is
 * called with a NULL output buffer, so only its return value is used. */
#define g_unichar_len(x) g_unichar_to_utf8((x),NULL)
/* Byte length of the UTF-8 character that starts at pointer x.
 * x appears twice in the expansion, so do not pass an expression with side effects. */
#define g_utf8_len(x) (g_utf8_next_char(x)-(x))

/*
 * Bit flags describing the combining marks found on one grapheme cluster.
 * ae_diacritics() sets bit (cp - 0x064B + 1) for the code points
 * U+064B..U+0652, which is why the flags below are listed in code point
 * order; every other combining mark is reported as AE_DIAC_OTHER.
 */
enum AE_DIACRITICS {
	AE_DIAC_OTHER = 0x001, /* any combining mark outside U+064B..U+0652 */
	AE_FATHATAN   = 0x002, /* U+064B */
	AE_DAMMATAN   = 0x004, /* U+064C */
	AE_KASRATAN   = 0x008, /* U+064D */
	AE_FATHA      = 0x010, /* U+064E */
	AE_DAMMA      = 0x020, /* U+064F */
	AE_KASRA      = 0x040, /* U+0650 */
	AE_SHADDA     = 0x080, /* U+0651 */
	AE_SUKUN      = 0x100  /* U+0652 */
};
/*
 * ae_diacritics: read one grapheme cluster (a code point plus the combining
 * marks that follow it) starting at t.
 *
 *   t      - UTF-8 text; the first code point is taken as the base whatever
 *            its class.
 *   c      - receives the first code point at t (0 when t is empty).
 *   result - receives an OR of AE_DIACRITICS flags, one per combining mark
 *            following the base (0 when there are none).
 *
 * Returns a pointer to the first byte after the cluster.  For an empty
 * string the input pointer itself is returned, so callers never receive a
 * pointer beyond the terminating NUL.
 */
gchar *ae_diacritics(const gchar *t,gunichar *c, gint *result) {
	gunichar ch;
	int r=0;
	gchar *txt=(gchar *)t;
	ch=g_utf8_get_char(txt);
	*c=ch;
	if (!*txt) {
		/* g_utf8_find_next_char() always advances by at least one byte, so
		 * calling it on the terminator would step outside the string. */
		*result=0;
		return txt;
	}
	txt=g_utf8_find_next_char(txt,NULL);
	ch=g_utf8_get_char(txt);
	while(*txt && g_unichar_break_type(ch)==G_UNICODE_BREAK_COMBINING_MARK) {
		switch(ch) {
		case 0x064B: case 0x064C: case 0x064D:
		case 0x064E: case 0x064F: case 0x0650:
		case 0x0651: case 0x0652:
			/* U+064B maps to bit 1 (AE_FATHATAN) ... U+0652 to bit 8 (AE_SUKUN) */
			r|=1<<(ch-0x064B+1);
			break;
		default:
			r|=AE_DIAC_OTHER;
			break;
		}
		txt=g_utf8_find_next_char(txt,NULL);
		ch=g_utf8_get_char(txt);
	}
	*result=r;
	return txt;
}
/*
 * ae_back_gc: move backwards from t to the start of the preceding grapheme
 * cluster.
 *
 * Steps back one code point at a time and stops at the first position whose
 * byte is NUL or whose code point is not a combining mark.  The code point
 * found there is stored in *c and the position is returned.
 *
 * The function has no notion of where the string begins: the caller must
 * make sure a non-combining code point exists before t.
 */
gchar *ae_back_gc(const gchar *t,gunichar *c) {
	gchar *pos=(gchar *)t;
	gunichar cp;
	for (;;) {
		pos=g_utf8_prev_char(pos);
		cp=g_utf8_get_char(pos);
		if (!*pos) break;
		if (g_unichar_break_type(cp)!=G_UNICODE_BREAK_COMBINING_MARK) break;
	}
	*c=cp;
	return pos;
}
/*
 * ae_get_diacritics: convenience wrapper around ae_diacritics() that returns
 * only the AE_DIACRITICS flag set of the cluster starting at txt.
 */
int ae_get_diacritics(gchar *txt) {
	gunichar first_cp;
	int flags;
	(void)ae_diacritics(txt,&first_cp,&flags);
	return flags;
}
/*
 * ae_gc_len: count the code points of t that are not combining marks.
 * Combining marks (including any at the very start of t) add nothing to
 * the count.
 */
gint ae_gc_len(const gchar *t) {
	const gchar *pos=t;
	gint count=0;
	while (*pos) {
		gunichar cp=g_utf8_get_char(pos);
		if (g_unichar_break_type(cp)!=G_UNICODE_BREAK_COMBINING_MARK)
			count++;
		pos=g_utf8_find_next_char(pos,NULL);
	}
	return count;
}
/*
 * ae_last_stress_expand: expand a shadda carried by the LAST grapheme
 * cluster of txt.
 *
 * When the last cluster has a shadda (U+0651) the result is
 *     <everything before the cluster> base U+064E base <other marks>
 * i.e. the base letter is written twice, the first copy followed by a fatha
 * and the second by the cluster's remaining marks (the shadda is dropped).
 * Otherwise the result is a plain copy of txt.
 *
 * Returns a newly malloc'ed string the caller must free().  An empty input
 * yields an empty copy.  The process is aborted when memory cannot be
 * allocated for the expanded form.
 */
gchar *ae_last_stress_expand(const gchar *txt) {
	gunichar ch;
	gint ch_len,gc_len,l;
	int diac=0;
	gchar *str , *sh, *ptr,*tmp;
	l=strlen(txt);
	/* nothing to step back over: ae_back_gc() would read before the string */
	if (l==0) return strdup(txt);
	tmp=ae_back_gc(txt+l,&ch);
	gc_len=ae_diacritics(tmp,&ch, &diac)-tmp;
	ch_len=g_unichar_len(ch);
	if (!(diac&AE_SHADDA)) return strndup(txt,l);
	/* shadda (2 bytes) goes away, fatha (2 bytes) and one more base come in */
	str=malloc(l+ch_len+1);
	/* the allocation must not live inside assert(): NDEBUG would remove it */
	if (str==NULL) abort();
	memcpy(str, txt, l-gc_len+ch_len);
	ptr=str+l-gc_len+ch_len;
	ptr+=g_unichar_to_utf8(0x64E,ptr);
	ptr+=g_unichar_to_utf8(ch,ptr);
	/* add rest diact without shadda */
	sh=g_utf8_strchr(txt+l-gc_len+ch_len,-1,0x0651);
	if (sh-tmp-ch_len>0) {
		/* marks sitting between the base and the shadda */
		memcpy(ptr, tmp+ch_len, sh-tmp-ch_len);
		ptr+=sh-tmp-ch_len;
	}
	if (tmp+gc_len-sh-2>0) {
		/* marks sitting after the 2-byte shadda */
		memcpy(ptr, sh+2, tmp+gc_len-sh-2);
		ptr+=tmp+gc_len-sh-2;
	}
	*ptr=0;
	return str;
}
/* the next (disabled) function expands the last stress even if the stress is not on the last char */
/*
gchar *ae_last_stress_expand(const gchar *txt) {
	gunichar ch;
	gint ch_len,gc_len,l,tmp;
	int diac=0;
	gchar *r,*s , *sh, *ptr;
	l=strlen(txt);
	sh=g_utf8_strrchr(txt,l,0x651);
	if (sh) {
		ptr=ae_back_gc(sh,&ch);
		gc_len=ae_diacritics(ptr,&ch, &diac)-ptr;
		ch_len=g_unichar_len(ch);
		assert((s=malloc(l+ch_len+1))!=NULL);
		memcpy(s, txt, l);
		r=s; s+=ptr-txt+ch_len;
		s+=g_unichar_to_utf8(0x64E,s);
		s+=g_unichar_to_utf8(ch,s);
		if (sh-ptr-ch_len>0) {
			memcpy(s, ptr+ch_len, sh-ptr-ch_len);
			s+=sh-ptr-ch_len;
		}
		if (txt+l-sh-2>0) {
			memcpy(s, sh+2, txt+l-sh-2);
			s+=txt+l-sh-2;
		}
		
		*s=0;
		return r;
	} else {
		return strndup(txt,l);
	}
}
*/
/*
 * ae_destress: return a copy of txt in which every grapheme cluster carrying
 * a shadda (U+0651) is written out as
 *     base U+0652 base <the cluster's other marks>
 * i.e. the doubled letter is spelled explicitly, the first copy with a
 * sukun.  Clusters without a shadda are copied unchanged.
 *
 * Returns a newly malloc'ed string the caller must free().  The process is
 * aborted when memory cannot be obtained.
 */
gchar *ae_destress(const gchar *txt) {
	gunichar ch;
	gint ch_len,gc_len,s,tmp; /* length of char and grapheme cluster */
	int diac=0;
	gchar *str,*ptr,*sh,*grown;
	str=strdup(txt);
	if (str==NULL) abort();
	ptr=str;
	s=strlen(txt)+1;
	while(*txt) {
		gc_len=ae_diacritics(txt,&ch, &diac)-txt;
		ch_len=g_unichar_len(ch);
		if (diac&AE_SHADDA) {
			/* sukun replaces the shadda byte for byte, so the output only
			 * grows by the second copy of the base letter */
			tmp=ptr-str; s+=ch_len;
			/* the reallocation must not live inside assert(): with NDEBUG
			 * the buffer would never grow and the writes below would overflow */
			grown=realloc(str,s);
			if (grown==NULL) abort();
			str=grown;
			ptr=str+tmp;
			ptr+=g_unichar_to_utf8(ch,ptr);
			*ptr++='\331'; *ptr++='\222'; /* U+0652 sukun */
			ptr+=g_unichar_to_utf8(ch,ptr);
			/* add rest diact without shadda */
			sh=g_utf8_strchr(txt,-1,0x0651);
			if (sh-txt-ch_len>0) {
				memcpy(ptr, txt+ch_len, sh-txt-ch_len);
				ptr+=sh-txt-ch_len;
			}
			if (txt+gc_len-sh-2>0) {
				memcpy(ptr, sh+2, txt+gc_len-sh-2);
				ptr+=txt+gc_len-sh-2;
			}
			txt+=gc_len;
		} else {
			/* source and destination are different buffers */
			memcpy(ptr, txt, gc_len);
			txt+=gc_len;
			ptr+=gc_len;
		}
	}
	*ptr=0;
	return str;
}
/*
 * ae_destress_working: backup variant of ae_destress().  It produces the
 * same rewrite (base U+0652 base <other marks> for every cluster carrying a
 * shadda) but copies the base letter's bytes from the input instead of
 * re-encoding the code point.
 *
 * Returns a newly malloc'ed string the caller must free().  The process is
 * aborted when memory cannot be obtained.
 */
gchar *ae_destress_working(const gchar *txt) {
	gunichar ch;
	gint ch_len,gc_len,s,tmp; /* length of char and grapheme cluster */
	int diac=0;
	gchar *str,*ptr,*sh,*grown;
	str=strdup(txt);
	if (str==NULL) abort();
	ptr=str;
	s=strlen(txt)+1;
	while(*txt) {
		ch_len=g_utf8_find_next_char(txt,NULL)-txt;
		gc_len=ae_diacritics(txt,&ch, &diac)-txt;
		if (diac&AE_SHADDA) {
			tmp=ptr-str; s+=ch_len;
			/* the reallocation must not live inside assert(): with NDEBUG
			 * the buffer would never grow and the writes below would overflow */
			grown=realloc(str,s);
			if (grown==NULL) abort();
			str=grown;
			ptr=str+tmp;
			memcpy(ptr, txt, ch_len); ptr+=ch_len;
			*ptr++='\331'; *ptr++='\222'; /* U+0652 sukun */
			memcpy(ptr, txt, ch_len); ptr+=ch_len;
			/* add rest diact without shadda */
			sh=g_utf8_strchr(txt,-1,0x0651);
			if (sh-txt-ch_len>0) {
				memcpy(ptr, txt+ch_len, sh-txt-ch_len);
				ptr+=sh-txt-ch_len;
			}
			if (txt+gc_len-sh-2>0) {
				memcpy(ptr, sh+2, txt+gc_len-sh-2);
				ptr+=txt+gc_len-sh-2;
			}
			txt+=gc_len;
		} else {
			/* source and destination are different buffers */
			memcpy(ptr, txt, gc_len);
			txt+=gc_len;
			ptr+=gc_len;
		}
	}
	*ptr=0;
	return str;
}

/*
 * ae_restress_: in-place inverse of the destress rewrite.
 *
 * Whenever a cluster carrying a sukun is immediately followed by a cluster
 * with the same base code point, the pair is collapsed into
 *     base U+0651 <marks of the second cluster>
 * Only the base letter of the first cluster is kept; its marks are not
 * copied.  Every other cluster is moved down unchanged.  The result is never
 * longer than the input, so the rewrite happens inside txt itself.
 *
 * Returns txt.
 */
gchar *ae_restress_(gchar *txt) {
	gchar *const head=txt;
	gchar *rd=txt;  /* next unread cluster */
	gchar *wr=txt;  /* next output position, never ahead of rd */
	gunichar base;
	int marks=0;
	while (*rd) {
		gint base_len=g_utf8_len(rd);
		gchar *cluster_end=ae_diacritics(rd,&base,&marks);
		gint cluster_len=cluster_end-rd;
		gchar *pair_end;
		gint tail_len;

		if (!(marks&AE_SUKUN) || g_utf8_get_char(cluster_end)!=base) {
			/* ordinary cluster: slide it down to the write position */
			if (wr!=rd) memmove(wr, rd, cluster_len);
			rd=cluster_end;
			wr+=cluster_len;
			continue;
		}

		/* keep the base letter alone, then a shadda in place of the sukun
		 * (both marks are two bytes long, so this never overtakes rd) */
		if (wr!=rd) memmove(wr, rd, base_len);
		rd=cluster_end;
		wr+=base_len;
		*wr++='\331';
		*wr++='\221';

		/* append the marks of the second, identical letter */
		pair_end=ae_diacritics(rd,&base,&marks);
		tail_len=(pair_end-rd)-base_len;
		if (wr!=rd+base_len && tail_len) memmove(wr, rd+base_len, tail_len);
		rd=pair_end;
		wr+=tail_len;
	}
	*wr=0;
	return head;
}
/* TODO: rewrite destress and restress as recursion and benchmark */
/*
 * Directives accepted in the variadic list of ae_morph().  Positive values
 * in that list are literal code points; these directives are zero or
 * negative so the two cannot collide.
 * A backward-moving set (BACK / PREV / GET_PREV) was sketched by the author
 * but is not defined.
 */
enum AE_MORPH_MODES {
	AE_MORPH_END        =   0, /* terminates the directive list */
	AE_MORPH_DIAC_ON    =  -1, /* copy root letters with their own marks */
	AE_MORPH_DIAC_OFF   =  -2, /* copy root letters without marks */
	AE_MORPH_DIAC_KASRA =  -3, /* replace marks with a kasra */
	AE_MORPH_DIAC_DAMMA =  -4, /* replace marks with a damma */
	AE_MORPH_DIAC_FATHA =  -5, /* replace marks with a fatha */
	AE_MORPH_SKIP       =  -6, /* drop the next root letter */
	AE_MORPH_NEXT       =  -7, /* emit the next root letter and consume it */
	AE_MORPH_GET_NEXT   =  -8, /* emit the next root letter without consuming it */
	AE_MORPH_REST       =  -9, /* emit all remaining root letters */
	AE_MORPH_REST_1     = -10, /* rest but 1 */
	AE_MORPH_REST_2     = -11, /* rest but 2 */
	AE_MORPH_MAX        = -12
};

/* Maps x to AE_MORPH_MAX-(x); AE_MORPH_MAX is the lowest directive value, so
 * a non-negative x lands at or below it.
 * NOTE(review): presumably meant to pack a small number into a directive
 * slot; neither macro is used in the code visible here -- confirm intent. */
#define AE_MORPH_NUM(x) (AE_MORPH_MAX-(x))
/* Inverse mapping: AE_MORPH_GET_NUM(AE_MORPH_NUM(n)) evaluates to n. */
#define AE_MORPH_GET_NUM(x) (AE_MORPH_NUM(0)-(x))
/* TODO: size is not used!! */
gchar *ae_morph(gchar *root,gchar *buff,gint size,...) {
	gchar *ptr=buff,*txt=root,*s1,*s2,s3;
	int op,l,ll,diac,diac_st=1; /* 0:no diac,1:diac on*/
	gunichar ch;
	va_list ap;
	va_start(ap, size);
	while((op = va_arg(ap, gunichar))!=AE_MORPH_END) {
		/* printf("<%d>\n",op); */
		if (op>0) {
			ae_diacritics(txt,&ch, &diac);
			/* if (op==0x671 && (ch==0x671 || ch==0x622 || ch==0x623 || ch==0x625 || 0x672 || 0x673) ) continue; */
			ptr+=g_unichar_to_utf8(op,ptr);
		} else switch(op) {
		case AE_MORPH_DIAC_ON: diac_st=1; break;
		case AE_MORPH_DIAC_OFF: diac_st=0; break;
		case AE_MORPH_DIAC_FATHA:
		case AE_MORPH_DIAC_DAMMA:
		case AE_MORPH_DIAC_KASRA:
			diac_st=op; break;
		case AE_MORPH_SKIP:
			txt=ae_diacritics(txt,&ch, &diac);
			break;
		case AE_MORPH_NEXT:
		case AE_MORPH_GET_NEXT:
			if (diac_st==1) {
				l=ae_diacritics(txt,&ch, &diac)-txt;
				memcpy(ptr,txt,l);
				ptr+=l;
				if (op==AE_MORPH_NEXT) txt+=l;
			} else {
				ll=ae_diacritics(txt,&ch, &diac)-txt;
				l=g_unichar_to_utf8(ch,NULL);
				memcpy(ptr,txt,l);
				ptr+=l;
				if (op==AE_MORPH_NEXT) txt+=ll;
				if (diac_st) ptr+= g_unichar_to_utf8( 0x64E + diac_st - AE_MORPH_DIAC_FATHA , ptr);
			}
			break;
		case AE_MORPH_REST_1:
		case AE_MORPH_REST_2:
		case AE_MORPH_REST:
			if (diac_st==1) {
				*ptr=0;
				l=strlen(txt);
				strcpy(ptr,txt);
				txt+=l;
				ptr+=l;
			} else {
				while(*txt) {
					txt=ae_diacritics(txt,&ch, &diac);
					ptr+=g_unichar_to_utf8(ch,ptr);
					if (diac_st) ptr+= g_unichar_to_utf8( 0x64E + diac_st - AE_MORPH_DIAC_FATHA , ptr);
				}
			}
			if (op!=AE_MORPH_REST) {
				txt=ae_back_gc(txt,&ch);
				*(ptr=ae_back_gc(ptr,&ch))=0;
			}
			if (op==AE_MORPH_REST_2) {
				txt=ae_back_gc(txt,&ch);
				*(ptr=ae_back_gc(ptr,&ch))=0;
			}
			break;
		default:
			/* TODO: */
			printf("not implemented-d\n");
			break;
		} /* END switch */
		/* printf(":\n"); */
	} /* END WHILE */
	*ptr=0;
	va_end(ap);
	return buff;
}
/*
 * ae_filter_: convert its input, in place, to spell-format (mainly deals
 * with alef and hamza).  Returns txt.
 *
 * Rules noted by the original author (translated):
 *  + a soft alef carries no diacritics
 *  + if it is vocalized with anything other than the implied fatha
 *    (shadda included) it goes back to its origin, waw or ya
 *  + a soft alef followed by a soft alef: the second one goes back to its
 *    origin (saara - faa'ala pattern - saayara)
 *  + if its origin is neither waw nor ya, it is a hamza
 *  + a hamza followed by a soft alef is spelled as a madda
 *  + hamzat al-wasl is added to avoid starting with an unvowelled letter
 *
 * What the code does: U+0672 (alef whose origin is waw) and U+0673 (alef
 * whose origin is ya) become U+0648 / U+064A when their mark set is not
 * exactly AE_FATHA or when the previous base letter is U+0627; otherwise
 * they become a plain alef U+0627.  All replacements are two bytes long,
 * like the letters they replace, so the string length never changes.
 */
gchar *ae_filter_(gchar *txt) {
	gchar *const head=txt;
	gchar *out=txt;
	gunichar cur,prev=0;
	int marks=0;
	while (*txt) {
		gint first_len=g_utf8_len(txt);
		gint cluster_len=ae_diacritics(txt,&cur,&marks)-txt;
		if (cur==0x672 || cur==0x673) {
			if (marks!=AE_FATHA || prev==0x627) {
				/* back to the original weak letter: waw for U+0672, ya for U+0673 */
				*out++='\331';
				*out++=(cur==0x672) ? '\210' : '\212';
				if (out!=txt) memmove(out, txt+first_len, cluster_len-first_len);
			} else {
				/* plain alef U+0627 */
				*out++='\330';
				*out++='\247';
			}
			out+=cluster_len-first_len;
		} else {
			out+=cluster_len;
		}
		txt+=cluster_len;
		prev=cur;
	}
	return head;
}
/*
 * sub_roots_sp: the returned list in almost spell-format
 * 	which needs to be filtered to be in spell-format
 */
GList *sub_roots_sp(const gchar *root_str) {
	/* TODO: ضبط عين الفعل */
	/* TODO: التعامل مع الهمزات وحروف العلة والإملاء */
	GList *ls = NULL;
	gchar *base_root, *root,*buff,*str,*s1,*s2,*s3;
	gint s,size;
	gunichar ch;
	gint diac;
	root=ae_destress(root_str);
	s=strlen(root)+1; size=s+16;
	buff=(gchar *)malloc(size);
	base_root=strndup(root_str,size-1);
	ae_restress_(base_root);
	ls = g_list_append (ls, base_root);
	/* أفعل */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x623,  AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA, AE_MORPH_REST,
		AE_MORPH_END)),size-1));
	/* فاعل */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_NEXT, AE_MORPH_DIAC_FATHA,
		0x627, AE_MORPH_REST,
		AE_MORPH_END)),size-1));
	/* فعّل */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_NEXT, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,
		AE_MORPH_DIAC_ON, 0x651, 0x64E, AE_MORPH_REST,
		AE_MORPH_END)),size-1));
	/* انفعل */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_FATHA, 0x671, 0x646, 0x652,AE_MORPH_REST,
		AE_MORPH_END)),size-1));
	/* افعلّ */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, AE_MORPH_DIAC_FATHA,
		AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x651,0x64E,
		AE_MORPH_END)),size-1));
	/* تفعّل */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_ON, 0x62A, 0x64E, AE_MORPH_NEXT, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,
		AE_MORPH_DIAC_ON, 0x651, 0x64E, AE_MORPH_REST,
		AE_MORPH_END)),size-1));
	/* تفاعل */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT, 0x64E,
		0x627, AE_MORPH_DIAC_FATHA, AE_MORPH_REST,
		AE_MORPH_END)),size-1));
	/* استفعل */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_ON, 0x671, 0x633, 0x652, 0x62A, 0x64E,
		AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x652 , AE_MORPH_NEXT, 0x64E,
		AE_MORPH_DIAC_ON, AE_MORPH_REST,
		AE_MORPH_END)),size-1));	
	/* افعوعل */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, AE_MORPH_GET_NEXT, 0x64E,
		0x648, 0x652, AE_MORPH_NEXT, 0x64E, AE_MORPH_DIAC_ON, AE_MORPH_REST,
		AE_MORPH_END)),size-1));
	ae_diacritics(root,&ch, &diac);
	/* TODO: ضبط عين الفعل */
	/* افتعل */
	switch(ch) {
		case 0x648: case 0x64A:
			/* اتعل */
			ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
				AE_MORPH_DIAC_OFF, 0x671,  AE_MORPH_SKIP, 0x62A, 0x64E,
				AE_MORPH_DIAC_ON, AE_MORPH_REST,
				AE_MORPH_END)),size-1));
			break;
		case 0x630: 
			/* افدعل */
			ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
				AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_SKIP, 0x62F , 0x652, 0x62F, 0x64E,
				AE_MORPH_DIAC_ON, AE_MORPH_REST,
				AE_MORPH_END)),size-1));
		case 0x62F: case 0x632:
			/* افدعل */
			ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
				AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x62F, 0x64E,
				AE_MORPH_DIAC_ON, AE_MORPH_REST,
				AE_MORPH_END)),size-1));
			break;
		case 0x635: case 0x636: case 0x637: case 0x638:
			/* افطعل */
			ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
				AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x637, 0x64E, 
				AE_MORPH_DIAC_ON, AE_MORPH_REST,
				AE_MORPH_END)),size-1));
			break;
		default:
			/* افتعل */
			ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
				AE_MORPH_DIAC_OFF, 0x671, AE_MORPH_NEXT, 0x652, 0x62A, 0x64E, 
				AE_MORPH_DIAC_FATHA, AE_MORPH_REST,
				AE_MORPH_END)),size-1));
	}
	free(buff);
	free(root);
	return ls;
}
GList *sub_roots_attach_pronoun(const gchar *root_str) {
	GList *ls = NULL;
	gchar *base_root, *root, *expanded;
	gchar *buff,*str,*s1,*s2,*s3;
	gint s,size;
	gunichar ch;
	gint diac;
	root=ae_destress(root_str);
	expanded=ae_last_stress_expand(root_str); /* use restress */
	s=strlen(root)+1; size=s+16;
	buff=(gchar *)malloc(size);
	/* هو فعل */
	base_root=strndup(root_str,size-1);
	ae_restress_(base_root);
	ls = g_list_append (ls, base_root);
	/* فعلْ ((تُ)|(نا)|(تَ)|(تما)|(تم)|(تِ)|(تنّ)|(ن)) */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x62A, 0x64F,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x646, 0x64E, 0x627,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x62A, 0x64E,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x645,0x64E, 0x627,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x645,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x62A, 0x650,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x62A, 0x64F, 0x646, 0x651, 0x64E,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x646, 0x64E,
		AE_MORPH_END)),size-1));

	/* هما/هما/هن فعلَ ( (ا)|(تْ)|(تا) ) */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x64E, 0x627,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x64E, 0x62A, 0x64E, 0x627,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x64E, 0x62A, 0x652,
		AE_MORPH_END)),size-1));
	/* هم فعلُ (وا) */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_ON, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x64f, 0x648, 0x652,0x627,
		AE_MORPH_END)),size-1));
	/* المضارع */
	/* (ي|ت)فعلُ */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F,
		AE_MORPH_END)),size-1));
	/*  (أ|ن)فعلُ  */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x623, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x646, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT, 0x64F,
		AE_MORPH_END)),size-1));
	/* (ي|ت)فعلْ ( (ان)|(ون) ) */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x627,0x646,0x650,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x627,0x646,0x650,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x648,0x646,0x64E,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x648,0x646,0x64E,
		AE_MORPH_END)),size-1));
	/* تفعلِ (ين) */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(root,buff,size,
		AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x650, 0x64A,0x652, 0x646,0x64E,
		AE_MORPH_END)),size-1));
	/* ((ت|(ي))فعلْ (ن) */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_OFF, 0x64A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1, AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x646,0x64E,
		AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_OFF, 0x62A, 0x64E, AE_MORPH_NEXT,0x652, AE_MORPH_DIAC_ON,
		AE_MORPH_REST_1,AE_MORPH_DIAC_OFF, AE_MORPH_NEXT,0x652, 0x646,0x64E,
		AE_MORPH_END)),size-1));
	/* الأمر */
	/* افعل ((ا)|(وا)|(ي)|(ن))؟ */
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, AE_MORPH_END)),size-1));
	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x627, AE_MORPH_END)),size-1));

	ls = g_list_append (ls, strndup(ae_restress_(ae_morph(expanded,buff,size,
		AE_MORPH_DIAC_ON, 0x671, AE_MORPH_REST_1, AE_MORPH_DIAC_OFF,
		AE_MORPH_NEXT, 0x652, 0x648, 0x64F,0x627,
		AE_MORPH_END)),size-1));

	free(buff);
	free(expanded);
	free(root);
	return ls;
}
/* test */
int main() {
	gchar str[]="جدَّ";
	gchar *s;
	GList *ls,*l2;
	ls=sub_roots_sp("جدَّ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("دَرَسَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("سَهُلَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("قَبُحَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("قَبِلَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("وَصَلَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("قَبِلَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("ذَكَرَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("ضَرَبَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("دَحْرَجَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("زَلْزَلَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("بَعْثَرَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("وَعَدَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("نَسِيَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("قَٲَلَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",
			ae_filter_((gchar *)ls->data));
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("سَٳَرَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",
			ae_filter_((gchar *)ls->data));
		ls=g_list_next(ls);
	}

	ls=sub_roots_sp("وَشَى");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("أَمَرَ");
	ls=g_list_first(ls);
	while(ls) {
		printf("[%s]\n",(gchar *)ls->data);
		ls=g_list_next(ls);
	}

	ls=sub_roots_sp("جَدَّ");
	ls=g_list_first(ls);
	printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data);
	while(ls) {
		printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data);
		l2=sub_roots_attach_pronoun((gchar *)ls->data);
		l2=g_list_first(l2);
		while(l2) {
			printf("[%s]\n",(gchar *)l2->data);
			l2=g_list_next(l2);
		}
		ls=g_list_next(ls);
	}
	ls=sub_roots_sp("دَرَسَ");
	ls=g_list_first(ls);
	printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data);
	while(ls) {
		printf("--------\n\tاسناد [%s] للضمائر\n",(gchar *)ls->data);
		l2=sub_roots_attach_pronoun((gchar *)ls->data);
		l2=g_list_first(l2);
		while(l2) {
			printf("[%s]\n",(gchar *)l2->data);
			l2=g_list_next(l2);
		}
		ls=g_list_next(ls);
	}
}