[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

SIRAGI, I need help !



Hi,

I would like to talk to you about a small technical problem which has blocked me for 
several days... 

As you know, OCR processing proceeds like this:

1- read image file
2- detect lines
3- detect characters in a line
4- compare the detected characters with a database of stored characters and decide 
which stored character each one matches.
5- write the text resulting from the OCR and do some extra processing (spell 
checking, etc.)

My technical concern is between stages 3 and 4. After detecting a character's 
pixels and before sending them to the comparison system, we should make some 
adjustments to the character's image:
- we should clean it (remove isolated pixels, etc.)
- we should skeletonize the character, i.e. transform it so that it becomes thin: a 
shape made only of one-pixel-wide strokes. This enhances the effectiveness of 
the comparison.
- we should resize (resample) the character to make it fit in one 
standard, fixed-size matrix of pixels.

I am having trouble writing a good program for the skeletonization of characters 
(thinning). I tried many methods, but I lack documents describing thinning 
algorithms in detail. Attached to this mail you can find some programs that I 
have written. But I'm not satisfied with the output of these programs: either 
they do not respect the overall shape of the character, or they eliminate too 
many pixels, etc.

So, if someone has documents, libraries or programs which can help us with 
"shape thinning or skeletonization", they will be very welcome.

Best regards 
Tarik

/**************************************/
/* thinning by Marthon's algorithm */
/* Tarik Fdil <tfdil at sagma dot ma> */
/* april 2005 */
/* Licence GPL cf. http://www.gnu.org */
/**************************************/
#include <tiffio.h>
#include <stdlib.h>
/*
 * Helper macros.  isblack/iswhite read the identifiers w, h and buf from the
 * scope in which they are used (image width, height and pixel array, with
 * pixel (a,b) stored at buf[b*w+a]).
 *
 * isblack(a,b): 1 when (a,b) is inside the image and the low 24 bits of the
 *               pixel are all zero, otherwise 0.
 * iswhite(a,b): 1 when (a,b) is outside the image or the low 24 bits of the
 *               pixel are all set, otherwise 0.
 *
 * Every expansion is wrapped in parentheses so that it behaves like a single
 * value inside larger expressions such as max(dx,dy)==3; without them the
 * comparison would bind to the last branch of the conditional only.
 * Arguments are evaluated more than once: do not pass expressions with side
 * effects.
 */
#define isblack(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h)?0:((buf[(b)*w+(a)] & 0x00FFFFFF)?0:1))
#define iswhite(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h)?1:(((buf[(b)*w+(a)] & 0x00FFFFFF) ^ 0x00FFFFFF)?0:1))
#define min(a,b) (((a) < (b))? (a) : (b))
#define max(a,b) (((a) > (b))? (a) : (b))

/*
 * marthon - thin the black shapes of an image in place.
 *
 * w, h : image width and height in pixels.
 * buf  : w*h pixels stored row by row; pixel (x,y) is buf[y*w+x].  isblack()
 *        decides which pixels are black.  Deleted pixels are overwritten
 *        with 0xffffffff.
 *
 * Each pass looks at every black pixel, sums the offsets of its black
 * 8-neighbours into (dx,dy) and counts them in nv.  With d = |dx|+|dy| the
 * pixel is marked for deletion when d > 3, or when d == 3 and either nv is
 * 2 or 6, or nv is 3 or 5 and one of |dx|,|dy| equals 3.  Marked pixels are
 * whitened only after the whole image has been scanned, so a pass never
 * sees its own deletions.  Passes repeat until one deletes nothing.
 *
 * Only black pixels are candidates: every pass that asks for another pass
 * has removed at least one black pixel, so the loop always terminates.
 *
 * The function returns without touching buf when the image is empty, buf is
 * NULL, or the per-pixel work buffer cannot be allocated.
 */
void marthon(uint32 w, uint32 h,uint32 *buf)
{
int x,y,i,j,dx,dy,nv,d,dmax,more;
size_t idx,npix;
unsigned char *doomed;		/* 1 = delete this pixel at the end of the pass */
const uint32 white=0xffffffff;

if(w==0 || h==0 || buf==NULL) return;
if((size_t)w > (size_t)-1 / h) return;	/* w*h does not fit in size_t */
npix = (size_t)w * h;
doomed = malloc(npix);			/* heap, not stack: images can be large */
if(doomed==NULL) return;

do {
	more=0;
	for(x=0; x<w; x++)
		for(y=0; y<h; y++)
		{
			idx = (size_t)y*w + x;
			doomed[idx] = 0;
			/* background pixels are never candidates */
			if(!(isblack(x,y))) continue;

			dx=dy=nv=0;
			for(i= -1; i<2; i++)
				for(j= -1; j<2; j++) {
					if(i==0 && j==0) continue;	/* skip the pixel itself */
					if((isblack(x+i,y+j))) {
						dx += i;
						dy += j;
						nv++;
					}
				}
			dx = abs(dx);
			dy = abs(dy);
			d = dx+dy;
			dmax = (dx > dy) ? dx : dy;
			if(d > 3 ||
			   (d==3 && (nv==2 || nv==6 ||
				     ((nv==3 || nv==5) && dmax==3)))) {
				doomed[idx] = 1;
				more=1;
			}
		}

	/* apply all deletions of this pass at once */
	if(more)
		for(idx=0; idx<npix; idx++)
			if(doomed[idx]) buf[idx]=white;
} while(more);

free(doomed);
} /* marthon */
/**************************************/
/* thinning, skeletonization */
/* Tarik Fdil <tfdil at sagma dot ma> */
/* april 2005 */
/* Licence GPL cf. http://www.gnu.org */
/**************************************/
#include <tiffio.h>
#include <stdlib.h>
/*
 * Helper macros.  isblack/iswhite read the identifiers w, h and buf from the
 * scope in which they are used (image width, height and pixel array, with
 * pixel (a,b) stored at buf[b*w+a]).
 *
 * isblack(a,b): 1 when (a,b) is inside the image and the low 24 bits of the
 *               pixel are all zero, otherwise 0.
 * iswhite(a,b): 1 when (a,b) is outside the image or the low 24 bits of the
 *               pixel are all set, otherwise 0.
 *
 * Every expansion is wrapped in parentheses so that it behaves like a single
 * value inside larger expressions (for example max(a,b)==3).  Arguments are
 * evaluated more than once: do not pass expressions with side effects.
 */
#define max(a,b) (((a) > (b))? (a) : (b))
#define min(a,b) (((a) < (b))? (a) : (b))
#define isblack(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h)?0:((buf[(b)*w+(a)] & 0x00FFFFFF)?0:1))
#define iswhite(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h)?1:(((buf[(b)*w+(a)] & 0x00FFFFFF) ^ 0x00FFFFFF)?0:1))
/*
 * 3x3 neighbourhood of a pixel, indexed [x][y] with the pixel itself at
 * [1][1].  Cells hold 'b' (black) or 'w' (white); the connectivity code also
 * writes 'v' into cells it has already visited.
 */
typedef char matrix[3][3];

/* Forward declaration: thinning() calls this before its definition below. */
static int is_this_pixel_required(uint32 w,uint32 h,uint32 x, uint32 y,uint32 *buf);

/*
 * thinning - thin the shapes of an image in place by peeling border pixels.
 *
 * w, h : image width and height in pixels.
 * buf  : w*h pixels stored row by row; pixel (x,y) is buf[y*w+x].  Removed
 *        pixels are overwritten with 0xffffffff.
 *
 * Every non-white pixel is given the direction of the first white neighbour
 * found, tested in the order north (y-1), east (x+1), south (y+1), west
 * (x-1), then the diagonals ne, se, sw, nw; pixels on the image edge take
 * the direction of that edge, and pixels with no white neighbour get "none"
 * and are never touched.  The image is swept once per direction; during the
 * sweep for a direction, pixels of that direction are removed unless
 * is_this_pixel_required() says they must stay.  Removals take effect
 * immediately, so later pixels of the same sweep see them.  The whole cycle
 * repeats until a cycle removes nothing.
 */
void thinning(uint32 w, uint32 h,uint32 *buf)
{
int more,x,y;
const uint32 white=0xffffffff;
enum direction {north,east,south,west,ne,se,nw,sw,none} dir,dirpix;

do {
	more=0;
	for(dir=north; dir<=sw; dir++) {
		for (x=0; x<w; x++)
		for (y=0; y<h; y++)
		{
			if((iswhite(x,y))) continue;

			/* image edges first, then the first white neighbour */
			if(x==0) dirpix=west;
			else if(x==w-1) dirpix=east;
			else if(y==0) dirpix=north;
			else if(y==h-1) dirpix=south;
			else if((iswhite(x,y-1))) dirpix=north;
			else if((iswhite(x+1,y))) dirpix=east;
			else if((iswhite(x,y+1))) dirpix=south;
			else if((iswhite(x-1,y))) dirpix=west;
			else if((iswhite(x+1,y-1))) dirpix=ne;	/* north-east is (x+1,y-1) */
			else if((iswhite(x+1,y+1))) dirpix=se;
			else if((iswhite(x-1,y+1))) dirpix=sw;
			else if((iswhite(x-1,y-1))) dirpix=nw;
			else dirpix=none;

			if(dir != dirpix) continue;
			if (!is_this_pixel_required(w,h,x,y,buf)) {
				buf[y*w+x]=white;
				more=1;
			}
		}
	}
} while(more);

} /* thinning */

/*
 * propagation - spread the 'v' (visited) mark through a 3x3 neighbourhood.
 *
 * mat            : 3x3 cells indexed mat[x][y]; modified in place.
 * color          : value of the cells that may be turned into 'v'.
 * type_connexite : 8 - cells touching by a side or a corner are neighbours;
 *                  4 - only cells sharing a side are neighbours.
 *                  Any other value leaves mat untouched.
 *
 * On return every cell equal to color that can be reached from a cell
 * already holding 'v', stepping only through neighbouring cells equal to
 * color, holds 'v' as well.  The sweep is repeated until it marks nothing
 * new: a single sweep is not enough, because a cell marked behind the
 * current scan position would never pass the mark on to its own neighbours.
 */
void propagation(matrix mat, char color, char type_connexite)
{
int x,y,i,j,neighbour,changed;

if(type_connexite != 4 && type_connexite != 8) return;
if(color == 'v') return;	/* nothing can change; also keeps the loop finite */

do {
	changed=0;
	for(x=0; x<3; x++)
		for(y=0; y<3; y++) {
			if(mat[x][y] != 'v') continue;
			for(i=0; i<3; i++)
				for(j=0; j<3; j++) {
					if(mat[i][j] != color) continue;
					if(type_connexite == 8)
						neighbour = abs(i-x)<2 && abs(j-y)<2;
					else
						neighbour = (abs(i-x)==1 && j==y) ||
							    (i==x && abs(j-y)==1);
					if(neighbour) {
						mat[i][j]='v';
						changed=1;	/* each change consumes one cell: at most 9 */
					}
				}
		}
} while(changed);
}


/*
 * get_connex_number - count groups of cells of one colour in a 3x3
 * neighbourhood.
 *
 * m     : neighbourhood to examine; it is copied first and left unmodified.
 * color : 'b' or 'w', the colour whose groups are counted.  'w' groups are
 *         followed with 4-connectivity, any other colour with
 *         8-connectivity.
 *
 * The copy is scanned row by row.  Each cell that is neither the opposite
 * colour nor already marked 'v' starts a new group: it is marked 'v' and
 * propagation() is asked to spread that mark through the cells of the same
 * colour.  Returns the number of groups started.
 */
int get_connex_number(matrix m, int color)
{
matrix work;
int col,row,groups;
char other,connexity;

other = (color=='b') ? 'w' : 'b';
connexity = (color=='w') ? 4 : 8;

for(row=0; row<3; row++)
	for(col=0; col<3; col++)
		work[col][row] = m[col][row];

groups=0;
for(row=0; row<3; row++) {
	for(col=0; col<3; col++) {
		if(work[col][row]!=other && work[col][row]!='v') {
			work[col][row]='v';
			propagation(work,color,connexity);
			groups++;
		}
	}
}

return groups;
}

/*
 * is_this_pixel_required - decide whether pixel (x0,y0) must be kept.
 *
 * w, h, buf : the image, as used by isblack().
 * x0, y0    : coordinates of the pixel under test.
 *
 * The 3x3 neighbourhood centred on (x0,y0) is copied into a matrix of
 * 'b'/'w' cells; positions for which isblack() is 0 (including those
 * outside the image) become 'w'.  The white groups and the black groups of
 * that matrix are then counted with get_connex_number().
 *
 * Returns 0 (the pixel may be removed) when there is exactly one white
 * group and exactly one black group, 1 (keep it) otherwise.
 *
 * x0+dx is computed in unsigned arithmetic, so "x0 - 1" at x0 == 0 wraps to
 * a huge value; isblack() must reject it through its upper-bound test.
 */
static int is_this_pixel_required(uint32 w,uint32 h,uint32 x0, uint32 y0,uint32 *buf)
{
matrix around;
int dx,dy,white_groups,black_groups;

for(dy= -1; dy<2; dy++)
	for(dx= -1; dx<2; dx++)
		around[dx+1][dy+1] = (isblack(x0+dx,y0+dy))?'b':'w';

white_groups = get_connex_number(around,'w');
black_groups = get_connex_number(around,'b');

return !(white_groups == 1 && black_groups == 1);

} /* is_this_pixel_required */
/**************************************/
/* thinning by Tohmé's algorithm */
/* Tarik Fdil <tfdil at sagma dot ma> */
/* april 2005 */
/* Licence GPL cf. http://www.gnu.org */
/**************************************/
#include <tiffio.h>
/*
 * Helper macros.  isblack/iswhite read the identifiers w, h and buf from the
 * scope in which they are used (image width, height and pixel array, with
 * pixel (a,b) stored at buf[b*w+a]).
 *
 * isblack(a,b): 1 when (a,b) is inside the image and the low 24 bits of the
 *               pixel are all zero, otherwise 0.
 * iswhite(a,b): 1 when (a,b) is outside the image or the low 24 bits of the
 *               pixel are all set, otherwise 0.
 *
 * Every expansion is wrapped in parentheses so that it behaves like a single
 * value inside larger expressions (for example max(a,b)==3).  Arguments are
 * evaluated more than once: do not pass expressions with side effects.
 */
#define isblack(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h)?0:((buf[(b)*w+(a)] & 0x00FFFFFF)?0:1))
#define iswhite(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h)?1:(((buf[(b)*w+(a)] & 0x00FFFFFF) ^ 0x00FFFFFF)?0:1))
#define min(a,b) (((a) < (b))? (a) : (b))
#define max(a,b) (((a) > (b))? (a) : (b))

/*
 * tohme - thin the shapes of an image in place with a fixed set of 3x3
 * neighbourhood patterns.
 *
 * w, h : image width and height in pixels.
 * buf  : w*h pixels stored row by row; pixel (x,y) is buf[y*w+x].  Removed
 *        pixels are overwritten with 0xffffffff.
 *
 * The image is scanned column by column.  For every pixel that is not white
 * the blackness of its eight neighbours is read (names below: N is y-1,
 * S is y+1, W is x-1, E is x+1) and the pixel is removed when the
 * neighbourhood matches one of the patterns listed in the condition.
 * Removals are written immediately, so pixels visited later in the same
 * scan see them.  Scans repeat until one removes nothing.
 */
void tohme(uint32 w, uint32 h,uint32 *buf)
{
int col,row,removed,sides;
int bN,bS,bW,bE,bNW,bNE,bSW,bSE;
const uint32 white=0xffffffff;

do {
	removed=0;
	for (col=0; col<w; col++) {
		for (row=0; row<h; row++) {
			if(iswhite(col,row)) continue;

			bS  = isblack(col,row+1);
			bN  = isblack(col,row-1);
			bSW = isblack(col-1,row+1);
			bW  = isblack(col-1,row);
			bNW = isblack(col-1,row-1);
			bSE = isblack(col+1,row+1);
			bE  = isblack(col+1,row);
			bNE = isblack(col+1,row-1);

			/* each flag is 0 or 1, so this counts the black side neighbours */
			sides = bS + bW + bN + bE;

			if (	/* exactly three of the four side neighbours are black */
				sides == 3 ||
				/* only N among the sides, with a black NE or NW, nothing on the S row */
				(!bSW && !bS && !bSE && !bW && !bE && bN && (bNE || bNW)) ||
				/* only S among the sides, with a black SW or SE, nothing on the N row */
				(bS && !bW && !bE && !bNW && !bN && !bNE && (bSW || bSE)) ||
				/* only E among the sides, with a black NE or SE, nothing in the W column */
				(bE && !bSW && !bS && !bW && !bNW && !bN && (bNE || bSE)) ||
				/* only W among the sides, with a black NW or SW, nothing in the E column */
				(bW && !bS && !bSE && !bE && !bN && !bNE && (bNW || bSW)) ||
				/* corner patterns: two adjacent sides black, the opposite corner side clear */
				(!bS && !bSE && bW && !bE && bN) ||
				(!bSW && !bS && !bW && bE && bN) ||
				(bS && !bW && bE && !bNW && !bN) ||
				(bS && bW && !bE && !bN && !bNE)
			) {
				buf[row*w+col]=white;
				removed=1;
			}
		}
	}
} while(removed);

} /* tohme */