[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
SIRAGI, I need help !
- To: developer at arabeyes dot org
- Subject: SIRAGI, I need help !
- From: Tarik FDIL <tarik at linux dot sagma dot ma>
- Date: Tue, 17 May 2005 15:44:41 +0000
Hi,
I would like to talk to you about a small technical problem that has blocked me
for several days...
You know that OCR processing proceeds like this:
1- read image file
2- detect lines
3- detect characters in a line
4- compare characters detected with a database of stored characters and decide
which characters are related.
5- write text resulting from OCR and make some extra processing (spell
checking, etc.)
My technical concern lies between stages 3 and 4. After detecting a character's
pixels and before sending them to the comparison system, you should make some
adjustments to the character's image:
- we should clean it (remove isolated pixels, etc.)
- we should skeletonize the character, i.e. transform it so that it becomes
thin: a shape made only of one-pixel-wide strokes. This enhances the
effectiveness of the comparison.
- we should resize (resample) the character so that it fits in one standard,
fixed-size matrix of pixels.
I am having trouble writing a good program for the skeletonization (thinning)
of characters. I have tried many methods, but I lack documents describing
thinning algorithms in detail. Attached to this mail you will find some
programs that I have written, but I am not satisfied with their output: either
they do not respect the overall shape of the character, or they eliminate too
many pixels, etc.
So, if someone has documents, libraries or programs which can help us with
"shape thinning or skeletonization", they would be very welcome.
Best regards
Tarik
/**************************************/
/* thinning by Marthon algorithm */
/* Tarik Fdil <tfdil at sagma dot ma> */
/* april 2005 */
/* Licence GPL cf. http://www.gnu.org */
/**************************************/
#include <tiffio.h>
#include <stdlib.h>
/*
 * Pixel helpers.  isblack()/iswhite() expand in a scope that provides
 * w and h (image width/height) and buf (pixels stored row by row, pixel
 * (a,b) at buf[b*w+a]).  Only the low 24 bits of a pixel are examined.
 * Coordinates outside the image are reported as white (and not black).
 *
 * Every expansion is wrapped in its own parentheses so that the macros can be
 * combined with other operators: without them `max(dx,dy)==3` parses as
 * `(dx>dy) ? dx : (dy==3)` and `!isblack(..)` negates only the bounds test.
 * Arguments may be evaluated more than once; do not pass side effects.
 */
#define isblack(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h) ? 0 : ((buf[(b)*w+(a)] & 0x00FFFFFF) == 0))
#define iswhite(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h) ? 1 : ((buf[(b)*w+(a)] & 0x00FFFFFF) == 0x00FFFFFF))
#define min(a,b) (((a) < (b)) ? (a) : (b))
#define max(a,b) (((a) > (b)) ? (a) : (b))
/*
 * marthon - thin the black shapes of a raster image, in place.
 *
 * w, h : image width and height in pixels.
 * buf  : w*h pixels stored row by row (pixel (x,y) is buf[y*w+x]).  Black
 *        neighbours are recognised with the isblack() macro; erased pixels
 *        are overwritten with 0xFFFFFFFF.
 *
 * Each pass first scores every pixel from the positions of its black
 * 8-neighbours, then erases every pixel whose score exceeds 3.  Passes repeat
 * until one of them changes nothing.  If the score buffer cannot be
 * allocated the image is left untouched.
 */
void marthon(uint32 w, uint32 h,uint32 *buf)
{
    const uint32 white = 0xffffffff;
    unsigned char *d;          /* per-pixel score, at most 6, so a byte is enough */
    size_t npix, idx;
    int x, y, i, j, dx, dy, nv, more;

    if (w == 0 || h == 0)
        return;                /* empty image: nothing to thin */
    if ((size_t)w > (size_t)-1 / h)
        return;                /* w*h would not fit in a size_t */
    npix = (size_t)w * h;

    /* Heap, not a variable-length array: a full page scan would not fit on the stack. */
    d = malloc(npix);
    if (d == NULL)
        return;

    do {
        more = 0;

        /* Pass 1: score every pixel from the image as it stands now. */
        for (x = 0; x < w; x++)
            for (y = 0; y < h; y++) {
                int score;

                dx = dy = nv = 0;
                for (i = -1; i < 2; i++)
                    for (j = -1; j < 2; j++) {
                        /* (i || j) skips the centre pixel itself */
                        if ((i || j) && (isblack(x+i, y+j))) {
                            dx += i;
                            dy += j;
                            nv++;
                        }
                    }
                dx = abs(dx);
                dy = abs(dy);
                score = dx + dy;
                if (score == 3) {
                    /* Computed locally so the test does not depend on how
                       the max() macro happens to be parenthesized. */
                    int longest = (dx > dy) ? dx : dy;

                    if (nv == 2 || nv == 6 ||
                        ((nv == 3 || nv == 5) && longest == 3))
                        score = 5;
                }
                d[(size_t)y * w + x] = (unsigned char)score;
            }

        /* Pass 2: erase.  Only a pixel that actually changes asks for another
           pass; otherwise an already erased pixel that keeps a high score
           next to a stable stroke would keep the loop running forever. */
        for (x = 0; x < w; x++)
            for (y = 0; y < h; y++) {
                idx = (size_t)y * w + x;
                if (d[idx] > 3 && buf[idx] != white) {
                    buf[idx] = white;
                    more = 1;
                }
            }
    } while (more);

    free(d);
} /* marthon */
/**************************************/
/* thinning, skeletonization */
/* Tarik Fdil <tfdil at sagma dot ma> */
/* april 2005 */
/* Licence GPL cf. http://www.gnu.org */
/**************************************/
#include <tiffio.h>
#include <stdlib.h>
/*
 * max()/min() and the pixel tests.  isblack()/iswhite() expand in a scope
 * that provides w and h (image width/height) and buf (pixels stored row by
 * row, pixel (a,b) at buf[b*w+a]).  Only the low 24 bits of a pixel are
 * examined.  Coordinates outside the image are reported as white (and not
 * black); the >= tests also catch unsigned coordinates that wrapped around.
 *
 * Every expansion is wrapped in its own parentheses so that the macros can be
 * combined with other operators (`max(a,b)==3`, `!isblack(..)`, ...).
 * Arguments may be evaluated more than once; do not pass side effects.
 */
#define max(a,b) (((a)>(b)) ? (a) : (b))
#define min(a,b) (((a)<(b)) ? (a) : (b))
#define isblack(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h) ? 0 : ((buf[(b)*w+(a)] & 0x00FFFFFF) == 0))
#define iswhite(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h) ? 1 : ((buf[(b)*w+(a)] & 0x00FFFFFF) == 0x00FFFFFF))
/* 3x3 neighbourhood of a pixel, indexed [x][y].  The code in this file stores
   'b' (black), 'w' (white) or 'v' (already visited) in each cell. */
typedef char matrix[3][3];
/* Returns 0 when the pixel at (x,y) may be erased and 1 when it must be kept;
   the definition is further down in this file. */
static int is_this_pixel_required(uint32 w,uint32 h,uint32 x, uint32 y,uint32 *buf);
/*
 * thinning - thin the non-white shapes of a raster image, in place.
 *
 * w, h : image width and height in pixels.
 * buf  : w*h pixels stored row by row (pixel (x,y) is buf[y*w+x]); erased
 *        pixels are overwritten with 0xFFFFFFFF.
 *
 * The image is swept once per direction (north, east, south, west, then the
 * four diagonals).  During a sweep only the non-white pixels whose first
 * white side is the sweep direction are candidates, and a candidate is erased
 * when is_this_pixel_required() returns 0.  Sweeps repeat until a complete
 * round of eight erases nothing.
 */
void thinning(uint32 w, uint32 h,uint32 *buf)
{
    enum direction {north,east,south,west,ne,se,nw,sw,none} dir,dirpix;
    const uint32 white = 0xffffffff;
    int more, x, y;

    do {
        more = 0;
        for (dir = north; dir <= sw; dir++) {
            for (x = 0; x < w; x++)
                for (y = 0; y < h; y++) {
                    if (iswhite(x, y))
                        continue;

                    /* Side on which this pixel is exposed: image borders
                       first, then edge neighbours, then diagonal ones. */
                    if (x == 0)
                        dirpix = west;
                    else if (x == w-1)
                        dirpix = east;
                    else if (y == 0)
                        dirpix = north;
                    else if (y == h-1)
                        dirpix = south;
                    else if (iswhite(x, y-1))
                        dirpix = north;
                    else if (iswhite(x+1, y))
                        dirpix = east;
                    else if (iswhite(x, y+1))
                        dirpix = south;
                    else if (iswhite(x-1, y))
                        dirpix = west;
                    else if (iswhite(x+1, y-1))   /* was (x+1,y+1): that is the se test, so ne never matched */
                        dirpix = ne;
                    else if (iswhite(x+1, y+1))
                        dirpix = se;
                    else if (iswhite(x-1, y+1))
                        dirpix = sw;
                    else if (iswhite(x-1, y-1))
                        dirpix = nw;
                    else
                        dirpix = none;            /* interior pixel: never a candidate */

                    if (dir != dirpix)
                        continue;
                    if (!is_this_pixel_required(w, h, x, y, buf)) {
                        buf[y*w+x] = white;
                        more = 1;
                    }
                }
        }
    } while (more);
} /* thinning */
/*
 * propagation - flood-fill the visited region of a 3x3 neighbourhood.
 *
 * mat            : neighbourhood, indexed [x][y]; modified in place.
 * color          : cell value to spread into (the callers in this file pass
 *                  'b' or 'w').
 * type_connexite : 8 to spread to all eight neighbours of a cell, 4 to spread
 *                  to its edge neighbours only.  Any other value spreads
 *                  nowhere.
 *
 * Every cell equal to `color` that is connected to a cell already marked 'v'
 * becomes 'v' as well.  The scan is repeated until a whole scan marks nothing
 * new: a single forward scan never expands cells that were marked at an
 * earlier scan position, which splits for instance an anti-diagonal of three
 * cells into two components.
 */
void propagation(matrix mat, char color, char type_connexite)
{
    int x, y, i, j, neighbour, grown;

    if (color == 'v')
        return;                /* marking 'v' as 'v' changes nothing and would never settle */

    do {
        grown = 0;
        for (x = 0; x < 3; x++)
            for (y = 0; y < 3; y++) {
                if (mat[x][y] != 'v')
                    continue;
                for (i = 0; i < 3; i++)
                    for (j = 0; j < 3; j++) {
                        switch (type_connexite) {
                        case 8:
                            neighbour = abs(i-x) < 2 && abs(j-y) < 2;
                            break;
                        case 4:
                            neighbour = (abs(i-x) == 1 && j == y) ||
                                        (i == x && abs(j-y) == 1);
                            break;
                        default:
                            neighbour = 0;   /* unknown connexity: was left uninitialised */
                            break;
                        }
                        if (neighbour && mat[i][j] == color) {
                            mat[i][j] = 'v';
                            grown = 1;
                        }
                    }
            }
    } while (grown);
}
/*
 * get_connex_number - count the groups of `color` cells in a 3x3 neighbourhood.
 *
 * m     : neighbourhood, indexed [x][y]; it is copied and left unmodified.
 * color : 'b' or 'w'.  Groups of 'w' are grown with connexity 4, any other
 *         colour with connexity 8.
 *
 * Returns the number of seed cells found: cells visited row by row that are
 * neither of the opposite colour nor already marked 'v' by propagation().
 */
int get_connex_number(matrix m, int color)
{
    matrix work;
    int row, col;
    int count = 0;
    const char other = (color == 'b') ? 'w' : 'b';
    const char connexity = (color == 'w') ? 4 : 8;

    /* Work on a private copy so the caller's neighbourhood stays intact. */
    for (row = 0; row < 3; row++) {
        for (col = 0; col < 3; col++) {
            work[col][row] = m[col][row];
        }
    }

    for (row = 0; row < 3; row++) {
        for (col = 0; col < 3; col++) {
            char cell = work[col][row];

            if (cell != other && cell != 'v') {
                /* New seed: count it, then let propagation() mark its group. */
                count++;
                work[col][row] = 'v';
                propagation(work, color, connexity);
            }
        }
    }
    return count;
}
/*
 * is_this_pixel_required - decide whether a pixel must survive thinning.
 *
 * w, h   : image width and height in pixels.
 * x0, y0 : coordinates of the pixel under test.
 * buf    : image pixels, read through the isblack() macro.
 *
 * Builds the 3x3 neighbourhood centred on (x0,y0), marking each cell 'b' when
 * isblack() accepts it and 'w' otherwise, then counts its white groups and
 * its black groups with get_connex_number().
 *
 * Returns 0 (the pixel may be erased) when there is exactly one white group
 * and exactly one black group, 1 otherwise.
 *
 * The original also built a second neighbourhood with the centre forced to
 * white and counted its groups, but only a disabled criterion used those
 * counts; that dead work is no longer done.
 */
static int is_this_pixel_required(uint32 w,uint32 h,uint32 x0, uint32 y0,uint32 *buf)
{
    matrix around;
    int dx, dy, white_groups, black_groups;

    for (dy = -1; dy < 2; dy++) {
        for (dx = -1; dx < 2; dx++) {
            around[dx+1][dy+1] = (isblack(x0+dx, y0+dy)) ? 'b' : 'w';
        }
    }

    white_groups = get_connex_number(around, 'w');
    black_groups = get_connex_number(around, 'b');

    return !(white_groups == 1 && black_groups == 1);
} /* is_this_pixel_required */
/**************************************/
/* thinning by Tohmé algorithm */
/* Tarik Fdil <tfdil at sagma dot ma> */
/* april 2005 */
/* Licence GPL cf. http://www.gnu.org */
/**************************************/
#include <tiffio.h>
/*
 * Pixel helpers.  isblack()/iswhite() expand in a scope that provides
 * w and h (image width/height) and buf (pixels stored row by row, pixel
 * (a,b) at buf[b*w+a]).  Only the low 24 bits of a pixel are examined.
 * Coordinates outside the image are reported as white (and not black).
 *
 * Every expansion is wrapped in its own parentheses so that the macros can be
 * combined with other operators (`max(a,b)==3`, `!isblack(..)`, ...).
 * Arguments may be evaluated more than once; do not pass side effects.
 */
#define isblack(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h) ? 0 : ((buf[(b)*w+(a)] & 0x00FFFFFF) == 0))
#define iswhite(a,b) (((a)<0||(b)<0||(a)>=w||(b)>=h) ? 1 : ((buf[(b)*w+(a)] & 0x00FFFFFF) == 0x00FFFFFF))
#define min(a,b) (((a) < (b)) ? (a) : (b))
#define max(a,b) (((a) > (b)) ? (a) : (b))
/*
 * tohme - thin the non-white shapes of a raster image, in place.
 *
 * w, h : image width and height in pixels.
 * buf  : w*h pixels stored row by row (pixel (x,y) is buf[y*w+x]); erased
 *        pixels are overwritten with 0xFFFFFFFF.
 *
 * The image is scanned column by column.  For every pixel that is not white,
 * the eight neighbours are tested with isblack() and packed into a bit code:
 *
 *      g h i      (row y-1)
 *      d . f      (row y)
 *      a b c      (row y+1)
 *
 * The pixel is erased as soon as the code matches one of the sixteen patterns
 * below.  Scans repeat until one of them erases nothing.
 */
void tohme(uint32 w, uint32 h,uint32 *buf)
{
    enum {
        BIT_A = 1,  BIT_B = 2,  BIT_C = 4,  BIT_D = 8,
        BIT_F = 16, BIT_G = 32, BIT_H = 64, BIT_I = 128
    };
    /* A pattern matches when every `black` neighbour is black and every
       `white` neighbour is not; neighbours in neither set are ignored. */
    static const struct { unsigned char black, white; } pattern[] = {
        { BIT_B|BIT_D|BIT_H, BIT_F },
        { BIT_D|BIT_H|BIT_F, BIT_B },
        { BIT_B|BIT_H|BIT_F, BIT_D },
        { BIT_B|BIT_D|BIT_F, BIT_H },
        { BIT_H|BIT_I,       BIT_A|BIT_B|BIT_C|BIT_D|BIT_F },
        { BIT_H|BIT_G,       BIT_A|BIT_B|BIT_C|BIT_D|BIT_F },
        { BIT_A|BIT_B,       BIT_D|BIT_F|BIT_G|BIT_H|BIT_I },
        { BIT_C|BIT_B,       BIT_D|BIT_F|BIT_G|BIT_H|BIT_I },
        { BIT_F|BIT_I,       BIT_A|BIT_B|BIT_D|BIT_G|BIT_H },
        { BIT_F|BIT_C,       BIT_A|BIT_B|BIT_D|BIT_G|BIT_H },
        { BIT_D|BIT_G,       BIT_B|BIT_C|BIT_F|BIT_H|BIT_I },
        { BIT_D|BIT_A,       BIT_B|BIT_C|BIT_F|BIT_H|BIT_I },
        { BIT_D|BIT_H,       BIT_B|BIT_C|BIT_F },
        { BIT_F|BIT_H,       BIT_A|BIT_B|BIT_D },
        { BIT_B|BIT_F,       BIT_D|BIT_G|BIT_H },
        { BIT_B|BIT_D,       BIT_F|BIT_H|BIT_I }
    };
    const int npattern = (int)(sizeof pattern / sizeof pattern[0]);
    int changed, x, y, k;
    unsigned code;

    do {
        changed = 0;
        for (x = 0; x < w; x++) {
            for (y = 0; y < h; y++) {
                if (iswhite(x, y))
                    continue;

                /* Pack the black neighbours into one bit each. */
                code = 0;
                if (isblack(x-1, y+1)) code |= BIT_A;
                if (isblack(x,   y+1)) code |= BIT_B;
                if (isblack(x+1, y+1)) code |= BIT_C;
                if (isblack(x-1, y))   code |= BIT_D;
                if (isblack(x+1, y))   code |= BIT_F;
                if (isblack(x-1, y-1)) code |= BIT_G;
                if (isblack(x,   y-1)) code |= BIT_H;
                if (isblack(x+1, y-1)) code |= BIT_I;

                for (k = 0; k < npattern; k++) {
                    unsigned care = (unsigned)pattern[k].black | pattern[k].white;

                    if ((code & care) == pattern[k].black) {
                        buf[y*w+x] = 0xffffffff;
                        changed = 1;
                        break;
                    }
                }
            }
        }
    } while (changed);
} /* tohme */