Leptonica  1.82.0
Image processing and image analysis suite
flipdetect.c
Go to the documentation of this file.
1 /*====================================================================*
2  - Copyright (C) 2001 Leptonica. All rights reserved.
3  -
4  - Redistribution and use in source and binary forms, with or without
5  - modification, are permitted provided that the following conditions
6  - are met:
7  - 1. Redistributions of source code must retain the above copyright
8  - notice, this list of conditions and the following disclaimer.
9  - 2. Redistributions in binary form must reproduce the above
10  - copyright notice, this list of conditions and the following
11  - disclaimer in the documentation and/or other materials
12  - provided with the distribution.
13  -
14  - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15  - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16  - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17  - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18  - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19  - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20  - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21  - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22  - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23  - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *====================================================================*/
26 
201 #ifdef HAVE_CONFIG_H
202 #include <config_auto.h>
203 #endif /* HAVE_CONFIG_H */
204 
205 #include <math.h>
206 #include "allheaders.h"
207 
208  /* Sels for pixOrientDetect() and pixMirrorDetect() */
209 static const char *textsel1 = "x oo "
210  "x oOo "
211  "x o "
212  "x "
213  "xxxxxx";
214 
215 static const char *textsel2 = " oo x"
216  " oOo x"
217  " o x"
218  " x"
219  "xxxxxx";
220 
221 static const char *textsel3 = "xxxxxx"
222  "x "
223  "x o "
224  "x oOo "
225  "x oo ";
226 
227 static const char *textsel4 = "xxxxxx"
228  " x"
229  " o x"
230  " oOo x"
231  " oo x";
232 
233  /* Parameters for determining orientation */
234 static const l_int32 DefaultMinUpDownCount = 70;
235 static const l_float32 DefaultMinUpDownConf = 8.0;
236 static const l_float32 DefaultMinUpDownRatio = 2.5;
237 
238  /* Parameters for determining mirror flip */
239 static const l_int32 DefaultMinMirrorFlipCount = 100;
240 static const l_float32 DefaultMinMirrorFlipConf = 5.0;
241 
242  /* Static debug function */
243 static void pixDebugFlipDetect(const char *filename, PIX *pixs,
244  PIX *pixhm, l_int32 enable);
245 
246 
247 /*----------------------------------------------------------------*
248  * High-level interface for detection and correction *
249  *----------------------------------------------------------------*/
273 PIX *
275  l_float32 minupconf,
276  l_float32 minratio,
277  l_float32 *pupconf,
278  l_float32 *pleftconf,
279  l_int32 *protation,
280  l_int32 debug)
281 {
282 l_int32 orient;
283 l_float32 upconf, leftconf;
284 PIX *pix1;
285 
286  PROCNAME("pixOrientCorrect");
287 
288  if (!pixs || pixGetDepth(pixs) != 1)
289  return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);
290 
291  /* Get confidences for orientation */
292  pixUpDownDetect(pixs, &upconf, 0, 0, debug);
293  pix1 = pixRotate90(pixs, 1);
294  pixUpDownDetect(pix1, &leftconf, 0, 0, debug);
295  pixDestroy(&pix1);
296  if (pupconf) *pupconf = upconf;
297  if (pleftconf) *pleftconf = leftconf;
298 
299  /* Decide what to do */
300  makeOrientDecision(upconf,leftconf, minupconf, minratio, &orient, debug);
301 
302  /* Do it */
303  switch (orient)
304  {
306  L_INFO("text orientation not determined; no rotation\n", procName);
307  if (protation) *protation = 0;
308  return pixCopy(NULL, pixs);
309  break;
310  case L_TEXT_ORIENT_UP:
311  L_INFO("text is oriented up; no rotation\n", procName);
312  if (protation) *protation = 0;
313  return pixCopy(NULL, pixs);
314  break;
315  case L_TEXT_ORIENT_LEFT:
316  L_INFO("landscape; text oriented left; 90 cw rotation\n", procName);
317  if (protation) *protation = 90;
318  return pixRotateOrth(pixs, 1);
319  break;
320  case L_TEXT_ORIENT_DOWN:
321  L_INFO("text oriented down; 180 cw rotation\n", procName);
322  if (protation) *protation = 180;
323  return pixRotateOrth(pixs, 2);
324  break;
325  case L_TEXT_ORIENT_RIGHT:
326  L_INFO("landscape; text oriented right; 270 cw rotation\n", procName);
327  if (protation) *protation = 270;
328  return pixRotateOrth(pixs, 3);
329  break;
330  default:
331  L_ERROR("invalid orient flag!\n", procName);
332  return pixCopy(NULL, pixs);
333  }
334 }
335 
336 
337 /*----------------------------------------------------------------*
338  * Orientation detection (four 90 degree angles) *
339  *----------------------------------------------------------------*/
404 l_ok
406  l_float32 *pupconf,
407  l_float32 *pleftconf,
408  l_int32 mincount,
409  l_int32 debug)
410 {
411 PIX *pix1;
412 
413  PROCNAME("pixOrientDetect");
414 
415  if (!pixs || pixGetDepth(pixs) != 1)
416  return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
417  if (!pupconf && !pleftconf)
418  return ERROR_INT("nothing to do", procName, 1);
419  if (mincount == 0)
420  mincount = DefaultMinUpDownCount;
421 
422  if (pupconf)
423  pixUpDownDetect(pixs, pupconf, mincount, 0, debug);
424  if (pleftconf) {
425  pix1 = pixRotate90(pixs, 1);
426  pixUpDownDetect(pix1, pleftconf, mincount, 0, debug);
427  pixDestroy(&pix1);
428  }
429 
430  return 0;
431 }
432 
433 
463 l_ok
464 makeOrientDecision(l_float32 upconf,
465  l_float32 leftconf,
466  l_float32 minupconf,
467  l_float32 minratio,
468  l_int32 *porient,
469  l_int32 debug)
470 {
471 l_float32 absupconf, absleftconf;
472 
473  PROCNAME("makeOrientDecision");
474 
475  if (!porient)
476  return ERROR_INT("&orient not defined", procName, 1);
477  *porient = L_TEXT_ORIENT_UNKNOWN; /* default: no decision */
478  if (upconf == 0.0 || leftconf == 0.0) {
479  L_INFO("not enough confidence to get orientation\n", procName);
480  return 0;
481  }
482 
483  if (minupconf == 0.0)
484  minupconf = DefaultMinUpDownConf;
485  if (minratio == 0.0)
486  minratio = DefaultMinUpDownRatio;
487  absupconf = L_ABS(upconf);
488  absleftconf = L_ABS(leftconf);
489 
490  /* Here are the four possible orientation decisions, based
491  * on satisfaction of two threshold constraints. */
492  if (upconf > minupconf && absupconf > minratio * absleftconf)
493  *porient = L_TEXT_ORIENT_UP;
494  else if (leftconf > minupconf && absleftconf > minratio * absupconf)
495  *porient = L_TEXT_ORIENT_LEFT;
496  else if (upconf < -minupconf && absupconf > minratio * absleftconf)
497  *porient = L_TEXT_ORIENT_DOWN;
498  else if (leftconf < -minupconf && absleftconf > minratio * absupconf)
499  *porient = L_TEXT_ORIENT_RIGHT;
500 
501  if (debug) {
502  lept_stderr("upconf = %7.3f, leftconf = %7.3f\n", upconf, leftconf);
503  if (*porient == L_TEXT_ORIENT_UNKNOWN)
504  lept_stderr("Confidence is low; no determination is made\n");
505  else if (*porient == L_TEXT_ORIENT_UP)
506  lept_stderr("Text is rightside-up\n");
507  else if (*porient == L_TEXT_ORIENT_LEFT)
508  lept_stderr("Text is rotated 90 deg ccw\n");
509  else if (*porient == L_TEXT_ORIENT_DOWN)
510  lept_stderr("Text is upside-down\n");
511  else /* *porient == L_TEXT_ORIENT_RIGHT */
512  lept_stderr("Text is rotated 90 deg cw\n");
513  }
514 
515  return 0;
516 }
517 
518 
557 l_ok
559  l_float32 *pconf,
560  l_int32 mincount,
561  l_int32 npixels,
562  l_int32 debug)
563 {
564 l_int32 countup, countdown, nmax;
565 l_float32 nup, ndown;
566 PIX *pix0, *pix1, *pix2, *pix3, *pixm;
567 SEL *sel1, *sel2, *sel3, *sel4;
568 
569  PROCNAME("pixUpDownDetect");
570 
571  if (!pconf)
572  return ERROR_INT("&conf not defined", procName, 1);
573  *pconf = 0.0;
574  if (!pixs || pixGetDepth(pixs) != 1)
575  return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
576  if (mincount == 0)
577  mincount = DefaultMinUpDownCount;
578  if (npixels < 0)
579  npixels = 0;
580 
581  if (debug) {
582  lept_mkdir("lept/orient");
583  }
584 
585  sel1 = selCreateFromString(textsel1, 5, 6, NULL);
586  sel2 = selCreateFromString(textsel2, 5, 6, NULL);
587  sel3 = selCreateFromString(textsel3, 5, 6, NULL);
588  sel4 = selCreateFromString(textsel4, 5, 6, NULL);
589 
590  /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
591  * This closes holes in x-height characters and joins them at
592  * the x-height. There is more noise in the descender detection
593  * from this, but it works fairly well. */
594  pix0 = pixMorphCompSequence(pixs, "c1.8 + c30.1", 0);
595 
596  /* Optionally, make a mask of the word bounding boxes, shortening
597  * each of them by a fixed amount at each end. */
598  pixm = NULL;
599  if (npixels > 0) {
600  l_int32 i, nbox, x, y, w, h;
601  BOX *box;
602  BOXA *boxa;
603  pix1 = pixMorphSequence(pix0, "o10.1", 0);
604  boxa = pixConnComp(pix1, NULL, 8);
605  pixm = pixCreateTemplate(pix1);
606  pixDestroy(&pix1);
607  nbox = boxaGetCount(boxa);
608  for (i = 0; i < nbox; i++) {
609  box = boxaGetBox(boxa, i, L_CLONE);
610  boxGetGeometry(box, &x, &y, &w, &h);
611  if (w > 2 * npixels)
612  pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
613  PIX_SET, NULL, 0, 0);
614  boxDestroy(&box);
615  }
616  boxaDestroy(&boxa);
617  }
618 
619  /* Find the ascenders and optionally filter with pixm.
620  * For an explanation of the procedure used for counting the result
621  * of the HMT, see comments at the beginning of this function. */
622  pix1 = pixHMT(NULL, pix0, sel1);
623  pix2 = pixHMT(NULL, pix0, sel2);
624  pixOr(pix1, pix1, pix2);
625  if (pixm)
626  pixAnd(pix1, pix1, pixm);
627  pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
628  pixCountPixels(pix3, &countup, NULL);
629  pixDebugFlipDetect("/tmp/lept/orient/up.png", pixs, pix1, debug);
630  pixDestroy(&pix1);
631  pixDestroy(&pix2);
632  pixDestroy(&pix3);
633 
634  /* Find the ascenders and optionally filter with pixm. */
635  pix1 = pixHMT(NULL, pix0, sel3);
636  pix2 = pixHMT(NULL, pix0, sel4);
637  pixOr(pix1, pix1, pix2);
638  if (pixm)
639  pixAnd(pix1, pix1, pixm);
640  pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
641  pixCountPixels(pix3, &countdown, NULL);
642  pixDebugFlipDetect("/tmp/lept/orient/down.png", pixs, pix1, debug);
643  pixDestroy(&pix1);
644  pixDestroy(&pix2);
645  pixDestroy(&pix3);
646 
647  /* Evaluate statistically, generating a confidence that is
648  * related to the probability with a gaussian distribution. */
649  nup = (l_float32)(countup);
650  ndown = (l_float32)(countdown);
651  nmax = L_MAX(countup, countdown);
652  if (nmax > mincount)
653  *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));
654 
655  if (debug) {
656  if (pixm) pixWriteDebug("/tmp/lept/orient/pixm1.png", pixm, IFF_PNG);
657  lept_stderr("nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
658  nup, ndown, *pconf);
659  if (*pconf > DefaultMinUpDownConf)
660  lept_stderr("Text is rightside-up\n");
661  if (*pconf < -DefaultMinUpDownConf)
662  lept_stderr("Text is upside-down\n");
663  }
664 
665  pixDestroy(&pix0);
666  pixDestroy(&pixm);
667  selDestroy(&sel1);
668  selDestroy(&sel2);
669  selDestroy(&sel3);
670  selDestroy(&sel4);
671  return 0;
672 }
673 
674 
675 /*----------------------------------------------------------------*
676  * Left-right mirror detection *
677  *----------------------------------------------------------------*/
717 l_ok
719  l_float32 *pconf,
720  l_int32 mincount,
721  l_int32 debug)
722 {
723 l_int32 count1, count2, nmax;
724 l_float32 nleft, nright;
725 PIX *pix0, *pix1, *pix2, *pix3;
726 SEL *sel1, *sel2;
727 
728  PROCNAME("pixMirrorDetect");
729 
730  if (!pconf)
731  return ERROR_INT("&conf not defined", procName, 1);
732  *pconf = 0.0;
733  if (!pixs || pixGetDepth(pixs) != 1)
734  return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
735  if (mincount == 0)
736  mincount = DefaultMinMirrorFlipCount;
737 
738  if (debug) {
739  lept_mkdir("lept/orient");
740  }
741 
742  sel1 = selCreateFromString(textsel1, 5, 6, NULL);
743  sel2 = selCreateFromString(textsel2, 5, 6, NULL);
744 
745  /* Fill x-height characters but not space between them, sort of. */
746  pix3 = pixMorphCompSequence(pixs, "d1.30", 0);
747  pixXor(pix3, pix3, pixs);
748  pix0 = pixMorphCompSequence(pixs, "c15.1", 0);
749  pixXor(pix0, pix0, pixs);
750  pixAnd(pix0, pix0, pix3);
751  pixOr(pix0, pix0, pixs);
752  pixDestroy(&pix3);
753 
754  /* Filter the right-facing characters. */
755  pix1 = pixHMT(NULL, pix0, sel1);
756  pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
757  pixCountPixels(pix3, &count1, NULL);
758  pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug);
759  pixDestroy(&pix1);
760  pixDestroy(&pix3);
761 
762  /* Filter the left-facing characters. */
763  pix2 = pixHMT(NULL, pix0, sel2);
764  pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0);
765  pixCountPixels(pix3, &count2, NULL);
766  pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug);
767  pixDestroy(&pix2);
768  pixDestroy(&pix3);
769 
770  nright = (l_float32)count1;
771  nleft = (l_float32)count2;
772  nmax = L_MAX(count1, count2);
773  pixDestroy(&pix0);
774  selDestroy(&sel1);
775  selDestroy(&sel2);
776 
777  if (nmax > mincount)
778  *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft));
779 
780  if (debug) {
781  lept_stderr("nright = %f, nleft = %f\n", nright, nleft);
782  if (*pconf > DefaultMinMirrorFlipConf)
783  lept_stderr("Text is not mirror reversed\n");
784  if (*pconf < -DefaultMinMirrorFlipConf)
785  lept_stderr("Text is mirror reversed\n");
786  }
787 
788  return 0;
789 }
790 
791 
792 /*----------------------------------------------------------------*
793  * Static debug helper *
794  *----------------------------------------------------------------*/
795 /*
796  * \brief pixDebugFlipDetect()
797  *
798  * \param[in] filename for output debug file
799  * \param[in] pixs input to pix*Detect
800  * \param[in] pixhm hit-miss result from ascenders or descenders
801  * \param[in] enable 1 to enable this function; 0 to disable
802  * \return void
803  */
804 static void
805 pixDebugFlipDetect(const char *filename,
806  PIX *pixs,
807  PIX *pixhm,
808  l_int32 enable)
809 {
810 PIX *pixt, *pixthm;
811 
812  if (!enable) return;
813 
814  /* Display with red dot at counted locations */
815  pixt = pixConvert1To4Cmap(pixs);
816  pixthm = pixMorphSequence(pixhm, "d5.5", 0);
817  pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0);
818 
819  pixWriteDebug(filename, pixt, IFF_PNG);
820  pixDestroy(&pixthm);
821  pixDestroy(&pixt);
822  return;
823 }
l_ok makeOrientDecision(l_float32 upconf, l_float32 leftconf, l_float32 minupconf, l_float32 minratio, l_int32 *porient, l_int32 debug)
makeOrientDecision()
Definition: flipdetect.c:464
l_int32 lept_mkdir(const char *subdir)
lept_mkdir()
Definition: utils2.c:2218
PIX * pixCreateTemplate(const PIX *pixs)
pixCreateTemplate()
Definition: pix1.c:383
Definition: pix.h:713
l_ok pixRasterop(PIX *pixd, l_int32 dx, l_int32 dy, l_int32 dw, l_int32 dh, l_int32 op, PIX *pixs, l_int32 sx, l_int32 sy)
pixRasterop()
Definition: rop.c:204
PIX * pixCopy(PIX *pixd, const PIX *pixs)
pixCopy()
Definition: pix1.c:705
void lept_stderr(const char *fmt,...)
lept_stderr()
Definition: utils1.c:306
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:583
Definition: pix.h:491
BOXA * pixConnComp(PIX *pixs, PIXA **ppixa, l_int32 connectivity)
pixConnComp()
Definition: conncomp.c:151
PIX * pixXor(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixXor()
Definition: pix3.c:1688
void selDestroy(SEL **psel)
selDestroy()
Definition: sel1.c:340
#define PIX_SET
Definition: pix.h:334
PIX * pixAnd(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixAnd()
Definition: pix3.c:1624
PIX * pixMorphSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphSequence()
Definition: morphseq.c:137
l_ok pixCountPixels(PIX *pixs, l_int32 *pcount, l_int32 *tab8)
pixCountPixels()
Definition: pix3.c:1937
l_ok pixSetMaskedCmap(PIX *pixs, PIX *pixm, l_int32 x, l_int32 y, l_int32 rval, l_int32 gval, l_int32 bval)
pixSetMaskedCmap()
Definition: paintcmap.c:698
l_ok pixUpDownDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug)
pixUpDownDetect()
Definition: flipdetect.c:558
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:621
PIX * pixRotateOrth(PIX *pixs, l_int32 quads)
pixRotateOrth()
Definition: rotateorth.c:75
BOX * boxaGetBox(BOXA *boxa, l_int32 index, l_int32 accessflag)
boxaGetBox()
Definition: boxbasic.c:779
PIX * pixOr(PIX *pixd, PIX *pixs1, PIX *pixs2)
pixOr()
Definition: pix3.c:1560
PIX * pixHMT(PIX *pixd, PIX *pixs, SEL *sel)
pixHMT()
Definition: morph.c:342
l_ok pixOrientDetect(PIX *pixs, l_float32 *pupconf, l_float32 *pleftconf, l_int32 mincount, l_int32 debug)
pixOrientDetect()
Definition: flipdetect.c:405
Definition: pix.h:138
PIX * pixConvert1To4Cmap(PIX *pixs)
pixConvert1To4Cmap()
Definition: pixconv.c:2237
void boxDestroy(BOX **pbox)
boxDestroy()
Definition: boxbasic.c:282
l_int32 boxaGetCount(BOXA *boxa)
boxaGetCount()
Definition: boxbasic.c:734
PIX * pixRotate90(PIX *pixs, l_int32 direction)
pixRotate90()
Definition: rotateorth.c:166
l_ok pixMirrorDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug)
pixMirrorDetect()
Definition: flipdetect.c:718
l_ok boxGetGeometry(BOX *box, l_int32 *px, l_int32 *py, l_int32 *pw, l_int32 *ph)
boxGetGeometry()
Definition: boxbasic.c:313
Definition: pix.h:480
PIX * pixReduceRankBinaryCascade(PIX *pixs, l_int32 level1, l_int32 level2, l_int32 level3, l_int32 level4)
pixReduceRankBinaryCascade()
Definition: binreduce.c:152
PIX * pixOrientCorrect(PIX *pixs, l_float32 minupconf, l_float32 minratio, l_float32 *pupconf, l_float32 *pleftconf, l_int32 *protation, l_int32 debug)
pixOrientCorrect()
Definition: flipdetect.c:274
SEL * selCreateFromString(const char *text, l_int32 h, l_int32 w, const char *name)
selCreateFromString()
Definition: sel1.c:1607
PIX * pixMorphCompSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphCompSequence()
Definition: morphseq.c:304