![]() |
Leptonica
1.82.0
Image processing and image analysis suite
|
#include "allheaders.h"
#include "math.h"
Go to the source code of this file.
Functions | |
l_ok | pixGetRegionsBinary (PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, PIXA *pixadb) |
PIX * | pixGenHalftoneMask (PIX *pixs, PIX **ppixtext, l_int32 *phtfound, l_int32 debug) |
PIX * | pixGenerateHalftoneMask (PIX *pixs, PIX **ppixtext, l_int32 *phtfound, PIXA *pixadb) |
PIX * | pixGenTextlineMask (PIX *pixs, PIX **ppixvws, l_int32 *ptlfound, PIXA *pixadb) |
PIX * | pixGenTextblockMask (PIX *pixs, PIX *pixvws, PIXA *pixadb) |
BOX * | pixFindPageForeground (PIX *pixs, l_int32 threshold, l_int32 mindist, l_int32 erasedist, l_int32 showmorph, PIXAC *pixac) |
l_ok | pixSplitIntoCharacters (PIX *pixs, l_int32 minw, l_int32 minh, BOXA **pboxa, PIXA **ppixa, PIX **ppixdebug) |
BOXA * | pixSplitComponentWithProfile (PIX *pixs, l_int32 delta, l_int32 mindel, PIX **ppixdebug) |
PIXA * | pixExtractTextlines (PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 minw, l_int32 minh, l_int32 adjw, l_int32 adjh, PIXA *pixadb) |
PIXA * | pixExtractRawTextlines (PIX *pixs, l_int32 maxw, l_int32 maxh, l_int32 adjw, l_int32 adjh, PIXA *pixadb) |
l_ok | pixCountTextColumns (PIX *pixs, l_float32 deltafract, l_float32 peakfract, l_float32 clipfract, l_int32 *pncols, PIXA *pixadb) |
l_ok | pixDecideIfText (PIX *pixs, BOX *box, l_int32 *pistext, PIXA *pixadb) |
l_ok | pixFindThreshFgExtent (PIX *pixs, l_int32 thresh, l_int32 *ptop, l_int32 *pbot) |
l_ok | pixDecideIfTable (PIX *pixs, BOX *box, l_int32 orient, l_int32 *pscore, PIXA *pixadb) |
PIX * | pixPrepare1bpp (PIX *pixs, BOX *box, l_float32 cropfract, l_int32 outres) |
l_ok | pixEstimateBackground (PIX *pixs, l_int32 darkthresh, l_float32 edgecrop, l_int32 *pbg) |
l_ok | pixFindLargeRectangles (PIX *pixs, l_int32 polarity, l_int32 nrect, BOXA **pboxa, PIX **ppixdb) |
l_ok | pixFindLargestRectangle (PIX *pixs, l_int32 polarity, BOX **pbox, PIX **ppixdb) |
PIX * | pixAutoPhotoinvert (PIX *pixs, l_int32 thresh, PIX **ppixm, PIXA *pixadb) |
Variables | |
static const l_int32 | MinWidth = 100 |
static const l_int32 | MinHeight = 100 |
Top level page segmentation l_int32 pixGetRegionsBinary()
Halftone region extraction PIX *pixGenHalftoneMask() **Deprecated wrapper** PIX *pixGenerateHalftoneMask()
Textline extraction PIX *pixGenTextlineMask()
Textblock extraction PIX *pixGenTextblockMask()
Location of page foreground PIX *pixFindPageForeground()
Extraction of characters from image with only text l_int32 pixSplitIntoCharacters() BOXA *pixSplitComponentWithProfile()
Extraction of lines of text PIXA *pixExtractTextlines() PIXA *pixExtractRawTextlines()
How many text columns l_int32 pixCountTextColumns()
Decision: text vs photo l_int32 pixDecideIfText() l_int32 pixFindThreshFgExtent()
Decision: table vs text l_int32 pixDecideIfTable() PIX *pixPrepare1bpp()
Estimate the grayscale background value l_int32 pixEstimateBackground()
Largest white or black rectangles in an image l_int32 pixFindLargeRectangles() l_int32 pixFindLargestRectangle()
Generate rectangle inside connected component BOX *pixFindRectangleInCC()
Automatic photoinvert for OCR PIX *pixAutoPhotoinvert()
Definition in file pageseg.c.
pixFindRectangleInCC()
[in] | pixs | 1 bpp, with sufficient closings to make the fg be a single c.c. that is a convex hull |
[in] | boxs | [optional] if NULL, pixs should be a minimum container of a single c.c. |
[in] | fract | first and all consecutive lines found must be at least this fraction of the fast scan dimension |
[in] | dir | L_SCAN_HORIZONTAL, L_SCAN_VERTICAL; direction of fast scan |
[in] | select | L_GEOMETRIC_UNION, L_GEOMETRIC_INTERSECTION, L_LARGEST_AREA, L_SMALLEST_AREA |
[in] | debug | if 1, generates output pdf showing intermediate computation and final result |
Notes: (1) Computation is similar to pixFindLargestRectangle(), but allows a different set of results to choose from. (2) Select the fast scan direction. Then, scanning in the slow direction, find the longest run of ON pixels in the fast scan direction and look for the first run that is longer than fract of the dimension. Continue until a shorter run is found. This generates a box of ON pixels fitting into the c.c. (3) Do this from both slow scan directions and use select to get a resulting box from these two. (4) The extracted rectangle is not necessarily the largest that can fit in the c.c. To get that, use pixFindLargestRectangle(). */ BOX * pixFindRectangleInCC(PIX *pixs, BOX *boxs, l_float32 fract, l_int32 dir, l_int32 select, l_int32 debug) { l_int32 x, y, i, w, h, w1, h1, w2, h2, found, res; l_int32 xfirst, xlast, xstart, yfirst, ylast, length; BOX *box1, *box2, *box3, *box4, *box5; PIX *pix1, *pix2, *pixdb1, *pixdb2; PIXA *pixadb;
PROCNAME("pixFindRectangleInCC");
if (!pixs || pixGetDepth(pixs) != 1) return (BOX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); if (fract <= 0.0 || fract > 1.0) return (BOX *)ERROR_PTR("invalid fraction", procName, NULL); if (dir != L_SCAN_VERTICAL && dir != L_SCAN_HORIZONTAL) return (BOX *)ERROR_PTR("invalid scan direction", procName, NULL); if (select != L_GEOMETRIC_UNION && select != L_GEOMETRIC_INTERSECTION && select != L_LARGEST_AREA && select != L_SMALLEST_AREA) return (BOX *)ERROR_PTR("invalid select", procName, NULL);
/* Extract the c.c. if necessary */ x = y = 0; if (boxs) { pix1 = pixClipRectangle(pixs, boxs, NULL); boxGetGeometry(boxs, &x, &y, NULL, NULL); } else { pix1 = pixClone(pixs); }
/* All fast scans are horizontal; rotate 90 deg cw if necessary */ if (dir == L_SCAN_VERTICAL) pix2 = pixRotate90(pix1, 1); else /* L_SCAN_HORIZONTAL */ pix2 = pixClone(pix1); pixGetDimensions(pix2, &w, &h, NULL);
pixadb = (debug) ? pixaCreate(0) : NULL; pixdb1 = NULL; if (pixadb) { lept_mkdir("lept/rect"); pixaAddPix(pixadb, pix1, L_CLONE); pixdb1 = pixConvertTo32(pix2); } pixDestroy(&pix1);
/* Scanning down, find the first scanline with a long enough run. That run goes from (xfirst, yfirst) to (xlast, yfirst). */ found = FALSE; for (i = 0; i < h; i++) { pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); if (length >= (l_int32)(fract * w + 0.5)) { yfirst = i; xfirst = xstart; xlast = xfirst + length - 1; found = TRUE; break; } } if (!found) { L_WARNING("no run of sufficient size was found\n", procName); pixDestroy(&pix2); pixDestroy(&pixdb1); pixaDestroy(&pixadb); return NULL; }
/* Continue down until the condition fails */ w1 = xlast - xfirst + 1; h1 = h - yfirst; /* init */ ylast = h - 1; /* init */ for (i = yfirst + 1; i < h; i++) { pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); if (xstart > xfirst || (xstart + length - 1 < xlast) || i == h - 1) { ylast = i - 1; h1 = ylast - yfirst + 1; break; } } box1 = boxCreate(xfirst, yfirst, w1, h1);
/* Scanning up, find the first scanline with a long enough run. That run goes from (xfirst, ylast) to (xlast, ylast). */ for (i = h - 1; i >= 0; i--) { pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); if (length >= (l_int32)(fract * w + 0.5)) { ylast = i; xfirst = xstart; xlast = xfirst + length - 1; break; } }
/* Continue up until the condition fails */ w2 = xlast - xfirst + 1; h2 = ylast + 1; /* initialize */ for (i = ylast - 1; i >= 0; i--) { pixFindMaxHorizontalRunOnLine(pix2, i, &xstart, &length); if (xstart > xfirst || (xstart + length - 1 < xlast) || i == 0) { yfirst = i + 1; h2 = ylast - yfirst + 1; break; } } box2 = boxCreate(xfirst, yfirst, w2, h2); pixDestroy(&pix2);
if (pixadb) { pixRenderBoxArb(pixdb1, box1, 2, 255, 0, 0); pixRenderBoxArb(pixdb1, box2, 2, 0, 255, 0); pixaAddPix(pixadb, pixdb1, L_INSERT); }
/* Select the final result from the two boxes */ if (select == L_GEOMETRIC_UNION) box3 = boxBoundingRegion(box1, box2); else if (select == L_GEOMETRIC_INTERSECTION) box3 = boxOverlapRegion(box1, box2); else if (select == L_LARGEST_AREA) box3 = (w1 * h1 >= w2 * h2) ? boxCopy(box1) : boxCopy(box2); else /* select == L_SMALLEST_AREA) */ box3 = (w1 * h1 <= w2 * h2) ? boxCopy(box1) : boxCopy(box2); boxDestroy(&box1); boxDestroy(&box2);
/* Rotate the box 90 degrees ccw if necessary */ box4 = NULL; if (box3) { if (dir == L_SCAN_VERTICAL) box4 = boxRotateOrth(box3, w, h, 3); else box4 = boxCopy(box3); }
/* Transform back to global coordinates if boxs exists */ box5 = (box4) ? boxTransform(box4, x, y, 1.0, 1.0) : NULL; boxDestroy(&box3); boxDestroy(&box4);
/* Debug output */ if (pixadb) { pixdb1 = pixConvertTo8(pixs, 0); pixAddConstantGray(pixdb1, 190); pixdb2 = pixConvertTo32(pixdb1); if (box5) pixRenderBoxArb(pixdb2, box5, 4, 0, 0, 255); pixaAddPix(pixadb, pixdb2, L_INSERT); res = pixGetXRes(pixs); L_INFO("Writing debug files to /tmp/lept/rect/\n", procName); pixaConvertToPdf(pixadb, res, 1.0, L_DEFAULT_ENCODE, 75, NULL, "/tmp/lept/rect/fitrect.pdf"); pix1 = pixaDisplayTiledAndScaled(pixadb, 32, 800, 1, 0, 40, 2); pixWrite("/tmp/lept/rect/fitrect.png", pix1, IFF_PNG); pixDestroy(&pix1); pixDestroy(&pixdb1); pixaDestroy(&pixadb); }
return box5; }
/*------------------------------------------------------------------* Automatic photoinvert for OCR * *------------------------------------------------------------------*/ /*! pixAutoPhotoinvert()
[in] | pixs | any depth, colormap ok |
[in] | thresh | binarization threshold; use 0 for default |
[out] | ppixm | [optional] image regions to be inverted |
[out] | pixadb | [optional] debug; input NULL to skip |
Notes: (1) A 1 bpp image is returned, where pixels in image regions are photo-inverted. (2) If there is light text with a dark background, this will identify the region and photoinvert the pixels there if there are at least 60% fg pixels in the region. (3) For debug output, input a (typically empty) pixadb.
Definition at line 2393 of file pageseg.c.
References boxaDestroy(), boxaGetBox(), boxaGetCount(), boxDestroy(), boxGetGeometry(), L_CLONE, L_COPY, lept_stderr(), PIX_CLR, pixaAddPix(), pixClipRectangle(), pixCombineMasked(), pixConnCompBB(), pixConvertTo1(), pixDestroy(), pixFillHolesToBoundingRect(), pixForegroundFraction(), pixGenerateHalftoneMask(), pixInvert(), pixMorphSequence(), pixRasterop(), and pixZero().
l_ok pixCountTextColumns | ( | PIX * | pixs, |
l_float32 | deltafract, | ||
l_float32 | peakfract, | ||
l_float32 | clipfract, | ||
l_int32 * | pncols, | ||
PIXA * | pixadb | ||
) |
[in] | pixs | 1 bpp |
[in] | deltafract | fraction of (max - min) to be used in the delta for extrema finding; typ 0.3 |
[in] | peakfract | fraction of (max - min) to be used to threshold the peak value; typ. 0.5 |
[in] | clipfract | fraction of image dimension removed on each side; typ. 0.1, which leaves w and h reduced by 0.8 |
[out] | pncols | number of columns; -1 if not determined |
[in] | pixadb | [optional] pre-allocated, for showing intermediate computation; use null to skip |
Notes: (1) It is assumed that pixs has the correct resolution set. If the resolution is 0, we set to 300 and issue a warning. (2) If necessary, the image is scaled to between 37 and 75 ppi; most of the processing is done at this resolution. (3) If no text is found (essentially a blank page), this returns ncols = 0. (4) For debug output, input a pre-allocated pixa.
Definition at line 1226 of file pageseg.c.
Referenced by dewarpaApplyInit().
[in] | pixs | any depth, any resolution >= 75 ppi |
[in] | box | [optional] if null, use entire pixs |
[in] | orient | L_PORTRAIT_MODE, L_LANDSCAPE_MODE |
[out] | pscore | 0 - 4; -1 if not determined |
[in] | pixadb | [optional] pre-allocated, for showing intermediate computation; use NULL to skip |
Notes: (1) It is assumed that pixs has the correct resolution set. If the resolution is 0, we assume it is 300 ppi and issue a warning. (2) If orient == L_LANDSCAPE_MODE, the image is rotated 90 degrees clockwise before being analyzed. (3) The interpretation of the returned score: -1 undetermined 0 no table 1 unlikely to have a table 2 likely to have a table 3 even more likely to have a table 4 extremely likely to have a table * Setting the condition for finding a table at score >= 2 works well, except for false positives on kanji and landscape text. * These false positives can be removed by setting the condition at score >= 3, but recall is lowered because it will not find tables without either horizontal or vertical lines. (4) Most of the processing takes place at 75 ppi. (5) Internally, three numbers are determined, for horizontal and vertical fg lines, and for vertical bg lines. From these, four tests are made to decide if there is a table occupying a significant part of the image. (6) Images have arbitrary content and would be likely to trigger this detector, so they are checked for first, and if found, return with a 0 (no table) score. (7) Musical scores (tablature) are likely to trigger the detector. (8) Tables of content with more than 2 columns are likely to trigger the detector. (9) For debug output, input a pre-allocated pixa.
Definition at line 1630 of file pageseg.c.
References L_COPY, L_INSERT, L_LANDSCAPE_MODE, L_SELECT_IF_GTE, L_SELECT_WIDTH, pixaAddPix(), pixClone(), pixCountConnComp(), pixDeskewBoth(), pixDestroy(), pixDilateBrick(), pixGenerateHalftoneMask(), pixInvert(), pixMorphSequence(), pixOr(), pixPrepare1bpp(), pixRotate90(), pixScale(), pixSeedfillBinary(), pixSelectBySize(), pixSubtract(), and pixZero().
[in] | pixs | any depth |
[in] | box | [optional] if null, use entire pixs |
[out] | pistext | 1 if text; 0 if photo; -1 if not determined or empty |
[in] | pixadb | [optional] pre-allocated, for showing intermediate computation; use NULL to skip |
Notes: (1) It is assumed that pixs has the correct resolution set. If the resolution is 0, we set to 300 and issue a warning. (2) If necessary, the image is scaled to 300 ppi; most of the processing is done at this resolution. (3) Text is assumed to be in horizontal lines. (4) Because thin vertical lines are removed before filtering for text lines, this should identify tables as text. (5) If box is null and pixs contains both text lines and line art, this function might return istext == true. (6) If the input pixs is empty, or for some other reason the result can not be determined, return -1. (7) For debug output, input a pre-allocated pixa.
Definition at line 1374 of file pageseg.c.
References pixDestroy(), pixPrepare1bpp(), and pixZero().
l_ok pixEstimateBackground | ( | PIX * | pixs, |
l_int32 | darkthresh, | ||
l_float32 | edgecrop, | ||
l_int32 * | pbg | ||
) |
[in] | pixs | 8 bpp, with or without colormap |
[in] | darkthresh | pixels below this value are never considered part of the background; typ. 70; use 0 to skip |
[in] | edgecrop | fraction of half-width on each side, and of half-height at top and bottom, that are cropped |
[out] | pbg | estimated background, or 0 on error |
Notes: (1) Caller should check that return bg value is > 0.
PIXA* pixExtractRawTextlines | ( | PIX * | pixs, |
l_int32 | maxw, | ||
l_int32 | maxh, | ||
l_int32 | adjw, | ||
l_int32 | adjh, | ||
PIXA * | pixadb | ||
) |
[in] | pixs | any depth, assumed to have nearly horizontal text |
[in] | maxw,maxh | initial filtering: remove any components in pixs with components larger than maxw or maxh; use 0 for default values. |
[in] | adjw,adjh | final adjustment of boxes representing each text line. If > 0, these increase the box size at each edge by this amount. |
[in] | pixadb | pixa for saving intermediate steps; NULL to omit |
Notes: (1) This function assumes that textlines have sufficient vertical separation and small enough skew so that a horizontal dilation sufficient to join words will not join textlines. It aggressively joins textlines across multiple columns, so if that is not desired, you must either (a) make sure that pixs is a single column of text or (b) use instead pixExtractTextlines(), which is more conservative about joining text fragments that have vertical overlap. (2) This first removes components from pixs that are either very wide (> maxw) or very tall (> maxh). (3) For reasonable accuracy, the resolution of pixs should be at least 100 ppi. For reasonable efficiency, the resolution should not exceed 600 ppi. (4) This can be used to determine if some region of a scanned image is horizontal text. (5) As an example, for a pix with resolution 300 ppi, a reasonable set of parameters is: pixExtractRawTextlines(pix, 150, 150, 0, 0, NULL); (6) The output pixa is composed of subimages, one for each textline, and the boxa in the pixa tells where in pixs each textline goes.
PIXA* pixExtractTextlines | ( | PIX * | pixs, |
l_int32 | maxw, | ||
l_int32 | maxh, | ||
l_int32 | minw, | ||
l_int32 | minh, | ||
l_int32 | adjw, | ||
l_int32 | adjh, | ||
PIXA * | pixadb | ||
) |
[in] | pixs | any depth, assumed to have nearly horizontal text |
[in] | maxw,maxh | initial filtering: remove any components in pixs with components larger than maxw or maxh |
[in] | minw,minh | final filtering: remove extracted 'lines' with sizes smaller than minw or minh; use 0 for default. |
[in] | adjw,adjh | final adjustment of boxes representing each text line. If > 0, these increase the box size at each edge by this amount. |
[in] | pixadb | pixa for saving intermediate steps; NULL to omit |
Notes: (1) This function assumes that textline fragments have sufficient vertical separation and small enough skew so that a horizontal dilation sufficient to join words will not join textlines. It does not guarantee that horizontally adjacent textline fragments on the same line will be joined. (2) For images with multiple columns, it attempts to avoid joining textlines across the space between columns. If that is not a concern, you can also use pixExtractRawTextlines(), which will join them with alacrity. (3) This first removes components from pixs that are either wide (> maxw) or tall (> maxh). (4) A final filtering operation removes small components, such that width < minw or height < minh. (5) For reasonable accuracy, the resolution of pixs should be at least 100 ppi. For reasonable efficiency, the resolution should not exceed 600 ppi. (6) This can be used to determine if some region of a scanned image is horizontal text. (7) As an example, for a pix with resolution 300 ppi, a reasonable set of parameters is: pixExtractTextlines(pix, 150, 150, 36, 20, 5, 5, NULL); The defaults minw and minh for 300 ppi are about 36 and 20, so the same result is obtained with: pixExtractTextlines(pix, 150, 150, 0, 0, 5, 5, NULL); (8) The output pixa is composed of subimages, one for each textline, and the boxa in the pixa tells where in pixs each textline goes.
l_ok pixFindLargeRectangles | ( | PIX * | pixs, |
l_int32 | polarity, | ||
l_int32 | nrect, | ||
BOXA ** | pboxa, | ||
PIX ** | ppixdb | ||
) |
[in] | pixs | 1 bpp |
[in] | polarity | 0 within background, 1 within foreground |
[in] | nrect | number of rectangles to be found |
[out] | pboxa | largest rectangles, sorted by decreasing area |
[in,out] | ppixdb | optional return output with rectangles drawn on it |
Notes: (1) This does a greedy search to find the largest rectangles, either black or white and without overlaps, in pix. (2) See pixFindLargestRectangle(), which is called multiple times, for details. On each call, the largest rectangle found is painted, so that none of its pixels can be used later, before calling it again. (3) This function is surprisingly fast. Although pixFindLargestRectangle() runs at about 50 MPix/sec, when it is run multiple times by pixFindLargeRectangles(), it processes at 150 - 250 MPix/sec, and the time is approximately linear in nrect. For example, for a 1 MPix image, searching for the largest 50 boxes takes about 0.2 seconds.
[in] | pixs | 1 bpp |
[in] | polarity | 0 within background, 1 within foreground |
[out] | pbox | largest area rectangle |
[in,out] | ppixdb | optional return output with rectangle drawn on it |
Notes: (1) This is a simple and elegant solution to a problem in computational geometry that at first appears to be quite difficult: what is the largest rectangle that can be placed in the image, covering only pixels of one polarity (bg or fg)? The solution is O(n), where n is the number of pixels in the image, and it requires nothing more than using a simple recursion relation in a single sweep of the image. (2) In a sweep from UL to LR with left-to-right being the fast direction, calculate the largest white rectangle at (x, y), using previously calculated values at pixels #1 and #2: #1: (x, y - 1) #2: (x - 1, y) We also need the most recent "black" pixels that were seen in the current row and column. Consider the largest area. There are only two possibilities: (a) Min(w(1), horizdist) * (h(1) + 1) (b) Min(h(2), vertdist) * (w(2) + 1) where horizdist: the distance from the rightmost "black" pixel seen in the current row across to the current pixel vertdist: the distance from the lowest "black" pixel seen in the current column down to the current pixel and we choose the Max of (a) and (b). (3) To convince yourself that these recursion relations are correct, it helps to draw the maximum rectangles at #1 and #2. Then for #1, you try to extend the rectangle down one line, so that the height is h(1) + 1. Do you get the full width of #1, w(1)? It depends on where the black pixels are in the current row. You know the final width is bounded by w(1) and w(2) + 1, but the actual value depends on the distribution of black pixels in the current row that are at a distance from the current pixel that is between these limits. We call that value "horizdist", and the area is then given by the expression (a) above. Using similar reasoning for #2, where you attempt to extend the rectangle to the right by 1 pixel, you arrive at (b). The largest rectangle is then found by taking the Max.
Definition at line 2052 of file pageseg.c.
References pixCreate(), pixGetData(), pixGetDimensions(), and pixGetLinePtrs().
BOX* pixFindPageForeground | ( | PIX * | pixs, |
l_int32 | threshold, | ||
l_int32 | mindist, | ||
l_int32 | erasedist, | ||
l_int32 | showmorph, | ||
PIXAC * | pixac | ||
) |
[in] | pixs | full resolution (any type or depth) |
[in] | threshold | for binarization; typically about 128 |
[in] | mindist | min distance of text from border to allow cleaning near border; at 2x reduction, this should be larger than 50; typically about 70 |
[in] | erasedist | when conditions are satisfied, erase anything within this distance of the edge; typically 20-30 at 2x reduction |
[in] | showmorph | debug: set to a negative integer to show steps in generating masks; this is typically used for debugging region extraction |
[in] | pixac | debug: allocate outside and pass this in to accumulate results of each call to this function, which can be displayed in a mosaic or a pdf. |
Notes: (1) This doesn't simply crop to the fg. It attempts to remove pixel noise and junk at the edge of the image before cropping. The input threshold is used if pixs is not 1 bpp. (2) This is not intended to work on small thumbnails. The dimensions of pixs must be at least MinWidth x MinHeight. (3) Debug: set showmorph to display the intermediate image in the morphological operations on this page. (4) Debug: to get pdf output of results when called repeatedly, call with an existing pixac, which will add an image of this page, with the fg outlined. If no foreground is found, there is no output for this page image.
Definition at line 571 of file pageseg.c.
References pixGetDimensions().
l_ok pixFindThreshFgExtent | ( | PIX * | pixs, |
l_int32 | thresh, | ||
l_int32 * | ptop, | ||
l_int32 * | pbot | ||
) |
[in] | pixs | 1 bpp |
[in] | thresh | threshold number of pixels in row |
[out] | ptop | [optional] location of top of region |
[out] | pbot | [optional] location of bottom of region |
[in] | pixs | 1 bpp, assumed to be 150 to 200 ppi |
[out] | ppixtext | [optional] text part of pixs |
[out] | phtfound | [optional] 1 if the mask is not empty |
[in] | pixadb | input for collecting debug pix; use NULL to skip |
Notes: (1) This is not intended to work on small thumbnails. The dimensions of pixs must be at least MinWidth x MinHeight.
Definition at line 306 of file pageseg.c.
Referenced by pixAutoPhotoinvert(), pixDecideIfTable(), and pixGenHalftoneMask().
Deprecated: This wrapper avoids an ABI change with tesseract 3.0.4. It should be removed when we no longer need to support 3.0.4. The debug parameter is ignored (assumed 0).
Definition at line 281 of file pageseg.c.
References pixGenerateHalftoneMask().
[in] | pixs | 1 bpp, textline mask, assumed to be 150 to 200 ppi |
[in] | pixvws | vertical white space mask |
[in] | pixadb | input for collecting debug pix; use NULL to skip |
Notes: (1) Both the input masks (textline and vertical white space) and the returned textblock mask are at the same resolution. (2) This is not intended to work on small thumbnails. The dimensions of pixs must be at least MinWidth x MinHeight. (3) The result is somewhat noisy, in that small "blocks" of text may be included. These can be removed by post-processing, using, e.g., pixSelectBySize(pix, 60, 60, 4, L_SELECT_IF_EITHER, L_SELECT_IF_GTE, NULL);
[in] | pixs | 1 bpp, assumed to be 150 to 200 ppi |
[out] | ppixvws | vertical whitespace mask |
[out] | ptlfound | [optional] 1 if the mask is not empty |
[in] | pixadb | input for collecting debug pix; use NULL to skip |
Notes: (1) The input pixs should be deskewed. (2) pixs should have no halftone pixels. (3) This is not intended to work on small thumbnails. The dimensions of pixs must be at least MinWidth x MinHeight. (4) Both the input image and the returned textline mask are at the same resolution.
[in] | pixs | 1 bpp, assumed to be 300 to 400 ppi |
[out] | ppixhm | [optional] halftone mask |
[out] | ppixtm | [optional] textline mask |
[out] | ppixtb | [optional] textblock mask |
[in] | pixadb | input for collecting debug pix; use NULL to skip |
Notes: (1) It is best to deskew the image before segmenting. (2) Passing in pixadb enables debug output.
[in] | pixs | any depth |
[in] | box | [optional] if null, use entire pixs |
[in] | cropfract | fraction to be removed from the boundary; use 0.0 to retain the entire image |
[in] | outres | desired resolution of output image; if the input image resolution is not set, assume 300 ppi; use 0 to skip scaling. |
Notes: (1) This handles some common pre-processing operations, where the page segmentation algorithm takes a 1 bpp image.
Definition at line 1780 of file pageseg.c.
References boxCreate(), boxDestroy(), pixClipRectangle(), and pixGetDimensions().
Referenced by pixDecideIfTable(), and pixDecideIfText().
pixSplitComponentWithProfile()
[in] | pixs | 1 bpp, exactly one connected component |
[in] | delta | distance used in extrema finding in a numa; typ. 10 |
[in] | mindel | minimum required difference between profile minimum and profile values +2 and -2 away; typ. 7 |
[out] | ppixdebug | [optional] debug image of splitting |
Notes: (1) This will split the most obvious cases of touching characters. The split points it is searching for are narrow and deep minima in the vertical pixel projection profile, after a large vertical closing has been applied to the component.
l_ok pixSplitIntoCharacters | ( | PIX * | pixs, |
l_int32 | minw, | ||
l_int32 | minh, | ||
BOXA ** | pboxa, | ||
PIXA ** | ppixa, | ||
PIX ** | ppixdebug | ||
) |
[in] | pixs | 1 bpp, contains only deskewed text |
[in] | minw | min component width for initial filtering; typ. 4 |
[in] | minh | min component height for initial filtering; typ. 4 |
[out] | pboxa | [optional] character bounding boxes |
[out] | ppixa | [optional] character images |
[out] | ppixdebug | [optional] showing splittings |
Notes: (1) This is a simple function that attempts to find split points based on vertical pixel profiles. (2) It should be given an image that has an arbitrary number of text characters. (3) The returned pixa includes the boxes from which the (possibly split) components are extracted.