Leptonica  1.82.0
Image processing and image analysis suite
recogtrain.c File Reference
#include <string.h>
#include "allheaders.h"

Go to the source code of this file.

Functions

static l_int32 recogTemplatesAreOK (L_RECOG *recog, l_int32 minsize, l_float32 minfract, l_int32 *pok)
 
static SARRAY * recogAddMissingClassStrings (L_RECOG *recog)
 
static l_int32 recogCharsetAvailable (l_int32 type)
 
static PIX * pixDisplayOutliers (PIXA *pixas, NUMA *nas)
 
static PIX * recogDisplayOutlier (L_RECOG *recog, l_int32 iclass, l_int32 jsamp, l_int32 maxclass, l_float32 maxscore)
 
l_ok recogTrainLabeled (L_RECOG *recog, PIX *pixs, BOX *box, char *text, l_int32 debug)
 
l_ok recogProcessLabeled (L_RECOG *recog, PIX *pixs, BOX *box, char *text, PIX **ppix)
 
l_ok recogAddSample (L_RECOG *recog, PIX *pix, l_int32 debug)
 
PIX * recogModifyTemplate (L_RECOG *recog, PIX *pixs)
 
l_int32 recogAverageSamples (L_RECOG **precog, l_int32 debug)
 
l_int32 pixaAccumulateSamples (PIXA *pixa, PTA *pta, PIX **ppixd, l_float32 *px, l_float32 *py)
 
l_ok recogTrainingFinished (L_RECOG **precog, l_int32 modifyflag, l_int32 minsize, l_float32 minfract)
 
PIXA * recogFilterPixaBySize (PIXA *pixas, l_int32 setsize, l_int32 maxkeep, l_float32 max_ht_ratio, NUMA **pna)
 
PIXAA * recogSortPixaByClass (PIXA *pixa, l_int32 setsize)
 
l_ok recogRemoveOutliers1 (L_RECOG **precog, l_float32 minscore, l_int32 mintarget, l_int32 minsize, PIX **ppixsave, PIX **ppixrem)
 
PIXA * pixaRemoveOutliers1 (PIXA *pixas, l_float32 minscore, l_int32 mintarget, l_int32 minsize, PIX **ppixsave, PIX **ppixrem)
 
l_ok recogRemoveOutliers2 (L_RECOG **precog, l_float32 minscore, l_int32 minsize, PIX **ppixsave, PIX **ppixrem)
 
PIXA * pixaRemoveOutliers2 (PIXA *pixas, l_float32 minscore, l_int32 minsize, PIX **ppixsave, PIX **ppixrem)
 
PIXA * recogTrainFromBoot (L_RECOG *recogboot, PIXA *pixas, l_float32 minscore, l_int32 threshold, l_int32 debug)
 
l_ok recogPadDigitTrainingSet (L_RECOG **precog, l_int32 scaleh, l_int32 linew)
 
l_int32 recogIsPaddingNeeded (L_RECOG *recog, SARRAY **psa)
 
PIXA * recogAddDigitPadTemplates (L_RECOG *recog, SARRAY *sa)
 
L_RECOG * recogMakeBootDigitRecog (l_int32 nsamp, l_int32 scaleh, l_int32 linew, l_int32 maxyshift, l_int32 debug)
 
PIXA * recogMakeBootDigitTemplates (l_int32 nsamp, l_int32 debug)
 
l_ok recogShowContent (FILE *fp, L_RECOG *recog, l_int32 index, l_int32 display)
 
l_ok recogDebugAverages (L_RECOG **precog, l_int32 debug)
 
l_int32 recogShowAverageTemplates (L_RECOG *recog)
 
l_ok recogShowMatchesInRange (L_RECOG *recog, PIXA *pixa, l_float32 minscore, l_float32 maxscore, l_int32 display)
 
PIX * recogShowMatch (L_RECOG *recog, PIX *pix1, PIX *pix2, BOX *box, l_int32 index, l_float32 score)
 

Variables

static const l_int32 DefaultMinSetSize = 1
 
static const l_float32 DefaultMinSetFract = 0.4
 
static const l_float32 DefaultMinScore = 0.75
 
static const l_int32 DefaultMinTarget = 3
 
static const l_float32 LowerScoreThreshold = 0.5
 

Detailed Description

     Training on labeled data
        l_int32             recogTrainLabeled()
        l_int32             recogProcessLabeled()
        l_int32             recogAddSample()
        PIX                *recogModifyTemplate()
        l_int32             recogAverageSamples()
        l_int32             pixaAccumulateSamples()
        l_int32             recogTrainingFinished()
        static l_int32      recogTemplatesAreOK()
        PIXA               *recogFilterPixaBySize()
        PIXAA              *recogSortPixaByClass()
        l_int32             recogRemoveOutliers1()
        PIXA               *pixaRemoveOutliers1()
        l_int32             recogRemoveOutliers2()
        PIXA               *pixaRemoveOutliers2()
     Training on unlabeled data
        PIXA               *recogTrainFromBoot()
     Padding the digit training set
        l_int32             recogPadDigitTrainingSet()
        l_int32             recogIsPaddingNeeded()
        static SARRAY      *recogAddMissingClassStrings()
        PIXA               *recogAddDigitPadTemplates()
        static l_int32      recogCharsetAvailable()
     Making a boot digit recognizer
        L_RECOG            *recogMakeBootDigitRecog()
        PIXA               *recogMakeBootDigitTemplates()
     Debugging
        l_int32             recogShowContent()
        l_int32             recogDebugAverages()
        l_int32             recogShowAverageTemplates()
        static PIX         *pixDisplayOutliers()
        static PIX         *recogDisplayOutlier()
        l_int32             recogShowMatchesInRange()
        PIX                *recogShowMatch()
 These abbreviations are for the type of template to be used:
   * SI (for the scanned images)
   * WNL (for width-normalized lines, formed by first skeletonizing
          the scanned images, and then dilating to a fixed width)
 These abbreviations are for the type of recognizer:
   * BAR (book-adapted recognizer; the best type; can do identification
          with unscaled images and separation of touching characters)
   * BSR (bootstrap recognizer; used if more labeled templates are
          required for a BAR, either for finding more templates from
          the book, or making a hybrid BAR/BSR)
 The recog struct typically holds two versions of the input templates
 (e.g. from a pixa) that were used to generate it.  One version is
 the unscaled input templates.  The other version is the one that
 will be used by the recog to identify unlabeled data.  That version
 depends on the input parameters when the recog is created.  The choices
 for the latter version, and their suggested use, are:
 (1) unscaled SI -- typical for BAR, generated from book images
 (2) unscaled WNL -- ditto
 (3) scaled SI -- typical for recognizers containing template
     images from sources other than the book to be recognized
 (4) scaled WNL -- ditto
 For cases (3) and (4), we recommend scaling to fixed height; e.g.,
 scalew = 0, scaleh = 40.
 When using WNL, we recommend using a width of 5 in the template
 and 4 in the unlabeled data.
 It appears that better results for a BAR are usually obtained using
 SI than WNL, but more experimentation is needed.
 This utility is designed to build recognizers that are specifically
 adapted from a large amount of material, such as a book.  These
 use labeled templates taken from the material, and not scaled.
 In addition, two special recognizers are useful:
 (1) Bootstrap recognizer (BSR).  This uses height-scaled templates,
     that have been extended with several repetitions in one of two ways:
     (a) anisotropic width scaling (for either SI or WNL)
     (b) iterative erosions/dilations (for SI).
 (2) Outlier removal.  This uses height scaled templates.  It can be
     implemented without using templates that are aligned averages of all
     templates in a class.
 Recognizers are inexpensive to generate, for example, from a pixa
 of labeled templates.  The general process of building a BAR is
 to start with labeled templates, e.g., in a pixa, make a BAR, and
 analyze new samples from the book to augment the BAR until it has
 enough samples for each character class.  Along the way, samples
 from a BSR may be added for help in training.  If not enough samples
 are available for the BAR, it can finally be augmented with BSR
 samples, in which case the resulting hybrid BAR/BSR recognizer
 must work on scaled images.
 Here are the steps in doing recog training:
 A. Generate a BAR from any existing labeled templates
   (1) Create a recog and add the templates, using recogAddSample().
       This stores the unscaled templates.
       [Note: this can be done in one step if the labeled templates are put
        into a pixa:
          L_Recog *rec = recogCreateFromPixa(pixa, ...);  ]
   (2) Call recogTrainingFinished() to generate the (sometimes modified)
       templates to be used for correlation.
   (3) Optionally, remove outliers.
   If there are sufficient samples in the classes, we're done. Otherwise,
 B. Try to get more samples from the book to pad the BAR.
    (1) Save the unscaled, labeled templates from the BAR.
    (2) Supplement the BAR with bootstrap templates to make a hybrid BAR/BSR.
    (3) Do recognition on more unlabeled images, scaled to a fixed height
    (4) Add the unscaled, labeled images to the saved set.
    (5) Optionally, remove outliers.
    If there are sufficient samples in the classes, we're done. Otherwise,
 C. For classes without a sufficient number of templates, we can
    supplement the BAR with templates from a BSR (a hybrid BAR/BSR),
    and do recognition scaled to a fixed height.
 Here are several methods that can be used for identifying outliers:
 (1) Compute average templates for each class and remove a candidate
     that is poorly correlated with the average.  This is the most
     simple method.  recogRemoveOutliers1() uses this, supplemented with
     a second threshold and a target number of templates to be saved.
 (2) Compute average templates for each class and remove a candidate
     that is more highly correlated with the average of some other class.
     This does not require setting a threshold for the correlation.
     recogRemoveOutliers2() uses this method, supplemented with a minimum
     correlation score.
 (3) For each candidate, find the average correlation with other
     members of its class, and remove those that have a relatively
     low average correlation.  This is similar to (1), gives comparable
     results and because it does not use average templates, it requires
     a bit more computation.

Definition in file recogtrain.c.

Function Documentation

◆ pixaAccumulateSamples()

l_int32 pixaAccumulateSamples ( PIXA *  pixa,
PTA *  pta,
PIX **  ppixd,
l_float32 *  px,
l_float32 *  py 
)

pixaAccumulateSamples()

Parameters
[in]pixaof samples from the same class, 1 bpp
[in]pta[optional] of centroids of the samples
[out]ppixdaccumulated samples, 8 bpp
[out]px[optional] average x coordinate of centroids
[out]py[optional] average y coordinate of centroids
Returns
0 on success, 1 on failure
Notes:
     (1) This generates an aligned (by centroid) sum of the input pix.
     (2) We use only the first 256 samples; that's plenty.
     (3) If pta is not input, we generate two tables, and discard
         after use.  If this is called many times, it is better
         to precompute the pta.

Definition at line 668 of file recogtrain.c.

References L_CLONE, makePixelCentroidTab8(), makePixelSumTab8(), PIX_SRC, pixAccumulate(), pixaGetCount(), pixaGetPix(), pixaSizeRange(), pixCentroid(), pixClearAll(), pixCreate(), pixInitAccumulate(), pixRasterop(), ptaAddPt(), ptaClone(), ptaCreate(), ptaGetCount(), and ptaGetPt().

Referenced by recogAverageSamples().

◆ pixaRemoveOutliers1()

PIXA* pixaRemoveOutliers1 ( PIXA *  pixas,
l_float32  minscore,
l_int32  mintarget,
l_int32  minsize,
PIX **  ppixsave,
PIX **  ppixrem 
)

pixaRemoveOutliers1()

Parameters
[in]pixasunscaled labeled templates
[in]minscorekeep everything with at least this score; use -1.0 for default.
[in]mintargetminimum desired number to retain if possible; use -1 for default.
[in]minsizeminimum number of samples required for a class; use -1 for default.
[out]ppixsave[optional debug] saved templates, with scores
[out]ppixrem[optional debug] removed templates, with scores
Returns
pixa of unscaled templates to be kept, or NULL on error
Notes:
     (1) Removing outliers is particularly important when recognition
         goes against all the samples in the training set, as opposed
         to the averages for each class.  The reason is that we get
         an identification error if a mislabeled template is a best
         match for an input sample.
     (2) Because the score values depend strongly on the quality
         of the character images, to avoid losing too many samples
         we supplement a minimum score for retention with a score
         necessary to acquire the minimum target number of templates.
         To do this we are willing to use a lower threshold,
         LowerScoreThreshold, on the score.  Consequently, with
         poor quality templates, we may keep samples with a score
         less than minscore, but never less than LowerScoreThreshold.
         And if the number of samples is less than minsize, we do
         not use any.
     (3) This is meant to be used on a BAR, where the templates all
         come from the same book; use minscore ~0.75.
     (4) Method: make a scaled recog from the input pixas.  Then,
         for each class: generate the averages, match each
         scaled template against the average, and save unscaled
         templates that had a sufficiently good match.

Definition at line 1163 of file recogtrain.c.

References L_Rdid::nascore, and L_Rdid::nasum.

Referenced by recogRemoveOutliers1().

◆ pixaRemoveOutliers2()

PIXA* pixaRemoveOutliers2 ( PIXA *  pixas,
l_float32  minscore,
l_int32  minsize,
PIX **  ppixsave,
PIX **  ppixrem 
)

pixaRemoveOutliers2()

Parameters
[in]pixasunscaled labeled templates
[in]minscorekeep everything with at least this score; use -1.0 for default.
[in]minsizeminimum number of samples required for a class; use -1 for default.
[out]ppixsave[optional debug] saved templates, with scores
[out]ppixrem[optional debug] removed templates, with scores
Returns
pixa of unscaled templates to be kept, or NULL on error
Notes:
     (1) Removing outliers is particularly important when recognition
         goes against all the samples in the training set, as opposed
         to the averages for each class.  The reason is that we get
         an identification error if a mislabeled template is a best
         match for an input sample.
     (2) This method compares each template against the average templates
         of each class, and discards any template that has a higher
         correlation to a class different from its own.  It also
         sets a lower bound on correlation scores with its class average.
     (3) This is meant to be used on a BAR, where the templates all
         come from the same book; use minscore ~0.75.

Definition at line 1367 of file recogtrain.c.

References L_Rdid::nascore.

Referenced by recogRemoveOutliers2().

◆ pixDisplayOutliers()

static PIX * pixDisplayOutliers ( PIXA *  pixas,
NUMA *  nas 
)
static

pixDisplayOutliers()

Parameters
[in]pixasunscaled labeled templates
[in]nasscores of templates (against class averages)
Returns
pix tiled pixa with text and scores, or NULL on failure
Notes:
     (1) This debug routine is called from recogRemoveOutliers2(),
         and takes the saved templates and their scores as input.

Definition at line 2224 of file recogtrain.c.

References L_CLONE, L_GET_WHITE_VAL, L_INSERT, numaGetCount(), numaGetFValue(), pixaAddPix(), pixaCreate(), pixAddBlackOrWhiteBorder(), pixaDestroy(), pixaDisplayTiledWithText(), pixaGetCount(), pixaGetPix(), pixDestroy(), pixGetText(), and pixSetText().

◆ recogAddDigitPadTemplates()

PIXA* recogAddDigitPadTemplates ( L_RECOG *  recog,
SARRAY *  sa 
)

recogAddDigitPadTemplates()

Parameters
[in]recogtrained
[in]saset of text strings that need to be padded
Returns
pixa of all templates from recog and the additional pad templates from a boot recognizer; or NULL on error
Notes:
     (1) Call recogIsPaddingNeeded() first, which returns sa of
         template text strings for classes where more templates
         are needed.

Definition at line 1772 of file recogtrain.c.

References L_Recog::charset_type, L_CLONE, L_COPY, L_NOCOPY, pixaAddPix(), pixaDestroy(), pixaGetCount(), pixaGetPix(), pixDestroy(), pixGetText(), recogCharsetAvailable(), recogExtractPixa(), recogMakeBootDigitTemplates(), sarrayGetCount(), and sarrayGetString().

Referenced by recogPadDigitTrainingSet().

◆ recogAddMissingClassStrings()

static SARRAY * recogAddMissingClassStrings ( L_RECOG *  recog)
static

recogAddMissingClassStrings()

Parameters
[in]recogtrained
Returns
sa of class string missing in recog, or NULL on error
Notes:
     (1) This returns an empty sa if there is at least one template
         in each class in recog.

Definition at line 1713 of file recogtrain.c.

References L_Recog::charset_size, L_Recog::charset_type, L_COPY, L_NOCOPY, numaAddNumber(), numaCreate(), numaDestroy(), numaGetIValue(), numaSetValue(), L_Recog::pixaa_u, pixaaGetCount(), L_Recog::sa_text, sarrayAddString(), sarrayCreate(), and sarrayGetString().

Referenced by recogIsPaddingNeeded().

◆ recogAddSample()

l_ok recogAddSample ( L_RECOG *  recog,
PIX *  pix,
l_int32  debug 
)

recogAddSample()

Parameters
[in]recog
[in]pixa single character, 1 bpp
[in]debug
Returns
0 if OK, 1 on error
Notes:
     (1) The pix is 1 bpp, with the character string label embedded.
     (2) The pixaa_u array of the recog is initialized to accept
         up to 256 different classes.  When training is finished,
         the arrays are truncated to the actual number of classes.
         To pad an existing recog from the boot recognizers, training
         is started again; if samples from a new class are added,
         the pixaa_u array is extended by adding a pixa to hold them.

Definition at line 356 of file recogtrain.c.

Referenced by recogTrainLabeled().

◆ recogAverageSamples()

l_int32 recogAverageSamples ( L_RECOG **  precog,
l_int32  debug 
)

recogAverageSamples()

Parameters
[in]precogaddr of existing recog; may be destroyed
[in]debug
Returns
0 on success, 1 on failure
Notes:
     (1) This is only called in two situations:
         (a) When splitting characters using either the DID method
             recogDecode() or the greedy splitter
             recogCorrelationBestRow()
         (b) By a special recognizer that is used to remove outliers.
         Both unscaled and scaled inputs are averaged.
     (2) If the data in any class is nonexistent (no samples), or
         very bad (no fg pixels in the average), or if the ratio
         of max/min average unscaled class template heights is
         greater than max_ht_ratio, this destroys the recog.
         The caller must check the return value of the recog.
     (3) Set debug = 1 to view the resulting templates and their centroids.

Definition at line 490 of file recogtrain.c.

References L_Recog::ave_done, boxDestroy(), boxGetGeometry(), L_CLONE, L_INSERT, L_Recog::max_ht_ratio, L_Recog::max_splith, L_Recog::maxheight_u, L_Recog::maxwidth, L_Recog::maxwidth_u, L_Recog::min_splitw, L_Recog::minheight_u, L_Recog::minwidth, L_Recog::minwidth_u, L_Recog::nasum, L_Recog::nasum_u, numaAddNumber(), numaCreate(), numaDestroy(), L_Recog::pixa, L_Recog::pixa_u, L_Recog::pixaa, L_Recog::pixaa_u, pixaAccumulateSamples(), pixaAddPix(), pixaaGetPixa(), pixaCreate(), pixaDestroy(), pixaGetCount(), pixaSizeRange(), pixClipToForeground(), pixCountPixels(), pixDestroy(), pixInvert(), pixThresholdToBinary(), L_Recog::pta, L_Recog::pta_u, L_Recog::ptaa, L_Recog::ptaa_u, ptaAddPt(), ptaaGetPta(), ptaCreate(), ptaDestroy(), recogDestroy(), recogShowAverageTemplates(), L_Recog::setsize, L_Rdid::size, and L_Recog::sumtab.

Referenced by recogDebugAverages().

◆ recogCharsetAvailable()

static l_int32 recogCharsetAvailable ( l_int32  type)
static

recogCharsetAvailable()

Parameters
[in]typeof charset for padding
Returns
1 if available; 0 if not.

Definition at line 1823 of file recogtrain.c.

References L_ARABIC_NUMERALS, L_LC_ALPHA, L_LC_ROMAN_NUMERALS, L_UC_ALPHA, and L_UC_ROMAN_NUMERALS.

Referenced by recogAddDigitPadTemplates().

◆ recogDebugAverages()

l_ok recogDebugAverages ( L_RECOG **  precog,
l_int32  debug 
)

recogDebugAverages()

Parameters
[in]precogaddr of recog
[in]debug0 no output; 1 for images; 2 for text; 3 for both
Returns
0 if OK, 1 on error
Notes:
     (1) Generates an image that pairs each of the input images used
         in training with the average template that it is best
         correlated to.  This is written into the recog.
     (2) It also generates pixa_tr of all the input training images,
         which can be used, e.g., in recogShowMatchesInRange().
     (3) Destroys the recog if the averaging function finds any bad classes.

Definition at line 2068 of file recogtrain.c.

References L_CLONE, L_INSERT, lept_mkdir(), lept_stderr(), L_Recog::pixa_tr, L_Recog::pixaa, pixaaAddPixa(), pixaaCreate(), pixaAddPix(), pixaaDisplayByPixa(), pixaaFlattenToPixa(), pixaaGetCount(), pixaaGetPix(), pixaaGetPixa(), pixaCreate(), pixAddBorder(), pixaDestroy(), pixaGetCount(), L_Recog::pixdb_ave, pixDestroy(), L_Recog::rch, rchExtract(), recogAverageSamples(), and recogIdentifyPix().

◆ recogDisplayOutlier()

static PIX * recogDisplayOutlier ( L_RECOG *  recog,
l_int32  iclass,
l_int32  jsamp,
l_int32  maxclass,
l_float32  maxscore 
)
static

recogDisplayOutlier()

Parameters
[in]recog
[in]iclasssample is in this class
[in]jsampindex of sample in class iclass
[in]maxclassindex of class with closest average to sample
[in]maxscorescore of sample with average of class maxclass
Returns
pix sample and template images, with score, or NULL on error
Notes:
     (1) This shows three templates, side-by-side:
  • The outlier sample
  • The average template from the same class
  • The average class template that best matched the outlier sample

Definition at line 2280 of file recogtrain.c.

References L_Recog::bmf, L_ADD_BELOW, L_CLONE, L_INSERT, L_Recog::pixa, L_Recog::pixaa, pixaAddPix(), pixaaGetPix(), pixaCreate(), pixAddSingleTextblock(), pixaDestroy(), pixaDisplayTiledInRows(), pixaGetPix(), and pixDestroy().

◆ recogFilterPixaBySize()

PIXA* recogFilterPixaBySize ( PIXA *  pixas,
l_int32  setsize,
l_int32  maxkeep,
l_float32  max_ht_ratio,
NUMA **  pna 
)

recogFilterPixaBySize()

Parameters
[in]pixaslabeled templates
[in]setsizesize of character set (number of classes)
[in]maxkeepmax number of templates to keep in a class
[in]max_ht_ratiomax allowed height ratio (see below)
[out]pna[optional] debug output, giving the number in each class after filtering; use NULL to skip
Returns
pixa filtered templates, or NULL on error
Notes:
     (1) The basic assumption is that the most common and larger
         templates in each class are more likely to represent the
         characters we are interested in.  For example, larger digits
         are more likely to represent page numbers, and smaller digits
         could be data in tables.  Therefore, we bias the first
         stage of filtering toward the larger characters by removing
         very small ones, and select based on proximity of the
         remaining characters to median height.
     (2) For each of the setsize classes, order the templates
         increasingly by height.  Take the rank 0.9 height.  Eliminate
         all templates that are shorter by more than max_ht_ratio.
         Of the remaining ones, select up to maxkeep that are closest
         in rank order height to the median template.

Definition at line 974 of file recogtrain.c.

References L_CLONE, L_COPY, L_INSERT, L_SORT_BY_HEIGHT, L_SORT_INCREASING, numaAddNumber(), numaCreate(), pixaAddPix(), pixaaDestroy(), pixaaGetCount(), pixaaGetPixa(), pixaCopy(), pixaCreate(), pixaDestroy(), pixaGetCount(), pixaGetPix(), pixaGetPixDimensions(), pixaJoin(), pixaSelectRange(), pixaSort(), and recogSortPixaByClass().

◆ recogIsPaddingNeeded()

l_int32 recogIsPaddingNeeded ( L_RECOG *  recog,
SARRAY **  psa 
)

recogIsPaddingNeeded()

Parameters
[in]recogtrained
[out]psaaddr of returned string containing text value
Returns
1 on error; 0 if OK, whether or not additional padding templates are required.
Notes:
     (1) This returns a string array in &sa containing character values
         for which extra templates are needed; this sarray is
         used by recogAddDigitPadTemplates().  It returns NULL
         if no padding templates are needed.

Definition at line 1654 of file recogtrain.c.

References L_Recog::charset_size, L_COPY, L_INSERT, L_Recog::min_nopad, numaDestroy(), numaGetIValue(), numaGetMin(), L_Recog::pixaa_u, pixaaGetCount(), recogAddMissingClassStrings(), L_Recog::sa_text, sarrayAddString(), and sarrayGetString().

Referenced by recogPadDigitTrainingSet().

◆ recogMakeBootDigitRecog()

L_RECOG* recogMakeBootDigitRecog ( l_int32  nsamp,
l_int32  scaleh,
l_int32  linew,
l_int32  maxyshift,
l_int32  debug 
)

recogMakeBootDigitRecog()

Parameters
[in]nsampnumber of samples of each digit; or 0
[in]scalehscale all heights to this; typ. use 40
[in]linewnormalized line width; typ. use 5; 0 to skip
[in]maxyshiftfrom nominal centroid alignment; typically 0 or 1
[in]debug1 for showing templates; 0 otherwise
Returns
recog, or NULL on error
Notes:
    (1) This takes a set of pre-computed, labeled pixa of single
        digits, and generates a recognizer from them.
        The templates used in the recognizer can be modified by:
  • scaling (isotropically to fixed height)
  • generating a skeleton and thickening so that all strokes have the same width.
    (2) The resulting templates are scaled versions of either the
        input bitmaps or images with fixed line widths.  To use the
        input bitmaps, set linew = 0; otherwise, set linew to the
        desired line width.
    (3) If nsamp == 0, this uses and extends the output from
        three boot generators:
           l_bootnum_gen1, l_bootnum_gen2, l_bootnum_gen3.
        Otherwise, it uses exactly nsamp templates of each digit,
        extracted by l_bootnum_gen4.

Definition at line 1884 of file recogtrain.c.

References pixaDestroy(), recogCreateFromPixa(), recogMakeBootDigitTemplates(), and recogShowContent().

◆ recogMakeBootDigitTemplates()

PIXA* recogMakeBootDigitTemplates ( l_int32  nsamp,
l_int32  debug 
)

recogMakeBootDigitTemplates()

Parameters
[in]nsampnumber of samples of each digit; or 0
[in]debug1 for display of templates
Returns
pixa of templates; or NULL on error
Notes:
    (1) See recogMakeBootDigitRecog().

Definition at line 1921 of file recogtrain.c.

References l_bootnum_gen1(), l_bootnum_gen2(), l_bootnum_gen4(), numaAddNumber(), numaCreate(), pixaDestroy(), pixaDisplayTiledWithText(), pixaExtendByScaling(), pixaJoin(), and pixDestroy().

Referenced by recogAddDigitPadTemplates(), and recogMakeBootDigitRecog().

◆ recogModifyTemplate()

PIX* recogModifyTemplate ( L_RECOG *  recog,
PIX *  pixs 
)

recogModifyTemplate()

Parameters
[in]recog
[in]pixs1 bpp, to be optionally scaled and turned into strokes of fixed width
Returns
pixd modified pix if OK, NULL on error

Definition at line 421 of file recogtrain.c.

References L_Recog::linew, pixClone(), pixCopy(), pixDestroy(), pixGetDimensions(), L_Rdid::pixs, pixScaleToSize(), pixSetStrokeWidth(), pixZero(), L_Recog::scaleh, and L_Recog::scalew.

Referenced by recogTrainingFinished().

◆ recogPadDigitTrainingSet()

l_ok recogPadDigitTrainingSet ( L_RECOG **  precog,
l_int32  scaleh,
l_int32  linew 
)

recogPadDigitTrainingSet()

Parameters
[in,out]precogtrained; if padding is needed, it is replaced by a new padded recog
[in]scalehmust be > 0; suggest ~40.
[in]linewuse 0 for original scanned images
Returns
0 if OK, 1 on error
Notes:
     (1) This is a no-op if padding is not needed.  However,
         if it is, this replaces the input recog with a new recog,
         padded appropriately with templates from a boot recognizer,
         and set up with correlation templates derived from
         scaleh and linew.

Definition at line 1596 of file recogtrain.c.

References L_Recog::maxyshift, pixaDestroy(), recogAddDigitPadTemplates(), recogCreateFromPixa(), recogDestroy(), recogIsPaddingNeeded(), sarrayDestroy(), and L_Recog::threshold.

◆ recogProcessLabeled()

l_ok recogProcessLabeled ( L_RECOG *  recog,
PIX *  pixs,
BOX *  box,
char *  text,
PIX **  ppix 
)

recogProcessLabeled()

Parameters
[in]recogin training mode
[in]pixsif depth > 1, will be thresholded to 1 bpp
[in]box[optional] cropping box
[in]text[optional] if null, use text field in pix
[out]ppixaddr of pix, 1 bpp, labeled
Returns
0 if OK, 1 on error
Notes:
     (1) This crops and binarizes the input image, generating a pix
         of one character where the charval is inserted into the pix.

Definition at line 265 of file recogtrain.c.

References L_Recog::num_samples, pixClipRectangle(), pixClone(), L_Rdid::pixs, and Pix::text.

Referenced by recogTrainLabeled().

◆ recogRemoveOutliers1()

l_ok recogRemoveOutliers1 ( L_RECOG **  precog,
l_float32  minscore,
l_int32  mintarget,
l_int32  minsize,
PIX **  ppixsave,
PIX **  ppixrem 
)

recogRemoveOutliers1()

Parameters
[in]precogaddr of recog with unscaled labeled templates
[in]minscorekeep everything with at least this score
[in]mintargetminimum desired number to retain if possible
[in]minsizeminimum number of samples required for a class
[out]ppixsave[optional debug] saved templates, with scores
[out]ppixrem[optional debug] removed templates, with scores
Returns
0 if OK, 1 on error.
Notes:
     (1) This is a convenience wrapper when using default parameters
         for the recog.  See pixaRemoveOutliers1() for details.
     (2) If this succeeds, the new recog replaces the input recog;
         if it fails, the input recog is destroyed.

Definition at line 1085 of file recogtrain.c.

References pixaDestroy(), pixaRemoveOutliers1(), recogCreateFromPixa(), recogDestroy(), and recogExtractPixa().

◆ recogRemoveOutliers2()

l_ok recogRemoveOutliers2 ( L_RECOG **  precog,
l_float32  minscore,
l_int32  minsize,
PIX **  ppixsave,
PIX **  ppixrem 
)

recogRemoveOutliers2()

Parameters
[in]precogaddr of recog with unscaled labeled templates
[in]minscorekeep everything with at least this score
[in]minsizeminimum number of samples required for a class
[out]ppixsave[optional debug] saved templates, with scores
[out]ppixrem[optional debug] removed templates, with scores
Returns
0 if OK, 1 on error.
Notes:
     (1) This is a convenience wrapper when using default parameters
         for the recog.  See pixaRemoveOutliers2() for details.
     (2) If this succeeds, the new recog replaces the input recog;
         if it fails, the input recog is destroyed.

Definition at line 1303 of file recogtrain.c.

References pixaDestroy(), pixaRemoveOutliers2(), recogCreateFromPixa(), recogDestroy(), and recogExtractPixa().

◆ recogShowAverageTemplates()

l_int32 recogShowAverageTemplates ( L_RECOG *  recog)

◆ recogShowContent()

l_ok recogShowContent ( FILE *  fp,
L_RECOG *  recog,
l_int32  index,
l_int32  display 
)

recogShowContent()

Parameters
[in]fpfile stream
[in]recog
[in]indexfor naming of output files of template images
[in]display1 for showing template images; 0 otherwise
Returns
0 if OK, 1 on error

Definition at line 1985 of file recogtrain.c.

References L_Recog::dna_tochar, l_dnaGetIValue(), lept_mkdir(), L_Recog::linew, L_Recog::maxyshift, numaDestroy(), numaGetIValue(), L_Recog::pixaa_u, pixaaDisplayByPixa(), pixaaGetCount(), L_Recog::scaleh, L_Recog::scalew, L_Recog::setsize, and L_Recog::threshold.

Referenced by recogMakeBootDigitRecog().

◆ recogShowMatch()

PIX* recogShowMatch ( L_RECOG *  recog,
PIX *  pix1,
PIX *  pix2,
BOX *  box,
l_int32  index,
l_float32  score 
)

recogShowMatch()

Parameters
[in]recog
[in]pix1input pix; several possibilities
[in]pix2[optional] matching template
[in]box[optional] region in pix1 for which pix2 matches
[in]indexindex of matching template; use -1 to disable printing
[in]scorescore of match
Returns
pixd pair of images, showing input pix and best template, optionally with matching information, or NULL on error.
Notes:
     (1) pix1 can be one of these:
         (a) The input pix alone, which can be either a single character
             (box == NULL) or several characters that need to be
             segmented.  If more than one character is present, the box
             region is displayed with an outline.
         (b) Both the input pix and the matching template.  In this case,
             pix2 and box will both be null.
     (2) If the bmf has been made (by a call to recogMakeBmf())
         and the index >= 0, the text field, match score and index
         will be rendered; otherwise their values will be ignored.

Definition at line 2429 of file recogtrain.c.

References L_Recog::bmf, L_ADD_BELOW, L_CLONE, pixaAddPix(), pixaCreate(), pixAddBorderGeneral(), pixAddSingleTextblock(), pixaDestroy(), pixaDisplayTiledInRows(), pixClone(), pixConvertTo32(), pixCopy(), pixDestroy(), pixRenderBoxArb(), and recogGetClassString().

Referenced by recogIdentifyPixa(), and recogShowMatchesInRange().

◆ recogShowMatchesInRange()

l_ok recogShowMatchesInRange ( L_RECOG *  recog,
PIXA *  pixa,
l_float32  minscore,
l_float32  maxscore,
l_int32  display 
)

recogShowMatchesInRange()

Parameters
[in]recog
[in]pixaof 1 bpp images to match
[in]minscoremin score to include output
[in]maxscoremax score to include output
[in]display1 to display the result
Returns
0 if OK, 1 on error
Notes:
     (1) This gives a visual output of the best matches for a given
         range of scores.  Each pair of images can optionally be
         labeled with the index of the best match and the correlation.
     (2) To use this, save a set of 1 bpp images (labeled or
         unlabeled) that can be given to a recognizer in a pixa.
         Then call this function with the pixa and parameters
         to filter a range of scores.

Definition at line 2335 of file recogtrain.c.

References L_CLONE, L_INSERT, L_Rdid::nascore, numaAddNumber(), numaCreate(), numaGetFValue(), numaGetIValue(), pixaAddPix(), pixaCreate(), pixaGetCount(), pixaGetPix(), pixDestroy(), L_Recog::rch, rchExtract(), recogIdentifyPix(), and recogShowMatch().

◆ recogSortPixaByClass()

PIXAA* recogSortPixaByClass ( PIXA *  pixa,
l_int32  setsize 
)

recogSortPixaByClass()

Parameters
[in]pixalabeled templates
[in]setsizesize of character set (number of classes)
Returns
paa pixaa where each pixa has templates for one class, or null on error

Definition at line 1045 of file recogtrain.c.

References L_Recog::pixaa_u, recogCreateFromPixaNoFinish(), and recogDestroy().

Referenced by recogFilterPixaBySize().

◆ recogTemplatesAreOK()

static l_int32 recogTemplatesAreOK ( L_RECOG *  recog,
l_int32  minsize,
l_float32  minfract,
l_int32 *  pok 
)
static

recogTemplatesAreOK()

Parameters
[in] recog
[in] minsize: set to -1 for default
[in] minfract: set to -1.0 for default
[out] pok: set to 1 if template set is valid; 0 otherwise
Returns
1 on error; 0 otherwise. An invalid template set is not an error.
Notes:
     (1) This is called by recogTrainingFinished().  A value of 0
         in *pok will cause recogTrainingFinished() to destroy the recog.
     (2) minsize is the minimum number of samples required for
         the class; -1 uses the default
     (3) minfract is the minimum fraction of classes required for
         the recog to be usable; -1.0 uses the default

Definition at line 912 of file recogtrain.c.

Referenced by recogTrainingFinished().

◆ recogTrainFromBoot()

PIXA* recogTrainFromBoot ( L_RECOG *  recogboot,
PIXA *  pixas,
l_float32  minscore,
l_int32  threshold,
l_int32  debug 
)

recogTrainFromBoot()

Parameters
[in] recogboot: labeled boot recognizer
[in] pixas: set of unlabeled input characters
[in] minscore: min score for accepting the example; e.g., 0.75
[in] threshold: for binarization, if needed
[in] debug: 1 for debug output saved to recogboot; 0 otherwise
Returns
pixad labeled version of input pixas, trained on a BSR, or NULL on error
Notes:
     (1) This takes pixas of unscaled single characters and recogboot,
         a bootstrap recognizer (BSR) that has been set up with parameters
           * scaleh: scale all templates to this height
           * linew: width of normalized strokes, or 0 if using
             the input image
         It modifies the pix in pixas accordingly and correlates
         with the templates in the BSR.  It returns those input
         images in pixas whose best correlation with the BSR is at
         or above minscore.  The returned pix have added text labels
         for the text string of the class to which the best
         correlated template belongs.
     (2) Identification occurs in scaled mode (typically with h = 40),
         optionally using width-normalized line images derived
         from those in pixas.

Definition at line 1492 of file recogtrain.c.

References L_CLONE, L_COPY, L_INSERT, L_Recog::linew, pixaAddPix(), pixaCopy(), pixaCreate(), L_Recog::pixadb_boot, pixaDestroy(), pixaGetCount(), pixaGetPix(), pixaSetStrokeWidth(), pixaVerifyDepth(), pixConvertTo1(), pixDestroy(), pixScaleToSize(), pixSetText(), L_Recog::rch, rchExtract(), recogIdentifyPix(), and L_Recog::scaleh.

◆ recogTrainingFinished()

l_ok recogTrainingFinished ( L_RECOG **  precog,
l_int32  modifyflag,
l_int32  minsize,
l_float32  minfract 
)

recogTrainingFinished()

Parameters
[in] precog: addr of recog
[in] modifyflag: 1 to use recogModifyTemplate(); 0 otherwise
[in] minsize: set to -1 for default
[in] minfract: set to -1.0 for default
Returns
0 if OK, 1 on error (input recog will be destroyed)
Notes:
     (1) This must be called after all training samples have been added.
     (2) If the templates are not good enough, the recog input is destroyed.
     (3) Usually, modifyflag == 1, because we want to apply
         recogModifyTemplate() to generate the actual templates
         that will be used.  The one exception is when reading a
         serialized recog: there we want to put the same set of
         templates in both the unscaled and modified pixaa.
         See recogReadStream() to see why we do this.
     (4) See recogTemplatesAreOK() for minsize and minfract usage.
     (5) The following things are done here:
         (a) Allocate (or reallocate) storage for (possibly) modified
             bitmaps, centroids, and fg areas.
         (b) Generate the (possibly) modified bitmaps.
         (c) Compute centroid and fg area data for both unscaled and
             modified bitmaps.
         (d) Truncate the pixaa, ptaa and numaa arrays down from
             256 to the actual size.
     (6) Putting these operations here makes it simple to recompute
         the recog with different modifications on the bitmaps.
     (7) Call recogShowContent() to display the templates, both
         unscaled and modified.

Definition at line 787 of file recogtrain.c.

References L_Recog::centtab, L_CLONE, L_INSERT, L_Recog::maxarraysize, L_Recog::naasum, L_Recog::naasum_u, numaaAddNumber(), numaaCreateFull(), numaaDestroy(), numaaTruncate(), L_Recog::pixaa, L_Recog::pixaa_u, pixaaAddPix(), pixaaCreate(), pixaaDestroy(), pixaaGetPixa(), pixaaInitFull(), pixaaTruncate(), pixaCreate(), pixaDestroy(), pixaGetCount(), pixaGetPix(), pixCentroid(), pixClone(), pixCountPixels(), pixDestroy(), L_Recog::ptaa, L_Recog::ptaa_u, ptaaAddPt(), ptaaCreate(), ptaaDestroy(), ptaaInitFull(), ptaaTruncate(), ptaCreate(), ptaDestroy(), recogDestroy(), recogModifyTemplate(), recogTemplatesAreOK(), L_Recog::setsize, L_Rdid::size, L_Recog::sumtab, and L_Recog::train_done.

Referenced by recogAddAllSamples(), and recogCreateFromPixa().

◆ recogTrainLabeled()

l_ok recogTrainLabeled ( L_RECOG *  recog,
PIX *  pixs,
BOX *  box,
char *  text,
l_int32  debug 
)

recogTrainLabeled()

Parameters
[in] recog: in training mode
[in] pixs: if depth > 1, will be thresholded to 1 bpp
[in] box: [optional] cropping box
[in] text: [optional] if null, use text field in pix
[in] debug: 1 to display images of samples not captured
Returns
0 if OK, 1 on error
Notes:
     (1) Training is restricted to the addition of a single
         character in an arbitrary (e.g., UTF8) charset
     (2) If box != null, it should represent the location in pixs
         of the character image.

Definition at line 216 of file recogtrain.c.

References pixDestroy(), L_Rdid::pixs, recogAddSample(), and recogProcessLabeled().

Referenced by recogCreateFromPixaNoFinish().