/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file recogbasic.c *
 *
 *      Recog creation, destruction and access
 *         L_RECOG            *recogCreateFromRecog()
 *         L_RECOG            *recogCreateFromPixa()
 *         L_RECOG            *recogCreateFromPixaNoFinish()
 *         L_RECOG            *recogCreate()
 *         void                recogDestroy()
 *
 *      Recog accessors
 *         l_int32             recogGetCount()
 *         l_int32             recogSetParams()
 *         static l_int32      recogGetCharsetSize()
 *
 *      Character/index lookup
 *         l_int32             recogGetClassIndex()
 *         l_int32             recogStringToIndex()
 *         l_int32             recogGetClassString()
 *         l_int32             l_convertCharstrToInt()
 *
 *      Serialization
 *         L_RECOG            *recogRead()
 *         L_RECOG            *recogReadStream()
 *         L_RECOG            *recogReadMem()
 *         l_int32             recogWrite()
 *         l_int32             recogWriteStream()
 *         l_int32             recogWriteMem()
 *         PIXA               *recogExtractPixa()
 *         static l_int32      recogAddCharstrLabels()
 *         static l_int32      recogAddAllSamples()
 *
 *  The recognizer functionality is split into four files:
 *    recogbasic.c: create, destroy, access, serialize
 *    recogtrain.c: training on labeled and unlabeled data
 *    recogident.c: running the recognizer(s) on input
 *    recogdid.c:   running the recognizer(s) on input using a
 *                  document image decoding (DID) hidden markov model
 *
 *  This is a content-adapted (or book-adapted) recognizer (BAR) application.
 *  The recognizers here are typically assembled from data that has
 *  been labeled by a generic recognition system, such as Tesseract.
 *  The general procedure to create a recognizer (recog) from labeled data is
 *  to add the labeled character bitmaps, either one at a time or
 *  all together from a pixa with labeled pix.
 *
 *  The suggested use for a BAR that consists of labeled templates drawn
 *  from a single source (e.g., a book) is to identify unlabeled samples
 *  by using unscaled character templates in the BAR, picking the
 *  template closest to the unlabeled sample.
 *
 *  Outliers can be removed from a pixa of labeled pix.  This is one of
 *  two methods that use averaged templates (the other is greedy splitting
 *  of characters).  See recogtrain.c for a discussion and the implementation.
 *
 *  A special bootstrap recognizer (BSR) can be used to make a BAR from
 *  unlabeled book data.  This is done by comparing character images
 *  from the book with labeled templates in the BSR, where all images
 *  are scaled to h = 40.  The templates can be either the scanned images
 *  or images consisting of width-normalized strokes derived from
 *  the skeleton of the character bitmaps.
 *
 *  Two BARs of labeled character data, that have been made by
 *  different recognizers, can be joined by extracting a pixa of the
 *  labeled templates from each, joining the two pixa, and then
 *  regenerating a BAR from the joined set of templates.
 *  If all the labeled character data is from a single source (e.g., a book),
 *  identification can proceed using unscaled templates (either the input
 *  image or width-normalized lines).  But if the labeled data comes from
 *  more than one source, (a "hybrid" recognizer), the templates should
 *  be scaled, and we recommend scaling to a fixed height.
 *
 *  Suppose it is not possible to generate a BAR with a sufficient number
 *  of templates of each class taken from a single source.  In that case,
 *  templates from the BSR itself can be added.  This is the condition
 *  described above, where the labeled templates come from multiple
 *  sources, and it is necessary to do all character matches using
 *  templates that have been scaled to a fixed height (e.g., 40).
 *  Likewise, the samples to be identified using this hybrid recognizer
 *  must be modified in the same way.  See prog/recogtest3.c for an
 *  example of the steps that can be taken in the construction of a BAR
 *  using a BSR.
 *
 *  For training numeric input, an example set of calls that scales
 *  each training input to fixed h and will use the line templates of
 *  width linew for identifying unknown characters is:
 *         L_Recog  *rec = recogCreate(0, h, linew, 128, 1);
 *         for (i = 0; i < n; i++) {  // read in n training digits
 *             Pix *pix = ...
 *             recogTrainLabeled(rec, pix, NULL, text[i], 0);
 *         }
 *         recogTrainingFinished(&rec, 1, -1, -1.0);  // required
 *
 *  It is an error if any function that computes averages, removes
 *  outliers or requests identification of an unlabeled character,
 *  such as:
 *     (1) computing the sample averages: recogAverageSamples()
 *     (2) removing outliers: recogRemoveOutliers1() or recogRemoveOutliers2()
 *     (3) requesting identification of an unlabeled character:
 *         recogIdentifyPix()
 *  is called before an explicit call to finish training.  Note that
 *  to do further training on a "finished" recognizer, you can set
 *         recog->train_done = FALSE;
 *  add the new training samples, and again call
 *         recogTrainingFinished(&rec, 1, -1, -1.0);  // required
 *
 *  If not scaling, using the images directly for identification, and
 *  removing outliers, do something like this:
 *      L_Recog  *rec = recogCreate(0, 0, 0, 128, 1);
 *      for (i = 0; i < n; i++) {  // read in n training characters
 *          Pix *pix = ...
 *          recogTrainLabeled(rec, pix, NULL, text[i], 0);
 *      }
 *      recogTrainingFinished(&rec, 1, -1, -1.0);
 *      if (!rec) ... [return]
 *      // remove outliers
 *      recogRemoveOutliers1(&rec, 0.7, 2, NULL, NULL);
 *
 *  You can generate a recognizer from a pixa where the text field in
 *  each pix is the character string label for the pix.  For example,
 *  the following recognizer will store unscaled line images:
 *      L_Recog  *rec = recogCreateFromPixa(pixa, 0, 0, linew, 128, 1);
 *  and in use, it is fed unscaled line images to identify.
 *
 *  For the following, assume that you have a pixa of labeled templates.
 *  If it is likely that some of the input templates are mislabeled,
 *  there are several things that can be done to remove them.
 *  The first is to put a size and quantity filter on them; e.g.
 *       Pixa *pixa2 = recogFilterPixaBySize(pixa1, 10, 15, 2.6);
 *  Then you can remove outliers; e.g.,
 *       Pixa *pixa3 = pixaRemoveOutliers2(pixa2, -1.0, -1, NULL, NULL);
 *
 *  To this point, all templates are from a single source, so you
 *  can make a recognizer that uses the unscaled templates and optionally
 *  attempts to split touching characters:
 *       L_Recog *recog1 = recogCreateFromPixa(pixa3, ...);
 *  Alternatively, if you need more templates for some of the classes,
 *  you can pad with templates from a "bootstrap" recognizer (BSR).
 *  If you pad, it is necessary to scale the templates and input
 *  samples to a fixed height, and no attempt will be made to split
 *  the input sample connected components:
 *       L_Recog *recog1 = recogCreateFromPixa(pixa3, 0, 40, 0, 128, 0);
 *       recogPadDigitTrainingSet(&recog1, 40, 0);
 *
 *  A special case is a pure BSR, that contains images scaled to a fixed
 *  height (we use 40 in these examples).
 *  For this, use either the scanned bitmap:
 *      L_Recog  *recboot = recogCreateFromPixa(pixa, 0, 40, 0, 128, 1);
 *  or width-normalized lines (use width of 5 here):
 *      L_Recog  *recboot = recogCreateFromPixa(pixa, 0, 40, 5, 128, 1);
 *
 *  This can be used to train a new book-adapted recognizer (BAR), on
 *  unlabeled data from, e.g., a book.  To do this, the following is required:
 *   (1) the input images from the book must be scaled in the same
 *       way as those in the BSR, and
 *   (2) both the BSR and the input images must be set up to be either
 *       input scanned images or width-normalized lines.
 *
 * 
*/ #ifdef HAVE_CONFIG_H #include #endif /* HAVE_CONFIG_H */ #include #include "allheaders.h" static const l_int32 MaxExamplesInClass = 256; /* Default recog parameters that can be changed */ static const l_int32 DefaultCharsetType = L_ARABIC_NUMERALS; static const l_int32 DefaultMinNopad = 1; static const l_float32 DefaultMaxWHRatio = 3.0; /* max allowed w/h ratio for a component to be split */ static const l_float32 DefaultMaxHTRatio = 2.6; /* max allowed ratio of max/min unscaled averaged template heights */ static const l_int32 DefaultThreshold = 150; /* for binarization */ static const l_int32 DefaultMaxYShift = 1; /* for identification */ /* Static functions */ static l_int32 recogGetCharsetSize(l_int32 type); static l_int32 recogAddCharstrLabels(L_RECOG *recog); static l_int32 recogAddAllSamples(L_RECOG **precog, PIXAA *paa, l_int32 debug); /*------------------------------------------------------------------------* * Recog: initialization and destruction * *------------------------------------------------------------------------*/ /*! * \brief recogCreateFromRecog() * * \param[in] recs source recog with arbitrary input parameters * \param[in] scalew scale all widths to this; use 0 otherwise * \param[in] scaleh scale all heights to this; use 0 otherwise * \param[in] linew width of normalized strokes; use 0 to skip * \param[in] threshold for binarization; typically ~128 * \param[in] maxyshift from nominal centroid alignment; default is 1 * \return recd, or NULL on error * *
 * Notes:
 *      (1) This is a convenience function that generates a recog using
 *          the unscaled training data in an existing recog.
 *      (2) It is recommended to use %maxyshift = 1 (the default value)
 *      (3) See recogCreate() for use of %scalew, %scaleh and %linew.
 * 
*/
L_RECOG *
recogCreateFromRecog(L_RECOG  *recs,
                     l_int32   scalew,
                     l_int32   scaleh,
                     l_int32   linew,
                     l_int32   threshold,
                     l_int32   maxyshift)
{
PIXA     *templates;
L_RECOG  *result;

    PROCNAME("recogCreateFromRecog");

    if (recs == NULL)
        return (L_RECOG *)ERROR_PTR("recs not defined", procName, NULL);

        /* Pull a pixa out of the source recog and build the new recog
         * from it with the requested parameters.  The pixa is only
         * needed for the duration of the build. */
    templates = recogExtractPixa(recs);
    result = recogCreateFromPixa(templates, scalew, scaleh, linew,
                                 threshold, maxyshift);
    pixaDestroy(&templates);
    return result;
}


/*!
 * \brief   recogCreateFromPixa()
 *
 * \param[in]    pixa        of labeled, 1 bpp images
 * \param[in]    scalew      scale all widths to this; use 0 otherwise
 * \param[in]    scaleh      scale all heights to this; use 0 otherwise
 * \param[in]    linew       width of normalized strokes; use 0 to skip
 * \param[in]    threshold   for binarization; typically ~150
 * \param[in]    maxyshift   from nominal centroid alignment; default is 1
 * \return  recog, or NULL on error
 *
 *
 * Notes:
 *      (1) This is a convenience function for training from labeled data.
 *          The pixa can be read from file.
 *      (2) The pixa should contain the unscaled bitmaps used for training.
 *      (3) See recogCreate() for use of %scalew, %scaleh and %linew.
 *      (4) It is recommended to use %maxyshift = 1 (the default value)
 *      (5) All examples in the same class (i.e., with the same character
 *          label) should be similar.  They can be made similar by invoking
 *          recogRemoveOutliers[1,2]() on %pixa before calling this function.
 * 
*/
L_RECOG *
recogCreateFromPixa(PIXA    *pixa,
                    l_int32  scalew,
                    l_int32  scaleh,
                    l_int32  linew,
                    l_int32  threshold,
                    l_int32  maxyshift)
{
L_RECOG  *rec;

    PROCNAME("recogCreateFromPixa");

    if (pixa == NULL)
        return (L_RECOG *)ERROR_PTR("pixa not defined", procName, NULL);

        /* Step 1: load all labeled samples into a new recog */
    rec = recogCreateFromPixaNoFinish(pixa, scalew, scaleh, linew,
                                      threshold, maxyshift);
    if (rec == NULL)
        return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);

        /* Step 2: finish training.  The recog is passed by address and
         * the handle is re-checked afterwards, because it can come
         * back NULL. */
    recogTrainingFinished(&rec, 1, -1, -1.0);
    if (rec == NULL)
        return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL);

    return rec;
}


/*!
 * \brief   recogCreateFromPixaNoFinish()
 *
 * \param[in]    pixa        of labeled, 1 bpp images
 * \param[in]    scalew      scale all widths to this; use 0 otherwise
 * \param[in]    scaleh      scale all heights to this; use 0 otherwise
 * \param[in]    linew       width of normalized strokes; use 0 to skip
 * \param[in]    threshold   for binarization; typically ~150
 * \param[in]    maxyshift   from nominal centroid alignment; default is 1
 * \return  recog, or NULL on error
 *
 *
 * Notes:
 *      (1) See recogCreateFromPixa() for details.
 *      (2) This is also used to generate a pixaa with templates
 *          in each class within a pixa.  For that, all args except for
 *          %pixa are ignored.
 * 
*/
L_RECOG *
recogCreateFromPixaNoFinish(PIXA    *pixa,
                            l_int32  scalew,
                            l_int32  scaleh,
                            l_int32  linew,
                            l_int32  threshold,
                            l_int32  maxyshift)
{
char     *label;
l_int32   idx, npix, nlabeled, allpresent, samedepth, maxdepth;
PIX      *pixt;
L_RECOG  *rec;

    PROCNAME("recogCreateFromPixaNoFinish");

    if (pixa == NULL)
        return (L_RECOG *)ERROR_PTR("pixa not defined", procName, NULL);

        /* Every pix must be 1 bpp ... */
    pixaVerifyDepth(pixa, &samedepth, &maxdepth);
    if (maxdepth > 1)
        return (L_RECOG *)ERROR_PTR("not all pix are 1 bpp", procName, NULL);

        /* ... and every slot in the pixa must be occupied */
    pixaIsFull(pixa, &allpresent, NULL);
    if (!allpresent)
        return (L_RECOG *)ERROR_PTR("not all pix are present", procName, NULL);

        /* At least one pix must carry a label.  Having fewer labels
         * than pix is reported but does not stop the build. */
    npix = pixaGetCount(pixa);
    pixaCountText(pixa, &nlabeled);
    if (nlabeled == 0)
        return (L_RECOG *)ERROR_PTR("no pix have text strings", procName, NULL);
    if (nlabeled < npix) {
        L_ERROR("%d text strings < %d pix\n", procName, nlabeled, npix);
    }

    rec = recogCreate(scalew, scaleh, linew, threshold, maxyshift);
    if (rec == NULL)
        return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);

        /* Train on each labeled pix; unlabeled ones are skipped */
    for (idx = 0; idx < npix; idx++) {
        pixt = pixaGetPix(pixa, idx, L_CLONE);
        label = pixGetText(pixt);
        if (label != NULL && label[0] != '\0') {
            recogTrainLabeled(rec, pixt, NULL, label, 0);
        } else {
            L_ERROR("pix[%d] has no text\n", procName, idx);
        }
        pixDestroy(&pixt);
    }

    return rec;
}


/*!
 * \brief   recogCreate()
 *
 * \param[in]    scalew      scale all widths to this; use 0 otherwise
 * \param[in]    scaleh      scale all heights to this; use 0 otherwise
 * \param[in]    linew       width of normalized strokes; use 0 to skip
 * \param[in]    threshold   for binarization; typically ~128; 0 for default
 * \param[in]    maxyshift   from nominal centroid alignment; default is 1
 * \return  recog, or NULL on error
 *
 *
 * Notes:
 *      (1) If %scalew == 0 and %scaleh == 0, no scaling is done.
 *          If one of these is 0 and the other is > 0, scaling is isotropic
 *          to the requested size.  We typically do not set both > 0.
 *      (2) Use linew > 0 to convert the templates to images with fixed
 *          width strokes.  linew == 0 skips the conversion.
 *      (3) The only valid values for %maxyshift are 0, 1 and 2.
 *          It is recommended to use %maxyshift == 1 (default value).
 *          Using %maxyshift == 0 is much faster than %maxyshift == 1, but
 *          it is much less likely to find the template with the best
 *          correlation.  Use of anything but 1 results in a warning.
 *      (4) Scaling is used for finding outliers and for training a
 *          book-adapted recognizer (BAR) from a bootstrap recognizer (BSR).
 *          Scaling the height to a fixed value and scaling the width
 *          accordingly (e.g., %scaleh = 40, %scalew = 0) is recommended.
 *      (5) The storage for most of the arrays is allocated when training
 *          is finished.
 * 
*/
L_RECOG *
recogCreate(l_int32  scalew,
            l_int32  scaleh,
            l_int32  linew,
            l_int32  threshold,
            l_int32  maxyshift)
{
L_RECOG  *recog;

    PROCNAME("recogCreate");

    if (scalew < 0 || scaleh < 0)
        return (L_RECOG *)ERROR_PTR("invalid scalew or scaleh", procName, NULL);
    if (linew > 10)
        return (L_RECOG *)ERROR_PTR("invalid linew > 10", procName, NULL);

        /* 0 selects the default; other out-of-range values are replaced
         * by the default with a warning. */
    if (threshold == 0) threshold = DefaultThreshold;
    if (threshold < 0 || threshold > 255) {
        L_WARNING("invalid threshold; using default\n", procName);
        threshold = DefaultThreshold;
    }

        /* Anything other than maxyshift == 1 produces a warning */
    if (maxyshift < 0 || maxyshift > 2) {
        L_WARNING("invalid maxyshift; using default value\n", procName);
        maxyshift = DefaultMaxYShift;
    } else if (maxyshift == 0) {
        L_WARNING("Using maxyshift = 0; faster, worse correlation results\n",
                  procName);
    } else if (maxyshift == 2) {
        L_WARNING("Using maxyshift = 2; slower\n", procName);
    }

        /* Fail cleanly instead of dereferencing NULL if the
         * allocation does not succeed. */
    recog = (L_RECOG *)LEPT_CALLOC(1, sizeof(L_RECOG));
    if (!recog)
        return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL);

    recog->templ_use = L_USE_ALL_TEMPLATES;  /* default */
    recog->threshold = threshold;
    recog->scalew = scalew;
    recog->scaleh = scaleh;
    recog->linew = linew;
    recog->maxyshift = maxyshift;
    recogSetParams(recog, 1, -1, -1.0, -1.0);
    recog->bmf = bmfCreate(NULL, 6);
    recog->bmf_size = 6;
    recog->maxarraysize = MaxExamplesInClass;

        /* Generate the LUTs */
    recog->centtab = makePixelCentroidTab8();
    recog->sumtab = makePixelSumTab8();
    recog->sa_text = sarrayCreate(0);
    recog->dna_tochar = l_dnaCreate(0);

        /* Input default values for min component size for splitting.
         * These are overwritten when recogTrainingFinished() is called. */
    recog->min_splitw = 6;
    recog->max_splith = 60;

        /* Allocate the paa for the unscaled training bitmaps */
    recog->pixaa_u = pixaaCreate(recog->maxarraysize);

        /* Generate the storage for debugging */
    recog->pixadb_boot = pixaCreate(2);
    recog->pixadb_split = pixaCreate(2);
    return recog;
}


/*!
* \brief recogDestroy() * * \param[in,out] precog will be set to null before returning * \return void */ void recogDestroy(L_RECOG **precog) { L_RECOG *recog; PROCNAME("recogDestroy"); if (!precog) { L_WARNING("ptr address is null\n", procName); return; } if ((recog = *precog) == NULL) return; LEPT_FREE(recog->centtab); LEPT_FREE(recog->sumtab); sarrayDestroy(&recog->sa_text); l_dnaDestroy(&recog->dna_tochar); pixaaDestroy(&recog->pixaa_u); pixaDestroy(&recog->pixa_u); ptaaDestroy(&recog->ptaa_u); ptaDestroy(&recog->pta_u); numaDestroy(&recog->nasum_u); numaaDestroy(&recog->naasum_u); pixaaDestroy(&recog->pixaa); pixaDestroy(&recog->pixa); ptaaDestroy(&recog->ptaa); ptaDestroy(&recog->pta); numaDestroy(&recog->nasum); numaaDestroy(&recog->naasum); pixaDestroy(&recog->pixa_tr); pixaDestroy(&recog->pixadb_ave); pixaDestroy(&recog->pixa_id); pixDestroy(&recog->pixdb_ave); pixDestroy(&recog->pixdb_range); pixaDestroy(&recog->pixadb_boot); pixaDestroy(&recog->pixadb_split); bmfDestroy(&recog->bmf); rchDestroy(&recog->rch); rchaDestroy(&recog->rcha); recogDestroyDid(recog); LEPT_FREE(recog); *precog = NULL; } /*------------------------------------------------------------------------* * Recog accessors * *------------------------------------------------------------------------*/ /*! * \brief recogGetCount() * * \param[in] recog * \return count of classes in recog; 0 if no recog or on error */ l_int32 recogGetCount(L_RECOG *recog) { PROCNAME("recogGetCount"); if (!recog) return ERROR_INT("recog not defined", procName, 0); return recog->setsize; } /*! 
* \brief recogSetParams() * * \param[in] recog to be padded, if necessary * \param[in] type type of char set; -1 for default; * see enum in recog.h * \param[in] min_nopad min number in a class without padding; * use -1 for default * \param[in] max_wh_ratio max width/height ratio allowed for splitting; * use -1.0 for default * \param[in] max_ht_ratio max of max/min averaged template height ratio; * use -1.0 for default * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This is called when a recog is created.
 *      (2) Default %min_nopad value allows for some padding.
 *          To disable padding, set %min_nopad = 0.  To pad only when
 *          no samples are available for the class, set %min_nopad = 1.
 *      (3) The %max_wh_ratio limits the width/height ratio for components
 *          that we attempt to split.  Splitting long components is expensive.
 *      (4) The %max_ht_ratio is a quality requirement on the training data.
 *          The recognizer will not run if the averages are computed and
 *          the templates do not satisfy it.
 * 
*/
l_ok
recogSetParams(L_RECOG   *recog,
               l_int32    type,
               l_int32    min_nopad,
               l_float32  max_wh_ratio,
               l_float32  max_ht_ratio)
{
    PROCNAME("recogSetParams");

    if (recog == NULL)
        return ERROR_INT("recog not defined", procName, 1);

        /* Start from the defaults, then override with each valid input */
    recog->charset_type = DefaultCharsetType;
    if (type >= 0)
        recog->charset_type = type;
    recog->charset_size = recogGetCharsetSize(recog->charset_type);

    recog->min_nopad = DefaultMinNopad;
    if (min_nopad >= 0)
        recog->min_nopad = min_nopad;

    recog->max_wh_ratio = DefaultMaxWHRatio;
    if (max_wh_ratio > 0.0)
        recog->max_wh_ratio = max_wh_ratio;

        /* A max/min height ratio is only accepted if it exceeds 1.0 */
    recog->max_ht_ratio = DefaultMaxHTRatio;
    if (max_ht_ratio > 1.0)
        recog->max_ht_ratio = max_ht_ratio;

    return 0;
}


/*!
 * \brief   recogGetCharsetSize()
 *
 * \param[in]    type     of charset
 * \return  size of charset, or 0 if unknown or on error
 */
static l_int32
recogGetCharsetSize(l_int32  type)
{
l_int32  size;

    PROCNAME("recogGetCharsetSize");

    switch (type) {
    case L_ARABIC_NUMERALS:
        size = 10;
        break;
    case L_LC_ROMAN_NUMERALS:
    case L_UC_ROMAN_NUMERALS:
        size = 7;
        break;
    case L_LC_ALPHA:
    case L_UC_ALPHA:
        size = 26;
        break;
    case L_UNKNOWN:
        size = 0;
        break;
    default:
        L_ERROR("invalid charset_type %d\n", procName, type);
        size = 0;
        break;
    }
    return size;
}


/*------------------------------------------------------------------------*
 *                         Character/index lookup                         *
 *------------------------------------------------------------------------*/
/*!
 * \brief   recogGetClassIndex()
 *
 * \param[in]    recog     with LUT's pre-computed
 * \param[in]    val       integer value for the character class
 * \param[in]    text      text from which %val was derived; used if not found
 * \param[out]   pindex    index into dna_tochar
 * \return  0 if found; 1 if not found and added; 2 on error.
 *
 *
 * Notes:
 *      (1) This is used during training.  There is one entry in
 *          recog->dna_tochar (integer value, e.g., ascii) and
 *          one in recog->sa_text (e.g., ascii letter in a string)
 *          for each character class.
 *      (2) This searches the dna character array for %val.  If it is
 *          not found, the template represents a character class not
 *          already seen: it increments setsize (the number of character
 *          classes) by 1, and augments both the index (dna_tochar)
 *          and text (sa_text) arrays.
 *      (3) Returns the index in &index, except on error.
 *      (4) Caller must check the function return value.
 * 
*/ l_int32 recogGetClassIndex(L_RECOG *recog, l_int32 val, char *text, l_int32 *pindex) { l_int32 i, n, ival; PROCNAME("recogGetClassIndex"); if (!pindex) return ERROR_INT("&index not defined", procName, 2); *pindex = -1; if (!recog) return ERROR_INT("recog not defined", procName, 2); if (!text) return ERROR_INT("text not defined", procName, 2); /* Search existing characters */ n = l_dnaGetCount(recog->dna_tochar); for (i = 0; i < n; i++) { l_dnaGetIValue(recog->dna_tochar, i, &ival); if (val == ival) { /* found */ *pindex = i; return 0; } } /* If not found... */ l_dnaAddNumber(recog->dna_tochar, val); sarrayAddString(recog->sa_text, text, L_COPY); recog->setsize++; *pindex = n; return 1; } /*! * \brief recogStringToIndex() * * \param[in] recog * \param[in] text text string for some class * \param[out] pindex index for that class; -1 if not found * \return 0 if OK, 1 on error not finding the string is an error */ l_ok recogStringToIndex(L_RECOG *recog, char *text, l_int32 *pindex) { char *charstr; l_int32 i, n, diff; PROCNAME("recogStringtoIndex"); if (!pindex) return ERROR_INT("&index not defined", procName, 1); *pindex = -1; if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!text) return ERROR_INT("text not defined", procName, 1); /* Search existing characters */ n = recog->setsize; for (i = 0; i < n; i++) { recogGetClassString(recog, i, &charstr); if (!charstr) { L_ERROR("string not found for index %d\n", procName, i); continue; } diff = strcmp(text, charstr); LEPT_FREE(charstr); if (diff) continue; *pindex = i; return 0; } return 1; /* not found */ } /*! * \brief recogGetClassString() * * \param[in] recog * \param[in] index into array of char types * \param[out] pcharstr string representation; * returns an empty string on error * \return 0 if found, 1 on error * *
 * Notes:
 *      (1) Extracts a copy of the string from sa_text, which
 *          the caller must free.
 *      (2) Caller must check the function return value.
 * 
*/
l_int32
recogGetClassString(L_RECOG  *recog,
                    l_int32   index,
                    char    **pcharstr)
{
    PROCNAME("recogGetClassString");

    if (!pcharstr)
        return ERROR_INT("&charstr not defined", procName, 1);

        /* Hand back an empty string on any later failure.  Note that
         * the return value is 2, not 1, when %recog is not defined. */
    *pcharstr = stringNew("");
    if (!recog)
        return ERROR_INT("recog not defined", procName, 2);

    if (index < 0 || index >= recog->setsize)
        return ERROR_INT("invalid index", procName, 1);

        /* Replace the placeholder with a copy of the class string */
    LEPT_FREE(*pcharstr);
    *pcharstr = sarrayGetString(recog->sa_text, index, L_COPY);
    return 0;
}


/*!
 * \brief   l_convertCharstrToInt()
 *
 * \param[in]    str     input string representing one UTF-8 character;
 *                       not more than 4 bytes
 * \param[out]   pval    integer value for the input.  Think of it
 *                       as a 1-to-1 hash code.
 * \return  0 if OK, 1 on error
 */
l_ok
l_convertCharstrToInt(const char  *str,
                      l_int32     *pval)
{
l_int32   i, size;
l_uint32  uval;

    PROCNAME("l_convertCharstrToInt");

    if (!pval)
        return ERROR_INT("&val not defined", procName, 1);
    *pval = 0;
    if (!str)
        return ERROR_INT("str not defined", procName, 1);
    size = strlen(str);
    if (size == 0)
        return ERROR_INT("empty string", procName, 1);
    if (size > 4)
        return ERROR_INT("invalid string: > 4 bytes", procName, 1);

        /* Accumulate the bytes, most significant first.  The sum is
         * formed in unsigned arithmetic because a byte >= 0x80 is
         * negative where plain char is signed, and left-shifting a
         * negative signed value is undefined behavior.  Each byte is
         * still widened through l_int32, so the bit pattern matches
         * what the signed computation gave on two's complement
         * machines.  The final conversion back to l_int32 is
         * implementation-defined for values above INT_MAX. */
    uval = 0;
    for (i = 0; i < size; i++)
        uval = (uval << 8) + (l_uint32)(l_int32)str[i];
    *pval = (l_int32)uval;
    return 0;
}


/*------------------------------------------------------------------------*
 *                             Serialization                              *
 *------------------------------------------------------------------------*/
/*!
 * \brief   recogRead()
 *
 * \param[in]    filename
 * \return  recog, or NULL on error
 *
 *
 * Notes:
 *      (1) When a recog is serialized, a pixaa of the templates that are
 *          actually used for correlation is saved in the pixaa_u array
 *          of the recog.  These can be different from the templates that
 *          were used to generate the recog, because those original templates
 *          can be scaled and turned into normalized lines.  When recog1
 *          is deserialized to recog2, these templates are put in both the
 *          unscaled array (pixaa_u) and the modified array (pixaa) in recog2.
 *          Why not put it in only the unscaled array and let
 *          recogTrainingFinished() regenerate the modified templates?
 *          The reason is that with normalized lines, the operation of
 *          thinning to a skeleton and dilating back to a fixed width
 *          is not idempotent.  Thinning to a skeleton saves pixels at
 *          the end of a line segment, and thickening the skeleton puts
 *          additional pixels at the end of the lines.  This tends to
 *          close gaps.
 * 
*/ L_RECOG * recogRead(const char *filename) { FILE *fp; L_RECOG *recog; PROCNAME("recogRead"); if (!filename) return (L_RECOG *)ERROR_PTR("filename not defined", procName, NULL); if ((fp = fopenReadStream(filename)) == NULL) return (L_RECOG *)ERROR_PTR("stream not opened", procName, NULL); if ((recog = recogReadStream(fp)) == NULL) { fclose(fp); return (L_RECOG *)ERROR_PTR("recog not read", procName, NULL); } fclose(fp); return recog; } /*! * \brief recogReadStream() * * \param[in] fp file stream * \return recog, or NULL on error */ L_RECOG * recogReadStream(FILE *fp) { l_int32 version, setsize, threshold, scalew, scaleh, linew; l_int32 maxyshift, nc; L_DNA *dna_tochar; PIXAA *paa; L_RECOG *recog; SARRAY *sa_text; PROCNAME("recogReadStream"); if (!fp) return (L_RECOG *)ERROR_PTR("stream not defined", procName, NULL); if (fscanf(fp, "\nRecog Version %d\n", &version) != 1) return (L_RECOG *)ERROR_PTR("not a recog file", procName, NULL); if (version != RECOG_VERSION_NUMBER) return (L_RECOG *)ERROR_PTR("invalid recog version", procName, NULL); if (fscanf(fp, "Size of character set = %d\n", &setsize) != 1) return (L_RECOG *)ERROR_PTR("setsize not read", procName, NULL); if (fscanf(fp, "Binarization threshold = %d\n", &threshold) != 1) return (L_RECOG *)ERROR_PTR("binary thresh not read", procName, NULL); if (fscanf(fp, "Maxyshift = %d\n", &maxyshift) != 1) return (L_RECOG *)ERROR_PTR("maxyshift not read", procName, NULL); if (fscanf(fp, "Scale to width = %d\n", &scalew) != 1) return (L_RECOG *)ERROR_PTR("width not read", procName, NULL); if (fscanf(fp, "Scale to height = %d\n", &scaleh) != 1) return (L_RECOG *)ERROR_PTR("height not read", procName, NULL); if (fscanf(fp, "Normalized line width = %d\n", &linew) != 1) return (L_RECOG *)ERROR_PTR("line width not read", procName, NULL); if ((recog = recogCreate(scalew, scaleh, linew, threshold, maxyshift)) == NULL) return (L_RECOG *)ERROR_PTR("recog not made", procName, NULL); if (fscanf(fp, "\nLabels for character set:\n") 
!= 0) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("label intro not read", procName, NULL); } l_dnaDestroy(&recog->dna_tochar); if ((dna_tochar = l_dnaReadStream(fp)) == NULL) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("dna_tochar not read", procName, NULL); } recog->dna_tochar = dna_tochar; sarrayDestroy(&recog->sa_text); if ((sa_text = sarrayReadStream(fp)) == NULL) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("sa_text not read", procName, NULL); } recog->sa_text = sa_text; if (fscanf(fp, "\nPixaa of all samples in the training set:\n") != 0) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("pixaa intro not read", procName, NULL); } if ((paa = pixaaReadStream(fp)) == NULL) { recogDestroy(&recog); return (L_RECOG *)ERROR_PTR("pixaa not read", procName, NULL); } recog->setsize = setsize; nc = pixaaGetCount(paa, NULL); if (nc != setsize) { recogDestroy(&recog); pixaaDestroy(&paa); L_ERROR("(setsize = %d) != (paa count = %d)\n", procName, setsize, nc); return NULL; } recogAddAllSamples(&recog, paa, 0); /* this finishes */ pixaaDestroy(&paa); if (!recog) return (L_RECOG *)ERROR_PTR("bad templates", procName, NULL); return recog; } /*! * \brief recogReadMem() * * \param[in] data serialization of recog (not ascii) * \param[in] size of data in bytes * \return recog, or NULL on error */ L_RECOG * recogReadMem(const l_uint8 *data, size_t size) { FILE *fp; L_RECOG *recog; PROCNAME("recogReadMem"); if (!data) return (L_RECOG *)ERROR_PTR("data not defined", procName, NULL); if ((fp = fopenReadFromMemory(data, size)) == NULL) return (L_RECOG *)ERROR_PTR("stream not opened", procName, NULL); recog = recogReadStream(fp); fclose(fp); if (!recog) L_ERROR("recog not read\n", procName); return recog; } /*! * \brief recogWrite() * * \param[in] filename * \param[in] recog * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) The pixaa of templates that is written is the modified one
 *          in the pixaa field. It is the pixaa that is actually used
 *          for correlation. This is not the unscaled array of labeled
 *          bitmaps, in pixaa_u, that was used to generate the recog in the
 *          first place.  See the notes in recogRead() for the rationale.
 *      (2) The file is opened in "wb" mode, serialized with
 *          recogWriteStream(), and closed before the result of the
 *          serialization is examined.
 */
l_ok
recogWrite(const char  *filename,
           L_RECOG     *recog)
{
    l_int32  ret;
    FILE    *fp;

    PROCNAME("recogWrite");

    if (!filename)
        return ERROR_INT("filename not defined", procName, 1);
    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);

    if ((fp = fopenWriteStream(filename, "wb")) == NULL)
        return ERROR_INT("stream not opened", procName, 1);
    ret = recogWriteStream(fp, recog);
    fclose(fp);
    if (ret)
        return ERROR_INT("recog not written to stream", procName, 1);
    return 0;
}


/*!
 * \brief   recogWriteStream()
 *
 * \param[in]    fp       file stream opened for "wb"
 * \param[in]    recog
 * \return  0 if OK, 1 on error
 *
 * Notes:
 *      (1) The output is a text header (version number, followed by the
 *          setsize, threshold, maxyshift, scalew, scaleh and linew
 *          fields), then the dna_tochar and sa_text label arrays, and
 *          finally the pixaa field.
 *      (2) A failure reported by any of the three array serializers is
 *          returned to the caller, so that recogWrite() and
 *          recogWriteMem() do not report success for a truncated
 *          serialization.  Writing stops at the first such failure.
 */
l_ok
recogWriteStream(FILE     *fp,
                 L_RECOG  *recog)
{
    PROCNAME("recogWriteStream");

    if (!fp)
        return ERROR_INT("stream not defined", procName, 1);
    if (!recog)
        return ERROR_INT("recog not defined", procName, 1);

        /* Header: version and scalar parameters */
    fprintf(fp, "\nRecog Version %d\n", RECOG_VERSION_NUMBER);
    fprintf(fp, "Size of character set = %d\n", recog->setsize);
    fprintf(fp, "Binarization threshold = %d\n", recog->threshold);
    fprintf(fp, "Maxyshift = %d\n", recog->maxyshift);
    fprintf(fp, "Scale to width = %d\n", recog->scalew);
    fprintf(fp, "Scale to height = %d\n", recog->scaleh);
    fprintf(fp, "Normalized line width = %d\n", recog->linew);

        /* Class labels; do not silently drop serializer failures */
    fprintf(fp, "\nLabels for character set:\n");
    if (l_dnaWriteStream(fp, recog->dna_tochar))
        return ERROR_INT("dna_tochar not written", procName, 1);
    if (sarrayWriteStream(fp, recog->sa_text))
        return ERROR_INT("sa_text not written", procName, 1);

        /* Templates */
    fprintf(fp, "\nPixaa of all samples in the training set:\n");
    if (pixaaWriteStream(fp, recog->pixaa))
        return ERROR_INT("pixaa not written", procName, 1);
    return 0;
}


/*!
 * \brief   recogWriteMem()
 *
 * \param[out]   pdata    data of serialized recog (not ascii)
 * \param[out]   psize    size of returned data
 * \param[in]    recog
 * \return  0 if OK, 1 on error
 *
 * Notes:
 *      (1) Serializes a recog in memory and puts the result in a buffer.
 * 
*/ l_ok recogWriteMem(l_uint8 **pdata, size_t *psize, L_RECOG *recog) { l_int32 ret; FILE *fp; PROCNAME("recogWriteMem"); if (pdata) *pdata = NULL; if (psize) *psize = 0; if (!pdata) return ERROR_INT("&data not defined", procName, 1); if (!psize) return ERROR_INT("&size not defined", procName, 1); if (!recog) return ERROR_INT("recog not defined", procName, 1); #if HAVE_FMEMOPEN if ((fp = open_memstream((char **)pdata, psize)) == NULL) return ERROR_INT("stream not opened", procName, 1); ret = recogWriteStream(fp, recog); #else L_INFO("work-around: writing to a temp file\n", procName); #ifdef _WIN32 if ((fp = fopenWriteWinTempfile()) == NULL) return ERROR_INT("tmpfile stream not opened", procName, 1); #else if ((fp = tmpfile()) == NULL) return ERROR_INT("tmpfile stream not opened", procName, 1); #endif /* _WIN32 */ ret = recogWriteStream(fp, recog); rewind(fp); *pdata = l_binaryReadStream(fp, psize); #endif /* HAVE_FMEMOPEN */ fclose(fp); return ret; } /*! * \brief recogExtractPixa() * * \param[in] recog * \return pixa if OK, NULL on error * *
 * Notes:
 *      (1) This generates a pixa of all the unscaled images in the
 *          recognizer, where each one has its character class label in
 *          the pix text field, by flattening pixaa_u to a pixa.
 *      (2) The labels are written into the pix held by pixaa_u by
 *          recogAddCharstrLabels() before the flattening is done.
 */
PIXA *
recogExtractPixa(L_RECOG  *recog)
{
    PIXA  *flat;

    PROCNAME("recogExtractPixa");

    if (recog == NULL)
        return (PIXA *)ERROR_PTR("recog not defined", procName, NULL);

    recogAddCharstrLabels(recog);
    flat = pixaaFlattenToPixa(recog->pixaa_u, NULL, L_CLONE);
    return flat;
}


/*!
 * \brief   recogAddCharstrLabels()
 *
 * \param[in]    recog
 * \return  0 if OK, 1 on error
 *
 * Notes:
 *      (1) For each class index i, the i-th string of sa_text is set
 *          as the text of every pix in the i-th pixa of pixaa_u.
 */
static l_int32
recogAddCharstrLabels(L_RECOG  *recog)
{
    char    *label;
    l_int32  iclass, isamp, nclasses, nsamples;
    PIX     *sample;
    PIXA    *classpixa;
    PIXAA   *unscaled;

    PROCNAME("recogAddCharstrLabels");

    if (recog == NULL)
        return ERROR_INT("recog not defined", procName, 1);

        /* Walk the unscaled templates class by class */
    unscaled = recog->pixaa_u;
    nclasses = pixaaGetCount(unscaled, NULL);
    for (iclass = 0; iclass < nclasses; iclass++) {
        classpixa = pixaaGetPixa(unscaled, iclass, L_CLONE);
        label = sarrayGetString(recog->sa_text, iclass, L_NOCOPY);
        nsamples = pixaGetCount(classpixa);
        for (isamp = 0; isamp < nsamples; isamp++) {
            sample = pixaGetPix(classpixa, isamp, L_CLONE);
            pixSetText(sample, label);
            pixDestroy(&sample);
        }
        pixaDestroy(&classpixa);
    }

    return 0;
}


/*!
 * \brief   recogAddAllSamples()
 *
 * \param[in]    precog    addr of recog
 * \param[in]    paa       pixaa from previously trained recog
 * \param[in]    debug
 * \return  0 if OK, 1 on error
 *
 * Notes:
 *      (1) On error, the input recog is destroyed and *precog is set
 *          to NULL, so a caller that tests its own pointer after the
 *          call sees the failure.  A NULL %precog or a NULL *precog
 *          is reported without destroying anything.
 *      (2) This is used with the serialization routine recogRead(),
 *          where each pixa in the pixaa represents a set of characters
 *          in a different class.  Before calling this function, we have
 *          verified that the number of character classes, given by the
 *          setsize field in %recog, equals the number of pixa in the paa.
 *          The character labels for each set are in the sa_text field.
 *      (3) Each pix of %paa is added to pixaa_u of the recog, class by
 *          class, and then recogTrainingFinished() is called.
 */
static l_int32
recogAddAllSamples(L_RECOG  **precog,
                   PIXAA     *paa,
                   l_int32    debug)
{
    char     *text;
    l_int32   i, j, nc, ns;
    PIX      *pix;
    PIXA     *pixa, *pixa1;
    L_RECOG  *recog;

    PROCNAME("recogAddAllSamples");

    if (!precog)
        return ERROR_INT("&recog not defined", procName, 1);
    if ((recog = *precog) == NULL)
        return ERROR_INT("recog not defined", procName, 1);
    if (!paa) {
            /* Destroy through the caller's handle, not the local copy,
             * so that the caller is not left with a dangling pointer. */
        recogDestroy(precog);
        return ERROR_INT("paa not defined", procName, 1);
    }

    nc = pixaaGetCount(paa, NULL);
    for (i = 0; i < nc; i++) {
        pixa = pixaaGetPixa(paa, i, L_CLONE);
        ns = pixaGetCount(pixa);
        text = sarrayGetString(recog->sa_text, i, L_NOCOPY);
        pixa1 = pixaCreate(ns);
        pixaaAddPixa(recog->pixaa_u, pixa1, L_INSERT);
        for (j = 0; j < ns; j++) {
            pix = pixaGetPix(pixa, j, L_CLONE);
            if (debug) lept_stderr("pix[%d,%d]: text = %s\n", i, j, text);
            pixaaAddPix(recog->pixaa_u, i, pix, NULL, L_INSERT);
        }
        pixaDestroy(&pixa);
    }

        /* For second parameter, see comment in recogRead().
         * The caller's handle is passed so that, if the recog is
         * destroyed here, *precog is the pointer that gets cleared. */
    recogTrainingFinished(precog, 0, -1, -1.0);
    if (*precog == NULL)
        return ERROR_INT("bad templates; recog destroyed", procName, 1);
    return 0;
}