/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file recogdid.c *
* * Top-level identification * BOXA *recogDecode() * * Generate decoding arrays * static l_int32 recogPrepareForDecoding() * static l_int32 recogMakeDecodingArray() * * Dynamic programming for best path * static l_int32 recogRunViterbi() * static l_int32 recogRescoreDidResult() * static PIX *recogShowPath() * * Create/destroy temporary DID data * l_int32 recogCreateDid() * l_int32 recogDestroyDid() * * Various helpers * l_int32 recogDidExists() * L_RDID *recogGetDid() * static l_int32 recogGetWindowedArea() * l_int32 recogSetChannelParams() * static l_int32 recogTransferRchToDid() * * See recogbasic.c for examples of training a recognizer, which is * required before it can be used for document image decoding. * * Gary Kopec pioneered this hidden markov approach to "Document Image * Decoding" (DID) in the early 1990s. It is based on estimation * using a generative model of the image generation process, and * provides the most likely decoding of an image if the model is correct. * Given the model, it finds the maximum a posteriori (MAP) "message" * given the observed image. The model describes how to generate * an image from a message, and the MAP message is derived from the * observed image using Bayes' theorem. This approach can also be used * to build the model, using the iterative expectation/maximization * method from labeled but errorful data. * * In a little more detail: The model comprises three things: the ideal * printed character templates, the independent bit-flip noise model, and * the character setwidths. When a character is printed, the setwidth * is the distance in pixels that you move forward before being able * to print the next character. It is typically slightly less than the * width of the character template: if too small, an extra character can be * hallucinated; if too large, it will not be able to match the next * character template on the line. The model assumes that the probabilities * of bit flip depend only on the assignment of the pixel to background * or template foreground. The multilevel templates have different * bit flip probabilities for each level. Because a character image * is composed of many pixels, each of which can be independently flipped, * the actual probability of seeing any rendering is exceedingly small, * being composed of the product of the probabilities for each pixel. * The log likelihood is used both to avoid numeric underflow and, * more importantly, because it results in a summation of independent * pixel probabilities. That summation can be shown, in Kopec's * original paper, to consist of a sum of two terms: (a) the number of * fg pixels in the bit-and of the observed image with the ideal * template and (b) the number of fg pixels in the template. Each * has a coefficient that depends only on the bit-flip probabilities * for the fg and bg. A beautiful result, and computationally simple! * One nice feature of this approach is that the result of the decoding * is not very sensitive to the values used for the bit flip probabilities. * * The procedure for finding the best decoding (MAP) for a given image goes * under several names: Viterbi, dynamic programming, hidden markov model. * It is called a "hidden markov model" because the templates are assumed * to be printed serially and we don't know what they are -- the identity * of the templates must be inferred from the observed image. * The possible decodings form a dense trellis over the pixel positions, * where at each pixel position you have the possibility of having any * of the characters printed there (with some reference point) or having * a single pixel wide space inserted there. Thus, before the trellis * can be traversed, we must do the work of finding the log probability, * at each pixel location, that each of the templates was printed there. * Armed with those arrays of data, the dynamic programming procedure * moves from left to right, one pixel at a time, recursively finding * the path with the highest log probability that gets to that pixel * position (and noting which template was printed to arrive there). * After reaching the right side of the image, we can simply backtrack * along the path, jumping over each template that lies on the highest * scoring path. This best path thus only goes through a few of the * pixel positions. * * There are two refinements to the original Kopec paper. In the first, * one uses multiple, non-overlapping fg templates, each with its own * bit flip probability. This makes sense, because the probability * that a fg boundary pixel flips to bg is greater than that of a fg * pixel not on the boundary. And the flip probability of a fg boundary * pixel is smaller than that of a bg boundary pixel, which in turn * is greater than that of a bg pixel not on a boundary (the latter * is taken to be the true background). Then the simplest realistic * multiple template model has three templates that are not background. * * In the second refinement, a heuristic (strict upper bound) is used * iteratively in the Viterbi process to compute the log probabilities. * Using the heuristic, you find the best path, and then score all nodes * on that path with the actual probability, which is guaranteed to * be a smaller number. You run this iteratively, rescoring just the best * found path each time. After each rescoring, the path may change because * the local scores have been reduced. However, the process converges * rapidly, and when it doesn't change, it must be the best path because * it is properly scored (even if neighboring paths are heuristically * scored). The heuristic score is found column-wise by assuming * that all the fg pixels in the template are on fg pixels in the image -- * we just take the minimum of the number of pixels in the template * and image column. This can easily give a 10-fold reduction in * computation because the heuristic score can be computed much faster * than the exact score. * * For reference, the classic paper on the approach by Kopec is: * * "Document Image Decoding Using Markov Source Models", IEEE Trans. * PAMI, Vol 16, No. 6, June 1994, pp 602-617. * A refinement of the method for multilevel templates by Kopec is: * * "Multilevel Character Templates for Document Image Decoding", * Proc. SPIE 3027, Document Recognition IV, p. 168ff, 1997. * Further refinements for more efficient decoding are given in these * two papers, which are both stored on leptonica.org: * * "Document Image Decoding using Iterated Complete Path Search", Minka, * Bloomberg and Popat, Proc. SPIE Vol 4307, p. 250-258, Document * Recognition and Retrieval VIII, San Jose, CA 2001. * * "Document Image Decoding using Iterated Complete Path Search with * Subsampled Heuristic Scoring", Bloomberg, Minka and Popat, ICDAR 2001, * p. 344-349, Sept. 2001, Seattle. **/ #ifdef HAVE_CONFIG_H #include
* Notes: * (1) The input pixs has been filtered so that it is likely to be * composed of more than one touching character. Specifically, * its height can only slightly exceed that of the tallest * unscaled template, the width is somewhat larger than the * width of the widest unscaled template, and the w/h aspect ratio * is bounded by max_wh_ratio. * (2) This uses the DID mechanism with labeled templates to * segment the input %pixs. The resulting segmentation is * returned. (It is given by did->boxa). * (3) In debug mode, the Viterbi path is rescored based on all * the templates. In non-debug mode, the same procedure is * carried out by recogIdentifyPix() on the result of the * segmentation. **/ BOXA * recogDecode(L_RECOG *recog, PIX *pixs, l_int32 nlevels, PIX **ppixdb) { l_int32 debug; PIX *pix1; PIXA *pixa; PROCNAME("recogDecode"); if (ppixdb) *ppixdb = NULL; if (!recog) return (BOXA *)ERROR_PTR("recog not defined", procName, NULL); if (!pixs || pixGetDepth(pixs) != 1) return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); if (!recog->train_done) return (BOXA *)ERROR_PTR("training not finished", procName, NULL); if (nlevels != 2) return (BOXA *)ERROR_PTR("nlevels != 2 (for now)", procName, NULL); debug = (ppixdb) ? 1 : 0; if (recogPrepareForDecoding(recog, pixs, debug)) return (BOXA *)ERROR_PTR("error making arrays", procName, NULL); recogSetChannelParams(recog, nlevels); /* Normal path; just run Viterbi */ if (!debug) { if (recogRunViterbi(recog, NULL) == 0) return boxaCopy(recog->did->boxa, L_COPY); else return (BOXA *)ERROR_PTR("error in Viterbi", procName, NULL); } /* Debug path */ if (recogRunViterbi(recog, &pix1)) return (BOXA *)ERROR_PTR("error in viterbi", procName, NULL); pixa = pixaCreate(2); pixaAddPix(pixa, pix1, L_INSERT); if (recogRescoreDidResult(recog, &pix1)) { pixaDestroy(&pixa); return (BOXA *)ERROR_PTR("error in rescoring", procName, NULL); } pixaAddPix(pixa, pix1, L_INSERT); *ppixdb = pixaDisplayTiledInRows(pixa, 32, 2 * pixGetWidth(pix1) + 100, 1.0, 0, 30, 2); pixaDestroy(&pixa); return boxaCopy(recog->did->boxa, L_COPY); } /*------------------------------------------------------------------------* * Generate decoding arrays * *------------------------------------------------------------------------*/ /*! * \brief recogPrepareForDecoding() * * \param[in] recog with LUT's pre-computed * \param[in] pixs typically of multiple touching characters, 1 bpp * \param[in] debug 1 for debug output; 0 otherwise * \return 0 if OK, 1 on error * *
* Notes: * (1) Binarizes and crops input %pixs. * (2) Removes previous L_RDID struct and makes a new one. * (3) Generates the bit-and sum arrays for each character template * at each pixel position in %pixs. These are used in the * Viterbi dynamic programming step. * (4) The values are saved in the scoring arrays at the left edge * of the template. They are used in the Viterbi process * at the setwidth position (which is near the RHS of the template * as it is positioned on pixs) in the generated trellis. **/ static l_int32 recogPrepareForDecoding(L_RECOG *recog, PIX *pixs, l_int32 debug) { l_int32 i; PIX *pix1; L_RDID *did; PROCNAME("recogPrepareForDecoding"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (!recog->train_done) return ERROR_INT("training not finished", procName, 1); if (!recog->ave_done) recogAverageSamples(&recog, 0); /* Binarize and crop to foreground if necessary */ if ((pix1 = recogProcessToIdentify(recog, pixs, 0)) == NULL) return ERROR_INT("pix1 not made", procName, 1); /* Remove any existing RecogDID and set up a new one */ recogDestroyDid(recog); if (recogCreateDid(recog, pix1)) { pixDestroy(&pix1); return ERROR_INT("decoder not made", procName, 1); } /* Compute vertical sum and first moment arrays */ did = recogGetDid(recog); /* owned by recog */ did->nasum = pixCountPixelsByColumn(pix1); did->namoment = pixGetMomentByColumn(pix1, 1); /* Generate the arrays */ for (i = 0; i < recog->did->narray; i++) recogMakeDecodingArray(recog, i, debug); pixDestroy(&pix1); return 0; } /*! * \brief recogMakeDecodingArray() * * \param[in] recog * \param[in] index of averaged template * \param[in] debug 1 for debug output; 0 otherwise * \return 0 if OK, 1 on error * *
* Notes: * (1) Generates the bit-and sum array for a character template along pixs. * (2) The values are saved in the scoring arrays at the left edge * of the template as it is positioned on pixs. **/ static l_int32 recogMakeDecodingArray(L_RECOG *recog, l_int32 index, l_int32 debug) { l_int32 i, j, w1, h1, w2, h2, nx, ycent2, count, maxcount, maxdely; l_int32 sum, moment, dely, shifty; l_int32 *counta, *delya, *ycent1, *arraysum, *arraymoment, *sumtab; NUMA *nasum, *namoment; PIX *pix1, *pix2, *pix3; L_RDID *did; PROCNAME("recogMakeDecodingArray"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if ((did = recogGetDid(recog)) == NULL) return ERROR_INT("did not defined", procName, 1); if (index < 0 || index >= did->narray) return ERROR_INT("invalid index", procName, 1); /* Check that pix1 is large enough for this template. */ pix1 = did->pixs; /* owned by did; do not destroy */ pixGetDimensions(pix1, &w1, &h1, NULL); pix2 = pixaGetPix(recog->pixa_u, index, L_CLONE); pixGetDimensions(pix2, &w2, &h2, NULL); if (w1 < w2) { L_INFO("w1 = %d < w2 = %d for index %d\n", procName, w1, w2, index); pixDestroy(&pix2); return 0; } nasum = did->nasum; namoment = did->namoment; ptaGetIPt(recog->pta_u, index, NULL, &ycent2); sumtab = recog->sumtab; counta = did->counta[index]; delya = did->delya[index]; /* Set up the array for ycent1. This gives the y-centroid location * for a window of width w2, starting at location i. */ nx = w1 - w2 + 1; /* number of positions w2 can be placed in w1 */ ycent1 = (l_int32 *)LEPT_CALLOC(nx, sizeof(l_int32)); arraysum = numaGetIArray(nasum); arraymoment = numaGetIArray(namoment); for (i = 0, sum = 0, moment = 0; i < w2; i++) { sum += arraysum[i]; moment += arraymoment[i]; } for (i = 0; i < nx - 1; i++) { ycent1[i] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; sum += arraysum[w2 + i] - arraysum[i]; moment += arraymoment[w2 + i] - arraymoment[i]; } ycent1[nx - 1] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; /* Compute the bit-and sum between the template pix2 and pix1, at * locations where the left side of pix2 goes from 0 to nx - 1 * in pix1. Do this around the vertical alignment of the pix2 * centroid and the windowed pix1 centroid. * (1) Start with pix3 cleared and approximately equal in size to pix1. * (2) Blit the y-shifted pix2 onto pix3. Then all ON pixels * are within the intersection of pix1 and the shifted pix2. * (3) AND pix1 with pix3. */ pix3 = pixCreate(w2, h1, 1); for (i = 0; i < nx; i++) { shifty = (l_int32)(ycent1[i] - ycent2 + 0.5); maxcount = 0; maxdely = 0; for (j = -MaxYShift; j <= MaxYShift; j++) { pixClearAll(pix3); dely = shifty + j; /* amount pix2 is shifted relative to pix1 */ pixRasterop(pix3, 0, dely, w2, h2, PIX_SRC, pix2, 0, 0); pixRasterop(pix3, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, i, 0); pixCountPixels(pix3, &count, sumtab); if (count > maxcount) { maxcount = count; maxdely = dely; } } counta[i] = maxcount; delya[i] = maxdely; } did->fullarrays = TRUE; pixDestroy(&pix2); pixDestroy(&pix3); LEPT_FREE(ycent1); LEPT_FREE(arraysum); LEPT_FREE(arraymoment); return 0; } /*------------------------------------------------------------------------* * Dynamic programming for best path *------------------------------------------------------------------------*/ /*! * \brief recogRunViterbi() * * \param[in] recog with LUT's pre-computed * \param[out] ppixdb [optional] debug result; can be null * \return 0 if OK, 1 on error * *
* Notes: * (1) This can be used when the templates are unscaled. It works by * matching the average, unscaled templates of each class to * all positions. * (2) It is recursive, in that * (a) we compute the score successively at all pixel positions x, * (b) to compute the score at x in the trellis, for each * template we look backwards to (x - setwidth) to get the * score if that template were to be printed with its * setwidth location at x. We save at x the template and * score that maximizes the sum of the score at (x - setwidth) * and the log-likelihood for the template to be printed with * its LHS there. * (3) The primary output is a boxa of the locations for splitting * the input image. These locations are used later to split the * image and send the pieces individually for recognition. * This can be done in either recogIdentifyMultiple(), or * for debugging in recogRescoreDidResult(). **/ static l_int32 recogRunViterbi(L_RECOG *recog, PIX **ppixdb) { l_int32 i, w1, w2, h1, xnz, x, narray, minsetw; l_int32 first, templ, xloc, dely, counts, area1; l_int32 besttempl, spacetempl; l_int32 *setw, *didtempl; l_int32 *area2; /* must be freed */ l_float32 prevscore, matchscore, maxscore, correl; l_float32 *didscore; BOX *box; PIX *pix1; L_RDID *did; PROCNAME("recogRunViterbi"); if (ppixdb) *ppixdb = NULL; if (!recog) return ERROR_INT("recog not defined", procName, 1); if ((did = recogGetDid(recog)) == NULL) return ERROR_INT("did not defined", procName, 1); if (did->fullarrays == 0) return ERROR_INT("did full arrays not made", procName, 1); /* Compute the minimum setwidth. Bad templates with very small * width can cause havoc because the setwidth is too small. */ w1 = did->size; narray = did->narray; spacetempl = narray; setw = did->setwidth; minsetw = 100000; for (i = 0; i < narray; i++) { if (setw[i] < minsetw) minsetw = setw[i]; } if (minsetw <= 2) return ERROR_INT("minsetw <= 2; bad templates", procName, 1); /* The score array is initialized to 0.0. As we proceed to * the left, the log likelihood for the partial paths goes * negative, and we prune for the max (least negative) path. * No matches will be computed until we reach x = min(setwidth); * until then first == TRUE after looping over templates. */ didscore = did->trellisscore; didtempl = did->trellistempl; area2 = numaGetIArray(recog->nasum_u); besttempl = 0; /* just tells compiler it is initialized */ maxscore = 0.0; /* ditto */ for (x = minsetw; x < w1; x++) { /* will always get a score */ first = TRUE; for (i = 0; i < narray; i++) { if (x - setw[i] < 0) continue; matchscore = didscore[x - setw[i]] + did->gamma[1] * did->counta[i][x - setw[i]] + did->beta[1] * area2[i]; if (first) { maxscore = matchscore; besttempl = i; first = FALSE; } else { if (matchscore > maxscore) { maxscore = matchscore; besttempl = i; } } } /* We can also put down a single pixel space, with no cost * because all pixels are bg. */ prevscore = didscore[x - 1]; if (prevscore > maxscore) { /* 1 pixel space is best */ maxscore = prevscore; besttempl = spacetempl; } didscore[x] = maxscore; didtempl[x] = besttempl; } /* Backtrack to get the best path. * Skip over (i.e., ignore) all single pixel spaces. */ for (x = w1 - 1; x >= 0; x--) { if (didtempl[x] != spacetempl) break; } h1 = pixGetHeight(did->pixs); while (x > 0) { if (didtempl[x] == spacetempl) { /* skip over spaces */ x--; continue; } templ = didtempl[x]; xloc = x - setw[templ]; if (xloc < 0) break; counts = did->counta[templ][xloc]; /* bit-and counts */ recogGetWindowedArea(recog, templ, xloc, &dely, &area1); correl = ((l_float32)(counts) * counts) / (l_float32)(area2[templ] * area1); pix1 = pixaGetPix(recog->pixa_u, templ, L_CLONE); w2 = pixGetWidth(pix1); numaAddNumber(did->natempl, templ); numaAddNumber(did->naxloc, xloc); numaAddNumber(did->nadely, dely); numaAddNumber(did->nawidth, pixGetWidth(pix1)); numaAddNumber(did->nascore, correl); xnz = L_MAX(xloc, 0); box = boxCreate(xnz, dely, w2, h1); boxaAddBox(did->boxa, box, L_INSERT); pixDestroy(&pix1); x = xloc; } if (ppixdb) { numaWriteStderr(did->natempl); numaWriteStderr(did->naxloc); numaWriteStderr(did->nadely); numaWriteStderr(did->nawidth); numaWriteStderr(did->nascore); boxaWriteStderr(did->boxa); *ppixdb = recogShowPath(recog, 0); } LEPT_FREE(area2); return 0; } /*! * \brief recogRescoreDidResult() * * \param[in] recog with LUT's pre-computed * \param[out] ppixdb [optional] debug result; can be null * \return 0 if OK, 1 on error * *
* Notes: * (1) This does correlation matching with all unscaled templates, * using the character segmentation determined by the Viterbi path. **/ static l_int32 recogRescoreDidResult(L_RECOG *recog, PIX **ppixdb) { l_int32 i, n, sample, x, dely, index; char *text; l_float32 score; BOX *box1; PIX *pixs, *pix1; L_RDID *did; PROCNAME("recogRescoreDidResult"); if (ppixdb) *ppixdb = NULL; if (!recog) return ERROR_INT("recog not defined", procName, 1); if ((did = recogGetDid(recog)) == NULL) return ERROR_INT("did not defined", procName, 1); if (did->fullarrays == 0) return ERROR_INT("did full arrays not made", procName, 1); if ((n = numaGetCount(did->naxloc)) == 0) return ERROR_INT("no elements in path", procName, 1); pixs = did->pixs; for (i = 0; i < n; i++) { box1 = boxaGetBox(did->boxa, i, L_COPY); boxGetGeometry(box1, &x, &dely, NULL, NULL); pix1 = pixClipRectangle(pixs, box1, NULL); recogIdentifyPix(recog, pix1, NULL); recogTransferRchToDid(recog, x, dely); if (ppixdb) { rchExtract(recog->rch, &index, &score, &text, &sample, NULL, NULL, NULL); lept_stderr("text = %s, index = %d, sample = %d," " score = %5.3f\n", text, index, sample, score); } pixDestroy(&pix1); boxDestroy(&box1); LEPT_FREE(text); } if (ppixdb) *ppixdb = recogShowPath(recog, 1); return 0; } /*! * \brief recogShowPath() * * \param[in] recog with LUT's pre-computed * \param[in] select 0 for Viterbi; 1 for rescored * \return pix debug output), or NULL on error */ static PIX * recogShowPath(L_RECOG *recog, l_int32 select) { char textstr[16]; l_int32 i, j, n, index, xloc, dely; l_float32 score; L_BMF *bmf; NUMA *natempl_s, *nasample_s, *nascore_s, *naxloc_s, *nadely_s; PIX *pixs, *pix0, *pix1, *pix2, *pix3, *pix4, *pix5; L_RDID *did; PROCNAME("recogShowPath"); if (!recog) return (PIX *)ERROR_PTR("recog not defined", procName, NULL); if ((did = recogGetDid(recog)) == NULL) return (PIX *)ERROR_PTR("did not defined", procName, NULL); bmf = bmfCreate(NULL, 8); pixs = pixScale(did->pixs, 4.0, 4.0); pix0 = pixAddBorderGeneral(pixs, 0, 0, 0, 40, 0); pix1 = pixConvertTo32(pix0); if (select == 0) { /* Viterbi */ natempl_s = did->natempl; nascore_s = did->nascore; naxloc_s = did->naxloc; nadely_s = did->nadely; } else { /* rescored */ natempl_s = did->natempl_r; nasample_s = did->nasample_r; nascore_s = did->nascore_r; naxloc_s = did->naxloc_r; nadely_s = did->nadely_r; } n = numaGetCount(natempl_s); for (i = 0; i < n; i++) { numaGetIValue(natempl_s, i, &index); if (select == 0) { pix2 = pixaGetPix(recog->pixa_u, index, L_CLONE); } else { numaGetIValue(nasample_s, i, &j); pix2 = pixaaGetPix(recog->pixaa_u, index, j, L_CLONE); } pix3 = pixScale(pix2, 4.0, 4.0); pix4 = pixErodeBrick(NULL, pix3, 5, 5); pixXor(pix4, pix4, pix3); numaGetFValue(nascore_s, i, &score); snprintf(textstr, sizeof(textstr), "%5.3f", score); pix5 = pixAddTextlines(pix4, bmf, textstr, 1, L_ADD_BELOW); numaGetIValue(naxloc_s, i, &xloc); numaGetIValue(nadely_s, i, &dely); pixPaintThroughMask(pix1, pix5, 4 * xloc, 4 * dely, 0xff000000); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); } pixDestroy(&pixs); pixDestroy(&pix0); bmfDestroy(&bmf); return pix1; } /*------------------------------------------------------------------------* * Create/destroy temporary DID data * *------------------------------------------------------------------------*/ /*! * \brief recogCreateDid() * * \param[in] recog * \param[in] pixs of 1 bpp image to match * \return 0 if OK, 1 on error */ l_ok recogCreateDid(L_RECOG *recog, PIX *pixs) { l_int32 i; PIX *pix1; L_RDID *did; PROCNAME("recogCreateDid"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); recogDestroyDid(recog); did = (L_RDID *)LEPT_CALLOC(1, sizeof(L_RDID)); recog->did = did; did->pixs = pixClone(pixs); did->narray = recog->setsize; did->size = pixGetWidth(pixs); did->natempl = numaCreate(5); did->naxloc = numaCreate(5); did->nadely = numaCreate(5); did->nawidth = numaCreate(5); did->boxa = boxaCreate(5); did->nascore = numaCreate(5); did->natempl_r = numaCreate(5); did->nasample_r = numaCreate(5); did->naxloc_r = numaCreate(5); did->nadely_r = numaCreate(5); did->nawidth_r = numaCreate(5); did->nascore_r = numaCreate(5); /* Make the arrays */ did->setwidth = (l_int32 *)LEPT_CALLOC(did->narray, sizeof(l_int32)); did->counta = (l_int32 **)LEPT_CALLOC(did->narray, sizeof(l_int32 *)); did->delya = (l_int32 **)LEPT_CALLOC(did->narray, sizeof(l_int32 *)); did->beta = (l_float32 *)LEPT_CALLOC(5, sizeof(l_float32)); did->gamma = (l_float32 *)LEPT_CALLOC(5, sizeof(l_float32)); did->trellisscore = (l_float32 *)LEPT_CALLOC(did->size, sizeof(l_float32)); did->trellistempl = (l_int32 *)LEPT_CALLOC(did->size, sizeof(l_int32)); for (i = 0; i < did->narray; i++) { did->counta[i] = (l_int32 *)LEPT_CALLOC(did->size, sizeof(l_int32)); did->delya[i] = (l_int32 *)LEPT_CALLOC(did->size, sizeof(l_int32)); } /* Populate the setwidth array */ for (i = 0; i < did->narray; i++) { pix1 = pixaGetPix(recog->pixa_u, i, L_CLONE); did->setwidth[i] = (l_int32)(SetwidthFraction * pixGetWidth(pix1)); pixDestroy(&pix1); } return 0; } /*! * \brief recogDestroyDid() * * \param[in] recog * \return 0 if OK, 1 on error * *
* Notes: * (1) As the signature indicates, this is owned by the recog, and can * only be destroyed using this function. **/ l_ok recogDestroyDid(L_RECOG *recog) { l_int32 i; L_RDID *did; PROCNAME("recogDestroyDid"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if ((did = recog->did) == NULL) return 0; if (!did->counta || !did->delya) return ERROR_INT("ptr array is null; shouldn't happen!", procName, 1); for (i = 0; i < did->narray; i++) { LEPT_FREE(did->counta[i]); LEPT_FREE(did->delya[i]); } LEPT_FREE(did->setwidth); LEPT_FREE(did->counta); LEPT_FREE(did->delya); LEPT_FREE(did->beta); LEPT_FREE(did->gamma); LEPT_FREE(did->trellisscore); LEPT_FREE(did->trellistempl); pixDestroy(&did->pixs); numaDestroy(&did->nasum); numaDestroy(&did->namoment); numaDestroy(&did->natempl); numaDestroy(&did->naxloc); numaDestroy(&did->nadely); numaDestroy(&did->nawidth); boxaDestroy(&did->boxa); numaDestroy(&did->nascore); numaDestroy(&did->natempl_r); numaDestroy(&did->nasample_r); numaDestroy(&did->naxloc_r); numaDestroy(&did->nadely_r); numaDestroy(&did->nawidth_r); numaDestroy(&did->nascore_r); LEPT_FREE(did); recog->did = NULL; return 0; } /*------------------------------------------------------------------------* * Various helpers * *------------------------------------------------------------------------*/ /*! * \brief recogDidExists() * * \param[in] recog * \return 1 if recog->did exists; 0 if not or on error. */ l_int32 recogDidExists(L_RECOG *recog) { PROCNAME("recogDidExists"); if (!recog) return ERROR_INT("recog not defined", procName, 0); return (recog->did) ? 1 : 0; } /*! * \brief recogGetDid() * * \param[in] recog * \return did still owned by the recog, or NULL on error * *
* Notes: * (1) This also makes sure the arrays are defined. **/ L_RDID * recogGetDid(L_RECOG *recog) { l_int32 i; L_RDID *did; PROCNAME("recogGetDid"); if (!recog) return (L_RDID *)ERROR_PTR("recog not defined", procName, NULL); if ((did = recog->did) == NULL) return (L_RDID *)ERROR_PTR("did not defined", procName, NULL); if (!did->counta || !did->delya) return (L_RDID *)ERROR_PTR("did array ptrs not defined", procName, NULL); for (i = 0; i < did->narray; i++) { if (!did->counta[i] || !did->delya[i]) return (L_RDID *)ERROR_PTR("did arrays not defined", procName, NULL); } return did; } /*! * \brief recogGetWindowedArea() * * \param[in] recog * \param[in] index of template * \param[in] x pixel position of left hand edge of template * \param[out] pdely y shift of template relative to pix1 * \param[out] pwsum number of fg pixels in window of pixs * \return 0 if OK, 1 on error * *
* Notes: * (1) This is called after the best path has been found through * the trellis, in order to produce a correlation that can be used * to evaluate the confidence we have in the identification. * The correlation is |1 & 2|^2 / (|1| * |2|). * |1 & 2| is given by the count array, |2| is found from * nasum_u[], and |1| is wsum returned from this function. **/ static l_int32 recogGetWindowedArea(L_RECOG *recog, l_int32 index, l_int32 x, l_int32 *pdely, l_int32 *pwsum) { l_int32 w1, h1, w2, h2; PIX *pix1, *pix2, *pixt; L_RDID *did; PROCNAME("recogGetWindowedArea"); if (pdely) *pdely = 0; if (pwsum) *pwsum = 0; if (!pdely || !pwsum) return ERROR_INT("&dely and &wsum not both defined", procName, 1); if (!recog) return ERROR_INT("recog not defined", procName, 1); if ((did = recogGetDid(recog)) == NULL) return ERROR_INT("did not defined", procName, 1); if (index < 0 || index >= did->narray) return ERROR_INT("invalid index", procName, 1); pix1 = did->pixs; pixGetDimensions(pix1, &w1, &h1, NULL); if (x >= w1) return ERROR_INT("invalid x position", procName, 1); pix2 = pixaGetPix(recog->pixa_u, index, L_CLONE); pixGetDimensions(pix2, &w2, &h2, NULL); if (w1 < w2) { L_INFO("template %d too small\n", procName, index); pixDestroy(&pix2); return 0; } *pdely = did->delya[index][x]; pixt = pixCreate(w2, h1, 1); pixRasterop(pixt, 0, *pdely, w2, h2, PIX_SRC, pix2, 0, 0); pixRasterop(pixt, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, x, 0); pixCountPixels(pixt, pwsum, recog->sumtab); pixDestroy(&pix2); pixDestroy(&pixt); return 0; } /*! * \brief recogSetChannelParams() * * \param[in] recog * \param[in] nlevels * \return 0 if OK, 1 on error * *
* Notes: * (1) This converts the independent bit-flip probabilities in the * "channel" into log-likelihood coefficients on image sums. * These coefficients are only defined for the non-background * template levels. Thus for nlevels = 2 (one fg, one bg), * only beta[1] and gamma[1] are used. For nlevels = 4 (three * fg templates), we use beta[1-3] and gamma[1-3]. **/ l_ok recogSetChannelParams(L_RECOG *recog, l_int32 nlevels) { l_int32 i; const l_float32 *da; L_RDID *did; PROCNAME("recogSetChannelParams"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if ((did = recogGetDid(recog)) == NULL) return ERROR_INT("did not defined", procName, 1); if (nlevels == 2) da = DefaultAlpha2; else if (nlevels == 4) da = DefaultAlpha4; else return ERROR_INT("nlevels not 2 or 4", procName, 1); for (i = 1; i < nlevels; i++) { did->beta[i] = log((1.0 - da[i]) / da[0]); did->gamma[i] = log(da[0] * da[i] / ((1.0 - da[0]) * (1.0 - da[i]))); /* lept_stderr("beta[%d] = %7.3f, gamma[%d] = %7.3f\n", i, did->beta[i], i, did->gamma[i]); */ } return 0; } /*! * \brief recogTransferRchToDid() * * \param[in] recog with rch and did defined * \param[in] x left edge of extracted region, relative to decoded line * \param[in] y top edge of extracted region, relative to input image * \return 0 if OK, 1 on error * *
* Notes: * (1) This is used to transfer the results for a single character match * to the rescored did arrays. **/ static l_int32 recogTransferRchToDid(L_RECOG *recog, l_int32 x, l_int32 y) { L_RDID *did; L_RCH *rch; PROCNAME("recogTransferRchToDid"); if (!recog) return ERROR_INT("recog not defined", procName, 1); if ((did = recogGetDid(recog)) == NULL) return ERROR_INT("did not defined", procName, 1); if ((rch = recog->rch) == NULL) return ERROR_INT("rch not defined", procName, 1); numaAddNumber(did->natempl_r, rch->index); numaAddNumber(did->nasample_r, rch->sample); numaAddNumber(did->naxloc_r, rch->xloc + x); numaAddNumber(did->nadely_r, rch->yloc + y); numaAddNumber(did->nawidth_r, rch->width); numaAddNumber(did->nascore_r, rch->score); return 0; }