/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file recogident.c *
 *
 *      Top-level identification
 *         l_int32             recogIdentifyMultiple()
 *
 *      Segmentation and noise removal
 *         l_int32             recogSplitIntoCharacters()
 *
 *      Greedy character splitting
 *         l_int32             recogCorrelationBestRow()
 *         l_int32             recogCorrelationBestChar()
 *         static l_int32      pixCorrelationBestShift()
 *
 *      Low-level identification of single characters
 *         l_int32             recogIdentifyPixa()
 *         l_int32             recogIdentifyPix()
 *         l_int32             recogSkipIdentify()
 *
 *      Operations for handling identification results
 *         static L_RCHA      *rchaCreate()
 *         void                rchaDestroy()
 *         static L_RCH       *rchCreate()
 *         void                rchDestroy()
 *         l_int32             rchaExtract()
 *         l_int32             rchExtract()
 *         static l_int32      transferRchToRcha()
 *
 *      Preprocessing and filtering
 *         l_int32             recogProcessToIdentify()
 *         static PIX         *recogPreSplittingFilter()
 *         static PIX         *recogSplittingFilter()
 *
 *      Postprocessing
 *         SARRAY             *recogExtractNumbers()
 *         PIX                *showExtractNumbers()
 *
 *      Static debug helper
 *         static void         l_showIndicatorSplitValues()
 *
 *  See recogbasic.c for examples of training a recognizer, which is
 *  required before it can be used for identification.
 *
 *  The character splitter repeatedly does a greedy correlation with each
 *  averaged unscaled template, at all pixel locations along the text to
 *  be identified.  The vertical alignment is between the template
 *  centroid and the (moving) windowed centroid, including a delta of
 *  1 pixel above and below.  The best match then removes part of the
 *  input image, leaving 1 or 2 pieces, which, after filtering,
 *  are put in a queue.  The process ends when the queue is empty.
 *  The filtering is based on the size and aspect ratio of the
 *  remaining pieces; the intent is to remove anything that is
 *  unlikely to be text, such as small pieces and line graphics.
 *
 *  After splitting, the selected segments are identified using
 *  the input parameters that were initially specified for the
 *  recognizer.  Unlike the splitter, which uses the averaged
 *  templates from the unscaled input, the recognizer can use
 *  either all training examples or averaged templates, and these
 *  can be either scaled or unscaled.  These choices are specified
 *  when the recognizer is constructed.
 * 
*/ #ifdef HAVE_CONFIG_H #include #endif /* HAVE_CONFIG_H */ #include #include "allheaders.h" /* There are two methods for splitting characters: DID and greedy. * The default method is DID. */ #define SPLIT_WITH_DID 1 /* Padding on pix1: added before correlations and removed from result */ static const l_int32 LeftRightPadding = 32; /* Parameters for filtering and sorting connected components in splitter */ static const l_float32 MinFillFactor = 0.10; static const l_int32 DefaultMinHeight = 15; /* min unscaled height */ static const l_int32 MinOverlap1 = 6; /* in pass 1 of boxaSort2d() */ static const l_int32 MinOverlap2 = 6; /* in pass 2 of boxaSort2d() */ static const l_int32 MinHeightPass1 = 5; /* min height to start pass 1 */ static l_int32 pixCorrelationBestShift(PIX *pix1, PIX *pix2, NUMA *nasum1, NUMA *namoment1, l_int32 area2, l_int32 ycent2, l_int32 maxyshift, l_int32 *tab8, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag ); static L_RCH *rchCreate(l_int32 index, l_float32 score, char *text, l_int32 sample, l_int32 xloc, l_int32 yloc, l_int32 width); static L_RCHA *rchaCreate(); static l_int32 transferRchToRcha(L_RCH *rch, L_RCHA *rcha); static PIX *recogPreSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 minh, l_float32 minaf, l_int32 debug); static l_int32 recogSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 min, l_float32 minaf, l_int32 *premove, l_int32 debug); static void l_showIndicatorSplitValues(NUMA *na1, NUMA *na2, NUMA *na3, NUMA *na4, NUMA *na5, NUMA *na6); /*------------------------------------------------------------------------* * Identification *------------------------------------------------------------------------*/ /*! * \brief recogIdentifyMultiple() * * \param[in] recog with training finished * \param[in] pixs containing typically a small number of characters * \param[in] minh remove shorter components; use 0 for default * \param[in] skipsplit 1 to skip the splitting step * \param[out] pboxa [optional] locations of identified components * \param[out] ppixa [optional] images of identified components * \param[out] ppixdb [optional] debug pix: inputs and best fits * \param[in] debugsplit 1 returns pix split debugging images * \return 0 if OK; 1 if nothing is found; 2 for other errors. * *
 * Notes:
 *      (1) This filters the input pixa and calls recogIdentifyPixa()
 *      (2) Splitting is relatively slow, because it tries to match all
 *          character templates to all locations.  This step can be skipped.
 *      (3) An attempt is made to order the (optionally) returned images
 *          and boxes in 2-dimensional sorted order.  These can then
 *          be used to aggregate identified characters into numbers or words.
 *          One typically wants the pixa, which contains a boxa of the
 *          extracted subimages.
 * 
*/ l_ok recogIdentifyMultiple(L_RECOG *recog, PIX *pixs, l_int32 minh, l_int32 skipsplit, BOXA **pboxa, PIXA **ppixa, PIX **ppixdb, l_int32 debugsplit) { l_int32 n; BOXA *boxa; PIX *pixb; PIXA *pixa; PROCNAME("recogIdentifyMultiple"); if (pboxa) *pboxa = NULL; if (ppixa) *ppixa = NULL; if (ppixdb) *ppixdb = NULL; if (!recog) return ERROR_INT("recog not defined", procName, 2); if (!recog->train_done) return ERROR_INT("training not finished", procName, 2); if (!pixs) return ERROR_INT("pixs not defined", procName, 2); /* Binarize if necessary */ if (pixGetDepth(pixs) > 1) pixb = pixConvertTo1(pixs, recog->threshold); else pixb = pixClone(pixs); /* Noise removal and splitting of touching characters */ recogSplitIntoCharacters(recog, pixb, minh, skipsplit, &boxa, &pixa, debugsplit); pixDestroy(&pixb); if (!pixa || (n = pixaGetCount(pixa)) == 0) { pixaDestroy(&pixa); boxaDestroy(&boxa); L_WARNING("nothing found\n", procName); return 1; } recogIdentifyPixa(recog, pixa, ppixdb); if (pboxa) *pboxa = boxa; else boxaDestroy(&boxa); if (ppixa) *ppixa = pixa; else pixaDestroy(&pixa); return 0; } /*------------------------------------------------------------------------* * Segmentation and noise removal * *------------------------------------------------------------------------*/ /*! * \brief recogSplitIntoCharacters() * * \param[in] recog * \param[in] pixs 1 bpp, contains only mostly deskewed text * \param[in] minh remove shorter components; use 0 for default * \param[in] skipsplit 1 to skip the splitting step * \param[out] pboxa character bounding boxes * \param[out] ppixa character images * \param[in] debug 1 for results written to pixadb_split * \return 0 if OK, 1 on error or if no components are returned * *
 * Notes:
 *      (1) This can be given an image that has an arbitrary number
 *          of text characters.  It optionally splits connected
 *          components based on document image decoding in recogDecode().
 *          The returned pixa includes the boxes from which the
 *          (possibly split) components are extracted.
 *      (2) After noise filtering, the resulting components are put in
 *          row-major (2D) order, and the smaller of overlapping
 *          components are removed if they satisfy conditions of
 *          relative size and fractional overlap.
 *      (3) Note that the splitting function uses unscaled templates
 *          and does not bother returning the class results and scores.
 *          These are more accurately found later using the scaled templates.
 * 
*/ l_ok recogSplitIntoCharacters(L_RECOG *recog, PIX *pixs, l_int32 minh, l_int32 skipsplit, BOXA **pboxa, PIXA **ppixa, l_int32 debug) { static l_int32 ind = 0; char buf[32]; l_int32 i, xoff, yoff, empty, maxw, bw, ncomp, scaling; BOX *box; BOXA *boxa1, *boxa2, *boxa3, *boxa4, *boxad; BOXAA *baa; PIX *pix, *pix1, *pix2, *pix3; PIXA *pixa; PROCNAME("recogSplitIntoCharacters"); lept_mkdir("lept/recog"); if (pboxa) *pboxa = NULL; if (ppixa) *ppixa = NULL; if (!pboxa || !ppixa) return ERROR_INT("&boxa and &pixa not defined", procName, 1); if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!recog->train_done) return ERROR_INT("training not finished", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (minh <= 0) minh = DefaultMinHeight; pixZero(pixs, &empty); if (empty) return 1; /* Small vertical close for consolidation. Don't do a horizontal * closing, because it might join separate characters. */ pix1 = pixMorphSequence(pixs, "c1.3", 0); /* Carefully filter out noise */ pix2 = recogPreSplittingFilter(recog, pix1, minh, MinFillFactor, debug); pixDestroy(&pix1); /* Get the 8-connected components to be split/identified */ boxa1 = pixConnComp(pix2, NULL, 8); pixDestroy(&pix2); ncomp = boxaGetCount(boxa1); if (ncomp == 0) { boxaDestroy(&boxa1); L_WARNING("all components removed\n", procName); return 1; } /* Save everything and split the large components */ boxa2 = boxaCreate(ncomp); maxw = recog->maxwidth_u + 5; scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE; pixa = (debug) ? pixaCreate(ncomp) : NULL; for (i = 0; i < ncomp; i++) { box = boxaGetBox(boxa1, i, L_CLONE); boxGetGeometry(box, &xoff, &yoff, &bw, NULL); /* Treat as one character if it is small, if the images * have been scaled, or if splitting is not to be run. */ if (bw <= maxw || scaling || skipsplit) { boxaAddBox(boxa2, box, L_INSERT); } else { pix = pixClipRectangle(pixs, box, NULL); #if SPLIT_WITH_DID if (!debug) { boxa3 = recogDecode(recog, pix, 2, NULL); } else { boxa3 = recogDecode(recog, pix, 2, &pix2); pixaAddPix(pixa, pix2, L_INSERT); } #else /* use greedy splitting */ recogCorrelationBestRow(recog, pix, &boxa3, NULL, NULL, NULL, debug); if (debug) { pix2 = pixConvertTo32(pix); pixRenderBoxaArb(pix2, boxa3, 2, 255, 0, 0); pixaAddPix(pixa, pix2, L_INSERT); } #endif /* SPLIT_WITH_DID */ pixDestroy(&pix); boxDestroy(&box); if (!boxa3) { L_ERROR("boxa3 not found for component %d\n", procName, i); } else { boxa4 = boxaTransform(boxa3, xoff, yoff, 1.0, 1.0); boxaJoin(boxa2, boxa4, 0, -1); boxaDestroy(&boxa3); boxaDestroy(&boxa4); } } } boxaDestroy(&boxa1); if (pixa) { /* debug */ pix3 = pixaDisplayTiledInColumns(pixa, 1, 1.0, 20, 2); snprintf(buf, sizeof(buf), "/tmp/lept/recog/decode-%d.png", ind++); pixWrite(buf, pix3, IFF_PNG); pixaDestroy(&pixa); pixDestroy(&pix3); } /* Do a 2D sort on the bounding boxes, and flatten the result to 1D. * For the 2D sort, to add a box to an existing boxa, we require * specified minimum vertical overlaps for the first two passes * of the 2D sort. In pass 1, only components with sufficient * height can start a new boxa. */ baa = boxaSort2d(boxa2, NULL, MinOverlap1, MinOverlap2, MinHeightPass1); boxa3 = boxaaFlattenToBoxa(baa, NULL, L_CLONE); boxaaDestroy(&baa); boxaDestroy(&boxa2); /* Remove smaller components of overlapping pairs. * We only remove the small component if the overlap is * at least half its area and if its area is no more * than 30% of the area of the large component. Because the * components are in a flattened 2D sort, we don't need to * look far ahead in the array to find all overlapping boxes; * 10 boxes is plenty. */ boxad = boxaHandleOverlaps(boxa3, L_COMBINE, 10, 0.5, 0.3, NULL); boxaDestroy(&boxa3); /* Extract and save the image pieces from the input image. */ *ppixa = pixClipRectangles(pixs, boxad); *pboxa = boxad; return 0; } /*------------------------------------------------------------------------* * Greedy character splitting * *------------------------------------------------------------------------*/ /*! * \brief recogCorrelationBestRow() * * \param[in] recog with LUT's pre-computed * \param[in] pixs typically of multiple touching characters, 1 bpp * \param[out] pboxa bounding boxs of best fit character * \param[out] pnascore [optional] correlation scores * \param[out] pnaindex [optional] indices of classes * \param[out] psachar [optional] array of character strings * \param[in] debug 1 for results written to pixadb_split * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) Supervises character matching for (in general) a c.c with
 *          multiple touching characters.  Finds the best match greedily.
 *          Rejects small parts that are left over after splitting.
 *      (2) Matching is to the average, and without character scaling.
 * 
*/ l_ok recogCorrelationBestRow(L_RECOG *recog, PIX *pixs, BOXA **pboxa, NUMA **pnascore, NUMA **pnaindex, SARRAY **psachar, l_int32 debug) { char *charstr; l_int32 index, remove, w, h, bx, bw, bxc, bwc, w1, w2, w3; l_float32 score; BOX *box, *boxc, *boxtrans, *boxl, *boxr, *boxlt, *boxrt; BOXA *boxat; NUMA *nascoret, *naindext, *nasort; PIX *pixb, *pixc, *pixl, *pixr, *pixdb, *pixd; PIXA *pixar, *pixadb; SARRAY *sachart; l_int32 iter; PROCNAME("recogCorrelationBestRow"); if (pnascore) *pnascore = NULL; if (pnaindex) *pnaindex = NULL; if (psachar) *psachar = NULL; if (!pboxa) return ERROR_INT("&boxa not defined", procName, 1); *pboxa = NULL; if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (pixGetWidth(pixs) < recog->minwidth_u - 4) return ERROR_INT("pixs too narrow", procName, 1); if (!recog->train_done) return ERROR_INT("training not finished", procName, 1); /* Binarize and crop to foreground if necessary */ pixb = recogProcessToIdentify(recog, pixs, 0); /* Initialize the arrays */ boxat = boxaCreate(4); nascoret = numaCreate(4); naindext = numaCreate(4); sachart = sarrayCreate(4); pixadb = (debug) ? pixaCreate(4) : NULL; /* Initialize the images remaining to be processed with the input. * These are stored in pixar, which is used here as a queue, * on which we only put image fragments that are large enough to * contain at least one character. */ pixar = pixaCreate(1); pixGetDimensions(pixb, &w, &h, NULL); box = boxCreate(0, 0, w, h); pixaAddPix(pixar, pixb, L_INSERT); pixaAddBox(pixar, box, L_INSERT); /* Successively split on the best match until nothing is left. * To be safe, we limit the search to 10 characters. */ for (iter = 0; iter < 11; iter++) { if (pixaGetCount(pixar) == 0) break; if (iter == 10) { L_WARNING("more than 10 chars; ending search\n", procName); break; } /* Pop one from the queue */ pixaRemovePixAndSave(pixar, 0, &pixc, &boxc); boxGetGeometry(boxc, &bxc, NULL, &bwc, NULL); /* This is a single component; if noise, remove it */ recogSplittingFilter(recog, pixc, 0, MinFillFactor, &remove, debug); if (debug) lept_stderr("iter = %d, removed = %d\n", iter, remove); if (remove) { pixDestroy(&pixc); boxDestroy(&boxc); continue; } /* Find the best character match */ if (debug) { recogCorrelationBestChar(recog, pixc, &box, &score, &index, &charstr, &pixdb); pixaAddPix(pixadb, pixdb, L_INSERT); } else { recogCorrelationBestChar(recog, pixc, &box, &score, &index, &charstr, NULL); } /* Find the box in original coordinates, and append * the results to the arrays. */ boxtrans = boxTransform(box, bxc, 0, 1.0, 1.0); boxaAddBox(boxat, boxtrans, L_INSERT); numaAddNumber(nascoret, score); numaAddNumber(naindext, index); sarrayAddString(sachart, charstr, L_INSERT); /* Split the current pixc into three regions and save * each region if it is large enough. */ boxGetGeometry(box, &bx, NULL, &bw, NULL); w1 = bx; w2 = bw; w3 = bwc - bx - bw; if (debug) lept_stderr(" w1 = %d, w2 = %d, w3 = %d\n", w1, w2, w3); if (w1 < recog->minwidth_u - 4) { if (debug) L_INFO("discarding width %d on left\n", procName, w1); } else { /* extract and save left region */ boxl = boxCreate(0, 0, bx + 1, h); pixl = pixClipRectangle(pixc, boxl, NULL); boxlt = boxTransform(boxl, bxc, 0, 1.0, 1.0); pixaAddPix(pixar, pixl, L_INSERT); pixaAddBox(pixar, boxlt, L_INSERT); boxDestroy(&boxl); } if (w3 < recog->minwidth_u - 4) { if (debug) L_INFO("discarding width %d on right\n", procName, w3); } else { /* extract and save left region */ boxr = boxCreate(bx + bw - 1, 0, w3 + 1, h); pixr = pixClipRectangle(pixc, boxr, NULL); boxrt = boxTransform(boxr, bxc, 0, 1.0, 1.0); pixaAddPix(pixar, pixr, L_INSERT); pixaAddBox(pixar, boxrt, L_INSERT); boxDestroy(&boxr); } pixDestroy(&pixc); boxDestroy(&box); boxDestroy(&boxc); } pixaDestroy(&pixar); /* Sort the output results by left-to-right in the boxa */ *pboxa = boxaSort(boxat, L_SORT_BY_X, L_SORT_INCREASING, &nasort); if (pnascore) *pnascore = numaSortByIndex(nascoret, nasort); if (pnaindex) *pnaindex = numaSortByIndex(naindext, nasort); if (psachar) *psachar = sarraySortByIndex(sachart, nasort); numaDestroy(&nasort); boxaDestroy(&boxat); numaDestroy(&nascoret); numaDestroy(&naindext); sarrayDestroy(&sachart); /* Final debug output */ if (debug) { pixd = pixaDisplayTiledInRows(pixadb, 32, 2000, 1.0, 0, 15, 2); pixDisplay(pixd, 400, 400); pixaAddPix(recog->pixadb_split, pixd, L_INSERT); pixaDestroy(&pixadb); } return 0; } /*! * \brief recogCorrelationBestChar() * * \param[in] recog with LUT's pre-computed * \param[in] pixs can be of multiple touching characters, 1 bpp * \param[out] pbox bounding box of best fit character * \param[out] pscore correlation score * \param[out] pindex [optional] index of class * \param[out] pcharstr [optional] character string of class * \param[out] ppixdb [optional] debug pix showing input and best fit * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) Basic matching character splitter.  Finds the best match among
 *          all templates to some region of the image.  This can result
 *          in splitting the image into two parts.  This is "image decoding"
 *          without dynamic programming, because we don't use a setwidth
 *          and compute the best matching score for the entire image.
 *      (2) Matching is to the average templates, without character scaling.
 * 
*/ l_ok recogCorrelationBestChar(L_RECOG *recog, PIX *pixs, BOX **pbox, l_float32 *pscore, l_int32 *pindex, char **pcharstr, PIX **ppixdb) { l_int32 i, n, w1, h1, w2, area2, ycent2, delx, dely; l_int32 bestdelx, bestdely, bestindex; l_float32 score, bestscore; BOX *box; BOXA *boxa; NUMA *nasum, *namoment; PIX *pix1, *pix2; PROCNAME("recogCorrelationBestChar"); if (pindex) *pindex = 0; if (pcharstr) *pcharstr = NULL; if (ppixdb) *ppixdb = NULL; if (pbox) *pbox = NULL; if (pscore) *pscore = 0.0; if (!pbox || !pscore) return ERROR_INT("&box and &score not both defined", procName, 1); if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (!recog->train_done) return ERROR_INT("training not finished", procName, 1); /* Binarize and crop to foreground if necessary. Add padding * to both the left and right side; this is compensated for * when reporting the bounding box of the best matched character. */ pix1 = recogProcessToIdentify(recog, pixs, LeftRightPadding); pixGetDimensions(pix1, &w1, &h1, NULL); /* Compute vertical sum and moment arrays */ nasum = pixCountPixelsByColumn(pix1); namoment = pixGetMomentByColumn(pix1, 1); /* Do shifted correlation against all averaged templates. */ n = recog->setsize; boxa = boxaCreate(n); /* location of best fits for each character */ bestscore = 0.0; bestindex = bestdelx = bestdely = 0; for (i = 0; i < n; i++) { pix2 = pixaGetPix(recog->pixa_u, i, L_CLONE); w2 = pixGetWidth(pix2); /* Note that the slightly expended w1 is typically larger * than w2 (the template). */ if (w1 >= w2) { numaGetIValue(recog->nasum_u, i, &area2); ptaGetIPt(recog->pta_u, i, NULL, &ycent2); pixCorrelationBestShift(pix1, pix2, nasum, namoment, area2, ycent2, recog->maxyshift, recog->sumtab, &delx, &dely, &score, 1); if (ppixdb) { lept_stderr( "Best match template %d: (x,y) = (%d,%d), score = %5.3f\n", i, delx, dely, score); } /* Compensate for padding */ box = boxCreate(delx - LeftRightPadding, 0, w2, h1); if (score > bestscore) { bestscore = score; bestdelx = delx - LeftRightPadding; bestdely = dely; bestindex = i; } } else { box = boxCreate(0, 0, 1, 1); /* placeholder */ if (ppixdb) lept_stderr("Component too thin: w1 = %d, w2 = %d\n", w1, w2); } boxaAddBox(boxa, box, L_INSERT); pixDestroy(&pix2); } *pscore = bestscore; *pbox = boxaGetBox(boxa, bestindex, L_COPY); if (pindex) *pindex = bestindex; if (pcharstr) recogGetClassString(recog, bestindex, pcharstr); if (ppixdb) { L_INFO("Best match: class %d; shifts (%d, %d)\n", procName, bestindex, bestdelx, bestdely); pix2 = pixaGetPix(recog->pixa_u, bestindex, L_CLONE); *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0); pixDestroy(&pix2); } pixDestroy(&pix1); boxaDestroy(&boxa); numaDestroy(&nasum); numaDestroy(&namoment); return 0; } /*! * \brief pixCorrelationBestShift() * * \param[in] pix1 1 bpp, the unknown image; typically larger * \param[in] pix2 1 bpp, the matching template image) * \param[in] nasum1 vertical column pixel sums for pix1 * \param[in] namoment1 vertical column first moment of pixels for pix1 * \param[in] area2 number of on pixels in pix2 * \param[in] ycent2 y component of centroid of pix2 * \param[in] maxyshift max y shift of pix2 around the location where * the centroids of pix2 and a windowed part of pix1 * are vertically aligned * \param[in] tab8 [optional] sum tab for ON pixels in byte; * can be NULL * \param[out] pdelx [optional] best x shift of pix2 relative to pix1 * \param[out] pdely [optional] best y shift of pix2 relative to pix1 * \param[out] pscore [optional] maximum score found; can be NULL * \param[in] debugflag <= 0 to skip; positive to generate output; * the integer is used to label the debug image. * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This maximizes the correlation score between two 1 bpp images,
 *          one of which is typically wider.  In a typical example,
 *          pix1 is a bitmap of 2 or more touching characters and pix2 is
 *          a single character template.  This finds the location of pix2
 *          that gives the largest correlation.
 *      (2) The windowed area of fg pixels and windowed first moment
 *          in the y direction are computed from the input sum and moment
 *          column arrays, %nasum1 and %namoment1
 *      (3) This is a brute force operation.  We compute the correlation
 *          at every x shift for which pix2 fits entirely within pix1,
 *          and where the centroid of pix2 is aligned, within +-maxyshift,
 *          with the centroid of a window of pix1 of the same width.
 *          The correlation is taken over the full height of pix1.
 *          This can be made more efficient.
 * 
*/ static l_int32 pixCorrelationBestShift(PIX *pix1, PIX *pix2, NUMA *nasum1, NUMA *namoment1, l_int32 area2, l_int32 ycent2, l_int32 maxyshift, l_int32 *tab8, l_int32 *pdelx, l_int32 *pdely, l_float32 *pscore, l_int32 debugflag) { l_int32 w1, w2, h1, h2, i, j, nx, shifty, delx, dely; l_int32 sum, moment, count; l_int32 *tab, *area1, *arraysum, *arraymoment; l_float32 maxscore, score; l_float32 *ycent1; FPIX *fpix; PIX *pixt, *pixt1, *pixt2; PROCNAME("pixCorrelationBestShift"); if (pdelx) *pdelx = 0; if (pdely) *pdely = 0; if (pscore) *pscore = 0.0; if (!pix1 || pixGetDepth(pix1) != 1) return ERROR_INT("pix1 not defined or not 1 bpp", procName, 1); if (!pix2 || pixGetDepth(pix2) != 1) return ERROR_INT("pix2 not defined or not 1 bpp", procName, 1); if (!nasum1 || !namoment1) return ERROR_INT("nasum1 and namoment1 not both defined", procName, 1); if (area2 <= 0 || ycent2 <= 0) return ERROR_INT("area2 and ycent2 must be > 0", procName, 1); /* If pix1 (the unknown image) is narrower than pix2, * don't bother to try the match. pix1 is already padded with * 2 pixels on each side. */ pixGetDimensions(pix1, &w1, &h1, NULL); pixGetDimensions(pix2, &w2, &h2, NULL); if (w1 < w2) { if (debugflag > 0) { L_INFO("skipping match with w1 = %d and w2 = %d\n", procName, w1, w2); } return 0; } nx = w1 - w2 + 1; if (debugflag > 0) fpix = fpixCreate(nx, 2 * maxyshift + 1); if (!tab8) tab = makePixelSumTab8(); else tab = tab8; /* Set up the arrays for area1 and ycent1. We have to do this * for each template (pix2) because the window width is w2. */ area1 = (l_int32 *)LEPT_CALLOC(nx, sizeof(l_int32)); ycent1 = (l_float32 *)LEPT_CALLOC(nx, sizeof(l_int32)); arraysum = numaGetIArray(nasum1); arraymoment = numaGetIArray(namoment1); for (i = 0, sum = 0, moment = 0; i < w2; i++) { sum += arraysum[i]; moment += arraymoment[i]; } for (i = 0; i < nx - 1; i++) { area1[i] = sum; ycent1[i] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; sum += arraysum[w2 + i] - arraysum[i]; moment += arraymoment[w2 + i] - arraymoment[i]; } area1[nx - 1] = sum; ycent1[nx - 1] = (sum == 0) ? ycent2 : (l_float32)moment / (l_float32)sum; /* Find the best match location for pix2. At each location, * to insure that pixels are ON only within the intersection of * pix and the shifted pix2: * (1) Start with pixt cleared and equal in size to pix1. * (2) Blit the shifted pix2 onto pixt. Then all ON pixels * are within the intersection of pix1 and the shifted pix2. * (3) AND pix1 with pixt. */ pixt = pixCreate(w2, h1, 1); maxscore = 0; delx = 0; dely = 0; /* amount to shift pix2 relative to pix1 to get alignment */ for (i = 0; i < nx; i++) { shifty = (l_int32)(ycent1[i] - ycent2 + 0.5); for (j = -maxyshift; j <= maxyshift; j++) { pixClearAll(pixt); pixRasterop(pixt, 0, shifty + j, w2, h2, PIX_SRC, pix2, 0, 0); pixRasterop(pixt, 0, 0, w2, h1, PIX_SRC & PIX_DST, pix1, i, 0); pixCountPixels(pixt, &count, tab); score = (l_float32)count * (l_float32)count / ((l_float32)area1[i] * (l_float32)area2); if (score > maxscore) { maxscore = score; delx = i; dely = shifty + j; } if (debugflag > 0) fpixSetPixel(fpix, i, maxyshift + j, 1000.0 * score); } } if (debugflag > 0) { char buf[128]; lept_mkdir("lept/recog"); pixt1 = fpixDisplayMaxDynamicRange(fpix); pixt2 = pixExpandReplicate(pixt1, 5); snprintf(buf, sizeof(buf), "/tmp/lept/recog/junkbs_%d.png", debugflag); pixWrite(buf, pixt2, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); fpixDestroy(&fpix); } if (pdelx) *pdelx = delx; if (pdely) *pdely = dely; if (pscore) *pscore = maxscore; if (!tab8) LEPT_FREE(tab); LEPT_FREE(area1); LEPT_FREE(ycent1); LEPT_FREE(arraysum); LEPT_FREE(arraymoment); pixDestroy(&pixt); return 0; } /*------------------------------------------------------------------------* * Low-level identification * *------------------------------------------------------------------------*/ /*! * \brief recogIdentifyPixa() * * \param[in] recog * \param[in] pixa of 1 bpp images to match * \param[out] ppixdb [optional] pix showing inputs and best fits * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This should be called by recogIdentifyMuliple(), which
 *          binarizes and splits characters before sending %pixa here.
 *      (2) This calls recogIdentifyPix(), which does the same operation
 *          on each pix in %pixa, and optionally returns the arrays
 *          of results (scores, class index and character string)
 *          for the best correlation match.
 * 
*/ l_ok recogIdentifyPixa(L_RECOG *recog, PIXA *pixa, PIX **ppixdb) { char *text; l_int32 i, n, fail, index, depth; l_float32 score; PIX *pix1, *pix2, *pix3; PIXA *pixa1; L_RCH *rch; PROCNAME("recogIdentifyPixa"); if (ppixdb) *ppixdb = NULL; if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixa) return ERROR_INT("pixa not defined", procName, 1); /* Run the recognizer on the set of images. This writes * the text string into each pix in pixa. */ n = pixaGetCount(pixa); rchaDestroy(&recog->rcha); recog->rcha = rchaCreate(); pixa1 = (ppixdb) ? pixaCreate(n) : NULL; depth = 1; for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa, i, L_CLONE); pix2 = NULL; fail = FALSE; if (!ppixdb) fail = recogIdentifyPix(recog, pix1, NULL); else fail = recogIdentifyPix(recog, pix1, &pix2); if (fail) recogSkipIdentify(recog); if ((rch = recog->rch) == NULL) { L_ERROR("rch not found for char %d\n", procName, i); pixDestroy(&pix1); pixDestroy(&pix2); continue; } rchExtract(rch, NULL, NULL, &text, NULL, NULL, NULL, NULL); pixSetText(pix1, text); LEPT_FREE(text); if (ppixdb) { rchExtract(rch, &index, &score, NULL, NULL, NULL, NULL, NULL); pix3 = recogShowMatch(recog, pix2, NULL, NULL, index, score); if (i == 0) depth = pixGetDepth(pix3); pixaAddPix(pixa1, pix3, L_INSERT); pixDestroy(&pix2); } transferRchToRcha(rch, recog->rcha); pixDestroy(&pix1); } /* Package the images for debug */ if (ppixdb) { *ppixdb = pixaDisplayTiledInRows(pixa1, depth, 2500, 1.0, 0, 20, 1); pixaDestroy(&pixa1); } return 0; } /*! * \brief recogIdentifyPix() * * \param[in] recog with LUT's pre-computed * \param[in] pixs of a single character, 1 bpp * \param[out] ppixdb [optional] debug pix showing input and best fit * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) Basic recognition function for a single character.
 *      (2) If templ_use == L_USE_ALL_TEMPLATES, which is the default
 *          situation, matching is attempted to every bitmap in the recog,
 *          and the identify of the best match is returned.
 *      (3) For finding outliers, templ_use == L_USE_AVERAGE_TEMPLATES, and
 *          matching is only attemplted to the averaged bitmaps.  For this
 *          case, the index of the bestsample is meaningless (0 is returned
 *          if requested).
 *      (4) The score is related to the confidence (probability of correct
 *          identification), in that a higher score is correlated with
 *          a higher probability.  However, the actual relation between
 *          the correlation (score) and the probability is not known;
 *          we call this a "score" because "confidence" can be misinterpreted
 *          as an actual probability.
 * 
*/ l_ok recogIdentifyPix(L_RECOG *recog, PIX *pixs, PIX **ppixdb) { char *text; l_int32 i, j, n, bestindex, bestsample, area1, area2; l_int32 shiftx, shifty, bestdelx, bestdely, bestwidth, maxyshift; l_float32 x1, y1, x2, y2, delx, dely, score, maxscore; NUMA *numa; PIX *pix0, *pix1, *pix2; PIXA *pixa; PTA *pta; PROCNAME("recogIdentifyPix"); if (ppixdb) *ppixdb = NULL; if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); /* Do the averaging if required and not yet done. */ if (recog->templ_use == L_USE_AVERAGE_TEMPLATES && !recog->ave_done) { recogAverageSamples(&recog, 0); if (!recog) return ERROR_INT("averaging failed", procName, 1); } /* Binarize and crop to foreground if necessary */ if ((pix0 = recogProcessToIdentify(recog, pixs, 0)) == NULL) return ERROR_INT("no fg pixels in pix0", procName, 1); /* Optionally scale and/or convert to fixed stroke width */ pix1 = recogModifyTemplate(recog, pix0); pixDestroy(&pix0); if (!pix1) return ERROR_INT("no fg pixels in pix1", procName, 1); /* Do correlation at all positions within +-maxyshift of * the nominal centroid alignment. */ pixCountPixels(pix1, &area1, recog->sumtab); pixCentroid(pix1, recog->centtab, recog->sumtab, &x1, &y1); bestindex = bestsample = bestdelx = bestdely = bestwidth = 0; maxscore = 0.0; maxyshift = recog->maxyshift; if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) { for (i = 0; i < recog->setsize; i++) { numaGetIValue(recog->nasum, i, &area2); if (area2 == 0) continue; /* no template available */ pix2 = pixaGetPix(recog->pixa, i, L_CLONE); ptaGetPt(recog->pta, i, &x2, &y2); delx = x1 - x2; dely = y1 - y2; for (shifty = -maxyshift; shifty <= maxyshift; shifty++) { for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) { pixCorrelationScoreSimple(pix1, pix2, area1, area2, delx + shiftx, dely + shifty, 5, 5, recog->sumtab, &score); if (score > maxscore) { bestindex = i; bestdelx = delx + shiftx; bestdely = dely + shifty; maxscore = score; } } } pixDestroy(&pix2); } } else { /* use all the samples */ for (i = 0; i < recog->setsize; i++) { pixa = pixaaGetPixa(recog->pixaa, i, L_CLONE); n = pixaGetCount(pixa); if (n == 0) { pixaDestroy(&pixa); continue; } numa = numaaGetNuma(recog->naasum, i, L_CLONE); pta = ptaaGetPta(recog->ptaa, i, L_CLONE); for (j = 0; j < n; j++) { pix2 = pixaGetPix(pixa, j, L_CLONE); numaGetIValue(numa, j, &area2); ptaGetPt(pta, j, &x2, &y2); delx = x1 - x2; dely = y1 - y2; for (shifty = -maxyshift; shifty <= maxyshift; shifty++) { for (shiftx = -maxyshift; shiftx <= maxyshift; shiftx++) { pixCorrelationScoreSimple(pix1, pix2, area1, area2, delx + shiftx, dely + shifty, 5, 5, recog->sumtab, &score); if (score > maxscore) { bestindex = i; bestsample = j; bestdelx = delx + shiftx; bestdely = dely + shifty; maxscore = score; bestwidth = pixGetWidth(pix2); } } } pixDestroy(&pix2); } pixaDestroy(&pixa); numaDestroy(&numa); ptaDestroy(&pta); } } /* Package up the results */ recogGetClassString(recog, bestindex, &text); rchDestroy(&recog->rch); recog->rch = rchCreate(bestindex, maxscore, text, bestsample, bestdelx, bestdely, bestwidth); if (ppixdb) { if (recog->templ_use == L_USE_AVERAGE_TEMPLATES) { L_INFO("Best match: str %s; class %d; sh (%d, %d); score %5.3f\n", procName, text, bestindex, bestdelx, bestdely, maxscore); pix2 = pixaGetPix(recog->pixa, bestindex, L_CLONE); } else { /* L_USE_ALL_TEMPLATES */ L_INFO("Best match: str %s; sample %d in class %d; score %5.3f\n", procName, text, bestsample, bestindex, maxscore); if (maxyshift > 0 && (L_ABS(bestdelx) > 0 || L_ABS(bestdely) > 0)) { L_INFO(" Best shift: (%d, %d)\n", procName, bestdelx, bestdely); } pix2 = pixaaGetPix(recog->pixaa, bestindex, bestsample, L_CLONE); } *ppixdb = recogShowMatch(recog, pix1, pix2, NULL, -1, 0.0); pixDestroy(&pix2); } pixDestroy(&pix1); return 0; } /*! * \brief recogSkipIdentify() * * \param[in] recog * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This just writes a "dummy" result with 0 score and empty
 *          string id into the rch.
 * 
*/ l_ok recogSkipIdentify(L_RECOG *recog) { PROCNAME("recogSkipIdentify"); if (!recog) return ERROR_INT("recog not defined", procName, 1); /* Package up placeholder results */ rchDestroy(&recog->rch); recog->rch = rchCreate(0, 0.0, stringNew(""), 0, 0, 0, 0); return 0; } /*------------------------------------------------------------------------* * Operations for handling identification results * *------------------------------------------------------------------------*/ /*! * \brief rchaCreate() * * Return: 0 if OK, 1 on error * * Notes: * (1) Be sure to destroy any existing rcha before assigning this. */ static L_RCHA * rchaCreate() { L_RCHA *rcha; rcha = (L_RCHA *)LEPT_CALLOC(1, sizeof(L_RCHA)); rcha->naindex = numaCreate(0); rcha->nascore = numaCreate(0); rcha->satext = sarrayCreate(0); rcha->nasample = numaCreate(0); rcha->naxloc = numaCreate(0); rcha->nayloc = numaCreate(0); rcha->nawidth = numaCreate(0); return rcha; } /*! * \brief rchaDestroy() * * \param[in,out] prcha to be nulled */ void rchaDestroy(L_RCHA **prcha) { L_RCHA *rcha; PROCNAME("rchaDestroy"); if (prcha == NULL) { L_WARNING("&rcha is null!\n", procName); return; } if ((rcha = *prcha) == NULL) return; numaDestroy(&rcha->naindex); numaDestroy(&rcha->nascore); sarrayDestroy(&rcha->satext); numaDestroy(&rcha->nasample); numaDestroy(&rcha->naxloc); numaDestroy(&rcha->nayloc); numaDestroy(&rcha->nawidth); LEPT_FREE(rcha); *prcha = NULL; } /*! * \brief rchCreate() * * \param[in] index index of best template * \param[in] score correlation score of best template * \param[in] text character string of best template * \param[in] sample index of best sample; -1 if averages are used * \param[in] xloc x-location of template: delx + shiftx * \param[in] yloc y-location of template: dely + shifty * \param[in] width width of best template * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) Be sure to destroy any existing rch before assigning this.
 *      (2) This stores the text string, not a copy of it, so the
 *          caller must not destroy the string.
 * 
*/ static L_RCH * rchCreate(l_int32 index, l_float32 score, char *text, l_int32 sample, l_int32 xloc, l_int32 yloc, l_int32 width) { L_RCH *rch; rch = (L_RCH *)LEPT_CALLOC(1, sizeof(L_RCH)); rch->index = index; rch->score = score; rch->text = text; rch->sample = sample; rch->xloc = xloc; rch->yloc = yloc; rch->width = width; return rch; } /*! * \brief rchDestroy() * * \param[in,out] prch to be nulled */ void rchDestroy(L_RCH **prch) { L_RCH *rch; PROCNAME("rchDestroy"); if (prch == NULL) { L_WARNING("&rch is null!\n", procName); return; } if ((rch = *prch) == NULL) return; LEPT_FREE(rch->text); LEPT_FREE(rch); *prch = NULL; } /*! * \brief rchaExtract() * * \param[in] rcha * \param[out] pnaindex [optional] indices of best templates * \param[out] pnascore [optional] correl scores of best templates * \param[out] psatext [optional] character strings of best templates * \param[out] pnasample [optional] indices of best samples * \param[out] pnaxloc [optional] x-locations of templates * \param[out] pnayloc [optional] y-locations of templates * \param[out] pnawidth [optional] widths of best templates * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This returns clones of the number and string arrays.  They must
 *          be destroyed by the caller.
 * 
*/ l_ok rchaExtract(L_RCHA *rcha, NUMA **pnaindex, NUMA **pnascore, SARRAY **psatext, NUMA **pnasample, NUMA **pnaxloc, NUMA **pnayloc, NUMA **pnawidth) { PROCNAME("rchaExtract"); if (pnaindex) *pnaindex = NULL; if (pnascore) *pnascore = NULL; if (psatext) *psatext = NULL; if (pnasample) *pnasample = NULL; if (pnaxloc) *pnaxloc = NULL; if (pnayloc) *pnayloc = NULL; if (pnawidth) *pnawidth = NULL; if (!rcha) return ERROR_INT("rcha not defined", procName, 1); if (pnaindex) *pnaindex = numaClone(rcha->naindex); if (pnascore) *pnascore = numaClone(rcha->nascore); if (psatext) *psatext = sarrayClone(rcha->satext); if (pnasample) *pnasample = numaClone(rcha->nasample); if (pnaxloc) *pnaxloc = numaClone(rcha->naxloc); if (pnayloc) *pnayloc = numaClone(rcha->nayloc); if (pnawidth) *pnawidth = numaClone(rcha->nawidth); return 0; } /*! * \brief rchExtract() * * \param[in] rch * \param[out] pindex [optional] index of best template * \param[out] pscore [optional] correlation score of best template * \param[out] ptext [optional] character string of best template * \param[out] psample [optional] index of best sample * \param[out] pxloc [optional] x-location of template * \param[out] pyloc [optional] y-location of template * \param[out] pwidth [optional] width of best template * \return 0 if OK, 1 on error */ l_ok rchExtract(L_RCH *rch, l_int32 *pindex, l_float32 *pscore, char **ptext, l_int32 *psample, l_int32 *pxloc, l_int32 *pyloc, l_int32 *pwidth) { PROCNAME("rchExtract"); if (pindex) *pindex = 0; if (pscore) *pscore = 0.0; if (ptext) *ptext = NULL; if (psample) *psample = 0; if (pxloc) *pxloc = 0; if (pyloc) *pyloc = 0; if (pwidth) *pwidth = 0; if (!rch) return ERROR_INT("rch not defined", procName, 1); if (pindex) *pindex = rch->index; if (pscore) *pscore = rch->score; if (ptext) *ptext = stringNew(rch->text); /* new string: owned by caller */ if (psample) *psample = rch->sample; if (pxloc) *pxloc = rch->xloc; if (pyloc) *pyloc = rch->yloc; if (pwidth) *pwidth = rch->width; return 0; } /*! * \brief transferRchToRcha() * * \param[in] rch source of data * \param[in] rcha append to arrays in this destination * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This is used to transfer the results of a single character
 *          identification to an rcha array for the array of characters.
 * 
*/ static l_int32 transferRchToRcha(L_RCH *rch, L_RCHA *rcha) { PROCNAME("transferRchToRcha"); if (!rch) return ERROR_INT("rch not defined", procName, 1); if (!rcha) return ERROR_INT("rcha not defined", procName, 1); numaAddNumber(rcha->naindex, rch->index); numaAddNumber(rcha->nascore, rch->score); sarrayAddString(rcha->satext, rch->text, L_COPY); numaAddNumber(rcha->nasample, rch->sample); numaAddNumber(rcha->naxloc, rch->xloc); numaAddNumber(rcha->nayloc, rch->yloc); numaAddNumber(rcha->nawidth, rch->width); return 0; } /*------------------------------------------------------------------------* * Preprocessing and filtering * *------------------------------------------------------------------------*/ /*! * \brief recogProcessToIdentify() * * \param[in] recog with LUT's pre-computed * \param[in] pixs typ. single character, possibly d > 1 and uncropped * \param[in] pad extra pixels added to left and right sides * \return pixd 1 bpp, clipped to foreground, or NULL if there * are no fg pixels or on error. * *
 * Notes:
 *      (1) This is a lightweight operation to insure that the input
 *          image is 1 bpp, properly cropped, and padded on each side.
 *          If bpp > 1, the image is thresholded.
 * 
*/ PIX * recogProcessToIdentify(L_RECOG *recog, PIX *pixs, l_int32 pad) { l_int32 canclip; PIX *pix1, *pix2, *pixd; PROCNAME("recogProcessToIdentify"); if (!recog) return (PIX *)ERROR_PTR("recog not defined", procName, NULL); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (pixGetDepth(pixs) != 1) pix1 = pixThresholdToBinary(pixs, recog->threshold); else pix1 = pixClone(pixs); pixTestClipToForeground(pix1, &canclip); if (canclip) pixClipToForeground(pix1, &pix2, NULL); else pix2 = pixClone(pix1); pixDestroy(&pix1); if (!pix2) return (PIX *)ERROR_PTR("no foreground pixels", procName, NULL); pixd = pixAddBorderGeneral(pix2, pad, pad, 0, 0, 0); pixDestroy(&pix2); return pixd; } /*! * \brief recogPreSplittingFilter() * * \param[in] recog * \param[in] pixs 1 bpp, many connected components * \param[in] minh minimum height of components to be retained * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained * \param[in] debug 1 to output indicator arrays * \return pixd with filtered components removed or NULL on error */ static PIX * recogPreSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 minh, l_float32 minaf, l_int32 debug) { l_int32 scaling, minsplitw, maxsplith, maxasp; BOXA *boxas; NUMA *naw, *nah, *na1, *na1c, *na2, *na3, *na4, *na5, *na6, *na7; PIX *pixd; PIXA *pixas; PROCNAME("recogPreSplittingFilter"); if (!recog) return (PIX *)ERROR_PTR("recog not defined", procName, NULL); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); /* If there is scaling, do not remove components based on the * values of min_splitw and max_splith. */ scaling = (recog->scalew > 0 || recog->scaleh > 0) ? TRUE : FALSE; minsplitw = (scaling) ? 1 : recog->min_splitw - 3; maxsplith = (scaling) ? 150 : recog->max_splith; maxasp = recog->max_wh_ratio; /* Generate an indicator array of connected components to remove: * short stuff * tall stuff * components with large width/height ratio * components with small area fill fraction */ boxas = pixConnComp(pixs, &pixas, 8); pixaFindDimensions(pixas, &naw, &nah); na1 = numaMakeThresholdIndicator(naw, minsplitw, L_SELECT_IF_LT); na1c = numaCopy(na1); na2 = numaMakeThresholdIndicator(nah, minh, L_SELECT_IF_LT); na3 = numaMakeThresholdIndicator(nah, maxsplith, L_SELECT_IF_GT); na4 = pixaFindWidthHeightRatio(pixas); na5 = numaMakeThresholdIndicator(na4, maxasp, L_SELECT_IF_GT); na6 = pixaFindAreaFraction(pixas); na7 = numaMakeThresholdIndicator(na6, minaf, L_SELECT_IF_LT); numaLogicalOp(na1, na1, na2, L_UNION); numaLogicalOp(na1, na1, na3, L_UNION); numaLogicalOp(na1, na1, na5, L_UNION); numaLogicalOp(na1, na1, na7, L_UNION); pixd = pixCopy(NULL, pixs); pixRemoveWithIndicator(pixd, pixas, na1); if (debug) l_showIndicatorSplitValues(na1c, na2, na3, na5, na7, na1); numaDestroy(&naw); numaDestroy(&nah); numaDestroy(&na1); numaDestroy(&na1c); numaDestroy(&na2); numaDestroy(&na3); numaDestroy(&na4); numaDestroy(&na5); numaDestroy(&na6); numaDestroy(&na7); boxaDestroy(&boxas); pixaDestroy(&pixas); return pixd; } /*! * \brief recogSplittingFilter() * * \param[in] recog * \param[in] pixs 1 bpp, single connected component * \param[in] minh minimum height of component; 0 for default * \param[in] minaf minimum area fraction (|fg|/(w*h)) to be retained * \param[out] premove 0 to save, 1 to remove * \param[in] debug 1 to output indicator arrays * \return 0 if OK, 1 on error */ static l_int32 recogSplittingFilter(L_RECOG *recog, PIX *pixs, l_int32 minh, l_float32 minaf, l_int32 *premove, l_int32 debug) { l_int32 w, h; l_float32 aspratio, fract; PROCNAME("recogSplittingFilter"); if (!premove) return ERROR_INT("&remove not defined", procName, 1); *premove = 0; if (!recog) return ERROR_INT("recog not defined", procName, 1); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (minh <= 0) minh = DefaultMinHeight; /* Remove from further consideration: * small stuff * components with large width/height ratio * components with small area fill fraction */ pixGetDimensions(pixs, &w, &h, NULL); if (w < recog->min_splitw) { if (debug) L_INFO("w = %d < %d\n", procName, w, recog->min_splitw); *premove = 1; return 0; } if (h < minh) { if (debug) L_INFO("h = %d < %d\n", procName, h, minh); *premove = 1; return 0; } aspratio = (l_float32)w / (l_float32)h; if (aspratio > recog->max_wh_ratio) { if (debug) L_INFO("w/h = %5.3f too large\n", procName, aspratio); *premove = 1; return 0; } pixFindAreaFraction(pixs, recog->sumtab, &fract); if (fract < minaf) { if (debug) L_INFO("area fill fract %5.3f < %5.3f\n", procName, fract, minaf); *premove = 1; return 0; } return 0; } /*------------------------------------------------------------------------* * Postprocessing * *------------------------------------------------------------------------*/ /*! * \brief recogExtractNumbers() * * \param[in] recog * \param[in] boxas location of components * \param[in] scorethresh min score for which we accept a component * \param[in] spacethresh max horizontal distance allowed between digits; * use -1 for default * \param[out] pbaa [optional] bounding boxes of identified numbers * \param[out] pnaa [optional] scores of identified digits * \return sa of identified numbers, or NULL on error * *
 * Notes:
 *      (1) This extracts digit data after recogaIdentifyMultiple() or
 *          lower-level identification has taken place.
 *      (2) Each string in the returned sa contains a sequence of ascii
 *          digits in a number.
 *      (3) The horizontal distance between boxes (limited by %spacethresh)
 *          is the negative of the horizontal overlap.
 *      (4) Components with a score less than %scorethresh, which may
 *          be hyphens or other small characters, will signal the
 *          end of the current sequence of digits in the number.  A typical
 *          value for %scorethresh is 0.60.
 *      (5) We allow two digits to be combined if these conditions apply:
 *            (a) the first is to the left of the second
 *            (b) the second has a horizontal separation less than %spacethresh
 *            (c) the vertical overlap >= 0 (vertical separation < 0)
 *            (d) both have a score that exceeds %scorethresh
 *      (6) Each numa in the optionally returned naa contains the digit
 *          scores of a number.  Each boxa in the optionally returned baa
 *          contains the bounding boxes of the digits in the number.
 * 
*/ SARRAY * recogExtractNumbers(L_RECOG *recog, BOXA *boxas, l_float32 scorethresh, l_int32 spacethresh, BOXAA **pbaa, NUMAA **pnaa) { char *str, *text; l_int32 i, n, x1, x2, h_ovl, v_ovl, h_sep, v_sep; l_float32 score; BOX *box, *prebox; BOXA *ba; BOXAA *baa; NUMA *nascore, *na; NUMAA *naa; SARRAY *satext, *sa, *saout; PROCNAME("recogExtractNumbers"); if (pbaa) *pbaa = NULL; if (pnaa) *pnaa = NULL; if (!recog || !recog->rcha) return (SARRAY *)ERROR_PTR("recog and rcha not both defined", procName, NULL); if (!boxas) return (SARRAY *)ERROR_PTR("boxas not defined", procName, NULL); if (spacethresh < 0) spacethresh = L_MAX(recog->maxheight_u, 20); rchaExtract(recog->rcha, NULL, &nascore, &satext, NULL, NULL, NULL, NULL); if (!nascore || !satext) { numaDestroy(&nascore); sarrayDestroy(&satext); return (SARRAY *)ERROR_PTR("nascore and satext not both returned", procName, NULL); } saout = sarrayCreate(0); naa = numaaCreate(0); baa = boxaaCreate(0); prebox = NULL; n = numaGetCount(nascore); for (i = 0; i < n; i++) { numaGetFValue(nascore, i, &score); text = sarrayGetString(satext, i, L_NOCOPY); if (prebox == NULL) { /* no current run */ if (score < scorethresh) { continue; } else { /* start a number run */ sa = sarrayCreate(0); ba = boxaCreate(0); na = numaCreate(0); sarrayAddString(sa, text, L_COPY); prebox = boxaGetBox(boxas, i, L_CLONE); boxaAddBox(ba, prebox, L_COPY); numaAddNumber(na, score); } } else { /* in a current number run */ box = boxaGetBox(boxas, i, L_CLONE); boxGetGeometry(prebox, &x1, NULL, NULL, NULL); boxGetGeometry(box, &x2, NULL, NULL, NULL); boxOverlapDistance(box, prebox, &h_ovl, &v_ovl); h_sep = -h_ovl; v_sep = -v_ovl; boxDestroy(&prebox); if (x1 < x2 && h_sep <= spacethresh && v_sep < 0 && score >= scorethresh) { /* add to number */ sarrayAddString(sa, text, L_COPY); boxaAddBox(ba, box, L_COPY); numaAddNumber(na, score); prebox = box; } else { /* save the completed number */ str = sarrayToString(sa, 0); sarrayAddString(saout, str, L_INSERT); sarrayDestroy(&sa); boxaaAddBoxa(baa, ba, L_INSERT); numaaAddNuma(naa, na, L_INSERT); boxDestroy(&box); if (score >= scorethresh) { /* start a new number */ i--; continue; } } } } if (prebox) { /* save the last number */ str = sarrayToString(sa, 0); sarrayAddString(saout, str, L_INSERT); boxaaAddBoxa(baa, ba, L_INSERT); numaaAddNuma(naa, na, L_INSERT); sarrayDestroy(&sa); boxDestroy(&prebox); } numaDestroy(&nascore); sarrayDestroy(&satext); if (sarrayGetCount(saout) == 0) { sarrayDestroy(&saout); boxaaDestroy(&baa); numaaDestroy(&naa); L_INFO("saout has no identified text\n", procName); return NULL; } if (pbaa) *pbaa = baa; else boxaaDestroy(&baa); if (pnaa) *pnaa = naa; else numaaDestroy(&naa); return saout; } /*! * \brief showExtractNumbers() * * \param[in] pixs input 1 bpp image * \param[in] sa recognized text strings * \param[in] baa boxa array for location of characters in each string * \param[in] naa numa array for scores of characters in each string * \param[out] ppixdb [optional] input pixs with identified chars outlined * \return pixa of identified strings with text and scores, or NULL on error * *
 * Notes:
 *      (1) This is a debugging routine on digit identification; e.g.:
 *            recogIdentifyMultiple(recog, pixs, 0, 1, &boxa, NULL, NULL, 0);
 *            sa = recogExtractNumbers(recog, boxa, 0.8, -1, &baa, &naa);
 *            pixa = showExtractNumbers(pixs, sa, baa, naa, NULL);
 * 
*/ PIXA * showExtractNumbers(PIX *pixs, SARRAY *sa, BOXAA *baa, NUMAA *naa, PIX **ppixdb) { char buf[128]; char *textstr, *scorestr; l_int32 i, j, n, nchar, len; l_float32 score; L_BMF *bmf; BOX *box1, *box2; BOXA *ba; NUMA *na; PIX *pix1, *pix2, *pix3, *pix4; PIXA *pixa; PROCNAME("showExtractNumbers"); if (ppixdb) *ppixdb = NULL; if (!pixs) return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); if (!sa) return (PIXA *)ERROR_PTR("sa not defined", procName, NULL); if (!baa) return (PIXA *)ERROR_PTR("baa not defined", procName, NULL); if (!naa) return (PIXA *)ERROR_PTR("naa not defined", procName, NULL); n = sarrayGetCount(sa); pixa = pixaCreate(n); bmf = bmfCreate(NULL, 6); if (ppixdb) *ppixdb = pixConvertTo8(pixs, 1); for (i = 0; i < n; i++) { textstr = sarrayGetString(sa, i, L_NOCOPY); ba = boxaaGetBoxa(baa, i, L_CLONE); na = numaaGetNuma(naa, i, L_CLONE); boxaGetExtent(ba, NULL, NULL, &box1); box2 = boxAdjustSides(NULL, box1, -5, 5, -5, 5); if (ppixdb) pixRenderBoxArb(*ppixdb, box2, 3, 255, 0, 0); pix1 = pixClipRectangle(pixs, box1, NULL); len = strlen(textstr) + 1; pix2 = pixAddBlackOrWhiteBorder(pix1, 14 * len, 14 * len, 5, 3, L_SET_WHITE); pix3 = pixConvertTo8(pix2, 1); nchar = numaGetCount(na); scorestr = NULL; for (j = 0; j < nchar; j++) { numaGetFValue(na, j, &score); snprintf(buf, sizeof(buf), "%d", (l_int32)(100 * score)); stringJoinIP(&scorestr, buf); if (j < nchar - 1) stringJoinIP(&scorestr, ","); } snprintf(buf, sizeof(buf), "%s: %s\n", textstr, scorestr); pix4 = pixAddTextlines(pix3, bmf, buf, 0xff000000, L_ADD_BELOW); pixaAddPix(pixa, pix4, L_INSERT); boxDestroy(&box1); boxDestroy(&box2); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); boxaDestroy(&ba); numaDestroy(&na); LEPT_FREE(scorestr); } bmfDestroy(&bmf); return pixa; } /*------------------------------------------------------------------------* * Static debug helper * *------------------------------------------------------------------------*/ /*! * \brief l_showIndicatorSplitValues() * * \param[in] na1, na2, na3, na4, na5, na6 6 indicator array * *
 * Notes:
 *      (1) The values indicate that specific criteria has been met
 *          for component removal by pre-splitting filter..
 *          The 'result' line shows which components have been removed.
 * 
*/ static void l_showIndicatorSplitValues(NUMA *na1, NUMA *na2, NUMA *na3, NUMA *na4, NUMA *na5, NUMA *na6) { l_int32 i, n; n = numaGetCount(na1); lept_stderr("================================================\n"); lept_stderr("lt minw: "); for (i = 0; i < n; i++) lept_stderr("%4d ", (l_int32)na1->array[i]); lept_stderr("\nlt minh: "); for (i = 0; i < n; i++) lept_stderr("%4d ", (l_int32)na2->array[i]); lept_stderr("\ngt maxh: "); for (i = 0; i < n; i++) lept_stderr("%4d ", (l_int32)na3->array[i]); lept_stderr("\ngt maxasp: "); for (i = 0; i < n; i++) lept_stderr("%4d ", (l_int32)na4->array[i]); lept_stderr("\nlt minaf: "); for (i = 0; i < n; i++) lept_stderr("%4d ", (l_int32)na5->array[i]); lept_stderr("\n------------------------------------------------"); lept_stderr("\nresult: "); for (i = 0; i < n; i++) lept_stderr("%4d ", (l_int32)na6->array[i]); lept_stderr("\n================================================\n"); }