/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /* * jbclass.c * * These are functions for unsupervised classification of * collections of connected components -- either characters or * words -- in binary images. They can be used as image * processing steps in jbig2 compression. * * Initialization * * JBCLASSER *jbRankHausInit() [rank hausdorff encoder] * JBCLASSER *jbCorrelationInit() [correlation encoder] * JBCLASSER *jbCorrelationInitWithoutComponents() [ditto] * static JBCLASSER *jbCorrelationInitInternal() * * Classify the pages * * l_int32 jbAddPages() * l_int32 jbAddPage() * l_int32 jbAddPageComponents() * * Rank hausdorff classifier * * l_int32 jbClassifyRankHaus() * l_int32 pixHaustest() * l_int32 pixRankHaustest() * * Binary correlation classifier * * l_int32 jbClassifyCorrelation() * * Determine the image components we start with * * l_int32 jbGetComponents() * l_int32 pixWordMaskByDilation() * l_int32 pixWordBoxesByDilation() * * Build grayscale composites (templates) * * PIXA *jbAccumulateComposites * PIXA *jbTemplatesFromComposites * * Utility functions for Classer * * JBCLASSER *jbClasserCreate() * void jbClasserDestroy() * * Utility functions for Data * * JBDATA *jbDataSave() * void jbDataDestroy() * l_int32 jbDataWrite() * JBDATA *jbDataRead() * PIXA *jbDataRender() * l_int32 jbGetULCorners() * l_int32 jbGetLLCorners() * * Static helpers * * static JBFINDCTX *findSimilarSizedTemplatesInit() * static l_int32 findSimilarSizedTemplatesNext() * static void findSimilarSizedTemplatesDestroy() * static l_int32 finalPositioningForAlignment() * * Note: this is NOT an implementation of the JPEG jbig2 * proposed standard encoder, the specifications for which * can be found at http://www.jpeg.org/jbigpt2.html. * (See below for a full implementation.) * It is an implementation of the lower-level part of an encoder that: * * (1) identifies connected components that are going to be used * (2) puts them in similarity classes (this is an unsupervised * classifier), and * (3) stores the result in a simple file format (2 files, * one for templates and one for page/coordinate/template-index * quartets). * * An actual implementation of the official jbig2 encoder could * start with parts (1) and (2), and would then compress the quartets * according to the standards requirements (e.g., Huffman or * arithmetic coding of coordinate differences and image templates). * * The low-level part of the encoder provided here has the * following useful features: * * ~ It is accurate in the identification of templates * and classes because it uses a windowed hausdorff * distance metric. * ~ It is accurate in the placement of the connected * components, doing a two step process of first aligning * the the centroids of the template with those of each instance, * and then making a further correction of up to +- 1 pixel * in each direction to best align the templates. * ~ It is fast because it uses a morphologically based * matching algorithm to implement the hausdorff criterion, * and it selects the patterns that are possible matches * based on their size. * * We provide two different matching functions, one using Hausdorff * distance and one using a simple image correlation. * The Hausdorff method sometimes produces better results for the * same number of classes, because it gives a relatively small * effective weight to foreground pixels near the boundary, * and a relatively large weight to foreground pixels that are * not near the boundary. By effectively ignoring these boundary * pixels, Hausdorff weighting corresponds better to the expected * probabilities of the pixel values in a scanned image, where the * variations in instances of the same printed character are much * more likely to be in pixels near the boundary. By contrast, * the correlation method gives equal weight to all foreground pixels. * * For best results, use the correlation method. Correlation takes * the number of fg pixels in the AND of instance and template, * divided by the product of the number of fg pixels in instance * and template. It compares this with a threshold that, in * general, depends on the fractional coverage of the template. * For heavy text, the threshold is raised above that for light * text, By using both these parameters (basic threshold and * adjustment factor for text weight), one has more flexibility * and can arrive at the fewest substitution errors, although * this comes at the price of more templates. * * The strict Hausdorff scoring is not a rank weighting, because a * single pixel beyond the given distance will cause a match * failure. A rank Hausdorff is more robust to non-boundary noise, * but it is also more susceptible to confusing components that * should be in different classes. For implementing a jbig2 * application for visually lossless binary image compression, * you have two choices: * * (1) use a 3x3 structuring element (size = 3) and a strict * Hausdorff comparison (rank = 1.0 in the rank Hausdorff * function). This will result in a minimal number of classes, * but confusion of small characters, such as italic and * non-italic lower-case 'o', can still occur. * (2) use the correlation method with a threshold of 0.85 * and a weighting factor of about 0.7. This will result in * a larger number of classes, but should not be confused * either by similar small characters or by extremely * thick sans serif characters, such as in prog/cootoots.png. * * As mentioned above, if visual substitution errors must be * avoided, you should use the correlation method. * * We provide executables that show how to do the encoding: * prog/jbrankhaus.c * prog/jbcorrelation.c * * The basic flow for correlation classification goes as follows, * where specific choices have been made for parameters (Hausdorff * is the same except for initialization): * * // Initialize and save data in the classer * JBCLASSER *classer = * jbCorrelationInit(JB_CONN_COMPS, 0, 0, 0.8, 0.7); * SARRAY *safiles = getSortedPathnamesInDirectory(directory, * NULL, 0, 0); * jbAddPages(classer, safiles); * * // Save the data in a data structure for serialization, * // and write it into two files. * JBDATA *data = jbDataSave(classer); * jbDataWrite(rootname, data); * * // Reconstruct (render) the pages from the encoded data. * PIXA *pixa = jbDataRender(data, FALSE); * * Adam Langley has built a jbig2 standards-compliant encoder, the * first one to appear in open source. You can get this encoder at: * http://www.imperialviolet.org/jbig2.html * * It uses arithmetic encoding throughout. It encodes binary images * losslessly with a single arithmetic coding over the full image. * It also does both lossy and lossless encoding from connected * components, using leptonica to generate the templates representing * each cluster. */ #ifdef HAVE_CONFIG_H #include #endif /* HAVE_CONFIG_H */ #include #include #include "allheaders.h" #define L_BUF_SIZE 512 /* For jbClassifyRankHaus(): size of border added around * pix of each c.c., to allow further processing. This * should be at least the sum of the MAX_DIFF_HEIGHT * (or MAX_DIFF_WIDTH) and one-half the size of the Sel */ static const l_int32 JB_ADDED_PIXELS = 6; /* For pixHaustest(), pixRankHaustest() and pixCorrelationScore(): * choose these to be 2 or greater */ static const l_int32 MAX_DIFF_WIDTH = 2; /* use at least 2 */ static const l_int32 MAX_DIFF_HEIGHT = 2; /* use at least 2 */ /* In initialization, you have the option to discard components * (cc, characters or words) that have either width or height larger * than a given size. This is convenient for jbDataSave(), because * the components are placed onto a regular lattice with cell * dimension equal to the maximum component size. The default * values are given here. If you want to save all components, * use a sufficiently large set of dimensions. */ static const l_int32 MAX_CONN_COMP_WIDTH = 350; /* default max cc width */ static const l_int32 MAX_CHAR_COMP_WIDTH = 350; /* default max char width */ static const l_int32 MAX_WORD_COMP_WIDTH = 1000; /* default max word width */ static const l_int32 MAX_COMP_HEIGHT = 120; /* default max component height */ /* This stores the state of a state machine which fetches * similar sized templates */ struct JbFindTemplatesState { JBCLASSER *classer; /* classer */ l_int32 w; /* desired width */ l_int32 h; /* desired height */ l_int32 i; /* index into two_by_two step array */ L_DNA *dna; /* current number array */ l_int32 n; /* current element of dna */ }; typedef struct JbFindTemplatesState JBFINDCTX; /* Static initialization function */ static JBCLASSER * jbCorrelationInitInternal(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor, l_int32 keep_components); /* Static helper functions */ static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs); static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *context); static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pcontext); static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y, l_int32 idelx, l_int32 idely, PIX *pixt, l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy); #ifndef NO_CONSOLE_IO #define DEBUG_CORRELATION_SCORE 0 #endif /* ~NO_CONSOLE_IO */ /*----------------------------------------------------------------------* * Initialization * *----------------------------------------------------------------------*/ /*! * \brief jbRankHausInit() * * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS * \param[in] maxwidth of component; use 0 for default * \param[in] maxheight of component; use 0 for default * \param[in] size of square structuring element; 2, representing * 2x2 sel, is necessary for reasonable accuracy of * small components; combine this with rank ~ 0.97 * to avoid undue class expansion * \param[in] rank rank val of match, each way; in [0.5 - 1.0]; * when using size = 2, 0.97 is a reasonable value * \return jbclasser if OK; NULL on error */ JBCLASSER * jbRankHausInit(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_int32 size, l_float32 rank) { JBCLASSER *classer; PROCNAME("jbRankHausInit"); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL); if (size < 1 || size > 10) return (JBCLASSER *)ERROR_PTR("size not reasonable", procName, NULL); if (rank < 0.5 || rank > 1.0) return (JBCLASSER *)ERROR_PTR("rank not in [0.5-1.0]", procName, NULL); if (maxwidth == 0) { if (components == JB_CONN_COMPS) maxwidth = MAX_CONN_COMP_WIDTH; else if (components == JB_CHARACTERS) maxwidth = MAX_CHAR_COMP_WIDTH; else /* JB_WORDS */ maxwidth = MAX_WORD_COMP_WIDTH; } if (maxheight == 0) maxheight = MAX_COMP_HEIGHT; if ((classer = jbClasserCreate(JB_RANKHAUS, components)) == NULL) return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); classer->maxwidth = maxwidth; classer->maxheight = maxheight; classer->sizehaus = size; classer->rankhaus = rank; classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */ classer->keep_pixaa = 1; /* keep all components in pixaa */ return classer; } /*! * \brief jbCorrelationInit() * * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS * \param[in] maxwidth of component; use 0 for default * \param[in] maxheight of component; use 0 for default * \param[in] thresh value for correlation score: in [0.4 - 0.98] * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0] * \return jbclasser if OK; NULL on error * *
 * Notes:
 *      (1) For scanned text, suggested input values are:
 *            thresh ~ [0.8 - 0.85]
 *            weightfactor ~ [0.5 - 0.6]
 *      (2) For electronically generated fonts (e.g., rasterized pdf),
 *          a very high thresh (e.g., 0.95) will not cause a significant
 *          increase in the number of classes.
 * 
*/ JBCLASSER * jbCorrelationInit(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor) { return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, weightfactor, 1); } /*! * \brief jbCorrelationInitWithoutComponents() * * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS * \param[in] maxwidth of component; use 0 for default * \param[in] maxheight of component; use 0 for default * \param[in] thresh value for correlation score: in [0.4 - 0.98] * \param[in] weightfactor corrects thresh for thick characters [0.0 - 1.0] * \return jbclasser if OK; NULL on error * *
 * Notes:
 *      Acts the same as jbCorrelationInit(), but the resulting
 *      object doesn't keep a list of all the components.
 * 
*/ JBCLASSER * jbCorrelationInitWithoutComponents(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor) { return jbCorrelationInitInternal(components, maxwidth, maxheight, thresh, weightfactor, 0); } static JBCLASSER * jbCorrelationInitInternal(l_int32 components, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weightfactor, l_int32 keep_components) { JBCLASSER *classer; PROCNAME("jbCorrelationInitInternal"); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return (JBCLASSER *)ERROR_PTR("invalid components", procName, NULL); if (thresh < 0.4 || thresh > 0.98) return (JBCLASSER *)ERROR_PTR("thresh not in range [0.4 - 0.98]", procName, NULL); if (weightfactor < 0.0 || weightfactor > 1.0) return (JBCLASSER *)ERROR_PTR("weightfactor not in range [0.0 - 1.0]", procName, NULL); if (maxwidth == 0) { if (components == JB_CONN_COMPS) maxwidth = MAX_CONN_COMP_WIDTH; else if (components == JB_CHARACTERS) maxwidth = MAX_CHAR_COMP_WIDTH; else /* JB_WORDS */ maxwidth = MAX_WORD_COMP_WIDTH; } if (maxheight == 0) maxheight = MAX_COMP_HEIGHT; if ((classer = jbClasserCreate(JB_CORRELATION, components)) == NULL) return (JBCLASSER *)ERROR_PTR("classer not made", procName, NULL); classer->maxwidth = maxwidth; classer->maxheight = maxheight; classer->thresh = thresh; classer->weightfactor = weightfactor; classer->dahash = l_dnaHashCreate(5507, 4); /* 5507 is prime */ classer->keep_pixaa = keep_components; return classer; } /*----------------------------------------------------------------------* * Classify the pages * *----------------------------------------------------------------------*/ /*! * \brief jbAddPages() * * \param[in] jbclasser * \param[in] safiles of page image file names * \return 0 if OK; 1 on error * *
 * Notes:
 *      (1) jbclasser makes a copy of the array of file names.
 *      (2) The caller is still responsible for destroying the input array.
 * 
*/ l_ok jbAddPages(JBCLASSER *classer, SARRAY *safiles) { l_int32 i, nfiles; char *fname; PIX *pix; PROCNAME("jbAddPages"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!safiles) return ERROR_INT("safiles not defined", procName, 1); classer->safiles = sarrayCopy(safiles); nfiles = sarrayGetCount(safiles); for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, L_NOCOPY); if ((pix = pixRead(fname)) == NULL) { L_WARNING("image file %d not read\n", procName, i); continue; } if (pixGetDepth(pix) != 1) { L_WARNING("image file %d not 1 bpp\n", procName, i); continue; } jbAddPage(classer, pix); pixDestroy(&pix); } return 0; } /*! * \brief jbAddPage() * * \param[in] jbclasser * \param[in] pixs input page * \return 0 if OK; 1 on error */ l_ok jbAddPage(JBCLASSER *classer, PIX *pixs) { BOXA *boxas; PIXA *pixas; PROCNAME("jbAddPage"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); classer->w = pixGetWidth(pixs); classer->h = pixGetHeight(pixs); /* Get the appropriate components and their bounding boxes */ if (jbGetComponents(pixs, classer->components, classer->maxwidth, classer->maxheight, &boxas, &pixas)) { return ERROR_INT("components not made", procName, 1); } jbAddPageComponents(classer, pixs, boxas, pixas); boxaDestroy(&boxas); pixaDestroy(&pixas); return 0; } /*! * \brief jbAddPageComponents() * * \param[in] jbclasser * \param[in] pixs input page * \param[in] boxas b.b. of components for this page * \param[in] pixas components for this page * \return 0 if OK; 1 on error * *
 * Notes:
 *      (1) If there are no components on the page, we don't require input
 *          of empty boxas or pixas, although that's the typical situation.
 * 
*/ l_ok jbAddPageComponents(JBCLASSER *classer, PIX *pixs, BOXA *boxas, PIXA *pixas) { l_int32 n; PROCNAME("jbAddPageComponents"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!pixs) return ERROR_INT("pix not defined", procName, 1); /* Test for no components on the current page. Always update the * number of pages processed, even if nothing is on it. */ if (!boxas || !pixas || (boxaGetCount(boxas) == 0)) { classer->npages++; return 0; } /* Get classes. For hausdorff, it uses a specified size of * structuring element and specified rank. For correlation, * it uses a specified threshold. */ if (classer->method == JB_RANKHAUS) { if (jbClassifyRankHaus(classer, boxas, pixas)) return ERROR_INT("rankhaus classification failed", procName, 1); } else { /* classer->method == JB_CORRELATION */ if (jbClassifyCorrelation(classer, boxas, pixas)) return ERROR_INT("correlation classification failed", procName, 1); } /* Find the global UL corners, adjusted for each instance so * that the class template and instance will have their * centroids in the same place. Then the template can be * used to replace the instance. */ if (jbGetULCorners(classer, pixs, boxas)) return ERROR_INT("UL corners not found", procName, 1); /* Update total component counts and number of pages processed. */ n = boxaGetCount(boxas); classer->baseindex += n; numaAddNumber(classer->nacomps, (l_float32)n); classer->npages++; return 0; } /*----------------------------------------------------------------------* * Classification using windowed rank hausdorff metric * *----------------------------------------------------------------------*/ /*! * \brief jbClassifyRankHaus() * * \param[in] jbclasser * \param[in] boxa new components for classification * \param[in] pixas new components for classification * \return 0 if OK; 1 on error */ l_ok jbClassifyRankHaus(JBCLASSER *classer, BOXA *boxa, PIXA *pixas) { l_int32 n, nt, i, wt, ht, iclass, size, found, testval; l_int32 npages, area1, area3; l_int32 *tab8; l_float32 rank, x1, y1, x2, y2; BOX *box; NUMA *naclass, *napage; NUMA *nafg; /* fg area of all instances */ NUMA *nafgt; /* fg area of all templates */ JBFINDCTX *findcontext; L_DNAHASH *dahash; PIX *pix, *pix1, *pix2, *pix3, *pix4; PIXA *pixa, *pixa1, *pixa2, *pixat, *pixatd; PIXAA *pixaa; PTA *pta, *ptac, *ptact; SEL *sel; PROCNAME("jbClassifyRankHaus"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!boxa) return ERROR_INT("boxa not defined", procName, 1); if (!pixas) return ERROR_INT("pixas not defined", procName, 1); if ((n = pixaGetCount(pixas)) == 0) return ERROR_INT("pixas is empty", procName, 1); if ((nafg = pixaCountPixels(pixas)) == NULL) /* areas for this page */ return ERROR_INT("fg counting failed", procName, 1); npages = classer->npages; size = classer->sizehaus; sel = selCreateBrick(size, size, size / 2, size / 2, SEL_HIT); /* Generate the bordered pixa, with and without dilation. * pixa1 and pixa2 contain all the input components. */ pixa1 = pixaCreate(n); pixa2 = pixaCreate(n); for (i = 0; i < n; i++) { pix = pixaGetPix(pixas, i, L_CLONE); pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); pix2 = pixDilate(NULL, pix1, sel); pixaAddPix(pixa1, pix1, L_INSERT); /* un-dilated */ pixaAddPix(pixa2, pix2, L_INSERT); /* dilated */ pixDestroy(&pix); } /* Get the centroids of all the bordered images. * These are relative to the UL corner of each (bordered) pix. */ pta = pixaCentroids(pixa1); /* centroids for this page; use here */ ptac = classer->ptac; /* holds centroids of components up to this page */ ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */ ptact = classer->ptact; /* holds centroids of templates */ /* Use these to save the class and page of each component. */ naclass = classer->naclass; napage = classer->napage; /* Store the unbordered pix in a pixaa, in a hierarchical * set of arrays. There is one pixa for each class, * and the pix in each pixa are all the instances found * of that class. This is actually more than one would need * for a jbig2 encoder, but there are two reasons to keep * them around: (1) the set of instances for each class * can be used to make an improved binary (or, better, * a grayscale) template, rather than simply using the first * one in the set; (2) we can investigate the failures * of the classifier. This pixaa grows as we process * successive pages. */ pixaa = classer->pixaa; /* arrays to store class exemplars (templates) */ pixat = classer->pixat; /* un-dilated */ pixatd = classer->pixatd; /* dilated */ /* Fill up the pixaa tree with the template exemplars as * the first pix in each pixa. As we add each pix, * we also add the associated box to the pixa. * We also keep track of the centroid of each pix, * and use the difference between centroids (of the * pix with the exemplar we are checking it with) * to align the two when checking that the Hausdorff * distance does not exceed a threshold. * The threshold is set by the Sel used for dilating. * For example, a 3x3 brick, sel_3, corresponds to a * Hausdorff distance of 1. In general, for an NxN brick, * with N odd, corresponds to a Hausdorff distance of (N - 1)/2. * It turns out that we actually need to use a sel of size 2x2 * to avoid small bad components when there is a halftone image * from which components can be chosen. * The larger the Sel you use, the fewer the number of classes, * and the greater the likelihood of putting semantically * different objects in the same class. For simplicity, * we do this separately for the case of rank == 1.0 (exact * match within the Hausdorff distance) and rank < 1.0. */ rank = classer->rankhaus; dahash = classer->dahash; if (rank == 1.0) { for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa1, i, L_CLONE); pix2 = pixaGetPix(pixa2, i, L_CLONE); ptaGetPt(pta, i, &x1, &y1); nt = pixaGetCount(pixat); /* number of templates */ found = FALSE; findcontext = findSimilarSizedTemplatesInit(classer, pix1); while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { /* Find score for this template */ pix3 = pixaGetPix(pixat, iclass, L_CLONE); pix4 = pixaGetPix(pixatd, iclass, L_CLONE); ptaGetPt(ptact, iclass, &x2, &y2); testval = pixHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT); pixDestroy(&pix3); pixDestroy(&pix4); if (testval == 1) { found = TRUE; numaAddNumber(naclass, (l_float32)iclass); numaAddNumber(napage, (l_float32)npages); if (classer->keep_pixaa) { pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); pix = pixaGetPix(pixas, i, L_CLONE); pixaAddPix(pixa, pix, L_INSERT); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaDestroy(&pixa); } break; } } findSimilarSizedTemplatesDestroy(&findcontext); if (found == FALSE) { /* new class */ numaAddNumber(naclass, (l_float32)nt); numaAddNumber(napage, (l_float32)npages); pixa = pixaCreate(0); pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ pixaAddPix(pixa, pix, L_INSERT); wt = pixGetWidth(pix); ht = pixGetHeight(pix); l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ ptaAddPt(ptact, x1, y1); pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ pixaAddPix(pixatd, pix2, L_INSERT); /* bordered dil template */ } else { /* don't save them */ pixDestroy(&pix1); pixDestroy(&pix2); } } } else { /* rank < 1.0 */ nafgt = classer->nafgt; tab8 = makePixelSumTab8(); for (i = 0; i < n; i++) { /* all instances on this page */ pix1 = pixaGetPix(pixa1, i, L_CLONE); numaGetIValue(nafg, i, &area1); pix2 = pixaGetPix(pixa2, i, L_CLONE); ptaGetPt(pta, i, &x1, &y1); /* use pta for this page */ nt = pixaGetCount(pixat); /* number of templates */ found = FALSE; findcontext = findSimilarSizedTemplatesInit(classer, pix1); while ((iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { /* Find score for this template */ pix3 = pixaGetPix(pixat, iclass, L_CLONE); numaGetIValue(nafgt, iclass, &area3); pix4 = pixaGetPix(pixatd, iclass, L_CLONE); ptaGetPt(ptact, iclass, &x2, &y2); testval = pixRankHaustest(pix1, pix2, pix3, pix4, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, area1, area3, rank, tab8); pixDestroy(&pix3); pixDestroy(&pix4); if (testval == 1) { /* greedy match; take the first */ found = TRUE; numaAddNumber(naclass, (l_float32)iclass); numaAddNumber(napage, (l_float32)npages); if (classer->keep_pixaa) { pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); pix = pixaGetPix(pixas, i, L_CLONE); pixaAddPix(pixa, pix, L_INSERT); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaDestroy(&pixa); } break; } } findSimilarSizedTemplatesDestroy(&findcontext); if (found == FALSE) { /* new class */ numaAddNumber(naclass, (l_float32)nt); numaAddNumber(napage, (l_float32)npages); pixa = pixaCreate(0); pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ pixaAddPix(pixa, pix, L_INSERT); wt = pixGetWidth(pix); ht = pixGetHeight(pix); l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ ptaAddPt(ptact, x1, y1); pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ pixaAddPix(pixatd, pix2, L_INSERT); /* ditto */ numaAddNumber(nafgt, (l_float32)area1); } else { /* don't save them */ pixDestroy(&pix1); pixDestroy(&pix2); } } LEPT_FREE(tab8); } classer->nclass = pixaGetCount(pixat); numaDestroy(&nafg); ptaDestroy(&pta); pixaDestroy(&pixa1); pixaDestroy(&pixa2); selDestroy(&sel); return 0; } /*! * \brief pixHaustest() * * \param[in] pix1 new pix, not dilated * \param[in] pix2 new pix, dilated * \param[in] pix3 exemplar pix, not dilated * \param[in] pix4 exemplar pix, dilated * \param[in] delx x comp of centroid difference * \param[in] dely y comp of centroid difference * \param[in] maxdiffw max width difference of pix1 and pix2 * \param[in] maxdiffh max height difference of pix1 and pix2 * \return 0 FALSE) if no match, 1 (TRUE if the new * pix is in the same class as the exemplar. * *
 * Notes:
 *  We check first that the two pix are roughly
 *  the same size.  Only if they meet that criterion do
 *  we compare the bitmaps.  The Hausdorff is a 2-way
 *  check.  The centroid difference is used to align the two
 *  images to the nearest integer for each of the checks.
 *  These check that the dilated image of one contains
 *  ALL the pixels of the undilated image of the other.
 *  Checks are done in both direction.  A single pixel not
 *  contained in either direction results in failure of the test.
 * 
*/ l_int32 pixHaustest(PIX *pix1, PIX *pix2, PIX *pix3, PIX *pix4, l_float32 delx, /* x(1) - x(3) */ l_float32 dely, /* y(1) - y(3) */ l_int32 maxdiffw, l_int32 maxdiffh) { l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; PIX *pixt; /* Eliminate possible matches based on size difference */ wi = pixGetWidth(pix1); hi = pixGetHeight(pix1); wt = pixGetWidth(pix3); ht = pixGetHeight(pix3); delw = L_ABS(wi - wt); if (delw > maxdiffw) return FALSE; delh = L_ABS(hi - ht); if (delh > maxdiffh) return FALSE; /* Round difference in centroid location to nearest integer; * use this as a shift when doing the matching. */ if (delx >= 0) idelx = (l_int32)(delx + 0.5); else idelx = (l_int32)(delx - 0.5); if (dely >= 0) idely = (l_int32)(dely + 0.5); else idely = (l_int32)(dely - 0.5); /* Do 1-direction hausdorff, checking that every pixel in pix1 * is within a dilation distance of some pixel in pix3. Namely, * that pix4 entirely covers pix1: * pixt = pixSubtract(NULL, pix1, pix4), including shift * where pixt has no ON pixels. */ pixt = pixCreateTemplate(pix1); pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), pix4, 0, 0); pixZero(pixt, &boolmatch); if (boolmatch == 0) { pixDestroy(&pixt); return FALSE; } /* Do 1-direction hausdorff, checking that every pixel in pix3 * is within a dilation distance of some pixel in pix1. Namely, * that pix2 entirely covers pix3: * pixSubtract(pixt, pix3, pix2), including shift * where pixt has no ON pixels. */ pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); pixZero(pixt, &boolmatch); pixDestroy(&pixt); return boolmatch; } /*! * \brief pixRankHaustest() * * \param[in] pix1 new pix, not dilated * \param[in] pix2 new pix, dilated * \param[in] pix3 exemplar pix, not dilated * \param[in] pix4 exemplar pix, dilated * \param[in] delx x comp of centroid difference * \param[in] dely y comp of centroid difference * \param[in] maxdiffw max width difference of pix1 and pix2 * \param[in] maxdiffh max height difference of pix1 and pix2 * \param[in] area1 fg pixels in pix1 * \param[in] area3 fg pixels in pix3 * \param[in] rank rank value of test, each way * \param[in] tab8 table of pixel sums for byte * \return 0 FALSE) if no match, 1 (TRUE if the new * pix is in the same class as the exemplar. * *
 * Notes:
 *  We check first that the two pix are roughly
 *  the same size.  Only if they meet that criterion do
 *  we compare the bitmaps.  We convert the rank value to
 *  a number of pixels by multiplying the rank fraction by the number
 *  of pixels in the undilated image.  The Hausdorff is a 2-way
 *  check.  The centroid difference is used to align the two
 *  images to the nearest integer for each of the checks.
 *  The rank hausdorff checks that the dilated image of one
 *  contains the rank fraction of the pixels of the undilated
 *  image of the other.   Checks are done in both direction.
 *  Failure of the test in either direction results in failure
 *  of the test.
 * 
*/ l_int32 pixRankHaustest(PIX *pix1, PIX *pix2, PIX *pix3, PIX *pix4, l_float32 delx, /* x(1) - x(3) */ l_float32 dely, /* y(1) - y(3) */ l_int32 maxdiffw, l_int32 maxdiffh, l_int32 area1, l_int32 area3, l_float32 rank, l_int32 *tab8) { l_int32 wi, hi, wt, ht, delw, delh, idelx, idely, boolmatch; l_int32 thresh1, thresh3; PIX *pixt; /* Eliminate possible matches based on size difference */ wi = pixGetWidth(pix1); hi = pixGetHeight(pix1); wt = pixGetWidth(pix3); ht = pixGetHeight(pix3); delw = L_ABS(wi - wt); if (delw > maxdiffw) return FALSE; delh = L_ABS(hi - ht); if (delh > maxdiffh) return FALSE; /* Upper bounds in remaining pixels for allowable match */ thresh1 = (l_int32)(area1 * (1. - rank) + 0.5); thresh3 = (l_int32)(area3 * (1. - rank) + 0.5); /* Round difference in centroid location to nearest integer; * use this as a shift when doing the matching. */ if (delx >= 0) idelx = (l_int32)(delx + 0.5); else idelx = (l_int32)(delx - 0.5); if (dely >= 0) idely = (l_int32)(dely + 0.5); else idely = (l_int32)(dely - 0.5); /* Do 1-direction hausdorff, checking that every pixel in pix1 * is within a dilation distance of some pixel in pix3. Namely, * that pix4 entirely covers pix1: * pixt = pixSubtract(NULL, pix1, pix4), including shift * where pixt has no ON pixels. */ pixt = pixCreateTemplate(pix1); pixRasterop(pixt, 0, 0, wi, hi, PIX_SRC, pix1, 0, 0); pixRasterop(pixt, idelx, idely, wi, hi, PIX_DST & PIX_NOT(PIX_SRC), pix4, 0, 0); pixThresholdPixelSum(pixt, thresh1, &boolmatch, tab8); if (boolmatch == 1) { /* above thresh1 */ pixDestroy(&pixt); return FALSE; } /* Do 1-direction hausdorff, checking that every pixel in pix3 * is within a dilation distance of some pixel in pix1. Namely, * that pix2 entirely covers pix3: * pixSubtract(pixt, pix3, pix2), including shift * where pixt has no ON pixels. */ pixRasterop(pixt, idelx, idely, wt, ht, PIX_SRC, pix3, 0, 0); pixRasterop(pixt, 0, 0, wt, ht, PIX_DST & PIX_NOT(PIX_SRC), pix2, 0, 0); pixThresholdPixelSum(pixt, thresh3, &boolmatch, tab8); pixDestroy(&pixt); if (boolmatch == 1) /* above thresh3 */ return FALSE; else return TRUE; } /*----------------------------------------------------------------------* * Classification using windowed correlation score * *----------------------------------------------------------------------*/ /*! * \brief jbClassifyCorrelation() * * \param[in] jbclasser * \param[in] boxa new components for classification * \param[in] pixas new components for classification * \return 0 if OK; 1 on error */ l_ok jbClassifyCorrelation(JBCLASSER *classer, BOXA *boxa, PIXA *pixas) { l_int32 n, nt, i, iclass, wt, ht, found, area, area1, area2, npages, overthreshold; l_int32 *sumtab, *centtab; l_uint32 *row, word; l_float32 x1, y1, x2, y2, xsum, ysum; l_float32 thresh, weight, threshold; BOX *box; NUMA *naclass, *napage; NUMA *nafgt; /* fg area of all templates */ NUMA *naarea; /* w * h area of all templates */ JBFINDCTX *findcontext; L_DNAHASH *dahash; PIX *pix, *pix1, *pix2; PIXA *pixa, *pixa1, *pixat; PIXAA *pixaa; PTA *pta, *ptac, *ptact; l_int32 *pixcts; /* pixel counts of each pixa */ l_int32 **pixrowcts; /* row-by-row pixel counts of each pixa */ l_int32 x, y, rowcount, downcount, wpl; l_uint8 byte; PROCNAME("jbClassifyCorrelation"); if (!classer) return ERROR_INT("classer not found", procName, 1); if (!boxa) return ERROR_INT("boxa not found", procName, 1); if (!pixas) return ERROR_INT("pixas not found", procName, 1); npages = classer->npages; /* Generate the bordered pixa, which contains all the the * input components. This will not be saved. */ if ((n = pixaGetCount(pixas)) == 0) { L_WARNING("pixas is empty\n", procName); return 0; } pixa1 = pixaCreate(n); for (i = 0; i < n; i++) { pix = pixaGetPix(pixas, i, L_CLONE); pix1 = pixAddBorderGeneral(pix, JB_ADDED_PIXELS, JB_ADDED_PIXELS, JB_ADDED_PIXELS, JB_ADDED_PIXELS, 0); pixaAddPix(pixa1, pix1, L_INSERT); pixDestroy(&pix); } /* Use these to save the class and page of each component. */ naclass = classer->naclass; napage = classer->napage; /* Get the number of fg pixels in each component. */ nafgt = classer->nafgt; /* holds fg areas of the templates */ sumtab = makePixelSumTab8(); pixcts = (l_int32 *)LEPT_CALLOC(n, sizeof(*pixcts)); pixrowcts = (l_int32 **)LEPT_CALLOC(n, sizeof(*pixrowcts)); centtab = makePixelCentroidTab8(); /* Count the "1" pixels in each row of the pix in pixa1; this * allows pixCorrelationScoreThresholded to abort early if a match * is impossible. This loop merges three calculations: the total * number of "1" pixels, the number of "1" pixels in each row, and * the centroid. The centroids are relative to the UL corner of * each (bordered) pix. The pixrowcts[i][y] are the total number * of fg pixels in pixa[i] below row y. */ pta = ptaCreate(n); for (i = 0; i < n; i++) { pix = pixaGetPix(pixa1, i, L_CLONE); pixrowcts[i] = (l_int32 *)LEPT_CALLOC(pixGetHeight(pix), sizeof(**pixrowcts)); xsum = 0; ysum = 0; wpl = pixGetWpl(pix); row = pixGetData(pix) + (pixGetHeight(pix) - 1) * wpl; downcount = 0; for (y = pixGetHeight(pix) - 1; y >= 0; y--, row -= wpl) { pixrowcts[i][y] = downcount; rowcount = 0; for (x = 0; x < wpl; x++) { word = row[x]; byte = word & 0xff; rowcount += sumtab[byte]; xsum += centtab[byte] + (x * 32 + 24) * sumtab[byte]; byte = (word >> 8) & 0xff; rowcount += sumtab[byte]; xsum += centtab[byte] + (x * 32 + 16) * sumtab[byte]; byte = (word >> 16) & 0xff; rowcount += sumtab[byte]; xsum += centtab[byte] + (x * 32 + 8) * sumtab[byte]; byte = (word >> 24) & 0xff; rowcount += sumtab[byte]; xsum += centtab[byte] + x * 32 * sumtab[byte]; } downcount += rowcount; ysum += rowcount * y; } pixcts[i] = downcount; if (downcount > 0) { ptaAddPt(pta, xsum / (l_float32)downcount, ysum / (l_float32)downcount); } else { /* no pixels; shouldn't happen */ L_ERROR("downcount == 0 !\n", procName); ptaAddPt(pta, (l_float32)(pixGetWidth(pix) / 2), (l_float32)(pixGetHeight(pix) / 2)); } pixDestroy(&pix); } ptac = classer->ptac; /* holds centroids of components up to this page */ ptaJoin(ptac, pta, 0, -1); /* save centroids of all components */ ptact = classer->ptact; /* holds centroids of templates */ /* Store the unbordered pix in a pixaa, in a hierarchical * set of arrays. There is one pixa for each class, * and the pix in each pixa are all the instances found * of that class. This is actually more than one would need * for a jbig2 encoder, but there are two reasons to keep * them around: (1) the set of instances for each class * can be used to make an improved binary (or, better, * a grayscale) template, rather than simply using the first * one in the set; (2) we can investigate the failures * of the classifier. This pixaa grows as we process * successive pages. */ pixaa = classer->pixaa; /* Array to store class exemplars */ pixat = classer->pixat; /* Fill up the pixaa tree with the template exemplars as * the first pix in each pixa. As we add each pix, * we also add the associated box to the pixa. * We also keep track of the centroid of each pix, * and use the difference between centroids (of the * pix with the exemplar we are checking it with) * to align the two when checking that the correlation * score exceeds a threshold. The correlation score * is given by the square of the area of the AND * between aligned instance and template, divided by * the product of areas of each image. For identical * template and instance, the score is 1.0. * If the threshold is too small, non-equivalent instances * will be placed in the same class; if too large, there will * be an unnecessary division of classes representing the * same character. The weightfactor adds in some of the * difference (1.0 - thresh), depending on the heaviness * of the template (measured as the fraction of fg pixels). */ thresh = classer->thresh; weight = classer->weightfactor; naarea = classer->naarea; dahash = classer->dahash; for (i = 0; i < n; i++) { pix1 = pixaGetPix(pixa1, i, L_CLONE); area1 = pixcts[i]; ptaGetPt(pta, i, &x1, &y1); /* centroid for this instance */ nt = pixaGetCount(pixat); found = FALSE; findcontext = findSimilarSizedTemplatesInit(classer, pix1); while ( (iclass = findSimilarSizedTemplatesNext(findcontext)) > -1) { /* Get the template */ pix2 = pixaGetPix(pixat, iclass, L_CLONE); numaGetIValue(nafgt, iclass, &area2); ptaGetPt(ptact, iclass, &x2, &y2); /* template centroid */ /* Find threshold for this template */ if (weight > 0.0) { numaGetIValue(naarea, iclass, &area); threshold = thresh + (1.f - thresh) * weight * area2 / area; } else { threshold = thresh; } /* Find score for this template */ overthreshold = pixCorrelationScoreThresholded(pix1, pix2, area1, area2, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, sumtab, pixrowcts[i], threshold); #if DEBUG_CORRELATION_SCORE { l_float32 score, testscore; l_int32 count, testcount; pixCorrelationScore(pix1, pix2, area1, area2, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, sumtab, &score); pixCorrelationScoreSimple(pix1, pix2, area1, area2, x1 - x2, y1 - y2, MAX_DIFF_WIDTH, MAX_DIFF_HEIGHT, sumtab, &testscore); count = (l_int32)rint(sqrt(score * area1 * area2)); testcount = (l_int32)rint(sqrt(testscore * area1 * area2)); if ((score >= threshold) != (testscore >= threshold)) { lept_stderr("Correlation score mismatch: " "%d(%g,%d) vs %d(%g,%d) (%g)\n", count, score, score >= threshold, testcount, testscore, testscore >= threshold, score - testscore); } if ((score >= threshold) != overthreshold) { lept_stderr("Mismatch between correlation/threshold " "comparison: %g(%g,%d) >= %g(%g) vs %s\n", score, score*area1*area2, count, threshold, threshold*area1*area2, (overthreshold ? "true" : "false")); } } #endif /* DEBUG_CORRELATION_SCORE */ pixDestroy(&pix2); if (overthreshold) { /* greedy match */ found = TRUE; numaAddNumber(naclass, (l_float32)iclass); numaAddNumber(napage, (l_float32)npages); if (classer->keep_pixaa) { /* We are keeping a record of all components */ pixa = pixaaGetPixa(pixaa, iclass, L_CLONE); pix = pixaGetPix(pixas, i, L_CLONE); pixaAddPix(pixa, pix, L_INSERT); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaDestroy(&pixa); } break; } } findSimilarSizedTemplatesDestroy(&findcontext); if (found == FALSE) { /* new class */ numaAddNumber(naclass, (l_float32)nt); numaAddNumber(napage, (l_float32)npages); pixa = pixaCreate(0); pix = pixaGetPix(pixas, i, L_CLONE); /* unbordered instance */ pixaAddPix(pixa, pix, L_INSERT); wt = pixGetWidth(pix); ht = pixGetHeight(pix); l_dnaHashAdd(dahash, (l_uint64)ht * wt, nt); box = boxaGetBox(boxa, i, L_CLONE); pixaAddBox(pixa, box, L_INSERT); pixaaAddPixa(pixaa, pixa, L_INSERT); /* unbordered instance */ ptaAddPt(ptact, x1, y1); numaAddNumber(nafgt, (l_float32)area1); pixaAddPix(pixat, pix1, L_INSERT); /* bordered template */ area = (pixGetWidth(pix1) - 2 * JB_ADDED_PIXELS) * (pixGetHeight(pix1) - 2 * JB_ADDED_PIXELS); numaAddNumber(naarea, (l_float32)area); } else { /* don't save it */ pixDestroy(&pix1); } } classer->nclass = pixaGetCount(pixat); LEPT_FREE(pixcts); LEPT_FREE(centtab); for (i = 0; i < n; i++) { LEPT_FREE(pixrowcts[i]); } LEPT_FREE(pixrowcts); LEPT_FREE(sumtab); ptaDestroy(&pta); pixaDestroy(&pixa1); return 0; } /*----------------------------------------------------------------------* * Determine the image components we start with * *----------------------------------------------------------------------*/ /*! * \brief jbGetComponents() * * \param[in] pixs 1 bpp * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS * \param[in] maxwidth of saved components; larger are discarded * \param[in] maxheight of saved components; larger are discarded * \param[out] ppboxa b.b. of component items * \param[out] pppixa component items * \return 0 if OK, 1 on error */ l_ok jbGetComponents(PIX *pixs, l_int32 components, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad) { l_int32 empty, res, redfactor; BOXA *boxa; PIX *pix1, *pix2, *pix3; PIXA *pixa, *pixat; PROCNAME("jbGetComponents"); if (!pboxad) return ERROR_INT("&boxad not defined", procName, 1); *pboxad = NULL; if (!ppixad) return ERROR_INT("&pixad not defined", procName, 1); *ppixad = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return ERROR_INT("invalid components", procName, 1); pixZero(pixs, &empty); if (empty) { *pboxad = boxaCreate(0); *ppixad = pixaCreate(0); return 0; } /* If required, preprocess input pixs. The method for both * characters and words is to generate a connected component * mask over the units that we want to aggregrate, which are, * in general, sets of related connected components in pixs. * For characters, we want to include the dots with * 'i', 'j' and '!', so we do a small vertical closing to * generate the mask. For words, we make a mask over all * characters in each word. This is a bit more tricky, because * the spacing between words is difficult to predict a priori, * and words can be typeset with variable spacing that can * in some cases be barely larger than the space between * characters. The first step is to generate the mask and * identify each of its connected components. */ if (components == JB_CONN_COMPS) { /* no preprocessing */ boxa = pixConnComp(pixs, &pixa, 8); } else if (components == JB_CHARACTERS) { pix1 = pixMorphSequence(pixs, "c1.6", 0); boxa = pixConnComp(pix1, &pixat, 8); pixa = pixaClipToPix(pixat, pixs); pixDestroy(&pix1); pixaDestroy(&pixat); } else { /* components == JB_WORDS */ /* Do the operations at about 150 ppi resolution. * It is much faster at 75 ppi, but the results are * more accurate at 150 ppi. This will segment the * words in body text. It can be expected that relatively * infrequent words in a larger font will be split. */ res = pixGetXRes(pixs); if (res <= 200) { redfactor = 1; pix1 = pixClone(pixs); } else if (res <= 400) { redfactor = 2; pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); } else { redfactor = 4; pix1 = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); } /* Estimate the word mask, at approximately 150 ppi. * This has both very large and very small components left in. */ pixWordMaskByDilation(pix1, &pix2, NULL, NULL); /* Expand the optimally dilated word mask to full res. */ pix3 = pixExpandReplicate(pix2, redfactor); /* Pull out the pixels in pixs corresponding to the mask * components in pix3. Note that above we used threshold * levels in the reduction of 1 to insure that the resulting * mask fully covers the input pixs. The downside of using * a threshold of 1 is that very close characters from adjacent * lines can be joined. But with a level of 2 or greater, * it is necessary to use a seedfill, followed by a pixOr(): * pixt4 = pixSeedfillBinary(NULL, pix3, pixs, 8); * pixOr(pix3, pix3, pixt4); * to insure that the mask coverage is complete over pixs. */ boxa = pixConnComp(pix3, &pixat, 4); pixa = pixaClipToPix(pixat, pixs); pixaDestroy(&pixat); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); } /* Remove large components, and save the results. */ *ppixad = pixaSelectBySize(pixa, maxwidth, maxheight, L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL); *pboxad = boxaSelectBySize(boxa, maxwidth, maxheight, L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL); pixaDestroy(&pixa); boxaDestroy(&boxa); return 0; } /*! * \brief pixWordMaskByDilation() * * \param[in] pixs 1 bpp; typ. at 75 to 150 ppi * \param[out] pmask [optional] dilated word mask * \param[out] psize [optional] size of good horizontal dilation * \param[out] pixadb [optional] debug: pixa of intermediate steps * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This gives an estimate of the word masks.  See
 *          pixWordBoxesByDilation() for further filtering of the word boxes.
 *      (2) The resolution should be between 75 and 150 ppi, and the optimal
 *          dilation will be between 3 and 10.
 *      (3) A good size for dilating to get word masks is optionally returned.
 *      (4) Typically, the number of c.c. reduced with each successive
 *          dilation (stored in nadiff) decreases quickly to a minimum
 *          (where the characters in a word are joined), and then
 *          increases again as the smaller number of words are joined.
 *          For the typical case, you can then look for this minimum
 *          and dilate to get the word mask.  However, there are many
 *          cases where the function is not so simple. For example, if the
 *          pix has been upscaled 2x, the nadiff function oscillates, with
 *          every other value being zero!  And for some images it tails
 *          off without a clear minimum to indicate where to break.
 *          So a more simple and robust method is to find the dilation
 *          where the initial number of c.c. has been reduced by some
 *          fraction (we use a 70% reduction).
 * 
*/ l_ok pixWordMaskByDilation(PIX *pixs, PIX **ppixm, l_int32 *psize, PIXA *pixadb) { l_int32 i, n, ndil, maxdiff, diff, ibest; l_int32 check, count, total, xres; l_int32 ncc[13]; /* max dilation + 1 */ l_int32 *diffa; BOXA *boxa; NUMA *nacc, *nadiff; PIX *pix1, *pix2; PROCNAME("pixWordMaskByDilation"); if (ppixm) *ppixm = NULL; if (psize) *psize = 0; if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); if (!ppixm && !psize) return ERROR_INT("no output requested", procName, 1); /* Find a good dilation to create the word mask, by successively * increasing dilation size and counting the connected components. */ pix1 = pixCopy(NULL, pixs); ndil = 12; /* appropriate for 75 to 150 ppi */ nacc = numaCreate(ndil + 1); nadiff = numaCreate(ndil + 1); for (i = 0; i <= ndil; i++) { if (i == 0) /* first one not dilated */ pix2 = pixCopy(NULL, pix1); else /* successive dilation by sel_2h */ pix2 = pixMorphSequence(pix1, "d2.1", 0); boxa = pixConnCompBB(pix2, 4); ncc[i] = boxaGetCount(boxa); numaAddNumber(nacc, (l_float32)ncc[i]); if (i == 0) total = ncc[0]; if (i > 0) { diff = ncc[i - 1] - ncc[i]; numaAddNumber(nadiff, (l_float32)diff); } pixDestroy(&pix1); pix1 = pix2; boxaDestroy(&boxa); } pixDestroy(&pix1); /* Find the dilation at which the c.c. count has reduced * to 30% of the initial value. Although 30% seems high, * it seems better to use this but add one to ibest. */ diffa = numaGetIArray(nadiff); n = numaGetCount(nadiff); maxdiff = 0; check = TRUE; ibest = 2; for (i = 1; i < n; i++) { numaGetIValue(nacc, i, &count); if (check && count < 0.3 * total) { ibest = i + 1; check = FALSE; } diff = diffa[i]; if (diff > maxdiff) maxdiff = diff; } LEPT_FREE(diffa); /* Add small compensation for higher resolution */ xres = pixGetXRes(pixs); if (xres == 0) xres = 150; if (xres > 110) ibest++; if (ibest < 2) { L_INFO("setting ibest to minimum allowed value of 2\n", procName); ibest = 2; } if (pixadb) { lept_mkdir("lept/jb"); {NUMA *naseq; PIX *pix3, *pix4; L_INFO("Best dilation: %d\n", procName, L_MAX(3, ibest + 1)); naseq = numaMakeSequence(1, 1, numaGetCount(nacc)); pix3 = gplotGeneralPix2(naseq, nacc, GPLOT_LINES, "/tmp/lept/jb/numcc", "Number of cc vs. horizontal dilation", "Sel horiz", "Number of cc"); pixaAddPix(pixadb, pix3, L_INSERT); numaDestroy(&naseq); naseq = numaMakeSequence(1, 1, numaGetCount(nadiff)); pix3 = gplotGeneralPix2(naseq, nadiff, GPLOT_LINES, "/tmp/lept/jb/diffcc", "Diff count of cc vs. horizontal dilation", "Sel horiz", "Diff in cc"); pixaAddPix(pixadb, pix3, L_INSERT); numaDestroy(&naseq); pix3 = pixCloseBrick(NULL, pixs, ibest + 1, 1); pix4 = pixScaleToSize(pix3, 600, 0); pixaAddPix(pixadb, pix4, L_INSERT); pixDestroy(&pix3); } } if (psize) *psize = ibest + 1; if (ppixm) *ppixm = pixCloseBrick(NULL, pixs, ibest + 1, 1); numaDestroy(&nacc); numaDestroy(&nadiff); return 0; } /*! * \brief pixWordBoxesByDilation() * * \param[in] pixs 1 bpp; typ. 75 - 200 ppi * \param[in] minwidth saved components; smaller are discarded * \param[in] minheight saved components; smaller are discarded * \param[in] maxwidth saved components; larger are discarded * \param[in] maxheight saved components; larger are discarded * \param[out] pboxa of dilated word mask * \param[out] psize [optional] size of good horizontal dilation * \param[out] pixadb [optional] debug: pixa of intermediate steps * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) Returns a pruned set of word boxes.
 *      (2) See pixWordMaskByDilation().
 * 
*/ l_ok pixWordBoxesByDilation(PIX *pixs, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxa, l_int32 *psize, PIXA *pixadb) { BOXA *boxa1, *boxa2; PIX *pix1, *pix2; PROCNAME("pixWordBoxesByDilation"); if (psize) *psize = 0; if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs undefined or not 1 bpp", procName, 1); if (!pboxa) return ERROR_INT("&boxa not defined", procName, 1); *pboxa = NULL; /* Make a first estimate of the word mask */ if (pixWordMaskByDilation(pixs, &pix1, psize, pixadb)) return ERROR_INT("pixWordMaskByDilation() failed", procName, 1); /* Prune the word mask. Get the bounding boxes of the words. * Remove the small ones, which can be due to punctuation * that was not joined to a word. Also remove the large ones, * which are not likely to be words. */ boxa1 = pixConnComp(pix1, NULL, 8); boxa2 = boxaSelectBySize(boxa1, minwidth, minheight, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL); *pboxa = boxaSelectBySize(boxa2, maxwidth, maxheight, L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL); if (pixadb) { pix2 = pixUnpackBinary(pixs, 32, 1); pixRenderBoxaArb(pix2, boxa1, 2, 255, 0, 0); pixaAddPix(pixadb, pix2, L_INSERT); pix2 = pixUnpackBinary(pixs, 32, 1); pixRenderBoxaArb(pix2, boxa2, 2, 0, 255, 0); pixaAddPix(pixadb, pix2, L_INSERT); } boxaDestroy(&boxa1); boxaDestroy(&boxa2); pixDestroy(&pix1); return 0; } /*----------------------------------------------------------------------* * Build grayscale composites (templates) * *----------------------------------------------------------------------*/ /*! * \brief jbAccumulateComposites() * * \param[in] pixaa one pixa for each class * \param[out] ppna number of samples used to build each composite * \param[out] pptat centroids of bordered composites * \return pixad accumulated sum of samples in each class, or NULL on error * */ PIXA * jbAccumulateComposites(PIXAA *pixaa, NUMA **pna, PTA **pptat) { l_int32 n, nt, i, j, d, minw, maxw, minh, maxh, xdiff, ydiff; l_float32 x, y, xave, yave; NUMA *na; PIX *pix, *pixt1, *pixt2, *pixsum; PIXA *pixa, *pixad; PTA *ptat, *pta; PROCNAME("jbAccumulateComposites"); if (!pptat) return (PIXA *)ERROR_PTR("&ptat not defined", procName, NULL); *pptat = NULL; if (!pna) return (PIXA *)ERROR_PTR("&na not defined", procName, NULL); *pna = NULL; if (!pixaa) return (PIXA *)ERROR_PTR("pixaa not defined", procName, NULL); n = pixaaGetCount(pixaa, NULL); if ((ptat = ptaCreate(n)) == NULL) return (PIXA *)ERROR_PTR("ptat not made", procName, NULL); *pptat = ptat; pixad = pixaCreate(n); na = numaCreate(n); *pna = na; for (i = 0; i < n; i++) { pixa = pixaaGetPixa(pixaa, i, L_CLONE); nt = pixaGetCount(pixa); numaAddNumber(na, nt); if (nt == 0) { L_WARNING("empty pixa found!\n", procName); pixaDestroy(&pixa); continue; } pixaSizeRange(pixa, &minw, &minh, &maxw, &maxh); pix = pixaGetPix(pixa, 0, L_CLONE); d = pixGetDepth(pix); pixDestroy(&pix); pixt1 = pixCreate(maxw, maxh, d); pixsum = pixInitAccumulate(maxw, maxh, 0); pta = pixaCentroids(pixa); /* Find the average value of the centroids ... */ xave = yave = 0; for (j = 0; j < nt; j++) { ptaGetPt(pta, j, &x, &y); xave += x; yave += y; } xave = xave / (l_float32)nt; yave = yave / (l_float32)nt; /* and place all centroids at their average value */ for (j = 0; j < nt; j++) { pixt2 = pixaGetPix(pixa, j, L_CLONE); ptaGetPt(pta, j, &x, &y); xdiff = (l_int32)(x - xave); ydiff = (l_int32)(y - yave); pixClearAll(pixt1); pixRasterop(pixt1, xdiff, ydiff, maxw, maxh, PIX_SRC, pixt2, 0, 0); pixAccumulate(pixsum, pixt1, L_ARITH_ADD); pixDestroy(&pixt2); } pixaAddPix(pixad, pixsum, L_INSERT); ptaAddPt(ptat, xave, yave); pixaDestroy(&pixa); pixDestroy(&pixt1); ptaDestroy(&pta); } return pixad; } /*! * \brief jbTemplatesFromComposites() * * \param[in] pixac one pix of composites for each class * \param[in] na number of samples used for each class composite * \return pixad 8 bpp templates for each class, or NULL on error * */ PIXA * jbTemplatesFromComposites(PIXA *pixac, NUMA *na) { l_int32 n, i; l_float32 nt; /* number of samples in the composite; always an integer */ l_float32 factor; PIX *pixsum; /* accumulated composite */ PIX *pixd; PIXA *pixad; PROCNAME("jbTemplatesFromComposites"); if (!pixac) return (PIXA *)ERROR_PTR("pixac not defined", procName, NULL); if (!na) return (PIXA *)ERROR_PTR("na not defined", procName, NULL); n = pixaGetCount(pixac); pixad = pixaCreate(n); for (i = 0; i < n; i++) { pixsum = pixaGetPix(pixac, i, L_COPY); /* changed internally */ numaGetFValue(na, i, &nt); factor = 255.f / nt; pixMultConstAccumulate(pixsum, factor, 0); /* changes pixsum */ pixd = pixFinalAccumulate(pixsum, 0, 8); pixaAddPix(pixad, pixd, L_INSERT); pixDestroy(&pixsum); } return pixad; } /*----------------------------------------------------------------------* * jbig2 utility routines * *----------------------------------------------------------------------*/ /*! * \brief jbClasserCreate() * * \param[in] method JB_RANKHAUS, JB_CORRELATION * \param[in] components JB_CONN_COMPS, JB_CHARACTERS, JB_WORDS * \return jbclasser, or NULL on error */ JBCLASSER * jbClasserCreate(l_int32 method, l_int32 components) { JBCLASSER *classer; PROCNAME("jbClasserCreate"); if (method != JB_RANKHAUS && method != JB_CORRELATION) return (JBCLASSER *)ERROR_PTR("invalid method", procName, NULL); if (components != JB_CONN_COMPS && components != JB_CHARACTERS && components != JB_WORDS) return (JBCLASSER *)ERROR_PTR("invalid component", procName, NULL); classer = (JBCLASSER *)LEPT_CALLOC(1, sizeof(JBCLASSER)); classer->method = method; classer->components = components; classer->nacomps = numaCreate(0); classer->pixaa = pixaaCreate(0); classer->pixat = pixaCreate(0); classer->pixatd = pixaCreate(0); classer->nafgt = numaCreate(0); classer->naarea = numaCreate(0); classer->ptac = ptaCreate(0); classer->ptact = ptaCreate(0); classer->naclass = numaCreate(0); classer->napage = numaCreate(0); classer->ptaul = ptaCreate(0); return classer; } /* * \brief jbClasserDestroy() * * \param[in,out] pclasser will be set to null before returning * \return void */ void jbClasserDestroy(JBCLASSER **pclasser) { JBCLASSER *classer; if (!pclasser) return; if ((classer = *pclasser) == NULL) return; sarrayDestroy(&classer->safiles); numaDestroy(&classer->nacomps); pixaaDestroy(&classer->pixaa); pixaDestroy(&classer->pixat); pixaDestroy(&classer->pixatd); l_dnaHashDestroy(&classer->dahash); numaDestroy(&classer->nafgt); numaDestroy(&classer->naarea); ptaDestroy(&classer->ptac); ptaDestroy(&classer->ptact); numaDestroy(&classer->naclass); numaDestroy(&classer->napage); ptaDestroy(&classer->ptaul); ptaDestroy(&classer->ptall); LEPT_FREE(classer); *pclasser = NULL; } /*! * \brief jbDataSave() * * \param[in] jbclasser * \param[in] latticew cell width used to store each connected * component in the composite * \param[in] latticeh ditto for cell height * \return jbdata, or NULL on error * *
 * Notes:
 *      (1) This routine stores the jbig2-type data required for
 *          generating a lossy jbig2 version of the image.
 *          It can be losslessly written to (and read from) two files.
 *      (2) It generates and stores the mosaic of templates.
 *      (3) It clones the Numa and Pta arrays, so these must all
 *          be destroyed by the caller.
 *      (4) Input 0 to use the default values for latticew and/or latticeh,
 * 
*/ JBDATA * jbDataSave(JBCLASSER *classer) { l_int32 maxw, maxh; JBDATA *data; PIX *pix; PROCNAME("jbDataSave"); if (!classer) return (JBDATA *)ERROR_PTR("classer not defined", procName, NULL); /* Write the templates into an array. */ pixaSizeRange(classer->pixat, NULL, NULL, &maxw, &maxh); pix = pixaDisplayOnLattice(classer->pixat, maxw + 1, maxh + 1, NULL, NULL); if (!pix) return (JBDATA *)ERROR_PTR("data not made", procName, NULL); data = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA)); data->pix = pix; data->npages = classer->npages; data->w = classer->w; data->h = classer->h; data->nclass = classer->nclass; data->latticew = maxw + 1; data->latticeh = maxh + 1; data->naclass = numaClone(classer->naclass); data->napage = numaClone(classer->napage); data->ptaul = ptaClone(classer->ptaul); return data; } /* * \brief jbDataDestroy() * * \param[in,out] pdata will be set to null before returning * \return void */ void jbDataDestroy(JBDATA **pdata) { JBDATA *data; if (!pdata) return; if ((data = *pdata) == NULL) return; pixDestroy(&data->pix); numaDestroy(&data->naclass); numaDestroy(&data->napage); ptaDestroy(&data->ptaul); LEPT_FREE(data); *pdata = NULL; } /*! * \brief jbDataWrite() * * \param[in] rootname for output files; everything but the extension * \param[in] jbdata * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) Serialization function that writes data in jbdata to file.
 * 
*/ l_ok jbDataWrite(const char *rootout, JBDATA *jbdata) { char buf[L_BUF_SIZE]; l_int32 w, h, nclass, npages, cellw, cellh, ncomp, i, x, y, iclass, ipage; NUMA *naclass, *napage; PTA *ptaul; PIX *pixt; FILE *fp; PROCNAME("jbDataWrite"); if (!rootout) return ERROR_INT("no rootout", procName, 1); if (!jbdata) return ERROR_INT("no jbdata", procName, 1); npages = jbdata->npages; w = jbdata->w; h = jbdata->h; pixt = jbdata->pix; nclass = jbdata->nclass; cellw = jbdata->latticew; cellh = jbdata->latticeh; naclass = jbdata->naclass; napage = jbdata->napage; ptaul = jbdata->ptaul; snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_TEMPLATE_EXT); pixWrite(buf, pixt, IFF_PNG); snprintf(buf, L_BUF_SIZE, "%s%s", rootout, JB_DATA_EXT); if ((fp = fopenWriteStream(buf, "wb")) == NULL) return ERROR_INT("stream not opened", procName, 1); ncomp = ptaGetCount(ptaul); fprintf(fp, "jb data file\n"); fprintf(fp, "num pages = %d\n", npages); fprintf(fp, "page size: w = %d, h = %d\n", w, h); fprintf(fp, "num components = %d\n", ncomp); fprintf(fp, "num classes = %d\n", nclass); fprintf(fp, "template lattice size: w = %d, h = %d\n", cellw, cellh); for (i = 0; i < ncomp; i++) { numaGetIValue(napage, i, &ipage); numaGetIValue(naclass, i, &iclass); ptaGetIPt(ptaul, i, &x, &y); fprintf(fp, "%d %d %d %d\n", ipage, iclass, x, y); } fclose(fp); return 0; } /*! * \brief jbDataRead() * * \param[in] rootname for template and data files * \return jbdata, or NULL on error */ JBDATA * jbDataRead(const char *rootname) { char fname[L_BUF_SIZE]; char *linestr; l_uint8 *data; l_int32 nsa, i, w, h, cellw, cellh, x, y, iclass, ipage; l_int32 npages, nclass, ncomp, ninit; size_t size; JBDATA *jbdata; NUMA *naclass, *napage; PIX *pixs; PTA *ptaul; SARRAY *sa; PROCNAME("jbDataRead"); if (!rootname) return (JBDATA *)ERROR_PTR("rootname not defined", procName, NULL); snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_TEMPLATE_EXT); if ((pixs = pixRead(fname)) == NULL) return (JBDATA *)ERROR_PTR("pix not read", procName, NULL); snprintf(fname, L_BUF_SIZE, "%s%s", rootname, JB_DATA_EXT); if ((data = l_binaryRead(fname, &size)) == NULL) { pixDestroy(&pixs); return (JBDATA *)ERROR_PTR("data not read", procName, NULL); } if ((sa = sarrayCreateLinesFromString((char *)data, 0)) == NULL) { pixDestroy(&pixs); LEPT_FREE(data); return (JBDATA *)ERROR_PTR("sa not made", procName, NULL); } nsa = sarrayGetCount(sa); /* number of cc + 6 */ linestr = sarrayGetString(sa, 0, L_NOCOPY); if (strcmp(linestr, "jb data file") != 0) { pixDestroy(&pixs); LEPT_FREE(data); sarrayDestroy(&sa); return (JBDATA *)ERROR_PTR("invalid jb data file", procName, NULL); } linestr = sarrayGetString(sa, 1, L_NOCOPY); sscanf(linestr, "num pages = %d", &npages); linestr = sarrayGetString(sa, 2, L_NOCOPY); sscanf(linestr, "page size: w = %d, h = %d", &w, &h); linestr = sarrayGetString(sa, 3, L_NOCOPY); sscanf(linestr, "num components = %d", &ncomp); linestr = sarrayGetString(sa, 4, L_NOCOPY); sscanf(linestr, "num classes = %d\n", &nclass); linestr = sarrayGetString(sa, 5, L_NOCOPY); sscanf(linestr, "template lattice size: w = %d, h = %d\n", &cellw, &cellh); #if 1 lept_stderr("num pages = %d\n", npages); lept_stderr("page size: w = %d, h = %d\n", w, h); lept_stderr("num components = %d\n", ncomp); lept_stderr("num classes = %d\n", nclass); lept_stderr("template lattice size: w = %d, h = %d\n", cellw, cellh); #endif ninit = ncomp; if (ncomp > 1000000) { /* fuzz protection */ L_WARNING("ncomp > 1M\n", procName); ninit = 1000000; } naclass = numaCreate(ninit); napage = numaCreate(ninit); ptaul = ptaCreate(ninit); for (i = 6; i < nsa; i++) { linestr = sarrayGetString(sa, i, L_NOCOPY); sscanf(linestr, "%d %d %d %d\n", &ipage, &iclass, &x, &y); numaAddNumber(napage, (l_float32)ipage); numaAddNumber(naclass, (l_float32)iclass); ptaAddPt(ptaul, (l_float32)x, (l_float32)y); } jbdata = (JBDATA *)LEPT_CALLOC(1, sizeof(JBDATA)); jbdata->pix = pixs; jbdata->npages = npages; jbdata->w = w; jbdata->h = h; jbdata->nclass = nclass; jbdata->latticew = cellw; jbdata->latticeh = cellh; jbdata->naclass = naclass; jbdata->napage = napage; jbdata->ptaul = ptaul; LEPT_FREE(data); sarrayDestroy(&sa); return jbdata; } /*! * \brief jbDataRender() * * \param[in] jbdata * \param[in] debugflag if TRUE, writes into 2 bpp pix and adds * component outlines in color * \return pixa reconstruction of original images, using templates or * NULL on error */ PIXA * jbDataRender(JBDATA *data, l_int32 debugflag) { l_int32 i, w, h, cellw, cellh, x, y, iclass, ipage; l_int32 npages, nclass, ncomp, wp, hp; BOX *box; NUMA *naclass, *napage; PIX *pixt, *pixt2, *pix, *pixd; PIXA *pixat; /* pixa of templates */ PIXA *pixad; /* pixa of output images */ PIXCMAP *cmap; PTA *ptaul; PROCNAME("jbDataRender"); if (!data) return (PIXA *)ERROR_PTR("data not defined", procName, NULL); npages = data->npages; w = data->w; h = data->h; pixt = data->pix; nclass = data->nclass; cellw = data->latticew; cellh = data->latticeh; naclass = data->naclass; napage = data->napage; ptaul = data->ptaul; ncomp = numaGetCount(naclass); /* Reconstruct the original set of images from the templates * and the data associated with each component. First, * generate the output pixa as a set of empty pix. */ if ((pixad = pixaCreate(npages)) == NULL) return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); for (i = 0; i < npages; i++) { if (debugflag == FALSE) { pix = pixCreate(w, h, 1); } else { pix = pixCreate(w, h, 2); cmap = pixcmapCreate(2); pixcmapAddColor(cmap, 255, 255, 255); pixcmapAddColor(cmap, 0, 0, 0); pixcmapAddColor(cmap, 255, 0, 0); /* for box outlines */ pixSetColormap(pix, cmap); } pixaAddPix(pixad, pix, L_INSERT); } /* Put the class templates into a pixa. */ if ((pixat = pixaCreateFromPix(pixt, nclass, cellw, cellh)) == NULL) { pixaDestroy(&pixad); return (PIXA *)ERROR_PTR("pixat not made", procName, NULL); } /* Place each component in the right location on its page. */ for (i = 0; i < ncomp; i++) { numaGetIValue(napage, i, &ipage); numaGetIValue(naclass, i, &iclass); pix = pixaGetPix(pixat, iclass, L_CLONE); /* the template */ wp = pixGetWidth(pix); hp = pixGetHeight(pix); ptaGetIPt(ptaul, i, &x, &y); pixd = pixaGetPix(pixad, ipage, L_CLONE); /* the output page */ if (debugflag == FALSE) { pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pix, 0, 0); } else { pixt2 = pixConvert1To2Cmap(pix); pixRasterop(pixd, x, y, wp, hp, PIX_SRC | PIX_DST, pixt2, 0, 0); box = boxCreate(x, y, wp, hp); pixRenderBoxArb(pixd, box, 1, 255, 0, 0); pixDestroy(&pixt2); boxDestroy(&box); } pixDestroy(&pix); /* the clone only */ pixDestroy(&pixd); /* the clone only */ } pixaDestroy(&pixat); return pixad; } /*! * \brief jbGetULCorners() * * \param[in] jbclasser * \param[in] pixs full res image * \param[in] boxa of c.c. bounding rectangles for this page * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This computes the ptaul field, which has the global UL corners,
 *          adjusted for each specific component, so that each component
 *          can be replaced by the template for its class and have the
 *          centroid in the template in the same position as the
 *          centroid of the original connected component.  It is important
 *          that this be done properly to avoid a wavy baseline in the
 *          result.
 *      (2) The array fields ptac and ptact give the centroids of
 *          those components relative to the UL corner of each component.
 *          Here, we compute the difference in each component, round to
 *          nearest integer, and correct the box->x and box->y by
 *          the appropriate integral difference.
 *      (3) The templates and stored instances are all bordered.
 * 
*/ l_ok jbGetULCorners(JBCLASSER *classer, PIX *pixs, BOXA *boxa) { l_int32 i, baseindex, index, n, iclass, idelx, idely, x, y, dx, dy; l_int32 *sumtab; l_float32 x1, x2, y1, y2, delx, dely; BOX *box; NUMA *naclass; PIX *pixt; PTA *ptac, *ptact, *ptaul; PROCNAME("jbGetULCorners"); if (!classer) return ERROR_INT("classer not defined", procName, 1); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (!boxa) return ERROR_INT("boxa not defined", procName, 1); n = boxaGetCount(boxa); ptaul = classer->ptaul; naclass = classer->naclass; ptac = classer->ptac; ptact = classer->ptact; baseindex = classer->baseindex; /* num components before this page */ sumtab = makePixelSumTab8(); for (i = 0; i < n; i++) { index = baseindex + i; ptaGetPt(ptac, index, &x1, &y1); numaGetIValue(naclass, index, &iclass); ptaGetPt(ptact, iclass, &x2, &y2); delx = x2 - x1; dely = y2 - y1; if (delx >= 0) idelx = (l_int32)(delx + 0.5); else idelx = (l_int32)(delx - 0.5); if (dely >= 0) idely = (l_int32)(dely + 0.5); else idely = (l_int32)(dely - 0.5); if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL) { LEPT_FREE(sumtab); return ERROR_INT("box not found", procName, 1); } boxGetGeometry(box, &x, &y, NULL, NULL); /* Get final increments dx and dy for best alignment */ pixt = pixaGetPix(classer->pixat, iclass, L_CLONE); finalPositioningForAlignment(pixs, x, y, idelx, idely, pixt, sumtab, &dx, &dy); /* if (i % 20 == 0) lept_stderr("dx = %d, dy = %d\n", dx, dy); */ ptaAddPt(ptaul, (l_float32)(x - idelx + dx), (l_float32)(y - idely + dy)); boxDestroy(&box); pixDestroy(&pixt); } LEPT_FREE(sumtab); return 0; } /*! * \brief jbGetLLCorners() * * \param[in] jbclasser * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This computes the ptall field, which has the global LL corners,
 *          adjusted for each specific component, so that each component
 *          can be replaced by the template for its class and have the
 *          centroid in the template in the same position as the
 *          centroid of the original connected component. It is important
 *          that this be done properly to avoid a wavy baseline in the result.
 *      (2) It is computed here from the corresponding UL corners, where
 *          the input templates and stored instances are all bordered.
 *          This should be done after all pages have been processed.
 *      (3) For proper substitution, the templates whose LL corners are
 *          placed in these locations must be UN-bordered.
 *          This is available for a realistic jbig2 encoder, which would
 *          (1) encode each template without a border, and (2) encode
 *          the position using the LL corner (rather than the UL
 *          corner) because the difference between y-values
 *          of successive instances is typically close to zero.
 * 
*/ l_ok jbGetLLCorners(JBCLASSER *classer) { l_int32 i, iclass, n, x1, y1, h; NUMA *naclass; PIX *pix; PIXA *pixat; PTA *ptaul, *ptall; PROCNAME("jbGetLLCorners"); if (!classer) return ERROR_INT("classer not defined", procName, 1); ptaul = classer->ptaul; naclass = classer->naclass; pixat = classer->pixat; ptaDestroy(&classer->ptall); n = ptaGetCount(ptaul); ptall = ptaCreate(n); classer->ptall = ptall; /* If the templates were bordered, we would add h - 1 to the UL * corner y-value. However, because the templates to be used * here have their borders removed, and the borders are * JB_ADDED_PIXELS on each side, we add h - 1 - 2 * JB_ADDED_PIXELS * to the UL corner y-value. */ for (i = 0; i < n; i++) { ptaGetIPt(ptaul, i, &x1, &y1); numaGetIValue(naclass, i, &iclass); pix = pixaGetPix(pixat, iclass, L_CLONE); h = pixGetHeight(pix); ptaAddPt(ptall, (l_float32)x1, y1 + h - 1 - 2 * JB_ADDED_PIXELS); pixDestroy(&pix); } return 0; } /*----------------------------------------------------------------------* * Static helpers * *----------------------------------------------------------------------*/ /* When looking for similar matches we check templates whose size is +/- 2 in * each direction. This involves 25 possible sizes. This array contains the * offsets for each of those positions in a spiral pattern. There are 25 pairs * of numbers in this array: even positions are x values. */ static int two_by_two_walk[50] = { 0, 0, 0, 1, -1, 0, 0, -1, 1, 0, -1, 1, 1, 1, -1, -1, 1, -1, 0, -2, 2, 0, 0, 2, -2, 0, -1, -2, 1, -2, 2, -1, 2, 1, 1, 2, -1, 2, -2, 1, -2, -1, -2, -2, 2, -2, 2, 2, -2, 2}; /*! * \brief findSimilarSizedTemplatesInit() * * \param[in] classer * \param[in] pixs instance to be matched * \return Allocated context to be used with findSimilar* */ static JBFINDCTX * findSimilarSizedTemplatesInit(JBCLASSER *classer, PIX *pixs) { JBFINDCTX *state; state = (JBFINDCTX *)LEPT_CALLOC(1, sizeof(JBFINDCTX)); state->w = pixGetWidth(pixs) - 2 * JB_ADDED_PIXELS; state->h = pixGetHeight(pixs) - 2 * JB_ADDED_PIXELS; state->classer = classer; return state; } static void findSimilarSizedTemplatesDestroy(JBFINDCTX **pstate) { JBFINDCTX *state; PROCNAME("findSimilarSizedTemplatesDestroy"); if (pstate == NULL) { L_WARNING("ptr address is null\n", procName); return; } if ((state = *pstate) == NULL) return; l_dnaDestroy(&state->dna); LEPT_FREE(state); *pstate = NULL; return; } /*! * \brief findSimilarSizedTemplatesNext() * * \param[in] state from findSimilarSizedTemplatesInit * \return next template number, or -1 when finished * * We have a dna hash table that maps template area to a list of template * numbers with that area. We wish to find similar sized templates, * so we first look for templates with the same width and height, and * then with width + 1, etc. This walk is guided by the * two_by_two_walk array, above. * * We don't want to have to collect the whole list of templates first, * because we hope to find a well-matching template quickly. So we * keep the context for this walk in an explictit state structure, * and this function acts like a generator. */ static l_int32 findSimilarSizedTemplatesNext(JBFINDCTX *state) { l_int32 desiredh, desiredw, size, templ; PIX *pixt; while(1) { /* Continue the walk over step 'i' */ if (state->i >= 25) { /* all done; didn't find a good match */ return -1; } desiredw = state->w + two_by_two_walk[2 * state->i]; desiredh = state->h + two_by_two_walk[2 * state->i + 1]; if (desiredh < 1 || desiredw < 1) { /* invalid size */ state->i++; continue; } if (!state->dna) { /* We have yet to start walking the array for the step 'i' */ state->dna = l_dnaHashGetDna(state->classer->dahash, (l_uint64)desiredh * desiredw, L_CLONE); if (!state->dna) { /* nothing there */ state->i++; continue; } state->n = 0; /* OK, we got a dna. */ } /* Continue working on this dna */ size = l_dnaGetCount(state->dna); for ( ; state->n < size; ) { templ = (l_int32)(state->dna->array[state->n++] + 0.5); pixt = pixaGetPix(state->classer->pixat, templ, L_CLONE); if (pixGetWidth(pixt) - 2 * JB_ADDED_PIXELS == desiredw && pixGetHeight(pixt) - 2 * JB_ADDED_PIXELS == desiredh) { pixDestroy(&pixt); return templ; } pixDestroy(&pixt); } /* Exhausted the dna (no match found); take another step and * try again. */ state->i++; l_dnaDestroy(&state->dna); continue; } } /*! * \brief finalPositioningForAlignment() * * \param[in] pixs input page image * \param[in] x, y location of UL corner of bb of component in pixs * \param[in] idelx, idely compensation to match centroids of component * and template * \param[in] pixt template, with JB_ADDED_PIXELS of padding * on all sides * \param[in] sumtab for summing fg pixels in an image * \param[in] pdx, pdy return delta on position for best match; each * one is in the set {-1, 0, 1} * \return 0 if OK, 1 on error * */ static l_int32 finalPositioningForAlignment(PIX *pixs, l_int32 x, l_int32 y, l_int32 idelx, l_int32 idely, PIX *pixt, l_int32 *sumtab, l_int32 *pdx, l_int32 *pdy) { l_int32 w, h, i, j, minx, miny, count, mincount; PIX *pixi; /* clipped from source pixs */ PIX *pixr; /* temporary storage */ BOX *box; PROCNAME("finalPositioningForAlignment"); if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (!pixt) return ERROR_INT("pixt not defined", procName, 1); if (!pdx || !pdy) return ERROR_INT("&dx and &dy not both defined", procName, 1); if (!sumtab) return ERROR_INT("sumtab not defined", procName, 1); *pdx = *pdy = 0; /* Use JB_ADDED_PIXELS pixels padding on each side */ pixGetDimensions(pixt, &w, &h, NULL); box = boxCreate(x - idelx - JB_ADDED_PIXELS, y - idely - JB_ADDED_PIXELS, w, h); pixi = pixClipRectangle(pixs, box, NULL); boxDestroy(&box); if (!pixi) return ERROR_INT("pixi not made", procName, 1); pixr = pixCreate(pixGetWidth(pixi), pixGetHeight(pixi), 1); mincount = 0x7fffffff; for (i = -1; i <= 1; i++) { for (j = -1; j <= 1; j++) { pixCopy(pixr, pixi); pixRasterop(pixr, j, i, w, h, PIX_SRC ^ PIX_DST, pixt, 0, 0); pixCountPixels(pixr, &count, sumtab); if (count < mincount) { minx = j; miny = i; mincount = count; } } } pixDestroy(&pixi); pixDestroy(&pixr); *pdx = minx; *pdy = miny; return 0; }