/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file baseline.c *
 *
 *      Locate text baselines in an image
 *           NUMA     *pixFindBaselines()
 *
 *      Projective transform to remove local skew
 *           PIX      *pixDeskewLocal()
 *
 *      Determine local skew
 *           l_int32   pixGetLocalSkewTransform()
 *           NUMA     *pixGetLocalSkewAngles()
 *
 *  We have two apparently different functions here:
 *    ~ finding baselines
 *    ~ finding a projective transform to remove keystone warping
 *  The function pixGetLocalSkewAngles() returns an array of angles,
 *  one for each raster line, and the baselines of the text lines
 *  should intersect the left edge of the image with that angle.
 * 
*/ #ifdef HAVE_CONFIG_H #include #endif /* HAVE_CONFIG_H */ #include #include "allheaders.h" /* Min to travel after finding max before abandoning peak */ static const l_int32 MinDistInPeak = 35; /* Thresholds for peaks and zeros, relative to the max peak */ static const l_int32 PeakThresholdRatio = 20; static const l_int32 ZeroThresholdRatio = 100; /* Default values for determining local skew */ static const l_int32 DefaultSlices = 10; static const l_int32 DefaultSweepReduction = 2; static const l_int32 DefaultBsReduction = 1; static const l_float32 DefaultSweepRange = 5.; /* degrees */ static const l_float32 DefaultSweepDelta = 1.; /* degrees */ static const l_float32 DefaultMinbsDelta = 0.01f; /* degrees */ /* Overlap slice fraction added to top and bottom of each slice */ static const l_float32 OverlapFraction = 0.5; /* Minimum allowed confidence (ratio) for accepting a value */ static const l_float32 MinAllowedConfidence = 3.0; /*---------------------------------------------------------------------* * Locate text baselines in an image * *---------------------------------------------------------------------*/ /*! * \brief pixFindBaselines() * * \param[in] pixs 1 bpp, 300 ppi * \param[out] ppta [optional] pairs of pts corresponding to * approx. ends of each text line * \param[in] pixadb for debug output; use NULL to skip * \return na of baseline y values, or NULL on error * *
 * Notes:
 *      (1) Input binary image must have text lines already aligned
 *          horizontally.  This can be done by either rotating the
 *          image with pixDeskew(), or, if a projective transform
 *          is required, by doing pixDeskewLocal() first.
 *      (2) Input null for &pta if you don't want this returned.
 *          The pta will come in pairs of points (left and right end
 *          of each baseline).
 *      (3) Caution: this will not work properly on text with multiple
 *          columns, where the lines are not aligned between columns.
 *          If there are multiple columns, they should be extracted
 *          separately before finding the baselines.
 *      (4) This function constructs different types of output
 *          for baselines; namely, a set of raster line values and
 *          a set of end points of each baseline.
 *      (5) This function was designed to handle short and long text lines
 *          without using dangerous thresholds on the peak heights.  It does
 *          this by combining the differential signal with a morphological
 *          analysis of the locations of the text lines.  One can also
 *          combine this data to normalize the peak heights, by weighting
 *          the differential signal in the region of each baseline
 *          by the inverse of the width of the text line found there.
 * 
*/ NUMA * pixFindBaselines(PIX *pixs, PTA **ppta, PIXA *pixadb) { l_int32 h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh; l_int32 imaxloc, peakthresh, zerothresh, inpeak; l_int32 mintosearch, max, maxloc, nloc, locval; l_int32 *array; l_float32 maxval; BOXA *boxa1, *boxa2, *boxa3; GPLOT *gplot; NUMA *nasum, *nadiff, *naloc, *naval; PIX *pix1, *pix2; PTA *pta; PROCNAME("pixFindBaselines"); if (ppta) *ppta = NULL; if (!pixs || pixGetDepth(pixs) != 1) return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); /* Close up the text characters, removing noise */ pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0); /* Estimate the resolution */ if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT); /* Save the difference of adjacent row sums. * The high positive-going peaks are the baselines */ if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) { pixDestroy(&pix1); return (NUMA *)ERROR_PTR("nasum not made", procName, NULL); } h = pixGetHeight(pixs); nadiff = numaCreate(h); numaGetIValue(nasum, 0, &val2); for (i = 0; i < h - 1; i++) { val1 = val2; numaGetIValue(nasum, i + 1, &val2); numaAddNumber(nadiff, val1 - val2); } numaDestroy(&nasum); if (pixadb) { /* show the difference signal */ lept_mkdir("lept/baseline"); gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig"); pix2 = pixRead("/tmp/lept/baseline/diff.png"); pixaAddPix(pixadb, pix2, L_INSERT); } /* Use the zeroes of the profile to locate each baseline. */ array = numaGetIArray(nadiff); ndiff = numaGetCount(nadiff); numaGetMax(nadiff, &maxval, &imaxloc); numaDestroy(&nadiff); /* Use this to begin locating a new peak: */ peakthresh = (l_int32)maxval / PeakThresholdRatio; /* Use this to begin a region between peaks: */ zerothresh = (l_int32)maxval / ZeroThresholdRatio; naloc = numaCreate(0); naval = numaCreate(0); inpeak = FALSE; for (i = 0; i < ndiff; i++) { if (inpeak == FALSE) { if (array[i] > peakthresh) { /* transition to in-peak */ inpeak = TRUE; mintosearch = i + MinDistInPeak; /* accept no zeros * between i and mintosearch */ max = array[i]; maxloc = i; } } else { /* inpeak == TRUE; look for max */ if (array[i] > max) { max = array[i]; maxloc = i; mintosearch = i + MinDistInPeak; } else if (i > mintosearch && array[i] <= zerothresh) { /* leave */ inpeak = FALSE; numaAddNumber(naval, max); numaAddNumber(naloc, maxloc); } } } LEPT_FREE(array); /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */ if (inpeak) { numaAddNumber(naval, max); numaAddNumber(naloc, maxloc); } if (pixadb) { /* show the raster locations for the peaks */ gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs", "rasterline", "height"); gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs"); gplotMakeOutput(gplot); gplotDestroy(&gplot); pix2 = pixRead("/tmp/lept/baseline/loc.png"); pixaAddPix(pixadb, pix2, L_INSERT); } numaDestroy(&naval); /* Generate an approximate profile of text line width. * First, filter the boxes of text, where there may be * more than one box for a given textline. */ pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0); if (pixadb) pixaAddPix(pixadb, pix2, L_COPY); boxa1 = pixConnComp(pix2, NULL, 4); pixDestroy(&pix1); pixDestroy(&pix2); if (boxaGetCount(boxa1) == 0) { numaDestroy(&naloc); boxaDestroy(&boxa1); L_INFO("no compnents after filtering\n", procName); return NULL; } boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.); boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL); boxaDestroy(&boxa1); boxaDestroy(&boxa2); /* Optionally, find the baseline segments */ pta = NULL; if (ppta) { pta = ptaCreate(0); *ppta = pta; } if (pta) { nloc = numaGetCount(naloc); nbox = boxaGetCount(boxa3); for (i = 0; i < nbox; i++) { boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh); for (j = 0; j < nloc; j++) { numaGetIValue(naloc, j, &locval); if (L_ABS(locval - (by + bh)) > 25) continue; ptaAddPt(pta, bx, locval); ptaAddPt(pta, bx + bw, locval); break; } } } boxaDestroy(&boxa3); if (pixadb && pta) { /* display baselines */ l_int32 npts, x1, y1, x2, y2; pix1 = pixConvertTo32(pixs); npts = ptaGetCount(pta); for (i = 0; i < npts; i += 2) { ptaGetIPt(pta, i, &x1, &y1); ptaGetIPt(pta, i + 1, &x2, &y2); pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0); } pixWriteDebug("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG); pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT); pixDestroy(&pix1); } return naloc; } /*---------------------------------------------------------------------* * Projective transform to remove local skew * *---------------------------------------------------------------------*/ /*! * \brief pixDeskewLocal() * * \param[in] pixs 1 bpp * \param[in] nslices the number of horizontal overlapping slices; * must be larger than 1 and not exceed 20; * use 0 for default * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; * use 0 for default value * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and * not larger than redsweep; use 0 for default value * \param[in] sweeprange half the full range, assumed about 0; in degrees; * use 0.0 for default value * \param[in] sweepdelta angle increment of sweep; in degrees; * use 0.0 for default value * \param[in] minbsdelta min binary search increment angle; in degrees; * use 0.0 for default value * \return pixd, or NULL on error * *
 * Notes:
 *      (1) This function allows deskew of a page whose skew changes
 *          approximately linearly with vertical position.  It uses
 *          a projective transform that in effect does a differential
 *          shear about the LHS of the page, and makes all text lines
 *          horizontal.
 *      (2) The origin of the keystoning can be either a cheap document
 *          feeder that rotates the page as it is passed through, or a
 *          camera image taken from either the left or right side
 *          of the vertical.
 *      (3) The image transformation is a projective warping,
 *          not a rotation.  Apart from this function, the text lines
 *          must be properly aligned vertically with respect to each
 *          other.  This can be done by pre-processing the page; e.g.,
 *          by rotating or horizontally shearing it.
 *          Typically, this can be achieved by vertically aligning
 *          the page edge.
 * 
*/ PIX * pixDeskewLocal(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta) { l_int32 ret; PIX *pixd; PTA *ptas, *ptad; PROCNAME("pixDeskewLocal"); if (!pixs || pixGetDepth(pixs) != 1) return (PIX *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); /* Skew array gives skew angle (deg) as fctn of raster line * where it intersects the LHS of the image */ ret = pixGetLocalSkewTransform(pixs, nslices, redsweep, redsearch, sweeprange, sweepdelta, minbsdelta, &ptas, &ptad); if (ret != 0) return (PIX *)ERROR_PTR("transform pts not found", procName, NULL); /* Use a projective transform */ pixd = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE); ptaDestroy(&ptas); ptaDestroy(&ptad); return pixd; } /*---------------------------------------------------------------------* * Determine the local skew * *---------------------------------------------------------------------*/ /*! * \brief pixGetLocalSkewTransform() * * \param[in] pixs * \param[in] nslices the number of horizontal overlapping slices; * must be larger than 1 and not exceed 20; * use 0 for default * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; * use 0 for default value * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not * larger than redsweep; use 0 for default value * \param[in] sweeprange half the full range, assumed about 0; * in degrees; use 0.0 for default value * \param[in] sweepdelta angle increment of sweep; in degrees; * use 0.0 for default value * \param[in] minbsdelta min binary search increment angle; in degrees; * use 0.0 for default value * \param[out] pptas 4 points in the source * \param[out] pptad the corresponding 4 pts in the dest * \return 0 if OK, 1 on error * *
 * Notes:
 *      (1) This generates two pairs of points in the src, each pair
 *          corresponding to a pair of points that would lie along
 *          the same raster line in a transformed (dewarped) image.
 *      (2) The sets of 4 src and 4 dest points returned by this function
 *          can then be used, in a projective or bilinear transform,
 *          to remove keystoning in the src.
 * 
*/ l_ok pixGetLocalSkewTransform(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, PTA **pptas, PTA **pptad) { l_int32 w, h, i; l_float32 deg2rad, angr, angd, dely; NUMA *naskew; PTA *ptas, *ptad; PROCNAME("pixGetLocalSkewTransform"); if (!pptas || !pptad) return ERROR_INT("&ptas and &ptad not defined", procName, 1); *pptas = *pptad = NULL; if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (nslices < 2 || nslices > 20) nslices = DefaultSlices; if (redsweep < 1 || redsweep > 8) redsweep = DefaultSweepReduction; if (redsearch < 1 || redsearch > redsweep) redsearch = DefaultBsReduction; if (sweeprange == 0.0) sweeprange = DefaultSweepRange; if (sweepdelta == 0.0) sweepdelta = DefaultSweepDelta; if (minbsdelta == 0.0) minbsdelta = DefaultMinbsDelta; naskew = pixGetLocalSkewAngles(pixs, nslices, redsweep, redsearch, sweeprange, sweepdelta, minbsdelta, NULL, NULL, 0); if (!naskew) return ERROR_INT("naskew not made", procName, 1); deg2rad = 3.14159265f / 180.f; w = pixGetWidth(pixs); h = pixGetHeight(pixs); ptas = ptaCreate(4); ptad = ptaCreate(4); *pptas = ptas; *pptad = ptad; /* Find i for skew line that intersects LHS at i and RHS at h / 20 */ for (i = 0; i < h; i++) { numaGetFValue(naskew, i, &angd); angr = angd * deg2rad; dely = w * tan(angr); if (i - dely > 0.05 * h) break; } ptaAddPt(ptas, 0, i); ptaAddPt(ptas, w - 1, i - dely); ptaAddPt(ptad, 0, i); ptaAddPt(ptad, w - 1, i); /* Find i for skew line that intersects LHS at i and RHS at 19h / 20 */ for (i = h - 1; i > 0; i--) { numaGetFValue(naskew, i, &angd); angr = angd * deg2rad; dely = w * tan(angr); if (i - dely < 0.95 * h) break; } ptaAddPt(ptas, 0, i); ptaAddPt(ptas, w - 1, i - dely); ptaAddPt(ptad, 0, i); ptaAddPt(ptad, w - 1, i); numaDestroy(&naskew); return 0; } /*! * \brief pixGetLocalSkewAngles() * * \param[in] pixs 1 bpp * \param[in] nslices the number of horizontal overlapping slices; * must be larger than 1 and not exceed 20; * use 0 for default * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; * use 0 for default value * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not * larger than redsweep; use 0 for default value * \param[in] sweeprange half the full range, assumed about 0; * in degrees; use 0.0 for default value * \param[in] sweepdelta angle increment of sweep; in degrees; * use 0.0 for default value * \param[in] minbsdelta min binary search increment angle; in degrees; * use 0.0 for default value * \param[out] pa [optional] slope of skew as fctn of y * \param[out] pb [optional] intercept at y = 0 of skew, 8 as a function of y * \param[in] debug 1 for generating plot of skew angle vs. y; * 0 otherwise * \return naskew, or NULL on error * *
 * Notes:
 *      (1) The local skew is measured in a set of overlapping strips.
 *          We then do a least square linear fit parameters to get
 *          the slope and intercept parameters a and b in
 *              skew-angle = a * y + b  (degrees)
 *          for the local skew as a function of raster line y.
 *          This is then used to make naskew, which can be interpreted
 *          as the computed skew angle (in degrees) at the left edge
 *          of each raster line.
 *      (2) naskew can then be used to find the baselines of text, because
 *          each text line has a baseline that should intersect
 *          the left edge of the image with the angle given by this
 *          array, evaluated at the raster line of intersection.
 * 
*/ NUMA * pixGetLocalSkewAngles(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_float32 *pa, l_float32 *pb, l_int32 debug) { l_int32 w, h, hs, i, ystart, yend, ovlap, npts; l_float32 angle, conf, ycenter, a, b; BOX *box; GPLOT *gplot; NUMA *naskew, *nax, *nay; PIX *pix; PTA *pta; PROCNAME("pixGetLocalSkewAngles"); if (!pixs || pixGetDepth(pixs) != 1) return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); if (nslices < 2 || nslices > 20) nslices = DefaultSlices; if (redsweep < 1 || redsweep > 8) redsweep = DefaultSweepReduction; if (redsearch < 1 || redsearch > redsweep) redsearch = DefaultBsReduction; if (sweeprange == 0.0) sweeprange = DefaultSweepRange; if (sweepdelta == 0.0) sweepdelta = DefaultSweepDelta; if (minbsdelta == 0.0) minbsdelta = DefaultMinbsDelta; pixGetDimensions(pixs, &w, &h, NULL); hs = h / nslices; ovlap = (l_int32)(OverlapFraction * hs); pta = ptaCreate(nslices); for (i = 0; i < nslices; i++) { ystart = L_MAX(0, hs * i - ovlap); yend = L_MIN(h - 1, hs * (i + 1) + ovlap); ycenter = (l_float32)(ystart + yend) / 2; box = boxCreate(0, ystart, w, yend - ystart + 1); pix = pixClipRectangle(pixs, box, NULL); pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch, sweeprange, sweepdelta, minbsdelta); if (conf > MinAllowedConfidence) ptaAddPt(pta, ycenter, angle); pixDestroy(&pix); boxDestroy(&box); } /* Do linear least squares fit */ if ((npts = ptaGetCount(pta)) < 2) { ptaDestroy(&pta); return (NUMA *)ERROR_PTR("can't fit skew", procName, NULL); } ptaGetLinearLSF(pta, &a, &b, NULL); if (pa) *pa = a; if (pb) *pb = b; /* Make skew angle array as function of raster line */ naskew = numaCreate(h); for (i = 0; i < h; i++) { angle = a * i + b; numaAddNumber(naskew, angle); } if (debug) { lept_mkdir("lept/baseline"); ptaGetArrays(pta, &nax, &nay); gplot = gplotCreate("/tmp/lept/baseline/skew", GPLOT_PNG, "skew as fctn of y", "y (in raster lines from top)", "angle (in degrees)"); gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf"); gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&nax); numaDestroy(&nay); } ptaDestroy(&pta); return naskew; }