/*====================================================================* - Copyright (C) 2001 Leptonica. All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions - are met: - 1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - 2. Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials - provided with the distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *====================================================================*/ /*! * \file ptafunc2.c *

 *
 *      --------------------------------------
 *      This file has these Pta utilities:
 *         - sorting
 *         - ordered set operations
 *         - hash map operations
 *      --------------------------------------
 *
 *      Sorting
 *           PTA        *ptaSort()
 *           l_int32     ptaGetSortIndex()
 *           PTA        *ptaSortByIndex()
 *           PTAA       *ptaaSortByIndex()
 *           l_int32     ptaGetRankValue()
 *           PTA        *ptaSort2d()
 *           l_int32     ptaEqual()
 *
 *      Set operations using aset (rbtree)
 *           PTA        *ptaUnionByAset()
 *           PTA        *ptaRemoveDupsByAset()
 *           PTA        *ptaIntersectionByAset()
 *           L_ASET     *l_asetCreateFromPta()
 *
 *      Set operations using hashing (dnahash)
 *           PTA        *ptaUnionByHash()
 *           l_int32     ptaRemoveDupsByHash()
 *           PTA        *ptaIntersectionByHash();
 *           l_int32     ptaFindPtByHash()
 *           L_DNAHASH  *l_dnaHashCreateFromPta()
 *
 *
 * We have two implementations of set operations on an array of points:
 *
 *   (1) Using an underlying tree (rbtree)
 *       This uses a good 64 bit hashing function for the key,
 *       that is not expected to have hash collisions (and we do
 *       not test for them).  The tree is built up of the hash
 *       values, and if the hash is found in the tree, it is
 *       assumed that the point has already been found.
 *
 *   (2) Using an underlying hashing of the keys (dnahash)
 *       This uses a fast 64 bit hashing function for the key,
 *       which is then hashed into a bucket (a dna in a dnaHash).
 *       Because hash collisions can occur, the index into the
 *       pta for the point that gave rise to that key is stored,
 *       and the dna (bucket) is traversed, using the stored indices
 *       to determine if that point had already been seen.
 *
 *

*/ #ifdef HAVE_CONFIG_H #include #endif /* HAVE_CONFIG_H */ #include "allheaders.h" /*---------------------------------------------------------------------* * Sorting * *---------------------------------------------------------------------*/ /*! * \brief ptaSort() * * \param[in] ptas * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING * \param[out] pnaindex [optional] index of sorted order into * original array * \return ptad sorted version of ptas, or NULL on error */ PTA * ptaSort(PTA *ptas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex) { PTA *ptad; NUMA *naindex; PROCNAME("ptaSort"); if (pnaindex) *pnaindex = NULL; if (!ptas) return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) return (PTA *)ERROR_PTR("invalid sort type", procName, NULL); if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) return (PTA *)ERROR_PTR("invalid sort order", procName, NULL); if (ptaGetSortIndex(ptas, sorttype, sortorder, &naindex) != 0) return (PTA *)ERROR_PTR("naindex not made", procName, NULL); ptad = ptaSortByIndex(ptas, naindex); if (pnaindex) *pnaindex = naindex; else numaDestroy(&naindex); if (!ptad) return (PTA *)ERROR_PTR("ptad not made", procName, NULL); return ptad; } /*! * \brief ptaGetSortIndex() * * \param[in] ptas * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y * \param[in] sortorder L_SORT_INCREASING, L_SORT_DECREASING * \param[out] pnaindex index of sorted order into original array * \return 0 if OK, 1 on error */ l_ok ptaGetSortIndex(PTA *ptas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex) { l_int32 i, n; l_float32 x, y; NUMA *na, *nai; PROCNAME("ptaGetSortIndex"); if (!pnaindex) return ERROR_INT("&naindex not defined", procName, 1); *pnaindex = NULL; if (!ptas) return ERROR_INT("ptas not defined", procName, 1); if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) return ERROR_INT("invalid sort type", procName, 1); if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) return ERROR_INT("invalid sort order", procName, 1); /* Build up numa of specific data */ n = ptaGetCount(ptas); if ((na = numaCreate(n)) == NULL) return ERROR_INT("na not made", procName, 1); for (i = 0; i < n; i++) { ptaGetPt(ptas, i, &x, &y); if (sorttype == L_SORT_BY_X) numaAddNumber(na, x); else numaAddNumber(na, y); } /* Get the sort index for data array */ nai = numaGetSortIndex(na, sortorder); numaDestroy(&na); if (!nai) return ERROR_INT("naindex not made", procName, 1); *pnaindex = nai; return 0; } /*! * \brief ptaSortByIndex() * * \param[in] ptas * \param[in] naindex na that maps from the new pta to the input pta * \return ptad sorted, or NULL on error */ PTA * ptaSortByIndex(PTA *ptas, NUMA *naindex) { l_int32 i, index, n; l_float32 x, y; PTA *ptad; PROCNAME("ptaSortByIndex"); if (!ptas) return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); if (!naindex) return (PTA *)ERROR_PTR("naindex not defined", procName, NULL); /* Build up sorted pta using sort index */ n = numaGetCount(naindex); if ((ptad = ptaCreate(n)) == NULL) return (PTA *)ERROR_PTR("ptad not made", procName, NULL); for (i = 0; i < n; i++) { numaGetIValue(naindex, i, &index); ptaGetPt(ptas, index, &x, &y); ptaAddPt(ptad, x, y); } return ptad; } /*! * \brief ptaaSortByIndex() * * \param[in] ptaas * \param[in] naindex na that maps from the new ptaa to the input ptaa * \return ptaad sorted, or NULL on error */ PTAA * ptaaSortByIndex(PTAA *ptaas, NUMA *naindex) { l_int32 i, n, index; PTA *pta; PTAA *ptaad; PROCNAME("ptaaSortByIndex"); if (!ptaas) return (PTAA *)ERROR_PTR("ptaas not defined", procName, NULL); if (!naindex) return (PTAA *)ERROR_PTR("naindex not defined", procName, NULL); n = ptaaGetCount(ptaas); if (numaGetCount(naindex) != n) return (PTAA *)ERROR_PTR("numa and ptaa sizes differ", procName, NULL); ptaad = ptaaCreate(n); for (i = 0; i < n; i++) { numaGetIValue(naindex, i, &index); pta = ptaaGetPta(ptaas, index, L_COPY); ptaaAddPta(ptaad, pta, L_INSERT); } return ptaad; } /*! * \brief ptaGetRankValue() * * \param[in] pta * \param[in] fract use 0.0 for smallest, 1.0 for largest * \param[in] ptasort [optional] version of %pta sorted by %sorttype * \param[in] sorttype L_SORT_BY_X, L_SORT_BY_Y * \param[out] pval rankval: the x or y value at %fract * \return 0 if OK, 1 on error */ l_ok ptaGetRankValue(PTA *pta, l_float32 fract, PTA *ptasort, l_int32 sorttype, l_float32 *pval) { l_int32 index, n; PTA *ptas; PROCNAME("ptaGetRankValue"); if (!pval) return ERROR_INT("&val not defined", procName, 1); *pval = 0.0; if (!pta) return ERROR_INT("pta not defined", procName, 1); if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y) return ERROR_INT("invalid sort type", procName, 1); if (fract < 0.0 || fract > 1.0) return ERROR_INT("fract not in [0.0 ... 1.0]", procName, 1); if ((n = ptaGetCount(pta)) == 0) return ERROR_INT("pta empty", procName, 1); if (ptasort) ptas = ptasort; else ptas = ptaSort(pta, sorttype, L_SORT_INCREASING, NULL); index = (l_int32)(fract * (l_float32)(n - 1) + 0.5); if (sorttype == L_SORT_BY_X) ptaGetPt(ptas, index, pval, NULL); else /* sort by y */ ptaGetPt(ptas, index, NULL, pval); if (!ptasort) ptaDestroy(&ptas); return 0; } /*! * \brief ptaSort2d() * * \param[in] ptas * \return ptad, or NULL on error * *

 * Notes:
 *      (1) Sort increasing by row-major, scanning down from the UL corner,
 *          where for each value of y, order the pts from left to right.
 *

*/ PTA * ptaSort2d(PTA *pta) { l_int32 index, i, j, n, nx, ny, start, end; l_float32 x, y, yp, val; NUMA *na1, *na2, *nas, *nax; PTA *pta1, *ptad; PROCNAME("ptaSort2d"); if (!pta) return (PTA *)ERROR_PTR("pta not defined", procName, NULL); /* Sort by row-major (y first, then x). After sort by y, * the x values at the same y are not sorted. */ pta1 = ptaSort(pta, L_SORT_BY_Y, L_SORT_INCREASING, NULL); /* Find start and ending indices with the same y value */ n = ptaGetCount(pta1); na1 = numaCreate(0); /* holds start index of sequence with same y */ na2 = numaCreate(0); /* holds end index of sequence with same y */ numaAddNumber(na1, 0); ptaGetPt(pta1, 0, &x, &yp); for (i = 1; i < n; i++) { ptaGetPt(pta1, i, &x, &y); if (y != yp) { numaAddNumber(na1, i); numaAddNumber(na2, i - 1); } yp = y; } numaAddNumber(na2, n - 1); /* Sort by increasing x each set with the same y value */ ptad = ptaCreate(n); ny = numaGetCount(na1); /* number of distinct y values */ for (i = 0, index = 0; i < ny; i++) { numaGetIValue(na1, i, &start); numaGetIValue(na2, i, &end); nx = end - start + 1; /* number of points with current y value */ if (nx == 1) { ptaGetPt(pta1, index++, &x, &y); ptaAddPt(ptad, x, y); } else { /* More than 1 point; extract and sort the x values */ nax = numaCreate(nx); for (j = 0; j < nx; j++) { ptaGetPt(pta1, index + j, &x, &y); numaAddNumber(nax, x); } nas = numaSort(NULL, nax, L_SORT_INCREASING); /* Add the points with x sorted */ for (j = 0; j < nx; j++) { numaGetFValue(nas, j, &val); ptaAddPt(ptad, val, y); } index += nx; numaDestroy(&nax); numaDestroy(&nas); } } numaDestroy(&na1); numaDestroy(&na2); ptaDestroy(&pta1); return ptad; } /*! * \brief ptaEqual() * * \param[in] pta1 * \param[in] pta2 * \param[out] psame 1 if same; 0 if different * \return 0 if OK; 1 on error * *

 * Notes:
 *      (1) Equality is defined as having the same set of points,
 *          independent of the order in which they are presented.
 *

*/ l_ok ptaEqual(PTA *pta1, PTA *pta2, l_int32 *psame) { l_int32 i, n1, n2; l_float32 x1, y1, x2, y2; PTA *ptas1, *ptas2; PROCNAME("ptaEqual"); if (!psame) return ERROR_INT("&same not defined", procName, 1); *psame = 0.0; if (!pta1 || !pta2) return ERROR_INT("pta1 and pta2 not both defined", procName, 1); n1 = ptaGetCount(pta1); n2 = ptaGetCount(pta2); if (n1 != n2) return 0; /* 2d sort each and compare */ ptas1 = ptaSort2d(pta1); ptas2 = ptaSort2d(pta2); for (i = 0; i < n1; i++) { ptaGetPt(ptas1, i, &x1, &y1); ptaGetPt(ptas2, i, &x2, &y2); if (x1 != x2 || y1 != y2) { ptaDestroy(&ptas1); ptaDestroy(&ptas2); return 0; } } *psame = 1; ptaDestroy(&ptas1); ptaDestroy(&ptas2); return 0; } /*---------------------------------------------------------------------* * Set operations using aset (rbtree) * *---------------------------------------------------------------------*/ /*! * \brief ptaUnionByAset() * * \param[in] pta1, pta2 * \return ptad with the union of the set of points, or NULL on error * *

 * Notes:
 *      (1) See sarrayRemoveDupsByAset() for the approach.
 *      (2) The key is a 64-bit hash from the (x,y) pair.
 *      (3) This is slower than ptaUnionByHash(), mostly because of the
 *          nlogn sort to build up the rbtree.  Do not use for large
 *          numbers of points (say, > 1M).
 *      (4) The *Aset() functions use the sorted l_Aset, which is just
 *          an rbtree in disguise.
 *

*/ PTA * ptaUnionByAset(PTA *pta1, PTA *pta2) { PTA *pta3, *ptad; PROCNAME("ptaUnionByAset"); if (!pta1) return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL); if (!pta2) return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL); /* Join */ pta3 = ptaCopy(pta1); ptaJoin(pta3, pta2, 0, -1); /* Eliminate duplicates */ ptad = ptaRemoveDupsByAset(pta3); ptaDestroy(&pta3); return ptad; } /*! * \brief ptaRemoveDupsByAset() * * \param[in] ptas assumed to be integer values * \return ptad with duplicates removed, or NULL on error * *

 * Notes:
 *      (1) This is slower than ptaRemoveDupsByHash(), mostly because
 *          of the nlogn sort to build up the rbtree.  Do not use for
 *          large numbers of points (say, > 1M).
 *

*/ PTA * ptaRemoveDupsByAset(PTA *ptas) { l_int32 i, n, x, y; PTA *ptad; l_uint64 hash; L_ASET *set; RB_TYPE key; PROCNAME("ptaRemoveDupsByAset"); if (!ptas) return (PTA *)ERROR_PTR("ptas not defined", procName, NULL); set = l_asetCreate(L_UINT_TYPE); n = ptaGetCount(ptas); ptad = ptaCreate(n); for (i = 0; i < n; i++) { ptaGetIPt(ptas, i, &x, &y); l_hashPtToUint64(x, y, &hash); key.utype = hash; if (!l_asetFind(set, key)) { ptaAddPt(ptad, x, y); l_asetInsert(set, key); } } l_asetDestroy(&set); return ptad; } /*! * \brief ptaIntersectionByAset() * * \param[in] pta1, pta2 * \return ptad intersection of the point sets, or NULL on error * *

 * Notes:
 *      (1) See sarrayIntersectionByAset() for the approach.
 *      (2) The key is a 64-bit hash from the (x,y) pair.
 *      (3) This is slower than ptaIntersectionByHash(), mostly because
 *          of the nlogn sort to build up the rbtree.  Do not use for
 *          large numbers of points (say, > 1M).
 *

*/ PTA * ptaIntersectionByAset(PTA *pta1, PTA *pta2) { l_int32 n1, n2, i, n, x, y; l_uint64 hash; L_ASET *set1, *set2; RB_TYPE key; PTA *pta_small, *pta_big, *ptad; PROCNAME("ptaIntersectionByAset"); if (!pta1) return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL); if (!pta2) return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL); /* Put the elements of the biggest array into a set */ n1 = ptaGetCount(pta1); n2 = ptaGetCount(pta2); pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */ pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */ set1 = l_asetCreateFromPta(pta_big); /* Build up the intersection of points */ ptad = ptaCreate(0); n = ptaGetCount(pta_small); set2 = l_asetCreate(L_UINT_TYPE); for (i = 0; i < n; i++) { ptaGetIPt(pta_small, i, &x, &y); l_hashPtToUint64(x, y, &hash); key.utype = hash; if (l_asetFind(set1, key) && !l_asetFind(set2, key)) { ptaAddPt(ptad, x, y); l_asetInsert(set2, key); } } l_asetDestroy(&set1); l_asetDestroy(&set2); return ptad; } /*! * \brief l_asetCreateFromPta() * * \param[in] pta * \return set using a 64-bit hash of (x,y) as the key */ L_ASET * l_asetCreateFromPta(PTA *pta) { l_int32 i, n, x, y; l_uint64 hash; L_ASET *set; RB_TYPE key; PROCNAME("l_asetCreateFromPta"); if (!pta) return (L_ASET *)ERROR_PTR("pta not defined", procName, NULL); set = l_asetCreate(L_UINT_TYPE); n = ptaGetCount(pta); for (i = 0; i < n; i++) { ptaGetIPt(pta, i, &x, &y); l_hashPtToUint64(x, y, &hash); key.utype = hash; l_asetInsert(set, key); } return set; } /*---------------------------------------------------------------------* * Set operations using hashing (rbtree) * *---------------------------------------------------------------------*/ /*! * \brief ptaUnionByHash() * * \param[in] pta1, pta2 * \return ptad with the union of the set of points, or NULL on error * *

 * Notes:
 *      (1) This is faster than ptaUnionByAset(), because the
 *          bucket lookup is O(n).  It should be used if the pts are
 *          integers (e.g., representing pixel positions).
 *

*/ PTA * ptaUnionByHash(PTA *pta1, PTA *pta2) { PTA *pta3, *ptad; PROCNAME("ptaUnionByHash"); if (!pta1) return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL); if (!pta2) return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL); /* Join */ pta3 = ptaCopy(pta1); ptaJoin(pta3, pta2, 0, -1); /* Eliminate duplicates */ ptaRemoveDupsByHash(pta3, &ptad, NULL); ptaDestroy(&pta3); return ptad; } /*! * \brief ptaRemoveDupsByHash() * * \param[in] ptas assumed to be integer values * \param[out] pptad unique set of pts; duplicates removed * \param[out] pdahash [optional] dnahash used for lookup * \return 0 if OK, 1 on error * *

 * Notes:
 *      (1) Generates a pta with unique values.
 *      (2) The dnahash is built up with ptad to assure uniqueness.
 *          It can be used to find if a point is in the set:
 *              ptaFindPtByHash(ptad, dahash, x, y, &index)
 *      (3) The hash of the (x,y) location is simple and fast.  It scales
 *          up with the number of buckets to insure a fairly random
 *          bucket selection for adjacent points.
 *      (4) A Dna is used rather than a Numa because we need accurate
 *          representation of 32-bit integers that are indices into ptas.
 *          Integer --> float --> integer conversion makes errors for
 *          integers larger than 10M.
 *      (5) This is faster than ptaRemoveDupsByAset(), because the
 *          bucket lookup is O(n), although there is a double-loop
 *          lookup within the dna in each bucket.
 *

*/ l_ok ptaRemoveDupsByHash(PTA *ptas, PTA **pptad, L_DNAHASH **pdahash) { l_int32 i, n, index, items, x, y; l_uint32 nsize; l_uint64 key; PTA *ptad; L_DNAHASH *dahash; PROCNAME("ptaRemoveDupsByHash"); if (pdahash) *pdahash = NULL; if (!pptad) return ERROR_INT("&ptad not defined", procName, 1); *pptad = NULL; if (!ptas) return ERROR_INT("ptas not defined", procName, 1); n = ptaGetCount(ptas); findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ dahash = l_dnaHashCreate(nsize, 8); ptad = ptaCreate(n); *pptad = ptad; for (i = 0, items = 0; i < n; i++) { ptaGetIPt(ptas, i, &x, &y); ptaFindPtByHash(ptad, dahash, x, y, &index); if (index < 0) { /* not found */ l_hashPtToUint64(x, y, &key); l_dnaHashAdd(dahash, key, (l_float64)items); ptaAddPt(ptad, x, y); items++; } } if (pdahash) *pdahash = dahash; else l_dnaHashDestroy(&dahash); return 0; } /*! * \brief ptaIntersectionByHash() * * \param[in] pta1, pta2 * \return ptad intersection of the point sets, or NULL on error * *

 * Notes:
 *      (1) This is faster than ptaIntersectionByAset(), because the
 *          bucket lookup is O(n).  It should be used if the pts are
 *          integers (e.g., representing pixel positions).
 *

*/ PTA * ptaIntersectionByHash(PTA *pta1, PTA *pta2) { l_int32 n1, n2, nsmall, i, x, y, index1, index2; l_uint32 nsize2; l_uint64 key; L_DNAHASH *dahash1, *dahash2; PTA *pta_small, *pta_big, *ptad; PROCNAME("ptaIntersectionByHash"); if (!pta1) return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL); if (!pta2) return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL); /* Put the elements of the biggest pta into a dnahash */ n1 = ptaGetCount(pta1); n2 = ptaGetCount(pta2); pta_small = (n1 < n2) ? pta1 : pta2; /* do not destroy pta_small */ pta_big = (n1 < n2) ? pta2 : pta1; /* do not destroy pta_big */ dahash1 = l_dnaHashCreateFromPta(pta_big); /* Build up the intersection of points. Add to ptad * if the point is in pta_big (using dahash1) but hasn't * yet been seen in the traversal of pta_small (using dahash2). */ ptad = ptaCreate(0); nsmall = ptaGetCount(pta_small); findNextLargerPrime(nsmall / 20, &nsize2); /* buckets in hash table */ dahash2 = l_dnaHashCreate(nsize2, 0); for (i = 0; i < nsmall; i++) { ptaGetIPt(pta_small, i, &x, &y); ptaFindPtByHash(pta_big, dahash1, x, y, &index1); if (index1 >= 0) { /* found */ ptaFindPtByHash(pta_small, dahash2, x, y, &index2); if (index2 == -1) { /* not found */ ptaAddPt(ptad, x, y); l_hashPtToUint64(x, y, &key); l_dnaHashAdd(dahash2, key, (l_float64)i); } } } l_dnaHashDestroy(&dahash1); l_dnaHashDestroy(&dahash2); return ptad; } /*! * \brief ptaFindPtByHash() * * \param[in] pta * \param[in] dahash built from pta * \param[in] x, y arbitrary points * \param[out] pindex index into pta if (x,y) is in pta; -1 otherwise * \return 0 if OK, 1 on error * *

 * Notes:
 *      (1) Fast lookup in dnaHash associated with a pta, to see if a
 *          random point (x,y) is already stored in the hash table.
 *      (2) We use a strong hash function to minimize the chance that
 *          two different points hash to the same key value.
 *      (3) We select the number of buckets to be about 5% of the size
 *          of the input %pta, so that when fully populated, each
 *          bucket (dna) will have about 20 entries, each being an index
 *          into %pta.  In lookup, after hashing to the key, and then
 *          again to the bucket, we traverse the bucket (dna), using the
 *          index into %pta to check if the point (x,y) has been found before.
 *

*/ l_ok ptaFindPtByHash(PTA *pta, L_DNAHASH *dahash, l_int32 x, l_int32 y, l_int32 *pindex) { l_int32 i, nvals, index, xi, yi; l_uint64 key; L_DNA *da; PROCNAME("ptaFindPtByHash"); if (!pindex) return ERROR_INT("&index not defined", procName, 1); *pindex = -1; if (!pta) return ERROR_INT("pta not defined", procName, 1); if (!dahash) return ERROR_INT("dahash not defined", procName, 1); l_hashPtToUint64(x, y, &key); da = l_dnaHashGetDna(dahash, key, L_NOCOPY); if (!da) return 0; /* Run through the da, looking for this point */ nvals = l_dnaGetCount(da); for (i = 0; i < nvals; i++) { l_dnaGetIValue(da, i, &index); ptaGetIPt(pta, index, &xi, &yi); if (x == xi && y == yi) { *pindex = index; return 0; } } return 0; } /*! * \brief l_dnaHashCreateFromPta() * * \param[in] pta * \return dahash, or NULL on error */ L_DNAHASH * l_dnaHashCreateFromPta(PTA *pta) { l_int32 i, n, x, y; l_uint32 nsize; l_uint64 key; L_DNAHASH *dahash; PROCNAME("l_dnaHashCreateFromPta"); if (!pta) return (L_DNAHASH *)ERROR_PTR("pta not defined", procName, NULL); /* Build up dnaHash of indices, hashed by a key that is * a large linear combination of x and y values designed to * randomize the key. Having about 20 pts in each bucket is * roughly optimal for speed for large sets. */ n = ptaGetCount(pta); findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ /* lept_stderr("Prime used: %d\n", nsize); */ /* Add each point, using the hash as key and the index into * %ptas as the value. Storing the index enables operations * that check for duplicates. */ dahash = l_dnaHashCreate(nsize, 8); for (i = 0; i < n; i++) { ptaGetIPt(pta, i, &x, &y); l_hashPtToUint64(x, y, &key); l_dnaHashAdd(dahash, key, (l_float64)i); } return dahash; }