summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkinori Hattori <hattya@gentoo.org>2021-10-09 15:48:40 +0900
committerAkinori Hattori <hattya@gentoo.org>2021-10-09 15:54:10 +0900
commit2ca0adba706af006c13a385fd0357fcb66fb5da0 (patch)
tree9a15243b7d1c7f3caa6f6ca6cfaee536870ca768
parentapp-i18n/libkkc: fix link with gold (diff)
downloadgentoo-2ca0adba.tar.gz
gentoo-2ca0adba.tar.bz2
gentoo-2ca0adba.zip
app-i18n/libkkc-data: initial import
Package-Manager: Portage-3.0.20, Repoman-3.0.3 Signed-off-by: Akinori Hattori <hattya@gentoo.org>
-rw-r--r--app-i18n/libkkc-data/Manifest1
-rw-r--r--app-i18n/libkkc-data/files/libkkc-data-python3.patch138
-rw-r--r--app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild20
-rw-r--r--app-i18n/libkkc-data/metadata.xml11
4 files changed, 170 insertions, 0 deletions
diff --git a/app-i18n/libkkc-data/Manifest b/app-i18n/libkkc-data/Manifest
new file mode 100644
index 000000000000..c0fc91c8c736
--- /dev/null
+++ b/app-i18n/libkkc-data/Manifest
@@ -0,0 +1 @@
+DIST libkkc-data-0.2.7.tar.xz 22262552 BLAKE2B 2c735ee9fabf8f8f201591c9ed584cece22ddcd15da5f36b39bb422b1bce1dbcbcd66f71b5713e2dd4c5e2862b06b014c24a4a3db63c86ecee20519434da9261 SHA512 61c0cd8c0fa41ed8df49cac6709eebb245cc965d7e192b1ba945e95f2fc46aca8aa48c16e1977a12c157c55dab6b9f4c30f4905806725eca6e697b762eb7cbd7
diff --git a/app-i18n/libkkc-data/files/libkkc-data-python3.patch b/app-i18n/libkkc-data/files/libkkc-data-python3.patch
new file mode 100644
index 000000000000..46109a92dff8
--- /dev/null
+++ b/app-i18n/libkkc-data/files/libkkc-data-python3.patch
@@ -0,0 +1,138 @@
+From ba1c1bd3eb86d887fc3689c3142732658071b5f7 Mon Sep 17 00:00:00 2001
+From: Takao Fujiwara <tfujiwar@redhat.com>
+Date: Mon, 30 Jul 2018 15:26:37 +0900
+Subject: [PATCH] build: Enable python3
+
+---
+ tools/genfilter.py | 18 +++++++--------
+ tools/sortlm.py | 23 ++++++++-----------
+ 2 files changed, 19 insertions(+), 22 deletions(-)
+
+diff --git a/tools/genfilter.py b/tools/genfilter.py
+index 5ffab32..0c5f75a 100644
+--- a/tools/genfilter.py
++++ b/tools/genfilter.py
+@@ -84,24 +84,24 @@ def __init__(self, infile, outfile, record_size):
+
+ def generate(self):
+ size = os.fstat(self.infile.fileno()).st_size
+- n = size / self.record_size
++ n = size // self.record_size
+ m = int(math.ceil(-n*math.log10(ERROR_RATE) /
+ math.pow(math.log10(2), 2)))
+- m = (m/8 + 1)*8
++ m = (m//8 + 1)*8
+ inmem = mmap.mmap(self.infile.fileno(),
+ size,
+ access=mmap.ACCESS_READ)
+- outmem = bytearray(m/8)
+- for i in xrange(0, n):
++ outmem = bytearray(m//8)
++ for i in range(0, n):
+ offset = i*self.record_size
+ b0, b1 = struct.unpack("=LL", inmem[offset:offset+8])
+- for k in xrange(0, 4):
++ for k in range(0, 4):
+ h = murmur_hash3_32(b0, b1, k)
+ h = int(h * (m / float(0xFFFFFFFF)))
+- outmem[h/8] |= (1 << (h%8))
++ outmem[h//8] |= (1 << (h%8))
+ inmem.close()
+- # Convert bytearray to str, for Python 2.6 compatibility.
+- self.outfile.write(str(outmem))
++ # Convert bytearray to bytes, for Python 3 compatibility.
++ self.outfile.write(bytes(outmem))
+
+ if __name__ == '__main__':
+ import sys
+@@ -110,7 +110,7 @@ def generate(self):
+ parser = argparse.ArgumentParser(description='filter')
+ parser.add_argument('infile', type=argparse.FileType('r'),
+ help='input file')
+- parser.add_argument('outfile', type=argparse.FileType('w'),
++ parser.add_argument('outfile', type=argparse.FileType('wb'),
+ help='output file')
+ parser.add_argument('record_size', type=int,
+ help='record size')
+diff --git a/tools/sortlm.py b/tools/sortlm.py
+index a0dd8fe..40f0837 100644
+--- a/tools/sortlm.py
++++ b/tools/sortlm.py
+@@ -40,10 +40,10 @@ def __init__(self, infile, output_prefix):
+ self.__min_cost = 0.0
+
+ def read(self):
+- print "reading N-grams"
++ print("reading N-grams")
+ self.__read_tries()
+ self.__read_ngrams()
+- print "min cost = %lf" % self.__min_cost
++ print("min cost = %lf" % self.__min_cost)
+
+ def __read_tries(self):
+ while True:
+@@ -58,7 +58,7 @@ def __read_tries(self):
+ line = self.__infile.readline()
+ if line == "":
+ break
+- line = line.strip()
++ line = line.strip('\n')
+ if line == "":
+ break
+ match = self.__ngram_line_regex.match(line)
+@@ -89,7 +89,7 @@ def __read_ngrams(self):
+ line = self.__infile.readline()
+ if line == "":
+ break
+- line = line.strip()
++ line = line.strip('\n')
+ if line == "":
+ break
+ match = self.__ngram_line_regex.match(line)
+@@ -125,14 +125,11 @@ def __write_ngrams(self):
+ def quantize(cost, min_cost):
+ return max(0, min(65535, int(cost * 65535 / min_cost)))
+
+- def cmp_header(a, b):
+- return cmp(a[0], b[0])
+-
+- print "writing 1-gram file"
++ print("writing 1-gram file")
+ unigram_offsets = {}
+ unigram_file = open("%s.1gram" % self.__output_prefix, "wb")
+ offset = 0
+- for ids, value in sorted(self.__ngram_entries[0].iteritems()):
++ for ids, value in sorted(self.__ngram_entries[0].items()):
+ unigram_offsets[ids[0]] = offset
+ s = struct.pack("=HHH",
+ quantize(value[0], self.__min_cost),
+@@ -143,13 +140,13 @@ def cmp_header(a, b):
+ offset += 1
+ unigram_file.close()
+
+- print "writing 2-gram file"
++ print("writing 2-gram file")
+ bigram_offsets = {}
+ bigram_file = open("%s.2gram" % self.__output_prefix, "wb")
+ keys = self.__ngram_entries[1].keys()
+ items = [(struct.pack("=LL", ids[1], unigram_offsets[ids[0]]), ids) for ids in keys]
+ offset = 0
+- for header, ids in sorted(items, cmp=cmp_header):
++ for header, ids in sorted(items, key=lambda x: x[0]):
+ value = self.__ngram_entries[1][ids]
+ bigram_offsets[ids] = offset
+ s = struct.pack("=HH",
+@@ -160,11 +157,11 @@ def cmp_header(a, b):
+ bigram_file.close()
+
+ if len(self.__ngram_entries[2]) > 0:
+- print "writing 3-gram file"
++ print("writing 3-gram file")
+ trigram_file = open("%s.3gram" % self.__output_prefix, "wb")
+ keys = self.__ngram_entries[2].keys()
+ items = [(struct.pack("=LL", ids[2], bigram_offsets[(ids[0], ids[1])]), ids) for ids in keys]
+- for header, ids in sorted(items, cmp=cmp_header):
++ for header, ids in sorted(items, key=lambda x: x[0]):
+ value = self.__ngram_entries[2][ids]
+ s = struct.pack("=H",
+ quantize(value[0], self.__min_cost))
diff --git a/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild b/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild
new file mode 100644
index 000000000000..eff62dbe39f0
--- /dev/null
+++ b/app-i18n/libkkc-data/libkkc-data-0.2.7.ebuild
@@ -0,0 +1,20 @@
+# Copyright 1999-2021 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI="7"
+PYTHON_COMPAT=( python3_{7..10} )
+
+inherit python-any-r1
+
+DESCRIPTION="Language model data for libkkc"
+HOMEPAGE="https://github.com/ueno/libkkc"
+SRC_URI="https://github.com/ueno/${PN%-*}/releases/download/v0.3.5/${P}.tar.xz"
+
+LICENSE="GPL-3+"
+SLOT="0"
+KEYWORDS="~amd64 ~x86"
+
+RDEPEND=""
+BDEPEND="${PYTHON_DEPS}"
+
+PATCHES=( "${FILESDIR}"/${PN}-python3.patch )
diff --git a/app-i18n/libkkc-data/metadata.xml b/app-i18n/libkkc-data/metadata.xml
new file mode 100644
index 000000000000..6853c12be89d
--- /dev/null
+++ b/app-i18n/libkkc-data/metadata.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd">
+<pkgmetadata>
+ <maintainer type="project">
+ <email>cjk@gentoo.org</email>
+ <name>Cjk</name>
+ </maintainer>
+ <upstream>
+ <remote-id type="github">ueno/libkkc</remote-id>
+ </upstream>
+</pkgmetadata>