summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Bernard Cafarelli <voyageur@gentoo.org> 2017-01-30 17:43:44 +0100
committer: Bernard Cafarelli <voyageur@gentoo.org> 2017-01-30 17:56:20 +0100
commit: 7032d231288986d9cfa191830e4f79789c589d0b (patch)
tree: fab63ad82d505e05af50e4674c13c577c13652d3 /app-text/tesseract
parent: net-mail/offlineimap: version bump to 7.0.13 (diff)
download: gentoo-7032d231288986d9cfa191830e4f79789c589d0b.tar.gz
gentoo-7032d231288986d9cfa191830e4f79789c589d0b.tar.bz2
gentoo-7032d231288986d9cfa191830e4f79789c589d0b.zip
app-text/tesseract: add live ebuild
AVX/SSE4.1 are detected at runtime, so no need for CPU USE flags

Package-Manager: Portage-2.3.3, Repoman-2.3.1
Diffstat (limited to 'app-text/tesseract')
-rw-r--r-- app-text/tesseract/files/tesseract-4.00.00-use-system-piccolo2d.patch | 27
-rw-r--r-- app-text/tesseract/tesseract-4.00.00_alpha.ebuild | 1
-rw-r--r-- app-text/tesseract/tesseract-9999.ebuild | 142
3 files changed, 169 insertions, 1 deletions
diff --git a/app-text/tesseract/files/tesseract-4.00.00-use-system-piccolo2d.patch b/app-text/tesseract/files/tesseract-4.00.00-use-system-piccolo2d.patch
new file mode 100644
index 00000000000..ad1f0f3df20
--- /dev/null
+++ b/app-text/tesseract/files/tesseract-4.00.00-use-system-piccolo2d.patch
@@ -0,0 +1,27 @@
+diff -Naur tesseract.orig/java/Makefile.am tesseract/java/Makefile.am
+--- tesseract.orig/java/Makefile.am 2017-01-30 17:20:23.135458366 +0100
++++ tesseract/java/Makefile.am 2017-01-30 17:21:29.834462557 +0100
+@@ -36,10 +36,9 @@
+ com/google/scrollview/ScrollView.class
+
+ SCROLLVIEW_LIBS = \
+- piccolo2d-core-3.0.jar \
+- piccolo2d-extras-3.0.jar
++ /usr/share/piccolo2d/lib/piccolo2d.jar
+
+-CLASSPATH = piccolo2d-core-3.0.jar:piccolo2d-extras-3.0.jar
++CLASSPATH = /usr/share/piccolo2d/lib/piccolo2d.jar
+
+ ScrollView.jar : $(SCROLLVIEW_CLASSES)
+ $(JAR) cfm $@ $(srcdir)/Manifest.txt com/google/scrollview/*.class \
+diff -Naur tesseract.orig/viewer/Makefile.am tesseract/viewer/Makefile.am
+--- tesseract.orig/viewer/Makefile.am 2017-01-30 17:20:23.176458368 +0100
++++ tesseract/viewer/Makefile.am 2017-01-30 17:22:39.281467163 +0100
+@@ -17,3 +17,7 @@
+
+ libtesseract_viewer_la_SOURCES = \
+ scrollview.cpp svmnode.cpp svutil.cpp svpaint.cpp
++
++if !GRAPHICS_DISABLED
++AM_CPPFLAGS += -DSCROLLVIEW_PATH=/usr/share/tessdata/
++endif
diff --git a/app-text/tesseract/tesseract-4.00.00_alpha.ebuild b/app-text/tesseract/tesseract-4.00.00_alpha.ebuild
index d5d4de1dcc1..029aee60eef 100644
--- a/app-text/tesseract/tesseract-4.00.00_alpha.ebuild
+++ b/app-text/tesseract/tesseract-4.00.00_alpha.ebuild
@@ -21,7 +21,6 @@ SRC_URI="https://github.com/${MY_PN}/${PN}/archive/${PV/_}.tar.gz -> ${P}.tar.gz
LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~alpha ~amd64 ~arm ~mips ~ppc ~ppc64 ~sparc ~x86"
-# Next release will include SSE4.1/AVX
IUSE="doc examples jpeg math opencl openmp osd png scrollview static-libs tiff training webp"
# List of supported Gentoo linguas and their upstream mapping
diff --git a/app-text/tesseract/tesseract-9999.ebuild b/app-text/tesseract/tesseract-9999.ebuild
new file mode 100644
index 00000000000..883c5c3bdb0
--- /dev/null
+++ b/app-text/tesseract/tesseract-9999.ebuild
@@ -0,0 +1,142 @@
+# Copyright 1999-2017 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+# $Id$
+
+EAPI=6
+
+MY_PN="tesseract-ocr"
+LANGPACKV="4.00"
+URI_PREFIX="https://github.com/${MY_PN}/tessdata/raw/${LANGPACKV}/"
+JAVA_PKG_OPT_USE="scrollview"
+
+inherit autotools git-r3 java-pkg-opt-2 toolchain-funcs
+
+DESCRIPTION="An OCR Engine, orginally developed at HP, now open source."
+HOMEPAGE="https://github.com/tesseract-ocr"
+SRC_URI="${URI_PREFIX}eng.traineddata -> eng.traineddata-${LANGPACKV}
+ math? ( ${URI_PREFIX}equ.traineddata -> equ.traineddata-${LANGPACKV} )
+ osd? ( ${URI_PREFIX}osd.traineddata -> osd.traineddata-${LANGPACKV} )"
+EGIT_REPO_URI="https://github.com/${MY_PN}/${PN}.git"
+
+LICENSE="Apache-2.0"
+SLOT="0"
+KEYWORDS=""
+IUSE="doc examples jpeg math opencl openmp osd png scrollview static-libs tiff training webp"
+
+# List of supported Gentoo linguas and their upstream mapping
+# https://github.com/tesseract-ocr/tesseract/wiki/Data-Files
+# "old" variants were regrouped in the matching modern locale
+LANGUAGES="af:afr am:amh ar:ara as:asm az:aze,aze_cyrl be:bel bn:ben bo:bod bs:bos bg:bul ca:cat cs:ces zh:chi_sim,chi_tra cy:cym da:dan de:deu,frk dz:dzo el:ell,grc en:enm eo:epo et:est eu:eus fa:fas fi:fin fr:fra,frm ga:gle gl:glg gu:guj he:heb hi:hin hr:hrv hu:hun id:ind is:isl it:ita,ita_old ja:jpn kn:kan ka:kat,kat_old kk:kaz km:khm ky:kir ko:kor ku:kur lo:lao la:lat lv:lav lt:lit ml:mal mr:mar mk:mkd ms:msa my:mya ne:nep nl:nld no:nor or:ori pa:pan pl:pol pt:por ro:ron ru:rus sa:san si:sin sk:slk sl:slv es:spa,spa_old sq:sqi sr:srp,srp_latn sw:swa sv:swe syc:syr ta:tam te:tel tg:tgk tl:tgl th:tha tr:tur ug:uig uk:ukr uz:uzb,uzb_cyrl vi:vie"
+# Missing matches:
+# ceb Cebuano
+# chr Cherokee
+# hat Haitian; Haitian Creole
+# iku Inuktitut
+# jav Javanese
+# mlt Maltese
+# pus Pushto; Pashto
+# tir Tigrinya
+# urd Urdu
+# yid Yiddish
+# l10n_en provides the additional data:
+# enm English, Middle (1100-1500)
+
+# Expand every "gentoo:upstream[,upstream...]" entry of LANGUAGES into one
+# l10n_* USE flag plus one conditional download per upstream data file.
+for lang in ${LANGUAGES}; do
+	# Text before the colon is the Gentoo l10n code.
+	gentoo_lang=${lang%:*}
+	IUSE+=" l10n_${gentoo_lang}"
+
+	# Text after the colon is a comma-separated list of upstream data names.
+	tess_langs=${lang#*:}
+	for tess_lang in ${tess_langs//,/ }; do
+		SRC_URI+=" l10n_${gentoo_lang}? ("
+		SRC_URI+=" ${URI_PREFIX}${tess_lang}.traineddata"
+		SRC_URI+=" -> ${tess_lang}.traineddata-${LANGPACKV} )"
+	done
+done
+
+# With opencl USE=tiff is necessary in leptonica
+CDEPEND=">=media-libs/leptonica-1.74:=[zlib,tiff?,jpeg?,png?,webp?]
+ opencl? (
+ virtual/opencl
+ media-libs/tiff:0=
+ media-libs/leptonica:=[tiff]
+ )
+ scrollview? (
+ >=dev-java/piccolo2d-3.0:0
+ )
+ training? (
+ dev-libs/icu:=
+ x11-libs/pango:=
+ x11-libs/cairo:=
+ )"
+
+DEPEND="${CDEPEND}
+ doc? ( app-doc/doxygen )
+ scrollview? ( >=virtual/jdk-1.7 )"
+
+RDEPEND="${CDEPEND}
+ scrollview? ( >=virtual/jre-1.7 )"
+
+DOCS=( AUTHORS ChangeLog NEWS README.md )
+
+PATCHES=(
+ "${FILESDIR}/${PN}-4.00.00-use-system-piccolo2d.patch"
+)
+
+# Pretend-phase check: for a non-binary merge with USE=openmp, run
+# tc-check-openmp.
+pkg_pretend() {
+	[[ "${MERGE_TYPE}" != "binary" ]] \
+		&& use openmp \
+		&& tc-check-openmp
+}
+
+# Setup-phase check, same condition as pkg_pretend: for a non-binary merge
+# with USE=openmp, run tc-check-openmp.
+pkg_setup() {
+	[[ "${MERGE_TYPE}" != "binary" ]] \
+		&& use openmp \
+		&& tc-check-openmp
+}
+
+# Check out the live sources, then copy every fetched *traineddata* distfile
+# into the tessdata/ directory of the checkout.
+src_unpack() {
+	# Keep the loop variable out of the global ebuild environment.
+	local file
+
+	git-r3_src_unpack
+
+	# ${A} is deliberately unquoted: it is a whitespace-separated file list.
+	for file in ${A}; do
+		if [[ ${file} == *traineddata* ]]; then
+			# Distfiles are named <lang>.traineddata-<LANGPACKV>; dropping the
+			# trailing "-<version>" restores the upstream file name.
+			cp "${DISTDIR}/${file}" "${S}/tessdata/${file%-*}" || die
+		fi
+	done
+}
+
+# Prepare the checkout: run the default preparation, regenerate the autotools
+# build system, then run the java-pkg-opt-2 preparation step.
+src_prepare() {
+ # In EAPI 6 the default src_prepare applies PATCHES and user patches.
+ default
+ # The shipped patch edits Makefile.am files, so the build system is
+ # regenerated after patching.
+ eautoreconf
+
+ java-pkg-opt-2_src_prepare
+}
+
+src_configure() {
+ local myeconfargs=(
+ --enable-shared
+ $(use_enable opencl)
+ $(use_enable openmp)
+ $(use_enable scrollview graphics)
+ $(use_enable static-libs static)
+ )
+
+ econf "${myeconfargs[@]}"
+}
+
+src_compile() {
+ default
+ use doc && emake doc
+ use scrollview && emake ScrollView.jar JAVAC="javac $(java-pkg_javac-args)"
+ use training && emake training
+}
+
+# Run the default install, then add the training tools, example images,
+# language data and the ScrollView jar as selected by USE flags.
+src_install() {
+	# HTML_DOCS has to be set before "default" runs.
+	if use doc; then
+		HTML_DOCS=( doc/html/. )
+	fi
+	default
+	prune_libtool_files
+
+	use training && emake DESTDIR="${D}" training-install
+
+	if use examples; then
+		insinto /usr/share/doc/${PF}/examples
+		doins testing/eurotext.tif testing/phototest.tif
+	fi
+
+	# Language files and the ScrollView jar share the same directory.
+	insinto /usr/share/tessdata
+	doins tessdata/*traineddata*
+	# Kept as a short-circuit so the function's exit status is unchanged.
+	use scrollview && doins java/ScrollView.jar
+}