|
1 | | -FROM docker.io/ocrd/core:v2.67.2 AS base |
2 | | -# set proper locales |
3 | | -ENV LANG C.UTF-8 |
4 | | -ENV LC_ALL C.UTF-8 |
| 1 | +ARG DOCKER_BASE_IMAGE |
| 2 | +FROM ${DOCKER_BASE_IMAGE} AS base |
5 | 3 | # install ocrd-tesserocr (until here commands for installing tesseract-ocr) |
6 | 4 | ARG VCS_REF |
7 | 5 | ARG BUILD_DATE |
8 | | -LABEL \ |
9 | | - maintainer="https://ocr-d.de/kontakt" \ |
| 6 | +# re-declare the base image build arg: an ARG given before FROM is not visible inside the stage |
| 7 | +ARG DOCKER_BASE_IMAGE |
| 8 | +LABEL maintainer="https://ocr-d.de/kontakt" \ |
10 | 9 | org.label-schema.vcs-ref=$VCS_REF \ |
11 | 10 | org.label-schema.vcs-url="https://github.com/OCR-D/ocrd_tesserocr" \ |
12 | | - org.label-schema.build-date=$BUILD_DATE |
| 11 | + org.label-schema.build-date=$BUILD_DATE \ |
| 12 | + org.opencontainers.image.vendor="DFG-Funded Initiative for Optical Character Recognition Development" \ |
| 13 | + org.opencontainers.image.title="ocrd_tesserocr" \ |
| 14 | + org.opencontainers.image.description="Tesseract OCR bindings" \ |
| 15 | + org.opencontainers.image.source="https://github.com/OCR-D/ocrd_tesserocr" \ |
| 16 | + org.opencontainers.image.documentation="https://github.com/OCR-D/ocrd_tesserocr/blob/${VCS_REF}/README.md" \ |
| 17 | + org.opencontainers.image.revision=$VCS_REF \ |
| 18 | + org.opencontainers.image.created=$BUILD_DATE \ |
| 19 | + org.opencontainers.image.base.name=$DOCKER_BASE_IMAGE |
13 | | - |
|
14 | | -ENV PYTHONIOENCODING utf8 |
15 | 20 |
|
16 | | -# set frontend non-interactive to silence interactive tzdata config |
| 21 | +# set frontend non-interactive to silence interactive tzdata config (build-time ARG, so it is not baked into the runtime environment) |
17 | 22 | ARG DEBIAN_FRONTEND=noninteractive |
| 23 | +# set proper locales |
| 24 | +ENV PYTHONIOENCODING=utf8 |
| 25 | +ENV LANG=C.UTF-8 |
| 26 | +ENV LC_ALL=C.UTF-8 |
18 | 27 |
|
19 | | -# set proper date and timezone in container |
20 | | -RUN echo "Europe/Berlin" > /etc/timezone |
21 | | -RUN ln -sf /usr/share/zoneinfo/Europe/Berlin /etc/localtime |
22 | | -RUN dpkg-reconfigure -f noninteractive tzdata |
23 | | - |
| 28 | +# configure the container's date and timezone (Europe/Berlin) in a single layer |
| 29 | +RUN echo "Europe/Berlin" > /etc/timezone \ |
| 30 | + && ln -sf /usr/share/zoneinfo/Europe/Berlin /etc/localtime \ |
| 31 | + && dpkg-reconfigure -f noninteractive tzdata |
24 | 32 | # diagnostic output - check timezone settings |
25 | 33 | # RUN cat /etc/timezone |
26 | 34 |
|
27 | 35 | # avoid HOME/.local/share (hard to predict USER here) |
28 | 36 | # so let XDG_DATA_HOME coincide with fixed system location |
29 | 37 | # (can still be overridden by derived stages) |
30 | | -ENV XDG_DATA_HOME /usr/local/share |
| 38 | +ENV XDG_DATA_HOME=/usr/local/share |
| 39 | +# avoid the need for an extra volume for persistent resource user db |
| 40 | +# (i.e. XDG_CONFIG_HOME/ocrd/resources.yml) |
31 | | -ENV XDG_CONFIG_HOME /usr/local/share/ocrd-resources |
32 | | -ENV TESSDATA_PREFIX $XDG_DATA_HOME/tessdata |
| 41 | +ENV XDG_CONFIG_HOME=/usr/local/share/ocrd-resources |
| 42 | +ENV TESSDATA_PREFIX=${XDG_DATA_HOME}/tessdata |
33 | 43 |
|
34 | 44 | WORKDIR /build/ocrd_tesserocr |
35 | | -COPY setup.py . |
36 | | -COPY ocrd_tesserocr/ocrd-tool.json . |
37 | | -COPY README.md . |
38 | | -COPY requirements.txt . |
39 | | -COPY requirements_test.txt . |
40 | | -COPY .git .git |
41 | | -COPY .gitmodules . |
42 | | -COPY ocrd_tesserocr ocrd_tesserocr |
43 | | -COPY repo/tesserocr repo/tesserocr |
44 | | -COPY repo/tesseract repo/tesseract |
45 | | -COPY Makefile . |
| 45 | +COPY . . |
| 46 | +# prepackage ocrd-tool.json as ocrd-all-tool.json (written next to the file reported by `ocrd bashlib filename`) |
| 47 | +RUN ocrd ocrd-tool ocrd_tesserocr/ocrd-tool.json dump-tools > "$(dirname "$(ocrd bashlib filename)")/ocrd-all-tool.json" |
| 48 | +# install everything and reduce image size (build tree, build-only packages and apt lists are dropped in the same layer) |
46 | 49 | RUN make deps-ubuntu \ |
47 | | - && make -j4 install-tesseract \ |
48 | | - && make -j4 install-tesseract-training \ |
49 | | - && make deps install \ |
| 50 | + && make -j4 install GIT_SUBMODULE=: \ |
| 51 | + && make -j4 install-tesseract-training GIT_SUBMODULE=: \ |
50 | 52 | && rm -rf /build/ocrd_tesserocr \ |
51 | | - && apt-get -y remove --auto-remove g++ libtesseract-dev make |
| 53 | + && apt-get -y remove --auto-remove g++ libtesseract-dev make \ |
| 54 | + && rm -rf /var/lib/apt/lists/* |
52 | 55 |
|
|
0 commit comments