Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Explicitly encode output as UTF-8 #237

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ocropus-econf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ import numpy as np
import ocrolib
from ocrolib import edist

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)
sys.stderr = utf8writer(sys.stderr)

# disable rank warnings from polyfit
warnings.simplefilter('ignore',np.RankWarning)

Expand Down
5 changes: 5 additions & 0 deletions ocropus-errs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@ import sys
import os
import os.path
import multiprocessing
import codecs

import ocrolib
from ocrolib import edist

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)
sys.stderr = utf8writer(sys.stderr)

parser = argparse.ArgumentParser(description = """
Compute the edit distances between ground truth and recognizer output.
Run with the ground truth files as arguments, and it will find the
Expand Down
9 changes: 7 additions & 2 deletions ocropus-gpageseg
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import os
import os.path
import sys
import traceback
import codecs
from multiprocessing import Pool

import numpy as np
Expand All @@ -31,6 +32,10 @@ from ocrolib import psegutils,morph,sl
from ocrolib.exceptions import OcropusException
from ocrolib.toplevel import *

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)
sys.stderr = utf8writer(sys.stderr)

parser = argparse.ArgumentParser(add_help=False)

# error checking
Expand Down Expand Up @@ -131,11 +136,11 @@ def check_page(image):


def print_info(*objs):
-    print("INFO: ", *objs, file=sys.stdout)
+    print("INFO: ", " ".join(objs))


def print_error(*objs):
-    print("ERROR: ", *objs, file=sys.stderr)
+    print("ERROR: ", " ".join(objs), file=sys.stderr)


if len(args.files)<1:
Expand Down
9 changes: 7 additions & 2 deletions ocropus-nlbin
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import argparse
import os
import multiprocessing
import sys
import codecs

import numpy as np
import matplotlib.pyplot as plt
Expand All @@ -14,6 +15,10 @@ from scipy import stats

import ocrolib

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)
sys.stderr = utf8writer(sys.stderr)


parser = argparse.ArgumentParser("""
Image binarization using non-linear processing.
Expand Down Expand Up @@ -51,11 +56,11 @@ if len(args.files)<1:


def print_info(*objs):
-    print("INFO: ", *objs, file=sys.stdout)
+    print("INFO: ", " ".join(objs))


def print_error(*objs):
-    print("ERROR: ", *objs, file=sys.stderr)
+    print("ERROR: ", " ".join(objs), file=sys.stderr)


def check_page(image):
Expand Down
8 changes: 6 additions & 2 deletions ocropus-rpred
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ from ocrolib import lstm
from ocrolib import edist
from ocrolib.exceptions import FileNotFound, OcropusException

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)
sys.stderr = utf8writer(sys.stderr)

parser = argparse.ArgumentParser("apply an RNN recognizer")

# error checking
Expand Down Expand Up @@ -72,11 +76,11 @@ args = parser.parse_args()


def print_info(*objs):
-    print("INFO: ", *objs, file=sys.stdout)
+    print("INFO: ", " ".join(objs))


def print_error(*objs):
-    print("ERROR: ", *objs, file=sys.stderr)
+    print("ERROR: ", " ".join(objs), file=sys.stderr)


def check_line(image):
Expand Down
4 changes: 4 additions & 0 deletions ocropus-rtrain
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import os.path
import traceback
import argparse
import sys
import codecs

import numpy as np
import matplotlib.pyplot as plt
Expand All @@ -16,6 +17,9 @@ import ocrolib
import ocrolib.lstm as lstm
from ocrolib import lineest

utf8writer = codecs.getwriter('utf8')
sys.stdout = utf8writer(sys.stdout)

np.seterr(divide='raise',over='raise',invalid='raise',under='ignore')

parser = argparse.ArgumentParser("train an RNN recognizer")
Expand Down