From 82eb554f64ee5d574b7b72bd3a874fe2c5e220e6 Mon Sep 17 00:00:00 2001 From: Philipp Zumstein Date: Tue, 11 Oct 2016 18:03:38 +0200 Subject: [PATCH] Changed reading order algorithmus slightly --- ocrolib/psegutils.py | 35 +++++++++++++++++++++++++++++++++-- ocropus-gpageseg | 2 ++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/ocrolib/psegutils.py b/ocrolib/psegutils.py index 7f4fdd91..538344d8 100644 --- a/ocrolib/psegutils.py +++ b/ocrolib/psegutils.py @@ -127,24 +127,54 @@ def reading_order(lines,highlight=None,debug=0): order = zeros((len(lines),len(lines)),'B') def x_overlaps(u,v): return u[1].startv[1].start + #explanation: u,v are disjoint <=> u[1].start>v[1].stop or u[1].stopv[0].start def above(u,v): return u[0].startmin(u[0].stop,v[0].stop) and w[0].stopmax(u[0].stop,v[0].stop): return 0 if w[1].startv[1].start: return 1 + def is_corner(w,u,v): + if w==u or w==v: return 0 + if x_overlaps(v,w) and y_overlaps(u,w): return 1 + if x_overlaps(u,w) and y_overlaps(v,w): return 1 + return 0 if highlight is not None: clf(); title("highlight"); imshow(binary); ginput(1,debug) for i,u in enumerate(lines): for j,v in enumerate(lines): + #if i==0 and j==90: print "### i =",i,"### j =",j,"###" if x_overlaps(u,v): + #if i==0 and j==90: print "x overlaps" if above(u,v): order[i,j] = 1 else: - if [w for w in lines if separates(w,u,v)]==[]: - if left_of(u,v): order[i,j] = 1 + #if i==0 and j==90: print "NO x overlaps" + if y_overlaps(u,v): + #if i==0 and j==90: print "y overlaps" + if left_of(u,v): + order[i,j] = 1 + else: + #if i==0 and j==90: print "NO y overlaps" + corners = [w for w in lines if is_corner(w,u,v)] + dividers = [w for w in lines if divides(w,u,v)] + if i==0 and j==90: print corners + if i==0 and j==90: print dividers + if corners==[] or dividers!=[]: + if above(u,v): + order[i,j] = 1 + else: + if left_of(u,v): + order[i,j] = 1 if j==highlight and order[i,j]: print (i,j), y0,x0 = sl.center(lines[i]) @@ -170,6 +200,7 @@ def visit(k): L.append(k) for k in range(n): visit(k) + print(L) return L #[::-1] def show_lines(image,lines,lsort): diff --git a/ocropus-gpageseg b/ocropus-gpageseg index 3b671729..a0402767 100755 --- a/ocropus-gpageseg +++ b/ocropus-gpageseg @@ -387,6 +387,8 @@ def process1(job): if not args.quiet: print_info("finding reading order") lines = psegutils.compute_lines(segmentation,scale) order = psegutils.reading_order([l.bounds for l in lines]) + if args.debug: + print(order) lsort = psegutils.topsort(order) # renumber the labels so that they conform to the specs