Commit d823ba4f authored by Konstantin Baierer's avatar Konstantin Baierer Committed by GitHub
Browse files

Merge pull request #206 from zuphilip/imports

Standardize Imports I, #176
parents 50277940 8f354ad4
......@@ -2,20 +2,17 @@
from __future__ import print_function
import random as pyrandom
import re
from pylab import *
import os.path
import ocrolib
import argparse
import matplotlib
import numpy
import sys
import numpy as np
from scipy.misc import imsave
import ocrolib
from ocrolib import lineest
import ocrolib.lstm as lstm
import traceback
import scipy
numpy.seterr(divide='raise',over='raise',invalid='raise',under='ignore')
np.seterr(divide='raise',over='raise',invalid='raise',under='ignore')
parser = argparse.ArgumentParser("dewarp lines as used for the recognizer")
......@@ -36,9 +33,9 @@ if len(inputs)==0:
print("# inputs", len(inputs))
if args.lineest=="center":
lnorm = lineest.CenterNormalizer()
lnorm = lineest.CenterNormalizer()
else:
raise Exception(args.lineest+": unknown line normalizer")
raise Exception(args.lineest+": unknown line normalizer")
lnorm.setHeight(args.height)
......@@ -47,9 +44,9 @@ for fname in inputs:
base,_ = ocrolib.allsplitext(fname)
try:
line = ocrolib.read_image_gray(fname)
lnorm.measure(amax(line)-line)
line = lnorm.normalize(line,cval=amax(line))
scipy.misc.imsave(base+".dew.png",line)
lnorm.measure(np.amax(line)-line)
line = lnorm.normalize(line,cval=np.amax(line))
imsave(base+".dew.png",line)
except Exception as e:
print("ERROR", e)
continue
......@@ -3,14 +3,21 @@
from __future__ import print_function
import warnings,numpy,argparse,sys,os,os.path,multiprocessing,codecs
import warnings
import argparse
import sys
import os.path
import multiprocessing
import codecs
from collections import Counter
import numpy as np
import ocrolib
from pylab import *
from ocrolib import edist
# disable rank warnings from polyfit
warnings.simplefilter('ignore',numpy.RankWarning)
warnings.simplefilter('ignore',np.RankWarning)
parser = argparse.ArgumentParser(description = """
Compute the edit distances between ground truth and recognizer output.
......
......@@ -3,7 +3,12 @@
from __future__ import print_function
import argparse,sys,os,os.path,multiprocessing
import argparse
import sys
import os
import os.path
import multiprocessing
import ocrolib
from ocrolib import edist
......@@ -27,6 +32,7 @@ args.files = ocrolib.glob_all(args.files)
if not ".gt." in args.files[0]:
sys.stderr.write("warning: compare on .gt.txt files, not .txt files\n")
def process1(fname):
# fgt = ocrolib.allsplitext(fname)[0]+args.gtextension
gt = ocrolib.project_text(ocrolib.read_text(fname),kind=args.kind)
......
......@@ -13,13 +13,19 @@
from __future__ import print_function
from pylab import *
import argparse,glob,os,os.path
import argparse
import glob
import os
import os.path
import sys
import traceback
from multiprocessing import Pool
import numpy as np
from scipy.ndimage import measurements
from scipy.misc import imsave
from scipy.ndimage.filters import gaussian_filter,uniform_filter,maximum_filter
from multiprocessing import Pool
import ocrolib
from ocrolib import psegutils,morph,sl
from ocrolib.exceptions import OcropusException
......@@ -100,18 +106,21 @@ parser.add_argument('files',nargs='+')
args = parser.parse_args()
args.files = ocrolib.glob_all(args.files)
def norm_max(v):
return v/amax(v)
return v/np.amax(v)
def check_page(image):
if len(image.shape)==3: return "input image is color image %s"%(image.shape,)
if mean(image)<median(image): return "image may be inverted"
if np.mean(image)<np.median(image): return "image may be inverted"
h,w = image.shape
if h<600: return "image not tall enough for a page image %s"%(image.shape,)
if h>10000: return "image too tall for a page image %s"%(image.shape,)
if w<600: return "image too narrow for a page image %s"%(image.shape,)
if w>10000: return "line too wide for a page image %s"%(image.shape,)
slots = int(w*h*1.0/(30*30))
_,ncomps = measurements.label(image>mean(image))
_,ncomps = measurements.label(image>np.mean(image))
if ncomps<10: return "too few connected components for a page image (got %d)"%(ncomps,)
    if ncomps>slots: return "too many connected components for a page image (%d > %d)"%(ncomps,slots)
return None
......@@ -120,6 +129,7 @@ def check_page(image):
def print_info(*objs):
print("INFO: ", *objs, file=sys.stdout)
def print_error(*objs):
print("ERROR: ", *objs, file=sys.stderr)
......@@ -135,21 +145,22 @@ print_info("")
if args.parallel>1:
args.quiet = 1
def B(a):
if a.dtype==dtype('B'): return a
return array(a,'B')
if a.dtype==np.dtype('B'): return a
return np.array(a,'B')
def DSAVE(title,image):
if not args.debug: return
if type(image)==list:
assert len(image)==3
image = transpose(array(image),[1,2,0])
image = np.transpose(np.array(image),[1,2,0])
fname = "_"+title+".png"
print_info("debug " + fname)
imsave(fname,image)
################################################################
### Column finding.
###
......@@ -170,12 +181,13 @@ def compute_separators_morph(binary,scale):
vert = morph.select_regions(vert,sl.dim0,min=20*scale,nbest=args.maxseps)
return vert
def compute_colseps_morph(binary,scale,maxseps=3,minheight=20,maxwidth=5):
"""Finds extended vertical whitespace corresponding to column separators
using morphological operations."""
boxmap = psegutils.compute_boxmap(binary,scale,dtype='B')
bounds = morph.rb_closing(B(boxmap),(int(5*scale),int(5*scale)))
bounds = maximum(B(1-bounds),B(boxmap))
bounds = np.maximum(B(1-bounds),B(boxmap))
cols = 1-morph.rb_closing(boxmap,(int(20*scale),int(scale)))
cols = morph.select_regions(cols,sl.aspect,min=args.csminaspect)
cols = morph.select_regions(cols,sl.dim0,min=args.csminheight*scale,nbest=args.maxcolseps)
......@@ -183,25 +195,27 @@ def compute_colseps_morph(binary,scale,maxseps=3,minheight=20,maxwidth=5):
cols = morph.r_dilation(cols,(int(0.5+scale),0),origin=(int(scale/2)-1,0))
return cols
def compute_colseps_mconv(binary,scale=1.0):
"""Find column separators using a combination of morphological
operations and convolution."""
h,w = binary.shape
smoothed = gaussian_filter(1.0*binary,(scale,scale*0.5))
smoothed = uniform_filter(smoothed,(5.0*scale,1))
thresh = (smoothed<amax(smoothed)*0.1)
thresh = (smoothed<np.amax(smoothed)*0.1)
DSAVE("1thresh",thresh)
blocks = morph.rb_closing(binary,(int(4*scale),int(4*scale)))
DSAVE("2blocks",blocks)
seps = minimum(blocks,thresh)
seps = np.minimum(blocks,thresh)
seps = morph.select_regions(seps,sl.dim0,min=args.csminheight*scale,nbest=args.maxcolseps)
DSAVE("3seps",seps)
blocks = morph.r_dilation(blocks,(5,5))
DSAVE("4blocks",blocks)
seps = maximum(seps,1-blocks)
seps = np.maximum(seps,1-blocks)
DSAVE("5combo",seps)
return seps
def compute_colseps_conv(binary,scale=1.0):
"""Find column separators by convoluation and
thresholding."""
......@@ -209,16 +223,16 @@ def compute_colseps_conv(binary,scale=1.0):
# find vertical whitespace by thresholding
smoothed = gaussian_filter(1.0*binary,(scale,scale*0.5))
smoothed = uniform_filter(smoothed,(5.0*scale,1))
thresh = (smoothed<amax(smoothed)*0.1)
thresh = (smoothed<np.amax(smoothed)*0.1)
DSAVE("1thresh",thresh)
# find column edges by filtering
grad = gaussian_filter(1.0*binary,(scale,scale*0.5),order=(0,1))
grad = uniform_filter(grad,(10.0*scale,1))
# grad = abs(grad) # use this for finding both edges
grad = (grad>0.5*amax(grad))
grad = (grad>0.5*np.amax(grad))
DSAVE("2grad",grad)
# combine edges and whitespace
seps = minimum(thresh,maximum_filter(grad,(int(scale),int(5*scale))))
seps = np.minimum(thresh,maximum_filter(grad,(int(scale),int(5*scale))))
seps = maximum_filter(seps,(int(2*scale),1))
DSAVE("3seps",seps)
# select only the biggest column separators
......@@ -226,6 +240,7 @@ def compute_colseps_conv(binary,scale=1.0):
DSAVE("4seps",seps)
return seps
def compute_colseps(binary,scale):
"""Computes column separators either from vertical black lines or whitespace."""
print_info("considering at most %g whitespace column separators" % args.maxcolseps)
......@@ -241,12 +256,11 @@ def compute_colseps(binary,scale):
seps = compute_separators_morph(binary,scale)
DSAVE("colseps",0.7*seps+0.3*binary)
#colseps = compute_colseps_morph(binary,scale)
colseps = maximum(colseps,seps)
binary = minimum(binary,1-seps)
colseps = np.maximum(colseps,seps)
binary = np.minimum(binary,1-seps)
return colseps,binary
################################################################
### Text Line Finding.
###
......@@ -273,6 +287,7 @@ def compute_gradmaps(binary,scale):
top = ocrolib.norm_max((grad>0)*grad)
return bottom,top,boxmap
def compute_line_seeds(binary,bottom,top,colseps,scale):
"""Base on gradient maps, computes candidates for baselines
and xheights. Then, it marks the regions between the two
......@@ -280,14 +295,14 @@ def compute_line_seeds(binary,bottom,top,colseps,scale):
t = args.threshold
vrange = int(args.vscale*scale)
bmarked = maximum_filter(bottom==maximum_filter(bottom,(vrange,0)),(2,2))
bmarked = bmarked*(bottom>t*amax(bottom)*t)*(1-colseps)
bmarked = bmarked*(bottom>t*np.amax(bottom)*t)*(1-colseps)
tmarked = maximum_filter(top==maximum_filter(top,(vrange,0)),(2,2))
tmarked = tmarked*(top>t*amax(top)*t/2)*(1-colseps)
tmarked = tmarked*(top>t*np.amax(top)*t/2)*(1-colseps)
tmarked = maximum_filter(tmarked,(1,20))
seeds = zeros(binary.shape,'i')
seeds = np.zeros(binary.shape,'i')
delta = max(3,int(scale/2))
for x in range(bmarked.shape[1]):
transitions = sorted([(y,1) for y in find(bmarked[:,x])]+[(y,0) for y in find(tmarked[:,x])])[::-1]
    transitions = sorted([(y,1) for y in np.where(bmarked[:,x])[0]]+[(y,0) for y in np.where(tmarked[:,x])[0]])[::-1]
transitions += [(0,0)]
for l in range(len(transitions)-1):
y0,s0 = transitions[l]
......@@ -302,7 +317,6 @@ def compute_line_seeds(binary,bottom,top,colseps,scale):
return seeds
################################################################
### The complete line segmentation process.
################################################################
......@@ -313,12 +327,13 @@ def remove_hlines(binary,scale,maxsize=10):
for i,b in enumerate(objects):
if sl.width(b)>maxsize*scale:
labels[b][labels[b]==i+1] = 0
return array(labels!=0,'B')
return np.array(labels!=0,'B')
def compute_segmentation(binary,scale):
"""Given a binary image, compute a complete segmentation into
lines, computing both columns and text lines."""
binary = array(binary,'B')
binary = np.array(binary,'B')
# start by removing horizontal black lines, which only
# interfere with the rest of the page segmentation
......@@ -340,12 +355,11 @@ def compute_segmentation(binary,scale):
llabels = morph.propagate_labels(boxmap,seeds,conflict=0)
if not args.quiet: print_info("spreading labels")
spread = morph.spread_labels(seeds,maxdist=scale)
llabels = where(llabels>0,llabels,spread*binary)
llabels = np.where(llabels>0,llabels,spread*binary)
segmentation = llabels*binary
return segmentation
################################################################
### Processing each file.
################################################################
......@@ -369,7 +383,7 @@ def process1(job):
checktype(binary,ABINARY2)
if not args.nocheck:
check = check_page(amax(binary)-binary)
check = check_page(np.amax(binary)-binary)
if check is not None:
print_error("%s SKIPPED %s (use -n to disable this check)" % (fname, check))
return
......@@ -386,7 +400,7 @@ def process1(job):
else:
scale = args.scale
print_info("scale %f" % (scale))
if isnan(scale) or scale>1000.0:
if np.isnan(scale) or scale>1000.0:
print_error("%s: bad scale (%g); skipping\n" % (fname, scale))
return
if scale<args.minscale:
......@@ -397,10 +411,10 @@ def process1(job):
if not args.quiet: print_info("computing segmentation")
segmentation = compute_segmentation(binary,scale)
if amax(segmentation)>args.maxlines:
print_error("%s: too many lines %g" % (fname, amax(segmentation)))
if np.amax(segmentation)>args.maxlines:
print_error("%s: too many lines %g" % (fname, np.amax(segmentation)))
return
if not args.quiet: print_info("number of lines %g" % amax(segmentation))
if not args.quiet: print_info("number of lines %g" % np.amax(segmentation))
# compute the reading order
......@@ -411,8 +425,8 @@ def process1(job):
# renumber the labels so that they conform to the specs
nlabels = amax(segmentation)+1
renumber = zeros(nlabels,'i')
nlabels = np.amax(segmentation)+1
renumber = np.zeros(nlabels,'i')
for i,v in enumerate(lsort): renumber[lines[v].label] = 0x010000+(i+1)
segmentation = renumber[segmentation]
......@@ -437,6 +451,7 @@ if len(args.files)==1 and os.path.isdir(args.files[0]):
else:
files = args.files
def safe_process1(job):
fname,i = job
try:
......
......@@ -2,11 +2,17 @@
from __future__ import print_function
from pylab import *
import argparse,codecs,re,os.path,ocrolib,base64
from lxml import etree
import argparse
import codecs
import re
import os.path
import base64
import urllib2
from lxml import etree
import ocrolib
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(help="subcommands",dest="subparser_name")
......
......@@ -2,8 +2,15 @@
import __builtin__ as python
import random as pyrandom
import sys,os,re,glob,argparse,codecs
from pylab import median, imread
import sys
import os.path
import re
import glob
import argparse
import codecs
import numpy as np
from matplotlib.pyplot import imread
import ocrolib
from ocrolib import hocr
......@@ -47,12 +54,12 @@ xhfiles = python.sum([glob.glob(d+"/??????.xheight") for d in dirs],[])
if len(xhfiles)>5:
xheights = [float(ocrolib.read_text(f)) for f in xhfiles]
if len(xheights)>0:
median_xheight = median(xheights)
median_xheight = np.median(xheights)
else:
lfiles = python.sum([glob.glob(d+"/??????.bin.png") for d in dirs],[])
pyrandom.shuffle(lfiles)
if len(lfiles)>0:
median_xheight = 0.5*median([imread(f).shape[0] for f in lfiles[:100]])
median_xheight = 0.5*np.median([imread(f).shape[0] for f in lfiles[:100]])
E("median_xheight",median_xheight)
P(hocr.header())
......@@ -127,7 +134,7 @@ for arg in args.files:
if median_xheight is not None and os.path.exists(lbase+".xheight"):
xheight = float(ocrolib.read_text(lbase+".xheight"))
perc = int(clip(xheight*100.0/median_xheight,30,300))
perc = int(np.clip(xheight*100.0/median_xheight,30,300))
perc = 10*((perc+5)//10)
if perc!=100:
style += "font-size:%d%%;"%perc
......
......@@ -4,15 +4,23 @@
from __future__ import print_function
import random as pyrandom
import glob,sys,os,re,codecs,traceback
from pylab import *
import glob
import sys
import os
import re
import codecs
import traceback
import argparse
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ImageFont,ImageDraw
from scipy.ndimage import filters,measurements,interpolation
from scipy.misc import imsave
import ocrolib
import argparse
parser = argparse.ArgumentParser(description = "Generate text line training data")
parser.add_argument('-o','--base',default='linegen',help='output directory, default: %(default)s')
parser.add_argument('-r','--distort',type=float,default=1.0)
......@@ -116,36 +124,41 @@ assert len(lines)>0
lines = list(set(lines))
print("got", len(lines), "unique lines")
def rgeometry(image,eps=0.03,delta=0.3):
m = array([[1+eps*randn(),0.0],[eps*randn(),1.0+eps*randn()]])
m = np.array([[1+eps*np.random.randn(),0.0],[eps*np.random.randn(),1.0+eps*np.random.randn()]])
w,h = image.shape
c = array([w/2.0,h/2])
d = c-dot(m,c)+array([randn()*delta,randn()*delta])
c = np.array([w/2.0,h/2])
d = c-np.dot(m,c)+np.array([np.random.randn()*delta,np.random.randn()*delta])
return interpolation.affine_transform(image,m,offset=d,order=1,mode='constant',cval=image[0,0])
def rdistort(image,distort=3.0,dsigma=10.0,cval=0):
h,w = image.shape
hs = randn(h,w)
ws = randn(h,w)
hs = np.random.randn(h,w)
ws = np.random.randn(h,w)
hs = filters.gaussian_filter(hs,dsigma)
ws = filters.gaussian_filter(ws,dsigma)
hs *= distort/amax(hs)
ws *= distort/amax(ws)
hs *= distort/np.amax(hs)
ws *= distort/np.amax(ws)
def f(p):
return (p[0]+hs[p[0],p[1]],p[1]+ws[p[0],p[1]])
return interpolation.geometric_transform(image,f,output_shape=(h,w),
order=1,mode='constant',cval=cval)
if args.debug_show:
ion(); gray()
plt.ion()
plt.gray()
base = args.base
print("base", base)
os.system("rm -rf "+base)
os.mkdir(base)
def crop(image,pad=1):
[[r,c]] = measurements.find_objects(array(image==0,'i'))
[[r,c]] = measurements.find_objects(np.array(image==0,'i'))
r0 = r.start
r1 = r.stop
c0 = c.start
......@@ -157,6 +170,7 @@ last_font = None
last_size = None
last_fontfile = None
def genline(text,fontfile=None,size=36,sigma=0.5,threshold=0.5):
global image,draw,last_font,last_fontfile
if last_fontfile!=fontfile or last_size!=size:
......@@ -168,13 +182,13 @@ def genline(text,fontfile=None,size=36,sigma=0.5,threshold=0.5):
draw.rectangle((0,0,6000,6000),fill="white")
# print("\t", size, font)
draw.text((250,20),text,fill="black",font=font)
a = asarray(image,'f')
a = a*1.0/amax(a)
a = np.asarray(image,'f')
a = a*1.0/np.amax(a)
if sigma>0.0:
a = filters.gaussian_filter(a,sigma)
a += clip(randn(*a.shape)*0.2,-0.25,0.25)
a += np.clip(np.random.randn(*a.shape)*0.2,-0.25,0.25)
a = rgeometry(a)
a = array(a>threshold,'f')
a = np.array(a>threshold,'f')
a = crop(a,pad=3)
# FIXME add grid warping here
# clf(); ion(); gray(); imshow(a); ginput(1,0.1)
......@@ -196,8 +210,8 @@ for pageno,font in enumerate(fonts):
lineno = 0
while lineno<args.maxlines:
(sigma,ssigma,threshold,sthreshold) = pyrandom.choice(degradations)
sigma += (2*rand()-1)*ssigma
threshold += (2*rand()-1)*sthreshold
sigma += (2*np.random.rand()-1)*ssigma
threshold += (2*np.random.rand()-1)*sthreshold
line = pyrandom.choice(lines)
size = pyrandom.choice(sizes)
with open(pagedir+".info","w") as stream:
......@@ -208,13 +222,15 @@ for pageno,font in enumerate(fonts):
except:
traceback.print_exc()
continue
if amin(image.shape)<10: continue
if amax(image)<0.5: continue
if np.amin(image.shape)<10: continue
if np.amax(image)<0.5: continue
if args.distort>0:
image = rdistort(image,args.distort,args.dsigma,cval=amax(image))
image = rdistort(image,args.distort,args.dsigma,cval=np.amax(image))
if args.display:
gray()
clf(); imshow(image); ginput(1,0.1)
plt.gray()
plt.clf()
plt.imshow(image)
plt.ginput(1,0.1)
fname = pagedir+"/01%04d"%lineno
imsave(fname+".bin.png",image)
gt = ocrolib.normalize_text(line)
......
......@@ -2,26 +2,25 @@
from __future__ import print_function
import random as pyrandom
import re
from pylab import *
import os.path
import ocrolib
import argparse
import matplotlib
import numpy
import sys
import numpy as np
import matplotlib.pyplot as plt
import ocrolib
from ocrolib import lineest
import ocrolib.lstm as lstm
from ocrolib import edist
import traceback
import clstm
ion()
matplotlib.rc('xtick',labelsize=7)
matplotlib.rc('ytick',labelsize=7)
matplotlib.rcParams.update({"font.size":7})
plt.ion()
plt.rc('xtick',labelsize=7)
plt.rc('ytick',labelsize=7)
plt.rcParams.update({"font.size":7})