Unverified Commit d79fb4dc authored by Konstantin Baierer's avatar Konstantin Baierer Committed by GitHub
Browse files

Merge pull request #274 from tmbdev/native-cleanup

Native cleanup
parents ed5b5451 dc0a09cb
......@@ -150,7 +150,3 @@ retrain models as better recognizers become available.
After making CLSTM a full replacement for `ocropus-rtrain`/`ocropus-rpred`, the
next step will be to replace the binarization, text/image segmentation, and layout
analysis in OCRopus with trainable 2D LSTM models.
## Solution for clang
[Read README_OSX.md](README_OSX.md)
# Patch for OSX (clang)
The compile flag `-fopenmp` doesn't work with clang,
so if your `gcc` is actually linked to `clang`, you may need one of the solutions below:
1. Install `gcc` and make sure that the `gcc` in your `$PATH` points to the real `gcc`,
not `clang`.
2. If you don't have `gcc` installed, apply the patch below to change the compile flag.
``` diff
--- ocrolib/native.py 2015-02-21 13:34:52.000000000 +0800
+++ ocrolib_osx/native.py 2015-02-21 15:29:56.000000000 +0800
@@ -41,7 +41,7 @@
pass
def compile_and_find(c_string,prefix=".pynative",opt="-g -O4",libs="-lm",
- options="-shared -fopenmp -std=c99 -fPIC",verbose=0):
+ options="-shared -openmp -std=c99 -fPIC",verbose=0):
if not os.path.exists(prefix):
os.mkdir(prefix)
m = hashlib.md5()
```
# Names exported by a star import of this package.
# One entry per line with a trailing comma, so that a forgotten comma can no
# longer silently fuse two adjacent names into one string (previously
# "lineest" and "common" were concatenated into "lineestcommon").
# Each name is listed exactly once.
__all__ = [
    "binnednn",
    "cairoextras",
    "common",
    "components",
    "dbtables",
    "fgen",
    "gmmtree",
    "gtkyield",
    "hocr",
    "lang",
    "native",
    "mlp",
    "multiclass",
    "default",
    "lineest",
]
################################################################
......
......@@ -767,37 +767,6 @@ def ustrg2unicode(u,lig=ligatures.lig):
result += "<%d>"%value
return result
### code for instantiating native components
def pyconstruct(s):
    """Construct a Python object from a constructor expression.

    `s` is an expression of the form ``x.y.z.name(args)``.  If `s` contains
    no opening parenthesis, ``()`` is appended so that a bare name is called
    without arguments.  When the part before the first ``(`` contains a dot,
    everything up to the last dot (``x.y.z``) is imported first and made
    available to the expression.  In the future, more forms of syntax may be
    accepted.

    Returns whatever evaluating the expression yields.

    NOTE(review): the expression is evaluated with `eval`; this must never
    be called with untrusted input.
    """
    env = {}
    if "(" not in s:
        s += "()"
    path = s[:s.find("(")]
    if "." in path:
        module = path[:path.rfind(".")].strip()
        print("import", module)
        # `__import__("x.y.z")` imports the submodule and returns the
        # top-level package `x`; binding it under its own name gives the
        # same namespace as the statement `import x.y.z` would, without
        # relying on the Python-2-only `exec ... in env` statement.
        env[module.split(".")[0]] = __import__(module)
    return eval(s, env)
def mkpython(name):
    """Try to instantiate a Python class.

    `name` may be:
      - ``None`` or an empty value: returns ``None``;
      - a callable (e.g. a class): it is called without arguments and the
        result is returned;
      - a string starting with ``=``: the remainder is handed to
        `pyconstruct`;
      - a string containing ``(`` or ``.``: it is handed to `pyconstruct`;
      - any other string: it does not look like a Python class, so ``None``
        is returned.

    Errors raised while constructing the object propagate to the caller.
    """
    if name is None:
        return None
    # Check for callables before calling len(): classes have no len(), so
    # the previous order raised TypeError instead of instantiating them.
    if callable(name):
        return name()
    if len(name) == 0:
        return None
    if not isinstance(name, str):
        # Non-string, non-callable, non-empty: calling it raises TypeError,
        # exactly as before.
        return name()
    if name[0] == "=":
        return pyconstruct(name[1:])
    if "(" in name or "." in name:
        return pyconstruct(name)
    return None
################################################################
### loading and saving components
################################################################
......
......@@ -91,17 +91,6 @@ def sumouter(us,vs,lo=-1.0,hi=1.0,out=None):
result += outer(clip(u,lo,hi),v)
return result
def sumprod(us,vs,lo=-1.0,hi=1.0,out=None):
    """Sum the element-wise products of the `us` and `vs`.

    Each row of `us` is clipped into the range `[lo,hi]` before being
    multiplied with the corresponding row of `vs`.
    This is mainly used for computing weight updates
    in logistic regression layers.

    If `out` is given, the products are accumulated into it in place (it is
    not reset first) and it is returned; otherwise a fresh zero vector is
    allocated and returned.
    """
    assert len(us[0])==len(vs[0])
    # Test against None explicitly: `out or zeros(...)` evaluates the truth
    # value of the array, which raises for multi-element arrays and would
    # discard an all-zero one-element buffer.
    result = zeros(len(us[0])) if out is None else out
    for u,v in zip(us,vs):
        result += clip(u,lo,hi)*v
    return result
class Network:
"""General interface for networks. This mainly adds convenience
functions for `predict` and `train`.
......
################################################################
### A simple set of functions for embedding native C code
### inside Python.
################################################################
from __future__ import print_function
import os,hashlib
from ctypes import c_int,c_float,c_double,c_byte
from numpy.ctypeslib import ndpointer
import ctypes
import time
import errno
import contextlib
@contextlib.contextmanager
def lockfile(fname,delay=0.5):
    """Context manager that holds `fname` as an exclusive lock file.

    The file is created with O_CREAT|O_EXCL.  While creation fails because
    the file already exists, the call sleeps for `delay` seconds and tries
    again; any other OSError is re-raised.  The open file descriptor is
    yielded to the `with` body, and on exit the descriptor is closed and
    the file is removed.
    """
    open_flags = os.O_RDWR | os.O_CREAT | os.O_EXCL
    handle = None
    while handle is None:
        try:
            handle = os.open(fname, open_flags)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
            # The lock file already exists: wait and retry.
            time.sleep(delay)
    try:
        yield handle
    finally:
        os.close(handle)
        os.unlink(fname)
# Short aliases for the scalar ctypes.
I = c_int
F = c_float
D = c_double
B = c_byte

# Define module-level array pointer types named A<ndim><code>, for 1 to 3
# dimensions and the element types below (e.g. A2D is a 2-dimensional
# float64 array).  All of them require contiguous, aligned arrays.
# The names are assigned through globals() rather than the Python-2-only
# `exec` statement, so the module also parses on Python 3.
for d in range(1,4):
    for T,t in [("I","int32"),("F","float32"),("D","float64"),("B","int8"),("U","uint8")]:
        globals()["A%d%s"%(d,T)] = ndpointer(dtype=t,ndim=d,flags='CONTIGUOUS,ALIGNED')
class CompileError(Exception):
    """Error signalling that compiling embedded C source failed."""
def compile_and_find(c_string,prefix=".pynative",opt="-g -O4",libs="-lm",
                     options="-shared -fopenmp -std=c99 -fPIC",verbose=0):
    """Compile C source into a shared library and return the library's path.

    The library is cached in the directory `prefix` under a name derived
    from the MD5 hash of `c_string`; if it already exists it is returned
    without recompiling.  A lock file in the same directory serializes
    concurrent builds of the same source.

    Parameters:
      c_string -- the C source code (text or bytes).
      prefix   -- cache directory; created if missing.
      opt, libs, options -- strings spliced into the `gcc` command line.
      verbose  -- if true, print progress information.

    Raises CompileError if the compiler exits with a non-zero status.

    NOTE(review): the cache key covers only the source text, so the same
    source compiled with different `opt`/`libs`/`options` reuses the first
    library that was built.
    """
    # Work on bytes so hashing and writing behave the same whether the
    # source is passed as text or as a byte string.
    if isinstance(c_string, bytes):
        data = c_string
    else:
        data = c_string.encode("utf-8")
    # Create the cache directory without an exists()/mkdir() race between
    # processes starting up at the same time.
    try:
        os.mkdir(prefix)
    except OSError as e:
        if e.errno != errno.EEXIST:
            raise
    base = hashlib.md5(data).hexdigest()
    if verbose: print("hash", base, "for", c_string[:20], "...")
    with lockfile(os.path.join(prefix,base+".lock")):
        so = os.path.join(prefix,base+".so")
        if os.path.exists(so):
            if verbose: print("returning existing", so)
            return so
        source = os.path.join(prefix,base+".c")
        with open(source,"wb") as stream:
            stream.write(data)
        # Libraries go after the source file: the linker resolves symbols
        # in command-line order, so `-lm` placed first may be dropped.
        cmd = "gcc "+opt+" "+options+" "+source+" -o "+so+" "+libs
        if verbose: print("#", cmd)
        if os.system(cmd)!=0:
            if verbose: print("compilation failed")
            raise CompileError("compilation failed: "+cmd)
        return so
def compile_and_load(c_string,**keys):
    """Compile `c_string` with `compile_and_find` and load the result.

    All keyword arguments are forwarded to `compile_and_find`; the shared
    library it returns is opened with `ctypes.CDLL` and the handle returned.
    """
    return ctypes.CDLL(compile_and_find(c_string,**keys))
from ocrolib.native import *
# C source for the native helper routines used below.
#   sumouter: out[i][j] = sum over k of u[k][i]*v[k][j]
#   sumprod:  out[i]    = sum over k of u[k][i]*v[k][i]
# Both routines overwrite `out`; neither clips its inputs.
lstm_utils = r"""
#include <math.h>
void sumouter(int r,int n,int m,double out[n][m],double u[r][n],double v[r][m]) {
for(int i=0;i<n;i++) {
for(int j=0;j<m;j++) {
double total = 0.0;
for(int k=0;k<r;k++) total += u[k][i]*v[k][j];
out[i][j] = total;
}
}
}
void sumprod(int r,int n,double out[n],double u[r][n],double v[r][n]) {
for(int i=0;i<n;i++) {
double total = 0.0;
for(int k=0;k<r;k++) total += u[k][i]*v[k][i];
out[i] = total;
}
}
"""
# Compile and load the C source when this module is imported.
lstm_native = compile_and_load(lstm_utils)
# Declare the C signatures for ctypes: leading ints are the dimensions,
# followed by the output array and the two input arrays.
# NOTE(review): I, A1D and A2D presumably come from the star import of
# ocrolib.native (int, 1-d and 2-d double array pointers) -- confirm.
lstm_native.sumouter.argtypes = [I,I,I,A2D,A2D,A2D]
lstm_native.sumprod.argtypes = [I,I,A1D,A2D,A2D]
def sumouter(u,v,out=None):
    """Compute the sum of outer products of the rows of `u` and `v` natively.

    `u` and `v` must have the same number of rows.  The result has shape
    `u.shape[1:]+v.shape[1:]` and is written into `out`, overwriting its
    previous contents.  If `out` is omitted, a new zero-initialized array
    of the right shape is allocated.

    Returns `out`.
    """
    if out is None:
        # The default used to fail on `out.shape`; allocate the result
        # buffer instead so the advertised default is usable.
        import numpy
        out = numpy.zeros(u.shape[1:]+v.shape[1:])
    assert out.shape==u.shape[1:]+v.shape[1:] and u.shape[:1]==v.shape[:1]
    lstm_native.sumouter(u.shape[0],out.shape[0],out.shape[1],out,u,v)
    return out
def sumprod(u,v,out=None):
    """Compute the sum of element-wise products of the rows of `u` and `v` natively.

    `u` and `v` must have the same number of rows, and each row must have
    the shape of `out`.  The result is written into `out`, overwriting its
    previous contents.  If `out` is omitted, a new zero-initialized array
    of shape `u.shape[1:]` is allocated.

    Returns `out`.
    """
    if out is None:
        # The default used to fail on `out.shape`; allocate the result
        # buffer instead so the advertised default is usable.
        import numpy
        out = numpy.zeros(u.shape[1:])
    assert out.shape==u.shape[1:] and out.shape==v.shape[1:] and u.shape[:1]==v.shape[:1]
    lstm_native.sumprod(len(u),len(out),out,u,v)
    return out
def test():
    """Smoke test: run `sumouter` and `sumprod` once on random inputs."""
    from pylab import randn
    rows = 11
    # Outer-product case: 3 and 4 columns, 3x4 output buffer.
    left, right, outer_buf = randn(rows,3), randn(rows,4), randn(3,4)
    sumouter(left,right,out=outer_buf)
    # Element-wise case: both inputs have 7 columns, output has length 7.
    first, second, prod_buf = randn(rows,7), randn(rows,7), randn(7)
    sumprod(first,second,out=prod_buf)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment