Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Christoph Wick
ocropy
Commits
a79c6581
Commit
a79c6581
authored
Apr 06, 2013
by
Tom
Browse files
smallish bug fixes
parent
ebfd7dea
Changes
5
Hide whitespace changes
Inline
Side-by-side
ocrolib/lstm.py
View file @
a79c6581
import
common
as
ocrolib
import
pdb
from
pylab
import
*
import
sys
...
...
@@ -368,7 +369,8 @@ class LSTM(Network):
ni
,
ns
,
na
=
self
.
dims
assert
len
(
xs
[
0
])
==
ni
n
=
len
(
xs
)
if
n
>
len
(
self
.
gi
):
raise
RangeError
(
"input too large"
)
if
n
>
len
(
self
.
gi
):
raise
ocrolib
.
RecognitionError
(
"input too large for LSTM model"
)
self
.
last_n
=
n
self
.
reset
(
n
)
for
t
in
range
(
n
):
...
...
@@ -398,7 +400,8 @@ class LSTM(Network):
def
backward
(
self
,
deltas
):
"""Perform backward propagation of deltas."""
n
=
len
(
deltas
)
if
n
>
len
(
self
.
gi
):
raise
RangeError
(
"input too large"
)
if
n
>
len
(
self
.
gi
):
raise
ocrolib
.
RecognitionError
(
"input too large"
)
assert
n
==
self
.
last_n
ni
,
ns
,
na
=
self
.
dims
for
t
in
reversed
(
range
(
n
)):
...
...
@@ -595,7 +598,8 @@ def log_mul(x,y):
def
log_add
(
x
,
y
):
"Perform addition in the log domain."
return
where
(
abs
(
x
-
y
)
>
10
,
maximum
(
x
,
y
),
log
(
exp
(
x
-
y
)
+
1
)
+
y
)
#return where(abs(x-y)>10,maximum(x,y),log(exp(x-y)+1)+y)
return
where
(
abs
(
x
-
y
)
>
10
,
maximum
(
x
,
y
),
log
(
exp
(
clip
(
x
-
y
,
-
20
,
20
))
+
1
)
+
y
)
def
forward_algorithm
(
match
,
skip
=-
5.0
):
"""Apply the forward algorithm to an array of log state
...
...
@@ -631,9 +635,11 @@ def ctc_align_targets(outputs,targets,threshold=100.0,verbose=0,debug=0,lo=1e-5)
assert
not
isnan
(
lmatch
).
any
()
both
=
forwardbackward
(
lmatch
)
epath
=
exp
(
both
-
amax
(
both
))
epath
/=
sum
(
epath
,
axis
=
0
)[
newaxis
,:]
l
=
sum
(
epath
,
axis
=
0
)[
newaxis
,:]
epath
/=
where
(
l
==
0.0
,
1e-9
,
l
)
aligned
=
maximum
(
lo
,
dot
(
epath
,
targets
))
aligned
/=
sum
(
aligned
,
axis
=
1
)[:,
newaxis
]
l
=
sum
(
aligned
,
axis
=
1
)[:,
newaxis
]
aligned
/=
where
(
l
==
0.0
,
1e-9
,
l
)
if
debug
:
subplot
(
413
);
imshow
(
epath
.
T
,
cmap
=
cm
.
hot
,
interpolation
=
'nearest'
)
subplot
(
414
);
imshow
(
aligned
.
T
,
cmap
=
cm
.
hot
,
interpolation
=
'nearest'
)
...
...
ocropus-gtedit
View file @
a79c6581
...
...
@@ -33,7 +33,7 @@ p_org.add_argument('files',nargs='+')
p_html
=
subparsers
.
add_parser
(
"html"
,
help
=
"generate html"
)
p_html
.
add_argument
(
'-d'
,
'--debug'
,
action
=
"store_true"
)
p_html
.
add_argument
(
'-o'
,
'--output'
,
default
=
"correct.html"
)
p_html
.
add_argument
(
'-o'
,
'--output'
,
default
=
"correct
ion
.html"
)
p_html
.
add_argument
(
'-x'
,
'--extension'
,
default
=
'.txt'
)
p_html
.
add_argument
(
'-f'
,
'--fontsize'
,
default
=
14
,
type
=
int
)
p_html
.
add_argument
(
'-H'
,
'--height'
,
default
=
24
,
type
=
int
)
...
...
ocropus-hocr
View file @
a79c6581
...
...
@@ -59,11 +59,13 @@ dirs = [ocrolib.allsplitext(name)[0] for name in args.files]
xhfiles
=
python
.
sum
([
glob
.
glob
(
d
+
"/??????.xheight"
)
for
d
in
dirs
],[])
if
len
(
xhfiles
)
>
5
:
xheights
=
[
float
(
ocrolib
.
read_text
(
f
))
for
f
in
xhfiles
]
median_xheight
=
median
(
xheights
)
if
len
(
xheights
)
>
0
:
median_xheight
=
median
(
xheights
)
else
:
lfiles
=
python
.
sum
([
glob
.
glob
(
d
+
"/??????.bin.png"
)
for
d
in
dirs
],[])
pyrandom
.
shuffle
(
lfiles
)
median_xheight
=
0.5
*
median
([
imread
(
f
).
shape
[
0
]
for
f
in
lfiles
[:
100
]])
if
len
(
lfiles
)
>
0
:
median_xheight
=
0.5
*
median
([
imread
(
f
).
shape
[
0
]
for
f
in
lfiles
[:
100
]])
E
(
"median_xheight"
,
median_xheight
)
P
(
hocr
.
header
())
...
...
ocropus-rpred
View file @
a79c6581
...
...
@@ -180,6 +180,9 @@ def process1(arg):
def
safe_process1
(
arg
):
try
:
return
process1
(
arg
)
except
ocrolib
.
RecognitionError
as
e
:
print
e
return
None
except
:
traceback
.
print_exc
()
return
None
...
...
ocropus-rtrain
View file @
a79c6581
...
...
@@ -7,7 +7,6 @@ import os.path
import
glob
import
ocrolib
import
argparse
import
lstm
import
scipy
import
matplotlib
import
numpy
...
...
@@ -218,6 +217,9 @@ for trial in range(start,args.ntrain):
else
:
assert
"dew.png"
in
fname
,
"input must already be dewarped"
if
line
.
size
<
10
or
amax
(
line
)
==
amin
(
line
):
print
"EMPTY-INPUT"
continue
line
=
line
*
1.0
/
amax
(
line
)
line
=
amax
(
line
)
-
line
line
=
line
.
T
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment