Project: ocropy (Christoph Wick)

Commit 0c8f355d
authored Mar 16, 2018 by Christoph Wick

allowing cudnn data to be used on cpu

parent febceb79
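In outline, the change lets a network whose LSTM weights were written by the GPU-only cudnn_rnn.CudnnLSTM layer be rebuilt and evaluated on a machine without a GPU: when no GPU device is visible, the graph is constructed from cudnn_rnn.CudnnCompatibleLSTMCell instead, which uses the same parameter layout as the CuDNN kernel, so previously saved variables still fit. The snippet below is a minimal sketch of that idea, not the repository's code; it assumes TensorFlow 1.x with tf.contrib available, a single bidirectional layer, and made-up sizes.

import tensorflow as tf
import tensorflow.contrib.cudnn_rnn as cudnn_rnn

num_hidden = 100    # assumed, matching the default "lstm": [100]
num_features = 40   # assumed feature count

inputs = tf.placeholder(tf.float32, shape=(None, None, num_features))   # (N, T, F)
time_major_inputs = tf.transpose(inputs, [1, 0, 2])                     # (T, N, F)
seq_len = tf.fill([tf.shape(inputs)[0]], tf.shape(inputs)[1])

if tf.test.is_gpu_available():
    # Fused CuDNN implementation; its kernels only run on a GPU.
    lstm = cudnn_rnn.CudnnLSTM(1, num_hidden, direction='bidirectional')
    time_major_outputs, _ = lstm(time_major_inputs)
else:
    # CPU fallback: CudnnCompatibleLSTMCell reproduces the CuDNN parameter layout,
    # so variables written by a CudnnLSTM graph can be restored here, provided the
    # variable scopes line up (the diff builds the CPU stack under
    # ".../cudnn_lstm/stack_bidirectional_rnn" for exactly that reason).
    fw = [cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden)]
    bw = [cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden)]
    time_major_outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
        fw, bw, time_major_inputs,
        sequence_length=seq_len,
        dtype=tf.float32,
        scope="cudnn_lstm/stack_bidirectional_rnn",
        time_major=True)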
Changes 1
ocrolib/tfmodel/model.py
@@ -3,7 +3,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops.nn_grad import _BroadcastMul
 from tensorflow.python.ops import rnn
-from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell, DropoutWrapper, LSTMBlockFusedCell
+from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell, DropoutWrapper, LSTMBlockFusedCell, LSTMBlockCell
 import tensorflow.contrib.cudnn_rnn as cudnn_rnn
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import ctc_ops
@@ -38,28 +38,15 @@ class Model:
     @staticmethod
     def default_model_settings():
         return {
             "conv_pool": [
                 {"filters": 40, "kernel_size": [3, 3], "pool_size": [2, 2]},
                 {"filters": 60, "kernel_size": [3, 3], "pool_size": [2, 2]},
             ],
             "lstm": [100],
-            "ctc_merge_repeated": True,
             "use_peepholes": False,
+            "ctc_merge_repeated": False,
             "dropout": False,
             "solver": "Adam",
             "ctc": "Default",
             "l_rate": 1e-3,
             "momentum": 0.9,
             "layers": [],
             "cudnn": True,
         }

     @staticmethod
@@ -69,19 +56,11 @@ class Model:
         params = str.split(",")
         model = []
         lstm_appeared = False
-        params_dict = {
-            "ctc_merge_repeated": True,
-            "use_peepholes": False,
-            "dropout": False,
-            "solver": "Adam",
-            "ctc": "Default",
-            "l_rate": 1e-3,
-            "momentum": 0.9,
-            "layers": model,
-        }
+        params_dict = Model.default_model_settings()
+        params_dict["layers"] = model
         for param in params:
             label, value = tuple(param.split("="))
-            flags = ["use_peepholes", "ctc_merge_repeated", "dropout"]
+            flags = ["use_peepholes", "ctc_merge_repeated", "dropout", "cudnn"]
             strs = ["solver", "ctc"]
             floats = ["l_rate", "momentum"]
             if label in flags:
@@ -194,6 +173,12 @@ class Model:
     @staticmethod
     def create(num_features, num_classes, model_settings, reuse_variables=False, threads=1):
+        # add new variables to model_settings
+        for key, value in Model.default_model_settings().iteritems():
+            if key not in model_settings:
+                print("Adding new model setting variable as default value: {}={}".format(key, value))
+                model_settings[key] = value
+
         print("Creating tf graph with settings: %s" % model_settings)

         graph = tf.Graph()
         with graph.as_default():
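The loop added above backfills any settings key that an older, already-saved model does not know about (such as the new "cudnn" flag) with the current defaults, so models created before this commit keep loading. A standalone sketch of the same pattern follows; the function name is made up, and it uses dict.items(), which works on Python 2 and 3, whereas the diff's iteritems() is Python 2 only.

def apply_default_settings(model_settings, defaults):
    # Fill in every key the stored settings do not define yet.
    for key, value in defaults.items():
        if key not in model_settings:
            print("Adding new model setting variable as default value: {}={}".format(key, value))
            model_settings[key] = value
    return model_settings

# Example: a settings dict that predates the "cudnn" flag.
old_settings = {"lstm": [100], "solver": "Adam"}
apply_default_settings(old_settings, {"cudnn": True, "solver": "Momentum"})
assert old_settings["cudnn"] is True       # missing key backfilled
assert old_settings["solver"] == "Adam"    # existing key left untouched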
@@ -201,6 +186,11 @@ class Model:
                 config=tf.ConfigProto(intra_op_parallelism_threads=1,
                                       inter_op_parallelism_threads=threads,
                                       ))

+            gpu_enabled = False
+            for d in session.list_devices():
+                if d.device_type == "GPU":
+                    gpu_enabled = True
+                    break

             inputs = tf.placeholder(tf.float32, shape=(None, None, num_features), name="inputs")
             batch_size = tf.shape(inputs)[0]
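The new gpu_enabled flag is derived by asking the freshly created session which devices it can place operations on. A self-contained sketch of the same probe, assuming TensorFlow 1.x:

import tensorflow as tf

session = tf.Session()
# Session.list_devices() returns one entry per visible device, e.g.
# "/job:localhost/replica:0/task:0/device:GPU:0" with device_type "GPU".
gpu_enabled = any(d.device_type == "GPU" for d in session.list_devices())
print("GPU available: {}".format(gpu_enabled))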
@@ -260,54 +250,64 @@ class Model:
            lstm_layers = [l for l in model_settings["layers"] if l['type'] == "lstm"]

            if len(lstm_layers) > 0:
                def get_lstm_cell(num_hidden, use_peepholes=model_settings["use_peepholes"]):
                    return LSTMBlockFusedCell(num_hidden,
                                              forget_bias=1.0,
                                              use_peepholes=use_peepholes,
                                              reuse=reuse_variables,
                                              #initializer=tf.initializers.random_uniform(-0.1, 0.1),
                                              #cell_clip=20,
                                              #proj_clip=20,
                                              #activation=tf.sigmoid,
                                              )

                # for i, lstm in enumerate(lstm_layers):
                #     fw, bw = get_lstm_cell(lstm["hidden"]), get_lstm_cell(lstm["hidden"])
                #     (output_fw, output_bw), _ \
                #         = rnn.bidirectional_dynamic_rnn(fw, bw, rnn_inputs, lstm_seq_len,
                #                                         dtype=tf.float32, scope=scope.name + "BiRNN%d" % i)
                #
                #     rnn_inputs = tf.concat((output_fw, output_bw), 2)
                # output_size = lstm_layers[-1]["hidden"] * 2
                # outputs = rnn_inputs

                # Time major inputs required for lstm
                time_major_inputs = tf.transpose(rnn_inputs, [1, 0, 2])

            if len(lstm_layers) > 0:
                for i, lstm in enumerate(lstm_layers):
                    if lstm["hidden"] != lstm_layers[0]["hidden"]:
                        raise Exception("Currently all lstm layers must have an equal number of hidden nodes. "
                                        "Got {} != {}".format(lstm["hidden"], lstm_layers[0]["hidden"]))

                # Time major inputs required for lstm
                time_major_inputs = tf.transpose(rnn_inputs, [1, 0, 2])

                def cpu_cudnn_compatible_lstm_backend(time_major_inputs):
                    def get_lstm_cell(num_hidden):
                        return cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden, reuse=reuse_variables)

                    fw, bw = zip(*[(get_lstm_cell(lstm["hidden"]), get_lstm_cell(lstm["hidden"])) for lstm in lstm_layers])
                    time_major_outputs, output_fw, output_bw \
                        = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(list(fw), list(bw), time_major_inputs,
                                                                         sequence_length=lstm_seq_len,
                                                                         dtype=tf.float32,
                                                                         scope="{}cudnn_lstm/stack_bidirectional_rnn".format(scope.name),
                                                                         time_major=True,
                                                                         )
                    return time_major_outputs

                # Create the Cudnn LSTM factory
                rnn_lstm = cudnn_rnn.CudnnLSTM(len(lstm_layers), lstm_layers[0]["hidden"],
                                               direction='bidirectional',
                                               kernel_initializer=tf.initializers.random_uniform(-0.1, 0.1))

                def gpu_cudnn_lstm_backend(time_major_inputs):
                    for i, lstm in enumerate(lstm_layers):
                        if lstm["hidden"] != lstm_layers[0]["hidden"]:
                            raise Exception("Currently all lstm layers must have an equal number of hidden nodes. "
                                            "Got {} != {}".format(lstm["hidden"], lstm_layers[0]["hidden"]))

                    # TODO: Check if the models are loadable from meta Graph, maybe the next line fixed this
                    rnn_lstm._saveable_cls = cudnn_rnn.CudnnLSTMSaveable

                    # Create the Cudnn LSTM factory
                    rnn_lstm = cudnn_rnn.CudnnLSTM(len(lstm_layers), lstm_layers[0]["hidden"],
                                                   direction='bidirectional',
                                                   kernel_initializer=tf.initializers.random_uniform(-0.1, 0.1))

                    # Apply the lstm to the inputs
                    time_major_outputs, (output_h, output_c) = rnn_lstm(time_major_inputs)

                    # TODO: Check if the models are loadable from meta Graph, maybe the next line fixed this
                    rnn_lstm._saveable_cls = cudnn_rnn.CudnnLSTMSaveable

                    # Apply the lstm to the inputs
                    time_major_outputs, (output_h, output_c) = rnn_lstm(time_major_inputs)

                    return time_major_outputs

                if model_settings["cudnn"]:
                    if gpu_enabled:
                        print("Using CUDNN LSTM backend on GPU")
                        time_major_outputs = gpu_cudnn_lstm_backend(time_major_inputs)
                    else:
                        print("Using CUDNN compatible LSTM backend on CPU")
                        time_major_outputs = cpu_cudnn_compatible_lstm_backend(time_major_inputs)
                else:
                    raise Exception("Only cudnn based backend supported yet.")

                # Set the output size
                output_size = lstm_layers[-1]["hidden"] * 2
            else:
                output_size = lstm_num_features
                outputs = rnn_inputs
                time_major_outputs = tf.transpose(outputs, [1, 0, 2])
                time_major_outputs = time_major_inputs

            # flatten to (T * N, F) for matrix multiplication. This will be reversed later
            time_major_outputs = tf.reshape(time_major_outputs, [-1, time_major_outputs.shape.as_list()[2]])
@@ -404,7 +404,8 @@ class Model:
                             if "Adam" not in v.name and "beta1_power" not in v.name and "beta2_power" not in v.name]
            print(all_var_names)
            print("Variables to Load: {}".format([v.name for v in all_var_names]))

            saver = tf.train.Saver(all_var_names)
            # Restore variables from disk.