Commit 0c8f355d authored by Christoph Wick

Allow CuDNN-trained model data to be used on the CPU

parent febceb79
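
This commit switches the LSTM backend depending on whether a GPU is visible to the session: `cudnn_rnn.CudnnLSTM` when a GPU is available, and `cudnn_rnn.CudnnCompatibleLSTMCell` stacked via `tf.contrib.rnn.stack_bidirectional_dynamic_rnn` otherwise, so that weights trained with the CuDNN kernel can still be restored on a CPU-only machine. A minimal sketch of that selection pattern, assuming the TF 1.x contrib APIs used in the diff (function name and arguments below are illustrative, not part of the commit):

import tensorflow as tf
import tensorflow.contrib.cudnn_rnn as cudnn_rnn

def bidirectional_lstm(time_major_inputs, seq_len, num_hidden, num_layers, gpu_enabled):
    # Illustrative only: pick the fused CuDNN kernel on GPU, a CuDNN-compatible
    # cell stack on CPU. Inputs are time-major: (T, N, F).
    if gpu_enabled:
        # Fused CuDNN kernel; fast, but its variables use a CuDNN-specific layout.
        lstm = cudnn_rnn.CudnnLSTM(num_layers, num_hidden, direction='bidirectional')
        outputs, _ = lstm(time_major_inputs)
    else:
        # Cells whose variable layout matches CudnnLSTM, so a checkpoint trained
        # on GPU can be restored and evaluated on a CPU-only machine.
        fw = [cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden) for _ in range(num_layers)]
        bw = [cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden) for _ in range(num_layers)]
        outputs, _, _ = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
            fw, bw, time_major_inputs,
            sequence_length=seq_len, dtype=tf.float32,
            time_major=True, scope="cudnn_lstm/stack_bidirectional_rnn")
    return outputs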
@@ -3,7 +3,7 @@ from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops.nn_grad import _BroadcastMul
from tensorflow.python.ops import rnn
from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell, DropoutWrapper, LSTMBlockFusedCell
from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell, DropoutWrapper, LSTMBlockFusedCell, LSTMBlockCell
import tensorflow.contrib.cudnn_rnn as cudnn_rnn
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import ctc_ops
@@ -38,28 +38,15 @@ class Model:
@staticmethod
def default_model_settings():
return {
"conv_pool": [
{
"filters": 40,
"kernel_size": [3, 3],
"pool_size": [2, 2],
},
{
"filters": 60,
"kernel_size": [3, 3],
"pool_size": [2, 2],
},
],
"lstm": [
100
],
"ctc_merge_repeated": True,
"use_peepholes": False,
"ctc_merge_repeated": False,
"dropout": False,
"solver": "Adam",
"ctc": "Default",
"l_rate": 1e-3,
"momentum": 0.9,
"layers": [],
"cudnn": True,
}
@staticmethod
@@ -69,19 +56,11 @@ class Model:
params = str.split(",")
model = []
lstm_appeared = False
params_dict = {
"ctc_merge_repeated": True,
"use_peepholes": False,
"dropout": False,
"solver": "Adam",
"ctc": "Default",
"l_rate": 1e-3,
"momentum": 0.9,
"layers": model,
}
params_dict = Model.default_model_settings()
params_dict["layers"] = model
for param in params:
label, value = tuple(param.split("="))
flags = ["use_peepholes", "ctc_merge_repeated", "dropout"]
flags = ["use_peepholes", "ctc_merge_repeated", "dropout", "cudnn"]
strs = ["solver", "ctc"]
floats = ["l_rate", "momentum"]
if label in flags:
@@ -194,6 +173,12 @@ class Model:
@staticmethod
def create(num_features, num_classes, model_settings, reuse_variables=False, threads=1):
# add new variables to model_settings
for key, value in Model.default_model_settings().iteritems():
if key not in model_settings:
print("Adding new model setting variable as default value: {}={}".format(key, value))
model_settings[key] = value
print("Creating tf graph with settings: %s" % model_settings)
graph = tf.Graph()
with graph.as_default():
@@ -201,6 +186,11 @@ class Model:
config=tf.ConfigProto(intra_op_parallelism_threads=1,
inter_op_parallelism_threads=threads,
))
gpu_enabled = False
for d in session.list_devices():
if d.device_type == "GPU":
gpu_enabled = True
break
inputs = tf.placeholder(tf.float32, shape=(None, None, num_features), name="inputs")
batch_size = tf.shape(inputs)[0]
@@ -260,54 +250,64 @@ class Model:
lstm_layers = [l for l in model_settings["layers"] if l['type'] == "lstm"]
if len(lstm_layers) > 0:
def get_lstm_cell(num_hidden, use_peepholes=model_settings["use_peepholes"]):
return LSTMBlockFusedCell(num_hidden,
forget_bias=1.0,
use_peepholes=use_peepholes,
reuse=reuse_variables,
#initializer=tf.initializers.random_uniform(-0.1, 0.1),
#cell_clip=20,
#proj_clip=20,
#activation=tf.sigmoid,
)
# for i, lstm in enumerate(lstm_layers):
# fw, bw = get_lstm_cell(lstm["hidden"]), get_lstm_cell(lstm["hidden"])
# (output_fw, output_bw), _ \
# = rnn.bidirectional_dynamic_rnn(fw, bw, rnn_inputs, lstm_seq_len,
# dtype=tf.float32, scope=scope.name + "BiRNN%d" % i)
#
# rnn_inputs = tf.concat((output_fw, output_bw), 2)
# output_size = lstm_layers[-1]["hidden"] * 2
# outputs = rnn_inputs
# Time major inputs required for lstm
time_major_inputs = tf.transpose(rnn_inputs, [1, 0, 2])
if len(lstm_layers) > 0:
for i, lstm in enumerate(lstm_layers):
if lstm["hidden"] != lstm_layers[0]["hidden"]:
raise Exception("Currently all lstm layers must have an equal number of hidden nodes. "
"Got {} != {}".format(lstm["hidden"], lstm_layers[0]["hidden"]))
# Time major inputs required for lstm
time_major_inputs = tf.transpose(rnn_inputs, [1, 0, 2])
def cpu_cudnn_compatible_lstm_backend(time_major_inputs):
def get_lstm_cell(num_hidden):
return cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden, reuse=reuse_variables)
fw, bw = zip(*[(get_lstm_cell(lstm["hidden"]), get_lstm_cell(lstm["hidden"])) for lstm in lstm_layers])
time_major_outputs, output_fw, output_bw \
= tf.contrib.rnn.stack_bidirectional_dynamic_rnn(list(fw), list(bw), time_major_inputs,
sequence_length=lstm_seq_len,
dtype=tf.float32,
scope="{}cudnn_lstm/stack_bidirectional_rnn".format(scope.name),
time_major=True,
)
return time_major_outputs
# Create the Cudnn LSTM factory
rnn_lstm = cudnn_rnn.CudnnLSTM(len(lstm_layers), lstm_layers[0]["hidden"],
direction='bidirectional',
kernel_initializer=tf.initializers.random_uniform(-0.1, 0.1))
def gpu_cudnn_lstm_backend(time_major_inputs):
for i, lstm in enumerate(lstm_layers):
if lstm["hidden"] != lstm_layers[0]["hidden"]:
raise Exception("Currently all lstm layers must have an equal number of hidden nodes. "
"Got {} != {}".format(lstm["hidden"], lstm_layers[0]["hidden"]))
# TODO: Check if the models are loadable from meta Graph, maybe the next line fixed this
rnn_lstm._saveable_cls = cudnn_rnn.CudnnLSTMSaveable
# Create the Cudnn LSTM factory
rnn_lstm = cudnn_rnn.CudnnLSTM(len(lstm_layers), lstm_layers[0]["hidden"],
direction='bidirectional',
kernel_initializer=tf.initializers.random_uniform(-0.1, 0.1))
# Apply the lstm to the inputs
time_major_outputs, (output_h, output_c) = rnn_lstm(time_major_inputs)
# TODO: Check if the models are loadable from meta Graph, maybe the next line fixed this
rnn_lstm._saveable_cls = cudnn_rnn.CudnnLSTMSaveable
# Apply the lstm to the inputs
time_major_outputs, (output_h, output_c) = rnn_lstm(time_major_inputs)
return time_major_outputs
if model_settings["cudnn"]:
if gpu_enabled:
print("Using CUDNN LSTM backend on GPU")
time_major_outputs = gpu_cudnn_lstm_backend(time_major_inputs)
else:
print("Using CUDNN compatible LSTM backend on CPU")
time_major_outputs = cpu_cudnn_compatible_lstm_backend(time_major_inputs)
else:
raise Exception("Only cudnn based backend supported yet.")
# Set the output size
output_size = lstm_layers[-1]["hidden"] * 2
else:
output_size = lstm_num_features
outputs = rnn_inputs
time_major_outputs = tf.transpose(outputs, [1, 0, 2])
time_major_outputs = time_major_inputs
# flatten to (T * N, F) for matrix multiplication. This will be reversed later
time_major_outputs = tf.reshape(time_major_outputs, [-1, time_major_outputs.shape.as_list()[2]])
@@ -404,7 +404,8 @@ class Model:
if "Adam" not in v.name and "beta1_power" not in v.name and "beta2_power" not in v.name
]
print(all_var_names)
print("Variables to Load: {}".format([v.name for v in all_var_names]))
saver = tf.train.Saver(all_var_names)
# Restore variables from disk.
......