我正在尝试遵循https://www.tensorflow.org/tutorials/seq2seq中的tensorflow教程.
数据似乎加载正常,但是当我初始化模型时,我收到以下错误:
Traceback (most recent call last): File "/Users//PycharmProjects/tensorflow_chatbot/execute.py", line 334, in train() File "/Users/ /PycharmProjects/tensorflow_chatbot/execute.py", line 151, in train model = create_model(sess, False) File "/Users/ /PycharmProjects/tensorflow_chatbot/execute.py", line 113, in create_model forward_Only=forward_only) File "/Users/ /PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 181, in __init__ softmax_loss_function=softmax_loss_function) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 1206, in model_with_buckets decoder_inputs[:bucket[1]]) File "/Users/ /PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 180, in lambda x, y: seq2seq_f(x, y, False), File "/Users/ /PycharmProjects/tensorflow_chatbot/seq2seq_model_tf.py", line 144, in seq2seq_f dtype=dtype) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/contrib/legacy_seq2seq/python/ops/seq2seq.py", line 848, in embedding_attention_seq2seq encoder_cell = copy.deepcopy(cell) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 174, in deepcopy y = copier(memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/site-packages/tensorflow/python/layers/base.py", line 476, in __deepcopy__ setattr(result, k, copy.deepcopy(v, memo)) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list y.append(deepcopy(a, memo)) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct state = deepcopy(state, memo) File "/Users/ 
/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct state = deepcopy(state, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct state = deepcopy(state, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct state = deepcopy(state, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ 
/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list y.append(deepcopy(a, memo)) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct state = deepcopy(state, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct state = deepcopy(state, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct state = deepcopy(state, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ 
/anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 230, in _deepcopy_list y.append(deepcopy(a, memo)) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 237, in _deepcopy_tuple y.append(deepcopy(a, memo)) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 334, in _reconstruct state = deepcopy(state, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 163, in deepcopy y = copier(x, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 257, in _deepcopy_dict y[deepcopy(key, memo)] = deepcopy(value, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 190, in deepcopy y = _reconstruct(x, rv, 1, memo) File "/Users/ /anaconda/envs/tensorflow_source_gpu/lib/python2.7/copy.py", line 343, in _reconstruct y.__dict__.update(state) AttributeError: 'NoneType' object has no attribute 'update'
如果我将translate.py中的_buckets变量改为只包含1个元组(用哪一个都无所谓),就不会出现问题,但训练效果不好.只要元组超过1个,就会导致此错误.除了更改本地项目中的文件名之外,我没有对GitHub上https://github.com/tensorflow/models/tree/master/tutorials/rnn/translate中的文件做任何修改.
__init__函数中一直到出错位置的开头部分如下:
def __init__(self, source_vocab_size, target_vocab_size, buckets, size, num_layers, max_gradient_norm, batch_size, learning_rate, learning_rate_decay_factor, use_lstm=False, num_samples=512, forward_Only=False, dtype=tf.float32): """Create the model. Args: source_vocab_size: size of the source vocabulary. target_vocab_size: size of the target vocabulary. buckets: a list of pairs (I, O), where I specifies maximum input length that will be processed in that bucket, and O specifies maximum output length. Training instances that have inputs longer than I or outputs longer than O will be pushed to the next bucket and padded accordingly. We assume that the list is sorted, e.g., [(2, 4), (8, 16)]. size: number of units in each layer of the model. num_layers: number of layers in the model. max_gradient_norm: gradients will be clipped to maximally this norm. batch_size: the size of the batches used during training; the model construction is independent of batch_size, so it can be changed after initialization if this is convenient, e.g., for decoding. learning_rate: learning rate to start with. learning_rate_decay_factor: decay learning rate by this much when needed. use_lstm: if true, we use LSTM cells instead of GRU cells. num_samples: number of samples for sampled softmax. forward_only: if set, we do not construct the backward pass in the model. dtype: the data type to use to store internal variables. """ self.source_vocab_size = source_vocab_size self.target_vocab_size = target_vocab_size self.buckets = buckets self.batch_size = batch_size self.learning_rate = tf.Variable( float(learning_rate), trainable=False, dtype=dtype) self.learning_rate_decay_op = self.learning_rate.assign( self.learning_rate * learning_rate_decay_factor) self.global_step = tf.Variable(0, trainable=False) # If we use sampled softmax, we need an output projection. output_projection = None softmax_loss_function = None # Sampled softmax only makes sense if we sample less than vocabulary size. 
if 01: cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)]) # The seq2seq function: we use embedding for the input and attention. def seq2seq_f(encoder_inputs, decoder_inputs, do_decode): return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq( encoder_inputs, decoder_inputs, cell, num_encoder_symbols=source_vocab_size, num_decoder_symbols=target_vocab_size, embedding_size=size, output_projection=output_projection, feed_previous=do_decode, dtype=dtype) # Feeds for inputs. self.encoder_inputs = [] self.decoder_inputs = [] self.target_weights = [] for i in xrange(buckets[-1][0]): # Last bucket is the biggest one. self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i))) for i in xrange(buckets[-1][1] + 1): self.decoder_inputs.append(tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i))) self.target_weights.append(tf.placeholder(dtype, shape=[None], name="weight{0}".format(i))) # Our targets are decoder inputs shifted by one. targets = [self.decoder_inputs[i + 1] for i in xrange(len(self.decoder_inputs) - 1)] # Training outputs and losses. if forward_only: self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets( self.encoder_inputs, self.decoder_inputs, targets, self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, True), softmax_loss_function=softmax_loss_function) # If we use output projection, we need to project outputs for decoding. if output_projection is not None: for b in xrange(len(buckets)): self.outputs[b] = [ tf.matmul(output, output_projection[0]) + output_projection[1] for output in self.outputs[b] ] else: self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets( self.encoder_inputs, self.decoder_inputs, targets, self.target_weights, buckets, lambda x, y: seq2seq_f(x, y, False), softmax_loss_function=softmax_loss_function) # this is where the error occurrs
我需要做些什么才能让它发挥作用?使用tensorflow版本1.2
更新:我在Mac OSX Sierra上分别测试了从源代码构建的tensorflow和通过pip安装的tensorflow,两者都出现了同样的问题.
正如我已经在这里评论的那样,你试图实现的模型已被弃用.如果你想让它工作,请查看我在该issue中粘贴的代码.从tensorflow 1.1和1.2开始,您可以使用支持动态长度序列的函数,例如tf.nn.bidirectional_dynamic_rnn.它让您无需额外工作就能处理长度可变的序列.
我正在创建一些示例,我将使用新的api向您发布一个工作示例.