# Build two random arrays: features of shape (100, 2) and labels of shape (100, 1).
features, labels = (np.random.sample((100, 2)), np.random.sample((100, 1)))
# Wrap the (features, labels) pair into a tf.data.Dataset.
dataset = tf.data.Dataset.from_tensor_slices((features, labels))

x = np.random.sample((100, 2))
# A dataset can also be built from a single numpy array.
dataset = tf.data.Dataset.from_tensor_slices(x)
# A one-shot iterator walks the dataset exactly once and needs no explicit
# initialization before use.
iterator = dataset.make_one_shot_iterator()
# Data source: a plain numpy array.
data = np.random.sample((100, 2))
# A placeholder lets us feed the input at initialization time instead of
# baking the numpy array into the graph as a constant.
x = tf.placeholder(tf.float32, shape=[None, 2])
dataset = tf.data.Dataset.from_tensor_slices(x)
# Initializable iterator: it is created here but must be explicitly
# initialized (with a feed for the placeholder) before elements can be read.
iter = dataset.make_initializable_iterator()
el = iter.get_next()
with tf.Session() as sess:
    # iter.initializer is the iterator's initialization op; run it while
    # feeding the placeholder with the actual data.
    sess.run(iter.initializer, feed_dict={x: data})
    # After initialization we can fetch elements one at a time.
    # FIX: the original line was missing its closing parenthesis.
    print(sess.run(el))
Of course, the more common situation is that we want to switch back and forth between the train and the test data. Below is a slightly more complex example:
# Two different input sources: a random training set and a tiny test set.
train_data = (np.random.sample((100, 2)), np.random.sample((100, 1)))
test_data = (np.array([[1, 2]]), np.array([[0]]))
# Two placeholders: one for the features and one for the labels.
x, y = tf.placeholder(tf.float32, shape=[None, 2]), tf.placeholder(tf.float32, shape=[None, 1])
dataset = tf.data.Dataset.from_tensor_slices((x, y))
# An initializable iterator can be re-initialized with different feeds,
# which is how we switch between train and test data below.
iter = dataset.make_initializable_iterator()
features, labels = iter.get_next()
with tf.Session() as sess:
    # Initialize the iterator with the training data.
    # FIX: the session body below was not indented under `with` in the original.
    sess.run(iter.initializer, feed_dict={x: train_data[0], y: train_data[1]})
    for _ in range(10):
        sess.run([features, labels])
    # Re-initialize the same iterator, this time feeding the test data.
    sess.run(iter.initializer, feed_dict={x: test_data[0], y: test_data[1]})
    print(sess.run([features, labels]))
# Input data.
train_data = (np.random.sample((100, 2)), np.random.sample((100, 1)))
test_data = (np.random.sample((10, 2)), np.random.sample((10, 1)))
# Two datasets, one for training and one for testing.
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
test_dataset = tf.data.Dataset.from_tensor_slices(test_data)
# A reinitializable iterator is defined only by its structure (output types
# and shapes), not by any particular dataset; both datasets above share that
# structure, so either can be plugged into it.
iter = tf.data.Iterator.from_structure(train_dataset.output_types,
                                       train_dataset.output_shapes)
features, labels = iter.get_next()
# FIX: these init ops were used below but never defined in the original.
# Each one binds the generic iterator to one concrete dataset.
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)
with tf.Session() as sess:
    sess.run(train_init_op)  # switch to the train dataset
    for _ in range(10):
        sess.run([features, labels])
    sess.run(test_init_op)  # switch to the test dataset
    print(sess.run([features, labels]))
# Input data.
train_data = (np.random.sample((100, 2)), np.random.sample((100, 1)))
test_data = (np.random.sample((10, 2)), np.random.sample((10, 1)))
# Placeholders for features and labels.
x, y = tf.placeholder(tf.float32, shape=[None, 2]), tf.placeholder(tf.float32, shape=[None, 1])
# Two datasets, one for training and one for testing.
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
test_dataset = tf.data.Dataset.from_tensor_slices((x, y))
# One concrete initializable iterator per dataset.
train_iterator = train_dataset.make_initializable_iterator()
test_iterator = test_dataset.make_initializable_iterator()
# FIX: `handle` was used but never defined in the original.  It is a string
# placeholder, fed at run time, that selects which concrete iterator the
# feedable iterator reads from.
handle = tf.placeholder(tf.string, shape=[])
iter = tf.data.Iterator.from_string_handle(handle, train_dataset.output_types, train_dataset.output_shapes)
# Get data from the generic (feedable) iterator.
next_elements = iter.get_next()
with tf.Session() as sess:
    # Obtain the string handles that identify each concrete iterator.
    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())
    # Initialize both iterators, feeding each with its own data.
    sess.run(train_iterator.initializer, feed_dict={x: train_data[0], y: train_data[1]})
    sess.run(test_iterator.initializer, feed_dict={x: test_data[0], y: test_data[1]})
    for _ in range(10):
        # Feed train_handle to pull an element from the train set.
        # (Renamed the loop vars so they do not clobber the x/y placeholders.)
        x_val, y_val = sess.run(next_elements, feed_dict={handle: train_handle})
        print(x_val, y_val)
        # Feed test_handle to pull an element from the test set.
        x_val, y_val = sess.run(next_elements, feed_dict={handle: test_handle})
        print(x_val, y_val)
# char_to_id is a plain python dict mapping characters to integer ids.
# FIX: wrap keys()/values() in list() — under Python 3 these return dict
# views, which KeyValueTensorInitializer cannot convert to tensors directly.
# keys() and values() of the same dict are guaranteed to pair up in order.
table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        list(char_to_id.keys()), list(char_to_id.values())),
    char_to_id.get(u"<UNK>")  # default id returned for keys not in the table
)
With this table, given a tensor holding a list of characters, we can look up the corresponding list of ids:
# Map a tensor of characters to the corresponding tensor of integer ids.
id_list = table.lookup(char_list)
Note, however, that in actual use the table must be initialized before any lookup:
with tf.Session() as sess:
    # table.init is the HashTable's initialization op; run it before any
    # lookup.  FIX: the body was not indented under `with` in the original.
    sess.run(table.init)