# create two random arrays of shape (100, 2) and (100, 1)
features, labels = (np.random.sample((100, 2)), np.random.sample((100, 1)))
# make a dataset from the numpy arrays; each element is one (feature, label) pair
dataset = tf.data.Dataset.from_tensor_slices((features, labels))
x = np.random.sample((100, 2))
# make a dataset from a numpy array
dataset = tf.data.Dataset.from_tensor_slices(x)
# create the iterator (the name `iter` is kept as in the surrounding snippets)
iter = dataset.make_one_shot_iterator()
# data source
data = np.random.sample((100, 2))
# using a placeholder to retrieve input data
x = tf.placeholder(tf.float32, shape=[None, 2])
dataset = tf.data.Dataset.from_tensor_slices(x)
# create the iterator, it's initializable but not initialized yet
iter = dataset.make_initializable_iterator()
el = iter.get_next()
with tf.Session() as sess:
    # feed the placeholder with data:
    # iter.initializer is the iterator's initialization op, run it with the fed data
    sess.run(iter.initializer, feed_dict={x: data})
    # after the initialization, we can get the data iteratively
    print(sess.run(el))
当然,更普遍的情况是,我们希望同一个 iterator 能够在训练集(train)和测试集(test)之间来回切换。下面是一个稍微复杂一点的例子:
# different input data sources
train_data = (np.random.sample((100, 2)), np.random.sample((100, 1)))
test_data = (np.array([[1, 2]]), np.array([[0]]))
# two placeholders to unpack feature and label
x, y = tf.placeholder(tf.float32, shape=[None, 2]), tf.placeholder(tf.float32, shape=[None, 1])
dataset = tf.data.Dataset.from_tensor_slices((x, y))
# use dataset to create initializable iterator
iter = dataset.make_initializable_iterator()
features, labels = iter.get_next()
with tf.Session() as sess:
    # initialise iterator with train data
    sess.run(iter.initializer, feed_dict={x: train_data[0], y: train_data[1]})
    for _ in range(10):
        sess.run([features, labels])
    # switch to test data by re-running the initializer with the test arrays
    sess.run(iter.initializer, feed_dict={x: test_data[0], y: test_data[1]})
    print(sess.run([features, labels]))
# input data
train_data = (np.random.sample((100, 2)), np.random.sample((100, 1)))
test_data = (np.random.sample((10, 2)), np.random.sample((10, 1)))
# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
test_dataset = tf.data.Dataset.from_tensor_slices(test_data)
# create an iterator of the correct shape and type
# this iterator is generic: it is not bound to any dataset, we only
# indicate its element types and shapes
iter = tf.data.Iterator.from_structure(train_dataset.output_types,
                                       train_dataset.output_shapes)
接下来,我们为该 iterator 定义两个初始化 operation(分别对应训练集和测试集):
# create the initialisation operations, one per dataset
train_init_op = iter.make_initializer(train_dataset)
test_init_op = iter.make_initializer(test_dataset)
# input data
train_data = (np.random.sample((100, 2)), np.random.sample((100, 1)))
test_data = (np.random.sample((10, 2)), np.random.sample((10, 1)))
# create placeholders for features and labels
x, y = tf.placeholder(tf.float32, shape=[None, 2]), tf.placeholder(tf.float32, shape=[None, 1])
# create two datasets, one for training and one for test
train_dataset = tf.data.Dataset.from_tensor_slices((x, y))
test_dataset = tf.data.Dataset.from_tensor_slices((x, y))
# create two iterators from the datasets
train_iterator = train_dataset.make_initializable_iterator()
test_iterator = test_dataset.make_initializable_iterator()
# scalar string placeholder that selects, at run time, which concrete
# iterator the generic iterator below reads from
handle = tf.placeholder(tf.string, shape=[])
iter = tf.data.Iterator.from_string_handle(
    handle, train_dataset.output_types, train_dataset.output_shapes)
# get data from generic iterator
next_elements = iter.get_next()
with tf.Session() as sess:
    # get the two string handles that identify the concrete iterators
    train_handle = sess.run(train_iterator.string_handle())
    test_handle = sess.run(test_iterator.string_handle())
    # initialise iterators
    sess.run(train_iterator.initializer, feed_dict={x: train_data[0], y: train_data[1]})
    sess.run(test_iterator.initializer, feed_dict={x: test_data[0], y: test_data[1]})
    # get data
    # NOTE: fetched values get their own names so the `x` / `y` placeholders
    # are not overwritten and remain usable as feed_dict keys
    for _ in range(10):
        # feed train_handle to get data of train set
        batch_x, batch_y = sess.run(next_elements, feed_dict={handle: train_handle})
        print(batch_x, batch_y)
    # feed test_handle to get data of test set
    batch_x, batch_y = sess.run(next_elements, feed_dict={handle: test_handle})
    print(batch_x, batch_y)
# char_to_id is a python dict mapping each char to its integer id
table = tf.contrib.lookup.HashTable(
    tf.contrib.lookup.KeyValueTensorInitializer(
        # materialise the dict views as lists: Python 3 `dict.keys()` /
        # `dict.values()` views are not expected to convert to tensors
        list(char_to_id.keys()),
        list(char_to_id.values()),
    ),  # initial key/value content of the hashtable
    char_to_id.get(u"<UNK>"),  # default value for not found key
)
有了这个 table 之后,对于一个 char list 形式的 tensor,可以通过如下方式得到它对应的 id list:
# look up each char of the char_list tensor in the table to get its id list
id_list = table.lookup(char_list)
不过在实际使用之前,需要先对 table 执行初始化操作:
with tf.Session() as sess:
    # table.init returns the initialization op of the HashTable; run it
    # before evaluating any lookup result
    sess.run(table.init)