I use a Keras data generator whose shuffle argument defaults to False:
class data_generator(keras.utils.Sequence):
    """Batch generator built on keras.utils.Sequence (parts elided with ``...``)."""

    def __init__(self, frames, labels, batch_size, data_dir, shuffle=False):
        """Store the constructor arguments and build the initial index order.

        Args:
            frames: collection of samples; its length is cached as ``size``.
            labels: stored as ``self.labels``.
            batch_size: stored as ``self.batch_size``.
            data_dir: stored as ``self.data_dir``.
            shuffle: when equal to True, ``on_epoch_end`` shuffles the index
                array; defaults to False.
        """
        self.frames = frames
        self.labels = labels
        self.batch_size = batch_size
        self.data_dir = data_dir
        self.shuffle = shuffle
        self.size = len(self.frames)
        # Build self.indexes once up front.
        self.on_epoch_end()

    ...

    def on_epoch_end(self):
        """Rebuild ``self.indexes``, shuffling it in place only when requested."""
        order = np.arange(len(self.frames))
        # Explicit comparison kept on purpose: only values equal to True shuffle.
        if self.shuffle == True:
            np.random.shuffle(order)
        self.indexes = order

    ...
And this is how I create instances for training and validation:
# Training split: the first num_train_examples entries of x_train / y_train.
train_generator = data_generator(
    frames=x_train[:num_train_examples],
    labels=y_train[:num_train_examples],
    batch_size=batch_size,
    data_dir=data_dir,
)
# Validation split: the remaining entries of the same x_train / y_train.
val_generator = data_generator(
    frames=x_train[num_train_examples:],
    labels=y_train[num_train_examples:],
    batch_size=batch_size,
    data_dir=data_dir,
)
And then train the model:
# Train from train_generator and validate with val_generator.
# NOTE(review): fit_generator appears to take its own ``shuffle`` argument
# (reportedly defaulting to True for Sequence inputs), independent of the
# generator's shuffle flag - confirm against the Keras documentation.
model.fit_generator(
    train_generator,
    validation_data=val_generator,
    callbacks=[history],
    epochs=num_epochs,
)
But the generator keeps producing random indexes:
starting training
Epoch 1/1
batch start: 0, batch end: 2
batch start: 24, batch end: 26
batch start: 2, batch end: 4
batch start: 114, batch end: 116
batch start: 4, batch end: 6
batch start: 60, batch end: 62
batch start: 6, batch end: 8
batch start: 68, batch end: 70
batch start: 8, batch end: 10
batch start: 94, batch end: 96
What can I do to stop it from shuffling?
Here are the __getitem__ and __data_generation methods from the generator class:
def __getitem__(self, index):
    """Return the ``(x_batch, y_batch)`` pair for batch number ``index``."""
    # All loading and slicing is delegated to the private helper.
    inputs, targets = self.__data_generation(index)
    return inputs, targets
def __data_generation(self, index):
    """Load the arrays for batch ``index`` and return them with their labels.

    The batch covers positions ``index * batch_size`` up to (but excluding)
    ``min(size, (index + 1) * batch_size)`` of ``self.frames``; each frame is
    read from ``<data_dir>/<frame>.npy``.

    Returns:
        A tuple ``(np.ndarray of loaded arrays, matching slice of self.labels)``.
    """
    start = index * self.batch_size
    # Clamp the end so the final batch may be shorter than batch_size.
    stop = min(self.size, (index + 1) * self.batch_size)
    print('\nbatch start: ' + str(start) + ', batch end: ' + str(stop))
    loaded = [
        np.array(np.load(self.data_dir + '/' + name + '.npy'))
        for name in self.frames[start:stop]
    ]
    return np.array(loaded), self.labels[start:stop]
EDIT: Now I can see the pattern: it looks like non-random batches alternate with random ones.
I suspect the problem is in your __len__(self)
method (if you have defined one, that is). I added a __len__(self)
method to your code and tried it; it no longer shuffles. Here is the code:
class data_generator(keras.utils.Sequence):
    """Keras Sequence that serves batches of arrays loaded from ``.npy`` files.

    Each entry of ``frames`` is resolved to ``<data_dir>/<frame>.npy`` and read
    with ``np.load``; labels are sliced from ``labels`` with the same bounds.
    """

    def __init__(self, frames, labels, batch_size, data_dir, shuffle=False):
        """Store the constructor arguments and build the initial index order.

        Args:
            frames: frame names, used to build the ``.npy`` file paths.
            labels: labels, sliced with the same bounds as ``frames``.
            batch_size: number of samples per batch.
            data_dir: directory prefix for the ``.npy`` files.
            shuffle: when equal to True, ``on_epoch_end`` shuffles
                ``self.indexes``; defaults to False.
        """
        self.frames = frames
        self.labels = labels
        self.batch_size = batch_size
        self.data_dir = data_dir
        self.shuffle = shuffle
        self.size = len(self.frames)
        # Build self.indexes once up front.
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches: ceil(size / batch_size)."""
        batches = np.ceil(self.size / self.batch_size)
        return int(batches)

    def on_epoch_end(self):
        """Rebuild ``self.indexes``, shuffling it in place only when requested."""
        order = np.arange(len(self.frames))
        # Explicit comparison kept on purpose: only values equal to True shuffle.
        if self.shuffle == True:
            np.random.shuffle(order)
        self.indexes = order

    def __getitem__(self, index):
        """Return the ``(x_batch, y_batch)`` pair for batch number ``index``."""
        inputs, targets = self.__data_generation(index)
        return inputs, targets

    # Shuffle-aware alternative, left commented out: it picks samples through
    # self.indexes instead of slicing frames/labels directly. The two loading
    # lines are additionally commented out here, as in the original snippet.
    # def __data_generation(self, index):
    #     'Generates data containing batch_size samples'
    #     current_indices = self.indexes[index*self.batch_size:(index + 1)*self.batch_size]
    #     x_batch = []
    #     y_batch = []
    #     for idx in current_indices:
    #         # video_array = np.load(self.data_dir + '/' + self.frames[idx] + '.npy')
    #         # x_batch.append(np.array(video_array))
    #         y_batch.append(self.labels[idx])
    #     return np.array(x_batch), y_batch

    def __data_generation(self, index):
        """Load the arrays for batch ``index`` and return them with their labels.

        The batch covers positions ``index * batch_size`` up to (but excluding)
        ``min(size, (index + 1) * batch_size)``. This version slices
        ``self.frames`` and ``self.labels`` directly and never reads
        ``self.indexes``, so the ``shuffle`` flag does not affect its output.

        Returns:
            A tuple ``(np.ndarray of loaded arrays, matching slice of self.labels)``.
        """
        start = index * self.batch_size
        # Clamp the end so the final batch may be shorter than batch_size.
        stop = min(self.size, (index + 1) * self.batch_size)
        print('\nbatch start: ' + str(start) + ', batch end: ' + str(stop))
        loaded = [
            np.array(np.load(self.data_dir + '/' + name + '.npy'))
            for name in self.frames[start:stop]
        ]
        return np.array(loaded), self.labels[start:stop]
The above code works as you expected: it does not shuffle. However, the way you have defined your __data_generation function, it would not shuffle even if you wanted it to, because it never uses self.indexes. Therefore, I wrote my own __data_generation
function, which you can see commented out. If you use it, you get the functionality you want: if shuffle is True, it will shuffle; if shuffle is False, it won't. Hope it helps.
Collected from the Internet
Please contact [email protected] to delete if infringement.
Comments