About
A basic Dataset for numeric features from a Pandas dataframe. If there is enough memory the dataframe can be converted into a FloatTensor to speed up data access.
The dataframe consists of feature columns, target columns and a `kfold` column.
class NumDataset(torch.utils.data.Dataset):
    """Dataset of numeric features (and optional targets) held as float tensors.

    The selected rows of the dataframe are converted to ``float32`` tensors
    once, in ``__init__``, so that ``__getitem__`` is a plain tensor index.
    This trades memory for faster data access.

    Args:
        df: Pandas dataframe holding the feature columns, the target columns
            and (optionally) a ``kfold`` column.
        features: Names of the feature columns. ``None`` means no features.
        targets: Names of the target columns. ``None`` or an empty list means
            the data is unlabeled; every sample then gets a scalar ``0.0``
            placeholder target.
        kfolds: Fold ids to keep. Only rows whose ``kfold`` value is in this
            collection are used. Ignored when ``df`` has no ``kfold`` column,
            in which case all rows are used.
    """

    def __init__(self, df, features=None, targets: list = None, kfolds=(0,)):
        super().__init__()
        feature_cols = [] if features is None else list(features)
        target_cols = list(targets) if targets else []

        # Select the rows ONCE and use the same selection for features and
        # targets, so that index i refers to the same row in both tensors
        # and __len__ matches the number of available targets.
        if 'kfold' in df.columns:
            rows = df[df['kfold'].isin(list(kfolds))]
        else:
            rows = df

        if target_cols:
            # Shape: (n_rows, n_targets)
            self.targets = torch.tensor(rows[target_cols].to_numpy(dtype='float32'))
        else:
            # One placeholder target per row so __getitem__ keeps working
            # for unlabeled data.
            self.targets = torch.zeros(len(rows))

        # Convert the dataframe into a float tensor to speed up access.
        # Shape: (n_rows, n_features)
        self.df = torch.tensor(rows[feature_cols].to_numpy(dtype='float32'))

    def __len__(self):
        """Return the number of selected rows."""
        return len(self.df)

    def __getitem__(self, item):
        """Return the ``(features, targets)`` pair stored at index ``item``."""
        return self.df[item], self.targets[item]
To pass each sample to Module.forward as a dict, define the dataset's __getitem__ like this:
....
def __getitem__(self, item):
return {'data': self.df[item], 'targets': self.targets[item]}
....
class MyModel(nn.Module):
....
def forward(self, data, targets):
....

DataLoader in Fastai:
# Both datasets read from the same dataframe; only the fold selection differs.
train_ds = NumDataset(
    df=train,
    features=FEATURES,
    targets=TARGETS,
    kfolds=[1, 2, 3, 4],
)
valid_ds = NumDataset(
    df=train,
    features=FEATURES,
    targets=TARGETS,
    kfolds=[0],
)

# Only the training loader shuffles; the validation loader keeps row order.
train_dl = DataLoader(
    train_ds,
    batch_size=BS_TRAIN,
    shuffle=True,
    num_workers=WORKERS,
)
valid_dl = DataLoader(
    valid_ds,
    batch_size=BS_TEST,
    shuffle=False,
    num_workers=WORKERS,
)
data = DataLoaders(train_dl, valid_dl).cuda()

Training in Fastai:
# Bundle the loaders, model, loss and metrics into a fastai Learner.
learn = Learner(
    data,
    MYMODEL,
    loss_func=MYLOSS,
    metrics=MYMETRICS,
)
# Positional arguments: 1, then 1e-3 (presumably epochs and max learning
# rate; confirm against the fastai version in use).
learn.fit_one_cycle(1, 1e-3)