About
A basic Dataset for numeric features from a Pandas dataframe. If there is enough memory the dataframe can be converted into a FloatTensor to speed up data access.
The dataframe consists of feature columns, target columns, and a `kfold` column.
class NumDataset(torch.utils.data.Dataset):
    """In-memory dataset of numeric features taken from a pandas dataframe.

    The rows whose ``kfold`` value is listed in ``kfolds`` are selected once
    in ``__init__`` and converted to float32 tensors, so ``__getitem__`` is a
    plain tensor index with no per-item dataframe access.

    Args:
        df: Dataframe holding the feature columns, the target columns and a
            ``kfold`` column.
        features: Names of the feature columns. Defaults to an empty list.
        targets: Names of the target columns. When ``None`` or empty (e.g.
            for unlabeled data) every item gets a zero target.
        kfolds: Fold ids to keep. Defaults to ``[0]``.
    """

    def __init__(self, df, features=None, targets: list = None, kfolds=None):
        # None sentinels replace the mutable default arguments ([] / [0]).
        features = [] if features is None else features
        kfolds = [0] if kfolds is None else kfolds

        # Apply the fold filter once so that features and targets are built
        # from the same rows and stay aligned index-for-index.
        rows = df[df['kfold'].isin(kfolds)]

        # Convert the dataframe into a float tensor for faster item access.
        self.df = self._as_float_tensor(rows, features)
        if targets:
            self.targets = self._as_float_tensor(rows, targets)
        else:
            # One zero placeholder per selected row keeps __getitem__ valid
            # when no target columns are given.
            self.targets = torch.zeros(len(rows))

    @staticmethod
    def _as_float_tensor(frame, columns):
        """Return ``frame[columns]`` as a float32 tensor, one row per frame row."""
        values = frame[columns].values.reshape(-1, len(columns))
        return torch.tensor(values, dtype=torch.float32)

    def __len__(self):
        """Return the number of rows selected by ``kfolds``."""
        return len(self.df)

    def __getitem__(self, item):
        """Return the ``(features, target)`` tensors stored at index ``item``."""
        return self.df[item], self.targets[item]
To use dict batches in Module.forward, define the dataset's `__getitem__` like this:
.... def __getitem__(self, item):
return {'data': self.df[item], 'targets': self.targets[item]}
....
class MyModel(nn.Module):
....def forward(self, data, targets):
....
DataLoader in Fastai:
train_ds = NumDataset(df = train, features = FEATURES, targets=TARGETS, kfolds = [1,2,3,4])
valid_ds = NumDataset(df = train, features = FEATURES, targets=TARGETS, kfolds = [0])
train_dl = DataLoader(train_ds, batch_size= BS_TRAIN, shuffle = True, num_workers = WORKERS)
valid_dl = DataLoader(valid_ds, batch_size= BS_TEST, shuffle = False, num_workers = WORKERS)
data = DataLoaders(train_dl, valid_dl).cuda()
Training in Fastai:
learn = Learner(data, MYMODEL, loss_func=MYLOSS, metrics=MYMETRICS)
learn.fit_one_cycle(1,1e-3)