pytorch tabular
class TabularModel(nn.Module):
def __init__(self, emb_szs, n_cont, out_sz, layers, p=0.5):
# Call the parent __init__
super().__init__()
# Set up the embedding, dropout, and batch normalization layer attributes
self.embeds = nn.ModuleList([nn.Embedding(ni, nf) for ni,nf in emb_szs])
self.emb_drop = nn.Dropout(p)
self.bn_cont = nn.BatchNorm1d(n_cont)
# Assign a variable to hold a list of layers
layerlist = []
# Assign a variable to store the number of embedding and continuous layers
n_emb = sum((nf for ni,nf in emb_szs))
n_in = n_emb + n_cont
# Iterate through the passed-in "layers" parameter (ie, [200,100]) to build a list of layers
for i in layers:
layerlist.append(nn.Linear(n_in,i))
layerlist.append(nn.ReLU(inplace=True))
layerlist.append(nn.BatchNorm1d(i))
layerlist.append(nn.Dropout(p))
n_in = i
layerlist.append(nn.Linear(layers[-1],out_sz))
# Convert the list of layers into an attribute
self.layers = nn.Sequential(*layerlist)
def forward(self, x_cat, x_cont):
# Extract embedding values from the incoming categorical data
embeddings = []
for i,e in enumerate(self.embeds):
embeddings.append(e(x_cat[:,i]))
x = torch.cat(embeddings, 1)
# Perform an initial dropout on the embeddings
x = self.emb_drop(x)
# Normalize the incoming continuous data
x_cont = self.bn_cont(x_cont)
x = torch.cat([x, x_cont], 1)
# Set up model layers
x = self.layers(x)
return x
# one hidden layer containing 50 neurons
model = TabularModel(emb_szs, conts.shape[1], 2, [50], p=0.4)
#train
criterion = nn.CrossEntropyLoss() # classification
#criterion = nn.MSELoss() for regression
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 300
losses = []
for i in range(epochs):
i+=1
y_pred = model(cat_train, con_train)
loss = criterion(y_pred, y_train)
losses.append(loss)
# a neat trick to save screen space:
if i%25 == 1:
print(f'epoch: {i:3} loss: {loss.item():10.8f}')
optimizer.zero_grad()
loss.backward()
optimizer.step()