AI in Python - Ghostwriter made a bug

Question:
I'm getting NameError: name 'data' is not defined. How do I define it? Ghostwriter wrote this code and I put it verbatim into my Repl, then ran pip install torch. The error starts on line 64.

Repl link:
https://replit.com/@CoderElijah/ai#main.py

Code
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# define the neural network architecture
class Net(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(Net, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        x, hidden = self.lstm(x.unsqueeze(1), hidden)
        x = self.fc(x.squeeze(1))
        return x, hidden

# define the training function
def train(model, iterator, optimizer, criterion, device):
    model.train()
    hidden = (torch.zeros(1, 1, model.hidden_dim).to(device),
              torch.zeros(1, 1, model.hidden_dim).to(device))
    for batch in iterator:
        optimizer.zero_grad()
        text = batch.text.to(device)
        target = batch.target.to(device)
        output, hidden = model(text, hidden)
        loss = criterion(output.view(-1, model.vocab_size), target.view(-1))
        loss.backward()
        optimizer.step()

# define the testing function
def test(model, iterator, criterion, device):
    model.eval()
    hidden = (torch.zeros(1, 1, model.hidden_dim).to(device),
              torch.zeros(1, 1, model.hidden_dim).to(device))
    with torch.no_grad():
        loss = 0
        for batch in iterator:
            text = batch.text.to(device)
            target = batch.target.to(device)
            output, hidden = model(text, hidden)
            loss += criterion(output.view(-1, model.vocab_size), target.view(-1)).item()
        loss /= len(iterator)
    return loss

# define the main function
def main():
    # set up the hyperparameters
    train_file = "train.txt"
    test_file = "test.txt"
    batch_size = 16
    embedding_dim = 128
    hidden_dim = 256
    num_epochs = 10
    lr = 0.001

    # set up the device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # set up the field and the dataset
    TEXT = data.Field(tokenize=lambda x: x.split(), lower=True)
    train_data, test_data = data.TabularDataset.splits(
        path='', train=train_file, test=test_file, format='csv',
        fields=[('text', TEXT), ('target', TEXT)])
    TEXT.build_vocab(train_data)

    # set up the iterators
    train_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, test_data), batch_size=batch_size, device=device)

    # set up the model, criterion and optimizer
    model = Net(len(TEXT.vocab), embedding_dim, hidden_dim).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # train the model
    for epoch in range(num_epochs):
        train(model, train_iterator, optimizer, criterion, device)
        train_loss = test(model, train_iterator, criterion, device)
        test_loss = test(model, test_iterator, criterion, device)
        print(f'Epoch: {epoch+1:02}  | Train Loss: {train_loss:.3f}  | Test Loss: {test_loss:.3f}')

if __name__ == '__main__':
    main()

@CoderElijah Maybe it’s afraid of being replaced
I did do some editing on the code (a.k.a. I played with it for a little while in VS Code) and I think it may be fixed:

New code
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext.legacy import data

# define the neural network architecture
class Net(nn.Module):
    # rest of the code remains the same

# define the training function
def train(model, iterator, optimizer, criterion, device):
    # rest of the code remains the same

# define the testing function
def test(model, iterator, criterion, device):
    # rest of the code remains the same

# define the main function
def main():
    # rest of the code remains the same

    # set up the field and the dataset
    TEXT = data.Field(tokenize=lambda x: x.split(), lower=True)
    train_data, test_data = data.TabularDataset.splits(
        path='', train=train_file, test=test_file, format='csv',
        fields=[('text', TEXT), ('target', TEXT)])
    TEXT.build_vocab(train_data)

    # set up the iterators
    train_iterator, test_iterator = data.BucketIterator.splits(
        (train_data, test_data), batch_size=batch_size, device=device)

    # rest of the code remains the same

if __name__ == '__main__':
    main()
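
The only functional change is the added from torchtext.legacy import data at the top; data.Field, data.TabularDataset, and data.BucketIterator all come from that module. Note that this assumes a torchtext version that still ships the legacy namespace (0.9 through 0.11); it was removed entirely in 0.12.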

Can you just give me the complete code instead of making me cobble your incomplete code into the original?

I tried adding import torch.utils.data as data but I get this new error:

Traceback (most recent call last):
  File "main.py", line 88, in <module>
    main()
  File "main.py", line 65, in main
    TEXT = data.Field(tokenize=lambda x: x.split(), lower=True)
AttributeError: module 'torch.utils.data' has no attribute 'Field'

I also tried torchtext.data. I found out that it may not work anymore in current versions of torchtext.
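
From what I can tell, Field, TabularDataset, and BucketIterator were moved into torchtext.legacy in torchtext 0.9 and removed entirely in 0.12, so the right import depends on the installed version. A version-hedged import along these lines might work (untested on my end; it assumes torchtext 0.11 or older):

try:
    # torchtext 0.9 - 0.11: the old dataset API lives under torchtext.legacy
    from torchtext.legacy import data
except ImportError:
    # torchtext 0.8 and earlier: the same API is still at torchtext.data
    from torchtext import data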

First of all, make sure to install the CPU-only build of torch. With some suffering you will manage to install it on the free tier, but the storage is so small that I am not sure you will also manage to install torchtext.
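
For example, the CPU-only wheel can be installed from PyTorch's CPU package index (the exact command may vary by torch version, so check pytorch.org for your setup):

pip install torch --index-url https://download.pytorch.org/whl/cpu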

Did not do that.

Already installed torch and torchtext with plenty of room to spare.


On the free tier? I did not manage that; I was forced to use the CPU build or it would run out of storage during installation … maybe Replit changed something in the meantime.

I had to use pip because Poetry crashed and reverted the changes. But yes, I use the free tier. I got Ghostwriter through the trial.