Code

Here is a brief look at the code of our program:

import torch
import torch.nn as nn
from torch.nn import functional as F

# Hyperparameters

# Batch geometry
batch_size = 32    # number of sequences sampled per batch
chunk_size = 256   # length, in tokens, of each sampled sequence

# Training schedule
max_iters = 5000
eval_interval = 500
eval_iters = 200   # batches averaged per split when estimating the loss
learning_rate = 3e-4

# Model dimensions
n_embd = 384       # input feature width of each attention projection
n_head = 6
n_layer = 6
dropout = 0.2      # probability handed to nn.Dropout

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Read the whole training corpus into memory as a single string.
# NOTE(review): the original path literal was hard-wrapped after "BIGGEST",
# which left the string unterminated; the folder is assumed to be named
# "BIGGEST PROJECT" (with a space) -- confirm against the real directory.
# NOTE(review): this is an absolute, machine-specific path.
with open(
    r'C:\Users\HP\Desktop\Study\Coding\Python\BIGGEST PROJECT\input.txt',
    'r',
    encoding='utf-8',
) as f:
    text = f.read()

# Vocabulary: every distinct character that occurs in the corpus, sorted.
chars = sorted(set(text))
vocab_size = len(chars)

# Lookup tables: character -> integer id, and integer id -> character.
stoi = {ch: i for i, ch in enumerate(chars)}
itos = dict(enumerate(chars))


def encode(s):
    """Return the list of integer ids for the characters of *s*.

    Raises:
        KeyError: if *s* contains a character that is not in ``stoi``.
    """
    return [stoi[c] for c in s]


def decode(l):  # parameter name kept as in the original callable
    """Return the string spelled out by the iterable of integer ids *l*.

    Raises:
        KeyError: if an id is not present in ``itos``.
    """
    return ''.join([itos[i] for i in l])


# The full corpus as a 1-D tensor of int64 character ids.
data = torch.tensor(encode(text), dtype=torch.long)

# Train/validation split: the first 90% of the encoded corpus is used for
# training and the remaining tail is held out for validation.
n = int(0.9 * len(data))
train_data, val_data = data[:n], data[n:]

# Data loading
def get_batch(split):
    """Sample a random batch of input sequences and their next-token targets.

    Args:
        split: ``'train'`` draws from ``train_data``; any other value draws
            from ``val_data``.

    Returns:
        A pair ``(x, y)`` of tensors moved to ``device``. Each stacks
        ``batch_size`` slices of length ``chunk_size``; every row of ``y``
        is the slice starting one position after the matching row of ``x``.
    """
    if split == 'train':
        source = train_data
    else:
        source = val_data

    # One random start offset per sequence, leaving room for a full chunk.
    starts = torch.randint(len(source) - chunk_size, (batch_size,))

    inputs, targets = [], []
    for start in starts:
        inputs.append(source[start:start + chunk_size])
        targets.append(source[start + 1:start + chunk_size + 1])

    return torch.stack(inputs).to(device), torch.stack(targets).to(device)

@torch.no_grad()  # nothing in here calls backward(), so skip gradient tracking
def estimate_loss():
    """Estimate the mean loss on the train and validation splits.

    Averaging over ``eval_iters`` random batches per split gives a much less
    noisy figure than a single batch. The model is put in eval mode for the
    measurement and switched back to train mode before returning.

    Returns:
        A dict with keys ``'train'`` and ``'val'``, each mapping to the mean
        loss for that split (the result of ``Tensor.mean()``).
    """
    model.eval()
    mean_losses = {}
    for split in ('train', 'val'):
        per_batch = torch.zeros(eval_iters)
        for step in range(eval_iters):
            xb, yb = get_batch(split)
            _, batch_loss = model(xb, yb)
            per_batch[step] = batch_loss.item()
        mean_losses[split] = per_batch.mean()
    model.train()
    return mean_losses

class Head(nn.Module):
    """Layers for one attention head: key/query/value projections, a
    lower-triangular mask buffer, and dropout."""

    def __init__(self, head_size):
        """Create the layers for a head whose projections output ``head_size``.

        Args:
            head_size: output feature size of the key, query and value
                projections (their input size is the global ``n_embd``).
        """
        # Must be the double-underscore form: a method named `_init_` is
        # never invoked by `Head(...)`, so nn.Module would stay uninitialised.
        super().__init__()
        # Bias-free linear projections from n_embd to head_size.
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # Lower-triangular matrix of ones, registered as a buffer (kept with
        # the module, not a trainable parameter).
        # NOTE(review): presumably the causal mask used by the forward pass,
        # which is not shown here -- confirm.
        self.register_buffer(
            'tril', torch.tril(torch.ones(chunk_size, chunk_size))
        )
        self.dropout = nn.Dropout(dropout)