CS224W學習筆記
colab1
torch.ones
全1
torch.zeros
全0
torch.rand
[0, 1) 區間內均勻分佈的隨機數
x.shape
shape
x.dtype
數據類型
zeros = torch.zeros(3, 4, dtype=torch.float32)
創建 tensor 時指定數據類型
zeros = zeros.type(torch.long)
改變數據類型
使用torch進行梯度下降:
def train(emb, loss_fn, sigmoid, train_label, train_edge):
    """Train an embedding layer to predict edge labels from dot products.

    Each epoch looks up the embeddings of both endpoints of every edge in
    ``train_edge``, takes the dot product per edge, squashes it with
    ``sigmoid`` and scores it against ``train_label`` with ``loss_fn``.
    Loss and accuracy are printed once per epoch; as a sanity check the loss
    should decrease during training.

    Args:
        emb: Embedding module; called as ``emb(train_edge)`` and optimised
            through ``emb.parameters()``.
        loss_fn: Callable ``loss_fn(pred, train_label)`` returning a scalar
            loss tensor.
        sigmoid: Callable applied to the per-edge dot products.
        train_label: Target label per edge.
        train_edge: Node-index tensor; row 0 and row 1 hold the two endpoints
            of each edge (assumed shape ``(2, num_edges)`` -- confirm against
            the caller).

    Returns:
        None. The embedding weights are updated in place.
    """
    epochs = 500
    learning_rate = 0.1

    optimizer = SGD(emb.parameters(), lr=learning_rate, momentum=0.9)

    for i in range(epochs):
        optimizer.zero_grad()

        # Index 0 / 1 select the embeddings of the two endpoints of each edge.
        embeddings = emb(train_edge)
        dot_pro = torch.sum(embeddings[0] * embeddings[1], dim=1)
        pred = sigmoid(dot_pro)

        loss = loss_fn(pred, train_label)
        loss.backward()
        optimizer.step()

        # `accuracy` is a helper defined elsewhere in this file.
        accu = accuracy(pred, train_label)
        print(f"Epoch {i+1}/{epochs}, Loss: {loss.item():.4f}, Accuracy: {accu:.4f}")
# Loss and activation handed to train()
loss_fn = nn.BCELoss()
sigmoid = nn.Sigmoid()

print(pos_edge_index.shape)

# One label per edge: 1 for positive edges, 0 for negative edges
num_pos = pos_edge_index.shape[1]
num_neg = neg_edge_index.shape[1]
pos_label = torch.ones(num_pos)
neg_label = torch.zeros(num_neg)

# Positive labels first, then negative labels, in a single tensor
train_label = torch.cat((pos_label, neg_label), dim=0)
print(train_label.shape)

# Edges are concatenated in the same order as the labels.
# The network is very small, so the edges are not split into val/test sets.
train_edge = torch.cat((pos_edge_index, neg_edge_index), dim=1)
print(train_edge.shape)

train(emb, loss_fn, sigmoid, train_label, train_edge)
每個epoch內進行:
optimizer.zero_grad()
loss.backward()
optimizer.step()
colab2
使用torch搭建GCN:
聲明一個繼承自 torch.nn.Module 的 class GCN,
在裏面運用 nn 的各種函數、卷積層、BatchNorm 對 tensor 進行操作,最後的訓練步驟同 colab1
optimizer使用torch.optim.Adam
from torch_geometric.datasets import TUDataset
import torch
import os
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Storage directory and identifier of the ENZYMES dataset
    root = './enzymes'
    name = 'ENZYMES'

    pyg_dataset = TUDataset(root=root, name=name)

    # The printed summary shows that this dataset holds 600 graphs
    print(pyg_dataset)
def get_num_classes(pyg_dataset):
    """Return the number of classes of a PyG dataset object.

    Args:
        pyg_dataset: Object exposing a ``num_classes`` attribute.

    Returns:
        The value of ``pyg_dataset.num_classes``.
    """
    return pyg_dataset.num_classes
def get_num_features(pyg_dataset):
    """Return the number of features of a PyG dataset object.

    Args:
        pyg_dataset: Object exposing a ``num_features`` attribute.

    Returns:
        The value of ``pyg_dataset.num_features``.
    """
    return pyg_dataset.num_features
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Query both dataset statistics first, then report them
    num_classes = get_num_classes(pyg_dataset)
    num_features = get_num_features(pyg_dataset)

    classes_msg = "{} dataset has {} classes".format(name, num_classes)
    features_msg = "{} dataset has {} features".format(name, num_features)
    print(classes_msg)
    print(features_msg)
def get_graph_class(pyg_dataset, idx):
    """Return the class label of the graph at ``idx`` as a Python integer.

    Args:
        pyg_dataset: Dataset object whose ``y`` attribute is indexable by
            graph position (presumably one label per graph -- confirm for
            the dataset in use).
        idx: Position of the graph within the dataset.

    Returns:
        int: The label of the requested graph.
    """
    # Indexing `y` yields a single-element tensor; convert it so callers get
    # the plain integer the contract promises rather than a tensor.
    return int(pyg_dataset.y[idx])
# pyg_dataset is a graph-classification dataset at this point
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Show the first graph of the dataset
    graph_0 = pyg_dataset[0]
    print(graph_0)

    # Look up and report the label of one particular graph
    idx = 100
    label = get_graph_class(pyg_dataset, idx)
    label_msg = 'Graph with index {} has label {}'.format(idx, label)
    print(label_msg)
def get_graph_num_edges(pyg_dataset, idx):
    """Return the number of undirected edges of the graph at ``idx``.

    The graph is treated as undirected: an edge stored in both directions
    ``(u, v)`` and ``(v, u)`` is counted once.

    Args:
        pyg_dataset: Indexable dataset; ``pyg_dataset[idx]`` must expose an
            ``edge_index`` tensor (assumed to follow the PyG
            ``(2, num_directed_edges)`` layout).
        idx: Position of the graph within the dataset.

    Returns:
        int: Number of distinct undirected edges.
    """
    edge_index = pyg_dataset[idx].edge_index
    if edge_index.numel() == 0:
        return 0

    # Put the smaller endpoint first so both directions of an edge map to the
    # same column, then count the distinct columns.
    low = torch.min(edge_index[0], edge_index[1])
    high = torch.max(edge_index[0], edge_index[1])
    canonical = torch.stack([low, high], dim=0)
    return int(torch.unique(canonical, dim=1).size(1))
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Report the edge count of one particular graph
    idx = 200
    num_edges = get_graph_num_edges(pyg_dataset, idx)
    edges_msg = 'Graph with index {} has {} edges'.format(idx, num_edges)
    print(edges_msg)
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset
if 'IS_GRADESCOPE_ENV' not in os.environ:
    dataset_name = 'ogbn-arxiv'

    # Load the dataset with a transform that converts it to a sparse tensor
    sparse_transform = T.ToSparseTensor()
    dataset = PygNodePropPredDataset(name=dataset_name, transform=sparse_transform)
    print('The {} dataset has {} graph'.format(dataset_name, len(dataset)))

    # Pull out the first graph object and show it
    data = dataset[0]
    print(data)
def graph_num_features(data):
    """Return the number of features of a PyG data object.

    Args:
        data: Object exposing a ``num_features`` attribute.

    Returns:
        The value of ``data.num_features``.
    """
    return data.num_features
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Report the feature count of the loaded graph
    num_features = graph_num_features(data)
    features_msg = 'The graph has {} features'.format(num_features)
    print(features_msg)
import torch
import pandas as pd
import torch.nn.functional as F
print(torch.__version__)
# The PyG built-in GCNConv
from torch_geometric.nn import GCNConv
import torch_geometric.transforms as T
from ogb.nodeproppred import PygNodePropPredDataset, Evaluator
if 'IS_GRADESCOPE_ENV' not in os.environ:
    dataset_name = 'ogbn-arxiv'
    sparse_transform = T.ToSparseTensor()
    dataset = PygNodePropPredDataset(name=dataset_name, transform=sparse_transform)
    data = dataset[0]

    # Symmetrise the adjacency matrix
    data.adj_t = data.adj_t.to_symmetric()

    # Use CUDA when it is available, otherwise fall back to the CPU
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print('Device: {}'.format(device))

    data = data.to(device)

    # Move the training indices of the dataset split to the same device
    split_idx = dataset.get_idx_split()
    train_idx = split_idx['train'].to(device)
class GCN(torch.nn.Module):
    """Stack of GCNConv layers with BatchNorm1d, ReLU and dropout in between.

    Every layer except the last is followed by batch normalisation, ReLU and
    dropout. The last layer's output is passed through log-softmax unless
    ``return_embeds`` is set.

    Args:
        input_dim: Feature size expected by the first GCNConv layer.
        hidden_dim: Feature size between layers.
        output_dim: Feature size produced by the last GCNConv layer.
        num_layers: Number of GCNConv layers; must be at least 1.
        dropout: Probability of an element getting zeroed.
        return_embeds: If True, skip the log-softmax and return the raw
            output of the last layer.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers,
                 dropout, return_embeds=False):
        super().__init__()

        if num_layers < 1:
            raise ValueError(
                "num_layers must be >= 1, got {}".format(num_layers))
        self.num_layers = num_layers

        # Layer widths: input -> hidden (num_layers - 1 times) -> output.
        # Deriving the channels from this list keeps a single-layer model
        # wired as input_dim -> output_dim.
        dims = [input_dim] + [hidden_dim] * (num_layers - 1) + [output_dim]

        # num_layers GCNConv layers
        self.convs = torch.nn.ModuleList(
            [GCNConv(in_dim, out_dim)
             for in_dim, out_dim in zip(dims[:-1], dims[1:])])

        # num_layers - 1 batch-norm layers, one after each non-final conv
        self.bns = torch.nn.ModuleList(
            [torch.nn.BatchNorm1d(hidden_dim) for _ in range(num_layers - 1)])

        # The log softmax layer
        self.softmax = torch.nn.LogSoftmax(dim=1)

        # Probability of an element getting zeroed
        self.dropout = dropout

        # Skip classification layer and return node embeddings
        self.return_embeds = return_embeds

    def reset_parameters(self):
        """Re-initialise the parameters of every conv and batch-norm layer."""
        for conv in self.convs:
            conv.reset_parameters()
        for bn in self.bns:
            bn.reset_parameters()

    def forward(self, x, adj_t):
        """Run the network on feature tensor ``x`` and adjacency ``adj_t``.

        Returns:
            Log-softmax of the last layer's output, or the raw output when
            ``self.return_embeds`` is True.
        """
        # self.bns is one element shorter than self.convs, so zip pairs each
        # batch norm with its conv and leaves the final conv for below.
        for conv, bn in zip(self.convs, self.bns):
            x = conv(x, adj_t)
            x = bn(x)
            x = F.relu(x)
            # Dropout is only active while the module is in training mode.
            x = F.dropout(x, p=self.dropout, training=self.training)

        out = self.convs[-1](x, adj_t)
        if not self.return_embeds:
            out = self.softmax(out)
        return out
def train(model, data, train_idx, optimizer, loss_fn):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: Module called as ``model(data.x, data.adj_t)``.
        data: Object providing ``x``, ``adj_t`` and ``y``.
        train_idx: Index used to slice both the model output and ``data.y``
            down to the training nodes.
        optimizer: Optimizer holding the model parameters.
        loss_fn: Callable ``loss_fn(output, label)`` returning a scalar loss
            tensor.

    Returns:
        float: The loss value of this step.
    """
    model.train()

    optimizer.zero_grad()
    # Call the module itself rather than .forward() so registered hooks run.
    out = model(data.x, data.adj_t)

    train_out = out[train_idx]
    # Flatten the labels to one entry per selected output row.
    train_label = data.y[train_idx].reshape(train_out.shape[0])
    loss = loss_fn(train_out, train_label)

    loss.backward()
    optimizer.step()

    return loss.item()
# Test function here
@torch.no_grad()
def test(model, data, split_idx, evaluator, save_model_results=False):
    """Evaluate the model on the train, valid and test splits.

    Args:
        model: Module called as ``model(data.x, data.adj_t)``.
        data: Object providing ``x``, ``adj_t`` and ``y``.
        split_idx: Mapping with the keys ``'train'``, ``'valid'`` and
            ``'test'`` holding the indices of each split.
        evaluator: Object whose ``eval`` method takes a dict with ``y_true``
            and ``y_pred`` and returns a mapping containing ``'acc'``.
        save_model_results: If True, write the predictions for all nodes to
            ``ogbn-arxiv_node.csv``.

    Returns:
        Tuple ``(train_acc, valid_acc, test_acc)``.
    """
    model.eval()

    # Predict on the whole graph (no index slicing); call the module itself
    # rather than .forward() so registered hooks run.
    out = model(data.x, data.adj_t)
    y_pred = out.argmax(dim=-1, keepdim=True)

    def _split_acc(split):
        # Accuracy of the predictions restricted to one split.
        idx = split_idx[split]
        return evaluator.eval({
            'y_true': data.y[idx],
            'y_pred': y_pred[idx],
        })['acc']

    train_acc = _split_acc('train')
    valid_acc = _split_acc('valid')
    test_acc = _split_acc('test')

    if save_model_results:
        print ("Saving Model Predictions")

        # Separate name so the `data` argument is not shadowed.
        results = {'y_pred': y_pred.view(-1).cpu().detach().numpy()}
        df = pd.DataFrame(data=results)
        # Save locally as csv
        df.to_csv('ogbn-arxiv_node.csv', sep=',', index=False)

    return train_acc, valid_acc, test_acc
# Please do not change the args
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Hyper-parameters for the node-level model
    args = dict(
        device=device,
        num_layers=3,
        hidden_dim=256,
        dropout=0.5,
        lr=0.01,
        epochs=500,
    )
    args
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Build the node-level GCN from the configured sizes and move it to the
    # selected device
    model = GCN(
        data.num_features,
        args['hidden_dim'],
        dataset.num_classes,
        args['num_layers'],
        args['dropout'],
    )
    model = model.to(device)
    evaluator = Evaluator(name='ogbn-arxiv')
# Please do not change these args
# Training should take <10min using GPU runtime
import copy
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Start from freshly initialised parameters
    model.reset_parameters()

    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    loss_fn = F.nll_loss

    # Keep a copy of the model with the best validation accuracy so far
    best_valid_acc = 0
    best_model = None

    last_epoch = args["epochs"]
    for epoch in range(1, last_epoch + 1):
        loss = train(model, data, train_idx, optimizer, loss_fn)
        result = test(model, data, split_idx, evaluator)
        train_acc, valid_acc, test_acc = result

        if valid_acc > best_valid_acc:
            best_model = copy.deepcopy(model)
            best_valid_acc = valid_acc

        print(f'Epoch: {epoch:02d}, '
              f'Loss: {loss:.4f}, '
              f'Train: {100 * train_acc:.2f}%, '
              f'Valid: {100 * valid_acc:.2f}% '
              f'Test: {100 * test_acc:.2f}%')
下面用global_mean_pool的操作和上面差不多
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.data import DataLoader
from tqdm.notebook import tqdm
import torch
import os
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Load the graph-property-prediction dataset
    dataset = PygGraphPropPredDataset(name='ogbg-molhiv')

    # Use CUDA when it is available, otherwise fall back to the CPU
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print('Device: {}'.format(device))

    split_idx = dataset.get_idx_split()

    # Report the task type declared by the dataset
    print('Task type: {}'.format(dataset.task_type))
# One dataloader per dataset split.
# Graphs are batched 32 at a time; only the training split is shuffled.
if 'IS_GRADESCOPE_ENV' not in os.environ:
    train_split = dataset[split_idx["train"]]
    train_loader = DataLoader(train_split, batch_size=32, shuffle=True, num_workers=0)

    valid_split = dataset[split_idx["valid"]]
    valid_loader = DataLoader(valid_split, batch_size=32, shuffle=False, num_workers=0)

    test_split = dataset[split_idx["test"]]
    test_loader = DataLoader(test_split, batch_size=32, shuffle=False, num_workers=0)
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Please do not change the args
    # Hyper-parameters for the graph-level model
    args = dict(
        device=device,
        num_layers=5,
        hidden_dim=256,
        dropout=0.5,
        lr=0.001,
        epochs=30,
    )
    args
from ogb.graphproppred.mol_encoder import AtomEncoder
from torch_geometric.nn import global_add_pool, global_mean_pool
### GCN to predict graph property
class GCN_Graph(torch.nn.Module):
    """Graph-level predictor built on top of the node-level GCN.

    Node features are encoded with an AtomEncoder, refined by a GCN that
    returns embeddings, averaged per graph with ``global_mean_pool`` and
    mapped to the output size by a linear layer.

    Args:
        hidden_dim: Width of the atom encoder and of every GCN layer.
        output_dim: Size of the prediction produced for each graph.
        num_layers: Number of layers of the node-level GCN.
        dropout: Dropout probability passed to the node-level GCN.
    """

    def __init__(self, hidden_dim, output_dim, num_layers, dropout):
        super().__init__()

        # Encoder for the atoms of molecule graphs
        self.node_encoder = AtomEncoder(hidden_dim)

        # Node embedding model: input, hidden and output sizes are all
        # hidden_dim, and the classification layer is skipped
        self.gnn_node = GCN(
            hidden_dim,
            hidden_dim,
            hidden_dim,
            num_layers,
            dropout,
            return_embeds=True,
        )

        # Global mean pooling over the nodes of each graph
        self.pool = global_mean_pool

        # Output layer
        self.linear = torch.nn.Linear(hidden_dim, output_dim)

    def reset_parameters(self):
        """Re-initialise the node-level GCN and the output layer."""
        self.gnn_node.reset_parameters()
        self.linear.reset_parameters()

    def forward(self, batched_data):
        """Predict the graph property for every graph of a mini-batch.

        Args:
            batched_data: Mini-batch object providing ``x``, ``edge_index``
                and ``batch``.

        Returns:
            The output of the linear layer applied to the pooled node
            embeddings (one row per graph in the mini-batch).
        """
        # Pull the attributes needed from the mini-batch
        node_feats = batched_data.x
        edges = batched_data.edge_index
        graph_ids = batched_data.batch

        encoded = self.node_encoder(node_feats)
        node_embeddings = self.gnn_node(encoded, edges)
        graph_embeddings = self.pool(node_embeddings, graph_ids)
        return self.linear(graph_embeddings)
def train(model, device, data_loader, optimizer, loss_fn):
    """Train the model for one pass over ``data_loader``.

    Batches with a single node, or whose last ``batch`` entry is 0, are
    skipped. Unlabeled (NaN) targets are masked out before the loss is
    computed.

    Args:
        model: Module called as ``model(batch)``.
        device: Device every batch is moved to.
        data_loader: Iterable of mini-batches providing ``x``, ``batch``
            and ``y``.
        optimizer: Optimizer holding the model parameters.
        loss_fn: Callable ``loss_fn(output, labels)`` returning a scalar loss
            tensor.

    Returns:
        float: Loss of the last batch that was trained on, or ``0.0`` when
        every batch was skipped.
    """
    model.train()
    last_loss = None

    for batch in tqdm(data_loader, desc="Iteration"):
        batch = batch.to(device)

        if batch.x.shape[0] == 1 or batch.batch[-1] == 0:
            continue

        # NaN != NaN, so this mask is False exactly for unlabeled targets.
        is_labeled = batch.y == batch.y

        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks run.
        output = model(batch)
        labels = batch.y[is_labeled].float()
        last_loss = loss_fn(output[is_labeled], labels)

        last_loss.backward()
        optimizer.step()

    # Nothing was trained on: report 0.0 instead of failing on a non-tensor.
    if last_loss is None:
        return 0.0
    return last_loss.item()
# The evaluation function
def eval(model, device, loader, evaluator, save_model_results=False, save_file=None):
    """Evaluate the model on every batch of ``loader``.

    Batches with a single node are skipped.

    Args:
        model: Module called as ``model(batch)``.
        device: Device every batch is moved to.
        loader: Iterable of mini-batches providing ``x`` and ``y``.
        evaluator: Object whose ``eval`` method takes a dict with ``y_true``
            and ``y_pred``.
        save_model_results: If True, also write predictions and targets to
            ``ogbg-molhiv_graph_<save_file>.csv``.
        save_file: Suffix of the csv file name; required when
            ``save_model_results`` is True.

    Returns:
        Whatever ``evaluator.eval`` returns for the collected predictions.

    Raises:
        ValueError: If ``save_model_results`` is True but ``save_file`` is
            None.
    """
    # Fail before the evaluation pass instead of with a TypeError after it.
    if save_model_results and save_file is None:
        raise ValueError(
            "save_file must be provided when save_model_results is True")

    model.eval()
    y_true = []
    y_pred = []

    for batch in tqdm(loader, desc="Iteration"):
        batch = batch.to(device)

        if batch.x.shape[0] == 1:
            continue

        with torch.no_grad():
            pred = model(batch)

        # Targets are reshaped to line up with the predictions.
        y_true.append(batch.y.view(pred.shape).detach().cpu())
        y_pred.append(pred.detach().cpu())

    y_true = torch.cat(y_true, dim=0).numpy()
    y_pred = torch.cat(y_pred, dim=0).numpy()

    input_dict = {"y_true": y_true, "y_pred": y_pred}

    if save_model_results:
        print ("Saving Model Predictions")

        # Two-column dataframe: y_pred | y_true
        df = pd.DataFrame(data={
            'y_pred': y_pred.reshape(-1),
            'y_true': y_true.reshape(-1),
        })
        # Save to csv
        df.to_csv(f'ogbg-molhiv_graph_{save_file}.csv', sep=',', index=False)

    return evaluator.eval(input_dict)
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Build the graph-level model (one output per dataset task) and move it
    # to the selected device
    model = GCN_Graph(
        args['hidden_dim'],
        dataset.num_tasks,
        args['num_layers'],
        args['dropout'],
    )
    model = model.to(device)
    evaluator = Evaluator(name='ogbg-molhiv')
# Please do not change these args
# Training should take <10min using GPU runtime
import copy
if 'IS_GRADESCOPE_ENV' not in os.environ:
    # Start from freshly initialised parameters
    model.reset_parameters()

    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'])
    loss_fn = torch.nn.BCEWithLogitsLoss()

    # Keep a copy of the model with the best validation score so far
    best_valid_acc = 0
    best_model = None

    last_epoch = args["epochs"]
    for epoch in range(1, last_epoch + 1):
        print('Training...')
        loss = train(model, device, train_loader, optimizer, loss_fn)

        print('Evaluating...')
        train_result = eval(model, device, train_loader, evaluator)
        val_result = eval(model, device, valid_loader, evaluator)
        test_result = eval(model, device, test_loader, evaluator)

        # Each result is a mapping keyed by the dataset's evaluation metric
        train_acc = train_result[dataset.eval_metric]
        valid_acc = val_result[dataset.eval_metric]
        test_acc = test_result[dataset.eval_metric]

        if valid_acc > best_valid_acc:
            best_model = copy.deepcopy(model)
            best_valid_acc = valid_acc

        print(f'Epoch: {epoch:02d}, '
              f'Loss: {loss:.4f}, '
              f'Train: {100 * train_acc:.2f}%, '
              f'Valid: {100 * valid_acc:.2f}% '
              f'Test: {100 * test_acc:.2f}%')
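順便一提,GCN 就是把鄰居(含自身)的信息 aggregate 到當前點,並用兩端節點度數的平方根做歸一化:$h_i' = \sum_{j \in \mathcal{N}(i) \cup \{i\}} \frac{1}{\sqrt{\hat{d}_i \hat{d}_j}} W h_j$,非常 naive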