刘二大人: PyTorch Deep Learning Practice (Study Notes)
2022-07-19 05:20:00 【刚学编程的小白( •̥́ ˍ •̀ू )】
PyTorch Deep Learning Practice
Notes from studying 刘二大人's PyTorch deep learning video series on Bilibili.
1. Linear Model
import numpy as np
import matplotlib.pyplot as plt

# Exhaustive search over candidate weights
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]

def forward(x):
    return x * w  # w is the global weight swept in the loop below

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) * (y_pred - y)

w_list = []
mse_list = []
for w in np.arange(0.0, 4.1, 0.1):
    print('w=', w)
    l_sum = 0
    for x_val, y_val in zip(x_data, y_data):
        y_pred_val = forward(x_val)
        loss_val = loss(x_val, y_val)
        l_sum += loss_val
        print('\t', x_val, y_val, y_pred_val, loss_val)
    print('MSE=', l_sum / 3)
    w_list.append(w)
    mse_list.append(l_sum / 3)

plt.plot(w_list, mse_list)
plt.ylabel('Loss')
plt.xlabel('w')
plt.show()
2. Gradient Descent
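For reference, the scheme implemented below: the cost is the mean squared error over the training set, and each step moves w against the cost gradient (learning rate α = 0.01 in the code):

$$\mathrm{cost}(w) = \frac{1}{N}\sum_{n=1}^{N}\big(x_n w - y_n\big)^2, \qquad \frac{\partial\,\mathrm{cost}}{\partial w} = \frac{1}{N}\sum_{n=1}^{N} 2\,x_n\big(x_n w - y_n\big), \qquad w \leftarrow w - \alpha\,\frac{\partial\,\mathrm{cost}}{\partial w}$$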
import matplotlib.pyplot as plt

# Batch gradient descent
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w = 1.0

def forward(x):
    return x * w

# MSE over the whole dataset
def cost(xs, ys):
    cost = 0
    for x, y in zip(xs, ys):
        y_pred = forward(x)
        cost += (y_pred - y) ** 2
    return cost / len(ys)

def gradient(xs, ys):
    grad = 0
    for x, y in zip(xs, ys):
        grad += 2 * x * (x * w - y)
    return grad / len(xs)

epoch_list = []
cost_list = []
print('Predict (before training)', 4, forward(4))
# train for 100 epochs
for epoch in range(100):
    cost_val = cost(x_data, y_data)
    grad_val = gradient(x_data, y_data)
    w -= 0.01 * grad_val  # 0.01 is the learning rate
    print('Epoch:', epoch, 'w=', w, 'loss=', cost_val)
    epoch_list.append(epoch)
    cost_list.append(cost_val)
print('Predict (after training)', 4, forward(4))

plt.plot(epoch_list, cost_list)
plt.ylabel('cost')
plt.xlabel('epoch')
plt.show()
import matplotlib.pyplot as plt

# Stochastic gradient descent: update w after every single sample
x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w = 1.0

def forward(x):
    return x * w

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

def gradient(x, y):
    return 2 * x * (x * w - y)

epoch_list = []
loss_list = []
print('Predict (before training)', 4, forward(4))
# train for 100 epochs
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        grad = gradient(x, y)
        w = w - 0.01 * grad
        # print("\tgrad:", x, y, grad)
        l = loss(x, y)
    print("Epoch", epoch + 1, ": w=", w, "loss=", l)
    epoch_list.append(epoch)
    loss_list.append(l)
print('Predict (after training)', 4, forward(4))

plt.plot(epoch_list, loss_list)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.show()
3. Backpropagation
# Backpropagation
# w is a Tensor; a Tensor holds both data and grad, each of which is itself a Tensor.
# grad starts out as None; after calling l.backward(), w.grad becomes a Tensor, so the
# update of w.data must go through w.grad.data.
# If w requires a gradient, every tensor derived from w in the computational graph
# requires one by default.
import torch

x_data = [1.0, 2.0, 3.0]
y_data = [2.0, 4.0, 6.0]
w = torch.tensor([1.0])
w.requires_grad = True  # track gradients for w

def forward(x):
    return x * w

def loss(x, y):
    y_pred = forward(x)
    return (y_pred - y) ** 2

print('Predict (before training)', 4, forward(4).item())
for epoch in range(100):
    for x, y in zip(x_data, y_data):
        l = loss(x, y)
        l.backward()  # compute gradients; w.grad goes from None to a Tensor, and w.grad.data drives the update of w.data
        print('\tgrad', x, y, w.grad.item())  # item() turns a one-element tensor into a Python scalar
        w.data = w.data - 0.01 * w.grad.data
        w.grad.data.zero_()  # reset the gradient to zero
    print('progress:', epoch, l.item())  # read the loss via l.item(); using the tensor l directly would keep extending the computational graph
print('Predict (after training)', 4, forward(4).item())
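# A minimal standalone illustration of the None -> Tensor transition of .grad described above:
v = torch.tensor([2.0], requires_grad=True)
out = (v * 3.0 - 4.0) ** 2
print(v.grad)   # None: backward() has not run yet
out.backward()
print(v.grad)   # tensor([12.]): d/dv (3v-4)^2 = 6(3v-4) = 12 at v=2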
4. Linear Regression with PyTorch
import torch

# Linear regression with PyTorch
# x and y are 3x1 matrices: 3 samples, each with a single feature.
x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[2.0], [4.0], [6.0]])

# nn.Module implements __call__(), and __call__() invokes forward(),
# so subclasses must override forward().
class LinearModel(torch.nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()
        # (1, 1) are the feature dimensions of input x and output y; both are 1-dimensional here.
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        y_pred = self.linear(x)
        return y_pred

model = LinearModel()
criterion = torch.nn.MSELoss(reduction='sum')  # sum instead of mean (size_average=False is deprecated)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(1000):
    # the forward pass:
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print('progress : ', epoch, loss.item())
    optimizer.zero_grad()
    loss.backward()   # backward pass: compute gradients
    optimizer.step()  # update w and b

print('w = ', model.linear.weight.item())
print('b = ', model.linear.bias.item())
x_test = torch.Tensor([[4.0]])
y_test = model(x_test)
print('y_pred = ', y_test.data)
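# Quick check of the __call__ -> forward dispatch noted above (illustrative):
with torch.no_grad():
    print(torch.allclose(model(x_test), model.forward(x_test)))  # True: model(x) routes through forward(x)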
5. Logistic Regression
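For reference, the model below squashes the linear output with the logistic (sigmoid) function and trains with binary cross-entropy (BCE):

$$\hat{y} = \sigma(wx + b) = \frac{1}{1 + e^{-(wx+b)}}, \qquad \mathrm{BCE}(y, \hat{y}) = -\big[\,y \log \hat{y} + (1-y)\log(1-\hat{y})\,\big]$$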
# Logistic regression
import numpy as np
import matplotlib.pyplot as plt
import torch

# 1. Prepare dataset
x_data = torch.Tensor([[1.0], [2.0], [3.0]])
y_data = torch.Tensor([[0], [0], [1]])

# 2. Define model
class LogisticRegressionModel(torch.nn.Module):
    def __init__(self):
        super(LogisticRegressionModel, self).__init__()
        self.linear = torch.nn.Linear(1, 1)

    def forward(self, x):
        y_pred = torch.sigmoid(self.linear(x))  # F.sigmoid is deprecated; use torch.sigmoid
        return y_pred

model = LogisticRegressionModel()

# 3. Construct loss and optimizer
# By default the loss is averaged over elements; with reduction='sum' it is accumulated instead.
criterion = torch.nn.BCELoss(reduction='sum')
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# 4. Training cycle
for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print('progress:', epoch, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

x = np.linspace(0, 10, 200)  # 200 points in [0, 10]
x_t = torch.Tensor(x).view((200, 1))  # reshape to a 200x1 matrix
y_t = model(x_t)
y = y_t.data.numpy()
plt.plot(x, y)
# plt.plot([0, 10], [0.5, 0.5], c='r')  # horizontal line at the p = 0.5 decision threshold
plt.xlabel('Hours')
plt.ylabel('Probability of Pass')
plt.grid()  # show grid lines
plt.show()
6. Handling Multi-Dimensional Inputs
# Handling multi-dimensional input features
import numpy as np
import torch
import matplotlib.pyplot as plt

xy = np.loadtxt('../dataset01/diabetes.csv.gz', delimiter=',', dtype=np.float32)
x_data = torch.from_numpy(xy[:, :-1])  # all rows; every column except the last
print("input data.shape", x_data.shape)
y_data = torch.from_numpy(xy[:, [-1]])  # [-1] keeps the last column as a matrix

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x

model = Model()
criterion = torch.nn.BCELoss(reduction='mean')
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

epoch_list = []
loss_list = []
for epoch in range(100):
    y_pred = model(x_data)
    loss = criterion(y_pred, y_data)
    print(epoch, loss.item())
    epoch_list.append(epoch)
    loss_list.append(loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

plt.plot(epoch_list, loss_list)
plt.ylabel('loss')
plt.xlabel('epoch')
plt.show()
7. Loading Datasets
import torch
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Loading a dataset
# 1. Prepare dataset
# The magic methods __init__, __getitem__ and __len__ load the data, fetch one sample
# by index, and report the total sample count, respectively.
class DiabetesDataset(Dataset):
    def __init__(self, filepath):
        xy = np.loadtxt(filepath, delimiter=',', dtype=np.float32)
        self.len = xy.shape[0]  # shape is (rows, columns)
        self.x_data = torch.from_numpy(xy[:, :-1])
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.len

dataset = DiabetesDataset('../dataset01/diabetes.csv.gz')
# num_workers=2 spawns worker processes, hence the __main__ guard around training below
train_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=True, num_workers=2)

# 2. Design model using class
class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.linear1 = torch.nn.Linear(8, 6)
        self.linear2 = torch.nn.Linear(6, 4)
        self.linear3 = torch.nn.Linear(4, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.linear1(x))
        x = self.sigmoid(self.linear2(x))
        x = self.sigmoid(self.linear3(x))
        return x

model = Model()

# 3. Construct loss and optimizer
criterion = torch.nn.BCELoss(reduction='mean')  # reduction='mean' returns the average loss
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# 4. Training cycle: forward, backward, update
if __name__ == '__main__':
    for epoch in range(100):
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            y_pred = model(inputs)
            loss = criterion(y_pred, labels)
            print(epoch, i, loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
8. Multi-Class Classification
# Multi-class classification
# softmax: exponentiating makes every input positive (even negative logits), and the
# resulting class probabilities sum to 1.
# CrossEntropyLoss <==> LogSoftmax + NLLLoss (a numeric check follows at the end of this section)
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(),  # convert the PIL image (HxWxC, values 0-255) to a tensor (CxHxW, values in [0.0, 1.0])
                                transforms.Normalize((0.1307,), (0.3081,))])  # normalize with the dataset mean and std

train_dataset = datasets.MNIST(root='../dataset02/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset02/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.l1 = torch.nn.Linear(784, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 784)  # -1 lets PyTorch infer the row count from the fixed 784 columns
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return self.l5(x)  # no activation on the last layer: CrossEntropyLoss expects raw logits

model = Net()
criterion = torch.nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)  # momentum term

def train(epoch):
    running_loss = 0.0
    # enumerate yields the batch index along with the batch
    for batch_idx, data in enumerate(train_loader, 0):
        # each batch holds two tensors: the features of 64 images and their digit labels
        inputs, target = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # report every 300 batches
        if batch_idx % 300 == 299:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

def kuang():
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)  # dim=1: max over the class dimension (dim 0 is the batch)
            total += labels.size(0)  # labels.size(0) is the batch size, so this accumulates the sample count
            correct += (predicted == labels).sum().item()  # elementwise comparison: 1 for true, 0 for false
    print('accuracy on test set: %d %% ' % (100 * correct / total))  # print as a percentage

if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        kuang()
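# Illustrative check of the CrossEntropyLoss <==> LogSoftmax + NLLLoss note above:
logits = torch.randn(3, 10)
target_check = torch.LongTensor([0, 4, 9])
ce = torch.nn.CrossEntropyLoss()(logits, target_check)
nll = torch.nn.NLLLoss()(F.log_softmax(logits, dim=1), target_check)
print(torch.allclose(ce, nll))  # True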
9. Convolutional Neural Networks
# Convolutional neural network
# Each convolution kernel must have as many channels as its input; the number of kernels
# equals the number of output channels. (A weight-shape check follows at the end of this section.)
import torch
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])

train_dataset = datasets.MNIST(root='../dataset02/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset02/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class Kuang(torch.nn.Module):
    def __init__(self):
        super(Kuang, self).__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = torch.nn.Conv2d(10, 20, kernel_size=5)
        self.pooling = torch.nn.MaxPool2d(2)
        self.fc = torch.nn.Linear(320, 10)

    def forward(self, x):
        # flatten the data from (n, 1, 28, 28) to (n, 320)
        batch_size = x.size(0)
        x = F.relu(self.pooling(self.conv1(x)))
        x = F.relu(self.pooling(self.conv2(x)))
        x = x.view(batch_size, -1)  # the -1 is inferred as 320 here
        x = self.fc(x)
        return x

model = Kuang()
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        # inputs, target = inputs.to(device), target.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

def shen():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            # images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        shen()
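# Shape check for the channel rule above (illustrative): a Conv2d weight tensor has shape
# (out_channels, in_channels, kernel_h, kernel_w)
conv_check = torch.nn.Conv2d(10, 20, kernel_size=5)
print(conv_check.weight.shape)  # torch.Size([20, 10, 5, 5])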
# Convolutional neural network 2: GoogLeNet-style Inception modules
# A 1x1 convolution mixes information across channels and, used as a bottleneck, it
# dramatically reduces the computation of the convolution that follows (see the count below).
# The 4 branches are concatenated along dim=1 (channels): 24 + 16 + 24 + 24 = 88.
# The 1408 below can be found by printing x.shape after x = x.view(in_size, -1).
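# Rough multiplication counts behind the claim above (assuming a 28x28 feature map,
# 192 input channels and 32 output channels, as in the lecture example):
direct = 5 * 5 * 28 * 28 * 192 * 32                                   # 120,422,400
bottleneck = 1 * 1 * 28 * 28 * 192 * 16 + 5 * 5 * 28 * 28 * 16 * 32   # 12,443,648
print(direct, bottleneck)  # the 1x1 bottleneck cuts the work by roughly 10x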
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])

train_dataset = datasets.MNIST(root='../dataset02/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset02/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class InceptionA(nn.Module):
    def __init__(self, in_channels):
        super(InceptionA, self).__init__()
        self.branch1x1 = nn.Conv2d(in_channels, 16, kernel_size=1)

        self.branch5x5_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch5x5_2 = nn.Conv2d(16, 24, kernel_size=5, padding=2)

        self.branch3x3_1 = nn.Conv2d(in_channels, 16, kernel_size=1)
        self.branch3x3_2 = nn.Conv2d(16, 24, kernel_size=3, padding=1)
        self.branch3x3_3 = nn.Conv2d(24, 24, kernel_size=3, padding=1)

        self.branch_pool = nn.Conv2d(in_channels, 24, kernel_size=1)

    def forward(self, x):
        branch1x1 = self.branch1x1(x)

        branch5x5 = self.branch5x5_1(x)
        branch5x5 = self.branch5x5_2(branch5x5)

        branch3x3 = self.branch3x3_1(x)
        branch3x3 = self.branch3x3_2(branch3x3)
        branch3x3 = self.branch3x3_3(branch3x3)

        branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        branch_pool = self.branch_pool(branch_pool)

        outputs = [branch1x1, branch5x5, branch3x3, branch_pool]
        return torch.cat(outputs, dim=1)  # (b, c, w, h): channels are dim=1

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(88, 20, kernel_size=5)  # 88 = 24x3 + 16
        self.incep1 = InceptionA(in_channels=10)  # matches the 10 output channels of conv1
        self.incep2 = InceptionA(in_channels=20)  # matches the 20 output channels of conv2
        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(1408, 10)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))
        x = self.incep1(x)
        x = F.relu(self.mp(self.conv2(x)))
        x = self.incep2(x)
        x = x.view(in_size, -1)
        x = self.fc(x)
        return x

model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        outputs = model(inputs)
        loss = criterion(outputs, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

def kuang():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        kuang()
# Convolutional neural network 3: residual connections (ResNet)
# Problem addressed: vanishing gradients.
# Skip connection: H(x) = F(x) + x. The two tensors must have identical dimensions, and
# the addition happens before the final activation. No pooling inside the block, since
# pooling would change the tensor dimensions.
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim

batch_size = 64
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,))])

train_dataset = datasets.MNIST(root='../dataset02/mnist/', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_dataset = datasets.MNIST(root='../dataset02/mnist/', train=False, download=True, transform=transform)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

class ResidualBlock(nn.Module):
    def __init__(self, channels):
        super(ResidualBlock, self).__init__()
        self.channels = channels
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1)

    def forward(self, x):
        y = F.relu(self.conv1(x))
        y = self.conv2(y)
        return F.relu(x + y)
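# Illustrative sanity check: the block keeps (N, C, H, W) unchanged, which is exactly
# what makes the x + y addition legal
block_check = ResidualBlock(16)
print(block_check(torch.randn(1, 16, 12, 12)).shape)  # torch.Size([1, 16, 12, 12])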
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.rblock1 = ResidualBlock(16)
        self.rblock2 = ResidualBlock(32)
        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(512, 10)  # 512 = 32 x 4 x 4 after the two conv + pool stages

    def forward(self, x):
        in_size = x.size(0)
        x = self.mp(F.relu(self.conv1(x)))
        x = self.rblock1(x)
        x = self.mp(F.relu(self.conv2(x)))
        x = self.rblock2(x)
        x = x.view(in_size, -1)
        x = self.fc(x)
        return x

model = Net()
criterion = torch.nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

def train(epoch):
    running_loss = 0.0
    for batch_idx, data in enumerate(train_loader, 0):
        inputs, target = data
        outputs = model(inputs)
        loss = criterion(outputs, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 300 == 299:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, batch_idx + 1, running_loss / 300))
            running_loss = 0.0

def kuang():
    correct = 0
    total = 0
    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print('accuracy on test set: %d %% ' % (100 * correct / total))

if __name__ == '__main__':
    for epoch in range(10):
        train(epoch)
        kuang()
10. Recurrent Neural Networks
# Recurrent neural network: RNNCell
import torch

batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2

cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)
# (seq, batch, features)
dataset = torch.randn(seq_len, batch_size, input_size)  # seq_len: sequence length
hidden = torch.zeros(batch_size, hidden_size)

for idx, input in enumerate(dataset):
    print('=' * 20, idx, '=' * 20)
    print('Input size:', input.shape)
    hidden = cell(input, hidden)
    print('output size:', hidden.shape)
    print(hidden)
# Recurrent neural network 2: the full RNN module
import torch

batch_size = 1
seq_len = 3
input_size = 4
hidden_size = 2
num_layers = 1

# num_layers: number of stacked RNN layers; each layer produces its own hidden state.
# If the RNN is constructed with batch_first=True, the batch_size and seq_len dimensions
# of inputs swap places.
cell = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
# (seqLen, batchSize, inputSize)
inputs = torch.randn(seq_len, batch_size, input_size)
hidden = torch.zeros(num_layers, batch_size, hidden_size)

out, hidden = cell(inputs, hidden)
print('Output size:', out.shape)
print('Output:', out)
print('Hidden size:', hidden.shape)
print('Hidden:', hidden)
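# batch_first=True variant (sketch): inputs/outputs become (batchSize, seqLen, feature),
# while the hidden state keeps its (numLayers, batchSize, hiddenSize) shape
cell_bf = torch.nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
out_bf, hidden_bf = cell_bf(inputs.transpose(0, 1), hidden)
print('Output size (batch_first):', out_bf.shape)  # torch.Size([1, 3, 2])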
# Recurrent neural network 3: learning "hello" -> "ohlol" with RNNCell
import torch

input_size = 4
hidden_size = 4
batch_size = 1

idx2char = ['e', 'h', 'l', 'o']  # look characters up by index
x_data = [1, 0, 2, 3, 3]  # indices of the characters in "hello"
y_data = [3, 1, 2, 3, 2]  # indices of the characters in "ohlol"

# one-hot vectors
one_hot_lookup = [[1, 0, 0, 0],
                  [0, 1, 0, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]]
x_one_hot = [one_hot_lookup[x] for x in x_data]  # (seqLen, inputSize)

# reshape the inputs to (seqLen, batchSize, inputSize)
inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
# reshape the labels to (seqLen, 1)
labels = torch.LongTensor(y_data).view(-1, 1)
print(inputs.shape, labels.shape)  # torch.Size([5, 1, 4]) torch.Size([5, 1])

class Model(torch.nn.Module):
    def __init__(self, input_size, hidden_size, batch_size):
        super(Model, self).__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size, hidden_size=self.hidden_size)

    def forward(self, inputs, hidden):
        hidden = self.rnncell(inputs, hidden)
        return hidden

    def init_hidden(self):
        return torch.zeros(self.batch_size, self.hidden_size)  # the initial hidden state h0, all zeros

net = Model(input_size, hidden_size, batch_size)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.1)  # Adam optimizer

for epoch in range(15):
    loss = 0
    optimizer.zero_grad()
    hidden = net.init_hidden()  # each epoch starts by computing h0
    print('Predicted string:', end='')
    # shape of inputs: (seqLen, batchSize, inputSize); shape of input: (batchSize, inputSize)
    # shape of labels: (seqLen, 1); shape of label: (1)
    for input, label in zip(inputs, labels):
        hidden = net(input, hidden)
        # mind the shapes for cross entropy: hidden is ([1, 4]), label is ([1])
        loss += criterion(hidden, label)  # accumulate the loss tensors (not loss.item()); their sum is the final loss
        _, idx = hidden.max(dim=1)  # index of the largest value
        print(idx2char[idx.item()], end='')
    loss.backward()
    optimizer.step()
    print(', Epoch [%d/15] loss=%.4f' % (epoch + 1, loss.item()))
# Embedding representation
# An embedding maps a high-dimensional sparse encoding into a dense low-dimensional
# space, i.e. it reduces the dimensionality of the data.
import torch

input_size = 4
num_class = 4
hidden_size = 8
embedding_size = 10
batch_size = 1
num_layers = 2
seq_len = 5

idx2char_1 = ['e', 'h', 'l', 'o']
x_data = [[1, 0, 2, 2, 3]]
y_data = [3, 1, 2, 2, 3]

# inputs has shape (batchSize, seqLen)
inputs = torch.LongTensor(x_data)
# labels has shape (batchSize * seqLen)
labels = torch.LongTensor(y_data)

class Model(torch.nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        # Embedding(input_size, embedding_size) builds an input_size x embedding_size lookup matrix
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        self.rnn = torch.nn.RNN(input_size=embedding_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                batch_first=True)
        # with batch_first=True, RNN input: (batchSize, seqLen, embeddingSize); output: (batchSize, seqLen, hiddenSize)
        self.fc = torch.nn.Linear(hidden_size, num_class)  # map hiddenSize to the number of classes

    def forward(self, x):
        hidden = torch.zeros(num_layers, x.size(0), hidden_size)
        x = self.emb(x)  # embedding lookup: long-integer indices become dense vectors
        x, _ = self.rnn(x, hidden)
        x = self.fc(x)
        return x.view(-1, num_class)  # flatten to (batchSize * seqLen, numClass) for cross entropy

net = Model()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=0.05)

for epoch in range(15):
    optimizer.zero_grad()
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    _, idx = outputs.max(dim=1)
    idx = idx.data.numpy()
    print('Predicted string: ', ''.join([idx2char_1[x] for x in idx]), end='')
    print(", Epoch [%d/15] loss = %.3f" % (epoch + 1, loss.item()))
# Recurrent neural network (advanced): classifying names by country
''' Predict a person's country from their name. Names vary in length; the longest has 10
characters, so each name becomes a 10-dimensional input tensor. All characters are
English letters, so they map directly onto ASCII codes:
Maclean -> ['M', 'a', 'c', 'l', 'e', 'a', 'n'] -> [77 97 99 108 101 97 110]
        -> [77 97 99 108 101 97 110 0 0 0]
There are 18 countries, indexed 0-17. Both the training and test tables have the name in
the first column and the country in the second. '''
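# Plain-Python illustration of the encoding/padding described above:
codes = [ord(c) for c in 'Maclean']
padded = codes + [0] * (10 - len(codes))
print(padded)  # [77, 97, 99, 108, 101, 97, 110, 0, 0, 0]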
import torch
import time
import csv
import gzip
from torch.utils.data import DataLoader
import datetime
import matplotlib.pyplot as plt
import numpy as np

HIDDEN_SIZE = 100  # dimensionality of the GRU hidden state
BATCH_SIZE = 256
N_LAYER = 2        # number of GRU layers
N_EPOCHS = 100     # train for 100 epochs
N_CHARS = 128      # number of possible characters (ASCII)
USE_GPU = False    # whether to use the GPU

# Dataset handling
class NameDataset():
    def __init__(self, is_train_set=True):
        filename = '../dataset01/names_train.csv.gz' if is_train_set else '../dataset01/names_test.csv.gz'
        with gzip.open(filename, 'rt') as f:  # open the compressed file as f
            reader = csv.reader(f)  # read the table
            rows = list(reader)     # a list of (name, country) rows
        self.names = [row[0] for row in rows]  # the names
        self.len = len(self.names)             # number of names
        self.countries = [row[1] for row in rows]  # the country labels
        # set() removes duplicate country names; sorted() orders the resulting set
        self.country_list = list(sorted(set(self.countries)))  # the 18 distinct country names
        self.country_dict = self.getCountryDict()  # map country name -> index
        self.country_num = len(self.country_list)  # 18

    def __getitem__(self, index):
        return self.names[index], self.country_dict[self.countries[index]]

    def __len__(self):
        return self.len

    def getCountryDict(self):
        country_dict = dict()  # empty dictionary
        for idx, country_name in enumerate(self.country_list, 0):  # index and country name
            country_dict[country_name] = idx  # store each country under its index
        return country_dict

    def idx2country(self, index):  # country name for a given index
        return self.country_list[index]

    def getCountrysNum(self):  # number of countries
        return self.country_num

trainset = NameDataset(is_train_set=True)
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testset = NameDataset(is_train_set=False)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)
N_COUNTRY = trainset.getCountrysNum()  # output size of the model

# Move a tensor to the GPU if one is in use
def create_tensor(tensor):
    if USE_GPU:
        device = torch.device("cuda:0")
        tensor = tensor.to(device)
    return tensor
class RNNClassifier(torch.nn.Module):
    # input_size=128, hidden_size=100, output_size=18
    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
        super(RNNClassifier, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = 2 if bidirectional else 1  # bidirectional RNN or not
        # embedding input shape (seqLen, batch) -> output shape (seqLen, batch, hiddenSize)
        self.embedding = torch.nn.Embedding(input_size, hidden_size)
        # After embedding, the input size is 100 and hidden_size is also 100, so both
        # constructor arguments are hidden_size.
        self.gru = torch.nn.GRU(hidden_size, hidden_size, n_layers, bidirectional=bidirectional)
        # A bidirectional GRU yields two final hidden states that get concatenated, so the
        # linear layer's input size is hidden_size * self.n_directions; its output size is
        # 18, one logit per country.
        self.fc = torch.nn.Linear(hidden_size * self.n_directions, output_size)

    def forward(self, input, seq_lengths):
        input = input.t()  # transpose: Batch x Seq -> Seq x Batch, as the embedding expects
        batch_size = input.size(1)
        hidden = self._init_hidden(batch_size)
        embedding = self.embedding(input)
        # pack_padded_sequence raises an error when seq_lengths is a GPU tensor, so move it
        # to the CPU here; without a GPU this line can simply be commented out.
        seq_lengths = seq_lengths.cpu()
        # pack them up: skips the zero padding so it does not participate in the
        # computation, which speeds things up. Requires the batch sorted by valid length
        # in descending order before the embedding step.
        gru_input = torch.nn.utils.rnn.pack_padded_sequence(embedding, seq_lengths)
        output, hidden = self.gru(gru_input, hidden)  # a bidirectional GRU returns two final hidden states
        # concatenate the two directions if bidirectional
        if self.n_directions == 2:
            hidden_cat = torch.cat([hidden[-1], hidden[-2]], dim=1)
        else:
            hidden_cat = hidden[-1]
        fc_output = self.fc(hidden_cat)
        return fc_output

    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions, batch_size, self.hidden_size)
        return create_tensor(hidden)

classifier = RNNClassifier(N_CHARS, HIDDEN_SIZE, N_COUNTRY, N_LAYER)
# First convert every character of each name to its ASCII code
def name2list(name):
    arr = [ord(c) for c in name]
    return arr, len(arr)

# Build the padded ASCII tensor, the length tensor (sorted descending), and the country tensor
def make_tensors(names, countries):
    sequences_and_lengths = [name2list(name) for name in names]  # ASCII codes per name
    name_sequences = [sl[0] for sl in sequences_and_lengths]  # the ASCII code lists
    seq_lengths = torch.LongTensor([sl[1] for sl in sequences_and_lengths])  # the length of each name
    countries = countries.long()
    # make tensor of name, BatchSize x SeqLen
    seq_tensor = torch.zeros(len(name_sequences), seq_lengths.max()).long()  # all-zero tensor of shape (num names, longest name)
    for idx, (seq, seq_len) in enumerate(zip(name_sequences, seq_lengths), 0):
        seq_tensor[idx, :seq_len] = torch.LongTensor(seq)  # fill each row with that name's ASCII codes
    # sort by length to use pack_padded_sequence
    seq_lengths, perm_idx = seq_lengths.sort(dim=0, descending=True)  # sort lengths descending; perm_idx is the permutation
    seq_tensor = seq_tensor[perm_idx]  # reorder the ASCII tensor accordingly
    countries = countries[perm_idx]    # reorder the country labels accordingly
    # return the sorted ASCII tensor, the descending length tensor, and the country labels
    return create_tensor(seq_tensor), create_tensor(seq_lengths), create_tensor(countries)
def trainModel():
    total_loss = 0
    for i, (names, countries) in enumerate(trainloader, 1):
        optimizer.zero_grad()
        inputs, seq_lengths, target = make_tensors(names, countries)  # sorted ASCII tensor, lengths, country labels
        output = classifier(inputs, seq_lengths)  # run the classifier
        loss = criterion(output, target)  # compute the loss
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # report once per epoch
        if i == len(trainset) // BATCH_SIZE:  # // is integer division
            print(f'loss={total_loss / (i * len(inputs))}')
    return total_loss

def testModel():
    correct = 0
    total = len(testset)
    with torch.no_grad():
        for i, (names, countries) in enumerate(testloader, 1):
            inputs, seq_lengths, target = make_tensors(names, countries)
            output = classifier(inputs, seq_lengths)
            pred = output.max(dim=1, keepdim=True)[1]  # predicted class indices
            correct += pred.eq(target.view_as(pred)).sum().item()  # count correct predictions
        percent = '%.2f' % (100 * correct / total)
        print(f'Test set: Accuracy {correct}/{total} {percent}%')
    return correct / total
if __name__ == '__main__':
    print("Train for %d epochs..." % N_EPOCHS)
    start = time.time()
    if USE_GPU:
        device = torch.device('cuda:0')
        classifier.to(device)

    criterion = torch.nn.CrossEntropyLoss()  # loss function
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)  # parameter updates

    acc_list = []
    for epoch in range(1, N_EPOCHS + 1):
        # train, then measure test accuracy
        print('%d / %d:' % (epoch, N_EPOCHS))
        trainModel()
        acc = testModel()
        acc_list.append(acc)
    end = time.time()
    print(datetime.timedelta(seconds=(end - start) // 1))

    epoch = np.arange(1, len(acc_list) + 1, 1)
    acc_list = np.array(acc_list)
    plt.plot(epoch, acc_list)
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.grid()
    plt.show()