Part 01: A Hand-Stitched XOR Decision Machine
Main program skeleton
```
data = X
network.initialize()
for (epochs) {
    network.forward(X)
    network.backward()
}
network.print()
```
Forward
Note:
Save every layer activation L.
Save every pre-activation Z.
```
def forward(self, X):
    z_i = dot(L_i, w_i)
    L_{i+1} = sigmoid(z_i)
    return self.L_final
```
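To make the "save everything" note concrete, here is a hypothetical generalization to any number of layers; the names `forward_all_layers`, `weights`, `L`, and `Z` are my own and do not appear in the post (the two-layer version later stores the same things as `A1`, `H`, `A2`, `Y_hat`):

```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def forward_all_layers(weights, X):
    """Return the cached lists L (activations) and Z (pre-activations)."""
    L, Z = [X], []                  # L[0] is the input
    for w in weights:
        z = np.dot(L[-1], w)        # z_i = L_i . w_i
        Z.append(z)
        L.append(sigmoid(z))        # L_{i+1} = sigmoid(z_i)
    return L, Z
```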
Backward
Note:
Compute every delta.
Compute every delta_error.
```python
def backward(self, X, y, learning_rate):
    # output layer: error and delta
    output_error = y - self.Y_hat
    output_delta = output_error * sigmoid_derivative(self.Y_hat)
    # hidden layer: propagate the delta back through w2
    hidden_error = output_delta.dot(self.w2.T)
    hidden_delta = hidden_error * sigmoid_derivative(self.H)
    # because error is defined as y - y_hat, adding moves the loss downhill
    self.w2 += self.H.T.dot(output_delta) * learning_rate
    self.w1 += X.T.dot(hidden_delta) * learning_rate
```
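Continuing the hypothetical multi-layer sketch from above (the function name `backward_all_layers` and the cached `weights`/`L` lists are my invention, not the post's), the backward pass walks the layers in reverse, carrying the delta_error downward:

```python
def sigmoid_derivative(y):
    # expressed in terms of the activation y = sigmoid(z), not z itself
    return y * (1 - y)

def backward_all_layers(weights, L, y, learning_rate):
    """Update every weight matrix in place, using the cached activations L."""
    error = y - L[-1]                                  # delta_error at the output
    for i in reversed(range(len(weights))):
        delta = error * sigmoid_derivative(L[i + 1])   # delta of layer i
        error = delta.dot(weights[i].T)                # delta_error for the layer below
        weights[i] += L[i].T.dot(delta) * learning_rate
```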
Q: Isn't something off here? Shouldn't `sigmoid_derivative(self.Y_hat)` be `sigmoid_derivative(self.A2)`?

A: Good question! It comes down to how `sigmoid_derivative` is implemented.
Sigmoid
```python
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(y):
    return y * (1 - y)
```
Note that `sigmoid_derivative` does not take the independent variable x, as you might expect, but the function value y itself (i.e. the derivative is written in terms of y). Since the network only ever calls it on values that are already sigmoid outputs (`self.Y_hat`, `self.H`), passing the activation is exactly right.
The sigmoid activation function:
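Written out (standard results, added here for reference since the post only states them in code):

$$
\sigma(x) = \frac{1}{1 + e^{-x}}, \qquad
\sigma'(x) = \sigma(x)\bigl(1 - \sigma(x)\bigr) = y\,(1 - y), \quad \text{where } y = \sigma(x).
$$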
Putting it all together
```python
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    # x here is already a sigmoid output, so this is y * (1 - y)
    return x * (1 - x)

class SimpleNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.w1 = np.random.rand(input_size, hidden_size)
        self.w2 = np.random.rand(hidden_size, output_size)

    def forward(self, X):
        self.A1 = np.dot(X, self.w1)       # hidden pre-activation
        self.H = sigmoid(self.A1)          # hidden activation
        self.A2 = np.dot(self.H, self.w2)  # output pre-activation
        self.Y_hat = sigmoid(self.A2)      # network output
        return self.Y_hat

    def backward(self, X, y, learning_rate):
        output_error = y - self.Y_hat
        output_delta = output_error * sigmoid_derivative(self.Y_hat)
        hidden_error = output_delta.dot(self.w2.T)
        hidden_delta = hidden_error * sigmoid_derivative(self.H)
        self.w2 += self.H.T.dot(output_delta) * learning_rate
        self.w1 += X.T.dot(hidden_delta) * learning_rate

if __name__ == "__main__":
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])

    nn = SimpleNeuralNetwork(input_size=2, hidden_size=3, output_size=1)
    epochs = 10000
    learning_rate = 0.5

    for epoch in range(epochs):
        nn.forward(X)
        nn.backward(X, y, learning_rate)

    print("Final Output after training:")
    print(nn.forward(X))
```
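Not part of the original post, but a quick sanity check you can append at the end of the script once it has run: threshold the sigmoid outputs at 0.5 and compare against the XOR targets.

```python
# Appended after the training loop above (my addition, not in the original post).
predictions = (nn.forward(X) > 0.5).astype(int)   # threshold the sigmoid outputs
print("Predictions:", predictions.ravel())        # usually [0 1 1 0] after 10000 epochs
print("Targets:    ", y.ravel())
```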
Part 02: The Library Caller's Encapsulated Version
You probably find writing all of this by hand tedious, especially the derivative bookkeeping in backpropagation. So do I! We can lean on the tools PyTorch provides instead.
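For instance, autograd computes the very derivative we hand-coded above (a tiny illustration of my own, not from the original post):

```python
import torch

x = torch.tensor(2.0, requires_grad=True)
y = torch.sigmoid(x)
y.backward()            # autograd computes dy/dx for us
print(x.grad)           # sigmoid(2) * (1 - sigmoid(2)) ≈ 0.1050
print(y * (1 - y))      # the same hand-derived value
```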
| PyTorch API | Purpose |
| --- | --- |
| `torch.tensor()` | Create a tensor |
| `nn.Linear()` | Define a fully connected layer |
| `nn.Sigmoid()` | Activation function |
| `forward()` | Define the forward pass |
| `nn.MSELoss()` | Compute the loss |
| `optim.SGD()` | Gradient update (stochastic gradient descent) |
| `zero_grad()` | Clear accumulated gradients |
| `backward()` | Backpropagate to compute gradients |
| `step()` | Update the weights |
```python
import torch
import torch.nn as nn
import torch.optim as optim

# XOR inputs and targets
X = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)

class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.hidden = nn.Linear(2, 3)
        self.output = nn.Linear(3, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.hidden(x))
        x = self.sigmoid(self.output(x))
        return x

model = SimpleNN()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.5)

epochs = 10000
for epoch in range(epochs):
    optimizer.zero_grad()          # clear gradients from the previous step
    output = model(X)              # forward pass
    loss = criterion(output, y)    # compute the loss
    loss.backward()                # backpropagate
    optimizer.step()               # update the weights

print("Final Output after training:")
print(model(X).detach().numpy())
```
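One design note (my aside, not from the post): with a sigmoid output and 0/1 targets, binary cross-entropy is the more conventional loss than MSE; a single-line swap in the script above is enough if you want to try it.

```python
criterion = nn.BCELoss()   # drop-in replacement for nn.MSELoss() in the script above
```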
Pretty simple, isn't it! (Though reinventing the wheel has its own charm.)
Define the model and its forward pass, create the loss function and optimizer, then fill in the training loop from the template: these three moves cover the vast majority of feed-forward networks. Next, let's use the same wheels to build a CNN.
Define the model
A convolutional neural network uses convolutional layers (conv) to extract local features. Because convolution on its own shrinks the feature map, we usually pad the border (padding) to keep the size; after each convolution, a max pooling layer (MaxPool) then halves the spatial size, keeping the strongest responses while cutting the parameter count. For 28×28 MNIST images, two rounds of pooling give 28 → 14 → 7, which is where the 64 × 7 × 7 input size of the first fully connected layer comes from.
```python
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)    # 1x28x28 -> 32x28x28
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)   # 32x14x14 -> 64x14x14
        self.pool = nn.MaxPool2d(2, 2)                             # halves height and width
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))   # -> 32x14x14
        x = self.pool(self.relu(self.conv2(x)))   # -> 64x7x7
        x = x.view(-1, 64 * 7 * 7)                # flatten for the fully connected layers
        x = self.relu(self.fc1(x))
        x = self.fc2(x)                           # raw logits, no softmax (see the loss below)
        return x
```
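If you want to verify the 64 × 7 × 7 figure yourself, a throwaway check like this (my addition, reusing the layer shapes above) traces a dummy batch through the conv/pool stack:

```python
import torch
import torch.nn as nn

conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
pool = nn.MaxPool2d(2, 2)

x = torch.zeros(1, 1, 28, 28)        # one fake MNIST image
x = pool(conv1(x)); print(x.shape)   # torch.Size([1, 32, 14, 14])
x = pool(conv2(x)); print(x.shape)   # torch.Size([1, 64, 7, 7])
```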
Create the loss function and optimizer
Digit recognition is a classification task, so we use the cross-entropy loss. Note that `nn.CrossEntropyLoss` applies softmax internally, which is why the model's last layer outputs raw scores (logits) with no activation.
For optimization we use Adam (adaptive gradients), which maintains a separate effective learning rate for each parameter.
```python
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
```
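A minimal illustration of what this loss expects (random logits, my own example, not from the post): raw scores of shape (batch, 10) and integer class labels, not one-hot vectors.

```python
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
logits = torch.randn(4, 10)           # raw, unnormalized scores for 4 samples
labels = torch.tensor([3, 7, 0, 1])   # integer class indices
print(criterion(logits, labels))      # scalar loss tensor
```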
Training
```python
epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for batch_idx, (images, labels) in enumerate(trainloader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
```
Adding a few finishing touches
```python
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# MNIST training data (set download=True on the first run)
transform = transforms.Compose([transforms.ToTensor()])
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=False, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# halve the learning rate every 5 epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

epochs = 10
for epoch in range(epochs):
    running_loss = 0.0
    for batch_idx, (images, labels) in enumerate(trainloader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if batch_idx % 100 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx}/{len(trainloader)}], Loss: {loss.item():.4f}")
    scheduler.step()
    print(f"🔹 Epoch {epoch+1} finished, current learning rate: {scheduler.get_last_lr()[0]:.6f}, average loss: {running_loss/len(trainloader):.4f}")

print("✅ CNN training finished! 🎉")
```
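The post stops at training; if you also want a test-set accuracy, a sketch along these lines (my addition, reusing `transform`, `model`, and `device` from the script above) is the usual pattern:

```python
testset = torchvision.datasets.MNIST(root='./data', train=False, download=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=256, shuffle=False)

model.eval()                          # switch off training-only behaviour
correct = total = 0
with torch.no_grad():                 # no gradients needed for evaluation
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        predictions = model(images).argmax(dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)
print(f"Test accuracy: {correct / total:.4f}")
```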
In short:
define the model,
then the loss function and optimizer,
and finally write the training loop.
Thank you for reading this far. The weather is warming up too; today's song is…《黎明与萤火》(Dawn and Fireflies).
形のない歌で朝を描いたまま
Still sketching the morning with a formless song
浅い浅い夏の向こうに
On the far side of that shallow, shallow summer
冷たくない君の手のひらが見えた
I'm not cold, for I could see the palm of your hand
Thanks for reading~