% 第二章示例 - 研究生论文
% ==========================================
% 格式说明同第一章
% 本章展示公式、图片的使用方法
% ==========================================

\chapter{相关理论基础}

\section{深度学习基本原理}

深度学习是机器学习的一个分支，通过构建多层神经网络来学习数据的层次化表示。与传统机器学习方法相比，深度学习能够自动从原始数据中提取特征，无需人工设计特征。

\subsection{神经网络基本结构}

人工神经网络（Artificial Neural Network, ANN）是深度学习的基础。一个典型的神经网络由输入层、隐藏层和输出层组成。神经元之间通过权重连接，并通过激活函数引入非线性。

% 公式示例
% 格式说明：公式编号右对齐，公式居中
神经元的输出可以表示为：
\begin{equation}
  y = f\left(\sum_{i=1}^{n} w_i x_i + b\right)
\end{equation}
其中，$x_i$为输入，$w_i$为权重，$b$为偏置，$f(\cdot)$为激活函数。

常用的激活函数包括：

\textbf{Sigmoid函数：}
\begin{equation}
  \sigma(x) = \frac{1}{1+e^{-x}}
\end{equation}

\textbf{ReLU函数：}
\begin{equation}
  \text{ReLU}(x) = \max(0, x)
\end{equation}

\textbf{Tanh函数：}
\begin{equation}
  \tanh(x) = \frac{e^x - e^{-x}}{e^x + e^{-x}}
\end{equation}

\subsection{反向传播算法}

反向传播（Backpropagation, BP）算法是训练神经网络的核心算法。其基本思想是利用链式法则将输出层的误差逐层反向传播，从而计算损失函数对各层参数的梯度，再结合梯度下降法更新参数以最小化损失函数。

损失函数定义为：
\begin{equation}
  L(\theta) = \frac{1}{N}\sum_{i=1}^{N} \ell(y_i, \hat{y}_i)
\end{equation}
其中，$\theta$表示网络参数，$N$为训练样本数，$y_i$为第$i$个样本的真实值，$\hat{y}_i$为对应的预测值，$\ell(\cdot,\cdot)$为单个样本上的损失。

参数更新规则为：
\begin{equation}
  \theta_{t+1} = \theta_t - \eta \nabla_\theta L(\theta_t)
\end{equation}
其中，$\eta$为学习率，$\nabla_\theta L$为损失函数对参数的梯度。

\section{卷积神经网络}

卷积神经网络（Convolutional Neural Network, CNN）特别适合处理具有网格结构的数据，如图像、时间序列等。CNN通过卷积操作提取局部特征，通过池化操作降低维度。

\subsection{卷积操作}

二维卷积操作可以表示为：
\begin{equation}
  (f * g)(i,j) = \sum_{m}\sum_{n} f(m,n) \cdot g(i-m, j-n)
\end{equation}

在CNN中，卷积层实际采用不翻转卷积核的互相关运算，其输出为：
\begin{equation}
  y_{ij} = \sigma\left(\sum_{m}\sum_{n} w_{mn} \cdot x_{(i+m)(j+n)} + b\right)
\end{equation}
其中，$x$为输入特征图，$w_{mn}$为卷积核权重，$b$为偏置，$\sigma(\cdot)$在此泛指激活函数。

\subsection{池化操作}

池化操作用于降低特征图的空间维度，常用的池化方式包括：

\textbf{最大池化：}
\begin{equation}
  y_{ij} = \max_{(m,n) \in R_{ij}} x_{mn}
\end{equation}

\textbf{平均池化：}
\begin{equation}
  y_{ij} = \frac{1}{|R_{ij}|}\sum_{(m,n) \in R_{ij}} x_{mn}
\end{equation}
其中，$R_{ij}$表示输出位置$(i,j)$所对应的池化窗口区域，$|R_{ij}|$为该区域内元素的个数。

\section{循环神经网络}

循环神经网络（Recurrent Neural Network, RNN）是处理序列数据的重要模型。RNN通过引入循环连接，使网络具有记忆功能。

\subsection{RNN基本结构}

RNN的隐藏状态更新公式为：
\begin{equation}
  h_t = \tanh(W_h h_{t-1} + W_x x_t + b)
\end{equation}

输出计算公式为：
\begin{equation}
  y_t = W_y h_t + b_y
\end{equation}

\subsection{LSTM网络}

长短期记忆网络（Long Short-Term Memory, LSTM）通过引入门控机制有效缓解了RNN的梯度消失问题。LSTM包含三个门：遗忘门、输入门和输出门。

\textbf{遗忘门：}
\begin{equation}
  f_t = \sigma(W_f \cdot [h_{t-1}, x_t] + b_f)
\end{equation}

\textbf{输入门：}
\begin{equation}
  i_t = \sigma(W_i \cdot [h_{t-1}, x_t] + b_i)
\end{equation}

\textbf{候选细胞状态：}
\begin{equation}
  \tilde{C}_t = \tanh(W_C \cdot [h_{t-1}, x_t] + b_C)
\end{equation}

\textbf{细胞状态更新：}
\begin{equation}
  C_t = f_t \odot C_{t-1} + i_t \odot \tilde{C}_t
\end{equation}

\textbf{输出门：}
\begin{equation}
  o_t = \sigma(W_o \cdot [h_{t-1}, x_t] + b_o)
\end{equation}

\textbf{隐藏状态：}
\begin{equation}
  h_t = o_t \odot \tanh(C_t)
\end{equation}
其中，$\sigma(\cdot)$为Sigmoid函数，$[h_{t-1}, x_t]$表示将上一时刻隐藏状态与当前输入拼接，$\odot$表示逐元素相乘。

\section{强化学习基本原理}

强化学习（Reinforcement Learning, RL）是机器学习的另一个重要分支，通过与环境交互学习最优策略。

\subsection{马尔可夫决策过程}

强化学习问题通常建模为马尔可夫决策过程（Markov Decision Process, MDP），定义为一个五元组：
\begin{equation}
  \text{MDP} = (S, A, P, R, \gamma)
\end{equation}
其中：
\begin{itemize}
  \item $S$：状态空间
  \item $A$：动作空间
  \item $P$：状态转移概率
  \item $R$：奖励函数
  \item $\gamma$：折扣因子
\end{itemize}

\subsection{Q学习算法}

Q学习是一种无模型的强化学习算法，通过学习状态-动作价值函数$Q(s,a)$来获得最优策略。

Q值更新公式为：
\begin{equation}
  Q(s_t, a_t) \leftarrow Q(s_t, a_t) + \alpha \left[r_t + \gamma \max_{a'} Q(s_{t+1}, a') - Q(s_t, a_t)\right]
\end{equation}
其中，$\alpha$为学习率，$r_t$为在状态$s_t$下执行动作$a_t$后获得的即时奖励，$\gamma$为折扣因子。

\subsection{深度Q网络}

深度Q网络（Deep Q-Network, DQN）将深度学习与Q学习结合，使用神经网络近似Q函数：
\begin{equation}
  Q(s,a;\theta) \approx Q^*(s,a)
\end{equation}

DQN的损失函数为：
\begin{equation}
  L(\theta) = \mathbb{E}\left[\left(r + \gamma \max_{a'} Q(s',a';\theta^-) - Q(s,a;\theta)\right)^2\right]
\end{equation}
其中，$\theta$为当前网络参数，$\theta^-$为目标网络参数。

\section{本章小结}

本章介绍了深度学习和强化学习的基本理论，包括神经网络、CNN、LSTM和DQN等模型的基本原理和数学表达。这些理论为后续章节的模型设计和算法实现提供了理论基础。