reorganization

52ecf09c · xiaotong · 9c2f9cb7 · 52ecf09c · 52ecf09c · 52ecf09c
Commit 52ecf09c authored Nov 22, 2019 by xiaotong
--- a/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04-test.tex
@@ -136,15 +136,81 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P
 %%% 翻译推导的建模
 \begin{frame}{对翻译推导进行建模}
 \begin{itemize}
-\item $\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$带来新的问题：
+\item $\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$带来新的问题：如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$ \\
+
+\vspace{0.5em}
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[minimum height = 18pt]
+
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {在};
+\node[anchor=west,fill=red!50] (s2) at ([xshift=1em]s1.east) {桌子 上 的};
+\node[anchor=west,fill=blue!50] (s3) at ([xshift=1em]s2.east) {苹果};
+
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+\node[anchor=west,fill=blue!50] (t1) at (0, -1.5) {the apple};
+\node[anchor=west,fill=ugreen!50] (t2) at ([xshift=1em]t1.east) {on};
+\node[anchor=west,fill=red!50] (t3) at ([xshift=1em]t2.east) {the table};
+
+\path[<->, thick] (s1.south) edge (t3.north);
+\path[<->, thick] (s2.south) edge (t2.north);
+\path[<->, thick] (s3.south) edge (t1.north);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+上图体现了三方面问题
+
+    \begin{enumerate}
+    \item 确定哪些是``可用''的短语
+    \item 描述短语翻译的好坏
+    \item 描述翻译中的调序现象
+    \end{enumerate}
+
+\item<2-> 希望有这样一种模型可以对任意的因素进行方便的建模。经典的判别式模型成为了不二的选择
+\end{itemize}
+
+\visible<2->{
+\textbf{Discriminative Training and Maximum Entropy Models for Statistical Machine Translation}\\
+\textbf{Franz Och and Hermann Ney, 2002, In Proc of ACL}
+}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 判别式模型
+\begin{frame}{判别式模型}
+\begin{itemize}
+\item 判别式模型的形式：
+\begin{displaymath}
+\textrm{P}(d,\textbf{t}|\textbf{s}) = \frac{\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t}))}{\sum_{d',\textbf{t}'}\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d',\textbf{s},\textbf{t}'))}
+\end{displaymath}
    \begin{itemize}
-    \item \textbf{短语获取}：如何获取双语短语，以构成$d$
-    \item \textbf{翻译建模}：如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$
-    \item \textbf{模型简化}：如何对所有$d$进行$\textrm{P}(d,\textbf{t}|\textbf{s})$的求和
+    \item $\{h_i(\cdot)\}$是$M$个特征，每个$h_i(d,\textbf{s},\textbf{t})$把$d$映射为一个实数值
+    \item $\{\lambda_i\}$是这些特征对应权重，权重越大表示特征越重要
+    \item $\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t})$描述了$d$的整体质量，值越大$d$越``好''
    \end{itemize}
-    后面会分别展开讨论
-\vspace{0.3em}
-\item<2-> 回到一开始的问题: 给定$\textbf{s}$和$\textbf{t}$，\alert{如何获得双语短语}
+\item 判别式模型的优点在于，它可以很方便地引入各种特征。我们只需要设计不同的特征函数$h_i(\cdot)$即可。
+    \begin{itemize}
+    \item 比如，可以定义短语翻译概率作为特征，也可以定义调序的程度作为一个特征
+    \end{itemize}
+\item \textbf{两个问题}：
+    \begin{itemize}
+    \item 特征定义：定义短语翻译特征和调序特征(马上)
+    \item 权重调优：得到最好的特征权重(后面)
+    \end{itemize}
+
+\end{itemize}
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 翻译推导的建模
+\begin{frame}{对翻译推导进行建模}
+\begin{itemize}
+\item 回到最开始的问题: 给定$\textbf{s}$和$\textbf{t}$，\alert{如何获得双语短语}
    \begin{itemize}
    \item 如果没有限制，$\textbf{s}$和$\textbf{t}$之间任何子串映射都可以看做双语短语
    \end{itemize}
@@ -152,7 +218,6 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P

 \vspace{-0.7em}

-\visible<2->{
 \begin{center}
 \begin{tikzpicture}

@@ -219,7 +284,14 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P

 \end{tikzpicture}
 \end{center}
-}
+
+\begin{itemize}
+\item<2-> \textbf{显然}，不加限制地定义短语会带来很多问题
+    \begin{itemize}
+    \item 短语数量随句子长度增加急剧膨胀
+    \item 大量噪声，如``到 ? $\leftrightarrow$ Have you learned nothing''
+    \end{itemize}
+\end{itemize}

 \end{frame}


--- a/Section04-Phrasal-and-Syntactic-Models/section04.tex
+++ b/Section04-Phrasal-and-Syntactic-Models/section04.tex
@@ -853,8 +853,8 @@
 {\normalsize
 \textbf{基于短语的模型} \vspace{-0.2em}\\
 \small{1. 建模} \vspace{-0.2em}\\
-\small{2. 如何获取短语} \vspace{-0.2em}\\
-\small{3. 判别式模型和最小错误率训练} \vspace{-0.2em}\\
+\small{2. 短语获取和调序} \vspace{-0.2em}\\
+\small{3. 翻译特征和最小错误率训练} \vspace{-0.2em}\\
 \small{4. 栈解码}
 }
 \end{tcolorbox}
@@ -959,7 +959,7 @@
 \vspace{0.5em}
 \item<2-> 对于双语的情况 \\
 \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 双语短语(或短语对)}
-对于源语和目标语句对($\textbf{s}, \textbf{t}$)，$\textbf{s}$中短语$\tilde{s}_i$和$\textbf{t}$中的短语$\tilde{t}_j$可以构成一个双语短语对$(\tilde{s}_i,\tilde{t}_j)$，简称\alert{短语对}$(\tilde{s}_i,\tilde{t}_j)$
+对于源语和目标语句对($\textbf{s}, \textbf{t}$)，$\textbf{s}$中短语$\bar{s}_i$和$\textbf{t}$中的短语$\bar{t}_j$可以构成一个双语短语对$(\bar{s}_i,\bar{t}_j)$，简称\alert{短语对}$(\bar{s}_i,\bar{t}_j)$
 \end{beamerboxesrounded}

 	\begin{itemize}
@@ -978,7 +978,7 @@
 \begin{frame}{基于短语的翻译推导}
 \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{定义 - 基于短语的翻译推导}
 {\small
-对于源语和目标语句对($\textbf{s}, \textbf{t}$)，有$l$个短语对$\{(\tilde{s}_i,\tilde{t}_j)\}$，且所有源语言短语$\{\tilde{s}_i\}$和所有目标语短语$\{\tilde{t}_j\}$分别构成$\textbf{s}$和$\textbf{t}$ 的切分，则称这些短语对$\{(\tilde{s}_i,\tilde{t}_j)\}$构成了$\textbf{s}$到$\textbf{t}$的\alert{基于短语的翻译推导}(简称推导)，记为$d(\{(\tilde{s}_i,\tilde{t}_j)\},\textbf{s},\textbf{t})$(简记为$d(\{(\tilde{s}_i,\tilde{t}_j)\})$或$d$)。
+对于源语和目标语句对($\textbf{s}, \textbf{t}$)，有$l$个短语对$\{(\bar{s}_i,\bar{t}_j)\}$，且所有源语言短语$\{\bar{s}_i\}$和所有目标语短语$\{\bar{t}_j\}$分别构成$\textbf{s}$和$\textbf{t}$ 的切分，则称这些短语对$\{(\bar{s}_i,\bar{t}_j)\}$构成了$\textbf{s}$到$\textbf{t}$的\alert{基于短语的翻译推导}(简称推导)，记为$d(\{(\bar{s}_i,\bar{t}_j)\},\textbf{s},\textbf{t})$(简记为$d(\{(\bar{s}_i,\bar{t}_j)\})$或$d$)。
 }
 \end{beamerboxesrounded}

@@ -1010,12 +1010,12 @@
 \path[<->, thick] (s2.south) edge (t2.north);
 \path[<->, thick] (s3.south) edge (t3.north);

-\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp1) at (s1.north) {\scriptsize{$\tilde{s}_1$}};
-\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp2) at (s2.north) {\scriptsize{$\tilde{s}_2$}};
-\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp3) at (s3.north) {\scriptsize{$\tilde{s}_3$}};
-\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp1) at (t1.south) {\scriptsize{$\tilde{t}_1$}};
-\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp2) at (t2.south) {\scriptsize{$\tilde{t}_2$}};
-\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp3) at (t3.south) {\scriptsize{$\tilde{t}_3$}};
+\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp1) at (s1.north) {\scriptsize{$\bar{s}_1$}};
+\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp2) at (s2.north) {\scriptsize{$\bar{s}_2$}};
+\node[anchor=south,inner sep=0pt,yshift=-0.3em] (sp3) at (s3.north) {\scriptsize{$\bar{s}_3$}};
+\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp1) at (t1.south) {\scriptsize{$\bar{t}_1$}};
+\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp2) at (t2.south) {\scriptsize{$\bar{t}_2$}};
+\node[anchor=north,inner sep=0pt,yshift=0.3em] (tp3) at (t3.south) {\scriptsize{$\bar{t}_3$}};

 \end{scope}
 \end{tikzpicture}
@@ -1024,9 +1024,12 @@
 \vspace{-1.0em}

 \begin{itemize}
-\item $\{\tilde{s}_1,\tilde{s}_2,\tilde{s}_3\}$是$\textbf{s}$的一个短语切分
-\item $\{\tilde{t}_1,\tilde{t}_2,\tilde{t}_3\}$是$\textbf{t}$的一个短语切分
-\item $\{(\tilde{s}_k,\tilde{t}_k)\}$构成了$(\textbf{s},\textbf{t})$的一个基于短语的翻译推导
+\item $\{(\bar{s}_k,\bar{t}_k)\}$构成了$(\textbf{s},\textbf{t})$的一个基于短语的翻译推导
+\item 需要在建模中描述的两个问题：
+    \begin{itemize}
+    \item $\bar{s}_k$是如何被翻译成$\bar{t}_k$的?
+    \item $\bar{t}_k$在目标语中位置是如何决定的?
+    \end{itemize}
 \end{itemize}

 \end{frame}
@@ -1065,15 +1068,84 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P
 %%% 翻译推导的建模
 \begin{frame}{对翻译推导进行建模}
 \begin{itemize}
-\item $\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$带来新的问题：
+\item $\textrm{P}(\textbf{t}|\textbf{s}) = \sum_{d} \textrm{P}(d,\textbf{t}|\textbf{s})$带来新的问题：如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$ \\
+
+\vspace{0.5em}
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[minimum height = 18pt]
+
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=ugreen!50] (s1) at (0, 0) {在};
+\node[anchor=west,fill=red!50] (s2) at ([xshift=1em]s1.east) {桌子 上 的};
+\node[anchor=west,fill=blue!50] (s3) at ([xshift=1em]s2.east) {苹果};
+
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+\node[anchor=west,fill=blue!50] (t1) at (0, -1.5) {the apple};
+\node[anchor=west,fill=ugreen!50] (t2) at ([xshift=1em]t1.east) {on};
+\node[anchor=west,fill=red!50] (t3) at ([xshift=1em]t2.east) {the table};
+
+\path[<->, thick] (s1.south) edge (t3.north);
+\path[<->, thick] (s2.south) edge (t2.north);
+\path[<->, thick] (s3.south) edge (t1.north);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+上图体现了三方面问题
+
+    \begin{enumerate}
+    \item 短语获取：确定哪些是``可用''的短语
+    \item 翻译模型：描述短语翻译的好坏
+    \item 调序模型：描述翻译中的调序现象
+    \end{enumerate}
+
+\item<2-> 希望有这样一种模型可以对任意的因素进行方便的建模。经典的判别式模型成为了不二的选择
+\end{itemize}
+
+\visible<2->{
+\textbf{Discriminative Training and Maximum Entropy Models for Statistical Machine Translation}\\
+\textbf{Franz Och and Hermann Ney, 2002, In Proc of ACL}
+}
+
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 判别式模型
+\begin{frame}{判别式模型}
+\begin{itemize}
+\item 判别式模型的形式：
+\begin{displaymath}
+\textrm{P}(d,\textbf{t}|\textbf{s}) = \frac{\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t}))}{\sum_{d',\textbf{t}'}\exp(\sum_{i=1}^{M} \lambda_i \cdot h_i(d',\textbf{s},\textbf{t}'))}
+\end{displaymath}
    \begin{itemize}
-    \item \textbf{短语获取}：如何获取双语短语，以构成$d$
-    \item \textbf{翻译建模}：如何描述$\textrm{P}(d,\textbf{t}|\textbf{s})$
-    \item \textbf{模型简化}：如何对所有$d$进行$\textrm{P}(d,\textbf{t}|\textbf{s})$的求和
+    \item $\{h_i(\cdot)\}$是$M$个\alert{特征}，每个$h_i(d,\textbf{s},\textbf{t})$把$d$映射为一个实数值
+    \item $\{\lambda_i\}$是这些特征对应权重，权重越大表示特征越重要
+    \item $\sum_{i=1}^{M} \lambda_i \cdot h_i(d,\textbf{s},\textbf{t})$描述了$d$的整体质量，值越大$d$越``好''
    \end{itemize}
-    后面会分别展开讨论
-\vspace{0.3em}
-\item<2-> 回到一开始的问题: 给定$\textbf{s}$和$\textbf{t}$，\alert{如何获得双语短语}
+\item<2-> 判别式模型的优点在于，它可以很方便地引入各种特征。我们只需要设计不同的特征函数$h_i(\cdot)$即可。
+    \begin{itemize}
+    \item 比如，可以定义短语翻译概率作为特征，也可以定义调序的程度作为一个特征
+    \end{itemize}
+\item<2-> \textbf{两个问题}：
+    \begin{itemize}
+    \item \textbf{特征定义}：定义短语翻译特征和调序特征(马上)
+    \item \textbf{权重调优}：得到最好的特征权重(后面)
+    \end{itemize}
+
+\end{itemize}
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+\subsection{短语抽取}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 短语获取
+\begin{frame}{短语获取}
+\begin{itemize}
+\item 回到最开始的问题: 给定$\textbf{s}$和$\textbf{t}$，\alert{如何获得双语短语}
    \begin{itemize}
    \item 如果没有限制，$\textbf{s}$和$\textbf{t}$之间任何子串映射都可以看做双语短语
    \end{itemize}
@@ -1081,7 +1153,6 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P

 \vspace{-0.7em}

-\visible<2->{
 \begin{center}
 \begin{tikzpicture}

@@ -1128,11 +1199,11 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P
 \node[tgtnode] (tgt7) at ([yshift=-0.5\hnode]tgt6.north east) {\scriptsize{?}};
 \node[tgtnode] (tgt8) at ([yshift=-0.5\hnode]tgt7.north east) {\scriptsize{EOS}};

-\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a57.east) {\footnotesize{$\tilde{s}_i$: 什么\ \ \ 都\ \ \ 没}};
-\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\tilde{t}_i$: learned\ \ \ nothing\ \ \ ? \ \ \ \ \ \ \ \ \ \ \ \ }};
+\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a57.east) {\footnotesize{$\bar{s}_i$: 什么\ \ \ 都\ \ \ 没}};
+\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\bar{t}_i$: learned\ \ \ nothing\ \ \ ? \ \ \ \ \ \ \ \ \ \ \ \ }};

-\node [anchor=west] (p2line1) at ([xshift=4em]a53.east) {\footnotesize{$\tilde{s}_j$: 到\ \ \ ?}};
-\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\tilde{t}_j$: Have\ \ \ you\ \ \ learned\ \ \ nothing}};
+\node [anchor=west] (p2line1) at ([xshift=4em]a53.east) {\footnotesize{$\bar{s}_j$: 到\ \ \ ?}};
+\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: Have\ \ \ you\ \ \ learned\ \ \ nothing}};

 \begin{pgfonlayer}{background}
 \node [rectangle,draw=red,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a26) (a44)] (phrase1) {};
@@ -1148,18 +1219,68 @@ $d$是一个$(\textbf{s},\textbf{t})$上基于短语的翻译推导，$\textrm{P

 \end{tikzpicture}
 \end{center}
-}
+
+\begin{itemize}
+\item<2-> \textbf{显然}，不加限制地定义短语会带来很多问题
+    \begin{itemize}
+    \item 短语数量随句子长度增加急剧膨胀
+    \item 大量噪声，如``到 ? $\leftrightarrow$ Have you learned nothing''
+    \end{itemize}
+\end{itemize}

 \end{frame}

 %%%------------------------------------------------------------------------------------------------------------
-\subsection{短语抽取}
+%%% 与词对齐的兼容性
+\begin{frame}{与词对齐的兼容性}
+% 与词对齐相兼容的短语
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 短语抽取方法
+\begin{frame}{基于词对齐的短语抽取}
+% 描述算法，给出词对齐及短语抽取结果
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 词对齐系统
+\begin{frame}{如何获得词对齐}
+% GIZA++ + 对称化
+% FastAlign
+% ...
+
+% 如何评价词对齐 - 1) 自动指标 2) 下游系统（短语抽取、机器翻译）
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 改进方法
+\begin{frame}{更好更多的短语}
+% 提高词对齐性能 -> 增加短语质量? -> 提高翻译质量?
+% 提高Recall，对于不同任务，词对齐的密度更重要？
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 如何度量短语的好坏
+\begin{frame}{短语打分 - 翻译概率}
+% 正向、反向翻译概率
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 如何度量短语的好坏
+\begin{frame}{短语打分 - 词汇翻译概率}
+% 正向、反向翻译概率
+\end{frame}
+
+%%%------------------------------------------------------------------------------------------------------------
+%%% 展示短语表的内容
+\begin{frame}{短语表实例}
+\end{frame}

 %%%------------------------------------------------------------------------------------------------------------
-\subsection{判别式模型及特征}
+\subsection{调序}

 %%%------------------------------------------------------------------------------------------------------------
-\subsection{最小错误率训练}
+\subsection{特征及最小错误率训练}

 %%%------------------------------------------------------------------------------------------------------------
 \subsection{栈解码}

--- a/Section06-Neural-Machine-Translation/section06.tex
+++ b/Section06-Neural-Machine-Translation/section06.tex
@@ -1188,7 +1188,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
            \node[auxnode,label={-45:28}] () at (aux28) {};
            \coordinate (aux29) at ([xshift=2\base]aux28);
            \node[auxnode,label={-45:29}] () at (aux29) {};
-            
+
            \coordinate (aux33) at ([yshift=\base]aux23);
            \node[auxnode,label={-45:33}] () at (aux33) {};
            \coordinate (aux34) at ([yshift=\base]aux24);
@@ -1197,10 +1197,10 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
            \node[auxnode,label={-45:35}] () at (aux35) {};
            \coordinate (aux37) at ([yshift=\base]aux27);
            \node[auxnode,label={-45:37}] () at (aux37) {};
-            
+
            \coordinate (aux45) at ([yshift=\base]aux35);
            \node[auxnode,label={-45:45}] () at (aux45) {};
-            
+
            \coordinate (aux55) at ([yshift=\base]aux45);
            \node[auxnode,label={-45:55}] () at (aux55) {};
            \ExtractX{$(aux21)$}
@@ -1219,7 +1219,7 @@ NLP问题的隐含结构假设 & 无隐含结构假设，端到端学习 \\
            \ExtractY{$(aux55)$}
            \coordinate (aux59) at (\XCoord,\YCoord);
            \node[auxnode,label={-45:59}] () at (aux59) {};
-            
+
            \coordinate (aux68) at ([yshift=\base]aux58);
            \node[auxnode,label={-45:68}] () at (aux68) {};
        \end{scope}
@@ -2244,8 +2244,8 @@ $\textrm{``you''} = \argmax_{y} \textrm{P}(y|\textbf{s}_1, \alert{\textbf{C}})$ 

 \vspace{-2.5em}
 \begin{eqnarray}
-\textbf{C}_3 & = & 0.4 \times \textbf{h}(\textrm{``什么''}) + 0.4 \times \textbf{h}(\textrm{``也''}) + \nonumber \\
-             &   & 0 \times \textbf{h}(\textrm{``没''}) + 0.1 \times \textbf{h}(\textrm{``学''}) \nonumber
+\textbf{C}_3 & = & 0.4 \times \textbf{h}(\textrm{``你''}) + 0.4 \times \textbf{h}(\textrm{``什么''}) + \nonumber \\
+             &   & 0 \times \textbf{h}(\textrm{``也''}) + 0.1 \times \textbf{h}(\textrm{``没''}) \nonumber
 \end{eqnarray}

 \vspace{-0.5em}