updates chapter1-3

e31fccc6 · 曹润柘 · 697c7418 · e31fccc6 · e31fccc6 · e31fccc6
Commit e31fccc6 authored Feb 27, 2020 by 曹润柘
--- a/Book/Chapter1/Figures/figure-comparison-mt-ht.tex
+++ b/Book/Chapter1/Figures/figure-comparison-mt-ht.tex
@@ -6,7 +6,7 @@
 \begin{tikzpicture}
 %第一段----------------------------------------------
 %原文-------------
-\node [pos=0.4,left,xshift=-0.4em,yshift=2.0em] (original1) {\scriptsize{源\qquad文：从前有一个小岛，上面住着快乐、悲哀、知识和爱，还有其他各种情感。一天，情感们得知}};
+\node [pos=0.4,left,xshift=-0.4em,yshift=2.0em] (original1) {\scriptsize{源 \qquad 文：从前有一个小岛，上面住着快乐、悲哀、知识和爱，还有其他各种情感。一天，情感们得知}};
 \node [anchor=north west] (original1-1) at ([xshift=3.5em,yshift=0.3em]original1.south west)  {\scriptsize{小岛快要下沉了。于是，大家都准备船只，离开小岛，只有爱决定留下来，她想坚持到最后}};
 \node [anchor=north west] (original1-2) at ([yshift=0.3em]original1-1.south west)  {\scriptsize{一刻。过了几天，小岛真的要下沉了，爱想请人帮忙。}};
 %机器翻译--------------
@@ -30,7 +30,7 @@
 \node [anchor=north west] (ht1-4) at ([yshift=0.4em]ht1-3.south west) {\scriptsize{and LOVE had to {\color{red}\underline{seek for help.}}}};
 %第二段--------------------------------
 %原文-------------
-\node[anchor=north west] (original4) at ([xshift=-3.5em,yshift=-0.3em]ht1-4.south west)  {\scriptsize{源\qquad文：这时，富裕乘着一艘大船经过。爱说：``富裕，你能带我走吗？''富裕 答道：``不，我的船中}};
+\node[anchor=north west] (original4) at ([xshift=-3.5em,yshift=-0.3em]ht1-4.south west)  {\scriptsize{源 \qquad 文：这时，富裕乘着一艘大船经过。爱说：``富裕，你能带我走吗？''富裕 答道：``不，我的船中}};
 \node[anchor=north west] (original4-1) at ([xshift=3.5em,yshift=0.3em]original4.south west)  {\scriptsize{有许多金银财宝，没有你的位置。''爱看见虚荣在一艘华丽的小船上，说：``虚荣，帮帮我吧}};
 \node[anchor=north west] (original4-2) at ([yshift=0.3em]original4-1.south west)  {\scriptsize{！''``我帮不了你。你全身都湿透了，会弄坏我这漂亮的小船的。''悲哀来了，爱向他求助：``}};
 \node[anchor=north west] (original4-3) at ([yshift=0.3em]original4-2.south west)  {\scriptsize{悲哀，让我跟你去吧！''``哦，……爱，我实在太悲哀了，想自己一个人呆一会儿！''悲哀答}};
@@ -58,7 +58,7 @@
 \node [anchor=north west] (ht4-5) at ([yshift=0.4em]ht4-4.south west) {\scriptsize{-ed SADNESS. JOY {\color{red}\underline{came close to }} love, but she was so happy that she did not hear him call her!}};

 %第三段--------------------------------
-\node[anchor=north west] (original8) at ([xshift=-3.5em,yshift=-0.3em]ht4-5.south west)  {\scriptsize{源\qquad文：突然，一个声音传来：``过来，爱，我带你走。''这是位长者。爱大喜过望，竟忘了问他他}};
+\node[anchor=north west] (original8) at ([xshift=-3.5em,yshift=-0.3em]ht4-5.south west)  {\scriptsize{源 \qquad 文：突然，一个声音传来：``过来，爱，我带你走。''这是位长者。爱大喜过望，竟忘了问他他}};
 \node[anchor=north west] (original8-1) at ([xshift=3.5em,yshift=0.3em]original8.south west)  {\scriptsize{的名字。登上陆地后，长者独自走开了。爱对长者感激不尽，问另一位长者知识：``帮我的}};
 \node[anchor=north west] (original8-2) at ([yshift=0.3em]original8-1.south west)  {\scriptsize{那个人是谁？''``他是时间。''知识老人回答。``时间？''爱问道，``他为什么要帮我？''知识老}};
 \node[anchor=north west] (original8-3) at ([yshift=0.3em]original8-2.south west)  {\scriptsize{人笑道：``因为只有时间才能理解爱有多么伟大。''}};

--- a/Book/Chapter1/Figures/figure-zh_en-example.png
+++ b/Book/Chapter1/Figures/figure-zh_en-example.png
--- a/Book/Chapter1/chapter1.tex
+++ b/Book/Chapter1/chapter1.tex
@@ -94,7 +94,7 @@
 \begin{figure}[htp]
    \centering
 \includegraphics[scale=0.25]{./Chapter1/Figures/figure-eniac.jpg}
-    \caption{世界上公认的第一台通用电子数字计算机``埃尼阿克''（ENIAC）\\【图片来源：百度百科】}
+    \caption{世界上公认的第一台通用电子数字计算机``埃尼阿克''（ENIAC）}%\\【图片来源：百度百科】
    \label{fig:eniac}
 \end{figure}
 %-------------------------------------------
@@ -139,7 +139,7 @@
 \includegraphics[scale=0.3]{./Chapter1/Figures/figure-wmt-participation.jpg}
 \includegraphics[scale=0.3]{./Chapter1/Figures/figure-wmt-bestresults.jpg}
 \setlength{\belowcaptionskip}{-1.5em}
-    \caption{左图是WMT19参赛队伍，右图是WMT19最终个项目最好分数结果}
+    \caption{国际机器翻译大赛(左：WMT19参赛队伍；右：WMT19最终各项目最好分数结果)}
    \label{fig:wmt}
 \end{figure}
 %-------------------------------------------
@@ -169,7 +169,7 @@
    \centering
 \input{./Chapter1/Figures/figure-Example-RBMT}
 \setlength{\belowcaptionskip}{-1.5em}
-    \caption{基于规则的机器翻译的示例图\\1. 左侧是规则库2.右侧是匹配结构}
+    \caption{基于规则的机器翻译的示例图（左：规则库；右：匹配结构）}
    \label{fig:Example-RBMT}
 \end{figure}
 %-------------------------------------------
@@ -224,7 +224,7 @@
 \begin{figure}[htp]
    \centering
 \input{./Chapter1/Figures/figure-Example-SMT}
-    \caption{统计机器翻译的示例图\\1.左侧是语料资源2.中间是翻译模型与语言模型3.右侧是翻译假设与翻译引擎}
+    \caption{统计机器翻译的示例图（左：语料资源；中：翻译模型与语言模型；右：翻译假设与翻译引擎）}
    \label{fig:Example-SMT}
 \end{figure}
 %-------------------------------------------
@@ -242,7 +242,7 @@
 \begin{figure}[htp]
    \centering
 \input{./Chapter1/Figures/figure-Example-NMT}
-    \caption{神经机器翻译的示例图\\1.左侧是编码器-解码器网络2.右侧是编码器示例网络}
+    \caption{神经机器翻译的示例图（左：编码器-解码器网络；右：编码器示例网络）}
    \label{fig:Example-NMT}
 \end{figure}
 %-------------------------------------------
@@ -251,7 +251,7 @@

 \parinterval 相比统计机器翻译，神经机器翻译的优势体现在其不需要特征工程，所有信息由神经网络自动从原始输入中提取。而且，相比离散化的表示，词和句子的分布式连续空间表示可以建模提供更为丰富的信息，同时可以使用相对成熟的基于梯度的方法优化模型。此外，神经网络的存储需求较小，天然适合小设备上的应用。但是，神经机器翻译也存在问题。首先，虽然脱离了特征工程，神经网络的结构需要人工设计，即使设计好结构，系统的调优、超参数的设置等等仍然依赖大量的实验；其次，神经机器翻译现在缺乏可解释性，其过程和人的认知差异很大，通过人的先验知识干预的程度差；再有，神经机器翻译对数据的依赖很大，数据规模、质量对性能都有很大影响，特别是在数据稀缺的情况下，充分训练神经网络具有挑战。

-\subsection{对比}\index{Chapter1.4.5}
+\subsection{对比分析}\index{Chapter1.4.5}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 \parinterval 不同机器翻译方法有不同的特点。表\ref{tab:comparison-of-different-MT}对比了这些方法，不难看出：
@@ -351,7 +351,7 @@ Candidate：the the the the

 \parinterval 译文整体的准确率等于各$n$-gram的加权平均：
 \begin{eqnarray}
-P_{\textrm{avg}}=\exp(\sum_{n=1}^Nw_n\cdot \log{P_n})
+\textrm{P}_{\textrm{avg}}=\exp(\sum_{n=1}^Nw_n\cdot \log{\textrm{P}_n})
 \label{eq:weighted-average}
 \end{eqnarray}

@@ -492,9 +492,9 @@ His house is on the south bank of the river.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \vspace{0.5em}
 \begin{itemize}
-\item NiuTrans：NiuTrans是由东北大学自然语言处理实验室自主研发的SMT系统，该系统可支持基于短语的模型、基于层次结构的模型以及基于句法树结构的模型。由于使用C++语言开发，所以该系统运行时间快，所占存储空间少且易于上手。系统中内嵌有n语言模型，故无需使用其他的系统即可对语言进行建模。\url{http://www.niutrans.com/}
+\item NiuTrans：NiuTrans是由东北大学自然语言处理实验室自主研发的SMT系统，该系统可支持基于短语的模型、基于层次结构的模型以及基于句法树结构的模型。由于使用C++语言开发，所以该系统运行时间快，所占存储空间少且易于上手。系统中内嵌有$n$-gram语言模型，故无需使用其他的系统即可对语言进行建模。\url{http://www.niutrans.com/}
 \vspace{0.5em}
-\item Moses：Moses是（主要）由爱丁堡大学的SMT 组开发的，具有开拓性的SMT系统，最新的Moses系统支持很多的功能。例如，它既支持基于短语的模型，也支持基于句法的模型。Moses 提供因子翻译模型(factored translation model)，该模型可以在不同的层次中使用不同的信息。此外，它允许将混淆网络和字格(word lattices)作为输入，可缓解系统的1-best输出中的错误。Moses还提供了很多有用的脚本和工具来支持其他的功能。\url{http://www.statmt.org/moses/}
+\item Moses：Moses（主要）是由爱丁堡大学的SMT 组开发的，具有开拓性的SMT系统，最新的Moses系统支持很多的功能。例如，它既支持基于短语的模型，也支持基于句法的模型。Moses 提供因子翻译模型(factored translation model)，该模型可以在不同的层次中使用不同的信息。此外，它允许将混淆网络和字格(word lattices)作为输入，可缓解系统的1-best输出中的错误。Moses还提供了很多有用的脚本和工具来支持其他的功能。\url{http://www.statmt.org/moses/}
 \vspace{0.5em}
 \item Joshua：Joshua是另一种先进的开源SMT系统，是由约翰霍普金斯大学的语言和语音处理中心开发。由于Joshua是由Java语言开发，所以它在不同的平台上运行或开发时具有良好的可扩展性和可移植性。此外，Java 的使用提供了一种简单的方法（相对C/C++语言）来实验新的方法策略，以得到更好的效果。\url{http://joshua.sourceforge.net/Joshua/Welcome.html}
 \vspace{0.5em}
@@ -577,7 +577,7 @@ His house is on the south bank of the river.

 \parinterval 以上机器翻译评测各自有不同的特点，NIST最近几年更加关注稀缺资源翻译相关问题；NTCIR 在评估方式上纳入了时间、多语种评估等手段；WAT倾向于评测亚洲相关语言的翻译；CCMT以汉语为核心，并支持国内许多少数民族语言；WMT面向欧洲语系，现在也延伸到汉语，语种范围广，评测类型丰富；IWSLT针对语音对话的翻译相关问题进行评测。

-\parinterval 以上评测数据大多可以从评测网站上下载，此外部分数据也可以从LDC（Lingu \\ -istic Data Consortium）上申请，网址为\url{https://www.ldc.upenn.edu/}。ELRA（Euro \\ -pean Language Resources Association）上也有一些免费的语料库供研究使用，其官网为\url{http://www.elra.info/}。{\red 更多机器翻译的语料信息可参看附录？？}。
+\parinterval 以上评测数据大多可以从评测网站上下载，此外部分数据也可以从LDC（Lingu \\ -istic Data Consortium）上申请，网址为\url{https://www.ldc.upenn.edu/}。ELRA（Euro \\ -pean Language Resources Association）上也有一些免费的语料库供研究使用，其官网为\url{http://www.elra.info/}。更多机器翻译的语料信息可参看附录\ref{appendix-A}。

 \parinterval 从机器翻译发展的角度看，这些评测任务给相关研究提供了基准数据集，使得不同的系统都可以在同一个环境下进行比较和分析，进而建立了机器翻译研究所需的实验基础。此外，这些评测任务也使得研究者可以第一时间了解机器翻译研究的最新成果，比如，有多篇ACL最佳论文的灵感就来自当年参加机器翻译评测任务的系统。

@@ -619,11 +619,13 @@ His house is on the south bank of the river.
 \vspace{0.5em}
 \end{itemize}
 \vspace{0.5em}
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\begin{appendices}
+\chapter{附录A}
+\label{appendix-A}


-
-
-
+\end{appendices}




--- a/Book/Chapter2/Figures/figure-Self-information-function.tex
+++ b/Book/Chapter2/Figures/figure-Self-information-function.tex
+\definecolor{ublue}{rgb}{0.152,0.250,0.545}
+\begin{tikzpicture}
+\begin{axis}[
+  width=12cm,
+  height=4cm,
+  xlabel={$\textrm{P}(x)$},
+  ylabel={$\textrm{I}(x)$},
+  ylabel style={yshift=-0.5cm},
+  xtick={0,0.2,...,1.0},
+  ytick={0,1,2},
+  domain=0:1,
+  enlarge x limits=true,
+  enlarge y limits={upper},
+  legend style={draw=none},
+  xmin=0,
+  xmax=1,
+  ymin=0,
+  ymax=2 
+]
+\addplot[draw=ublue,thick] {170*exp(-x-4)-1};
+\legend{$\textrm{I}(x) \cdot \textrm{P}(x)$}
+\end{axis}
+\end{tikzpicture}
+
+%---------------------------------------------------------------------
\ No newline at end of file
--- a/Book/Chapter2/Figures/figure-example-of-zh-syntactic-analysis.tex
+++ b/Book/Chapter2/Figures/figure-example-of-zh-syntactic-analysis.tex
@@ -9,11 +9,11 @@

 \node[anchor=west] (input) at (0,0) {\textbf{ \scriptsize{源句语法树}}};

-\node[anchor=north west] (rule) at ([xshift=-14.5em,yshift=11.5em]input.south west) {\scriptsize{句法翻译规则:}};
+%\node[anchor=north west] (rule) at ([xshift=-14.5em,yshift=11.5em]input.south west) {\scriptsize{句法翻译规则:}};

-\node[anchor=north west] (rule2) at ([yshift=0.2em]rule.south west) {\scriptsize{(VP BA(将) $x_1$:NP $x_2$:VP PP(P(给) $x_3$:NP))}};
+%\node[anchor=north west] (rule2) at ([yshift=0.2em]rule.south west) {\scriptsize{(VP BA(将) $x_1$:NP $x_2$:VP PP(P(给) $x_3$:NP))}};

-\node[anchor=north west] (rule3) at ([yshift=0.2em]rule2.south west) {\scriptsize{$\to$ $x_2$ $x_1$ to $x_3$ }};
+%\node[anchor=north west] (rule3) at ([yshift=0.2em]rule2.south west) {\scriptsize{$\to$ $x_2$ $x_1$ to $x_3$ }};


 \begin{scope}[scale = 0.7, grow'=up, sibling distance=6pt, level distance=35pt, xshift=3.6em, yshift=1.0em]
@@ -36,9 +36,9 @@

 \end{scope}

-\draw [-,dotted,very thick,blue] (tn3.south west) -- (tn4.south west) -- (seg2.north west) -- (seg2.north east) -- ([xshift=0.0em]tn4.north east) -- ([xshift=0.8em]tn6.north east) -- (seg5.north west) -- (seg5.north east) -- ([yshift=-1.2em]seg5.south east) -- (tn10.north east) -- (tn10.south east) -- (tn8.south east) -- (tn3.south east) -- (tn3.south west);
+%\draw [-,dotted,very thick,blue] (tn3.south west) -- (tn4.south west) -- (seg2.north west) -- (seg2.north east) -- ([xshift=0.0em]tn4.north east) -- ([xshift=0.8em]tn6.north east) -- (seg5.north west) -- (seg5.north east) -- ([yshift=-1.2em]seg5.south east) -- (tn10.north east) -- (tn10.south east) -- (tn8.south east) -- (tn3.south east) -- (tn3.south west);

-\draw[<->,dotted,blue,thick] ([yshift=-0.3em]seg2.south west) .. controls +(west:0.7) and +(south:0.5) ..  ([xshift=-1.8em]rule2.south);
+%\draw[<->,dotted,blue,thick] ([yshift=-0.3em]seg2.south west) .. controls +(west:0.7) and +(south:0.5) ..  ([xshift=-1.8em]rule2.south);


 \end{tikzpicture}

--- a/Book/Chapter2/Figures/figure-examples-of-Chinese-word-segmentation-based-on-1-gram-model.tex
+++ b/Book/Chapter2/Figures/figure-examples-of-Chinese-word-segmentation-based-on-1-gram-model.tex
@@ -20,19 +20,19 @@
 \end{pgfonlayer}
 }

-\node [anchor=west,ugreen] (P) at ([xshift=4em,yshift=-0.8em]corpus.east){\large{P($\cdot$)}};
+\node [anchor=west,ugreen] (P) at ([xshift=5.2em,yshift=-0.8em]corpus.east){\large{P($\cdot$)}};
 \node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计模型}}}};

 \begin{pgfonlayer}{background}
 \node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (P) (modellabel)] (model) {};
 \end{pgfonlayer}

-\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=3.2em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=4.2em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};

-\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=3.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};

 {\scriptsize
-\node [anchor=north west] (sentlabel) at ([xshift=5.5em,yshift=-1em]model.north east) {\color{red}{自动分词系统}};
+\node [anchor=north west] (sentlabel) at ([xshift=6.2em,yshift=-1em]model.north east) {\color{red}{自动分词系统}};
 \node [anchor=north west] (sent) at (sentlabel.south west) {\textbf{对任意句子进行分词}};
 }

@@ -42,9 +42,9 @@

 {\footnotesize
 {
-\node [anchor=west] (label1) at (0,6em) {实际上，通过学习我们得到了一个分词模型P($\cdot$)，给定任意的};
-\node [anchor=north west] (label1part2) at ([yshift=0.5em]label1.south west) {分词结果$W=w_1 w_2...w_n$，都能通过P($W$)=$\textrm{P}(w_1) \cdot \textrm{P}(w_2) \cdot ... \cdot \textrm{P}(w_n)$  \;  \, };
-\node [anchor=north west] (label1part3) at ([yshift=0.5em]label1part2.south west) {计算这种分词的概率值};
+\node [anchor=west] (label1) at (0,6em) {实际上，通过学习我们得到了一个分词模型P($\cdot$)，给定任意的分词结果};
+\node [anchor=north west] (label1part2) at ([yshift=0.5em]label1.south west) {$W=w_1 w_2...w_n$，都能通过P($W$)=$\textrm{P}(w_1) \cdot \textrm{P}(w_2) \cdot ... \cdot \textrm{P}(w_n)$ 计算这种分词的\hspace{0.13em} };
+\node [anchor=north west] (label1part3) at ([yshift=0.5em]label1part2.south west) {概率值};
 }

 \begin{pgfonlayer}{background}
@@ -70,10 +70,10 @@
 \node [anchor=north west,draw,thick,inner sep=2pt] (data11) at (label2.south west) {枚举所有可能的切分};
 }
 {
-\node [anchor=west,draw,thick,inner sep=2pt] (data12) at ([xshift=2em]data11.east) {计算每种切分的概率};
+\node [anchor=west,draw,thick,inner sep=2pt] (data12) at ([xshift=4em]data11.east) {计算每种切分的概率};
 }
 {
-\node [anchor=west,draw,thick,inner sep=2pt] (data13) at ([xshift=3.5em]data12.east) {选择最佳结果};
+\node [anchor=west,draw,thick,inner sep=2pt] (data13) at ([xshift=4.0em]data12.east) {选择最佳结果};
 }
 {
 \draw [->,thick] ([xshift=0.1em]data11.east) -- ([xshift=-0.1em]data12.west);

--- a/Book/Chapter2/Figures/figure-no-smoothing&smoothed-probability-distributions.tex
+++ b/Book/Chapter2/Figures/figure-no-smoothing&smoothed-probability-distributions.tex
+\definecolor{ublue}{rgb}{0.152,0.250,0.545}
+\begin{tikzpicture}
+\begin{axis}[
+  %align=left,
+  %axis x line=right,
+  %axis y line=left,   
+  width=12cm, height=5.5cm, 
+  symbolic x coords={未抽取词,do,want,what,am,people,look},%自定义x坐标
+  %enlargelimits=0.15,%x轴移动
+  xtick=data,%自定义x坐标
+  ytick={0,0.05,0.1,0.15,0.2,0.25},
+  xlabel={低概率词汇},
+  ylabel={词汇概率},
+  legend pos=outer north east,%图标位置
+  xlabel style={align=right,xshift=6.5cm,yshift=1cm},
+  ylabel style={rotate=-90,yshift=3cm,xshift=1cm},
+  y tick label style={/pgf/number format/.cd,fixed,precision=2},%y轴精度，不用科学表示
+  y axis line style={opacity=0},%隐藏y轴
+  tick align=inside,%原本的横行线
+  ymajorgrids,%显示横行线
+  axis x line*=bottom,%显示汉字
+  %legend entries={未平滑,平滑后},%右上图标
+  %clip=false,%不剪切
+  %xlabel shift=5cm,%整体右移
+  major grid style={dotted,draw=ublue},%横行线颜色
+  %enlarge x limits=true,%扩展x轴长度，防止边缘bar部分显示
+  axis on top,%网格线位于顶层
+  legend style={anchor=north west},%图标格式
+  ymin=0,
+  ymax=0.25]
+  \addplot [ybar,bar shift=-3mm,bar width=6mm,fill=blue!20,draw=blue!20,area legend] coordinates{(未抽取词,0) (do,0.05) (want,0.05) (what,0.05) (am,0.1) (people,0.15) (look,0.2)};
+  \addplot [ybar,bar shift=3mm,bar width=6mm,fill=red!20,draw=blue!20,area legend] coordinates{(未抽取词,0.03) (do,0.062) (want,0.062) (what,0.062) (am,0.09) (people,0.122) (look,0.138)};
+  \legend{未平滑,平滑后}
+\end{axis}
+\end{tikzpicture}
+
+%---------------------------------------------------------------------
\ No newline at end of file
--- a/Book/Chapter2/Figures/figure-process-of-statistical-syntax-analysis.tex
+++ b/Book/Chapter2/Figures/figure-process-of-statistical-syntax-analysis.tex
@@ -46,19 +46,19 @@
 \end{pgfonlayer}
 }

-\node [anchor=west,ugreen] (P) at ([xshift=4.5em,yshift=-0.8em]corpus.east){\large{P($\cdot$)}};
+\node [anchor=west,ugreen] (P) at ([xshift=5.95em,yshift=-0.8em]corpus.east){\large{P($\cdot$)}};
 \node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计分析模型}}}};

 \begin{pgfonlayer}{background}
 \node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (P) (modellabel)] (model) {};
 \end{pgfonlayer}

-\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=3.0em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=4.2em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};

-\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=3.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};

 {\scriptsize
-\node [anchor=north west] (sentlabel) at ([xshift=5.0em,yshift=-1em]model.north east) {{\color{ublue} {\scriptsize \textbf{统计分析模型}}}};
+\node [anchor=north west] (sentlabel) at ([xshift=6.2em,yshift=-1em]model.north east) {{\color{ublue} {\scriptsize \textbf{统计分析模型}}}};
 \node [anchor=north west] (sent) at (sentlabel.south west) {\textbf{对任意句子进行分析}};
 }

@@ -72,3 +72,32 @@



+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/Book/Chapter2/Figures/figure-word-frequency-distribution.tex
+++ b/Book/Chapter2/Figures/figure-word-frequency-distribution.tex
+\definecolor{ublue}{rgb}{0.152,0.250,0.545}
+\begin{tikzpicture}
+\begin{axis}[
+  width=12cm,
+  height=6cm,
+  xlabel={某语料中的词汇},
+  ylabel={词汇出现总次数},
+  xlabel style={xshift=6.6cm,yshift=1cm},
+  ylabel style={rotate=-90,yshift=3cm,xshift=1.2cm},
+  xticklabel style={opacity=0},
+  ytick={0,10000000,20000000,30000000,40000000,50000000,60000000},
+  tick align=inside,
+  y axis line style={opacity=0},
+  ymajorgrids,
+  major grid style={dotted,draw=ublue},
+  ybar,
+  bar width=0.5,
+  xmin=0,
+  xmax=105,
+  ymin=0,
+  ymax=60000000 
+]
+\addplot[fill=blue!20,draw=ublue] coordinates{(1,56000000)
+(2,34000000)
+(3,30000000)
+(4,27000000)
+(5,18000000)
+(6,12000000)
+(7,11000000)
+(8,10000000)
+(9,9000000)
+(10,8600000)
+(11,8200000)
+(12,7800000)
+(13,7400000)
+(14,7000000)
+(15,6600000)
+(16,6200000)
+(17,5800000)
+(18,5400000)
+(19,5000000)
+(20,4800000)
+(21,4600000)
+(22,4400000)
+(23,4200000)
+(24,4000000)
+(25,3800000)
+(26,3600000)
+(27,3400000)
+(28,3200000)
+(29,3000000)
+(30,2950000)
+(31,2900000)
+(32,2850000)
+(33,2800000)
+(34,2750000)
+(35,2700000)
+(36,2650000)
+(37,2600000)
+(38,2550000)
+(39,2500000)
+(40,2450000)
+(41,2400000)
+(42,2350000)
+(43,2300000)
+(44,2250000)
+(45,2200000)
+(46,2150000)
+(47,2100000)
+(48,2050000)
+(49,2000000)
+(50,1970000)
+(51,1940000)
+(52,1910000)
+(53,1880000)
+(54,1850000)
+(55,1820000)
+(56,1790000)
+(57,1760000)
+(58,1730000)
+(59,1700000)
+(60,1670000)
+(61,1640000)
+(62,1610000)
+(63,1580000)
+(64,1550000)
+(65,1520000)
+(66,1490000)
+(67,1460000)
+(68,1430000)
+(69,1400000)
+(70,1370000)
+(71,1340000)
+(72,1310000)
+(73,1280000)
+(74,1250000)
+(75,1220000)
+(76,1190000)
+(77,1160000)
+(78,1130000)
+(79,1100000)
+(80,1070000)
+(81,1040000)
+(82,1010000)
+(83,980000)
+(84,950000)
+(85,920000)
+(86,890000)
+(87,860000)
+(88,830000)
+(89,800000)
+(90,770000)
+(91,740000)
+(92,710000)
+(93,680000)
+(94,650000)
+(95,620000)
+(96,590000)
+(97,560000)
+(98,530000)
+(99,500000)
+(100,470000)
+(101,440000)
+(102,410000)
+(103,380000)
+(104,350000)};
+\end{axis}
+\end{tikzpicture}
+
+%---------------------------------------------------------------------
\ No newline at end of file
--- a/Book/Chapter2/Figures/figure-word-segmentation-based-on-statistics.tex
+++ b/Book/Chapter2/Figures/figure-word-segmentation-based-on-statistics.tex
@@ -29,7 +29,7 @@
 }

 {
-\node [anchor=west,ugreen] (P) at ([xshift=4em,yshift=-0.8em]corpus.east){\large{P($\cdot$)}};
+\node [anchor=west,ugreen] (P) at ([xshift=5.2em,yshift=-0.8em]corpus.east){\large{P($\cdot$)}};
 \node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计模型}}}};
 }

@@ -40,16 +40,16 @@
 \end{pgfonlayer}

 {
-\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=3.1em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=4.2em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};
 }

 {
-\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=3.1em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};
 }

 {\scriptsize
 {
-\node [anchor=north west] (sentlabel) at ([xshift=5.5em,yshift=2em]model.north east) {\color{red}{新的句子}};
+\node [anchor=north west] (sentlabel) at ([xshift=6.8em,yshift=2em]model.north east) {\color{red}{新的句子}};
 \node [anchor=north west] (sent) at (sentlabel.south west) {\textbf{确实现在数据很多}};
 }
 {

--- a/Book/Chapter2/chapter2.tex
+++ b/Book/Chapter2/chapter2.tex
@@ -18,7 +18,7 @@

 \parinterval 语言分析部分将以汉语为例介绍词法和句法分析。它们都是自然语言处理中的基本问题，而且在机器翻译中也会经常被使用。同样，我们会介绍这两个任务的定义和解决问题的思路。

-\parinterval 语言建模是机器翻译中最常用的一种技术，它主要用于句子的生成和流畅度评价。我们会以传统计语言模型为例，对语言建模的相关概念进行介绍。但是，这里并不深入探讨语言模型技术，在后面的章节中还有会单独的内容对神经网络语言模型等前沿技术进行讨论。\\ \\ \\ \\ 
+\parinterval 语言建模是机器翻译中最常用的一种技术，它主要用于句子的生成和流畅度评价。我们会以传统统计语言模型为例，对语言建模的相关概念进行介绍。但是，这里并不深入探讨语言模型技术，在后面的章节中还会有单独的内容对神经网络语言模型等前沿技术进行讨论。\\ \\ \\ \\ 
 %--问题概述-----------------------------------------
 \section{问题概述 }\index{Chapter2.1}

@@ -50,9 +50,9 @@

 \vspace{0.5em}
 \begin{itemize}
-\item 分词。这个过程会把词串进行切分，切割成最小的单元。因为只有知道了什么是待处理字符串的最小单元，机器翻译系统才能对其进行表示、分析和生成。
+\item 分词：这个过程会把词串进行切分，切割成最小的单元。因为只有知道了什么是待处理字符串的最小单元，机器翻译系统才能对其进行表示、分析和生成。
 \vspace{0.5em}
-\item 句法分析。这个过程会对经过切分的句子进行进一步分析，得到句子的句法结构，这种结构是对句子的进一步抽象。比如，NP VP就可以表示名词短语(NP)和动词短语(VP)这种主谓结构。利用这些信息，机器翻译可以更加准确的对语言的结构进行分析和生成。
+\item 句法分析：这个过程会对经过切分的句子进行进一步分析，得到句子的句法结构，这种结构是对句子的进一步抽象。比如，NP VP就可以表示名词短语(NP)和动词短语(VP)这种主谓结构。利用这些信息，机器翻译可以更加准确地对语言的结构进行分析和生成。
 \end{itemize}
 \vspace{0.5em}

@@ -72,7 +72,7 @@
 \subsection{随机变量和概率}\index{Chapter2.2.1}
 \parinterval 在自然界中，有这样一类具有偶然性的事件，它在一定条件下是否会发生是不确定的。例如，明天会下雨、掷一枚硬币是正面朝上、扔一个骰子的点数是5……这类可能会发生也可能不会发生，通过大量的重复试验，能发现其发生具有某种规律性的事件叫做\textbf{随机事件}。

-\parinterval \textbf{随机变量（random variable）}是对随机事件发生可能状态的描述，是随机事件的数量表征。设$\Omega= \{ \omega \}$为一个随机试验的样本空间，X=X$(\omega)$就是定义在样本空间$\omega$上的、取值为实数的单值函数，即X=X$(\omega)$为随机变量，记为X。随机变量是一种能随机选取数值的变量，常用大写的英文字母或希腊字母表示，其取值通常用小写字母来表示。例如，用$A$表示一个随机变量，用$a$表示变量$A$的一个取值。根据随机变量可以选取的值，可以将其划分为离散变量和连续变量。
+\parinterval \textbf{随机变量（random variable）}是对随机事件发生可能状态的描述，是随机事件的数量表征。设$\Omega = \{ \omega \}$为一个随机试验的样本空间，$X=X(\omega)$就是定义在样本空间$\Omega$上的、取值为实数的单值函数，即$X=X(\omega)$为随机变量，记为$X$。随机变量是一种能随机选取数值的变量，常用大写的英文字母或希腊字母表示，其取值通常用小写字母来表示。例如，用$A$表示一个随机变量，用$a$表示变量$A$的一个取值。根据随机变量可以选取的值，可以将其划分为离散变量和连续变量。

 \parinterval 离散变量是指在其取值区间内可以被一一列举，总数有限并且可计算的数值变量。例如，用随机变量$X$代表某次投骰子出现的点数，点数只可能取1$\sim$6这6个整数，$X$是一个离散变量。

@@ -88,11 +88,9 @@
 \centering
 \caption{离散变量A的概率分布}
 \begin{tabular}{c|c c c c c c}
-               \hline
 \rule{0pt}{15pt}     A & $a_1=1$ & $a_2=2$ & $a_3=3$ & $a_4=4$ & $a_5=5$ & $a_6=6$\\
               \hline
 \rule{0pt}{15pt}     $\textrm{P}_i$ & $\textrm{P}_1=\frac{4}{25}$  &  $\textrm{P}_2=\frac{3}{25}$ &  $\textrm{P}_3=\frac{4}{25}$ & $\textrm{P}_4=\frac{6}{25}$ & $\textrm{P}_5=\frac{3}{25}$ & $\textrm{P}_6=\frac{1}{25}$  \\
-               \hline
             \end{tabular}
             \label{tab1}
 \end{table}
@@ -231,13 +229,13 @@
 %---------------------------------------------
 \parinterval 这就是全概率公式。

-\parinterval 举个例子，小张从家到公司有三条路分别为a，b，c，选择每条路的概率分别为0.5，0.3，0.2，那么：
+\parinterval 举个例子，小张从家到公司有三条路分别为$a$，$b$，$c$，选择每条路的概率分别为0.5，0.3，0.2，那么：

 \parinterval $S_a$: 选择a路去上班，$S_b$: 选择b路去上班，$S_c$: 选择c路去上班，$S$：小张去上班

 \parinterval 这四件事的关系即为：$S_a$，$S_b$，$S_c$是$S$的划分。

-\parinterval 如果三条路不拥堵的概率分别为$\textrm{P}({S_{a}^{'}})$=0.2，$\textrm{P}({S_{b}^{'}})$=0.4，$\textrm{P}({S_{c}^{'}})$=0.7，那么事件L：小张上班没有遇到拥堵情况的概率就是：
+\parinterval 如果三条路不拥堵的概率分别为$\textrm{P}({S_{a}^{'}})$=0.2，$\textrm{P}({S_{b}^{'}})$=0.4，$\textrm{P}({S_{c}^{'}})$=0.7，那么事件$L$：小张上班没有遇到拥堵情况的概率就是：
 %--------------------------------------------
 \begin{eqnarray}
 {\textrm{P}(L)} &=& {\textrm{P}( L| S_a )\textrm{P}(S_a )+\textrm{P}( L| S_b )\textrm{P}(S_b )+\textrm{P}( L| S_c )\textrm{P}(S_c )}\nonumber \\
@@ -321,9 +319,9 @@
 % 图2.5
 \begin{figure}[htp]
 \centering
-\includegraphics[scale=0.5]{./Chapter2/Figures/figure-Self-information-function.pdf}
+\input{./Chapter2/Figures/figure-Self-information-function}
 \setlength{\belowcaptionskip}{-1.5em}
-\caption{自信息函数图像}
+\caption{\red{自信息函数图像}}
 \label{fig:Self-information-function}
 \end{figure}
 %-------------------------------------------
@@ -383,33 +381,15 @@

 %-------------------------------------------
 \begin{definition}
-《新华字典》
+词

-语言里最小的可以独立运用的单位：词汇。
-\end{definition}
-%-------------------------------------------
+语言里最小的可以独立运用的单位：词汇。——《新华字典》

-%-------------------------------------------
-\begin{definition}
-《维基百科》
-
-单词（word），含有语义内容或语用内容，且能被单独念出来的的最小单位。
-\end{definition}
-%-------------------------------------------
+单词（word），含有语义内容或语用内容，且能被单独念出来的最小单位。——《维基百科》

-%-------------------------------------------
-\begin{definition}
-《国语辞典》
-
-語句中具有完整概念，能獨立自由運用的基本單位。
-\end{definition}
-%-------------------------------------------
-
-%-------------------------------------------
-\begin{definition}
-《现代汉语词典》
+語句中具有完整概念，能獨立自由運用的基本單位。——《国语辞典》

-说话或诗歌、文章、戏剧中的语句
+说话或诗歌、文章、戏剧中的语句。——《现代汉语词典》
 \end{definition}
 %-------------------------------------------

@@ -505,28 +485,28 @@
 %-------------------------------------------
 \parinterval 似乎玩家的胜利只能来源于运气。不过，请注意，这里的假设``随便选一个数字''这本身就是一个概率模型，它对骰子的六个面的出现做了均匀分布假设。
 \begin{eqnarray}
-\textrm{P}("1")=\textrm{P}("2")=...=\textrm{P}("5")=\textrm{P}("6")=1/6
+\textrm{P(``1'')}=\textrm{P(``2'')}=...=\textrm{P(``5'')}=\textrm{P(``6'')}=1/6
 \label{eqC2.21-new}
 \end{eqnarray}

 \parinterval 但是这个游戏没有人规定骰子是均匀的（有些被坑了的感觉）。但是如果骰子的六个面不均匀呢？我们可以用一种更加``聪明''的方式定义一个新模型，即定义骰子的每一个面都以一定的概率出现，而不是相同的概率。这里，为了保证概率的归一性，我们只需定义$\theta_1 \sim \theta_5$，最后一个面的概率用1减去前几个面的概率之和进行表示，即
 \begin{eqnarray}
-\textrm{P}("1") &=&\theta_1 \nonumber \\
-\textrm{P}("2") &=&\theta_2 \nonumber \\
-\textrm{P}("3") &=&\theta_3 \nonumber \\
-\textrm{P}("4") &=&\theta_4 \nonumber \\
-\textrm{P}("5") &=&\theta_5 \nonumber \\
-\textrm{P}("6") &=&1-\sum_{1 \leq i \leq 5}\theta_i \qquad \lhd \textrm {归一性}
+\textrm{P(``1'')} &=&\theta_1 \nonumber \\
+\textrm{P(``2'')} &=&\theta_2 \nonumber \\
+\textrm{P(``3'')} &=&\theta_3 \nonumber \\
+\textrm{P(``4'')} &=&\theta_4 \nonumber \\
+\textrm{P(``5'')} &=&\theta_5 \nonumber \\
+\textrm{P(``6'')} &=&1-\sum_{1 \leq i \leq 5}\theta_i \qquad \lhd \textrm {归一性}
 \label{eqC2.22-new}
 \end{eqnarray}

 \parinterval 这里$\theta_1 \sim \theta_5$可以被看作是模型的参数。对于这样的模型，参数确定了，模型也就确定了。但是，新的问题来了，在定义骰子每个面的概率后，如何求出具体的值呢？一种常用的方法是，从大量实例中学习模型参数，这个方法也是常说的参数估计。我们可以将这个不均匀的骰子先实验性的掷很多次，这可以被看作是独立同分布的若干次采样，比如$X$次，发现``1''出现$X_1$次，``2''出现$X_2$次，以此类推，得到了各个面出现的次数。假设掷骰子中每个面出现的概率符合多项式分布，通过简单的概率论知识可以知道每个面出现概率的极大似然估计为：
 \begin{eqnarray}
-\textrm{P}("i")=\frac {X_i}{X}
+\textrm{P(``$i$'')}=\frac {X_i}{X}
 \label{eqC2.23-new}
 \end{eqnarray}

-\parinterval 当X足够大的话，$\frac{X_i}{X}$可以无限逼近P(``i'')的真实值，因此可以通过大量的实验推算出掷骰子各个面的概率的准确估计值。回归到我们的问题中，如果我们在正式开始游戏前，预先掷骰子30次，得到如图\ref{fig:the-dice-game2}的结果。
+\parinterval 当$X$足够大的话，$\frac{X_i}{X}$可以无限逼近P(``$i$'')的真实值，因此可以通过大量的实验推算出掷骰子各个面的概率的准确估计值。回归到我们的问题中，如果我们在正式开始游戏前，预先掷骰子30次，得到如图\ref{fig:the-dice-game2}的结果。

 %----------------------------------------------
 % 图2.12
@@ -581,11 +561,11 @@

 \parinterval 如果，我们把这些数字换成汉语中的词，比如

-\parinterval 88 – 这
+\parinterval 88\; – \; 这

-\parinterval 87 – 是
+\parinterval 87\; – \; 是

-\parinterval 45 – 一
+\parinterval 45\; – \; 一

 \parinterval …

@@ -620,13 +600,15 @@
 \end{equation}

 \parinterval 这里可以使用``大题小做''的技巧：原始的问题很复杂，我们将其切分为小问题。这样，将复杂的分词问题简单化，基于独立性假设解决分词问题：假定所有词出现都是相互独立的。设$w_1 w_2 w_3…w_m$表示一个由单词$w_1,w_2,w_3,…,w_m$组成的切分结果，于是有：
+{\setlength{\belowdisplayskip}{-9pt}
 \begin{eqnarray}
 \textrm{P}(w_1 w_2 w_3…w_m)=\textrm{P}(w_1) \cdot \textrm{P}(w_2) \cdot ... \cdot \textrm{P}(w_m)
 \label{eqC2.25-new}
 \end{eqnarray}
+}
 \begin{eqnarray}
 &\textrm{P}&\textrm{(`确实/现在/数据/很/多')} \nonumber \\
-& = &\textrm{P}\textrm{`确实'} \cdot \textrm{P}\textrm{`现在'} \cdot \textrm{P}\textrm{`数据'} \cdot \textrm{P}\textrm{`很'} \cdot \textrm{P}\textrm{`多'} \nonumber \\
+& = &\textrm{P}\textrm{(`确实')} \cdot \textrm{P}\textrm{(`现在')} \cdot \textrm{P}\textrm{(`数据')} \cdot \textrm{P}\textrm{(`很')} \cdot \textrm{P}\textrm{(`多')} \nonumber \\
 & = &0.000001 \times 0.000022 \times 0.000009 \times 0.000010 \times 0.000078 \nonumber \\
 & = &1.5444 \times 10^{-25}
 \label{eqC2.26-new}
@@ -658,12 +640,13 @@
 \parinterval 在基于统计的汉语分词模型中，我们通过``大题小做''的技巧，利用独立性假设把整个句子的单词切分概率转化为每个单个词出现概率的乘积。这里，每个单词也被称作1-gram（或uni-gram），而1-gram概率的乘积实际上也是在度量词序列出现的可能性（记为$\textrm{P}(w_1 w_2...w_m)$）。这种计算整个单词序列概率$\textrm{P}(w_1 w_2...w_m)$的方法被称为统计语言模型。1-gram语言模型是最简单的一种语言模型，它没有考虑任何的上下文。很自然的一个问题是：能否考虑上下文信息构建更强大的语言模型，进而得到更准确的分词结果。下面我们将进一步介绍更加通用的$n$-gram语言模型，它在机器翻译及其它自然语言处理任务中有更加广泛的应用。

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\subsection{$n$-gram语言模型}\index{Chapter2.4.1}
+\subsection{$n \textrm{-gram}$语言模型}\index{Chapter2.4.1}

 \parinterval \textbf{语言模型（language model）}的目的是描述文字序列出现的规律。这个对问题建模的过程被称作\textbf{语言建模（language modeling）}。如果使用统计建模的方式，语言模型可以被定义为计算$\textrm{P}(w_1 w_2...w_m)$，也就是计算整个词序列$w_1 w_2...w_m$出现的可能性大小。具体定义如下，

 %----------------------------------------------
 % 定义3.1
+\vspace{0.5em}
 \begin{definition}[]
 词汇表$V$上的语言模型是一个函数$\textrm{P}(w_1 w_2...w_m)$，它表示$V^+$上的一个概率分布。其中，对于任何词串$w_1 w_2...w_m\in{V^+}$，有$\textrm{P}(w_1 w_2...w_m)\geq{0}$。而且对于所有的词串，函数满足归一化条件$\sum{_{w_1 w_2...w_m\in{V^+}}\textrm{P}(w_1 w_2...w_m)}=1$。
 \end{definition}
@@ -686,26 +669,24 @@
 \parinterval 这样，整个序列$w_1 w_2...w_m$的生成概率可以被重新定义为：
 %------------------------------------------------------
 % 表1.2
-\begin{table}[htp]{
-\begin{center}
+\begin{table}[htp]
+\centering
 \caption{$n$-gram语言模型取不同$n$值的模型描述}
 \label{tab:n-gram-model-of-different-n}
 {\scriptsize
-\begin{tabular}{l|l|l l l}
-\toprule
+\begin{tabular}{l|l|l l|l}
 \textbf{链式法则} & \textbf{1-gram} & \textbf{2-gram} & $...$ & \textbf{$n$-gram}\\
-\midrule
-$\textrm{P}(w_1 w_2...w_m)$ = & $\textrm{P}(w_1 w_2...w_m)$ = & $\textrm{P}(w_1 w_2...w_m)$ = & $...$ & $\textrm{P}(w_1 w_2...w_m)$ = \\
-$\textrm{P}(w_1)\times$ & $\textrm{P}(w_1)\times$ & $\textrm{P}(w_1)\times$  & $...$ & $\textrm{P}(w_1)\times$ \\
-$\textrm{P}(w_2|w_1)\times$ & $\textrm{P}(w_2)\times$ & $\textrm{P}(w_2|w_1)\times$ & $...$ & $\textrm{P}(w_2|w_1)\times$\\
-$\textrm{P}(w_3|w_1 w_2)\times$ & $\textrm{P}(w_3)\times$ & $\textrm{P}(w_3|w_2)\times$ & $...$ & $\textrm{P}(w_3|w_1 w_2)\times$ \\
-$\textrm{P}(w_4|w_1 w_2 w_3)\times$ & $\textrm{P}(w_4)\times$ & $\textrm{P}(w_4|w_3)\times$ & $...$ & $\textrm{P}(w_4|w_1 w_2 w_3)\times$ \\
-$...$ & $...$ & $...$ & $...$ & $...$ \\
-$\textrm{P}(w_m|w_1 ... w_{m-1})$ & $\textrm{P}(w_m)$ & $\textrm{P}(w_m|w_{m-1})$ & $...$ & $\textrm{P}(w_m|w_{m-n+1} ... w_{m-1})$ 
+ \hline
+\rule{0pt}{10pt} $\textrm{P}(w_1 w_2...w_m)$ = & $\textrm{P}(w_1 w_2...w_m)$ = & $\textrm{P}(w_1 w_2...w_m)$ = & $...$ & $\textrm{P}(w_1 w_2...w_m)$ = \\
+\rule{0pt}{10pt} $\textrm{P}(w_1)\times$ & $\textrm{P}(w_1)\times$ & $\textrm{P}(w_1)\times$  & $...$ & $\textrm{P}(w_1)\times$ \\
+\rule{0pt}{10pt} $\textrm{P}(w_2|w_1)\times$ & $\textrm{P}(w_2)\times$ & $\textrm{P}(w_2|w_1)\times$ & $...$ & $\textrm{P}(w_2|w_1)\times$\\
+\rule{0pt}{10pt} $\textrm{P}(w_3|w_1 w_2)\times$ & $\textrm{P}(w_3)\times$ & $\textrm{P}(w_3|w_2)\times$ & $...$ & $\textrm{P}(w_3|w_1 w_2)\times$ \\
+\rule{0pt}{10pt} $\textrm{P}(w_4|w_1 w_2 w_3)\times$ & $\textrm{P}(w_4)\times$ & $\textrm{P}(w_4|w_3)\times$ & $...$ & $\textrm{P}(w_4|w_1 w_2 w_3)\times$ \\
+\rule{0pt}{10pt} $...$ & $...$ & $...$ & $...$ & $...$ \\
+\rule{0pt}{10pt} $\textrm{P}(w_m|w_1 ... w_{m-1})$ & $\textrm{P}(w_m)$ & $\textrm{P}(w_m|w_{m-1})$ & $...$ & $\textrm{P}(w_m|w_{m-n+1} ... w_{m-1})$ 
 \end{tabular}
 }
-\end{center}
-}\end{table}
+\end{table}
 %------------------------------------------------------

 \parinterval 可以看到，1-gram语言模型只是$n$-gram语言模型的一种特殊形式。$n$-gram的优点在于，它所使用的历史信息是有限的，即$n$-1个单词。这种性质也反映了经典的马尔可夫链的思想\cite{liuke-markov-2004}\cite{resnick1992adventures}，有时也被称作马尔可夫假设或者马尔可夫属性。因此$n$-gram也可以被看作是变长序列上的一种马尔可夫模型，比如，2-gram语言模型对应着1阶马尔可夫模型，3-gram语言模型对应着2阶马尔可夫模型，以此类推。
@@ -714,7 +695,7 @@ $\textrm{P}(w_m|w_1 ... w_{m-1})$ & $\textrm{P}(w_m)$ & $\textrm{P}(w_m|w_{m-1})
 \begin{itemize}
 \item 极大似然估计。直接利用不同词序列在训练数据中出现的频度计算出$\textrm{P}(w_m$\\$|w_{m-n+1} ... w_{m-1})$
 \begin{eqnarray}
-\textrm{P}(w_m|w_{m-n+1}...w_{m-1})=\frac{count(w_{m-n+1}...w_m)}{count(w_{m-n+1}...w_{m-1})}
+\textrm{P}(w_m|w_{m-n+1}...w_{m-1})=\frac{\textrm{count}(w_{m-n+1}...w_m)}{\textrm{count}(w_{m-n+1}...w_{m-1})}
 \label{eqC2.29-new}
 \end{eqnarray}

@@ -738,7 +719,7 @@ $\textrm{P}(w_m|w_1 ... w_{m-1})$ & $\textrm{P}(w_m)$ & $\textrm{P}(w_m|w_{m-1})

 \parinterval 在式\ref{eqC2.30-new}的例子中，如果语料中从没有``确实''和``现在''两个词连续出现的情况，那么使用2-gram计算 ``确实/现在/数据/很/多''的切分方式的概率时，会出现如下情况
 \begin{eqnarray}
-\textrm{P}(\textrm{`现在'}|\textrm{`确实'}) = \frac{count(\textrm{`确实}\,\textrm{现在'})}{count(\textrm{`确实'})} = \frac{0}{count(\textrm{`确实'})} = 0
+\textrm{P}(\textrm{`现在'}|\textrm{`确实'}) = \frac{\textrm{count}(\textrm{`确实}\,\textrm{现在'})}{\textrm{count}(\textrm{`确实'})} = \frac{0}{\textrm{count}(\textrm{`确实'})} = 0
 \label{eqC2.31-new}
 \end{eqnarray}

@@ -749,8 +730,8 @@ $\textrm{P}(w_m|w_1 ... w_{m-1})$ & $\textrm{P}(w_m)$ & $\textrm{P}(w_m|w_{m-1})

 \begin{figure}[htp]
    \centering
-\includegraphics[scale=0.35]{./Chapter2/Figures/figure-word-frequency-distribution.pdf}
-	 \caption{词语频度分布}
+\input{./Chapter2/Figures/figure-word-frequency-distribution}
+	 \caption{\red{词语频度分布}}
    \label{fig:word-frequency-distribution}
 \end{figure}
 %---------------------------
@@ -766,13 +747,13 @@ $\textrm{P}(w_m|w_1 ... w_{m-1})$ & $\textrm{P}(w_m)$ & $\textrm{P}(w_m|w_{m-1})

 \parinterval 通常情况下，我们会利用采集到的语料库来模拟现实生活中真实全部的语料库。但是采集总是不充分的，比如无法涵盖所有的词汇，直接依据这样语料所获得的统计信息计算现实中的语言概率就会产生偏差。假设依据某语料C（从未出现``确实 现在''二元语法），评估一个已经分好词的句子S =``确实 现在 物价 很 高''的概率，当计算``确实 现在''的概率时使得评估$\textrm{P}(S) = 0$。显然这个结果是不够准确的，根据我们的常识，句子$S$是有出现的可能性的，这样句子的概率值不应该是0。
 \begin{eqnarray}
-\textrm{P}(\textrm{现在}|\textrm{确实}) = \frac{count(\textrm{确实}\,\textrm{现在})}{count(\textrm{确实})} = \frac{0}{count(\textrm{确实})} = 0
+\textrm{P}(\textrm{现在}|\textrm{确实}) = \frac{\textrm{count}(\textrm{确实}\,\textrm{现在})}{\textrm{count}(\textrm{确实})} = \frac{0}{\textrm{count}(\textrm{确实})} = 0
 \label{eqC2.32-new}
 \end{eqnarray}

 \parinterval 加法平滑方法（additive smoothing）假设每个$n$-gram出现的次数比实际统计次数多$\theta$次，$0 \leqslant\theta\leqslant 1$，使得分子部分不为0，那么计算前文例子``确实 现在''的概率时，可以使用如下方法计算。
 \begin{eqnarray}
-\textrm{P}(\textrm{现在}|\textrm{确实}) = \frac{\theta + count(\textrm{确实}\,\textrm{现在})}{\sum_{w}^{|V|}(\theta + count(\textrm{确实}w))} = \frac{\theta + count(\textrm{确实}\,\textrm{现在})}{\theta{|V|} + count(\textrm{确实})}
+\textrm{P}(\textrm{现在}|\textrm{确实}) = \frac{\theta + \textrm{count}(\textrm{确实}\,\textrm{现在})}{\sum_{w}^{|V|}(\theta + \textrm{count}(\textrm{确实}w))} = \frac{\theta + \textrm{count}(\textrm{确实}\,\textrm{现在})}{\theta{|V|} + \textrm{count}(\textrm{确实})}
 \label{eqC2.33-new}
 \end{eqnarray}

@@ -784,9 +765,8 @@ $\textrm{P}(w_m|w_1 ... w_{m-1})$ & $\textrm{P}(w_m)$ & $\textrm{P}(w_m|w_{m-1})
 % 图2.19
 \begin{figure}[htp]
    \centering
- 	\subfigure[无平滑的概率分布] {\includegraphics[scale=0.25]{./Chapter2/Figures/figure-no-smoothing&smoothed-probability-distributions-1.pdf}  }
- 	\subfigure[加一平滑的概率分布] {\includegraphics[scale=0.25]{./Chapter2/Figures/figure-no-smoothing&smoothed-probability-distributions-2.pdf}}
-	\caption{无平滑和有平滑后的概率分布}
+\input{./Chapter2/Figures/figure-no-smoothing&smoothed-probability-distributions}  
+	\caption{\red{无平滑和有平滑后的概率分布}}
    \label{fig:no-smoothing&smoothed-probability-distributions}
 \end{figure}
 %-------------------------------------------
@@ -808,7 +788,7 @@ r^* = (r + 1)\frac{n_{r + 1}}{n_r}
 \label{eqC2.35-new}
 \end{eqnarray}

-\parinterval 基于这个公式，就可以估计所有0次$n$元语法的频次$n_0 r_0^*=(r_0+1)n_1=n_1$。要把这个重新估计的统计数转化为概率，只需要进行归一化处理：对于每个统计数为$r$的事件，其概率为$\textrm{p}_r=r^*/N$，其中
+\parinterval 基于这个公式，就可以估计所有0次$n$元语法的频次$n_0 r_0^*=(r_0+1)n_1=n_1$。要把这个重新估计的统计数转化为概率，只需要进行归一化处理：对于每个统计数为$r$的事件，其概率为$\textrm{P}_r=r^*/N$，其中
 \begin{eqnarray}
 N = \sum_{r=0}^{\infty}{r^{*}n_r} = \sum_{r=0}^{\infty}{(r + 1)n_{r + 1}} = \sum_{r=1}^{\infty}{r\,n_r}
 \label{eqC2.36-new}
@@ -824,7 +804,7 @@ N = \sum_{r>0}{p_r n_r} = 1 - \frac{n_1}{N} < 1

 \parinterval Good-Turing方法最终通过出现1次的$n$元语法估计了统计为0的事件概率，达到了平滑的效果。

-\parinterval 我们使用一个例子来说明这个方法是如何通过已知事物的数量来预测未来事物的数量。仍然考虑在加法平滑法的英文词汇抽取的例子，根据Good-Turing方法进行修正如下表
+\parinterval 我们使用一个例子来说明这个方法是如何通过已知事物的数量来预测未来事物的数量。仍然考虑在加法平滑法的英文词汇抽取的例子，根据Good-Turing方法进行修正如下表：
 %------------------------------------------------------
 % 表1.3
 \begin{table}[htp]{
@@ -832,11 +812,13 @@ N = \sum_{r>0}{p_r n_r} = 1 - \frac{n_1}{N} < 1
 \caption{英文词汇抽取统计结果}
 \label{tab:results-of-en-vocabulary-extraction}
 {
-\begin{tabular}{l|l|l|l}
-\toprule
-\textbf{$r$} & \textbf{$n_r$} & \textbf{$n^*$} & \textbf{$p_r$}\\
-\midrule
-0 & 14 & 0.21 & 0.018 \\ \hline 1 & 3 & 0.67 & 0.056 \\ \hline 2 & 1 & 3 & 0.25 \\ \hline 3 & 1 & 4 & 0.333 \\ \hline 4 & 1 & - & - \\ \hline
+\begin{tabular}{l|lll}
+\rule{0pt}{10pt} \textbf{$r$} & \textbf{$n_r$} & \textbf{$r^*$} & \textbf{$p_r$}\\ \hline
+\rule{0pt}{10pt} 0 & 14 & 0.21 & 0.018 \\  
+\rule{0pt}{10pt} 1 & 3 & 0.67 & 0.056 \\ 
+\rule{0pt}{10pt} 2 & 1 & 3 & 0.25 \\ 
+\rule{0pt}{10pt} 3 & 1 & 4 & 0.333 \\ 
+\rule{0pt}{10pt} 4 & 1 & - & - \\ 
 \end{tabular}
 }
 \end{center}
@@ -850,7 +832,7 @@ N = \sum_{r>0}{p_r n_r} = 1 - \frac{n_1}{N} < 1

 \parinterval Kneser-Ney平滑方法是由R.Kneser和H.Ney于1995年提出的用于计算$n$元语法概率分布的方法\cite{kneser1995improved}\cite{chen1999empirical}。它基于absolute discounting，并被广泛认为是最有效的平滑方法。这种平滑方法改进了absolute discounting中与高阶分布相结合的低阶分布的计算方法，使不同阶分布得到充分的利用。这种算法综合利用了其他多种平滑算法的思想，是一种先进而且标准的平滑算法。

-\parinterval 首先介绍一下absolute discounting平滑算法，公式如下所示
+\parinterval 首先介绍一下absolute discounting平滑算法，公式如下所示：
 \begin{eqnarray}
 \textrm{P}_{\textrm{AbsDiscount}}(w_i | w_{i-1}) = \frac{c(w_{i-1},w_i )-d}{c(w_{i-1})} + \lambda(w_{i-1})\textrm{P}(w_i)
 \label{eqC2.38-new}
@@ -862,7 +844,7 @@ N = \sum_{r>0}{p_r n_r} = 1 - \frac{n_1}{N} < 1

 \parinterval 观察语料的二元语法发现，Francisco的前一个词仅是San，不会出现reading。这个分析提醒了我们，考虑前一个词的影响是有帮助的，比如仅在前一个词是San时，我们才给Francisco赋予一个较高的概率值。基于这种想法，改进原有的1-gram模型，创造一个新的1-gram模型$\textrm{P}_{\textrm{continuation}}$，使这个模型可以通过考虑前一个词的影响评估当前词作为第二个词出现的可能性。

-\parinterval 为了评估$\textrm{P}_{cont}$，统计使用当前词作为第二个词所出现二元语法的种类，二元语法种类越多，这个词作为第二个词出现的可能性越高，呈正比：
+\parinterval 为了评估$\textrm{P}_{\textrm{cont}}$，统计使用当前词作为第二个词所出现二元语法的种类，二元语法种类越多，这个词作为第二个词出现的可能性越高，呈正比：
 \begin{eqnarray}
 \textrm{P}_{\textrm{cont}}(w_i) \varpropto |\{w_{i-1}: c(w_{i-1} w_i )>0\}|
 \label{eqC2.39-new}
@@ -897,14 +879,13 @@ N = \sum_{r>0}{p_r n_r} = 1 - \frac{n_1}{N} < 1
 \parinterval 为了更具普适性，不仅局限为2-gram和1-gram的插值模型，利用递归的方式得到更通用的公式
 \begin{eqnarray}
 \textrm{P}_{\textrm{KN}}(w_i|w_{i-n+1} ...w_{i-1}) & = & \frac{\max(c_{\textrm{KN}}(w_{i-n+1}...w_{i})-d,0)}{c_{\textrm{KN}}(w_{i-n+1}...w_{i-1})} + \nonumber \\
-                                                   &   &  \lambda(w_{i-n+1}...w_{i-1})\textrm{P}_{\textrm{KN}}(w_i|w_{i-n+2}...w_{i-1}) \nonumber
+                                                   &   &  \lambda(w_{i-n+1}...w_{i-1})\textrm{P}_{\textrm{KN}}(w_i|w_{i-n+2}...w_{i-1}) 
 \end{eqnarray}
-
-\noindent 其中continuationcount$(\cdot)$表示的是基于某个单个词作为第$n$个词的$n$元语法的种类数目。
 \begin{eqnarray}
 \lambda(w_{i-n+1}...w_{i-1}) & = &  \frac{d}{c_{\textrm{KN}}(w_{i-n+1}...w_{i-1})}|\{w:c_{\textrm{KN}}(w_{i-n+1}...w_{i-1}w)>0\}| \label{eqC2.44-new} \\
 c_{\textrm{KN}}(\cdot) & = & \begin{cases} \textrm{count}(\cdot)\quad \textrm{for the highest order}  \\ \textrm{continuationcount}(\cdot)\quad \textrm{for lower order} \end{cases} \label{eqC2.45-new}
 \end{eqnarray}
+\noindent 其中continuationcount$(\cdot)$表示的是基于某个单个词作为第$n$个词的$n$元语法的种类数目。

 \parinterval 我们前面提到Kneser-Ney Smoothing 是当前一个标准的、广泛采用的、先进的平滑算法。还有很多以此为基础衍生出来的算法，有兴趣的读者可以查找更多资料了解\cite{parsing2009speech}\cite{ney1994structuring}\cite{chen1999empirical}。

@@ -940,17 +921,11 @@ c_{\textrm{KN}}(\cdot) & = & \begin{cases} \textrm{count}(\cdot)\quad for\ the\ 

 %-------------------------------------------
 \begin{definition}
-《百度百科》
-
-句法分析(Parsing)就是指对句子中的词语语法功能进行分析。
-\end{definition}
-%-------------------------------------------
+句法分析

-%-------------------------------------------
-\begin{definition}
-《维基百科（译文）》
+句法分析(Parsing)就是指对句子中的词语语法功能进行分析。——《百度百科》

-在自然语言或者计算机语言中，句法分析是利用形式化的文法规则对一个符号串进行分析的过程。
+在自然语言或者计算机语言中，句法分析是利用形式化的文法规则对一个符号串进行分析的过程。——《维基百科（译文）》
 \end{definition}
 %-------------------------------------------


--- a/Book/Chapter3/Chapter3.tex
+++ b/Book/Chapter3/Chapter3.tex
--- a/Book/Chapter3/Figures/figure-EM-algorithm-flow-chart.tex
+++ b/Book/Chapter3/Figures/figure-EM-algorithm-flow-chart.tex
@@ -7,17 +7,17 @@
 %-------------------------------------------------------------------------
 \begin{tikzpicture}
 \node [anchor=north west] (line1) at (0,0) {\textbf{IBM模型1的训练（EM算法）}};
-\node [anchor=north west] (line2) at ([yshift=-0.3em]line1.south west) {输入: 平行语料${(s^{[1]},t^{[1]}),...,(s^{[N]},t^{[N]})}$};
+\node [anchor=north west] (line2) at ([yshift=-0.3em]line1.south west) {输入: 平行语料${(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})}$};
 \node [anchor=north west] (line3) at ([yshift=-0.1em]line2.south west) {输出: 参数$f(\cdot|\cdot)$的最优值};
-\node [anchor=north west] (line4) at ([yshift=-0.1em]line3.south west) {1: \textbf{Function} \textsc{TrainItWithEM}($\{(s^{[1]},t^{[1]}),...,(s^{[N]},t^{[N]})\}$) };
+\node [anchor=north west] (line4) at ([yshift=-0.1em]line3.south west) {1: \textbf{Function} \textsc{TrainItWithEM}($\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$) };
 \node [anchor=north west] (line5) at ([yshift=-0.1em]line4.south west) {2: \ \ Initialize $f(\cdot|\cdot)$ \hspace{5em} $\rhd$ 比如给$f(\cdot|\cdot)$一个均匀分布};
 \node [anchor=north west] (line6) at ([yshift=-0.1em]line5.south west) {3: \ \ Loop until $f(\cdot|\cdot)$ converges};
 \node [anchor=north west] (line7) at ([yshift=-0.1em]line6.south west) {4: \ \ \ \ \textbf{foreach} $k = 1$ to $N$ \textbf{do}};
-\node [anchor=north west] (line8) at ([yshift=-0.1em]line7.south west) {5: \ \ \ \ \ \ \ \footnotesize{$c_{\mathbb{E}}(s_u|t_v;s^{[k]},t^{[k]}) = \sum\limits_{j=1}^{|s^{[k]}|} \delta(s_j,s_u) \sum\limits_{i=0}^{|t^{[k]}|} \delta(t_i,t_v) \cdot \frac{f(s_u|t_v)}{\sum_{i=0}^{l}f(s_u|t_i)}$}\normalsize{}};
-\node [anchor=north west] (line9) at ([yshift=-0.1em]line8.south west) {6: \ \ \ \ \textbf{foreach} $t_v$ appears at least one of $\{t^{[1]},...,t^{[N]}\}$ \textbf{do}};
-\node [anchor=north west] (line10) at ([yshift=-0.1em]line9.south west) {7: \ \ \ \ \ \ \ $\lambda_{t_v}^{'} = \sum_{s_u} \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;s^{[k]},t^{[k]})$};
-\node [anchor=north west] (line11) at ([yshift=-0.1em]line10.south west) {8: \ \ \ \ \ \ \ \textbf{foreach} $s_u$ appears at least one of $\{s^{[1]},...,s^{[N]}\}$ \textbf{do}};
-\node [anchor=north west] (line12) at ([yshift=-0.1em]line11.south west) {9: \ \ \ \ \ \ \ \ \ $f(s_u|t_v) = \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;s^{[k]},t^{[k]}) \cdot (\lambda_{t_v}^{'})^{-1}$};
+\node [anchor=north west] (line8) at ([yshift=-0.1em]line7.south west) {5: \ \ \ \ \ \ \ \footnotesize{$c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) = \sum\limits_{j=1}^{|\mathbf{s}^{[k]}|} \delta(s_j,s_u) \sum\limits_{i=0}^{|\mathbf{t}^{[k]}|} \delta(t_i,t_v) \cdot \frac{f(s_u|t_v)}{\sum_{i=0}^{l}f(s_u|t_i)}$}\normalsize{}};
+\node [anchor=north west] (line9) at ([yshift=-0.1em]line8.south west) {6: \ \ \ \ \textbf{foreach} $t_v$ appears in at least one of $\{\mathbf{t}^{[1]},...,\mathbf{t}^{[N]}\}$ \textbf{do}};
+\node [anchor=north west] (line10) at ([yshift=-0.1em]line9.south west) {7: \ \ \ \ \ \ \ $\lambda_{t_v}^{'} = \sum_{s_u} \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$};
+\node [anchor=north west] (line11) at ([yshift=-0.1em]line10.south west) {8: \ \ \ \ \ \ \ \textbf{foreach} $s_u$ appears in at least one of $\{\mathbf{s}^{[1]},...,\mathbf{s}^{[N]}\}$ \textbf{do}};
+\node [anchor=north west] (line12) at ([yshift=-0.1em]line11.south west) {9: \ \ \ \ \ \ \ \ \ $f(s_u|t_v) = \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) \cdot (\lambda_{t_v}^{'})^{-1}$};
 \node [anchor=north west] (line13) at ([yshift=-0.1em]line12.south west) {10: \ \textbf{return} $f(\cdot|\cdot)$};

 \begin{pgfonlayer}{background}

--- a/Book/Chapter3/Figures/figure-a-more-detailed-explanation-of-formula-3.40.tex
+++ b/Book/Chapter3/Figures/figure-a-more-detailed-explanation-of-formula-3.40.tex
@@ -27,11 +27,11 @@
 }

 {
-\node [anchor=south west,inner sep=2pt] (label1) at (eq4.north west) {\textbf{\scriptsize{翻译概率$\textrm{P}(s|t)$}}};
+\node [anchor=south west,inner sep=2pt] (label1) at (eq4.north west) {\textbf{\scriptsize{翻译概率$\textrm{P}(\mathbf{s}|\mathbf{t})$}}};
 }
 {
 \node [anchor=south west,inner sep=2pt] (label2) at (eq5.north west) {\textbf{\scriptsize{配对的总次数}}};
-\node [anchor=south west,inner sep=2pt] (label2part2) at ([yshift=-3pt]label2.north west) {\textbf{\scriptsize{$(s_u,t_v)$在句对$(s,t)$中}}};
+\node [anchor=south west,inner sep=2pt] (label2part2) at ([yshift=-3pt]label2.north west) {\textbf{\scriptsize{$(s_u,t_v)$在句对$(\mathbf{s},\mathbf{t})$中}}};
 }
 {
 \node [anchor=south west,inner sep=2pt] (label3) at (eq6.north west) {\textbf{\scriptsize{有的$t_i$的相对值}}};
@@ -40,7 +40,7 @@

 {
 \node [anchor=east,rotate=90] (neweq1) at ([yshift=-0em]eq4.south) {=};
-\node [anchor=north,inner sep=1pt] (neweq1full) at (neweq1.west) {\large{$\textrm{P}(s|t)$}};
+\node [anchor=north,inner sep=1pt] (neweq1full) at (neweq1.west) {\large{$\textrm{P}(\mathbf{s}|\mathbf{t})$}};
 }

 {

--- a/Book/Chapter3/Figures/figure-alignment-of-all-words-in-zh-en-sentence.tex
+++ b/Book/Chapter3/Figures/figure-alignment-of-all-words-in-zh-en-sentence.tex
@@ -143,7 +143,7 @@
    {
    \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P(}}};
    \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
-    \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\normalsize{= \ P($s|t$)}};
+    \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\normalsize{= \ P($\mathbf{s}|\mathbf{t}$)}};
    }
    }
    \end{scope}

--- a/Book/Chapter3/Figures/figure-calculation-formula&iterative-process-of-function.tex
+++ b/Book/Chapter3/Figures/figure-calculation-formula&iterative-process-of-function.tex
@@ -7,23 +7,23 @@


    \begin{tikzpicture}
-    \node [anchor=west,inner sep=2pt] (eq1) at (0,0) {$f(s_u|t_v)$};
+    \node [anchor=west,inner sep=2pt] (eq1) at (0,0) {$f(s_u|t_v)$};
     \node [anchor=west] (eq2) at (eq1.east) {$=$\ };
     \draw [-] ([xshift=0.3em]eq2.east) -- ([xshift=11.6em]eq2.east);
-    \node [anchor=south west] (eq3) at ([xshift=1em]eq2.east) {$\sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;s^{[i]},t^{[i]})$};
-    \node [anchor=north west] (eq4) at (eq2.east) {$\sum_{s_u} \sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;s^{[i]},t^{[i]})$};
+    \node [anchor=south west] (eq3) at ([xshift=1em]eq2.east) {$\sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[i]},\mathbf{t}^{[i]})$};
+    \node [anchor=north west] (eq4) at (eq2.east) {$\sum_{s_u} \sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[i]},\mathbf{t}^{[i]})$};
 
    {
     \node [anchor=south] (label1) at ([yshift=-6em,xshift=3em]eq1.north west) {利用这个公式计算};
-    \node [anchor=north west] (label1part2) at ([yshift=0.3em]label1.south west) {新的$f(s_u|t_v)$值};
+    \node [anchor=north west] (label1part2) at ([yshift=0.3em]label1.south west) {新的$f(s_u|t_v)$值};
    }
    {
-    \node [anchor=west] (label2) at ([xshift=5em]label1.east) {用当前的$f(s_u|t_v)$};
+    \node [anchor=west] (label2) at ([xshift=5em]label1.east) {用当前的$f(s_u|t_v)$};
     \node [anchor=north west] (label2part2) at ([yshift=0.3em]label2.south west) {计算期望频次$c_{\mathbb{E}}(\cdot)$};
    }
 
    {
-    \node [anchor=west,fill=red!20,inner sep=2pt] (eq1) at (0,0) {$f(s_u|t_v)$};
+    \node [anchor=west,fill=red!20,inner sep=2pt] (eq1) at (0,0) {$f(s_u|t_v)$};
    }

    \begin{pgfonlayer}{background}

--- a/Book/Chapter3/Figures/figure-correspondence-between-IBM-model&formula-1.13.tex
+++ b/Book/Chapter3/Figures/figure-correspondence-between-IBM-model&formula-1.13.tex
@@ -4,14 +4,14 @@
 %-------------------------------------------------------------------------

    \begin{tikzpicture}
-    \node [anchor=west] (e1) at (0,0) {$g(s,t)$};
+    \node [anchor=west] (e1) at (0,0) {$g(\mathbf{s},\mathbf{t})$};
    \node [anchor=west] (e2) at (e1.east) {$=$};
    \node [anchor=west,inner sep=2pt,fill=red!20] (e3) at (e2.east) {$\prod\nolimits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$};
    \node [anchor=west,inner sep=1pt] (e4) at (e3.east) {$\times$};
-    \node [anchor=west,inner sep=3pt,fill=blue!20] (e5) at (e4.east) {$\textrm{P}_{lm}(t)$};
-    \node [anchor=north west,inner sep=1pt] (n1) at ([xshift=7.0em,yshift=-0.5em]e1.south west) {$\textrm{P}(s|t)$};
+    \node [anchor=west,inner sep=3pt,fill=blue!20] (e5) at (e4.east) {$\textrm{P}_{lm}(\mathbf{t})$};
+    \node [anchor=north west,inner sep=1pt] (n1) at ([xshift=7.0em,yshift=-0.5em]e1.south west) {$\textrm{P}(\mathbf{s}|\mathbf{t})$};
    \node [anchor=north] (n1part2) at ([yshift=0.3em]n1.south) {\scriptsize{\textbf{翻译模型}}};
-    \node [anchor=west,inner sep=1pt] (n2) at ([xshift=4.0em]n1.east) {$\textrm{P}(t)$};
+    \node [anchor=west,inner sep=1pt] (n2) at ([xshift=4.0em]n1.east) {$\textrm{P}(\mathbf{t})$};
    \node [anchor=north] (n2part2) at ([yshift=0.3em]n2.south) {\scriptsize{\textbf{语言模型}}};
    %\draw [->,thick] (e3.south) .. controls +(south:1em) and +(north:1em) .. (n1.north);
    %\draw [->,thick] (e5.south) .. controls +(south:1em) and +(70:1em) .. (n2.north);

--- a/Book/Chapter3/Figures/figure-different-translation-candidate-space.tex
+++ b/Book/Chapter3/Figures/figure-different-translation-candidate-space.tex
@@ -7,26 +7,26 @@

 \begin{tikzpicture}

-\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s1) at (0,0) {\black{$s$}};
-\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t) at ([xshift=1in]s1.east) {\black{$\hat{t}$}};
+\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s1) at (0,0) {\black{$\mathbf{s}$}};
+\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t) at ([xshift=1in]s1.east) {\black{$\widehat{\mathbf{t}}$}};

 \draw [->,thick,] (s1.north east) .. controls +(north east:1em) and +(north west:1em).. (t.north west) node[pos=0.5,below] {\tiny{正确翻译}};


-\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at ([xshift=13em,yshift=0em]s1.east) {\black{$s$}};
-\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t1) at ([xshift=1in]s.east) {\black{$t_1$}};
-\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t2) at ([xshift=3em,yshift=2em]t1.north east) {\black{$t_2$}};
-\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t3) at ([xshift=1em,yshift=4em]t1.north east) {\black{$t_3$}};
-\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t4) at ([xshift=3em,yshift=-1.5em]t1.north east) {\black{$t_4$}};
+\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at ([xshift=13em,yshift=0em]s1.east) {\black{$\mathbf{s}$}};
+\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t1) at ([xshift=1in]s.east) {\black{$\mathbf{t}_1$}};
+\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t2) at ([xshift=3em,yshift=2em]t1.north east) {\black{$\mathbf{t}_2$}};
+\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t3) at ([xshift=1em,yshift=4em]t1.north east) {\black{$\mathbf{t}_3$}};
+\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t4) at ([xshift=3em,yshift=-1.5em]t1.north east) {\black{$\mathbf{t}_4$}};

 \node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t5) at ([xshift=3em]t3.east) {};
 \node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t6) at ([xshift=3em]t2.east) {};
 \node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t7) at ([xshift=3em]t4.east) {};

-\draw [->,thick,] (s.north east) .. controls +(north east:1em) and +(north west:1em).. (t1.north west) node[pos=0.5,below] {\tiny{P ($t_1|s$)=0.1}};
-\draw [->,thick,] (s.60) .. controls +(50:4em) and +(west:1em).. (t2.west) node[pos=0.5,below] {\tiny{P($t_2|s$)=0.2}};
-\draw [->,thick,] (s.north) .. controls +(70:4em) and +(west:1em).. (t3.west) node[pos=0.5,above,xshift=-1em] {\tiny{P($t_3|s$)=0.3}};
-\draw [->,thick,] (s.south east) .. controls +(300:3em) and +(south west:1em).. (t4.south west) node[pos=0.5,below] {\tiny{P($t_4|s$)=0.1}};
+\draw [->,thick,] (s.north east) .. controls +(north east:1em) and +(north west:1em).. (t1.north west) node[pos=0.5,below] {\tiny{P($\mathbf{t}_1|\mathbf{s}$)=0.1}};
+\draw [->,thick,] (s.60) .. controls +(50:4em) and +(west:1em).. (t2.west) node[pos=0.5,below] {\tiny{P($\mathbf{t}_2|\mathbf{s}$)=0.2}};
+\draw [->,thick,] (s.north) .. controls +(70:4em) and +(west:1em).. (t3.west) node[pos=0.5,above,xshift=-1em] {\tiny{P($\mathbf{t}_3|\mathbf{s}$)=0.3}};
+\draw [->,thick,] (s.south east) .. controls +(300:3em) and +(south west:1em).. (t4.south west) node[pos=0.5,below] {\tiny{P($\mathbf{t}_4|\mathbf{s}$)=0.1}};

 \node [anchor=center] (foot1) at ([xshift=3.8em,yshift=-3em]s1.south) {\footnotesize{人的翻译候选空间}};
 \node [anchor=center] (foot2) at ([xshift=7em,yshift=-3em]s.south) {\footnotesize{机器的翻译候选空间}};

--- a/Book/Chapter3/Figures/figure-different-translation-result-in-different-score-IBM1.tex
+++ b/Book/Chapter3/Figures/figure-different-translation-result-in-different-score-IBM1.tex
@@ -8,28 +8,28 @@

 \begin{scope}

-\node [anchor=west] (s1) at (0,0) {$s$ = 在\ \ 桌子\ \ 上};
-\node [anchor=west] (t1) at ([yshift=-2em]s1.west) {$t$ = on\ \ the\ \ table};
+\node [anchor=west] (s1) at (0,0) {$\mathbf{s}$ = 在\ \ 桌子\ \ 上};
+\node [anchor=west] (t1) at ([yshift=-2em]s1.west) {$\mathbf{t}$ = on\ \ the\ \ table};
 \draw [->,double,thick,ublue] ([yshift=0.2em]s1.south) -- ([yshift=-0.8em]s1.south);

 \end{scope}

 \begin{scope}[xshift=1.5in]

-\node [anchor=west] (s2) at (0,0) {$s$ = 在\ \ 桌子\ \ 上};
-\node [anchor=west] (t2) at ([yshift=-2em]s2.west) {$t'$ = table \ on\ \ the};
+\node [anchor=west] (s2) at (0,0) {$\mathbf{s}$ = 在\ \ 桌子\ \ 上};
+\node [anchor=west] (t2) at ([yshift=-2em]s2.west) {$\mathbf{t}'$ = table \ on\ \ the};
 \draw [->,double,thick,ublue] ([yshift=0.2em]s2.south) -- ([yshift=-0.8em]s2.south);

 \end{scope}

-\node [anchor=north] (score11) at ([yshift=-2.0em]s1.south) {$\textrm{P}(s|t)$};
-\node [anchor=north] (score12) at ([yshift=-2.0em]s2.south) {$\textrm{P}(s|t')$};
+\node [anchor=north] (score11) at ([yshift=-2.0em]s1.south) {$\textrm{P}(\mathbf{s}|\mathbf{t})$};
+\node [anchor=north] (score12) at ([yshift=-2.0em]s2.south) {$\textrm{P}(\mathbf{s}|\mathbf{t}')$};
 \node [anchor=west] (comp1) at ([xshift=2.3em]score11.east) {\large{$\mathbf{=}$}};
 \node [anchor=east] (label1) at ([xshift=-1em,yshift=0.1em]score11.west) {\textbf{IBM模型1:}};

 {
-\node [anchor=north] (score21) at ([yshift=0.2em]score11.south) {$\textrm{P}(s|t)$};
-\node [anchor=north] (score22) at ([yshift=0.2em]score12.south) {$\textrm{P}(s|t')$};
+\node [anchor=north] (score21) at ([yshift=0.2em]score11.south) {$\textrm{P}(\mathbf{s}|\mathbf{t})$};
+\node [anchor=north] (score22) at ([yshift=0.2em]score12.south) {$\textrm{P}(\mathbf{s}|\mathbf{t}')$};
 \node [anchor=west] (comp2) at ([xshift=2.3em]score21.east) {\large{$\mathbf{>}$}};
 \node [anchor=east] (label2) at ([xshift=-1em,yshift=0.1em]score21.west) {\textbf{理想:}};
 }

--- a/Book/Chapter3/Figures/figure-example-translation-alignment.tex
+++ b/Book/Chapter3/Figures/figure-example-translation-alignment.tex
@@ -19,7 +19,7 @@
 \node [anchor=west] (s3) at ([xshift=3.2em]s2.east) {你$_3$};
 \node [anchor=west] (s4) at ([xshift=3.6em]s3.east) {感到$_4$};
 \node [anchor=west] (s5) at ([xshift=1.9em]s4.east) {满意$_5$};
-\node [anchor=east] (s) at (s1.west) {$s=$};
+\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$};
 \end{scope}

 \begin{scope}[yshift=-3.6em]
@@ -28,7 +28,7 @@
 \node [anchor=west] (t3) at ([xshift=2.3em,yshift=0.1em]t2.east) {satisfied$_3$};
 \node [anchor=west] (t4) at ([xshift=2.3em]t3.east) {with$_4$};
 \node [anchor=west] (t5) at ([xshift=2.3em,yshift=-0.2em]t4.east) {you$_5$};
-\node [anchor=east] (t) at (t1.west) {$t'=$};
+\node [anchor=east] (t) at (t1.west) {$\mathbf{t}'=$};
 \end{scope}


@@ -56,7 +56,7 @@
 \node [anchor=west] (s3) at ([xshift=2.5em]s2.east) {你$_3$};
 \node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {感到$_4$};
 \node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {满意$_5$};
-\node [anchor=east] (s) at (s1.west) {$s=$};
+\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$};
 \end{scope}

 \begin{scope}[yshift=-3.6em]
@@ -65,7 +65,7 @@
 \node [anchor=center] (t3) at ([yshift=-1.7em]s3.south) {you$_3$};
 \node [anchor=center] (t4) at ([yshift=-1.7em]s4.south) {am$_4$};
 \node [anchor=center] (t5) at ([yshift=-1.6em]s5.south) {satisfied$_5$};
-\node [anchor=center] (t) at ([xshift=-1.3em]t1.west) {$t''=$};
+\node [anchor=center] (t) at ([xshift=-1.3em]t1.west) {$\mathbf{t}''=$};
 \end{scope}



--- a/Book/Chapter3/Figures/figure-expression.tex
+++ b/Book/Chapter3/Figures/figure-expression.tex
@@ -12,16 +12,16 @@



-\node [anchor=west,inner sep=2pt,minimum height=2.5em] (eq1) at (0,0) {${\textrm{P}(\tau,\pi|t) =  \prod_{j=0}^{l}{\textrm{P}(\varphi_j|\varphi_{1}^{j-1},t)} \times {\textrm{P}(\varphi_0|\varphi_{1}^{l},t)} \times}$};
-\node [anchor=north west,inner sep=2pt,minimum height=2.5em] (eq2) at ([xshift=-15.06em,yshift=0.0em]eq1.south east) {${\prod_{j=0}^l{\prod_{k=1}^{\varphi_j}{\textrm{P}(\tau_{jk}|\tau_{j1}^{k-1},\tau_{1}^{j-1},\varphi_{0}^{l},t )}} \times}$};
-\node [anchor=north west,inner sep=2pt,minimum height=2.5em] (eq3) at ([xshift=-15.56em,yshift=0.0em]eq2.south east) {${\prod_{j=1}^l{\prod_{k=1}^{\varphi_j}{\textrm{P}(\pi_{jk}|\pi_{j1}^{k-1},\pi_{1}^{j-1},\tau_{0}^{l},\varphi_{0}^{l},t )}} \times}$};
-\node [anchor=north west,inner sep=2pt,minimum height=2.5em] (eq4) at ([xshift=-17.10em,yshift=0.0em]eq3.south east) {{${\prod_{k=1}^{\varphi_0}{\textrm{P}(\pi_{0k}|\pi_{01}^{k-1},\pi_{1}^{l},\tau_{0}^{l},\varphi_{0}^{l},t )}}$}};
-
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=red!15] (part1) at ([xshift=-12.5em,yshift=0.0em]eq1.east) {{${\textrm{P}(\varphi_j|\varphi_{1}^{j-1},t)}$}};
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=blue!15] (part2) at ([xshift=-5.9em,yshift=0.0em]eq1.east) {{${\textrm{P}(\varphi_0|\varphi_{1}^{l},t)}$}};
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=green!15] (part3) at ([xshift=-10.7em,yshift=0.0em]eq2.east) {{${\textrm{P}(\tau_{jk}|\tau_{j1}^{k-1},\tau_{1}^{j-1},\varphi_{0}^{l},t )}$}};
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=yellow!15] (part4) at ([xshift=-12.23em,yshift=0.0em]eq3.east) {{${\textrm{P}(\pi_{jk}|\pi_{j1}^{k-1},\pi_{1}^{j-1},\tau_{0}^{l},\varphi_{0}^{l},t )}$}};
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=gray!15] (part5) at ([xshift=-10.4em,yshift=0.0em]eq4.east) {{${\textrm{P}(\pi_{0k}|\pi_{01}^{k-1},\pi_{1}^{l},\tau_{0}^{l},\varphi_{0}^{l},t )}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.5em] (eq1) at (0,0) {${\textrm{P}(\tau,\pi|\mathbf{t}) =  \prod_{j=0}^{l}{\textrm{P}(\varphi_j|\varphi_{1}^{j-1},\mathbf{t})} \times {\textrm{P}(\varphi_0|\varphi_{1}^{l},\mathbf{t})} \times}$};
+\node [anchor=north west,inner sep=2pt,minimum height=2.5em] (eq2) at ([xshift=-15.06em,yshift=0.0em]eq1.south east) {${\prod_{j=0}^l{\prod_{k=1}^{\varphi_j}{\textrm{P}(\tau_{jk}|\tau_{j1}^{k-1},\tau_{1}^{j-1},\varphi_{0}^{l},\mathbf{t} )}} \times}$};
+\node [anchor=north west,inner sep=2pt,minimum height=2.5em] (eq3) at ([xshift=-15.56em,yshift=0.0em]eq2.south east) {${\prod_{j=1}^l{\prod_{k=1}^{\varphi_j}{\textrm{P}(\pi_{jk}|\pi_{j1}^{k-1},\pi_{1}^{j-1},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}} \times}$};
+\node [anchor=north west,inner sep=2pt,minimum height=2.5em] (eq4) at ([xshift=-17.10em,yshift=0.0em]eq3.south east) {{${\prod_{k=1}^{\varphi_0}{\textrm{P}(\pi_{0k}|\pi_{01}^{k-1},\pi_{1}^{l},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}}$}};
+
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=red!15] (part1) at ([xshift=-12.5em,yshift=0.0em]eq1.east) {{${\textrm{P}(\varphi_j|\varphi_{1}^{j-1},\mathbf{t})}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=blue!15] (part2) at ([xshift=-5.9em,yshift=0.0em]eq1.east) {{${\textrm{P}(\varphi_0|\varphi_{1}^{l},\mathbf{t})}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=green!15] (part3) at ([xshift=-10.7em,yshift=0.0em]eq2.east) {{${\textrm{P}(\tau_{jk}|\tau_{j1}^{k-1},\tau_{1}^{j-1},\varphi_{0}^{l},\mathbf{t} )}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=yellow!15] (part4) at ([xshift=-12.23em,yshift=0.0em]eq3.east) {{${\textrm{P}(\pi_{jk}|\pi_{j1}^{k-1},\pi_{1}^{j-1},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=gray!15] (part5) at ([xshift=-10.4em,yshift=0.0em]eq4.east) {{${\textrm{P}(\pi_{0k}|\pi_{01}^{k-1},\pi_{1}^{l},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}$}};


 \end{tikzpicture}

--- a/Book/Chapter3/Figures/figure-greedy-MT-decoding-pseudo-code.tex
+++ b/Book/Chapter3/Figures/figure-greedy-MT-decoding-pseudo-code.tex
@@ -16,7 +16,7 @@
 \node [anchor=north west,inner sep=2pt,align=left] (line11) at ([yshift=-1pt]line10.south west) {\textrm{10: \textbf{return} $best.translation$}};

 \node [anchor=south west,inner sep=2pt,align=left] (head1) at ([yshift=1pt]line1.north west) {输出: 找到的最佳译文};
-\node [anchor=south west,inner sep=2pt,align=left] (head2) at ([yshift=1pt]head1.north west) {输入: 源语句子$s=s_1...s_m$};
+\node [anchor=south west,inner sep=2pt,align=left] (head2) at ([yshift=1pt]head1.north west) {输入: 源语句子$\mathbf{s}=s_1...s_m$};

 }


--- a/Book/Chapter3/Figures/figure-noise-channel-model.tex
+++ b/Book/Chapter3/Figures/figure-noise-channel-model.tex
@@ -6,8 +6,8 @@

 \begin{tikzpicture}

-\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at (0,0) {\black{$s$}};
-\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=3.3pt] (t) at ([xshift=1.5in]s.east) {\black{$t$}};
+\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at (0,0) {\black{$\mathbf{s}$}};
+\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=3.3pt] (t) at ([xshift=1.5in]s.east) {\black{$\mathbf{t}$}};

 \draw [<->,thick,] (s.east) -- (t.west) node [pos=0.5,draw,fill=white] {噪声信道};
 \node [anchor=east] at (s.west) {\scriptsize{信宿}};

--- a/Book/Chapter3/Figures/figure-scores-of-different-translation_model&language_model.tex
+++ b/Book/Chapter3/Figures/figure-scores-of-different-translation_model&language_model.tex
@@ -2,7 +2,7 @@
 %-------------------------------------------------------------------------
 \begin{tabular}{| l | l |}
 \hline
-& {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$} \color{red}{{\footnotesize{$\times\textrm{P}_{lm}(t)$}}}} \\ \hline
+& {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$} \color{red}{{\footnotesize{$\times\textrm{P}_{lm}(\mathbf{t})$}}}} \\ \hline

 \begin{tikzpicture}

@@ -15,7 +15,7 @@
 \node [anchor=west] (s3) at ([xshift=3.2em]s2.east) {你$_3$};
 \node [anchor=west] (s4) at ([xshift=3.6em]s3.east) {感到$_4$};
 \node [anchor=west] (s5) at ([xshift=1.9em]s4.east) {满意$_5$};
-\node [anchor=east] (s) at (s1.west) {$s=$};
+\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$};
 \end{scope}

 \begin{scope}[yshift=-3.6em]
@@ -24,7 +24,7 @@
 \node [anchor=west] (t3) at ([xshift=2.3em,yshift=0.1em]t2.east) {satisfied$_3$};
 \node [anchor=west] (t4) at ([xshift=2.3em]t3.east) {with$_4$};
 \node [anchor=west] (t5) at ([xshift=2.3em,yshift=-0.2em]t4.east) {you$_5$};
-\node [anchor=east] (t) at (t1.west) {$t'=$};
+\node [anchor=east] (t) at (t1.west) {$\mathbf{t}'=$};
 \end{scope}


@@ -52,7 +52,7 @@
 \node [anchor=west] (s3) at ([xshift=2.5em]s2.east) {你$_3$};
 \node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {感到$_4$};
 \node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {满意$_5$};
-\node [anchor=east] (s) at (s1.west) {$s=$};
+\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$};
 \end{scope}

 \begin{scope}[yshift=-3.6em]
@@ -61,7 +61,7 @@
 \node [anchor=center] (t3) at ([yshift=-1.7em]s3.south) {you$_3$};
 \node [anchor=center] (t4) at ([yshift=-1.7em]s4.south) {am$_4$};
 \node [anchor=center] (t5) at ([yshift=-1.6em]s5.south) {satisfied$_5$};
-\node [anchor=center] (t) at ([xshift=-1.3em]t1.west) {$t''=$};
+\node [anchor=center] (t) at ([xshift=-1.3em]t1.west) {$\mathbf{t}''=$};
 \end{scope}



--- a/Book/Chapter3/Figures/figure-word-alignment&probability-distribution-in-IBM-model-3.tex
+++ b/Book/Chapter3/Figures/figure-word-alignment&probability-distribution-in-IBM-model-3.tex
@@ -16,7 +16,7 @@
 \node [anchor=north] (eq3) at ([xshift=0.0em,yshift=-2.0em]eq1.south) {Have};
 \node [anchor=north] (eq4) at ([xshift=0.0em,yshift=-2.0em]eq2.south) {breakfast};
 \node [anchor=east] (eq5) at ([xshift=-1.0em,yshift=-1.8em]eq1.west) {$a_{1}$};
-\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(s,a_{1}|t)=0.5$};
+\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(\mathbf{s},a_{1}|\mathbf{t})=0.5$};
 \draw [-,very thick](eq1.south) -- (eq3.north);
 \draw [-,very thick](eq2.south) -- (eq4.north);
 \node [anchor=west] (eq7) at ([xshift=13.1em,yshift=1.4em]eq2.east) {};
@@ -35,7 +35,7 @@
 \node [anchor=north] (eq3) at ([xshift=0.0em,yshift=-2.0em]eq1.south) {Have};
 \node [anchor=north] (eq4) at ([xshift=0.0em,yshift=-2.0em]eq2.south) {breakfast};
 \node [anchor=east] (eq5) at ([xshift=-1.0em,yshift=-1.8em]eq1.west) {$a_{2}$};
-\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(s,a_{2}|t)=0.1$};
+\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(\mathbf{s},a_{2}|\mathbf{t})=0.1$};
 \draw [-,very thick](eq1.south) -- (eq4.north);
 \draw [-,very thick](eq2.south) -- (eq3.north);
 \end{scope}
@@ -46,7 +46,7 @@
 \node [anchor=north] (eq3) at ([xshift=0.0em,yshift=-2.0em]eq1.south) {Have};
 \node [anchor=north] (eq4) at ([xshift=0.0em,yshift=-2.0em]eq2.south) {breakfast};
 \node [anchor=east] (eq5) at ([xshift=-1.0em,yshift=-1.8em]eq1.west) {$a_{3}$};
-\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(s,a_{3}|t)=0.1$};
+\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(\mathbf{s},a_{3}|\mathbf{t})=0.1$};
 \draw [-,very thick](eq1.south) -- (eq3.north);
 \draw [-,very thick](eq2.south) -- (eq3.north);
 \end{scope}
@@ -57,7 +57,7 @@
 \node [anchor=north] (eq3) at ([xshift=0.0em,yshift=-2.0em]eq1.south) {Have};
 \node [anchor=north] (eq4) at ([xshift=0.0em,yshift=-2.0em]eq2.south) {breakfast};
 \node [anchor=east] (eq5) at ([xshift=-1.0em,yshift=-1.8em]eq1.west) {$a_{4}$};
-\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(s,a_{4}|t)=0.1$};
+\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(\mathbf{s},a_{4}|\mathbf{t})=0.1$};
 \draw [-,very thick](eq1.south) -- (eq4.north);
 \draw [-,very thick](eq2.south) -- (eq4.north);
 \end{scope}
@@ -68,7 +68,7 @@
 \node [anchor=north] (eq3) at ([xshift=0.0em,yshift=-2.0em]eq1.south) {Have};
 \node [anchor=north] (eq4) at ([xshift=0.0em,yshift=-2.0em]eq2.south) {breakfast};
 \node [anchor=east] (eq5) at ([xshift=-1.0em,yshift=-1.8em]eq1.west) {$a_{5}$};
-\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(s,a_{5}|t)=0.05$};
+\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(\mathbf{s},a_{5}|\mathbf{t})=0.05$};
 \draw [-,very thick](eq1.south) -- (eq3.north);
 \draw [-,very thick](eq1.south) -- (eq4.north);
 \draw [-,very thick](eq2.south) -- (eq3.north);
@@ -83,7 +83,7 @@
 \node [anchor=north] (eq3) at ([xshift=0.0em,yshift=-2.0em]eq1.south) {Have};
 \node [anchor=north] (eq4) at ([xshift=0.0em,yshift=-2.0em]eq2.south) {breakfast};
 \node [anchor=east] (eq5) at ([xshift=-1.0em,yshift=-1.8em]eq1.west) {$a_{6}$};
-\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(s,a_{6}|t)=0.05$};
+\node [anchor=west] (eq6) at ([xshift=1.0em,yshift=-1.8em]eq2.east) {$\textrm{P}(\mathbf{s},a_{6}|\mathbf{t})=0.05$};
 \draw [-,very thick](eq1.south) -- (eq3.north);
 \draw [-,very thick](eq2.south) -- (eq4.north);
 \draw [-,very thick](eq2.south) -- (eq3.north);

--- a/Book/Chapter3/Figures/figure-word-alignment.tex
+++ b/Book/Chapter3/Figures/figure-word-alignment.tex
@@ -39,8 +39,8 @@
 \draw [->,thick] (s5.south) -- (t6.north);
 }

-\node [anchor=east] (ss) at ([xshift=-0.5em]s1.west) {\textbf{s}};
-\node [anchor=east] (tt) at ([xshift=-0.5em]t1.west) {\textbf{t}};
+\node [anchor=east] (ss) at ([xshift=-0.5em]s1.west) {$\mathbf{s}$};
+\node [anchor=east] (tt) at ([xshift=-0.5em]t1.west) {$\mathbf{t}$};

 }
 \end{tikzpicture}

--- a/Book/Chapter3/Figures/figure-zh-en-translation-sentence-pairs&word-alignment-connection.tex
+++ b/Book/Chapter3/Figures/figure-zh-en-translation-sentence-pairs&word-alignment-connection.tex
@@ -11,7 +11,7 @@
 \node [anchor=west] (s3) at ([xshift=0.5em]s2.east) {你\footnotesize{$_3$}};
 \node [anchor=west] (s4) at ([xshift=0.5em]s3.east) {感到\footnotesize{$_4$}};
 \node [anchor=west] (s5) at ([xshift=0.5em]s4.east) {满意\footnotesize{$_5$}};
-\node [anchor=east] (s) at (s1.west) {$s=$};
+\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$};
 \end{scope}

 \begin{scope}[yshift=-3.0em]
@@ -20,7 +20,7 @@
 \node [anchor=west] (t3) at ([xshift=0.3em,yshift=0.1em]t2.east) {satisfied\footnotesize{$_3$}};
 \node [anchor=west] (t4) at ([xshift=0.3em]t3.east) {with\footnotesize{$_4$}};
 \node [anchor=west] (t5) at ([xshift=0.3em,yshift=-0.2em]t4.east) {you\footnotesize{$_5$}};
-\node [anchor=east] (t) at ([xshift=-0.3em]t1.west) {$t=$};
+\node [anchor=east] (t) at ([xshift=-0.3em]t1.west) {$\mathbf{t}=$};
 \end{scope}



--- a/Book/Chapter3/Figures/figurerole-of-P-in-sentence-level-translation.tex
+++ b/Book/Chapter3/Figures/figurerole-of-P-in-sentence-level-translation.tex
@@ -16,7 +16,7 @@
 \end{pgfonlayer}
 }

-\node [anchor=west,ugreen] (P) at ([xshift=4em,yshift=-0.7em]corpus.east){P($t|s$)};
+\node [anchor=west,ugreen] (P) at ([xshift=4em,yshift=-0.7em]corpus.east){P($\mathbf{t}|\mathbf{s}$)};
 \node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{翻译模型}}}};

 \begin{pgfonlayer}{background}

--- a/Book/Chapter3/Figures/greedy-MT-decoding-process-1.tex
+++ b/Book/Chapter3/Figures/greedy-MT-decoding-process-1.tex
@@ -103,7 +103,7 @@
 \node [anchor=west] (hlabel) at ([yshift=-2.5em]jlabel.west) {\scriptsize{$i = 1, j = 1$}};
 }
 {\tiny
-\node [anchor=north west] (glabel) at (hlabel.south west) {$g(s,t)$};
+\node [anchor=north west] (glabel) at (hlabel.south west) {$g(\mathbf{s},\mathbf{t})$};
 \node [anchor=west] (translabel) at (glabel.east) {翻译结果};
 \draw [-] (glabel.north east) -- ([yshift=-2.0in]glabel.north east);
 \draw [-] (glabel.south west) -- ([xshift=3.5in]glabel.south west);
@@ -228,7 +228,7 @@
 }

 {\tiny%下面的表格
-\node [anchor=north west] (glabel) at (hlabel.south west) {$g(s,t)$};
+\node [anchor=north west] (glabel) at (hlabel.south west) {$g(\mathbf{s},\mathbf{t})$};
 \node [anchor=west] (translabel) at (glabel.east) {翻译结果};
 \draw [-] (glabel.north east) -- ([yshift=-2.0in]glabel.north east);
 \draw [-] (glabel.south west) -- ([xshift=3.5in]glabel.south west);

--- a/Book/Chapter3/Figures/greedy-MT-decoding-process-3.tex
+++ b/Book/Chapter3/Figures/greedy-MT-decoding-process-3.tex
@@ -120,7 +120,7 @@
 }

 {\tiny%下面的表格
-\node [anchor=north west] (glabel) at (hlabel.south west) {$g(s,t)$};
+\node [anchor=north west] (glabel) at (hlabel.south west) {$g(\mathbf{s},\mathbf{t})$};
 \node [anchor=west] (translabel) at (glabel.east) {翻译结果};
 \draw [-] (glabel.north east) -- ([yshift=-2.0in]glabel.north east);
 \draw [-] (glabel.south west) -- ([xshift=3.5in]glabel.south west);
@@ -278,7 +278,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 {\tiny%下面的表格
-\node [anchor=north west] (glabel) at (hlabel.south west) {$g(s,t)$};
+\node [anchor=north west] (glabel) at (hlabel.south west) {$g(\mathbf{s},\mathbf{t})$};
 \node [anchor=west] (translabel) at (glabel.east) {翻译结果};
 \draw [-] (glabel.north east) -- ([yshift=-2.0in]glabel.north east);
 \draw [-] (glabel.south west) -- ([xshift=3.5in]glabel.south west);

--- a/Book/structure.tex
+++ b/Book/structure.tex
@@ -551,8 +551,8 @@ addtohook={%
 \usepackage{bm}
 \usetikzlibrary{shapes.misc}
 \usepackage{appendix}
-
-
+\usepackage{pgfplots}
+\usepackage{tikz}