update all chapters4-14

83185b7b · 曹润柘 · 775f7122 · 83185b7b · 83185b7b · 83185b7b
Commit 83185b7b authored Apr 14, 2020 by 曹润柘
--- a/Book/Chapter1/Figures/figure-zh_en-example.png
+++ b/Book/Chapter1/Figures/figure-zh_en-example.png
--- a/Book/Chapter1/chapter1.tex
+++ b/Book/Chapter1/chapter1.tex
--- a/Book/Chapter2/Figures/figure-Self-information-function.tex
+++ b/Book/Chapter2/Figures/figure-Self-information-function.tex
@@ -6,24 +6,24 @@
  xlabel={{$\textrm{P}(x)$}},
  ylabel={{$\textrm{I}(x)$}},
  ylabel style={yshift=-1.5em,font=\footnotesize},
-xlabel style={yshift=0.3em,font=\footnotesize},
+  xlabel style={yshift=0.3em,font=\footnotesize},
  xtick={0,0.2,...,1.0},
-  ytick={0,1,2},
-y tick style={opacity=0},
-x tick style={opacity=0},
-  domain=0:1,
+  ytick={0,8},
+  y tick style={opacity=0},
+  x tick style={opacity=0},
+  domain=0.01:1,
  enlarge x limits=true,
  enlarge y limits={upper},
  legend style={draw=none},
  xmin=0,
  xmax=1,
  ymin=0,
-  ymax=2,
+  ymax=8,
 xticklabel style={font=\small}, %坐标轴字体大小
 yticklabel style={font=\small}
 ]
-\addplot[draw=blue!40,thick] {170*exp(-x-4)-1};
-\legend{\footnotesize{$\textrm{I}(x) \cdot \textrm{P}(x)$}}
+\addplot[draw=ublue,samples=100,thick] {-log2(x)};
+\legend{\footnotesize{$\textrm{I}(x) = -\log \textrm{P}(x)$}}
 \end{axis}
 \end{tikzpicture}


--- a/Book/Chapter2/Figures/figure-evaluation-of-probability-for-grammar.tex
+++ b/Book/Chapter2/Figures/figure-evaluation-of-probability-for-grammar.tex
@@ -68,6 +68,7 @@

 \node [anchor=north west] (math3) at ([yshift=-6em]math2.north west) {P(``IP $\to$ NP NP'')};
 \node [anchor=north west] (math3part2) at ([xshift=-1em,yshift=0.2em]math3.south west) {$=\frac{\textrm{``IP''和``NP NP''同时出现的次数=0}}{\textrm{``IP''出现的次数}=3}$};
+\node [anchor=north west] (math3part3) at ([yshift=0.2em]math3part2.south west){$=\frac{0}{3}$};

 \begin{pgfonlayer}{background}


--- a/Book/Chapter2/Figures/figure-examples-of-Chinese-word-segmentation-based-on-1-gram-model.tex
+++ b/Book/Chapter2/Figures/figure-examples-of-Chinese-word-segmentation-based-on-1-gram-model.tex
@@ -29,7 +29,7 @@

 \draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=4.2em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};

-\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{搜索\&计算}}};

 {\scriptsize
 \node [anchor=north west] (sentlabel) at ([xshift=6.2em,yshift=-1em]model.north east) {\color{red}{自动分词系统}};
@@ -103,7 +103,7 @@
 \node [anchor=north west] (data42) at (data32.south west) {$\textrm{P}(\textrm{“确实”}) \cdot \textrm{P}(\textrm{“现在”}) \cdot \textrm{P}(\textrm{“数据”}) \cdot $};
 }
 {
-\node [anchor=north west] (data43) at ([yshift=-0.2em]data33.south west) {\color{red}{\textbf{输出}}};
+\node [anchor=north west] (data43) at ([yshift=-0.2em,xshift=3em]data33.south west) {\color{red}{\textbf{输出}}};
 \draw [->,red,thick] (data43.west)--([xshift=-1em]data43.west);
 }
 {

--- a/Book/Chapter2/Figures/figure-perspectives-of-expert-ordinary-and-syntactic-parser.tex
+++ b/Book/Chapter2/Figures/figure-perspectives-of-expert-ordinary-and-syntactic-parser.tex
@@ -74,7 +74,7 @@

 语言学家: & 不对 & 对 & 不对  \\ 
 我们: & 似乎对了 & 比较肯定 & 不太可能 \\ 
-分析器: & P=0.2 & P=0.6 & P=0.1
+分析器: & $\textrm{P}=0.2$ & $\textrm{P}=0.6$ & $\textrm{P}=0.1$

 \end{tabular}
 %---------------------------------------------------------------------

--- a/Book/Chapter2/Figures/figure-probability-values-corresponding-to-different-derivations.tex
+++ b/Book/Chapter2/Figures/figure-probability-values-corresponding-to-different-derivations.tex
-
 \definecolor{ublue}{rgb}{0.152,0.250,0.545}
 \definecolor{ugreen}{rgb}{0,0.5,0}

@@ -66,7 +65,7 @@
 \end{scope}

 \draw [->,thick,ublue] ([xshift=-2em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=-8em,yshift=-2em]sent.south);
-\draw [->,thick,ublue] ([xshift=-1em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=-2em,yshift=-3em]sent.south);
+\draw [->,thick,ublue] ([xshift=-1em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=-2em,yshift=-2em]sent.south);
 \draw [->,thick,ublue] ([xshift=0em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=6.5em,yshift=-2em]sent.south);
 \draw [->,thick,ublue,dotted] ([xshift=1em]sent.south) ..controls + (south:1.5em) and +(north:2.5em).. ([xshift=12.5em,yshift=-2em]sent.south);

@@ -77,12 +76,12 @@
 \node [] (d2) at (0em,-10em) {$d_2$};
 \node [] (d3) at (8.5em,-10em) {$d_2$};

-\node [anchor=east] (d1p) at ([xshift=0.4em]d1.west) {P(};
-\node [anchor=west] (d1p2) at ([xshift=-0.4em]d1.east) {)=0.0123};
-\node [anchor=east] (d2p) at ([xshift=0.4em]d2.west) {P(};
-\node [anchor=west] (d2p2) at ([xshift=-0.4em]d2.east) {)=0.4031};
-\node [anchor=east] (d3p) at ([xshift=0.4em]d3.west) {P(};
-\node [anchor=west] (d3p2) at ([xshift=-0.4em]d3.east) {)=0.0056};
+\node [anchor=east] (d1p) at ([xshift=0.4em]d1.west) {$\textrm{P}($};
+\node [anchor=west] (d1p2) at ([xshift=-0.4em]d1.east) {$)=0.0123$};
+\node [anchor=east] (d2p) at ([xshift=0.4em]d2.west) {$\textrm{P}($};
+\node [anchor=west] (d2p2) at ([xshift=-0.4em]d2.east) {$)=0.4031$};
+\node [anchor=east] (d3p) at ([xshift=0.4em]d3.west) {$\textrm{P}($};
+\node [anchor=west] (d3p2) at ([xshift=-0.4em]d3.east) {$)=0.0056$};

 \end{tikzpicture}
 %---------------------------------------------------------------------

--- a/Book/Chapter2/Figures/figure-process-of-statistical-syntax-analysis.tex
+++ b/Book/Chapter2/Figures/figure-process-of-statistical-syntax-analysis.tex
@@ -55,7 +55,7 @@

 \draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=4.2em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{统计学习}}};

-\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{搜索\&计算}}};

 {\scriptsize
 \node [anchor=north west] (sentlabel) at ([xshift=6.2em,yshift=-1em]model.north east) {{\color{ublue} {\scriptsize \textbf{统计分析模型}}}};

--- a/Book/Chapter2/Figures/figure-schematic-edge-probability&joint-probability.tex
+++ b/Book/Chapter2/Figures/figure-schematic-edge-probability&joint-probability.tex
@@ -7,17 +7,13 @@

 \begin{scope}[scale=1.0]
 {
-\node [anchor=north west,minimum width=5em, minimum height=2.2em,fill=blue!70](num1)  at (0,0) {\quad \ A};

-\node [anchor=north west,minimum width=2em, minimum height=2.2em,fill=green!70](num2)  at ([xshift=3.8em,yshift=2.23em]num1.south west) {B};
+\node [anchor=north west,minimum width=7em, minimum height=2.5em,fill=blue!30](num1)  at (0,0) {$A$\quad \quad \quad \quad  };
+\node [anchor=west,minimum width=7em, minimum height=5em,fill=ugreen!30](num2)  at ([xshift=-3em]num1.east) {\quad \quad $B$};
+\node [anchor=west,minimum width=3em, minimum height=2.5em,fill=yellow!30](part1)  at (num2.west) {$C$};

-
-\node [anchor=north west,minimum width=3.8em, minimum height=2.2em,fill=yellow!70](part1)  at ([xshift=5.8em,yshift=2.23em]num1.south west) {C\quad \ };
-
-
-\draw [-,very thick,black] (num1.north west)--(num2.north west)--(num2.south west)--(num1.south west)--([yshift=0.05em]num1.north west);
-\draw [-,very thick,black] (num2.north west)--(num2.north east)--(num2.south east)--(num2.south west)--(num2.north west);
-\draw [-,very thick,black] (num2.north east)--(part1.north east)--(part1.south east)--(num2.south east);
+\draw [-,thick] (num1.north west) -- (num1.north east) -- (num1.south east) -- (num1.south west) -- (num1.north west);
+\draw [-,very thick,dotted] (num2.north west) -- (num2.north east) -- (num2.south east) -- (num2.south west) -- (num2.north west);

 }
 \end{scope}

--- a/Book/Chapter2/Figures/figure-word-segmentation-based-on-statistics.tex
+++ b/Book/Chapter2/Figures/figure-word-segmentation-based-on-statistics.tex
@@ -44,7 +44,7 @@
 }

 {
-\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{穷举\&计算}}};
+\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{red}{\scriptsize{搜索\&计算}}};
 }

 {\scriptsize

--- a/Book/Chapter2/chapter2.tex
+++ b/Book/Chapter2/chapter2.tex
--- a/Book/Chapter3/Chapter3.tex
+++ b/Book/Chapter3/Chapter3.tex
--- a/Book/Chapter3/Figures/figure-EM-algorithm-flow-chart.tex
+++ b/Book/Chapter3/Figures/figure-EM-algorithm-flow-chart.tex
@@ -9,7 +9,7 @@
 \node [anchor=north west] (line1) at (0,0) {\small\sffamily\bfseries{IBM模型1的训练（EM算法）}};
 \node [anchor=north west] (line2) at ([yshift=-0.3em]line1.south west) {输入: 平行语料${(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})}$};
 \node [anchor=north west] (line3) at ([yshift=-0.1em]line2.south west) {输出: 参数$f(\cdot|\cdot)$的最优值};
-\node [anchor=north west] (line4) at ([yshift=-0.1em]line3.south west) {1: \textbf{Function} \textsc{TrainItWithEM}($\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$) };
+\node [anchor=north west] (line4) at ([yshift=-0.1em]line3.south west) {1: \textbf{Function} \textsc{EM}($\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$) };
 \node [anchor=north west] (line5) at ([yshift=-0.1em]line4.south west) {2: \ \ Initialize $f(\cdot|\cdot)$ \hspace{5em} $\rhd$ 比如给$f(\cdot|\cdot)$一个均匀分布};
 \node [anchor=north west] (line6) at ([yshift=-0.1em]line5.south west) {3: \ \ Loop until $f(\cdot|\cdot)$ converges};
 \node [anchor=north west] (line7) at ([yshift=-0.1em]line6.south west) {4: \ \ \ \ \textbf{foreach} $k = 1$ to $N$ \textbf{do}};

--- a/Book/Chapter3/Figures/figure-example-of-t-s-generate.tex
+++ b/Book/Chapter3/Figures/figure-example-of-t-s-generate.tex
@@ -4,38 +4,51 @@

 {
 {\scriptsize
-\node [anchor=west,minimum height=2.5em,minimum width=5.5em] (sf1) at (2.3em,0) {};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em] (s1) at ([xshift=2.3em]sf1.east) {科学家};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em] (s2) at ([xshift=2.32em]s1.east) {们};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em] (s3) at ([xshift=2.33em]s2.east) {并不};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em] (s4) at ([xshift=2.30em]s3.east) {知道};
+\node [anchor=west,minimum height=2.5em,minimum width=5.5em] (sf1) at ([xshift=1em]st.east) {};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (s1) at ([xshift=2.3em]sf1.east) {科学家};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (s2) at ([xshift=2.3em]s1.east) {们};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (s3) at ([xshift=2.3em]s2.east) {并不};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (s4) at ([xshift=2.3em]s3.east) {知道};

-\node [anchor=north] (tau11) at ([xshift=-1.5em,yshift=-3.5em]sf1.south) {$\tau_0$};
+\node [anchor=west] (tau11) at ([xshift=1.5em]taut.east) {$\tau_0$};
 \node [anchor=west] (tau12) at ([xshift=-0.5em]tau11.east) {\tiny{1.NULL}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau1) [fit = (tau11) (tau12)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau1) [fit = (tau11) (tau12)] {};
+\node [anchor=west] (tau11-top) at ([xshift=1.5em]taut.east) {$\tau_0$};
+\node [anchor=west] (tau12-top) at ([xshift=-0.5em]tau11-top.east) {\tiny{1.NULL}};

-\node [anchor=west] (tau21) at ([xshift=1.9em]tau1.east) {$\tau_1$};
+
+\node [anchor=west] (tau21) at ([xshift=1.80em]tau1.east) {$\tau_1$};
 \node [anchor=west] (tau22) at ([xshift=-0.5em]tau21.north east) {\tiny{1.科学家}};
 \node [anchor=west] (tau23) at ([xshift=-0.5em]tau21.south east) {\tiny{2.们}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau2)[fit = (tau21) (tau22) (tau23)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=blue!30,drop shadow] (tau2)[fit = (tau21) (tau22) (tau23)] {};
+\node [anchor=west] (tau21-top) at ([xshift=1.8em]tau1.east) {$\tau_1$};
+\node [anchor=west] (tau22-top) at ([xshift=-0.5em]tau21-top.north east) {\tiny{1.科学家}};
+\node [anchor=west] (tau23-top) at ([xshift=-0.5em]tau21-top.south east) {\tiny{2.们}};
+

-\node [anchor=west] (tau31) at ([xshift=2.1em]tau2.east) {$\tau_2$};
+\node [anchor=west] (tau31) at ([xshift=2.05em]tau2.east) {$\tau_2$};
 \node [anchor=west] (tau32) at ([xshift=-0.5em]tau31.east) {\tiny{1.NULL}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau3) [fit = (tau31) (tau32)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau3) [fit = (tau31) (tau32)] {};
+\node [anchor=west] (tau31-top) at ([xshift=2.05em]tau2.east) {$\tau_2$};
+\node [anchor=west] (tau32-top) at ([xshift=-0.5em]tau31-top.east) {\tiny{1.NULL}};

-\node [anchor=west] (tau41) at ([xshift=2.3em]tau3.east) {$\tau_3$};
+\node [anchor=west] (tau41) at ([xshift=2.2em]tau3.east) {$\tau_3$};
 \node [anchor=west] (tau42) at ([xshift=-0.5em]tau41.east) {\tiny{1.并不}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau4) [fit = (tau41) (tau42)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau4) [fit = (tau41) (tau42)] {};
+\node [anchor=west] (tau41-top) at ([xshift=2.2em]tau3.east) {$\tau_3$};
+\node [anchor=west] (tau42-top) at ([xshift=-0.5em]tau41-top.east) {\tiny{1.并不}};

-\node [anchor=west] (tau51) at ([xshift=2.3em]tau4.east) {$\tau_4$};
+\node [anchor=west] (tau51) at ([xshift=2.2em]tau4.east) {$\tau_4$};
 \node [anchor=west] (tau52) at ([xshift=-0.5em]tau51.east) {\tiny{1.知道}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau5) [fit = (tau51) (tau52)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau5) [fit = (tau51) (tau52)] {};
+\node [anchor=west] (tau51-top) at ([xshift=2.2em]tau4.east) {$\tau_4$};
+\node [anchor=west] (tau52-top) at ([xshift=-0.5em]tau51-top.east) {\tiny{1.知道}};
 }



 {
-\node [anchor=north] (d1) at ([yshift=-6em]sf1.south) {$...$};
+\node [anchor=north] (d1) at ([yshift=-6.02em]sf1.south) {$...$};
 \node [anchor=north] (d2) at ([yshift=-6em]s1.south) {$...$};
 \node [anchor=north] (d31) at ([yshift=-6em]s2.south) {$...$};
 \node [anchor=north] (d32) at ([xshift=0.2em]d31.south) {\footnotesize{${<{\tau,\pi}>}_1$}};
@@ -50,50 +63,61 @@

 \draw [->,thick] (tau23.east) -- (s2.south);

-\draw [->,thick] ([yshift=4.2em]d2.north) -- (s1.south);
-\draw [->,thick] ([yshift=4.2em]d4.north) -- (s3.south);
-\draw [->,thick] ([yshift=4.2em]d5.north) -- (s4.south);
+\draw [->,thick] ([yshift=4.1em]d2.north) -- ([yshift=0.05em]s1.south);
+\draw [->,thick] ([yshift=4.1em]d4.north) -- ([yshift=0.05em]s3.south);
+\draw [->,thick] ([yshift=4.1em]d5.north) -- ([yshift=0.05em]s4.south);

-\draw [->,thick] (d1.north) -- ([yshift=-4.25em]sf1.south);
-\draw [->,thick] (d2.north) -- ([yshift=-4.25em]s1.south);
-\draw [->,thick] (d31.north) -- ([yshift=-4.25em]s2.south);
-\draw [->,thick] (d4.north) -- ([yshift=-4.25em]s3.south);
-\draw [->,thick] (d5.north) -- ([yshift=-4.25em]s4.south);
+\draw [->,thick] (d1.north) -- ([yshift=-4.48em]sf1.south);
+\draw [->,thick] (d2.north) -- ([yshift=-4.45em]s1.south);
+\draw [->,thick] (d31.north) -- ([yshift=-4.45em]s2.south);
+\draw [->,thick] (d4.north) -- ([yshift=-4.45em]s3.south);
+\draw [->,thick] (d5.north) -- ([yshift=-4.45em]s4.south);



 {\scriptsize
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (ns1) at ([yshift=-13em]s1.south) {科学家};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (ns2) at ([yshift=-13em]s2.south) {们};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (ns3) at ([yshift=-13em]s3.south) {并不};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (ns4) at ([yshift=-13em]s4.south) {知道};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (ns1) at ([yshift=-13em]s1.south) {科学家};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (ns2) at ([yshift=-13em]s2.south) {们};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (ns3) at ([yshift=-13em]s3.south) {并不};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (ns4) at ([yshift=-13em]s4.south) {知道};

 \node [anchor=north] (ntau11) at ([yshift=-15em]tau11.south) {$\tau_0$};
 \node [anchor=west] (ntau12) at ([xshift=-0.5em]ntau11.east) {\tiny{1.NULL}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (ntau1) [fit = (ntau11) (ntau12)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (ntau1) [fit = (ntau11) (ntau12)] {};
+\node [anchor=north] (ntau11-top) at ([yshift=-15em]tau11.south) {$\tau_0$};
+\node [anchor=west] (ntau12-top) at ([xshift=-0.5em]ntau11-top.east) {\tiny{1.NULL}};

-\node [anchor=west] (ntau21) at ([xshift=1.9em]ntau1.east) {$\tau_1$};
+\node [anchor=west] (ntau21) at ([xshift=1.8em]ntau1.east) {$\tau_1$};
 \node [anchor=west] (ntau22) at ([xshift=-0.5em]ntau21.north east) {\tiny{1.们}};
 \node [anchor=west] (ntau23) at ([xshift=-0.5em]ntau21.south east) {\tiny{2.科学家}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (ntau2)[fit = (ntau21) (ntau22) (ntau23)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=yellow!30,drop shadow] (ntau2)[fit = (ntau21) (ntau22) (ntau23)] {};
+\node [anchor=west] (ntau21-top) at ([xshift=1.8em]ntau1.east) {$\tau_1$};
+\node [anchor=west] (ntau22-top) at ([xshift=-0.5em]ntau21-top.north east) {\tiny{1.们}};
+\node [anchor=west] (ntau23-top) at ([xshift=-0.5em]ntau21-top.south east) {\tiny{2.科学家}};

-\node [anchor=west] (ntau31) at ([xshift=2.1em]ntau2.east) {$\tau_2$};
+\node [anchor=west] (ntau31) at ([xshift=2.05em]ntau2.east) {$\tau_2$};
 \node [anchor=west] (ntau32) at ([xshift=-0.5em]ntau31.east) {\tiny{1.NULL}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (ntau3) [fit = (ntau31) (ntau32)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (ntau3) [fit = (ntau31) (ntau32)] {};
+\node [anchor=west] (ntau31-top) at ([xshift=2.05em]ntau2.east) {$\tau_2$};
+\node [anchor=west] (ntau32-top) at ([xshift=-0.5em]ntau31-top.east) {\tiny{1.NULL}};

-\node [anchor=west] (ntau41) at ([xshift=2.3em]ntau3.east) {$\tau_3$};
+\node [anchor=west] (ntau41) at ([xshift=2.2em]ntau3.east) {$\tau_3$};
 \node [anchor=west] (ntau42) at ([xshift=-0.5em]ntau41.east) {\tiny{1.并不}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (ntau4) [fit = (ntau41) (ntau42)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (ntau4) [fit = (ntau41) (ntau42)] {};
+\node [anchor=west] (ntau41-top) at ([xshift=2.2em]ntau3.east) {$\tau_3$};
+\node [anchor=west] (ntau42-top) at ([xshift=-0.5em]ntau41-top.east) {\tiny{1.并不}};

-\node [anchor=west] (ntau51) at ([xshift=2.3em]ntau4.east) {$\tau_4$};
+\node [anchor=west] (ntau51) at ([xshift=2.2em]ntau4.east) {$\tau_4$};
 \node [anchor=west] (ntau52) at ([xshift=-0.5em]ntau51.east) {\tiny{1.知道}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (ntau5) [fit = (ntau51) (ntau52)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (ntau5) [fit = (ntau51) (ntau52)] {};
+\node [anchor=west] (ntau51-top) at ([xshift=2.2em]ntau4.east) {$\tau_4$};
+\node [anchor=west] (ntau52-top) at ([xshift=-0.5em]ntau51-top.east) {\tiny{1.知道}};
 }


 {
 {
-\node [anchor=north] (nd1) at ([yshift=-11em]d1.south) {$...$};
+\node [anchor=north] (nd1) at ([yshift=-11.01em]d1.south) {$...$};
 \node [anchor=north] (nd2) at ([yshift=-11em]d2.south) {$...$};
 \node [anchor=north] (nd31) at ([yshift=-11em]d31.south) {$...$};
 \node [anchor=north] (nd32) at ([xshift=0.2em]nd31.south) {\footnotesize{${<{\tau,\pi}>}_2$}};
@@ -107,17 +131,37 @@

 \draw [->,thick] (ntau23.east) -- (ns2.south);

-\draw [->,thick] ([yshift=4.2em]nd2.north) -- (ns1.south);
-\draw [->,thick] ([yshift=4.2em]nd4.north) -- (ns3.south);
-\draw [->,thick] ([yshift=4.2em]nd5.north) -- (ns4.south);
+\draw [->,thick] ([yshift=4.1em]nd2.north) -- ([yshift=0.05em]ns1.south);
+\draw [->,thick] ([yshift=4.1em]nd4.north) -- ([yshift=0.05em]ns3.south);
+\draw [->,thick] ([yshift=4.1em]nd5.north) -- ([yshift=0.05em]ns4.south);

-\draw [->,thick] (nd1.north) -- ([yshift=-16.15em]sf1.south);
-\draw [->,thick] (nd2.north) -- ([yshift=-16.15em]s1.south);
-\draw [->,thick] (nd31.north) -- ([yshift=-16.15em]s2.south);
-\draw [->,thick] (nd4.north) -- ([yshift=-16.15em]s3.south);
-\draw [->,thick] (nd5.north) -- ([yshift=-16.15em]s4.south);
+\draw [->,thick] (nd1.north) -- ([yshift=-16.37em]sf1.south);
+\draw [->,thick] (nd2.north) -- ([yshift=-16.35em]s1.south);
+\draw [->,thick] (nd31.north) -- ([yshift=-16.35em]s2.south);
+\draw [->,thick] (nd4.north) -- ([yshift=-16.35em]s3.south);
+\draw [->,thick] (nd5.north) -- ([yshift=-16.35em]s4.south);
 }

 }
 \end{tikzpicture}
-%---------------------------------------------------------------------
\ No newline at end of file
+%---------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/Book/Chapter3/Figures/figure-expression.tex
+++ b/Book/Chapter3/Figures/figure-expression.tex
@@ -17,11 +17,11 @@
 \node [anchor=north west,inner sep=2pt,minimum height=2.5em] (eq3) at ([xshift=-15.56em,yshift=0.0em]eq2.south east) {${\prod_{j=1}^l{\prod_{k=1}^{\varphi_j}{\textrm{P}(\pi_{jk}|\pi_{j1}^{k-1},\pi_{1}^{j-1},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}} \times}$};
 \node [anchor=north west,inner sep=2pt,minimum height=2.5em] (eq4) at ([xshift=-17.10em,yshift=0.0em]eq3.south east) {{${\prod_{k=1}^{\varphi_0}{\textrm{P}(\pi_{0k}|\pi_{01}^{k-1},\pi_{1}^{l},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}}$}};

-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=red!15] (part1) at ([xshift=-12.5em,yshift=0.0em]eq1.east) {{${\textrm{P}(\varphi_j|\varphi_{1}^{j-1},\mathbf{t})}$}};
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=blue!15] (part2) at ([xshift=-5.9em,yshift=0.0em]eq1.east) {{${\textrm{P}(\varphi_0|\varphi_{1}^{l},\mathbf{t})}$}};
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=green!15] (part3) at ([xshift=-10.7em,yshift=0.0em]eq2.east) {{${\textrm{P}(\tau_{jk}|\tau_{j1}^{k-1},\tau_{1}^{j-1},\varphi_{0}^{l},\mathbf{t} )}$}};
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=yellow!15] (part4) at ([xshift=-12.23em,yshift=0.0em]eq3.east) {{${\textrm{P}(\pi_{jk}|\pi_{j1}^{k-1},\pi_{1}^{j-1},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}$}};
-\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=gray!15] (part5) at ([xshift=-10.4em,yshift=0.0em]eq4.east) {{${\textrm{P}(\pi_{0k}|\pi_{01}^{k-1},\pi_{1}^{l},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=red!30] (part1) at ([xshift=-12.5em,yshift=0.0em]eq1.east) {{${\textrm{P}(\varphi_j|\varphi_{1}^{j-1},\mathbf{t})}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=blue!30] (part2) at ([xshift=-5.9em,yshift=0.0em]eq1.east) {{${\textrm{P}(\varphi_0|\varphi_{1}^{l},\mathbf{t})}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=green!30] (part3) at ([xshift=-10.7em,yshift=0.0em]eq2.east) {{${\textrm{P}(\tau_{jk}|\tau_{j1}^{k-1},\tau_{1}^{j-1},\varphi_{0}^{l},\mathbf{t} )}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=yellow!30] (part4) at ([xshift=-12.23em,yshift=0.0em]eq3.east) {{${\textrm{P}(\pi_{jk}|\pi_{j1}^{k-1},\pi_{1}^{j-1},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}$}};
+\node [anchor=west,inner sep=2pt,minimum height=2.0em,fill=gray!30] (part5) at ([xshift=-10.4em,yshift=0.0em]eq4.east) {{${\textrm{P}(\pi_{0k}|\pi_{01}^{k-1},\pi_{1}^{l},\tau_{0}^{l},\varphi_{0}^{l},\mathbf{t} )}$}};


 \end{tikzpicture}

--- a/Book/Chapter3/Figures/figure-greedy-MT-decoding-pseudo-code.tex
+++ b/Book/Chapter3/Figures/figure-greedy-MT-decoding-pseudo-code.tex
@@ -120,7 +120,7 @@
 %% remark 5
 \begin{scope}
 {
-\node [anchor=north west,align=left] (remark5) at ([xshift=0.6em,yshift=-1.6em]remark4.south west) {\textsc{PruneForTop1}\\保留得分最高的结果};
+\node [anchor=north west,align=left] (remark5) at ([xshift=0.72em,yshift=-1.6em]remark4.south west) {\textsc{PruneForTop1}\\保留得分最高的结果};
 \node [anchor=west,draw,inner sep=1pt] (s1) at ([yshift=-0.5em,xshift=1.2em]remark5.north east){\tiny{0.234}};
 \node [anchor=north west,draw,inner sep=1pt] (s2) at ([yshift=-0.2em]s1.south west){\tiny{0.197}};
 \node [anchor=north west,draw,inner sep=1pt] (s3) at ([yshift=-0.2em]s2.south west){\tiny{0.083}};

--- a/Book/Chapter3/Figures/figure-probability_translation_process.tex
+++ b/Book/Chapter3/Figures/figure-probability_translation_process.tex
@@ -11,56 +11,79 @@
 }
 {\scriptsize
 \node [anchor=west,minimum height=2.5em,minimum width=5.5em] (sf1) at ([xshift=1em]st.east) {};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em] (s1) at ([xshift=2.3em]sf1.east) {科学家};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em] (s2) at ([xshift=2.3em]s1.east) {们};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em] (s3) at ([xshift=2.3em]s2.east) {并不};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em] (s4) at ([xshift=2.3em]s3.east) {知道};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (s1) at ([xshift=2.3em]sf1.east) {科学家};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (s2) at ([xshift=2.3em]s1.east) {们};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (s3) at ([xshift=2.3em]s2.east) {并不};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=green!30,drop shadow] (s4) at ([xshift=2.3em]s3.east) {知道};
 }


 {\scriptsize
 \node [anchor=west] (tau11) at ([xshift=1.5em]taut.east) {$\tau_0$};
 \node [anchor=west] (tau12) at ([xshift=-0.5em]tau11.east) {\tiny{1.NULL}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau1) [fit = (tau11) (tau12)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau1) [fit = (tau11) (tau12)] {};
+\node [anchor=west] (tau11-top) at ([xshift=1.5em]taut.east) {$\tau_0$};
+\node [anchor=west] (tau12-top) at ([xshift=-0.5em]tau11-top.east) {\tiny{1.NULL}};

-\node [anchor=west] (tau21) at ([xshift=1.9em]tau1.east) {$\tau_1$};
+
+\node [anchor=west] (tau21) at ([xshift=1.80em]tau1.east) {$\tau_1$};
 \node [anchor=west] (tau22) at ([xshift=-0.5em]tau21.north east) {\tiny{1.科学家}};
 \node [anchor=west] (tau23) at ([xshift=-0.5em]tau21.south east) {\tiny{2.们}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau2)[fit = (tau21) (tau22) (tau23)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau2)[fit = (tau21) (tau22) (tau23)] {};
+\node [anchor=west] (tau21-top) at ([xshift=1.8em]tau1.east) {$\tau_1$};
+\node [anchor=west] (tau22-top) at ([xshift=-0.5em]tau21-top.north east) {\tiny{1.科学家}};
+\node [anchor=west] (tau23-top) at ([xshift=-0.5em]tau21-top.south east) {\tiny{2.们}};
+

-\node [anchor=west] (tau31) at ([xshift=2.1em]tau2.east) {$\tau_2$};
+\node [anchor=west] (tau31) at ([xshift=2.05em]tau2.east) {$\tau_2$};
 \node [anchor=west] (tau32) at ([xshift=-0.5em]tau31.east) {\tiny{1.NULL}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau3) [fit = (tau31) (tau32)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau3) [fit = (tau31) (tau32)] {};
+\node [anchor=west] (tau31-top) at ([xshift=2.05em]tau2.east) {$\tau_2$};
+\node [anchor=west] (tau32-top) at ([xshift=-0.5em]tau31-top.east) {\tiny{1.NULL}};

-\node [anchor=west] (tau41) at ([xshift=2.3em]tau3.east) {$\tau_3$};
+\node [anchor=west] (tau41) at ([xshift=2.2em]tau3.east) {$\tau_3$};
 \node [anchor=west] (tau42) at ([xshift=-0.5em]tau41.east) {\tiny{1.并不}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau4) [fit = (tau41) (tau42)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau4) [fit = (tau41) (tau42)] {};
+\node [anchor=west] (tau41-top) at ([xshift=2.2em]tau3.east) {$\tau_3$};
+\node [anchor=west] (tau42-top) at ([xshift=-0.5em]tau41-top.east) {\tiny{1.并不}};

-\node [anchor=west] (tau51) at ([xshift=2.3em]tau4.east) {$\tau_4$};
+\node [anchor=west] (tau51) at ([xshift=2.2em]tau4.east) {$\tau_4$};
 \node [anchor=west] (tau52) at ([xshift=-0.5em]tau51.east) {\tiny{1.知道}};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (tau5) [fit = (tau51) (tau52)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=red!30,drop shadow] (tau5) [fit = (tau51) (tau52)] {};
+\node [anchor=west] (tau51-top) at ([xshift=2.2em]tau4.east) {$\tau_4$};
+\node [anchor=west] (tau52-top) at ([xshift=-0.5em]tau51-top.east) {\tiny{1.知道}};
 }

 {\scriptsize
 \node [anchor=west] (phi11) at ([xshift=2.4em]phit.east) {$\phi_0$};
 \node [anchor=west] (phi12) at ([xshift=-0.5em]phi11.east) {0};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (phi1) [fit = (phi11) (phi12)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=blue!30,drop shadow] (phi1) [fit = (phi11) (phi12)] {};
+\node [anchor=west] (phi11-top) at ([xshift=2.4em]phit.east) {$\phi_0$};
+\node [anchor=west] (phi12-top) at ([xshift=-0.5em]phi11-top.east) {0};

 \node [anchor=west] (phi21) at ([xshift=3em]phi1.east) {$\phi_1$};
 \node [anchor=west] (phi22) at ([xshift=-0.5em]phi21.east) {2};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (phi2) [fit = (phi21) (phi22)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=blue!30,drop shadow] (phi2) [fit = (phi21) (phi22)] {};
+\node [anchor=west] (phi21-top) at ([xshift=3em]phi1.east) {$\phi_1$};
+\node [anchor=west] (phi22-top) at ([xshift=-0.5em]phi21-top.east) {2};

 \node [anchor=west] (phi31) at ([xshift=3em]phi2.east) {$\phi_2$};
 \node [anchor=west] (phi32) at ([xshift=-0.5em]phi31.east) {0};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (phi3) [fit = (phi31) (phi32)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=blue!30,drop shadow] (phi3) [fit = (phi31) (phi32)] {};
+\node [anchor=west] (phi31-top) at ([xshift=3em]phi2.east) {$\phi_2$};
+\node [anchor=west] (phi32-top) at ([xshift=-0.5em]phi31-top.east) {0};

 \node [anchor=west] (phi41) at ([xshift=3em]phi3.east) {$\phi_3$};
 \node [anchor=west] (phi42) at ([xshift=-0.5em]phi41.east) {1};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (phi4) [fit = (phi41) (phi42)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=blue!30,drop shadow] (phi4) [fit = (phi41) (phi42)] {};
+\node [anchor=west] (phi41-top) at ([xshift=3em]phi3.east) {$\phi_3$};
+\node [anchor=west] (phi42-top) at ([xshift=-0.5em]phi41-top.east) {1};

 \node [anchor=west] (phi51) at ([xshift=3em]phi4.east) {$\phi_4$};
 \node [anchor=west] (phi52) at ([xshift=-0.5em]phi51.east) {1};
-\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em] (phi5) [fit = (phi51) (phi52)] {};
+\node [rounded rectangle,draw,line width=1pt,minimum height=3.4em,minimum width=7.8em,fill=blue!30,drop shadow] (phi5) [fit = (phi51) (phi52)] {};
+\node [anchor=west] (phi51-top) at ([xshift=3em]phi4.east) {$\phi_4$};
+\node [anchor=west] (phi52-top) at ([xshift=-0.5em]phi51-top.east) {1};
 }

 \draw [->,thick,dashed] ([yshift=-1.4em]st.south west) -- ([xshift=0.8em,yshift=-1em]s4.south east);
@@ -68,40 +91,40 @@
 \draw [->,thick,dashed] ([yshift=-10.3em]st.south west) -- ([xshift=0.8em,yshift=-9.9em]s4.south east);

 {\scriptsize
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (t1) at ([yshift=-15em]sf1.south) {$t_0$};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (t2) at ([yshift=-15em]s1.south) {Scientists};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (t3) at ([yshift=-15em]s2.south) {do};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (t4) at ([yshift=-15em]s3.south) {not};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em] (t5) at ([yshift=-15em]s4.south) {konw};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=yellow!30,drop shadow] (t1) at ([yshift=-15em]sf1.south) {$t_0$}; % target-side word boxes t1..t5, each placed 15em below the south anchor of sf1/s1..s4
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=yellow!30,drop shadow] (t2) at ([yshift=-15em]s1.south) {Scientists};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=yellow!30,drop shadow] (t3) at ([yshift=-15em]s2.south) {do};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=yellow!30,drop shadow] (t4) at ([yshift=-15em]s3.south) {not};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2.5em,minimum width=5.5em,fill=yellow!30,drop shadow] (t5) at ([yshift=-15em]s4.south) {know};
 }


 \draw [->,thick] (tau23.east) -- (s2.south);

-\draw [->,thick] (t1.north) -- ([yshift=-8.8em]sf1.south);
+\draw [->,thick] (t1.north) -- ([yshift=-8.82em]sf1.south);
 \draw [->,thick] (t2.north) -- ([yshift=-8.8em]s1.south);
 \draw [->,thick] (t3.north) -- ([yshift=-8.8em]s2.south);
 \draw [->,thick] (t4.north) -- ([yshift=-8.8em]s3.south);
 \draw [->,thick] (t5.north) -- ([yshift=-8.8em]s4.south);

-\draw [->,thick] ([yshift=4.6em]t1.north) -- ([yshift=-4.4em]sf1.south);
-\draw [->,thick] ([yshift=4.6em]t2.north) -- ([yshift=-4.4em]s1.south);
-\draw [->,thick] ([yshift=4.6em]t3.north) -- ([yshift=-4.4em]s2.south);
-\draw [->,thick] ([yshift=4.6em]t4.north) -- ([yshift=-4.4em]s3.south);
-\draw [->,thick] ([yshift=4.6em]t5.north) -- ([yshift=-4.4em]s4.south);
+\draw [->,thick] ([yshift=4.6em]t1.north) -- ([yshift=-4.45em]sf1.south);
+\draw [->,thick] ([yshift=4.65em]t2.north) -- ([yshift=-4.4em]s1.south);
+\draw [->,thick] ([yshift=4.65em]t3.north) -- ([yshift=-4.4em]s2.south);
+\draw [->,thick] ([yshift=4.65em]t4.north) -- ([yshift=-4.4em]s3.south);
+\draw [->,thick] ([yshift=4.65em]t5.north) -- ([yshift=-4.4em]s4.south);

-\draw [->,thick] ([yshift=9em]t2.north) -- (s1.south);
-\draw [->,thick] ([yshift=9em]t4.north) -- (s3.south);
-\draw [->,thick] ([yshift=9em]t5.north) -- (s4.south);
+\draw [->,thick] ([yshift=9em]t2.north) -- ([yshift=0.05em]s1.south);
+\draw [->,thick] ([yshift=9em]t4.north) -- ([yshift=0.05em]s3.south);
+\draw [->,thick] ([yshift=9em]t5.north) -- ([yshift=0.05em]s4.south);


 {\scriptsize
-\node [anchor=west] (sent11) at ([xshift=1em,yshift=-2em]s4.south east) {把这些元语};
+\node [anchor=west] (sent11) at ([xshift=1em,yshift=-0.3em]s4.south east) {把这些元语};
 \node [anchor=west] (sent12) at ([yshift=-1em]sent11.west) {言单词放在};
 \node [anchor=west] (sent13) at ([yshift=-1em]sent12.west) {合适的位置};
-\node [anchor=west] (sent21) at ([yshift=-3em]sent13.west) {确定生成元};
+\node [anchor=west] (sent21) at ([yshift=-4.6em]sent13.west) {确定生成元};
 \node [anchor=west] (sent22) at ([yshift=-1em]sent21.west) {语言单词};
-\node [anchor=west] (sent31) at ([yshift=-4em]sent22.west) {确定生成元};
+\node [anchor=west] (sent31) at ([yshift=-4.6em]sent22.west) {确定生成元};
 \node [anchor=west] (sent32) at ([yshift=-1em]sent31.west) {语言单词的};
 \node [anchor=west] (sent33) at ([yshift=-1em]sent32.west) {个数};
 }
@@ -114,4 +137,34 @@

 }
 \end{tikzpicture}
-%---------------------------------------------------------------------
\ No newline at end of file
+%---------------------------------------------------------------------
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- a/Book/Chapter3/Figures/figure-processes-SMT.tex
+++ b/Book/Chapter3/Figures/figure-processes-SMT.tex
@@ -16,7 +16,7 @@
 \end{pgfonlayer}
 }

-\node [anchor=west,ugreen] (P) at ([xshift=4em,yshift=-0.7em]corpus.east){P($t|s$)};
+\node [anchor=west,ugreen] (P) at ([xshift=4em,yshift=-0.7em]corpus.east){P($\mathbf{t}|\mathbf{s}$)};
 \node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \sffamily\bfseries{翻译模型}}}};

 \begin{pgfonlayer}{background}
@@ -26,7 +26,7 @@
 \draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=3.2em]corpus.east)  node [pos=0.5, above] {\color{red}{\scriptsize{模型学习}}};

 {
-\draw [->,very thick,ublue] ([xshift=0.4em]model.east) -- ([xshift=3.4em]model.east)  node [inner sep=0pt,pos=0.5, above,yshift=0.3em] (decodingarrow) {\color{red}{\scriptsize{穷举\&计算}}};
+\draw [->,very thick,ublue] ([xshift=0.4em]model.east) -- ([xshift=3.4em]model.east)  node [inner sep=0pt,pos=0.5, above,yshift=0.3em] (decodingarrow) {\color{red}{\scriptsize{搜索\&计算}}};

 {\scriptsize
 \node [anchor=north west,inner sep=2pt] (sentlabel) at ([xshift=5.5em,yshift=-0.9em]model.north east) {{\color{ublue} \sffamily\bfseries{机器翻译引擎}}};

--- a/Book/Chapter3/Figures/figure-word-alignment.tex
+++ b/Book/Chapter3/Figures/figure-word-alignment.tex
@@ -5,11 +5,11 @@
 {
 {\footnotesize
 \node [anchor=north west,minimum height=2em,minimum width=4em] (s11) at (0,0) {};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em] (s1) at ([xshift=2em]s11.east) {我};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em] (s2) at ([xshift=2em]s1.east) {改变};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em] (s3) at ([xshift=2em]s2.east) {主意};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em] (s4) at ([xshift=2em]s3.east) {了};
-\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em] (s5) at ([xshift=2em]s4.east) {。};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em,fill=green!30,drop shadow] (s1) at ([xshift=2em]s11.east) {我};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em,fill=green!30,drop shadow] (s2) at ([xshift=2em]s1.east) {改变};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em,fill=green!30,drop shadow] (s3) at ([xshift=2em]s2.east) {主意};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em,fill=green!30,drop shadow] (s4) at ([xshift=2em]s3.east) {了};
+\node [rectangle,draw,anchor=west,line width=1pt,minimum height=2em,minimum width=4em,fill=green!30,drop shadow] (s5) at ([xshift=2em]s4.east) {。};

 \node [anchor=south] (nu1) at (s1.north) {1};
 \node [anchor=south] (nu2) at (s2.north) {2};
@@ -20,12 +20,12 @@

 {
 {\footnotesize
-\node [anchor=north,rectangle,draw,line width=1pt,minimum height=2em,minimum width=4em] (t1) at ([yshift=-3.5em]s11.south) {$t_0$};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em] (t2) at ([yshift=-3.5em]s1.south) {I};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em] (t3) at ([yshift=-3.5em]s2.south) {changed};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em] (t4) at ([yshift=-3.5em]s3.south) {my};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em] (t5) at ([yshift=-3.5em]s4.south) {mind};
-\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em] (t6) at ([yshift=-3.5em]s5.south) {.};
+\node [anchor=north,rectangle,draw,line width=1pt,minimum height=2em,minimum width=4em,fill=red!30,drop shadow] (t1) at ([yshift=-3.5em]s11.south) {$t_0$};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em,fill=red!30,drop shadow] (t2) at ([yshift=-3.5em]s1.south) {I};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em,fill=red!30,drop shadow] (t3) at ([yshift=-3.5em]s2.south) {changed};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em,fill=red!30,drop shadow] (t4) at ([yshift=-3.5em]s3.south) {my};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em,fill=red!30,drop shadow] (t5) at ([yshift=-3.5em]s4.south) {mind};
+\node [rectangle,draw,anchor=north,line width=1pt,minimum height=2em,minimum width=4em,fill=red!30,drop shadow] (t6) at ([yshift=-3.5em]s5.south) {.};

 \node [anchor=north] (nd1) at (t2.south) {[1]};
 \node [anchor=north] (nd2) at (t3.south) {[2]};

--- a/Book/Chapter4/Figures/an-example-of-phrase-system.tex
+++ b/Book/Chapter4/Figures/an-example-of-phrase-system.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Problems of phrase-based systems - an example
+%%%  Three labelled rows: the source sentence ("源语"), the phrase-system output
+%%%  ("短语系统", two lines) and the reference translation ("参考译文", two lines).
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+% Row heads share their right edge (east / north east anchors chained through
+% "south east"); each row's text starts at the east side of its head.
+\node [anchor=east] (shead) at (0,0) {源语:};
+\node [anchor=west] (swords) at (shead.east) {澳洲\ \ 是\ \ 与\ \ 北韩\ \ 有\ \ 邦交\ \ 的\ \ 少数\ \ 国家\ \ 之一};
+\node [anchor=north east] (thead) at ([yshift=-0.8em]shead.south east) {短语系统:};
+\node [anchor=west] (twords) at (thead.east) {Australia is diplomatic relations with North Korea};
+\node [anchor=north west] (twords2) at ([yshift=-0.2em]twords.south west) {is one of the few countries};
+\node [anchor=north east] (rhead) at ([yshift=-2.2em]thead.south east) {参考译文:};
+\node [anchor=west] (rwords) at (rhead.east) {Australia is one of the few countries that have};
+\node [anchor=north west] (rwords2) at ([yshift=-0.2em]rwords.south west) {diplomatic relations with North Korea};
+
+% Background layer: one shaded rectangle on each line of the system output
+% (red on the first line, blue on the second). The x-offsets are hand-tuned
+% relative to the centre of each text node.
+\begin{pgfonlayer}{background}
+{
+\draw[fill=red!20,draw=white] ([xshift=-5.4em]twords.north) rectangle ([xshift=10.8em]twords.south);
+\draw[fill=blue!20,draw=white] ([xshift=-4.6em]twords2.north) rectangle ([xshift=6.1em]twords2.south);
+% Small black tags with white digits 1 and 2 in the lower-right corner of each rectangle.
+\node [anchor=south east,inner sep=1pt,fill=black] (l1) at ([xshift=10.8em]twords.south) {\tiny{{\color{white} 1}}};
+\node [anchor=south east,inner sep=1pt,fill=black] (l2) at ([xshift=6.1em]twords2.south) {\tiny{{\color{white} 2}}};
+}
+\end{pgfonlayer}
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
+
+
--- a/Book/Chapter4/Figures/based-on-tree-structure-generate-sentence-pairs.tex
+++ b/Book/Chapter4/Figures/based-on-tree-structure-generate-sentence-pairs.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  规则的组装对应翻译过程
+
+\begin{tikzpicture}
+
+% target side
+% Four boxed tree fragments (tfrag1..tfrag4) under the heading "目标语言"
+% ("target language"). Each fragment is a \Tree typeset inside a drawn node
+% scaled to 0.6.
+\begin{scope}[xshift=-1em, level distance=25pt]
+
+\node[scale=0.8] at (1.5, 2.2) {目标语言};
+
+\node[scale=0.6, inner sep=0.1cm, draw,xshift=1em] (tfrag1) at
+   (0,0.05) {\Tree[.NP [.DT the ] [.NNS imports ]]};
+
+{
+\node[scale=0.6, inner sep=0.1cm, draw, sibling distance=100pt] (tfrag2) at
+   (1.4,1.4) {\Tree[.S [.NP ] [.VP ]]};
+}
+
+{
+\node[scale=0.6, inner sep=0.1cm, draw, sibling distance=30pt] (tfrag3) at
+   (3,-0.2) {\Tree[.VP [.VBZ have ] [.ADVP [.RB ] [.VBN fallen ]]]};
+}
+
+{
+\node[scale=0.6, inner sep=0.1cm, draw,xshift=-1.5em] (tfrag4) at
+   (2.8,-1.8) {\Tree[.RB drastically ]};
+}
+
+% Connectors with a dot at both ends (*-*), given in absolute coordinates
+% rather than node anchors; presumably they join a fragment root to the empty
+% leaf it fills in another fragment (the source-side legend describes this
+% mark as a variable substitution).
+{\draw[*-*] (0.15,0.7) -- (0.15,1.05);}
+{\draw[*-*] (2.7,0.7) -- (2.7,1.05);}
+{\draw[*-*] (2.4,-1.4) .. controls +(north:0.6) and +(south:0.6) .. (3.0,-0.6);}
+\end{scope}
+
+% source side
+% Four boxed tree fragments (sfrag1..sfrag4) under the heading "源语言"
+% ("source language"), drawn in a scope shifted 20em to the left of the origin.
+\begin{scope}[scale=0.8, level distance=25pt, xshift=-20em, yshift=1em]
+
+\node[scale=0.8] at (2.1, 2.45) {源语言};
+
+{
+\node[scale=0.6, inner sep=0.1cm, draw] (sfrag1) at
+   (0,0) {\Tree[.NN 进口 ]};
+}
+
+{
+\node[scale=0.6, inner sep=0.1cm, draw, sibling distance=135pt] (sfrag2) at
+   (2.0,1.4) {\Tree[.IP [.NN ] [.VP ]]};
+}
+
+{
+\node[scale=0.6, inner sep=0.1cm, xshift=1em, draw] (sfrag3) at
+   (4,-0.7) {\Tree[.VP [.AD ] [.ADVP [.VV 下降 ] [.AS 了 ]]]};
+}
+
+{
+\node[scale=0.6, inner sep=0.1cm, draw] (sfrag4) at
+   (1.3,-1.3) {\Tree[.AD 大幅度 ]};
+}
+
+% Dot-ended connectors between fragments, in absolute coordinates.
+{\draw[*-*] (0.05,0.5) -- (0.05,0.95);}
+{\draw[*-*] (4.15,0.5) -- (4.15,0.95);}
+{\draw[*-*] (1.55,-0.95) .. controls +(east:1.0) and +(south:0.4) .. (3.5,-0.6);}
+
+% Legend: a sample connector followed by the caption
+% "表示对变量的替换操作" ("denotes substitution of a variable").
+\draw[*-*] (-0.2,-2.5)--(0.3,-2.5) ;
+\node[scale=0.6] (sfrag5) at
+   ([xshift=2em,yshift=-3.2em]sfrag4) {{表示对变量的替换操作}};
+
+\end{scope}
+
+% Rules 1-4, laid out left to right along yshift=-11em.
+% Each scope typesets  <source fragment>  ->  <target fragment>:
+% the source tree is anchored north east, the target tree north west, and the
+% arrow between them uses fixed coordinates inside the scope.
+% Every rule is drawn exactly once; a second, identical copy at the same
+% coordinates would only overprint the first.
+
+% rule 1
+\begin{scope}[scale=0.6, xshift=-22em, yshift=-11em, level distance=20pt]
+
+\begin{scope}[anchor=north east, xshift=-6em] \Tree[.NN 进口 ] \end{scope}
+\draw[->] (-1.7,0.1) -- (-0.9,0.1);
+\begin{scope}[anchor=north west] \Tree[.NP [.DT the ] [.NNS imports ]] \end{scope}
+
+\end{scope}
+
+% rule 2
+\begin{scope}[scale=0.6, xshift=-11em, yshift=-11em, level distance=20pt]
+
+\begin{scope}[anchor=north east, xshift=-6em] \Tree[.AD 大幅度 ] \end{scope}
+\draw[->] (-1.7,0.1) -- (-0.9,0.1);
+\begin{scope}[anchor=north west] \Tree[.RB drastically ] \end{scope}
+
+\end{scope}
+
+% rule 3
+\begin{scope}[scale=0.6, xshift=2em, yshift=-11em, level distance=20pt]
+
+\begin{scope}[anchor=north east, xshift=-7em] \Tree[.VP [.AD ] [.ADVP [.VV 下降 ] [.AS 了 ]]] \end{scope}
+\draw[->] (-1.7,0.1) -- (-0.9,0.1);
+\begin{scope}[anchor=north west, xshift=1em] \Tree[.VP [.VBZ have ] [.ADVP [.RB ] [.VBN fallen ]]] \end{scope}
+
+\end{scope}
+
+% rule 4
+\begin{scope}[scale=0.6, xshift=16em, yshift=-11em, level distance=20pt]
+
+\begin{scope}[anchor=north east, xshift=-6em] \Tree[.IP [.NN ] [.VP ]] \end{scope}
+\draw[->] (-1.7,0.1) -- (-0.9,0.1);
+\begin{scope}[anchor=north west,xshift=-1em] \Tree[.S [.NP ] [.VP ]] \end{scope}
+
+\end{scope}
+
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/basic-process-of-translation.tex
+++ b/Book/Chapter4/Figures/basic-process-of-translation.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% 什么是解码
+\begin{tikzpicture}
+
+% Panel 1 (unshifted scope, top left of the 2x2 layout): the source sentence s
+% split into three coloured segments (green, red, blue) and a still-empty
+% translation row t.
+\begin{scope}[minimum height = 18pt]
+
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
+\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
+\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
+
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+
+\end{scope}
+
+
+
+% Panel 2 (shifted 14.5em right): same source row; the translation row now
+% holds the first target segment "There is", coloured like s2 and linked to
+% it by a double-headed edge.
+% Node names (s0..s3, t0, t1) are reused from the previous panel; later
+% references resolve to the most recent definition.
+\begin{scope}[xshift=14.5em,minimum height = 18pt]
+
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
+\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
+\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
+
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+{
+\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{There is}};
+\path[<->, thick] (s2.south) edge (t1.north);
+}
+
+\end{scope}
+
+
+
+% Panel 3 (shifted 6em down): adds the second target segment "an apple",
+% placed after "There is", coloured like s3 and linked to it.
+\begin{scope}[yshift=-6.0em,minimum height = 18pt]
+
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
+\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
+\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
+
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+{
+\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{There is}};
+\path[<->, thick] (s2.south) edge (t1.north);
+}
+{
+\node[anchor=west,fill=blue!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
+\path[<->, thick] (s3.south) edge (t2.north);
+}
+
+\end{scope}
+
+
+
+
+% Panel 4 (shifted right and down): the complete translation row. The third
+% target segment "on the table" is appended last but linked back to the first
+% source segment s1, so the edges of this panel cross.
+\begin{scope}[xshift=14.5em,yshift=-6.0em,minimum height = 18pt]%[scale=0.5]
+
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
+\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{有}};
+\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
+
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+{
+\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{There is}};
+\path[<->, thick] (s2.south) edge (t1.north);
+}
+{
+\node[anchor=west,fill=blue!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
+\path[<->, thick] (s3.south) edge (t2.north);
+}
+{
+\node[anchor=west,fill=green!20] (t3) at ([xshift=1em]t2.east) {\footnotesize{on the table}};
+\path[<->, thick] (s1.south) edge (t3.north);
+}
+
+\end{scope}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/chinese-syntax-tree.tex
+++ b/Book/Chapter4/Figures/chinese-syntax-tree.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% From phrases to syntax
+%%% A single phrase-structure tree over the English words
+%%% "the import in North Korea have drastically fallen", typeset with \Tree.
+%%% The "North Korea" NP is attached with \edge[roof], i.e. its internal
+%%% structure is not spelled out.
+%%% NOTE(review): despite the file name, the tree shown here is English.
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[scale=0.8, sibling distance=1pt, level distance=30pt, yshift=-1.4in]
+\Tree[. S
+        [.NP
+            [.NP
+                [.DT the ]
+                [.NN import ]
+            ]
+            [.IN in ]
+            [.NP \edge[roof]; {North Korea} ]
+        ]
+        [.VP
+            [.VBZ have ]
+            [.ADVP
+                [.RB drastically ]
+                [.VBN fallen ]
+            ]
+        ]
+     ]
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/classification-of-models-based-on-syntax.tex
+++ b/Book/Chapter4/Figures/classification-of-models-based-on-syntax.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Classification of syntax-based models
+%%% A three-level hierarchy drawn bottom-up: five concrete models in the bottom
+%%% row, two categories above them, and one root box on top.
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}
+% cnode: wide box used for the category/root level; xnode: narrower box used
+% for the five models in the bottom row.
+\tikzstyle{cnode} = [minimum width=7.0em,minimum height=2.5em,rounded corners=0.2em];
+\tikzstyle{xnode} = [minimum width=4.5em,minimum height=2.5em,rounded corners=0.2em];
+
+% Bottom row, left to right, chained 0.5em apart: inversion transduction
+% grammar, hierarchical phrase-based model (red), then string-to-tree,
+% tree-to-string and tree-to-tree models (blue).
+\node[xnode,anchor=west,fill=red!25,align=left] (itg) at (0,0) {\footnotesize{反向转录}\\\footnotesize{文法}};
+\node[xnode,anchor=west,fill=red!25,align=left] (hiero) at ([xshift=0.5em]itg.east) {\footnotesize{层次短语}\\\footnotesize{模型}};
+\node[xnode,anchor=west,fill=blue!25,align=left] (s2t) at ([xshift=0.5em]hiero.east) {\footnotesize{串到树}\\\footnotesize{模型}};
+\node[xnode,anchor=west,fill=blue!25,align=left] (t2s) at ([xshift=0.5em]s2t.east) {\footnotesize{树到串}\\\footnotesize{模型}};
+\node[xnode,anchor=west,fill=blue!25,align=left] (t2t) at ([xshift=0.5em]t2s.east) {\footnotesize{树到树}\\\footnotesize{模型}};
+
+% Middle level: cat1 = models based on formal grammars (red, above itg/hiero),
+% cat2 = models based on linguistic syntax (blue, above t2s).
+% Top level: cat0 = syntax-based models in the broad sense (green).
+\node[cnode,anchor=south,fill=red!25,align=left] (cat1) at ([xshift=-0.2em,yshift=2em]hiero.north west) {\footnotesize{基于形式文法}\\\footnotesize{的模型}};
+\node[cnode,anchor=south,fill=blue!25,align=left] (cat2) at ([xshift=-0.0em,yshift=2em]t2s.north) {\footnotesize{基于语言学}\\\footnotesize{句法的模型}};
+\node[cnode,anchor=south,minimum width=10.0em,fill=green!25,align=center] (cat0) at ([xshift=-3em,yshift=2em]cat2.north west) {\footnotesize{(广义上)}\\\footnotesize{基于句法的模型}};
+
+% Plain connecting lines: categories to the root, then each model to its
+% category. Endpoints are offset by 0.1em so the lines stop just short of the boxes.
+\draw [-,thick] ([yshift=0.1em,xshift=1em]cat1.north) -- ([xshift=-1.5em,yshift=-0.1em]cat0.south);
+\draw [-,thick] ([yshift=0.1em,xshift=-1em]cat2.north) -- ([xshift=1.5em,yshift=-0.1em]cat0.south);
+\draw [-,thick] ([yshift=0.1em]itg.north) -- ([xshift=-0.5em,yshift=-0.1em]cat1.south);
+\draw [-,thick] ([yshift=0.1em]hiero.north) -- ([xshift=0.5em,yshift=-0.1em]cat1.south);
+\draw [-,thick] ([yshift=0.1em]s2t.north) -- ([xshift=-0.8em,yshift=-0.1em]cat2.south);
+\draw [-,thick] ([yshift=0.1em]t2s.north) -- ([xshift=-0.0em,yshift=-0.1em]cat2.south);
+\draw [-,thick] ([yshift=0.1em]t2t.north) -- ([xshift=0.8em,yshift=-0.1em]cat2.south);
+
+% Literature references printed under each model; these are hard-coded text,
+% not \cite commands.
+\node [anchor=north] (itglabel) at (itg.south) {\scriptsize{(Wu, 1995)}};
+\node [anchor=north] (hierolabel) at (hiero.south) {\scriptsize{(Chiang, 2005)}};
+\node [anchor=north,align=left] (s2tlabel) at (s2t.south) {\scriptsize{(Galley et al.,}\\\scriptsize{\ 2004; 2006)}};
+\node [anchor=north,align=left] (t2slabel) at (t2s.south) {\scriptsize{(Liu et al.,}\\\scriptsize{\ 2006)}};
+\node [anchor=north,align=left] (t2tlabel) at (t2t.south) {\scriptsize{(Eisner, 2003)}};
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/combination-of-translation-with-different-rules.tex
+++ b/Book/Chapter4/Figures/combination-of-translation-with-different-rules.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Tree-based decoding - chart-based decoding
+%%%  Shows how the rules that match one input span are combined with the
+%%%  hypotheses of the sub-span covered by X_1 to produce new hypotheses.
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}%[scale=0.2]
+
+% Input string: heading q1 ("输入字符串" = input string) and the sentence q2.
+\node[anchor=north] (q1) at (0,0) {\scriptsize\sffamily\bfseries{输入字符串：}};
+\node[anchor=west] (q2) at ([xshift=0em,yshift=-2em]q1.west) {\footnotesize{进口$\quad$和$\quad$出口$\quad$大幅度$\quad$下降$\quad$了}};
+
+% Two blue bars below the input string. Both nodes are named f1; nothing in
+% this picture refers to that name afterwards.
+\node[anchor=north,fill=blue!20,minimum height=1em,minimum width=1em] (f1) at ([xshift=-4.1em,yshift=-0.8em]q2.south) {};
+
+\node[anchor=north,fill=blue!20,minimum height=4em,minimum width=1em] (f1) at ([xshift=2.2em,yshift=-0.7em]q2.south) {};
+
+% Matching rules ("匹配规则"): n2..n4 share one source side and differ only in
+% the target side.
+\node[anchor=east] (n1) at ([xshift=1em,yshift=-2em]q2.west) {\scriptsize\sffamily\bfseries{匹配规则：}};
+
+\node[anchor=west] (n2) at ([xshift=0em,yshift=0em]n1.east) {\scriptsize{$\textrm{X} \to  \langle\ \textrm{X}_1\ \text{大幅度}\ \text{下降}\ \text{了},\ \textrm{X}_1\ \textrm{have}\ \textrm{drastically}\ \textrm{fallen}\ \rangle$}};
+
+\node[anchor=west] (n3) at ([xshift=0em,yshift=-1.5em]n2.west) {\scriptsize{$\textrm{X} \to  \langle\ \textrm{X}_1\ \text{大幅度}\ \text{下降}\ \text{了},\ \textrm{X}_1\ \textrm{have}\ \textrm{fallen}\ \textrm{drastically}\ \rangle$}};
+
+\node[anchor=west] (n4) at ([xshift=0em,yshift=-1.5em]n3.west) {\scriptsize{$\textrm{X} \to  \langle\ \textrm{X}_1\ \text{大幅度}\ \text{下降}\ \text{了},\ \textrm{X}_1\ \textrm{has}\ \textrm{drastically}\ \textrm{fallen}\ \rangle$}};
+
+% Mirrored brace 1em below the left part of the input string.
+\draw[decorate,decoration={mirror,brace}]([xshift=0.5em,yshift=-1em]q2.west) --([xshift=7em,yshift=-1em]q2.west) node [xshift=0em,yshift=-1em,align=center](label1) {};
+
+% Hypotheses already available for Span[0,3] ("下的翻译假设" = translation
+% hypotheses under this span).
+{\scriptsize
+\node[anchor=west] (h1) at ([xshift=1em,yshift=-12em]q2.west) {{Span[0,3]下的翻译假设：}};
+\node[anchor=west] (h2) at ([xshift=0em,yshift=-1.3em]h1.west) {{X: imports and exports}};
+\node[anchor=west] (h6) at ([xshift=0em,yshift=-1.3em]h2.west) {{S: the import and export}};
+}
+
+% Hypotheses generated after substituting X_1 ("替换X_1后生成的翻译假设"):
+% two per rule, in rule order n2 (h22, h23), n3 (h24, h25), n4 (h26, h27).
+% The word order of each pair follows the target side of its rule.
+{\scriptsize
+\node[anchor=west] (h21) at ([xshift=9em,yshift=2em]h1.east) {{替换$\textrm{X}_1$后生成的翻译假设：}};
+\node[anchor=west] (h22) at ([xshift=0em,yshift=-1.3em]h21.west) {{X: imports and exports have drastically fallen}};
+\node[anchor=west] (h23) at ([xshift=0em,yshift=-1.3em]h22.west) {{X: the import and export have drastically fallen}};
+\node[anchor=west] (h24) at ([xshift=0em,yshift=-1.3em]h23.west) {{X: imports and exports have fallen drastically}};
+\node[anchor=west] (h25) at ([xshift=0em,yshift=-1.3em]h24.west) {{X: the import and export have fallen drastically}};
+\node[anchor=west] (h26) at ([xshift=0em,yshift=-1.3em]h25.west) {{X: imports and exports has drastically fallen}};
+\node[anchor=west] (h27) at ([xshift=0em,yshift=-1.3em]h26.west) {{X: the import and export has drastically fallen}};
+}
+
+% Frames around the two hypothesis lists. The fit list may only name nodes
+% defined in this picture: the left list consists of h1, h2 and h6.
+\node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h1) (h2) (h6)] (gl1) {};
+\node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h21) (h25) (h27)] (gl2) {};
+
+% Two arrows ending at the same point on the left edge of gl2: one from below
+% the rule list, one from the Span[0,3] hypothesis frame.
+\draw [->,ublue,thick] ([xshift=0.6em,yshift=0.2em]n4.south) .. controls +(south:2em) and +(east:0em) ..   ([xshift=-0em,yshift=2em]gl2.west);
+\draw [->,ublue,thick] ([xshift=0em,yshift=0em]gl1.east) .. controls +(north:2.2em) and +(east:0em) ..   ([xshift=-0em,yshift=2em]gl2.west);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
--- a/Book/Chapter4/Figures/combine-minimum-rule-1.tex
+++ b/Book/Chapter4/Figures/combine-minimum-rule-1.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Composed rules
+%%%  Two-column table (rule name, rule body) in a minipage 0.38\textwidth wide.
+%%%  Rows r_1, r_5, r_6, r_7 come first; the rows whose names are wrapped in \red
+%%%  (r_{1,7}, r_{1,6,7}, r_{1,5,6,7}) carry several indices and apparently are
+%%%  the compositions of the listed rules. \red is not defined in this file.
+\begin{minipage}[b]{0.38\textwidth}
+{\footnotesize
+% Rows are stretched to 1.3 for this table and reset to 1.0 right after it.
+\renewcommand*{\arraystretch}{1.3}
+\begin{tabular}{l l}
+{$r_1$} & {NP(PN(他)) $\to$  he} \\
+{$r_5$} & {PP(P$_1$ NP$_2$) $\to$ P$_1$ NP$_2$} \\
+{$r_6$} & {VP(PP$_1$ VP$_2$) $\to$ VP$_2$ PP$_1$} \\
+{$r_7$} & {IP(NP$_1$ VP$_2$) $\to$ NP$_1$ VP$_2$} \\
+{\red{$r_{1,7}$}} & {IP(NP(PN(他)) VP$_1$) $\to$} \\
+                          & {he VP$_1$}\\
+{\red{$r_{1,6,7}$}} & {IP(NP(PN(他)) VP(PP$_1$ VP$_2$))} \\
+                           & {$\to$ he VP$_2$ PP$_1$}\\
+{\red{$r_{1,5,6,7}$}} & {IP(NP(PN(他)) } \\
+                           & {VP(P$_1$ NP$_2$ VP$_3$))}\\
+                           & {$\to$ he VP$_3$ P$_1$ NP$_2$}\\
+                           & \\
+\end{tabular}
+\renewcommand*{\arraystretch}{1.0}
+}
+\end{minipage}
--- a/Book/Chapter4/Figures/combine-minimum-rule-2.tex
+++ b/Book/Chapter4/Figures/combine-minimum-rule-2.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  组合规则
+
+\begin{minipage}[t]{0.47\textwidth}
+\begin{tikzpicture}
+
+{\scriptsize
+\begin{scope}
+
+{
+% Seven boxed tree fragments cfrag1..cfrag7. Each lives in its own scope so
+% that sibling/level distances can be tuned per fragment; tree nodes that are
+% given names (sn1..sn16) use the \node(name){label}; form.
+% Bottom row, chained left to right from cfrag1: the four lexicalised fragments.
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at
+   (0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at
+   ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at
+   ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 回答 ]]};
+   \end{scope}
+
+\begin{scope}[sibling distance=15pt,level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at
+   ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 满意 ]]};
+\end{scope}
+
+% Upper fragments with empty leaves: PP(P NP) above cfrag2, VP(PP VP) above
+% that, and IP(NP VP) on top.
+\begin{scope}[sibling distance=20pt,level distance=25pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag5) at
+   ([xshift=0.3em,yshift=2.5em]cfrag2.north west) {\Tree[.\node(sn8){PP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=60pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at
+   ([xshift=1.6em,yshift=0.8em]cfrag5.north west) {\Tree[.\node(sn11){VP}; [.\node(sn12){PP}; ] [.\node(sn13){VP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=80pt,level distance=18pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at
+   ([xshift=-3.6em,yshift=0.8em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]};
+\end{scope}
+
+% Target-side words tw11..tw16 ("he was satisfied with the answer"), chained
+% left to right below cfrag1; the small yshift values nudge individual words.
+\node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){satisfied};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){with};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){answer};
+
+% Dashed links from the bottom-row fragments to target words: cfrag1 -> he,
+% cfrag2 -> with, cfrag3 -> the and answer, cfrag4 -> satisfied (curved).
+% "was" (tw12) receives no link.
+\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
+\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
+\draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north);
+
+% Vertical dot-ended connectors rising from the top of cfrag1..cfrag6; the
+% upper end is a hand-tuned offset, presumably reaching the empty leaf in the
+% fragment above that the lower fragment fills.
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=11.3em]cfrag1.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=0.1em,yshift=2.9em]cfrag2.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) -- ([xshift=0.1em,yshift=0.9em]cfrag3.north);
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=5.7em]cfrag4.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=1em]cfrag5.north);
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=1em]cfrag6.north);
+
+% Highlighting for fragments 1, 5, 6 and 7: a blue circle with a white number
+% on a corner of the fragment box, plus a red redraw of that fragment's
+% upward connector (fragment 7 has no connector to redraw).
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel1) at (cfrag1.south east) {{\color{white} \tiny{1}}};
+\draw[*-*,red,thick] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=11.3em]cfrag1.north);
+
+}
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel5) at (cfrag5.north west) {{\color{white} \tiny{5}}};
+\draw[*-*,red] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=1em]cfrag5.north);
+}
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{6}}};
+\draw[*-*,red] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=1em]cfrag6.north);
+}
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel7) at (cfrag7.south west) {{\color{white} \tiny{7}}};
+}
+
+% Green background fill behind the same four fragments (1, 7, 6, 5).
+% All four fit nodes reuse the name cfrag1back; after this block the name
+% refers to the last one (the fill behind cfrag5).
+\begin{pgfonlayer}{background}
+{
+\node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag1)] {};
+\node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag7)] {};
+}
+{
+\node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag6)] {};
+}
+{
+\node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag5)] {};
+}
+\end{pgfonlayer}
+
+}
+\end{scope}
+}
+\end{tikzpicture}
+\end{minipage}
\ No newline at end of file
--- a/Book/Chapter4/Figures/consistence-of-word-alignment.tex
+++ b/Book/Chapter4/Figures/consistence-of-word-alignment.tex
+%%------------------------------------------------------------------------------------------------------------
+%% 短语抽取方法
+\begin{center}
+\begin{tikzpicture}
+
+\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
+\tikzstyle{srcnode} = [font=\small,anchor=south west]
+\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
+\tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6*1.1cm,minimum width=0.36*1.1cm]
+\tikzstyle{probnode} = [fill=blue!30,minimum width=0.4*1.1cm]
+\tikzstyle{labelnode} = [above]
+
+% alignment matrix1
+\begin{scope}[scale=1,yshift=0.12in]
+\foreach \i / \j / \c in
+    {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15,
+    0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
+    0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15,
+    0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15}
+    \node[elementnode,minimum size=0.6*1.1cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.1cm*\i-5.4*0.5*1.1cm,0.5*1.1cm*\j-1.05*1.1cm) {};
+
+
+
+% source
+\node[srcnode] (src01) at (-5.9*0.5*1.1cm,-1.05*1.1cm+3.4*0.5*1.1cm) {\scriptsize{$t_1$}};
+\node[srcnode] (src02) at ([xshift=0.5*1.1cm]src01.south west) {\scriptsize{$t_2$}};
+\node[srcnode] (src03) at ([xshift=0.5*1.1cm]src02.south west) {\scriptsize{$t_3$}};
+\node[srcnode] (src04) at ([xshift=0.5*1.1cm]src03.south west) {\scriptsize{$t_4$}};
+
+ target
+\node[tgtnode] (tgt01) at (-6.0*0.5*1.1cm,-1.05*1.1cm+3.3*0.5*1.1cm) {\scriptsize{$s_1$}};
+\node[tgtnode] (tgt02) at ([yshift=-0.5*1.1cm]tgt01.north east) {\scriptsize{$s_2$}};
+\node[tgtnode] (tgt03) at ([yshift=-0.5*1.1cm]tgt02.north east) {\scriptsize{$s_3$}};
+\node[tgtnode] (tgt04) at ([yshift=-0.5*1.1cm]tgt03.north east) {\scriptsize{$s_4$}};
+
+ alignment matrix2
+\foreach \i / \j / \c in
+    {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15,
+    0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
+    0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15,
+    0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15}
+    \node[elementnode,minimum size=0.6*1.1cm*\c,inner sep=0.1pt,fill=blue] (b\i\j) at (0.5*1.1cm*\i+0.6*0.5*1.1cm,0.5*1.1cm*\j-1.05*1.1cm) {};
+
+ source
+\node[srcnode] (src11) at (0.1*0.5*1.1cm,-1.05*1.1cm+3.4*0.5*1.1cm) {\scriptsize{$t_1$}};
+\node[srcnode] (src12) at ([xshift=0.5*1.1cm]src11.south west) {\scriptsize{$t_2$}};
+\node[srcnode] (src13) at ([xshift=0.5*1.1cm]src12.south west) {\scriptsize{$t_3$}};
+\node[srcnode] (src14) at ([xshift=0.5*1.1cm]src13.south west) {\scriptsize{$t_4$}};
+
+ target
+\node[tgtnode] (tgt11) at (0.2*0.5*1.1cm,-1.05*1.1cm+3.3*0.5*1.1cm) {\scriptsize{$s_1$}};
+\node[tgtnode] (tgt12) at ([yshift=-0.5*1.1cm]tgt11.north east) {\scriptsize{$s_2$}};
+\node[tgtnode] (tgt13) at ([yshift=-0.5*1.1cm]tgt12.north east) {\scriptsize{$s_3$}};
+\node[tgtnode] (tgt14) at ([yshift=-0.5*1.1cm]tgt13.north east) {\scriptsize{$s_4$}};
+
+ alignment matrix3
+\foreach \i / \j / \c in
+    {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15,
+    0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
+    0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15,
+    0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15}
+    \node[elementnode,minimum size=0.6*1.1cm*\c,inner sep=0.1pt,fill=blue] (c\i\j) at (0.5*1.1cm*\i+6.6*0.5*1.1cm,0.5*1.1cm*\j-1.05*1.1cm) {};
+
+ source
+\node[srcnode] (src21) at (6.1*0.5*1.1cm,-1.05*1.1cm+3.4*0.5*1.1cm) {\scriptsize{$t_1$}};
+\node[srcnode] (src22) at ([xshift=0.5*1.1cm]src21.south west) {\scriptsize{$t_2$}};
+\node[srcnode] (src23) at ([xshift=0.5*1.1cm]src22.south west) {\scriptsize{$t_3$}};
+\node[srcnode] (src24) at ([xshift=0.5*1.1cm]src23.south west) {\scriptsize{$t_4$}};
+
+ target
+\node[tgtnode] (tgt21) at (6.2*0.5*1.1cm,-1.05*1.1cm+3.3*0.5*1.1cm) {\scriptsize{$s_1$}};
+\node[tgtnode] (tgt22) at ([yshift=-0.5*1.1cm]tgt21.north east) {\scriptsize{$s_2$}};
+\node[tgtnode] (tgt23) at ([yshift=-0.5*1.1cm]tgt22.north east) {\scriptsize{$s_3$}};
+\node[tgtnode] (tgt24) at ([yshift=-0.5*1.1cm]tgt23.north east) {\scriptsize{$s_4$}};
+
+ word alignment
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la1) at (a03) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la2) at (a12) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la3) at (a11) {};
+
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb1) at (b03) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lb2) at (b12) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=red!50] (lb3) at (b11) {};
+
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc1) at (c03) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc2) at (c12) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc3) at (c11) {};
+
+\begin{pgfonlayer}{background}
+\node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a03) (a11)] (phrase1) {};
+\node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (b03) (b12)] (phrase2) {};
+\node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (c03) (c21)] (phrase3) {};
+\end{pgfonlayer}
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/content-of-chart-in-tree-based-decoding.tex
+++ b/Book/Chapter4/Figures/content-of-chart-in-tree-based-decoding.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Tree-based decoding - chart-based decoding.
+%%%  Left: a triangular chart (one row per source word, one column per span length l) with the
+%%%  source sentence and its word boundaries 0..4 underneath.
+%%%  Right: a table listing, for each of the 10 chart cells, its index, span, label and source fragment.
+{
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}
+
+% chart cells: cell<row><column>; the small negative/positive shifts make neighbouring borders overlap
+\tikzstyle{chartnode2}=[rectangle,minimum size=1.3em,fill=orange!20,draw]
+\node [chartnode2,anchor=north west] (cell11) at (0,0) {};
+\node [chartnode2,anchor=north west] (cell21) at ([yshift=0.04em]cell11.south west) {};
+\node [chartnode2,anchor=west] (cell22) at ([xshift=-0.04em]cell21.east) {};
+\node [chartnode2,anchor=north west] (cell31) at ([yshift=0.04em]cell21.south west) {};
+\node [chartnode2,anchor=west] (cell32) at ([xshift=-0.04em]cell31.east) {};
+\node [chartnode2,anchor=west] (cell33) at ([xshift=-0.04em]cell32.east) {};
+\node [chartnode2,anchor=north west] (cell41) at ([yshift=0.04em]cell31.south west) {};
+\node [chartnode2,anchor=west] (cell42) at ([xshift=-0.04em]cell41.east) {};
+\node [chartnode2,anchor=west] (cell43) at ([xshift=-0.04em]cell42.east) {};
+\node [chartnode2,anchor=west] (cell44) at ([xshift=-0.02em]cell43.east) {};
+
+% row labels (source words) and column labels (span length l)
+\node [anchor=east] (s1) at (cell11.west) {\scriptsize{猫}};
+\node [anchor=east] (s2) at (cell21.west) {\scriptsize{喜欢}};
+\node [anchor=east] (s3) at (cell31.west) {\scriptsize{吃}};
+\node [anchor=east] (s4) at (cell41.west) {\scriptsize{鱼}};
+% each length label gets its own name (they all used to be called t5, which the table below also defines)
+\node [anchor=north] (l1) at (cell41.south) {\tiny{$l$=1}};
+\node [anchor=north] (l2) at (cell42.south) {\tiny{$l$=2}};
+\node [anchor=north] (l3) at (cell43.south) {\tiny{$l$=3}};
+\node [anchor=north] (l4) at (cell44.south) {\tiny{$l$=4}};
+
+\node [anchor=north] (chartlabel) at ([yshift=-1em]cell42.south east) {\footnotesize{\textbf{chart}}};
+
+% source sentence with the word-boundary positions 0..4 printed below it
+{\footnotesize
+\node [anchor=north west] (w1) at ([yshift=-2.5em,xshift=-1.0em]cell41.south west) {猫};
+\node [anchor=west] (w2) at ([xshift=0.3em]w1.east) {喜欢};
+\node [anchor=west] (w3) at ([xshift=0.3em]w2.east) {吃};
+\node [anchor=west] (w4) at ([xshift=0.3em]w3.east) {鱼};
+\node [anchor=north east] (p0) at ([xshift=0.3em]w1.south west) {\blue{0}};
+\node [anchor=north east] (p1) at ([xshift=0.3em]w2.south west) {\blue{1}};
+\node [anchor=north east] (p2) at ([xshift=0.3em]w3.south west) {\blue{2}};
+\node [anchor=north east] (p3) at ([xshift=0.3em]w4.south west) {\blue{3}};
+\node [anchor=north west] (p4) at ([xshift=-0.4em]w4.south east) {\blue{4}};
+\node [anchor=north] (slabel) at ([yshift=-0.6em]p2.south) {\scriptsize{\textbf{源语言句子}}};
+}
+
+\end{scope}
+% table header: index | span | label | source fragment
+{\normalsize
+\node[anchor=north](t1) at (4.5,0.3){{\scriptsize{序号}}};
+\node[anchor=south](k1) at ([xshift=3em,yshift=0em]t1.south){{\scriptsize{跨度}}};
+\node[anchor=south](b1) at ([xshift=3em,yshift=0em]k1.south){{\scriptsize{标记}}};
+\node[anchor=south](y1) at ([xshift=5em,yshift=0em]b1.south){{\scriptsize{源语句子片段}}};
+}
+\draw [-] ([xshift=-0em,yshift=0em]t1.south west) -- ([xshift=13em,yshift=0em]t1.south);
+{\scriptsize
+% column 1: row index
+\node[anchor=west](t2) at ([xshift=0.2em,yshift=-1.7em]t1.west){{1}};
+\node[anchor=west](t3) at ([xshift=0em,yshift=-1.5em]t2.west){{2}};
+\node[anchor=west](t4) at ([xshift=0em,yshift=-1.5em]t3.west){{3}};
+\node[anchor=west](t5) at ([xshift=0em,yshift=-1.5em]t4.west){{4}};
+\node[anchor=west](t6) at ([xshift=0em,yshift=-1.5em]t5.west){{5}};
+\node[anchor=west](t7) at ([xshift=0em,yshift=-1.5em]t6.west){{6}};
+\node[anchor=west](t8) at ([xshift=0em,yshift=-1.5em]t7.west){{7}};
+\node[anchor=west](t9) at ([xshift=0em,yshift=-1.5em]t8.west){{8}};
+\node[anchor=west](t10) at ([xshift=0em,yshift=-1.5em]t9.west){{9}};
+\node[anchor=west](t11) at ([xshift=0em,yshift=-1.5em]t10.west){{10}};
+
+% column 2: span [start,end] over the boundary positions 0..4 shown under the sentence.
+% Rows 3 and 4 are the single words at positions 2-3 and 3-4 (they previously read [2,5] and [3,6],
+% which lie outside the 0..4 range).
+\node[anchor=west](k2) at ([xshift=0.2em,yshift=-1.7em]k1.west){{[0,1]}};
+\node[anchor=west](k3) at ([xshift=0em,yshift=-1.5em]k2.west){{[1,2]}};
+\node[anchor=west](k4) at ([xshift=0em,yshift=-1.5em]k3.west){{[2,3]}};
+\node[anchor=west](k5) at ([xshift=0em,yshift=-1.5em]k4.west){{[3,4]}};
+\node[anchor=west](k6) at ([xshift=0em,yshift=-1.5em]k5.west){{[0,2]}};
+\node[anchor=west](k7) at ([xshift=0em,yshift=-1.5em]k6.west){{[1,3]}};
+\node[anchor=west](k8) at ([xshift=0em,yshift=-1.5em]k7.west){{[2,4]}};
+\node[anchor=west](k9) at ([xshift=0em,yshift=-1.5em]k8.west){{[0,3]}};
+\node[anchor=west](k10) at ([xshift=0em,yshift=-1.5em]k9.west){{[1,4]}};
+\node[anchor=west](k11) at ([xshift=0em,yshift=-1.5em]k10.west){{[0,4]}};
+
+% column 3: label of the span (N/A where the table gives none)
+\node[anchor=west](b2) at ([xshift=0.2em,yshift=-1.7em]b1.west){{NN \& NP}};
+\node[anchor=west](b3) at ([xshift=0em,yshift=-1.5em]b2.west){{VV}};
+\node[anchor=west](b4) at ([xshift=0em,yshift=-1.5em]b3.west){{VV}};
+\node[anchor=west](b5) at ([xshift=0em,yshift=-1.5em]b4.west){{NN \& NP}};
+\node[anchor=west](b6) at ([xshift=0em,yshift=-1.5em]b5.west){{N/A}};
+\node[anchor=west](b7) at ([xshift=0em,yshift=-1.5em]b6.west){{N/A}};
+\node[anchor=west](b8) at ([xshift=0em,yshift=-1.5em]b7.west){{VP}};
+\node[anchor=west](b9) at ([xshift=0em,yshift=-1.5em]b8.west){{N/A}};
+\node[anchor=west](b10) at ([xshift=0em,yshift=-1.5em]b9.west){{VP}};
+% "root" in red; spelled out with \color because the fused \redroot is not defined in this figure
+\node[anchor=west](b11) at ([xshift=0em,yshift=-1.5em]b10.west){{IP({\color{red} root})}};
+
+% column 4: the source words covered by the span
+\node[anchor=west](y2) at ([xshift=0.2em,yshift=-1.7em]y1.west){{猫}};
+\node[anchor=west](y3) at ([xshift=0em,yshift=-1.5em]y2.west){{喜欢}};
+\node[anchor=west](y4) at ([xshift=0em,yshift=-1.5em]y3.west){{吃}};
+\node[anchor=west](y5) at ([xshift=0em,yshift=-1.5em]y4.west){{鱼}};
+\node[anchor=west](y6) at ([xshift=0em,yshift=-1.5em]y5.west){{猫喜欢}};
+\node[anchor=west](y7) at ([xshift=0em,yshift=-1.5em]y6.west){{喜欢吃}};
+\node[anchor=west](y8) at ([xshift=0em,yshift=-1.5em]y7.west){{吃鱼}};
+\node[anchor=west](y9) at ([xshift=0em,yshift=-1.5em]y8.west){{猫喜欢吃}};
+\node[anchor=west](y10) at ([xshift=0em,yshift=-1.5em]y9.west){{喜欢吃鱼}};
+\node[anchor=west](y11) at ([xshift=0em,yshift=-1.5em]y10.west){{猫喜欢吃鱼}};
+}
+
+% vertical rule between the index and span columns (the other two rules are disabled)
+\draw[-] ([xshift=-0.3em,yshift=-0.3em]k1.north west) -- ([xshift=-0.3em,yshift=-10.7em]k1.south west);
+%\draw[-] ([xshift=-0.3em,yshift=-0.3em]b1.north west) -- ([xshift=-0.3em,yshift=-10.7em]b1.south west);
+%\draw[-] ([xshift=-0em,yshift=-0.3em]y1.north west) -- ([xshift=-0em,yshift=-10.7em]y1.south west);
+
+\end{tikzpicture}
+\end{center}
+
+}
\ No newline at end of file
--- a/Book/Chapter4/Figures/cut-different-positions-of-word-string.tex
+++ b/Book/Chapter4/Figures/cut-different-positions-of-word-string.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  String-based decoding - matching consecutive variables increases the complexity.
+%%%  The same 13-word sentence is shown four times; each row places the boundary between the
+%%%  red block and the blue block at a different position.
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}
+{\scriptsize
+
+% row 1: red block = words 3..12, blue block = word 13
+\node [anchor=west] (sw11) at (0,0) {阿都拉$_1$};
+\node [anchor=west] (sw12) at ([xshift=0.1em]sw11.east) {对$_2$};
+\node [anchor=west,fill=red!20] (sw13) at ([xshift=0.1em]sw12.east) {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$\ 表现$_{11}$ 感到$_{12}$ };
+\node [anchor=west,fill=blue!20] (sw14) at ([xshift=0.2em]sw13.east) {满意$_{13}$};
+
+% row 2: red block = words 3..11, blue block = words 12..13
+\node [anchor=north west] (sw21) at ([yshift=-0.3em]sw11.south west) {阿都拉$_1$};
+\node [anchor=west] (sw22) at ([xshift=0.1em]sw21.east) {对$_2$};
+\node [anchor=west,fill=red!20] (sw23) at ([xshift=0.1em]sw22.east) {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$\ 表现$_{11}$};
+\node [anchor=west,fill=blue!20] (sw24) at ([xshift=0.2em]sw23.east) {感到$_{12}$ 满意$_{13}$};
+
+% row 3: red block = words 3..10, blue block = words 11..13
+\node [anchor=north west] (sw31) at ([yshift=-0.3em]sw21.south west) {阿都拉$_1$};
+\node [anchor=west] (sw32) at ([xshift=0.1em]sw31.east) {对$_2$};
+\node [anchor=west,fill=red!20] (sw33) at ([xshift=0.1em]sw32.east) {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$};
+\node [anchor=west,fill=blue!20] (sw34) at ([xshift=0.2em]sw33.east) {表现$_{11}$ 感到$_{12}$ 满意$_{13}$};
+
+% ellipsis standing in for the rows that are not drawn
+\node [anchor=north] (dots) at ([yshift=-0.5em]sw33.south) {...};
+
+% last row: red block = word 3 only, blue block = words 4..13
+\node [anchor=north west] (sw41) at ([yshift=-1.8em]sw31.south west) {阿都拉$_1$};
+\node [anchor=west] (sw42) at ([xshift=0.1em]sw41.east) {对$_2$};
+\node [anchor=west,fill=red!20] (sw43) at ([xshift=0.1em]sw42.east) {自己$_3$ };
+\node [anchor=west,fill=blue!20] (sw44) at ([xshift=0.2em]sw43.east) {四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$ 施政$_{10}$ 表现$_{11}$ 感到$_{12}$ 满意$_{13}$};
+
+% title above the first row: the pattern being matched and the span it is matched on
+\node [anchor=south] (label) at ([yshift=0.3em]sw13.north) {\footnotesize{在跨度[{\blue 0},{\blue 13}]上匹配``NP 对 NP VP''}};
+
+% legend: red swatch = the second NP, blue swatch = VP
+\node [anchor=north west,minimum size=1.2em,fill=red!20] (np) at ([yshift=-1.0em,xshift=0.3em]sw41.south west) {};
+\node [anchor=west] (nplabel) at (np.east) {NP(第二个)};
+\node [anchor=west,minimum size=1.2em,fill=blue!20] (vp) at ([xshift=1.0em]nplabel.east) {};
+\node [anchor=west] (vplabel) at (vp.east) {VP};
+
+}
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/cyk-algorithm.tex
+++ b/Book/Chapter4/Figures/cyk-algorithm.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  CYK decoding: pseudo-code of the CYK algorithm, typeset as a stack of TikZ nodes
+%%%  (one node per line of pseudo-code, indentation done with xshift) on a tinted background box.
+% see the NiuTrans Manual
+\begin{center}
+\begin{tikzpicture}
+% alignmentnode and selectnode are defined here but not used by any node in this figure
+\tikzstyle{alignmentnode} = [rectangle,fill=blue!30,minimum size=0.45em,text=white,inner sep=0.1pt]
+\tikzstyle{selectnode} = [rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt]
+\tikzstyle{srcnode} = [anchor=south west]
+\begin{scope}[scale=0.85]
+
+% function header and initialisation loop over the unit spans [j,j+1]
+% (the loop keyword used to be misspelled "fore")
+\node[srcnode] (c1) at (0,0) {\normalsize{\textbf{Function} CYK-Algorithm($\textbf{s},G$)}};
+\node[srcnode,anchor=north west] (c21) at ([xshift=1.5em,yshift=0.4em]c1.south west) {\normalsize{\textbf{for} $j=0$ to $ J - 1$}};
+\node[srcnode,anchor=north west] (c22) at ([xshift=1.5em,yshift=0.4em]c21.south west) {\normalsize{$span[j,j+1 ]$.Add($A \to a \in G$)}};
+% main triple loop: span length l, span start j, split point k; the c31/c41/c51 nodes are the
+% right-hand "//" remarks for the three loops
+\node[srcnode,anchor=north west] (c3) at ([xshift=-1.5em,yshift=0.4em]c22.south west) {\normalsize{\textbf{for} $l$ = 1 to $J$}};
+\node[srcnode,anchor=west] (c31) at ([xshift=6em]c3.east) {\normalsize{// length of span}};
+\node[srcnode,anchor=north west] (c4) at ([xshift=1.5em,yshift=0.4em]c3.south west) {\normalsize{\textbf{for} $j$ = 0 to $J-l$}};
+\node[srcnode,anchor=north west] (c41) at ([yshift=0.4em]c31.south west) {\normalsize{// beginning of span}};
+% NOTE(review): as written k ranges over j..j+l, so the first and last split leave one side
+% empty (span[j,j] / span[j+l,j+l]); confirm against the text whether j+1..j+l-1 is intended.
+\node[srcnode,anchor=north west] (c5) at ([xshift=1.5em,yshift=0.4em]c4.south west) {\normalsize{\textbf{for} $k$ = $j$ to $j+l$}};
+\node[srcnode,anchor=north west] (c51) at ([yshift=0.4em]c41.south west) {\normalsize{// partition of span}};
+\node[srcnode,anchor=north west] (c6) at ([xshift=1.5em,yshift=0.4em]c5.south west) {\normalsize{$hypos$ = Compose($span[j, k], span[k, j+l]$)}};
+\node[srcnode,anchor=north west] (c7) at ([yshift=0.4em]c6.south west) {\normalsize{$span[j, j+l]$.Update($hypos$)}};
+\node[srcnode,anchor=north west] (c8) at ([xshift=-4.5em,yshift=0.4em]c7.south west) {\normalsize{\textbf{return} $span[0, J]$}};
+
+% header lines stacked upwards above the function header: parameters (c9), output (c10), input (c11)
+\node[anchor=west] (c9) at ([xshift=-3.2em,yshift=1.7em]c1.west) {\small{\textrm{参数：}\textbf{s}为输入字符串。$G$为输入CFG。$J$为待分析字符串长度。}};
+\node[anchor=west] (c10) at ([xshift=0em,yshift=1.3em]c9.west) {\small{\textrm{输出：字符串全部可能的语法分析结果}}};
+\node[anchor=west] (c11) at ([xshift=0em,yshift=1.3em]c10.west) {\small{\textrm{输入：符合乔姆斯基范式的待分析字符串和一个上下文无关文法（CFG）}}};
+
+
+
+% tinted box fitted around the listed nodes, on the background layer
+\begin{pgfonlayer}{background}
+\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=blue!10!white] [fit = (c1) (c21) (c3) (c6) (c7) (c8) (c11)] (gl1) {};
+\end{pgfonlayer}
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
+
--- a/Book/Chapter4/Figures/derivation-consist-of-bilingual-phrase.tex
+++ b/Book/Chapter4/Figures/derivation-consist-of-bilingual-phrase.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Describing a translation with bilingual phrase pairs: three colour-coded source phrases
+%%% (top row) are linked by double-headed arrows to their target phrases (bottom row).
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[minimum height = 18pt]
+{\scriptsize
+
+% source-side phrases, left to right
+\node [fill=green!20,anchor=north] (s1) at (0,0) {进口};
+\node [fill=red!20,anchor=north] (s2) at ([xshift=4em]s1.north) {大幅度};
+\node [fill=blue!20,anchor=north] (s3) at ([xshift=4.5em]s2.north) {下降 了};
+
+% target-side phrases, same colours as their source counterparts
+\node [fill=green!20,anchor=west] (t1) at ([yshift=-4em]s1.west) {the imports have};
+\node [fill=red!20,anchor=north] (t2) at ([xshift=8em]t1.north) {drastically};
+\node [fill=blue!20,anchor=north] (t3) at ([xshift=5.7em]t2.north) {fallen};
+
+% one link per phrase pair
+\foreach \n in {1,2,3}
+    \path [<->,thick] (s\n.south) edge (t\n.north);
+}
+
+% row captions "s:" and "t:" to the left of the two rows
+\node [anchor=south] (s0) at ([xshift=-2em]s1.south) {\textbf{s:}};
+\node [anchor=east] (t0) at ([yshift=-2.85em]s0.east) {\textbf{t:}};
+
+% phrase symbols: \bar{s}_{a_n = n} above each source phrase, \bar{t}_n below each target phrase
+\foreach \n in {1,2,3} {
+    \node [anchor=south,inner sep=0pt,yshift=-0.3em] (sp\n) at (s\n.north) {\scriptsize{$\bar{s}_{a_{\n} = \n}$}};
+    \node [anchor=north,inner sep=0pt,yshift=0.3em] (tp\n) at (t\n.south) {\scriptsize{$\bar{t}_{\n}$}};
+}
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/derivation-of-hierarchical-phrase-and-tree-structure-model.tex
+++ b/Book/Chapter4/Figures/derivation-of-hierarchical-phrase-and-tree-structure-model.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Syntax tree (hierarchical phrases): the derivation tree on the left, with every internal
+%%%  node tagged by the rule applied there, and the list of rules r_1..r_7 on the right.
+\begin{tikzpicture}
+% small filled circle that carries a rule name next to a tree node
+\tikzstyle{rulemark} = [circle, inner sep = 0.5pt, fill=blue!90!white]
+{\scriptsize
+\begin{scope}[sibling distance=0pt, level distance = 27pt]
+% derivation tree (tikz-qtree syntax); n1..n9 are internal nodes, cw1..cw8 the source words
+\Tree[.\node(n1){\textbf{S}};
+        [.\node(n2){\textbf{S}};
+            [.\node(n3){\textbf{S}};
+                [.\node(n4){\textbf{S}};
+                    [.\node(n5){\textbf{X}}; \node(cw1){但}; ]
+                ]
+                [.\node(n6){\textbf{X}}; \node(cw2){美国}; ]
+            ]
+            [.\node(n7){\textbf{X}};
+                [. \node(cw3){并没有}; ]
+                [. \node(cw4){执行}; ]
+            ]
+        ]
+        [.\node(n8){\textbf{X}};
+            [. \node(cw5){世贸}; ]
+            [.\node(n9){\textbf{X}};
+                [. \node(cw6){组织}; ]
+                [. \node(cw7){的}; ]
+            ]
+            [. \node(cw8){裁决}; ]
+        ]
+     ]
+
+% rule tags, one per internal node (mark<k> belongs to n<k>)
+\node[rulemark, anchor = west] (mark1) at ([xshift=-0.3em]n1.north east) {{\color{white} \tiny{$r_2$}}};
+\node[rulemark, anchor = east] (mark2) at ([xshift=0.3em]n2.north west) {{\color{white} \tiny{$r_2$}}};
+\node[rulemark, anchor = east] (mark3) at ([xshift=0.3em]n3.north west) {{\color{white} \tiny{$r_2$}}};
+\node[rulemark, anchor = east] (mark4) at ([xshift=0.3em]n4.north west) {{\color{white} \tiny{$r_1$}}};
+\node[rulemark, anchor = east] (mark5) at ([xshift=0.3em]n5.north west) {{\color{white} \tiny{$r_3$}}};
+\node[rulemark, anchor = west] (mark6) at ([xshift=-0.3em]n6.south east) {{\color{white} \tiny{$r_4$}}};
+\node[rulemark, anchor = west] (mark7) at ([xshift=-0.3em]n7.north east) {{\color{white} \tiny{$r_5$}}};
+\node[rulemark, anchor = west] (mark8) at ([xshift=-0.3em]n8.north east) {{\color{white} \tiny{$r_6$}}};
+\node[rulemark, anchor = west] (mark9) at ([xshift=-0.3em]n9.north east) {{\color{white} \tiny{$r_7$}}};
+
+\end{scope}
+
+% rule list: each rule is a name node (r<k>) followed by its body (rc<k>)
+\begin{scope}[xshift = 1.8in, yshift = 0.1in]
+\node (rules) {\textbf{层次短语翻译规则:}};
+\draw[-] (rules.south west)--([xshift=1.8in]rules.south west);
+
+\node[anchor=north west] (r1) at ([yshift=-0.2em]rules.south west) {$r_1$};
+\node[anchor=west] (rc1) at ([xshift=0.0em]r1.east) {$\textrm{S} \; \to \; \langle\ \textrm{X}_1, \; \; \textrm{X}_1\ \rangle$};
+
+\node[anchor=north west] (r2) at ([yshift=-0.4em]r1.south west) {$r_2$};
+\node[anchor=west] (rc2) at ([xshift=0em]r2.east) {$\textrm{S} \; \to \; \langle\  \textrm{S}_1 \; \textrm{X}_2, \; \; \textrm{S}_1 \; \textrm{X}_2\ \rangle$};
+
+\node[anchor=north west] (r3) at ([yshift=-0.4em]r2.south west) {$r_3$};
+\node[anchor=west] (rc3) at ([xshift=0em]r3.east) {$\textrm{X} \; \to \; \langle\  \text{但}, \; \; \text{but}\ \rangle$};
+
+\node[anchor=north west] (r4) at ([yshift=-0.4em]r3.south west) {$r_4$};
+\node[anchor=west] (rc4) at ([xshift=0em]r4.east) {$\textrm{X} \; \to \; \langle\  \text{美国}, \; \; \text{the U.S.}\ \rangle$};
+
+% r_5 and r_6 each take two lines; the continuation line starts with an invisible
+% (white) copy of the rule name so that it occupies the same space as a real name
+\node[anchor=north west] (r5) at ([yshift=-0.4em]r4.south west) {$r_5$};
+\node[anchor=west] (rc5) at ([xshift=0em]r5.east) {$\textrm{X} \; \to \; \langle\  \text{并没有} \; \text{执行}, \; \; \text{}$};
+
+\node[anchor=north west] (r52) at ([yshift=-0.4em]r5.south west) {{\color{white} $r_5$}};
+\node[anchor=west] (rc52) at ([xshift=2.9em]r52.east) {$\text{has not implemented}\ \rangle$};
+
+\node[anchor=north west] (r6) at ([yshift=-0.4em]r52.south west) {$r_6$};
+\node[anchor=west] (rc6) at ([xshift=0em]r6.east) {$\textrm{X} \; \to \; \langle\ \text{世贸} \; \textrm{X}_1 \; \text{裁决}, $};
+
+\node[anchor=north west] (r61) at ([yshift=-0.4em]r6.south west) {{\color{white} $r_6$}};
+\node[anchor=west] (rc61) at ([xshift=2.9em]r61.east) {$\text{the decision} \; \textrm{X}_1 \; \text{the WTO}\ \rangle$};
+
+\node[anchor=north west] (r7) at ([yshift=-0.4em]r61.south west) {$r_7$};
+\node[anchor=west] (rc7) at ([xshift=0em]r7.east) {$\textrm{X} \; \to \; \langle\ \text{组织 的}, \; \; \text{of}\ \rangle$};
+\end{scope}
+
+% the derivation written as a composition of rules, placed above the rule list
+\node[anchor=south] (derivation) at ([xshift=-9em,yshift=1em]rules.north) {\normalsize{${d = r_3}{\circ r_1}{ \circ r_4}{ \circ r_2}{ \circ r_5}{ \circ r_2}{ \circ r_7}{ \circ r_6}{ \circ r_2}$}};
+
+}
+\end{tikzpicture}
--- a/Book/Chapter4/Figures/different-representations-of-syntax-tree.tex
+++ b/Book/Chapter4/Figures/different-representations-of-syntax-tree.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Representations of a tree structure: the same tree drawn (a) as a tree,
+%%%  (b) as an indented bracket sequence and (c) as a flat bracket sequence.
+\begin{tikzpicture}
+{\footnotesize
+% (a) tree drawing (tikz-qtree)
+\begin{scope}[sibling distance=3pt, level distance = 22pt]
+\Tree[.S [.NN ] [.VP [.AD ] [.VP [.VV ] [.AS ] ] ] ]
+\end{scope}
+
+% (b) bracket sequence, one constituent per line, indented with \hspace
+\begin{scope}[xshift=1in,yshift=0.25in]
+\node [align=left,anchor=north west] (indented) at (0,0) {[S \\\hspace{1em}NN\\\hspace{1em}VP[\\\hspace{2.5em}AD \\\hspace{2.5em}VP[\\\hspace{4em}VV \\\hspace{4em}AS]]]};
+\end{scope}
+
+% (c) parenthesised sequence, broken over two lines
+\begin{scope}[xshift=2.5in,yshift=-0.80in]
+\node [align=left,anchor=west] (flat) at (0,0) {(S NN VP(AD \\ VP(VV AS)))};
+\end{scope}
+
+% sub-captions, chained left to right under the three panels
+\node [anchor=north west] (capA) at (-1.5em,-1in) {\scriptsize{(a) 树状表示}};
+\node [anchor=west] (capB) at ([xshift=0.5in]capA.east) {\scriptsize{(b) 序列表示(缩进)}};
+\node [anchor=west] (capC) at ([xshift=0.5in]capB.east) {\scriptsize{(c) 序列表示}};
+}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-cyk-algorithm-execution-label.tex
+++ b/Book/Chapter4/Figures/example-of-cyk-algorithm-execution-label.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Tree-based decoding (chart-based decoding): the grammar rules used by the example,
+%%%  set on two lines inside a green box with a drop shadow.
+\begin{center}
+\begin{tikzpicture}\footnotesize
+\begin{scope}[scale=0.2]
+% first line of rules
+\node [anchor=south east] (rulesTop) at (0,0)
+    {\small{$\textrm{S} \to \textrm{AB}\ \ \ \textrm{A} \to \textrm{CD}\  \vert \  \textrm{CF}\ \ \ \textrm{B} \to \textrm{c}\  \vert \  \textrm{BE}$}};
+% second line of rules, left-aligned under the first
+\node [anchor=north west] (rulesBottom) at ([yshift=0.3em]rulesTop.south west)
+    {\small{$\textrm{C} \to \textrm{a}\ \ \ \  \textrm{D} \to \textrm{b}\ \ \ \ \textrm{E} \to \textrm{c}\ \ \ \ \textrm{F} \to \textrm{AD}$}};
+
+% box fitted around both lines, on the background layer so the text stays on top
+\begin{pgfonlayer}{background}
+\node [rectangle,inner sep=0.1em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen,fit = (rulesTop) (rulesBottom)] (grammarbox) {};
+\end{pgfonlayer}
+\end{scope}
+\end{tikzpicture}
+\end{center}
--- a/Book/Chapter4/Figures/example-of-cyk-algorithm-execution.tex
+++ b/Book/Chapter4/Figures/example-of-cyk-algorithm-execution.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  CYK解码
+\begin{tikzpicture}\scriptsize
+\tikzstyle{alignmentnode} = [rectangle,fill=blue!20,minimum size=0.5em,text=white,inner sep=0.1pt]
+\tikzstyle{selectnode} = [rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt]
+\tikzstyle{srcnode} = [anchor=south west]
+\tikzstyle{chartnode}=[rectangle,minimum size=1.3em,draw]
+%图1
+\begin{scope}[scale=0.5]
+
+\node [anchor=east] (s1) at (0,0) {a};
+\node [anchor=north west] (s2) at ([yshift=-2.5em]s1.south west) {a};
+\node [anchor=north west] (s3) at ([yshift=-2.4em]s2.south west) {b};
+\node [anchor=north west] (s4) at ([yshift=-2.3em]s3.south west) {b};
+\node [anchor=north west] (s5) at ([yshift=-2.2em]s4.south west) {c};
+
+\node [alignmentnode,anchor=west] (cell11) at ([xshift=1.9em]s1.east) {};
+\node [alignmentnode,anchor=west] (cell21) at ([xshift=1.9em]s2.east) {};
+\node [alignmentnode,anchor=west] (cell22) at ([xshift=3.5em]cell21.east) {};
+\node [alignmentnode,anchor=west] (cell31) at ([xshift=2em]s3.east) {};
+\node [alignmentnode,anchor=west] (cell32) at ([xshift=3.5em]cell31.east) {};
+\node [alignmentnode,anchor=west] (cell33) at ([xshift=3.5em]cell32.east) {};
+\node [alignmentnode,anchor=west] (cell41) at ([xshift=1.9em]s4.east) {};
+\node [alignmentnode,anchor=west] (cell42) at ([xshift=3.5em]cell41.east) {};
+\node [alignmentnode,anchor=west] (cell43) at ([xshift=3.5em]cell42.east) {};
+\node [alignmentnode,anchor=west] (cell44) at ([xshift=3.5em]cell43.east) {};
+\node [alignmentnode,anchor=west] (cell51) at ([xshift=1.9em]s5.east) {};
+\node [alignmentnode,anchor=west] (cell52) at ([xshift=3.5em]cell51.east) {};
+\node [alignmentnode,anchor=west] (cell53) at ([xshift=3.5em]cell52.east) {};
+\node [alignmentnode,anchor=west] (cell54) at ([xshift=3.5em]cell53.east) {};
+\node [alignmentnode,anchor=west] (cell55) at ([xshift=3.5em]cell54.east) {};
+
+\node [anchor=north] (l1) at ([yshift=-1em]cell51.south) {\tiny{$l$=1}};
+\node [anchor=north] (l2) at ([yshift=-1em]cell52.south) {\tiny{$l$=2}};
+\node [anchor=north] (l3) at ([yshift=-1em]cell53.south) {\tiny{$l$=3}};
+\node [anchor=north] (l4) at ([yshift=-1em]cell54.south) {\tiny{$l$=4}};
+\node [anchor=north] (l5) at ([yshift=-1em]cell55.south) {\tiny{$l$=5}};
+\node [anchor=north] (caption1) at ([xshift=0.0em,yshift=0.0em]l5.south) {(a)};
+
+\node [anchor=center] (y1) at ([xshift=-2.1em,yshift=2em]cell11.center) {\tiny{\blue 0}};
+\node [anchor=center] (y2) at ([xshift=-2.1em,yshift=2em]cell21.center) {\tiny{\blue 1}};
+\node [anchor=center] (y3) at ([xshift=-2.1em,yshift=2em]cell31.center) {\tiny{\blue 2}};
+\node [anchor=center] (y4) at ([xshift=-2.1em,yshift=2em]cell41.center) {\tiny{\blue 3}};
+\node [anchor=center] (y5) at ([xshift=-2.1em,yshift=2em]cell51.center) {\tiny{\blue 4}};
+\node [anchor=center] (y6) at ([xshift=-2.1em,yshift=-2em]cell51.center) {\tiny{\blue 5}};
+
+\node [anchor=west] (num) at ([xshift=22.3em,yshift=2em]s1.east) {\scriptsize{序号}};
+\node [anchor=west] (kua) at ([xshift=0em]num.east) {\scriptsize{跨度}};
+\node [anchor=west] (tui) at ([xshift=0.4em]kua.east) {\scriptsize{推导}};
+
+\draw[-] ([yshift=-0.1em]num.south west)--([xshift=17em,yshift=-0.1em]num.south west);
+
+{
+\node [anchor=west] (n1) at ([xshift=1.4em,yshift=-1.5em]num.south west) {\scriptsize{1}};
+\node [anchor=west] (k1) at ([yshift=-1.8em]kua.south west) {\scriptsize{[{\blue 0},{\blue 1}]}};
+\node [anchor=west] (t1) at ([xshift=-0.8em,yshift=-1.5em]tui.south west) {\scriptsize{C $\to$ a}};
+\node [anchor=center,selectnode,fill=red!20] (alig11) at (cell11.center) {\tiny{C}};
+}
+\end{scope}
+
+%图2
+\begin{scope}[xshift=22.5em,scale=0.5]
+
+\node [anchor=east] (s1) at (0,0) {a};
+\node [anchor=north west] (s2) at ([yshift=-2.5em]s1.south west) {a};
+\node [anchor=north west] (s3) at ([yshift=-2.4em]s2.south west) {b};
+\node [anchor=north west] (s4) at ([yshift=-2.3em]s3.south west) {b};
+\node [anchor=north west] (s5) at ([yshift=-2.2em]s4.south west) {c};
+
+\node [alignmentnode,anchor=west] (cell11) at ([xshift=1.9em]s1.east) {};
+\node [alignmentnode,anchor=west] (cell21) at ([xshift=1.9em]s2.east) {};
+\node [alignmentnode,anchor=west] (cell22) at ([xshift=3.5em]cell21.east) {};
+\node [alignmentnode,anchor=west] (cell31) at ([xshift=2em]s3.east) {};
+\node [alignmentnode,anchor=west] (cell32) at ([xshift=3.5em]cell31.east) {};
+\node [alignmentnode,anchor=west] (cell33) at ([xshift=3.5em]cell32.east) {};
+\node [alignmentnode,anchor=west] (cell41) at ([xshift=1.9em]s4.east) {};
+\node [alignmentnode,anchor=west] (cell42) at ([xshift=3.5em]cell41.east) {};
+\node [alignmentnode,anchor=west] (cell43) at ([xshift=3.5em]cell42.east) {};
+\node [alignmentnode,anchor=west] (cell44) at ([xshift=3.5em]cell43.east) {};
+\node [alignmentnode,anchor=west] (cell51) at ([xshift=1.9em]s5.east) {};
+\node [alignmentnode,anchor=west] (cell52) at ([xshift=3.5em]cell51.east) {};
+\node [alignmentnode,anchor=west] (cell53) at ([xshift=3.5em]cell52.east) {};
+\node [alignmentnode,anchor=west] (cell54) at ([xshift=3.5em]cell53.east) {};
+\node [alignmentnode,anchor=west] (cell55) at ([xshift=3.5em]cell54.east) {};
+
+\node [anchor=north] (l1) at ([yshift=-1em]cell51.south) {\tiny{$l$=1}};
+\node [anchor=north] (l2) at ([yshift=-1em]cell52.south) {\tiny{$l$=2}};
+\node [anchor=north] (l3) at ([yshift=-1em]cell53.south) {\tiny{$l$=3}};
+\node [anchor=north] (l4) at ([yshift=-1em]cell54.south) {\tiny{$l$=4}};
+\node [anchor=north] (l5) at ([yshift=-1em]cell55.south) {\tiny{$l$=5}};
+\node [anchor=north] (caption2) at ([xshift=0.0em,yshift=0.0em]l5.south) {(b)};
+
+\node [anchor=center] (y1) at ([xshift=-2.1em,yshift=2em]cell11.center) {\tiny{\blue 0}};
+\node [anchor=center] (y2) at ([xshift=-2.1em,yshift=2em]cell21.center) {\tiny{\blue 1}};
+\node [anchor=center] (y3) at ([xshift=-2.1em,yshift=2em]cell31.center) {\tiny{\blue 2}};
+\node [anchor=center] (y4) at ([xshift=-2.1em,yshift=2em]cell41.center) {\tiny{\blue 3}};
+\node [anchor=center] (y5) at ([xshift=-2.1em,yshift=2em]cell51.center) {\tiny{\blue 4}};
+\node [anchor=center] (y6) at ([xshift=-2.1em,yshift=-2em]cell51.center) {\tiny{\blue 5}};
+
+\node [anchor=west] (num) at ([xshift=22.3em,yshift=2em]s1.east) {\scriptsize{序号}};
+\node [anchor=west] (kua) at ([xshift=0em]num.east) {\scriptsize{跨度}};
+\node [anchor=west] (tui) at ([xshift=0.4em]kua.east) {\scriptsize{推导}};
+
+\draw[-] ([yshift=-0.1em]num.south west)--([xshift=17em,yshift=-0.1em]num.south west);
+
+{
+\node [anchor=west] (n1) at ([xshift=1.4em,yshift=-1.5em]num.south west) {\scriptsize{1}};
+\node [anchor=west] (k1) at ([yshift=-1.8em]kua.south west) {\scriptsize{[{\blue 0},{\blue 1}]}};
+\node [anchor=west] (t1) at ([xshift=-0.8em,yshift=-1.5em]tui.south west) {\scriptsize{C $\to$ a}};
+\node [anchor=center,selectnode,fill=red!20] (alig11) at (cell11.center) {\tiny{C}};
+}
+
+{
+\node [anchor=center] (n2) at ([yshift=-2.2em]n1.center) {\scriptsize{2}};
+\node [anchor=center] (k2) at ([yshift=-2.2em]k1.center) {\scriptsize{[{\blue 1},{\blue 2}]}};
+\node [anchor=west] (t2) at ([yshift=-2.2em]t1.west) {\scriptsize{C $\to$ a}};
+\node [anchor=center,selectnode,fill=red!20] (alig21) at (cell21.center) {\tiny{C}};
+}
+
+{
+\node [anchor=center] (n3) at ([yshift=-2.2em]n2.center) {\scriptsize{3}};
+\node [anchor=center] (k3) at ([yshift=-2.2em]k2.center) {\scriptsize{[{\blue 2},{\blue 3}]}};
+\node [anchor=west] (t3) at ([yshift=-2.2em]t2.west) {\scriptsize{D $\to$ b}};
+\node [anchor=center,selectnode,fill=red!20] (alig31) at (cell31.center) {\tiny{D}};
+}
+
+{
+\node [anchor=center] (n4) at ([yshift=-2.2em]n3.center) {\scriptsize{4}};
+\node [anchor=center] (k4) at ([yshift=-2.2em]k3.center) {\scriptsize{[{\blue 3},{\blue 4}]}};
+\node [anchor=west] (t4) at ([yshift=-2.2em]t3.west) {\scriptsize{D $\to$ b}};
+\node [anchor=center,selectnode,fill=red!20] (alig41) at (cell41.center) {\tiny{D}};
+}
+
+{
+\node [anchor=center] (n5) at ([yshift=-2.2em]n4.center) {\scriptsize{5}};
+\node [anchor=center] (k5) at ([yshift=-2.2em]k4.center) {\scriptsize{[{\blue 4},{\blue 5}]}};
+\node [anchor=west] (t5) at ([yshift=-2.2em]t4.west) {\scriptsize{B $\to$ c , }};
+\node [anchor=east] (t52) at ([xshift=-1em,yshift=-2em]t5.east) {\scriptsize{E $\to$ c}};
+\node [anchor=center,selectnode,fill=red!20] (alig51) at (cell51.center) {\tiny{B,E}};
+}
+\end{scope}
+
+% Panel 3, captioned (c): chart with all length-1 spans and the first two length-2 spans filled in
+\begin{scope}[yshift=-16.0em,scale=0.5]
+
+% Input symbols a a b b c, stacked top to bottom (one symbol per chart row)
+\node [anchor=east] (s1) at (0,0) {a};
+\node [anchor=north west] (s2) at ([yshift=-2.5em]s1.south west) {a};
+\node [anchor=north west] (s3) at ([yshift=-2.4em]s2.south west) {b};
+\node [anchor=north west] (s4) at ([yshift=-2.3em]s3.south west) {b};
+\node [anchor=north west] (s5) at ([yshift=-2.2em]s4.south west) {c};
+
+% Triangular grid of empty chart cells: cellRC is the C-th cell of row R (row R has R cells).
+% The alignmentnode style is not defined in this chunk.
+\node [alignmentnode,anchor=west] (cell11) at ([xshift=1.9em]s1.east) {};
+\node [alignmentnode,anchor=west] (cell21) at ([xshift=1.9em]s2.east) {};
+\node [alignmentnode,anchor=west] (cell22) at ([xshift=3.5em]cell21.east) {};
+\node [alignmentnode,anchor=west] (cell31) at ([xshift=2em]s3.east) {};
+\node [alignmentnode,anchor=west] (cell32) at ([xshift=3.5em]cell31.east) {};
+\node [alignmentnode,anchor=west] (cell33) at ([xshift=3.5em]cell32.east) {};
+\node [alignmentnode,anchor=west] (cell41) at ([xshift=1.9em]s4.east) {};
+\node [alignmentnode,anchor=west] (cell42) at ([xshift=3.5em]cell41.east) {};
+\node [alignmentnode,anchor=west] (cell43) at ([xshift=3.5em]cell42.east) {};
+\node [alignmentnode,anchor=west] (cell44) at ([xshift=3.5em]cell43.east) {};
+\node [alignmentnode,anchor=west] (cell51) at ([xshift=1.9em]s5.east) {};
+\node [alignmentnode,anchor=west] (cell52) at ([xshift=3.5em]cell51.east) {};
+\node [alignmentnode,anchor=west] (cell53) at ([xshift=3.5em]cell52.east) {};
+\node [alignmentnode,anchor=west] (cell54) at ([xshift=3.5em]cell53.east) {};
+\node [alignmentnode,anchor=west] (cell55) at ([xshift=3.5em]cell54.east) {};
+
+% Column labels l=1..5 below the bottom row, followed by the sub-figure caption "(c)"
+\node [anchor=north] (l1) at ([yshift=-1em]cell51.south) {\tiny{$l$=1}};
+\node [anchor=north] (l2) at ([yshift=-1em]cell52.south) {\tiny{$l$=2}};
+\node [anchor=north] (l3) at ([yshift=-1em]cell53.south) {\tiny{$l$=3}};
+\node [anchor=north] (l4) at ([yshift=-1em]cell54.south) {\tiny{$l$=4}};
+\node [anchor=north] (l5) at ([yshift=-1em]cell55.south) {\tiny{$l$=5}};
+\node [anchor=north] (caption3) at ([xshift=0.0em,yshift=0.0em]l5.south) {(c)};
+
+% Blue position indices 0..5: y1-y5 sit up and to the left of the first cell of each row,
+% y6 sits down and to the left of cell51
+\node [anchor=center] (y1) at ([xshift=-2.1em,yshift=2em]cell11.center) {\tiny{\blue 0}};
+\node [anchor=center] (y2) at ([xshift=-2.1em,yshift=2em]cell21.center) {\tiny{\blue 1}};
+\node [anchor=center] (y3) at ([xshift=-2.1em,yshift=2em]cell31.center) {\tiny{\blue 2}};
+\node [anchor=center] (y4) at ([xshift=-2.1em,yshift=2em]cell41.center) {\tiny{\blue 3}};
+\node [anchor=center] (y5) at ([xshift=-2.1em,yshift=2em]cell51.center) {\tiny{\blue 4}};
+\node [anchor=center] (y6) at ([xshift=-2.1em,yshift=-2em]cell51.center) {\tiny{\blue 5}};
+
+% Header of the side table; the three Chinese headings read "No.", "span" and "derivation"
+\node [anchor=west] (num) at ([xshift=22.3em,yshift=2em]s1.east) {\scriptsize{序号}};
+\node [anchor=west] (kua) at ([xshift=0em]num.east) {\scriptsize{跨度}};
+\node [anchor=west] (tui) at ([xshift=0.4em]kua.east) {\scriptsize{推导}};
+
+% Horizontal rule under the table header
+\draw[-] ([yshift=-0.1em]num.south west)--([xshift=17em,yshift=-0.1em]num.south west);
+
+% Each brace group below adds one table row (n*, k*, t*) and highlights the matching chart cell
+% (alig*, drawn with the externally defined selectnode style and a red!20 fill).
+
+% Row 1: span [0,1], rule C -> a, cell11
+{
+\node [anchor=west] (n1) at ([xshift=1.4em,yshift=-1.5em]num.south west) {\scriptsize{1}};
+\node [anchor=west] (k1) at ([yshift=-1.8em]kua.south west) {\scriptsize{[{\blue 0},{\blue 1}]}};
+\node [anchor=west] (t1) at ([xshift=-0.8em,yshift=-1.5em]tui.south west) {\scriptsize{C $\to$ a}};
+\node [anchor=center,selectnode,fill=red!20] (alig11) at (cell11.center) {\tiny{C}};
+}
+
+% Row 2: span [1,2], rule C -> a, cell21
+{
+\node [anchor=center] (n2) at ([yshift=-2.2em]n1.center) {\scriptsize{2}};
+\node [anchor=center] (k2) at ([yshift=-2.2em]k1.center) {\scriptsize{[{\blue 1},{\blue 2}]}};
+\node [anchor=west] (t2) at ([yshift=-2.2em]t1.west) {\scriptsize{C $\to$ a}};
+\node [anchor=center,selectnode,fill=red!20] (alig21) at (cell21.center) {\tiny{C}};
+}
+
+% Row 3: span [2,3], rule D -> b, cell31
+{
+\node [anchor=center] (n3) at ([yshift=-2.2em]n2.center) {\scriptsize{3}};
+\node [anchor=center] (k3) at ([yshift=-2.2em]k2.center) {\scriptsize{[{\blue 2},{\blue 3}]}};
+\node [anchor=west] (t3) at ([yshift=-2.2em]t2.west) {\scriptsize{D $\to$ b}};
+\node [anchor=center,selectnode,fill=red!20] (alig31) at (cell31.center) {\tiny{D}};
+}
+
+% Row 4: span [3,4], rule D -> b, cell41
+{
+\node [anchor=center] (n4) at ([yshift=-2.2em]n3.center) {\scriptsize{4}};
+\node [anchor=center] (k4) at ([yshift=-2.2em]k3.center) {\scriptsize{[{\blue 3},{\blue 4}]}};
+\node [anchor=west] (t4) at ([yshift=-2.2em]t3.west) {\scriptsize{D $\to$ b}};
+\node [anchor=center,selectnode,fill=red!20] (alig41) at (cell41.center) {\tiny{D}};
+}
+
+% Row 5: span [4,5], two rules (B -> c and E -> c, the second on its own line as t52), cell51
+{
+\node [anchor=center] (n5) at ([yshift=-2.2em]n4.center) {\scriptsize{5}};
+\node [anchor=center] (k5) at ([yshift=-2.2em]k4.center) {\scriptsize{[{\blue 4},{\blue 5}]}};
+\node [anchor=west] (t5) at ([yshift=-2.2em]t4.west) {\scriptsize{B $\to$ c , }};
+\node [anchor=east] (t52) at ([xshift=-1em,yshift=-2em]t5.east) {\scriptsize{E $\to$ c}};
+\node [anchor=center,selectnode,fill=red!20] (alig51) at (cell51.center) {\tiny{B,E}};
+}
+
+% Row 6: span [0,2], derivation "none"; cell22 is highlighted but left empty.
+% The larger -4em step skips the extra line taken by t52 in row 5.
+{
+\node [anchor=center] (n6) at ([yshift=-4em]n5.center) {\scriptsize{6}};
+\node [anchor=center] (k6) at ([yshift=-4em]k5.center) {\scriptsize{[{\blue 0},{\blue 2}]}};
+\node [anchor=west] (t6) at ([xshift=0.2em,yshift=-4em]t5.west) {\scriptsize{none}};
+\node [anchor=center,selectnode,fill=red!20] (alig22) at (cell22.center) {\tiny{}};
+}
+
+% Row 7: span [1,3], rule A -> CD, cell32
+{
+\node [anchor=center] (n7) at ([yshift=-2.2em]n6.center) {\scriptsize{7}};
+\node [anchor=center] (k7) at ([yshift=-2.2em]k6.center) {\scriptsize{[{\blue 1},{\blue 3}]}};
+\node [anchor=west] (t7) at ([yshift=-2.2em]t6.west) {\scriptsize{A $\to$ CD}};
+\node [anchor=center,selectnode,fill=red!20] (alig32) at (cell32.center) {\tiny{A}};
+}
+\end{scope}
+
+
+% Panel 4, captioned (d): chart with every cell highlighted, ending with S over span [0,5]
+\begin{scope}[xshift=22.5em,yshift=-16.0em,scale=0.5]
+
+% Input symbols a a b b c, stacked top to bottom (one symbol per chart row)
+\node [anchor=east] (s1) at (0,0) {a};
+\node [anchor=north west] (s2) at ([yshift=-2.5em]s1.south west) {a};
+\node [anchor=north west] (s3) at ([yshift=-2.4em]s2.south west) {b};
+\node [anchor=north west] (s4) at ([yshift=-2.3em]s3.south west) {b};
+\node [anchor=north west] (s5) at ([yshift=-2.2em]s4.south west) {c};
+
+% Triangular grid of empty chart cells: cellRC is the C-th cell of row R (row R has R cells).
+% The alignmentnode style is not defined in this chunk.
+\node [alignmentnode,anchor=west] (cell11) at ([xshift=1.9em]s1.east) {};
+\node [alignmentnode,anchor=west] (cell21) at ([xshift=1.9em]s2.east) {};
+\node [alignmentnode,anchor=west] (cell22) at ([xshift=3.5em]cell21.east) {};
+\node [alignmentnode,anchor=west] (cell31) at ([xshift=2em]s3.east) {};
+\node [alignmentnode,anchor=west] (cell32) at ([xshift=3.5em]cell31.east) {};
+\node [alignmentnode,anchor=west] (cell33) at ([xshift=3.5em]cell32.east) {};
+\node [alignmentnode,anchor=west] (cell41) at ([xshift=1.9em]s4.east) {};
+\node [alignmentnode,anchor=west] (cell42) at ([xshift=3.5em]cell41.east) {};
+\node [alignmentnode,anchor=west] (cell43) at ([xshift=3.5em]cell42.east) {};
+\node [alignmentnode,anchor=west] (cell44) at ([xshift=3.5em]cell43.east) {};
+\node [alignmentnode,anchor=west] (cell51) at ([xshift=1.9em]s5.east) {};
+\node [alignmentnode,anchor=west] (cell52) at ([xshift=3.5em]cell51.east) {};
+\node [alignmentnode,anchor=west] (cell53) at ([xshift=3.5em]cell52.east) {};
+\node [alignmentnode,anchor=west] (cell54) at ([xshift=3.5em]cell53.east) {};
+\node [alignmentnode,anchor=west] (cell55) at ([xshift=3.5em]cell54.east) {};
+
+% Column labels l=1..5 below the bottom row, followed by the sub-figure caption "(d)"
+\node [anchor=north] (l1) at ([yshift=-1em]cell51.south) {\tiny{$l$=1}};
+\node [anchor=north] (l2) at ([yshift=-1em]cell52.south) {\tiny{$l$=2}};
+\node [anchor=north] (l3) at ([yshift=-1em]cell53.south) {\tiny{$l$=3}};
+\node [anchor=north] (l4) at ([yshift=-1em]cell54.south) {\tiny{$l$=4}};
+\node [anchor=north] (l5) at ([yshift=-1em]cell55.south) {\tiny{$l$=5}};
+\node [anchor=north] (caption4) at ([xshift=0.0em,yshift=0.0em]l5.south) {(d)};
+
+% Blue position indices 0..5: y1-y5 sit up and to the left of the first cell of each row,
+% y6 sits down and to the left of cell51
+\node [anchor=center] (y1) at ([xshift=-2.1em,yshift=2em]cell11.center) {\tiny{\blue 0}};
+\node [anchor=center] (y2) at ([xshift=-2.1em,yshift=2em]cell21.center) {\tiny{\blue 1}};
+\node [anchor=center] (y3) at ([xshift=-2.1em,yshift=2em]cell31.center) {\tiny{\blue 2}};
+\node [anchor=center] (y4) at ([xshift=-2.1em,yshift=2em]cell41.center) {\tiny{\blue 3}};
+\node [anchor=center] (y5) at ([xshift=-2.1em,yshift=2em]cell51.center) {\tiny{\blue 4}};
+\node [anchor=center] (y6) at ([xshift=-2.1em,yshift=-2em]cell51.center) {\tiny{\blue 5}};
+
+% Header of the side table; the three Chinese headings read "No.", "span" and "derivation"
+\node [anchor=west] (num) at ([xshift=22.3em,yshift=2em]s1.east) {\scriptsize{序号}};
+\node [anchor=west] (kua) at ([xshift=0em]num.east) {\scriptsize{跨度}};
+\node [anchor=west] (tui) at ([xshift=0.4em]kua.east) {\scriptsize{推导}};
+
+% Horizontal rule under the table header
+\draw[-] ([yshift=-0.1em]num.south west)--([xshift=17em,yshift=-0.1em]num.south west);
+
+% Each brace group below adds one table row (n*, k*, t*) and highlights the matching chart cell
+% (alig*, drawn with the externally defined selectnode style and a red!20 fill).
+
+% Row 1: span [0,1], rule C -> a, cell11
+{
+\node [anchor=west] (n1) at ([xshift=1.4em,yshift=-1.5em]num.south west) {\scriptsize{1}};
+\node [anchor=west] (k1) at ([yshift=-1.8em]kua.south west) {\scriptsize{[{\blue 0},{\blue 1}]}};
+\node [anchor=west] (t1) at ([xshift=-0.8em,yshift=-1.5em]tui.south west) {\scriptsize{C $\to$ a}};
+\node [anchor=center,selectnode,fill=red!20] (alig11) at (cell11.center) {\tiny{C}};
+}
+
+% Row 2: span [1,2], rule C -> a, cell21
+{
+\node [anchor=center] (n2) at ([yshift=-2.2em]n1.center) {\scriptsize{2}};
+\node [anchor=center] (k2) at ([yshift=-2.2em]k1.center) {\scriptsize{[{\blue 1},{\blue 2}]}};
+\node [anchor=west] (t2) at ([yshift=-2.2em]t1.west) {\scriptsize{C $\to$ a}};
+\node [anchor=center,selectnode,fill=red!20] (alig21) at (cell21.center) {\tiny{C}};
+}
+
+% Row 3: span [2,3], rule D -> b, cell31
+{
+\node [anchor=center] (n3) at ([yshift=-2.2em]n2.center) {\scriptsize{3}};
+\node [anchor=center] (k3) at ([yshift=-2.2em]k2.center) {\scriptsize{[{\blue 2},{\blue 3}]}};
+\node [anchor=west] (t3) at ([yshift=-2.2em]t2.west) {\scriptsize{D $\to$ b}};
+\node [anchor=center,selectnode,fill=red!20] (alig31) at (cell31.center) {\tiny{D}};
+}
+
+% Row 4: span [3,4], rule D -> b, cell41
+{
+\node [anchor=center] (n4) at ([yshift=-2.2em]n3.center) {\scriptsize{4}};
+\node [anchor=center] (k4) at ([yshift=-2.2em]k3.center) {\scriptsize{[{\blue 3},{\blue 4}]}};
+\node [anchor=west] (t4) at ([yshift=-2.2em]t3.west) {\scriptsize{D $\to$ b}};
+\node [anchor=center,selectnode,fill=red!20] (alig41) at (cell41.center) {\tiny{D}};
+}
+
+% Row 5: span [4,5], two rules (B -> c and E -> c, the second on its own line as t52), cell51
+{
+\node [anchor=center] (n5) at ([yshift=-2.2em]n4.center) {\scriptsize{5}};
+\node [anchor=center] (k5) at ([yshift=-2.2em]k4.center) {\scriptsize{[{\blue 4},{\blue 5}]}};
+\node [anchor=west] (t5) at ([yshift=-2.2em]t4.west) {\scriptsize{B $\to$ c , }};
+\node [anchor=east] (t52) at ([xshift=-1em,yshift=-2em]t5.east) {\scriptsize{E $\to$ c}};
+\node [anchor=center,selectnode,fill=red!20] (alig51) at (cell51.center) {\tiny{B,E}};
+}
+
+% Row 6: span [0,2], derivation "none"; cell22 is highlighted but left empty.
+% NOTE(review): t6 uses yshift=-4.2em here while n6/k6 use -4em and panel (c) uses -4em for t6;
+% confirm whether the 0.2em difference is intentional.
+{
+\node [anchor=center] (n6) at ([yshift=-4em]n5.center) {\scriptsize{6}};
+\node [anchor=center] (k6) at ([yshift=-4em]k5.center) {\scriptsize{[{\blue 0},{\blue 2}]}};
+\node [anchor=west] (t6) at ([xshift=0.2em,yshift=-4.2em]t5.west) {\scriptsize{none}};
+\node [anchor=center,selectnode,fill=red!20] (alig22) at (cell22.center) {\tiny{}};
+}
+
+% Row 7: span [1,3], rule A -> CD, cell32
+{
+\node [anchor=center] (n7) at ([yshift=-2.2em]n6.center) {\scriptsize{7}};
+\node [anchor=center] (k7) at ([yshift=-2.2em]k6.center) {\scriptsize{[{\blue 1},{\blue 3}]}};
+\node [anchor=west] (t7) at ([yshift=-2.2em]t6.west) {\scriptsize{A $\to$ CD}};
+\node [anchor=center,selectnode,fill=red!20] (alig32) at (cell32.center) {\tiny{A}};
+}
+
+
+% An ellipsis stands in for the rows between 7 and 15, then row 15: span [0,5], rule S -> AB.
+% NOTE(review): t8 is set in \tiny while every other derivation uses \scriptsize; confirm intent.
+{
+\node [anchor=center] (sep1) at ([yshift=-1.7em]n7.center) {\scriptsize{...}};
+\node [anchor=center] (n8) at ([yshift=-3.4em]n7.center) {\scriptsize{15}};
+\node [anchor=center] (k8) at ([yshift=-3.4em]k7.center) {\scriptsize{[{\blue 0},{\blue 5}]}};
+\node [anchor=west] (t8) at ([yshift=-3.4em]t7.west) {\tiny{S $\to$ AB}};
+
+% Highlight all remaining cells; only cell43 (F), cell44 (A) and cell55 (S) carry a symbol
+\node [anchor=center,selectnode,fill=red!20] (alig33) at (cell33.center) {\tiny{}};
+\node [anchor=center,selectnode,fill=red!20] (alig42) at (cell42.center) {\tiny{}};
+\node [anchor=center,selectnode,fill=red!20] (alig43) at (cell43.center) {\tiny{F}};
+\node [anchor=center,selectnode,fill=red!20] (alig44) at (cell44.center) {\tiny{A}};
+\node [anchor=center,selectnode,fill=red!20] (alig52) at (cell52.center) {\tiny{}};
+\node [anchor=center,selectnode,fill=red!20] (alig53) at (cell53.center) {\tiny{}};
+\node [anchor=center,selectnode,fill=red!20] (alig54) at (cell54.center) {\tiny{}};
+\node [anchor=center,selectnode,fill=red!20] (alig55) at (cell55.center) {\tiny{S}};
+}
+\end{scope}
+
+
+
+
+
+\end{tikzpicture}
+
+
--- a/Book/Chapter4/Figures/example-of-hyper-graph.tex
+++ b/Book/Chapter4/Figures/example-of-hyper-graph.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Tree-based decoding - chart-based decoding: example of a hypergraph
+\begin{center}
+\begin{tikzpicture}\footnotesize
+\begin{scope}[scale=0.7]
+% Five circular vertices (s1..s5). Each carries two text nodes placed inside the circle:
+% tN1 holds the category label and tN2 holds the span.
+
+% s1: VP [0,2]
+\node [anchor=center,draw,thick,circle,inner sep=13pt,ublue] (s1) at (0,0) {};
+\node [anchor=north] (t11) at ([yshift=-0.8em]s1.north) {VP};
+\node [anchor=north] (t12) at ([yshift=-0.3em]t11.south) {[0,2]};
+
+% s2: NP [0,2]
+\node [anchor=center,draw,thick,circle,inner sep=13pt,ublue] (s2) at ([xshift=12em,yshift=-5em]s1.north) {};
+\node [anchor=north] (t21) at ([yshift=-0.8em]s2.north) {NP};
+\node [anchor=north] (t22) at ([yshift=-0.3em]t21.south) {[0,2]};
+
+% s3: VV [0,1]
+\node [anchor=center,draw,thick,circle,inner sep=13pt,ublue] (s3) at ([xshift=-6em,yshift=-13em]s1.south) {};
+\node [anchor=north] (t31) at ([yshift=-0.8em]s3.north) {VV};
+\node [anchor=north] (t32) at ([yshift=-0.3em]t31.south) {[0,1]};
+
+% s4: NN [1,2]
+\node [anchor=center,draw,thick,circle,inner sep=13pt,ublue] (s4) at ([xshift=13em,yshift=2.9em]s3.south) {};
+\node [anchor=north] (t41) at ([yshift=-0.8em]s4.north) {NN};
+\node [anchor=north] (t42) at ([yshift=-0.3em]t41.south) {[1,2]};
+
+% s5: NP [1,2]
+\node [anchor=center,draw,thick,circle,inner sep=13pt,ublue] (s5) at ([xshift=13em,yshift=2.9em]s4.south) {};
+\node [anchor=north] (t51) at ([yshift=-0.8em]s5.north) {NP};
+\node [anchor=north] (t52) at ([yshift=-0.3em]t51.south) {[1,2]};
+
+% Red arrows: from s3 and from s5, both ending at the bottom of s1
+{
+\draw [->,red!50,very thick] ([xshift=-1em,yshift=-0.3em]s3.north) .. controls +(north:10em) and +(south:10em) .. ([xshift=0em,yshift=0em]s1.south);
+\draw [->,red!50,very thick] ([xshift=-1em,yshift=-0.3em]s5.north) .. controls +(north:8em) and +(south:14em) .. ([xshift=0em,yshift=0em]s1.south);
+}
+
+% Blue arrows: from s4 and from s5, both ending at the bottom of s2
+{
+\draw [->,blue!50,very thick] ([xshift=-1em,yshift=-0.3em]s4.north) .. controls +(north:8em) and +(south:8em) .. ([xshift=0em,yshift=0em]s2.south);
+\draw [->,blue!50,very thick] ([xshift=1em,yshift=-0.3em]s5.north) .. controls +(north:9em) and +(south:7em) .. ([xshift=0em,yshift=0em]s2.south);
+}
+
+% Edge labels. They get their own node names (e1, e2) so they no longer rebind
+% t51/t52, which already name the label nodes of s5.
+\node [anchor=north] (e1) at ([yshift=7em]s3.north) {edge1};
+\node [anchor=north] (e2) at ([xshift=-2em,yshift=7.5em]s5.north) {edge2};
+\end{scope}
+\end{tikzpicture}
+\end{center}
--- a/Book/Chapter4/Figures/example-of-hypothesis-recombination.tex
+++ b/Book/Chapter4/Figures/example-of-hypothesis-recombination.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% 假设重组
+\begin{tikzpicture}
+% Left half: the hypotheses before recombination.
+% Naming scheme: hN is a hypothesis box, hlN is the black index badge in its top-left corner,
+% ptN is the rotated black probability tag attached to its east side.
+\begin{scope}
+% Upper example: null -> an -> apple, and null -> "an apple"
+{
+\node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h0) at (0,0) {\tiny{null}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl0) at (h0.north west) {\tiny{{\color{white} \textbf{0}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt0) at (h0.east) {\tiny{{\color{white} \textbf{P=1}}}};
+
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h2) at ([xshift=1.8em,yshift=2.5em]h0.east) {\tiny{an}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h3) at ([xshift=1.8em]h2.east) {\tiny{apple}};
+
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl2) at (h2.north west) {\tiny{{\color{white} \textbf{1}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl3) at (h3.north west) {\tiny{{\color{white} \textbf{2}}}};
+
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt2) at (h2.east) {\tiny{{\color{white} \textbf{P=.3}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt3) at (h3.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h2.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt2.south) -- ([xshift=-0.1em]h3.west);
+
+% Competing hypothesis "an apple" (badge 1-2), reached directly from null
+{
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h1) at ([xshift=5.42em]h0.east) {\tiny{an apple}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl1) at (h1.north west) {\tiny{{\color{white} \textbf{1-2}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt1) at (h1.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+\draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h1.west);
+}
+}
+% Lower example: null -> it -> "is not", and null -> he -> "is not"
+{
+\node [anchor=north west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h4) at ([yshift=-4em]h0.south west) {\tiny{null}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h5) at ([xshift=1.8em]h4.east) {\tiny{he}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h6) at ([xshift=1.8em,yshift=2.5em]h4.east) {\tiny{it}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h8) at ([xshift=1.8em]h6.east) {\tiny{is not}};
+
+% Index badges; each one is named after the box it decorates so that no name is bound twice
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl4) at (h4.north west) {\tiny{{\color{white} \textbf{0}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl5) at (h5.north west) {\tiny{{\color{white} \textbf{1}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl6) at (h6.north west) {\tiny{{\color{white} \textbf{1}}}};
+
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl8) at (h8.north west) {\tiny{{\color{white} \textbf{2}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt4) at (h4.east) {\tiny{{\color{white} \textbf{P=1}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt5) at (h5.east) {\tiny{{\color{white} \textbf{P=.3}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt6) at (h6.east) {\tiny{{\color{white} \textbf{P=.4}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt8) at (h8.east) {\tiny{{\color{white} \textbf{P=.2}}}};
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=-0.1em]h5.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=-0.1em]h6.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt6.south) -- ([xshift=-0.1em]h8.west);
+
+% Second "is not" hypothesis, extending "he"
+{
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h7) at ([xshift=1.8em]h5.east) {\tiny{is not}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt7) at (h7.east) {\tiny{{\color{white} \textbf{P=.2}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl7) at (h7.north west) {\tiny{{\color{white} \textbf{2}}}};
+\draw [->,very thick,ublue] ([xshift=0.1em]pt5.south) -- ([xshift=-0.1em]h7.west);
+}
+}
+
+% Disabled title node (its text reads "original hypotheses"); kept for reference
+%\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em,opacity=0.7] (h1) at ([xshift=-1em,yshift=2em]h2.north) {原假设};
+
+\end{scope}
+
+
+
+
+
+% Right half: the same hypotheses after recombination.
+% Naming scheme: hN is a hypothesis box, hlN is the black index badge in its top-left corner,
+% ptN is the rotated black probability tag attached to its east side.
+\begin{scope}[xshift = 13em, yshift = 0em]
+% Upper example: null -> an -> apple is kept
+{
+\node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h0) at (0,0) {\tiny{null}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl0) at (h0.north west) {\tiny{{\color{white} \textbf{0}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt0) at (h0.east) {\tiny{{\color{white} \textbf{P=1}}}};
+
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h2) at ([xshift=1.8em,yshift=2.5em]h0.east) {\tiny{an}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h3) at ([xshift=1.8em]h2.east) {\tiny{apple}};
+
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl2) at (h2.north west) {\tiny{{\color{white} \textbf{1}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl3) at (h3.north west) {\tiny{{\color{white} \textbf{2}}}};
+
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt2) at (h2.east) {\tiny{{\color{white} \textbf{P=.3}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt3) at (h3.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h2.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt2.south) -- ([xshift=-0.1em]h3.west);
+
+% Red arrow from the null hypothesis straight to the bottom of "apple"
+{
+\draw [->,ultra thick,red,line width=2pt,opacity=0.7] ([xshift=0.1em]pt0.south) -- ([xshift=-1em,yshift=-0.1em]h3.south);
+}
+}
+% Lower example: null -> it -> "is not" is kept
+{
+\node [anchor=north west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h4) at ([yshift=-4em]h0.south west) {\tiny{null}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h5) at ([xshift=1.8em]h4.east) {\tiny{he}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h6) at ([xshift=1.8em,yshift=2.5em]h4.east) {\tiny{it}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em] (h8) at ([xshift=1.8em]h6.east) {\tiny{is not}};
+
+% Index badges; each one is named after the box it decorates so that no name is bound twice
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl4) at (h4.north west) {\tiny{{\color{white} \textbf{0}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl5) at (h5.north west) {\tiny{{\color{white} \textbf{1}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl6) at (h6.north west) {\tiny{{\color{white} \textbf{1}}}};
+
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl8) at (h8.north west) {\tiny{{\color{white} \textbf{2}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt4) at (h4.east) {\tiny{{\color{white} \textbf{P=1}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt5) at (h5.east) {\tiny{{\color{white} \textbf{P=.3}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt6) at (h6.east) {\tiny{{\color{white} \textbf{P=.4}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt8) at (h8.east) {\tiny{{\color{white} \textbf{P=.2}}}};
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=-0.1em]h5.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=-0.1em]h6.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt6.south) -- ([xshift=-0.1em]h8.west);
+
+% Red arrow from "he" to the bottom of the kept "is not" box
+{
+\draw [->,ultra thick,red,line width=2pt,opacity=0.7] ([xshift=0.1em]pt5.south) -- ([xshift=-1em,yshift=-0.1em]h8.south);
+}
+}
+
+
+
+% The two hypotheses "an apple" (h1) and the lower "is not" (h7), drawn at opacity 0.3
+{
+{
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em,opacity=0.3] (h1) at ([xshift=5.42em]h0.east) {\tiny{an apple}};
+\node [anchor=north west,inner sep=1.0pt,fill=black,opacity=0.3] (hl1) at (h1.north west) {\tiny{{\color{white} \textbf{1-2}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black,opacity=0.3] (pt1) at (h1.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+}
+{
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em,opacity=0.3] (h7) at ([xshift=1.8em]h5.east) {\tiny{is not}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black,opacity=0.3] (pt7) at (h7.east) {\tiny{{\color{white} \textbf{P=.2}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black,opacity=0.3] (hl7) at (h7.north west) {\tiny{{\color{white} \textbf{2}}}};
+}
+}
+
+% Two-line annotation next to each faded box; the text reads "discard the lower-probability hypothesis"
+\node [anchor=west] (l1) at ([xshift=1em, yshift=0.5em]h1.east) {\scriptsize{舍弃概率}};
+\node [anchor=west] (l11) at ([xshift=0em, yshift=-1em]l1.west) {\scriptsize{较低假设}};
+\node [anchor=west] (l2) at ([xshift=1em, yshift=0.5em]h7.east) {\scriptsize{舍弃概率}};
+\node [anchor=west] (l21) at ([xshift=0em, yshift=-1em]l2.west) {\scriptsize{较低假设}};
+
+% Disabled title node (its text reads "recombined hypotheses"); kept for reference
+%\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.0em,opacity=0.7] (h1) at ([xshift=-1em,yshift=2em]h2.north) {重组假设};
+
+\end{scope}
+
+
+
+
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-n-gram-1.tex
+++ b/Book/Chapter4/Figures/example-of-n-gram-1.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Introducing phrase translation: translation table with single-word and multi-word entries
+{\small
+  \begin{tabular}{l | l}
+    % Header row: the first heading word is struck out in red, followed by the replacement heading
+    {{\red{\sout{单词}}}词串翻译表} & P \\
+    \hline
+    % Single-word entries
+    我 $\to$ I & 0.6 \\
+    喜欢 $\to$ like & 0.3 \\
+    红 $\to$ red & 0.8 \\
+    红 $\to$ black & 0.1 \\
+    茶 $\to$ tea & 0.8 \\
+    % Multi-word entries
+    我 喜欢 $\to$ I like & 0.3 \\
+    我 喜欢 $\to$ I liked & 0.2 \\
+    绿 茶 $\to$ green tea & 0.5 \\
+    绿 茶 $\to$ the green tea & 0.1 \\
+    红 茶 $\to$ black tea & 0.6 \\
+    % Closing row: ellipsis with an empty probability cell
+    ... &
+  \end{tabular}
+}
--- a/Book/Chapter4/Figures/example-of-n-gram-2.tex
+++ b/Book/Chapter4/Figures/example-of-n-gram-2.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Introducing phrase translation: a "No" example stacked above a "Yes" example
+\begin{minipage}[c]{0.48\linewidth}
+\vspace{1em}
+\begin{center}
+\begin{tikzpicture}
+
+% ---- Upper example: four separate links, marked "No" ----
+\begin{scope}
+  % Source sentence s
+  \begin{scope}
+  {\footnotesize
+    \node[anchor=west] (s1) at (0, 0) {我};
+    \node[anchor=west] (s2) at ([xshift=1.0em] s1.east) {喜欢};
+    \node[anchor=west] (s3) at ([xshift=1.0em] s2.east) {\red{红}};
+    \node[anchor=west] (s4) at ([xshift=1.0em] s3.east) {茶};
+    \node[anchor=east] (s) at (s1.west) {$\textbf{s}=$};
+  }
+  \end{scope}
+
+  % Target sentence t, 3em lower
+  \begin{scope}[yshift=-3.0em]
+  {\small
+    \node[anchor=west] (t1) at (0.25em, 0) {I};
+    \node[anchor=west] (t2) at ([xshift=0.8em, yshift=-0.0em] t1.east) {like};
+    \node[anchor=west] (t3) at ([xshift=0.6em, yshift=-0.0em] t2.east) {red};
+    \node[anchor=west] (t4) at ([xshift=1.15em, yshift=-0.1em] t3.east) {tea};
+    \node[anchor=east] (t) at ([xshift=-0.2em] t1.west) {$\textbf{t}=$};
+  }
+  \end{scope}
+
+  % One link per source/target word pair
+  \draw[-] (s1.south) -- (t1.north);
+  \draw[-] (s2.south) .. controls +(south:1.0em) and +(north:1.0em) .. (t2.north);
+  \draw[-] (s3.south) .. controls +(south:1.0em) and +(north:1.0em) .. (t3.north);
+  \draw[-] (s4.south) -- (t4.north);
+
+  % Round "No" badge to the lower right of the source sentence
+  \node[anchor=center, draw=ublue, circle, thick, fill=white, inner sep=2pt,
+        circular drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
+    (head1) at ([xshift=2.5em, yshift=-0.5em] s4.south east) {{\color{red} \small{\textbf{No}}}};
+
+\end{scope}
+
+% ---- Lower example: the last two words on each side are boxed and linked as one unit, marked "Yes" ----
+{
+\begin{scope}[yshift=-5.0em]
+  % Source sentence s
+  \begin{scope}
+  {\footnotesize
+    \node[anchor=west] (s1) at (0, 0) {我};
+    \node[anchor=west] (s2) at ([xshift=1.0em] s1.east) {喜欢};
+    \node[anchor=west] (s3) at ([xshift=1.0em] s2.east) {\red{红}};
+    \node[anchor=west] (s4) at ([xshift=1.0em] s3.east) {茶};
+    \node[anchor=east] (s) at (s1.west) {$\textbf{s}=$};
+  }
+  \end{scope}
+
+  % Target sentence t, 3em lower
+  \begin{scope}[yshift=-3.0em]
+  {\small
+    \node[anchor=west] (t1) at (0.25em, 0) {I};
+    \node[anchor=west] (t2) at ([xshift=0.8em, yshift=-0.0em] t1.east) {like};
+    \node[anchor=west] (t3) at ([xshift=0.6em, yshift=-0.0em] t2.east) {black};
+    \node[anchor=west] (t4) at ([xshift=1.0em, yshift=-0.1em] t3.east) {tea};
+    \node[anchor=east] (t) at ([xshift=-0.2em] t1.west) {$\textbf{t}=$};
+  }
+  \end{scope}
+
+  % Boxes fitted around (s3, s4) and (t3, t4), placed on the background layer
+  \begin{pgfonlayer}{background}
+  {
+    \node[rectangle, draw=red, thick, inner sep=0.0em, fill=white, fit=(s3) (s4)] (sphrase1) {};
+    \node[rectangle, draw=black, thick, inner sep=0.0em, fill=white, fit=(t3) (t4)] (tphrase1) {};
+  }
+  \end{pgfonlayer}
+
+  % Two word links plus one link between the two boxes
+  \draw[-] (s1.south) -- (t1.north);
+  \draw[-] (s2.south) .. controls +(south:1.0em) and +(north:1.0em) .. (t2.north);
+  \draw[-] (sphrase1.south) .. controls +(south:1.0em) and +(north:1.0em) .. ([xshift=-1em] tphrase1.north);
+
+  % Round "Yes" badge to the lower right of the source sentence
+  \node[anchor=center, draw=ublue, circle, thick, fill=white, inner sep=1pt,
+        circular drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
+    (head1) at ([xshift=2.5em, yshift=-0.5em] s4.south east) {{\color{ugreen} \small{\textbf{Yes}}}};
+
+\end{scope}
+}
+
+\end{tikzpicture}
+\end{center}
+\end{minipage}
--- a/Book/Chapter4/Figures/example-of-phrase-table.tex
+++ b/Book/Chapter4/Figures/example-of-phrase-table.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Shows the contents of a phrase table
+\begin{center}
+\begin{tikzpicture}
+% Leading ellipsis (s6) above the first entry
+\node[anchor=west] (s1) at (0, 0) {\scriptsize{报告 认为 $\vert\vert\vert$ report holds that $\vert\vert\vert$ -2.62 -5.81 -0.91 -2.85 1 0 $\vert\vert\vert$ 4 $\vert\vert\vert$ 0-0 1-1 1-2}};
+\node[anchor=west] (s6) at ([yshift=1.0em] s1.west) {\scriptsize{...}};
+% Five entries, one per line; fields within a line are separated by three vertical bars
+\node[anchor=west] (s2) at ([yshift=-1.2em] s1.west) {\scriptsize{，悲伤 $\vert\vert\vert$ , sadness $\vert\vert\vert$ -1.946 -3.659 0 -3.709 1 0 $\vert\vert\vert$ 1 $\vert\vert\vert$ 0-0 1-1}};
+\node[anchor=west] (s3) at ([yshift=-1.2em] s2.west) {\scriptsize{，北京 等 $\vert\vert\vert$ , beijing , and other $\vert\vert\vert$ 0 -7.98 0 -3.84 1 0 $\vert\vert\vert$ 2 $\vert\vert\vert$ 0-0 1-1 2-2 2-3 2-4}};
+\node[anchor=west] (s4) at ([yshift=-1.2em] s3.west) {\scriptsize{，北京 及 $\vert\vert\vert$ , beijing , and $\vert\vert\vert$ -0.69 -1.45 -0.92 -4.80 1 0  $\vert\vert\vert$ 2 $\vert\vert\vert$ 0-0 1-1 2-2}};
+\node[anchor=west] (s5) at ([yshift=-1.2em] s4.west) {\scriptsize{一个 中国 $\vert\vert\vert$ one china $\vert\vert\vert$ 0 -1.725 0 -1.636 1 0 $\vert\vert\vert$ 2 $\vert\vert\vert$ 1-1 2-2}};
+% Trailing ellipsis (s7) below the last entry
+\node[anchor=west] (s7) at ([yshift=-1.1em] s5.west) {\scriptsize{...}};
+% Light red backdrop fitted around s1, s3, s4 and both ellipses, drawn on the background layer
+\begin{pgfonlayer}{background}
+  \node[rectangle, inner sep=0.3em, fill=red!20, fit=(s1) (s3) (s4) (s6) (s7)] (box1) {};
+\end{pgfonlayer}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-stack-decode.tex
+++ b/Book/Chapter4/Figures/example-of-stack-decode.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% 栈解码
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+{
+\node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h0) at (0,0) {\tiny{null}};
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl0) at (h0.north west) {\tiny{{\color{white} \textbf{0}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt0) at (h0.east) {\tiny{{\color{white} \textbf{P=1}}}};
+}
+{
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h13) at ([xshift=2.1em,yshift=5em]h0.east) {\tiny{there is}};
+%\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h12) at ([xshift=2.1em,yshift=2.5em]h0.east) {\tiny{on}};
+\node [anchor=west,inner sep=2pt,minimum height=1.5em,minimum width=2.2em] (h12) at ([xshift=2.1em,yshift=3em]h0.east) {\footnotesize{\textbf{...}}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h1) at ([xshift=2.1em]h0.east) {\tiny{table}};
+
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl1) at (h1.north west) {\tiny{{\color{white} \textbf{1}}}};
+%\node [anchor=north west,inner sep=1.0pt,fill=black] (hl2) at (h12.north west) {\tiny{{\color{white} \textbf{2}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl3) at (h13.north west) {\tiny{{\color{white} \textbf{3}}}};
+
+
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt1) at (h1.east) {\tiny{{\color{white} \textbf{P=.2}}}};
+%\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt2) at (h12.east) {\tiny{{\color{white} \textbf{P=.3}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt3) at (h13.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h2) at ([xshift=2.1em]h1.east) {\tiny{have}};
+\node [anchor=west,inner sep=2pt,minimum height=1.5em,minimum width=2.2em] (h22) at ([xshift=2.1em]h12.east) {\footnotesize{\textbf{...}}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h23) at ([xshift=2.1em]h13.east) {\tiny{an}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h3) at ([xshift=2.1em]h2.east) {\tiny{there is}};
+\node [anchor=west,inner sep=2pt,minimum height=1.5em,minimum width=2.2em] (h32) at ([xshift=2.1em]h22.east) {\footnotesize{\textbf{...}}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.5em,minimum width=2.2em] (h33) at ([xshift=2.1em]h23.east) {\tiny{an apple}};
+
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl2) at (h2.north west) {\tiny{{\color{white} \textbf{3}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl23) at (h23.north west) {\tiny{{\color{white} \textbf{4}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl3) at (h3.north west) {\tiny{{\color{white} \textbf{2}}}};
+\node [anchor=north west,inner sep=1.0pt,fill=black] (hl33) at (h33.north west) {\tiny{{\color{white} \textbf{4-5}}}};
+
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt2) at (h2.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt23) at (h23.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt3) at (h3.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.5em,fill=black] (pt33) at (h33.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+}
+\node [anchor=north] (l0) at ([xshift=0.2em,yshift=-0.7em]h0.south) {\scriptsize{\textbf{未译词}}};
+\node [anchor=north] (l1) at ([xshift=0.3em,yshift=-0.7em]h1.south) {\scriptsize{\textbf{已译1词}}};
+\node [anchor=north] (l2) at ([xshift=0.3em,yshift=-0.7em]h2.south) {\scriptsize{\textbf{已译2词}}};
+\node [anchor=north] (l3) at ([xshift=0.3em,yshift=-0.7em]h3.south) {\scriptsize{\textbf{已译3词}}};
+
+\begin{pgfonlayer}{background}
+\node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (h0) (pt0)] (box0) {};
+\node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (h1) (pt1) (h13)] (box1) {};
+\node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (h2) (pt2) (h23)] (box2) {};
+\node [rectangle,inner sep=0.3em,fill=blue!10] [fit = (h3) (pt3) (h33)] (box3) {};
+\end{pgfonlayer}
+
+{
+\draw [->,thick,red] (h13.north).. controls +(60:0.5) and +(120:0.5) .. (h23.north);
+\draw [->,thick,red] (h13.north).. controls +(58:0.8) and +(122:0.8) .. (h33.north);
+\draw [->,thick,red] (h1.north).. controls +(60:0.5) and +(120:0.5) .. (h2.north);
+\draw [->,thick,red] (h2.north).. controls +(60:0.5) and +(120:0.5) .. (h3.north);
+}
+\node [anchor=south east] (wtranslabel) at ([xshift=-1.7em,yshift=-2em]h0.south west) {\scriptsize{\textbf{：假设堆栈}}};
+\node [anchor=east,inner sep=2pt,fill=blue!10,minimum height=1em,minimum width=2em] (stacklabel) at ([xshift=-0.1em]wtranslabel.west) {};
+{
+\node [anchor=east] (line1) at ([xshift=-1.0em,yshift=0em]h0.west) {\footnotesize{0号栈包含空假设}};
+}
+{
+\node [anchor=east] (line2) at ([xshift=-1.8em,yshift=0.3em]h13.west) {\footnotesize{通过假设扩展产生新的假设}};
+\node [anchor=north west] (line3) at ([yshift=0.1em]line2.south west) {\footnotesize{并不断的被存入假设堆栈中}};
+}
+\begin{pgfonlayer}{background}
+{
+\node [rectangle,inner sep=0.1em,fill=ugreen!10] [fit = (line1)] (box1) {};
+}
+{
+\node [rectangle,inner sep=0.1em,fill=red!10] [fit = (line2) (line3)] (box2) {};
+}
+\end{pgfonlayer}
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-translation-base-word-1.tex
+++ b/Book/Chapter4/Figures/example-of-translation-base-word-1.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Problem with word-based models: word translation table for the
+%%% "green tea" example. Left column: source word -> target word;
+%%% right column: the value P listed for that entry.
+
+{\small
+  \begin{tabular}{l | l}
+    单词翻译表        & P   \\
+    \hline
+    我 $\to$ I        & 0.6 \\
+    喜欢 $\to$ like   & 0.3 \\
+    绿 $\to$ green    & 0.9 \\
+    茶 $\to$ tea      & 0.8 \\
+  \end{tabular}
+}
+
--- a/Book/Chapter4/Figures/example-of-translation-base-word-2.tex
+++ b/Book/Chapter4/Figures/example-of-translation-base-word-2.tex
+% Word-by-word alignment between the source sentence s (top row) and the
+% target sentence t (bottom row); source word k is linked to target word k.
+\begin{minipage}[c]{0.35\linewidth}
+\begin{tikzpicture}
+
+% Source row. The third word is typeset in the ugreen colour.
+\begin{scope}
+  {\small
+    \node[anchor=west] (s1) at (0,0) {我};
+    \node[anchor=west] (s2) at ([xshift=1.0em]s1.east) {喜欢};
+    \node[anchor=west] (s3) at ([xshift=1.0em]s2.east) {{\color{ugreen} 绿}};
+    \node[anchor=west] (s4) at ([xshift=1.07em]s3.east) {茶};
+    \node[anchor=east] (s)  at (s1.west) {$\textbf{s}=$};
+  }
+\end{scope}
+
+% Target row, shifted 2em below the source row. The small yshift values
+% nudge individual words vertically.
+\begin{scope}[yshift=-2.0em]
+  {\small
+    \node[anchor=west] (t1) at (0.35em,0) {I};
+    \node[anchor=west] (t2) at ([xshift=0.8em]t1.east) {like};
+    \node[anchor=west] (t3) at ([xshift=1.0em,yshift=-0.2em]t2.east) {green};
+    \node[anchor=west] (t4) at ([xshift=0.78em,yshift=0.1em]t3.east) {tea};
+    \node[anchor=east] (t)  at ([xshift=-0.3em]t1.west) {$\textbf{t}=$};
+  }
+\end{scope}
+
+% Alignment links: straight for the outer words, gently curved for the inner ones.
+\draw[-] (s1.south) -- (t1.north);
+\draw[-] (s2.south) .. controls +(south:0.5em) and +(north:0.5em) .. (t2.north);
+\draw[-] (s3.south) .. controls +(south:0.5em) and +(north:0.5em) .. (t3.north);
+\draw[-] (s4.south) -- (t4.north);
+
+\end{tikzpicture}
+\end{minipage}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-translation-black-tea-1.tex
+++ b/Book/Chapter4/Figures/example-of-translation-black-tea-1.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Problem with word-based models: word translation table for the
+%%% "红 茶" example. The source word 红 has two entries (red, black).
+{\small
+  \begin{tabular}{l | l}
+    单词翻译表        & P   \\
+    \hline
+    我 $\to$ I        & 0.6 \\
+    喜欢 $\to$ like   & 0.3 \\
+    红 $\to$ red      & 0.8 \\
+    红 $\to$ black    & 0.1 \\
+    茶 $\to$ tea      & 0.8 \\
+  \end{tabular}
+}
+
--- a/Book/Chapter4/Figures/example-of-translation-black-tea-2.tex
+++ b/Book/Chapter4/Figures/example-of-translation-black-tea-2.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Problem with word-based models: word-by-word alignment in which
+%%% "红 茶" comes out as "red tea"; the offending target span is boxed
+%%% and annotated.
+\begin{minipage}[c]{0.35\linewidth}
+\vspace{1em}
+\begin{tikzpicture}
+
+% Source row; the third word is wrapped in the \red macro.
+\begin{scope}
+  {\small
+    \node[anchor=west] (s1) at (0,0) {我};
+    \node[anchor=west] (s2) at ([xshift=1.0em]s1.east) {喜欢};
+    \node[anchor=west] (s3) at ([xshift=1.0em]s2.east) {\red{红}};
+    \node[anchor=west] (s4) at ([xshift=1.0em]s3.east) {茶};
+    \node[anchor=east] (s)  at (s1.west) {$\textbf{s}=$};
+  }
+\end{scope}
+
+% Target row, shifted 2em below the source row.
+\begin{scope}[yshift=-2.0em]
+  {\small
+    \node[anchor=west] (t1) at (0.35em,0) {I};
+    \node[anchor=west] (t2) at ([xshift=0.8em]t1.east) {like};
+    \node[anchor=west] (t3) at ([xshift=1.0em]t2.east) {red};
+    \node[anchor=west] (t4) at ([xshift=1.0em,yshift=-0.1em]t3.east) {tea};
+    \node[anchor=east] (t)  at ([xshift=-0.3em]t1.west) {$\textbf{t}=$};
+  }
+\end{scope}
+
+% Alignment links: source word k to target word k.
+\draw[-] (s1.south) -- (t1.north);
+\draw[-] (s2.south) .. controls +(south:0.5em) and +(north:0.5em) .. (t2.north);
+\draw[-] (s3.south) .. controls +(south:0.5em) and +(north:0.5em) .. (t3.north);
+\draw[-] (s4.south) .. controls +(south:0.5em) and +(north:0.5em) .. (t4.north);
+
+% Red frame around the target words t3 and t4, drawn on the background
+% layer, plus the explanatory label centred underneath it.
+\begin{pgfonlayer}{background}
+  \node[rectangle,draw=red,thick,inner sep=0.2em,fill=white,drop shadow,fit=(t3) (t4)] (problemphrase) {};
+  \node[anchor=north,red,text width=8em,align=left] (problemlabel) at (problemphrase.south) {\begin{spacing}{0.8}\scriptsize{``红 茶''为一种搭配，应该翻译为``black tea''}\end{spacing}};
+\end{pgfonlayer}
+
+\end{tikzpicture}
+\end{minipage}
--- a/Book/Chapter4/Figures/example-of-translation-use-syntactic-structure.tex
+++ b/Book/Chapter4/Figures/example-of-translation-use-syntactic-structure.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Syntax-based model: three English renderings (human translation, machine
+%%% translation, a "better?" candidate) above the segmented source input,
+%%% with a small syntactic sketch drawn below the input.
+\begin{center}
+\begin{tikzpicture}
+
+%% example
+\begin{scope}[xshift=-0.1in,yshift=-1.5in]
+
+{\tiny
+
+% Line 1: human translation; its first word is wrapped in \red.
+\node[anchor=west] (ref) at (0,0) {{{人工翻译:}} {\red{After}} North Korea demanded concessions from U.S. again before the start of a new round of six-nation talks ...};
+
+% Line 2: machine translation; its first word is wrapped in \blue.
+\node[anchor=north west] (hifst) at ([yshift=-0.3em]ref.south west) {{{机器翻译:}} \blue{In}\black{} the new round of six-nation talks on North Korea again demanded that U.S. in the former promise ...};
+
+% Line 3: the "better?" candidate, split into separately filled nodes so that
+% the arrows further down can point at its individual parts.
+{
+\node[anchor=north west] (synhifst) at ([yshift=-0.3em]hifst.south west) {\sffamily\bfseries{better?:}};
+
+\node[anchor=west, fill=red!20!white, inner sep=0.3em] (synhifstpart1) at ([xshift=-0.5em]synhifst.east) {After};
+
+\node[anchor=west, fill=blue!20!white, inner sep=0.25em] (synhifstpart2) at ([xshift=0.1em,yshift=-0.05em]synhifstpart1.east) {North Korea again demanded that U.S. promised concessions before the new round of six-nation talks};
+
+\node[anchor=west] (synhifstpart3) at ([xshift=-0.2em]synhifstpart2.east) {...};
+}
+
+% Source input, 0.4in below the "better?" label, as five segments with
+% subscripted word indices.
+\node [anchor=west] (inputlabel) at ([yshift=-0.4in]synhifst.west) {\sffamily\bfseries{Input:}};
+
+\node [anchor=west,minimum height=12pt] (inputseg1) at (inputlabel.east) {在$_1$ };
+\node [anchor=west,minimum height=12pt] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$ 于$_6$ 新$_7$ 回合$_8$ 六$_9$ 国$_{10}$ 会谈$_{11}$ 前$_{12}$ 承诺$_{13}$ 让步$_{14}$};
+\node [anchor=west,minimum height=12pt] (inputseg3) at ([xshift=0.2em]inputseg2.east) {后$_{15}$};
+\node [anchor=west,minimum height=12pt] (inputseg4) at ([xshift=0.2em]inputseg3.east) {,$_{16}$};
+\node [anchor=west,minimum height=12pt] (inputseg5) at ([xshift=0.2em]inputseg4.east) {...};
+
+% Syntactic labels below the input: PP under segments 1-3 (closed triangle),
+% PU under segment 4, VP under segment 5, and a VP node that all three connect to.
+{
+\node [anchor=north,inner sep=2pt] (synlabel1) at ([yshift=-0.34in]inputseg2.south) {\scriptsize{PP}};
+\node [anchor=north,inner sep=2pt] (synlabel2) at ([yshift=-0.34in]inputseg4.south) {\scriptsize{PU}};
+\node [anchor=north,inner sep=2pt] (synlabel3) at ([yshift=-0.34in]inputseg5.south) {\scriptsize{VP}};
+\node [anchor=north,inner sep=2pt] (synlabel4) at ([xshift=1.6in,yshift=-0.35in]synlabel1.south) {\scriptsize{VP}};
+
+\draw [-] (inputseg1.south west) -- (inputseg3.south east) -- (synlabel1.north) -- cycle;
+\draw [-] (inputseg4.south) -- (synlabel2.north);
+\draw [-] (inputseg5.south) -- (synlabel3.north);
+\draw [-] (synlabel1.south) -- (synlabel4.north);
+\draw [-] (synlabel2.south) -- (synlabel4.north);
+\draw [-] (synlabel3.south) -- (synlabel4.north);
+}
+
+% Annotation placed to the lower left of the PP label.
+{
+\node [anchor=north east,align=left] (nolimitlabel) at (synlabel1.south west) {\tiny{短语结构树很容易捕捉}\\\tiny{这种介词短语结构}};
+}
+
+% inputseg1-inputseg3 are declared a second time under the same names and at
+% the same positions, now with fill colours; being later in the file, these
+% filled copies are painted on top of the unfilled ones and of the lines above.
+{
+\node [anchor=west,minimum height=12pt,fill=red!20] (inputseg1) at (inputlabel.east) {在$_1$ };
+\node [anchor=west,minimum height=12pt,fill=blue!20] (inputseg2) at ([xshift=0.2em]inputseg1.east) {北韩$_2$ 再度$_3$ 要求$_4$ 美国$_5$ 于$_6$ 新$_7$ 回合$_8$ 六$_9$ 国$_{10}$ 会谈$_{11}$ 前$_{12}$ 承诺$_{13}$ 让步$_{14}$};
+\node [anchor=west,minimum height=12pt,fill=red!20] (inputseg3) at ([xshift=0.2em]inputseg2.east) {后$_{15}$};
+
+% Dashed arrows: segments 1 and 3 both point at the same spot under
+% synhifstpart1 ("After"); segment 2 points at synhifstpart2.
+\path [draw,->,dashed] (inputseg1.north) .. controls +(north:0.2) and +(south:0.3) ..  ([xshift=1em]synhifstpart1.south);
+\path [draw,->,dashed] (inputseg3.north) .. controls +(north:0.2) and +(south:0.6) ..  ([xshift=1em]synhifstpart1.south);
+\path [draw,->,dashed] ([xshift=-0.5in]inputseg2.north) --  ([xshift=-0.6in]synhifstpart2.south);
+}
+
+}
+
+\end{scope}
+%% end of example
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-tree-structure-correspondence.tex
+++ b/Book/Chapter4/Figures/example-of-tree-structure-correspondence.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Rule example (tree-to-tree): a source-side tree fragment, an arrow, and a
+%%% target-side tree fragment, with matching child-less nodes highlighted
+%%% and linked by dotted curves.
+\begin{center}
+\begin{tikzpicture}
+
+{\small
+% Source-side fragment: VP(PP, VP(VV(表示), NN)).
+\begin{scope}[sibling distance=5pt, level distance=22pt]
+\Tree [.\node(s1){VP};
+        [.\node(s2){PP}; ]
+        [.\node(s3){VP};
+          [.\node(s4){VV}; \node[fill=white](w1){表示}; ]
+          [.\node(s5){NN}; ]
+        ]
+      ]
+\end{scope}
+
+% Target-side fragment, 2in to the right: VP(VBZ(was), VP(VBN, PP)).
+\begin{scope}[xshift=2in, sibling distance=5pt, level distance=22pt]
+\Tree [.\node(t1){VP};
+        [.\node(t2){VBZ}; \node(w2){was}; ]
+        [.\node(t3){VP};
+          [.\node(t4){VBN}; ]
+          [.\node(t5){PP}; ]
+        ]
+      ]
+\end{scope}
+
+% Background layer: the arrow between the two fragments, coloured boxes
+% fitted around the paired nodes (blue: s2 and t5, red: s5 and t4), and
+% the dotted double-headed curves joining each pair.
+\begin{pgfonlayer}{background}
+  \node[anchor=west] (arrow) at ([xshift=3em]s5.east) {\Large{\textbf{$\to$}}};
+  \node[inner sep=0,fill=blue!20,fit=(s2)] (snode1) {};
+  \node[inner sep=0,fill=red!20,fit=(s5)] (snode2) {};
+  \node[inner sep=0,fill=red!20,fit=(t4)] (tnode1) {};
+  \node[inner sep=0,fill=blue!20,fit=(t5)] (tnode2) {};
+  \draw[<->,dotted,thick] (snode2.south) .. controls +(south:2.5em) and +(south:2.5em) .. (tnode1.south);
+  \draw[<->,dotted,thick] (snode1.south) .. controls +(south:8em) and +(south:4.5em) .. (tnode2.south);
+\end{pgfonlayer}
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-tree-to-string-rule-and-word-alignment.tex
+++ b/Book/Chapter4/Figures/example-of-tree-to-string-rule-and-word-alignment.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Translation rule extraction amounts to cutting up the tree structure:
+%%% a source parse tree, the target sentence, the word alignment between
+%%% them, and two highlighted candidate rules.
+\begin{center}
+\begin{tikzpicture}
+
+{\footnotesize
+\begin{scope}[sibling distance=25pt, level distance=20pt]
+
+% Source-side parse tree; the leaves cw1-cw5 are the source words.
+\Tree [.\node(n1){IP};
+        [.\node(n2){NP};
+          [.\node(n3){PN}; \node(cw1){他}; ]
+        ]
+        [.\node(n4){VP};
+          [.\node(n5){PP};
+            [.\node(n6){P}; \node(cw2){对}; ]
+            [.\node(n7){NP};
+              [.\node(n8){NN}; \node(cw3){回答}; ]
+            ]
+          ]
+          [.\node(n9){VP};
+            [.\node(n10){VV}; \node(cw4){表示}; ]
+            [.\node(n11){NN}; \node(cw5){满意}; ]
+          ]
+        ]
+      ]
+
+% Target words tw1-tw6, laid out left to right 6em below the first source word.
+\node[anchor=north,minimum size=18pt] (tw1) at ([yshift=-6.0em]cw1.south) {he};
+\node[anchor=west,minimum size=18pt]  (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east) {was};
+\node[anchor=west,minimum size=18pt]  (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east) {satisfied};
+\node[anchor=west,minimum size=18pt]  (tw4) at ([xshift=1.1em]tw3.east) {with};
+\node[anchor=west,minimum size=18pt]  (tw5) at ([xshift=1.1em]tw4.east) {the};
+\node[anchor=west,minimum size=18pt]  (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east) {answer};
+
+% Word alignment (dashed). cw3 links to both tw5 and tw6; cw4 and cw5 both link to tw3.
+\draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north);
+\draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north);
+\draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north);
+\draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north);
+\draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
+\draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
+
+% Rule highlights on the background layer.
+%   red : source span (cw2, cw3, n5) with target span tw4-tw6
+%   blue: source span (cw5, n11)     with target span tw3
+\begin{pgfonlayer}{background}
+  \node[rectangle,inner sep=0em,fill=red!20,fit=(cw2) (cw3) (n5)] (rule1s) {};
+  \node[rectangle,inner sep=0em,fill=red!20,fit=(tw4) (tw5) (tw6)] (rule1t) {};
+  \node[rectangle,inner sep=0em,fill=blue!20,fit=(cw5) (n11)] (rule2s) {};
+  \node[rectangle,inner sep=0em,fill=blue!20,fit=(tw3)] (rule2t) {};
+\end{pgfonlayer}
+
+% Captions attached to the two source-side rule boxes.
+\node[anchor=south] (rule1label) at ([xshift=1em]rule1s.north west) {\scriptsize\sffamily\bfseries{\red{正确的规则}}};
+\node[anchor=north west,align=left] (rule2label) at (rule2s.north east) {\scriptsize{\sffamily\bfseries{\color{blue} 错误的规则}}\\\scriptsize{因为``satisfied''会}\\\scriptsize{对齐到规则外，}\\\scriptsize{也就是这条规则}\\\scriptsize{与词对齐不相容}};
+
+\end{scope}
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-vocabulary-translation-probability.tex
+++ b/Book/Chapter4/Figures/example-of-vocabulary-translation-probability.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% How to measure phrase quality: a word alignment matrix and the lexical
+%%% translation probability P_lex read off from it.
+\begin{center}
+\begin{tikzpicture}
+
+% Local styles. Styles that were defined but never used in this figure
+% have been dropped.
+\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
+% Column headers carry the target-side symbols (t_1..t_4, N); row headers
+% carry the source-side symbols (s_1..s_4).
+\tikzstyle{colheadnode} = [font=\small,anchor=south west]
+\tikzstyle{rowheadnode} = [left,font=\small,anchor=north east]
+
+% Alignment matrix: cell (a<i><j>) sits in column i (0..4, left to right)
+% and row j (0..3, bottom to top), so j=3 is the top row.
+\begin{scope}[scale=0.9,yshift=0.12in]
+\foreach \i / \j / \c in
+    {0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15,
+    0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15,
+    0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15,
+    0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15}
+    \node[elementnode,minimum size=0.6*1.1cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.1cm*\i-5.4*0.5*1.1cm,0.5*1.1cm*\j-1.05*1.1cm) {};
+
+% Column headers (target side), one per matrix column, left to right.
+\node[colheadnode] (tgt01) at (-5.9*0.5*1.1cm,-1.05*1.1cm+3.4*0.5*1.1cm) {\scriptsize{$t_1$}};
+\node[colheadnode] (tgt02) at ([xshift=0.5*1.1cm]tgt01.south west) {\scriptsize{$t_2$}};
+\node[colheadnode] (tgt03) at ([xshift=0.5*1.1cm]tgt02.south west) {\scriptsize{$t_3$}};
+\node[colheadnode] (tgt04) at ([xshift=0.5*1.1cm]tgt03.south west) {\scriptsize{$t_4$}};
+\node[colheadnode] (tgt05) at ([xshift=0.5*1.1cm]tgt04.south west) {\scriptsize{$N$}};
+
+% Row headers (source side), one per matrix row, top to bottom.
+\node[rowheadnode] (src01) at (-6.0*0.5*1.1cm,-1.05*1.1cm+3.3*0.5*1.1cm) {\scriptsize{$s_1$}};
+\node[rowheadnode] (src02) at ([yshift=-0.5*1.1cm]src01.north east) {\scriptsize{$s_2$}};
+\node[rowheadnode] (src03) at ([yshift=-0.5*1.1cm]src02.north east) {\scriptsize{$s_3$}};
+\node[rowheadnode] (src04) at ([yshift=-0.5*1.1cm]src03.north east) {\scriptsize{$s_4$}};
+
+% Word alignment: larger squares on the aligned cells.
+%   a03 = (t_1,s_1)   a12 = (t_2,s_2)   a22 = (t_3,s_2)
+%   a41 = (N,s_3)     a30 = (t_4,s_4)
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la1) at (a03) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la2) at (a12) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la3) at (a22) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la4) at (a41) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la5) at (a30) {};
+
+% Formula, one factor per source row. The s_2 factor averages over the two
+% cells marked in that row (t_2 and t_3), matching the matrix above; the
+% last factor closes the product, so it carries no trailing \times.
+\node[anchor=west] (f1) at ([xshift=3em,yshift=0.8em]a43.east) {\footnotesize{$\textrm{P}_{\textrm{lex}}(\bar{t}|\bar{s})=w(t_1|s_1)\times$}};
+\node[anchor=north] (f2) at ([xshift=5.2em]f1.south) {\footnotesize{$\frac{1}{2}(w(t_2|s_2)+w(t_3|s_2))\times$}};
+\node[anchor=north west] (f3) at (f2.south west) {\footnotesize{$w(N|s_3)\times$}};
+\node[anchor=north west] (f4) at (f3.south west) {\footnotesize{$w(t_4|s_4)$}};
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/example-of-zh2en-translation-base-phrase.tex
+++ b/Book/Chapter4/Figures/example-of-zh2en-translation-base-phrase.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% What is a phrase: three source phrases (top row) and three target
+%%% phrases (bottom row), colour-matched and joined pairwise.
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[minimum height=18pt]
+{\scriptsize
+  % Source phrases.
+  \node[anchor=north,fill=green!20] (s1) at (0,0) {进口};
+  \node[anchor=north,fill=red!20]   (s2) at ([xshift=4em,yshift=0em]s1.north) {大幅度};
+  \node[anchor=north,fill=blue!20]  (s3) at ([xshift=4.5em,yshift=0em]s2.north) {下降 了};
+
+  % Target phrases, 4em below the source row.
+  \node[anchor=west,fill=green!20] (t1) at ([xshift=0em,yshift=-4em]s1.west) {the imports have};
+  \node[anchor=north,fill=red!20]  (t2) at ([xshift=8em,yshift=0em]t1.north) {drastically};
+  \node[anchor=north,fill=blue!20] (t3) at ([xshift=5.7em,yshift=0em]t2.north) {fallen};
+
+  % Double-headed link between source phrase k and target phrase k.
+  \foreach \k in {1,2,3}
+    \path[<->, thick] (s\k.south) edge (t\k.north);
+}
+
+% Row captions, typeset outside the \scriptsize group.
+\node[anchor=south] (s0) at ([xshift=-3em,yshift=0em]s1.south) {源语:};
+\node[anchor=east]  (t0) at ([xshift=0em,yshift=-2.85em]s0.east) {目标语:};
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/examples-of-translation-with-complex-ordering.tex
+++ b/Book/Chapter4/Figures/examples-of-translation-with-complex-ordering.tex
+%------------------------------------------------------------------------------------------------------------
+%%% Benefit of using syntax in MT: a reference translation, the output of a
+%%% hierarchical phrase-based system and of a syntax-based system, and the
+%%% source parse tree whose leaves are linked to parts of the syntax-based output.
+
+\begin{tikzpicture}
+
+\begin{scope}[yshift=-0in]
+
+{\scriptsize
+
+% Reference translation on two lines. The second line starts with a label
+% set in white, which indents the continuation by the label's width
+% (invisible only on a white page -- NOTE(review): confirm background).
+\node[anchor=west] (ref) at (0,0) {{\sffamily\bfseries{参考答案:}} The Chinese star performance troupe presented a wonderful Peking opera as well as singing and dancing };
+\node[anchor=north west] (ref2) at (ref.south west) {{\color{white} \sffamily\bfseries{Reference:}} performance to Hong Kong audience .};
+
+% Output of the hierarchical phrase-based system.
+\node[anchor=north west] (hifst) at (ref2.south west) {{\sffamily\bfseries{层次短语系统:}} Star troupe of China, highlights of Peking opera and dance show to the audience of Hong Kong .};
+
+% Output of the syntax-based system, split into coloured parts so the
+% arrows below can point at each one.
+\node[anchor=north west] (synhifst) at (hifst.south west) {{\sffamily\bfseries{句法系统:}} Chinese star troupe};
+\node[anchor=west, fill=green!20!white, inner sep=0.25em]  (synhifstpart2) at (synhifst.east) {presented};
+\node[anchor=west, fill=blue!20!white, inner sep=0.25em]   (synhifstpart3) at ([xshift=0.2em]synhifstpart2.east) {a wonderful Peking opera singing and dancing};
+\node[anchor=west, fill=red!20!white, inner sep=0.40em]    (synhifstpart4) at ([xshift=0.2em]synhifstpart3.east) {to};
+\node[anchor=west, fill=purple!20!white, inner sep=0.25em] (synhifstpart5) at ([xshift=0.2em]synhifstpart4.east) {Hong Kong audience};
+\node[anchor=west] (synhifstpart6) at (synhifstpart5.east) {.};
+
+% Caption for the source parse tree.
+\node[anchor=north west] (input) at ([yshift=-6.5em]synhifst.south west) {\sffamily\bfseries{源语句法树:}};
+
+% Source parse tree, grown upwards (root at the bottom); leaf fill colours
+% match the output parts they are linked to.
+\begin{scope}[scale = 0.9, grow'=up, sibling distance=5pt, level distance=23pt, xshift=3.49in, yshift=-2.8in]
+
+\Tree [.\node(tn1){IP};
+        [.\node(tn2){NP}; \edge[roof]; \node[](seg1){中国$_1$ 明星$_2$ 艺术团$_3$}; ]
+        [.\node(tn3){VP};
+          [.\node(tn4){BA}; \node[fill=red!20!white](seg2){将$_4$}; ]
+          [.\node(tn5){NP}; \edge[roof]; \node[fill=blue!20!white](seg3){一$_5$ 台$_6$ 精彩$_7$ 的$_8$ 京剧$_9$ 歌舞$_{10}$}; ]
+          [.\node(tn6){VP};
+            [.\node(tn7){VV}; \node[fill=green!20!white](seg4){呈现$_{11}$}; ]
+          ]
+          [.\node(tn8){PP};
+            [.\node(tn9){P}; \node[fill=red!20!white](seg5){给$_{12}$}; ]
+            [.\node(tn10){NP}; \edge[roof]; \node[fill=purple!20!white](seg6){香港$_{13}$ 观众$_{14}$}; ]
+          ]
+        ]
+        [.\node(tn11){.}; ]
+      ]
+
+\end{scope}
+
+% Dashed arrows from source leaves to output parts; seg2 and seg5 both
+% end at synhifstpart4.
+\draw[thick,->,dashed] (seg2.north) .. controls +(north:1.0) and +(south:1.5) .. (synhifstpart4.south);
+\draw[thick,->,dashed] (seg3.north) -- (synhifstpart3.south);
+\draw[thick,->,dashed] (seg4.north) -- (synhifstpart2.south);
+\draw[thick,->,dashed] (seg5.north) .. controls +(north:0.5) .. (synhifstpart4.south);
+\draw[thick,->,dashed] (seg6.north) -- (synhifstpart5.south);
+
+}
+\end{scope}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/execution-of-cube-pruning-1.tex
+++ b/Book/Chapter4/Figures/execution-of-cube-pruning-1.tex
+%------------------------------------------------------------------------------------------------------------
+%%% Cube pruning: four rule strings, a 4x4 grid of small markers with
+%%% rotated column labels, and value boxes drawn over some of the cells.
+\vspace{0.8em}
+\begin{tikzpicture}
+\tikzset{
+  alignmentnode/.style={rectangle,fill=blue!30,minimum size=0.4em,text=white,inner sep=0.1pt},
+  selectnode/.style={rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt},
+  srcnode/.style={rotate=45,anchor=south west}
+}
+\begin{scope}[scale=0.85]
+
+% Rule strings, stacked downwards and right-aligned on s1's east edge.
+\node[anchor=west] (s1) at (0,0) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{from}\ \textrm{X}_1>$}};
+\node[anchor=east] (s2) at ([yshift=-2em]s1.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{since}\ \textrm{X}_1>$}};
+\node[anchor=east] (s3) at ([yshift=-2em]s2.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{from the}\ \textrm{X}_1>$}};
+\node[anchor=east] (s4) at ([yshift=-2em]s3.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{through}\ \textrm{X}_1>$}};
+
+% Grid, first column: alig1 is positioned relative to s1, and each further
+% row head sits 2.2em below the previous one.
+\node[alignmentnode,anchor=center] (alig1) at ([xshift=-3.5em,yshift=8em]s1.north) {};
+\node[alignmentnode,anchor=center] (alig2) at ([yshift=-2.2em]alig1.center) {};
+\node[alignmentnode,anchor=center] (alig3) at ([yshift=-2.2em]alig2.center) {};
+\node[alignmentnode,anchor=center] (alig4) at ([yshift=-2.2em]alig3.center) {};
+
+% Grid, remaining columns: for row head aligR the cells aligR1, aligR2 and
+% aligR3 follow to the right at 2.2em steps.
+\foreach \rowhead in {alig1,alig2,alig3,alig4} {
+  \node[alignmentnode,anchor=center] (\rowhead1) at ([xshift=2.2em]\rowhead.center) {};
+  \node[alignmentnode,anchor=center] (\rowhead2) at ([xshift=2.2em]\rowhead1.center) {};
+  \node[alignmentnode,anchor=center] (\rowhead3) at ([xshift=2.2em]\rowhead2.center) {};
+}
+
+% Rotated column labels above the top row. The names c1-c3 are reused by
+% the value boxes below; nothing refers to them afterwards.
+\foreach \name/\cell/\word in {c1/alig1/plan, c2/alig11/scheme, c3/alig12/project, c4/alig13/times}
+  \node[srcnode] (\name) at ([yshift=1em]\cell.north) {\footnotesize{\word}};
+
+% Value boxes, in drawing order. Some cells are drawn twice, first with a
+% red fill and then with the default selectnode fill; the later (green)
+% box covers the earlier one, so only the last two boxes remain red.
+\node[selectnode,anchor=center] (c1) at (alig1.center) {\footnotesize{2.1}};
+
+\node[selectnode,fill=red!20,anchor=center] (c2) at (alig11.center) {\footnotesize{5.1}};
+\node[selectnode,fill=red!20,anchor=center] (c3) at (alig2.center) {\footnotesize{5.5}};
+
+\node[selectnode,anchor=center] (c2) at (alig11.center) {\footnotesize{5.1}};
+\node[selectnode,anchor=center] (c3) at (alig2.center) {\footnotesize{5.5}};
+\node[selectnode,fill=red!20,anchor=center] (c5) at (alig21.center) {\footnotesize{8.5}};
+\node[selectnode,fill=red!20,anchor=center] (c6) at (alig3.center) {\footnotesize{7.7}};
+
+\node[selectnode,anchor=center] (c5) at (alig21.center) {\footnotesize{8.5}};
+\node[selectnode,anchor=center] (c6) at (alig3.center) {\footnotesize{7.7}};
+\node[selectnode,fill=red!20,anchor=center] (c7) at (alig22.center) {\footnotesize{4.2}};
+\node[selectnode,fill=red!20,anchor=center] (c8) at (alig31.center) {\footnotesize{8.2}};
+
+% Axes: from just outside the top-left cell, one arrow down and one to the right.
+\draw[->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west) -- ([xshift=-1.0em,yshift=-0.7em]alig4.south west);
+\draw[->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west) -- ([xshift=0.8em,yshift=1.0em]alig13.north east);
+\end{scope}
+\end{tikzpicture}
+
--- a/Book/Chapter4/Figures/execution-of-cube-pruning-2.tex
+++ b/Book/Chapter4/Figures/execution-of-cube-pruning-2.tex
+%------------------------------------------------------------------------------------------------------------
+%%% Cube pruning: four rule strings, a 4x4 grid of small markers with
+%%% rotated column labels, and value boxes drawn over some of the cells.
+\vspace{0.8em}
+\begin{tikzpicture}
+\tikzset{
+  alignmentnode/.style={rectangle,fill=blue!30,minimum size=0.4em,text=white,inner sep=0.1pt},
+  selectnode/.style={rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt},
+  srcnode/.style={rotate=45,anchor=south west}
+}
+\begin{scope}[scale=0.85]
+
+% Rule strings, stacked downwards and right-aligned on s1's east edge.
+\node[anchor=west] (s1) at (0,0) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{from}\ \textrm{X}_1>$}};
+\node[anchor=east] (s2) at ([yshift=-2em]s1.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{since}\ \textrm{X}_1>$}};
+\node[anchor=east] (s3) at ([yshift=-2em]s2.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{from the}\ \textrm{X}_1>$}};
+\node[anchor=east] (s4) at ([yshift=-2em]s3.east) {\footnotesize{$\textrm{X} \to <\textrm{从}\ \textrm{X}_1,\ \textrm{through}\ \textrm{X}_1>$}};
+
+% Grid, first column: alig1 is positioned relative to s1, and each further
+% row head sits 2.2em below the previous one.
+\node[alignmentnode,anchor=center] (alig1) at ([xshift=-3.5em,yshift=8em]s1.north) {};
+\node[alignmentnode,anchor=center] (alig2) at ([yshift=-2.2em]alig1.center) {};
+\node[alignmentnode,anchor=center] (alig3) at ([yshift=-2.2em]alig2.center) {};
+\node[alignmentnode,anchor=center] (alig4) at ([yshift=-2.2em]alig3.center) {};
+
+% Grid, remaining columns: for row head aligR the cells aligR1, aligR2 and
+% aligR3 follow to the right at 2.2em steps.
+\foreach \rowhead in {alig1,alig2,alig3,alig4} {
+  \node[alignmentnode,anchor=center] (\rowhead1) at ([xshift=2.2em]\rowhead.center) {};
+  \node[alignmentnode,anchor=center] (\rowhead2) at ([xshift=2.2em]\rowhead1.center) {};
+  \node[alignmentnode,anchor=center] (\rowhead3) at ([xshift=2.2em]\rowhead2.center) {};
+}
+
+% Rotated column labels above the top row. The names c1-c3 are reused by
+% the value boxes below; nothing refers to them afterwards.
+\foreach \name/\cell/\word in {c1/alig1/plan, c2/alig11/scheme, c3/alig12/project, c4/alig13/times}
+  \node[srcnode] (\name) at ([yshift=1em]\cell.north) {\footnotesize{\word}};
+
+% Value boxes, in drawing order. Some cells are drawn twice, first with a
+% red fill and then with the default selectnode fill; the later (green)
+% box covers the earlier one, so only the last two boxes remain red.
+\node[selectnode,anchor=center] (c1) at (alig1.center) {\footnotesize{2.1}};
+
+\node[selectnode,fill=red!20,anchor=center] (c2) at (alig11.center) {\footnotesize{5.1}};
+\node[selectnode,fill=red!20,anchor=center] (c3) at (alig2.center) {\footnotesize{5.5}};
+
+\node[selectnode,anchor=center] (c2) at (alig11.center) {\footnotesize{5.1}};
+\node[selectnode,anchor=center] (c3) at (alig2.center) {\footnotesize{5.5}};
+\node[selectnode,fill=red!20,anchor=center] (c5) at (alig21.center) {\footnotesize{8.5}};
+\node[selectnode,fill=red!20,anchor=center] (c6) at (alig3.center) {\footnotesize{7.7}};
+
+\node[selectnode,anchor=center] (c5) at (alig21.center) {\footnotesize{8.5}};
+\node[selectnode,anchor=center] (c6) at (alig3.center) {\footnotesize{7.7}};
+\node[selectnode,fill=red!20,anchor=center] (c7) at (alig22.center) {\footnotesize{4.2}};
+\node[selectnode,fill=red!20,anchor=center] (c8) at (alig31.center) {\footnotesize{8.2}};
+
+% Axes: from just outside the top-left cell, one arrow down and one to the right.
+\draw[->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west) -- ([xshift=-1.0em,yshift=-0.7em]alig4.south west);
+\draw[->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west) -- ([xshift=0.8em,yshift=1.0em]alig13.north east);
+\end{scope}
+\end{tikzpicture}
+
--- a/Book/Chapter4/Figures/execution-of-cube-pruning-3.tex
+++ b/Book/Chapter4/Figures/execution-of-cube-pruning-3.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Cube pruning
+%%%  Draws four rule labels, a 4x4 grid of small squares, four rotated word labels above
+%%%  the grid's first row, the scored cells, and two axis arrows from the grid's top-left corner.
+%%%  NOTE(review): in this commit the file is identical to execution-of-cube-pruning-4.tex;
+%%%  confirm whether the 4.2 and 8.2 cells are meant to appear in this step.
+\vspace{0.8em}
+\begin{tikzpicture}
+% Picture-local styles; \tikzset replaces the deprecated \tikzstyle syntax.
+\tikzset{
+  alignmentnode/.style={rectangle,fill=blue!30,minimum size=0.4em,text=white,inner sep=0.1pt},
+  selectnode/.style={rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt},
+  srcnode/.style={rotate=45,anchor=south west}
+}
+\begin{scope}[scale=0.85]
+% Rule labels, right-aligned below one another. \langle/\rangle typeset the pair
+% brackets; bare < and > would be spaced as relation symbols in math mode.
+\node [anchor=west] (s1) at (0,0) {\footnotesize{$\textrm{X} \to \langle\textrm{从}\ \textrm{X}_1,\ \textrm{from}\ \textrm{X}_1\rangle$}};
+\foreach \idx/\prev/\eng in {2/1/since,3/2/from the,4/3/through}
+  \node [anchor=east] (s\idx) at ([yshift=-2em]s\prev.east) {\footnotesize{$\textrm{X} \to \langle\textrm{从}\ \textrm{X}_1,\ \textrm{\eng}\ \textrm{X}_1\rangle$}};
+
+% Grid: alig<r> is the first cell of row r, alig<r><c> the cell c steps to its right.
+% The first column is chained downwards from alig1, then every row is chained rightwards.
+\node [anchor=center,alignmentnode] (alig1) at ([xshift=-3.5em,yshift=8em]s1.north) {};
+\foreach \rowid/\upper in {2/1,3/2,4/3}
+  \node [anchor=center,alignmentnode] (alig\rowid) at ([yshift=-2.2em]alig\upper.center) {};
+\foreach \cell/\prev in {11/1,12/11,13/12,21/2,22/21,23/22,31/3,32/31,33/32,41/4,42/41,43/42}
+  \node [anchor=center,alignmentnode] (alig\cell) at ([xshift=2.2em]alig\prev.center) {};
+
+% Rotated word labels above the first grid row (named word<cell> so they no longer
+% share the c1..c4 names with the scored cells below).
+\foreach \cell/\word in {1/plan,11/scheme,12/project,13/times}
+  \node [srcnode] (word\cell) at ([yshift=1em]alig\cell.north) {\footnotesize{\word}};
+
+% Scored cells with the default green fill. The earlier revision first drew some of
+% these in red and then repainted them green at the same position and size; only the
+% final, visible node is kept.
+\foreach \nodename/\cell/\score in {c1/1/2.1,c2/11/5.1,c3/2/5.5,c5/21/8.5,c6/3/7.7}
+  \node [anchor=center,selectnode] (\nodename) at (alig\cell.center) {\footnotesize{\score}};
+
+% Scored cells highlighted in red (presumably the most recently added candidates).
+\foreach \nodename/\cell/\score in {c7/22/4.2,c8/31/8.2}
+  \node [anchor=center,selectnode,fill=red!20] (\nodename) at (alig\cell.center) {\footnotesize{\score}};
+
+% Axis arrows: one down the left side of the grid, one along its top.
+\draw [->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west)--([xshift=-1.0em,yshift=-0.7em]alig4.south west);
+\draw [->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west)--([xshift=0.8em,yshift=1.0em]alig13.north east);
+\end{scope}
+\end{tikzpicture}
+
--- a/Book/Chapter4/Figures/execution-of-cube-pruning-4.tex
+++ b/Book/Chapter4/Figures/execution-of-cube-pruning-4.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Cube pruning
+%%%  Draws four rule labels, a 4x4 grid of small squares, four rotated word labels above
+%%%  the grid's first row, the scored cells, and two axis arrows from the grid's top-left corner.
+\vspace{0.8em}
+\begin{tikzpicture}
+% Picture-local styles; \tikzset replaces the deprecated \tikzstyle syntax.
+\tikzset{
+  alignmentnode/.style={rectangle,fill=blue!30,minimum size=0.4em,text=white,inner sep=0.1pt},
+  selectnode/.style={rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt},
+  srcnode/.style={rotate=45,anchor=south west}
+}
+\begin{scope}[scale=0.85]
+% Rule labels, right-aligned below one another. \langle/\rangle typeset the pair
+% brackets; bare < and > would be spaced as relation symbols in math mode.
+\node [anchor=west] (s1) at (0,0) {\footnotesize{$\textrm{X} \to \langle\textrm{从}\ \textrm{X}_1,\ \textrm{from}\ \textrm{X}_1\rangle$}};
+\foreach \idx/\prev/\eng in {2/1/since,3/2/from the,4/3/through}
+  \node [anchor=east] (s\idx) at ([yshift=-2em]s\prev.east) {\footnotesize{$\textrm{X} \to \langle\textrm{从}\ \textrm{X}_1,\ \textrm{\eng}\ \textrm{X}_1\rangle$}};
+
+% Grid: alig<r> is the first cell of row r, alig<r><c> the cell c steps to its right.
+% The first column is chained downwards from alig1, then every row is chained rightwards.
+\node [anchor=center,alignmentnode] (alig1) at ([xshift=-3.5em,yshift=8em]s1.north) {};
+\foreach \rowid/\upper in {2/1,3/2,4/3}
+  \node [anchor=center,alignmentnode] (alig\rowid) at ([yshift=-2.2em]alig\upper.center) {};
+\foreach \cell/\prev in {11/1,12/11,13/12,21/2,22/21,23/22,31/3,32/31,33/32,41/4,42/41,43/42}
+  \node [anchor=center,alignmentnode] (alig\cell) at ([xshift=2.2em]alig\prev.center) {};
+
+% Rotated word labels above the first grid row (named word<cell> so they no longer
+% share the c1..c4 names with the scored cells below).
+\foreach \cell/\word in {1/plan,11/scheme,12/project,13/times}
+  \node [srcnode] (word\cell) at ([yshift=1em]alig\cell.north) {\footnotesize{\word}};
+
+% Scored cells with the default green fill. The earlier revision first drew some of
+% these in red and then repainted them green at the same position and size; only the
+% final, visible node is kept.
+\foreach \nodename/\cell/\score in {c1/1/2.1,c2/11/5.1,c3/2/5.5,c5/21/8.5,c6/3/7.7}
+  \node [anchor=center,selectnode] (\nodename) at (alig\cell.center) {\footnotesize{\score}};
+
+% Scored cells highlighted in red (presumably the most recently added candidates).
+\foreach \nodename/\cell/\score in {c7/22/4.2,c8/31/8.2}
+  \node [anchor=center,selectnode,fill=red!20] (\nodename) at (alig\cell.center) {\footnotesize{\score}};
+
+% Axis arrows: one down the left side of the grid, one along its top.
+\draw [->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west)--([xshift=-1.0em,yshift=-0.7em]alig4.south west);
+\draw [->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west)--([xshift=0.8em,yshift=1.0em]alig13.north east);
+\end{scope}
+\end{tikzpicture}
+
--- a/Book/Chapter4/Figures/execution-of-cube-pruning.tex
+++ b/Book/Chapter4/Figures/execution-of-cube-pruning.tex
+% Cube pruning walkthrough: the same rule list and 4x4 grid are drawn in four panels
+% laid out 2x2. Each panel draws one more group of scored cells than the previous one.
+\begin{tikzpicture}
+% Picture-local styles; \tikzset replaces the deprecated \tikzstyle syntax.
+\tikzset{
+  alignmentnode/.style={rectangle,fill=blue!30,minimum size=0.4em,text=white,inner sep=0.1pt},
+  selectnode/.style={rectangle,fill=green!20,minimum height=1.5em,minimum width=1.5em,inner sep=1.2pt},
+  srcnode/.style={rotate=45,anchor=south west}
+}
+
+% One iteration per panel: \dx/\dy shift the panel, \stage (1-4) selects how many
+% groups of scored cells are drawn. Node names are reused from panel to panel.
+\foreach \dx/\dy/\stage in {0em/0em/1,18.0em/0em/2,0em/-12.0em/3,18.0em/-12.0em/4}
+{
+\begin{scope}[xshift=\dx,yshift=\dy,scale=0.85]
+% Rule labels, right-aligned below one another. \langle/\rangle typeset the pair
+% brackets; bare < and > would be spaced as relation symbols in math mode.
+\node [anchor=west] (s1) at (0,0) {\scriptsize{$\textrm{X} \to \langle\textrm{从}\ \textrm{X}_1,\ \textrm{from}\ \textrm{X}_1\rangle$}};
+\foreach \idx/\prev/\eng in {2/1/since,3/2/from the,4/3/through}
+  \node [anchor=east] (s\idx) at ([yshift=-2em]s\prev.east) {\scriptsize{$\textrm{X} \to \langle\textrm{从}\ \textrm{X}_1,\ \textrm{\eng}\ \textrm{X}_1\rangle$}};
+
+% Grid: alig<r> is the first cell of row r, alig<r><c> the cell c steps to its right.
+% Rows are 2.0em apart, the same step used between the rule labels.
+\node [anchor=center,alignmentnode] (alig1) at ([xshift=12.0em,yshift=0em]s1.west) {};
+\foreach \rowid/\upper in {2/1,3/2,4/3}
+  \node [anchor=center,alignmentnode] (alig\rowid) at ([yshift=-2.0em]alig\upper.center) {};
+\foreach \cell/\prev in {11/1,12/11,13/12,21/2,22/21,23/22,31/3,32/31,33/32,41/4,42/41,43/42}
+  \node [anchor=center,alignmentnode] (alig\cell) at ([xshift=2.2em]alig\prev.center) {};
+
+% Rotated word labels above the first grid row.
+\foreach \cell/\word in {1/plan,11/scheme,12/project,13/times}
+  \node [srcnode] (word\cell) at ([yshift=1em]alig\cell.north) {\footnotesize{\word}};
+
+% Every panel: the first scored cell.
+\node [anchor=center,selectnode] (c1) at (alig1.center) {\footnotesize{2.1}};
+
+% Panels 2-4: two more cells, drawn in red.
+\ifnum\stage>1\relax
+  \node [anchor=center,selectnode,fill=red!20] (c2) at (alig11.center) {\footnotesize{5.1}};
+  \node [anchor=center,selectnode,fill=red!20] (c3) at (alig2.center) {\footnotesize{5.5}};
+\fi
+
+% Panels 3-4: repaint the previous two cells green and add the next two in red.
+\ifnum\stage>2\relax
+  \node [anchor=center,selectnode] (c2) at (alig11.center) {\footnotesize{5.1}};
+  \node [anchor=center,selectnode] (c3) at (alig2.center) {\footnotesize{5.5}};
+  \node [anchor=center,selectnode,fill=red!20] (c5) at (alig21.center) {\footnotesize{8.5}};
+  \node [anchor=center,selectnode,fill=red!20] (c6) at (alig3.center) {\footnotesize{7.7}};
+\fi
+
+% Panel 4: repaint again and add the last two cells in red.
+\ifnum\stage>3\relax
+  \node [anchor=center,selectnode] (c5) at (alig21.center) {\footnotesize{8.5}};
+  \node [anchor=center,selectnode] (c6) at (alig3.center) {\footnotesize{7.7}};
+  \node [anchor=center,selectnode,fill=red!20] (c7) at (alig22.center) {\footnotesize{4.2}};
+  \node [anchor=center,selectnode,fill=red!20] (c8) at (alig31.center) {\footnotesize{8.2}};
+\fi
+
+% Axis arrows: one down the left side of the grid, one along its top.
+\draw [->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west)--([xshift=-1.0em,yshift=-0.7em]alig4.south west);
+\draw [->,thick] ([xshift=-1.0em,yshift=1.0em]alig1.north west)--([xshift=0.8em,yshift=1.0em]alig13.north east);
+\end{scope}
+}
+\end{tikzpicture}
--- a/Book/Chapter4/Figures/extract-hierarchical-phrase-rules.tex
+++ b/Book/Chapter4/Figures/extract-hierarchical-phrase-rules.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Translation rule extraction
+% Again following David Chiang's paper
+% Left: a word-alignment matrix with two boxed spans. Right: the extracted phrase and
+% the extracted rule, with a dotted arrow from the larger boxed span to the rule.
+\begin{center}
+\begin{tikzpicture}
+% Picture-local styles; \tikzset replaces the deprecated \tikzstyle syntax.
+% linknode bundles the options shared by all filled alignment cells.
+\tikzset{
+  elementnode/.style={rectangle,text=white,anchor=center},
+  srcnode/.style={rotate=45,font=\small,anchor=south west},
+  tgtnode/.style={left,font=\small,anchor=north east},
+  linknode/.style={align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50}
+}
+
+% alignment matrix: 8 columns (\i) by 6 rows (\j) of small dots named a<i><j>,
+% drawn row by row from the top row (j=5) down
+\begin{scope}[scale=1.0,yshift=0.12in]
+\foreach \j in {5,4,3,2,1,0}
+  \foreach \i in {0,1,2,3,4,5,6,7}
+    \node[elementnode,minimum size=0.6*1.1cm*0.15,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.1cm*\i-5.4*0.5*1.1cm,0.5*1.1cm*\j-0.05*1.1cm) {};
+
+% column labels (rotated English words); each is offset from the previous label
+\node[srcnode] (src1) at (-5.4*0.5*1.1cm,-1.05*1.1cm+7.5*0.5*1.1cm) {\scriptsize{The}};
+\foreach \idx/\prev/\word in {2/1/weather,3/2/is,4/3/very,5/4/good,6/5/today,7/6/{.},8/7/EOS}
+  \node[srcnode] (src\idx) at ([xshift=0.5*1.1cm]src\prev.south west) {\scriptsize{\word}};
+
+% row labels (Chinese words); each is offset from the previous label
+\node[tgtnode] (tgt1) at (-6.0*0.5*1.1cm,-1.05*1.1cm+7.5*0.5*1.1cm) {\scriptsize{今天}};
+\foreach \idx/\prev/\word in {2/1/天气,3/2/真,4/3/好,5/4/。,6/5/EOS}
+  \node[tgtnode] (tgt\idx) at ([yshift=-0.5*1.1cm]tgt\prev.north east) {\scriptsize{\word}};
+
+% word alignment: larger filled squares l<i><j> placed on the matching dots a<i><j>
+\foreach \cell in {04,14,55,33,42,61,70}
+  \node[linknode] (l\cell) at (a\cell) {};
+
+% boxed spans, drawn behind the matrix: phrase1 (red) encloses phrase2 (green)
+\begin{pgfonlayer}{background}
+\node [rectangle,draw=red,thick,inner sep=0.6em,fill=white,drop shadow,fit=(a04) (a14) (a24) (a33) (a42)] (phrase1) {};
+\node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow,fit=(a04) (a14)] (phrase2) {};
+\end{pgfonlayer}
+
+\end{scope}
+
+\begin{scope}[xshift = 1.3in, yshift = 1.8in]
+{\scriptsize
+% section headings with a rule under each
+\node (phrase) {\textbf{抽取得到的短语:}};
+\draw[-] (phrase.south west)--([xshift=1.9in]phrase.south west);
+\node[anchor=north west] (rules) at ([yshift=-7.5em]phrase.south west) {\textbf{抽取得到的规则:}};
+\draw[-] (rules.south west)--([xshift=1.9in]rules.south west);
+
+% extracted phrase and extracted rule
+\node[anchor=north west] (p1) at ([yshift=-0.3em]phrase.south west) {天气真好 -- The weather is very good};
+\node[anchor=north west] (r1) at ([yshift=-0.3em]rules.south west) {$\mathrm{X_1}$真好 -- $\mathrm{X_1}$ is very good};
+
+% empty helper node placed 2.65cm below p1.east; it is part of the rule highlight's
+% fit so that the highlight reaches as far right as p1
+\node[anchor=east] (r2) at ([yshift=-2.65cm]p1.east) {};
+
+% Highlights and arrow on the background layer. The earlier revision also drew a
+% second copy of the phrase highlight and a smaller rule highlight fitted to r1 alone;
+% both were covered by same-coloured nodes and are omitted here.
+\begin{pgfonlayer}{background}
+\node [rectangle,thick,inner sep=0.1em,fill=red!20,fit=(p1)] (pb1) {};
+\draw [->,thick,dotted] ([xshift=0.1em]phrase1.east) .. controls +(east:2) and +(west:1.5) ..  ([xshift=-0.1em]r1.west);
+\node [rectangle,thick,inner sep=0.1em,fill=ugreen!20,fit=(r1) (r2)] (rb2) {};
+\end{pgfonlayer}
+
+}
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/function-image-about-weight-and-Bleu-1.tex
+++ b/Book/Chapter4/Figures/function-image-about-weight-and-Bleu-1.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Feature weight tuning
+%%% Plot of two straight lines labelled d_1 and d_2 over a horizontal w_i axis; the
+%%% vertical axis is labelled "model score" and the crossing point is circled at w_x.
+\begin{tikzpicture}
+\begin{scope}
+% Empty anchor node; every other coordinate is an offset from one of its anchors.
+\node[anchor=west] (x0) at (0, 0) {};
+
+% Axes.
+\draw[thick,->] (x0.center) -- ([xshift=8.2em]x0.east);
+\draw[thick,->] (x0.center) -- ([yshift=5.6em]x0.center);
+
+% Tick labels under the horizontal axis and the rotated vertical-axis label.
+\node[anchor=north] (zero) at ([yshift=0.1em]x0.south) {\footnotesize{0}};
+\node[anchor=north] (wx) at ([xshift=4em,yshift=0.1em]x0.south) {\footnotesize{$w_x$}};
+\node[anchor=north] (wi) at ([xshift=8em,yshift=0.1em]x0.south) {\footnotesize{$w_i$}};
+\node[anchor=north,rotate=90] (ylabel) at ([xshift=-1.3em,yshift=3.6em]x0.south) {\footnotesize{model score}};
+
+% The two lines: (0,1em)-(8em,5em) and (0,2em)-(8em,4em); they cross at (4em,3em).
+\draw[thick] ([yshift=1em]x0.center) -- ([xshift=8em,yshift=5em]x0.center);
+\draw[thick] ([yshift=2em]x0.center) -- ([xshift=8em,yshift=4em]x0.center);
+\node[anchor=north] (d1label) at ([xshift=6em,yshift=6em]x0.south) {\footnotesize{$d_1$}};
+\node[anchor=north] (d2label) at ([xshift=7em,yshift=4em]x0.south) {\footnotesize{$d_2$}};
+
+% Red circle on the crossing point and a dotted drop line down to the axis.
+\node [anchor=center,draw=red,circle,inner sep=2pt,thick] (cross) at ([xshift=4em,yshift=3em]x0.center) {};
+\draw[thick,dotted] ([xshift=4em]x0.center) -- ([xshift=4em,yshift=3em]x0.center);
+
+\end{scope}
+\end{tikzpicture}
--- a/Book/Chapter4/Figures/function-image-about-weight-and-Bleu-2.tex
+++ b/Book/Chapter4/Figures/function-image-about-weight-and-Bleu-2.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Feature weight tuning
+%%% Step plot over a horizontal w_i axis with the vertical axis labelled BLEU: one level
+%%% left of w_x (labelled d^*=d_1) and a higher level right of it (labelled d^*=d_2).
+\begin{tikzpicture}
+\begin{scope}
+% Empty anchor node; every other coordinate is an offset from one of its anchors.
+\node[anchor=west] (x0) at (0, 0) {};
+
+% Axes.
+\draw[thick,->] (x0.center) -- ([xshift=8.2em]x0.east);
+\draw[thick,->] (x0.center) -- ([yshift=5.6em]x0.center);
+
+% Tick labels under the horizontal axis and the rotated vertical-axis label.
+\node[anchor=north] (zero) at ([yshift=0.1em]x0.south) {\footnotesize{0}};
+\node[anchor=north] (wx) at ([xshift=4em,yshift=0.1em]x0.south) {\footnotesize{$w_x$}};
+\node[anchor=north] (wi) at ([xshift=8em,yshift=0.1em]x0.south) {\footnotesize{$w_i$}};
+\node[anchor=north,rotate=90] (ylabel) at ([xshift=-1.3em,yshift=3.6em]x0.south) {\footnotesize{BLEU}};
+
+% The two horizontal levels and the dotted vertical line at w_x that separates them.
+\draw[thick] ([yshift=2em]x0.center) -- ([xshift=4em,yshift=2em]x0.center);
+\draw[thick] ([xshift=4em,yshift=4em]x0.center) -- ([xshift=8em,yshift=4em]x0.center);
+\draw[thick,dotted] ([xshift=4em]x0.center) -- ([xshift=4em,yshift=5.5em]x0.center);
+
+% Labels above each level.
+\node[anchor=north] (d1label) at ([xshift=2em,yshift=3em]x0.north) {\footnotesize{$d^*=d_1$}};
+\node[anchor=north] (d2label) at ([xshift=6.2em,yshift=5em]x0.north) {\footnotesize{$d^*=d_2$}};
+
+% Red brace just above the axis from 3.8em to 8.2em, with its red caption.
+\draw[decorate,decoration={brace,amplitude=0.4em},red,thick] ([xshift=3.8em,yshift=0.5em]x0.south) -- ([xshift=8.2em,yshift=0.5em]x0.south);
+\node[anchor=north] (pick) at ([xshift=6.1em,yshift=2em]x0.south) {\tiny{\red{挑选$w_i$}}};
+\end{scope}
+\end{tikzpicture}
--- a/Book/Chapter4/Figures/get-word-alignment.tex
+++ b/Book/Chapter4/Figures/get-word-alignment.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Word alignment system
+%%% Three 4x4 alignment matrices side by side, captioned "S - T", "T - S" and
+%%% intersection/union. In the third matrix the two cells that are filled in only one of
+%%% the first two matrices are drawn in red.
+\begin{center}
+\begin{tikzpicture}
+
+% Picture-local styles; \tikzset replaces the deprecated \tikzstyle syntax.
+% linknode bundles the options shared by all filled alignment cells.
+\tikzset{
+  elementnode/.style={rectangle,text=white,anchor=center},
+  srcnode/.style={font=\small,anchor=south west},
+  tgtnode/.style={left,font=\small,anchor=north east},
+  linknode/.style={align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50}
+}
+
+\begin{scope}[scale=0.9,yshift=0.12in]
+% One iteration per matrix. \m is the digit used in the label names (src<m><k>,
+% tgt<m><k>), \grid the prefix of the dot names (a/b/c followed by column and row),
+% and \gx, \sx, \tx the horizontal offsets (in units of 0.5*1.1cm) of the grid, the
+% column labels and the row labels.
+% NOTE(review): the row labels of matrix a sit 0.6 units left of its grid (-6.0 vs -5.4)
+% while b and c use 0.4 (0.2 vs 0.6, 6.2 vs 6.6); the values are kept as they were -
+% confirm whether the difference is intended.
+\foreach \m/\grid/\gx/\sx/\tx in {0/a/-5.4/-5.9/-6.0,1/b/0.6/0.1/0.2,2/c/6.6/6.1/6.2}
+{
+  % alignment matrix: 4x4 small dots, drawn row by row from the top row (j=3) down
+  \foreach \j in {3,2,1,0}
+    \foreach \i in {0,1,2,3}
+      \node[elementnode,minimum size=0.6*1.1cm*0.15,inner sep=0.1pt,fill=blue] (\grid\i\j) at (\gx*0.5*1.1cm+0.5*1.1cm*\i,0.5*1.1cm*\j-1.05*1.1cm) {};
+
+  % first column label (t_1) and first row label (s_1) at absolute positions
+  \node[srcnode] (src\m1) at (\sx*0.5*1.1cm,-1.05*1.1cm+3.4*0.5*1.1cm) {\scriptsize{$t_1$}};
+  \node[tgtnode] (tgt\m1) at (\tx*0.5*1.1cm,-1.05*1.1cm+3.3*0.5*1.1cm) {\scriptsize{$s_1$}};
+
+  % remaining labels, each offset from the previous one
+  \foreach \idx/\prev in {2/1,3/2,4/3}
+  {
+    \node[srcnode] (src\m\idx) at ([xshift=0.5*1.1cm]src\m\prev.south west) {\scriptsize{$t_\idx$}};
+    \node[tgtnode] (tgt\m\idx) at ([yshift=-0.5*1.1cm]tgt\m\prev.north east) {\scriptsize{$s_\idx$}};
+  }
+}
+
+% word alignment: filled squares placed on the matching dots
+\foreach \link/\cell in {la1/a03,la2/a12,la3/a22,la4/a30,lb1/b03,lb2/b12,lb3/b11,lb4/b30,lc1/c03,lc2/c12}
+  \node[linknode] (\link) at (\cell) {};
+% c11 is filled only in matrix b and c22 only in matrix a; these two are drawn in red
+\node[linknode,fill=red!50] (lc3) at (c11) {};
+\node[linknode,fill=red!50] (lc4) at (c22) {};
+\node[linknode] (lc5) at (c30) {};
+
+% captions under each matrix
+\node[anchor=north] (l1) at ([xshift=0.5em,yshift=-0.5em]a10.south) {\scriptsize{S - T}};
+\node[anchor=north] (l2) at ([xshift=0.5em,yshift=-0.5em]b10.south) {\scriptsize{T - S}};
+\node[anchor=north] (l3) at ([xshift=0.5em,yshift=-0.5em]c10.south) {\scriptsize{交集/并集}};
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/grid-search-1.tex
+++ b/Book/Chapter4/Figures/grid-search-1.tex
+% Grid-search illustration: a 4x4 grid with value labels (0.00 ... 1.00) on the
+% left and lambda_1 ... lambda_M labels below, five red points on the column
+% x=2, four blue points joined by solid blue segments, and a colour legend
+% (fixed / valid choices / invalid choices).
+% NOTE(review): the axis labels are positioned relative to a node named n1, but
+% no node with that name is created in this picture -- the "\draw [-](n1) ..."
+% lines below read (n1)...(n5) as coordinates, they do not define them.
+% Presumably these names are left over from a previously typeset picture;
+% confirm, because the label placement depends on them.
+\begin{tikzpicture}
+\begin{scope}[scale=0.62] 
+{\tiny
+% background grid
+\draw[step=1,help lines,color=black] (0,0) grid (4,4); 
+
+% value labels (0.00, 0.01, 0.02, ..., 1.00), chained from y2
+\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};
+\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
+\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
+\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
+\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
+
+% parameter labels (lambda_1 ... lambda_M), chained from x1
+\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};
+\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
+\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
+\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
+\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
+
+% short tick marks above the top edge of the grid, one per column
+\draw [-](n1) (0,4) -- (0,4.4);
+\draw [-](n2) (1,4) -- (1,4.4);
+\draw [-](n3) (2,4) -- (2,4.4);
+\draw [-](n4) (3,4) -- (3,4.4);
+\draw [-](n5) (4,4) -- (4,4.4);
+
+% red points: every grid position on the column x=2
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r31) at (2,4) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r32) at (2,0) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r33) at (2,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r34) at (2,3) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r35) at (2,1) {};
+
+% legend: one coloured marker plus a text label per entry
+% NOTE(review): the "valid choices" marker is drawn in purple!30 but filled with
+% ugreen!50, unlike the other two markers whose draw and fill colours match.
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (f11) at ([xshift=0em,yshift=23em]y2.north) {};
+\node[anchor=south] (f12) at ([xshift=5em,yshift=-0.5em]f11.south) {\scriptsize{fixed}};
+\node [anchor=center,draw,circle,inner sep=1.5pt,purple!30,fill=ugreen!50] (f21) at ([xshift=0em,yshift=-4em]f11.north) {};
+\node[anchor=south] (f22) at ([xshift=8.5em,yshift=-0.5em]f21.south) {\scriptsize{valid choices}};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (f31) at ([xshift=0em,yshift=-4em]f21.north) {};
+\node[anchor=south] (f32) at ([xshift=9.5em,yshift=-0.5em]f31.south) {\scriptsize{invalid choices}};
+
+% dashed red segments between (1,2), (3,2) and the points on column x=2;
+% solid blue segments between the blue points
+\draw [-,very thick,red!50, dashed] (1,2) -- (2,4) -- (3,2) -- (2,3) -- (1,2) -- (3,2) -- (2,1) -- (1,2) -- (2,0) -- (3,2);
+\draw [-,very thick,blue!50] (0,1) -- (1,2);
+\draw [-,very thick,blue!50] (3,2) -- (4,4);
+
+% blue points, placed after the segments so the markers sit on top of the lines
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r11) at (0,1) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r12) at (1,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r14) at (3,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
+}
+\end{scope}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/grid-search-2.tex
+++ b/Book/Chapter4/Figures/grid-search-2.tex
+% Grid-search illustration: a 4x4 grid with value labels (0.00 ... 1.00) on the
+% left and lambda_1 ... lambda_M labels below. The point (2,3) on the column
+% x=2 is drawn in purple and joined to (1,2) and (3,2) by a dashed green path;
+% the other points on that column are red, and four blue points are joined by
+% solid blue segments.
+% NOTE(review): the axis labels are positioned relative to a node named n1, but
+% no node with that name is created in this picture -- the "\draw [-](n1) ..."
+% lines below read (n1)...(n5) as coordinates, they do not define them.
+% Presumably these names are left over from a previously typeset picture;
+% confirm, because the label placement depends on them.
+\begin{tikzpicture}
+\begin{scope}[scale=0.62] 
+{\tiny
+% background grid
+\draw[step=1,help lines,color=black] (0,0) grid (4,4); 
+
+% value labels (0.00, 0.01, 0.02, ..., 1.00), chained from y2
+\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};
+\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
+\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
+\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
+\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
+
+% parameter labels (lambda_1 ... lambda_M), chained from x1
+\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};
+\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
+\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
+\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
+\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
+
+% short tick marks above the top edge of the grid, one per column
+\draw [-](n1) (0,4) -- (0,4.4);
+\draw [-](n2) (1,4) -- (1,4.4);
+\draw [-](n3) (2,4) -- (2,4.4);
+\draw [-](n4) (3,4) -- (3,4.4);
+\draw [-](n5) (4,4) -- (4,4.4);
+
+% red points on the column x=2 (all positions except (2,3))
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r31) at (2,4) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r32) at (2,0) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r33) at (2,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r35) at (2,1) {};
+
+% the point (2,3) is highlighted in purple instead of red
+\node [anchor=center,draw,circle,inner sep=1.5pt,purple!30,fill=purple!30] (r34) at (2,3) {};
+
+% dashed red segments between (1,2), (3,2) and the points on column x=2;
+% solid blue segments between the blue points; the dashed green path is drawn
+% last, over the red segments that pass through (2,3)
+\draw [-,very thick,red!50, dashed] (1,2) -- (2,4) -- (3,2) -- (2,3) -- (1,2) -- (3,2) -- (2,1) -- (1,2) -- (2,0) -- (3,2);
+\draw [-,very thick,blue!50] (0,1) -- (1,2);
+\draw [-,very thick,blue!50] (3,2) -- (4,4);
+\draw [-,very thick,ugreen!50, dashed] (1,2) -- (2,3) -- (3,2);
+
+% blue points, placed after the segments so the markers sit on top of the lines
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r11) at (0,1) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r12) at (1,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r14) at (3,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
+}
+\end{scope}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/hierarchical-phrase-rule-match-generate.tex
+++ b/Book/Chapter4/Figures/hierarchical-phrase-rule-match-generate.tex
+%------------------------------------------------------------------------------------------------------------
+%%% Tree-based decoding (chart-based decoding): a hierarchical phrase rule is
+%%% matched against the input string and X_1 is replaced by span hypotheses.
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+
+% input string
+\node[anchor=north] (q1) at (0,0)
+    {\scriptsize\sffamily\bfseries{输入字符串：}};
+\node[anchor=west] (q2) at ([yshift=-2em]q1.west)
+    {\footnotesize{进口$\quad$和$\quad$出口$\quad$大幅度$\quad$下降$\quad$了}};
+
+% small filled square below the input string
+\node[anchor=north,fill=blue!20,minimum height=1em,minimum width=1em] (f1)
+    at ([xshift=-4.1em,yshift=-0.8em]q2.south) {};
+
+% matched rule
+\node[anchor=east] (n1) at ([xshift=1em,yshift=-2em]q2.west)
+    {\scriptsize\sffamily\bfseries{匹配规则：}};
+\node[anchor=west] (n2) at (n1.east)
+    {\scriptsize{$\textrm{X} \to  \langle\ \textrm{X}_1\ \text{大幅度}\ \text{下降}\ \text{了},\ \textrm{X}_1\ \textrm{have}\ \textrm{drastically}\ \textrm{fallen}\ \rangle$}};
+
+% brace under the leading part of the input string
+\draw[decorate,decoration={mirror,brace}]
+    ([xshift=0.5em,yshift=-1em]q2.west) -- ([xshift=7em,yshift=-1em]q2.west)
+    node[yshift=-1em,align=center] (label1) {};
+
+% both hypothesis lists are typeset in \scriptsize; the em-based offsets are
+% evaluated in that font size as well
+{\scriptsize
+% hypotheses of span [0,3]
+\node[anchor=west] (h1) at ([xshift=1em,yshift=-7em]q2.west) {Span[0,3]下的翻译假设：};
+\node[anchor=west] (h2) at ([yshift=-1.3em]h1.west) {X: the imports and exports};
+\node[anchor=west] (h3) at ([yshift=-1.3em]h2.west) {X: imports and exports};
+\node[anchor=west] (h4) at ([yshift=-1.3em]h3.west) {X: exports and imports};
+\node[anchor=west] (h5) at ([yshift=-1.3em]h4.west) {X: the imports and the exports};
+\node[anchor=west] (h6) at ([yshift=-1.3em]h5.west) {S: the import and export};
+
+% hypotheses generated after substituting X_1
+\node[anchor=west] (h21) at ([xshift=9em]h1.east) {替换$\textrm{X}_1$后生成的翻译假设：};
+\node[anchor=west] (h22) at ([yshift=-1.3em]h21.west) {X: the imports and exports have drastically fallen};
+\node[anchor=west] (h23) at ([yshift=-1.3em]h22.west) {X: imports and exports have drastically fallen};
+\node[anchor=west] (h24) at ([yshift=-1.3em]h23.west) {X: exports and imports have drastically fallen};
+\node[anchor=west] (h25) at ([yshift=-1.3em]h24.west) {X: the imports and the exports have drastically fallen};
+}
+
+% frames around the two hypothesis lists
+\node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h1) (h5) (h6)] (gl1) {};
+\node [rectangle,inner sep=0.1em,rounded corners=1pt,draw] [fit = (h21) (h25)] (gl2) {};
+
+% arrows from the rule and from the left list into the right list
+\draw [->,ublue,thick] ([xshift=0.6em,yshift=0.2em]n2.south)
+    .. controls +(south:2em) and +(east:0em) .. ([yshift=2em]gl2.west);
+\draw [->,ublue,thick] ([yshift=1em]gl1.east)
+    .. controls +(north:2.2em) and +(east:0em) .. ([yshift=2em]gl2.west);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
--- a/Book/Chapter4/Figures/hyper-graph-representation-of-machine-translation-derivation.tex
+++ b/Book/Chapter4/Figures/hyper-graph-representation-of-machine-translation-derivation.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Hypergraph representation of machine translation derivations
+\begin{center}
+\begin{tikzpicture}
+
+{\footnotesize
+
+{
+% hypergraph items (boxed)
+\node [anchor=north,inner sep=0em,draw,minimum height=1.1em,minimum width=9em] (n1) at (0,0) {X | 0,4 | the  answer | a  question };
+\node [anchor=west,inner sep=0em,draw,minimum height=1.1em,minimum width=9em] (n2) at ([xshift=5em,yshift=0em]n1.east) {X | 0,4 | a  question | the  answer };
+\node [anchor=north,inner sep=0em,draw,minimum height=1.1em,minimum width=9em] (n3) at ([xshift=0em,yshift=-12em]n1.south) {X | 0,2 | the  answer | NA };
+\node [anchor=west,inner sep=0em,draw,minimum height=1.1em,minimum width=9em] (n4) at ([xshift=8em,yshift=0em]n3.east) {X | 3,4 | a  question | NA};
+
+% goal item. It has its own name (goalitem) because the name s1 is used for a
+% rule label further down; sharing the name made the figure depend on the
+% order in which the nodes and arrows are declared.
+\node [anchor=south,inner sep=0em,draw,minimum height=1.2em,minimum width=4em] (goalitem) at ([xshift=9em,yshift=4em]n1.north) {goal item};
+
+% source words with their translations underneath
+\node [anchor=north] (n5) at ([xshift=0em,yshift=-3em]n3.south) {$\textrm{对}_0 \textrm{回答}_1$};
+\node [anchor=north] (n51) at ([xshift=0em,yshift=0em]n5.south) {with the answer};
+\node [anchor=west] (n6) at ([xshift=6em,yshift=0em]n5.east) {$\textrm{有}_2$};
+\node [anchor=north] (n61) at ([xshift=0em,yshift=0em]n6.south) {have};
+\node [anchor=west] (n7) at ([xshift=6em,yshift=0em]n6.east) {$\textrm{疑问}_3$};
+\node [anchor=north] (n71) at ([xshift=0em,yshift=0em]n7.south) {a question};
+
+% edges from the two top items into the goal item
+\draw[->,thick] ([xshift=-3em,yshift=0em]n1.north) .. controls +(north:3em) and +(south:1em)  .. ([xshift=-1em,yshift=0em]goalitem.south);
+\draw[->, dotted, thick] ([xshift=4em,yshift=0em]n2.north) .. controls +(north:3em) and +(south:1em)  .. ([xshift=1em,yshift=0em]goalitem.south);
+}
+
+% rule labels attached to the hyperedges
+{\tiny
+\node [anchor=south] (s1) at ([xshift=0em,yshift=1.5em]n5.north) {$\textrm{NP} \to \langle\ \textrm{NP}(\textrm{NN}(\textrm{回答})), \textrm{回答} \rangle$};
+\node [anchor=south] (s2) at ([xshift=0em,yshift=1.5em]n7.north) {$\textrm{NP} \to \langle\ \textrm{NP}(\textrm{疑问}), \textrm{疑问} \rangle$};
+\node [anchor=south] (s3) at ([xshift=-8em,yshift=6em]n3.north) {$\textrm{NP} \to \langle\ \textrm{VP}(\textrm{NP}_1\ \textrm{VP}(\textrm{VV}(\textrm{有})\ \textrm{NN}_2)), $};
+\node [anchor=west] (s31) at ([xshift=0em,yshift=-1.5em]s3.west) {$\textrm{have}\ \textrm{NP}_1\ \textrm{NN}_2 \rangle$};
+\node [anchor=south] (s4) at ([xshift=14em,yshift=1em]s3.north) {$\textrm{NP} \to \langle\ \textrm{VP}(\textrm{NP}_1\ \textrm{VP}(\textrm{VV}(\textrm{有})\ \textrm{NN}_2)), $};
+\node [anchor=west] (s41) at ([xshift=8.8em,yshift=-1.3em]s4.west) {$\textrm{NP}_1\ \textrm{possess}\ \textrm{NN}_2 \rangle$};
+\node [anchor=south] (s5) at ([xshift=14em,yshift=1em]s4.north) {$\textrm{NP} \to \langle\ \textrm{VP}(\textrm{NP}_1\ \textrm{VP}(\textrm{VV}(\textrm{有})\ \textrm{NN}_2)), $};
+\node [anchor=west] (s51) at ([xshift=8.8em,yshift=-1.2em]s5.west) {$\textrm{NP}_1\ \textrm{exist}\ \textrm{NN}_2 \rangle$};
+\node [anchor=south] (s6) at ([xshift=14em,yshift=-6em]s5.north) {$\textrm{NP} \to \langle\ \textrm{VP}(\textrm{NP}_1\ \textrm{VP}(\textrm{VV}(\textrm{有})\ \textrm{NN}_2)), $};
+\node [anchor=west] (s61) at ([xshift=8.8em,yshift=-1.4em]s6.west) {$\textrm{there is}\ \textrm{NP}_1\ \textrm{NN}_2 \rangle$};
+
+\node [anchor=south] (s7) at ([xshift=4em,yshift=1em]n1.north) {$\textrm{S} \to \langle\ \textrm{NP}_1,\textrm{NP}_1 \rangle$};
+\node [anchor=south] (s8) at ([xshift=-4em,yshift=1em]n2.north) {$\textrm{S} \to \langle\ \textrm{NN}_2,\textrm{NN}_2 \rangle$};
+}
+
+
+% hyperedges: each line style (dash dot dot / solid / dashed / dotted) groups
+% the three tails that share one rule label and the edge from that label to
+% the item it produces
+{
+\draw[->, dash dot dot, thick] ([xshift=0em,yshift=0em]n3.north) .. controls +(north:1.4em) and +(south:3em)  .. ([xshift=2em,yshift=-0em]s3.south);
+\draw[->, dash dot dot, thick] ([xshift=0em,yshift=0em]n6.north west) .. controls +(north:8em) and +(south:2.5em)  .. ([xshift=2em,yshift=-0em]s3.south);
+\draw[->, dash dot dot, thick] ([xshift=0em,yshift=0em]n4.west) .. controls +(north:3em) and +(south:1em)  .. ([xshift=2em,yshift=-0em]s3.south);
+
+\draw[->, thick] ([xshift=2em,yshift=0em]n3.north) .. controls +(north:3em) and +(south:3em)  .. (s41.south);
+\draw[->, thick] ([xshift=-0.5em,yshift=0em]n6.north) .. controls +(north:8em) and +(south:2em)  .. (s41.south);
+\draw[->, thick] ([xshift=1em,yshift=0em]n4.north west) .. controls +(north:4em) and +(south:3em)  .. (s41.south);
+
+\draw[->, dashed, thick] ([xshift=-1em,yshift=0em]n3.north east) .. controls +(north:4em) and +(south:4em)  .. ([xshift=-3em,yshift=0em]s51.south);
+\draw[->, dashed, thick] ([xshift=0em,yshift=0em]n6.north) .. controls +(north:10em) and +(south:4em)  .. ([xshift=-3em,yshift=0em]s51.south);
+\draw[->, dashed, thick] ([xshift=-2em,yshift=0em]n4.north) .. controls +(north:4em) and +(south:3em)  .. ([xshift=-3em,yshift=0em]s51.south);
+
+\draw[->, dotted, thick] ([xshift=0em,yshift=0em]n3.east) .. controls +(north:3em) and +(south:2em)  .. ([xshift=-3em,yshift=0em]s6.south);
+\draw[->, dotted, thick] ([xshift=0.5em,yshift=0em]n6.north) .. controls +(north:7.5em) and +(south:2.5em)  .. ([xshift=-3em,yshift=0em]s6.south);
+\draw[->, dotted, thick] ([xshift=0em,yshift=0em]n4.north) .. controls +(north:4em) and +(south:4em)  .. ([xshift=-3em,yshift=0em]s6.south);
+
+\draw[->, dash dot dot, thick] ([xshift=2em,yshift=0em]s3.north) .. controls +(north:1em) and +(south:3em)  .. ([xshift=-2em,yshift=0em]n1.south);
+\draw[->, thick] ([xshift=2em,yshift=0em]s4.north) .. controls +(north:1em) and +(south:2em)  .. ([xshift=2em,yshift=0em]n1.south);
+\draw[->, dashed, thick] ([xshift=0em,yshift=0em]s5.north) .. controls +(north:0.5em) and +(south:1em)  .. ([xshift=-2em,yshift=0em]n2.south);
+\draw[->, dotted, thick] ([xshift=-2.8em,yshift=0em]s6.north) .. controls +(north:0.5em) and +(south:3em)  .. ([xshift=2em,yshift=0em]n2.south);
+}
+
+% word -> lexical rule label -> item
+\draw [->] ([xshift=0em,yshift=0em]n5.north) -- ([xshift=0em,yshift=0em]s1.south);
+\draw [->] ([xshift=0em,yshift=0em]s1.north) -- ([xshift=0em,yshift=0em]n3.south);
+\draw [->,dashed] ([xshift=0em,yshift=0em]n7.north) -- ([xshift=0.05em,yshift=0em]s2.south);
+\draw [->,dashed] ([xshift=0em,yshift=0em]s2.north) -- ([xshift=0.45em,yshift=0em]n4.south);
+
+}
+
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/judge-type-of-reorder-method.tex
+++ b/Book/Chapter4/Figures/judge-type-of-reorder-method.tex
+%------------------------------------------------------------------------------------------------------------
+%%% Reordering model 2: the MSD model
+\begin{center}
+\begin{tikzpicture}
+% node styles; alignedcell bundles the options shared by all alignment cells
+\tikzset{
+  elementnode/.style={rectangle,anchor=center},
+  srcnode/.style={font=\small,anchor=south west},
+  tgtnode/.style={left,font=\small,anchor=north east},
+  alignmentnode/.style={rectangle,draw,minimum height=3.6*1.1cm,minimum width=0.36*1.1cm},
+  probnode/.style={fill=blue!30,minimum width=0.4*1.1cm},
+  labelnode/.style={above},
+  alignedcell/.style={align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt}
+}
+
+\begin{scope}[scale=0.9,yshift=0.12in]
+
+% alignment matrix 1 (left): 6x6 grid of small dots named a<i><j>
+\foreach \i in {0,...,5} {
+  \foreach \j in {0,...,5} {
+    \node[elementnode,minimum size=0.6*1.1cm*0.15,inner sep=0.1pt,fill=blue] (a\i\j)
+      at (0.5*1.1cm*\i-5.4*0.5*1.1cm,0.5*1.1cm*\j-1.05*1.1cm) {};
+  }
+}
+
+% labels t_1 ... t_6 along the top of matrix 1
+\node[srcnode] (src01) at (-5.9*0.5*1.1cm,-1.05*1.1cm+5.4*0.5*1.1cm) {\scriptsize{$t_1$}};
+\node[srcnode] (src02) at ([xshift=0.5*1.1cm]src01.south west) {\scriptsize{$t_2$}};
+\node[srcnode] (src03) at ([xshift=0.5*1.1cm]src02.south west) {\scriptsize{$t_3$}};
+\node[srcnode] (src04) at ([xshift=0.5*1.1cm]src03.south west) {\scriptsize{$t_4$}};
+\node[srcnode] (src05) at ([xshift=0.5*1.1cm]src04.south west) {\scriptsize{$t_5$}};
+\node[srcnode] (src06) at ([xshift=0.5*1.1cm]src05.south west) {\scriptsize{$t_6$}};
+
+% labels s_1 ... s_6 down the left side of matrix 1
+\node[tgtnode] (tgt01) at (-6.0*0.5*1.1cm,-1.05*1.1cm+5.4*0.5*1.1cm) {\scriptsize{$s_1$}};
+\node[tgtnode] (tgt02) at ([yshift=-0.5*1.1cm]tgt01.north east) {\scriptsize{$s_2$}};
+\node[tgtnode] (tgt03) at ([yshift=-0.5*1.1cm]tgt02.north east) {\scriptsize{$s_3$}};
+\node[tgtnode] (tgt04) at ([yshift=-0.5*1.1cm]tgt03.north east) {\scriptsize{$s_4$}};
+\node[tgtnode] (tgt05) at ([yshift=-0.5*1.1cm]tgt04.north east) {\scriptsize{$s_5$}};
+\node[tgtnode] (tgt06) at ([yshift=-0.5*1.1cm]tgt05.north east) {\scriptsize{$s_6$}};
+
+% alignment matrix 2 (right): 6x6 grid of small dots named b<i><j>
+\foreach \i in {0,...,5} {
+  \foreach \j in {0,...,5} {
+    \node[elementnode,minimum size=0.6*1.1cm*0.15,inner sep=0.1pt,fill=blue] (b\i\j)
+      at (0.5*1.1cm*\i+4*0.5*1.1cm,0.5*1.1cm*\j-1.05*1.1cm) {};
+  }
+}
+
+% labels t_1 ... t_6 along the top of matrix 2
+\node[srcnode] (src11) at (3.5*0.5*1.1cm,-1.05*1.1cm+5.4*0.5*1.1cm) {\scriptsize{$t_1$}};
+\node[srcnode] (src12) at ([xshift=0.5*1.1cm]src11.south west) {\scriptsize{$t_2$}};
+\node[srcnode] (src13) at ([xshift=0.5*1.1cm]src12.south west) {\scriptsize{$t_3$}};
+\node[srcnode] (src14) at ([xshift=0.5*1.1cm]src13.south west) {\scriptsize{$t_4$}};
+\node[srcnode] (src15) at ([xshift=0.5*1.1cm]src14.south west) {\scriptsize{$t_5$}};
+\node[srcnode] (src16) at ([xshift=0.5*1.1cm]src15.south west) {\scriptsize{$t_6$}};
+
+% labels s_1 ... s_6 down the left side of matrix 2
+\node[tgtnode] (tgt11) at (3.4*0.5*1.1cm,-1.05*1.1cm+5.4*0.5*1.1cm) {\scriptsize{$s_1$}};
+\node[tgtnode] (tgt12) at ([yshift=-0.5*1.1cm]tgt11.north east) {\scriptsize{$s_2$}};
+\node[tgtnode] (tgt13) at ([yshift=-0.5*1.1cm]tgt12.north east) {\scriptsize{$s_3$}};
+\node[tgtnode] (tgt14) at ([yshift=-0.5*1.1cm]tgt13.north east) {\scriptsize{$s_4$}};
+\node[tgtnode] (tgt15) at ([yshift=-0.5*1.1cm]tgt14.north east) {\scriptsize{$s_5$}};
+\node[tgtnode] (tgt16) at ([yshift=-0.5*1.1cm]tgt15.north east) {\scriptsize{$s_6$}};
+
+% matrix 1: blue alignment cells plus one larger "M" and one larger "S" cell
+\node[alignedcell,fill=blue!50] (la1) at (a23) {};
+\node[alignedcell,fill=blue!50] (la2) at (a22) {};
+\node[alignedcell,fill=blue!50] (la3) at (a31) {};
+\node[alignedcell,minimum size=0.5cm,fill=red!50] (la4) at (a14) {M};
+\node[alignedcell,minimum size=0.5cm,fill=ugreen!50] (la5) at (a44) {S};
+
+% matrix 2: blue, red and green alignment cells
+\node[alignedcell,fill=blue!50] (lb1) at (b23) {};
+\node[alignedcell,fill=blue!50] (lb2) at (b22) {};
+\node[alignedcell,fill=blue!50] (lb3) at (b31) {};
+\node[alignedcell,fill=red!50] (lb4) at (b05) {};
+\node[alignedcell,fill=red!50] (lb5) at (b14) {};
+\node[alignedcell,fill=ugreen!50] (lb6) at (b45) {};
+\node[alignedcell,fill=ugreen!50] (lb7) at (b54) {};
+
+% shaded boxes behind groups of cells
+\begin{pgfonlayer}{background}
+\node [rectangle,thick,inner sep=0.4em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a23) (a31)] (phrase1) {};
+\node [rectangle,thick,inner sep=0.4em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (b23) (b31)] (phrase2) {};
+\node [rectangle,thick,inner sep=0.4em,fill=red!40,drop shadow,fill opacity=0.85] [fit = (b05) (b14)] (phrase3) {};
+\node [rectangle,thick,inner sep=0.4em,fill=ugreen!40,drop shadow,fill opacity=0.85] [fit = (b45) (b54)] (phrase4) {};
+\end{pgfonlayer}
+
+% letters placed on the green and red boxes of matrix 2
+\node[anchor=north] (s1) at ([xshift=0.6em,yshift=0.1em]b45.east) {S};
+\node[anchor=north] (m1) at ([xshift=0.6em,yshift=0.1em]b05.east) {M};
+
+% captions under the two matrices
+\node[anchor=north] (l1) at ([xshift=1.8em,yshift=-0.5em]a10.south) {\scriptsize{基于词}};
+\node[anchor=north] (l2) at ([xshift=2.2em,yshift=-0.5em]b10.south) {\scriptsize{基于短语}};
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/long-distance-dependence-in-zh2en-translation.tex
+++ b/Book/Chapter4/Figures/long-distance-dependence-in-zh2en-translation.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Phrase -> syntax
+\begin{tikzpicture}
+
+% Chinese sentence: the blue and red segments are linked by a curved arrow
+\node[fill=blue!30, inner sep=0.05cm, anchor=west] (sp1) at (0, 0) {进口\ \ };
+\node[anchor=west] (sp2) at (2.5em, 0) {在 过去的 五 到 十 年 间};
+\node[fill=red!30, inner sep=0.05cm, anchor=west] (sp3) at (14em, 0) {有了 大幅度 下降};
+\path[draw, ->] (sp1) edge[out=15, in=170] (sp3);
+
+% English sentence: the same two colours, linked by a curved arrow
+\node[fill=blue!30, inner sep=0.05cm, anchor=west] (tp1) at (0, -0.8) {the imports};
+\node[fill=red!30, inner sep=0.05cm, anchor=west] (tp2) at (5.3em, -0.8) {drastically fell};
+\node[anchor=west] (tp3) at (11.3em, -0.8) {in the past five to ten years};
+\path[->] (tp1) edge[out=30, in=150] (tp2);
+
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/minimum-rule-from-tree-cutting-1.tex
+++ b/Book/Chapter4/Figures/minimum-rule-from-tree-cutting-1.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Minimal rules: table listing the rules r_1 ... r_7
+\begin{minipage}[b]{0.35\textwidth}
+{\footnotesize
+% looser row spacing for this table only; restored after the tabular
+\renewcommand*{\arraystretch}{1.3}
+\begin{tabular}{l l}
+{$r_1$} & {NP(PN(他)) $\to$  he} \\
+{$r_2$} & {P(对) $\to$  with} \\
+{$r_3$} & {NP(NN(回答)) $\to$  the answer} \\
+% r_4: the closing parenthesis of VP(...) was missing
+{$r_4$} & {VP(VV(表示) NN(满意)) $\to$} \\
+      & {satisfied} \\
+{$r_5$} & {PP(P$_1$ NP$_2$) $\to$} \\
+      & {P$_1$ NP$_2$} \\
+{$r_6$} & {VP(PP$_1$ VP$_2$) $\to$} \\
+      & {VP$_2$ PP$_1$} \\
+{$r_7$} & {IP(NP$_1$ VP$_2$) $\to$} \\
+      & {NP$_1$ VP$_2$} \\
+      & \\
+\end{tabular}
+\renewcommand*{\arraystretch}{1.0}
+}
+\end{minipage}
\ No newline at end of file
--- a/Book/Chapter4/Figures/minimum-rule-from-tree-cutting-2.tex
+++ b/Book/Chapter4/Figures/minimum-rule-from-tree-cutting-2.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Minimal rules: boxed tree fragments numbered 1-7 above the English words
+\begin{minipage}[t]{0.47\textwidth}
+\begin{tikzpicture}
+
+{\scriptsize
+\begin{scope}
+
+% tree fragments, each one a boxed node that contains a small tree
+\begin{scope}[level distance=20pt]
+\node[draw,scale=0.8,inner sep=0.1cm,align=center] (cfrag1) at (0,0.25)
+   {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[draw,scale=0.8,inner sep=0.1cm,align=center,anchor=south west] (cfrag2)
+   at ([xshift=1.2em]cfrag1.south east)
+   {\Tree[.\node(sn3){P}; 对 ]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[draw,scale=0.8,inner sep=0.1cm,align=center,anchor=south west] (cfrag3)
+   at ([xshift=1.2em]cfrag2.south east)
+   {\Tree[.\node(sn4){NP}; [.NN 回答 ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=15pt,level distance=20pt]
+\node[draw,scale=0.8,inner sep=0.1cm,align=center,anchor=south west] (cfrag4)
+   at ([xshift=1.4em]cfrag3.south east)
+   {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 满意 ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=20pt,level distance=25pt]
+\node[draw,scale=0.8,inner sep=0.1cm,align=center,anchor=south west] (cfrag5)
+   at ([xshift=0.3em,yshift=2.5em]cfrag2.north west)
+   {\Tree[.\node(sn8){PP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=60pt]
+\node[draw,scale=0.8,inner sep=0.1cm,align=center,anchor=south west] (cfrag6)
+   at ([xshift=1.6em,yshift=0.8em]cfrag5.north west)
+   {\Tree[.\node(sn11){VP}; [.\node(sn12){PP}; ] [.\node(sn13){VP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=80pt,level distance=18pt]
+\node[draw,scale=0.8,inner sep=0.1cm,align=center,anchor=south east] (cfrag7)
+   at ([xshift=-3.6em,yshift=0.8em]cfrag6.north east)
+   {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]};
+\end{scope}
+
+% English words, laid out left to right below the fragments
+\node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south) {he};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([xshift=0.5em,yshift=-0.1em]tw11.east) {was};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([xshift=0.5em,yshift=0.1em]tw12.east) {satisfied};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east) {with};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east) {the};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([xshift=0.5em,yshift=-0.1em]tw15.east) {answer};
+
+% dashed links from the bottom fragments to the English words
+\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
+\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
+\draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north);
+
+% vertical connectors with a dot at both ends, rising from fragments 1-6
+\draw[*-*] ([yshift=-0.2em]cfrag1.north) -- ([yshift=11.3em]cfrag1.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=0.1em,yshift=2.9em]cfrag2.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) -- ([xshift=0.1em,yshift=0.9em]cfrag3.north);
+\draw[*-*] ([yshift=-0.2em]cfrag4.north) -- ([yshift=5.7em]cfrag4.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=1em]cfrag5.north);
+\draw[*-*] ([yshift=-0.2em]cfrag6.north) -- ([yshift=1em]cfrag6.north);
+
+% numbered badges on a corner of each fragment
+\node [fill=blue,circle,inner sep=2pt] (rlabel1) at (cfrag1.south east) {{\color{white} \tiny{1}}};
+\node [fill=blue,circle,inner sep=2pt] (rlabel2) at (cfrag2.south east) {{\color{white} \tiny{2}}};
+\node [fill=blue,circle,inner sep=2pt] (rlabel3) at (cfrag3.south east) {{\color{white} \tiny{3}}};
+\node [fill=blue,circle,inner sep=2pt] (rlabel4) at (cfrag4.south east) {{\color{white} \tiny{4}}};
+\node [fill=blue,circle,inner sep=2pt] (rlabel5) at (cfrag5.north west) {{\color{white} \tiny{5}}};
+\node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{6}}};
+\node [fill=blue,circle,inner sep=2pt] (rlabel7) at (cfrag7.south west) {{\color{white} \tiny{7}}};
+
+\end{scope}
+}
+\end{tikzpicture}
+\end{minipage}
\ No newline at end of file
--- a/Book/Chapter4/Figures/one-best-node-alignment-and-alignment-matrix.tex
+++ b/Book/Chapter4/Figures/one-best-node-alignment-and-alignment-matrix.tex
+% Column types C / R / L: fixed-width p{} columns that are centred, ragged-left
+% or ragged-right. \PreserveBackslash saves \\ before applying the alignment
+% declaration and restores it afterwards, so \\ still ends the table row.
+\newcommand{\PreserveBackslash}[1]{%
+  \let\temp=\\%
+  #1%
+  \let\\=\temp
+}
+\newcolumntype{C}[1]{>{\PreserveBackslash\centering}p{#1}}
+\newcolumntype{R}[1]{>{\PreserveBackslash\raggedleft}p{#1}}
+\newcolumntype{L}[1]{>{\PreserveBackslash\raggedright}p{#1}}
+
+% Panel (a): an English tree and a Chinese tree on the left, followed by two
+% 5x5 alignment matrices (1-best and posterior).
+\begin{flushright}
+\begin{tikzpicture}
+% collabel: rotated column header; aligncell: highlighted matrix cell
+\tikzset{
+  collabel/.style={anchor=west, rotate=60},
+  aligncell/.style={fill=blue!40, inner sep=1pt, minimum size=12pt}
+}
+
+\begin{scope}[scale=0.47]
+
+{\Large
+% English tree (grows downwards)
+\begin{scope}[sibling distance=17pt, level distance = 35pt]
+\Tree[.\node(en1){VP$^{[1]}$};
+        [.\node(en2){VBZ$^{[2]}$}; have ]
+        [.\node(en3){ADVP$^{[3]}$};
+            [.\node(en4){RB$^{[4]}$}; drastically ]
+            [.\node(en5){VBN$^{[5]}$}; fallen ]
+        ]
+     ]
+\end{scope}
+
+% Chinese tree (grows upwards)
+\begin{scope}[grow'=up, yshift=-3.3in, sibling distance=32pt, level distance = 35pt]
+\Tree[.\node(cn1){VP$^{[1]}$};
+        [.\node(cn2){AD$^{[2]}$}; 大幅度 ]
+        [.\node(cn3){VP$^{[3]}$};
+            [.\node(cn4){VV$^{[4]}$}; 减少 ]
+            [.\node(cn5){AS$^{[5]}$}; 了 ]
+        ]
+     ]
+\end{scope}
+}
+
+% Matrix 1: 1-best alignment
+\begin{scope}[xshift=2.3in, yshift=-0.3in]
+% column headers
+\node[collabel] at (0.8,-0.6) {VP$^{[1]}$};
+\node[collabel] at (1.8,-0.6) {VBZ$^{[2]}$};
+\node[collabel] at (2.8,-0.6) {ADVP$^{[3]}$};
+\node[collabel] at (3.8,-0.6) {RB$^{[4]}$};
+\node[collabel] at (4.8,-0.6) {VBN$^{[5]}$};
+
+% row labels
+\node at (6.5,-1) {VP$^{[1]}$};
+\node at (6.5,-2) {AD$^{[2]}$};
+\node at (6.5,-3) {VP$^{[3]}$};
+\node at (6.5,-4) {VV$^{[4]}$};
+\node at (6.5,-5) {AS$^{[5]}$};
+
+% small dot in every cell
+\foreach \i in {1,...,5}{
+    \foreach \j in {-5,...,-1}{
+        \node[fill=blue!40,scale=0.2] at (\i,\j) {};
+    }
+}
+
+% cells marked "1"
+\node[aligncell, scale=1.1] at (1,-1) {{\color{white} 1}};
+\node[aligncell, scale=1.1] at (2,-5) {{\color{white} 1}};
+\node[aligncell, scale=1.1] at (4,-2) {{\color{white} 1}};
+\node[aligncell, scale=1.1] at (5,-4) {{\color{white} 1}};
+
+\node at (4,-6.3) {{\color{blue!40} $\blacksquare$} = fixed alignment};
+\node at (4,-7.2) {Matrix 1: 1-best alignment};
+
+\end{scope}
+
+% Matrix 2: posterior
+\begin{scope}[xshift=6.1in, yshift=-0.3in]
+% column headers
+\node[collabel] at (0.8,-0.6) {VP$^{[1]}$};
+\node[collabel] at (1.8,-0.6) {VBZ$^{[2]}$};
+\node[collabel] at (2.8,-0.6) {ADVP$^{[3]}$};
+\node[collabel] at (3.8,-0.6) {RB$^{[4]}$};
+\node[collabel] at (4.8,-0.6) {VBN$^{[5]}$};
+
+% row labels
+\node at (6.5,-1) {VP$^{[1]}$};
+\node at (6.5,-2) {AD$^{[2]}$};
+\node at (6.5,-3) {VP$^{[3]}$};
+\node at (6.5,-4) {VV$^{[4]}$};
+\node at (6.5,-5) {AS$^{[5]}$};
+
+% small dot in every cell
+\foreach \i in {1,...,5}{
+    \foreach \j in {-5,...,-1}{
+        \node[fill=blue!40,scale=0.2] at (\i,\j) {};
+    }
+}
+
+% weighted cells; each cell is scaled individually next to its printed value
+\node[aligncell, scale=1.1] at (1,-1) {{\color{white} \small{.9}}};
+\node[aligncell, scale=0.5] at (1,-3) {{\color{white} \small{.1}}};
+\node[aligncell, scale=0.5] at (2,-2) {{\color{white} \small{.1}}};
+\node[aligncell, scale=0.8] at (2,-3) {{\color{white} \small{.6}}};
+\node[aligncell, scale=0.8] at (2,-5) {{\color{white} \small{.6}}};
+\node[aligncell, scale=0.5] at (3,-1) {{\color{white} \small{.1}}};
+\node[aligncell, scale=0.5] at (3,-2) {{\color{white} \small{.1}}};
+\node[aligncell, scale=0.5] at (3,-3) {{\color{white} \small{.1}}};
+\node[aligncell, scale=1.0] at (4,-2) {{\color{white} \small{.8}}};
+\node[aligncell, scale=0.6] at (5,-3) {{\color{white} \small{.2}}};
+\node[aligncell, scale=0.7] at (5,-5) {{\color{white} \small{.4}}};
+\node[aligncell, scale=0.65] at (3,-4) {{\color{white} \small{.3}}};
+\node[aligncell, scale=0.9] at (5,-4) {{\color{white} \small{.7}}};
+
+\node at (4,-6.3) {{\color{blue!40} $\blacksquare$} = possible alignment};
+\node at (4,-7.2) {Matrix 2: posterior};
+
+\node at (9,-7.2) {};% placeholder
+\end{scope}
+
+\end{scope}
+
+\end{tikzpicture}
+\\[0.8em]
+\end{flushright}
+\begin{center}
+\vspace{-1em}
+(a) Sub-tree alignment matrices for a sample sub-tree pair
+\end{center}
+
+% Panel (b): minimal rules extracted from Matrix 1 (left table) and from
+% Matrix 2 (right table), set side by side in a two-column outer tabular.
+\begin{center}
+\begin{tabular}[t]{C{0.48\linewidth} C{0.48\linewidth} }
+
+% left table: rules from the 1-best alignment
+\begin{tabular}{l L{150pt}}
+\multicolumn{2}{l}{\textbf{\footnotesize{Minimal Rules}}} \\
+\multicolumn{2}{l}{\textbf{\footnotesize{Extracted from Matrix 1 (1-best)}}} \\
+\hline
+\footnotesize{$r_3$} & \footnotesize{AD(大幅度) $\rightarrow$ RB(drastically)} \\
+\footnotesize{$r_4$} & \footnotesize{VV(减少) $\rightarrow$ VBN(fallen)} \\
+\footnotesize{$r_6$} & \footnotesize{AS(了) $\rightarrow$ VBZ(have)} \\
+\footnotesize{$r_8$} & \footnotesize{VP(AD$_1$ VP(VV$_2$ AS$_3$)) $\rightarrow$} \\
+% the closing parenthesis of the outer VP(...) was missing
+                     & \footnotesize{VP(VBZ$_3$ ADVP(RB$_1$ VBN$_2$))} \\
+% empty rows that pad this table to the height of the right-hand one
+\rule{0pt}{11pt} \\
+\\
+\\
+\end{tabular}
+
+&
+
+% right table: rules from the alignment posterior; the header uses the same
+% font size as the left table (it was \small here)
+\begin{tabular}{l L{150pt}}
+\multicolumn{2}{l}{\textbf{\footnotesize{Minimal Rules}}} \\
+\multicolumn{2}{l}{\textbf{\footnotesize{Extracted from Matrix 2 (posterior)}}} \\
+\hline
+\footnotesize{$r_3$} & \footnotesize{AD(大幅度) $\rightarrow$ RB(drastically)} \\
+\footnotesize{$r_4$} & \footnotesize{VV(减少) $\rightarrow$ VBN(fallen)} \\
+\footnotesize{$r_6$} & \footnotesize{AS(了) $\rightarrow$ VBZ(have)} \\
+\footnotesize{$r_8$} & \footnotesize{VP(AD$_1$ VP(VV$_2$ AS$_3$)) $\rightarrow$} \\
+% the closing parenthesis of the outer VP(...) was missing
+                     & \footnotesize{VP(VBZ$_3$ ADVP(RB$_1$ VBN$_2$))} \\
+\footnotesize{$r_{10}$} & \footnotesize{VP(VV(减少) AS(了)) $\rightarrow$ VBN(fallen)} \\
+\footnotesize{$r_{11}$} & \footnotesize{VP(AD$_1$ VP$_2$) $\rightarrow$ VP(VBZ$_1$ ADVP$_2$)} \\
+\footnotesize{...}\\
+\end{tabular}
+
+\\
+
+\end{tabular}
+
+\begin{center}
+\vspace{-2em}
+(b) Rules extracted using 1-best alignment and alignment posterior.
+\end{center}
+
+\end{center}
--- a/Book/Chapter4/Figures/operation-of-tree-replace.tex
+++ b/Book/Chapter4/Figures/operation-of-tree-replace.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Rule instance (tree-to-tree): the tree substitution operation
+%%%  Left: a tree fragment whose NN node has no word below it; middle: the sub-tree NN(满意);
+%%%  right: the same fragment with 满意 attached under NN.
+\begin{center}
+\begin{tikzpicture}
+
+{
+% Tree fragment before substitution; s5 (NN) is the node that will receive the sub-tree.
+\begin{scope}[sibling distance=5pt, level distance = 22pt]
+\Tree[.\node(s1){VP}; [.\node(s2){PP}; ] [.\node(s3){VP}; [.\node(s4){VV}; \node[fill=white](w1){表示}; ] [.\node(s5){NN}; ] ] ]
+\end{scope}
+
+% The sub-tree to substitute in.
+\begin{scope}[xshift=1.2in, yshift=-0.2in, sibling distance=5pt, level distance = 22pt]
+\Tree[.\node(ws1){NN}; \node(ws2){满意}; ]
+\end{scope}
+
+% The option list must be opened with "[": written as "(anchor=west]" the text up to the
+% first ")" is taken as the node name and anchor=west is never applied.
+\node [anchor=west] (arrow) at ([xshift=0.4in]ws2.east) {\large{$\Rightarrow$}};
+
+% Highlight the two NN nodes and link them. This must stay before the third tree,
+% which reuses the names s1..s5 and would otherwise redirect the (s5) reference.
+\begin{pgfonlayer}{background}
+\node [inner sep=0,fill=red!20] [fit = (ws1)] (snode1) {};
+\node [inner sep=0,fill=red!20] [fit = (s5)] (snode2) {};
+\draw [<-,thick,dotted] ([xshift=0.1em]s5.east) ..controls +(east:3em) and +(west:3em).. ([xshift=-0.1em]ws1.west);
+\end{pgfonlayer}
+
+% Result of the substitution: NN now dominates 满意.
+\begin{scope}[xshift=2.5in, sibling distance=5pt, level distance = 22pt]
+\Tree[.\node(s1){VP}; [.\node(s2){PP}; ] [.\node(s3){VP}; [.\node(s4){VV}; \node[fill=white](w1){表示}; ] [.\node(s5){NN}; \node(w2){满意}; ] ] ]
+\end{scope}
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/phrase-extraction-consistent-with-word-alignment-1.tex
+++ b/Book/Chapter4/Figures/phrase-extraction-consistent-with-word-alignment-1.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Phrase extraction: checking one candidate phrase pair against the word alignment
+\begin{center}
+\begin{tikzpicture}
+
+\tikzset{
+  elementnode/.style={rectangle,text=white,anchor=center},
+  srcnode/.style={rotate=45,font=\small,anchor=south west},
+  tgtnode/.style={left,font=\small,anchor=north east},
+  alignmentnode/.style={rectangle,draw,minimum height=3.6*1.1cm,minimum width=0.36*1.1cm},
+  probnode/.style={fill=blue!30,minimum width=0.4*1.1cm},
+  labelnode/.style={above}
+}
+
+% alignment matrix: 8 columns (English words) by 6 rows (Chinese words);
+% the dot in column i, row j is named a<i><j>, and every dot has the same size
+\begin{scope}[scale=0.85,yshift=0.12in]
+\foreach \j in {5,4,...,0}
+    \foreach \i in {0,1,...,7}
+        \node[elementnode,minimum size=0.6*1.1cm*0.15,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.1cm*\i-5.4*0.5*1.1cm,0.5*1.1cm*\j-0.05*1.1cm) {};
+
+% source words, rotated 45 degrees above the columns; each is placed relative to the previous one
+\node[srcnode] (src1) at (-5.4*0.5*1.1cm,-1.05*1.1cm+7.5*0.5*1.1cm) {\scriptsize{The}};
+\node[srcnode] (src2) at ([xshift=0.5*1.1cm]src1.south west) {\scriptsize{weather}};
+\node[srcnode] (src3) at ([xshift=0.5*1.1cm]src2.south west) {\scriptsize{is}};
+\node[srcnode] (src4) at ([xshift=0.5*1.1cm]src3.south west) {\scriptsize{very}};
+\node[srcnode] (src5) at ([xshift=0.5*1.1cm]src4.south west) {\scriptsize{good}};
+\node[srcnode] (src6) at ([xshift=0.5*1.1cm]src5.south west) {\scriptsize{today}};
+\node[srcnode] (src7) at ([xshift=0.5*1.1cm]src6.south west) {\scriptsize{.}};
+\node[srcnode] (src8) at ([xshift=0.5*1.1cm]src7.south west) {\scriptsize{EOS}};
+
+% target words, stacked to the left of the rows
+\node[tgtnode] (tgt1) at (-6.0*0.5*1.1cm,-1.05*1.1cm+7.5*0.5*1.1cm) {\scriptsize{今天}};
+\node[tgtnode] (tgt2) at ([yshift=-0.5*1.1cm]tgt1.north east) {\scriptsize{天气}};
+\node[tgtnode] (tgt3) at ([yshift=-0.5*1.1cm]tgt2.north east) {\scriptsize{真}};
+\node[tgtnode] (tgt4) at ([yshift=-0.5*1.1cm]tgt3.north east) {\scriptsize{好}};
+\node[tgtnode] (tgt5) at ([yshift=-0.5*1.1cm]tgt4.north east) {\scriptsize{。}};
+\node[tgtnode] (tgt6) at ([yshift=-0.5*1.1cm]tgt5.north east) {\scriptsize{EOS}};
+
+% word alignment: a larger square l<i><j> drawn on top of each aligned cell a<i><j>
+\foreach \cell in {14,55,33,42,61,70}
+    \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l\cell) at (a\cell) {};
+
+% question box below the matrix
+\node [anchor=north west,fill=blue!30,minimum width=1.3in,minimum height=1.5em] (alig) at ([xshift=3em,yshift=-1.5em]tgt6.south west) {\footnotesize{与词对齐保持一致?}};
+
+% red frame around the cells of the candidate phrase pair, with an arrow to the question box
+\begin{pgfonlayer}{background}
+\node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14) (a24) (a33) (a42)] (phrase4) {};
+\end{pgfonlayer}
+\draw [->,thick,dotted] ([xshift=0.5em,yshift=-0.1em]phrase4.south) .. controls +(south:2.0) and +(north:1.2) ..  ([yshift=0.1em]alig.north);
+
+\end{scope}
+
+% list of extracted phrases on the right; the last entry is highlighted
+\begin{scope}[xshift = 1.5in, yshift = 1.3in]
+{\scriptsize
+\node (rules) {\textbf{抽取得到的短语:}};
+\draw[-] (rules.south west)--([xshift=2.0in]rules.south west);
+
+\node[anchor=north west] (r1) at ([yshift=-0.3em]rules.south west) {天气 -- The weather};
+\node[anchor=north west] (r2) at ([yshift=-0.4em]r1.south west) {天气 -- The weather is};
+\node[anchor=north west] (r3) at ([yshift=-0.4em]r2.south west) {天气真 -- The weather is very};
+\node[anchor=north west] (r4) at ([yshift=-0.4em]r3.south west) {天气真好 -- The weather is very good};
+
+% green background behind r4 and an arrow from the question box to it
+\begin{pgfonlayer}{background}
+\node [rectangle,thick,inner sep=0.1em,fill=green!20] [fit = (r4)] (p4) {};
+\end{pgfonlayer}
+\draw [->,thick,dotted] ([xshift=0.1em]alig.east) .. controls +(east:2) and +(west:1.5) ..  ([xshift=-0.1em]p4.west);
+
+}
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/phrase-extraction-consistent-with-word-alignment.tex
+++ b/Book/Chapter4/Figures/phrase-extraction-consistent-with-word-alignment.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Compatibility of phrase pairs with the word alignment: two boxed regions of the
+%%% alignment matrix, each linked to the phrase pair it covers
+\begin{center}
+\begin{tikzpicture}
+
+\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
+\tikzstyle{srcnode} = [rotate=45,font=\small,anchor=south west]
+\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
+\tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6*1.1cm,minimum width=0.36*1.1cm]
+\tikzstyle{probnode} = [fill=blue!30,minimum width=0.4*1.1cm]
+\tikzstyle{labelnode} = [above]
+
+% alignment matrix: 8 columns by 6 rows of equally sized dots, named a<column><row>
+\begin{scope}[scale=0.85,yshift=0.12in]
+\foreach \i / \j / \c in
+    {0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 6/5/0.15, 7/5/0.15,
+    0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 6/4/0.15, 7/4/0.15,
+    0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 6/3/0.15, 7/3/0.15,
+    0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 6/2/0.15, 7/2/0.15,
+    0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 6/1/0.15, 7/1/0.15,
+    0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15, 6/0/0.15, 7/0/0.15}
+    \node[elementnode,minimum size=0.6*1.1cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.1cm*\i-5.4*0.5*1.1cm,0.5*1.1cm*\j-0.05*1.1cm) {};
+
+% source words (rotated 45 degrees above the columns)
+\node[srcnode] (src1) at (-5.4*0.5*1.1cm,-1.05*1.1cm+7.5*0.5*1.1cm) {\scriptsize{The}};
+\node[srcnode] (src2) at ([xshift=0.5*1.1cm]src1.south west) {\scriptsize{weather}};
+\node[srcnode] (src3) at ([xshift=0.5*1.1cm]src2.south west) {\scriptsize{is}};
+\node[srcnode] (src4) at ([xshift=0.5*1.1cm]src3.south west) {\scriptsize{very}};
+\node[srcnode] (src5) at ([xshift=0.5*1.1cm]src4.south west) {\scriptsize{good}};
+\node[srcnode] (src6) at ([xshift=0.5*1.1cm]src5.south west) {\scriptsize{today}};
+\node[srcnode] (src7) at ([xshift=0.5*1.1cm]src6.south west) {\scriptsize{.}};
+\node[srcnode] (src8) at ([xshift=0.5*1.1cm]src7.south west) {\scriptsize{EOS}};
+
+% target words (stacked to the left of the rows)
+\node[tgtnode] (tgt1) at (-6.0*0.5*1.1cm,-1.05*1.1cm+7.5*0.5*1.1cm) {\scriptsize{今天}};
+\node[tgtnode] (tgt2) at ([yshift=-0.5*1.1cm]tgt1.north east) {\scriptsize{天气}};
+\node[tgtnode] (tgt3) at ([yshift=-0.5*1.1cm]tgt2.north east) {\scriptsize{真}};
+\node[tgtnode] (tgt4) at ([yshift=-0.5*1.1cm]tgt3.north east) {\scriptsize{好}};
+\node[tgtnode] (tgt5) at ([yshift=-0.5*1.1cm]tgt4.north east) {\scriptsize{。}};
+\node[tgtnode] (tgt6) at ([yshift=-0.5*1.1cm]tgt5.north east) {\scriptsize{EOS}};
+
+% word alignment: a larger square on top of each aligned cell
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l04) at (a04) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l14) at (a14) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l55) at (a55) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l33) at (a33) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l42) at (a42) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l61) at (a61) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l70) at (a70) {};
+
+
+{
+% text of the two phrase pairs, placed to the right of the matrix
+\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a75.east) {\footnotesize{$\bar{s}_i$: 天气\ \ \ \ \ \ }};
+\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\bar{t}_i$: The\ \ \ weather\ \ \ \ \ }};
+
+\node [anchor=west] (p2line1) at ([xshift=4em]a72.east) {\footnotesize{$\bar{s}_j$: 真\ \ \ 好 \ \ }};
+\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: very\ \ \ good\ \ \ \ \ \ \ \ }};
+
+% empty helper node; it is only used as an extra fit target for box2 below
+\node [anchor=east] (p2line3) at ([xshift=0em,yshift=-4em]p1line2.east) {};
+% frames around the two matrix regions and tinted boxes behind the two text blocks
+\begin{pgfonlayer}{background}
+{
+\node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14)] (phrase1) {};
+\node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a33) (a42)] (phrase2) {};
+\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line2)] (box1) {};
+\node [rectangle,inner sep=0.2em,fill=green!10] [fit = (p2line1) (p2line2) (p2line3)] (box2) {};
+}
+\end{pgfonlayer}
+
+% dotted arrows from each matrix region to its phrase-pair text
+\draw [->,thick,dotted] ([yshift=0.3em]phrase1.east) .. controls +(east:3.5) and +(west:1) ..  (box1.west);
+\draw [->,thick,dotted] ([yshift=-0.0em]phrase2.east) .. controls +(east:2.0) and +(west:1) ..  ([yshift=1em]box2.west);
+}
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/phrase-structure-tree-and-dependency-tree.tex
+++ b/Book/Chapter4/Figures/phrase-structure-tree-and-dependency-tree.tex
+%------------------------------------------------------------------------------------------------------------
+%%% Thoughts on using syntactic information: phrase-structure tree vs. dependency tree
+\begin{tikzpicture}
+{\footnotesize
+\begin{scope}[xshift = -0.3in, sibling distance=3pt, level distance = 18pt]
+% (a) phrase-structure tree; the word leaves are named w1..w4
+\Tree[.S
+        [.NP [.NNP \node(w1){Messi}; ] ]
+        [.VP
+            [.VBD \node(w2){hit}; ]
+            [.NP [.DT \node(w3){the}; ] [.NN \node(w4){ball}; ] ]
+        ]
+     ]
+\node [anchor=north west] (cap1) at ([yshift=-2.0em,xshift=2.0em]w1.south west) {\footnotesize{(a) 短语结构树}};
+
+% (b) dependency tree: the same four words laid out in a row to the right of the tree,
+% each one anchored to the east side of its predecessor
+\node [anchor=west] (t1) at ([xshift=3em]w4.east) {Messi};
+\foreach \word [count=\cur from 2, evaluate=\cur as \prev using int(\cur-1)] in {hit,the,ball}
+    \node [anchor=west] (t\cur) at ([xshift=0.5em]t\prev.east) {\word};
+
+% arcs drawn above the words
+\draw [->] ([xshift=0em]t3.north) .. controls +(north:1em) and +(north:1em) .. ([xshift=-0.2em]t4.north);
+\draw [->] ([xshift=0.2em]t4.north) .. controls +(north:2.5em) and +(north:2.5em) .. ([xshift=0.2em]t2.north);
+\draw [->] ([xshift=0.0em]t1.north) .. controls +(north:2.5em) and +(north:2.5em) .. ([xshift=-0.2em]t2.north);
+
+\node [anchor=north west] (cap2) at ([yshift=-0.2em,xshift=-0.5em]t2.south west) {\footnotesize{(b) 依存树}};
+\end{scope}
+}
+\end{tikzpicture}
--- a/Book/Chapter4/Figures/process-of-machine-translation-base-phrase.tex
+++ b/Book/Chapter4/Figures/process-of-machine-translation-base-phrase.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Architecture of the phrase-based system
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+
+% All boxes share one shape; only the fill colour differs
+% (red = data/tables, blue = modelling steps, green = decoder).
+\tikzset{
+  stagebox/.style={minimum width=7em,minimum height=1.7em,rounded corners=0.3em},
+  datanode/.style={stagebox,fill=red!20},
+  modelnode/.style={stagebox,fill=blue!20},
+  decodingnode/.style={stagebox,fill=green!20}
+}
+
+% top two rows: training data and the three modelling steps
+\node [datanode,anchor=north west] (bitext) at (0,0) {{ \scriptsize{训练用双语数据}}};
+\node [modelnode,anchor=north] (phrase) at ([yshift=-1.5em]bitext.south) {{ \scriptsize{短语抽取及打分}}};
+\node [modelnode,anchor=west] (reorder) at ([xshift=1.5em]phrase.east) {{ \scriptsize{调序建模}}};
+\node [modelnode,anchor=west] (lm) at ([xshift=1.5em]reorder.east) {{ \scriptsize{语言建模}}};
+\node [datanode,anchor=south] (monotext) at ([yshift=1.5em]lm.north) {{ \scriptsize{目标语单语数据}}};
+
+% third row: the table/model produced by each step
+\node [datanode,anchor=north] (phrasetable) at ([yshift=-1.5em]phrase.south) {{ \scriptsize{短语表}}};
+\node [datanode,anchor=north] (reordertable) at ([yshift=-1.5em]reorder.south) {{ \scriptsize{调序模型}}};
+\node [datanode,anchor=north] (lmtable) at ([yshift=-1.5em]lm.south) {{ \scriptsize{语言模型}}};
+
+% bottom: the decoder
+\node [decodingnode,anchor=north] (decoding) at ([yshift=-2em]reordertable.south) {{ \scriptsize{解码器}}};
+
+% straight vertical arrows (south of the upper box to north of the lower box)
+\foreach \src/\dst in {bitext/phrase, monotext/lm, phrase/phrasetable, reorder/reordertable, lm/lmtable}
+    \draw [->,very thick] ([yshift=-0.1em]\src.south) -- ([yshift=0.1em]\dst.north);
+
+% the bilingual data also feeds the reordering step
+\draw [->,very thick] (bitext.south east) -- ([yshift=0.1em]reorder.north west);
+
+% all three tables feed the decoder
+\draw [->,very thick] ([yshift=-0.1em]phrasetable.south east) -- ([yshift=0.1em,xshift=-3em]decoding.north);
+\draw [->,very thick] ([yshift=-0.1em]reordertable.south) -- ([yshift=0.1em,xshift=0em]decoding.north);
+\draw [->,very thick] ([yshift=-0.1em]lmtable.south west) -- ([yshift=0.1em,xshift=3em]decoding.north);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/processing-of-hierarchical-phrase-system.tex
+++ b/Book/Chapter4/Figures/processing-of-hierarchical-phrase-system.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Pipeline of grammar-driven statistical machine translation
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+
+% All boxes share one shape; only the fill colour differs
+% (blue = data/models, red = extraction and learning steps, green = tuning and decoding).
+\tikzset{
+  stagebox/.style={minimum width=7em,minimum height=1.7em,rounded corners=0.3em},
+  datanode/.style={stagebox,fill=blue!20},
+  modelnode/.style={stagebox,fill=red!20},
+  decodingnode/.style={stagebox,fill=green!20}
+}
+
+% main zig-zag: each box hangs off the lower corner of the previous one
+\node [datanode,anchor=north west] (bitext) at (0,0) {{ \scriptsize{训练用双语数据}}};
+\node [modelnode, anchor=north west] (gi) at ([xshift=2em,yshift=-0.2em]bitext.south east) {{ \scriptsize{文法(规则)抽取}}};
+\node [datanode,anchor=north east] (birules) at ([xshift=-2em,yshift=-0.2em]gi.south west) {{ \scriptsize{同步翻译文法}}};
+\node [modelnode, anchor=north west] (training) at ([xshift=2em,yshift=-0.2em]birules.south east) {{ \scriptsize{特征值学习}}};
+\node [datanode,anchor=north east] (model) at ([xshift=-2em,yshift=-0.2em]training.south west) {{ \scriptsize{翻译模型}}};
+\node [decodingnode, anchor=north west] (tuning) at ([xshift=2em,yshift=-0.2em]model.south east) {{ \scriptsize{特征权重调优}}};
+\node [datanode,anchor=north east] (tuningdata) at ([xshift=-2em,yshift=-0.2em]tuning.south west) {{ \scriptsize{调优用双语数据}}};
+\node [decodingnode, anchor=north west] (decoding) at ([xshift=2em,yshift=-0.2em]tuningdata.south east) {{ \scriptsize{解码新句子}}};
+
+% language-model branch on the right-hand side
+\node [datanode,anchor=south west] (monotext) at ([xshift=2em,yshift=0.2em]training.north east) {{ \scriptsize{目标语数据}}};
+\node [modelnode,anchor=south west] (lm) at ([xshift=2em,yshift=0.2em]tuning.north east) {{ \scriptsize{$n$-gram语言建模}}};
+\node [datanode,anchor=south west] (lmmodel) at ([xshift=2em,yshift=0.2em]decoding.north east) {{ \scriptsize{语言模型}}};
+
+% arrows leaving the east side of a left-column box towards the next box's north-west corner
+\foreach \src/\dst in {bitext/gi, birules/training, model/tuning}
+    \draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]\src.east) -- ([yshift=0.1em]\dst.north west);
+
+% arrows leaving a right-column box's south-west corner towards the east side of the next box
+\foreach \src/\dst in {gi/birules, training/model}
+    \draw [->,very thick] ([yshift=-0.1em]\src.south west) -- ([yshift=0.3em,xshift=0.1em]\dst.east);
+
+% straight vertical arrows
+\foreach \src/\dst in {tuning/decoding, monotext/lm, lm/lmmodel}
+    \draw [->,very thick] ([yshift=-0.1em]\src.south) -- ([yshift=0.1em]\dst.north);
+
+% tuning data into tuning; language model into both tuning and decoding
+\draw [->,very thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west);
+\draw [->,very thick] ([yshift=0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=-0.1em]tuning.south east);
+\draw [->,very thick] ([yshift=-0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=0.1em]decoding.north east);
+\end{scope}
+\end{tikzpicture}
+\end{center}
--- a/Book/Chapter4/Figures/reorder-base-distance.tex
+++ b/Book/Chapter4/Figures/reorder-base-distance.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Reordering model 1: distance-based reordering
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[minimum height = 20pt]
+
+% source sentence: two phrases, with word positions 1..5 printed above them
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=green!20] (s1) at (0, 0) {\scriptsize{在 桌子 上 的}};
+\foreach \pos/\dx in {1/-2.5em, 2/-0.8em, 3/1.1em, 4/2.5em}
+    \node[anchor=south] (n\pos) at ([xshift=\dx,yshift=-0.5em]s1.north) {\scriptsize{\pos}};
+\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\scriptsize{苹果}};
+\node[anchor=south] (n5) at ([yshift=-0.5em]s2.north) {\scriptsize{5}};
+
+% target sentence: the same two phrases in swapped order
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\scriptsize{the apple}};
+\node[anchor=west,fill=green!20] (t2) at ([xshift=1em]t1.east) {\scriptsize{on the table}};
+
+% phrase alignment links
+\foreach \src/\tgt in {s1/t2, s2/t1}
+    \path[<->, thick] (\src.south) edge (\tgt.north);
+
+% table on the right: target phrase index, source span, reordering distance
+\node[anchor=west] (target) at ([xshift=3.5em,yshift=2.3em]n5.east) {\scriptsize{目标短语}};
+\node[anchor=west] (source) at ([xshift=0.5em]target.east) {\scriptsize{源短语}};
+\node[anchor=west] (distance) at ([xshift=0.9em]source.east) {\scriptsize{距离}};
+
+% first-column entries are named tr1/tr2 so they do not reuse the names of the
+% target phrase nodes t1/t2 above
+\node[anchor=north] (tr1) at ([xshift=-0em,yshift=-0.1em]target.south) {1};
+\node[anchor=north] (tr2) at ([yshift=-1.8em]tr1.south) {2};
+
+\node[anchor=north] (so1) at ([yshift=-0.1em]source.south) {5};
+\node[anchor=north] (so2) at ([yshift=-1.8em]so1.south) {1-4};
+
+\node[anchor=north] (d1) at ([xshift=-0.1em,yshift=-0.1em]distance.south) {+4};
+\node[anchor=north] (d2) at ([yshift=-1.8em]d1.south) {-5};
+
+% how each distance is computed, shown under the corresponding table row
+\node[anchor=north west,fill=blue!20] (m1) at ([xshift=-0.5em,yshift=-0.0em]tr1.south west) {\scriptsize{$\textrm{start}_1\ -\ \textrm{end}_{0}\ -\ 1$\ =\ 5\ -\ 0\ -\ 1}};
+\node[anchor=north west,fill=blue!20] (m2) at ([xshift=-0.5em,yshift=-0.0em]tr2.south west) {\scriptsize{$\textrm{start}_2\ -\ \textrm{end}_{1}\ -\ 1$\ =\ 1\ -\ 5\ -\ 1}};
+
+% rule under the table header
+\draw[-] ([xshift=0.02in]target.south west)--([xshift=1.6in]target.south west);
+
+% above each source phrase: a vertical stroke, then a dotted arrow
+% (to the right over the first phrase, to the left over the second)
+\foreach \phr/\dx in {s1/0.3in, s2/-0.3in} {
+    \draw[-,thick] (\phr.north west)--([yshift=0.3in]\phr.north west);
+    \draw[->,densely dotted,thick] ([yshift=0.3in]\phr.north west)--([xshift=\dx,yshift=0.3in]\phr.north west);
+}
+
+% distance labels next to the arrows
+\node[anchor=south] (ld1) at ([xshift=-0.1em,yshift=0.4em]n1.north) {\scriptsize{$dr$=-5}};
+\node[anchor=south] (ld2) at ([xshift=6em,yshift=0.4em]n1.north) {\scriptsize{$dr$=+4}};
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/reorder-base-phrase-translation.tex
+++ b/Book/Chapter4/Figures/reorder-base-phrase-translation.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Phrase translation with reordering: two source phrases linked to their swapped target phrases
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[minimum height = 20pt]
+
+% source side
+\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
+\node[anchor=west,fill=green!20] (s1) at (0, 0) {\scriptsize{在 桌子 上 的}};
+\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\scriptsize{苹果}};
+
+% target side (matching fill colours mark corresponding phrases)
+\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
+\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\scriptsize{the apple}};
+\node[anchor=west,fill=green!20] (t2) at ([xshift=1em]t1.east) {\scriptsize{on the table}};
+
+% crossing links between corresponding phrases
+\foreach \src/\tgt in {s1/t2, s2/t1}
+    \path[<->, thick] (\src.south) edge (\tgt.north);
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/result-of-tree-binarization.tex
+++ b/Book/Chapter4/Figures/result-of-tree-binarization.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Tree binarization (cont.): a flat four-child NP and its binarized form
+\begin{center}
+\begin{tikzpicture}
+
+{\scriptsize
+% left: the original flat tree with its word-aligned English translation
+\begin{scope}[sibling distance=4pt, level distance=25pt]
+
+\Tree[.\node(n1){NP};
+        [.NNP \node(sw1){美国}; ]
+        [.NN \node(sw2){总统}; ]
+        [.NN \node(sw3){唐纳德}; ]
+        [.NN \node(sw4){特朗普}; ]
+     ]
+
+% English words, all at the same distance below their source words
+\foreach \idx/\word in {1/{U.S.}, 2/President, 3/Trump}
+    \node [anchor=north] (tw\idx) at ([yshift=-2em]sw\idx.south) {\word};
+
+% alignment links; the last two source words both link to tw3
+\foreach \src/\tgt in {1/1, 2/2, 3/3, 4/3}
+    \draw [-,dashed] (sw\src.south) -- (tw\tgt.north);
+
+\draw [->,very thick] ([xshift=1em]sw4.east) -- ([xshift=5em]sw4.east) node [pos=0.5,above] {\tiny{二叉化}};
+
+\end{scope}
+
+% right: the binarized tree, with NP-BAR nodes grouping the trailing children
+\begin{scope}[xshift=2.2in,sibling distance=10pt, level distance=15pt]
+
+\Tree[.\node(n1){NP};
+        [.NNP \node(sw1){美国}; ]
+        [.NP-BAR
+            [.NN \node(sw2){总统}; ]
+            [.NP-BAR
+                [.NN \node(sw3){唐纳德}; ]
+                [.NN \node(sw4){特朗普}; ]
+            ]
+        ]
+     ]
+
+% English words; each one uses its own vertical offset below its source word
+\foreach \idx/\dy/\word in {1/-4.5em/{U.S.}, 2/-2.75em/President, 3/-1em/Trump}
+    \node [anchor=north] (tw\idx) at ([yshift=\dy]sw\idx.south) {\word};
+
+\foreach \src/\tgt in {1/1, 2/2, 3/3, 4/3}
+    \draw [-,dashed] (sw\src.south) -- (tw\tgt.north);
+
+\end{scope}
+
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/role-of-syntax-tree-in-different-decoding-methods.tex
+++ b/Book/Chapter4/Figures/role-of-syntax-tree-in-different-decoding-methods.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Role of the syntax tree in decoding: (a) tree-based decoding with an explicitly given tree vs. (b) string-based decoding with an implicit structure
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[scale=0.9,level distance=30pt,sibling distance=7pt] % outer scope shared by both panels
+
+{\scriptsize
+\Tree[.\node(bsn0){IP};
+          [.\node(bsn1){NP};
+               [.\node(bsn2){NN}; \node(bsw1){猫}; ]
+          ]
+          [.\node(bsn3){VP};
+               [.\node(bsn4){VV}; \node(bsw2){喜欢}; ]
+               [.\node(bsn5){VP}; \edge[roof]; \node(bsw3){吃 \ 鱼}; ]
+          ]
+     ]
+
+\node [anchor=west] (target) at ([xshift=1em]bsw3.east) {Cats like eating fish}; % panel (a): translation placed right of the last source leaf
+\node [anchor=north,inner sep=3pt] (cap1) at ([yshift=-1em]target.south west) {(a) 基于树的解码};
+\draw [->,thick] (bsw3.east) -- (target.west);
+\node [anchor=west] (sourcelabel) at ([xshift=4em,yshift=-1em]bsn0.east) {显式输入的结构}; % label text: "explicitly input structure"
+
+\node [anchor=west] (source2) at ([xshift=3.3em]target.east) {猫$\ \ \;$喜欢$\ \;$吃\ 鱼}; % panel (b): the source given as a plain word string
+\node [anchor=west] (target2) at ([xshift=1em]source2.east) {Cats like eating fish};
+\node [anchor=north,inner sep=3pt] (cap2) at ([xshift=1.1em,yshift=-1em]target2.south west) {(b) 基于串的解码};
+\draw [->,thick] (source2.east) -- (target2.west);
+
+
+\begin{scope}[xshift=2.55in,yshift=-1em,sibling distance=7pt] % second tree: category labels only, no word leaves; reuses the names bsn0..bsn5
+\Tree[.\node(bsn0){IP};
+          [.\node(bsn1){NP};
+               [.\node(bsn2){NN}; ]
+          ]
+          [.\node(bsn3){VP};
+               [.\node(bsn4){VV}; ]
+               [.\node(bsn5){VP}; ]
+          ]
+     ]
+
+\begin{pgfonlayer}{background} % dashed frame around the second tree
+\node [draw,dashed,inner sep=2pt] (box) [fit = (bsn0) (bsn1) (bsn2) (bsn3) (bsn4) (bsn5)] {};
+\node [anchor=north west] (boxlabel) at ([xshift=2em]box.north east) {隐含结构}; % label text: "implicit structure"
+\end{pgfonlayer}
+
+\end{scope}
+
+}
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/rule-matching-base-tree.tex
+++ b/Book/Chapter4/Figures/rule-matching-base-tree.tex
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%% Rule application: tree-based matching. A tree-to-string rule is matched against a fragment of the source parse tree and linked to a chart cell.
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[scale=0.97]
+{\scriptsize
+
+\begin{scope}[sibling distance=2pt,level distance=20pt,grow'=up] % source parse tree, grown upwards; word subscripts are positions 1..13
+\Tree[.\node(treeroot){IP};
+     [.NP [.NR 阿都拉$_1$ ]]
+        [.\node(tn1){VP};
+            [.\node(tn2){PP};
+                [.\node(tn3){P}; \node(cw1){对$_2$}; ]
+                [.\node(tn4){NP}; \edge[roof]; {自己$_3$ 四$_4$\ 个$_5$\ 多$_6$\ 月$_7$\ 以来$_8$\ 的$_9$\ 施政$_{10}$\ 表现$_{11}$} ]
+            ]
+            [.\node(tn5){VP};
+                [.VV 感到$_{12}$ ]
+                [.NN 满意$_{13}$ ]
+            ]
+        ]
+     ]
+\end{scope}
+
+{
+\node [anchor=west,fill=blue!20!white] (rulepart1) at ([yshift=2.0in,xshift=-1.3in]treeroot.east) {VP(PP(P(对) NP$_1$) VP$_2$)}; % source side of the rule
+\node [anchor=north west] (rulepart2) at (rulepart1.south west) {$\to$ VP$_2$ with NP$_1$}; % target side of the rule
+}
+
+\begin{pgfonlayer}{background}
+{
+\node[rectangle,draw,inner sep=2pt] [fit = (rulepart1) (rulepart2)] (rulemark) {}; % frame around both rule lines
+}
+\end{pgfonlayer}
+
+{
+\node [anchor=south west] at (rulemark.north west) {\tiny{{树到串翻译规则}}}; % label text: "tree-to-string translation rule"
+}
+
+}
+
+\begin{scope}[xshift=1.5in,yshift=1.6in] % schematic chart: a few drawn cells with dots standing in for the omitted ones
+{
+\node[anchor=center, minimum size=10pt,draw] (cell1and1) at (0,0) {};
+\node[anchor=west, minimum size=10pt,draw] (cell2and1) at ([xshift=-0.04em]cell1and1.east) {};
+\node[anchor=west, minimum size=10pt,draw] (cell3and1) at ([xshift=-0.04em]cell2and1.east) {};
+\node[anchor=west, minimum size=10pt] (cell4and1) at ([xshift=0]cell3and1.east) {\tiny{$\dots$}};
+\node[anchor=west, minimum size=10pt,draw] (cellnand1) at ([xshift=0]cell4and1.east) {};
+
+\node[anchor=south, minimum size=10pt,draw] (cell1and2) at ([yshift=-0.04em]cell1and1.north) {};
+\node[anchor=west, minimum size=10pt,draw] (cell2and2) at ([xshift=-0.04em]cell1and2.east) {};
+
+\node[anchor=south, minimum size=10pt,draw] (cell1and3) at ([yshift=-0.04em]cell1and2.north) {};
+\node[anchor=south, minimum size=10pt] (cell1and4) at ([yshift=0]cell1and3.north) {\tiny{$\vdots$}};
+
+\node[anchor=south, minimum size=10pt] (cell2and3) at ([yshift=0pt]cell2and2.north) {\tiny{$\vdots$}};
+\node[anchor=south, minimum size=10pt] (cell3and2) at ([yshift=1pt]cell3and1.north) {\tiny{$\vdots$}};
+\node[anchor=south, minimum size=10pt,draw] (cell4andn) at ([yshift=11pt,xshift=10pt]cell3and1.north) {}; % the cell the rule arrow below points at
+
+\node[anchor=south, minimum size=10pt,draw] (cell1andn) at ([yshift=-3pt]cell1and4.north) {};
+\node[anchor=south, minimum size=10pt,draw] (cell2andn) at ([xshift=0pt,yshift=-3pt]cell2and3.north) {};
+%\node[anchor=center, minimum size=10pt,draw] (cell3andn) at ([xshift=10pt,yshift=-10pt]cell2andn.center) {};
+
+\node[anchor=west] (chartlabel) at ([xshift=-0em,yshift=-0.8em]cell1and1.south east) {\scriptsize{Chart}};
+}
+\end{scope}
+
+{
+\path [draw,thick,ublue,->,dashed] (rulemark.north east) .. controls +(60:2.0) and +(north east:1.5)  ..  ([yshift=0.05em,xshift=0.05em]cell4andn.north east) node[pos=0.25, below,yshift=-0.2em] (spanlabel) {}; % dashed arrow from the rule box to chart cell cell4andn; spanlabel is an empty node
+}
+
+
+\begin{pgfonlayer}{background}
+{
+\path [fill=red!20] (tn1.south west) -- ([yshift=0.2em]tn2.south west) -- ([yshift=0.2em,xshift=-0.30em]tn3.south west) -- (cw1.north west) -- (cw1.north east) -- ([xshift=0.30em]tn3.north east) -- (tn4.north east) -- (tn4.south east) -- ([xshift=0.5em]tn2.north east) -- (tn5.north east)-- ([yshift=0.2em]tn5.south east) -- (tn1.south east) -- (tn1.south west); % shaded outline running through nodes tn1..tn5 and cw1, i.e. the tree fragment named in the rule
+}
+\end{pgfonlayer}
+
+{
+\path [draw,thick,ublue,<->] ([xshift=-2em]rulepart1.south east) .. controls +(south:1.7) and +(north:1.3) ..  ([xshift=0em,yshift=0.10em]cw1.north) node[pos=0.5, below,xshift=0.3in] {\scriptsize{树片段的匹配}}; % label text: "matching of the tree fragment"
+}
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/search-space-representation-of-feature-weight-1.tex
+++ b/Book/Chapter4/Figures/search-space-representation-of-feature-weight-1.tex
+\begin{tikzpicture} % search space of feature weights drawn as a grid: one column per weight, rows labelled with candidate values
+\begin{scope}[scale=0.55] 
+{\tiny
+\draw[step=1,help lines,color=black] grid (4,4); % grid from the origin to (4,4)
+
+\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01}; % NOTE(review): n1 is referenced here, but no node named n1 is created anywhere in this picture - confirm where it is meant to come from
+\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
+\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
+\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
+\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
+
+\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$}; % weight labels, chained left to right starting from n1 (see the note on n1 above)
+\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
+\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
+\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
+\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
+
+\draw [-](n1) (0,4) -- (0,4.4); % NOTE(review): "(n1)" inside a path is a coordinate reference (a move-to), not a name declaration; the visible stroke is the short tick from (0,4) to (0,4.4)
+\draw [-](n2) (1,4) -- (1,4.4);
+\draw [-](n3) (2,4) -- (2,4.4);
+\draw [-](n4) (3,4) -- (3,4.4);
+\draw [-](n5) (4,4) -- (4,4.4);
+
+\draw[decorate,decoration={brace}](0,4.7) --(4,4.7) node [xshift=-4em,yshift=1.5em,align=center](label1) {M dimensions};	% brace above the grid
+
+\draw[decorate,decoration={brace}](4.5,4.3) --(4.5,0) node [xshift=2.3em,yshift=5.8em,align=center](label2) {Values};	% brace to the right of the grid
+}
+\end{scope}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/search-space-representation-of-feature-weight-2.tex
+++ b/Book/Chapter4/Figures/search-space-representation-of-feature-weight-2.tex
+\begin{tikzpicture}% Feature-weight search space, figure 2 of 3: the same 4x4 grid with one blue poly-line through it and a side list of the weight values it stands for.
+\begin{scope}[scale=0.55] 
+{\tiny
+\draw[step=1,help lines,color=black] grid (4,4); % grid from the origin to (4,4) in unit steps
+
+\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};% value labels 0.00 ... 1.00, all chained off y2. NOTE(review): y2 is anchored to a node n1 that this file never creates -- presumably it is left over from an earlier picture in the including document; confirm
+\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
+\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
+\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
+\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
+
+\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};% weight names lambda_1 ... lambda_M, chained off x1 (again placed relative to n1)
+\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
+\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
+\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
+\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
+
+\draw [-](n1) (0,4) -- (0,4.4);% short ticks above each grid column; the leading (n1)..(n5) is a move-to onto an existing node, not a name assignment, so only (k,4)--(k,4.4) is stroked
+\draw [-](n2) (1,4) -- (1,4.4);
+\draw [-](n3) (2,4) -- (2,4.4);
+\draw [-](n4) (3,4) -- (3,4.4);
+\draw [-](n5) (4,4) -- (4,4.4);
+
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r11) at (0,1) {};% one dot per grid column marking the value picked in that column
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r12) at (1,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r13) at (2,1) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r14) at (3,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
+
+\draw [-,very thick,blue!50] (0,1) -- (1,2) -- (2,1) -- (3,2) -- (4,4);% poly-line through the same five coordinates as the dots above (drawn after them, so it sits on top)
+
+\node[anchor=north] (p1) at ([xshift=5em,yshift=13em]n5.north) {\scriptsize{$\leftarrow$ \textbf{path}:}};% caption for the side list; positioned relative to n5, which like n1 is not created in this file
+
+\node[anchor=north] (e1) at ([xshift=0,yshift=-0.4em]p1.south) {$w_1 = 0.01$};% side list of example weight values; NOTE(review): written as w_i here while the grid columns are labelled lambda_i -- confirm this is intended
+\node[anchor=north] (e2) at ([xshift=0,yshift=-0.8em]e1.south) {$w_2 = 0.02$};
+\node[anchor=north] (e3) at ([xshift=0,yshift=0.4em]e2.south) {$\vdots$};
+\node[anchor=north] (e4) at ([xshift=0,yshift=-0.2em]e3.south) {$w_M = 1.00$};
+}
+\end{scope}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/search-space-representation-of-feature-weight-3.tex
+++ b/Book/Chapter4/Figures/search-space-representation-of-feature-weight-3.tex
+% Feature-weight search space, figure 3 of 3: the 4x4 grid with three example
+% poly-lines (blue, green, red), each picking one value per grid column, plus a
+% brace on the right giving the total number of such paths.
+%
+% NOTE(review): several labels are placed relative to nodes n1/n5, which this
+% file never creates -- presumably they are left over from an earlier picture
+% in the including document; confirm before compiling this figure on its own.
+\begin{tikzpicture}
+\begin{scope}[scale=0.55] 
+{\tiny
+\draw[step=1,help lines,color=black] grid (4,4); 
+
+% value labels 0.00 ... 1.00, chained off y2
+\node[anchor=north] (y2) at ([xshift=-3.3em,yshift=0em]n1.north) {0.01};
+\node[anchor=north] (y1) at ([xshift=0em,yshift=-3.3em]y2.south) {0.00};
+\node[anchor=north] (y3) at ([xshift=0em,yshift=4.5em]y2.north) {0.02};
+\node[anchor=north] (y4) at ([xshift=0em,yshift=6.6em]y3.north) {$\vdots$};
+\node[anchor=north] (y5) at ([xshift=0em,yshift=2em]y4.north) {1.00};
+
+% weight names lambda_1 ... lambda_M, chained off x1
+\node[anchor=north] (x1) at ([xshift=2em,yshift=-3em]n1.south) {$\lambda_1$};
+\node[anchor=north] (x2) at ([xshift=4.5em,yshift=0em]x1.north) {$\lambda_2$};
+\node[anchor=north] (x3) at ([xshift=4em,yshift=-1em]x2.north) {$...$};
+\node[anchor=north] (x4) at ([xshift=5em,yshift=1em]x3.north) {$\lambda_{M-1}$};
+\node[anchor=north] (x5) at ([xshift=5em,yshift=0em]x4.north) {$\lambda_M$};
+
+% short ticks above each grid column; the leading (n1)..(n5) is a move-to onto
+% an existing node, not a name assignment, so only (k,4)--(k,4.4) is stroked
+\draw [-](n1) (0,4) -- (0,4.4);
+\draw [-](n2) (1,4) -- (1,4.4);
+\draw [-](n3) (2,4) -- (2,4.4);
+\draw [-](n4) (3,4) -- (3,4.4);
+\draw [-](n5) (4,4) -- (4,4.4);
+
+% dots of the blue path
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r11) at (0,1) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r12) at (1,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r13) at (2,1) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r14) at (3,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (r15) at (4,4) {};
+
+% dots of the green path
+\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!30,fill=ugreen!30] (r21) at (0,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!30,fill=ugreen!30] (r22) at (1,3) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!30,fill=ugreen!30] (r23) at (2,4) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!30,fill=ugreen!30] (r24) at (3,0) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!30,fill=ugreen!30] (r25) at (4,2) {};
+
+% dots of the red path (it shares the point (1,3) with the green path)
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r31) at (0,4) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r32) at (1,3) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r33) at (2,2) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r34) at (3,3) {};
+\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r35) at (4,1) {};
+
+% the three poly-lines, drawn after the dots so they sit on top of them
+\draw [-,very thick,blue!50] (0,1) -- (1,2) -- (2,1) -- (3,2) -- (4,4);
+\draw [-,very thick,ugreen!50] (0,2) -- (1,3) -- (2,4) -- (3,0) -- (4,2);
+\draw [-,very thick,red!50] (0,4) -- (1,3) -- (2,2) -- (3,3) -- (4,1);
+
+% brace on the right with the path count.
+% NOTE(review): with M weights that each take one of V values the count would
+% normally be V^M rather than M^V -- left unchanged here; confirm against the
+% chapter text.
+\draw[decorate,decoration={brace}](4.5,4.3) --(4.5,0) node [xshift=2.3em,yshift=7.5em,align=center](label1) {$M^V$};	
+\node[anchor=north] (label2) at ([xshift=0em,yshift=-2.5em]label1.north) {paths};
+}
+\end{scope}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/structure-of-chart.tex
+++ b/Book/Chapter4/Figures/structure-of-chart.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Tree-based decoding methods - chart-based decoding: a small chart table on the left and three example cells (Cell[0,1], Cell[1,2], Cell[0,2]) expanded in boxes on the right
+\begin{center}
+\begin{tikzpicture}\scriptsize
+\begin{scope}%[scale=0.2]  (scaling option left disabled)
+\node [anchor=north] (ch) at (0,0) {\small{\textbf{Chart}}};% title; every other element is positioned relative to ch.south
+
+\draw [->] ([xshift=-1em,yshift=-1em]ch.south) -- ([xshift=-1em,yshift=-9em]ch.south);% the two axes, sharing a corner: this one points down ...
+\draw [->] ([xshift=-1em,yshift=-1em]ch.south) -- ([xshift=10em,yshift=-1em]ch.south);% ... and this one points right
+
+\node [anchor=north] (l11) at ([xshift=-1.7em,yshift=-2.5em]ch.south) {{起}};% l11..l14 stack the four characters of the vertical axis label ("start position") one per node
+\node [anchor=north] (l12) at ([xshift=0em,yshift=0.5em]l11.south) {{始}};
+\node [anchor=north] (l13) at ([xshift=0em,yshift=0.5em]l12.south) {{位}};
+\node [anchor=north] (l14) at ([xshift=0em,yshift=0.5em]l13.south) {{置}};
+\node [anchor=north] (l2) at ([xshift=4.5em,yshift=0.4em]ch.south) {{跨度大小}};% horizontal axis label ("span size")
+
+\draw [-] ([xshift=1em,yshift=-2em]ch.south) -- ([xshift=1em,yshift=-8em]ch.south);% three vertical rules ...
+\draw [-] ([xshift=5em,yshift=-2em]ch.south) -- ([xshift=5em,yshift=-8em]ch.south);
+\draw [-] ([xshift=9em,yshift=-2em]ch.south) -- ([xshift=9em,yshift=-8em]ch.south);
+\draw [-] ([xshift=1em,yshift=-2em]ch.south) -- ([xshift=9em,yshift=-2em]ch.south);% ... and three horizontal rules, together forming a 2x2 table
+\draw [-] ([xshift=1em,yshift=-5em]ch.south) -- ([xshift=9em,yshift=-5em]ch.south);
+\draw [-] ([xshift=1em,yshift=-8em]ch.south) -- ([xshift=9em,yshift=-8em]ch.south);
+
+\node [anchor=north,rectangle,draw=ublue, inner sep=0mm,minimum height=4em,minimum width=9em,rounded corners=2pt] (n1) at ([xshift=16em,yshift=2em]ch.south) {};% three empty rounded frames stacked to the right of the table; their text is added by separate nodes below
+\node [anchor=north,rectangle,draw=ublue, inner sep=0mm,minimum height=4em,minimum width=9em,rounded corners=2pt] (n2) at ([xshift=0em,yshift=-0.5em]n1.south) {};
+\node [anchor=north,rectangle,draw=ublue, inner sep=0mm,minimum height=4em,minimum width=9em,rounded corners=2pt] (n3) at ([xshift=0em,yshift=-0.5em]n2.south) {};
+
+\node [anchor=north] (n11) at ([xshift=0em,yshift=-0.5em]n1.north) {Cell[0,1]:};% contents of frame n1
+\node [anchor=north] (n12) at ([xshift=1em,yshift=-1.4em]n11.north) {VV[0,1]};
+
+\node [anchor=north] (n21) at ([xshift=0em,yshift=-0.1em]n2.north) {Cell[1,2]:};% contents of frame n2
+\node [anchor=north] (n22) at ([xshift=1em,yshift=-1.4em]n21.north) {NN[1,2]};
+\node [anchor=north] (n23) at ([xshift=0em,yshift=-1.1em]n22.north) {NP[1,2]};
+
+\node [anchor=north] (n31) at ([xshift=0em,yshift=-0.1em]n3.north) {Cell[0,2]:};% contents of frame n3
+\node [anchor=north] (n32) at ([xshift=1em,yshift=-1.2em]n31.north) {VP[0,2]};
+\node [anchor=north] (n33) at ([xshift=0em,yshift=-1.3em]n32.north) {NP[0,2]};
+
+\draw [->,ublue,thick] ([xshift=0em,yshift=0.5em]n1.west) .. controls +(west:7em) and +(north:4em) .. ([xshift=-13em,yshift=-1.6em]n1.south);% curved arrows from each frame's west side to a hand-tuned offset point; presumably each lands on the matching table cell -- verify visually after any layout change
+\draw [->,ublue,thick] ([xshift=0em,yshift=0.5em]n2.west) .. controls +(west:3em) and +(north:2em) .. ([xshift=-12.5em,yshift=1em]n2.south);
+\draw [->,ublue,thick] ([xshift=0em,yshift=-0.5em]n3.west) .. controls +(west:5em) and +(south:0.5em) .. ([xshift=-9em,yshift=5em]n3.south);
+\end{scope}
+\end{tikzpicture}
+\end{center}
--- a/Book/Chapter4/Figures/syntax-tree-in-ctb.tex
+++ b/Book/Chapter4/Figures/syntax-tree-in-ctb.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  tree binarization: a flat (non-binary) IP node whose children are an NP followed by VPs separated by comma nodes. The last child's label is a dot followed by a "V" typeset in white; presumably it stands for a continuation -- confirm.
+\begin{center}
+\begin{tikzpicture}
+
+{\scriptsize
+\begin{scope}[scale = 0.9, sibling distance=20pt, level distance=30pt]
+
+{\footnotesize
+\Tree[.IP
+     	[.NP ]
+     	[.VP ]
+        [., ]
+        [.VP ]
+        [., ]
+        [.VP ]
+        [., ]
+        [.VP ]
+        [.{.{\color{white} V}} ]
+     ]
+}
+\end{scope}
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/syntax-tree-with-admissible-node.tex
+++ b/Book/Chapter4/Figures/syntax-tree-with-admissible-node.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Definition of an admissible node: a source parse tree aligned to a six-word target sentence, with every internal node shaded blue or red and two nodes (n11, n4) annotated with their span / c-span sets
+\begin{center}
+\begin{tikzpicture}
+
+{\footnotesize
+\begin{scope}[sibling distance=25pt, level distance=20pt]
+
+\Tree[.\node(n1){IP};
+     	[.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]]
+     	[.\node(n4){VP};
+     		[.\node(n5){PP};
+     			[.\node(n6){P}; \node(cw2){对}; ]
+     			[.\node(n7){NP};
+                    [.\node(n8){NN}; \node(cw3){回答}; ]
+                ]
+     		]
+     		[.\node(n9){VP};
+     			[.\node(n10){VV}; \node(cw4){表示}; ]
+     			[.\node(n11){NN}; \node(cw5){满意}; ]
+     		]
+     	]
+     ]
+
+\node[anchor=north,minimum size=18pt,align=center] (tw1) at ([yshift=-6.0em]cw1.south){he\\\scriptsize{1}};% target words tw1..tw6, each with its 1-based position on a second line; chained left to right from tw1
+\node[anchor=west,minimum size=18pt,align=center] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was\\\scriptsize{2}};
+\node[anchor=west,minimum size=18pt,align=center] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied\\\scriptsize{3}};
+\node[anchor=west,minimum size=18pt,align=center] (tw4) at ([xshift=1.1em]tw3.east){with\\\scriptsize{4}};
+\node[anchor=west,minimum size=18pt,align=center] (tw5) at ([xshift=1.1em]tw4.east){the\\\scriptsize{5}};
+\node[anchor=west,minimum size=18pt,align=center] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer\\\scriptsize{6}};
+
+\draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north);% dashed word-alignment links from source leaves to target words; tw2 ("was") receives no link, cw3 links to both tw5 and tw6, and cw4 and cw5 both link to tw3
+\draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north);
+\draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north);
+\draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north);
+\draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
+\draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
+
+\node [anchor=south west,align=left,fill=red!20,drop shadow] (label1) at ([xshift=0.5em]n11.north east) {\scriptsize{span=\{3\}}\\\scriptsize{c-span=\{1,3-6\}}};% call-out attached to n11 (red)
+\node [anchor=south west,align=left,fill=blue!20,drop shadow] (label2) at ([xshift=0.5em,yshift=-0.5em]n4.north east) {\scriptsize{span=\{3-6\}}\\\scriptsize{c-span=\{1\}}};% call-out attached to n4 (blue)
+
+\begin{pgfonlayer}{background}% shading goes on the background layer so it sits behind the tree labels
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n11)] (n11box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n4)] (n4box) {};
+
+{% remaining internal nodes: n10 is red, all others blue
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n1)] (n1box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n2)] (n2box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n3)] (n3box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n5)] (n5box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n6)] (n6box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n7)] (n7box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n8)] (n8box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n9)] (n9box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n10)] (n10box) {};
+}
+\end{pgfonlayer}
+
+{% verdict text under each call-out; \red is a macro defined outside this file
+\node [anchor=north] (n11boxlabel) at (label1.south) {\tiny{{\red{不可信}}}};
+\node [anchor=north] (n4boxlabel) at (label2.south) {\tiny{{\red{可信}}}};
+}
+
+{% legend: a blue swatch and a red swatch with the same two verdict words (presumably "admissible" / "not admissible", given the header)
+\node [anchor=north west, minimum size=1.2em, fill=blue!20] (land1) at ([xshift=1.5em,yshift=-1em]cw5.south east) {};
+\node [anchor=west] (land1label) at (land1.east) {\scriptsize{可信}};
+\node [anchor=north west, minimum size=1.2em, fill=red!20] (land2) at ([yshift=-0.3em]land1.south west) {};
+\node [anchor=west] (land2label) at (land2.east) {\scriptsize{不可信}};
+}
+
+\end{scope}
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/three-types-of-reorder-method-in-msd.tex
+++ b/Book/Chapter4/Figures/three-types-of-reorder-method-in-msd.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Reordering model 2: the MSD model.
+%%% An 8x6 alignment matrix with five shaded phrase blocks; circled tags m / s / d
+%%% mark the orientation of each transition, and a legend on the right spells out
+%%% M(monotone), S(swap) and D(discontinuous).
+\begin{center}
+\begin{tikzpicture}
+
+% Local node styles. alignmentnode, probnode and labelnode are not used by any
+% node in this file.
+\tikzstyle{elementnode} = [rectangle,text=white,anchor=center]
+\tikzstyle{srcnode} = [left,font=\small,anchor=south west]
+\tikzstyle{tgtnode} = [left,font=\small,anchor=north east]
+\tikzstyle{alignmentnode} = [rectangle,draw,minimum height=3.6*1.1cm,minimum width=0.36*1.1cm]
+\tikzstyle{probnode} = [fill=blue!30,minimum width=0.4*1.1cm]
+\tikzstyle{labelnode} = [above]
+
+% alignment matrix
+% (this caption and the three below are written as comments: bare text inside
+% a tikzpicture is not typeset and only produces "Missing character" log noise)
+\begin{scope}[scale=0.92,yshift=0.12in]
+% One small square a<i><j> per matrix cell, i = column 0..7, j = row 0..5;
+% the third field is the size factor, identical (0.15) for every cell.
+\foreach \i / \j / \c in
+    {0/5/0.15, 1/5/0.15, 2/5/0.15, 3/5/0.15, 4/5/0.15, 5/5/0.15, 6/5/0.15, 7/5/0.15,
+    0/4/0.15, 1/4/0.15, 2/4/0.15, 3/4/0.15, 4/4/0.15, 5/4/0.15, 6/4/0.15, 7/4/0.15,
+    0/3/0.15, 1/3/0.15, 2/3/0.15, 3/3/0.15, 4/3/0.15, 5/3/0.15, 6/3/0.15, 7/3/0.15,
+    0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.15, 5/2/0.15, 6/2/0.15, 7/2/0.15,
+    0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.15, 5/1/0.15, 6/1/0.15, 7/1/0.15,
+    0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.15, 5/0/0.15, 6/0/0.15, 7/0/0.15}
+    \node[elementnode,minimum size=0.6*1.1cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.1cm*\i-5.4*0.5*1.1cm,0.5*1.1cm*\j-0.05*1.1cm) {};
+
+% source
+% (column headers; NOTE(review): the src* nodes carry t_k labels and the tgt*
+% nodes carry s_k labels -- confirm the naming is intended)
+\node[srcnode] (src1) at (-5.9*0.5*1.1cm,-1.05*1.1cm+7.5*0.5*1.1cm) {\scriptsize{$t_1$}};
+\node[srcnode] (src2) at ([xshift=0.5*1.1cm]src1.south west) {\scriptsize{$t_2$}};
+\node[srcnode] (src3) at ([xshift=0.5*1.1cm]src2.south west) {\scriptsize{$t_3$}};
+\node[srcnode] (src4) at ([xshift=0.5*1.1cm]src3.south west) {\scriptsize{$t_4$}};
+\node[srcnode] (src5) at ([xshift=0.5*1.1cm]src4.south west) {\scriptsize{$t_5$}};
+\node[srcnode] (src6) at ([xshift=0.5*1.1cm]src5.south west) {\scriptsize{$t_6$}};
+\node[srcnode] (src7) at ([xshift=0.5*1.1cm]src6.south west) {\scriptsize{$t_7$}};
+\node[srcnode] (src8) at ([xshift=0.5*1.1cm]src7.south west) {\scriptsize{$t_8$}};
+
+% target
+% (row headers, stacked downwards)
+\node[tgtnode] (tgt1) at (-6.0*0.5*1.1cm,-1.05*1.1cm+7.5*0.5*1.1cm) {\scriptsize{$s_1$}};
+\node[tgtnode] (tgt2) at ([yshift=-0.5*1.1cm]tgt1.north east) {\scriptsize{$s_2$}};
+\node[tgtnode] (tgt3) at ([yshift=-0.5*1.1cm]tgt2.north east) {\scriptsize{$s_3$}};
+\node[tgtnode] (tgt4) at ([yshift=-0.5*1.1cm]tgt3.north east) {\scriptsize{$s_4$}};
+\node[tgtnode] (tgt5) at ([yshift=-0.5*1.1cm]tgt4.north east) {\scriptsize{$s_5$}};
+\node[tgtnode] (tgt6) at ([yshift=-0.5*1.1cm]tgt5.north east) {\scriptsize{$s_6$}};
+
+% word alignment
+% (a larger, lighter square l<i><j> on top of each aligned cell a<i><j>)
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l05) at (a05) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l14) at (a14) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l24) at (a24) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l31) at (a31) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l32) at (a32) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l43) at (a43) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l53) at (a53) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l63) at (a63) {};
+\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (l70) at (a70) {};
+
+
+{
+% legend text to the right of the matrix, one entry per orientation
+\node [anchor=west] (p1line1) at ([xshift=3.5em,yshift=0.5em]a75.east) {\footnotesize{M(monotone):单调调序}};
+\node [anchor=north west] (p1line2) at ([xshift=0,yshift=-1em]p1line1.south west) {\footnotesize{S(swap): 与前面一个短语}};
+\node [anchor=north west] (p1line3) at ([xshift=3.5em]p1line2.south west) {\footnotesize{位置进行交换}};
+\node [anchor=north west] (p1line4) at ([xshift=-3.5em,yshift=-1em]p1line3.south west) {\footnotesize{D(discontinuous):非连续调序}};
+
+% empty helper nodes, used only as extra fit targets for the legend boxes below
+\node [anchor=east] (p1line5) at ([xshift=0em,yshift=3em]p1line4.east) {};
+\node [anchor=east] (p1line6) at ([xshift=0em,yshift=7em]p1line4.east) {};
+
+%\node [anchor=west] (p2line1) at ([xshift=4em]a73.east) {\footnotesize{$\bar{s}_j$: 真\ \ \ 好 \ \ }};
+%\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: very\ \ \ good\ \ \ \ \ \ \ \ }};
+
+\begin{pgfonlayer}{background}
+{
+% shaded phrase blocks behind the aligned cells, then the three legend backgrounds
+\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a05)] (phrase1) {};
+\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a14) (a24)] (phrase2) {};
+\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a31) (a32)] (phrase3) {};
+\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a43) (a63)] (phrase4) {};
+\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a70)] (phrase5) {};
+\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line6)] (box1) {};
+\node [rectangle,inner sep=0.2em,fill=ugreen!10] [fit = (p1line2) (p1line3) (p1line5)] (box2) {};
+\node [rectangle,inner sep=0.2em,fill=orange!10] [fit = (p1line4)] (box3) {};
+}
+\end{pgfonlayer}
+
+% circled orientation tags; fill colours match the legend boxes (red = m, green = s, orange = d)
+\node [circle,draw,anchor=south,inner sep=1pt,fill=red!20] (c1) at ([xshift=-0.5em]a05.north) {\scriptsize{m}};
+\node [circle,draw,anchor=south,inner sep=1pt,fill=red!20] (c2) at ([xshift=-0.5em]a14.north) {\scriptsize{m}};
+\node [circle,draw,anchor=north,inner sep=1pt,fill=orange!20] (c3) at ([xshift=0.1em]a24.south) {\scriptsize{d}};
+\node [circle,draw,anchor=south,inner sep=2pt,fill=ugreen!20] (c4) at ([xshift=0.5em]a32.north) {\scriptsize{s}};
+\node [circle,draw,anchor=north,inner sep=1pt,fill=orange!20] (c5) at ([xshift=0.5em]a63.south) {\scriptsize{d}};
+
+
+% arrows pointing at each tag, ending on the tag's border at the given angle
+\draw [->,thick] (a05.south east) -- (c1.315);
+\draw [->,thick] ([xshift=-0.5em]a24.west) -- (c2.315);
+\draw [->,thick] ([yshift=-0.7em]a32.south) .. controls +(west:1.3) and +(east:1.3) .. (c3.340);
+\draw [->,thick] (a53.center) -- (c4.15);
+\draw [->,thick] (a70.center) .. controls +(west:1.3) and +(east:1.3) .. (c5.340);
+}
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/translation-hypothesis-extension.tex
+++ b/Book/Chapter4/Figures/translation-hypothesis-extension.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% What is decoding: translation hypotheses (boxes h0..h8) expanded left to right from an initial "null" hypothesis, with one complete path highlighted in red
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+{% initial hypothesis: box h0, a black corner tag hl0 at its north-west corner and a rotated black tag pt0 on its east edge
+\node [anchor=north,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h0) at (0,0) {\scriptsize{null}};
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl0) at (h0.north west) {\tiny{{\color{white} \textbf{0}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt0) at (h0.east) {\tiny{{\color{white} \textbf{P=1}}}};
+}
+
+{% first expansion: h1..h3 to the right of h0, each with a corner tag hl* (a number; presumably the source position covered -- confirm) and a P=... tag pt*
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h1) at ([xshift=2.5em]h0.east) {\scriptsize{on}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h2) at ([xshift=2.5em,yshift=2.5em]h0.east) {\scriptsize{table}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h3) at ([xshift=2.5em,yshift=-2.5em]h0.east) {\scriptsize{there is}};
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl1) at (h1.north west) {\tiny{{\color{white} \textbf{2}}}};
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl2) at (h2.north west) {\tiny{{\color{white} \textbf{1}}}};
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl3) at (h3.north west) {\tiny{{\color{white} \textbf{3}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt1) at (h1.east) {\tiny{{\color{white} \textbf{P=.2}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt2) at (h2.east) {\tiny{{\color{white} \textbf{P=.3}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt3) at (h3.east) {\tiny{{\color{white} \textbf{P=.5}}}};
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h1.west);% expansion arrows from h0's tag to h1, h2, h3
+\draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h2.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt0.south) -- ([xshift=-0.1em]h3.west);
+}
+
+{% later expansions: h4, h5 placed relative to h3; h6 relative to h1; h7, h8 relative to h5
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h4) at ([xshift=2.5em,yshift=-1.8em]h3.east) {\scriptsize{one}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h5) at ([xshift=2.5em,yshift=0.7em]h3.east) {\scriptsize{an apple}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.2em] (h6) at ([xshift=2.5em,yshift=0.7em]h1.east) {\scriptsize{table}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.5em] (h7) at ([xshift=2.5em,yshift=0.7em]h5.east) {\scriptsize{on the table}};
+\node [anchor=west,inner sep=2pt,fill=red!20,minimum height=1.7em,minimum width=3.73em] (h8) at ([xshift=2.5em,yshift=-2em]h5.east) {\scriptsize{\ \;apple}};
+
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl4) at (h4.north west) {\tiny{{\color{white} \textbf{4}}}};% corner tags for h4..h8
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl5) at (h5.north west) {\tiny{{\color{white} \textbf{4-5}}}};
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl6) at (h6.north west) {\tiny{{\color{white} \textbf{1}}}};
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl7) at (h7.north west) {\tiny{{\color{white} \textbf{1-2}}}};
+\node [anchor=north west,inner sep=1.5pt,fill=black] (hl8) at (h8.north west) {\tiny{{\color{white} \textbf{5}}}};
+
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt4) at (h4.east) {\tiny{{\color{white} \textbf{P=.1}}}};% P=... tags for h4..h8
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt5) at (h5.east) {\tiny{{\color{white} \textbf{P=.4}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt6) at (h6.east) {\tiny{{\color{white} \textbf{P=.3}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt7) at (h7.east) {\tiny{{\color{white} \textbf{P=.4}}}};
+\node [anchor=north,rotate=90,inner sep=1pt,minimum width=1.7em,fill=black] (pt8) at (h8.east) {\tiny{{\color{white} \textbf{P=.2}}}};
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt1.south) -- ([xshift=1em,yshift=0.7em]pt1.south);% arrows that end at a fixed 1em offset instead of at a box are short stubs with no target hypothesis drawn
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt2.south) -- ([xshift=1em,yshift=-0.7em]pt2.south);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt2.south) -- ([xshift=1em,yshift=0.7em]pt2.south);
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt6.south) -- ([xshift=1em,yshift=-0.7em]pt6.south);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt6.south) -- ([xshift=1em,yshift=0.7em]pt6.south);
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt3.south) -- ([xshift=-0.1em]h4.west);% h3 expands to h4, h5 and h6
+\draw [->,very thick,ublue] ([xshift=0.1em]pt3.south) -- ([xshift=-0.1em]h5.west);
+\draw [->,very thick,ublue] ([xshift=0.1em]pt3.south) -- ([xshift=-0.1em]h6.west);
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt5.south) -- ([xshift=-0.1em]h7.west);% h5 expands to h7 plus one stub
+\draw [->,very thick,ublue] ([xshift=0.1em]pt5.south) -- ([xshift=1em,yshift=-0.7em]pt5.south);
+
+\draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=-0.1em]h8.west);% h4 expands to h8 plus one stub
+\draw [->,very thick,ublue] ([xshift=0.1em]pt4.south) -- ([xshift=1em,yshift=-0.7em]pt4.south);
+}
+
+{% highlighted path h0 -> h3 -> h5 -> h7, drawn last so it overlays the boxes (opacity 0.7), followed by its legend entry ("translation path")
+\draw [->,ultra thick,red,line width=2pt,opacity=0.7] ([xshift=-0.2em]h0.west) -- ([xshift=0.7em]h0.east) -- ([xshift=-0.2em]h3.west) -- ([xshift=0.8em]h3.east) -- ([xshift=-0.2em]h5.west) -- ([xshift=0.8em]h5.east) -- ([xshift=-0.2em]h7.west) -- ([xshift=0.8em]h7.east);
+\node [anchor=north west] (wtranslabel) at ([yshift=-3em]h0.south west) {\scriptsize{翻译路径:}};
+\draw [->,ultra thick,red,line width=1.5pt,opacity=0.7] (wtranslabel.east) -- ([xshift=1.5em]wtranslabel.east);
+}
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/translation-option.tex
+++ b/Book/Chapter4/Figures/translation-option.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% What is decoding: the translation options on offer for each source word and
+%%% for longer source spans, stacked underneath the source sentence
+\begin{center}
+\begin{tikzpicture}
+
+% Geometry shared by the option boxes. wordopt hangs centred under one source
+% word; spanopt is left-aligned under an earlier box and covers several words,
+% so every use supplies its own minimum width. Fill colour is set per node.
+\tikzstyle{srcword} = [anchor=west]
+\tikzstyle{wordopt} = [anchor=north,inner sep=2pt,minimum height=1.5em,minimum width=3em]
+\tikzstyle{spanopt} = [anchor=north west,inner sep=2pt,minimum height=1.5em]
+
+\begin{scope}[minimum height = 16pt]
+
+% source sentence, chained left to right
+\node[anchor=east] (s0) at (-0.8em, 0) {$\textbf{s}$:};
+\node[srcword] (s1) at (0, 0) {桌子};
+\node[srcword] (s2) at ([xshift=2em]s1.east) {上};
+\node[srcword] (s3) at ([xshift=2.3em]s2.east) {有};
+\node[srcword] (s4) at ([xshift=2em]s3.east) {一个};
+\node[srcword] (s5) at ([xshift=1.6em]s4.east) {苹果};
+
+% two single-word options per source word, one colour per word
+\node [wordopt,fill=red!20] (t11) at ([yshift=-0.5em]s1.south) {table};
+\node [wordopt,fill=red!20] (t12) at ([yshift=-0.2em]t11.south) {desk};
+
+\node [wordopt,fill=green!20] (t21) at ([yshift=-0.5em]s2.south) {on};
+\node [wordopt,fill=green!20] (t22) at ([yshift=-0.2em]t21.south) {up};
+
+\node [wordopt,fill=blue!20,minimum width=3.4em] (t31) at ([yshift=-0.5em]s3.south) {have};
+\node [wordopt,fill=blue!20,minimum width=3.4em] (t32) at ([yshift=-0.2em]t31.south) {there is};
+
+\node [wordopt,fill=orange!20] (t41) at ([yshift=-0.5em]s4.south) {one};
+\node [wordopt,fill=orange!20] (t42) at ([yshift=-0.2em]t41.south) {an};
+
+\node [wordopt,fill=purple!20] (t51) at ([yshift=-0.5em]s5.south) {apple};
+\node [wordopt,fill=purple!20] (t52) at ([yshift=-0.2em]t51.south) {apples};
+
+% multi-word options, each anchored at the south-west corner of an earlier box
+\node [spanopt,fill=red!20,minimum width=7.2em] (t13) at ([yshift=-3.7em]t12.south west) {on table};
+\node [spanopt,fill=red!20,minimum width=7.2em] (t14) at ([yshift=-0.2em]t13.south west) {on the table};
+
+\node [spanopt,fill=orange!20,minimum width=7.35em] (t43) at ([yshift=-0.2em]t42.south west) {one apple};
+\node [spanopt,fill=orange!20,minimum width=7.35em] (t44) at ([yshift=-0.2em]t43.south west) {an apple};
+
+\node [spanopt,fill=green!20,minimum width=7.25em] (t23) at ([yshift=-0.2em]t22.south west) {upon there};
+
+\node [spanopt,fill=red!20,minimum width=11.5em] (t15) at ([yshift=-1.95em]t12.south west) {upon the table};
+
+\node [spanopt,fill=blue!20,minimum width=11.8em] (t33) at ([yshift=-3.7em]t32.south west) {there is an apple};
+\node [spanopt,fill=blue!20,minimum width=11.8em] (t34) at ([yshift=-0.2em]t33.south west) {have an apple...};
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/translation-rule-based-on-admissible-node.tex
+++ b/Book/Chapter4/Figures/translation-rule-based-on-admissible-node.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Rule extraction: a source parse tree aligned to a six-word target sentence, with one rule's tree fragment and target span shaded blue and the nodes n9 / tw3 overlaid as "VP" variable boxes
+\begin{center}
+\begin{tikzpicture}
+
+{\footnotesize
+\begin{scope}[sibling distance=25pt, level distance=20pt]
+
+\Tree[.\node(n1){IP};
+     	[.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]]
+     	[.\node(n4){VP};
+     		[.\node(n5){PP};
+     			[.\node(n6){P}; \node(cw2){对}; ]
+     			[.\node(n7){NP};
+                    [.\node(n8){NN}; \node(cw3){回答}; ]
+                ]
+     		]
+     		[.\node(n9){VP};
+     			[.\node(n10){VV}; \node(cw4){表示}; ]
+     			[.\node(n11){NN}; \node(cw5){满意}; ]
+     		]
+     	]
+     ]
+
+\node[anchor=north,minimum size=18pt,align=center] (tw1) at ([yshift=-6.0em]cw1.south){he\\\scriptsize{1}};% target words tw1..tw6, each with its 1-based position on a second line; chained left to right from tw1
+\node[anchor=west,minimum size=18pt,align=center] (tw2) at ([yshift=-0.1em,xshift=1.1em]tw1.east){was\\\scriptsize{2}};
+\node[anchor=west,minimum size=18pt,align=center] (tw3) at ([yshift=0.1em,xshift=1.1em]tw2.east){satisfied\\\scriptsize{3}};
+\node[anchor=west,minimum size=18pt,align=center] (tw4) at ([xshift=1.1em]tw3.east){with\\\scriptsize{4}};
+\node[anchor=west,minimum size=18pt,align=center] (tw5) at ([xshift=1.1em]tw4.east){the\\\scriptsize{5}};
+\node[anchor=west,minimum size=18pt,align=center] (tw6) at ([yshift=-0.1em,xshift=1.1em]tw5.east){answer\\\scriptsize{6}};
+
+\draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north);% dashed word-alignment links from source leaves to target words; tw2 ("was") receives no link
+\draw[dashed] (cw2.south) .. controls +(south:1.6) and +(north:0.6) .. ([yshift=-0.4em]tw4.north);
+\draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north);
+\draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north);
+\draw[dashed] (cw4.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
+\draw[dashed] (cw5.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
+
+\begin{pgfonlayer}{background}% everything in here is painted behind the tree; within the layer, later nodes cover earlier ones
+{% blue region of the rule: three fit boxes plus a filled polygon joining rule1box2 to rule1box1 past n5
+\node [fill=blue!30] [fit = (cw2) (cw3) (n7) (n8)] (rule1box1) {};
+\node [fill=blue!30] [fit = (n4) (n9)] (rule1box2) {};
+\node [fill=blue!30] [fit = (tw3) (tw4) (tw5) (tw6)] (rule1box3) {};
+\path [fill=blue!30] ([xshift=0.05em]rule1box2.north west) -- ([xshift=-0.3em,yshift=0.3em]n5.north west) -- ([yshift=-0.05em]rule1box1.north west) -- ([xshift=0.05em,yshift=-0.05em]rule1box2.south west) -- ([xshift=0.05em]rule1box2.north west);
+}
+
+\node [rectangle,fill=orange!20,inner sep=0] [fit = (n11)] (n11box) {};% per-node shading on top of the blue region: n10 and n11 orange, every other internal node green
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n4)] (n4box) {};
+
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n1)] (n1box) {};
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n2)] (n2box) {};
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n3)] (n3box) {};
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n5)] (n5box) {};
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n6)] (n6box) {};
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n7)] (n7box) {};
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n8)] (n8box) {};
+\node [rectangle,fill=green!20,inner sep=0] [fit = (n9)] (n9box) {};
+\node [rectangle,fill=orange!20,inner sep=0] [fit = (n10)] (n10box) {};
+\end{pgfonlayer}
+
+\node [anchor=north west, minimum size=1.2em, fill=green!20] (land1) at ([xshift=1.5em,yshift=-1em]cw5.south east) {};% legend: green swatch and orange swatch with their labels (presumably "admissible" / "not admissible" -- confirm against the chapter text)
+\node [anchor=west] (land1label) at (land1.east) {\scriptsize{可信}};
+\node [anchor=north west, minimum size=1.2em, fill=orange!20] (land2) at ([yshift=-0.3em]land1.south west) {};
+\node [anchor=west] (land2label) at (land2.east) {\scriptsize{不可信}};
+
+{% variable overlays on the main layer: red-framed boxes fitted over n9 and tw3, both showing "VP", each with a small red caption ("variable"); \red is a macro defined outside this file
+\node [draw,thick,red,fill=red!20] [fit = (n9)] (var1) {{\color{black} VP}};
+\node [draw,thick,red,fill=red!20] [fit = (tw3)] (var1v2) {{\color{black} \large{VP}}};
+\node [anchor=west] (var1label) at ([yshift=0.5em]var1.east) {\tiny{\red{变量}}};
+\node [anchor=south] (var1v2label) at ([xshift=-2em]var1v2.north) {\tiny{\red{变量}}};
+}
+
+\end{scope}
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/translation-rule-describe-two-sentence-generation.tex
+++ b/Book/Chapter4/Figures/translation-rule-describe-two-sentence-generation.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Applying translation rules.
+%%%  Four rules with X_1/X_2 variable slots are stacked bottom-up (rule 1 at the
+%%%  bottom, rule 4 on top). Each rule has a source side (left) and a target side
+%%%  (right); plain phrases and lower rules are plugged into the black X slots.
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+
+{
+% rule 1 (source)
+\node [anchor=west] (rule1s1) at (0,0) {与};
+\node [anchor=west,inner sep=2pt,fill=black] (rule1s2) at ([xshift=0.5em]rule1s1.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}};
+\node [anchor=west] (rule1s3) at ([xshift=0.5em]rule1s2.east) {有};
+\node [anchor=west,inner sep=2pt,fill=black] (rule1s4) at ([xshift=0.5em]rule1s3.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}};
+
+% rule 1 (target) -- note that X_1 and X_2 appear in swapped order
+\node [anchor=west] (rule1t1) at ([xshift=0.8in]rule1s4.east) {have};
+\node [anchor=west,inner sep=2pt,fill=black] (rule1t2) at ([xshift=0.5em]rule1t1.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}};
+\node [anchor=west] (rule1t3) at ([xshift=0.5em]rule1t2.east) {with};
+\node [anchor=west,inner sep=2pt,fill=black] (rule1t4) at ([xshift=0.5em]rule1t3.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}};
+}
+
+{
+% phrase 1 (source and target), placed under the X_1 slots of rule 1
+\node [anchor=north] (phrase1s1) at ([yshift=-1em]rule1s2.south) {\footnotesize{北韩}};
+\node [anchor=north] (phrase1t1) at ([yshift=-1em]rule1t4.south) {\footnotesize{North Korea}};
+}
+
+{
+% phrase 2 (source and target), placed under the X_2 slots of rule 1
+\node [anchor=north] (phrase2s1) at ([yshift=-3em]rule1s4.south) {\footnotesize{邦交}};
+\node [anchor=north] (phrase2t1) at ([yshift=-3em]rule1t2.south) {\footnotesize{diplomatic relations}};
+}
+
+% frames around both sides of rule 1 and the dotted link between them;
+% drawn on the `background` layer (layer declaration is expected elsewhere)
+\begin{pgfonlayer}{background}
+{
+\node [rectangle,draw=red,inner sep=1pt,thick,fill=white,drop shadow] [fit = (rule1s1) (rule1s4)] (rule1s) {};
+\node [rectangle,draw=red,inner sep=2pt,thick,fill=white,drop shadow] [fit = (rule1t1) (rule1t4)] (rule1t) {};
+\draw [<->,dotted,thick,red] ([xshift=0.1em]rule1s.east) -- ([xshift=-0.1em]rule1t.west);
+}
+\end{pgfonlayer}
+
+{
+% edges (phrases 1-2 to rule1)
+\draw [->] (phrase1s1.north) -- ([yshift=-0.1em]rule1s2.south);
+\draw [->] (phrase1t1.north) -- ([yshift=-0.1em]rule1t4.south);
+\draw [->] (phrase2s1.north) -- ([yshift=-0.1em]rule1s4.south);
+\draw [->] (phrase2t1.north) -- ([yshift=-0.1em]rule1t2.south);
+}
+
+{
+% rule 2 (source), positioned 3.5em above rule 1
+\node [anchor=west,inner sep=2pt,fill=black] (rule2s1) at ([yshift=3.5em,xshift=-0.5em]rule1s1.north west) {\scriptsize{{\color{white} $\textrm{X}_1$}}};
+\node [anchor=west] (rule2s2) at ([xshift=0.5em]rule2s1.east) {的};
+\node [anchor=west,inner sep=2pt,fill=black] (rule2s3) at ([xshift=0.5em]rule2s2.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}};
+
+% rule 2 (target)
+\node [anchor=west,inner sep=2pt,fill=black] (rule2t1) at ([xshift=1.8in]rule2s3.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}};
+\node [anchor=west] (rule2t2) at ([xshift=0.5em]rule2t1.east) {that};
+\node [anchor=west,inner sep=2pt,fill=black] (rule2t3) at ([xshift=0.5em]rule2t2.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}};
+
+% phrase 3 (source and target)
+\node [anchor=north] (phrase3s1) at ([yshift=-0.8em]rule2s3.south) {\footnotesize{少数 国家}};
+\node [anchor=north] (phrase3t1) at ([yshift=-0.8em]rule2t1.south) {\footnotesize{the few countries}};
+
+% edges (phrase 3 to rule 2 and rule1 to rule2)
+\draw [->] (phrase3s1.north) -- ([yshift=-0.1em]rule2s3.south);
+\draw [->] (phrase3t1.north) -- ([yshift=-0.1em]rule2t1.south);
+\draw [->] ([xshift=2em]rule1s.north west) ..controls +(north:1.5em) and +(south:1.5em).. ([yshift=-0.1em]rule2s1.south);
+\draw [->] ([xshift=-2em]rule1t.north east) ..controls +(north:1.5em) and +(south:1.5em).. ([yshift=-0.1em]rule2t3.south);
+}
+
+% frames and source/target link for rule 2
+\begin{pgfonlayer}{background}
+{
+\node [rectangle,draw=blue,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule2s1) (rule2s3)] (rule2s) {};
+\node [rectangle,draw=blue,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule2t1) (rule2t3)] (rule2t) {};
+\draw [<->,dotted,thick,blue] ([xshift=0.1em]rule2s.east) -- ([xshift=-0.1em]rule2t.west);
+}
+\end{pgfonlayer}
+
+{
+% rule 3 (source), positioned 2.5em above rule 2
+\node [anchor=west,inner sep=2pt,fill=black] (rule3s1) at ([yshift=2.5em,xshift=4em]rule2s1.north west) {\scriptsize{{\color{white} $\textrm{X}_1$}}};
+\node [anchor=west] (rule3s2) at ([xshift=0.5em]rule3s1.east) {之一};
+
+% rule 3 (target)
+\node [anchor=west] (rule3t1) at ([xshift=1.0in]rule3s2.east) {one of};
+\node [anchor=west,inner sep=2pt,fill=black] (rule3t2) at ([xshift=0.5em]rule3t1.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}};
+
+% edges: rule 2 to rule 3
+\draw [->] ([xshift=-1em]rule2s.north) ..controls +(north:1.2em) and +(south:1.2em).. ([yshift=-0.1em]rule3s1.south);
+\draw [->] ([xshift=1em]rule2t.north) ..controls +(north:1.2em) and +(south:1.2em).. ([yshift=-0.1em]rule3t2.south);
+}
+
+% frames and source/target link for rule 3 (`ugreen` is a colour defined outside this file)
+\begin{pgfonlayer}{background}
+{
+\node [rectangle,draw=ugreen,inner sep=1pt,thick,fill=white,drop shadow] [fit = (rule3s1) (rule3s2)] (rule3s) {};
+\node [rectangle,draw=ugreen,inner sep=2pt,thick,fill=white,drop shadow] [fit = (rule3t1) (rule3t2)] (rule3t) {};
+\draw [<->,dotted,thick,ugreen] ([xshift=0.1em]rule3s.east) -- ([xshift=-0.1em]rule3t.west);
+}
+\end{pgfonlayer}
+
+{
+% rule 4 (source), positioned 3.5em above rule 3
+\node [anchor=west,inner sep=2pt,fill=black] (rule4s1) at ([yshift=3.5em,xshift=-3.5em]rule3s1.north west) {\scriptsize{{\color{white} $\textrm{X}_1$}}};
+\node [anchor=west] (rule4s2) at ([xshift=0.5em]rule4s1.east) {是};
+\node [anchor=west,inner sep=2pt,fill=black] (rule4s3) at ([xshift=0.5em]rule4s2.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}};
+
+% rule 4 (target)
+\node [anchor=west,inner sep=2pt,fill=black] (rule4t1) at ([xshift=2.0in]rule4s2.east) {\scriptsize{{\color{white} $\textrm{X}_1$}}};
+\node [anchor=west] (rule4t2) at ([xshift=0.5em]rule4t1.east) {is};
+\node [anchor=west,inner sep=2pt,fill=black] (rule4t3) at ([xshift=0.5em]rule4t2.east) {\scriptsize{{\color{white} $\textrm{X}_2$}}};
+
+% phrase 4 (source and target)
+\node [anchor=north] (phrase4s1) at ([yshift=-0.8em]rule4s1.south) {\footnotesize{澳洲}};
+\node [anchor=north] (phrase4t1) at ([yshift=-0.8em]rule4t1.south) {\footnotesize{Australia}};
+
+% edges (phrase 4 to rule 4 and rule3 to rule4)
+\draw [->] (phrase4s1.north) -- ([yshift=-0.1em]rule4s1.south);
+\draw [->] (phrase4t1.north) -- ([yshift=-0.1em]rule4t1.south);
+\draw [->] ([xshift=1em]rule3s.north) ..controls +(north:1.5em) and +(south:1.5em).. ([yshift=-0.1em]rule4s3.south);
+\draw [->] ([xshift=-1em]rule3t.north east) ..controls +(north:1.5em) and +(south:1.5em).. ([yshift=-0.1em]rule4t3.south);
+}
+
+% frames and source/target link for rule 4
+\begin{pgfonlayer}{background}
+{
+\node [rectangle,draw=orange,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule4s1) (rule4s3)] (rule4s) {};
+\node [rectangle,draw=orange,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule4t1) (rule4t3)] (rule4t) {};
+\draw [<->,dotted,thick,orange] ([xshift=0.1em]rule4s.east) -- ([xshift=-0.1em]rule4t.west);
+}
+\end{pgfonlayer}
+
+\end{scope}
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/tree-binarization.tex
+++ b/Book/Chapter4/Figures/tree-binarization.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Tree binarization (motivation).
+%%%  A flat NP with four children is aligned to three target words; the text on
+%%%  the right lists the rules that can be extracted from it and one that cannot.
+\begin{center}
+\begin{tikzpicture}
+
+{\scriptsize
+\begin{scope}[sibling distance=4pt, level distance=25pt]
+
+% source-side tree; the leaves sw1..sw4 are named so they can be aligned below
+{\footnotesize
+\Tree[.\node(n1){NP};
+        [.NNP \node(sw1){美国}; ]
+        [.NN \node(sw2){总统}; ]
+        [.NN \node(sw3){唐纳德}; ]
+        [.NN \node(sw4){特朗普}; ]
+     ]
+}
+
+% target words, each hung 2em below the source leaf it is placed under
+\node [anchor=north] (tw1) at ([yshift=-2em]sw1.south) {U.S.};
+\node [anchor=north] (tw2) at ([yshift=-2em]sw2.south) {President};
+\node [anchor=north] (tw3) at ([yshift=-2em]sw3.south) {Trump};
+
+% word alignments; sw3 and sw4 are both linked to the single word tw3
+\foreach \srcleaf/\tgtword in {sw1/tw1, sw2/tw2, sw3/tw3, sw4/tw3}
+  \draw [dashed] (\srcleaf.south) -- (\tgtword.north);
+
+% rule list to the right of the tree root: heading, two extractable rules
+% (each split over two lines), second heading, and the non-extractable rule
+\node [anchor=west] (rulelabel1)
+  at ([xshift=1in,yshift=0.3em]n1.east)
+  {\footnotesize{抽取到的规则：}};
+\node [anchor=north west] (rule1)
+  at (rulelabel1.south west)
+  {NP(NNP$_1$ NN$_2$ NN(唐纳德) NN(特朗普))};
+\node [anchor=north west] (rule1t)
+  at ([yshift=0.2em]rule1.south west)
+  {$\to$ NNP$_1$ NN$_2$ Trump};
+\node [anchor=north west] (rule2)
+  at (rule1t.south west)
+  {NP(NNP$_1$ NN(总统) NN(唐纳德) NN(特朗普))};
+\node [anchor=north west] (rule2t)
+  at ([yshift=0.2em]rule2.south west)
+  {$\to$ NNP$_1$ President Trump};
+\node [anchor=north west] (rulelabel2)
+  at ([yshift=-0.3em]rule2t.south west)
+  {\footnotesize{{\red{不能}}抽取到的规则：}};
+\node [anchor=north west] (rule3)
+  at (rulelabel2.south west)
+  {NP(NN(唐纳德) NN(特朗普)) $\to$ Trump};
+
+\end{scope}
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/tree-cutting-defined-by-edge-nodes.tex
+++ b/Book/Chapter4/Figures/tree-cutting-defined-by-edge-nodes.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Rule extraction.
+%%%  First scope: a source parse tree with shaded node labels, aligned by dashed
+%%%  lines to the target sentence. Second scope (shifted right and down): the same
+%%%  tree shown as separate boxed fragments cfrag1..cfrag7 over the target words.
+\begin{center}
+\begin{tikzpicture}
+
+{\scriptsize
+\begin{scope}[scale = 0.9, sibling distance=20pt, level distance=30pt]
+
+% full source tree; every internal node (n1..n11) and leaf (cw1..cw5) is named
+{\footnotesize
+\Tree[.\node(n1){IP};
+     	[.\node(n2){NP}; [.\node(n3){PN}; \node(cw1){他}; ]]
+     	[.\node(n4){VP};
+     		[.\node(n5){PP};
+     			[.\node(n6){P}; \node(cw2){对}; ]
+     			[.\node(n7){NP};
+                    [.\node(n8){NN}; \node(cw3){回答}; ]
+                ]
+     		]
+     		[.\node(n9){VP};
+     			[.\node(n10){VV}; \node(cw4){表示}; ]
+     			[.\node(n11){NN}; \node(cw5){满意}; ]
+     		]
+     	]
+     ]
+}
+
+\begin{pgfonlayer}{background}
+
+{
+% shading behind node labels: red!20 matches the first legend entry, blue!20
+% (only n10 and n11) matches the second
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n11)] (n11box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n4)] (n4box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n1)] (n1box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n2)] (n2box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n3)] (n3box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n5)] (n5box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n6)] (n6box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n7)] (n7box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n8)] (n8box) {};
+\node [rectangle,fill=red!20,inner sep=0] [fit = (n9)] (n9box) {};
+\node [rectangle,fill=blue!20,inner sep=0] [fit = (n10)] (n10box) {};
+
+% legend: colour swatch plus label, placed to the right of the root
+\node [anchor=north west, minimum size=1.2em, fill=red!20] (land1) at ([xshift=7.0em,yshift=0em]n1.north east) {};
+\node [anchor=west] (land1label) at (land1.east) {\scriptsize{可信}};
+\node [anchor=north west, minimum size=1.2em, fill=blue!20] (land2) at ([yshift=-0.3em]land1.south west) {};
+\node [anchor=west] (land2label) at (land2.east) {\scriptsize{不可信}};
+}
+
+\end{pgfonlayer}
+
+% target sentence, laid out left to right starting 10em below the first leaf
+\node[anchor=north,minimum size=18pt] (tw1) at ([yshift=-10.0em]cw1.south){he};
+\node[anchor=west,minimum size=18pt] (tw2) at ([yshift=-0.1em,xshift=0.3em]tw1.east){was};
+\node[anchor=west,minimum size=18pt] (tw3) at ([yshift=0.1em,xshift=0.3em]tw2.east){satisfied};
+\node[anchor=west,minimum size=18pt] (tw4) at ([xshift=0.3em]tw3.east){with};
+\node[anchor=west,minimum size=18pt] (tw5) at ([xshift=0.3em]tw4.east){the};
+\node[anchor=west,minimum size=18pt] (tw6) at ([yshift=-0.1em,xshift=0.3em]tw5.east){answer};
+
+% word alignments; "was" (tw2) has no alignment link
+\draw[dashed] (cw1.south) -- ([yshift=-0.4em]tw1.north);
+\draw[dashed] (cw2.south) .. controls +(south:2.0) and +(north:0.6) .. ([yshift=-0.4em]tw4.north);
+\draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw5.north);
+\draw[dashed] (cw3.south) -- ([yshift=-0.4em]tw6.north);
+\draw[dashed] (cw4.south) .. controls +(south:2.5) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
+\draw[dashed] (cw5.south) .. controls +(south:2.5) and +(north:0.6) .. ([yshift=-0.4em]tw3.north);
+
+\end{scope}
+
+\begin{scope} [yshift = -1.87in, xshift = 1.9in]
+{
+% each fragment is a framed node whose content is a small \Tree
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at
+   (0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at
+   ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at
+   ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 回答 ]]};
+   \end{scope}
+
+\begin{scope}[sibling distance=15pt,level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at
+   ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 满意 ]]};
+\end{scope}
+
+% (a stray empty option ",," was removed from this scope's option list)
+\begin{scope}[sibling distance=20pt,level distance=25pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag5) at
+   ([xshift=0.3em,yshift=2.5em]cfrag2.north west) {\Tree[.\node(sn8){PP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=60pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at
+   ([xshift=1.6em,yshift=0.8em]cfrag5.north west) {\Tree[.\node(sn11){VP}; [.\node(sn12){PP}; ] [.\node(sn13){VP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=80pt,level distance=18pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at
+   ([xshift=-3.6em,yshift=0.8em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]};
+\end{scope}
+
+% target sentence under the fragments
+\node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){satisfied};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){with};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){answer};
+
+% alignments from the lexical fragments to the target words
+\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
+\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
+\draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north);
+
+% dot-ended connectors rising from the top of each fragment; the hand-tuned
+% lengths are presumably chosen to reach the matching leaf of the fragment above
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=11.3em]cfrag1.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=0.1em,yshift=2.9em]cfrag2.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) -- ([xshift=0.1em,yshift=0.9em]cfrag3.north);
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=5.7em]cfrag4.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=1em]cfrag5.north);
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=1em]cfrag6.north);
+}
+\end{scope}
+}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/tree-fragment-to-string-mapping.tex
+++ b/Book/Chapter4/Figures/tree-fragment-to-string-mapping.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Tree-to-string rule.
+%%%  A source tree fragment with one lexical leaf and one open NN leaf is mapped to
+%%%  a target string; the open NN and its target-side counterpart are shaded alike.
+\begin{center}
+\begin{tikzpicture}
+
+\begin{scope}[scale=1]
+% source-side fragment: VV is lexicalised, NN is left open
+\Tree[.\node(sn1){VP};
+        [.\node(sn2){VV}; \node(scw1){提高}; ]
+        [.\node(sn3){NN}; ]
+     ]
+
+% shading behind the open NN leaf
+\begin{pgfonlayer}{background}
+\node [rectangle, inner sep=0em, fill=red!20, fit=(sn3)] (nn1) {};
+\end{pgfonlayer}
+
+% mapping arrow from the tree to the target string
+\draw [->, thick] ([xshift=1em]sn3.east) -- ([xshift=2.5em]sn3.east);
+
+% target side: one word followed by the shaded NN variable
+\node [anchor=west] (tw1) at ([xshift=3.5em]sn3.east) {increases};
+\node [anchor=west, fill=red!20] (tw2) at ([xshift=0.3em]tw1.east) {NN};
+
+% dotted curve tying the source NN to the target NN
+\draw [dotted, thick]
+  ([yshift=-0.1em]sn3.south)
+  .. controls +(south:1.2) and +(south: 1.2) ..
+  ([yshift=-0.1em]tw2.south);
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
--- a/Book/Chapter4/Figures/tree-segment-corresponding-to-phrase.tex
+++ b/Book/Chapter4/Figures/tree-segment-corresponding-to-phrase.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  SPMT rules.
+%%%  Boxed tree fragments (cfrag1..4, cfrag6, cfrag7) over the target sentence;
+%%%  fragments 2, 3 and 6 are highlighted, and a red box marks a source span and
+%%%  the target words "about the situation".
+\begin{tikzpicture}
+
+{\scriptsize
+\begin{scope}
+
+{
+% each fragment is a framed node whose content is a small \Tree
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at
+   (0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at
+   ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]};
+\end{scope}
+
+% the leaf is 形势 ("situation"), matching the aligned target word; the earlier
+% spelling 形式 ("form") was a homophone typo
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at
+   ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 形势 ]]};
+   \end{scope}
+
+\begin{scope}[sibling distance=15pt,level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at
+   ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 担心 ]]};
+\end{scope}
+
+% a single three-child VP fragment (P, NP, VP); there is no cfrag5 in this figure
+\begin{scope}[sibling distance=32pt,level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at
+   ([xshift=0.3em,yshift=4em]cfrag2.north west) {\Tree[.\node(sn11){VP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ] [.\node(sn13){VP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=65pt,level distance=18pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at
+   ([xshift=-4.5em,yshift=0.5em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]};
+\end{scope}
+
+% target sentence under the fragments
+\node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){worried};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){about};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){situation};
+
+% alignments from the lexical fragments to the target words
+\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
+\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
+\draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north);
+
+% the three alignments of cfrag2/cfrag3 are redrawn in red on top of the black ones
+{
+\draw[dashed,red] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
+\draw[dashed,red] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
+\draw[dashed,red] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
+}
+
+% dot-ended connectors rising from the top of each fragment (hand-tuned lengths)
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=6.5em]cfrag1.north);
+\draw[*-*] ([xshift=-0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=-0.1em,yshift=4.4em]cfrag2.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) .. controls +(north:2.4em) and +(south:2.4em) .. ([xshift=1.1em,yshift=2.5em]cfrag3.north);
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=2.5em]cfrag4.north);
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=0.9em]cfrag6.north);
+
+% numbered badges on the highlighted fragments (cfrag6 carries the number 5)
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel2) at (cfrag2.north east) {{\color{white} \tiny{2}}};
+\node [fill=blue,circle,inner sep=2pt] (rlabel3) at (cfrag3.north east) {{\color{white} \tiny{3}}};
+\node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{5}}};
+}
+
+\begin{pgfonlayer}{background}
+% green fill behind the highlighted fragments
+{
+\node [fill=green!20,inner sep=0pt] (cfrag2back) [fit = (cfrag2)] {};
+\node [fill=green!20,inner sep=0pt] (cfrag3back) [fit = (cfrag3)] {};
+\node [fill=green!20,inner sep=0pt] (cfrag6back) [fit = (cfrag6)] {};
+}
+
+% red boxes: a fixed-size one anchored at the bottom-left of cfrag2 (source side)
+% and one fitted around the target words tw14..tw16
+{
+\node [anchor=south west,draw=red,thick,fill=red!20,inner sep=0pt,minimum height = 2em, minimum width=6em] (ps) at ([xshift=0em,yshift=0em]cfrag2.south west) {};
+\node [anchor=south west,draw=red,thick,fill=red!20,inner sep=0pt] (pt) [fit = (tw14) (tw15) (tw16)] {};
+}
+\end{pgfonlayer}
+
+}
+\end{scope}
+}
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/tree-to-string-rule-empty-alignment-1.tex
+++ b/Book/Chapter4/Figures/tree-to-string-rule-empty-alignment-1.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Handling words with empty alignment (rule table).
+%%%  Two-column table: rule id, then the rule text. r_1..r_7 are the base rules;
+%%%  r_8..r_11 are variants with the word "was" (in red) attached on the target side.
+%%%  Long rules continue on a second row with an empty first column.
+\begin{minipage}[b]{0.35\textwidth}
+{\footnotesize
+\renewcommand*{\arraystretch}{1.3}
+\begin{tabular}{l l}
+{$r_1$} & {NP(PN(他)) $\to$  he} \\
+{$r_4$} & {VP(VV(表示) NN(满意)) $\to$} \\
+                    & {satisfied} \\
+{$r_6$} & {VP(PP$_1$ VP$_2$) $\to$ VP$_2$ PP$_1$} \\
+{$r_7$} & {IP(NP$_1$ VP$_2$) $\to$ NP$_1$ VP$_2$} \\
+{$r_8$} & {NP(PN(他)) $\to$  he {\red{was}}} \\
+{$r_9$} & {VP(VV(表示) NN(满意)) $\to$} \\
+                    & {{\red{was}} satisfied} \\
+{$r_{10}$} & {VP(PP$_1$ VP$_2$) $\to$} \\
+                     & {{\red{was}} VP$_2$ PP$_1$} \\
+{$r_{11}$} & {IP(NP$_1$ VP$_2$) $\to$} \\
+                     & {NP$_1$ {\red{was}} VP$_2$} \\
+\end{tabular}
+\renewcommand*{\arraystretch}{1.0}
+}
+\end{minipage}
--- a/Book/Chapter4/Figures/tree-to-string-rule-empty-alignment-2.tex
+++ b/Book/Chapter4/Figures/tree-to-string-rule-empty-alignment-2.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Handling words with empty alignment (figure).
+%%%  Boxed tree fragments cfrag1..cfrag7 over the target sentence. The word "was"
+%%%  is boxed in red, and red arrows run from it towards several fragments.
+\begin{minipage}[t]{0.47\textwidth}
+\begin{tikzpicture}
+
+{\scriptsize
+\begin{scope}
+
+{
+% each fragment is a framed node whose content is a small \Tree
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at
+   (0,0.25) {\Tree[.\node(sn1){NP}; [.\node(sn2){PN}; 他 ]]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag2) at
+   ([xshift=1.2em]cfrag1.south east) {\Tree[.\node(sn3){P}; 对 ]};
+\end{scope}
+
+\begin{scope}[level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag3) at
+   ([xshift=1.2em]cfrag2.south east) {\Tree[.\node(sn4){NP}; [.NN 回答 ]]};
+   \end{scope}
+
+\begin{scope}[sibling distance=15pt,level distance=20pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag4) at
+   ([xshift=1.4em]cfrag3.south east) {\Tree[.\node(sn5){VP}; [.\node(sn6){VV}; 表示 ] [.\node(sn7){NN}; 满意 ]]};
+\end{scope}
+
+% (a stray empty option ",," was removed from this scope's option list)
+\begin{scope}[sibling distance=20pt,level distance=25pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag5) at
+   ([xshift=0.3em,yshift=2.5em]cfrag2.north west) {\Tree[.\node(sn8){PP}; [.\node(sn9){P}; ] [.\node(sn10){NP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=60pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south west,draw] (cfrag6) at
+   ([xshift=1.6em,yshift=0.8em]cfrag5.north west) {\Tree[.\node(sn11){VP}; [.\node(sn12){PP}; ] [.\node(sn13){VP}; ]]};
+\end{scope}
+
+\begin{scope}[sibling distance=80pt,level distance=18pt]
+\node[scale=0.8, inner sep=0.1cm,align=center,anchor=south east,draw] (cfrag7) at
+   ([xshift=-3.6em,yshift=0.8em]cfrag6.north east) {\Tree[.\node(sn14){IP}; [.\node(sn15){NP}; ] [.\node(sn16){VP}; ]]};
+\end{scope}
+
+% target sentence under the fragments
+\node[scale=0.9,anchor=north,minimum size=18pt] (tw11) at ([xshift=-0.3em,yshift=-1.2em]cfrag1.south){he};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw12) at ([yshift=-0.1em,xshift=0.5em]tw11.east){was};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){satisfied};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){with};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
+\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){answer};
+
+% alignments from the lexical fragments to the target words; tw12 ("was") has none
+\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
+\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw15.north);
+\draw[dashed] (cfrag3.south) -- ([yshift=-0.4em]tw16.north);
+\draw[dashed] (cfrag4.south) .. controls +(south:0.6) and +(north:0.6) .. ([yshift=-0.4em]tw13.north);
+
+% dot-ended connectors rising from the top of each fragment (hand-tuned lengths)
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag1.north) -- ([xshift=0.0em,yshift=11.3em]cfrag1.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag2.north) -- ([xshift=0.1em,yshift=2.9em]cfrag2.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.4em]cfrag3.north) -- ([xshift=0.1em,yshift=0.9em]cfrag3.north);
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag4.north) -- ([xshift=0.0em,yshift=5.7em]cfrag4.north);
+\draw[*-*] ([xshift=0.1em,yshift=-0.2em]cfrag5.north) -- ([xshift=0.1em,yshift=1em]cfrag5.north);
+\draw[*-*] ([xshift=0.0em,yshift=-0.2em]cfrag6.north) -- ([xshift=0.0em,yshift=1em]cfrag6.north);
+
+% numbered badges; only 1, 4, 6 and 7 are shown, the others are commented out
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel1) at (cfrag1.south east) {{\color{white} \tiny{1}}};
+}
+%\node [fill=blue,circle,inner sep=2pt] (rlabel2) at (cfrag2.south east) {{\color{white} \tiny{2}}};
+%\node [fill=blue,circle,inner sep=2pt] (rlabel3) at (cfrag3.south east) {{\color{white} \tiny{3}}};
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel4) at (cfrag4.south east) {{\color{white} \tiny{4}}};
+}
+%\node [fill=blue,circle,inner sep=2pt] (rlabel5) at (cfrag5.north west) {{\color{white} \tiny{5}}};
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel6) at (cfrag6.north east) {{\color{white} \tiny{6}}};
+}
+{
+\node [fill=blue,circle,inner sep=2pt] (rlabel7) at (cfrag7.south west) {{\color{white} \tiny{7}}};
+}
+% red box drawn over the plain "was" node
+{
+\node [fill=white,draw=red,thick] (tw12label) at (tw12) {\red{was}};
+}
+
+% red arrows from "was": to cfrag1, straight up by 18.4em (presumably reaching
+% cfrag7 -- the endpoint is given as an offset, not a node), to cfrag6, and to cfrag4
+{
+\draw [->,red] ([xshift=0.2em]tw12label.north west) .. controls +(north:0.4) and +(south:0.4) .. ([xshift=0em]cfrag1.south);
+}
+{
+\draw [->,red] ([xshift=0.8em]tw12label.north west) -- ([xshift=0.8em,yshift=18.4em]tw12label.north west);
+}
+{
+\draw [->,red] ([xshift=0.2em]tw12label.north) .. controls +(north:7em) and +(south:11em) .. ([xshift=0em,yshift=0em]cfrag6.south);
+}
+{
+\draw [->,red] ([xshift=0.6em]tw12label.north) -- ([xshift=-2em]cfrag4.south);
+}
+
+% green fill behind fragments 1, 4, 6 and 7; each fill node has its own name
+% (previously all four reused the name cfrag1back)
+\begin{pgfonlayer}{background}
+{
+\node [fill=green!20,inner sep=0pt] (cfrag1back) [fit = (cfrag1)] {};
+}
+{
+\node [fill=green!20,inner sep=0pt] (cfrag4back) [fit = (cfrag4)] {};
+}
+{
+\node [fill=green!20,inner sep=0pt] (cfrag6back) [fit = (cfrag6)] {};
+}
+{
+\node [fill=green!20,inner sep=0pt] (cfrag7back) [fit = (cfrag7)] {};
+}
+\end{pgfonlayer}
+
+}
+\end{scope}
+}
+\end{tikzpicture}
+\end{minipage}
\ No newline at end of file
--- a/Book/Chapter4/Figures/tree-to-tree-rule-extraction-base-node-alignment.tex
+++ b/Book/Chapter4/Figures/tree-to-tree-rule-extraction-base-node-alignment.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%%  Method 2: align tree nodes directly, then induce the syntactic mappings.
+%%%  Left: an English tree (top, growing down) and a Chinese tree (bottom, growing
+%%%  up) whose framed nodes are linked by red dotted arrows.
+%%%  Right: the list of extracted rules r_1..r_9.
+\begin{tikzpicture}
+
+
+\begin{scope}
+
+{
+% English tree; only the nodes that take part in an alignment are named and framed
+\begin{scope}[scale=0.65, level distance=27pt]
+\Tree[.\node[draw](en1){S};
+        [.\node[draw](en2){NP};
+            [.DT the ]
+            [.NNS imports ]
+        ]
+        [.\node[draw](en3){VP};
+            [.\node[draw](en4){VBZ}; have ]
+            [.ADVP
+                [.\node[draw](en5){RB}; drastically ]
+                [.\node[draw](en6){VBN}; fallen ]
+            ]
+        ]
+     ]
+\end{scope}
+
+% Chinese tree, drawn upside down (grow'=up) below the English one
+\begin{scope}[scale=0.65, level distance=27pt, grow'=up, xshift=-13pt, yshift=-3.5in, sibling distance=22pt]
+\Tree[.\node[draw](cn1){\ \ IP\ \ };
+        [.\node[draw](cn2){NN}; 进口 ]
+        [.\node[draw](cn3){VP};
+            [.\node[draw](cn4){AD}; 大幅度 ]
+            [.VP
+                [.\node[draw](cn5){VV}; 下降 ]
+                [.\node[draw](cn6){AS}; 了 ]
+            ]
+        ]
+     ]
+\end{scope}
+}
+
+% node alignments: AD-RB, VV-VBN, AS-VBZ, VP-VP, NN-NP, IP-S
+{
+\draw[latex-latex, dotted, thick, red] (cn4.east) .. controls +(east:0.5) and +(west:0.5) .. (en5.west);
+\draw[latex-latex, dotted, thick, red] (cn5.east) .. controls +(east:0.5) and +(south:0.5) .. (en6.south west);
+\draw[latex-latex, dotted, thick, red] (cn6.north west) .. controls +(north:1.5) and +(south:2.5) .. (en4.south west);
+\draw[latex-latex, dotted, thick, red] (cn3.north west) -- (en3.south west);
+\draw[latex-latex, dotted, thick, red] (cn2.west) .. controls +(west:0.6) and +(west:0.6) .. (en2.west);
+\draw[latex-latex, dotted, thick, red] (cn1.north west) .. controls +(north:4) and +(south:5.5) .. (en1.south west);
+}
+
+\end{scope}
+
+
+% rule list: title, a rule made of underlined spaces, then one node per rule id
+% (tN / sN) and one per rule text (tN-1 / sN-1). r_1, r_2 and r_5 are greyed out.
+\node[anchor=north](t1) at (4.7,0.3){{\footnotesize{抽取得到的规则(子树对齐)}}};
+\node[anchor=north](t2) at ([xshift=3.7em,yshift=0.5em]t1.south){\underline{\qquad \qquad \qquad \quad  \qquad \qquad \qquad \qquad \qquad}};
+\node[anchor=north](t3) at ([xshift=-7.7em,yshift=0.0em]t2.south){\color{gray!70}\footnotesize{$r_1$}};
+\node[anchor=west](t3-1) at ([xshift=0.0em,yshift=0.0em]t3.east){\color{gray!70}\footnotesize{AS(了) $\rightarrow$ DT(the)}};
+\node[anchor=north](t4) at ([xshift=0.0em,yshift=0.0em]t3.south){\color{gray!70}\footnotesize{$r_2$}};
+\node[anchor=west](t4-1) at ([xshift=0.0em,yshift=0.0em]t4.east){\color{gray!70}\footnotesize{NN(进口) $\rightarrow$ NNS(imports)}};
+\node[anchor=north](t5) at ([xshift=0.0em,yshift=0.0em]t4.south){\footnotesize{$r_3$}};
+\node[anchor=west](t5-1) at ([xshift=0.0em,yshift=0.0em]t5.east){\footnotesize{AD(大幅度) $\rightarrow$ RB(drastically)}};
+\node[anchor=north](t6) at ([xshift=0.0em,yshift=0.0em]t5.south){\footnotesize{$r_4$}};
+\node[anchor=west](t6-1) at ([xshift=0.0em,yshift=0.0em]t6.east){\footnotesize{VV(下降) $\rightarrow$ VBN(fallen)}};
+% r_5 spans two lines; the closing parentheses of both sides are now balanced
+\node[anchor=north](t7) at ([xshift=0.0em,yshift=0.0em]t6.south){\color{gray!70}\footnotesize{$r_5$}};
+\node[anchor=west](t7-1) at ([xshift=0.0em,yshift=0.0em]t7.east){\color{gray!70}\footnotesize{IP(NN$_1$ VP(AD$_2$ VP(VV$_3$ AS$_4$))) $\rightarrow$}};
+\node[anchor=north](t8) at ([xshift=9.4em,yshift=0.0em]t7.south){\color{gray!70}\scriptsize{S(NP(DT$_4$ NNS$_1$) VP(VBZ(have) ADVP(RB$_2$ VBN$_3$)))}};
+
+% rules r_6..r_9 in red. s5 and s7 are white (invisible) placeholder ids that keep
+% the continuation lines of r_7 and r_8 aligned with the rule-text column.
+\node[anchor=north](s3) at ([xshift=0.0em,yshift=-1.3em]t7.south){\red{\footnotesize{$r_{6}$}}};
+\node[anchor=west](s3-1) at ([xshift=0.0em,yshift=0.0em]s3.east){\red{\footnotesize{AS(了) $\rightarrow$ VBZ(have)}}};
+\node[anchor=north](s4) at ([xshift=0.0em,yshift=0.0em]s3.south){\red{\footnotesize{$r_{7}$}}};
+\node[anchor=west](s4-1) at ([xshift=0.0em,yshift=0.0em]s4.east){\red{\footnotesize{NN(进口) $\rightarrow$}}};
+\node[anchor=north](s5) at ([xshift=0.0em,yshift=0.0em]s4.south){\footnotesize{\color{white}{$r_{?}$}}};
+\node[anchor=west](s5-1) at ([xshift=0.0em,yshift=0.0em]s5.east){\red{\footnotesize{NP(DT(the) NNS(imports))}}};
+\node[anchor=north](s6) at ([xshift=0.0em,yshift=0.0em]s5.south){\red{\footnotesize{$r_{8}$}}};
+\node[anchor=west](s6-1) at ([xshift=0.0em,yshift=0.0em]s6.east){\red{\footnotesize{VP(AD$_1$ VP(VV$_2$ AS$_3$)) $\rightarrow$}}};
+\node[anchor=north](s7) at ([xshift=0.0em,yshift=0.0em]s6.south){\red{\footnotesize{\color{white}{$r_{?}$}}}};
+\node[anchor=west](s7-1) at ([xshift=0.0em,yshift=0.0em]s7.east){\red{\footnotesize{VP(VBZ$_3$ ADVP(RB$_1$ VBN$_2$))}}};
+\node[anchor=north](s8) at ([xshift=0.0em,yshift=0.0em]s7.south){\red{\footnotesize{$r_{9}$}}};
+\node[anchor=west](s8-1) at ([xshift=0.0em,yshift=0.0em]s8.east){\red{\footnotesize{IP(NN$_1$ VP$_2$) $\rightarrow$ S(NP$_1$ VP$_2$)}}};
+
+\end{tikzpicture}
--- a/Book/Chapter4/Figures/tree-to-tree-rule-extraction-base-word-alignment.tex
+++ b/Book/Chapter4/Figures/tree-to-tree-rule-extraction-base-word-alignment.tex
+
+\begin{tikzpicture}
+% Parse-tree pair plus the word-alignment links drawn between their leaves.
+\begin{scope}
+% English tree; its leaves are named ew1..ew5.
+\begin{scope}[scale=0.65, level distance=27pt]
+\Tree[.S
+        [.NP [.DT \node(ew1){the}; ] [.NNS \node(ew2){imports}; ] ]
+        [.VP
+            [.VBZ \node(ew3){have}; ]
+            [.ADVP [.RB \node(ew4){drastically}; ] [.VBN \node(ew5){fallen}; ] ]
+        ]
+     ]
+\end{scope}
+
+% Chinese tree, grown upwards (grow'=up); its leaves are named cw1..cw4.
+\begin{scope}[scale=0.65, level distance=27pt, grow'=up, xshift=-13pt, yshift=-3.5in, sibling distance=22pt]
+\Tree[.IP
+        [.NN \node(cw1){进口}; ]
+        [.VP
+            [.AD \node(cw2){大幅度}; ]
+            [.VP [.VV \node(cw3){下降}; ] [.AS \node(cw4){了}; ] ]
+        ]
+     ]
+\end{scope}
+
+% Straight dashed links first, then the curved cw4--ew1 link: once in the plain
+% dashed style and once more on top in thick red so that it stands out.
+\foreach \zhleaf/\enleaf in {cw1/ew2, cw2/ew4, cw3/ew5}
+    \draw[-, dashed] (\zhleaf) -- (\enleaf);
+\draw[-, dashed] (cw4) .. controls +(north:1.0) and +(south:1.6) .. (ew1);
+\draw[-, red, dashed,thick] (cw4) .. controls +(north:1.0) and +(south:1.6) .. (ew1);
+
+\end{scope}
+
+% List of rules that can be extracted: heading (t1), an underline used as a
+% separator (t2), then one row per rule with the label on the left (tN) and the
+% rule text to its right (tN-1).
+\node[anchor=north](t1) at (4.5,0.3){{\footnotesize{抽取得到的规则}}};
+\node[anchor=north](t2) at ([xshift=5.5em,yshift=0.5em]t1.south){\underline{\qquad \qquad \qquad \quad  \qquad \qquad \qquad \qquad \qquad}};
+\node[anchor=north](t3) at ([xshift=-7.7em,yshift=0.0em]t2.south){\footnotesize{$r_1$}};
+\node[anchor=west](t3-1) at ([xshift=0.0em,yshift=0.0em]t3.east){\footnotesize{AS(了) $\rightarrow$ DT(the)}};
+\node[anchor=north](t4) at ([xshift=0.0em,yshift=0.0em]t3.south){\footnotesize{$r_2$}};
+\node[anchor=west](t4-1) at ([xshift=0.0em,yshift=0.0em]t4.east){\footnotesize{NN(进口) $\rightarrow$ NNS(imports)}};
+\node[anchor=north](t5) at ([xshift=0.0em,yshift=0.0em]t4.south){\footnotesize{$r_3$}};
+\node[anchor=west](t5-1) at ([xshift=0.0em,yshift=0.0em]t5.east){\footnotesize{AD(大幅度) $\rightarrow$ RB(drastically)}};
+\node[anchor=north](t6) at ([xshift=0.0em,yshift=0.0em]t5.south){\footnotesize{$r_4$}};
+\node[anchor=west](t6-1) at ([xshift=0.0em,yshift=0.0em]t6.east){\footnotesize{VV(下降) $\rightarrow$ VBN(fallen)}};
+% NOTE(review): the numbering jumps from r_4 to r_6 here -- confirm this is intended.
+\node[anchor=north](t7) at ([xshift=0.0em,yshift=0.0em]t6.south){\footnotesize{$r_6$}};
+% Both sides of this rule carry their final closing bracket, so the bracketing
+% matches the trees: IP(NN VP(AD VP(VV AS))) and S(NP(DT NNS) VP(VBZ ADVP(RB VBN))).
+\node[anchor=west](t7-1) at ([xshift=0.0em,yshift=0.0em]t7.east){\footnotesize{IP(NN$_1$ VP(AD$_2$ VP(VV$_3$ AS$_4$))) $\rightarrow$}};
+\node[anchor=north](t8) at ([xshift=9.4em,yshift=0.0em]t7.south){\scriptsize{S(NP(DT$_4$ NNS$_1$) VP(VBZ(have) ADVP(RB$_2$ VBN$_3$)))}};
+
+% List of rules that cannot be obtained: heading (s1), underline separator (s2),
+% then label / rule-text pairs stacked downwards.
+\node[anchor=north] (s1) at ([yshift=-8.0em]t1.south) {{\footnotesize{无法得到的规则}}};
+\node[anchor=north] (s2) at ([xshift=5.5em,yshift=0.5em]s1.south) {\underline{\qquad \qquad \qquad \quad  \qquad \qquad \qquad \qquad \qquad}};
+\node[anchor=north] (s3) at ([xshift=-7.7em]s2.south) {\footnotesize{$r_{?}$}};
+\node[anchor=west] (s3-1) at (s3.east) {\footnotesize{AS(了) $\rightarrow$ VBZ(have)}};
+\node[anchor=north] (s4) at (s3.south) {\footnotesize{$r_{?}$}};
+\node[anchor=west] (s4-1) at (s4.east) {\footnotesize{NN(进口) $\rightarrow$}};
+% Label typeset in white: it is invisible and only reserves the label column
+% for the continuation line of the rule above.
+\node[anchor=north] (s5) at (s4.south) {\footnotesize{\color{white}{$r_{?}$}}};
+\node[anchor=west] (s5-1) at (s5.east) {\footnotesize{NP(DT(the) NNS(imports))}};
+\node[anchor=north] (s6) at (s5.south) {\footnotesize{$r_{?}$}};
+\node[anchor=west] (s6-1) at (s6.east) {\footnotesize{IP(NN$_1$ VP$_2$) $\rightarrow$ S(NP$_1$ VP$_2$)}};
+
+\end{tikzpicture}
+
+
--- a/Book/Chapter4/Figures/unlimited-phrase-extraction.tex
+++ b/Book/Chapter4/Figures/unlimited-phrase-extraction.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% Phrase extraction
+\begin{center}
+\begin{tikzpicture}
+
+% Node styles local to this picture (alignmentnode, probnode and labelnode are
+% defined here but not used by any node below).
+\tikzset{
+    elementnode/.style={rectangle,text=white,anchor=center},
+    srcnode/.style={rotate=45,font=\small,anchor=south west},
+    tgtnode/.style={left,font=\small,anchor=north east},
+    alignmentnode/.style={rectangle,draw,minimum height=3.6*1.0cm,minimum width=0.36*1.0cm},
+    probnode/.style={fill=blue!30,minimum width=0.4*1.0cm},
+    labelnode/.style={above}
+}
+
+\begin{scope}[scale=0.85,yshift=0.12in]
+% Grid of small blue squares named a<column><row>: columns 0..5, rows 7 down to 0.
+\foreach \j in {7,6,...,0}
+    \foreach \i in {0,...,5}
+        \node[elementnode,minimum size=0.6*1.0cm*0.15,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.0cm*\i-5.4*0.5*1.0cm,0.5*1.0cm*\j-1.05*1.0cm) {};
+
+% Column headers (English words), rotated by 45 degrees; each one is placed
+% 0.5cm to the right of the previous header.
+\node[srcnode] (src1) at (-5.4*0.5*1.0cm,-1.05*1.0cm+7.5*0.5*1.0cm) {\scriptsize{Have}};
+\node[srcnode] (src2) at ([xshift=0.5*1.0cm]src1.south west) {\scriptsize{you}};
+\node[srcnode] (src3) at ([xshift=0.5*1.0cm]src2.south west) {\scriptsize{learned}};
+\node[srcnode] (src4) at ([xshift=0.5*1.0cm]src3.south west) {\scriptsize{nothing}};
+\node[srcnode] (src5) at ([xshift=0.5*1.0cm]src4.south west) {\scriptsize{?}};
+\node[srcnode] (src6) at ([xshift=0.5*1.0cm]src5.south west) {\scriptsize{EOS}};
+
+% Row headers (Chinese words); each one is placed 0.5cm below the previous header.
+\node[tgtnode] (tgt1) at (-6.0*0.5*1.0cm,-1.05*1.0cm+7.5*0.5*1.0cm) {\scriptsize{你}};
+\node[tgtnode] (tgt2) at ([yshift=-0.5*1.0cm]tgt1.north east) {\scriptsize{什么}};
+\node[tgtnode] (tgt3) at ([yshift=-0.5*1.0cm]tgt2.north east) {\scriptsize{都}};
+\node[tgtnode] (tgt4) at ([yshift=-0.5*1.0cm]tgt3.north east) {\scriptsize{没}};
+\node[tgtnode] (tgt5) at ([yshift=-0.5*1.0cm]tgt4.north east) {\scriptsize{学}};
+\node[tgtnode] (tgt6) at ([yshift=-0.5*1.0cm]tgt5.north east) {\scriptsize{到}};
+\node[tgtnode] (tgt7) at ([yshift=-0.5*1.0cm]tgt6.north east) {\scriptsize{?}};
+\node[tgtnode] (tgt8) at ([yshift=-0.5*1.0cm]tgt7.north east) {\scriptsize{EOS}};
+
+% Text of the first phrase pair, to the right of the grid.
+\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a57.east) {\footnotesize{$\bar{s}_i$: 什么\ \ \ 都\ \ \ 没}};
+\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\bar{t}_i$: learned\ \ \ nothing\ \ \ ? \ \ \ \ \ \ \ \ \ \ \ \ }};
+
+% Text of the second phrase pair.
+\node [anchor=west] (p2line1) at ([xshift=4em]a53.east) {\footnotesize{$\bar{s}_j$: 到\ \ \ ?}};
+\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: Have\ \ \ you\ \ \ learned\ \ \ nothing}};
+
+% Empty helper node: it is included in the fit of box1 below and is positioned
+% relative to the east side of p2line2.
+\node [anchor=east] (p1line3) at ([xshift=0em,yshift=2.9cm]p2line2.east) {};
+
+% Frames around the two grid regions and tinted boxes behind the phrase texts,
+% all drawn on the background layer.
+\begin{pgfonlayer}{background}
+\node [rectangle,draw=red,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a26) (a44)] (phrase1) {};
+\node [rectangle,draw=ugreen,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a01) (a32)] (phrase2) {};
+\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line2) (p1line3)] (box1) {};
+\node [rectangle,inner sep=0.2em,fill=green!10] [fit = (p2line1) (p2line2)] (box2) {};
+\end{pgfonlayer}
+
+% Dotted arrows from each framed grid region to its phrase-pair box.
+\draw [->,thick,dotted] ([yshift=-0.8em]phrase1.east) .. controls +(east:1.5) and +(west:1) ..  (box1.west);
+\draw [->,thick,dotted] ([yshift=-0.0em]phrase2.east) .. controls +(east:2.0) and +(west:1) ..  ([yshift=1em]box2.west);
+
+\end{scope}
+
+\end{tikzpicture}
+\end{center}
\ No newline at end of file
--- a/Book/Chapter4/Figures/word-and-index-of-pos.tex
+++ b/Book/Chapter4/Figures/word-and-index-of-pos.tex
+%------------------------------------------------------------------------------------------------------------
+%%%  Tree-based decoding - chart-based decoding
+\begin{center}
+\begin{tikzpicture}
+\begin{scope}
+% The four words, chained left to right with a 0.3em gap.
+\node [anchor=north west] (w1) at (0, 0) {猫};
+\node [anchor=west] (w2) at ([xshift=0.3em]w1.east) {喜欢};
+\node [anchor=west] (w3) at ([xshift=0.3em]w2.east) {吃};
+\node [anchor=west] (w4) at ([xshift=0.3em]w3.east) {鱼};
+% Blue indices 0..3, each attached to the south-west corner of one word.
+\foreach \idx/\word in {0/w1, 1/w2, 2/w3, 3/w4}
+    \node [anchor=north east] (p\idx) at ([xshift=0.3em]\word.south west) {\blue{\idx}};
+% Final index 4, attached to the south-east corner of the last word.
+\node [anchor=north west] (p4) at ([xshift=-0.4em]w4.south east) {\blue{4}};
+\end{scope}
+\end{tikzpicture}
+\end{center}
--- a/Book/Chapter4/Figures/word-and-phrase-translation-regard-as-path.tex
+++ b/Book/Chapter4/Figures/word-and-phrase-translation-regard-as-path.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% 回顾基于词的翻译模型
+\begin{tikzpicture}
+
+\begin{scope}
+
+% Segmented source sentence: one bold node per word, chained left to right.
+\node [anchor=west] (s1) at (0,0) {\textbf{我}};
+\node [anchor=west] (s2) at ([xshift=2em]s1.east) {\textbf{对}};
+\node [anchor=west] (s3) at ([xshift=2em]s2.east) {\textbf{你}};
+\node [anchor=west] (s4) at ([xshift=2em]s3.east) {\textbf{表示}};
+\node [anchor=west] (s5) at ([xshift=2em]s4.east) {\textbf{满意}};
+
+% Red caption placed above the first word.
+\node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{\red{待翻译句子(已经分词):}}}};
+
+% Short downward arrow below each of the five source words.
+\foreach \k in {1,...,5}
+    \draw [->,very thick,ublue] (s\k.south) -- ([yshift=-0.7em]s\k.south);
+
+{\small
+% Shared settings, local to this group:
+%   transbox - padding and height common to every translation-option box
+%   spantag  - black tag in a box's top-left corner showing the covered source span
+\tikzset{
+    transbox/.style={inner sep=2pt,minimum height=1.5em},
+    spantag/.style={anchor=north west,inner sep=1pt,fill=black}
+}
+
+% Options for source word 1, stacked below s1.
+\node [transbox,anchor=north,fill=red!20,minimum width=2.5em] (t11) at ([yshift=-1em]s1.south) {I};
+\node [transbox,anchor=north,fill=red!20,minimum width=2.5em] (t12) at ([yshift=-0.2em]t11.south) {me};
+\node [transbox,anchor=north,fill=red!20,minimum width=2.5em] (t13) at ([yshift=-0.2em]t12.south) {I'm};
+\foreach \tid/\cov in {11/1, 12/1, 13/1}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{\cov}}}};
+
+% Phrase options covering words 1-2.
+\node [transbox,anchor=north west,fill=red!20,minimum width=6.55em] (t14) at ([yshift=-0.2em]t13.south west) {I'm};
+\node [transbox,anchor=north west,fill=red!20,minimum width=6.55em] (t15) at ([yshift=-0.2em]t14.south west) {I};
+\foreach \tid/\cov in {14/1-2, 15/1-2}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{\cov}}}};
+
+% Options for source word 2.
+\node [transbox,anchor=north,fill=green!20,minimum width=2.5em] (t21) at ([yshift=-1em]s2.south) {to};
+\node [transbox,anchor=north,fill=green!20,minimum width=2.5em] (t22) at ([yshift=-0.2em]t21.south) {with};
+\node [transbox,anchor=north,fill=green!20,minimum width=2.5em] (t23) at ([yshift=-0.2em]t22.south) {for};
+\foreach \tid/\cov in {21/2, 22/2, 23/2}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{\cov}}}};
+
+% Phrase options covering words 2-3.
+\node [transbox,anchor=north west,fill=green!20,minimum width=6.55em] (t24) at ([yshift=-0.2em,xshift=-2.6em]t15.south east) {for you};
+\node [transbox,anchor=north west,fill=green!20,minimum width=6.55em] (t25) at ([yshift=-0.2em]t24.south west) {with you};
+\foreach \tid/\cov in {24/2-3, 25/2-3}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{\cov}}}};
+
+% Option for source word 3.
+\node [transbox,anchor=north,fill=blue!20,minimum width=2.5em] (t31) at ([yshift=-1em]s3.south) {you};
+\node [spantag] (tl31) at (t31.north west) {\tiny{{\color{white} \textbf{3}}}};
+
+% Phrase options starting at word 3 (tagged 3-5 and 3-4).
+\node [transbox,anchor=west,fill=blue!20,minimum width=13.35em] (t32) at ([xshift=1.4em]t14.east) {you are satisfied};
+\node [transbox,anchor=north west,fill=blue!20,minimum width=7.45em] (t33) at ([yshift=-0.2em]t32.south west) {$\phi$};
+\foreach \tid/\cov in {32/3-5, 33/3-4}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{\cov}}}};
+
+% Options for source word 4.
+\node [transbox,anchor=north,fill=orange!20,minimum width=3em] (t41) at ([yshift=-1em]s4.south) {$\phi$};
+\node [transbox,anchor=north,fill=orange!20,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {show};
+\foreach \tid/\cov in {41/4, 42/4}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{\cov}}}};
+
+% Phrase options covering words 4-5.
+\node [transbox,anchor=west,fill=red!20,minimum width=9.00em] (t43) at ([xshift=1.75em]t24.east) {satisfied};
+\node [transbox,anchor=north west,fill=red!20,minimum width=9.00em] (t44) at ([yshift=-0.2em]t43.south west) {satisfactory};
+\foreach \tid/\cov in {43/4-5, 44/4-5}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{\cov}}}};
+
+% Options for source word 5.
+\node [transbox,anchor=north,fill=purple!20,minimum width=4.5em] (t51) at ([yshift=-1em]s5.south) {satisfy};
+\node [transbox,anchor=north,fill=purple!20,minimum width=4.5em] (t52) at ([yshift=-0.2em]t51.south) {satisfied};
+\node [transbox,anchor=north,fill=purple!20,minimum width=4.5em] (t53) at ([yshift=-0.2em]t52.south) {satisfies};
+\foreach \tid/\cov in {51/5, 52/5, 53/5}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{\cov}}}};
+
+}
+
+{\tiny
+% One rotated, white-on-black probability label per translation option.
+% The label for box t<id> is named pt<id> and sits at that box's east anchor.
+% Deriving the label name from the box id gives every label a distinct name
+% (in particular pt32 for t32 and pt33 for t33).
+\foreach \tid/\prob in {11/.4, 12/.2, 13/.4, 14/.1, 15/.2,
+                        21/.4, 22/.3, 23/.3, 24/.2, 25/.1,
+                        31/1, 32/.4, 33/.3,
+                        41/.5, 42/.5, 43/.3, 44/.2,
+                        51/.5, 52/.4, 53/.1}
+    \node [anchor=north,rotate=90,inner sep=1pt,minimum width=2.55em,fill=black] (pt\tid) at (t\tid.east) {{\color{white} \textbf{P=\prob}}};
+
+}
+
+\end{scope}
+
+% Empty scope (contains only a font switch and draws nothing).
+\begin{scope}
+{\small
+
+}
+\end{scope}
+
+\begin{scope}
+
+% Brace to the left of boxes t11..t13, labelled "word translation".
+\draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}]
+    ([yshift=0em,xshift=-0.5em]t11.north west) -- ([xshift=-0.5em]t13.south west)
+    node [pos=0.5,left,xshift=1.0em,yshift=0.0em,text width=5em,align=left] (label2) {\footnotesize{\textbf{单词翻译}}};
+
+% Red path through single-word options only: t13, t22, t31, t41, t52.
+\draw [->,ultra thick,red,line width=2pt,opacity=0.7]
+       ([xshift=-0.5em]t13.west) -- ([xshift=0.8em]t13.east)
+    -- ([xshift=-0.2em]t22.west) -- ([xshift=0.8em]t22.east)
+    -- ([xshift=-0.2em]t31.west) -- ([xshift=0.8em]t31.east)
+    -- ([xshift=-0.2em]t41.west) -- ([xshift=0.8em]t41.east)
+    -- ([xshift=-0.2em]t52.west) -- ([xshift=1.2em]t52.east);
+
+% First blue path: phrase boxes t15 and t32.
+\draw [->,ultra thick,ublue,line width=2pt,opacity=0.7]
+       ([xshift=-0.5em]t15.west) -- ([xshift=0.8em]t15.east)
+    -- ([xshift=-0.2em]t32.west) -- ([xshift=1.2em]t32.east);
+
+% Second blue path: t13, phrase box t25, t41, t52. It is raised by 0.1em on the
+% boxes it shares with the red path.
+\draw [->,ultra thick,ublue,line width=2pt,opacity=0.7]
+       ([xshift=-0.5em,yshift=0.1em]t13.west) -- ([xshift=0.8em,yshift=0.1em]t13.east)
+    -- ([xshift=-0.2em]t25.west) -- ([xshift=0.8em]t25.east)
+    -- ([xshift=-0.2em,yshift=0.1em]t41.west) -- ([xshift=0.8em,yshift=0.1em]t41.east)
+    -- ([xshift=-0.2em,yshift=0.1em]t52.west) -- ([xshift=1.2em,yshift=0.1em]t52.east);
+
+% Brace below t13, labelled "phrase translation".
+\draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}]
+    ([yshift=-0.2em,xshift=-0.5em]t13.south west) -- ([yshift=-6.3em,xshift=-0.5em]t13.south west)
+    node [pos=0.5,left,xshift=1.0em,yshift=0.0em,text width=5em,align=left] (label3) {\footnotesize{\textbf{短语翻译}}};
+
+% Legend entry for the red (words only) path.
+\node [anchor=north west] (wtranslabel) at ([yshift=-4em]t15.south west) {\scriptsize{翻译路径（仅含有单词）:}};
+\draw [->,ultra thick,red,line width=1.5pt,opacity=0.7] (wtranslabel.east) -- ([xshift=1em]wtranslabel.east);
+
+% Legend entry for the blue (with phrases) path.
+\node [anchor=north west] (ptranslabel) at ([yshift=-5.5em]t15.south west) {\scriptsize{翻译路径（含有短语）:}};
+\draw [->,ultra thick,ublue,line width=1.5pt,opacity=0.7] ([xshift=0.65em]ptranslabel.east) -- ([xshift=1.65em]ptranslabel.east);
+
+\end{scope}
+
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/Figures/word-translation-regard-as-path.tex
+++ b/Book/Chapter4/Figures/word-translation-regard-as-path.tex
+%%%------------------------------------------------------------------------------------------------------------
+%%% 回顾基于词的翻译模型
+\begin{tikzpicture}
+
+\begin{scope}
+
+% Segmented source sentence: one bold node per word, chained left to right.
+\node [anchor=west] (s1) at (0,0) {\textbf{我}};
+\node [anchor=west] (s2) at ([xshift=2em]s1.east) {\textbf{对}};
+\node [anchor=west] (s3) at ([xshift=2em]s2.east) {\textbf{你}};
+\node [anchor=west] (s4) at ([xshift=2em]s3.east) {\textbf{表示}};
+\node [anchor=west] (s5) at ([xshift=2em]s4.east) {\textbf{满意}};
+
+% Red caption placed above the first word.
+\node [anchor=south west] (sentlabel) at ([yshift=-0.5em]s1.north west) {\scriptsize{\textbf{\red{待翻译句子(已经分词):}}}};
+
+% Short downward arrow below each of the five source words.
+\foreach \k in {1,...,5}
+    \draw [->,very thick,ublue] (s\k.south) -- ([yshift=-0.7em]s\k.south);
+
+{\small
+% Shared settings, local to this group:
+%   transbox - anchor, padding and height common to every translation-option box
+%   spantag  - black tag in a box's top-left corner showing the source position
+\tikzset{
+    transbox/.style={anchor=north,inner sep=2pt,minimum height=1.5em},
+    spantag/.style={anchor=north west,inner sep=1pt,fill=black}
+}
+
+% Options for source word 1, stacked below s1.
+\node [transbox,fill=red!20,minimum width=2.5em] (t11) at ([yshift=-1em]s1.south) {I};
+\node [transbox,fill=red!20,minimum width=2.5em] (t12) at ([yshift=-0.2em]t11.south) {me};
+\node [transbox,fill=red!20,minimum width=2.5em] (t13) at ([yshift=-0.2em]t12.south) {I'm};
+\foreach \tid in {11, 12, 13}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{1}}}};
+
+% Options for source word 2.
+\node [transbox,fill=green!20,minimum width=2.5em] (t21) at ([yshift=-1em]s2.south) {to};
+\node [transbox,fill=green!20,minimum width=2.5em] (t22) at ([yshift=-0.2em]t21.south) {with};
+\node [transbox,fill=green!20,minimum width=2.5em] (t23) at ([yshift=-0.2em]t22.south) {for};
+\foreach \tid in {21, 22, 23}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{2}}}};
+
+% Option for source word 3.
+\node [transbox,fill=blue!20,minimum width=2.5em] (t31) at ([yshift=-1em]s3.south) {you};
+\node [spantag] (tl31) at (t31.north west) {\tiny{{\color{white} \textbf{3}}}};
+
+% Options for source word 4.
+\node [transbox,fill=orange!20,minimum width=3em] (t41) at ([yshift=-1em]s4.south) {$\phi$};
+\node [transbox,fill=orange!20,minimum width=3em] (t42) at ([yshift=-0.2em]t41.south) {show};
+\foreach \tid in {41, 42}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{4}}}};
+
+% Options for source word 5.
+\node [transbox,fill=purple!20,minimum width=4.5em] (t51) at ([yshift=-1em]s5.south) {satisfy};
+\node [transbox,fill=purple!20,minimum width=4.5em] (t52) at ([yshift=-0.2em]t51.south) {satisfied};
+\node [transbox,fill=purple!20,minimum width=4.5em] (t53) at ([yshift=-0.2em]t52.south) {satisfies};
+\foreach \tid in {51, 52, 53}
+    \node [spantag] (tl\tid) at (t\tid.north west) {\tiny{{\color{white} \textbf{5}}}};
+
+}
+
+{\tiny
+% One rotated, white-on-black probability label per translation option.
+% The label for box t<id> is named pt<id> and sits at that box's east anchor.
+\foreach \tid/\prob in {11/.4, 12/.2, 13/.4,
+                        21/.4, 22/.3, 23/.3,
+                        31/1,
+                        41/.5, 42/.5,
+                        51/.5, 52/.4, 53/.1}
+    \node [anchor=north,rotate=90,inner sep=1pt,minimum width=2.55em,fill=black] (pt\tid) at (t\tid.east) {{\color{white} \textbf{P=\prob}}};
+
+}
+
+% Brace to the left of boxes t11..t13, labelled "word translation".
+\draw[decorate,thick,decoration={brace,amplitude=5pt,mirror}]
+    ([yshift=0em,xshift=-0.5em]t11.north west) -- ([xshift=-0.5em]t13.south west)
+    node [pos=0.5,left,xshift=1.0em,yshift=0.0em,text width=5em,align=left] (label2) {\footnotesize{\textbf{单词翻译}}};
+
+\end{scope}
+
+% Empty scope (contains only a font switch and draws nothing).
+\begin{scope}
+{\small
+
+}
+\end{scope}
+
+\begin{scope}
+
+% Red path through one option per source word: t13, t22, t31, t41, t52.
+\draw [->,ultra thick,red,line width=2pt,opacity=0.7]
+       ([xshift=-0.5em]t13.west) -- ([xshift=0.8em]t13.east)
+    -- ([xshift=-0.2em]t22.west) -- ([xshift=0.8em]t22.east)
+    -- ([xshift=-0.2em]t31.west) -- ([xshift=0.8em]t31.east)
+    -- ([xshift=-0.2em]t41.west) -- ([xshift=0.8em]t41.east)
+    -- ([xshift=-0.2em]t52.west) -- ([xshift=1.2em]t52.east);
+
+\end{scope}
+
+\end{tikzpicture}
\ No newline at end of file
--- a/Book/Chapter4/chapter4.tex
+++ b/Book/Chapter4/chapter4.tex
--- a/Book/Chapter5/Figures/fig-fit.tex
+++ b/Book/Chapter5/Figures/fig-fit.tex
@@ -10,7 +10,7 @@
 \draw [-,ublue] (n10.west) -- (n10.east);
 \draw [-,ublue] (n11.west) -- (n11.east);
 \node [anchor=north] (x1) at ([yshift=-6em]n11.south) {$x_1$};
-\node [anchor=north] (labela) at ([xshift=3.5em,yshift=-0.5em]x1.south) {\footnotesize{(a)}};
+\node [anchor=north] (labela) at ([xshift=3.5em,yshift=-0.5em]x1.south) {\footnotesize{(a) 拟合一小段函数}};
 \node [anchor=north] (b) at ([yshift=-6em]n10.south) {$b$};
 {
 \draw [->,thick,red] (b.north) -- ([yshift=-0.1em]n10.south);
@@ -92,7 +92,7 @@
 \draw [-,ublue] (n10.west) -- (n10.east);
 \draw [-,ublue] (n11.west) -- (n11.east);
 \node [anchor=north] (x1) at ([yshift=-6em]n11.south) {$x_1$};
-\node [anchor=north] (labelb) at ([xshift=6em,yshift=-0.5em]x1.south) {\footnotesize{(b)}};
+\node [anchor=north] (labelb) at ([xshift=6em,yshift=-0.5em]x1.south) {\footnotesize{(b) 拟合更大一段函数}};
 \node [anchor=north] (b) at ([yshift=-6em]n10.south) {$b$};
 {
 \draw [->,thick,red] (b.north) -- ([yshift=-0.1em]n10.south);

--- a/Book/Chapter5/chapter5.tex
+++ b/Book/Chapter5/chapter5.tex
@@ -116,7 +116,7 @@
 \vspace{0.5em}
 \item 最终的系统性能强弱非常依赖特征的选择。有一句话在业界广泛流传：``数据和特征决定了机器学习的上限''，但是人的智力和认知是有限的，因此人工设计的特征的准确性和覆盖度会受到限制；
 \vspace{0.5em}
-\item 通用性差。针对不同的任务，传统机器学习的特征工程方法需要选择出不同的特征，在这个任务上表现很好的特征在其它任务上可能没有效果。
+\item 通用性差。针对不同的任务，传统机器学习的特征工程方法需要选择出不同的特征，在这个任务上表现很好的特征在其他任务上可能没有效果。
 \end{itemize}
 \vspace{0.5em}

@@ -156,9 +156,9 @@

 \parinterval 线性代数作为一个数学分支，广泛应用于科学和工程中，神经网络的数学描述中也大量使用了线性代数工具。因此，这里我们对线性代数的一些概念进行简要介绍，以方便后续对神经网络的数学建模。
 %--5.2.1.1标量、向量和矩阵---------------------
-\subsubsection{（一）标量、向量和矩阵}\index{Chapter5.2.1.1}
+\subsubsection{标量、向量和矩阵}\index{Chapter5.2.1.1}

-\parinterval {\small\sffamily\bfseries{标量}}（Scalar）：标量亦称``无向量''，是一种只具有数值大小而没有方向的量，通俗地说，一个标量就是一个单独的数，这里我们特指实数\footnote{严格意义上，标量可以是复数等其它形式，这里为了方便讨论，我们仅以实数为对象。}。我们一般用小写斜体表示标量。比如，对于$ a=5 $，$ a $就是一个标量。
+\parinterval {\small\sffamily\bfseries{标量}}（Scalar）：标量亦称``无向量''，是一种只具有数值大小而没有方向的量，通俗地说，一个标量就是一个单独的数，这里我们特指实数\footnote{严格意义上，标量可以是复数等其他形式，这里为了方便讨论，我们仅以实数为对象。}。我们一般用小写斜体表示标量。比如，对于$ a=5 $，$ a $就是一个标量。

 \parinterval {\small\sffamily\bfseries{向量}}（Vector）：向量是由一组实数组成的有序数组。与标量不同，向量既有大小也有方向。我们可以把向量看作空间中的点，每个元素是不同坐标轴上的坐标。公式\ref{eqa1.1}和公式\ref{eqa1.2}展示了一个行向量和一个列向量。本章默认使用行向量，如$ \mathbf a=(a_1, a_2, a_3) $，$ \mathbf a $对应的列向量记为$ \mathbf a^{\rm T} $。
 %公式--------------------------------------------------------------------
@@ -188,18 +188,18 @@
 \end{eqnarray}
 %公式--------------------------------------------------------------------
 %--5.2.1.2矩阵的转置---------------------
-\subsubsection{（二）矩阵的转置}\index{Chapter5.2.1.2}
+\subsubsection{矩阵的转置}\index{Chapter5.2.1.2}

-\parinterval {\small\sffamily\bfseries{转置}}（transpose）是矩阵的重要操作之一。矩阵的转置可以看作是将矩阵以对角线为镜像进行翻转：假设$ \mathbf a $为$ m $行$ n $列的矩阵，第$ i $行第$ j $ 列的元素是$ a_{ij} $，即：$ \mathbf a={(a_{ij})}_{m\times n} $，把$ m\times n $矩阵$ \mathbf a $的行换成同序数的列得到一个$ n\times m $矩阵，则得到$ \mathbf a $的转置矩阵，记为$ \mathbf a^{\rm T} $，其中$ a_{ji}^{\rm T}=a_{ij} $。例如：
+\parinterval {\small\sffamily\bfseries{转置}}（Transpose）是矩阵的重要操作之一。矩阵的转置可以看作是将矩阵以对角线为镜像进行翻转：假设$ \mathbf a $为$ m $行$ n $列的矩阵，第$ i $行第$ j $ 列的元素是$ a_{ij} $，即：$ \mathbf a={(a_{ij})}_{m\times n} $，把$ m\times n $矩阵$ \mathbf a $的行换成同序数的列得到一个$ n\times m $矩阵，则得到$ \mathbf a $的转置矩阵，记为$ \mathbf a^{\rm T} $，其中$ a_{ji}^{\rm T}=a_{ij} $。例如：
 \begin{eqnarray}
 \mathbf a & = & \begin{pmatrix} 1 & 3 & 2 & 6\\5 & 4 & 8 & 2\end{pmatrix} \\
 {\mathbf a}^{\rm T} & = &\begin{pmatrix} 1 & 5\\3 & 4\\2 & 8\\6 & 2\end{pmatrix}
 \end{eqnarray}
 \parinterval 向量可以看作只有一行（列）的矩阵。对应地，向量的转置可以看作是只有一列（行）的矩阵。标量可以看作是只有一个元素的矩阵。因此，标量的转置等于它本身，即$ a^{\rm T}=a $。
 %--5.2.1.3矩阵加法和数乘---------------------
-\subsubsection{（三）矩阵加法和数乘}\index{Chapter5.2.1.3}
+\subsubsection{矩阵加法和数乘}\index{Chapter5.2.1.3}

-\parinterval 矩阵加法又被称作{\small\sffamily\bfseries{按元素加法}}（element-wise addition）。它是指两个矩阵把其相对应元素加在一起的运算，通常的矩阵加法被定义在两个形状相同的矩阵上。两个$ m\times n $矩阵$ \mathbf a $和$ \mathbf b $的和，标记为$ \mathbf a + \mathbf b $，它也是个$ m\times n $矩阵，其内的各元素为其相对应元素相加后的值。如果矩阵$ \mathbf c = \mathbf a + \mathbf b $，则$ c_{ij} = a_{ij} + b_{ij} $。公式\ref{eqa1.4}展示了矩阵之间进行加法的计算过程。
+\parinterval 矩阵加法又被称作{\small\sffamily\bfseries{按元素加法}}（Element-wise Addition）。它是指两个矩阵把其相对应元素加在一起的运算，通常的矩阵加法被定义在两个形状相同的矩阵上。两个$ m\times n $矩阵$ \mathbf a $和$ \mathbf b $的和，标记为$ \mathbf a + \mathbf b $，它也是个$ m\times n $矩阵，其内的各元素为其相对应元素相加后的值。如果矩阵$ \mathbf c = \mathbf a + \mathbf b $，则$ c_{ij} = a_{ij} + b_{ij} $。公式\ref{eqa1.4}展示了矩阵之间进行加法的计算过程。
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
 \begin{pmatrix}
@@ -258,7 +258,7 @@
 \parinterval （3）结合律：$ (kl)\mathbf a=k(l\mathbf a) $。

 %--5.2.1.4矩阵乘法和矩阵点乘---------------------
-\subsubsection{（四）矩阵乘法和矩阵点乘}\index{Chapter5.2.1.4}
+\subsubsection{矩阵乘法和矩阵点乘}\index{Chapter5.2.1.4}

 \parinterval 矩阵乘法是矩阵运算中最重要的操作之一，为了与矩阵点乘区分，我们通常也把矩阵乘法叫做矩阵的叉乘。假设$ \mathbf a $为$ m\times p $的矩阵，$ \mathbf b $为$ p\times n $的矩阵，对$ \mathbf a $和$ \mathbf b $作矩阵乘积的结果是一个$ m\times n $的矩阵$ \mathbf c $，其中矩阵$ \mathbf c $中第$ i $行、第$ j $列的元素可以表示为：
 %公式--------------------------------------------------------------------
@@ -311,9 +311,9 @@
 \end{eqnarray}
 %公式--------------------------------------------------------------------
 %--5.2.1.5线性映射---------------------
-\subsubsection{（五）线性映射}\index{Chapter5.2.1.5}
+\subsubsection{线性映射}\index{Chapter5.2.1.5}

-\parinterval {\small\sffamily\bfseries{线性映射}}（ linear mapping）或{\small\sffamily\bfseries{线性变换}}（linear transformation）是从一个向量空间V到另一个向量空间W的映射函数$ f:v\rightarrow w$，且该映射函数保持加法运算和数量乘法运算，即对于空间V中任何两个向量$ \mathbf u $和$ \mathbf v $以及任何标量$ c $：
+\parinterval {\small\sffamily\bfseries{线性映射}}（Linear Mapping）或{\small\sffamily\bfseries{线性变换}}（Linear Transformation）是从一个向量空间$V$到另一个向量空间$W$的映射函数$ f:V\rightarrow W $，且该映射函数保持加法运算和数量乘法运算，即对于空间$V$中任何两个向量$ \mathbf u $和$ \mathbf v $以及任何标量$ c $：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
 f(\mathbf u+\mathbf v)&=&f(\mathbf u)+f(\mathbf v)\label{eqa1.9}\\
@@ -336,8 +336,10 @@ f(c\mathbf v)&=&cf(\mathbf v)
 \end{pmatrix}
 \label{eqa1.12}
 \end{eqnarray}
+
 \begin{eqnarray}
-\mathbf y&=&\mathbf a\mathbf x\;\;=\;\;
+\mathbf y& = &\mathbf a\mathbf x \nonumber \\
+               & = &
 \begin{pmatrix}
   a_{11}x_{1}+a_{12}x_{2}+\dots+a_{1n}x_{n}\\
   a_{21}x_{1}+a_{22}x_{2}+\dots+a_{2n}x_{n}\\
@@ -346,9 +348,11 @@ f(c\mathbf v)&=&cf(\mathbf v)
 \label{eqa1.13}\end{pmatrix}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
+
 \parinterval 上例中矩阵$ \mathbf a $定义了一个从$ R^n $到$ R^m $的线性映射：向量$ \mathbf x\in R^n $和$ \mathbf y\in R^m $分别为两个空间中的列向量，即大小为$ n\times 1 $和$ m\times 1 $的矩阵。
+
 %--5.2.1.6范数---------------------
-\subsubsection{（六）范数}\index{Chapter5.2.1.6}
+\subsubsection{范数}\index{Chapter5.2.1.6}
 \parinterval 工程领域，我们经常会使用被称为范数（norm）的函数衡量向量大小，范数为向量空间内的所有向量赋予非零的正长度或大小。对于一个n维向量$ \mathbf x $，一个常见的范数函数为$ l_p $范数，通常表示为$ {\Vert{\mathbf x}\Vert}_p $ ，其中$p\geqslant 0$，是一个标量形式的参数。常用的$ p $的取值有$ 1 $，$ 2 $，$ \infty $等。范数的计算公式为：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
@@ -366,7 +370,8 @@ l_p(\mathbf x)&=&{\Vert{\mathbf x}\Vert}_p\;\;=\;\;{\left (\sum_{i=1}^{n}{{\vert
 \parinterval $ l_2 $范数为向量的各个元素平方和的二分之一次方：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
-{\Vert{\mathbf x}\Vert}_2&=&\sqrt{\sum_{i=1}^{n}{{x_{i}}^2}}\;\;=\;\;\sqrt{{\mathbf x}^{\rm T}\mathbf x}
+{\Vert{\mathbf x}\Vert}_2&=&\sqrt{\sum_{i=1}^{n}{{x_{i}}^2}} \nonumber \\
+                                      &=&\sqrt{{\mathbf x}^{\rm T}\mathbf x}
 \label{eqa1.16}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
@@ -451,7 +456,8 @@ y=\begin{cases} 0 & \sum_{i}{x_i\cdot w_i}-\sigma <0\\1 & \sum_{i}{x_i\cdot w_i}
 \parinterval 在这种情况下应该如何做出决定呢？比如，女朋友很希望和你一起去看音乐会，但是剧场很远而且票价500元，如果这些因素对你都是同等重要的（即$ w_0=w_1=w_2 $,假设这三个权重都设置为1）那么我们会得到一个综合得分：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
-x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
+x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2 & = & 0\cdot 1+0\cdot 1+1\cdot 1 \nonumber \\
+                                                                     & = & 1
 \label{eqa1.20}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
@@ -473,7 +479,9 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
 \parinterval 在上面的例子中，如果你是守财奴，则会对票价看得更重一些，这样你会用不均匀的权重计算每个因素的影响，比如：$ w_0=0.5 $，$ w_1=2 $，$ w_2=0.5 $，此时感知机模型如图\ref{fig:perceptron-to-predict-2}所示。在这种情况下，女友很希望和你一起去看音乐会，但是剧场很远而且票价有500元，会导致你不去看音乐会，因为
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
-\sum_{i}{x_i\cdot w_i}=0\cdot 0.5+0\cdot 2+1\cdot 0.5=0.5<\sigma =1
+\sum_{i}{x_i\cdot w_i} & = & 0\cdot 0.5+0\cdot 2+1\cdot 0.5 \nonumber \\
+                                   & = & 0.5 \nonumber \\
+                                   & < & \sigma = 1
 \label{eqa1.21}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
@@ -513,7 +521,9 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
 \parinterval 使用修改后的模型做决策：女朋友很希望和你一起，但是剧场有20km远而且票价有500元。于是有$ x_0=10/20 $，$ x_1=150/500 $，$ x_2=1 $
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
-\sum_{i}{x_i\cdot w_i}=0.5\cdot 0.5+0.3\cdot 2+1\cdot 0.5=1.35>\sigma =1
+\sum_{i}{x_i\cdot w_i} & = & 0.5\cdot 0.5+0.3\cdot 2+1\cdot 0.5 \nonumber \\
+                                   & = & 1.35 \nonumber \\
+                                   & > & \sigma =1
 \label{eqa1.22}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
@@ -521,7 +531,7 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
 %--5.2.2.4神经元内部的参数学习---------------------
 \subsubsection{（四）神经元内部的参数学习}\index{Chapter5.2.2.4}

-\parinterval 一次成功的音乐会之后，你似乎掌握了一个真理：其它什么都不重要，女友的喜好最重要，所以你又将决策模型的权重做出了调整：最简单的方式就是$ w_0=w_1=0 $，同时令$ w_2>0 $，相当于只考虑$ x_2 $的影响而忽略其它因素，于是你得到了如图\ref {fig:perceptron-to-predict-3}所示的决策模型：
+\parinterval 一次成功的音乐会之后，你似乎掌握了一个真理：其他什么都不重要，女友的喜好最重要，所以你又将决策模型的权重做出了调整：最简单的方式就是$ w_0=w_1=0 $，同时令$ w_2>0 $，相当于只考虑$ x_2 $的影响而忽略其他因素，于是你得到了如图\ref {fig:perceptron-to-predict-3}所示的决策模型：
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
@@ -554,7 +564,7 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1

 \parinterval 感知机也被称作一种最简单的单层神经网络。一个非常自然的问题是：能否把多个这样的网络叠加在一起，获得建模更复杂问题的能力？如果可以，那么在多层神经网络的每一层，神经元之间是怎么组织、工作的呢？单层网络又是通过什么方式构造成多层的呢？
 %--5.2.3.1线性变换和激活函数---------------------
-\subsubsection{（一）线性变换和激活函数}\index{Chapter5.2.3.1}
+\subsubsection{线性变换和激活函数}\index{Chapter5.2.3.1}

 \parinterval 为了建立多层神经网络，这里我们首先需要把前面提到的简单的神经元进行扩展，把多个神经元组成一``层''神经元。比如，很多实际问题中我们希望同时有多个输出，这时可以把多个相同的神经元并列起来，每个神经元都会有一个单独的输出，这就构成一``层''，形成了单层神经网络。单层神经网络中的每一个神经元都对应着一组权重和一个输出，我们可以把单层神经网络中的不同输出看作一个事物不同角度的描述。举个简单的例子，预报天气时，往往需要预测温度、湿度和风力，这就意味着如果使用单层神经网络进行预测，需要设置3个神经元。如图\ref{fig:corresponence-between-matrix-element-and-output}，权重矩阵$ \mathbf w=\begin{pmatrix} w_{00} & w_{01} & w_{02}\\ w_{10} & w_{11} & w_{12}\end{pmatrix} $中第一列元素$ \begin{pmatrix} w_{00}\\ w_{10}\end{pmatrix} $是输入相对第一个输出$ y_0 $的权重，参数向量$ \mathbf b=(b_0,b_1,b_2) $的第一个元素$ b_0 $是对应于第一个输出$ y_0 $的偏置量；类似的我们可以得到$ y_1 $和$ y_2 $。预测天气的单层模型如图\ref{fig:single-layer-of-neural-network-for-weather-prediction}所示（在本例中，假设输入$ \mathbf x=(x_0,x_1) $）。
 %----------------------------------------------
@@ -583,9 +593,11 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1

 \parinterval 那么，线性变换的本质是什么？

-\parinterval 从代数角度看，对于线性空间$ \rm V $，任意$ a,b\in {\rm V} $和数域中的任意$ \alpha $，线性变换$ T(\cdot) $需满足：$ T(a+b)=T(a)+T(b) $，$ T(\alpha a)=\alpha T(a) $。
+\begin{itemize}
+\item 从代数角度看，对于线性空间$ \rm V $，任意$ a,b\in {\rm V} $和数域中的任意$ \alpha $，线性变换$ T(\cdot) $需满足：$ T(a+b)=T(a)+T(b) $，$ T(\alpha a)=\alpha T(a) $；
+\item 从几何角度上看，公式中的$ \mathbf x\cdot \mathbf w+\mathbf b $将$ \mathbf x $右乘$ \mathbf w $相当于对$ \mathbf x $进行旋转变换，如图\ref{fig:rotation}所示，对三个点$ (0,0) $，$ (0,1) $，$ (1,0) $及其围成的矩形区域右乘$ \mathbf w=\begin{pmatrix} 1 & 0 & 0\\ 0 & -1 & 0\\ 0 & 0 & 1\end{pmatrix} $\\后，矩形区域由第一象限旋转90度到了第四象限。
+\end{itemize}

-\parinterval 从几何角度上看，公式中的$ \mathbf x\cdot \mathbf w+\mathbf b $将$ \mathbf x $右乘$ \mathbf w $相当于对$ \mathbf x $进行旋转变换，如图\ref{fig:rotation}所示，对三个点$ (0,0) $，$ (0,1) $，$ (1,0) $及其围成的矩形区域右乘$ \mathbf w=\begin{pmatrix} 1 & 0 & 0\\ 0 & -1 & 0\\ 0 & 0 & 1\end{pmatrix} $\\后，矩形区域由第一象限旋转90度到了第四象限。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
@@ -634,20 +646,20 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
    \subfigure[Sigmoid]{
    \centering
    \begin{minipage}{.23\textwidth}
-        \input{./Chapter5/Figures/fig-sigmoid}
+        \input{./Chapter5/Figures/fig-Sigmoid}
    \end{minipage}%
    }
    \qquad
    \subfigure[Tanh]{
    \centering
    \begin{minipage}{.23\textwidth}
-        \input{./Chapter5/Figures/fig-tanh}
+        \input{./Chapter5/Figures/fig-Tanh}
    \end{minipage}
    }\\    \vspace{-0.5em}
-    \subfigure[relu]{
+    \subfigure[Relu]{
    \centering
    \begin{minipage}{.23\textwidth}
-        \input{./Chapter5/Figures/fig-relu}
+        \input{./Chapter5/Figures/fig-Relu}
    \end{minipage}%
    }
    \qquad
@@ -670,7 +682,7 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
 %-------------------------------------------

 %--5.2.3.2单层神经网络->多层神经网络---------------------
-\subsubsection{（二）单层神经网络$\rightarrow$多层神经网络}\index{Chapter5.2.3.2}
+\subsubsection{单层神经网络$\rightarrow$多层神经网络}\index{Chapter5.2.3.2}

 \parinterval 单层神经网络由线性变换和激活函数两部分构成，但在实际问题中，单层网络并不足以拟合所有函数关系，因此很自然地想到将单层网络扩展到多层神经网络即深层神经网络。将一层神经网络的最终输出向量作为另一层神经网络的输入向量，通过这种方式我们可以将多层神经网络连接在一起，如图\ref{fig:more-layers}所示。
 %----------------------------------------------
@@ -701,13 +713,13 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1

 \parinterval 众所周知，单层神经网络无法解决线性不可分问题，比如经典的异或问题。但是具有一个隐藏层的两层神经网络在理论上就可以拟合所有的函数了。有些神奇？接下来我们分析一下为什么仅仅是多了一层，神经网络就能变得如此强大。在此之前，需要明确的一点是，``拟合''是把平面上一系列的点，用一条光滑的曲线连接起来，并用函数来表示这条拟合的曲线。在用神经网络解决问题时，我们通过拟合训练数据中的``数据点''来获得输入与输出之间的函数关系，并利用其对未知数据做出判断。我们可以认为输入与输出之间存在一种函数关系，而神经网络的``拟合''能力并不是可以完全准确的计算某输入对应的原函数输出值，而是尽可能地逼近原函数输出值，与原函数输出值越逼近，则意味着拟合得越优秀。

-\parinterval 如图\ref{fig:two-layer-neural-network}是一个以sigmoid作为隐藏层激活函数的两层神经网络。通过调整参数$ \mathbf w=(w_1,w_2) $，$ \mathbf b=(b_1,b_2) $和$ \mathbf w’=(w’_0,w’_1) $ 的值，我们可以不断地改变目标函数的形状。
+\parinterval 如图\ref{fig:two-layer-neural-network}是一个以Sigmoid作为隐藏层激活函数的两层神经网络。通过调整参数$ \mathbf w=(w_1,w_2) $，$ \mathbf b=(b_1,b_2) $和$ \mathbf w'=(w'_0,w'_1) $ 的值，我们可以不断地改变目标函数的形状。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
 \centering
 \input{./Chapter5/Figures/fig-two-layer-neural-network}
-\caption{以sigmoid作为隐藏层激活函数的两层神经网络}
+\caption{以Sigmoid作为隐藏层激活函数的两层神经网络}
 \label{fig:two-layer-neural-network}
 \end{figure}
 %-------------------------------------------
@@ -802,26 +814,26 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
 \subsection{ 张量及其计算}\index{Chapter5.3.1}

 %--5.3.1.1张量---------------------
-\subsubsection{（一）张量}\index{Chapter5.3.1.1}
+\subsubsection{张量}\index{Chapter5.3.1.1}

-\parinterval 对于神经网络中的某层神经元$ \mathbf y=f(\mathbf x\cdot \mathbf w+\mathbf b) $，其中$ \mathbf w $是权重矩阵，例如$ \begin{pmatrix} 1 & 2\\ 3 & 4\end{pmatrix} $，$ \mathbf b $ 是偏移向量，例如$ (1,3) $。在这里，输入$ \mathbf x $和输出$ \mathbf y $，可以不是简单的向量或是矩阵形式，而是深度学习中更加通用的数学量\ \dash \ 张量，比如下式中的几种情况都可以看作是深度学习中定义数据的张量：
+\parinterval 对于神经网络中的某层神经元$ \mathbf y=f(\mathbf x\cdot \mathbf w+\mathbf b) $，其中$ \mathbf w $是权重矩阵，例如$ \begin{pmatrix} 1 & 2\\ 3 & 4\end{pmatrix} $，$ \mathbf b $ 是偏移向量，例如$ (1,3) $。在这里，输入$ \mathbf x $和输出$ \mathbf y $，可以不是简单的向量或是矩阵形式，而是深度学习中更加通用的数学量\ \dash \ {\small\bfnew{张量}}（Tensor），比如下式中的几种情况都可以看作是深度学习中定义数据的张量：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
 \mathbf x&=&\begin{pmatrix} -1 & 3\end{pmatrix}\qquad
 \mathbf x\;\;=\;\;\begin{pmatrix} -1 & 3\\ 0.2 & 2\end{pmatrix}\qquad
-\mathbf x\;\;=\;\;\begin{pmatrix}{\begin{pmatrix} -1 & 3\\ 0.2 & 2\end{pmatrix}}\\{\begin{pmatrix} -1 & 3\\ 0.2 & 2\end{pmatrix}}\end{pmatrix}
+\mathbf x\;\;=\;\;\begin{pmatrix}{\begin{pmatrix} -1 & 3\\ 0.2 & 2\end{pmatrix}}\\{\begin{pmatrix} -1 & 3\\ 0.2 & 2\end{pmatrix}}\end{pmatrix} \nonumber
 \label{}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
 \parinterval 简单来说，张量可以为我们描述数据提供更多方便，是一种通用的工具。比如，输入的量有三个维度在变化，用矩阵不容易描述，但是用张量却很容易。

-\parinterval 从计算机实现的角度来看，现在所有深度学习框架都把张量定义为``多维数组''。张量有一个非常重要的属性\ \dash \ 阶（rank），我们可以将多维数组中``维''的属性与张量的``阶''的属性作类比，这两个属性都表示多维数组（张量）有多少个独立的方向。例如，3是一个标量（scalar），相当于一个0维数组或0阶张量；$ {(\begin{array}{cccc} 2 & -3 & 0.8 & 0.2\end{array})}^{\rm T} $是一个向量（vector），相当于一个1维数组或1阶张量；$ \begin{pmatrix} -1 & 3 & 7\\ 0.2 & 2 & 9\end{pmatrix} $是一个矩阵（matrix)，相当于一个2维数组或2阶张量；如图\ref{fig:tensor-sample}，这是一个3维数组或3阶张量，其中，每个4*4的方形代表一个2阶张量，这样的方形有4个，最终形成3阶张量。
+\parinterval 从计算机实现的角度来看，现在所有深度学习框架都把张量定义为``多维数组''。张量有一个非常重要的属性\ \dash \ 阶（rank），我们可以将多维数组中``维''的属性与张量的``阶''的属性作类比，这两个属性都表示多维数组（张量）有多少个独立的方向。例如，3是一个标量（scalar），相当于一个0维数组或0阶张量；$ {(\begin{array}{cccc} 2 & -3 & 0.8 & 0.2\end{array})}^{\rm T} $是一个向量（vector），相当于一个1维数组或1阶张量；$ \begin{pmatrix} -1 & 3 & 7\\ 0.2 & 2 & 9\end{pmatrix} $是一个矩阵（matrix），相当于一个2维数组或2阶张量；如图\ref{fig:tensor-sample}，这是一个3维数组或3阶张量，其中，每个$4 \times 4$的方形代表一个2阶张量，这样的方形有4个，最终形成3阶张量。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
 \centering
 \input{./Chapter5/Figures/fig-tensor-sample}
-\caption{3阶张量示例（4*4*4）}
+\caption{3阶张量示例（$4 \times 4 \times 4$）}
 \label{fig:tensor-sample}
 \end{figure}
 %-------------------------------------------
@@ -857,7 +869,7 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1

 \parinterval 上面讲了很多和神经网络不太相关的内容，目的是要明确张量的原始定义，以避免对这个概念的误解。但是，在本书中，我们仍然遵循深度学习中常用的概念，把张量理解为多维数组。使用张量，我们可以更容易的表示更高阶的数学形式。在保证数学表达的简洁性的同时，使程序实现接口更加统一。
 %--5.3.1.2张量的矩阵乘法---------------------
-\subsubsection{（二）张量的矩阵乘法}\index{Chapter5.3.1.2}
+\subsubsection{张量的矩阵乘法}\index{Chapter5.3.1.2}

 \parinterval 对于一层神经网络，$ \mathbf y=f(\mathbf x\cdot \mathbf w+\mathbf b) $中的$ \mathbf x\cdot \mathbf w $表示对输入$ \mathbf x $进行线性变换，其中$ \mathbf x $是输入张量，$ \mathbf w $是权重矩阵。$ \mathbf x\cdot \mathbf w $表示的是矩阵乘法，需要注意的是这里是矩阵乘法而不是张量乘法。

@@ -872,7 +884,8 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
 \noindent 例如$ \mathbf a= \begin{pmatrix} a_{11} & a_{12} & a_{13}\\a_{21} & a_{22} & a_{23}\end{pmatrix} $，$ \mathbf b= \begin{pmatrix} b_{11} & b_{12}\\b_{21} & b_{22}\\b_{31} & b_{32}\end{pmatrix} $，则
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
-\mathbf c&=&\mathbf a\mathbf b\;\;=\;\;\begin{pmatrix} a_{11}b_{11}+a_{12}b_{21}+a_{13}b_{31} & a_{11}b_{12}+a_{12}b_{22}+a_{13}b_{32}\\a_{21}b_{11}+a_{22}b_{21}+a_{23}b_{31} & a_{21}b_{12}+a_{22}b_{22}+a_{23}b_{32}\end{pmatrix}
+\mathbf c & = & \mathbf a\mathbf b \nonumber \\
+                & = & \begin{pmatrix} a_{11}b_{11}+a_{12}b_{21}+a_{13}b_{31} & a_{11}b_{12}+a_{12}b_{22}+a_{13}b_{32}\\a_{21}b_{11}+a_{22}b_{21}+a_{23}b_{31} & a_{21}b_{12}+a_{22}b_{22}+a_{23}b_{32}\end{pmatrix}
 \label{}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
@@ -897,9 +910,9 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
 %-------------------------------------------

 %--5.3.1.3张量的单元操作---------------------
-\subsubsection{（三）张量的单元操作}\index{Chapter5.3.1.3}
+\subsubsection{张量的单元操作}\index{Chapter5.3.1.3}

-\parinterval 对于神经网络中的某层神经元$ \mathbf y=f(\mathbf x\cdot \mathbf w+\mathbf b) $，也包含有其它张量单元操作：1）加法：$ \mathbf s+\mathbf b $，其中张量$ \mathbf s=\mathbf x\cdot \mathbf w $；2）激活函数：$ f(\cdot) $。具体来说：
+\parinterval 对于神经网络中的某层神经元$ \mathbf y=f(\mathbf x\cdot \mathbf w+\mathbf b) $，也包含有其他张量单元操作：1）加法：$ \mathbf s+\mathbf b $，其中张量$ \mathbf s=\mathbf x\cdot \mathbf w $；2）激活函数：$ f(\cdot) $。具体来说：

 \vspace{0.5em}
 \begin{itemize}
@@ -915,14 +928,14 @@ x_0\cdot w_0+x_1\cdot w_1+x_2\cdot w_2=0\cdot 1+0\cdot 1+1\cdot 1=1
 %-------------------------------------------

 \vspace{0.5em}
-\item 除了单位加之外，张量之间也可以减法、乘法，也可以对张量作激活函数。我们将其称作为函数的向量化（vectorization）。例如，对向量（1阶张量）作relu激活，其中relu激活函数的公式为：
+\item 除了单位加之外，张量之间也可以进行减法、乘法运算，也可以对张量使用激活函数。我们将其称作函数的向量化（vectorization）。例如，对向量（1阶张量）作Relu激活，其中Relu激活函数的公式为：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
 f(x)=\begin{cases} 0 & x\leqslant0 \\x & x>0\end{cases}
 \label{eqa1.26}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
-例如$ {\rm{relu}}\left( \begin{pmatrix} 2\\-.3\end{pmatrix}\right)=\begin{pmatrix} 2\\0\end{pmatrix} $。
+例如$ {\rm{Relu}}\left( \begin{pmatrix} 2\\-.3\end{pmatrix}\right)=\begin{pmatrix} 2\\0\end{pmatrix} $。
 \end{itemize}
 \vspace{0.5em}
 %--5.3.2 张量的物理存储形式---------------------
@@ -952,9 +965,9 @@ f(x)=\begin{cases} 0 & x\leqslant0 \\x & x>0\end{cases}
 %--5.3.3 使用开源框架实现张量计算---以NiuTensor为例---------------------
 \subsection{使用开源框架实现张量计算}\index{Chapter5.3.3}

-\parinterval 实现神经网络的开源系统有很多，比如，一个简单好用的Python工具包\ \dash \ Numpy（https://numpy.org/）。Numpy提供了张量表示和使用的范式，可以很方便地定义、使用多维数组。
+\parinterval 实现神经网络的开源系统有很多，比如，一个简单好用的Python工具包\ \dash \ Numpy（\url{https://numpy.org/}）。Numpy提供了张量表示和使用的范式，可以很方便地定义、使用多维数组。

-\parinterval 此外，如今深度学习框架已经非常成熟。比如， Tensorflow和Pytorch就是非常受欢迎的深度学习工具包，除此之外还有很多其它优秀的框架：CNTK、MXNet、\\PaddlePaddle、Keras、Chainer、dl4j、NiuTensor等。开发者可以根据自身的喜好和开发项目的要求选择所采用的框架。
+\parinterval 此外，如今深度学习框架已经非常成熟。比如， Tensorflow和Pytorch就是非常受欢迎的深度学习工具包，除此之外还有很多其他优秀的框架：CNTK、MXNet、\\PaddlePaddle、Keras、Chainer、dl4j、NiuTensor等。开发者可以根据自身的喜好和开发项目的要求选择所采用的框架。

 \parinterval 在本节中，我们将使用NiuTensor来描述张量计算。NiuTensor是由国内东北大学小牛团队开发，面向自然语言处理相关任务优化设计，支持丰富的张量计算接口。此外，该NiuTensor内核基于C++语言编写，代码高度优化。该工具包获取网址为\url{http://www.niutrans.com/opensource/niutensor/index.html}。

@@ -1075,8 +1088,8 @@ f(x)=\begin{cases} 0 & x\leqslant0 \\x & x>0\end{cases}
 \rule{0pt}{15pt}     Split(a,d,n) & 对张量$ \mathbf a $沿d方向分裂成n份  \\
 \rule{0pt}{15pt}     Sigmoid(a) & 对$ \mathbf a $进行Sigmoid变换  \\
 \rule{0pt}{15pt}     Softmax(a) & 对$ \mathbf a $进行Softmax变换，沿最后一个方向  \\
-\rule{0pt}{15pt}     HardTanh(a) & 对$ \mathbf a $进行hard tanh变换（双曲正切的近似）  \\
-\rule{0pt}{15pt}     Relu(a) & 对$ \mathbf a $进行relu变换  \\
+\rule{0pt}{15pt}     HardTanh(a) & 对$ \mathbf a $进行hard Tanh变换（双曲正切的近似）  \\
+\rule{0pt}{15pt}     Relu(a) & 对$ \mathbf a $进行Relu变换  \\
 \end{tabular}
 \end{table}
 %表2--------------------------------------------------------------------
@@ -1098,10 +1111,10 @@ f(x)=\begin{cases} 0 & x\leqslant0 \\x & x>0\end{cases}
 \end{figure}
 %-------------------------------------------

-\parinterval 它可以被描述为公式\ref{eqa1.27}，其中隐藏层的激活函数是tanh函数，输出层的激活函数是sigmoid函数：
+\parinterval 它可以被描述为公式\ref{eqa1.27}，其中隐藏层的激活函数是Tanh函数，输出层的激活函数是Sigmoid函数：
 %公式------------------------------------------
 \begin{eqnarray}
-y&=&{\rm{sigmoid}}({\rm{tanh}}(\mathbf x\cdot \mathbf w^1+\mathbf b^1)\cdot \mathbf w^2+\mathbf b^2 )
+y&=&{\rm{Sigmoid}}({\rm{Tanh}}(\mathbf x\cdot \mathbf w^1+\mathbf b^1)\cdot \mathbf w^2+\mathbf b^2 )
 \label{eqa1.27}
 \end{eqnarray}
 %公式------------------------------------------
@@ -1115,14 +1128,14 @@ y&=&{\rm{sigmoid}}({\rm{tanh}}(\mathbf x\cdot \mathbf w^1+\mathbf b^1)\cdot \mat
 \end{figure}
 %-------------------------------------------

-\parinterval 前向计算实现如图\ref{fig:weather-forward}所示，图中对各张量和其他参数的形状做了详细说明，类似shape(3)这种形式代表维度为3的1阶张量，shape(3,2)代表2阶张量，其中第1阶有3个维度，第2阶有2个维度，也可以将其理解为$ 3\ast 2 $的矩阵。输入$ \mathbf x $是一个1阶张量，该阶有3个维度，分别对应天空状况、低空气温、水平气压三个方面。输入数据经过隐藏层的线性变换$ \mathbf x\cdot \mathbf w^1+\mathbf b^1 $和tanh激活函数后，得到新的张量$ \mathbf a $，张量$ \mathbf a $也是一个1阶张量，该阶有2个维度，分别对应着从输入数据中提取出的温度和风速两方面特征；神经网络在获取到天气情况的特征$ \mathbf a $后，继续对其进行线性变换$ \mathbf a\cdot \mathbf w^2+ b^2 $（$ b^2 $是标量）和sigmoid激活函数后，得到神经网络的最终输出$ y $，即神经网络此时预测的穿衣指数。
+\parinterval 前向计算实现如图\ref{fig:weather-forward}所示，图中对各张量和其他参数的形状做了详细说明，类似shape(3)这种形式代表维度为3的1阶张量，shape(3,2)代表2阶张量，其中第1阶有3个维度，第2阶有2个维度，也可以将其理解为$ 3\times 2 $的矩阵。输入$ \mathbf x $是一个1阶张量，该阶有3个维度，分别对应天空状况、低空气温、水平气压三个方面。输入数据经过隐藏层的线性变换$ \mathbf x\cdot \mathbf w^1+\mathbf b^1 $和Tanh激活函数后，得到新的张量$ \mathbf a $，张量$ \mathbf a $也是一个1阶张量，该阶有2个维度，分别对应着从输入数据中提取出的温度和风速两方面特征；神经网络在获取到天气情况的特征$ \mathbf a $后，继续对其进行线性变换$ \mathbf a\cdot \mathbf w^2+ b^2 $（$ b^2 $是标量）和Sigmoid激活函数后，得到神经网络的最终输出$ y $，即神经网络此时预测的穿衣指数。

 %--5.3.5 神经网络实例---------------------
 \subsection{神经网络实例}\index{Chapter5.3.5}

 \parinterval 在了解了神经网络前向计算过程的基础上，我们进一步使用NiuTensor来演示搭建神经网络的过程。注意，搭建神经网络的过程本质上就是定义前向计算的过程。

-\parinterval 首先我们构造一个单层神经网络。如图\ref{fig:code-niutensor-one}所示，简单的定义输入、权重和偏置后，定义激活函数为sigmoid函数，输入$ \mathbf x $经过线性变换和激活函数，得到输出。
+\parinterval 首先我们构造一个单层神经网络。如图\ref{fig:code-niutensor-one}所示，简单的定义输入、权重和偏置后，定义激活函数为Sigmoid函数，输入$ \mathbf x $经过线性变换和激活函数，得到输出。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
@@ -1133,7 +1146,7 @@ y&=&{\rm{sigmoid}}({\rm{tanh}}(\mathbf x\cdot \mathbf w^1+\mathbf b^1)\cdot \mat
 \end{figure}
 %-------------------------------------------

-	\parinterval 如图\ref{fig:code-niutensor-three}是使用NiuTensor构造三层神经网络的程序示例，首先定义输入和各层的权重、偏置，随后定义神经网络中各层的实现细节，在第一层中，$ \mathbf x $作为输入，$ \mathbf h1 $作为输出，其中$ \mathbf h1={\rm{sigmoid}}(\mathbf x\cdot \mathbf w1+\mathbf b1) $。在第二层中，$ \mathbf h1 $作为输入，$ \mathbf h2 $作为输出，其中$ \mathbf h2={\rm{tanh}}(\mathbf h1\cdot \mathbf w2) $。在第三层中，$ \mathbf h2 $作为输入，$ \mathbf y $作为输出，其中$ \mathbf y={\rm{relu}}(\mathbf h2\cdot \mathbf w3) $。
+	\parinterval 如图\ref{fig:code-niutensor-three}是使用NiuTensor构造三层神经网络的程序示例，首先定义输入和各层的权重、偏置，随后定义神经网络中各层的实现细节，在第一层中，$ \mathbf x $作为输入，$ \mathbf h1 $作为输出，其中$ \mathbf h1={\rm{Sigmoid}}(\mathbf x\cdot \mathbf w1+\mathbf b1) $。在第二层中，$ \mathbf h1 $作为输入，$ \mathbf h2 $作为输出，其中$ \mathbf h2={\rm{Tanh}}(\mathbf h1\cdot \mathbf w2) $。在第三层中，$ \mathbf h2 $作为输入，$ \mathbf y $作为输出，其中$ \mathbf y={\rm{Relu}}(\mathbf h2\cdot \mathbf w3) $。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
@@ -1158,7 +1171,7 @@ y&=&{\rm{sigmoid}}({\rm{tanh}}(\mathbf x\cdot \mathbf w^1+\mathbf b^1)\cdot \mat
 %--5.4神经网络的参数训练-----------------------------------------
 \section{神经网络的参数训练}\index{Chapter5.4}

-\parinterval 简单来说，神经网络可以被看作是由变量和函数组成的表达式，例如：$ \mathbf y=\mathbf x+\mathbf b $、$ \mathbf y={\rm{relu}}(\mathbf x\cdot \mathbf w+\mathbf b) $、$ \mathbf y={\rm{sigmoid}}({\rm{relu}}(\mathbf x\cdot \mathbf w^1+\mathbf b^1)\cdot \mathbf w^2+\mathbf b^2) $等等，其中的$ \mathbf x $和$ \mathbf y $作为输入和输出变量， $ \mathbf w $、$ \mathbf b $等其它变量作为{\small\sffamily\bfseries{模型参数}}（Model Parameters）。确定了函数表达式和模型参数，也就确定了神经网络模型。通常，表达式的形式需要系统开发者设计，而模型参数的数量非常巨大，因此需要自动学习，这个过程也称为模型学习或训练。为了实现这个目标，通常我们会准备一定量的带有标准答案的数据，称之为{\small\sffamily\bfseries{有标注数据}}（Annotated Data/Labeled Data）。这些数据会用于对模型参数的学习，这也对应了统计模型中的参数估计过程。在机器学习中，一般把这种使用有标注数据进行统计模型参数训练的过程称为{\small\sffamily\bfseries{有指导的训练}}或{\small\sffamily\bfseries{有监督的训练}}（Supervised Training）。在本章中，如果没有特殊说明，模型训练都是指有监督的训练。那么神经网络内部是怎样利用有标注数据对参数进行训练的呢？
+\parinterval 简单来说，神经网络可以被看作是由变量和函数组成的表达式，例如：$ \mathbf y=\mathbf x+\mathbf b $、$ \mathbf y={\rm{Relu}}(\mathbf x\cdot \mathbf w+\mathbf b) $、$ \mathbf y={\rm{Sigmoid}}({\rm{Relu}}(\mathbf x\cdot \mathbf w^1+\mathbf b^1)\cdot \mathbf w^2+\mathbf b^2) $等等，其中的$ \mathbf x $和$ \mathbf y $作为输入和输出变量， $ \mathbf w $、$ \mathbf b $等其他变量作为{\small\sffamily\bfseries{模型参数}}（Model Parameters）。确定了函数表达式和模型参数，也就确定了神经网络模型。通常，表达式的形式需要系统开发者设计，而模型参数的数量非常巨大，因此需要自动学习，这个过程也称为模型学习或训练。为了实现这个目标，通常我们会准备一定量的带有标准答案的数据，称之为{\small\sffamily\bfseries{有标注数据}}（Annotated Data/Labeled Data）。这些数据会用于对模型参数的学习，这也对应了统计模型中的参数估计过程。在机器学习中，一般把这种使用有标注数据进行统计模型参数训练的过程称为{\small\sffamily\bfseries{有指导的训练}}或{\small\sffamily\bfseries{有监督的训练}}（Supervised Training）。在本章中，如果没有特殊说明，模型训练都是指有监督的训练。那么神经网络内部是怎样利用有标注数据对参数进行训练的呢？

 \parinterval 为了回答这个问题，我们可以把模型参数的学习过程看作是一个优化问题，即找到一组参数，使得模型达到某种最优的状态。这个问题又可以被转化为两个新的问题：

@@ -1247,7 +1260,7 @@ y&=&{\rm{sigmoid}}({\rm{tanh}}(\mathbf x\cdot \mathbf w^1+\mathbf b^1)\cdot \mat
 %公式--------------------------------------------------------------------
 \noindent 其中$t $表示更新的步数，$ \alpha $是一个参数，被称作{\small\sffamily\bfseries{学习率}}（Learning Rate），表示更新步幅的大小。$ \alpha $的设置需要根据任务进行调整。

-\parinterval 从优化的角度看，梯度下降是一种典型的基于梯度的方法（Gradient-based Method），属于基于一阶导数的方法。其它类似的方法还有牛顿法、共轭方向法、拟牛顿法等。在5.4.2.3节中我们会进一步介绍梯度下降的几种变形。
+\parinterval 从优化的角度看，梯度下降是一种典型的基于梯度的方法（Gradient-based Method），属于基于一阶导数的方法。其他类似的方法还有牛顿法、共轭方向法、拟牛顿法等。在5.4.2.3节中我们会进一步介绍梯度下降的几种变形。

 \parinterval 在具体实现时，公式\ref{eqa1.29}可以有以下不同的形式。

@@ -1321,7 +1334,7 @@ J(\mathbf w)&=&\frac{1}{m}\sum_{j=i}^{j+m-1}{L(\mathbf x_i,\mathbf {\widetilde y

 \parinterval 数值微分中的截断误差和舍入误差是如何造成的呢？数值微分方法求梯度时，需用极限或无穷过程来求得。然而计算机需要将求解过程化为一系列有限的算术运算和逻辑运算。这样就要对某种无穷过程进行``截断''，即仅保留无穷过程的前段有限序列而舍弃它的后段。这就带来截断误差；舍入误差，是指运算得到的近似值和精确值之间的差异。由于数值微分方法计算复杂函数的梯度问题时，经过无数次的近似，每一次近似都产生了舍入误差，在这样的情况下，误差会随着运算次数增加而积累得很大，最终得出没有意义的运算结果。实际上，截断误差和舍入误差在训练复杂神经网络中也会出现，因此是实际系统研发中需要注意的问题。

-\parinterval 尽管数值微分不适用于大模型中的梯度求解，但是由于数值微分方法非常简单，因此在很多时候，我们利用它来检验其它梯度计算方法的正确性。比如在实现反向传播的时候（5.4.6节），可以检验求导是否正确（Gradient Check），这个过程就是利用数值微分法实现的。
+\parinterval 尽管数值微分不适用于大模型中的梯度求解，但是由于数值微分方法非常简单，因此在很多时候，我们利用它来检验其他梯度计算方法的正确性。比如在实现反向传播的时候（5.4.6节），可以检验求导是否正确（Gradient Check），这个过程就是利用数值微分法实现的。

 %--符号微分---------------------
 \vspace{0.5em}
@@ -1502,7 +1515,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
 \end {figure}
 %-------------------------------------------

-\parinterval  图\ref{fig:parallel}对比了同步更新和异步更新的区别，在这个例子中，使用4台设备对一个两层神经网络中的参数进行更新，其中使用了一个参数服务器（parameter server，图中的G4）来保存最新的参数，不同设备（worker，图中的G1、G2、G3）可以通过同步或者异步的方式访问参数服务器。图中的$ \mathbf w_o $和$ \mathbf w_h $分别代表输出层和隐藏层的全部参数，操作push(P)设备向参数服务器传送梯度，操作fetch(F)表示参数服务器向设备传送更新后的参数。
+\parinterval  图\ref{fig:parallel}对比了同步更新和异步更新的区别，在这个例子中，使用4台设备对一个两层神经网络中的参数进行更新，其中使用了一个参数服务器（Parameter Server，图中的G4）来保存最新的参数，不同设备（Worker，图中的G1、G2、G3）可以通过同步或者异步的方式访问参数服务器。图中的$ \mathbf w_o $和$ \mathbf w_h $分别代表输出层和隐藏层的全部参数，操作push(P)表示设备向参数服务器传送梯度，操作fetch(F)表示参数服务器向设备传送更新后的参数。

 \parinterval  此外，在使用多个设备进行并行训练的时候，由于设备间带宽的限制，大量的数据传输会有较高的延时。对于复杂神经网络来说，设备间参数和梯度传递的时间消耗也会成为一个不得不考虑的因素。有时候，设备间数据传输的时间甚至比模型计算的时间都长，大大降低了并行度\cite{xiao2017fast}。对于这种问题，可以考虑对数据进行压缩或者减少传输的次数缓解问题。
 %--5.4.4 梯度消失、梯度爆炸和稳定性训练---------------------
@@ -1512,42 +1525,42 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
 %--5.4.4.1梯度消失现象及解决方法---------------------
 \subsubsection{（一）梯度消失现象及解决方法}\index{Chapter5.4.4.1}

-\parinterval  网络训练过程中，如果每层网络的梯度都小于1，反向传播过程中，各层梯度的偏导数会与后面层传递而来的梯度相乘得到本层的梯度，并向前一层传递。该过程循环进行，最后导致梯度指数级地减小，这就产生了梯度消失现象。这种情况会导致神经网络层数较浅的部分梯度为0，无法更新参数。一般来说，产生很小梯度的原因是我们使用了类似于sigmoid这样的激活函数，当输入的值过大或者过小的时候这类函数曲线会趋于直线，梯度近似为零。针对这个问题，主要的解决办法是使用更加易于优化的激活函数，比如，使用relu代替sigmoid和tanh作为激活函数。
+\parinterval  网络训练过程中，如果每层网络的梯度都小于1，反向传播过程中，各层梯度的偏导数会与后面层传递而来的梯度相乘得到本层的梯度，并向前一层传递。该过程循环进行，最后导致梯度指数级地减小，这就产生了梯度消失现象。这种情况会导致神经网络层数较浅的部分梯度为0，无法更新参数。一般来说，产生很小梯度的原因是我们使用了类似于Sigmoid这样的激活函数，当输入的值过大或者过小的时候这类函数曲线会趋于直线，梯度近似为零。针对这个问题，主要的解决办法是使用更加易于优化的激活函数，比如，使用Relu代替Sigmoid和Tanh作为激活函数。

-\parinterval  缓解梯度消失问题最直接的想法就是希望各层的偏导数大于或等于1。图\ref{fig:derivative1}展示了sigmoid激活函数$ y=\frac{1}{1+e^{-x}}$的函数曲线和导函数曲线，如果使用sigmoid作为损失函数，其梯度不可能超过0.25，这样经过链式求导之后，很容易发生梯度消失。
+\parinterval  缓解梯度消失问题最直接的想法就是希望各层的偏导数大于或等于1。图\ref{fig:derivative1}展示了Sigmoid激活函数$ y=\frac{1}{1+e^{-x}}$的函数曲线和导函数曲线，如果使用Sigmoid作为激活函数，其梯度不可能超过0.25，这样经过链式求导之后，很容易发生梯度消失。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
 \centering
 \input{./Chapter5/Figures/fig-derivative1}
-\caption{sigmoid激活函数的函数曲线和导函数曲线}
+\caption{Sigmoid激活函数的函数曲线和导函数曲线}
 \label{fig:derivative1}
 \end {figure}
 %-------------------------------------------

-\parinterval  同理，tanh作为激活函数也容易出现梯度消失现象，图\ref{fig:derivative2}展示了tanh激活函数$ y=\frac{e^x-e^{-x}}{e^x+e^{-x}}$的函数曲线和导函数曲线，可以看出，tanh激活函数比sigmoid激活函数要好一些，但是tanh激活函数的导数也小于1，因此无法避免梯度消失现象。
+\parinterval  同理，Tanh作为激活函数也容易出现梯度消失现象，图\ref{fig:derivative2}展示了Tanh激活函数$ y=\frac{e^x-e^{-x}}{e^x+e^{-x}}$的函数曲线和导函数曲线，可以看出，Tanh激活函数比Sigmoid激活函数要好一些，但是Tanh激活函数的导数也小于1，因此无法避免梯度消失现象。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
 \centering
 \input{./Chapter5/Figures/fig-derivative2}
-\caption{tanh激活函数的函数曲线和导函数曲线}
+\caption{Tanh激活函数的函数曲线和导函数曲线}
 \label{fig:derivative2}
 \end {figure}
 %-------------------------------------------

-\parinterval  relu激活函数的思想也很简单，如果激活函数的导数为1，那么就不存在梯度消失爆炸的问题了。图\ref{fig:derivative3}展示了relu激活函数$ y={\rm{max}}(0,x)$的函数曲线和导函数曲线。可以很容易看出，relu函数的导数在正数部分是恒等于1的，因此在深层网络中使用relu激活函数就不会产生很小的梯度。
+\parinterval  Relu激活函数的思想也很简单，如果激活函数的导数为1，那么就不存在梯度消失爆炸的问题了。图\ref{fig:derivative3}展示了Relu激活函数$ y={\rm{max}}(0,x)$的函数曲线和导函数曲线。可以很容易看出，Relu函数的导数在正数部分是恒等于1的，因此在深层网络中使用Relu激活函数就不会产生很小的梯度。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
 \centering
 \input{./Chapter5/Figures/fig-derivative3}
-\caption{relu激活函数的函数曲线和导函数曲线}
+\caption{Relu激活函数的函数曲线和导函数曲线}
 \label{fig:derivative3}
 \end {figure}
 %-------------------------------------------

-\parinterval  当然，梯度消失并不是仅仅可以通过改变激活函数就可以完全消除掉。随着网络层数的增加，很多因素都可能会造成梯度消失。后面也会进一步介绍其它手段，我们可以综合运用这些方法达到很好的缓解梯度消失问题的目的。
+\parinterval  当然，梯度消失并不是仅仅可以通过改变激活函数就可以完全消除掉。随着网络层数的增加，很多因素都可能会造成梯度消失。后面也会进一步介绍其他手段，我们可以综合运用这些方法达到很好的缓解梯度消失问题的目的。
 %--5.4.4.2梯度消失现象及解决方法---------------------
 \subsubsection{（二）梯度爆炸现象及解决方法}\index{Chapter5.4.4.2}

@@ -1564,7 +1577,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
 %--5.4.4.3稳定性训练---------------------
 \subsubsection{（三）稳定性训练}\index{Chapter5.4.4.3}

-\parinterval  为了使神经网络模型训练更加稳定，通常还会考虑其它策略。
+\parinterval  为了使神经网络模型训练更加稳定，通常还会考虑其他策略。

 \parinterval  （1）批量归一化（Batch Normalization）

@@ -1714,7 +1727,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
 \begin{itemize}
 \item $ \frac{\partial L}{\partial \mathbf h^K} $表示损失函数$ L $相对网络输出$ \mathbf h^K $的梯度。比如，对于平方损失$ L=\frac{1}{2}{\Vert \widetilde {\mathbf y}-\mathbf h^K\Vert}^2 $，有$ \frac{\partial L}{\partial \mathbf h^K}= \widetilde{ \mathbf y} -\mathbf h^K $。计算结束后，将$ \frac{\partial L}{\partial \mathbf h^K} $向前传递。
 \vspace{0.5em}
-\item $ \frac{\partial f^T(\mathbf s^K)}{\partial \mathbf s^K} $表示激活函数相对于其输入$ \mathbf s^K $的梯度。比如，对于sigmoid函数$ f(\mathbf s)=\frac{1}{1+e^{- \mathbf s}}$，有$ \frac{\partial f(\mathbf s)}{\partial \mathbf s}=f(\mathbf s) (1-f(\mathbf s))$
+\item $ \frac{\partial f^T(\mathbf s^K)}{\partial \mathbf s^K} $表示激活函数相对于其输入$ \mathbf s^K $的梯度。比如，对于Sigmoid函数$ f(\mathbf s)=\frac{1}{1+e^{- \mathbf s}}$，有$ \frac{\partial f(\mathbf s)}{\partial \mathbf s}=f(\mathbf s) (1-f(\mathbf s))$。
 \end{itemize}
 \end{spacing}
 \vspace{0.5em}
@@ -1920,17 +1933,17 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t

 \parinterval  在FNNLM中，所有的参数、输入、输出都是连续变量，因此FNNLM也是典型的一个连续空间模型。通过使用交叉熵等损失函数，FNNLM很容易进行优化。比如，可以使用梯度下降方法对FNNLM的模型参数进行训练。

-\parinterval  FNNLM的实现也非常简单，图\ref{fig:code-FNNLM}展示了基于NiuTensor的FNNLM的部分代码。需要注意的是，在程序实现时， tanh函数一般会用hardtanh函数代替。因为 tanh函数中的指数运算容易导致溢出：
+\parinterval  FNNLM的实现也非常简单，图\ref{fig:code-FNNLM}展示了基于NiuTensor的FNNLM的部分代码。需要注意的是，在程序实现时， Tanh函数一般会用hardTanh函数代替。因为 Tanh函数中的指数运算容易导致溢出：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
-{\rm{tanh}}(x)&=&\frac{{\rm{exp}}(x)-{\rm{exp}}(-x)}{{\rm{exp}}(x)+\rm{exp}(-x)}
+{\rm{Tanh}}(x)&=&\frac{{\rm{exp}}(x)-{\rm{exp}}(-x)}{{\rm{exp}}(x)+{\rm{exp}}(-x)}
 \label{}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
-\noindent 而hardtanh函数不存在这个问题，因此具有数值计算的稳定性。hardtanh函数表达式如下：
+\noindent 而hardTanh函数不存在这个问题，因此具有数值计算的稳定性。hardTanh函数表达式如下：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
-{\rm{hardtanh}}(x)&=&\begin{cases} -1 & x<-1\\x & -1\leqslant x\leqslant 1\\1 & x>1\end{cases}
+{\rm{hardTanh}}(x)&=&\begin{cases} -1 & x<-1\\x & -1\leqslant x\leqslant 1\\1 & x>1\end{cases}
 \label{}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
@@ -1957,11 +1970,11 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
 \parinterval  在循环神经网络中，输入和输出都是一个序列，分别记为$ (\mathbf x_1,\dots,\mathbf x_m) $和$ (\mathbf y_1,\dots,\\ \mathbf y_m) $。它们都可以被看作是时序序列，其中每个时刻$ t $都对应一个输入$ \mathbf x_t $和输出$ \mathbf y_t $。循环神经网络的核心是{\small\sffamily\bfseries{循环单元}}（RNN Cell），它读入前一个时刻循环单元的输出和当前时刻的输入，生成当前时刻循环单元的输出。图\ref{fig:rnn-LM}展示了一个简单的循环单元结构，对于时刻$ t $，循环单元的输出被定义为：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
-\mathbf h_t&=&{\rm{tanh}}(\mathbf x_t\mathbf U+\mathbf h_{t-1}\mathbf W)
+\mathbf h_t&=&{\rm{Tanh}}(\mathbf x_t\mathbf U+\mathbf h_{t-1}\mathbf W)
 \label{eqa1.63}
 \end{eqnarray}
 %公式--------------------------------------------------------------------
-\noindent  其中，$ \mathbf h_t $表示$ t $时刻循环单元的输出，$ \mathbf h_{t-1} $表示$ t-1 $时刻循环单元的输出，$ \mathbf U $和$ \mathbf W $是模型的参数。可以看出，循环单元的结构其实很简单，只是一个对$ \mathbf h_{t-1} $和$ \mathbf x_t $的线性变换再加上一个tanh函数。通过读入上一时刻的输出，当前时刻可以访问以前的历史信息。这个过程可以循环执行，这样就完成了对所有历史信息的建模。$ \mathbf h_t $可以被看作是序列在$ t $时刻的一种表示，也可以被看作是网络的一个隐藏层。进一步，$ \mathbf h_t $可以被送入输出层，得到$ t $时刻的输出：
+\noindent  其中，$ \mathbf h_t $表示$ t $时刻循环单元的输出，$ \mathbf h_{t-1} $表示$ t-1 $时刻循环单元的输出，$ \mathbf U $和$ \mathbf W $是模型的参数。可以看出，循环单元的结构其实很简单，只是一个对$ \mathbf h_{t-1} $和$ \mathbf x_t $的线性变换再加上一个Tanh函数。通过读入上一时刻的输出，当前时刻可以访问以前的历史信息。这个过程可以循环执行，这样就完成了对所有历史信息的建模。$ \mathbf h_t $可以被看作是序列在$ t $时刻的一种表示，也可以被看作是网络的一个隐藏层。进一步，$ \mathbf h_t $可以被送入输出层，得到$ t $时刻的输出：
 %公式--------------------------------------------------------------------
 \begin{eqnarray}
 \mathbf Y_t&=&{\rm{Softmax}}(\mathbf h_t\mathbf V)
@@ -2000,7 +2013,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
 \end{figure}
 %-------------------------------------------

-\parinterval  在传统的语言模型中，给定一个单词$ w_i $，其它单词对它的影响并没有显性的被建模。而在基于注意力机制的语言模型中，当前需要预测的单词会更加关注与该位置联系较大的单词。具体来说，注意力机制会计算位置$ i $与其它任意位置之间的相关度，称为{\small\sffamily\bfseries{注意力权重}}（Attention Weights），通过这个权重可以更多的使用与$ w_i $关联紧密的位置的信息。举个简单的例子，在``我 喜欢 学习 数学''这个句子中，我们需要预测``数学''这个词，通过注意力机制我们很可能知道``数学''与``学习''的联系更紧密，所以在预测过程中``学习''所占的权重会更大，预测结果会更加精确。
+\parinterval  在传统的语言模型中，给定一个单词$ w_i $，其他单词对它的影响并没有显性的被建模。而在基于注意力机制的语言模型中，当前需要预测的单词会更加关注与该位置联系较大的单词。具体来说，注意力机制会计算位置$ i $与其他任意位置之间的相关度，称为{\small\sffamily\bfseries{注意力权重}}（Attention Weights），通过这个权重可以更多的使用与$ w_i $关联紧密的位置的信息。举个简单的例子，在``我 喜欢 学习 数学''这个句子中，我们需要预测``数学''这个词，通过注意力机制我们很可能知道``数学''与``学习''的联系更紧密，所以在预测过程中``学习''所占的权重会更大，预测结果会更加精确。
 %--5.5.1.4语言模型的评价---------------------
 \subsubsection{（四）语言模型的评价}\index{Chapter5.5.1.4}

@@ -2019,7 +2032,7 @@ w_{t+1}&=&w_t-\frac{\eta}{\sqrt{z_t+\epsilon}} v_t
 %--5.5.2.1One-hot编码---------------------
 \subsubsection{（一）One-hot编码}\index{Chapter5.5.2.1}

-\parinterval  {\small\sffamily\bfseries{One-hot编码}}（也称{\small\sffamily\bfseries{独热编码}}）是传统的单词表示方法。One-hot编码把单词表示为词汇表大小的0-1向量，其中只有该词所对应的那一项是1，而其余所有项都是零。举个简单的例子，假如我们有一个词典，里面包含10k个单词，并进行编号。那么每个单词都可以表示为一个10k维的One-hot向量，它仅在对应编号那个维度为1，其它维度都为0，如图\ref{fig:one-hot}所示。
+\parinterval  {\small\sffamily\bfseries{One-hot编码}}（也称{\small\sffamily\bfseries{独热编码}}）是传统的单词表示方法。One-hot编码把单词表示为词汇表大小的0-1向量，其中只有该词所对应的那一项是1，而其余所有项都是零。举个简单的例子，假如我们有一个词典，里面包含10k个单词，并进行编号。那么每个单词都可以表示为一个10k维的One-hot向量，它仅在对应编号那个维度为1，其他维度都为0，如图\ref{fig:one-hot}所示。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
@@ -2109,7 +2122,7 @@ Jobs was the CEO of {\red{\underline{apple}}}.

 \parinterval  在自然语言处理中，{\small\sffamily\bfseries{句子表示模型}}是指把输入的句子进行分布式表示。不过表示的形式不一定是一个单独的向量。现在广泛使用的句子表示模型可以被描述为：给定一个输入的句子$ \{ w_1,\dots ,w_m\} $，得到一个表示序列$ \{ \mathbf h_1,\dots ,\mathbf h_m\} $，其中$ \mathbf h_i $是句子在第$ i $个位置的表示结果。$ \{ \mathbf h_1,\dots ,\mathbf h_m\} $就被看作是{\small\sffamily\bfseries{句子的表示}}，它可以被送入下游模块。比如，在机器翻译任务中，可以用这种模型表示源语言句子，然后通过这种表示结果进行目标语译文的生成；在序列标注（如词性标注）任务中，可以对输入的句子进行表示，然后在这个表示之上构建标签预测模块。很多自然语言处理任务都可以用句子表示模型进行建模，因此句子的表示模型也是应用最广泛的深度学习模型之一。而学习这种表示的过程也被称作{\small\sffamily\bfseries{表示学习}}（Representation Learning）。

-\parinterval  句子表示模型有两种训练方法。最简单的方法是把它作为目标系统中的一个模块进行训练，比如把句子表示模型作为机器翻译系统的一部分。也就是，我们并不单独训练句子表示模型，而是把它作为一个内部模块放到其它系统中。另一种方法是把句子表示作为独立的模块，用外部系统进行训练，之后把训练好的表示模型放入目标系统中，再进行微调。这种方法构成了一种新的范式：预训练+微调（pre-training + fine-tuning）。图\ref{fig:model-training}对比了这两种不同的方法。
+\parinterval  句子表示模型有两种训练方法。最简单的方法是把它作为目标系统中的一个模块进行训练，比如把句子表示模型作为机器翻译系统的一部分。也就是，我们并不单独训练句子表示模型，而是把它作为一个内部模块放到其他系统中。另一种方法是把句子表示作为独立的模块，用外部系统进行训练，之后把训练好的表示模型放入目标系统中，再进行微调。这种方法构成了一种新的范式：预训练+微调（pre-training + fine-tuning）。图\ref{fig:model-training}对比了这两种不同的方法。
 %----------------------------------------------
 % 图
 \begin{figure}[htp]
@@ -2217,69 +2230,3 @@ Jobs was the CEO of {\red{\underline{apple}}}.
 \item 预训练是表示学习的重要产物。预训练已经在图像处理等领域得到应用。在自然语言处理中，以BERT为代表的预训练模型席卷了很多自然语言处理任务，在阅读理解等比赛（如Stanford Question Answering）中已经成为了所有参赛系统的标配。除了ELMO、GPT、BERT，还有很多优秀的预训练模型，包括GPT-2\cite{radford2019language}、XLM\cite{lample2019cross}、MASS\cite{song2019mass}、XLNet\cite{yang2019xlnet}，等等。但是，预训练往往依赖大规模的数据和并行运算设备，这使得很多普通研究者对训练这样的模型望而却步。不过，也有一些研究关注轻量的预训练方法，也受到了很多关注，例如ALBERT\cite{lan2019albert}。
 \end{itemize}
 \vspace{0.5em}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
--- a/Book/Chapter6/Chapter6.tex
+++ b/Book/Chapter6/Chapter6.tex
@@ -15,7 +15,7 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \chapter{神经机器翻译模型}

-\parinterval \textbf{神经机器翻译}（Neural Machine Translation）是机器翻译的前沿方法。近几年，随着深度学习技术的发展和在各领域中的深入应用，基于端到端表示学习的方法正在改变着我们处理自然语言的方式，神经机器翻译在这种趋势下应运而生。一方面，神经机器翻译仍然延续着统计建模和基于数据驱动的思想，因此在基本问题的定义上与前人的研究是一致的；另一方面，神经机器翻译脱离了统计机器翻译中对隐含翻译结构的假设，同时使用分布式表示来对文字序列进行建模，这使得它可以从一个全新的视角看待翻译问题。现在，神经机器翻译已经成为了机器翻译研究及应用的热点，译文质量得到了巨大的提升。在本章中，我们将对神经机器翻译的基础模型和方法进行介绍。
+\parinterval {\small\sffamily\bfseries{神经机器翻译}}（Neural Machine Translation）是机器翻译的前沿方法。近几年，随着深度学习技术的发展和在各领域中的深入应用，基于端到端表示学习的方法正在改变着我们处理自然语言的方式，神经机器翻译在这种趋势下应运而生。一方面，神经机器翻译仍然延续着统计建模和基于数据驱动的思想，因此在基本问题的定义上与前人的研究是一致的；另一方面，神经机器翻译脱离了统计机器翻译中对隐含翻译结构的假设，同时使用分布式表示来对文字序列进行建模，这使得它可以从一个全新的视角看待翻译问题。现在，神经机器翻译已经成为了机器翻译研究及应用的热点，译文质量得到了巨大的提升。在本章中，我们将对神经机器翻译的基础模型和方法进行介绍。
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{神经机器翻译的发展简史}\index{Chapter6.1}%Index的作用，目前不清晰

@@ -113,7 +113,7 @@
 \end{figure}
 %----------------------------------------------

-\parinterval  除了上面例子中展示的流畅度和准确度外，神经机器翻译在其它评价指标上的表现也全面超越统计机器翻译\cite{Bentivogli2016NeuralVP}。比如，在IWSLT 2015英语-德语任务中，与三个最先进的统计机器翻译系统（PBSY、HPB、SPB）相比，神经机器翻译系统的mTER得分在不同长度句子上都得到了明显的下降，如图\ref{fig:6-3}\footnote{mTER是一种错误率度量，值越低表明译文越好。}。其次，神经机器翻译的单词形态错误率和单词词义错误率都远低于统计机器翻译系统（表\ref{tab:HTER} ）。
+\parinterval  除了上面例子中展示的流畅度和准确度外，神经机器翻译在其他评价指标上的表现也全面超越统计机器翻译\cite{Bentivogli2016NeuralVP}。比如，在IWSLT 2015英语-德语任务中，与三个最先进的统计机器翻译系统（PBSY、HPB、SPB）相比，神经机器翻译系统的mTER得分在不同长度句子上都得到了明显的下降，如图\ref{fig:6-3}\footnote{mTER是一种错误率度量，值越低表明译文越好。}。其次，神经机器翻译的单词形态错误率和单词词义错误率都远低于统计机器翻译系统（表\ref{tab:HTER} ）。

 %----------------------------------------------
 % 表
@@ -361,12 +361,12 @@ NMT                     & $ 21.7^{\ast}$          & $18.7^{\ast}$           & -1
 \end{figure}
 %----------------------------------------------

-\parinterval 本章将会从基于循环神经网络的翻译模型和注意力机制入手，介绍神经机器翻译的基本方法。同时也会以GNMT系统为例，对神经机器翻译的其它相关技术进行讨论。
+\parinterval 本章将会从基于循环神经网络的翻译模型和注意力机制入手，介绍神经机器翻译的基本方法。同时也会以GNMT系统为例，对神经机器翻译的其他相关技术进行讨论。
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{建模}\index{Chapter6.3.1}
 \label{sec:6.3.1}

-\parinterval 同大多数自然语言处理任务一样，神经机器翻译要解决的一个基本问题是如何描述文字序列，称为序列表示问题。例如，处理语音数据、文本数据都可以被看作是典型的序列表示问题。如果把一个序列看作一个时序上的一系列变量，不同时刻的变量之间往往是存在相关性的。也就是说，一个时序中某个时刻变量的状态会依赖其它时刻变量的状态，即上下文的语境信息。下面是一个简单的例子，假设有一个句子，但是最后两个单词被擦掉了，我们如何猜测被擦掉的单词是什么？
+\parinterval 同大多数自然语言处理任务一样，神经机器翻译要解决的一个基本问题是如何描述文字序列，称为序列表示问题。例如，处理语音数据、文本数据都可以被看作是典型的序列表示问题。如果把一个序列看作一个时序上的一系列变量，不同时刻的变量之间往往是存在相关性的。也就是说，一个时序中某个时刻变量的状态会依赖其他时刻变量的状态，即上下文的语境信息。下面是一个简单的例子，假设有一个句子，但是最后两个单词被擦掉了，我们如何猜测被擦掉的单词是什么？

 \begin{example}
 \quad
@@ -379,7 +379,7 @@ NMT                     & $ 21.7^{\ast}$          & $18.7^{\ast}$           & -1

 \parinterval 显然，根据上下文中提到的``没吃饭''、``很饿''，最佳的答案是``吃 饭''或者``吃 东西''。也就是，对序列中某个位置的答案进行预测时我们需要记忆当前时刻之前的序列信息，因此，循环神经网络（Recurrent Neural Network, RNN）应运而生。实际上循环神经网络有着极为广泛的应用，例如语音识别、语言建模以及我们要介绍的神经机器翻译。

-\parinterval 第五章已经对循环神经网络的基本知识进行过介绍。这里再简单回顾一下。简单来说，循环神经网络由循环单元组成。对于序列中的任意时刻，都有一个循环单元与之对应，它会融合当前时刻的输入和上一时刻循环单元的输出，生成当前时刻的输出。这样每个时刻的信息都会被传递到下一时刻，这也间接达到了记录历史信息的目的。比如，对于序列$\mathbf{x}=\{x_1, x_2,…, x_m\}$，循环神经网络的会按顺序输出一个序列$\mathbf{h}=\{ \mathbf{h}_1, \mathbf{h}_2,…, \mathbf{h}_m \}$，其中$\mathbf{h}_i$表示$i$时刻循环神经网络的输出（通常为一个向量）。
+\parinterval 第五章已经对循环神经网络的基本知识进行过介绍。这里再简单回顾一下。简单来说，循环神经网络由循环单元组成。对于序列中的任意时刻，都有一个循环单元与之对应，它会融合当前时刻的输入和上一时刻循环单元的输出，生成当前时刻的输出。这样每个时刻的信息都会被传递到下一时刻，这也间接达到了记录历史信息的目的。比如，对于序列$\mathbf{x}=\{x_1, x_2,..., x_m\}$，循环神经网络会按顺序输出一个序列$\mathbf{h}=\{ \mathbf{h}_1, \mathbf{h}_2,..., \mathbf{h}_m \}$，其中$\mathbf{h}_i$表示$i$时刻循环神经网络的输出（通常为一个向量）。

 \parinterval 图\ref{fig:6-8}展示了一个循环神经网络处理序列问题的实例。当前时刻循环单元的输入由上一个时刻的输出和当前时刻的输入组成，因此也可以理解为，网络当前时刻计算得到的输出是由之前的序列共同决定的，即网络在不断的传递信息的过程中记忆了历史信息。以最后一个时刻的循环单元为例，它在对``开始''这个单词的信息进行处理时，参考了之前所有词（``<eos> 让 我们''）的信息。

@@ -411,14 +411,14 @@ NMT                     & $ 21.7^{\ast}$          & $18.7^{\ast}$           & -1
 \label{eqC6.1}
 \end{eqnarray}

-\noindent 在这里，我们用$\mathbf{x}=\{ x_1,x_2,…, x_m \}$表示输入的源语言单词序列，$\mathbf{y}=\{ y_1,y_2,…, y_n \}$ 表示生成的目标语单词序列。由于神经机器翻译在生成译文时采用的是自左向右逐词生成的方式，并在翻译每个单词时会考虑已经生成的翻译结果，因此对$\textrm{P} (\mathbf{y} | \mathbf{x})$的求解可以转换为：
+\noindent 在这里，我们用$\mathbf{x}=\{ x_1,x_2,..., x_m \}$表示输入的源语言单词序列，$\mathbf{y}=\{ y_1,y_2,..., y_n \}$ 表示生成的目标语单词序列。由于神经机器翻译在生成译文时采用的是自左向右逐词生成的方式，并在翻译每个单词时会考虑已经生成的翻译结果，因此对$\textrm{P} (\mathbf{y} | \mathbf{x})$的求解可以转换为：
 %-----------------
 \begin{eqnarray}
 \textrm{P} (\mathbf{y} | \mathbf{x}) = \prod_{j=1}^{n} \textrm{P} ( y_j | \mathbf{y}_{<j }, \mathbf{x}  )
 \label{eqC6.2}
 \end{eqnarray}

-\noindent 其中，$ \mathbf{y}_{<j }$表示目标语第$j$个位置之前已经生成的译文单词序列。$\textrm{P} ( y_j | \mathbf{y}_{<j }, \mathbf{x})$可以被解释为，根据源语句子$\mathbf{x} $和已生成的目标语言译文片段$\mathbf{y}_{<j }=\{ y_1, y_2,…, y_{j-1} \}$,生成第$j$个目标语言单词$y_j$的概率。举个简单的例子，已知源文为$\mathbf{x} =$\{\textrm{我，很好}\}，则译文$\mathbf{y}=$\{I’m，fine\}的概率为:
+\noindent 其中，$ \mathbf{y}_{<j }$表示目标语第$j$个位置之前已经生成的译文单词序列。$\textrm{P} ( y_j | \mathbf{y}_{<j }, \mathbf{x})$可以被解释为，根据源语句子$\mathbf{x} $和已生成的目标语言译文片段$\mathbf{y}_{<j }=\{ y_1, y_2,..., y_{j-1} \}$，生成第$j$个目标语言单词$y_j$的概率。举个简单的例子，已知源文为$\mathbf{x} =$\{\textrm{我，很好}\}，则译文$\mathbf{y}=$\{I'm，fine\}的概率为：
 %-------------
 \begin{eqnarray}
 \textrm{P} ( \{{\textrm{I'm,fine}}\}|\{\textrm{我，很好}\}) = \textrm{P} (\textrm{I'm}| \{\textrm{我，很好}\} ) \cdot          \textrm{P} (\textrm{fine}|\textrm{I'm},\{\textrm{我，很好}\})
@@ -428,17 +428,17 @@ NMT                     & $ 21.7^{\ast}$          & $18.7^{\ast}$           & -1
 \parinterval 求解$\textrm{P}(y_j | \mathbf{y}_{<j},\mathbf{x})$有三个关键问题（图\ref{fig:6-10}）：

 \begin{itemize}
-\item	如何对$\mathbf{x}$和$\mathbf{y}_{<j }$进行分布式表示，即\textbf{词嵌入}问题。首先，将由one-hot向量表示的源语言单词，即由0,1表示的离散化向量表示，转化为实数向量。我们把这个过程记为$\textrm{e}_x (\cdot)$。类似的，可以把目标语序列$\mathbf{y}_{<j }$中的每个单词用同样的方式进行表示，记为$\textrm{e}_y (\cdot)$。
+\item	如何对$\mathbf{x}$和$\mathbf{y}_{<j }$进行分布式表示，即{\small\sffamily\bfseries{词嵌入}}（Word Embedding）。首先，将由one-hot向量表示的源语言单词，即由0,1表示的离散化向量表示，转化为实数向量。我们把这个过程记为$\textrm{e}_x (\cdot)$。类似的，可以把目标语序列$\mathbf{y}_{<j }$中的每个单词用同样的方式进行表示，记为$\textrm{e}_y (\cdot)$。

-\item	如何在词嵌入的基础上获取整个序列的表示，即句子的\textbf{表示学习}问题。我们可以把词嵌入的序列作为循环神经网络的输入，循环神经网络最后一个时刻的输出向量便是整个句子的表示结果。如图\ref{fig:6-10}中，编码器最后一个循环单元的输出$\mathbf{h}_m$被看作是一种包含了源语句子信息的表示结果，记为$\mathbf{C}$。
+\item	如何在词嵌入的基础上获取整个序列的表示，即句子的{\small\sffamily\bfseries{表示学习}}（Representation Learning）。我们可以把词嵌入的序列作为循环神经网络的输入，循环神经网络最后一个时刻的输出向量便是整个句子的表示结果。如图\ref{fig:6-10}中，编码器最后一个循环单元的输出$\mathbf{h}_m$被看作是一种包含了源语句子信息的表示结果，记为$\mathbf{C}$。

-\item	如何得到每个目标语单词的概率，即\textbf{译文单词生成}问题。与神经语言模型一样，我们可以用一个Softmax输出层来获取当前时刻所有单词的分布，即利用Softmax函数计算目标语词表中每个单词的概率。令目标语序列$j$时刻的循环神经网络的输出向量（或状态）为$\mathbf{s}_j$。根据循环神经网络的性质，$ y_j$的生成只依赖前一个状态$\mathbf{s}_{j-1}$和当前时刻的输入（即词嵌入$\textrm{e}_y (y_{j-1})$）。同时考虑源语言信息$\mathbf{C}$，$\textrm{P}(y_j  | \mathbf{y}_{<j},\mathbf{x})$可以被重新定义为：
+\item	如何得到每个目标语单词的概率，即译文单词{\small\sffamily\bfseries{生成}}（Generation）。与神经语言模型一样，我们可以用一个Softmax输出层来获取当前时刻所有单词的分布，即利用Softmax函数计算目标语词表中每个单词的概率。令目标语序列$j$时刻的循环神经网络的输出向量（或状态）为$\mathbf{s}_j$。根据循环神经网络的性质，$ y_j$的生成只依赖前一个状态$\mathbf{s}_{j-1}$和当前时刻的输入（即词嵌入$\textrm{e}_y (y_{j-1})$）。同时考虑源语言信息$\mathbf{C}$，$\textrm{P}(y_j  | \mathbf{y}_{<j},\mathbf{x})$可以被重新定义为：
 \begin{eqnarray}
 \textrm{P} (y_j | \mathbf{y}_{<j},\mathbf{x}) \equiv \textrm{P} ( {y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}} )
 \label{eqC6.4}
 \end{eqnarray}

-$\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Softmax的输入是循环神经网络$j$时刻的输出。在具体实现时，$\mathbf{C}$可以被简单的作为第一个时刻循环单元的输入，即，当$j=1$ 时，解码器的循环神经网络会读入编码器最后一个隐层状态$ \mathbf{h}_m$（也就是$\mathbf{C}$），而其它时刻的隐层状态不直与$\mathbf{C}$相关。最终，$\textrm{P} (y_j | \mathbf{y}_{<j},\mathbf{x})$ 被表示为：
+$\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Softmax的输入是循环神经网络$j$时刻的输出。在具体实现时，$\mathbf{C}$可以被简单的作为第一个时刻循环单元的输入，即，当$j=1$ 时，解码器的循环神经网络会读入编码器最后一个隐层状态$ \mathbf{h}_m$（也就是$\mathbf{C}$），而其他时刻的隐层状态不直接与$\mathbf{C}$相关。最终，$\textrm{P} (y_j | \mathbf{y}_{<j},\mathbf{x})$ 被表示为：
 \begin{eqnarray}
 \textrm{P} (y_j | \mathbf{y}_{<j},\mathbf{x}) \equiv
 \left \{ \begin{array}{ll}
@@ -465,7 +465,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof
 \subsection{输入（词嵌入）及输出（Softmax）}\index{Chapter6.3.2}
 \label{sec:6.3.2}

-\parinterval 由公式\ref{eqC6.2}可知，神经机器翻译系统在运行时需要两个输入，一个是源语言单词序列$\mathbf{x}$，和目标语单词序列$\mathbf{y}_{<j}$（到$j$时刻为止）。因此，第一步我们需要把单词的离散化表示转化为神经网络容易处理的分布式连续表示，即词嵌入。而我们也会把词嵌入的结果作为循环神经网络的输入层。词嵌入的概念已经在第五章神经语言模型的部分进行过详细介绍。以解码端为例，传统方法中每个目标语言单词都对应目标语言词表中的一个索引项，可以用one-hot向量表示。one-hot向量的维度和词表大小相同，但是只有单词所对应的那一维的值为1，其它为均为0。例如，词表中包含三个单词，则它们的one-hot表示分别为[0,0,1]，[0,1,0]，[1,0,0]。词嵌入的目的是把这种one-hot表示转化为一个实数向量，向量的每一维都对应这个单词的某种``属性''。由于是实数向量，这些属性是可以直接进行代数运算的。相比one-hot表示中所有向量都是正交的，词嵌入表示可以更容易描述不同单词间的关系，而不是简单的进行非零0即1的判断。比如，词嵌入表示中的著名例子``queen''=``woman''-``man''+``king''就能在一定程度上说明这个问题。
+\parinterval 由公式\ref{eqC6.2}可知，神经机器翻译系统在运行时需要两个输入，一个是源语言单词序列$\mathbf{x}$，另一个是目标语单词序列$\mathbf{y}_{<j}$（到$j$时刻为止）。因此，第一步我们需要把单词的离散化表示转化为神经网络容易处理的分布式连续表示，即词嵌入。而我们也会把词嵌入的结果作为循环神经网络的输入层。词嵌入的概念已经在第五章神经语言模型的部分进行过详细介绍。以解码端为例，传统方法中每个目标语言单词都对应目标语言词表中的一个索引项，可以用one-hot向量表示。one-hot向量的维度和词表大小相同，但是只有单词所对应的那一维的值为1，其他维均为0。例如，词表中包含三个单词，则它们的one-hot表示分别为[0,0,1]，[0,1,0]，[1,0,0]。词嵌入的目的是把这种one-hot表示转化为一个实数向量，向量的每一维都对应这个单词的某种``属性''。由于是实数向量，这些属性是可以直接进行代数运算的。相比one-hot表示中所有向量都是正交的，词嵌入表示可以更容易描述不同单词间的关系，而不是简单的进行非0即1的判断。比如，词嵌入表示中的著名例子``queen''=``woman''-``man''+``king''就能在一定程度上说明这个问题。

 \parinterval 那么怎么在神经机器翻译系统中获得单词的词嵌入表示呢？这里我们引入一个词嵌入层对输入的单词进行词嵌入表示，即图\ref{fig:6-11}中的绿色方框部分。假设输入的单词$y_j$已经被表示为one-hot形式（行向量）。词嵌入层的工作就是把one-hot向量右乘一个实数矩阵$\mathbf{E}$，得到的结果（行向量）就是这个单词所对应的词嵌入结果。
 \begin{eqnarray}
@@ -473,7 +473,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof
 \label{eqC6.6}
 \end{eqnarray}

-\noindent 这里，$\mathbf{E}$也被称作词嵌入矩阵，它可以作为模型的一部分参数共同参与机器翻译系统的训练，也可以由外部其它模块训练得到（如预训练模型）。$\mathbf{E}$的大小为$|V| \times d$，这里$|V|$表示词表$V$的大小，$d$表示循环神经网络输入和输出向量的维度。
+\noindent 这里，$\mathbf{E}$也被称作词嵌入矩阵，它可以作为模型的一部分参数共同参与机器翻译系统的训练，也可以由外部其他模块训练得到（如预训练模型）。$\mathbf{E}$的大小为$|V| \times d$，这里$|V|$表示词表$V$的大小，$d$表示循环神经网络输入和输出向量的维度。

 \parinterval 图\ref{fig:6-11}以单词``you''为例，展示了词嵌入的生成过程。词嵌入层首先将输入的单词``you''转化成one-hot表示，对应虚线框中的0-1向量，即除了you在词表中的索引位置为1，其余位置均为0。然后词嵌入层将这个0-1向量乘以$\mathbf{E}$就得到了词嵌入的结果（绿色圆角框框起来部分），这个过程我们用$\textrm{e}_y (\cdot)$表示，即you的词嵌入表示$\textrm{e}_y (``\textrm{you}'')$。最后，将单词的词嵌入表示作为当前时间步循环单元（蓝色方框）的输入。

@@ -503,11 +503,11 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof

 \parinterval 输出层的构造很简单，对于输入的向量$\mathbf{s}_j$经过一个线性变换之后再经过Softmax\\函数，即可得到一个$V$上的分布，具体描述如下：
 \begin{eqnarray}
-\mathbf{o}_j=\textrm{Softmax}( \mathbf{s}_j \mathbf{W_o})
+\mathbf{o}_j=\textrm{Softmax}( \mathbf{s}_j \mathbf{W}_o)
 \label{eqC6.7}
 \end{eqnarray}

-\noindent 其中，$\mathbf{W_o}$是线性变换的参数矩阵，矩阵的大小为$d \times |V|$，也就是$d$维的向量会变为$|V|$维的向量；$\mathbf{o}_j$表示输出的结果向量，$\mathbf{o}_j$的每一维$\mathbf{o}_{jk}$表示，在时刻$j$词表$V$中一个第$k$个单词出现的概率。这里我们把$\mathbf{o}_j(y_j)$记作目标语单词$y_j$的生成概率，显然有
+\noindent 其中，$\mathbf{W}_o$是线性变换的参数矩阵，矩阵的大小为$d \times |V|$，也就是$d$维的向量会变为$|V|$维的向量；$\mathbf{o}_j$表示输出的结果向量，$\mathbf{o}_j$的每一维$\mathbf{o}_{jk}$表示，在时刻$j$词表$V$中第$k$个单词出现的概率。这里我们把$\mathbf{o}_j(y_j)$记作目标语单词$y_j$的生成概率，显然有
 \begin{eqnarray}
 \textrm{P} (y_j| \mathbf{y}_{<j},\mathbf{x})=\mathbf{o}_j(y_j)
 \label{eqC6.8}
@@ -556,7 +556,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof
 \parinterval 循环神经网络的核心是设计循环单元的结构。至今，研究人员已经提出了很多优秀的循环单元结构，这里将介绍其中三种基本结构：RNN，LSTM和GRU。LSTM\\和GRU是RNN的变体，在自然语言处理任务中得到了广泛的应用。
 %%%%%%%%%%%%%%%%%%
 \subsubsection{循环神经单元（RNN）}\index{Chapter6.3.3.1}
-\parinterval RNN（Recurrent Neural Network）是最原始的循环神经网络结构。在RNN中，对于序列$\mathbf{x}=\{ \mathbf{x}_1, \mathbf{x}_2,…,\mathbf{x}_m \}$，每个时刻$t$都对应一个循环单元，它的输出是一个向量$\mathbf{h}_t$，可以被描述为：
+\parinterval RNN（Recurrent Neural Network）是最原始的循环神经网络结构。在RNN中，对于序列$\mathbf{x}=\{ \mathbf{x}_1, \mathbf{x}_2,...,\mathbf{x}_m \}$，每个时刻$t$都对应一个循环单元，它的输出是一个向量$\mathbf{h}_t$，可以被描述为：
 \begin{eqnarray}
 \mathbf{h}_t=f(\mathbf{x}_t \mathbf{U}+\mathbf{h}_{t-1} \mathbf{W}+\mathbf{b})
 \label{eqC6.11}
@@ -589,7 +589,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof
 \parinterval LSTM的结构主要分为三个部分：

 \begin{itemize}
-\item \textbf{遗忘}。顾名思义，遗忘的目的是忘记一些历史，在LSTM中通过遗忘门实现，其结构如图\ref{fig:6-14}（a）所示。$\mathbf{x}_{t}$表示时刻$t$的输入向量，$\mathbf{h}_{t-1}$是时刻$t-1$的循环单元的输出，$\mathbf{x}_{t}$和$\mathbf{h}_{t-1}$都作为$t$时刻循环单元的输入。$\sigma$将对$\mathbf{x}_{t}$和$\mathbf{h}_{t-1}$进行筛选，以决定遗忘的信息，其计算公式如下：
+\item {\small\sffamily\bfseries{遗忘}}。顾名思义，遗忘的目的是忘记一些历史，在LSTM中通过遗忘门实现，其结构如图\ref{fig:6-14}（a）所示。$\mathbf{x}_{t}$表示时刻$t$的输入向量，$\mathbf{h}_{t-1}$是时刻$t-1$的循环单元的输出，$\mathbf{x}_{t}$和$\mathbf{h}_{t-1}$都作为$t$时刻循环单元的输入。$\sigma$将对$\mathbf{x}_{t}$和$\mathbf{h}_{t-1}$进行筛选，以决定遗忘的信息，其计算公式如下：
 \begin{eqnarray}
 \mathbf{f}_t=\sigma(\mathbf{W}_f [\mathbf{h}_{t-1},\mathbf{x}_{t}] + \mathbf{b}_f )
 \label{eqC6.12}
@@ -597,7 +597,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof

 这里，$\mathbf{W}_f$是权值，$\mathbf{b}_f$是偏置，$[\mathbf{h}_{t-1},\mathbf{x}_{t}]$表示两个向量的拼接。该公式可以解释为，对$[\mathbf{h}_{t-1},\mathbf{x}_{t}]$进行变换，并得到一个实数向量$\mathbf{f}_t$。$\mathbf{f}_t$的每一维都可以被理解为一个``门''，它决定可以有多少信息被留下（或遗忘）。

-\item \textbf{记忆更新}。首先，要生成当前时刻需要新增加的信息，该部分由输入门完成，其结构如图\ref{fig:6-14}（b）红色线部分，图中``$\bigotimes$''表示进行点乘操作。输入门的计算分为两部分，首先利用$\sigma$决定门控参数$\mathbf{i}_t$，然后通过Tanh函数得到新的信息$\hat{\mathbf{c}}_t$，具体公式如下：
+\item {\small\sffamily\bfseries{记忆更新}}。首先，要生成当前时刻需要新增加的信息，该部分由输入门完成，其结构如图\ref{fig:6-14}（b）红色线部分，图中``$\bigotimes$''表示进行点乘操作。输入门的计算分为两部分，首先利用$\sigma$决定门控参数$\mathbf{i}_t$，然后通过Tanh函数得到新的信息$\hat{\mathbf{c}}_t$，具体公式如下：
 \begin{eqnarray}
 \mathbf{i}_t = \sigma (\mathbf{W}_i [\mathbf{h}_{t-1},\mathbf{x}_{t}] + \mathbf{b}_i )
 \label{eqC6.13}
@@ -614,7 +614,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof
 \label{eqC6.15}
 \end{eqnarray}

-\item \textbf{输出}。该部分使用输出门计算最终的输出信息$\mathbf{h}_t$，其结构如图\ref{fig:6-14}（d）红色线部分所示。在输出门中，首先将$\mathbf{x}_t$和$\mathbf{h}_{t-1}$通过$\sigma$函数变换得到$\mathbf{o}_t$。其次，将上一步得到的新记忆信息$\mathbf{c}_t$通过Tanh函数进行变换，得到值范围在[-1，1]的向量。最后将这两部分进行点乘，具体公式如下：
+\item {\small\sffamily\bfseries{输出}}。该部分使用输出门计算最终的输出信息$\mathbf{h}_t$，其结构如图\ref{fig:6-14}（d）红色线部分所示。在输出门中，首先将$\mathbf{x}_t$和$\mathbf{h}_{t-1}$通过$\sigma$函数变换得到$\mathbf{o}_t$。其次，将上一步得到的新记忆信息$\mathbf{c}_t$通过Tanh函数进行变换，得到值范围在[-1，1]的向量。最后将这两部分进行点乘，具体公式如下：
 \begin{eqnarray}
 \mathbf{o}_t = \sigma (\mathbf{W}_o [\mathbf{h}_{t-1},\mathbf{x}_{t}] + \mathbf{b}_o )
 \label{eqC6.16}
@@ -715,7 +715,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{注意力机制}\index{Chapter6.3.4}
 \label{sec:6.3.4}
-\parinterval 在第二章中我们提到过``上帝是不公平的''，这个观点主要是表达了：世界上事物之间的联系不是均匀的，有些事物之间的联系会很强，而其它的联系可能很弱。自然语言也完美的契合了这个观点。比如，再重新看一下前面提到的根据上下文补缺失单词的例子，
+\parinterval 在第二章中我们提到过``上帝是不公平的''，这个观点主要是表达了：世界上事物之间的联系不是均匀的，有些事物之间的联系会很强，而其他的联系可能很弱。自然语言也完美的契合了这个观点。比如，再重新看一下前面提到的根据上下文补缺失单词的例子，

 \begin{example}
 \quad
@@ -728,7 +728,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof

 \noindent 我们之所以能想到在横线处填``吃饭''、``吃东西''很有可能是因为看到了``没吃饭''、 ``很饿''等关键信息。也就是这些关键的片段对我们预测缺失的单词起着关键性作用。而预测``吃饭''与前文中的``中午''、``又''之间的联系似乎不那么紧密。也就是说，在我们形成 ``吃饭''的逻辑时，在潜意识里会更注意``没吃饭''、``很饿''等关键信息。也就是我们的关注度并不是均匀的分布在整个句子上的。

-\parinterval 这个现象可以用注意力机制进行解释。注意力机制的概念来源于生物学的一些现象：当待接收的信息过多时，人类会选择性地关注部分信息而忽略其他信息。它在人类的视觉、听觉、嗅觉等方面均有体现，当我们在感受事物时，我们的大脑会自动过滤或衰减部分信息，仅关注其中少数几个部分。例如，当我们在看到图\ref{fig:6-19}时，往往不是``均匀地''看图像中的所有区域，我们可能最先注意到的是大狗头上带的的帽子，然后才会关注图片中其它部分。
+\parinterval 这个现象可以用注意力机制进行解释。注意力机制的概念来源于生物学的一些现象：当待接收的信息过多时，人类会选择性地关注部分信息而忽略其他信息。它在人类的视觉、听觉、嗅觉等方面均有体现，当我们在感受事物时，我们的大脑会自动过滤或衰减部分信息，仅关注其中少数几个部分。例如，当我们在看到图\ref{fig:6-19}时，往往不是``均匀地''看图像中的所有区域，我们可能最先注意到的是大狗头上戴的帽子，然后才会关注图片中其他部分。

 \parinterval 那么注意力机制和神经机器翻译又有什么关系呢？它可以帮助我们解决哪些神经机器翻译的缺陷呢？下面就一起来看一看。

@@ -751,7 +751,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof
 \item 此外，当生成某一个目标语单词时，我们并不是均匀的使用源语言句子中的单词信息。更普遍的情况是，我们会参考与这个目标语单词相对应的源语言单词进行翻译。这有些类似于词对齐的作用，即翻译是基于单词之间的某种对应关系。但是，使用单一的源语言表示根本无法区分源语言句子的不同部分，更不用说对源语言单词和目标语言单词之间的联系进行建模了。
 \end{itemize}

-\parinterval 看一个实际的翻译实例，如图\ref{fig:6-20}，目标语中的``very long''仅依赖于源文中的``很长''。这时如果将所有源语编码成一个固定的实数向量，``很长''的信息就很可能被其它词的信息淹没掉，而翻译``very long''时也无法区分不同源语单词的贡献。
+\parinterval 看一个实际的翻译实例，如图\ref{fig:6-20}，目标语中的``very long''仅依赖于源文中的``很长''。这时如果将所有源语编码成一个固定的实数向量，``很长''的信息就很可能被其他词的信息淹没掉，而翻译``very long''时也无法区分不同源语单词的贡献。

 %----------------------------------------------
 % 图3.10
@@ -763,7 +763,7 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof
 \end{figure}
 %----------------------------------------------

-\parinterval 显然，以上的问题的根本原因在于所使用的表示模型还比较``弱''。因此我们需要一个更强大的表示模型，在生成目标语单词时能够有选择的获取源语言句子中的更有用的部分。更准确的说，对于要生成的目标语单词，相关性更高的源语言片段应该在源语言句子的表示中体现出来，而不是将所有的源语言单词一视同仁。在神经机器翻译中引入注意力机制正是为了达到这个目的\cite{bahdanau2014neural}\cite{DBLP:journals/corr/LuongPM15}。实际上，除了机器翻译，注意力机制也被成功的应用于图像处理、语音识别、自然语言处理的其它任务。而正是注意力机制的引入，使得包括机器翻译在内很多自然语言处理系统得到了新的飞跃。
+\parinterval 显然，以上的问题的根本原因在于所使用的表示模型还比较``弱''。因此我们需要一个更强大的表示模型，在生成目标语单词时能够有选择的获取源语言句子中的更有用的部分。更准确的说，对于要生成的目标语单词，相关性更高的源语言片段应该在源语言句子的表示中体现出来，而不是将所有的源语言单词一视同仁。在神经机器翻译中引入注意力机制正是为了达到这个目的\cite{bahdanau2014neural}\cite{DBLP:journals/corr/LuongPM15}。实际上，除了机器翻译，注意力机制也被成功的应用于图像处理、语音识别、自然语言处理的其他任务。而正是注意力机制的引入，使得包括机器翻译在内的很多自然语言处理系统得到了新的飞跃。

 \parinterval 神经机器翻译中的注意力机制并不复杂。对于每个目标语单词$y_j$，我们都生成一个源语言表示向量$\mathbf{C}_j$与之对应，$\mathbf{C}_j$会包含生成$y_j$所需的源语言的信息，或者说$\mathbf{C}_j$是一种包含目标语言单词与源语言单词对应关系的源语言表示。相比用一个静态的表示$\mathbf{C}$，注意力机制使用的是动态的表示$\mathbf{C}_j$。$\mathbf{C}_j$也被称作对于目标语位置$j$的上下文向量。图\ref{fig:6-21}对比了未引入注意力机制和引入了注意力机制的编码器-解码器结构。可以看出，在注意力模型中，对于每一个目标单词的生成，都会额外引入一个单独的上下文向量参与运算。

@@ -781,13 +781,13 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof

 \parinterval 那么注意力机制是如何针对不同单词生成不同的上下文向量的呢？这里，我们可以将注意力机制看做是一种对接收到的信息的加权处理。对于更重要的信息赋予更高的权重即更高的关注度，对于贡献度较低的信息分配较低的权重，弱化其对结果的影响。这样，$\mathbf{C}_j$可以包含更多对当前目标语言位置有贡献的源语言片段的信息。

-\parinterval 根据这种思想，上下文向量$\mathbf{C}_j$被定义为对不同时间步编码器输出的状态序列$\{ \mathbf{h}_1, \mathbf{h}_2,…,\mathbf{h}_m \}$进行加权求和，如下：
+\parinterval 根据这种思想，上下文向量$\mathbf{C}_j$被定义为对不同时间步编码器输出的状态序列$\{ \mathbf{h}_1, \mathbf{h}_2,...,\mathbf{h}_m \}$进行加权求和，如下：
 \begin{eqnarray}
 \mathbf{C}_j=\sum_{i} \alpha_{i,j} \mathbf{h}_i
 \label{eqC6.22}
 \end{eqnarray}

-\noindent 其中，$\alpha_{i,j}$是\textbf{注意力权重}，它表示目标语第$j$个位置与源语第$i$个位置之间的相关性大小。这里，我们将每个时间步编码器的输出$\mathbf{h}_i$看作源语言位置$i$的表示结果。进行翻译时，解码端可以根据当前的位置$j$，通过控制不同$\mathbf{h}_i$的权重得到$\mathbf{C}_j$，使得对目标语位置$j$贡献大的$\mathbf{h}_i$对$\mathbf{C}_j$的影响增大。也就是说，$\mathbf{C}_j$实际上就是\{${\mathbf{h}_1, \mathbf{h}_2,…,\mathbf{h}_m}$\}的一种组合，只不过不同的$\mathbf{h}_i$会根据对目标端的贡献给予不同的权重。图\ref{fig:6-22}展示了上下文向量$\mathbf{C}_j$的计算过程。
+\noindent 其中，$\alpha_{i,j}$是{\small\sffamily\bfseries{注意力权重}}，它表示目标语第$j$个位置与源语第$i$个位置之间的相关性大小。这里，我们将每个时间步编码器的输出$\mathbf{h}_i$看作源语言位置$i$的表示结果。进行翻译时，解码端可以根据当前的位置$j$，通过控制不同$\mathbf{h}_i$的权重得到$\mathbf{C}_j$，使得对目标语位置$j$贡献大的$\mathbf{h}_i$对$\mathbf{C}_j$的影响增大。也就是说，$\mathbf{C}_j$实际上就是\{${\mathbf{h}_1, \mathbf{h}_2,...,\mathbf{h}_m}$\}的一种组合，只不过不同的$\mathbf{h}_i$会根据对目标端的贡献给予不同的权重。图\ref{fig:6-22}展示了上下文向量$\mathbf{C}_j$的计算过程。

 %----------------------------------------------
 % 图3.10
@@ -810,11 +810,11 @@ $\textrm{P}({y_j | \mathbf{s}_{j-1} ,y_{j-1},\mathbf{C}})$由Softmax实现，Sof

 $\textrm{a}(\cdot)$可以被看作是目标语表示和源语言表示的一种``统一化''，即把源语和目标语表示映射在同一个语义空间，进而语义相近的内容有更大的相似性。该函数有多种计算方式，比如，向量乘、向量夹角、单层神经网络等，如下：
 \begin{eqnarray}
-\textrm{a} (\textbf{s},\textbf{h}) =  \left\{ \begin{array}{ll}
-    \textbf{s} \textbf{h}^{\textrm{T}} & \textrm{向量乘} \\
-    \textrm{cos}(\textbf{s}, \textbf{h}) & \textrm{向量夹角} \\
-    \textbf{s} \textbf{W} \textbf{h}^{\textrm{T}} & \textrm{线性模型} \\
-    \textrm{TanH}(\textbf{W}[\textbf{s},\textbf{h}])\textbf{v}^{\textrm{T}} & \textrm{拼接}[\textbf{s},\textbf{h}]+\textrm{单层网络}
+\textrm{a} (\mathbf{s},\mathbf{h}) =  \left\{ \begin{array}{ll}
+    \mathbf{s} \mathbf{h}^{\textrm{T}} & \textrm{向量乘} \\
+    \textrm{cos}(\mathbf{s}, \mathbf{h}) & \textrm{向量夹角} \\
+    \mathbf{s} \mathbf{W} \mathbf{h}^{\textrm{T}} & \textrm{线性模型} \\
+    \textrm{TanH}(\mathbf{W}[\mathbf{s},\mathbf{h}])\mathbf{v}^{\textrm{T}} & \textrm{拼接}[\mathbf{s},\mathbf{h}]+\textrm{单层网络}
    \end{array}
    \right.
 \label{eqC6.24}
@@ -937,14 +937,14 @@ $\textrm{a}(\cdot)$可以被看作是目标语表示和源语言表示的一种`
 \parinterval 将公式\ref{eqC6.29}应用于神经机器翻译有几个基本问题需要考虑：1）损失函数的选择；2）参数初始化的策略，也就是如何设置$\mathbf{w}_0$；3）优化策略和学习率调整策略；4）训练加速。下面我们对这些问题进行讨论。
 %%%%%%%%%%%%%%%%%%
 \subsubsection{损失函数}\index{Chapter6.3.5.1}
-\parinterval 因为神经机器翻译在每个目标语位置都会输出一个概率分布，表示这个位置上不同单词出现的可能性，因此我们需要知道当前位置输出的分布相比于标准答案的``损失''。对于这个问题，常用的是交叉熵损失函数\footnote{\ \ 百度百科：\url{https://baike.baidu.com/item/\%E4\%BA\%A4\%E5\%8F\%89\%E7\%86\%B5/8983241?fr=aladdin}}。令$\mathbf{y}$表示机器翻译模型输出的分布，$\hat{\mathbf{y}}$表示标准答案，则交叉熵损失可以被定义为$L_{\textrm{ce}}(\mathbf{y},\hat{\mathbf{y}}) = - \sum_{k=1}^{|V|} \mathbf{y}[k] \textrm{log} (\hat{\mathbf{y}}[k])$，其中$\mathbf{y}[k]$ 和$\hat{\mathbf{y}}[k]$分别表示向量$\mathbf{y}$和$\hat{\mathbf{y}}$的第$k$维，$|V|$表示输出向量得维度（等于词表大小）。对于一个模型输出的概率分布$\mathbf{Y} = \{ \mathbf{y}_1,\mathbf{y}_2,…, \mathbf{y}_n \}$和标准答案分布$\hat{\mathbf{Y}}=\{ \hat{\mathbf{y}}_1, \hat{\mathbf{y}}_2,…,\hat{\mathbf{y}}_n \}$，损失函数可以被定义为
+\parinterval 因为神经机器翻译在每个目标语位置都会输出一个概率分布，表示这个位置上不同单词出现的可能性，因此我们需要知道当前位置输出的分布相比于标准答案的``损失''。对于这个问题，常用的是交叉熵损失函数\footnote{\ \ 百度百科：\url{https://baike.baidu.com/item/\%E4\%BA\%A4\%E5\%8F\%89\%E7\%86\%B5/8983241?fr=aladdin}}。令$\mathbf{y}$表示机器翻译模型输出的分布，$\hat{\mathbf{y}}$表示标准答案，则交叉熵损失可以被定义为$L_{\textrm{ce}}(\mathbf{y},\hat{\mathbf{y}}) = - \sum_{k=1}^{|V|} \hat{\mathbf{y}}[k] \textrm{log} (\mathbf{y}[k])$，其中$\mathbf{y}[k]$ 和$\hat{\mathbf{y}}[k]$分别表示向量$\mathbf{y}$和$\hat{\mathbf{y}}$的第$k$维，$|V|$表示输出向量的维度（等于词表大小）。对于一个模型输出的概率分布$\mathbf{Y} = \{ \mathbf{y}_1,\mathbf{y}_2,..., \mathbf{y}_n \}$和标准答案分布$\hat{\mathbf{Y}}=\{ \hat{\mathbf{y}}_1, \hat{\mathbf{y}}_2,...,\hat{\mathbf{y}}_n \}$，损失函数可以被定义为
 %-------------
 \begin{eqnarray}
 L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{\mathbf{y}}_j)
 \label{eqC6.30}
 \end{eqnarray}

-\parinterval 公式\ref{eqC6.30}是一种非常通用的损失函数形式，除了交叉熵，我们也可以使用其它的损失函数，这时只需要替换$L_{ce} (\cdot)$即可。这里使用交叉熵损失函数的好处在于，它非常容易优化，特别是与Softmax组合，其反向传播的实现非常高效。此外，交叉熵损失（在一定条件下）也对应了极大似然的思想，这种方法在自然语言处理中已经被证明是非常有效的。
+\parinterval 公式\ref{eqC6.30}是一种非常通用的损失函数形式，除了交叉熵，我们也可以使用其他的损失函数，这时只需要替换$L_{\textrm{ce}} (\cdot)$即可。这里使用交叉熵损失函数的好处在于，它非常容易优化，特别是与Softmax组合，其反向传播的实现非常高效。此外，交叉熵损失（在一定条件下）也对应了极大似然的思想，这种方法在自然语言处理中已经被证明是非常有效的。

 \parinterval 除了交叉熵，很多系统也使用了面向评价的损失函数，比如，直接利用评价指标BLEU定义损失函数。不过这类损失函数往往不可微分，因此无法直接获取梯度。这时可以引入强化学习技术，通过策略梯度等方法进行优化。不过这类方法需要采样等手段，这里不做重点讨论，相关内容会在后面前沿技术部分进行介绍。
 %%%%%%%%%%%%%%%%%%
@@ -991,7 +991,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \subsubsection{梯度裁剪}\index{Chapter6.3.5.4}
 \parinterval 需要注意的是，训练循环神经网络时，反向传播使得网络层之间的梯度重复相乘，在网络层数过深时，如果连乘因子小于1可能造成梯度指数级的减少，甚至趋近于0，导致网络无法优化，也就是梯度消失问题。当连乘因子大于1时，可能会导致梯度的乘积变得异常大，造成梯度爆炸的问题。在这种情况下需要使用``梯度裁剪''来防止梯度$\pi$超过阈值。具体公式如下：
 \begin{eqnarray}
-\mathbf{w}' = \mathbf{w} \cdot \frac{threshold} {\textrm{max}(threshold,\| \mathbf{w} \|_2)}
+\mathbf{w}' = \mathbf{w} \cdot \frac{\gamma} {\textrm{max}(\gamma,\| \mathbf{w} \|_2)}
 \label{eqC6.32}
 \end{eqnarray}

@@ -1110,7 +1110,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{

 \noindent 这里，$\{ \hat{y}_{j1},...,\hat{y}_{jk} \}$表示对于位置$j$翻译概率最大的$K$个单词，$\{ \hat{\mathbf{y}}_{<j^{\ast}} \}$表示前$j-1$步top-K单词组成的所有历史。${\hat{\mathbf{y}}_{<j^{\ast}}}$可以被看作是一个集合，里面每一个元素都是一个目标语单词序列，这个序列是前面生成的一系列top-K单词的某种组成。$\textrm{P}(y_j | \{ \hat{\mathbf{y}}_{<{j^{\textrm{*}}}} \},\mathbf{x})$表示基于\{$ \hat{\mathbf{y}}_{<j^{\ast}} $\}的某一条路径生成$y_j$的概率\footnote{严格来说，P$(y_j | {\hat{\mathbf{y}}_{<j^{\ast}} })$不是一个准确的数学表达，这里我们通过这种写法强调$y_j$是由\{$ \hat{\mathbf{y}}_{<j^{\ast}} $\}中的某个译文单词序列作为条件生成的。} 。这种方法也被称为束搜索（beam search），意思是搜索时始终考虑一个集束内的候选。

-\parinterval 不论是贪婪搜索还是束搜索都是一个自左向右的过程，也就是每个位置的处理需要等前面位置处理完才能执行。这是一种典型的自回归模型（autoregressive model），它通常用来描述时序上的随机过程，其中每一个时刻的结果对时序上其它部分的结果有依赖\cite{NIPS2017_7181}。相对应的，也有非自回归模型（non-autoregressive model），它消除了不同时刻结果之间的直接依赖\cite{Gu2017NonAutoregressiveNM}。由于自回归模型是当今神经机器翻译主流的推断方法，这里我们仍以自回归的贪婪搜索和束搜索为基础进行讨论。
+\parinterval 不论是贪婪搜索还是束搜索都是一个自左向右的过程，也就是每个位置的处理需要等前面位置处理完才能执行。这是一种典型的自回归模型（autoregressive model），它通常用来描述时序上的随机过程，其中每一个时刻的结果对时序上其他部分的结果有依赖\cite{NIPS2017_7181}。相对应的，也有非自回归模型（non-autoregressive model），它消除了不同时刻结果之间的直接依赖\cite{Gu2017NonAutoregressiveNM}。由于自回归模型是当今神经机器翻译主流的推断方法，这里我们仍以自回归的贪婪搜索和束搜索为基础进行讨论。
 %%%%%%%%%%%%%%%%%%
 \subsubsection{贪婪搜索}\index{Chapter6.3.6.1}
 \parinterval 图\ref{fig:6-31}展示了一个基于贪婪方法的神经机器翻译解码过程。每一个时间步的单词预测都依赖于其前一步单词的生成。在解码第一个单词时，由于没有之前的单词信息，会用<eos>进行填充，作为起始的单词，且会用一个零向量（可以理解为没有之前时间步的信息）表示第0步的中间层状态。
@@ -1140,15 +1140,15 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \parinterval 贪婪搜索的优点在于速度快。在对翻译速度有较高要求的场景中，贪婪搜索是一种十分有效的加速系统的方法。而且贪婪搜索的原理非常简单，易于快速原型。不过，由于每一步只保留一个最好的局部结果，贪婪搜索往往会带来翻译品质上的损失。
 %%%%%%%%%%%%%%%%%%
 \subsubsection{束搜索}\index{Chapter6.3.6.2}
-\parinterval 束搜索是一种启发式图搜索算法。相比于全搜索，它可以减少搜索所占用的空间和时间，在每一步扩展的时候，剪掉一些质量比较差的结点，保留下一些质量较高的结点。具体到机器翻译任务，对于每一个目标语位置，束搜索选择了概率最大的前$K$个单词进行扩展（其中$K$叫做束宽度，或简称为束宽）。如图\ref{fig:6-33}所示，当$K=3$时，若令\{$y_1, y_2,…, y_n$\}表示生成的目标语序列，则束搜索的具体过程为：在预测第一个位置时，我们通过模型得到$y_1$的概率分布，选取概率最大的前3个单词作为候选结果（假设分别为``have'', ``has'', ``it''）。在预测第二个位置的单词时，模型针对已经得到的三个候选结果（``have'', ``has'', ``it''）计算第二个单词的概率分布。例如，我们可以在将``have''作为第二步的输入，计算$y_2$的概率分布。此时，译文序列的概率为
+\parinterval 束搜索是一种启发式图搜索算法。相比于全搜索，它可以减少搜索所占用的空间和时间，在每一步扩展的时候，剪掉一些质量比较差的结点，保留下一些质量较高的结点。具体到机器翻译任务，对于每一个目标语位置，束搜索选择了概率最大的前$K$个单词进行扩展（其中$K$叫做束宽度，或简称为束宽）。如图\ref{fig:6-33}所示，当$K=3$时，若令\{$y_1, y_2,..., y_n$\}表示生成的目标语序列，则束搜索的具体过程为：在预测第一个位置时，我们通过模型得到$y_1$的概率分布，选取概率最大的前3个单词作为候选结果（假设分别为``have'', ``has'', ``it''）。在预测第二个位置的单词时，模型针对已经得到的三个候选结果（``have'', ``has'', ``it''）计算第二个单词的概率分布。例如，我们可以将``have''作为第二步的输入，计算$y_2$的概率分布。此时，译文序列的概率为
 %--------------------------------------------
 \begin{eqnarray}
 \textrm{P} (y_2,y_1 | \mathbf{x}) & = & \textrm{P} (y_2, \textrm{``have''} | \mathbf{x}) \nonumber \\
-								  & = & \textrm{P}(y_2 | \textrm{``have''} , \mathbf{x}) \textrm{P} (\textrm{``have''} | \mathbf{x})								
+								  & = & \textrm{P}(y_2 | \textrm{``have''} , \mathbf{x}) \cdot \textrm{P} (\textrm{``have''} | \mathbf{x})								
 \label{eqC6.36}
 \end{eqnarray}

-\noindent 类似的，对``has''和``it''进行同样的操作,分别计算得到$ \textrm{P} (y_2, \textrm{``have''} | \mathbf{x})$ ，$ \textrm{P} (y_2, \textrm{``has''} | \mathbf{x})$，\\ $ \textrm{P} (y_2, \textrm{``it''} | \mathbf{x})$，因为$y_2$对应$|V|$种可能，总共可以得到$3 \times |V|$种结果。然后从中选取使序列概率$\textrm{P}(y_2,y_1| \mathbf{x})$最大的前三个$y_2$作为新的输出结果，这样我们便得到了前两个位置的top-3译文。在预测其它位置时也是如此，不断重复此过程直到推断结束。可以看到，束搜索的搜索空间大小与束宽度有关，也就是：束宽度越大，搜索空间越大，更有可能搜索到质量更高的译文，但是搜索会更慢。束宽度等于3，意味着我们每次考虑三个最有可能的结果，贪婪搜索实际上便是集束宽度为1的情况。在神经机器翻译系统实现中，一般束宽度设置在4～8之间。
+\noindent 类似的，对``has''和``it''进行同样的操作,分别计算得到$ \textrm{P} (y_2, \textrm{``have''} | \mathbf{x})$ ，$ \textrm{P} (y_2, \textrm{``has''} | \mathbf{x})$，\\ $ \textrm{P} (y_2, \textrm{``it''} | \mathbf{x})$，因为$y_2$对应$|V|$种可能，总共可以得到$3 \times |V|$种结果。然后从中选取使序列概率$\textrm{P}(y_2,y_1| \mathbf{x})$最大的前三个$y_2$作为新的输出结果，这样我们便得到了前两个位置的top-3译文。在预测其他位置时也是如此，不断重复此过程直到推断结束。可以看到，束搜索的搜索空间大小与束宽度有关，也就是：束宽度越大，搜索空间越大，更有可能搜索到质量更高的译文，但是搜索会更慢。束宽度等于3，意味着我们每次考虑三个最有可能的结果，贪婪搜索实际上便是集束宽度为1的情况。在神经机器翻译系统实现中，一般束宽度设置在4～8之间。

 %----------------------------------------------
 % 图3.10
@@ -1173,7 +1173,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \end{itemize}


-\parinterval 为了解决上面提到的问题，我们可以使用其它特征与$\textrm{log P}(\mathbf{y} | \mathbf{x})$一起组成新的模型得分$\textrm{score} ( \mathbf{y} , \mathbf{x})$。针对模型倾向于生成短句子的问题，常用的做法是引入惩罚机制。比如，可以定义一个惩罚因子，形式如下：
+\parinterval 为了解决上面提到的问题，我们可以使用其他特征与$\textrm{log P}(\mathbf{y} | \mathbf{x})$一起组成新的模型得分$\textrm{score} ( \mathbf{y} , \mathbf{x})$。针对模型倾向于生成短句子的问题，常用的做法是引入惩罚机制。比如，可以定义一个惩罚因子，形式如下：

 \begin{eqnarray}
 \textrm{lp}(\mathbf{y}) = \frac {(5+ |\mathbf{y}|)^{\alpha}} {(5+1)^{\alpha}}
@@ -1220,7 +1220,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 % 表
 \begin{table}[htp]
 \centering
-\caption{GNMT与其它翻译模型对比\cite{Wu2016GooglesNM}}
+\caption{GNMT与其他翻译模型对比\cite{Wu2016GooglesNM}}
 \label{tab:gnmt vs state-of-the-art models}
 \begin{tabular}{l l l l}
 \multicolumn{1}{l|}{\multirow{2}{*}{\#}} & \multicolumn{2}{c}{BLEU} & \multirow{2}{*}{CPU decoding time} \\
@@ -1249,7 +1249,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \caption{ RNN、CNN、Transformer的对比\cite{NIPS2017_7181}  }
 \label{tab:rnn vs cnn vs trf}
 \begin{tabular}{l | l l l}
-%\rule{0pt}{20pt} \textbf{Layer Type} &\parbox{5em}{\textbf{Complexity per Layer} }&\parbox{6em}{\textbf{Sequential Operations} } &\parbox{6em}{\textbf{Maximum Path Length}}\\ \hline
+%\rule{0pt}{20pt} {\small\sffamily\bfseries{Layer Type}} &\parbox{5em}{\small\sffamily\bfseries{Complexity per Layer} }&\parbox{6em}{\small\sffamily\bfseries{Sequential Operations} } &\parbox{6em}{\small\sffamily\bfseries{Maximum Path Length}}\\ \hline
 \rule{0pt}{20pt} Layer Type & \begin{tabular}[l]{@{}l@{}}Complexity\\ per Layer\end{tabular} & \begin{tabular}[l]{@{}l@{}}Sequential\\ Operations\end{tabular} & \begin{tabular}[l]{@{}l@{}}Maximum\\ Path Length\end{tabular} \\ \hline
 \rule{0pt}{13pt}Self-Attention &$O(n^2\cdot d)$	&$O(1)$	&$O(1)$       \\
 \rule{0pt}{13pt}Recurrent &$O(n \cdot d^2)$		&$O(n)$	&$O(n)$ 	\\
@@ -1259,7 +1259,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \end{table}
 %--------------------------------------

-\parinterval 在Transformer被推出之后，这个模型很快就席卷了整个自然语言处理领域。实际上，Transformer也可以当作一种表示模型，因此也被大量的使用在自然语言处理的其它领域，甚至图像处理和语音处理中也能看到它的影子。比如，目前非常流行的预训练模型BERT就是基于Transformer。表\ref{tab:performence form different models}展示了Transformer在机器翻译上的性能。它能用更少的计算量（FLOPS）达到比其它模型更好的翻译品质\footnote{FLOPS = floating-point operations per second，即每秒浮点运算次数。它是度量计算机运算规模的常用单位} 。
+\parinterval 在Transformer被推出之后，这个模型很快就席卷了整个自然语言处理领域。实际上，Transformer也可以当作一种表示模型，因此也被大量的使用在自然语言处理的其他领域，甚至图像处理和语音处理中也能看到它的影子。比如，目前非常流行的预训练模型BERT就是基于Transformer。表\ref{tab:performence form different models}展示了Transformer在机器翻译上的性能。它能用更少的计算量（FLOPS）达到比其他模型更好的翻译品质\footnote{FLOPS = floating-point operations per second，即每秒浮点运算次数。它是度量计算机运算规模的常用单位} 。

 %----------------------------------------------
 % 表
@@ -1269,12 +1269,12 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \label{tab:performence form different models}
 \begin{tabular}{l l l l}

-\multicolumn{1}{l|}{\multirow{2}{*}{\#}} & \multicolumn{2}{c}{BLEU} & \multirow{2}{*}{\parbox{6em}{Training Cost(FLOPs)}} \\
+\multicolumn{1}{l|}{\multirow{2}{*}{\#}} & \multicolumn{2}{c}{BLEU} & \multirow{2}{*}{\parbox{6em}{Training Cost (FLOPs)}} \\
 \multicolumn{1}{l|}{}                    & EN-DE  & EN-FR  &                                       \\ \hline
 \multicolumn{1}{l|}{GNMT+RL}             & 24.6            & 39.92           & 1.4$\times 10^{20}$                   \\
 \multicolumn{1}{l|}{ConvS2S}             & 25.16           & 40.46           & 1.5$\times 10^{20}$                   \\
 \multicolumn{1}{l|}{MoE}                 & 26.03           & 40.56           & 1.2$\times 10^{20}$                   \\
-\multicolumn{1}{l|}{Transformer(Big)}    & \textbf{28.4}   & \textbf{41.8}   & 2.3$\times 10^{19}$                   \\
+\multicolumn{1}{l|}{Transformer(Big)}    & {\small\sffamily\bfseries{28.4}}   & {\small\sffamily\bfseries{41.8}}   & 2.3$\times 10^{19}$                   \\
 %\multicolumn{4}{l}{Transformer versus previous state-of-the-art models}
 \end{tabular}
 \end{table}
@@ -1284,7 +1284,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{自注意力模型}\index{Chapter6.4.1}
 \label{sec:6.4.1}
-\parinterval 首先，让我们再回顾一下循环神经网络处理文字序列的过程。如图\ref{fig:6-34}所示，对于单词序列$\{ w_1,…,w_m \}$，处理第$m$个单词$w_m$时（绿色方框部分），需要输入前一时刻的信息（即处理单词$w_{m-1}$），而$w_{m-1}$又依赖于$w_{m-2}$，以此类推。也就是说，如果想建立$w_m$和$w_1$之间的关系，需要$m-1$次信息传递。对于长序列来说，词汇之间信息传递距离过长会导致信息在传递过程中丢失，同时这种按顺序建模的方式也使得系统对序列的处理十分缓慢。
+\parinterval 首先，让我们再回顾一下循环神经网络处理文字序列的过程。如图\ref{fig:6-34}所示，对于单词序列$\{ w_1,...,w_m \}$，处理第$m$个单词$w_m$时（绿色方框部分），需要输入前一时刻的信息（即处理单词$w_{m-1}$），而$w_{m-1}$又依赖于$w_{m-2}$，以此类推。也就是说，如果想建立$w_m$和$w_1$之间的关系，需要$m-1$次信息传递。对于长序列来说，词汇之间信息传递距离过长会导致信息在传递过程中丢失，同时这种按顺序建模的方式也使得系统对序列的处理十分缓慢。

 %----------------------------------------------
 % 图3.10
@@ -1296,7 +1296,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \end{figure}
 %----------------------------------------------

-\parinterval 那么能否摆脱这种顺序传递信息的方式，直接对不同位置单词之间的关系进行建模，即将信息传递的距离拉近为1？\textbf{自注意力机制}（Self-Attention）的提出便有效解决了这个问题\cite{DBLP:journals/corr/LinFSYXZB17}。图\ref{fig:6-35}给出了自注意力机制对序列进行建模的示例。对于单词$w_m$，自注意力机制直接建立它与前$m-1$个单词之间的关系。也就是说，$w_m$与序列中所有其它单词的距离都是1。这种方式很好的解决了长距离依赖问题，同时由于单词之间的联系都是相互独立的，因此也大大提高了模型的并行度。
+\parinterval 那么能否摆脱这种顺序传递信息的方式，直接对不同位置单词之间的关系进行建模，即将信息传递的距离拉近为1？{\small\sffamily\bfseries{自注意力机制}}（Self-Attention）的提出便有效解决了这个问题\cite{DBLP:journals/corr/LinFSYXZB17}。图\ref{fig:6-35}给出了自注意力机制对序列进行建模的示例。对于单词$w_m$，自注意力机制直接建立它与前$m-1$个单词之间的关系。也就是说，$w_m$与序列中所有其他单词的距离都是1。这种方式很好地解决了长距离依赖问题，同时由于单词之间的联系都是相互独立的，因此也大大提高了模型的并行度。

 %----------------------------------------------
 % 图3.10
@@ -1330,9 +1330,9 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \label{eqC6.40}
 \end{eqnarray}

-\parinterval 同理，也可以用同样的方法处理这个句子中的其它单词。可以看出，在注意力机制中，我们并不是使用类似于循环神经网络的记忆能力去访问历史信息。序列中所有单词之间的信息都是通过同一种操作（$\mathrm{query}$和$\mathrm{key}$的相关度）进行处理。这样，表示结果$\tilde{\mathbf{h}} (\textrm{``你''})$在包含``你''这个单词的信息的同时，也包含了序列中其它词的信息。也就是，序列中每一个位置的表示结果中，都包含了其它位置的信息。从这个角度说，$\tilde{\mathbf{h}} (\textrm{``你''})$已经不再是单词''你''自身的表示结果，而是一种在单词``你''的位置上的全局信息的表示。
+\parinterval 同理，也可以用同样的方法处理这个句子中的其他单词。可以看出，在注意力机制中，我们并不是使用类似于循环神经网络的记忆能力去访问历史信息。序列中所有单词之间的信息都是通过同一种操作（$\mathrm{query}$和$\mathrm{key}$的相关度）进行处理。这样，表示结果$\tilde{\mathbf{h}} (\textrm{``你''})$在包含``你''这个单词的信息的同时，也包含了序列中其他词的信息。也就是，序列中每一个位置的表示结果中，都包含了其他位置的信息。从这个角度说，$\tilde{\mathbf{h}} (\textrm{``你''})$已经不再是单词``你''自身的表示结果，而是一种在单词``你''的位置上的全局信息的表示。

-\parinterval 通常，也把生成\{ $\tilde{\mathbf{h}}(\mathbf{w}_i)$ \}的过程称为\textbf{特征提取}，而实现这个过程的模型被称为特征提取器。循环神经网络、自注意力模型都是典型的特征提取器。特征提取是神经机器翻译系统的关键步骤，在随后的内容中可以看到自注意力模型是一个非常适合机器翻译任务的特征提取器。
+\parinterval 通常，也把生成\{ $\tilde{\mathbf{h}}(\mathbf{w}_i)$ \}的过程称为{\small\sffamily\bfseries{特征提取}}，而实现这个过程的模型被称为特征提取器。循环神经网络、自注意力模型都是典型的特征提取器。特征提取是神经机器翻译系统的关键步骤，在随后的内容中可以看到自注意力模型是一个非常适合机器翻译任务的特征提取器。

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Transformer架构}\index{Chapter6.4.2}
@@ -1340,18 +1340,18 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \parinterval 图\ref{fig:6-38}展示了经典的Transformer结构。解码器由若干层组成（绿色虚线框就代表一层）。每一层（layer）的输入都是一个向量序列，输出是同样大小的向量序列，而Transformer层的作用是对输入进行进一步的抽象，得到新的表示结果。不过这里的层并不是指单一的神经网络结构，它里面由若干不同的模块组成，包括：

 \begin{itemize}
-\item \textbf{自注意力子层}（self-attention sub-layer）：使用自注意力机制对输入的序列进行新的表示
+\item {\small\sffamily\bfseries{自注意力子层}}（Self-attention Sub-layer）：使用自注意力机制对输入的序列进行新的表示

-\item \textbf{前馈神经网络子层}（feed forward sub-layer）：使用全连接的前馈神经网络对输入向量序列进行进一步变换
+\item {\small\sffamily\bfseries{前馈神经网络子层}}（Feed-forward Sub-layer）：使用全连接的前馈神经网络对输入向量序列进行进一步变换

-\item \textbf{残差连接}（residual connection，标记为``Add''）：对于自注意力子层和前馈神经网络子层，都有一个从输入直接到输出的额外连接，也就是一个跨子层的直连。残差连接可以使深层网络的信息传递更为有效。
+\item {\small\sffamily\bfseries{残差连接}}（Residual Connection，标记为``Add''）：对于自注意力子层和前馈神经网络子层，都有一个从输入直接到输出的额外连接，也就是一个跨子层的直连。残差连接可以使深层网络的信息传递更为有效。

-\item \textbf{层正则化}（layer normalization）：自注意力子层和前馈神经网络子层进行最终输出之前，会对输出的向量进行层正则化，规范结果向量取值范围，这样易于后面进一步的处理。
+\item {\small\sffamily\bfseries{层正则化}}（Layer Normalization）：自注意力子层和前馈神经网络子层进行最终输出之前，会对输出的向量进行层正则化，规范结果向量取值范围，这样易于后面进一步的处理。
 \end{itemize}

-\parinterval 以上操作就构成了Transformer的一层，各个模块执行的顺序可以简单描述为：Self-Attention -> Residual Connection -> Layer Normalization -> Feed Forward Network -> Residual Connection -> Layer Normalization。编码器可以包含多个这样的层，比如，我们可以构建一个六层编码器，每层都只执行上面的操作。最上层的结果作为整个编码的结果，会被传入解码器。
+\parinterval 以上操作就构成了Transformer的一层，各个模块执行的顺序可以简单描述为：Self-Attention $\to$ Residual Connection $\to$ Layer Normalization $\to$ Feed Forward Network $\to$ Residual Connection $\to$ Layer Normalization。编码器可以包含多个这样的层，比如，我们可以构建一个六层编码器，每层都只执行上面的操作。最上层的结果作为整个编码的结果，会被传入解码器。

-\parinterval 解码器的结构与编码器十分类似。它也是由若干层组成，每一层包含编码器中的所有结构，即：自注意力子层、前馈神经网络子层、残差连接和层正则化模块。此外，为了捕捉源语言的信息，解码器又引入了一个额外的\textbf{编码-解码注意力子层}（encoder-decoder attention sub-layer）。这个新的子层，可以帮助模型使用源语言句子的表示信息生成目标语不同位置的表示。编码-解码注意力子层仍然基于自注意力机制，因此它和自注意力子层的结构是相同的，只是$\mathrm{query}$、$\mathrm{key}$、$\mathrm{value}$的定义不同。比如，在解码端，自注意力子层的$\mathrm{query}$、$\mathrm{key}$、$\mathrm{value}$是相同的，它们都等于解码端每个位置的表示。而在编码-解码注意力子层中，$\mathrm{query}$是解码端每个位置的表示，而$\mathrm{key}$和$\mathrm{value}$是相同的，等于编码端每个位置的表示。图\ref{fig:6-37}给出了这两种不同注意力子层输入的区别。
+\parinterval 解码器的结构与编码器十分类似。它也是由若干层组成，每一层包含编码器中的所有结构，即：自注意力子层、前馈神经网络子层、残差连接和层正则化模块。此外，为了捕捉源语言的信息，解码器又引入了一个额外的{\small\sffamily\bfseries{编码-解码注意力子层}}（Encoder-decoder Attention Sub-layer）。这个新的子层，可以帮助模型使用源语言句子的表示信息生成目标语不同位置的表示。编码-解码注意力子层仍然基于自注意力机制，因此它和自注意力子层的结构是相同的，只是$\mathrm{query}$、$\mathrm{key}$、$\mathrm{value}$的定义不同。比如，在解码端，自注意力子层的$\mathrm{query}$、$\mathrm{key}$、$\mathrm{value}$是相同的，它们都等于解码端每个位置的表示。而在编码-解码注意力子层中，$\mathrm{query}$是解码端每个位置的表示，而$\mathrm{key}$和$\mathrm{value}$是相同的，等于编码端每个位置的表示。图\ref{fig:6-37}给出了这两种不同注意力子层输入的区别。

 %----------------------------------------------
 % 图3.30
@@ -1520,7 +1520,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \end{itemize}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{多头注意力}\index{Chapter6.4.6}
-\parinterval Transformer中使用的另一项重要技术是\textbf{多头注意力机制}（Multi-head attention）。``多头''可以理解成将原来的$\mathbf{Q}$、$\mathbf{K}$、$\mathbf{V}$按照隐层维度平均切分成多份。假设切分$h$份，那么最终我们会得到$\mathbf{Q} = \{ \mathbf{q}_1, \mathbf{q}_2,...,\mathbf{q}_h \}$，$\mathbf{K}=\{ \mathbf{k}_1,\mathbf{k}_2,...,\mathbf{k}_h \}$，$\mathbf{V}=\{ \mathbf{v}_1, \mathbf{v}_2,...,\mathbf{v}_h \}$。多头注意力机制就是用每一个切分得到的$\mathbf{q}$，$\mathbf{k}$，$\mathbf{v}$独立的进行注意力计算。即第$i$个头的注意力计算结果$\mathbf{head}_i = \textrm{Attention}(\mathbf{q}_i,\mathbf{k}_i, \mathbf{v}_i)$。
+\parinterval Transformer中使用的另一项重要技术是{\small\sffamily\bfseries{多头注意力机制}}（Multi-head Attention）。``多头''可以理解成将原来的$\mathbf{Q}$、$\mathbf{K}$、$\mathbf{V}$按照隐层维度平均切分成多份。假设切分$h$份，那么最终我们会得到$\mathbf{Q} = \{ \mathbf{q}_1, \mathbf{q}_2,...,\mathbf{q}_h \}$，$\mathbf{K}=\{ \mathbf{k}_1,\mathbf{k}_2,...,\mathbf{k}_h \}$，$\mathbf{V}=\{ \mathbf{v}_1, \mathbf{v}_2,...,\mathbf{v}_h \}$。多头注意力机制就是用每一个切分得到的$\mathbf{q}$，$\mathbf{k}$，$\mathbf{v}$独立地进行注意力计算。即第$i$个头的注意力计算结果$\mathbf{head}_i = \textrm{Attention}(\mathbf{q}_i,\mathbf{k}_i, \mathbf{v}_i)$。

 \parinterval 下面我们根据如图\ref{fig:6-46}详细介绍多头注意力的计算过程：

@@ -1555,7 +1555,7 @@ L(\mathbf{Y},\hat{\mathbf{Y}}) = \sum_{j=1}^n L_{\textrm{ce}}(\mathbf{y}_j,\hat{
 \subsection{残差网络和层正则化}\index{Chapter6.4.7}
 \parinterval Transformer编码器、解码器分别由多层网络组成（通常为6层），每层网络又包含多个子层（自注意力网络、前馈神经网络）。因此Transformer实际上是一个很深的网络结构。再加上前面介绍的点乘注意力机制，包含很多线性和非线性变换；另外，注意力函数Attention($\cdot$)的计算也涉及多层网络，整个网络的信息传递非常复杂。从反向传播的角度来看，每次回传的梯度都会经过若干步骤，容易产生梯度爆炸或者消失。

-\parinterval 解决这个问题的一种办法就是使用\textbf{残差连接}\cite{DBLP:journals/corr/HeZRS15}。残差连接是一种用来训练深层网络的技术，其结构如图\ref{fig:6-47}，即在子层之前通过增加直接连接的方式，将底层信息直接传递给上层。
+\parinterval 解决这个问题的一种办法就是使用{\small\sffamily\bfseries{残差连接}}\cite{DBLP:journals/corr/HeZRS15}。残差连接是一种用来训练深层网络的技术，其结构如图\ref{fig:6-47}，即在子层之前通过增加直接连接的方式，将底层信息直接传递给上层。

 %----------------------------------------------
 % 图3.10
@@ -1682,9 +1682,9 @@ lrate = d_{model}^{-0.5} \cdot \textrm{min} (step^{-0.5} , step \cdot warmup\_st
 \end{figure}
 %----------------------------------------------

-\item Dropout：由于Transformer模型网络结构过于复杂，参数过多，具有很强的学习能力，导致过度拟合训练数据，从而对未见数据的预测结果变差。这种现象也被称作\textbf{过拟合}（over fitting）。为了避免这种现象，Transformer加入了Dropout操作\cite{JMLR:v15:srivastava14a}。Transformer中这四个地方用到了Dropout：词嵌入和位置编码、残差连接、注意力操作和前馈神经网络。Dropout比例通常设置为0.1。
+\item Dropout：由于Transformer模型网络结构过于复杂，参数过多，具有很强的学习能力，导致过度拟合训练数据，从而对未见数据的预测结果变差。这种现象也被称作{\small\sffamily\bfseries{过拟合}}（Overfitting）。为了避免这种现象，Transformer加入了Dropout操作\cite{JMLR:v15:srivastava14a}。Transformer中这四个地方用到了Dropout：词嵌入和位置编码、残差连接、注意力操作和前馈神经网络。Dropout比例通常设置为0.1。

-\item 标签平滑（Label Smoothing）：在计算损失的过程中，需要用预测概率去拟合真实概率。在分类任务中，往往使用one-hot向量代表真实概率，即真实答案位置那一维对应的概率为1，其余维为0，而拟合这种概率分布会造成两个问题：1)无法保证模型的泛化能力，容易造成过拟合；2) 1和0概率鼓励所属类别和其它类别之间的差距尽可能加大，会造成模型过于相信预测的类别。因此Transformer里引入标签平滑\cite{Szegedy_2016_CVPR}来缓解这种现象，简单的说就是给正确答案以外的类别分配一定的概率，而不是采用非0即1的概率。这样，可以学习一个比较平滑的概率分布，从而提升泛化能力，防止过拟合。\\
+\item 标签平滑（Label Smoothing）：在计算损失的过程中，需要用预测概率去拟合真实概率。在分类任务中，往往使用one-hot向量代表真实概率，即真实答案位置那一维对应的概率为1，其余维为0，而拟合这种概率分布会造成两个问题：1)无法保证模型的泛化能力，容易造成过拟合；2) 1和0概率鼓励所属类别和其他类别之间的差距尽可能加大，会造成模型过于相信预测的类别。因此Transformer里引入标签平滑\cite{Szegedy_2016_CVPR}来缓解这种现象，简单的说就是给正确答案以外的类别分配一定的概率，而不是采用非0即1的概率。这样，可以学习一个比较平滑的概率分布，从而提升泛化能力，防止过拟合。\\
 \end{itemize}

 \parinterval 不同的Transformer可以适应不同的任务，常见的Transformer模型有Transformer Base、Transformer Big和Transformer Deep\cite{NIPS2017_7181}\cite{WangLearning}，具体设置如下：
@@ -1735,7 +1735,7 @@ Transformer Deep(48层) & 30.2            & 43.1            & 194$\times 10^{6}$
 %----------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{序列到序列问题及应用}\index{Chapter6.5}
-\parinterval 虽然翻译的目的是进行自然语言文字的转化，但是我们并不需要限制机器翻译只能进行两种语言之间的转换。从某种意义上讲，一个输入序列转化到一个输出序列的过程都可以被看作``翻译''。这类问题通常被称作\textbf{序列到序列}的转换/生成问题（sequence to sequence problem）。而机器翻译模型也是一种典型的序列到序列模型。
+\parinterval 虽然翻译的目的是进行自然语言文字的转化，但是我们并不需要限制机器翻译只能进行两种语言之间的转换。从某种意义上讲，一个输入序列转化到一个输出序列的过程都可以被看作``翻译''。这类问题通常被称作{\small\sffamily\bfseries{序列到序列}}的转换/生成问题（sequence to sequence problem）。而机器翻译模型也是一种典型的序列到序列模型。

 \parinterval 实际上，很多自然语言处理问题都可以被看作是序列到序列的任务。比如，在自动问答中，可以把问题看作是输入序列，把回答看作是输出序列；在自动对联生成中，可以把上联看作是输入序列，把下联看作是输出序列。这样的例子还有很多。对于这类问题，我们都可以使用神经机器翻译来进行建模。比如，使用编码器-解码器框架对输入和输出的序列进行建模。下面我们就来看几个序列到序列的问题，以及如何使用神经机器翻译类似的思想对它们进行求解。
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -1809,7 +1809,7 @@ Transformer Deep(48层) & 30.2            & 43.1            & 194$\times 10^{6}$
 \parinterval 经过几年的积累，神经机器翻译的细分方向已经十分多样，由于篇幅所限，这里也无法覆盖所有内容（虽然笔者尽所能全面介绍相关的基础知识，但是难免会有疏漏）。很多神经机器翻译的模型和方法值得进一步学习和探讨：

 \begin{itemize}
-\item 无论是循环神经网络还是Transformer都有很多变种结构。比如，除了RNN、\\LSTM、GRU，还有其它改进的循环单元结构，如LRN\cite{DBLP:journals/corr/abs-1905-13324}、SRU\cite{Lei2017TrainingRA}、ATR\cite{Zhang2018SimplifyingNM}\\。Transformer是近些年的热门，它也衍生出很多的改进版本，如相对位置编码\cite{Shaw2018SelfAttentionWR}、局部注意力机制\cite{DBLP:journals/corr/abs-1904-03107}、多层信息交互\cite{wang-etal-2018-multi-layer}、深层网络\cite{WangLearning}。此外，其它神经网络架构，如卷积神经网络，也是研发神经机器翻译系统很好的选择\cite{DBLP:journals/corr/GehringAGYD17}\cite{Wu2019PayLA}。最近，也有一些研究者探索异构系统，使用不同的神经网络结构搭建编码器和解码器\cite{Chen2018TheBO}，比如，编码端使用性能更强的Transformer，而解码端使用速度更快的循环神经网络。
+\item 无论是循环神经网络还是Transformer都有很多变种结构。比如，除了RNN、\\LSTM、GRU，还有其他改进的循环单元结构，如LRN\cite{DBLP:journals/corr/abs-1905-13324}、SRU\cite{Lei2017TrainingRA}、ATR\cite{Zhang2018SimplifyingNM}\\。Transformer是近些年的热门，它也衍生出很多的改进版本，如相对位置编码\cite{Shaw2018SelfAttentionWR}、局部注意力机制\cite{DBLP:journals/corr/abs-1904-03107}、多层信息交互\cite{wang-etal-2018-multi-layer}、深层网络\cite{WangLearning}。此外，其他神经网络架构，如卷积神经网络，也是研发神经机器翻译系统很好的选择\cite{DBLP:journals/corr/GehringAGYD17}\cite{Wu2019PayLA}。最近，也有一些研究者探索异构系统，使用不同的神经网络结构搭建编码器和解码器\cite{Chen2018TheBO}，比如，编码端使用性能更强的Transformer，而解码端使用速度更快的循环神经网络。

 \item 注意力机制的使用是机器翻译乃至整个自然语言处理近几年获得成功的重要因素之一\cite{Liu_2019_CVPR}\cite{DBLP:journals/corr/abs-1811-00498}\cite{MoradiInterrogating}。早期，有研究者尝试将注意力机制和统计机器翻译的词对齐进行统一\cite{WangNeural}。近两年，也有研究已经发现注意力模型可以捕捉一些语言现象\cite{DBLP:journals/corr/abs-1905-09418}，比如，在Transformer的多头注意力中，不同头往往会捕捉到不同的信息，比如，有些头对低频词更加敏感，有些头更适合词意消歧，甚至有些头可以捕捉句法信息。此外，由于注意力机制增加了模型的复杂性，而且随着网络层数的增多，神经机器翻译中也存在大量的冗余，因此研发轻量的注意力模型也是具有实践意义的方向\cite{Xiao2019SharingAW}。


--- a/Book/Chapter6/Figures/figure-Beam-search-process.tex
+++ b/Book/Chapter6/Figures/figure-Beam-search-process.tex
@@ -31,13 +31,13 @@
 \node [anchor=west,inner sep=2pt] (s4) at ([xshift=0.3em]s3.east) {\scriptsize{...}};
 }
 {
-\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\scriptsize{softmax}};
+\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\scriptsize{Softmax}};
 }
 {
-\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]s2.north) {\scriptsize{softmax ($\times 3$)}};
+\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]s2.north) {\scriptsize{Softmax ($\times 3$)}};
 }
 {
-\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]s3.north) {\scriptsize{softmax ($\times 3$)}};
+\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]s3.north) {\scriptsize{Softmax ($\times 3$)}};
 \node [anchor=west,inner sep=2pt] (o4) at ([xshift=0.3em]o3.east) {\scriptsize{...}};
 }


--- a/Book/Chapter6/Figures/figure-Decode-the-word-probability-distribution-at-the-first-position.tex
+++ b/Book/Chapter6/Figures/figure-Decode-the-word-probability-distribution-at-the-first-position.tex
@@ -20,7 +20,7 @@
 \node [wnode,anchor=north] (wt1) at ([yshift=-0.8em]t1.south) {\scriptsize{$\langle$eos$\rangle$}};

 {
-\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\scriptsize{softmax}};
+\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\scriptsize{Softmax}};
 }

 {

--- a/Book/Chapter6/Figures/figure-Decoding-process-based-on-greedy-method.tex
+++ b/Book/Chapter6/Figures/figure-Decoding-process-based-on-greedy-method.tex

-
-
-
-
-
 \begin{tikzpicture}
 \begin{scope}
 \tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=2.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
@@ -50,15 +45,15 @@
 \node [anchor=west,inner sep=2pt] (s5) at ([xshift=0.3em]s4.east) {\tiny{...}};
 }
 {
-\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\tiny{softmax}};
+\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\tiny{Softmax}};
 \node [anchor=east] (decoder) at ([xshift=-0.5em]o1.north west) {\scriptsize{\textbf{解码器}}};
 }
 {
-\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]s2.north) {\tiny{softmax}};
+\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]s2.north) {\tiny{Softmax}};
 }
 {
-\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]s3.north) {\tiny{softmax}};
-\node [rnnnode,anchor=south,fill=blue!20] (o4) at ([yshift=1em]s4.north) {\tiny{softmax}};
+\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]s3.north) {\tiny{Softmax}};
+\node [rnnnode,anchor=south,fill=blue!20] (o4) at ([yshift=1em]s4.north) {\tiny{Softmax}};
 \node [anchor=west,inner sep=2pt] (o5) at ([xshift=0.3em]o4.east) {\tiny{...}};
 }
 {
@@ -145,7 +140,7 @@
 \draw [->] ([yshift=-0.3em]s1.west) .. controls +(west:2) and +(-50:0.3) .. (c2.-40);
 }
 {
-\draw [->] (c2.0) -- ([xshift=1.358in]c2.0) -- ([yshift=0.3em,xshift=-1.2em]s2.west) -- ([yshift=0.3em,xshift=-0.1em]s2.west);
+\draw [->] (c2.0) -- ([xshift=1.2in]c2.0) -- ([yshift=0.3em,xshift=-1.2em]s2.west) -- ([yshift=0.3em,xshift=-0.1em]s2.west);
 }

 {

--- a/Book/Chapter6/Figures/figure-Output-layer-structur.tex
+++ b/Book/Chapter6/Figures/figure-Output-layer-structur.tex
@@ -124,7 +124,7 @@
                    \draw [->,thick] ([xshift=0.2em,yshift=0.1em]hidden.north west) -- (target.south west);
                    \draw [->,thick] ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- (target.south east);

-                    \node [anchor=south] () at ([yshift=0.3em]hidden.north) {\scriptsize{$\hat{s}=Ws$}};
+                    \node [anchor=south] () at ([yshift=0.3em]hidden.north) {\scriptsize{$\hat{\mathbf{s}}=\mathbf{Ws}$}};
                }

                {

--- a/Book/Chapter6/Figures/figure-numbers-of-WMT-systems.tex
+++ b/Book/Chapter6/Figures/figure-numbers-of-WMT-systems.tex
@@ -13,33 +13,33 @@
            \node[font=\scriptsize] at (0,4) {20};

            % 2015
-            \node[minimum width=0.5cm,thick,minimum height=7*0.2cm,draw,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2015) at (1.5*0.7,0.5pt) {};
-            \node[minimum width=0.5cm,thick,minimum height=2*0.2cm,draw,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2015) at (smt2015.south east) {};
+            \node[minimum width=0.5cm,thick,minimum height=7*0.2cm,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2015) at (1.5*0.7,0.5pt) {};
+            \node[minimum width=0.5cm,thick,minimum height=2*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2015) at (smt2015.south east) {};
            \node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2015.south east) {2015};
            % 2016
-            \node[minimum width=0.5cm,thick,minimum height=3*0.2cm,draw,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2016) at ($(nmt2015.south east)+(0.7,0)$) {};
-            \node[minimum width=0.5cm,thick,minimum height=8*0.2cm,draw,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2016) at (smt2016.south east) {};
+            \node[minimum width=0.5cm,thick,minimum height=3*0.2cm,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2016) at ($(nmt2015.south east)+(0.7,0)$) {};
+            \node[minimum width=0.5cm,thick,minimum height=8*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2016) at (smt2016.south east) {};
            \node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2016.south east) {2016};
            % 2017
-            \node[minimum width=0.5cm,thick,minimum height=3*0.2cm,draw,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2017) at ($(nmt2016.south east)+(0.7,0)$) {};
-            \node[minimum width=0.5cm,thick,minimum height=13*0.2cm,draw,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2017) at (smt2017.south east) {};
+            \node[minimum width=0.5cm,thick,minimum height=3*0.2cm,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2017) at ($(nmt2016.south east)+(0.7,0)$) {};
+            \node[minimum width=0.5cm,thick,minimum height=13*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2017) at (smt2017.south east) {};
            \node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2017.south east) {2017};
            % 2018
            \node[minimum width=0.5cm,thick,minimum height=0cm,draw,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2018) at ($(nmt2017.south east)+(0.7,0)$) {};
-            \node[minimum width=0.5cm,thick,minimum height=14*0.2cm,draw,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2018) at (smt2018.south east) {};
+            \node[minimum width=0.5cm,thick,minimum height=14*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2018) at (smt2018.south east) {};
            \node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2018.south east) {2018};
             % 2019
            \node[minimum width=0.5cm,thick,minimum height=0cm,draw,fill=blue!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (smt2019) at ($(nmt2018.south east)+(0.7,0)$) {};
-            \node[minimum width=0.5cm,thick,minimum height=21*0.2cm,draw,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2019) at (smt2019.south east) {};
+            \node[minimum width=0.5cm,thick,minimum height=21*0.2cm,fill=red!30!white,inner sep=0pt,outer sep=0pt,anchor=south west] (nmt2019) at (smt2019.south east) {};
            \node[font=\scriptsize,anchor=north] () at ([yshift=-0.2em]smt2019.south east) {2019};
        \end{scope}

        % legend
        \ExtractX{$(nmt2015.west)$}
        \ExtractY{$(WMT.north)$}
-        \node[minimum width=0.5cm,rectangle,draw,fill=blue!30!white,anchor=north west,label={[label distance=1pt,font=\scriptsize]0:统计机器翻译}] () at (\XCoord,\YCoord) {};
+        \node[minimum width=0.5cm,rectangle,fill=blue!30!white,anchor=north west,label={[label distance=1pt,font=\scriptsize]0:统计机器翻译}] () at (\XCoord,\YCoord) {};
        \ExtractX{$(nmt2017.west)$}
-        \node[minimum width=0.5cm,rectangle,draw,fill=red!30!white,anchor=north west,label={[label distance=1pt,font=\scriptsize]0:神经机器翻译}] () at (\XCoord,\YCoord) {};
+        \node[minimum width=0.5cm,rectangle,fill=red!30!white,anchor=north west,label={[label distance=1pt,font=\scriptsize]0:神经机器翻译}] () at (\XCoord,\YCoord) {};

  
       % \node[font=\normalsize,rotate=90] () at ([xshift=-1em]WMT.west) {数量};

--- a/Book/Chapter6/Figures/figure-score-of-mTER.tex
+++ b/Book/Chapter6/Figures/figure-score-of-mTER.tex

-\definecolor{ublue}{rgb}{0.152,0.250,0.545}
+%\definecolor{ublue}{rgb}{0.152,0.250,0.545}
 \begin{tikzpicture}
 \begin{axis}[ 
 width=10cm, height=5cm, 
 symbolic x coords={1-15,16-25,26-35,>35},
 xtick=data,
-ytick={10,12,...,28},
+ytick={6,12,...,28},
 xlabel={句子长度（范围）},
 ylabel={$\%$\footnotesize{mTER}},
 xlabel style={align=center},
@@ -14,17 +14,17 @@ y tick style={opacity=0},
 x tick label style={font=\small},
 y tick label style={font=\small},
 tick align=inside,
-ymajorgrids,
-major grid style={draw=ublue,dashed},
+%major grid style={draw=blue,dashed},
 legend pos=outer north east,
-legend style={anchor=north west,yshift=-1cm},
-ymin=10,
+%legend style={anchor=north west,yshift=-1cm},
+legend style={yshift=-4.5em,xshift=-6em,legend cell align=left,legend plot pos=right},
+ymin=6,
 ymax=28]
-\addplot [sharp plot,very thick,red!60,mark=diamond*] coordinates{(1-15,11.3) (16-25,16.4) (26-35,17) (>35,19.8)};
-\addplot [sharp plot,very thick,purple!60,mark=triangle*] coordinates{(1-15,14.4) (16-25,22.6) (26-35,23.8) (>35,25.9)};
-\addplot [sharp plot,very thick,green!60,mark=square*] coordinates{(1-15,14.9) (16-25,23.7) (26-35,24.7) (>35,26.4)};
-\addplot [sharp plot,very thick,blue!60,mark=*] coordinates{(1-15,17.5) (16-25,24) (26-35,25) (>35,27)};
-\legend{\scriptsize{NMT},\scriptsize{SPB},\scriptsize{HPB},\scriptsize{PBSY}} 
+\addplot [sharp plot,very thick,ublue,mark=diamond*] coordinates{(1-15,11.3) (16-25,16.4) (26-35,17) (>35,19.8)};
+\addplot [sharp plot,very thick,red,mark=triangle*] coordinates{(1-15,14.4) (16-25,22.6) (26-35,23.8) (>35,25.9)};
+\addplot [sharp plot,very thick,ugreen,mark=square*] coordinates{(1-15,14.9) (16-25,23.7) (26-35,24.7) (>35,26.4)};
+\addplot [sharp plot,very thick,orange,mark=*] coordinates{(1-15,17.5) (16-25,24) (26-35,25) (>35,27)};
+\legend{\tiny{NMT},\tiny{SPB},\tiny{HPB},\tiny{PBSY}} 
 \end{axis}
 \end{tikzpicture}


--- a/Book/Chapter6/Figures/figure-softmax.tex
+++ b/Book/Chapter6/Figures/figure-softmax.tex
@@ -18,7 +18,7 @@
  xmax=6,
  ymin=0,
  ymax=1]
-\addplot[draw=ublue,thick]{(tanh(x/2) + 1)/2};
+\addplot[draw=ublue,very thick]{(tanh(x/2) + 1)/2};
 \end{axis}
 \end{tikzpicture}


--- a/Book/ChapterAppend/ChapterAppend.tex
+++ b/Book/ChapterAppend/ChapterAppend.tex
@@ -198,7 +198,7 @@ d_{>1}(\Delta_i|cb,v;\mathbf{s},\mathbf{t}) = \mu_{>1cb}^{-1} \times \sum_{s=1}^

 \parinterval 这里$X_{\{p-1\}}$表示在位置小于$p$的非空对的目标语单词对应的源语单词的平均置位。

-\parinterval 从式(\ref{eq:1.24})中可以看出因子$\delta(v_{\pi_{p1}},v_{\pi_{p1-1}})$保证了，即使对齐$\mathbf{a}$不合理（一个源语位置对应多个目标语位置）也可以避免在这个不合理的对齐上计算结果。需要注意的是因子$\delta(v_{\pi_{p1}},v_{\pi_{p1-1}})$，只能保证$\mathbf{a}$中不合理的部分不产生坏的影响，而$\mathbf{a}$中其它正确的部分仍会参与迭代。
+\parinterval 从式(\ref{eq:1.24})中可以看出因子$\delta(v_{\pi_{p1}},v_{\pi_{p1-1}})$保证了，即使对齐$\mathbf{a}$不合理（一个源语位置对应多个目标语位置）也可以避免在这个不合理的对齐上计算结果。需要注意的是因子$\delta(v_{\pi_{p1}},v_{\pi_{p1-1}})$，只能保证$\mathbf{a}$中不合理的部分不产生坏的影响，而$\mathbf{a}$中其他正确的部分仍会参与迭代。

 \parinterval 不过上面的参数估计过程与前面4个模型中参数估计过程并不完全一样。前面四个模型在每次迭代中，可以在给定$\mathbf{s}$、$\mathbf{t}$和一个对齐$\mathbf{a}$的情况下直接计算并更新参数。但是在模型5的参数估计过程中，如公式(\ref{eq:1.24})中，需要模拟出由$\mathbf{t}$生成$\mathbf{s}$的过程才能得到正确的结果，因为从$\mathbf{t}$、$\mathbf{s}$和$\mathbf{a}$中是不能直接得到 的正确结果的。具体说，就是要从目标语句子的第一个单词开始到最后一个单词结束，依次生成每个目标语单词对应的源语单词，每处理完一个目标语单词就要暂停，然后才能计算式(\ref{eq:1.24})中求和符号里面的内容。这也就是说即使给定了$\mathbf{s}$、$\mathbf{t}$和一个对齐$\mathbf{a}$，也不能直接在它们上计算，必须重新模拟$\mathbf{t}$到$\mathbf{s}$的生成过程。


--- a/Book/bibliography.bib
+++ b/Book/bibliography.bib
@@ -297,11 +297,37 @@ year={2017}}
  year={2016},
  publisher={清华大学出版社}
 }
+
+@article{DBLP:journals/corr/abs-1709-07809,
+  author    = {Philipp Koehn},
+  title     = {Neural Machine Translation},
+  journal   = {CoRR},
+  volume    = {abs/1709.07809},
+  year      = {2017},
+  url       = {http://arxiv.org/abs/1709.07809},
+  archivePrefix = {arXiv},
+  eprint    = {1709.07809},
+  timestamp = {Mon, 13 Aug 2018 16:47:37 +0200},
+  biburl    = {https://dblp.org/rec/journals/corr/abs-1709-07809.bib},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+
+@book{邱锡鹏2020神经网络与深度学习,
+  title={神经网络与深度学习},
+  author={邱锡鹏},
+  year={2020},
+  note={\url{https://nndl.github.io/}}
+}
+@article{赵军峰2019深化改革,
+  title={深化改革 探讨创新 推进发展——全国翻译专业学位研究生教育2019年会综述},
+  author={赵军峰 and 姚恺璇},
+  journal={中国翻译},
+  year={2019},
+}
 %%%%% chapter 1------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


-
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%% chapter 2------------------------------------------------------

@@ -485,9 +511,48 @@ year={2015}
  publisher={Prentice-Hall Englewood Cliffs, NJ}
 }

+@inproceedings{heafield-2011-kenlm,
+    title = "{K}en{LM}: Faster and Smaller Language Model Queries",
+    author = "Heafield, Kenneth",
+    booktitle = "Proceedings of the Sixth Workshop on Statistical Machine Translation",
+    month = jul,
+    year = "2011",
+    address = "Edinburgh, Scotland",
+    publisher = "Association for Computational Linguistics",
+    url = "https://www.aclweb.org/anthology/W11-2123",
+    pages = "187--197"
+}
+
+@inproceedings{wang-etal-2018-niutrans,
+    title = "The {N}iu{T}rans Machine Translation System for {WMT}18",
+    author = "Wang, Qiang  and
+      Li, Bei  and
+      Liu, Jiqiang  and
+      Jiang, Bojian  and
+      Zhang, Zheyang  and
+      Li, Yinqiao  and
+      Lin, Ye  and
+      Xiao, Tong  and
+      Zhu, Jingbo",
+    booktitle = "Proceedings of the Third Conference on Machine Translation: Shared Task Papers",
+    month = oct,
+    year = "2018",
+    address = "Belgium, Brussels",
+    publisher = "Association for Computational Linguistics",
+    url = "https://www.aclweb.org/anthology/W18-6430",
+    doi = "10.18653/v1/W18-6430",
+    pages = "528--534"
+}
+
+@article{stolcke2002srilm,
+	title={SRILM - an extensible language modeling toolkit},
+	author={Stolcke, Andreas},
+	journal={INTERSPEECH},
+	year={2002}
+}
+
 %%%%% chapter 2------------------------------------------------------
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%% chapter 3------------------------------------------------------

@@ -585,6 +650,620 @@ year={2015}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%% chapter 4------------------------------------------------------
+@article{koehn2003statistical,
+title={Statistical phrase-based translation},
+author={Koehn, Philipp and Och, Franz Josef and Marcu, Daniel},
+pages={48--54},
+year={2003}}
+
+@article{chiang2007hierarchical,
+title={Hierarchical Phrase-Based Translation},
+author={Chiang, David},
+journal={Computational Linguistics},
+volume={33},
+number={2},
+pages={201--228},
+year={2007}}
+
+@article{OchA,
+  title={A Systematic Comparison of Various Statistical Alignment Models},
+  author={Och, Franz Josef and Ney, Hermann},
+  journal={Computational Linguistics},
+  volume={29},
+  number={1},
+  pages={19--51}, year={2003},
+}
+
+@article{koehn2002learning,
+title={Learning a Translation Lexicon from Monolingual Corpora},
+author={Koehn, Philipp and Knight, Kevin},
+pages={9--16},
+year={2002}}
+
+@article{Gros2008MSD,
+  title={MSD Recombination Method in Statistical Machine Translation},
+  author={Gros, Jerneja Žganec},
+  journal={AIP Conference Proceedings},
+  year={2008},
+}
+
+@inproceedings{li-etal-2014-neural,
+    title = "A Neural Reordering Model for Phrase-based Translation",
+    author = "Li, Peng  and
+      Liu, Yang  and
+      Sun, Maosong  and
+      Izuha, Tatsuya  and
+      Zhang, Dakun",
+    booktitle = "Proceedings of {COLING} 2014, the 25th International Conference on Computational Linguistics: Technical Papers",
+    month = aug,
+    year = "2014",
+    address = "Dublin, Ireland",
+    publisher = "Dublin City University and Association for Computational Linguistics",
+    url = "https://www.aclweb.org/anthology/C14-1179",
+    pages = "1897--1907",
+}
+
+@article{och2003minimum,
+title={Minimum Error Rate Training in Statistical Machine Translation},
+author={Och, Franz Josef},
+pages={160--167},
+year={2003}}
+
+@article{crammer2003ultraconservative,
+title={Ultraconservative online algorithms for multiclass problems},
+author={Crammer, Koby and Singer, Yoram},
+journal={Journal of Machine Learning Research},
+volume={3},
+pages={951--991},
+year={2003}}
+
+@article{Chiang2012Hope,
+  title={Hope and fear for discriminative training of statistical translation models},
+  author={Chiang, David},
+  journal={Journal of Machine Learning Research},
+  volume={13}, number={1},
+  pages={1159--1187},
+  year={2012},
+}
+
+@article{chiang2005a,
+title={A Hierarchical Phrase-Based Model for Statistical Machine Translation},
+author={Chiang, David},
+pages={263--270},
+year={2005}}
+
+@book{cocke1969programming,
+  title={Programming languages and their compilers: Preliminary notes},
+  author={Cocke, John},
+  year={1969},
+  publisher={New York University}
+}
+
+@article{younger1967recognition,
+  title={Recognition and parsing of context-free languages in time n3},
+  author={Younger, Daniel H},
+  journal={Information and control},
+  volume={10},
+  number={2},
+  pages={189--208},
+  year={1967},
+  publisher={Elsevier}
+}
+
+@article{kasami1966efficient,
+  title={An efficient recognition and syntax-analysis algorithm for context-free languages},
+  author={Kasami, Tadao},
+  journal={Coordinated Science Laboratory Report no. R-257},
+  year={1966},
+  publisher={Coordinated Science Laboratory, University of Illinois at Urbana-Champaign}
+}
+
+@inproceedings{huang2005better,
+  title={Better k-best parsing},
+  author={Huang, Liang and Chiang, David},
+  booktitle={Proceedings of the Ninth International Workshop on Parsing Technology},
+  pages={53--64},
+  year={2005},
+  organization={Association for Computational Linguistics}
+}
+
+@article{wu1997stochastic,
+  title={Stochastic inversion transduction grammars and bilingual parsing of parallel corpora},
+  author={Wu, Dekai},
+  journal={Computational linguistics},
+  volume={23},
+  number={3},
+  pages={377--403},
+  year={1997},
+  publisher={MIT Press}
+}
+
+@inproceedings{liu2006tree,
+  title={Tree-to-string alignment template for statistical machine translation},
+  author={Liu, Yang and Liu, Qun and Lin, Shouxun},
+  booktitle={Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the Association for Computational Linguistics},
+  pages={609--616},
+  year={2006},
+  organization={Association for Computational Linguistics}
+}
+
+@inproceedings{huang2006statistical,
+  title={Statistical syntax-directed translation with extended domain of locality},
+  author={Huang, Liang and Knight, Kevin and Joshi, Aravind},
+  booktitle={Proceedings of AMTA},
+  pages={66--73},
+  year={2006},
+  organization={Cambridge, MA}
+}
+
+@inproceedings{galley2006scalable,
+  title={Scalable inference and training of context-rich syntactic translation models},
+  author={Galley, Michel and Graehl, Jonathan and Knight, Kevin and Marcu, Daniel and DeNeefe, Steve and Wang, Wei and Thayer, Ignacio},
+  booktitle={Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the Association for Computational Linguistics},
+  pages={961--968},
+  year={2006},
+  organization={Association for Computational Linguistics}
+}
+
+@inproceedings{galley2004s,
+  title={What’s in a translation rule?},
+  author={Galley, Michel and Hopkins, Mark and Knight, Kevin and Marcu, Daniel},
+  booktitle={Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics: HLT-NAACL 2004},
+  pages={273--280},
+  year={2004}
+}
+
+@inproceedings{eisner2003learning,
+  title={Learning non-isomorphic tree mappings for machine translation},
+  author={Eisner, Jason},
+  booktitle={The Companion Volume to the Proceedings of 41st Annual Meeting of the Association for Computational Linguistics},
+  pages={205--208},
+  year={2003}
+}
+
+@inproceedings{zhang2008tree,
+  title={A tree sequence alignment-based tree-to-tree translation model},
+  author={Zhang, Min and Jiang, Hongfei and Aw, Aiti and Li, Haizhou and Tan, Chew Lim and Li, Sheng},
+  booktitle={Proceedings of ACL-08: HLT},
+  pages={559--567},
+  year={2008}
+}
+
+@inproceedings{liu2009improving,
+  title={Improving tree-to-tree translation with packed forests},
+  author={Liu, Yang and L{\"u}, Yajuan and Liu, Qun},
+  booktitle={Proceedings of the Joint Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP: Volume 2-Volume 2},
+  pages={558--566},
+  year={2009},
+  organization={Association for Computational Linguistics}
+}
+
+@inproceedings{chiang2010learning,
+  title={Learning to translate with source and target syntax},
+  author={Chiang, David},
+  booktitle={Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics},
+  pages={1443--1452},
+  year={2010},
+  organization={Association for Computational Linguistics}
+}
+
+@inproceedings{galley2004s,
+  title={What’s in a translation rule?},
+  author={Galley, Michel and Hopkins, Mark and Knight, Kevin and Marcu, Daniel},
+  booktitle={Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics: HLT-NAACL 2004},
+  pages={273--280},
+  year={2004}
+}
+
+@inproceedings{galley2006scalable,
+  title={Scalable inference and training of context-rich syntactic translation models},
+  author={Galley, Michel and Graehl, Jonathan and Knight, Kevin and Marcu, Daniel and DeNeefe, Steve and Wang, Wei and Thayer, Ignacio},
+  booktitle={Proceedings of the 21st International Conference on Computational Linguistics and the 44th annual meeting of the Association for Computational Linguistics},
+  pages={961--968},
+  year={2006},
+  organization={Association for Computational Linguistics}
+}
+
+@article{marcu2006spmt:,
+title={SPMT: Statistical Machine Translation with Syntactified Target Language Phrases},
+author={Marcu, Daniel and Wang, Wei and Echihabi, Abdessamad and Knight, Kevin},
+pages={44--52},
+year={2006}}
+
+@inproceedings{Tong2009Better,
+  title={Better Synchronous Binarization for Machine Translation},
+  author={Xiao, Tong and Li, Mu and Zhang, Dongdong and Zhu, Jingbo and Zhou, Ming},
+  booktitle={Conference on Empirical Methods in Natural Language Processing},
+  year={2009},
+}
+
+@inproceedings{Yang2009Improving,
+  title={Improving Tree-to-Tree Translation with Packed Forests},
+  author={Liu, Yang and Lü, Yajuan and Liu, Qun},
+  booktitle={ACL 2009, Proceedings of the 47th Annual Meeting of the Association for Computational Linguistics and the 4th International Joint Conference on Natural Language Processing of the AFNLP, 2-7 August 2009, Singapore},
+  year={2009},
+}
+
+@article{xiao2013unsupervised,
+title={Unsupervised sub-tree alignment for tree-to-tree translation},
+author={Xiao, Tong and Zhu, Jingbo},
+journal={Journal of Artificial Intelligence Research},
+volume={48},
+pages={733--782},
+year={2013}}
+
+@article{liu2009weighted,
+title={Weighted Alignment Matrices for Statistical Machine Translation},
+author={Liu, Yang and Xia, Tian and Xiao, Xinyan and Liu, Qun},
+pages={1017--1026},
+year={2009}}
+
+@article{goodman1999semiring,
+title={Semiring parsing},
+author={Goodman, Joshua T},
+journal={Computational Linguistics},
+volume={25},
+number={4},
+pages={573--605},
+year={1999}}
+
+@article{eisner2002parameter,
+title={Parameter Estimation for Probabilistic Finite-State Transducers},
+author={Eisner, Jason},
+pages={1--8},
+year={2002}}
+
+@article{zhang2006synchronous,
+title={Synchronous Binarization for Machine Translation},
+author={Zhang, Hao and Huang, Liang and Gildea, Daniel and Knight, Kevin},
+pages={256--263},
+year={2006}}
+
+@article{Peter1993The,
+  title={The Mathematics of Statistical Machine Translation: Parameter Estimation},
+  author={Brown, Peter F and Della Pietra, Stephen A and Della Pietra, Vincent J and Mercer, Robert L},
+  journal={Computational Linguistics},
+  volume={19},
+  number={2},
+  pages={263-311},
+  year={1993},
+}
+
+@inproceedings{Blunsom2008A,
+  title={A Discriminative Latent Variable Model for Statistical Machine Translation},
+  author={Blunsom, Phil and Cohn, Trevor and Osborne, Miles},
+  booktitle={Acl, Meeting of the Association for Computational Linguistics, June, Columbus, Ohio, Usa},
+  year={2008},
+}
+
+@inproceedings{Blunsom2009A,
+  title={A Gibbs Sampler for Phrasal Synchronous Grammar Induction},
+  author={Blunsom, Phil and Cohn, Trevor and Dyer, Chris and Osborne, Miles},
+  booktitle={ACL 2009, Proceedings of the 47th Annual Meeting of the Association for Computational Linguistics and the 4th International Joint Conference on Natural Language Processing of the AFNLP, 2-7 August 2009, Singapore},
+  year={2009},
+}
+
+@inproceedings{Cohn2009A,
+  title={A Bayesian Model of Syntax-Directed Tree to String Grammar Induction},
+  author={Cohn, Trevor and Blunsom, Phil},
+  booktitle={Conference on Empirical Methods in Natural Language Processing},
+  year={2009},
+}
+
+@article{smith2006minimum,
+title={Minimum Risk Annealing for Training Log-Linear Models},
+author={Smith, David A and Eisner, Jason},
+pages={787--794},
+year={2006}}
+
+@article{li2009first-,
+title={First- and Second-Order Expectation Semirings with Applications to Minimum-Risk Training on Translation Forests},
+author={Li, Zhifei and Eisner, Jason},
+pages={40--51},
+year={2009}}
+
+@article{watanabe2007online,
+title={Online Large-Margin Training for Statistical Machine Translation},
+author={Watanabe, Taro and Suzuki, Jun and Tsukada, Hajime and Isozaki, Hideki},
+pages={764--773},
+year={2007}}
+
+@inproceedings{Chiang200911,
+  title={11,001 New Features for Statistical Machine Translation},
+  author={Chiang, David and Knight, Kevin and Wang, Wei},
+  booktitle={Human Language Technologies: Conference of the North American Chapter of the Association of Computational Linguistics},
+  year={2009},
+}
+
+@article{Hopkins2011Tuning,
+  title={Tuning as Ranking.},
+  author={Hopkins, Mark and May, Jonathan},
+  year={2011},
+}
+
+@article{XiaoA,
+  title={A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
+  author={Xiao, Tong and Wong, Derek F. and Zhu, Jingbo},
+  journal={IEEE/ACM Transactions on Audio Speech \& Language Processing},
+  volume={24},
+  number={11},
+  pages={2069--2083},
+  year={2016},
+}
+
+@article{de2010hierarchical,
+  title={Hierarchical phrase-based translation with weighted finite-state transducers and shallow-n grammars},
+  author={de Gispert, Adri{\`a} and Iglesias, Gonzalo and Blackwood, Graeme and R. Banga, Eduardo and Byrne, William},
+  journal={Computational linguistics},
+  volume={36},
+  number={3},
+  pages={505--533},
+  year={2010},
+  publisher={MIT Press}
+}
+
+@article{Casacuberta2004Machine,
+  title={Machine Translation with Inferred Stochastic Finite-State Transducers},
+  author={Casacuberta, Francisco and Vidal, Enrique},
+  journal={Computational Linguistics},
+  volume={30},
+  number={2},
+  pages={205-225},
+  year={2004},
+}
+
+@inproceedings{Yang2009Lattice,
+  title={Lattice-based System Combination for Statistical Machine Translation},
+  author={Feng, Yang and Liu, Yang and Mi, Haitao and Liu, Qun and Lü, Yajuan},
+  booktitle={Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing, EMNLP 2009, 6-7 August 2009, Singapore, A meeting of SIGDAT, a Special Interest Group of the ACL},
+  year={2009},
+}
+
+@inproceedings{He2008Indirect,
+  title={Indirect-HMM-based Hypothesis Alignment for Combining Outputs from Machine Translation Systems},
+  author={He, Xiaodong and Yang, Mei and Gao, Jianfeng and Nguyen, Patrick and Moore, Robert},
+  booktitle={2008 Conference on Empirical Methods in Natural Language Processing, EMNLP 2008, Proceedings of the Conference, 25-27 October 2008, Honolulu, Hawaii, USA, A meeting of SIGDAT, a Special Interest Group of the ACL},
+  year={2008},
+}
+
+@inproceedings{Li2009Incremental,
+  title={Incremental HMM Alignment for MT System Combination},
+  author={Li, Chi-Ho and He, Xiaodong and Liu, Yupeng and Xi, Ning},
+  booktitle={International Joint Conference on Acl},
+  year={2009},
+}
+
+@inproceedings{Yang2009Joint,
+  title={Joint Decoding with Multiple Translation Models},
+  author={Liu, Yang and Mi, Haitao and Feng, Yang and Liu, Qun},
+  booktitle={International Joint Conference on Acl},
+  year={2009},
+}
+
+@inproceedings{Mu2009Collaborative,
+  title={Collaborative Decoding: Partial Hypothesis Re-ranking Using Translation Consensus between Decoders},
+  author={Li, Mu and Duan, Nan and Zhang, Dongdong and Li, Chi-Ho and Zhou, Ming},
+  booktitle={ACL 2009, Proceedings of the 47th Annual Meeting of the Association for Computational Linguistics and the 4th International Joint Conference on Natural Language Processing of the AFNLP, 2-7 August 2009, Singapore},
+  year={2009},
+}
+
+@inproceedings{Tong2016Syntactic,
+  title={Syntactic Skeleton-Based Translation},
+  author={Xiao, Tong and Zhu, Jingbo and Zhang, Chunliang and Liu, Tongran},
+  year={2016},
+}
+
+@article{shen2008a,
+title={A New String-to-Dependency Machine Translation Algorithm with a Target Dependency Language Model},
+author={Shen, Libin and Xu, Jinxi and Weischedel, Ralph},
+pages={577--585},
+year={2008}}
+
+@article{TongLanguage,
+  title={Language Modeling for Syntax-Based Machine Translation Using Tree Substitution Grammars: A Case Study on Chinese-English Translation},
+  author={Xiao, Tong and Zhu, Jingbo and Zhu, Muhua},
+}
+
+@article{xiong2006maximum,
+title={Maximum Entropy Based Phrase Reordering Model for Statistical Machine Translation},
+author={Xiong, Deyi and Liu, Qun and Lin, Shouxun},
+pages={521--528},
+year={2006}}
+
+@article{taskar2005a,
+title={A Discriminative Matching Approach to Word Alignment},
+author={Taskar, Ben and Lacoste-Julien, Simon and Klein, Dan},
+pages={73--80},
+year={2005}}
+
+@inproceedings{ilprints729,
+       booktitle = {Seventh International Workshop on Parsing Technologies (IWPT- 2001)},
+           month = {October},
+           title = {Parsing and Hypergraphs},
+          author = {Dan Klein and Christopher D. Manning},
+            year = {2001},
+         journal = {Proceedings of the Seventh International Workshop on Parsing Technologies},
+        keywords = {parsing, algorithms, hypergraphs, natural language},
+             url = {http://ilpubs.stanford.edu:8090/729/},
+        abstract = {While symbolic parsers can be viewed as deduction systems, this view is less natural for probabilistic parsers.  We present a view of parsing as directed hypergraph analysis which naturally covers both symbolic and probabilistic parsing.  We illustrate the approach by showing how a dynamic extension of Dijkstra's algorithm can be used to construct a probabilistic chart parser with an $O(n^3)$ time bound for arbitrary PCFGs, while preserving as much of the flexibility of symbolic chart parsers as allowed by the inherent ordering of probabilistic dependencies.}
+}
+
+@inproceedings{och2004smorgasbord,
+  title={A smorgasbord of features for statistical machine translation},
+  author={Och, Franz Josef and Gildea, Daniel and Khudanpur, Sanjeev and Sarkar, Anoop and Yamada, Kenji and Fraser, Alexander and Kumar, Shankar and Shen, Libin and Smith, David A and Eng, Katherine and others},
+  booktitle={Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics: HLT-NAACL 2004},
+  pages={161--168},
+  year={2004}
+}
+
+@inproceedings{dreyer2015apro,
+  title={APRO: All-pairs ranking optimization for MT tuning},
+  author={Dreyer, Markus and Dong, Yuanzhe},
+  booktitle={Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
+  pages={1018--1023},
+  year={2015}
+}
+
+@inproceedings{bangalore2001computing,
+  title={Computing consensus translation from multiple machine translation systems},
+  author={Bangalore, B and Bordel, German and Riccardi, Giuseppe},
+  booktitle={IEEE Workshop on Automatic Speech Recognition and Understanding, 2001. ASRU'01.},
+  pages={351--354},
+  year={2001},
+  organization={IEEE}
+}
+
+@inproceedings{rosti2007combining,
+  title={Combining outputs from multiple machine translation systems},
+  author={Rosti, Antti-Veikko and Ayan, Necip Fazil and Xiang, Bing and Matsoukas, Spyros and Schwartz, Richard and Dorr, Bonnie},
+  booktitle={Human Language Technologies 2007: The Conference of the North American Chapter of the Association for Computational Linguistics; Proceedings of the Main Conference},
+  pages={228--235},
+  year={2007}
+}
+
+@inproceedings{charniak2001immediate,
+  title={Immediate-head parsing for language models},
+  author={Charniak, Eugene},
+  booktitle={Proceedings of the 39th Annual Meeting on Association for Computational Linguistics},
+  pages={124--131},
+  year={2001},
+  organization={Association for Computational Linguistics}
+}
+
+@inproceedings{och2004smorgasbord,
+  title={A smorgasbord of features for statistical machine translation},
+  author={Och, Franz Josef and Gildea, Daniel and Khudanpur, Sanjeev and Sarkar, Anoop and Yamada, Kenji and Fraser, Alexander and Kumar, Shankar and Shen, Libin and Smith, David A and Eng, Katherine and others},
+  booktitle={Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics: HLT-NAACL 2004},
+  pages={161--168},
+  year={2004}
+}
+
+@article{xiao2011language,
+  title={Language modeling for syntax-based machine translation using tree substitution grammars: A case study on chinese-english translation},
+  author={Xiao, Tong and Zhu, Jingbo and Zhu, Muhua},
+  journal={ACM Transactions on Asian Language Information Processing (TALIP)},
+  volume={10},
+  number={4},
+  pages={1--29},
+  year={2011},
+  publisher={ACM New York, NY, USA}
+}
+
+@inproceedings{mi2008forest,
+  title={Forest-based translation},
+  author={Mi, Haitao and Huang, Liang and Liu, Qun},
+  booktitle={Proceedings of ACL-08: HLT},
+  pages={192--199},
+  year={2008}
+}
+
+@inproceedings{zhu2011improving,
+  title={Improving decoding generalization for tree-to-string translation},
+  author={Zhu, Jingbo and Xiao, Tong},
+  booktitle={Proceedings of the 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies: short papers-Volume 2},
+  pages={418--423},
+  year={2011},
+  organization={Association for Computational Linguistics}
+}
+
+@article{knight1999decoding,
+  title={Decoding complexity in word-replacement translation models},
+  author={Knight, Kevin},
+  journal={Computational linguistics},
+  volume={25},
+  number={4},
+  pages={607--615},
+  year={1999},
+  publisher={MIT Press}
+}
+
+@article{xiao2013bagging,
+  title={Bagging and boosting statistical machine translation systems},
+  author={Xiao, Tong and Zhu, Jingbo and Liu, Tongran},
+  journal={Artificial Intelligence},
+  volume={195},
+  pages={496--527},
+  year={2013},
+  publisher={Elsevier}
+}
+
+@inproceedings{xiao2010empirical,
+  title={An empirical study of translation rule extraction with multiple parsers},
+  author={Xiao, Tong and Zhu, Jingbo and Zhang, Hao and Zhu, Muhua},
+  booktitle={Proceedings of the 23rd International Conference on Computational Linguistics: Posters},
+  pages={1345--1353},
+  year={2010},
+  organization={Association for Computational Linguistics}
+}
+
+@inproceedings{chiang2008online,
+  title={Online large-margin training of syntactic and structural translation features},
+  author={Chiang, David and Marton, Yuval and Resnik, Philip},
+  booktitle={Proceedings of the 2008 conference on empirical methods in natural language processing},
+  pages={224--233},
+  year={2008}
+}
+
+@inproceedings{gildea2003loosely,
+  title={Loosely tree-based alignment for machine translation},
+  author={Gildea, Daniel},
+  booktitle={Proceedings of the 41st annual meeting of the association for computational linguistics},
+  pages={80--87},
+  year={2003}
+}
+
+@inproceedings{koehn2000estimating,
+  title={Estimating word translation probabilities from unrelated monolingual corpora using the EM algorithm},
+  author={Koehn, Philipp and Knight, Kevin},
+  booktitle={AAAI/IAAI},
+  pages={711--715},
+  year={2000}
+}
+
+@inproceedings{koehn2007factored,
+  title={Factored translation models},
+  author={Koehn, Philipp and Hoang, Hieu},
+  booktitle={Proceedings of the 2007 joint conference on empirical methods in natural language processing and computational natural language learning (EMNLP-CoNLL)},
+  pages={868--876},
+  year={2007}
+}
+
+@article{powell1964an,
+title={An efficient method for finding the minimum of a function of several variables without calculating derivatives},
+author={Powell, M J D},
+journal={The Computer Journal},
+volume={7},
+number={2},
+pages={155--162},
+year={1964}}
+
+@article{tinsley2007robust,
+  title={Robust language pair-independent sub-tree alignment},
+  author={Tinsley, John and Zhechev, Ventsislav and Hearne, Mary and Way, Andy},
+  year={2007},
+  publisher={European Association for Machine Translation}
+}
+
+@inproceedings{sun2010exploring,
+  title={Exploring syntactic structural features for sub-tree alignment using bilingual tree kernels},
+  author={Sun, Jun and Zhang, Min and Tan, Chew Lim},
+  booktitle={Proceedings of the 48th annual meeting of the association for computational linguistics},
+  pages={306--315},
+  year={2010},
+  organization={Association for Computational Linguistics}
+}
+
+@inproceedings{sun2010discriminative,
+  title={Discriminative induction of sub-tree alignment using limited labeled data},
+  author={Sun, Jun and Zhang, Min and Tan, Chew Lim},
+  booktitle={Proceedings of the 23rd International Conference on Computational Linguistics},
+  pages={1047--1055},
+  year={2010},
+  organization={Association for Computational Linguistics}
+}
+%%%%% chapter 4------------------------------------------------------
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%%chapter5------------------------------------------------------------

--- a/Book/mt-book-xelatex.tex
+++ b/Book/mt-book-xelatex.tex
@@ -17,7 +17,7 @@
 \usepackage{ctex}
 \setCJKmainfont{SimSun}
 \setCJKmonofont{SimSun}
-\setmainfont{Times New Roman} 
+\setmainfont{Times New Roman}


 %----------------------------------------------------------------------------------------
@@ -58,7 +58,7 @@
 \node[inner sep=0pt] (background) at (current page.center) {\includegraphics[width=\paperwidth]{background.pdf}};
 \draw (current page.center) node [fill=ocre!30!white,fill opacity=0.6,text opacity=1,inner sep=1cm]{\Huge\centering\bfseries\sffamily\parbox[c][][t]{\paperwidth}{\centering 机器翻译：统计建模与深度学习方法\\[15pt] % Book title
 %{\Large 副标题是否需要}\\[20pt] % Subtitle
-{\huge 肖桐}}}; % Author name
+{\LARGE 肖桐\ \ 朱靖波}}}; % Author name
 \end{tikzpicture}
 \vfill
 \endgroup
@@ -71,15 +71,29 @@
 ~\vfill
 \thispagestyle{empty}

-\noindent Copyright \copyright\ 2020 Xiao Tong\\ % Copyright notice
+\noindent Copyright \copyright\ 2020 肖桐\ \ 朱靖波\\ % Copyright notice

-\noindent \textsc{Published by \red{Publisher}}\\ % Publisher
+\noindent \textsc{东北大学自然语言处理实验室\ /\ 小牛翻译}\\ % Publisher

 \noindent \textsc{\url{http://47.105.50.196/NiuTrans/Toy-MT-Introduction/tree/master/Book}}\\ % URL

 \noindent {\red{Licensed under the Creative Commons Attribution-NonCommercial 3.0 Unported License (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{http://creativecommons.org/licenses/by-nc/3.0}. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \textsc{``as is'' basis, without warranties or conditions of any kind}, either express or implied. See the License for the specific language governing permissions and limitations under the License.}}\\ % License information, replace this with your own license (if any)

-\noindent \textit{First printing, \red{March 2019}} % Printing/edition date
+\noindent \textit{First Edition, April 2020}
+
+%----------------------------------------------------------------------------------------
+%	ACKNOWLEDGE PAGE
+%----------------------------------------------------------------------------------------
+
+\newpage
+~\vfill
+\thispagestyle{empty}
+
+{\large
+\noindent {\color{red} 在此感谢所有为本书做出贡献的人} \\
+
+\noindent 曹润柘、曾信、孟霞、单韦乔、姜雨帆、王子扬、刘辉、许诺、李北、刘继强、张哲旸、周书涵、周涛、张裕浩、李炎洋、刘晓倩、牛蕊 \\
+}


 %----------------------------------------------------------------------------------------
@@ -98,12 +112,13 @@
 %	CHAPTERS
 %----------------------------------------------------------------------------------------

-\include{Chapter1/chapter1}
-\include{Chapter2/chapter2}
+%\include{Chapter1/chapter1}
+%\include{Chapter2/chapter2}
 \include{Chapter3/chapter3}
-\include{Chapter5/chapter5}
-\include{Chapter6/chapter6}
-\include{ChapterAppend/chapterappend}
+%\include{Chapter4/chapter4}
+%\include{Chapter5/chapter5}
+%\include{Chapter6/chapter6}
+%\include{ChapterAppend/chapterappend}




--- a/Book/mt-book.tex
+++ b/Book/mt-book.tex
 % !Mode:: "TeX:UTF-8"
 % !TEX encoding = UTF-8 Unicode

-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-% The Legrand Orange Book
-% LaTeX Template
-% Version 2.4 (26/09/2018)
-%
-% This template was downloaded from:
-% http://www.LaTeXTemplates.com
-%
-% Original author:
-% Mathias Legrand (legrand.mathias@gmail.com) with modifications by:
-% Vel (vel@latextemplates.com)
-%
-% License:
-% CC BY-NC-SA 3.0 (http://creativecommons.org/licenses/by-nc-sa/3.0/)
-%
-% Compiling this template:
-% This template uses biber for its bibliography and makeindex for its index.
-% When you first open the template, compile it from the command line with the
-% commands below to make sure your LaTeX distribution is configured correctly:
-%
-% 1) pdflatex main
-% 2) makeindex main.idx -s StyleInd.ist
-% 3) biber main
-% 4) pdflatex main x 2
-%
-% After this, when you wish to update the bibliography/index use the appropriate
-% command above and make sure to compile with pdflatex several times
-% afterwards to propagate your changes to the document.
-%
-% This template also uses a number of packages which may need to be
-% updated to the newest versions for the template to compile. It is strongly
-% recommended you update your LaTeX distribution if you have any
-% compilation errors.
-%
-% Important note:
-% Chapter heading images should have a 2:1 width:height ratio,
-% e.g. 920px width and 460px height.
-%
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
 %----------------------------------------------------------------------------------------
 %	PACKAGES AND OTHER DOCUMENT CONFIGURATIONS
 %----------------------------------------------------------------------------------------
@@ -61,7 +21,7 @@

 %\IfFileExists{C:/WINDOWS/win.ini}
 {\newcommand{\mycfont}{song}}
-%{\newcommand{\mycfont}{gbsn}}
+{\newcommand{\mycfont}{gbsn}}

 %公式字体设置为计算机现代罗马
 \AtBeginDocument{
@@ -98,7 +58,7 @@
 \node[inner sep=0pt] (background) at (current page.center) {\includegraphics[width=\paperwidth]{background.pdf}};
 \draw (current page.center) node [fill=ocre!30!white,fill opacity=0.6,text opacity=1,inner sep=1cm]{\Huge\centering\bfseries\sffamily\parbox[c][][t]{\paperwidth}{\centering 机器翻译：统计建模与深度学习方法\\[15pt] % Book title
 %{\Large 副标题是否需要}\\[20pt] % Subtitle
-{\huge 肖桐}}}; % Author name
+{\LARGE 肖桐\ \ 朱靖波}}}; % Author name
 \end{tikzpicture}
 \vfill
 \endgroup
@@ -111,38 +71,51 @@
 ~\vfill
 \thispagestyle{empty}

-\noindent Copyright \copyright\ 2020 Xiao Tong\\ % Copyright notice
+\noindent Copyright \copyright\ 2020 肖桐\ \ 朱靖波\\ % Copyright notice

-\noindent \textsc{Published by \red{Publisher}}\\ % Publisher
+\noindent \textsc{东北大学自然语言处理实验室\ /\ 小牛翻译}\\ % Publisher

 \noindent \textsc{\url{http://47.105.50.196/NiuTrans/Toy-MT-Introduction/tree/master/Book}}\\ % URL

 \noindent {\red{Licensed under the Creative Commons Attribution-NonCommercial 3.0 Unported License (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{http://creativecommons.org/licenses/by-nc/3.0}. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \textsc{``as is'' basis, without warranties or conditions of any kind}, either express or implied. See the License for the specific language governing permissions and limitations under the License.}}\\ % License information, replace this with your own license (if any)

-\noindent \textit{First printing, \red{March 2019}} % Printing/edition date
+\noindent \textit{First Edition, April 2020}

 %----------------------------------------------------------------------------------------
-%	TABLE OF CONTENTS
+%	ACKNOWLEDGE PAGE
 %----------------------------------------------------------------------------------------

-\chapterimage{chapter_head_1.pdf} %目录标题的图案
+\newpage
+~\vfill
+\thispagestyle{empty}

-\pagestyle{empty} % Disable headers and footers for the following pages
+{\large
+\noindent {\color{red} 在此感谢所有为本书做出贡献的人} \\

-\tableofcontents % 打印目录
+\noindent 曹润柘、曾信、孟霞、单韦乔、姜雨帆、王子扬、刘辉、许诺、李北、刘继强、张哲旸、周书涵、周涛、张裕浩、李炎洋、刘晓倩、牛蕊 \\
+}

-\cleardoublepage %保证章节页在奇数页

+%----------------------------------------------------------------------------------------
+%	TABLE OF CONTENTS
+%----------------------------------------------------------------------------------------
+%\usechapterimagefalse % If you don't want to include a chapter image, use this to toggle images off - it can be enabled later with \usechapterimagetrue
+\chapterimage{chapter_head_1.pdf} %目录标题的图案
+\pagestyle{empty} % Disable headers and footers for the following pages
+\tableofcontents % 打印目录
+\cleardoublepage %保证章节页在奇数页
 \pagestyle{fancy} % Enable headers and footers again

 %----------------------------------------------------------------------------------------
 %	CHAPTERS
 %----------------------------------------------------------------------------------------
-\include{Chapter1/chapter1}
+%\include{Chapter1/chapter1}
 \include{Chapter2/chapter2}
-\include{Chapter3/chapter3}
-\include{Chapter5/chapter5}
-\include{Chapter6/chapter6}
+%\include{Chapter3/chapter3}
+%\include{Chapter4/chapter4}
+%\include{Chapter5/chapter5}
+%\include{Chapter6/chapter6}
+%\include{ChapterAppend/chapterappend}


 %----------------------------------------------------------------------------------------

--- a/Book/structure.tex
+++ b/Book/structure.tex
@@ -74,7 +74,9 @@
 %	BIBLIOGRAPHY AND INDEX
 %----------------------------------------------------------------------------------------

-\usepackage[style=numeric,citestyle=numeric,sorting=nyt,sortcites=true,autopunct=true,babel=hyphen,hyperref=true,abbreviate=false,backref=true,backend=biber]{biblatex}
+\usepackage[style=numeric,citestyle=numeric,sorting=nyt,sortcites=true,maxbibnames=4,minbibnames=3,autopunct=true,babel=hyphen,hyperref=true,abbreviate=false,backref=true,backend=biber]{biblatex}
+%maxbibnames 设置参考文献最多显示作者数目
+%minbibnames 如果作者数目超过maxbibnames，则只显示minbibnames个作者
 \addbibresource{bibliography.bib} % BibTeX bibliography file
 \defbibheading{bibempty}{}

@@ -454,6 +456,12 @@ innerbottommargin=5pt]{cBox}
 \fi}

 %----------------------------------------------------------------------------------------
+%	SPECIAL FONTS
+%----------------------------------------------------------------------------------------
+
+\newcommand\bfnew[1]{{\sffamily\bfseries #1}} % Sans-serif bold for #1 only; the inner group keeps the font declarations from leaking into the text that follows the call.
+
+%----------------------------------------------------------------------------------------
 %	CHAPTER HEADINGS
 %----------------------------------------------------------------------------------------

@@ -519,6 +527,7 @@ innerbottommargin=5pt]{cBox}
 \bookmarksetup{
 open,
 numbered,
+depth=2, %设置PDF的书签级别,2显示到subsection,3显示到subsubsection
 addtohook={%
 \ifnum\bookmarkget{level}=0 % chapter
 \bookmarksetup{bold}%
@@ -562,6 +571,12 @@ addtohook={%
 \usepackage{appendix}
 \usepackage{pgfplots}
 \usepackage{tikz}
+
+%----------------------------------------------------------------------------------------
+%	Chapter 4
+%----------------------------------------------------------------------------------------
+\usepackage{pgffor}%图片中使用\foreach语句
+
 %----------------------------------------------------------------------------------------
 %	Chapter 6
 %----------------------------------------------------------------------------------------