17图

c4d60e0e · 孟霞 · b26c251b · c4d60e0e · c4d60e0e · c4d60e0e
Commit c4d60e0e authored Jan 13, 2021 by 孟霞
--- a/Chapter17/Figures/figure-cache.tex
+++ b/Chapter17/Figures/figure-cache.tex
 \begin{tikzpicture}
 %\tikzstyle{every node}=[scale=0.8]
-	\tikzstyle{prob}=[minimum width=0.4em, fill=blue!15,inner sep=0pt]
+	\tikzstyle{prob}=[minimum width=0.4em, fill=blue!20,inner sep=0pt]
-\node[draw,fill=yellow!15,inner sep=0pt,minimum width=3em,minimum height=5em](key) at (0,0){};
+\node[draw,fill=yellow!30,inner sep=0pt,minimum width=3em,minimum height=5em](key) at (0,0){};
 \draw[] ([yshift=0.5em]key.180) -- ([yshift=0.5em]key.0);
 \draw[] ([yshift=1.5em]key.180) -- ([yshift=1.5em]key.0);
 \draw[] ([yshift=-0.5em]key.180) -- ([yshift=-0.5em]key.0);
 \draw[] ([yshift=-1.5em]key.180) -- ([yshift=-1.5em]key.0);
-\node[draw,fill=ugreen!15,inner sep=0pt,minimum width=3em,minimum height=5em](value) at (3em,0){};
+\node[draw,fill=green!30!white,inner sep=0pt,minimum width=3em,minimum height=5em](value) at (3em,0){};
 \draw[] ([yshift=0.5em]value.180) -- ([yshift=0.5em]value.0);
 \draw[] ([yshift=1.5em]value.180) -- ([yshift=1.5em]value.0);
 \draw[] ([yshift=-0.5em]value.180) -- ([yshift=-0.5em]value.0);
@@ -17,10 +17,10 @@
 \node[anchor=south,font=\footnotesize,inner sep=0pt] at ([yshift=0.2em]value.north){value};
 \node[anchor=south,font=\footnotesize,inner sep=0pt] (cache)at ([yshift=2em,xshift=1.5em]key.north){\small\bfnew{缓存}};
-\node[draw,anchor=east,thick,minimum size=1.8em,fill=orange!15] (dt) at ([yshift=2.1em,xshift=-4em]key.west){${\mathbi{d}}_{t}$};
+\node[draw,anchor=east,thick,minimum size=1.8em,fill=orange!30] (dt) at ([yshift=2.1em,xshift=-4em]key.west){${\mathbi{d}}_{t}$};
 \node[anchor=north,font=\footnotesize] (readlab) at ([xshift=2.8em,yshift=0.3em]dt.north){\red{读取}};
-\node[draw,anchor=east,thick,minimum size=1.8em,fill=ugreen!15] (st) at ([xshift=-3.7em]dt.west){${\mathbi{s}}_{t}$};
+\node[draw,anchor=east,thick,minimum size=1.8em,fill=green!30!white] (st) at ([xshift=-3.7em]dt.west){${\mathbi{s}}_{t}$};
-\node[draw,anchor=east,thick,minimum size=1.8em,fill=red!15] (st2) at ([xshift=-0.85em,yshift=3.5em]dt.west){$ \widetilde{\mathbi{s}}_{t}$};
+\node[draw,anchor=east,thick,minimum size=1.8em,fill=red!30] (st2) at ([xshift=-0.85em,yshift=3.5em]dt.west){$ \widetilde{\mathbi{s}}_{t}$};
 %\node[draw,anchor=north,circle,inner sep=0pt, minimum size=1.2em,fill=yellow] (add) at ([yshift=-1em]st2.south){+};
 \node[draw,thick,inner sep=0pt, minimum size=1.1em, circle] (add) at ([yshift=-1.5em]st2.south){};
@@ -29,12 +29,12 @@
 \node[anchor=north,inner sep=0pt,font=\footnotesize,text=red] at ([xshift=-0em,yshift=-0.5em]add.south){融合};
-\node[draw,anchor=east,thick,minimum size=1.8em,fill=yellow!15] (ct) at ([xshift=-2em,yshift=-3.5em]st.west){$ {\mathbi{C}}_{t}$};
+\node[draw,anchor=east,thick,minimum size=1.8em,fill=yellow!30] (ct) at ([xshift=-2em,yshift=-3.5em]st.west){$ {\mathbi{C}}_{t}$};
 \node[anchor=north,font=\footnotesize] (matchlab) at ([xshift=6.7em,yshift=-0.1em]ct.north){\red{匹配}};
 \node[anchor=east] (y) at ([xshift=-6em,yshift=1em]st.west){$\mathbi{y}_{t-1}$};
-\node[draw,anchor=east,minimum width=7em,minimum height=1.4em,fill=blue!20] (output) at ([xshift=-2.6em,yshift=2.6em]st2.west){};
+\node[draw,anchor=east,minimum width=7em,minimum height=1.4em,fill=blue!30] (output) at ([xshift=-2.6em,yshift=2.6em]st2.west){};
 \node[anchor=south] (yt) at ([yshift=4.2em]output.north){$\mathbi{y}_{t}$};
 \draw[] ([xshift=-0.7em]output.90) -- ([xshift=-0.7em]output.-90);

--- a/Chapter17/Figures/figure-layer.tex
+++ b/Chapter17/Figures/figure-layer.tex
@@ -3,38 +3,38 @@
 \tikzstyle{every node}=[scale=0.8]
 \foreach \x in {1,2,3,4}
-	\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!30,rounded corners=1pt] (c1_\x) at (0em+2em*\x, 0em){};
+	\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!35,rounded corners=1pt] (c1_\x) at (0em+2em*\x, 0em){};
 \node[anchor=north] (hpre) at ([yshift=1.8em]c1_1.north) {${\mathbi{h}}^ {\textrm{pre}k}$};
 \foreach \x in {1,2,3}
-	\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!30,rounded corners=1pt] (c2_\x) at (11em+2em*\x, 0em){};
+	\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!35,rounded corners=1pt] (c2_\x) at (11em+2em*\x, 0em){};
 \node[anchor=north] (hpre) at ([yshift=1.8em]c2_1.north) {${\mathbi{h}}^ {\textrm{pre}1}$};
 \foreach \x in {1,2,3,4,5}
-	\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!30,rounded corners=1pt] (c3_\x) at (18.4em+2em*\x, 0em){};,minimum width=1em
+	\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!35,rounded corners=1pt] (c3_\x) at (18.4em+2em*\x, 0em){};
 \foreach \x in {1,2,3,4,5}
-	\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=blue!30,rounded corners=1pt] (c4_\x) at (18.4em+2em*\x, 10.4em){};
+	\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=blue!35,rounded corners=1pt] (c4_\x) at (18.4em+2em*\x, 10.4em){};
-%\node[inner sep=0pt,minimum size=1em,fill=ugreen,circle] (c5) at (9em, 7em){};
+%\node[inner sep=0pt,minimum size=1em,fill=ugreen,circle,thick] (c5) at (9em, 7em){};
-\node[draw,inner sep=0pt,minimum size=1.2em,fill=green!20,circle] (qs) at (18.6em, 6.4em){};
+\node[draw,inner sep=0pt,minimum size=1.2em,fill=green!30!white,circle,thick] (qs) at (18.6em, 6.4em){};
-\node[draw,inner sep=0pt,minimum size=1.2em,fill=green!20,circle] (qw) at (18.6em, 4.4em){};
+\node[draw,inner sep=0pt,minimum size=1.2em,fill=green!30!white,circle,thick] (qw) at (18.6em, 4.4em){};
-\node[anchor=north] (qslab) at ([xshift=-0.8em,yshift=1em]qs.north) {${\mathbi{q}}^s$};
+\node[anchor=north] (qslab) at ([xshift=-0.8em,yshift=1em]qs.north) {${\mathbi{q}}_s$};
-\node[anchor=north] (qwlab) at ([xshift=-0.8em,yshift=1em]qw.north) {${\mathbi{q}}^w$};
+\node[anchor=north] (qwlab) at ([xshift=-0.8em,yshift=1em]qw.north) {${\mathbi{q}}_w$};
-\node[draw,thick,inner sep=0pt, minimum size=1.2em, circle] (sigma) at (24.4em, 8em){};
+\node[draw,thick,inner sep=0pt, minimum size=1.2em, circle,thick] (sigma) at (24.4em, 8em){};
 \draw[-,thick] (sigma.0) -- (sigma.180);
 \draw[-,thick] (sigma.90) -- (sigma.-90);
-\node[draw,fill=orange!30,inner sep=0pt, minimum size=1.2em, circle] (add1) at (5em, 3em){};
+\node[draw,fill=orange!35,inner sep=0pt, minimum size=1.2em, circle,thick] (add1) at (5em, 3em){};
-\node[draw,fill=orange!30,inner sep=0pt, minimum size=1.2em, circle] (add2) at (15em, 3em){};
+\node[draw,fill=orange!35,inner sep=0pt, minimum size=1.2em, circle,thick] (add2) at (15em, 3em){};
-\node[draw,fill=orange!30,inner sep=0pt, minimum size=1.2em, circle] (add3) at (10em, 5.2em){};
+\node[draw,fill=orange!35,inner sep=0pt, minimum size=1.2em, circle,thick] (add3) at (10em, 5.2em){};
-\node[anchor=north] (cond) at ([xshift=-1em,yshift=0.5em]add3.north) {${\mathbi{d}}$};
+\node[anchor=north] (cond) at ([xshift=-1.2em,yshift=0.5em]add3.north) {${\mathbi{d}_t}$};
 \node[anchor=north] (cons1) at ([xshift=-1em,yshift=0.5em]add2.north) {${\mathbi{s}}^1$};
 \node[anchor=north] (consj) at ([xshift=-1em,yshift=0.5em]add1.north) {${\mathbi{s}}^k$};
 \begin{pgfonlayer}{background}
-\node[draw,rounded corners=2pt,drop shadow,fill=white, minimum width=8.3em][fit=(c1_1)(c1_4)](box1){};
+\node[draw,rounded corners=2pt,drop shadow,fill=white, minimum width=8.3em,thick][fit=(c1_1)(c1_4)](box1){};
-\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=6.4em][fit=(c2_1)(c2_3)](box2){};
+\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=6.4em,thick][fit=(c2_1)(c2_3)](box2){};
-\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=10.5em][fit=(c3_1)(c3_5)](box3){};
+\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=10.5em,thick][fit=(c3_1)(c3_5)](box3){};
-\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=10.3em][fit=(c4_1)(c4_5)](box4){};
+\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=10.3em,thick][fit=(c4_1)(c4_5)](box4){};
 %\node[draw,rounded corners=2pt,inner xsep=6pt,drop shadow,fill=white][fit=(c5)](box5){};
 \end{pgfonlayer}
@@ -44,15 +44,15 @@
 \node[text=ublue] at (10.5em, 0em) {\small\bfnew{...}};
 \node[text=ublue] (hh) at (-0.8em, 0em) {\small\bfnew{...}};
-\draw[->,thick, out=70, in=-120] ([yshift=0.1em]c1_1.90) to ([yshift=-0.1em]add1.-90);
+\draw[->,thick, out=70, in=-120] ([yshift=0.1em]c1_1.90) to ([yshift=-0.1em]add1.-120);
 %node[xshift=-0.4em,yshift=1.2em]{$ \mathbi{h}^ {\textrm j}$}
-\draw[->,thick, out=80, in=-100] ([yshift=0.1em]c1_2.90) to ([yshift=-0.1em]add1.-90);
+\draw[->,thick, out=80, in=-100] ([yshift=0.1em]c1_2.90) to ([yshift=-0.1em]add1.-100);
-\draw[->,thick, out=100, in=-80] ([yshift=0.1em]c1_3.90) to ([yshift=-0.1em]add1.-90);
+\draw[->,thick, out=100, in=-80] ([yshift=0.1em]c1_3.90) to ([yshift=-0.1em]add1.-80);
-\draw[->,thick, out=110, in=-60] ([yshift=0.1em]c1_4.90) to ([yshift=-0.1em]add1.-90);
+\draw[->,thick, out=110, in=-60] ([yshift=0.1em]c1_4.90) to ([yshift=-0.1em]add1.-60);
-\draw[->,thick, out=70, in=-110] ([yshift=0.1em]c2_1.90) to ([yshift=-0.1em]add2.-90);
+\draw[->,thick, out=70, in=-110] ([yshift=0.1em]c2_1.90) to ([yshift=-0.1em]add2.-110);
 \draw[->,thick, out=90, in=-90] ([yshift=0.1em]c2_2.90) to ([yshift=-0.1em]add2.-90);
-\draw[->,thick, out=110, in=-70] ([yshift=0.1em]c2_3.90) to ([yshift=-0.1em]add2.-90);
+\draw[->,thick, out=110, in=-70] ([yshift=0.1em]c2_3.90) to ([yshift=-0.1em]add2.-70);
 \draw[->,thick, out=30, in=-130] ([yshift=0.1em]add1.90) to ([yshift=-0.1em]add3.-120);
@@ -67,20 +67,20 @@
 \draw[->,thick] ([yshift=0.1em]sigma.90) -- ([yshift=-0.1em]n2.-90);
 \draw[->,thick] ([yshift=0.1em]n2.90) -- node[right]{$ \widetilde{\mathbi{h}}_{\textrm{t}}$}([yshift=2em]n2.90);
-\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]hh.-180) -- node[font=\scriptsize,text=black,below]{前几个句子}([yshift=-2em]box2.0);
+\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]hh.-180) -- node[font=\footnotesize,text=black,below]{前几个句子}([yshift=-2em]box2.0);
-\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]box3.-180) -- node[font=\scriptsize,text=black,below]{当前句子}([yshift=-2em]box3.0);
+\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]box3.-180) -- node[font=\footnotesize,text=black,below]{当前句子}([yshift=-2em]box3.0);
 \draw[->, thick, rounded corners=2pt] ([yshift=0.1em]add3.90) -- ([yshift=2.1em]add3.90) -- ([xshift=-0.1em]sigma.180);
 %annotation
-\node[fill=red!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a1) at (2em,-4.5em) {};
+%\node[fill=red!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a1) at (2em,-4.5em) {};
-\node[anchor=west,font=\footnotesize] (w1) at ([xshift=0.4em]a1.east) {编码表示};
+%\node[anchor=west,font=\footnotesize] (w1) at ([xshift=0.4em]a1.east) {编码表示};
-\node[anchor=west,fill=orange!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a2) at ([xshift=2em]w1.east) {};
+%\node[anchor=west,fill=orange!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a2) at ([xshift=2em]w1.east) {};
-\node[anchor=west,font=\footnotesize] (w2)at ([xshift=0.4em]a2.east) {层次注意力};
+%\node[anchor=west,font=\footnotesize] (w2)at ([xshift=0.4em]a2.east) {层次注意力};
-\node[anchor=west,fill=blue!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a3) at ([xshift=2em]w2.east) {};
+%\node[anchor=west,fill=blue!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a3) at ([xshift=2em]w2.east) {};
-\node[anchor=west,font=\footnotesize] at ([xshift=0.4em]a3.east) {融合上下文信息的编码表示};
+%\node[anchor=west,font=\footnotesize] at ([xshift=0.4em]a3.east) {融合上下文信息的编码表示};
 \end{tikzpicture}

--- a/Chapter17/Figures/figure-multiencoder.tex
+++ b/Chapter17/Figures/figure-multiencoder.tex
 \definecolor{color1}{rgb}{1,0.725,0.058}
-\tikzstyle{coder} = [rectangle,thick,rounded corners,minimum width=2.8cm,minimum height=1.1cm,text centered,draw=black!70,fill=blue!10,drop shadow]
+\tikzstyle{coder} = [rectangle,thick,rounded corners,minimum width=2.8cm,minimum height=1.1cm,text centered,draw=black,fill=blue!30,drop shadow]
-\tikzstyle{attention} = [rectangle,thick,rounded corners,minimum width=2.6cm,minimum height=0.9cm,text centered,draw=black!70,fill=green!25,drop shadow]
+\tikzstyle{attention} = [rectangle,thick,rounded corners,minimum width=2.6cm,minimum height=0.9cm,text centered,draw=black,fill=green!30!white,drop shadow]
 \begin{tikzpicture}[node distance = 0,scale = 0.7]
 \tikzstyle{every node}=[scale=0.7]
 \node(encoder_c)[coder]{\large{编码器}};
-\node(encoder_s)[coder, right of = encoder_c, xshift=3.5cm, fill=red!25]{\large{编码器}};
+\node(encoder_s)[coder, right of = encoder_c, xshift=3.5cm, fill=red!30]{\large{编码器}};
 \node(h_pre)[above of = encoder_c, yshift=1.3cm,scale=1.3]{${\mathbi{h}}_{\rm pre}$};
 \node(h)[above of = encoder_s, yshift=1.3cm,scale=1.3]{$\mathbi{h}$};
 \node(cir)[circle,very thick, right of = h, draw=black!90,minimum width=0.5cm,xshift=1.1cm]{};
@@ -15,7 +15,7 @@
 \node(current)[below of = encoder_s, yshift=-1.3cm]{\large{当前句子}};
 \node(attention_left)[attention, above of = encoder_c, xshift=2.4cm,yshift=3.1cm]{\large{注意力机制}};
 \node(d)[above of = attention_left, yshift=1.1cm,scale=1.3]{$\mathbi{d}$};
-\node(ground)[rectangle, thick, rounded corners, minimum width=5cm, minimum height=5.5cm, right of = encoder_s, xshift=4.4cm,yshift=2.2cm, draw=black!70, fill=gray!10]{};
+\node(ground)[rectangle, thick, rounded corners, minimum width=5cm, minimum height=5.5cm, right of = encoder_s, xshift=4.4cm,yshift=2.2cm, draw=black, fill=gray!10]{};
 \node(decoder)[above of = encoder_s, xshift=3.1cm]{\large{解码器}};
 \node(attention_right)[attention, right of = attention_left, xshift=5.4cm,yshift=-0.4cm]{\large{注意力机制}};
 \node(target)[right of = current, xshift=5.3cm]{\large{目标语言句子（位置$j$之前）}};

--- a/Chapter17/Figures/figure-twodecoding.tex
+++ b/Chapter17/Figures/figure-twodecoding.tex
-\tikzstyle{encoder} = [rectangle,thick,rounded corners,minimum width=4.3em,minimum height=2.2em,text centered,draw=black!70,fill=red!25]
+\tikzstyle{encoder} = [rectangle,thick,rounded corners,minimum width=4.3em,minimum height=2.2em,text centered,draw=black,fill=red!30]
-\tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=4.3em,minimum height=2.2em,text centered,draw=black!70,fill=blue!15]
+\tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=4.3em,minimum height=2.2em,text centered,draw=black,fill=blue!25]
-\tikzstyle{attention} = [rectangle,thick,rounded corners,minimum width=2.6cm,minimum height=2.2em,text centered,draw=black!70,fill=green!25]
+\tikzstyle{attention} = [rectangle,thick,rounded corners,minimum width=2.6cm,minimum height=2.2em,text centered,draw=black,fill=green!25]
 \begin{tikzpicture}[node distance = 0,scale = 0.75]
 \tikzstyle{every node}=[scale=0.75]
@@ -17,6 +17,7 @@
 \node(attention_below)[attention, right of = decoder_right, xshift=4.8cm]{\normalsize{注意力机制}};
 \node(attention_above)[attention, above of = attention_below, yshift=1.6cm]{\normalsize{注意力机制}};
 \node(ffn)[attention, above of = attention_above, yshift=1.6cm, fill=blue!8]{\normalsize{前馈神经网络}};
+\node [right of = attention_above, xshift=2.35cm,yshift=2.5cm,scale=1.2]{\footnotesize{解码器}};
 \node(n)[right of = attention_above, xshift=2.4cm,scale=1.5]{$\times N$};
 \node(text_2)[above of = ffn, yshift=1.9cm]{\normalsize{基于上下文的修正结果}};
 \node(title_2)[right of = title_1, xshift=6.3cm]{\large\bfnew{二阶段翻译}};

--- a/Chapter17/chapter17.tex
+++ b/Chapter17/chapter17.tex
@@ -453,7 +453,7 @@
 \section{篇章级翻译}
-\parinterval 目前大多数机器翻译系统是句子级的。由于缺少了对篇章上下文信息的建模，在需要依赖上下文的翻译场景中其翻译效果总是不尽人意。篇章级翻译的目的就是对篇章上下文信息进行建模，进而改善机器翻译在整个篇章上的翻译质量。篇章级翻译的概念在很早就已经出现\upcite{DBLP:journals/ac/Bar-Hillel60}，随着近几年神经机器翻译取得了巨大进展，篇章级神经机器翻译也成为了重要的方向\upcite{DBLP:journals/corr/abs-1912-08494,DBLP:journals/corr/abs-1901-09115}。基于此，本节将对篇章级神经机器翻译的若干问题展开讨论。
+\parinterval 目前大多数机器翻译系统是句子级的。由于缺少了对篇章上下文信息的建模，在需要依赖上下文的翻译场景中，模型的翻译效果总是不尽如人意。篇章级翻译的目的就是对篇章上下文信息进行建模，进而改善机器翻译在整个篇章上的翻译质量。篇章级翻译的概念在很早就已经出现\upcite{DBLP:journals/ac/Bar-Hillel60}，随着近几年神经机器翻译取得了巨大进展，篇章级神经机器翻译也成为了重要的方向\upcite{DBLP:journals/corr/abs-1912-08494,DBLP:journals/corr/abs-1901-09115}。基于此，本节将对篇章级神经机器翻译的若干问题展开讨论。
 %----------------------------------------------------------------------------------------
 %    NEW SUB-SECTION
@@ -477,9 +477,9 @@
 \parinterval  不过由于不同语言的特性多种多样，上下文信息在篇章级翻译中的作用也不尽相同。比如，在德语中名词是分词性的，因此在代词翻译的过程中需要根据其先行词的词性进行区分，而这种现象在其它不区分名词词性的语言中是不存在的。这意味着篇章级翻译在不同的语种中可能对应不同的上下文现象。
-\parinterval 正是由于这种上下文现象的多样性，使评价篇章级翻译模型的性能变得相对困难。目前篇章级机器翻译主要针对一些常见的上下文现象，比如代词翻译、省略、连接和词汇衔接等，而{\chapterfour}介绍的BLEU等通用自动评价指标通常对这些上下文依赖现象不敏感，篇章级翻译需要采用一些专用方法来对这些具体现象进行评价。
+\parinterval 正是这种上下文现象的多样性，使得评价篇章级翻译模型的性能变得相对困难。目前篇章级机器翻译主要针对一些常见的上下文现象进行优化，比如代词翻译、省略、连接和词汇衔接等，而{\chapterfour}介绍的BLEU等通用自动评价指标通常对这些上下文依赖现象不敏感，因此篇章级翻译需要采用一些专用方法来对这些具体现象进行评价。
-\parinterval 从对篇章信息建模的角度看，在统计机器翻译时代就已经有大量的研究工作。这些工作大多针对某一具体的上下文现象，比如，篇章结构\upcite{DBLP:conf/anlp/MarcuCW00,foster2010translating,DBLP:conf/eacl/LouisW14}、代词回指\upcite{DBLP:conf/iwslt/HardmeierF10,DBLP:conf/wmt/NagardK10,DBLP:conf/eamt/LuongP16,}、词汇衔接\upcite{tiedemann2010context,DBLP:conf/emnlp/GongZZ11,DBLP:conf/ijcai/XiongBZLL13,xiao2011document}和篇章连接词\upcite{DBLP:conf/sigdial/MeyerPZC11,DBLP:conf/hytra/MeyerP12,}等。区别于篇章级统计机器翻译，篇章级神经机器翻译不需要针对某一具体的上下文现象构造相应的特征，而是通过翻译模型本身从上下文句子中抽取和融合的上下文信息。通常情况下，篇章级机器翻译可以采用局部建模的手段将前一句或者周围几句作为上下文送入模型。针对需要长距离上下文的情况，也可以使用全局建模的手段直接从篇章中所有句子中提取上下文信息。近几年多数研究工作都在探索更有效的局部建模或全局建模方法，主要包括改进输入\upcite{DBLP:conf/discomt/TiedemannS17,DBLP:conf/naacl/BawdenSBH18,DBLP:conf/wmt/GonzalesMS17,DBLP:journals/corr/abs-1910-07481}、多编码器结构\upcite{DBLP:journals/corr/JeanLFC17,DBLP:journals/corr/abs-1805-10163,DBLP:conf/emnlp/ZhangLSZXZL18}、层次结构\upcite{DBLP:conf/naacl/MarufMH19,DBLP:conf/acl/HaffariM18,DBLP:conf/emnlp/YangZMGFZ19,DBLP:conf/ijcai/ZhengYHCB20}以及基于缓存的方法\upcite{DBLP:conf/coling/KuangXLZ18,DBLP:journals/tacl/TuLSZ18}等。
+\parinterval 在统计机器翻译时代就已经有大量的研究工作专注于篇章信息的建模，这些工作大多针对某一具体的上下文现象，比如，篇章结构\upcite{DBLP:conf/anlp/MarcuCW00,foster2010translating,DBLP:conf/eacl/LouisW14}、代词回指\upcite{DBLP:conf/iwslt/HardmeierF10,DBLP:conf/wmt/NagardK10,DBLP:conf/eamt/LuongP16}、词汇衔接\upcite{tiedemann2010context,DBLP:conf/emnlp/GongZZ11,DBLP:conf/ijcai/XiongBZLL13,xiao2011document}和篇章连接词\upcite{DBLP:conf/sigdial/MeyerPZC11,DBLP:conf/hytra/MeyerP12}等。区别于篇章级统计机器翻译，篇章级神经机器翻译不需要针对某一具体的上下文现象构造相应的特征，而是通过翻译模型本身从上下文句子中抽取和融合上下文信息。通常情况下，篇章级机器翻译可以采用局部建模的手段将前一句或者周围几句作为上下文送入模型。针对需要长距离上下文的情况，也可以使用全局建模的手段直接从篇章的所有句子中提取上下文信息。近几年多数研究工作都在探索更有效的局部建模或全局建模方法，主要包括改进输入\upcite{DBLP:conf/discomt/TiedemannS17,DBLP:conf/naacl/BawdenSBH18,DBLP:conf/wmt/GonzalesMS17,DBLP:journals/corr/abs-1910-07481}、多编码器结构\upcite{DBLP:journals/corr/JeanLFC17,DBLP:journals/corr/abs-1805-10163,DBLP:conf/emnlp/ZhangLSZXZL18}、层次结构\upcite{DBLP:conf/naacl/MarufMH19,DBLP:conf/acl/HaffariM18,DBLP:conf/emnlp/YangZMGFZ19,DBLP:conf/ijcai/ZhengYHCB20}以及基于缓存的方法\upcite{DBLP:conf/coling/KuangXLZ18,DBLP:journals/tacl/TuLSZ18}等。
 \parinterval 此外，篇章级机器翻译面临的另外一个挑战是数据稀缺。篇章级机器翻译所需要的双语数据需要保留篇章边界，数量相比于句子级双语数据要少很多。除了在之前提到的端到端方法中采用预训练或者参数共享的手段（见{\chaptersixteen}），也可以采用新的建模手段来缓解数据稀缺问题。这类方法通常将篇章级翻译流程进行分离：先训练一个句子级的翻译模型，再通过一些额外的模块来引入上下文信息。比如，在句子级翻译模型的推断过程中，通过在目标端结合篇章级语言模型引入上下文信息\upcite{DBLP:conf/discomt/GarciaCE19,DBLP:journals/tacl/YuSSLKBD20,DBLP:journals/corr/abs-2010-12827}，或者基于句子级的翻译结果，使用两阶段解码等手段引入上下文信息，进而对句子级翻译结果进行修正\upcite{DBLP:conf/aaai/XiongH0W19,DBLP:conf/acl/VoitaST19,DBLP:conf/emnlp/VoitaST19}。
@@ -491,7 +491,7 @@
 \parinterval BLEU等自动评价指标能够在一定程度上反映译文的整体质量，但是并不能有效地评估篇章级翻译模型的性能。这是由于很多标准测试集中需要篇章上下文的情况比例相对较少。而且，$n$-gram的匹配很难检测到一些具体的语言现象，这使得研究人员很难通过BLEU得分来判断篇章级翻译模型的效果。
-\parinterval 为此，研究人员总结了机器翻译任务中存在的上下文现象，并基于此设计了相应的自动评价指标。比如针对篇章中代词的翻译问题，首先借助词对齐工具确定源语言中的代词在译文和参考答案中的对应位置，然后通过计算译文中代词的准确率和召回率等指标对代词翻译质量进行评价\upcite{DBLP:conf/iwslt/HardmeierF10,DBLP:conf/discomt/WerlenP17}。针对篇章中的词汇衔接，使用{\small\sffamily\bfseries{词汇链}}\index{词汇链}（Lexical Chain\index{Lexical Chain}）\footnote{词汇链指篇章中语义相关的词所构成的序列。}等来获取能够反映词汇衔接质量的分数，然后通过加权的方式与常规的BLEU或METEOR等指标结合在一起\upcite{DBLP:conf/emnlp/WongK12,DBLP:conf/discomt/GongZZ15}。针对篇章中的连接词，使用候选词典和词对齐工具对源语中连接词的正确翻译结果进行计数，计算其准确率\upcite{DBLP:conf/cicling/HajlaouiP13}。
+\parinterval 为此，研究人员总结了机器翻译任务中存在的上下文现象，并基于此设计了相应的自动评价指标。比如针对篇章中代词的翻译问题，首先借助词对齐工具确定源语言中的代词在译文和参考答案中的对应位置，然后通过计算译文中代词的准确率和召回率等指标对代词翻译质量进行评价\upcite{DBLP:conf/iwslt/HardmeierF10,DBLP:conf/discomt/WerlenP17}。针对篇章中的词汇衔接，使用{\small\sffamily\bfseries{词汇链}}\index{词汇链}（Lexical Chain\index{Lexical Chain}）\footnote{词汇链指篇章中语义相关的词所构成的序列。}等来获取能够反映词汇衔接质量的分数，然后通过加权的方式与常规的BLEU或METEOR等指标结合在一起\upcite{DBLP:conf/emnlp/WongK12,DBLP:conf/discomt/GongZZ15}。针对篇章中的连接词，使用候选词典和词对齐工具对源文中连接词的正确翻译结果进行计数，计算其准确率\upcite{DBLP:conf/cicling/HajlaouiP13}。
 \parinterval 除了直接对译文打分，也有一些工作针对特有的上下文现象手工构造了相应的测试套件用于评价翻译质量。测试套件中每一个测试样例都包含一个正确翻译的结果，以及多个错误结果，一个理想的翻译模型应该对正确的翻译结果评价最高，排名在所有错误结果之上，此时就可以根据模型是否能挑选出正确翻译结果来评估其性能。这种方法可以很好地衡量翻译模型在某一特定上下文现象上的处理能力，比如词义消歧\upcite{DBLP:conf/wmt/RiosMS18}、代词翻译\upcite{DBLP:conf/naacl/BawdenSBH18,DBLP:conf/wmt/MullerRVS18}和一些衔接问题\upcite{DBLP:conf/acl/VoitaST19}等。但是该方法也存在使用范围受限于测试集的语种和规模的缺点，因此扩展性较差。
@@ -537,7 +537,7 @@
 \label{eg:17-3-1}
 \end{example}
-\parinterval 其他改进输入的做法相比于拼接的方法要复杂一些，首先需要对篇章进行处理，得到词汇链\footnote{词汇链指篇章中语义相关的词所构成的序列。}或者篇章嵌入等信息\upcite{DBLP:conf/wmt/GonzalesMS17,DBLP:journals/corr/abs-1910-07481}，然后将融入这些信息，与当前句子一起送入模型中。目前，这种预先提取篇章信息的方法是否适合机器翻译还有待论证。
+\parinterval 其他改进输入的做法相比于拼接的方法要复杂一些，首先需要对篇章进行处理，得到词汇链\footnote{词汇链指篇章中语义相关的词所构成的序列。}或者篇章嵌入等信息\upcite{DBLP:conf/wmt/GonzalesMS17,DBLP:journals/corr/abs-1910-07481}，然后将这些信息与当前句子一起送入模型中。目前，这种预先提取篇章信息的方法是否适合机器翻译还有待论证。
 %----------------------------------------------------------------------------------------
 %    NEW SUBSUB-SECTION