% Row of marker nodes c2..c7 (style `cirnode` and node c1 are defined elsewhere).
% c2 hangs with its north anchor 5.5em below c1.south; every following marker is
% attached with its west anchor 0.6em to the right of its predecessor's east anchor.
\node [anchor=north,cirnode] (c2) at ([yshift=-5.5em]c1.south) {};
\foreach \prevc/\thisc in {2/3,3/4,4/5,5/6,6/7}
  \node [anchor=west,cirnode] (c\thisc) at ([xshift=0.6em]c\prevc.east) {};
% Bar placeholders b1..b6: empty 1em-wide nodes whose minimum heights
% (1.6, 4.1, 0.8, 0.4, 0.15, 0.15 em) give the per-word bar heights.
% b1 sits with its south anchor 5em below c1.south; each following bar is placed
% 1.67em to the right of the previous bar's south anchor, so all bars share one baseline.
% Every name is declared exactly once, so later references (e.g. b1.south)
% are unambiguous.
\node [anchor=south,inner sep=0.1pt,minimum height=1.6em,minimum width=1em] (b1) at ([xshift=0em,yshift=-5em]c1.south) {};
\node [anchor=south,inner sep=0.1pt,minimum height=4.1em,minimum width=1em] (b2) at ([xshift=1.67em,yshift=0em]b1.south) {};
\node [anchor=south,inner sep=0.1pt,minimum height=0.8em,minimum width=1em] (b3) at ([xshift=1.67em,yshift=0em]b2.south) {};
\node [anchor=south,inner sep=0.1pt,minimum height=0.4em,minimum width=1em] (b4) at ([xshift=1.67em,yshift=0em]b3.south) {};
\node [anchor=south,inner sep=0.1pt,minimum height=0.15em,minimum width=1em] (b5) at ([xshift=1.67em,yshift=0em]b4.south) {};
\node [anchor=south,inner sep=0.1pt,minimum height=0.15em,minimum width=1em] (b6) at ([xshift=1.67em,yshift=0em]b5.south) {};
{\scriptsize
% Word labels n1..n6, typeset in \scriptsize. n1 is centred 1em below b1.south;
% each following word is chained to the east anchor of the previous one, with small
% per-word x/y nudges for visual alignment.
% Each name is declared once, so n3 (used to place the caption) is unambiguous
% and only one sentence is typeset on this row.
\node [anchor=center] (n1) at ([xshift=0em,yshift=-1em]b1.south){\color{orange}It};
\node [anchor=west] (n2) at ([xshift=0.5em,yshift=0em]n1.east){is};
\node [anchor=west] (n3) at ([xshift=0.5em,yshift=-0.1em]n2.east){a};
\node [anchor=west] (n4) at ([xshift=0.5em,yshift=0.1em]n3.east){\color{blue}nice};
\node [anchor=west] (n5) at ([xshift=0em,yshift=-0.1em]n4.east){day};
\node [anchor=west] (n6) at ([xshift=0em,yshift=0em]n5.east){today};
}
%\node [anchor=west,show] (s1) at (-0.5em,-5.7em){};
%\node [anchor=west,show] (s2) at (-0.2em,-5.6em){};
%\node [anchor=west,show] (s3) at (1.1em,-5.5em){};
%\node [anchor=west,show] (s11) at (1.9em,-5em){};
%\node [anchor=west,show] (s4) at (3em,-4em){};
%\node [anchor=west,show] (s5) at (3.7em,-3em){};
%\node [anchor=west,show] (s12) at (4.2em,-2.2em){};
%\node [anchor=west,show] (s6) at (5.3em,-1.4em){};
%\node [anchor=west,show] (s7) at (6.3em,-2em){};
%\node [anchor=west,show] (s13) at (7.4em,-3em){};
%\node [anchor=west,show] (s8) at (8.4em,-3.9em){};
%\node [anchor=west,show] (s9) at (8.9em,-4.5em){};
%\node [anchor=west,show] (s10) at (9.3em,-5em){};
%\draw[-,blue!60,thick] (-0.5em,-5.7em)..controls (-0.2em,-5.6em) and (1.1em,-5.5em)..(1.9em,-5em)..controls (3em,-4em) and (3.7em,-3em)..(4.2em,-2.2em)..controls (5.3em,-1.4em) and (6.3em,-2em)..(7.4em,-3em)..controls (8.4em,-3.9em) and (8.9em,-4.5em)..(9.3em,-5em);
%\draw[-,blue!60,thick] (-1em,-6em)..controls (0em,-5.7em) and (1em,-5em)..(1.6em,-4.3em)..controls (5.3em,1em) and (7.4em,-2em)..(9.3em,-5em);
% Bell-shaped curve for the "(b)" sub-figure, drawn as four quarter-wave segments:
% cos (flat start -> steep), sin (steep -> flat top), cos (flat top -> steep),
% sin (steep -> flat end). Start and end points are given relative to c1.south;
% the intermediate points are absolute picture coordinates.
\draw [-,blue!60,thick] ([xshift=-1em,yshift=-4.7em]c1.south) cos(2em,-4em) sin (4.4em,-1.7em) cos(7.3em,-4em) sin([xshift=9.3em,yshift=-4.7em]c1.south);
% Single-Bezier variant of the same curve, kept disabled for reference:
%\draw[-,blue!60,thick] ([xshift=-1em,yshift=-4.7em]c1.south)..controls (3.8em,-6em) and (3.9em,3.6em)..([xshift=9.3em,yshift=-4.3em]c1.south);
% Sub-figure caption, placed 1em below and 1em to the right of n3.south.
\node [anchor=north] (l1) at ([xshift=1em,yshift=-1em]n3.south){\small{(b)高斯分布}};
% Pseudo-code rows 4-10 of the algorithm listing. Each row is a node hung 0.1em
% below the south-west corner of the previous row (line6 is defined before this block),
% so all rows are left-aligned. Nesting depth is expressed by the number of \quad's.
% Row 4: loop over the K sentence pairs.
\node [anchor=north west] (line7) at ([yshift=-0.1em]line6.south west) {4: \quad\quad\textbf{foreach} $k=1$ to $K$ \textbf{do}};
% Row 5: expected count of the word pair (s_u, t_v) in the k-th sentence pair (set in \footnotesize).
\node [anchor=north west] (line8) at ([yshift=-0.1em]line7.south west) {5: \quad\quad\quad\footnotesize{$c_{\mathbb{E}}(s_u|t_v;\seq{s}^{[k]},\seq{t}^{[k]})=\sum\limits_{j=1}^{|\seq{s}^{[k]}|}\delta(s_j,s_u)\sum\limits_{i=0}^{|\seq{t}^{[k]}|}\delta(t_i,t_v)\cdot\frac{f(s_u|t_v)}{\sum_{i=0}^{l}f(s_u|t_i)}$}\normalsize{}};
\node [anchor=north west] (line9) at ([yshift=-0.1em]line8.south west) {6: \quad\quad\textbf{foreach} $t_v$ appears in at least one of $\{\seq{t}^{[1]},\dots,\seq{t}^{[K]}\}$ \textbf{do}};
% Row 7: normaliser for t_v. The summation variable is written s'_u so that it does not
% clash with the loop variable s_u introduced in row 8.
\node [anchor=north west] (line10) at ([yshift=-0.1em]line9.south west) {7: \quad\quad\quad$\lambda_{t_v}^{'}=\sum_{s'_u}\sum_{k=1}^{K} c_{\mathbb{E}}(s'_u|t_v;\seq{s}^{[k]},\seq{t}^{[k]})$};
\node [anchor=north west] (line11) at ([yshift=-0.1em]line10.south west) {8: \quad\quad\quad\textbf{foreach} $s_u$ appears in at least one of $\{\seq{s}^{[1]},\dots,\seq{s}^{[K]}\}$ \textbf{do}};
% Row 9: re-estimate f(s_u|t_v) as the summed expected counts divided by the normaliser.
\node [anchor=north west] (line12) at ([yshift=-0.1em]line11.south west) {9: \quad\quad\quad\quad$f(s_u|t_v)=\sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\seq{s}^{[k]},\seq{t}^{[k]})\cdot(\lambda_{t_v}^{'})^{-1}$};
\node [anchor=north west] (line13) at ([yshift=-0.1em]line12.south west) {10: \textbf{return} $f(\cdot|\cdot)$};
\parinterval 在神经网络的有监督学习中,训练模型的数据是由输入和正确答案所组成的样本构成的。假设有多个输入样本$\{{\mathbi{x}}_1,{\mathbi{x}}_2,\dots,{\mathbi{x}}_n\}$,每一个${\mathbi{x}}_i $都对应一个正确答案$\widetilde{\mathbi{y}}_i $,$\{{\mathbi{x}}_i,\widetilde{\mathbi{y}}_i\}$就构成一个优化神经网络的{\small\sffamily\bfseries{训练数据集合}}\index{训练数据集合}(Training Data Set)\index{Training Data Set}。对于一个神经网络模型${\mathbi{y}}=f({\mathbi{x}})$,每个${\mathbi{x}}_i $也会有一个输出${\mathbi{y}}_i $。如果可以度量正确答案$\widetilde{\mathbi{y}}_i $和神经网络输出${\mathbi{y}}_i$之间的偏差,进而通过调整网络参数减小这种偏差,就可以得到更好的模型。
\parinterval 在神经网络的有监督学习中,训练模型的数据是由输入和正确答案所组成的样本构成的。假设有多个输入样本$\{{\mathbi{x}}^{[1]},\dots,{\mathbi{x}}^{[n]}\}$,每一个${\mathbi{x}}^{[i]}$都对应一个正确答案${\mathbi{y}}^{[i]}$,$\{{\mathbi{x}}^{[i]},{\mathbi{y}}^{[i]}\}$就构成一个优化神经网络的{\small\sffamily\bfseries{训练数据集合}}\index{训练数据集合}(Training Data Set)\index{Training Data Set}。对于一个神经网络模型${\mathbi{y}}=f({\mathbi{x}})$,每个${\mathbi{x}}^{[i]}$也会有一个输出${\hat{\mathbi{y}}}^{[i]}$。如果可以度量正确答案${\mathbi{y}}^{[i]}$和神经网络输出${\hat{\mathbi{y}}}^{[i]}$之间的偏差,进而通过调整网络参数减小这种偏差,就可以得到更好的模型。
\parinterval 这里用$ Loss(\widetilde{\mathbi{y}}_i,{\mathbi{y}}_i)$表示网络输出${\mathbi{y}}_i $相对于答案$\widetilde{\mathbi{y}}_i$的损失,简记为$ L $。表\ref{tab:9-3}是几种常见损失函数的定义。需要注意的是,没有一种损失函数可以适用于所有的问题。损失函数的选择取决于许多因素,包括:数据中是否有离群点、模型结构的选择、是否易于找到函数的导数以及预测结果的置信度等。对于相同的神经网络,不同的损失函数会对训练得到的模型产生不同的影响。对于新的问题,如果无法找到已有的、适合于该问题的损失函数,研究人员也可以自定义损失函数。因此设计新的损失函数也是神经网络中有趣的研究方向。
\parinterval 这里用$ Loss({\mathbi{y}}^{[i]},{\hat{\mathbi{y}}}^{[i]})$表示网络输出${\hat{\mathbi{y}}}^{[i]}$相对于答案${\mathbi{y}}^{[i]}$的损失,简记为$ L $。表\ref{tab:9-3}是几种常见损失函数的定义。需要注意的是,没有一种损失函数可以适用于所有的问题。损失函数的选择取决于许多因素,包括:数据中是否有离群点、模型结构的选择、是否易于找到函数的导数以及预测结果的置信度等。对于相同的神经网络,不同的损失函数会对训练得到的模型产生不同的影响。对于新的问题,如果无法找到已有的、适合于该问题的损失函数,研究人员也可以自定义损失函数。因此设计新的损失函数也是神经网络中有趣的研究方向。
\parinterval 对于第$ i $个样本$({\mathbi{x}}_i,\widetilde{\mathbi{y}}_i)$,把损失函数$ L(\widetilde{\mathbi{y}}_i,{\mathbi{y}}_i)$看作是参数$\bm\theta$的函数\footnote{为了简化描述,可以用$
\parinterval 对于第$ i $个样本$({\mathbi{x}}^{[i]},{\mathbi{y}}^{[i]})$,把损失函数$ L({\mathbi{y}}^{[i]},{\hat{\mathbi{y}}}^{[i]})$看作是参数$\bm\theta$的函数\footnote{为了简化描述,可以用$
\item WMT由Special Interest Group for Machine Translation(SIGMT)主办,会议自2006年起每年召开一次,是一个涉及机器翻译多种任务的综合性会议,包括多领域翻译评测任务、质量评价任务以及其他与机器翻译相关的任务(如文档对齐评测等)。现在WMT已经成为机器翻译领域的旗舰评测会议,很多研究工作都以WMT评测结果作为基准。WMT评测涉及的语言范围较广,包括英语、德语、芬兰语、捷克语、罗马尼亚语等十多种语言,翻译方向一般以英语为核心,探索英语与其他语言之间的翻译性能,领域包括新闻、信息技术、生物医学。最近,也增加了无指导机器翻译等热门问题。WMT在评价方面类似于CCMT,也采用人工评价与自动评价相结合的方式,自动评价的指标一般为BLEU、TER等。此外,WMT公开了所有评测数据,因此也经常被机器翻译相关人员所使用。更多WMT的机器翻译评测相关信息可参考SIGMT官网:\url{http://www.sigmt.org/}。