Commit a9a48f6b by 孟霞

合并分支 'master' 到 'mengxia'

Master

查看合并请求 !107
parents db208900 a2313fde
......@@ -45,7 +45,7 @@
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter1/Figures/figure-Required-parts-of-MT}
\input{./Chapter1/Figures/figure-required-parts-of-mt}
\caption{机器翻译系统的组成}
\label{fig:1-2}
\end{figure}
......@@ -220,7 +220,7 @@
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter1/Figures/figure-Example-RBMT}
\input{./Chapter1/Figures/figure-example-rbmt}
\setlength{\belowcaptionskip}{-1.5em}
\caption{基于规则的机器翻译的示例图(左:规则库;右:规则匹配结果)}
\label{fig:1-8}
......@@ -290,7 +290,7 @@
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter1/Figures/figure-Example-SMT}
\input{./Chapter1/Figures/figure-example-smt}
\caption{统计机器翻译的示例图(左:语料资源;中:翻译模型与语言模型;右:翻译假设与翻译引擎)}
\label{fig:1-11}
\end{figure}
......@@ -311,7 +311,7 @@
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter1/Figures/figure-Example-NMT}
\input{./Chapter1/Figures/figure-example-nmt}
\caption{神经机器翻译的示例图(左:编码器-解码器网络;右:编码器示例网络)}
\label{fig:1-12}
\end{figure}
......
......@@ -35,8 +35,8 @@
%----------------------------------------------
\begin{figure}[htp]
\centering
\subfigure[机器翻译系统被看作一个黑盒] {\input{./Chapter2/Figures/figure-MT-system-as-a-black-box} }
\subfigure[机器翻系统 = 前/后处理 + 翻译引擎] {\input{./Chapter2/Figures/figure-MT=language-analysis+translation-engine}}
\subfigure[机器翻译系统被看作一个黑盒] {\input{./Chapter2/Figures/figure-mt-system-as-a-black-box} }
\subfigure[机器翻系统 = 前/后处理 + 翻译引擎] {\input{./Chapter2/Figures/figure-mt=language-analysis+translation-engine}}
\caption{机器翻译系统的结构}
\label{fig:2-1}
\end{figure}
......@@ -125,7 +125,7 @@ F(X)=\int_{-\infty}^x f(x)dx
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter2/Figures/figure-Probability-density-function&Distribution-function}
\input{./Chapter2/Figures/figure-probability-density-function&distribution-function}
\caption{一个概率密度函数(左)与其对应的分布函数(右)}
\label{fig:2-3}
\end{figure}
......@@ -310,7 +310,7 @@ F(X)=\int_{-\infty}^x f(x)dx
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter2/Figures/figure-Self-information-function}
\input{./Chapter2/Figures/figure-self-information-function}
\caption{自信息函数$\textrm{I}(x)$关于$\textrm{P}(x)$的曲线}
\label{fig:2-6}
\end{figure}
......@@ -429,7 +429,7 @@ F(X)=\int_{-\infty}^x f(x)dx
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter2/Figures/figure-Example-of-word-segmentation-based-on-dictionary}
\input{./Chapter2/Figures/figure-example-of-word-segmentation-based-on-dictionary}
\caption{基于词典进行分词的实例}
\label{fig:2-8}
\end{figure}
......@@ -638,7 +638,7 @@ F(X)=\int_{-\infty}^x f(x)dx
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter2/Figures/figure-examples-of-Chinese-word-segmentation-based-on-1-gram-model}
\input{./Chapter2/Figures/figure-examples-of-chinese-word-segmentation-based-on-1-gram-model}
\caption{基于1-gram语言模型的中文分词实例}
\label{fig:2-17}
\end{figure}
......
......@@ -170,7 +170,7 @@
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter3/Figures/figure-processes-SMT}
\input{./Chapter3/Figures/figure-processes-smt}
\caption{简单的统计机器翻译流程}
\label{fig:3-5}
\end{figure}
......@@ -472,7 +472,7 @@ g(\mathbf{s},\mathbf{t}) \equiv \prod_{j,i \in \widehat{A}}{\textrm{P}(s_j,t_i)}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter3/Figures/figure-greedy-MT-decoding-pseudo-code}
\input{./Chapter3/Figures/figure-greedy-mt-decoding-pseudo-code}
\caption{贪婪的机器翻译解码算法的伪代码}
\label{fig:3-10}
\end{figure}
......@@ -483,8 +483,8 @@ g(\mathbf{s},\mathbf{t}) \equiv \prod_{j,i \in \widehat{A}}{\textrm{P}(s_j,t_i)}
%----------------------------------------------
\begin{figure}[htp]
\centering
\subfigure{\input{./Chapter3/Figures/greedy-MT-decoding-process-1}}
\subfigure{\input{./Chapter3/Figures/greedy-MT-decoding-process-3}}
\subfigure{\input{./Chapter3/Figures/greedy-mt-decoding-process-1}}
\subfigure{\input{./Chapter3/Figures/greedy-mt-decoding-process-3}}
\setlength{\belowcaptionskip}{14.0em}
\caption{贪婪的机器翻译解码过程实例}
\label{fig:3-11}
......
......@@ -29,7 +29,7 @@
\node [anchor=center,draw,circle,inner sep=1.5pt,blue!30,fill=blue!30] (f11) at ([xshift=0em,yshift=23em]y2.north) {};
\node[anchor=south] (f12) at ([xshift=5em,yshift=-0.5em]f11.south) {\scriptsize{fixed}};
\node [anchor=center,draw,circle,inner sep=1.5pt,purple!30,fill=ugreen!50] (f21) at ([xshift=0em,yshift=-4em]f11.north) {};
\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!50,fill=ugreen!50] (f21) at ([xshift=0em,yshift=-4em]f11.north) {};
\node[anchor=south] (f22) at ([xshift=8.5em,yshift=-0.5em]f21.south) {\scriptsize{valid choices}};
\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (f31) at ([xshift=0em,yshift=-4em]f21.north) {};
\node[anchor=south] (f32) at ([xshift=9.5em,yshift=-0.5em]f31.south) {\scriptsize{invalid choices}};
......
......@@ -26,7 +26,7 @@
\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r33) at (2,2) {};
\node [anchor=center,draw,circle,inner sep=1.5pt,red!30,fill=red!30] (r35) at (2,1) {};
\node [anchor=center,draw,circle,inner sep=1.5pt,purple!30,fill=purple!30] (r34) at (2,3) {};
\node [anchor=center,draw,circle,inner sep=1.5pt,ugreen!50,fill=ugreen!50] (r34) at (2,3) {};
\draw [-,very thick,red!50, dashed] (1,2) -- (2,4) -- (3,2) -- (2,3) -- (1,2) -- (3,2) -- (2,1) -- (1,2) -- (2,0) -- (3,2);
\draw [-,very thick,blue!50] (0,1) -- (1,2);
......
......@@ -105,7 +105,7 @@
\end{flushright}
\begin{center}
\vspace{-1em}
(a)节点对齐矩阵(1-best vs. Matrix)
\footnotesize{(a)节点对齐矩阵(1-best vs. Matrix)}
\end{center}
\begin{center}
......@@ -147,7 +147,7 @@
\begin{center}
\vspace{-2em}
(b) 抽取得到的树到树翻译规则
\footnotesize{(b) 抽取得到的树到树翻译规则}
\end{center}
\end{center}
......@@ -1653,7 +1653,7 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex
\subsubsection{树到串翻译规则}
\parinterval 基于树结构的文法可以很好的表示两个树片段之间的对应关系,即树到树翻译规则。那树到串翻译规则该如何表示呢?实际上,基于树结构的文法也同样适用于树到串模型。比如,如下是一个树片段到串的映射,它可以被看作是树到串规则的一种表示。
\parinterval 基于树结构的文法可以很好的表示两个树片段之间的对应关系,即树到树翻译规则。那树到串翻译规则该如何表示呢?实际上,基于树结构的文法也同样适用于树到串模型。比如,\ref{fig:4-49}是一个树片段到串的映射,它可以被看作是树到串规则的一种表示。
%----------------------------------------------
\begin{figure}[htp]
......@@ -2162,7 +2162,7 @@ d_1 = {d'} \circ {r_5}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter4/Figures/structure-of-Chart}
\input{./Chapter4/Figures/structure-of-chart}
\caption{Chart结构}
\label{fig:4-65}
\end{figure}
......@@ -2252,7 +2252,7 @@ d_1 = {d'} \circ {r_5}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter4/Figures/content-of-Chart-in-tree-based-decoding}
\input{./Chapter4/Figures/content-of-chart-in-tree-based-decoding}
\caption{基于树的解码中Chart的内容}
\label{fig:4-68}
\end{figure}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -4,12 +4,13 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{tikzpicture}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
\tikzstyle{wordnode} = [font=\scriptsize]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
......@@ -23,8 +24,12 @@
\node[wordnode,anchor=east] (init2) at ([xshift=-3.0em]init.west){};
{
\node[rnnnode,fill=purple] (repr) at (enc4) {};
\node[wordnode] (label) at ([xshift=3.5em]enc4.east) {源语言句子表示};
\draw[->,dashed,thick] (label.west) -- (enc4.east);
\node[wordnode] (label) at ([yshift=2.5em]enc4.north) {
\begin{tabular}{c}
源语言句\\子表示
\end{tabular}
};
\draw[->,dashed,thick] (label.south) -- (enc4.north);
}
\node[wordnode,below=0pt of eemb1,font=\scriptsize] (encwordin1) {};
......@@ -37,7 +42,7 @@
% RNN Decoder
\foreach \x in {1,2,...,4}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([xshift=0.0em,yshift=3.0em]enc\x.north) {};
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([xshift=9.0em,yshift=-3.5em]enc\x.north) {};
\foreach \x in {1,2,...,4}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.5\base]demb\x.north) {};
\foreach \x in {1,2,...,4}
......@@ -86,7 +91,7 @@
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
\coordinate (bridge) at ([yshift=-1.15\base]demb2);
\draw[-latex'] (enc4.north) .. controls +(north:0.4\base) and +(east:0.5\base) .. (bridge) .. controls +(west:2.4\base) and +(west:0.5\base) .. (dec1.west);
\draw[-latex'] (enc4.east) -- (dec1.west);
\end{scope}
\end{tikzpicture}
......@@ -124,3 +129,4 @@
......@@ -50,7 +50,7 @@
\vspace{-1.0em}
\footnotesize{
\begin{eqnarray}
\textbf{C}(\textrm{''机票''}) & = & 0.2 \times \textbf{h}(\textrm{``沈阳''}) + 0.3 \times \textbf{h}(\textrm{``到''}) + \nonumber \\
\tilde{\mathbf{h}} (\textrm{''机票''}) & = & 0.2 \times \textbf{h}(\textrm{``沈阳''}) + 0.3 \times \textbf{h}(\textrm{``到''}) + \nonumber \\
& & 0.1 \times \textbf{h}(\textrm{``广州''}) + ... + 0.3 \times \textbf{h}(\textrm{``机票''}) \nonumber
\end{eqnarray}
}
\ No newline at end of file
......@@ -16,7 +16,7 @@ Jenson Button was denied his 100th race for McLaren after an ERS prevented him f
};
%译文1--------------mt1
\node[font=\small] (mt1) at ([xshift=0em,yshift=-16.8em]original0.south) {系统生成\quad};
\node[font=\small] (mt-2) at ([xshift=0em,yshift=-0.5em]mt1.south) {\quad 的摘要:};
\node[font=\small] (mt-2) at ([xshift=0em,yshift=-0.5em]mt1.south) {的摘要:\quad };
\node[font=\small] (ts1) at ([xshift=0em,yshift=-3em]original1.south) {
\begin{tabular}[t]{l}
\parbox{32em}{
......
......@@ -4,28 +4,28 @@
\begin{tikzpicture}
\begin{scope}
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear0) at (0,0) {\tiny{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear01) at ([shift={(-0.2em,-0.2em)}]Linear0.south west) {\tiny{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear02) at ([shift={(-0.2em,-0.2em)}]Linear01.south west) {\tiny{Linear}};
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear0) at (0,0) {\footnotesize{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear01) at ([shift={(-0.2em,-0.2em)}]Linear0.south west) {\footnotesize{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear02) at ([shift={(-0.2em,-0.2em)}]Linear01.south west) {\footnotesize{Linear}};
\node [anchor=north] (Q) at ([xshift=0em,yshift=-1em]Linear02.south) {\footnotesize{$\mathbf{Q}$}};
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear1) at ([xshift=1.5em]Linear0.east) {\tiny{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear11) at ([shift={(-0.2em,-0.2em)}]Linear1.south west) {\tiny{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear12) at ([shift={(-0.2em,-0.2em)}]Linear11.south west) {\tiny{Linear}};
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear1) at ([xshift=1.5em]Linear0.east) {\footnotesize{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear11) at ([shift={(-0.2em,-0.2em)}]Linear1.south west) {\footnotesize{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear12) at ([shift={(-0.2em,-0.2em)}]Linear11.south west) {\footnotesize{Linear}};
\node [anchor=north] (K) at ([xshift=0em,yshift=-1em]Linear12.south) {\footnotesize{$\mathbf{K}$}};
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear2) at ([xshift=1.5em]Linear1.east) {\tiny{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear21) at ([shift={(-0.2em,-0.2em)}]Linear2.south west) {\tiny{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear22) at ([shift={(-0.2em,-0.2em)}]Linear21.south west) {\tiny{Linear}};
\node [anchor=west,draw=black!30,inner sep=4pt,fill=ugreen!20!white] (Linear2) at ([xshift=1.5em]Linear1.east) {\footnotesize{Linear}};
\node [anchor=south west,draw=black!50,fill=ugreen!20!white,draw,inner sep=4pt] (Linear21) at ([shift={(-0.2em,-0.2em)}]Linear2.south west) {\footnotesize{Linear}};
\node [anchor=south west,fill=ugreen!20!white,draw,inner sep=4pt] (Linear22) at ([shift={(-0.2em,-0.2em)}]Linear21.south west) {\footnotesize{Linear}};
\node [anchor=north] (V) at ([xshift=0em,yshift=-1em]Linear22.south) {\footnotesize{$\mathbf{V}$}};
\node [anchor=south,draw=black!30,minimum width=9em,inner sep=4pt,fill=blue!20!white] (Scale) at ([yshift=1em]Linear1.north) {\tiny{Scaled Dot-Product Attention}};
\node [anchor=south west,draw=black!50,minimum width=9em,fill=blue!20!white,draw,inner sep=4pt] (Scale1) at ([shift={(-0.2em,-0.2em)}]Scale.south west) {\tiny{Scaled Dot-Product Attention}};
\node [anchor=south west,fill=blue!20!white,draw,minimum width=9em,inner sep=4pt] (Scale2) at ([shift={(-0.2em,-0.2em)}]Scale1.south west) {\tiny{Scaled Dot-Product Attention}};
\node [anchor=south,draw=black!30,minimum width=12em,minimum height=2em,inner sep=4pt,fill=blue!20!white] (Scale) at ([yshift=1em]Linear1.north) {\footnotesize{Scaled Dot-Product Attention}};
\node [anchor=south west,draw=black!50,minimum width=12em,minimum height=2em,fill=blue!20!white,draw,inner sep=4pt] (Scale1) at ([shift={(-0.2em,-0.2em)}]Scale.south west) {\footnotesize{Scaled Dot-Product Attention}};
\node [anchor=south west,fill=blue!20!white,draw,minimum width=12em,minimum height=2em,inner sep=4pt] (Scale2) at ([shift={(-0.2em,-0.2em)}]Scale1.south west) {\footnotesize{Scaled Dot-Product Attention}};
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=yellow!30] (Concat) at ([yshift=1em]Scale2.north) {\tiny{Concat}};
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=yellow!30] (Concat) at ([yshift=1em]Scale2.north) {\footnotesize{Concat}};
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=ugreen!20!white] (Linear) at ([yshift=1em]Concat.north) {\tiny{Linear}};
\node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=ugreen!20!white] (Linear) at ([yshift=1em]Concat.north) {\footnotesize{Linear}};
\draw [->] ([yshift=0.1em]Q.north) -- ([yshift=-0.1em]Linear02.south);
......
......@@ -23,11 +23,11 @@
\draw [->] ([yshift=0.1em]Scale3.north) -- ([yshift=-0.1em]Mask.south);
\draw [->] ([yshift=0.1em]Mask.north) -- ([yshift=-0.1em]SoftMax.south);
\draw [->] ([yshift=0.1em]SoftMax.north) -- ([yshift=0.9em]SoftMax.north);
\draw [->] ([yshift=0.1em]V1.north) -- ([yshift=9.1em]V1.north);
\draw [->] ([yshift=0.1em]V1.north) -- ([yshift=9.3em]V1.north);
\draw [->] ([yshift=0.1em]MatMul1.north) -- ([yshift=0.8em]MatMul1.north);
{
\node [anchor=east] (line1) at ([xshift=-3em,yshift=1em]MatMul.west) {\scriptsize{自注意力机制的Query}};
\node [anchor=east] (line1) at ([xshift=-4em,yshift=1em]MatMul.west) {\scriptsize{自注意力机制的Query}};
\node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{Key和Value均来自同一句子}};
\node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{编码-解码注意力机制}};
\node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{与前面讲的一样}};
......@@ -60,7 +60,7 @@
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1) (line2) (line3) (line4)] (box1) {};
\node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (Q1) (K1) (V1)] (box0) {};
\draw [->,dotted,very thick,ugreen] ([yshift=-1.5em,xshift=0.8em]box1.east) -- ([yshift=-1.5em,xshift=0.1em]box1.east);
\draw [->,dotted,very thick,ugreen] ([yshift=-1.5em,xshift=1.2em]box1.east) -- ([yshift=-1.5em,xshift=0.1em]box1.east);
}
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=blue!20!white,drop shadow,draw=blue] [fit = (line11) (line12) (line13)] (box2) {};
......@@ -74,7 +74,7 @@
{
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=red!10,drop shadow,draw=red] [fit = (line31) (line32) (line33) (line34)] (box4) {};
\draw [->,dotted,very thick,red] ([yshift=-1.5em,xshift=1.5em]box4.east) -- ([yshift=-1.5em,xshift=0.1em]box4.east);
\draw [->,dotted,very thick,red] ([yshift=-1.2em,xshift=2.2em]box4.east) -- ([yshift=-1.2em,xshift=0.1em]box4.east);
}
{
......
......@@ -28,46 +28,7 @@
\draw [->] ([yshift=1pt]query.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]key3.north) node [pos=0.5,below,yshift=0.2em] {\scriptsize{匹配}};
\node [anchor=north] (result) at (value3.south) {\scriptsize{ {\red 返回结果} }};
\node [anchor=north] (result2) at ([xshift=-2em,yshift=-2em]value2.south) {\footnotesize{ { (a)索引的查询过程} }};
\end{scope}
\end{tikzpicture}
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=3em,minimum height=1.2em]
\node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{value$_1$}};
\node [rnode,anchor=south west,fill=red!20!white] (value2) at ([xshift=1em]value1.south east) {\scriptsize{value$_2$}};
\node [rnode,anchor=south west,fill=red!20!white] (value3) at ([xshift=1em]value2.south east) {\scriptsize{value$_3$}};
\node [rnode,anchor=south west,fill=red!20!white] (value4) at ([xshift=1em]value3.south east) {\scriptsize{value$_4$}};
\node [rnode,anchor=south west,pattern=north east lines] (key1) at ([yshift=0.2em]value1.north west) {};
\node [rnode,anchor=south west,pattern=dots] (key2) at ([yshift=0.2em]value2.north west) {};
\node [rnode,anchor=south west,pattern=horizontal lines] (key3) at ([yshift=0.2em]value3.north west) {};
\node [rnode,anchor=south west,pattern=crosshatch dots] (key4) at ([yshift=0.2em]value4.north west) {};
\node [fill=white,inner sep=1pt] (key1label) at (key1) {\scriptsize{key$_1$}};
\node [fill=white,inner sep=1pt] (key1label) at (key2) {\scriptsize{key$_2$}};
\node [fill=white,inner sep=1pt] (key1label) at (key3) {\scriptsize{key$_3$}};
\node [fill=white,inner sep=1pt] (key1label) at (key4) {\scriptsize{key$_4$}};
\node [rnode,anchor=east,pattern=vertical lines] (query) at ([xshift=-3em]key1.west) {};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
\draw [->] ([yshift=1pt,xshift=6pt]query.north) .. controls +(90:1em) and +(90:1em) .. ([yshift=1pt]key1.north);
\draw [->] ([yshift=1pt,xshift=3pt]query.north) .. controls +(90:1.5em) and +(90:1.5em) .. ([yshift=1pt]key2.north);
\draw [->] ([yshift=1pt]query.north) .. controls +(90:2em) and +(90:2em) .. ([yshift=1pt]key3.north);
\draw [->] ([yshift=1pt,xshift=-3pt]query.north) .. controls +(90:2.5em) and +(90:2.5em) .. ([yshift=1pt]key4.north);
\node [anchor=south east] (alpha1) at (key1.north east) {\scriptsize{$\alpha_1$}};
\node [anchor=south east] (alpha2) at (key2.north east) {\scriptsize{$\alpha_2$}};
\node [anchor=south east] (alpha3) at (key3.north east) {\scriptsize{$\alpha_3$}};
\node [anchor=south east] (alpha4) at (key4.north east) {\scriptsize{$\alpha_4$}};
\node [anchor=north] (result) at ([xshift=-1.5em]value2.south east) {\scriptsize{{\red 返回结果}=$\alpha_1 \cdot \textrm{value}_1 + \alpha_2 \cdot \textrm{value}_2 + \alpha_3 \cdot \textrm{value}_3 + \alpha_4 \cdot \textrm{value}_4$}};
\node [anchor=north] (result2) at ([xshift=-1em,yshift=-2.5em]value2.south) {\footnotesize{ { (b)注意力机制查询过程} }};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -2,14 +2,14 @@
\begin{tikzpicture}
\begin{axis}[
name=s1,
width=7cm, height=4cm,
width=7cm, height=4.5cm,
xtick={-4,-3,-2,-1,0,1,2,3,4},
ytick={0,1,...,4},
xticklabel style={opacity=0},
yticklabel style={opacity=0},
xlabel={$w$},
ylabel={$L(w)$},
axis line style={->},
axis line style={->,very thick},
xlabel style={xshift=2.2cm,yshift=1.2cm},
ylabel style={rotate=-90,xshift=1.5cm,yshift=1.6cm},
tick align=inside,
......@@ -19,7 +19,7 @@
xmin=-4,
xmax=4,
ymin=0,
ymax=4]
ymax=4.5]
\addplot [dashed,ublue,thick] {x^2/4};
\addplot [quiver={u=1,v=x/2,scale arrows = 0.25},domain=-4:-0.3,->,samples=10,red!60,ultra thick] {x^2/4};
\addplot [draw=ublue,fill=red,mark=*] coordinates{(0,0)};
......@@ -29,14 +29,14 @@
anchor=south,
xshift=6cm,
yshift=0cm,
width=7cm, height=4cm,
width=7cm, height=4.5cm,
xtick={-4,-3,-2,-1,0,1,2,3,4},
ytick={0,1,...,4},
xticklabel style={opacity=0},
yticklabel style={opacity=0},
xlabel={$w$},
ylabel={$L(w)$},
axis line style={->},
axis line style={->,very thick},
xlabel style={xshift=2.2cm,yshift=1.2cm},
ylabel style={rotate=-90,xshift=1.5cm,yshift=1.6cm},
tick align=inside,
......@@ -46,7 +46,7 @@
xmin=-4,
xmax=4,
ymin=0,
ymax=4]
ymax=4.5]
\addplot [dashed,ublue,thick] {x^2/4};
\addplot [quiver={u=-x-(x/abs(x))*(1+x^2-4)^(1/2),v=-0.7},domain=-4:3.6,->,samples=2,red!60,ultra thick] {x^2/4};
\addplot [quiver={u=-x-(x/abs(x))*(1+x^2-4)^(1/2),v=-0.7},domain=-3.13:2.6,->,samples=2,red!60,ultra thick] {x^2/4};
......
......@@ -62,5 +62,8 @@
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
\node [ugreen] (count) at ([xshift=-1.7em,yshift=-1em]encoder.south) {$6\times$};
\node [red] (count) at ([xshift=11em,yshift=0em]decoder.south) {$\times 6$};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -90,7 +90,7 @@
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/figure-construction-steps-of-MT-system}
\input{./Chapter7/Figures/figure-construction-steps-of-mt-system}
\caption{构建神经机器翻译系统的主要步骤}
\label{fig:7-2}
\end{figure}
......@@ -364,7 +364,7 @@
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/figure-unk-of-bpe}
\caption{BPE中<UNK>的生成}
\caption{BPE中的子词切分过程}
\label{fig:7-10}
\end{figure}
%----------------------------------------------
......@@ -417,7 +417,7 @@ y = f(x)
% 图7.
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/figure-Underfitting-vs-Overfitting}
\input{./Chapter7/Figures/figure-underfitting-vs-overfitting}
\caption{欠拟合 vs 过拟合}
\label{fig:7-11}
\end{figure}
......@@ -1155,7 +1155,7 @@ b &=& \omega_{\textrm{high}}\cdot |\mathbf{x}|
\parinterval 有了lattice这样的结构,多模型融合又有了新的思路。首先,可以将多个模型的译文融合为lattice。注意,这个lattice会包含这些模型无法生成的完整译文句子。之后,用一个更强的模型在lattice上搜索最优的结果。这个过程有可能找到一些``新''的译文,即结果可能是从多个模型的结果中重组而来的。lattice上的搜索模型可以基于多模型的融合,也可以使用一个简单的模型,这里需要考虑的是将神经机器翻译模型适应到lattice上进行推断\cite{DBLP:conf/aaai/SuTXJSL17}。其过程基本与原始的模型推断没有区别,只是需要把模型预测的结果附着到lattice中的每条边上,再进行推断。
\parinterval\ref{fig:7-27}对比了不同模型集成方法的区别。从系统开发的角度看,假设选择和模型预测融合的复杂度较低,适合快速原型,而且性能稳定。译文重组需要更多的模块,系统调试的复杂度较高,但是由于看到了更大的搜索空间,因此系统性能提升的潜力较大\footnote{一般来说lattice上的Oracle要比$n$-best译文上的oracle的质量高。}
\parinterval\ref{fig:7-27}对比了不同模型集成方法的区别。从系统开发的角度看,假设选择和模型预测融合的复杂度较低,适合快速原型,而且性能稳定。译文重组需要更多的模块,系统调试的复杂度较高,但是由于看到了更大的搜索空间,因此系统性能提升的潜力较大\footnote{一般来说lattice上的Oracle要比$n$-best译文上的Oracle的质量高。}
%----------------------------------------------
% 图7.
......@@ -1191,7 +1191,7 @@ b &=& \omega_{\textrm{high}}\cdot |\mathbf{x}|
% 图7.5.1
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/Post-Norm-vs-Pre-Norm}
\input{./Chapter7/Figures/figure-post-norm-vs-pre-norm}
\caption{Post-Norm Transformer vs Pre-Norm Transformer}
\label{fig:7-28}
\end{figure}
......@@ -1261,7 +1261,7 @@ z_{l}=\textrm{LN}(x_{l+1})
\end{eqnarray}
注意,$z_0$表示词嵌入层的输出,$z_l(l>0)$表示Transformer网络中最终的各层输出。
\vspace{0.5em}
\item 定义一个维度为$(L+1)\times(L+1)$的权值矩阵$\mathbf{W}$,矩阵中每一行表示之前各层对当前层计算的贡献度,其中$L$是编码端(或解码端)的层数。令$\mathbf{W}_{l,i}$代表权值矩阵$\mathbf{W}$$l$行第$i$列的权重,则层聚合的输出为$z_i$的线性加权和:
\item 定义一个维度为$(L+1)\times(L+1)$的权值矩阵$\mathbf{W}$,矩阵中每一行表示之前各层对当前层计算的贡献度,其中$L$是编码端(或解码端)的层数。令$\mathbf{W}_{l,i}$代表权值矩阵$\mathbf{W}$$l$行第$i$列的权重,则层聚合的输出为$z_i$的线性加权和:
\begin{eqnarray}
g_l=\sum_{i=0}^{l}z_i\times \mathbf{W}_{l,i}
\label{eq:7-21}
......@@ -1273,7 +1273,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi
% 图7.5.2
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/dynamic-linear-aggregation-network-structure}
\input{./Chapter7/Figures/figure-dynamic-linear-aggregation-network-structure}
\caption{动态线性层聚合网络结构图}
\label{fig:7-29}
\end{figure}
......@@ -1299,7 +1299,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi
% 图7.5.3
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/progressive-training}
\input{./Chapter7/Figures/figure-progressive-training}
\caption{渐进式深层网络训练过程}
\label{fig:7-30}
\end{figure}
......@@ -1316,7 +1316,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi
% 图7.5.4
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/sparse-connections-between-different-groups}
\input{./Chapter7/Figures/figure-sparse-connections-between-different-groups}
\caption{不同组之间的稀疏连接}
\label{fig:7-31}
\end{figure}
......@@ -1335,7 +1335,7 @@ $g_l$会作为输入的一部分送入第$l+1$层。其网络的结构图\ref{fi
% 图7.5.5
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/learning-rate}
\input{./Chapter7/Figures/figure-learning-rate}
\caption{学习率重置vs从头训练的学习率曲线}
\label{fig:7-32}
\end{figure}
......@@ -1411,7 +1411,7 @@ p_l=\frac{l}{2L}\cdot \varphi
% 图7.5.7
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/expanded-residual-network}
\input{./Chapter7/Figures/figure-expanded-residual-network}
\caption{Layer Dropout中残差网络的展开图}
\label{fig:7-34}
\end{figure}
......@@ -1633,7 +1633,7 @@ L_{\textrm{seq}} = - \textrm{logP}_{\textrm{s}}(\hat{\textbf{y}} | \textbf{x})
\begin{figure}[htp]
\centering
\input{./Chapter7/Figures/figure-ensemble-knowledge-distillation}
\caption{Ensemble知识精炼}
\caption{迭代式知识精炼}
\label{fig:7-41}
\end{figure}
%-------------------------------------------
......
\begin{tikzpicture}
\begin{scope}
\node [anchor=center] (node1) at (-2.9,1) {\small{训练:}};
\node [anchor=center] (node11) at (-2.5,1) {};
\node [anchor=center] (node12) at (-1.7,1) {};
\node [anchor=center] (node2) at (-2.9,0.5) {\small{推理:}};
\node [anchor=center] (node21) at (-2.5,0.5) {};
\node [anchor=center] (node22) at (-1.7,0.5) {};
\node [anchor=center] (node1) at (4.9,1) {\small{训练:}};
\node [anchor=center] (node11) at (5.5,1) {};
\node [anchor=center] (node12) at (6.7,1) {};
\node [anchor=center] (node2) at (4.9,0.5) {\small{推理:}};
\node [anchor=center] (node21) at (5.5,0.5) {};
\node [anchor=center] (node22) at (6.7,0.5) {};
\node [anchor=west,line width=0.6pt,draw=black,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}};
\node [anchor=south,line width=0.6pt,draw=black,minimum width=4.5em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-2) at ([yshift=-5em]node1-1.south) {\footnotesize{目标语伪数据}};
\node [anchor=west,line width=0.6pt,draw=black,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node2-1) at ([xshift=-8.8em,yshift=-2.5em]node1-1.west) {\footnotesize{反向NMT系统}};
......
\begin{tikzpicture}
\tikzstyle{layer} = [rectangle,draw,rounded corners=3pt,minimum width=1cm,minimum height=0.5cm];
\tikzstyle{layer} = [rectangle,draw,rounded corners=3pt,minimum width=1cm,minimum height=0.5cm,line width=1pt];
\tikzstyle{prob} = [minimum width=0.3cm,rectangle,fill=ugreen!20!white,inner sep=0pt];
\begin{scope}[local bounding box=STANDARD]
......@@ -22,8 +22,8 @@
\path [fill=blue!20!white,draw=white] (out1.north west) -- (prob1.south west) -- (prob9.south east) -- (out1.north east) -- (out1.north west);
\draw [->] (input1) to (net1);
\draw [->] (net1) to (out1);
\draw [->,line width=1pt] (input1) to (net1);
\draw [->,line width=1pt] (net1) to (out1);
\node [font=\small] (label1) at ([yshift=0.6cm]out1.north) {Softmax};
\end{scope}
......@@ -51,8 +51,8 @@
\path [fill=blue!20!white,draw=white] (out2.north west) -- (prob1.south west) -- (prob9.south east) -- (out2.north east) -- (out2.north west);
\draw [->] (input2) to (net2);
\draw [->] (net2) to (out2);
\draw [->,line width=1pt] (input2) to (net2);
\draw [->,line width=1pt] (net2) to (out2);
\node [font=\small] (label2) at ([yshift=0.6cm]out2.north) {Softmax};
......@@ -60,9 +60,9 @@
\node [anchor=north,font=\scriptsize] (input3) at ([yshift=-0.5cm]net3.south) {源语};
\node [anchor=south,layer,align=center,font=\scriptsize,fill=yellow!10!white] (out3) at ([yshift=0.9cm]net3.north) {Candidate\\List};
\draw [->] (input3) to (net3);
\draw [->] (net3) to (out3);
\draw [->] (out3) |- (plabel9.east);
\draw [->,line width=1pt] (input3) to (net3);
\draw [->,line width=1pt] (net3) to (out3);
\draw [->,line width=1pt] (out3) |- (plabel9.east);
\end{scope}
\node [anchor=north,font=\scriptsize] () at ([yshift=-0.2em]STANDARD.south) {(a) 标准方法};
......
......@@ -50,8 +50,8 @@
\node [rectangle,inner sep=1em,fill=black!5,rounded corners=4pt] [fit =(w4) (w6) (w9) (encoder0) ] (box) {};
\end{pgfonlayer}
\node [] (left) at ([yshift=-1.5em]box.south) {编码器使用单语数据预训练};
\node [] (right) at ([xshift=11em]left.east) {在翻译任务上进行微调};
\node [font=\footnotesize] (left) at ([yshift=-1.5em]box.south) {编码器使用单语数据预训练};
\node [font=\footnotesize] (right) at ([xshift=11em]left.east) {在翻译任务上进行微调};
\node[anchor=north] (arrow1) at (3.85,0.1){};
......
\begin{tikzpicture}
\begin{scope}
\node [anchor=center] (node1) at (-2.6,1) {\small{训练:}};
\node [anchor=center] (node11) at (-2.2,1) {};
\node [anchor=center] (node12) at (-1.1,1) {};
\node [anchor=center] (node2) at (-2.6,0.5) {\small{推理:}};
\node [anchor=center] (node21) at (-2.2,0.5) {};
\node [anchor=center] (node22) at (-1.1,0.5) {};
\node [anchor=center] (node1) at (9.6,1) {\small{训练:}};
\node [anchor=center] (node11) at (10.2,1) {};
\node [anchor=center] (node12) at (11.4,1) {};
\node [anchor=center] (node2) at (9.6,0.5) {\small{推理:}};
\node [anchor=center] (node21) at (10.2,0.5) {};
\node [anchor=center] (node22) at (11.4,0.5) {};
\node [anchor=west,draw=black,line width=0.6pt,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}};
\node [anchor=south,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-2) at ([yshift=-5em]node1-1.south) {\footnotesize{目标语伪数据}};
\node [anchor=west,draw=black,line width=0.6pt,minimum width=4.5em,minimum height=2.2em,fill=red!20,rounded corners=2pt] (node2-1) at ([xshift=-7.7em,yshift=-2.5em]node1-1.west) {\footnotesize{前向NMT系统}};
......
......@@ -122,13 +122,13 @@
% CHAPTERS
%----------------------------------------------------------------------------------------
\include{Chapter1/chapter1}
%\include{Chapter1/chapter1}
%\include{Chapter2/chapter2}
%\include{Chapter3/chapter3}
%\include{Chapter4/chapter4}
%\include{Chapter5/chapter5}
%\include{Chapter6/chapter6}
%\include{Chapter7/chapter7}
\include{Chapter7/chapter7}
%\include{ChapterAppend/chapterappend}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论