Commit 11979fdb by zengxin

合并分支 'caorunzhe' 到 'zengxin'

Caorunzhe

查看合并请求 !901
parents a4ef3cd3 b6151b33
......@@ -4,15 +4,15 @@
\begin{tikzpicture}
\begin{scope}
% Box styles shared by the nodes of this scope: a 5em x 1.7em rounded box
% filled red (datanode), blue (modelnode) or green (decodingnode).
% Each style is defined twice; the second definition adds `draw,thick`
% (an outlined border) and, being later, is the one in effect for the nodes below.
% NOTE(review): the doubled lines look like old/new sides of a diff rendered
% together -- confirm which set should remain.
\tikzstyle{datanode} = [minimum width=5em,minimum height=1.7em,fill=red!20,rounded corners=0.3em];
\tikzstyle{modelnode} = [minimum width=5em,minimum height=1.7em,fill=blue!20,rounded corners=0.3em];
\tikzstyle{decodingnode} = [minimum width=5em,minimum height=1.7em,fill=green!20,rounded corners=0.3em];
\tikzstyle{datanode} = [minimum width=5em,minimum height=1.7em,draw,thick,fill=red!20,rounded corners=0.3em];
\tikzstyle{modelnode} = [minimum width=5em,minimum height=1.7em,draw,thick,fill=blue!20,rounded corners=0.3em];
\tikzstyle{decodingnode} = [minimum width=5em,minimum height=1.7em,draw,thick,fill=green!20,rounded corners=0.3em];
% Four language boxes: s1/s3 on the top row, s2/s4 placed 4.5em below them.
\node [datanode,anchor=north west] (s1) at (0,0) {{ \small{语言1}}};
\node [datanode,anchor=north] (s2) at ([yshift=-4.5em]s1.south) {{ \small{语言3}}};
\node [datanode,anchor=west] (s3) at ([xshift=4.5em]s1.east) {{ \small{语言2}}};
\node [datanode,anchor=north] (s4) at ([yshift=-4.5em]s3.south) {{ \small{语言4}}};
% Circular node m1, positioned relative to s1.south east.
% It is declared twice at the same position; the second version adds an outline
% and a darker fill (blue!30). Both are painted, and the name m1 refers to the last one.
\node [circle,anchor=north west,inner sep=2pt,fill=blue!20] (m1) at ([xshift=0.8em,yshift=-0.5em]s1.south east) {{ \small{中间语言}}};
\node [circle,draw,thick,anchor=north west,inner sep=2pt,fill=blue!30] (m1) at ([xshift=0.8em,yshift=-0.5em]s1.south east) {{ \small{中间语言}}};
% Double-headed arrows from s1 and s2 to border points of m1 at 170 and 190 degrees.
\draw [<->,very thick] (s1.south) -- (m1.170);
\draw [<->,very thick] (s2.north) -- (m1.190);
......@@ -24,11 +24,11 @@
\end{scope}
\begin{scope}[xshift=16em]
\begin{scope}[xshift=21em]
\tikzstyle{datanode} = [minimum width=5em,minimum height=1.7em,fill=red!20,rounded corners=0.3em];
\tikzstyle{modelnode} = [minimum width=5em,minimum height=1.7em,fill=blue!20,rounded corners=0.3em];
\tikzstyle{decodingnode} = [minimum width=5em,minimum height=1.7em,fill=green!20,rounded corners=0.3em];
\tikzstyle{datanode} = [minimum width=5em,minimum height=1.7em,draw,thick,fill=red!20,rounded corners=0.3em];
\tikzstyle{modelnode} = [minimum width=5em,minimum height=1.7em,draw,thick,fill=blue!20,rounded corners=0.3em];
\tikzstyle{decodingnode} = [minimum width=5em,minimum height=1.7em,draw,thick,fill=green!20,rounded corners=0.3em];
\node [datanode,anchor=north west] (s1) at (0,0) {{ \small{语言1}}};
\node [datanode,anchor=north] (s2) at ([yshift=-4.5em]s1.south) {{ \small{语言3}}};
......
......@@ -49,7 +49,7 @@
\end{scope}
\begin{scope}[xshift=16.8em,yshift=6em]
\begin{scope}[xshift=18.8em,yshift=6em]
{
\tikzstyle{numbernode} = [fill=blue!30,minimum height=0.8em,minimum width=0.8em,circle,draw,inner sep=1pt]
......@@ -94,14 +94,14 @@
% Boxes drawn on the `background` layer so they sit behind the nodes they enclose.
\begin{pgfonlayer}{background}
{
% netbox: shadowed, lightly red-filled frame fitted around outputnode, word01 and word02.
% Declared twice; the two lines differ only in the `thin` option.
% NOTE(review): probably old/new diff lines -- confirm which one should remain.
\node[draw,thin,minimum width=11em,align=left,rounded corners=1pt,fill=red!5,drop shadow] [fit = (outputnode) (word01) (word02) ] (netbox) {};
\node[draw,minimum width=11em,align=left,rounded corners=1pt,fill=red!5,drop shadow] [fit = (outputnode) (word01) (word02) ] (netbox) {};
% One thin frame per group of neuron nodes (layer1..layer3).
\node[rectangle,draw,thin,inner sep=3pt,rounded corners=1pt] [fit = (neuron01) (neuron02) (neuron03) (neuron04)] (layer1) {};
\node[rectangle,draw,thin,inner sep=3pt,rounded corners=1pt] [fit = (neuron11) (neuron12) (neuron13) (neuron14)] (layer2) {};
\node[rectangle,draw,thin,inner sep=3pt,rounded corners=1pt] [fit = (neuron21) (neuron22) (neuron23) (neuron24) (neuron25)] (layer3) {};
}
\end{pgfonlayer}
% Dotted curved arrow from enclabel to the west side of netbox.
% The two versions differ in control-point reach (2em vs 4em) and in the
% vertical offset of the end point (-1em vs +1em); as written both are drawn.
\draw [->,very thick,dotted] (enclabel.east) ..controls + (east:2em) and +(west:2em).. ([yshift=-1em]netbox.west);
\draw [->,very thick,dotted] (enclabel.east) ..controls + (east:4em) and +(west:4em).. ([yshift=1em]netbox.west);
% Caption label placed directly under netbox.
\node [anchor=north] (netlabel) at (netbox.south) {\footnotesize{编码器网络示例}};
}
......
......@@ -2,7 +2,7 @@
%%% 句法树(层次短语)
\begin{tikzpicture}
{\small
\begin{scope}[sibling distance=25pt, level distance = 20pt]
\begin{scope}[sibling distance=25pt, level distance = 25pt]
{\scriptsize
\Tree[.\node(r){IP};
[.\node(n11){NP}; [.\node(n21){PN}; [.\node(l1){};]]]
......
......@@ -24,7 +24,7 @@
% Frame on the background layer: a rectangle outlined in the colour `ublue`,
% fitted with zero padding around the rule nodes and rulebaselabel.
\begin{pgfonlayer}{background}
{
% The two \node lines fit the same node list; the second only adds `thick`.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (rule1) (rule2) (rule3) (rule3part2) (rule4) (rule4part2) (rule5) (rule5part2) (rule6) (rule6part2) (rulebaselabel)] {};
\node[rectangle,draw=ublue, thick,inner sep=0mm] [fit = (rule1) (rule2) (rule3) (rule3part2) (rule4) (rule4part2) (rule5) (rule5part2) (rule6) (rule6part2) (rulebaselabel)] {};
}
\end{pgfonlayer}
}
......
......@@ -23,7 +23,7 @@
% Background-layer frame named `bidata`, fitted around the example nodes and bidatalabel.
\begin{pgfonlayer}{background}
{
% Same fit list on both lines; the second adds `thick`. The name bidata refers to the later node.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (example1) (example1part2) (example2) (example2part2) (example3) (example3part2) (example4) (bidatalabel)(example6)] (bidata) {};
\node[rectangle,draw=ublue, thick, inner sep=0mm] [fit = (example1) (example1part2) (example2) (example2part2) (example3) (example3part2) (example4) (bidatalabel)(example6)] (bidata) {};
}
\end{pgfonlayer}
......@@ -44,7 +44,7 @@
% Background-layer frame named `monodata`, fitted around entry1..entry6 and monodatalabel.
\begin{pgfonlayer}{background}
{
% Same fit list on both lines; the second adds `thick`. The name monodata refers to the later node.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (entry1) (entry2) (entry3) (entry4) (entry5)(entry6)(monodatalabel)] (monodata) {};
\node[rectangle,draw=ublue, thick, inner sep=0mm] [fit = (entry1) (entry2) (entry3) (entry4) (entry5)(entry6)(monodatalabel)] (monodata) {};
}
\end{pgfonlayer}
......@@ -68,7 +68,7 @@
% Background-layer frame named `phrasetable`, fitted around the phrase nodes and phrasetablelabel.
\begin{pgfonlayer}{background}
{
% Same fit list on both lines; the second adds `thick`. The name phrasetable refers to the later node.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (phrase1) (phrase2) (phrase3) (phrase4) (phrase4part2) (phrase5) (phrase5part2) (phrase6)(phrase7) (phrasetablelabel)] (phrasetable) {};
\node[rectangle,draw=ublue, thick, inner sep=0mm] [fit = (phrase1) (phrase2) (phrase3) (phrase4) (phrase4part2) (phrase5) (phrase5part2) (phrase6)(phrase7) (phrasetablelabel)] (phrasetable) {};
}
\end{pgfonlayer}
......@@ -88,7 +88,7 @@
% Background-layer frame fitted around ngram1..ngram4, lmlabel and ngrame.
\begin{pgfonlayer}{background}
{
% Same fit list on both lines; the second adds `thick`.
% NOTE(review): the node name is spelled `langaugemodel` (not `languagemodel`); it is left
% untouched here because other parts of the figure may refer to it by this spelling -- verify before renaming.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (ngram1) (ngram2) (ngram3) (ngram4) (lmlabel) (ngrame)] (langaugemodel) {};
\node[rectangle,draw=ublue, thick, inner sep=0mm] [fit = (ngram1) (ngram2) (ngram3) (ngram4) (lmlabel) (ngrame)] (langaugemodel) {};
}
\end{pgfonlayer}
......
......@@ -4,11 +4,11 @@
\begin{tikzpicture}
\begin{scope}
% Box styles node1..node5. node1/node4 are 1.7em high, node2/node3 are 2.5em high;
% node1..node4 are 7em wide with red/blue/green/orange fills, node5 is a 4.5em dashed style without fill.
% The first five definitions are repeated below with `draw,thick` added (node5: `thick` only);
% the later definitions are the ones in effect.
% NOTE(review): looks like old/new diff lines shown together -- confirm which set should remain.
\tikzstyle{node1} = [minimum width=7em,minimum height=1.7em,fill=red!20,rounded corners=0.3em];
\tikzstyle{node2} = [minimum width=7em,minimum height=2.5em,fill=blue!20,rounded corners=0.3em];
\tikzstyle{node3} = [minimum width=7em,minimum height=2.5em,fill=green!20,rounded corners=0.3em];
\tikzstyle{node4} = [minimum width=7em,minimum height=1.7em,fill=orange!20,rounded corners=0.3em];
\tikzstyle{node5} = [minimum width=4.5em,minimum height=1.7em,dashed];
\tikzstyle{node1} = [minimum width=7em,minimum height=1.7em,draw,thick,fill=red!20,rounded corners=0.3em];
\tikzstyle{node2} = [minimum width=7em,minimum height=2.5em,draw,thick,fill=blue!20,rounded corners=0.3em];
\tikzstyle{node3} = [minimum width=7em,minimum height=2.5em,draw,thick,fill=green!20,rounded corners=0.3em];
\tikzstyle{node4} = [minimum width=7em,minimum height=1.7em,draw,thick,fill=orange!20,rounded corners=0.3em];
\tikzstyle{node5} = [minimum width=4.5em,minimum height=1.7em,thick,dashed];
% s1 sits at the origin; s2 is an empty node2 box placed above and to the right of s1.
\node [node1,anchor=south west] (s1) at (0,0) {{ \small{源语言词串}}};
\node [node2,anchor=south] (s2) at ([xshift=2.5em,yshift=2em]s1.north) {};
......
......@@ -4,9 +4,9 @@
\begin{tikzpicture}
\begin{scope}
% Box styles for this figure: 7em x 1.7em rounded boxes filled red/blue/green.
% Each is defined twice; the second definition adds `thick,draw` and is the one in effect.
% NOTE(review): looks like old/new diff lines shown together -- confirm which set should remain.
\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=red!20,rounded corners=0.3em];
\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=blue!20,rounded corners=0.3em];
\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=green!20,rounded corners=0.3em];
\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,thick,draw,fill=red!20,rounded corners=0.3em];
\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,thick,draw,fill=blue!20,rounded corners=0.3em];
\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,thick,draw,fill=green!20,rounded corners=0.3em];
% Start of the left-hand column: s1 at the origin, s2 stacked 1.5em below it.
% The per-node `minimum width=8em` overrides the 7em width from the style.
\node [datanode,anchor=north west,minimum height=1.7em,minimum width=8em] (s1) at (0,0) {{ \small{源文句子}}};
\node [modelnode,anchor=north,minimum height=1.7em,minimum width=8em] (s2) at ([yshift=-1.5em]s1.south) {{ \small{源语词法分析}}};
......@@ -14,14 +14,14 @@
% Left column continues: s4 below s3, s5 below s4 (1.5em gaps).
\node [modelnode,anchor=north,minimum height=1.7em,minimum width=8em] (s4) at ([yshift=-1.5em]s3.south) {{ \small{源语句法分析}}};
\node [datanode,anchor=north,minimum height=1.7em,minimum width=8em] (s5) at ([yshift=-1.5em]s4.south) {{ \small{源文结构}}};
% Right column head t1, placed to the east of s1. It is declared twice with different
% horizontal offsets (14em, then 20em); t2..t5 below hang off whichever t1 is defined last.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\node [datanode,anchor=west,minimum height=1.7em,minimum width=8em] (t1) at ([xshift=14em]s1.east) {{ \small{译文句子}}};
\node [datanode,anchor=west,minimum height=1.7em,minimum width=8em] (t1) at ([xshift=20em]s1.east) {{ \small{译文句子}}};
\node [modelnode,anchor=north,minimum height=1.7em,minimum width=8em] (t2) at ([yshift=-1.5em]t1.south) {{ \small{目标语词法生成}}};
\node [datanode,anchor=north,minimum height=1.7em,minimum width=8em] (t3) at ([yshift=-1.5em]t2.south) {{ \small{译文词串}}};
\node [modelnode,anchor=north,minimum height=1.7em,minimum width=8em] (t4) at ([yshift=-1.5em]t3.south) {{ \small{目标语句法生成}}};
\node [datanode,anchor=north,minimum height=1.7em,minimum width=8em] (t5) at ([yshift=-1.5em]t4.south) {{ \small{译文结构}}};
% Two stacked boxes st1/st2 placed to the east of s5.
% They are declared twice: 8em wide at xshift=2.5em, then 13em wide with inner sep=3pt at xshift=3.5em.
\node [decodingnode,anchor=west,minimum height=1.7em,minimum width=8em] (st1) at ([xshift=2.5em,yshift=0.85em]s5.east) {{ \small{源语-目标语词汇转换}}};
\node [decodingnode,anchor=north,minimum height=1.7em,minimum width=8em] (st2) at ([yshift=0.05em]st1.south) {{ \small{源语-目标语结构转换}}};
\node [decodingnode,anchor=west,minimum height=1.7em,minimum width=13em,inner sep=3pt] (st1) at ([xshift=3.5em,yshift=0.85em]s5.east) {{ \small{源语-目标语词汇转换}}};
\node [decodingnode,anchor=north,minimum height=1.7em,minimum width=13em,inner sep=3pt] (st2) at ([yshift=0.05em]st1.south) {{ \small{源语-目标语结构转换}}};
% Downward arrows linking consecutive boxes of the left column.
\draw [->,very thick] (s1.south) -- (s2.north);
\draw [->,very thick] (s2.south) -- (s3.north);
......
......@@ -8,22 +8,22 @@
{
\begin{tikzpicture}
% Left chart: two filled bars (mt, human) with white bold labels.
% Each bar is declared twice, with fill strength 50 and then 70; the later node owns the name.
% NOTE(review): looks like old/new diff lines shown together -- confirm which set should remain.
% NOTE(review): the label text of the `human` bar is empty (\bfseries{}) -- confirm this is intended.
\node [anchor=south west, fill=blue!50, minimum width=1.1cm, minimum height=2.3cm] (mt) at (1,0) {{\color{white} {\small\sffamily\bfseries{机器}}}};
\node [anchor=south west, fill=red!50, minimum width=1.1cm, minimum height=2.7cm] (human) at ([xshift=0.5cm]mt.south east) {{\color{white} {\small\sffamily\bfseries{}}}};
\node [anchor=south west, fill=blue!70, minimum width=1.1cm, minimum height=2.3cm] (mt) at (1,0) {{\color{white} {\small\sffamily\bfseries{机器}}}};
\node [anchor=south west, fill=red!70, minimum width=1.1cm, minimum height=2.7cm] (human) at ([xshift=0.5cm]mt.south east) {{\color{white} {\small\sffamily\bfseries{}}}};
% Numeric values printed on top of the bars.
\node [anchor=south] (mtscore) at (mt.north) {3.9};
\node [anchor=south] (humanscore) at (human.north) {4.7};
% Horizontal and vertical axis arrows, both starting 0.5cm left of the mt bar's base.
% Each axis appears twice (`thick`, then `very thick`).
\draw [->,thick] ([xshift=-0.5cm]mt.south west) -- ([xshift=0.5cm]human.south east);
\draw [->,thick] ([xshift=-0.5cm]mt.south west) -- ([xshift=-0.5cm,yshift=3.2cm]mt.south west);
\draw [->,very thick] ([xshift=-0.5cm]mt.south west) -- ([xshift=0.5cm]human.south east);
\draw [->,very thick] ([xshift=-0.5cm]mt.south west) -- ([xshift=-0.5cm,yshift=3.2cm]mt.south west);
% Axis labels: x1 at the end of the horizontal axis, y1/y2 stacked at the top of the vertical axis.
\node [anchor=north west] (x1) at ([xshift=0.0cm]human.south east) {\footnotesize{评价对象}};
\node [anchor=north east] (y1) at ([xshift=-0.5cm,yshift=3.2cm]mt.south west) {\footnotesize{打分}};
\node [anchor=north] (y2) at ([yshift=-0cm]y1.south) {\footnotesize{(分)}};
% Right chart: same layout, positioned relative to mt.east (xshift 13.0em, then 16.0em in the repeat).
% The mt1 bar is 1.5cm high, lower than the 2.3cm of mt.
\node [anchor=south west, fill=blue!50, minimum width=1.1cm, minimum height=1.5cm] (mt1) at ([xshift=13.0em,yshift=-3.0em]mt.east) {{\color{white} {\small\sffamily\bfseries{机器}}}};
\node [anchor=south west, fill=red!50, minimum width=1.1cm, minimum height=2.7cm] (human1) at ([xshift=0.5cm]mt1.south east) {{\color{white} {\small\sffamily\bfseries{}}}};
\node [anchor=south west, fill=blue!70, minimum width=1.1cm, minimum height=1.5cm] (mt1) at ([xshift=16.0em,yshift=-3.0em]mt.east) {{\color{white} {\small\sffamily\bfseries{机器}}}};
\node [anchor=south west, fill=red!70, minimum width=1.1cm, minimum height=2.7cm] (human1) at ([xshift=0.5cm]mt1.south east) {{\color{white} {\small\sffamily\bfseries{}}}};
\node [anchor=south] (mtscore1) at (mt1.north) {47};
\node [anchor=south] (humanscore1) at (human1.north) {100};
\draw [->,thick] ([xshift=-0.5cm]mt1.south west) -- ([xshift=0.5cm]human1.south east);
\draw [->,thick] ([xshift=-0.5cm]mt1.south west) -- ([xshift=-0.5cm,yshift=3.2cm]mt1.south west);
\draw [->,very thick] ([xshift=-0.5cm]mt1.south west) -- ([xshift=0.5cm]human1.south east);
\draw [->,very thick] ([xshift=-0.5cm]mt1.south west) -- ([xshift=-0.5cm,yshift=3.2cm]mt1.south west);
% The label names x1, y1, y2 are reused from the left chart; from here on they refer to these nodes.
\node [anchor=north west] (x1) at ([xshift=0.0cm]human1.south east) {\footnotesize{评价对象}};
\node [anchor=north east] (y1) at ([xshift=-0.5cm,yshift=3.2cm]mt1.south west) {\footnotesize{打分}};
\node [anchor=north] (y2) at ([yshift=-0cm]y1.south) {\footnotesize{(分)}};
......
......@@ -21,7 +21,7 @@
% Background-layer frame fitted around the example nodes and examplebaselabel.
\begin{pgfonlayer}{background}
{
% Same fit list on both lines; the second adds `thick`.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (example1) (example1part2) (example2) (example2part2) (example3) (examplebaselabel)] {};
\node[rectangle,draw=ublue, thick,inner sep=0mm] [fit = (example1) (example1part2) (example2) (example2part2) (example3) (examplebaselabel)] {};
}
\end{pgfonlayer}
......@@ -40,7 +40,7 @@
% Background-layer frame fitted around entry1..entry4 and dictionarylabel.
\begin{pgfonlayer}{background}
{
% Same fit list on both lines; the second adds `thick`.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\node[rectangle,draw=ublue, inner sep=0mm] [fit = (entry1) (entry2) (entry3) (entry4) (dictionarylabel)] {};
\node[rectangle,draw=ublue, thick,inner sep=0mm] [fit = (entry1) (entry2) (entry3) (entry4) (dictionarylabel)] {};
}
\end{pgfonlayer}
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -38,7 +38,7 @@
%----------------------------------------------
% Figure float fig:1-1: a centred PNG from ./Chapter1/Figures with a caption.
\begin{figure}[htp]
\centering
% The same image file is included twice, at scale 0.2 and at scale 0.25;
% as written, both copies would be typeset.
% NOTE(review): looks like old/new diff lines shown together -- confirm which scale should remain.
\includegraphics[scale=0.2]{./Chapter1/Figures/figure-zh_en-example.png}
\includegraphics[scale=0.25]{./Chapter1/Figures/figure-zh_en-example.png}
\caption{通过计算机将汉语翻译为英语}
% The label follows the caption, so it picks up the figure number.
\label{fig:1-1}
\end{figure}
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -131,8 +131,8 @@
% Table body rows: a left-aligned system name (with a right rule) followed by three numeric cells;
% the last cell is written as mantissa times a power of ten. Column headers are outside this span.
\multicolumn{1}{l|}{GNMT+RL} & 24.6 & 39.92 & 1.4$\times 10^{20}$ \\
\multicolumn{1}{l|}{ConvS2S} & 25.16 & 40.46 & 1.5$\times 10^{20}$ \\
\multicolumn{1}{l|}{MoE} & 26.03 & 40.56 & 1.2$\times 10^{20}$ \\
% The two Transformer rows appear twice with identical numbers; only the row label text differs
% ("Transformer(Base Model)" / "Transformer (Big)" vs "Transformer (Base Model) " / "Transformer (Big Model)").
% The values 28.4 and 41.8 are set in small bold sans-serif.
% NOTE(review): looks like old/new diff lines shown together -- confirm which pair should remain.
\multicolumn{1}{l|}{Transformer(Base Model)} & 27.3 &38.1 & 3.3$\times 10^{18}$ \\
\multicolumn{1}{l|}{Transformer (Big)} & {\small\sffamily\bfseries{28.4}} & {\small\sffamily\bfseries{41.8}} & 2.3$\times 10^{19}$ \\
\multicolumn{1}{l|}{Transformer (Base Model) } & 27.3 &38.1 & 3.3$\times 10^{18}$ \\
\multicolumn{1}{l|}{Transformer (Big Model)} & {\small\sffamily\bfseries{28.4}} & {\small\sffamily\bfseries{41.8}} & 2.3$\times 10^{19}$ \\
\end{tabular}
\end{table}
%----------------------------------------------
%----------------------------------------------
......
\begin{tikzpicture}
% Five cell styles node1..node5 of identical size (3em x 1.5em, no inner padding),
% differing only in fill colour.
% First set: mixes of `ugreen` and blue at strengths 10..50.
% Second set: plain blue at strengths 10, 25, 40, 55, 70; being later, these are in effect.
% NOTE(review): looks like old/new diff lines shown together -- confirm which set should remain.
\tikzstyle{node1}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!10!blue!10]
\tikzstyle{node2}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!20!blue!20]
\tikzstyle{node3}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!30!blue!30]
\tikzstyle{node4}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!40!blue!40]
\tikzstyle{node5}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=ugreen!50!blue!50]
\tikzstyle{node1}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=blue!10]
\tikzstyle{node2}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=blue!25]
\tikzstyle{node3}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=blue!40]
\tikzstyle{node4}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=blue!55]
\tikzstyle{node5}=[inner sep=0mm,minimum height=1.5em,minimum width=3em,fill=blue!70]
\begin{scope}
\foreach \i / \j / \z in
......@@ -27,7 +27,7 @@
\node[anchor=north west] (y1) at ([xshift=14.5em,yshift=-1.3em]n00.south west) {训练时长};
\node[anchor=west,font=\small] (t1) at ([xshift=0.5em,yshift=0em]n52.east) {$\cdots$};
\node[anchor=west,node1,minimum width=0.5em] (c1) at ([xshift=3em,yshift=-2.5em]t1.east) {};
\node[anchor=west,node1,minimum width=0.5em] (c1) at ([xshift=5em,yshift=0em]n50.east) {};
\node[anchor=south,node2,minimum width=0.5em] (c2) at ([xshift=0em,yshift=0em]c1.north) {};
\node[anchor=south,node3,minimum width=0.5em] (c3) at ([xshift=0em,yshift=0em]c2.north) {};
\node[anchor=south,node4,minimum width=0.5em] (c4) at ([xshift=0em,yshift=0em]c3.north) {};
......
\begin{tikzpicture}
% Panel styles: tnode, pnode and mnode are 8em x 6.6em rounded rectangles with green, yellow and red fills.
% The second set lowers the fill strength to 15 and adds `drop shadow`; being later, it is in effect.
% NOTE(review): `drop shadow` is provided by the TikZ shadows library -- confirm it is loaded.
% NOTE(review): looks like old/new diff lines shown together -- confirm which set should remain.
\tikzstyle{tnode} = [rectangle,inner sep=0em,minimum width=8em,minimum height=6.6em,rounded corners=5pt,fill=green!20]
\tikzstyle{pnode} = [rectangle,inner sep=0em,minimum width=8em,minimum height=6.6em,rounded corners=5pt,fill=yellow!30]
\tikzstyle{mnode} = [rectangle,inner sep=0em,minimum width=8em,minimum height=6.6em,rounded corners=5pt,fill=red!20]
\tikzstyle{tnode} = [rectangle,inner sep=0em,minimum width=8em,minimum height=6.6em,rounded corners=5pt,fill=green!15,drop shadow]
\tikzstyle{pnode} = [rectangle,inner sep=0em,minimum width=8em,minimum height=6.6em,rounded corners=5pt,fill=yellow!15,drop shadow]
\tikzstyle{mnode} = [rectangle,inner sep=0em,minimum width=8em,minimum height=6.6em,rounded corners=5pt,fill=red!15,drop shadow]
% wnode: unpadded text style with a 1.5em minimum height.
\tikzstyle{wnode} = [inner sep=0em,minimum height=1.5em]
% first row
......@@ -19,13 +19,13 @@
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.7em,draw,ugreen!60,dashed,very thick,rounded corners=7pt] [fit = (n1) (n4)] (box1) {};
\node [rectangle,inner sep=0.7em,draw,ugreen!60,dashed,thick,rounded corners=7pt] [fit = (n1) (n4)] (box1) {};
\end{pgfonlayer}
\node [anchor=west,align=left,font=\footnotesize] (nt1) at ([xshift=0.1em,yshift=0em]n2.east) {统计词表和\\[0.5ex]词频};
\node [anchor=west,align=left,font=\footnotesize] (nt2) at ([xshift=0.1em,yshift=1em]n3.east) {按字符切分};
\draw [->,very thick,ublue] ([xshift=0em,yshift=0em]n2.east)--([xshift=0em,yshift=0em]n3.west);
\draw [->,very thick,ublue] ([xshift=0em,yshift=0em]n3.east)--([xshift=0em,yshift=0em]n4.west);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n2.east)--([xshift=0em,yshift=0em]n3.west);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n3.east)--([xshift=0em,yshift=0em]n4.west);
%第二排
\node [anchor=north west,wnode,font=\large] (n5) at ([xshift=0em,yshift=-2em]n2.south west) {BPE学习};
......@@ -34,7 +34,7 @@
\node [anchor=north west,align=left,font=\footnotesize] (n61) at ([xshift=0.2em,yshift=-0.2em]n6.north west) {{\small 词表}\\l/o/w\\l/o/w/e/r\\n/e/w/e/s/t\\w/i/d/e/s/t\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n62) at ([xshift=-0.2em,yshift=-0.2em]n6.north east) {{\small 频率}\\6\\2\\6\\3\\$\ldots$};
\draw [->,very thick,ublue] ([xshift=-0em,yshift=-0em]n4.south) .. controls +(south:4em) and +(north:4em) .. ([xshift=-0em,yshift=-0em]n6.north);
\draw [->,thick,ublue] ([xshift=-0em,yshift=-0em]n4.south) .. controls +(south:4em) and +(north:4em) .. ([xshift=1em,yshift=-0em]n6.north);
\node [anchor=west,pnode] (n7) at ([xshift=5em,yshift=0em]n6.east) {};
\node [anchor=north west,align=left,font=\footnotesize] (n71) at ([xshift=0.2em,yshift=-0.2em]n7.north west) {{\small 二元组}\\(e,s)\\(s,t)\\(l,o)\\(o,w)\\$\ldots$};
......@@ -45,15 +45,15 @@
\node [anchor=west,align=left,font=\footnotesize] (nt3) at ([xshift=0.1em,yshift=0em]n6.east) {统计二元组\\[0.5ex]的频次};
\node [anchor=west,align=left,font=\footnotesize] (nt4) at ([xshift=0em,yshift=-0.4em]n7.east) {频次最高的\\[0.5ex](e,s)加入表\\};
\draw [->,very thick,ublue] ([xshift=0em,yshift=0em]n6.east)--([xshift=0em,yshift=0em]n7.west);
\draw [->,very thick,ublue] ([xshift=0em,yshift=0em]n7.east)--([xshift=0em,yshift=0em]n8.west);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n6.east)--([xshift=0em,yshift=0em]n7.west);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n7.east)--([xshift=0em,yshift=0em]n8.west);
%第三排
\node [anchor=north,tnode] (n9) at ([xshift=0em,yshift=-2.5em]n6.south) {};
\node [anchor=north west,align=left,font=\footnotesize] (n91) at ([xshift=0.2em,yshift=-0.2em]n9.north west) {{\small 词表}\\l/o/w\\l/o/w/e/r\\n/e/w/{\red es}/t\\w/i/d/{\red es}/t\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n92) at ([xshift=-0.2em,yshift=-0.2em]n9.north east) {{\small 频率}\\6\\2\\6\\3\\$\ldots$};
\draw [->,very thick,ublue] ([xshift=-0em,yshift=-0em]n8.south) .. controls +(south:3em) and +(north:3em) .. ([xshift=-0em,yshift=-0em]n9.north);
\draw [->,thick,ublue] ([xshift=-0em,yshift=-0em]n8.south) .. controls +(south:3em) and +(north:3em) .. ([xshift=1em,yshift=-0em]n9.north);
\node [anchor=north west,ublue,font=\footnotesize,align=left] (l1) at ([xshift=1em,yshift=-0em]n7.south east) {在词表中\\[0.8ex]合并(e,s)};
\node [anchor=west,pnode] (n10) at ([xshift=5em,yshift=0em]n9.east) {};
......@@ -65,17 +65,17 @@
\node [anchor=west,align=left,font=\footnotesize] (nt5) at ([xshift=0.1em,yshift=0em]n9.east) {统计二元组\\[0.5ex]的频次};
\node [anchor=west,align=left,font=\footnotesize] (nt6) at ([xshift=0em,yshift=-0.4em]n10.east) {频次最高的\\[0.5ex](es,t)加入表\\};
\draw [->,very thick,ublue] ([xshift=0em,yshift=0em]n9.east)--([xshift=0em,yshift=0em]n10.west);
\draw [->,very thick,ublue] ([xshift=0em,yshift=0em]n10.east)--([xshift=0em,yshift=0em]n11.west);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n9.east)--([xshift=0em,yshift=0em]n10.west);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]n10.east)--([xshift=0em,yshift=0em]n11.west);
% fourth row
% Ellipsis node `cd` below n10, the curved arrow from n11 into it, and the side label l2.
% These three statements appear twice: the repeat moves cd from 1.5em to 2.0em below n10,
% draws the arrow `thick` instead of `very thick`, and moves l2 from yshift=0.6em to yshift=-1.0em.
% NOTE(review): looks like old/new diff lines shown together -- confirm which set should remain.
\node [anchor=north,ublue] (cd) at ([xshift=0em,yshift=-1.5em]n10.south) {$\cdots$};
\draw [->,very thick,ublue] ([xshift=-0em,yshift=-0em]n11.south) .. controls +(south:2em) and +(north:2em) .. ([xshift=-0em,yshift=-0em]cd.north);
\node [anchor=north west,ublue,font=\footnotesize,align=left] (l2) at ([xshift=1em,yshift=0.6em]n10.south east) {在词表中\\[0.8ex]合并(es,t)};
\node [anchor=north,ublue] (cd) at ([xshift=0em,yshift=-2.0em]n10.south) {$\cdots$};
\draw [->,thick,ublue] ([xshift=-0em,yshift=-0em]n11.south) .. controls +(south:2em) and +(north:2em) .. ([xshift=-0em,yshift=-0em]cd.north);
\node [anchor=north west,ublue,font=\footnotesize,align=left] (l2) at ([xshift=1em,yshift=-1.0em]n10.south east) {在词表中\\[0.8ex]合并(es,t)};
% Two-line note l3 placed to the left of cd.
\node [anchor=east,ublue,align=left,font=\footnotesize] (l3) at ([xshift=-0.5em,yshift=0em]cd.west) {直至达到设定的符号合\\并表大小或无法合并};
% Dashed orange rounded frame (box2) on the background layer around n5, n8, l3 and cd;
% the repeat uses `thick` instead of `very thick`.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.7em,draw,orange!40,dashed,very thick,rounded corners=7pt] [fit = (n5) (n8) (l3) (cd)] (box2) {};
\node [rectangle,inner sep=0.7em,draw,orange!40,dashed,thick,rounded corners=7pt] [fit = (n5) (n8) (l3) (cd)] (box2) {};
\end{pgfonlayer}
%第五排
......@@ -90,9 +90,9 @@
\node [anchor=south west,wnode,font=\large] (n13) at ([xshift=0em,yshift=0.5em]n12.north west) {输出符号合并表};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.7em,draw,red!40,dashed,very thick,rounded corners=7pt] [fit = (n12) (n13)] (box3) {};
\node [rectangle,inner sep=0.7em,draw,red!40,dashed,thick,rounded corners=7pt] [fit = (n12) (n13)] (box3) {};
\end{pgfonlayer}
\draw [->,very thick,ublue] ([xshift=0em,yshift=0em]cd.south)--([xshift=0em,yshift=0em]n12.north);
\draw [->,thick,ublue] ([xshift=0em,yshift=0em]cd.south)--([xshift=0em,yshift=0em]n12.north);
\end{tikzpicture}
\ No newline at end of file
......@@ -18,10 +18,10 @@
% Two small math labels placed next to neuron_z (left: w_i^l, upper right: f).
% Both nodes are named `standard`; the name ends up referring to the second one.
\node [] (standard) at ([xshift=-1em]neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]neuron_z.east) {\scriptsize{$f$}};
% Incoming arrows from neuron_b, neuron_y3, neuron_y2 and neuron_y1 to neuron_z.
% First group: all four end at the single anchor neuron_z.west.
% Second group: the same arrows end at border angles 130, 170, 190 and 230 of neuron_z,
% so the arrow heads are spread along the node border instead of meeting at one point.
% NOTE(review): looks like old/new diff lines shown together -- confirm which group should remain.
\draw [->,line width=0.3mm] (neuron_b.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_y3.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_y2.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_y1.east) -- (neuron_z.west);
\draw [->,line width=0.3mm] (neuron_b.east) -- (neuron_z.130);
\draw [->,line width=0.3mm] (neuron_y3.east) -- (neuron_z.170);
\draw [->,line width=0.3mm] (neuron_y2.east) -- (neuron_z.190);
\draw [->,line width=0.3mm] (neuron_y1.east) -- (neuron_z.230);
% Outgoing arrow from neuron_z to neuron_y'.
\draw [->,line width=0.3mm] (neuron_z.east) -- (neuron_y'.west);
%dropout
......@@ -44,10 +44,10 @@
\node [] (standard) at ([xshift=-1em]drop_neuron_z.west) {\scriptsize{$\mathbf{w}_{i}^{l}$}};
\node [] (standard) at ([xshift=0.6em,yshift=0.3em]drop_neuron_z.east) {\scriptsize{$f$}};
%structure
% Incoming arrows from drop_neuron_b and drop_neuron_y3'/y2'/y1' to drop_neuron_z.
% First group: all four end at drop_neuron_z.west.
% Second group: the same arrows end at border angles 130, 170, 190 and 230 of drop_neuron_z.
% NOTE(review): looks like old/new diff lines shown together -- confirm which group should remain.
\draw [->,line width=0.3mm] (drop_neuron_b.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_y3'.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_y2'.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_y1'.east) -- (drop_neuron_z.west);
\draw [->,line width=0.3mm] (drop_neuron_b.east) -- (drop_neuron_z.130);
\draw [->,line width=0.3mm] (drop_neuron_y3'.east) -- (drop_neuron_z.170);
\draw [->,line width=0.3mm] (drop_neuron_y2'.east) -- (drop_neuron_z.190);
\draw [->,line width=0.3mm] (drop_neuron_y1'.east) -- (drop_neuron_z.230);
% Outgoing arrow from drop_neuron_z to drop_neuron_y'.
\draw [->,line width=0.3mm] (drop_neuron_z.east) -- (drop_neuron_y'.west);
%r
\draw [->,line width=0.3mm] (drop_neuron_y3.east) -- (drop_neuron_y3'.west);
......
\begin{tikzpicture}
% Style `node`: a 6em x 3em rounded box without inner padding.
% The second definition adds `draw,thick` (outlined border) and is the one in effect.
% NOTE(review): looks like old/new diff lines shown together -- confirm which one should remain.
\tikzstyle{node}=[inner sep=0mm,minimum height=3em,minimum width=6em,rounded corners=5pt]
\tikzstyle{node}=[inner sep=0mm, draw,thick,minimum height=3em,minimum width=6em,rounded corners=5pt]
% Four boxes laid out left to right, each anchored to the east side of the previous one.
\node[anchor=west,node,fill=ugreen!15] (n1) at (0,0) {训练集};
% n2 is declared twice, 2em and then 4em to the right of n1; n3 and n4 follow whichever n2 is defined last.
\node[anchor=west,node,fill=yellow!15] (n2) at ([xshift=2em,yshift=0em]n1.east) {难度评估器};
\node[anchor=west,node,fill=yellow!15] (n2) at ([xshift=4em,yshift=0em]n1.east) {难度评估器};
\node[anchor=west,node,fill=red!15] (n3) at ([xshift=4em,yshift=0em]n2.east) {训练调度器};
\node[anchor=west,node,fill=blue!15] (n4) at ([xshift=4em,yshift=0em]n3.east) {模型训练器};
......
......@@ -6,30 +6,30 @@
% Both shift lengths are set to 0.8 times \base (\base is defined outside this span).
\setlength{\YShift}{0.8\base}
\setlength{\XShift}{0.8\base}
% modelnode: tall narrow outlined box (2em x 4.5em) anchored at its top edge.
% The second definition adds `thick` and changes inner sep from 0pt to 2pt; it is the one in effect.
% NOTE(review): the doubled lines in this span look like old/new diff lines -- confirm which should remain.
\tikzstyle{modelnode} = [rectangle,draw,rounded corners=2pt,inner sep=0pt,minimum height=4.5em,minimum width=2em,font=\small,anchor=north]
\tikzstyle{modelnode} = [rectangle,draw,thick,rounded corners=2pt,inner sep=2pt,minimum height=4.5em,minimum width=2em,font=\small,anchor=north]
% Column origins for row 0: five student columns (stu01..stu05) and two teacher columns (tea01, tea02),
% each defined relative to the previous one.
\coordinate (stu01) at (0,0);
\coordinate (stu02) at ([xshift=3em]stu01);
\coordinate (stu03) at ([xshift=3em]stu02);
\coordinate (stu04) at ([xshift=3em]stu03);
\coordinate (stu05) at ([xshift=3em]stu04);
\coordinate (tea01) at ([xshift=8em]stu05);
\coordinate (tea02) at ([xshift=3em]tea01);
% The same coordinates are redefined with wider spacing (4em between columns, 10em before the teachers);
% these later definitions are the ones the loop below sees.
\coordinate (stu02) at ([xshift=4em]stu01);
\coordinate (stu03) at ([xshift=4em]stu02);
\coordinate (stu04) at ([xshift=4em]stu03);
\coordinate (stu05) at ([xshift=4em]stu04);
\coordinate (tea01) at ([xshift=10em]stu05);
\coordinate (tea02) at ([xshift=4em]tea01);
% iterations
% \curr is the row being drawn and \prev the row above it; row \curr hangs below row \prev.
\foreach \curr / \prev in {1/0,2/1,3/2}
{
% models
% First group of seven nodes uses a 2em vertical gap, the repeated group a 3em gap.
\node[modelnode,fill=yellow!20] (stu\curr1) at ([yshift=-2em]stu\prev1.south) {\rotatebox{90}{学生模型 $1$}};
\node[modelnode,fill=yellow!20] (stu\curr2) at ([yshift=-2em]stu\prev2.south) {\rotatebox{90}{学生模型 $2$}};
\node[modelnode,fill=yellow!20] (stu\curr3) at ([yshift=-2em]stu\prev3.south) {\rotatebox{90}{学生模型 $3$}};
\node[modelnode,fill=yellow!20] (stu\curr4) at ([yshift=-2em]stu\prev4.south) {\rotatebox{90}{学生模型 $4$}};
\node[modelnode,fill=yellow!20] (stu\curr5) at ([yshift=-2em]stu\prev5.south) {\rotatebox{90}{学生模型 $5$}};
\node[modelnode] (tea\curr1) at ([yshift=-2em]tea\prev1.south) {\rotatebox{90}{\color{red!60} 教师模型 $1$}};
\node[modelnode] (tea\curr2) at ([yshift=-2em]tea\prev2.south) {\rotatebox{90}{\color{blue!60} 教师模型 $2$}};
\node[modelnode,fill=yellow!20] (stu\curr1) at ([yshift=-3em]stu\prev1.south) {\rotatebox{90}{学生模型 $1$}};
\node[modelnode,fill=yellow!20] (stu\curr2) at ([yshift=-3em]stu\prev2.south) {\rotatebox{90}{学生模型 $2$}};
\node[modelnode,fill=yellow!20] (stu\curr3) at ([yshift=-3em]stu\prev3.south) {\rotatebox{90}{学生模型 $3$}};
\node[modelnode,fill=yellow!20] (stu\curr4) at ([yshift=-3em]stu\prev4.south) {\rotatebox{90}{学生模型 $4$}};
\node[modelnode,fill=yellow!20] (stu\curr5) at ([yshift=-3em]stu\prev5.south) {\rotatebox{90}{学生模型 $5$}};
\node[modelnode] (tea\curr1) at ([yshift=-3em]tea\prev1.south) {\rotatebox{90}{\color{red!60} 教师模型 $1$}};
\node[modelnode] (tea\curr2) at ([yshift=-3em]tea\prev2.south) {\rotatebox{90}{\color{blue!60} 教师模型 $2$}};
% ensemble labels
% Labelled arrow from the fifth student box to the first teacher box of the row;
% it appears with a latex' tip and again as a very thick `->` arrow.
\draw[-latex'] ([xshift=2pt]stu\curr5.east) to node [auto] {\small 集成} ([xshift=-2pt]tea\curr1.west);
\draw[->,very thick] ([xshift=2pt]stu\curr5.east) to node [auto] {\small 集成} ([xshift=-2pt]tea\curr1.west);
}
% iteration labels
......@@ -37,8 +37,8 @@
\node[font=\small,anchor=east,purple!80] (iterate2) at ([xshift=-1em]stu31.west) {\rotatebox{90}{轮数 $2$}};
% distillation labels
\node[font=\small,anchor=south west] (distill1) at ([yshift=0.8em]iterate1.north west) {知识蒸馏};
\node[font=\small,anchor=south west] (distill2) at ([yshift=0.8em]iterate2.north west) {知识蒸馏};
\node[font=\small,anchor=south west] (distill1) at ([yshift=1.2em]iterate1.north west) {知识蒸馏};
\node[font=\small,anchor=south west] (distill2) at ([yshift=1.2em]iterate2.north west) {知识蒸馏};
% 学生模型 groups
\begin{pgfonlayer}{background}
......@@ -50,10 +50,10 @@
\end{pgfonlayer}
% distillation
% Curved arrows from the bottom of each teacher box to the top of a student group in the next row:
% tea11 -> group21, tea12 -> group22, tea21 -> group32, tea22 -> group31.
% Red is used for arrows leaving tea11/tea21 and blue for arrows leaving tea12/tea22.
% The tea11 and tea12 arrows appear twice with different incoming control-point lengths
% (2em -> 3em and 1.5em -> 2em), and the tea22 arrow likewise (1.5em -> 3em); the tea21 arrow appears once.
% NOTE(review): looks like old/new diff lines shown together -- confirm which versions should remain.
\draw[-latex',red!60,very thick] (tea11.south) .. controls +(south:1.5em) and +(north:2em) .. (group21.north);
\draw[-latex',blue!60,very thick] (tea12.south) .. controls +(south:2em) and +(north:1.5em) .. (group22.north);
\draw[-latex',red!60,very thick] (tea11.south) .. controls +(south:1.5em) and +(north:3em) .. (group21.north);
\draw[-latex',blue!60,very thick] (tea12.south) .. controls +(south:2em) and +(north:2em) .. (group22.north);
\draw[-latex',red!60,very thick] (tea21.south) .. controls +(south:2em) and +(north:2.5em) .. (group32.north);
\draw[-latex',blue!60,very thick] (tea22.south) .. controls +(south:2em) and +(north:1.5em) .. (group31.north);
\draw[-latex',blue!60,very thick] (tea22.south) .. controls +(south:2em) and +(north:3em) .. (group31.north);
\end{tikzpicture}
\ No newline at end of file
......@@ -8,8 +8,8 @@
\begin{scope}[]
% Box styles for this scope: rnnnode (red fill) and snode (blue fill) are outlined 3em x 1.5em boxes.
% Both are defined twice; the second definition adds `thick` and is the one in effect.
% NOTE(review): looks like old/new diff lines shown together -- confirm which set should remain.
\tikzstyle{rnnnode} = [draw,inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt,fill=red!15]
\tikzstyle{snode} = [draw,inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt,fill=blue!15]
\tikzstyle{rnnnode} = [draw,thick,inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt,fill=red!15]
\tikzstyle{snode} = [draw,thick,inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt,fill=blue!15]
% ynode: same size as the boxes above but with no outline and no fill.
\tikzstyle{ynode} = [inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt]
......@@ -18,11 +18,11 @@
% State boxes n3 and n4, placed left to right with 2em gaps after n2.
% \mathbi is a macro defined outside this span.
\node [anchor=west,rnnnode] (n3) at ([xshift=2em,yshift=0em]n2.east) {$\mathbi{h}_{j-1}$};
\node [anchor=west,rnnnode] (n4) at ([xshift=2em,yshift=0em]n3.east) {$\mathbi{h}_{j}$};
% Above n3, n4 and n1: a Softmax box (n5, n7, n13) and, above that, an output label (n6, n8, n14).
% Each output label appears twice, first with a tilde accent and then with a hat accent on y;
% both are typeset at the same position, and the node name refers to the later one.
% NOTE(review): looks like old/new diff lines shown together -- confirm which accent should remain.
\node [anchor=south,snode,font=\footnotesize] (n5) at ([xshift=0em,yshift=1em]n3.north) {Softmax};
\node [anchor=south,ynode] (n6) at ([xshift=0em,yshift=1em]n5.north) {$\tilde{{y}}_{j-1}$};
\node [anchor=south,ynode] (n6) at ([xshift=0em,yshift=1em]n5.north) {$\hat{{y}}_{j-1}$};
\node [anchor=south,snode,font=\footnotesize] (n7) at ([xshift=0em,yshift=1em]n4.north) {Softmax};
\node [anchor=south,ynode] (n8) at ([xshift=0em,yshift=1em]n7.north) {$\tilde{{y}}_{j}$};
\node [anchor=south,ynode] (n8) at ([xshift=0em,yshift=1em]n7.north) {$\hat{{y}}_{j}$};
\node [anchor=south,snode,font=\footnotesize] (n13) at ([xshift=0em,yshift=1em]n1.north) {Softmax};
\node [anchor=south,ynode] (n14) at ([xshift=0em,yshift=1em]n13.north) {$\tilde{{y}}_{1}$};
\node [anchor=south,ynode] (n14) at ([xshift=0em,yshift=1em]n13.north) {$\hat{{y}}_{1}$};
% Input labels placed 1em below n3 and n4.
\node [anchor=north] (n11) at ([xshift=0em,yshift=-1em]n3.south) {${{y}}_{j-2}$};
\node [anchor=north] (n12) at ([xshift=0em,yshift=-1em]n4.south) {${{y}}_{j-1}$};
......@@ -122,8 +122,8 @@
\begin{scope}[yshift=-2in]
\tikzstyle{rnnnode} = [draw,inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt,fill=red!15]
\tikzstyle{snode} = [draw,inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt,fill=blue!15]
\tikzstyle{rnnnode} = [draw,thick,inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt,fill=red!15]
\tikzstyle{snode} = [draw,thick,inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt,fill=blue!15]
\tikzstyle{ynode} = [inner sep=2pt,minimum width=3em,minimum height=1.5em,rounded corners=1pt]
......@@ -132,16 +132,16 @@
\node [anchor=west,rnnnode] (n3) at ([xshift=2em,yshift=0em]n2.east) {$\mathbi{h}_{j-1}$};
\node [anchor=west,rnnnode] (n4) at ([xshift=2em,yshift=0em]n3.east) {$\mathbi{h}_{j}$};
\node [anchor=south,snode,font=\footnotesize] (n5) at ([xshift=0em,yshift=1em]n3.north) {Softmax};
\node [anchor=south,ynode] (n6) at ([xshift=0em,yshift=1em]n5.north) {$\tilde{{y}}_{j-1}$};
\node [anchor=south,ynode] (n6) at ([xshift=0em,yshift=1em]n5.north) {$\hat{{y}}_{j-1}$};
\node [anchor=south,snode,font=\footnotesize] (n7) at ([xshift=0em,yshift=1em]n4.north) {Softmax};
\node [anchor=south,ynode] (n8) at ([xshift=0em,yshift=1em]n7.north) {$\tilde{{y}}_{j}$};
\node [anchor=south,ynode] (n8) at ([xshift=0em,yshift=1em]n7.north) {$\hat{{y}}_{j}$};
\node [anchor=south,snode,font=\footnotesize] (n13) at ([xshift=0em,yshift=1em]n1.north) {Softmax};
\node [anchor=south,ynode] (n14) at ([xshift=0em,yshift=1em]n13.north) {$\tilde{{y}}_{1}$};
\node [anchor=south,ynode] (n14) at ([xshift=0em,yshift=1em]n13.north) {$\hat{{y}}_{1}$};
\node [anchor=north] (n9) at ([xshift=0em,yshift=-3em]n4.south) {\small{(b) 推断阶段}};
\node [anchor=north] (n11) at ([xshift=0em,yshift=-1em]n3.south) {$\tilde{{y}}_{j-2}$};
\node [anchor=north] (n12) at ([xshift=0em,yshift=-1em]n4.south) {$\tilde{{y}}_{j-1}$};
\node [anchor=north] (n11) at ([xshift=0em,yshift=-1em]n3.south) {$\hat{{y}}_{j-2}$};
\node [anchor=north] (n12) at ([xshift=0em,yshift=-1em]n4.south) {$\hat{{y}}_{j-1}$};
\node [anchor=north] (x1) at ([xshift=0em,yshift=-1em]n1.south) {$\langle$sos$\rangle$};
......@@ -166,19 +166,19 @@
\draw [->,thick,dotted] ([xshift=-2.5em,yshift=1em]n11.north) .. controls +(south:2em) and +(west:0.1em) .. ([xshift=0.2em,yshift=0em]n11.west);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n14.east) .. controls +(east:0.3em) and +(north:2em) .. ([xshift=2.5em,yshift=-0.5em]n14.south);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n6.east) .. controls ([xshift=2em,yshift=1em]n6.east) and ([xshift=-2em,yshift=-2em]n4.south west) .. ([xshift=0.2em,yshift=-0em]n12.west);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n14.east) .. controls +(east:0.3em) and +(north:1.5em) .. ([xshift=2.5em,yshift=-0.5em]n14.south);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n6.east) .. controls +(east:2em) and +(west:2em) .. ([xshift=0.2em,yshift=-0em]n12.west);
\draw [->,thick] ([xshift=-0em,yshift=0em]n11.north)--([xshift=-0em,yshift=0em]n3.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n12.north)--([xshift=-0em,yshift=-0em]n4.south);
\node [anchor=west] (n10) at ([xshift=4em,yshift=0em]n8.east) {模型输出$\tilde{{y}}$};
\node [anchor=west] (n10) at ([xshift=4em,yshift=0em]n8.east) {模型输出$\hat{{y}}$};
\node [anchor=north west] (y1) at ([xshift=0.5em,yshift=-7em]n10.south west) {$\tilde{y}_{1}$};
\node [anchor=west] (y2) at ([xshift=1em,yshift=0em]y1.east) {$\tilde{y}_{2}$};
\node [anchor=west] (y3) at ([xshift=1em,yshift=0em]y2.east) {$\tilde{y}_{3}$};
\node [anchor=north west] (y1) at ([xshift=0.5em,yshift=-7em]n10.south west) {$\hat{y}_{1}$};
\node [anchor=west] (y2) at ([xshift=1em,yshift=0em]y1.east) {$\hat{y}_{2}$};
\node [anchor=west] (y3) at ([xshift=1em,yshift=0em]y2.east) {$\hat{y}_{3}$};
\node [anchor=west] (y4) at ([xshift=0.5em,yshift=0em]y3.east) {$\ldots$};
\node [anchor=west] (y5) at ([xshift=0.5em,yshift=0em]y4.east) {$\tilde{y}_{n}$};
\node [anchor=west] (y5) at ([xshift=0.5em,yshift=0em]y4.east) {$\hat{y}_{n}$};
\node [anchor=center,prob,minimum size=0.3em] (label11) at ([xshift=-0.1em,yshift=1em]y1.north) {};
......
......@@ -3,16 +3,16 @@
\begin{tikzpicture}
\tikzstyle{rnnnode} = [draw,inner sep=4pt,minimum width=2em,minimum height=2em,rounded corners=1pt,fill=green!20]
\tikzstyle{snode} = [draw,inner sep=4pt,minimum width=2em,minimum height=2em,rounded corners=1pt,fill=red!20]
\tikzstyle{rnnnode} = [draw,inner sep=4pt,minimum width=2em,minimum height=2em,rounded corners=1pt,fill=green!15]
\tikzstyle{snode} = [draw,inner sep=4pt,minimum width=2em,minimum height=2em,rounded corners=1pt,fill=red!15]
\tikzstyle{wode} = [inner sep=0pt,minimum width=2em,minimum height=2em,rounded corners=0pt]
\node [anchor=west,wode] (n1) at (0,0) {$y$};
\node [anchor=north west,wode] (n2) at ([xshift=3em,yshift=-2.5em]n1.south east) {$x$};
\node [anchor=south west,rnnnode] (n3) at ([xshift=8em,yshift=0.5em]n2.north east) {生成模型$G$};
\node [anchor=south east,wode] (n4) at ([xshift=-2em,yshift=0em]n3.north west) {$\tilde{y}$};
\node [anchor=south,snode] (n5) at ([xshift=0em,yshift=6em]n2.north) {判别网络$D$};
% n6 is a two-line annotation positioned relative to n5 (the discriminator box).
% Its second line reads "生成奖励信号" ("generate the reward signal"); the leading
% character 生 was missing, which left the phrase ungrammatical.
\node [anchor=west,align=left,font=\small] (n6) at ([xshift=15em,yshift=-3em]n5.east) {根据$(\seq{x},\seq{\tilde{y}})$\\生成奖励信号};
\node [anchor=south west,rnnnode,thick] (n3) at ([xshift=8em,yshift=0.5em]n2.north east) {生成模型$G$};
\node [anchor=south east,wode] (n4) at ([xshift=-2em,yshift=0em]n3.north west) {$\hat{y}$};
\node [anchor=south,snode,thick] (n5) at ([xshift=0em,yshift=6em]n2.north) {判别网络$D$};
\node [anchor=west,align=left,font=\small] (n6) at ([xshift=15em,yshift=-3em]n5.east) {根据$(\seq{x},\seq{\hat{y}})$\\生成奖励信号};
\draw [->,thick] ([xshift=0em,yshift=-0.3em]n1.north)--([xshift=-0.3em,yshift=-0.1em]n5.south);
......
......@@ -38,35 +38,35 @@
\node [anchor=south,font=\scriptsize] (one_hot_w7) at (one_hot_label7.north) {$0$};
%label smoothing
\node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label1) at ([xshift=1.5em,yshift=-4.4em]model.east) {};
\node [anchor=west,minimum width=1.2em,minimum height=0.2em,fill=orange!50,inner sep=0pt] (label1) at ([xshift=1.5em,yshift=-4.4em]model.east) {};
\node [anchor=south,font=\scriptsize] (w1) at (label1.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label2) at (label1.south east) {};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=orange!50,inner sep=0pt] (label2) at (label1.south east) {};
\node [anchor=south,font=\scriptsize] (w2) at (label2.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.8em,fill=red!50,inner sep=0pt] (label3) at (label2.south east) {};
\node [anchor=south west,minimum width=1.2em,minimum height=0.8em,fill=orange!50,inner sep=0pt] (label3) at (label2.south east) {};
\node [anchor=south,font=\scriptsize] (w3) at (label3.north) {{\color{red} $0.4$}};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label4) at (label3.south east) {};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=orange!50,inner sep=0pt] (label4) at (label3.south east) {};
\node [anchor=south,font=\scriptsize] (w5) at (label4.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label5) at (label4.south east) {};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=orange!50,inner sep=0pt] (label5) at (label4.south east) {};
\node [anchor=south,font=\scriptsize] (w6) at (label5.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label6) at (label5.south east) {};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=orange!50,inner sep=0pt] (label6) at (label5.south east) {};
\node [anchor=south,font=\scriptsize] (w7) at (label6.north) {$0.1$};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=red!50,inner sep=0pt] (label7) at (label6.south east) {};
\node [anchor=south west,minimum width=1.2em,minimum height=0.2em,fill=orange!50,inner sep=0pt] (label7) at (label6.south east) {};
\node [anchor=south,font=\scriptsize] (w8) at (label7.north) {$0.1$};
\node[font=\scriptsize] (line1) at ([xshift=9em,yshift=-1.5em]model_label7.east) {$Loss =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$};
\node[font=\scriptsize] (line2) at ([xshift=5.9em,yshift=3em]model_label7.east) {$Loss =-\log p_{3}$};
\node[font=\scriptsize] (line1) at ([xshift=13em,yshift=-1.5em]model_label7.east) {$Loss =-0.3 \log p_{3}-\sum_{i=1}^{7} 0.1 \log p_{i}$};
\node[font=\scriptsize] (line2) at ([xshift=9.5em,yshift=3em]model_label7.east) {$Loss =-\log p_{3}$};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=red] [fit =(model_w3) (model_label1) (model_label7) (one_hot_w3)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line2)] (box3) {};
\draw [->,dotted,very thick,red] ([yshift=1em]box1.east) .. controls +(east:1) and +(west:1) .. (box3.west);
\node [rectangle,thick,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line2)] (box3) {};
\draw [->,dotted,very thick,red] ([yshift=2em]box1.east) .. controls +(east:1.7) and +(west:1.5) .. (box3.west);
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit =(w3) (label1) (label7) (model_w3)] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1)] (box4) {};
\draw [->,dotted,very thick,ugreen] ([yshift=-1.5em]box2.east) .. controls +(east:1) and +(west:1) .. (box4.west);
\node [rectangle,thick,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line1)] (box4) {};
\draw [->,dotted,very thick,ugreen] ([yshift=-2.5em]box2.east) .. controls +(east:1.7) and +(west:1.5) .. (box4.west);
\end{pgfonlayer}
......
......@@ -2,7 +2,7 @@
\begin{tikzpicture}
\def\neuronsep{1}
\tikzstyle{neuronnode} = [minimum size=1.0em,circle,draw,thick,ublue,inner sep=1pt, fill=white,align=center]
\tikzstyle{neuronnode} = [minimum size=1.5em,circle,draw=ublue,very thick,inner sep=0pt, fill=white,align=center]
%standard
......@@ -11,67 +11,115 @@
}
% Second column of neurons (neuron11..neuron14), one per row \n.
% The x coordinate needs an explicit `*`: \neuronsep is a plain \def'd token list,
% so `1.5\neuronsep` would expand to the literal number 1.51 instead of the
% product 1.5 x \neuronsep, and would drift further if \neuronsep were changed.
\foreach \n in {1,...,4}{
\node [neuronnode] (neuron1\n) at (1.2*\neuronsep ,\n * \neuronsep) {};
\node [neuronnode] (neuron1\n) at (1.5*\neuronsep ,\n * \neuronsep) {};
}
\foreach \n in {1,...,4}{
\node [neuronnode] (neuron2\n) at (2.4*\neuronsep ,\n * \neuronsep) {};
\node [neuronnode] (neuron2\n) at (3*\neuronsep ,\n * \neuronsep) {};
}
\node [neuronnode] (neuron3) at (3.6*\neuronsep ,2.5 * \neuronsep) {};
\node [neuronnode] (neuron3) at (4.5*\neuronsep ,2.5 * \neuronsep) {};
\foreach \n in {1,...,4}{
\foreach \m in {1,...,4}{
\draw [->] (neuron0\n.east) -- (neuron1\m.west);
\foreach \n / \o in {1/180,2/165,3/150,4/135}{
\draw [->,line width=0.2mm] (neuron0\n.east) -- (neuron11.\o);
}
\foreach \n / \o in {1/220,2/190,3/170,4/140}{
\foreach \m in {2,3}{
\draw [->,line width=0.2mm] (neuron0\n.east) -- (neuron1\m.\o);
}
}
\foreach \n in {1,...,4}{
\foreach \m in {1,...,4}{
\draw [->] (neuron1\n.east) -- (neuron2\m.west);
\foreach \n / \o in {1/225,2/210,3/195,4/180}{
\draw [->,line width=0.2mm] (neuron0\n.east) -- (neuron14.\o);
}
%\foreach \n in {1,...,4}{
% \foreach \m in {1,...,4}{
% \draw [->,line width=0.2mm] (neuron1\n.east) -- (neuron2\m.west);
% }
%}
\foreach \n / \o in {1/180,2/165,3/150,4/135}{
\draw [->,line width=0.2mm] (neuron1\n.east) -- (neuron21.\o);
}
\foreach \n / \o in {1/220,2/190,3/170,4/140}{
\foreach \m in {2,3}{
\draw [->,line width=0.2mm] (neuron1\n.east) -- (neuron2\m.\o);
}
}
\foreach \n in {1,...,4}{
\draw [->] (neuron2\n.east) -- (neuron3.west);
\foreach \n / \o in {1/225,2/210,3/195,4/180}{
\draw [->,line width=0.2mm] (neuron1\n.east) -- (neuron24.\o);
}
\foreach \n / \o in {1/220,2/190,3/170,4/140}{
\draw [->,line width=0.2mm] (neuron2\n.east) -- (neuron3.\o);
}
%drop
%layer1
\foreach \n in {1,3,4}{
\node [neuronnode] (neuron4\n) at (5*\neuronsep,\n * \neuronsep) {};
\node [neuronnode] (neuron4\n) at (7*\neuronsep,\n * \neuronsep) {};
}
\node [neuronnode,dashed] (neuron42) at (5*\neuronsep,2 * \neuronsep) {};
\node [neuronnode,dashed] (neuron42) at (7*\neuronsep,2 * \neuronsep) {};
%layer1
\foreach \n in {1,2,4}{
\node [neuronnode] (neuron5\n) at (6.2*\neuronsep ,\n * \neuronsep) {};
\node [neuronnode] (neuron5\n) at (8.5*\neuronsep ,\n * \neuronsep) {};
}
\node [neuronnode,dashed] (neuron53) at (6.2*\neuronsep,3 * \neuronsep) {};
\node [neuronnode,dashed] (neuron53) at (8.5*\neuronsep,3 * \neuronsep) {};
%layer3
\foreach \n in {1,4}{
\node [neuronnode] (neuron6\n) at (7.4*\neuronsep ,\n * \neuronsep) {};
\node [neuronnode] (neuron6\n) at (10*\neuronsep ,\n * \neuronsep) {};
}
\node [neuronnode,dashed] (neuron62) at (7.4*\neuronsep ,2 * \neuronsep) {};
\node [neuronnode,dashed] (neuron63) at (7.4*\neuronsep ,3 * \neuronsep) {};
\node [neuronnode,dashed] (neuron62) at (10*\neuronsep ,2 * \neuronsep) {};
\node [neuronnode,dashed] (neuron63) at (10*\neuronsep ,3 * \neuronsep) {};
%layer4
\node [neuronnode] (neuron7) at (8.6*\neuronsep ,2.5 * \neuronsep) {};
\node [neuronnode] (neuron7) at (11.5*\neuronsep ,2.5 * \neuronsep) {};
\foreach \n in {1,3,4}{
\foreach \m in {1,2,4}{
\draw [->] (neuron4\n.east) -- (neuron5\m.west);
}
%\foreach \n in {1,3,4}{
% \foreach \m in {1,2,4}{
% \draw [->,line width=0.2mm] (neuron4\n.east) -- (neuron5\m.west);
% }
%}
\foreach \n / \o in {1/180,3/150,4/135}{
\draw [->,line width=0.2mm] (neuron4\n.east) -- (neuron51.\o);
}
\foreach \n in {1,2,4}{
\foreach \m in {1,4}{
\draw [->] (neuron5\n.east) -- (neuron6\m.west);
\foreach \n / \o in {1/220,3/170,4/140}{
\foreach \m in {2}{
\draw [->,line width=0.2mm] (neuron4\n.east) -- (neuron5\m.\o);
}
}
\foreach \n in {1,4}{
\draw [->] (neuron6\n.east) -- (neuron7.west);
\foreach \n / \o in {1/225,3/195,4/180}{
\draw [->,line width=0.2mm] (neuron4\n.east) -- (neuron54.\o);
}
%\foreach \n in {1,2,4}{
% \foreach \m in {1,4}{
% \draw [->,line width=0.2mm] (neuron5\n.east) -- (neuron6\m.west);
% }
%}
\foreach \n / \o in {1/180,2/165,4/135}{
\draw [->,line width=0.2mm] (neuron5\n.east) -- (neuron61.\o);
}
\foreach \n / \o in {1/225,2/210,4/180}{
\draw [->,line width=0.2mm] (neuron5\n.east) -- (neuron64.\o);
}
%\foreach \n in {1,4}{
% \draw [->,line width=0.2mm] (neuron6\n.east) -- (neuron7.west);
%}
\foreach \n / \o in {1/220,4/140}{
\draw [->,line width=0.2mm] (neuron6\n.east) -- (neuron7.\o);
}
\end{tikzpicture}
\ No newline at end of file
......@@ -4,8 +4,8 @@
\begin{tikzpicture}
\tikzstyle{rnnnode} = [draw,inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt,fill=red!15]
\tikzstyle{snode} = [draw,inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt,fill=blue!15]
\tikzstyle{rnnnode} = [draw,thick,inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt,fill=red!15]
\tikzstyle{snode} = [draw,thick,inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt,fill=blue!15]
\tikzstyle{ynode} = [inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt]
......@@ -14,18 +14,18 @@
\node [anchor=west,rnnnode] (n3) at ([xshift=3em,yshift=0em]n2.east) {$\mathbi{h}_{j-1}$};
\node [anchor=west,rnnnode] (n4) at ([xshift=3em,yshift=0em]n3.east) {$\mathbi{h}_{j}$};
\node [anchor=south,snode] (n5) at ([xshift=0em,yshift=1em]n3.north) {Softmax};
\node [anchor=south,ynode] (n6) at ([xshift=0em,yshift=1em]n5.north) {$\tilde{{y}}_{j-1}$};
\node [anchor=south,ynode,fill=green!20] (n6) at ([xshift=0em,yshift=1em]n5.north) {$\hat{{y}}_{j-1}$};
\node [anchor=south,snode] (n7) at ([xshift=0em,yshift=1em]n4.north) {Softmax};
\node [anchor=south,ynode] (n8) at ([xshift=0em,yshift=1em]n7.north) {$\tilde{{y}}_{j}$};
\node [anchor=south,ynode] (n8) at ([xshift=0em,yshift=1em]n7.north) {$\hat{{y}}_{j}$};
\node [anchor=south,snode,font=\footnotesize] (n13) at ([xshift=0em,yshift=1em]n1.north) {Softmax};
\node [anchor=south,ynode] (n14) at ([xshift=0em,yshift=1em]n13.north) {$\tilde{{y}}_{1}$};
\node [anchor=south,snode] (n13) at ([xshift=0em,yshift=1em]n1.north) {Softmax};
\node [anchor=south,ynode] (n14) at ([xshift=0em,yshift=1em]n13.north) {$\hat{{y}}_{1}$};
\node [anchor=north] (x1) at ([xshift=0em,yshift=-1em]n1.south) {$\langle$sos$\rangle$};
\node [anchor=north,font=\small] (x2) at ([xshift=-1.3em,yshift=-2.3em]n3.south) {$\tilde{{y}}_{j-2}$};
\node [anchor=north,font=\small] (x2) at ([xshift=-1.3em,yshift=-2.3em]n3.south) {$\hat{{y}}_{j-2}$};
\node [anchor=north,font=\small] (x3) at ([xshift=1.3em,yshift=-2.5em]n3.south) {${y}_{j-2}$};
\node [anchor=north,font=\small] (x4) at ([xshift=1.3em,yshift=-2.5em]n4.south) {${y}_{j-1}$};
\node [anchor=north,font=\small] (x5) at ([xshift=-1.3em,yshift=-2.3em]n4.south) {$\tilde{{y}}_{j-1}$};
\node [anchor=north,font=\small,fill=green!20] (x5) at ([xshift=-1.3em,yshift=-2.3em]n4.south) {$\hat{{y}}_{j-1}$};
\node [anchor=south,inner sep=2pt] (st1) at (n6.north) {\scriptsize{\textbf{[step $j-1$]}}};
\node [anchor=south,inner sep=2pt] (st2) at (n8.north) {\scriptsize{\textbf{[step $j$]}}};
......@@ -74,7 +74,7 @@
\draw [-,thick] ([xshift=0em,yshift=0.2em]x4.north)-- ([xshift=1.3em,yshift=0em]n4.south);
\draw [->,thick,dotted] ([xshift=-2.5em,yshift=1em]x2.north) .. controls +(south:2em) and +(west:0.1em) .. ([xshift=0.2em,yshift=0em]x2.west);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n6.east) .. controls ([xshift=2em,yshift=1em]n6.east) and ([xshift=-2.5em,yshift=-4em]n4.south west) .. ([xshift=-0em,yshift=-0em]x5.west);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n14.east) .. controls +(east:0.3em) and +(north:2em) .. ([xshift=3em,yshift=-0.5em]n14.south);
\draw [->,thick,dotted,ugreen] ([xshift=0em,yshift=0em]n6.east) .. controls +(east:2.5em) and +(west:2.5em) .. ([xshift=-0em,yshift=-0em]x5.west);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n14.east) .. controls +(east:0.3em) and +(north:1.5em) .. ([xshift=3em,yshift=-0.5em]n14.south);
\end{tikzpicture}
\begin{tikzpicture}
\node[anchor=west,inner sep=0mm,minimum height=4em,minimum width=5.5em,rounded corners=15pt,align=left,draw,fill=red!15] (n1) at (0,0) {Decoder\\Encoder};
\node[anchor=west,thick,inner sep=0mm,minimum height=4em,minimum width=8em,rounded corners=15pt,align=left,draw,fill=red!15] (n1) at (0,0) {Decoder\\Encoder};
\node[anchor=west,inner sep=0mm,minimum height=4em,minimum width=5.5em,rounded corners=15pt,align=left,draw,fill=green!15] (n2) at ([xshift=10em,yshift=0em]n1.east) {Decoder\\Encoder};
\node[anchor=west,thick,inner sep=0mm,minimum height=4em,minimum width=8em,rounded corners=15pt,align=left,draw,fill=green!15] (n2) at ([xshift=14em,yshift=0em]n1.east) {Decoder\\Encoder};
\node[anchor=south,inner sep=0mm,font=\small] (a1) at ([xshift=0em,yshift=1em]n1.north) {演员$p$};
......@@ -24,8 +24,8 @@
%\draw [->,dotted,very thick] ([xshift=0em,yshift=0em]n1.east) .. controls ([xshift=3em,yshift=-1em]n1.-90) and ([xshift=-3em,yshift=-1em]n2.-90) .. (n2.west);
\node[anchor=west,inner sep=0mm] (n3) at ([xshift=2.1em,yshift=1em]n1.east) {$Q_1,Q_2,\ldots,Q_J$};
\node[anchor=west,inner sep=0mm] (n4) at ([xshift=2.9em,yshift=-1em]n1.east) {$\tilde{{y}}_1,\tilde{{y}}_2,\ldots,\tilde{{y}}_J$};
\node[anchor=west,inner sep=0mm] (n3) at ([xshift=4.1em,yshift=1em]n1.east) {$Q_1,Q_2,\ldots,Q_J$};
% Sequence label placed below the Q-value label, to the right of n1.
% Uses \hat{y} so the model-output notation matches the other figures in this
% change set, which all switched from \tilde{y} to \hat{y}.
\node[anchor=west,inner sep=0mm] (n4) at ([xshift=4.9em,yshift=-1em]n1.east) {$\hat{{y}}_1,\hat{{y}}_2,\ldots,\hat{{y}}_J$};
\draw [->,thick] ([xshift=-0.1em,yshift=0.6em]n2.west) -- ([xshift=0.1em,yshift=0.6em]n1.east);
\draw [->,thick] ([xshift=0.1em,yshift=-0.6em]n1.east) -- ([xshift=-0.1em,yshift=-0.6em]n2.west);
......
\begin{tabular}{l l l}
\begin{tabular}{l l l l l}
\begin{tikzpicture}
\draw[->, thick] (0,0) to (3,0);
\draw[->, thick] (0,-0) to (0,2);
......@@ -25,6 +25,10 @@
\node [font=\footnotesize] at (1.5,-0.5) {欠拟合};
\end{tikzpicture}
&
\hspace{3em}
&\begin{tikzpicture}
\draw[->, thick] (0,0) to (3,0);
\draw[->, thick] (0,-0) to (0,2);
......@@ -52,6 +56,10 @@
\node [font=\footnotesize] at (1.5,-0.5) {拟合合适};
\end{tikzpicture}
&
\hspace{3em}
&\begin{tikzpicture}
\draw[->, thick] (0,0) to (3,0);
\draw[->, thick] (0,-0) to (0,2);
......
\begin{tikzpicture}
\node[rounded corners=3pt,minimum width=1.0em,minimum height=2.0em,font=\scriptsize,fill=green!5,drop shadow,thick](top) at (0,0) {
\node[rounded corners=3pt,minimum width=1.0em,minimum height=2.0em,font=\scriptsize,fill=red!10,drop shadow,thick](top) at (0,0) {
\begin{tabular}{lllllll}
\multicolumn{7}{c}{符号合并表} \\
r\ $<$e$>$, & e\ s, & l\ o, & es\ t, & lo\ w, & est\ $<$e$>$, & e\ r$<$e$>$,
(r,$<$e$>$), & (e,s), & (l,o), & (es,t), & (lo,w), & (est,$<$e$>$), & (e,r$<$e$>$),
\end{tabular}
};
\node[font=\footnotesize,anchor=north] (l1) at ([xshift=0em,yshift=-1em]top.south) {(a) 符号合并表};
......
This source diff could not be displayed because it is too large. You can view the blob instead.
\begin{tikzpicture}
\tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=5cm,minimum height=0.6cm,text centered,draw=black,fill=blue!15,drop shadow]
\tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=5cm,minimum height=0.6cm,text centered,draw=black,fill=blue!25,drop shadow]
\begin{scope}
\node (encoder) at (0,0) {来自编码器的信息};
......
\begin{tikzpicture}
\tikzstyle{snode} = [draw,inner sep=1pt,minimum width=3em,minimum height=0.5em,rounded corners=1pt,fill=green!20!white]
\tikzstyle{snode} = [draw,inner sep=1pt,minimum width=3em,minimum height=0.5em,rounded corners=1pt,fill=green!30!white]
\tikzstyle{pnode} = [draw,inner sep=1pt,minimum width=1em,minimum height=0.5em,rounded corners=1pt]
\node [anchor=west,snode] (s1) at (0,0) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (s2) at ([yshift=-0.3em]s1.south west) {\tiny{}};
......@@ -11,7 +11,7 @@
\node [anchor=north west,snode,minimum width=3em] (s6) at ([yshift=-0.3em]s5.south west) {\tiny{}};
\node [anchor=east] (label1) at ([xshift=-0.8em,yshift=-2em]s1.west) {{句子:}};
\node [anchor=east,draw,dashed,minimum height=7.5cm,minimum width=7.3cm,thick] (box) at ([xshift=10.9cm]label1.east) {};
\node [anchor=east,draw,dashed,minimum height=7.5cm,minimum width=7.3cm,very thick] (box) at ([xshift=10.9cm]label1.east) {};
%\node [anchor=north] (label6) at ([xshift=3em,yshift=7em]label1.north) {{$m$:显存}};
%\node [anchor=north] (label7) at ([xshift=3.3em,yshift=5.5em]label1.north) {{$t$:延迟}};
\node [anchor=west,pnode,minimum width=3em] (p1) at ([xshift=0.3em]s1.east) {\tiny{}};
......@@ -73,7 +73,7 @@
\draw [->,thick] (model.east) .. controls +(east:0.5) and +(west:0.5) .. ([xshift=-1.8em]box1.west);
\draw [->,thick] (model.east) .. controls +(east:0.5) and +(west:0.5) .. ([xshift=-1.8em]box2.west);
\draw [-,very thick] ([xshift=0.3em]box2.east) -- ([xshift=-0.3em]box3.west);
\draw [-,very thick] ([xshift=0.782em,yshift=0.5em]box2.east) -- ([xshift=0.782em,yshift=-0.5em]box2.east);
\draw [-,very thick] ([xshift=0.782em,yshift=0.48em]box2.east) -- ([xshift=0.782em,yshift=-0.48em]box2.east);
%%%%%
\node [] (t10) at ([yshift=1.5em]box1.north) {$t_1$};
......
......@@ -2,11 +2,11 @@
\begin{tikzpicture}
\scriptsize{
\begin{axis}[
width=8cm,
width=12cm,
height=5cm,
yticklabel style={/pgf/number format/.cd,fixed,precision=2},
xticklabel style={color=white},,
% Axis titles. The x title states that beam size is shown on a log scale; the
% parenthesis opened before "取" is now closed (it was left unbalanced).
xlabel={\footnotesize{搜索束大小(取$\log$)}},ylabel={\footnotesize{BLEU\ \%}},
xlabel={\footnotesize{搜索束大小(取$\log$)}},ylabel={\footnotesize{BLEU\;[\%]}},
ymin=28.8,ymax=30.4,
xmin=0,xmax=7,
xtick={0,1,2.32,3.32,4.91,6.64},
......@@ -19,12 +19,12 @@ legend style={yshift=-5em,xshift=0em,legend cell align=left,legend plot pos=righ
\addplot[purple,mark=square,mark=star,very thick] coordinates {(0,29.3) (1,29.7) (1.58,30.05) (2.32,30.1) (3,30.2) (3.32,30.3) (3.9,30.2) (4.32,30.08) (4.91,29.98) (5.91,29.6)(6.64,28.8) };
\end{axis}
% Hand-placed x-axis tick captions. The axis's own tick labels are styled white
% (xticklabel style={color=white} in the axis options), so these nodes supply the
% visible "log n" text just below the plot (y = -1em).
% NOTE: the nodes sit outside the axis environment, so their x positions are in
% picture coordinates rather than axis coordinates and have to be re-tuned by hand
% whenever the axis width changes.
\node[inner sep=0pt] at (0,-1em) {$\log$1};
\node[inner sep=0pt] at (1,-1em) {$\log$2};
\node[inner sep=0pt] at (1.5,-1em) {$\log$2};
%\node[inner sep=0pt] at (1.58,-1em) {$\log$3};
\node[inner sep=0pt] at (2.15,-1em) {$\log$5};
\node[inner sep=0pt] at (3.05,-1em) {$\log$10};
\node[inner sep=0pt] at (4.45,-1em) {$\log$30};
\node[inner sep=0pt] at (6,-1em) {$\log$100};
\node[inner sep=0pt] at (3.5,-1em) {$\log$5};
\node[inner sep=0pt] at (5.05,-1em) {$\log$10};
\node[inner sep=0pt] at (7.4,-1em) {$\log$30};
\node[inner sep=0pt] at (10.05,-1em) {$\log$100};
}
\end{tikzpicture}
......
......@@ -2,12 +2,12 @@
\begin{tikzpicture}
\tikzstyle{layer} = [rectangle,draw,rounded corners=3pt,minimum width=1cm,minimum height=0.5cm,line width=1pt];
\tikzstyle{prob} = [minimum width=0.3cm,rectangle,fill=ugreen!20!white,inner sep=0pt];
\tikzstyle{prob} = [minimum width=0.3cm,rectangle,fill=ugreen!30!white,inner sep=0pt];
\begin{scope}[local bounding box=STANDARD]
\node [] (input1) at (0,0) {$\cdots$};
\node [anchor=south,layer,fill=orange!15!white] (net1) at ([yshift=0.5cm]input1.north) {};
\node [anchor=south,layer,fill=orange!15!white] (out1) at ([yshift=0.5cm]net1.north) {};
\node [anchor=south,layer,fill=orange!25!white] (net1) at ([yshift=0.5cm]input1.north) {};
\node [anchor=south,layer,fill=orange!25!white] (out1) at ([yshift=0.5cm]net1.north) {};
\node [anchor=south,prob,minimum height=0.9cm] (prob5) at ([yshift=1.2cm]out1.north) {};
\node [anchor=south east,prob,minimum height=0.1cm] (prob4) at ([xshift=-1pt]prob5.south west) {};
......@@ -20,7 +20,7 @@
\node [anchor=south west,prob,minimum height=0.2cm] (prob8) at ([xshift=1pt]prob7.south east) {};
\node [anchor=south west,prob,minimum height=0.1cm] (prob9) at ([xshift=1pt]prob8.south east) {};
\path [fill=blue!20!white,draw=white] (out1.north west) -- (prob1.south west) -- (prob9.south east) -- (out1.north east) -- (out1.north west);
\path [fill=blue!30!white,draw=white] (out1.north west) -- (prob1.south west) -- (prob9.south east) -- (out1.north east) -- (out1.north west);
\draw [->,line width=1pt] (input1) to (net1);
\draw [->,line width=1pt] (net1) to (out1);
......@@ -29,9 +29,9 @@
\end{scope}
\begin{scope}[local bounding box=SELECTION]
\node [] (input2) at (4.5cm,0) {$\cdots$};
\node [anchor=south,layer,fill=orange!15!white] (net2) at ([yshift=0.5cm]input2.north) {};
\node [anchor=south,layer,fill=orange!15!white] (out2) at ([yshift=0.5cm]net2.north) {};
\node [] (input2) at (6cm,0) {$\cdots$};
\node [anchor=south,layer,fill=orange!25!white] (net2) at ([yshift=0.5cm]input2.north) {};
\node [anchor=south,layer,fill=orange!25!white] (out2) at ([yshift=0.5cm]net2.north) {};
\node [anchor=south,prob,minimum height=0.9cm] (prob5) at ([yshift=1.2cm]out2.north) {};
\node [anchor=south east,prob,minimum height=0.1cm,opacity=0] (prob4) at ([xshift=-1pt]prob5.south west) {};
......@@ -49,16 +49,16 @@
\node [anchor=south west,prob,minimum height=0.1cm,opacity=0] (prob9) at ([xshift=1pt]prob8.south east) {};
\node [text=red,anchor=south,inner sep=1pt] (plabel9) at (prob9.south) {$\times$};
\path [fill=blue!20!white,draw=white] (out2.north west) -- (prob1.south west) -- (prob9.south east) -- (out2.north east) -- (out2.north west);
\path [fill=blue!30!white,draw=white] (out2.north west) -- (prob1.south west) -- (prob9.south east) -- (out2.north east) -- (out2.north west);
\draw [->,line width=1pt] (input2) to (net2);
\draw [->,line width=1pt] (net2) to (out2);
\node [font=\small] (label2) at ([yshift=0.6cm]out2.north) {Softmax};
\node [anchor=west,layer,fill=orange!15!white] (net3) at ([xshift=2cm]net2.east) {};
\node [anchor=west,layer,fill=orange!25!white] (net3) at ([xshift=2cm]net2.east) {};
\node [anchor=north,font=\scriptsize] (input3) at ([yshift=-0.5cm]net3.south) {源语言};
\node [anchor=south,layer,align=center,font=\scriptsize,fill=yellow!10!white] (out3) at ([yshift=0.9cm]net3.north) {候选\\列表};
\node [anchor=south,layer,align=center,font=\scriptsize,fill=yellow!25!white] (out3) at ([yshift=0.9cm]net3.north) {候选\\列表};
\draw [->,line width=1pt] (input3) to (net3);
\draw [->,line width=1pt] (net3) to (out3);
......
......@@ -3,12 +3,12 @@
% Left to right: three column vectors (part1..part3, captioned P_1..P_3), each
% listing a probability for "Have" and "Has" followed by dot rows; then a large
% arrow (part4); then the combined vector (part5). The caption under part5 gives
% the combination rule: P is the sum of the three P_i, each weighted by 1/3.
% Each node is chained to the east side of the previous one via an xshift gap.
\node [anchor=north west] (part1) at (0,0) {\small{$\begin{bmatrix} \textrm{Have} \; 0.5 \\ \textrm{Has} \ \ \; 0.1 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=north](p1) at ([yshift=-0.3em]part1.south) {$P_1$};
\node [anchor=west](part2) at ([xshift=0.5em]part1.east){\small{$\begin{bmatrix} \textrm{Have} \; 0.2 \\ \textrm{Has} \ \ \; 0.3 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=west](part2) at ([xshift=0.8em]part1.east){\small{$\begin{bmatrix} \textrm{Have} \; 0.2 \\ \textrm{Has} \ \ \; 0.3 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=north](p2) at ([yshift=-0.3em]part2.south) {$P_2$};
\node [anchor=west](part3) at ([xshift=0.5em]part2.east){\small{$\begin{bmatrix} \textrm{Have} \; 0.4 \\ \textrm{Has} \ \ \; 0.3 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=west](part3) at ([xshift=0.8em]part2.east){\small{$\begin{bmatrix} \textrm{Have} \; 0.4 \\ \textrm{Has} \ \ \; 0.3 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=north](p3) at ([yshift=-0.3em]part3.south) {$P_3$};
\node [anchor=west](part4) at ([xshift=0.5em]part3.east){\huge{$\Rightarrow$}};
\node [anchor=west](part5) at ([xshift=0.5em]part4.east){\small{$\begin{bmatrix} \textrm{Have} \; 0.37 \\ \textrm{Has} \ \ \; 0.23 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=west](part4) at ([xshift=0.8em]part3.east){\huge{$\Rightarrow$}};
\node [anchor=west](part5) at ([xshift=0.8em]part4.east){\small{$\begin{bmatrix} \textrm{Have} \; 0.37 \\ \textrm{Has} \ \ \; 0.23 \\ . \\ . \\ . \\ . \\ . \end{bmatrix}$}};
\node [anchor=north](p5) at (part5.south) {$P=\sum\limits_{i=1}^{3}{\frac{1}{3}P_{i}}$};
\end{tikzpicture}
......
......@@ -25,9 +25,9 @@
\end{scope}
\begin{scope}[local bounding box=SINGLE]
\node [output,fill=green!20,draw,anchor=west] (output3) at ([xshift=4cm]output3.east) {输出 $n$};
\node [output,fill=green!20,draw,anchor=west] (output2) at ([xshift=4cm]output2.east) {输出 $2$};
\node [output,fill=green!20,draw,anchor=west] (output1) at ([xshift=4cm]output1.east) {输出 $1$};
\node [output,fill=green!20,draw,anchor=west] (output3) at ([xshift=5.5cm]output3.east) {输出 $n$};
\node [output,fill=green!20,draw,anchor=west] (output2) at ([xshift=5.5cm]output2.east) {输出 $2$};
\node [output,fill=green!20,draw,anchor=west] (output1) at ([xshift=5.5cm]output1.east) {输出 $1$};
\node [system,fill=green!20,draw,anchor=east,align=center,inner sep=1.9pt] (engine) at ([xshift=-0.5cm]output2.west) {单系统};
......
......@@ -5,44 +5,44 @@
% Shared box style used by the encoder, length-predictor and decoder nodes below.
\tikzstyle{er} = [rectangle,minimum width=2.5cm,minimum height=1.5cm,rounded corners,text centered,draw,drop shadow]
% Figure: one encoder box with a length-predictor box above it, followed to the
% right by decoder_1, decoder_2, an ellipsis node and decoder_3.
\begin{tikzpicture}[node distance = 0,scale = 0.75]
% Every node is scaled to 0.75 to match the picture-level scale.
\tikzstyle{every node}=[scale=0.75]
% Main boxes. A node name that is declared twice keeps the later declaration.
\node (encoder)[er,thick,draw,fill=red!20,minimum width=2.8cm]{\Large{编码器}};
\node (lenpre)[er,anchor=north,thick,draw,fill=yellow!20,minimum height=0.8cm] at ([yshift=1.5cm]encoder.north){\Large{长度预测器}};
\node (decoder_1)[er,thick,draw,right of=encoder,xshift=5cm,fill=blue!20]{\Large{解码器}};
\node (decoder_2)[er,thick,draw,right of=decoder_1,xshift=3.7cm,fill=blue!20]{\Large{解码器}};
\node (encoder)[er,thick,draw,fill=red!25,minimum width=2.8cm]{\Large{编码器}};
\node (lenpre)[er,anchor=north,thick,draw,fill=yellow!30,minimum height=0.8cm] at ([yshift=1.5cm]encoder.north){\Large{长度预测器}};
\node (decoder_1)[er,thick,draw,right of=encoder,xshift=5cm,fill=blue!25]{\Large{解码器}};
\node (decoder_2)[er,thick,draw,right of=decoder_1,xshift=3.7cm,fill=blue!25]{\Large{解码器}};
% Ellipsis placeholder standing between decoder_2 and decoder_3.
\node (point)[right of=decoder_2,xshift=3cm,]{\LARGE{...}};
% NOTE(review): decoder_3 uses fill=blue!20 while the blue!25 variants of decoder_1/decoder_2 exist above -- confirm whether this box should match.
\node (decoder_3)[er,thick,draw,right of=point,xshift=3cm,fill=blue!20]{\Large{解码器}};
% Encoder -> decoder_1 arrow, the input arrow entering the encoder from below,
% and the encoder -> length-predictor arrow.
\draw [->,very thick,draw=black!70]([xshift=0cm]encoder.east) -- ([xshift=-0cm]decoder_1.west);
\draw [->,very thick,draw=black]([xshift=0cm]encoder.east) -- ([xshift=-0cm]decoder_1.west);
\draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]encoder.south) -- ([xshift=0]encoder.south);
\draw [->,very thick,draw=black!70](encoder.north) -- (lenpre.south);
\draw [->,very thick,draw=black]([xshift=0,yshift=-1cm]encoder.south) -- ([xshift=0]encoder.south);
\draw [->,very thick,draw=black](encoder.north) -- (lenpre.south);
% Label placed 2.2cm below the encoder.
\node [below of = encoder,xshift=0cm,yshift=-2.2cm]{\Large$\seq{x}$};
% Arrows entering decoder_1 from below and leaving it at the top.
\draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]decoder_1.south) -- ([xshift=0]decoder_1.south);
\draw [->,very thick,draw=black!70]([xshift=0]decoder_1.north) -- ([xshift=0,yshift=1cm]decoder_1.north);
\draw [->,very thick,draw=black]([xshift=0,yshift=-1cm]decoder_1.south) -- ([xshift=0]decoder_1.south);
\draw [->,very thick,draw=black]([xshift=0]decoder_1.north) -- ([xshift=0,yshift=1cm]decoder_1.north);
\node (d1x) [below of = decoder_1,xshift=0cm,yshift=-2.2cm]{\Large$\seq{x'}$};
% Path from the length predictor to the d1x label, drawn in three pieces:
% a solid stub, a dashed middle section, and a final solid arrow.
\draw [-,very thick,draw=black!70] (lenpre.east) --([xshift=1.26cm]lenpre.east);
\draw [-,very thick,draw=black!70,dashed] ([xshift=1.26cm]lenpre.east) -- ([xshift=-2cm]d1x.west);
\draw [->,very thick,draw=black!70] ([xshift=-2cm]d1x.west) -- ([xshift=0cm]d1x.west);
\draw [-,very thick,draw=black] (lenpre.east) --([xshift=1.26cm]lenpre.east);
\draw [-,very thick,draw=black,dashed] ([xshift=1.26cm]lenpre.east) -- ([xshift=-2cm]d1x.west);
\draw [->,very thick,draw=black] ([xshift=-2cm]d1x.west) -- ([xshift=0cm]d1x.west);
% Label shifted 2.2cm up from decoder_1 (positive yshift despite `below of`).
\node (line1_1)[below of = decoder_1,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[1]}$};
% NOTE(review): this arrow is `thick` with the default color, whereas the other arrows in this picture are `very thick` with an explicit draw color -- confirm whether intentional.
\draw [->,thick,]([xshift=0,yshift=-1cm]decoder_2.south) -- ([xshift=0]decoder_2.south);
\draw [->,very thick,draw=black!70]([xshift=0]decoder_2.north) -- ([xshift=0,yshift=1cm]decoder_2.north);
\draw [->,very thick,draw=black]([xshift=0]decoder_2.north) -- ([xshift=0,yshift=1cm]decoder_2.north);
\node (line1_2)[below of = decoder_2,xshift=0cm,yshift=-2.2cm]{\Large$\seq{y}^{[1]}$};
\node (line2_1)[below of = decoder_2,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[2]}$};
% Empty anchor nodes below/above the ellipsis; they only serve as endpoints for the dotted curves.
\node (line2_2)[below of = point,xshift=0cm,yshift=-2.2cm]{};
\node (line3_1)[below of = point,xshift=0cm,yshift=2.2cm]{};
\draw [->,very thick,draw=black!70]([xshift=0,yshift=-1cm]decoder_3.south) -- ([xshift=0]decoder_3.south);
\draw [->,very thick,draw=black!70]([xshift=0]decoder_3.north) -- ([xshift=0,yshift=1cm]decoder_3.north);
\draw [->,very thick,draw=black]([xshift=0,yshift=-1cm]decoder_3.south) -- ([xshift=0]decoder_3.south);
\draw [->,very thick,draw=black]([xshift=0]decoder_3.north) -- ([xshift=0,yshift=1cm]decoder_3.north);
\node (line3_2)[below of = decoder_3,xshift=0cm,yshift=-2.2cm]{\Large$\seq{y}^{[N-1]}$};
\node [below of = decoder_3,xshift=0cm,yshift=2.2cm]{\Large$\seq{y}^{[N]}$};
% Dotted curves from the label above one decoder to the label below the next one.
\draw[->,very thick,draw=black!70, out=0, in=180,dotted] (line1_1.east) to (line1_2.west);
\draw[->,very thick,draw=black!70, out=0, in=180,dotted] (line2_1.east) to (line2_2.west);
\draw[->,very thick,draw=black!70, out=0, in=180,dotted] (line3_1.east) to (line3_2.west);
\draw[->,very thick,draw=black, out=0, in=180,dotted] (line1_1.east) to (line1_2.west);
\draw[->,very thick,draw=black, out=0, in=180,dotted] (line2_1.east) to (line2_2.west);
\draw[->,very thick,draw=black, out=0, in=180,dotted] (line3_1.east) to (line3_2.west);
% Rail running 2.8cm below the encoder's east anchor, with branches rising to the
% west side of decoder_2, the ellipsis node and decoder_3.
\draw [->,very thick,draw=black!70] ([xshift=0.5cm]encoder.east) -- ([xshift=0.5cm,yshift=-2.8cm]encoder.east) --([xshift=5.55cm,yshift=-2.8cm]encoder.east) --([xshift=-0.5cm]decoder_2.west) -- (decoder_2.west);
\draw [->,very thick,draw=black!70] ([xshift=5.55cm,yshift=-2.8cm]encoder.east) -- ([xshift=9.45cm,yshift=-2.8cm]encoder.east) --([xshift=-0.5cm]point.west) -- (point.west);
\draw [->,very thick,draw=black!70] ([xshift=9.45cm,yshift=-2.8cm]encoder.east) -- ([xshift=11.55cm,yshift=-2.8cm]encoder.east) -- ([xshift=-0.5cm]decoder_3.west) -- (decoder_3.west);
\draw [->,very thick,draw=black] ([xshift=0.5cm]encoder.east) -- ([xshift=0.5cm,yshift=-2.8cm]encoder.east) --([xshift=5.55cm,yshift=-2.8cm]encoder.east) --([xshift=-0.5cm]decoder_2.west) -- (decoder_2.west);
\draw [->,very thick,draw=black] ([xshift=5.55cm,yshift=-2.8cm]encoder.east) -- ([xshift=9.45cm,yshift=-2.8cm]encoder.east) --([xshift=-0.5cm]point.west) -- (point.west);
\draw [->,very thick,draw=black] ([xshift=9.45cm,yshift=-2.8cm]encoder.east) -- ([xshift=11.55cm,yshift=-2.8cm]encoder.east) -- ([xshift=-0.5cm]decoder_3.west) -- (decoder_3.west);
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
%左
\node [anchor=west,draw=black!70,rounded corners,drop shadow,very thick,minimum width=6em,minimum height=3.5em,fill=red!15,align=center,text=black] (part1) at (0,0) {\small{预测模块}};
\node [anchor=west,draw=black!70,rounded corners,drop shadow,very thick,minimum width=6em,minimum height=3.5em,fill=red!30,align=center,text=black] (part1) at (0,0) {\small{预测模块}};
\node [anchor=south] (text) at ([xshift=0.5em,yshift=-3.5em]part1.south) {\scriptsize{源语言句子(编码器输出)}};
\node [anchor=east,draw=black!70,rounded corners,drop shadow,very thick,minimum width=6em,minimum height=3.5em,fill=green!15,align=center,text=black] (part2) at ([xshift=10em]part1.east) {\small{搜索模块}};
\node [anchor=east,draw=black!70,rounded corners,drop shadow,very thick,minimum width=6em,minimum height=3.5em,fill=green!30,align=center,text=black] (part2) at ([xshift=10em]part1.east) {\small{搜索模块}};
\node [anchor=south] (text1) at ([xshift=0.1em,yshift=2.2em]part1.north) {\scriptsize{译文中已经生成的单词}};
\node [anchor=south] (text2) at ([xshift=0.5em,yshift=2.2em]part2.north) {\scriptsize{预测当前位置的单词概率分布}};
......
......@@ -2,13 +2,13 @@
\tikzstyle{er} = [rectangle,minimum width=7cm,minimum height=2.5cm,text centered,draw,drop shadow,rounded corners]
\begin{tikzpicture}[node distance = 0,scale = 0.55]
\tikzstyle{every node}=[scale=0.55]
\node (encoder)[er,thick,minimum width=5.5cm,fill=red!20]{\huge{编码器}};
\node (decoder)[er,thick,right of=encoder,xshift=8.75cm,fill=blue!20]{\huge{解码器}};
\node (decoder_1)[er,thick,right of=decoder,xshift=8.75cm,fill=blue!20]{\huge{解码器}};
\draw [->,very thick,draw=black!70]([xshift=0cm]encoder.east) -- ([xshift=-0cm]decoder.west);
\node (encoder)[er,thick,minimum width=5.5cm,fill=red!25]{\huge{编码器}};
\node (decoder)[er,thick,right of=encoder,xshift=8.75cm,fill=blue!25]{\huge{解码器}};
\node (decoder_1)[er,thick,right of=decoder,xshift=8.75cm,fill=blue!25]{\huge{解码器}};
\draw [->,very thick,draw=black]([xshift=0cm]encoder.east) -- ([xshift=-0cm]decoder.west);
\foreach \x in {-2.2cm,-1.1cm,...,2.2cm}
\draw [->,very thick,draw=black!70]([xshift=\x,yshift=-1cm]encoder.south) -- ([xshift=\x,yshift=-0.2cm]encoder.south);
\draw [->,very thick,draw=black]([xshift=\x,yshift=-1cm]encoder.south) -- ([xshift=\x,yshift=-0.2cm]encoder.south);
\node [below of = encoder,xshift=-2.3cm,yshift=-2.92cm,scale=1.2]{\small{<LEN>}};
\node [below of = encoder,xshift=-1cm,yshift=-2.9cm,scale=1.2]{\large{Hello}};
\node [below of = encoder,xshift=0cm,yshift=-3.05cm,scale=1.2]{,};
......@@ -18,8 +18,8 @@
\foreach \x in {-2.7cm,-0.9cm,...,2.8cm}
{\draw [->,very thick,draw=black!70]([xshift=\x,yshift=-1cm]decoder.south) -- ([xshift=\x,yshift=-0.2cm]decoder.south);
\draw [->,very thick,draw=black!70]([xshift=\x,yshift=0.2cm]decoder.north) -- ([xshift=\x,yshift=1cm]decoder.north);}
{\draw [->,very thick,draw=black]([xshift=\x,yshift=-1cm]decoder.south) -- ([xshift=\x,yshift=-0.2cm]decoder.south);
\draw [->,very thick,draw=black]([xshift=\x,yshift=0.2cm]decoder.north) -- ([xshift=\x,yshift=1cm]decoder.north);}
\node (mask_1) [below of = decoder,xshift=-3cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
\node [below of = decoder,xshift=-1cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
\node [below of = decoder,xshift=1cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
......@@ -32,8 +32,8 @@
\foreach \x in {-2.7cm,-0.9cm,...,2.8cm}
{\draw [->,very thick,draw=black!70]([xshift=\x,yshift=-1cm]decoder_1.south) -- ([xshift=\x,yshift=-0.2cm]decoder_1.south);
\draw [->,very thick,draw=black!70]([xshift=\x,yshift=0.2cm]decoder_1.north) -- ([xshift=\x,yshift=1cm]decoder_1.north);}
{\draw [->,very thick,draw=black]([xshift=\x,yshift=-1cm]decoder_1.south) -- ([xshift=\x,yshift=-0.2cm]decoder_1.south);
\draw [->,very thick,draw=black]([xshift=\x,yshift=0.2cm]decoder_1.north) -- ([xshift=\x,yshift=1cm]decoder_1.north);}
\node [below of = decoder_1,xshift=-2.7cm,yshift=2.9cm,scale=1.6]{你好};
\node [below of = decoder_1,xshift=-0.9cm,yshift=2.7cm,scale=1.6]{};
\node [below of = decoder_1,xshift=0.9cm,yshift=2.9cm,scale=1.6]{世界};
......@@ -44,10 +44,10 @@
\node [below of = decoder_1,xshift=0.9cm,yshift=-2.9cm,scale=1.6]{\small{<Mask>}};
\node [below of = decoder_1,xshift=2.6cm,yshift=-2.8cm,scale=1.6]{};
\draw [-,very thick,draw=black!70]([xshift=-2.2cm]encoder.north) -- ([xshift=-2.2cm,yshift=0.5cm]encoder.north)-- ([xshift=4.1cm,yshift=0.5cm]encoder.north);
\draw [-,very thick,draw=black!70,dashed]([xshift=4.1cm,yshift=0.5cm]encoder.north) -- ([xshift=-0.5cm]mask_1.west);
\draw [->,very thick,draw=black!70]([xshift=-0.5cm]mask_1.west) -- (mask_1.west);
\draw [->,very thick,draw=black!70]([xshift=0.5cm]encoder.east) -- ([xshift=0.5cm,yshift=-3.5cm]encoder.east) -- ([xshift=10.5cm,yshift=-3.5cm]encoder.east) -- ([xshift=-0.72cm]decoder_1.west) -- (decoder_1.west);
\draw [-,very thick,draw=black]([xshift=-2.2cm]encoder.north) -- ([xshift=-2.2cm,yshift=0.5cm]encoder.north)-- ([xshift=4.1cm,yshift=0.5cm]encoder.north);
\draw [-,very thick,draw=black,dashed]([xshift=4.1cm,yshift=0.5cm]encoder.north) -- ([xshift=-0.5cm]mask_1.west);
\draw [->,very thick,draw=black]([xshift=-0.5cm]mask_1.west) -- (mask_1.west);
\draw [->,very thick,draw=black]([xshift=0.5cm]encoder.east) -- ([xshift=0.5cm,yshift=-3.5cm]encoder.east) -- ([xshift=10.5cm,yshift=-3.5cm]encoder.east) -- ([xshift=-0.72cm]decoder_1.west) -- (decoder_1.west);
\draw [->,very thick,dotted] (line1.east) .. controls +(east:1.2) and +(west:1.2) ..(line2.west);
\node [below of = encoder,xshift=1.2cm,yshift=2.4cm,scale=1.7]{译文长度:4};
......
......@@ -4,14 +4,14 @@
%%% outline
%-------------------------------------------------------------------------
\begin{tikzpicture}
\tikzstyle{emb} = [font=\scriptsize,rounded corners=1pt, fill=orange!20, minimum width=1.8em,minimum height=1.5em,draw]
\tikzstyle{emb} = [font=\scriptsize,rounded corners=1pt, fill=orange!25, minimum width=1.8em,minimum height=1.5em,draw]
\tikzstyle{po} = [font=\scriptsize,rounded corners=1pt, fill=gray!20, minimum width=1.8em,minimum height=1.5em,draw]
\tikzstyle{tgt} = [minimum height=1.6em,minimum width=5.2em,fill=black!10!yellow!30,font=\footnotesize,drop shadow={shadow xshift=0.15em,shadow yshift=-0.15em,}]
\tikzstyle{p} = [fill=ugreen!15,minimum width=0.4em,inner sep=0pt]
\node[ rounded corners=3pt, thick,fill=red!20, drop shadow, minimum width=12em,minimum height=4em,draw] (encoder) at (0,0) {编码器};
\node[anchor=north,rounded corners=3pt, thick,fill=yellow!20, drop shadow, minimum width=12em,minimum height=2em,draw] (lenpre) at([yshift=3em]encoder.north){长度预测器};
\tikzstyle{tgt} = [minimum height=1.6em,minimum width=5.2em,fill=black!10!yellow!40,font=\footnotesize,drop shadow={shadow xshift=0.15em,shadow yshift=-0.15em,}]
\tikzstyle{p} = [fill=ugreen!25,minimum width=0.4em,inner sep=0pt]
\node[ rounded corners=3pt, thick,fill=red!25, drop shadow, minimum width=12em,minimum height=4em,draw] (encoder) at (0,0) {编码器};
\node[anchor=north,rounded corners=3pt, thick,fill=yellow!30, drop shadow, minimum width=12em,minimum height=2em,draw] (lenpre) at([yshift=3em]encoder.north){长度预测器};
\node[anchor=north] (lable) at([xshift=3.5em,yshift=2.5em]lenpre.north){译文长度:3};
\node[anchor=west, rounded corners=3pt, thick,fill=blue!20, drop shadow, minimum width=13em,minimum height=4em,draw] (decoder) at ([xshift=1cm]encoder.east) {解码器};
\node[anchor=west, rounded corners=3pt, thick,fill=blue!25, drop shadow, minimum width=13em,minimum height=4em,draw] (decoder) at ([xshift=1cm]encoder.east) {解码器};
\node[anchor=north,emb] (en1) at ([yshift=-1.3em,xshift=-4.5em]encoder.south) {${\mathbi e}$(干)};
\node[anchor=north,emb] (en2) at ([yshift=-1.3em,xshift=-1.5em]encoder.south) {${\mathbi e}$(得)};
......
......@@ -4,13 +4,13 @@
%%% outline
%-------------------------------------------------------------------------
\begin{tikzpicture}
\tikzstyle{emb} = [font=\scriptsize,rounded corners=1pt, fill=orange!20, minimum width=1.8em,minimum height=1.5em,draw]
\tikzstyle{emb} = [font=\scriptsize,rounded corners=1pt, fill=orange!25, minimum width=1.8em,minimum height=1.5em,draw]
\tikzstyle{po} = [font=\scriptsize,rounded corners=1pt, fill=gray!20, minimum width=1.8em,minimum height=1.5em,draw]
\begin{scope}
\node[rounded corners=3pt, thick,fill=red!20, drop shadow, minimum width=10em,minimum height=4em,draw] (encoder) at (0,0) {编码器};
\node[anchor=north,rounded corners=3pt, thick,fill=yellow!20, drop shadow, minimum width=10em,minimum height=2em,draw] (lenpre) at([yshift=3em]encoder.north){长度预测器};
\node[rounded corners=3pt, thick,fill=red!25, drop shadow, minimum width=10em,minimum height=4em,draw] (encoder) at (0,0) {编码器};
\node[anchor=north,rounded corners=3pt, thick,fill=yellow!30, drop shadow, minimum width=10em,minimum height=2em,draw] (lenpre) at([yshift=3em]encoder.north){长度预测器};
\node[anchor=north] (lable) at([xshift=3.5em,yshift=2.5em]lenpre.north){译文长度:4};
\node[anchor=west, rounded corners=3pt, thick,fill=blue!20, drop shadow, minimum width=16em,minimum height=4em,draw] (decoder) at ([xshift=1.8cm]encoder.east) {解码器};
\node[anchor=west, rounded corners=3pt, thick,fill=blue!25, drop shadow, minimum width=16em,minimum height=4em,draw] (decoder) at ([xshift=1.8cm]encoder.east) {解码器};
\node[anchor=north,emb] (en2) at ([yshift=-1.3em]encoder.south) {${\mathbi e}(x_2)$};
\node[anchor=north,emb] (en1) at ([yshift=-1.3em,xshift=-3em]encoder.south) {${\mathbi e}(x_1)$};
......@@ -61,7 +61,7 @@
\end{scope}
\begin{scope}[yshift=2.8in]
\node[rounded corners=3pt, thick,fill=red!20, drop shadow, minimum width=10em,minimum height=4em,draw] (encoder) at (0,0) {编码器};
\node[rounded corners=3pt, thick,fill=red!25, drop shadow, minimum width=10em,minimum height=4em,draw] (encoder) at (0,0) {编码器};
\node[anchor=west,minimum width=16em,minimum height=4em] (decoder) at ([xshift=1.8cm]encoder.east) {};
\node[anchor=north,emb] (en2) at ([yshift=-1.3em]encoder.south) {${\mathbi e}(x_2)$};
......@@ -122,7 +122,7 @@
\draw [->,very thick,dotted] ([xshift=-0.3em]out2.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]de3.west);
\draw [->,very thick,dotted] ([xshift=-0.3em]out3.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]de4.west);
\draw [->,very thick,dotted] ([xshift=-0.3em]out4.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]de5.west);
\node[anchor=west, rounded corners=3pt, thick,fill=blue!20, drop shadow, minimum width=16em,minimum height=4em,draw] (decoder2) at ([xshift=1.8cm]encoder.east) {解码器};
\node[anchor=west, rounded corners=3pt, thick,fill=blue!25, drop shadow, minimum width=16em,minimum height=4em,draw] (decoder2) at ([xshift=1.8cm]encoder.east) {解码器};
\draw[->,line width=1pt] (encoder.east) -- (decoder.west);
\end{scope}
......
\definecolor{taupegray}{rgb}{0.55, 0.52, 0.54}
\definecolor{bananamania}{rgb}{0.98, 0.91, 0.71}
\definecolor{aliceblue}{rgb}{0.94, 0.97, 1.0}
%%% outline
%-------------------------------------------------------------------------
\begin{tikzpicture}
\tikzstyle{layer} = [draw=black!70,thick, minimum width=7.5em,rounded corners=2pt,inner ysep=6pt,font=\footnotesize,align=center]
\tikzstyle{layer} = [draw,thick, minimum width=7.5em,rounded corners=2pt,inner ysep=6pt,font=\footnotesize,align=center]
\tikzstyle{line} = [line width=1pt,->]
\tikzstyle{cir} = [draw,circle,minimum size=1em, thick,inner sep=0pt]
%encoder
\node[layer,fill=red!15] (src_emb) at (0,0){\scriptsize\textbf{Embedding}};
\node[anchor=south,layer,fill=yellow!20] (src_sa) at ([yshift=3.7em]src_emb.north){\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=orange!20] (src_ff) at ([yshift=1em]src_sa.north){\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=blue!20] (src_sf) at ([yshift=2.6em]src_ff.north){\scriptsize\textbf{Softmax}};
\node[layer,fill=red!20] (src_emb) at (0,0){\scriptsize\textbf{Embedding}};
\node[anchor=south,layer,fill=yellow!25] (src_sa) at ([yshift=3.7em]src_emb.north){\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=orange!25] (src_ff) at ([yshift=1em]src_sa.north){\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=blue!25] (src_sf) at ([yshift=2.6em]src_ff.north){\scriptsize\textbf{Softmax}};
%decoder
\node[anchor=west,layer,fill=red!15] (tgt_emb) at ([xshift=4.4em]src_emb.east){\scriptsize\textbf{Embedding}};
\node[anchor=south,layer,fill=yellow!20] (tgt_sa) at ([yshift=3.7em]tgt_emb.north){\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=yellow!20] (tgt_pa) at ([yshift=1.5em]tgt_sa.north){\scriptsize\textbf{Positional Attention}};
\node[anchor=west,layer,fill=red!20] (tgt_emb) at ([xshift=4.4em]src_emb.east){\scriptsize\textbf{Embedding}};
\node[anchor=south,layer,fill=yellow!25] (tgt_sa) at ([yshift=3.7em]tgt_emb.north){\scriptsize\textbf{Self-attention}};
\node[anchor=south,layer,fill=yellow!25] (tgt_pa) at ([yshift=1.5em]tgt_sa.north){\scriptsize\textbf{Positional Attention}};
\node[anchor=south,layer,draw=red,dashed,line width=2pt,minimum height=1.55em] (tgt_paa) at ([yshift=1.5em]tgt_sa.north){};
\node[anchor=south,layer,fill=yellow!20] (tgt_eda) at ([yshift=1.5em]tgt_pa.north){\scriptsize\textbf{Encoder-Decoder} \\ \scriptsize\textbf{Attention}};
\node[anchor=south,layer,fill=orange!20] (tgt_ff) at ([yshift=1em]tgt_eda.north){\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=green!20] (tgt_linear) at ([yshift=1.4em]tgt_ff.north){\scriptsize\textbf{Linear}};
\node[anchor=south,layer,fill=blue!20] (tgt_sf) at ([yshift=1em]tgt_linear.north){\scriptsize\textbf{Softmax}};
\node[anchor=south,layer,fill=yellow!25] (tgt_eda) at ([yshift=1.5em]tgt_pa.north){\scriptsize\textbf{Encoder-Decoder} \\ \scriptsize\textbf{Attention}};
\node[anchor=south,layer,fill=orange!25] (tgt_ff) at ([yshift=1em]tgt_eda.north){\scriptsize\textbf{Feed Forward}};
\node[anchor=south,layer,fill=green!25] (tgt_linear) at ([yshift=1.4em]tgt_ff.north){\scriptsize\textbf{Linear}};
\node[anchor=south,layer,fill=blue!25] (tgt_sf) at ([yshift=1em]tgt_linear.north){\scriptsize\textbf{Softmax}};
\node[font=\footnotesize,anchor=south] (w3) at ([yshift=0.8em]src_sf.north){\scriptsize\textbf{2}};
\node[font=\footnotesize,anchor=east] (w2) at ([xshift=-0.5em]w3.west){\scriptsize\textbf{1}};
\node[font=\footnotesize,anchor=east] (w1) at ([xshift=-0.5em]w2.west){\scriptsize\textbf{1}};
\node[font=\footnotesize,anchor=west] (w4) at ([xshift=0.5em]w3.east){\scriptsize\textbf{0}};
\node[font=\footnotesize,anchor=west] (w5) at ([xshift=0.5em]w4.east){\scriptsize\textbf{1}};
\node[font=\footnotesize,anchor=south] (output) at ([yshift=1em]tgt_sf.north){\scriptsize\sffamily\bfseries{我们\quad 完全\quad 接受\quad\quad}};
\node[font=\footnotesize,anchor=south] (output) at ([yshift=1em]tgt_sf.north){\scriptsize\sffamily\bfseries{我们/完全/接受/ 它/}};
\node[font=\footnotesize,anchor=north] (src) at ([yshift=-1em]src_emb.south){\scriptsize\textbf{We totally accept it .}};
\node[font=\footnotesize,anchor=north] (tgt) at ([yshift=-1em]tgt_emb.south){\scriptsize\textbf{We totally accept accept .}};
\node[cir] (src_add) at (0,2.5em) {};
\node[cir,fill=orange!7] (src_pos) at (-2.5em,2.5em) {};
\node[cir,fill=orange!12] (src_pos) at (-2.5em,2.5em) {};
\node[cir] (tgt_add) at (12em,2.5em) {};
\node[cir,fill=orange!7] (tgt_pos) at (14.5em,2.5em) {};
\node[cir,fill=orange!7] (tgt_pos2) at ([xshift=4.5em,yshift=-2.45em]tgt_pa.north) {};
\node[cir,fill=orange!12] (tgt_pos) at (14.5em,2.5em) {};
\node[cir,fill=orange!12] (tgt_pos2) at ([xshift=4.5em,yshift=-2.45em]tgt_pa.north) {};
\draw[line,rounded corners=2pt] (tgt_pos2.180) -- ([yshift=-0.8em]tgt_pa.south) -- (tgt_pa.south);
\draw[line,rounded corners=2pt] (tgt_pos2.180) -- ([xshift=1.8em,yshift=-0.8em]tgt_pa.south) -- ([xshift=1.8em]tgt_pa.south);
\draw[-,thick,] (tgt_pos2.180) .. controls ([xshift=0.8em,yshift=0.8em]tgt_pos2.180) and ([xshift=-0.8em,yshift=-0.8em]tgt_pos2.0) ..(tgt_pos2.0);
......@@ -73,9 +74,9 @@
\begin{pgfonlayer}{background}
{
\node[draw=taupegray,thick,fill=ugreen!10,inner sep=0pt,minimum height=7em,minimum width=9.5em,rounded corners=4pt,drop shadow] (box1) at (0em,6.9em){};
\node[draw=taupegray,thick,fill=yellow!10,inner sep=0pt,minimum height=4.5em,minimum width=9.5em,rounded corners=4pt,drop shadow] (box2) at (0em,13.2em){};
\node[draw=taupegray,thick,fill=blue!7,inner sep=0pt,minimum height=13.3em,minimum width=10.5em,rounded corners=4pt,drop shadow] (box3) at (12em,10.1em){};
\node[draw=taupegray,thick,fill=ugreen!15,inner sep=0pt,minimum height=7em,minimum width=9.5em,rounded corners=4pt,drop shadow] (box1) at (0em,6.9em){};
\node[draw=taupegray,thick,fill=aliceblue,inner sep=0pt,minimum height=4.5em,minimum width=9.5em,rounded corners=4pt,drop shadow] (box2) at (0em,13.2em){};
\node[draw=taupegray,thick,fill=blue!12,inner sep=0pt,minimum height=13.3em,minimum width=10.5em,rounded corners=4pt,drop shadow] (box3) at (12em,10.1em){};
}
\end{pgfonlayer}
\node[] at ([yshift=1.8em]box2.north){\normalsize{译文长度:5}};
......
......@@ -7,9 +7,9 @@
%%% outline
%-------------------------------------------------------------------------
\begin{tikzpicture}
\tikzstyle{module} = [draw=taupegray,very thick,rounded corners=2pt,inner ysep=8pt,font=\footnotesize,align=center,fill=yellow!15]
\tikzstyle{box} = [draw=taupegray,very thick,rounded corners=4pt,inner ysep=4pt,inner xsep=8pt,fill=ugreen!10,drop shadow];
\tikzstyle{line} = [very thick,-latex];
\tikzstyle{module} = [draw,very thick,rounded corners=2pt,inner ysep=8pt,font=\footnotesize,align=center,fill=yellow!25]
\tikzstyle{box} = [draw=taupegray,very thick,rounded corners=4pt,inner ysep=4pt,inner xsep=8pt,fill=ugreen!25,drop shadow];
\tikzstyle{line} = [very thick,->];
\node[module, minimum width=8em] (encoder) at (0,0) {编码器组件};
......@@ -31,7 +31,7 @@
\node[anchor=north,font=\scriptsize,align=center] (w2) at ([yshift=-2em]decoder.south){\scriptsize\bfnew{There exist different} \\ \scriptsize\bfnew{opinions on this question .}};
\node[anchor=north,font=\scriptsize,text=gray] (w3) at ([yshift=0.6em]w2.south){\scriptsize\bfnew{(复制源语言句子)}};
\node[anchor=south,font=\scriptsize,align=center] (w4) at ([yshift=1.6em]box2.north){\scriptsize\bfnew{on this question} \\ \scriptsize\bfnew{There exist different opinions .}};
\node[anchor=south,font=\scriptsize,align=center] (w5) at ([yshift=1.6em]box3.north){\tiny\bfnew{ \ 这个 \ 问题 \ 存在 \ 不同的 \ 看法 \ }};
\node[anchor=south,font=\scriptsize,align=center] (w5) at ([yshift=1.6em]box3.north){\tiny\bfnew{/这个/问题/存在/不同/的/看法/}};
\node[font=\tiny] at ([xshift=-0.8em,yshift=-0.6em]encoder.east) {$N\times$};
\node[font=\tiny] at ([xshift=-0.8em,yshift=-0.6em]decoder.east) {$1\times$};
\node[font=\tiny] at ([xshift=-1.2em,yshift=-0.6em]decoder2.east) {$N-1\times$};
......
\begin{tikzpicture}
\tikzstyle{encoder} = [rectangle,thick,rounded corners,minimum width=1.9cm,minimum height=1.2cm,text centered,draw=black,fill=red!25,drop shadow]
\tikzstyle{autodecoder} = [rectangle,thick,rounded corners,minimum width=3cm,minimum height=1.2cm,text centered,draw=black,fill=blue!15,drop shadow]
\tikzstyle{nonautodecoder} = [rectangle,thick,rounded corners,minimum width=4cm,minimum height=1.2cm,text centered,draw=black!70,fill=blue!15,drop shadow]
\tikzstyle{autodecoder} = [rectangle,thick,rounded corners,minimum width=3cm,minimum height=1.2cm,text centered,draw=black,fill=blue!25,drop shadow]
\tikzstyle{nonautodecoder} = [rectangle,thick,rounded corners,minimum width=4cm,minimum height=1.2cm,text centered,draw=black!70,fill=blue!25,drop shadow]
\node (encoder)[encoder] at (0,0) {编码器};
%\node (des)[anchor=north] at ([yshift=2cm]encoder.north) {<Mask>:<Mask>};
\node (text_left)[anchor=south] at ([yshift=-3em]encoder.south) {\footnotesize{\ \ 熟睡\ }};
\node (text_left)[anchor=south] at ([yshift=-3em]encoder.south) {\footnotesize{/在/熟睡/}};
\node (autodecoder)[autodecoder,right of=encoder,xshift=6em ] {自回归解码器};
\node (text_mid1)[anchor=north] at ([yshift=3em]autodecoder.north) {\scriptsize{NP1\ VP3\ PU1\ <eos>}};
\node (text_mid2)[anchor=south] at ([yshift=-3em]autodecoder.south) {\scriptsize{<sos>\ NP1\ VP3\ PU1}};
......
......@@ -14,7 +14,7 @@
\renewcommand{\arraystretch}{0}
\setlength{\tabcolsep}{5mm}
\setlength{\fboxsep}{2.2mm} % box size
\begin{tabular}{C{.20\textwidth}C{.20\textwidth}C{.20\textwidth}C{.20\textwidth}}
\begin{tabular}{C{.40\textwidth}C{.40\textwidth}C{.40\textwidth}C{.40\textwidth}}
\setlength{\tabcolsep}{0pt}
\subfigure [\small{自注意力}] {
\begin{tabular}{ccC{1em}}
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -23,9 +23,9 @@
\chapter{神经机器翻译模型推断}
\parinterval 推断是神经机器翻译中的核心问题。训练时双语句子对模型是可见的,但是在推断阶段,模型需要根据输入的源语言句子预测译文,因此神经机器翻译的推断和训练过程有着很大的不同。特别是,推断系统往往对应着机器翻译实际部署的需要,因此机器翻译推断系统的精度和速度等也是实践中需要考虑的。
\parinterval 推断是神经机器翻译中的核心问题。训练时双语句子对模型是可见的,但是在推断阶段,模型需要根据输入的源语言句子预测译文,因此神经机器翻译的推断和训练过程有着很大的不同。特别是,推断系统往往对应着机器翻译实际部署的需要,因此机器翻译推断系统的精度和速度等因素也是实践中需要考虑的。
\parinterval 本章对神经机器翻译模型推断的若干问题进行讨论。主要涉及三方面内容:
\parinterval 本章对神经机器翻译模型推断的若干问题进行讨论。主要涉及三方面内容:
\begin{itemize}
\vspace{0.5em}
......@@ -43,7 +43,7 @@
\section{面临的挑战}
\parinterval 神经机器翻译的推断是指:对于输入的源语言句子$\seq{x}$,使用已经训练好的模型找到最佳译文$\hat{\seq{y}}$的过程$\hat{\seq{y}}=\arg\max\limits_{\seq{y}}\funp{P}(\seq{y}|\seq{x})$。这个过程也被称作解码。但是为了避免与神经机器翻译中编码器-解码器造成概念上的混淆,这里统一把翻译新句子的操作称作推断。以上这个过程是一个典型的搜索问题(见{\chaptertwo}),比如,可以使用贪婪搜索或者束搜索完成神经机器翻译的推断(见{\chapterten})。
\parinterval 神经机器翻译的推断是指:对于输入的源语言句子$\seq{x}$,使用已经训练好的模型找到最佳译文$\hat{\seq{y}}$的过程,其中$\hat{\seq{y}}=\arg\max\limits_{\seq{y}}\funp{P}(\seq{y}|\seq{x})$。这个过程也被称作解码。但是为了避免与神经机器翻译中编码器-解码器造成概念上的混淆,这里统一把翻译新句子的操作称作推断。以上这个过程是一个典型的搜索问题(见{\chaptertwo}),比如,可以使用贪婪搜索或者束搜索完成神经机器翻译的推断(见{\chapterten})。
\parinterval 通用的神经机器翻译推断包括如下几步:
......@@ -89,7 +89,7 @@
\begin{itemize}
\vspace{0.5em}
\item 搜索的基本问题在神经机器翻译中有着特殊的现象。比如,在统计机器翻译中,降低搜索错误是提升翻译品质的一种手段。但是神经机器翻译中,简单的降低搜索错误可能无法带来性能的提升,甚至会造成翻译品质的下降\upcite{li-etal-2018-simple,Stahlberg2019OnNS}
\item 搜索的基本问题在神经机器翻译中有着特殊的现象。比如,在统计机器翻译中,减少搜索错误是提升翻译品质的一种手段。但是在神经机器翻译中,简单地减少搜索错误可能无法带来性能的提升,甚至会造成翻译品质的下降\upcite{li-etal-2018-simple,Stahlberg2019OnNS};
\vspace{0.5em}
\item 搜索的时延很高,系统实际部署的成本很高。与统计机器翻译系统不同的是,神经机器翻译依赖大量的浮点运算。这导致神经机器翻译系统的推断会比统计机器翻译系统慢很多。虽然可以使用GPU来提高神经机器翻译的推断速度,但是也大大增加了成本;
\vspace{0.5em}
......@@ -113,22 +113,22 @@
\subsection{推断方向}\label{sec:14-2-1}
\parinterval 机器翻译有两种常用的推断方式\ \dash \ 自左向右推断和自右向左推断。自左向右推断符合现实世界中人类的语言使用规律,因为人在翻译一个句子时,总是习惯从句子开始的部分往后生成\footnote{有些语言中,文字是自右向左书写,这时自右向左推断更符合人类使用这种语言的习惯。}。不过,有时候人也会使用当前单词后面的译文信息。也就是说,翻译也需要“未来” 的文字信息。于是很容易想到使用自右向左的方法对译文进行生成。
\parinterval 机器翻译有两种常用的推断方式\ \dash \ 自左向右推断和自右向左推断。自左向右推断符合现实世界中人类的语言使用规律,因为人在翻译一个句子时,总是习惯从句子开始的部分向后生成\footnote{有些语言中,文字是自右向左书写,这时自右向左推断更符合人类使用这种语言的习惯。}。不过,有时候人也会使用当前单词后面的译文信息。也就是说,翻译也需要“未来” 的文字信息。于是很容易想到使用自右向左的方式对译文进行生成。
\parinterval 以上两种推断方式在神经机器翻译中都有应用,对于源语言句子$\seq{x}=\{x_1,\dots,x_m\}$和目标语言句子$\seq{y}=\{y_1,\dots,y_n\}$,自左向右的翻译可以被描述为:
\parinterval 以上两种推断方式在神经机器翻译中都有应用,对于源语言句子$\seq{x}=\{x_1,\dots,x_m\}$和目标语言句子$\seq{y}=\{y_1,\dots,y_n\}$,自左向右推断可以被描述为:
\begin{eqnarray}
\funp{P}(\seq{y}\vert\seq{x}) &=& \prod_{j=1}^n \funp{P}(y_j\vert\seq{y}_{<j},\seq{x})
\label{eq:14-1}
\end{eqnarray}
\parinterval 自右向左的翻译可以被描述为:
\parinterval 自右向左推断可以被描述为:
\begin{eqnarray}
\funp{P}(\seq{y}\vert\seq{x}) &=&\prod_{j=1}^n \funp{P}(y_{n+1-j}\vert\seq{y}_{>n+1-j},\seq{x})
\label{eq:14-2}
\end{eqnarray}
\noindent 其中,$\seq{y}_{<j}=\{y_1,\dots,y_{j-1}\}$,$\seq{y}_{>j}=\{y_{j+1},\dots,y_n\}$。可以看到,自左向右推断和自右向左推断本质上是一样的。{\chapterten}和{\chaptertwelve}均使用了自左向右的推断方法。自右向左推断比较简单的实现方式是:在训练过程中直接将双语数据中的目标语言句子进行反转,之后仍然使用原始的模型进行训练即可。在推断的时候,生成的目标语言词串也需要进行反转得到最终的译文。有时候,使用自右向左的推断方式会取得更好的效果\upcite{DBLP:conf/wmt/SennrichHB16}。不过更多情况下需要同时使用词串左端(历史)和右端(未来)的信息。有多种思路可以融合左右两端信息:
\noindent 其中,$\seq{y}_{<j}=\{y_1,\dots,y_{j-1}\}$,$\seq{y}_{>n+1-j}=\{y_{n+2-j},\dots,y_n\}$。可以看到,自左向右推断和自右向左推断本质上是一样的。{\chapterten}和{\chaptertwelve}均使用了自左向右的推断方法。自右向左推断比较简单的实现方式是:在训练过程中直接将双语数据中的目标语言句子进行反转,之后仍然使用原始的模型进行训练即可。在推断的时候,生成的目标语言词串也需要进行反转得到最终的译文。有时候,使用自右向左的推断方式会取得更好的效果\upcite{DBLP:conf/wmt/SennrichHB16}。不过更多情况下需要同时使用词串左端(历史)和右端(未来)的信息。有多种思路可以融合左右两端信息:
\begin{itemize}
\vspace{0.5em}
......@@ -200,7 +200,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\textrm{cp}(\seq{x},\seq{y}) &=& \sum_{i=1}^{|\seq{x}|} \log( \textrm{max} ( \sum_{j}^{|\seq{y}|} a_{ij},\beta))
\label{eq:14-7}
\end{eqnarray}
\noindent 公式\eqref{eq:14-7}将公式\eqref{eq:14-6}中的向下截断方式为了向上截断。这样,模型可以对过翻译(或重复翻译)有更好的建模能力。不过,这个模型需要在开发集上细致地调整$\beta$,也带来了一定的额外工作量。此外,也可以将这种覆盖度单独建模并进行参数化,与翻译模型一同训练\upcite{Mi2016CoverageEM,TuModeling,Kazimi2017CoverageFC}。这样可以得到更加精细的覆盖度模型。
\noindent 公式\eqref{eq:14-7}将公式\eqref{eq:14-6}中的向下截断方式改为了向上截断。这样,模型可以对过翻译(或重复翻译)有更好的建模能力。不过,这个模型需要在开发集上细致地调整$\beta$,也带来了一定的额外工作量。此外,也可以将这种覆盖度单独建模并进行参数化,与翻译模型一同训练\upcite{Mi2016CoverageEM,TuModeling,Kazimi2017CoverageFC}。这样可以得到更加精细的覆盖度模型。
\vspace{0.5em}
\end{itemize}
......@@ -212,11 +212,11 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\parinterval 在机器翻译推断中,何时终止搜索是一个非常基础的问题。如{\chaptertwo}所述,系统研发者一方面希望尽可能遍历更大的搜索空间,找到更好的结果,另一方面也希望在尽可能短的时间内得到结果。这时搜索的终止条件就是一个非常关键的指标。在束搜索中有很多终止条件可以使用,比如,在生成一定数量的译文之后就终止搜索,或者当最佳译文与排名第二的译文之间的分数差距超过一个阈值时就终止搜索等。
\parinterval 在统计机器翻译中,搜索的终止条件相对容易设计。因为所有的翻译结果都可以用相同步骤的搜索过程生成,比如,在CYK解码中搜索的步骤仅与构建的分析表大小有关。在神经机器翻译中,这个问题要更加复杂。当系统找到一个完整的译文之后,可能还有很多译文没有被生成完,这时就面临着一个问题\ \dash \ 如何决定是否继续搜索。
\parinterval 在统计机器翻译中,搜索的终止条件相对容易设计。因为所有的翻译结果都可以用相同步骤的搜索过程生成,比如,在CYK推断中搜索的步骤仅与构建的分析表大小有关。在神经机器翻译中,这个问题要更加复杂。当系统找到一个完整的译文之后,可能还有很多译文没有被生成完,这时就面临着一个问题\ \dash \ 如何决定是否继续搜索。
\parinterval 针对这些问题,研究人员设计了很多新的方法。比如,可以在束搜索中使用启发性信息让搜索尽可能早地停止,同时保证搜索结果是“最优的”\upcite{DBLP:conf/emnlp/HuangZM17}。也可以将束搜索建模为优化问题\upcite{Wiseman2016SequencetoSequenceLA,DBLP:conf/emnlp/Yang0M18},进而设计出新的终止条件\upcite{Ma2019LearningTS}。很多开源机器翻译系统也都使用了简单有效的终止条件,比如,在OpenNMT系统中,当搜索束中当前最好的假设生成了完整的译文时,搜索就会停止\upcite{KleinOpenNMT};在RNNSearch系统中,当找到预设数量的译文时,搜索就会停止,同时在这个过程中会不断减小搜索束的大小\upcite{bahdanau2014neural}。
\parinterval 实际上,设计搜索终止条件反映了搜索时延和搜索精度之间的一种折中\upcite{Eisner2011LearningST,Jiang2012LearnedPF}。在很多应用中,这个问题会非常关键。比如,在同声传译中,对于输入的长文本,何时开始翻译、何时结束翻译都是十分重要的\upcite{Zheng2020OpportunisticDW,Ma2019STACLST}。在很多线上翻译应用中,翻译结果的响应不能超过一定的时间,这时就需要一种{\small\sffamily\bfseries{时间受限的搜索}}\index{时间受限的搜索}(Time-constrained Search)\index{Time-constrained Search}策略\upcite{DBLP:conf/emnlp/StahlbergHSB17}
\parinterval 实际上,设计搜索终止条件反映了搜索时延和搜索精度的一种折中\upcite{Eisner2011LearningST,Jiang2012LearnedPF}。在很多应用中,这个问题会非常关键。比如,在同声传译中,对于输入的长文本,何时开始翻译、何时结束翻译都是十分重要的\upcite{Zheng2020OpportunisticDW,Ma2019STACLST}。在很多线上翻译应用中,翻译结果的响应不能超过一定的时间,这时就需要一种{\small\sffamily\bfseries{时间受限的搜索}}\index{时间受限的搜索}(Time-constrained Search)\index{Time-constrained Search}策略\upcite{DBLP:conf/emnlp/StahlbergHSB17}。
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
......@@ -227,7 +227,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\parinterval 机器翻译系统的输出并不仅限于单个译文。很多情况下,需要多个译文。比如,译文重排序中通常就需要系统的$n$-best输出,在交互式机器翻译中也往往需要提供多个译文供用户选择\upcite{Peris2017InteractiveNM,Peris2018ActiveLF}。但是,无论是统计机器翻译还是神经机器翻译,都面临一个同样的问题:$n$-best输出中的译文十分相似。实例\ref{eg:14-1}就展示了一个神经机器翻译输出的多个翻译结果,可以看到这些译文的区别很小。这个问题也被看作是机器翻译缺乏译文多样性的问题\upcite{Gimpel2013ASE,Li2016MutualIA,DBLP:conf/emnlp/DuanLXZ09,DBLP:conf/acl/XiaoZZW10,xiao2013bagging}。
\begin{example}
源语言句子:我们\ 期待\ 安理会\ 尽早\ 就此\ 作出\ 决定\
源语言句子:我们/期待/安理会/尽早/就此/作出/决定/
\qquad\ 机器译文\ \,1\ :We look forward to the Security Council making a decision on this
......@@ -254,9 +254,9 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\subsection{搜索错误}
\parinterval 机器翻译的错误分为两类:搜索错误和模型错误。搜索错误是指由于搜索算法的限制,即使潜在的搜索空间中有更好的解,模型也无法找到。比较典型的例子是,在对搜索结果进行剪枝的时候,如果剪枝过多,找到的结果很有可能不是最优的,这时就出现了搜索错误。而模型错误则是指由于模型学习能力的限制,即使搜索空间中存在最优解,模型也无法将该解排序在前面。
\parinterval 机器翻译的错误分为两类:搜索错误和模型错误。搜索错误是指由于搜索算法的限制,即使潜在的搜索空间中有更好的解,模型也无法找到。比较典型的例子是,在对搜索结果进行剪枝的时候,如果剪枝过多,找到的结果很有可能不是最优的,这时就出现了搜索错误。而模型错误则是指由于模型学习能力的限制,即使搜索空间中存在最优解,模型也无法将该解排序在前面。
\parinterval 在统计机器翻译中,搜索错误可以通过减少剪枝进行缓解。比较简单的方式是增加搜索束宽度,这往往会带来一定的性能提升\upcite{Xiao2016ALA}。也可以对搜索问题进行单独建模,以保证学习到的模型出现更少的搜索错误\upcite{Liu2014SearchAwareTF,Yu2013MaxViolationPA}。但是,在神经机器翻译中,这个问题却表现出不同的现象:在很多神经机器翻译系统中,随着搜索束的增大,系统的BLEU不升反降。图\ref{fig:14-3}展示了神经机器翻译系统中BLEU随搜索束大小的变化曲线,这里为了使该图更加规整直观,横坐标处将束大小进行了取对数操作。这个现象与传统的常识是相违背的,因此也有一些研究尝试解释这个现象\upcite{Stahlberg2019OnNS,Niehues2017AnalyzingNM}。在实验中,研究人员发现增加搜索束的大小会导致翻译生成的结果变得更短。他们将这个现象归因于:神经机器翻译的建模基于局部归一的最大似然估计,增加搜索束的大小,会导致更多的模型错误\upcite{Sountsov2016LengthBI,Murray2018CorrectingLB,StahlbergNeural}。此外,也有研究人员把这种翻译过短的现象归因于搜索错误\upcite{Stahlberg2019OnNS}。 由于搜索时所面临的搜索空间是十分巨大的,因此搜索时可能无法找到模型定义的“最好”的译文。在某种意义上,这也体现了训练和推断不一致的问题(见{\chapterthirteen})。
\parinterval 在统计机器翻译中,搜索错误可以通过减少剪枝进行缓解。比较简单的方式是增加搜索束宽度,这往往会带来一定的性能提升\upcite{Xiao2016ALA}。也可以对搜索问题进行单独建模,以保证学习到的模型出现更少的搜索错误\upcite{Liu2014SearchAwareTF,Yu2013MaxViolationPA}。但是,在神经机器翻译中,这个问题却表现出不同的现象:在很多神经机器翻译系统中,随着搜索束的增大,系统的BLEU不升反降。图\ref{fig:14-3}展示了神经机器翻译系统中BLEU随搜索束大小的变化曲线,这里为了使该图更加规整直观,横坐标处将束大小进行了取对数操作。这个现象与传统的常识是相违背的,因此也有一些研究尝试解释这个现象\upcite{Stahlberg2019OnNS,Niehues2017AnalyzingNM}
%----------------------------------------------------------------------
\begin{figure}[htp]
......@@ -266,10 +266,9 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\label{fig:14-3}
\end{figure}
%----------------------------------------------------------------------
\parinterval 在实验中,研究人员发现增加搜索束的大小会导致翻译生成的结果变得更短。他们将这个现象归因于:神经机器翻译的建模基于局部归一的最大似然估计,增加搜索束的大小,会导致更多的模型错误\upcite{Sountsov2016LengthBI,Murray2018CorrectingLB,StahlbergNeural}。此外,也有研究人员把这种翻译过短的现象归因于搜索错误\upcite{Stahlberg2019OnNS}: 由于搜索时所面临的搜索空间是十分巨大的,因此搜索时可能无法找到模型定义的“最好”的译文,在某种意义上,这也体现了训练和推断不一致的问题(见{\chapterthirteen})。一种解决该问题的思路是从“训练和推断行为不一致”的角度切入。比如,为了解决曝光偏置问题\upcite{Ranzato2016SequenceLT},可以让系统使用前面步骤的预测结果作为预测下一个词所需要的历史信息,而不是依赖于标准答案\upcite{Bengio2015ScheduledSF,Zhang2019BridgingTG}。为了解决训练和推断目标不一致的问题,可以在训练的时候模拟推断的行为,同时让模型训练的目标与评价系统的标准尽可能一致\upcite{DBLP:conf/acl/ShenCHHWSL16}
\parinterval 一种解决问题的思路是从“训练和推断行为不一致”的角度切入。比如,为了解决曝光偏置问题\upcite{Ranzato2016SequenceLT},可以让系统使用前面步骤的预测结果作为预测下一个词所需要的历史信息,而不是依赖于标准答案\upcite{Bengio2015ScheduledSF,Zhang2019BridgingTG}。为了解决训练和推断目标不一致的问题,可以在训练的时候模拟推断的行为,同时让模型训练的目标与评价系统的标准尽可能一致\upcite{DBLP:conf/acl/ShenCHHWSL16}
\parinterval 需要注意的是,前面提到的搜索束变大造成的翻译品质下降的问题还有其它解决方法。比如,可以通过对结果重排序来缓解这个问题\upcite{DBLP:conf/emnlp/Yang0M18},也可以通过设计更好的覆盖度模型来生成长度更加合理的译文\upcite{li-etal-2018-simple}。从这个角度说,上述问题的成因也较为复杂,因此需要同时考虑模型错误和搜索错误。
\parinterval 此外,还有其它方法解决增大搜索束造成的翻译品质下降的问题。比如,可以通过对结果重排序来缓解这个问题\upcite{DBLP:conf/emnlp/Yang0M18},也可以通过设计更好的覆盖度模型来生成长度更加合理的译文\upcite{li-etal-2018-simple}。从这个角度说,上述问题的成因也较为复杂,因此需要同时考虑模型错误和搜索错误。
%----------------------------------------------------------------------------------------
% NEW SECTION
......@@ -318,7 +317,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\label{fig:14-5}
\end{figure}
%----------------------------------------------
\parinterval 一种方法是将不同层的注意力权重进行共享,这样顶层的注意力权重可以复用底层的注意力权重\upcite{Xiao2019SharingAW}。在编码-解码注意力中,由于注意力机制中输入的Value 都是一样的\footnote{在Transformer解码器,编码-解码注意力输入的Value是编码器的输出,因此是相同的(见\chaptertwelve)。},甚至可以直接复用前一层注意力计算的结果。图\ref{fig:14-6}给出了不同方法的对比,其中$S$表示注意力权重,$A$表示注意模型的输出。可以看到,使用共享的思想,可以大大减少冗余的计算。
\parinterval 一种消除冗余计算的方法是将不同层的注意力权重进行共享,这样顶层的注意力权重可以复用底层的注意力权重\upcite{Xiao2019SharingAW}。在编码-解码注意力中,由于注意力机制中输入的Value 都是一样的\footnote{在Transformer解码器中,编码-解码注意力输入的Value是编码器的输出,因此是相同的(见\chaptertwelve)。},甚至可以直接复用前一层注意力计算的结果。图\ref{fig:14-6}给出了不同方法的对比,其中$S$表示注意力权重,$A$表示注意力模型的输出。可以看到,使用共享的思想,可以大大减少冗余的计算。
%----------------------------------------------
\begin{figure}[htp]
......@@ -329,7 +328,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\end{figure}
%----------------------------------------------
\parinterval 另一种方法是对不同层的参数进行共享。这种方法虽然不能带来直接的提速,但是可以大大减小模型的体积。比如,可以重复使用同一层的参数完成多层的计算。极端一些的情况下,六层网络可以只使用一层网络的参数\upcite{DBLP:conf/aaai/DabreF19}。不过,在深层模型中(层数> 20),浅层部分的差异往往较大,而深层(远离输入)之间的相似度会更高。这时可以考虑对深层的部分进行更多的共享。
\parinterval 另一种方法是对不同层的参数进行共享。这种方法虽然不能带来直接的提速,但是可以大大减小模型的体积。比如,可以重复使用同一层的参数完成多层的计算。极端情况下,六层网络可以只使用一层网络的参数\upcite{DBLP:conf/aaai/DabreF19}。不过,在深层模型中(层数> 20),浅层部分的差异往往较大,而深层(远离输入)之间的相似度会更高。这时可以考虑对深层的部分进行更多的共享。
\parinterval 减少冗余计算也代表了一种剪枝的思想。本质上,这类方法利用了模型参数的稀疏性假设\upcite{Narang2017BlockSparseRN,Gale2019TheSO}:一部分参数对模型整体的行为影响不大,因此可以直接被抛弃掉。这类方法也被使用在神经机器翻译模型的不同部分。比如,对于Transformer模型,也有研究发现多头注意力中的有些头是有冗余的\upcite{Michel2019AreSH},因此可以直接对其进行剪枝\upcite{DBLP:journals/corr/abs-1905-09418}
......@@ -341,9 +340,9 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\parinterval 在推断时,神经机器翻译的解码器是最耗时的,因为每个目标语言位置需要单独输出单词的分布,同时在搜索过程中每一个翻译假设都要被扩展成多个翻译假设,进一步增加了计算量。因此,提高推断速度的一种思路是使用更加轻量的解码器加快翻译假设的生成速度\upcite{Hinton2015Distilling,Munim2019SequencelevelKD}
\parinterval 比较简单的做法是把解码器的网络变得更“浅”、更“窄”。所谓浅网络是指使用更少的层构建神经网络,比如,使用3 层,甚至1 层网络的Transformer 解码器。所谓窄网络是指将网络中某些层中神经元的数量减少。不过,直接训练这样的小模型会带来翻译品质的下降。这时会考虑使用知识蒸馏等技术来提升小模型的品质(见{\chapterthirteen})。
\parinterval 比较简单的做法是把解码器的网络变得更“浅”、更“窄”。所谓浅网络是指使用更少的层构建神经网络,比如,使用3 层,甚至1 层网络的Transformer 解码器。所谓窄网络是指将网络中某些层中神经元的数量减少。不过,直接训练这样的小模型会造成翻译品质下降。这时会考虑使用知识蒸馏等技术来提升小模型的品质(见{\chapterthirteen})。
\parinterval 另一种提高推断速度的思路是化简Transformer 解码器的神经网络。比如,可以使用平均注意力机制代替原始Transformer 中的自注意力机制\upcite{DBLP:journals/corr/abs-1805-00631},也可以使用运算更轻的卷积操作代替注意力模块\upcite{Wu2019PayLA}。前面提到的基于共享注意力机制的模型也是一种典型的轻量模型\upcite{Xiao2019SharingAW}。这些方法本质上也是对注意力模型结构的优化,这类思想在近几年也受到了很多关注 \upcite{Kitaev2020ReformerTE,Katharopoulos2020TransformersAR,DBLP:journals/corr/abs-2006-04768},在{\chapterfifteen}也会有进一步讨论。
\parinterval 化简Transformer 解码器的神经网络也可以提高推断速度。比如,可以使用平均注意力机制代替原始Transformer 中的自注意力机制\upcite{DBLP:journals/corr/abs-1805-00631},也可以使用运算更轻的卷积操作代替注意力模块\upcite{Wu2019PayLA}。前面提到的基于共享注意力机制的模型也是一种典型的轻量模型\upcite{Xiao2019SharingAW}。这些方法本质上也是对注意力模型结构的优化,这类思想在近几年也受到了很多关注 \upcite{Kitaev2020ReformerTE,Katharopoulos2020TransformersAR,DBLP:journals/corr/abs-2006-04768},在{\chapterfifteen}也会有进一步讨论。
\parinterval 此外,使用异构神经网络也是一种平衡精度和速度的有效方法。在很多研究中发现,基于Transformer 的编码器对翻译品质的影响更大,而解码器的作用会小一些。因此,一种想法是使用速度更快的解码器结构,比如,用基于循环神经网络的解码器代替Transformer模型中基于注意力机制的解码器\upcite{Chen2018TheBO}。这样,既能发挥Transformer 在编码上的优势,同时也能利用循环神经网络在解码器速度上的优势。使用类似的思想,也可以用卷积神经网络等结构进行解码器的设计。
......@@ -415,7 +414,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\section{非自回归翻译}
\parinterval 目前大多数神经机器翻译模型都使用自左向右逐词生成译文的策略,即第$j$个目标语言单词的生成依赖于先前生成的$j-1$ 个词。这种翻译方式也被称作{\small\sffamily\bfseries{自回归解码}}\index{自回归解码}(Autoregressive Decoding)\index{Autoregressive Decoding}。虽然以Transformer为代表的模型使得训练过程高度并行化,加快了训练速度。但由于推断过程自回归的特性,模型无法同时生成译文中的所有单词,这导致模型的推断过程非常缓慢,对于神经机器翻译的实际应用是个很大的挑战。因此,如何设计一个在训练和推断阶段都能够并行化的模型是目前研究的热点之一。
\parinterval 目前大多数神经机器翻译模型都使用自左向右逐词生成译文的策略,即第$j$个目标语言单词的生成依赖于先前生成的$j-1$ 个词。这种翻译方式也被称作{\small\sffamily\bfseries{自回归解码}}\index{自回归解码}(Autoregressive Decoding)\index{Autoregressive Decoding}。虽然以Transformer为代表的模型使得训练过程高度并行化,加快了训练速度。但由于推断过程自回归的特性,模型无法同时生成译文中的所有单词,导致模型的推断过程非常缓慢,这对于神经机器翻译的实际应用是个很大的挑战。因此,如何设计一个在训练和推断阶段都能够并行化的模型是目前研究的热点之一。
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
......@@ -423,7 +422,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\subsection{自回归 vs 非自回归}
\parinterval 目前主流的神经机器翻译的推断是一种{\small\sffamily\bfseries{自回归翻译}}\index{自回归翻译}(Autoregressive Translation)\index{Autoregressive Translation}过程。所谓自回归是一种描述时间序列生成的方式。对于目标序列$\seq{y}=\{y_1,\dots,y_n\}$,自回归模型假设$j$时刻状态$y_j$的生成依赖于之前的状态$\{y_1,\dots,y_{j-1}\}$,而且$y_j$与$\{y_1,\dots,y_{j-1}\}$构成线性关系,那么生成$y_j$就是自回归的序列生成过程。神经机器翻译借用了这个概念,但是并不要求使用线性模型。\ref{sec:14-2-1}节提到的自左向右翻译模型和自右向左翻译模型都属于自回归翻译模型。自回归模型在机器翻译任务上也有很好的表现,特别是配合束搜索往往能够有效地寻找近似最优译文。但是,由于解码器的每个步骤必须顺序地而不是并行地运行,自回归翻译模型会阻碍不同译文单词生成的并行化。特别是在GPU 上,翻译的自回归性会大大降低计算的并行度和设备利用率。
\parinterval 目前主流的神经机器翻译的推断是一种{\small\sffamily\bfseries{自回归翻译}}\index{自回归翻译}(Autoregressive Translation)\index{Autoregressive Translation}过程。所谓自回归是一种描述时间序列生成的方式:对于目标序列$\seq{y}=\{y_1,\dots,y_n\}$,如果$j$时刻状态$y_j$的生成依赖于之前的状态$\{y_1,\dots,y_{j-1}\}$,而且$y_j$与$\{y_1,\dots,y_{j-1}\}$构成线性关系,那么称目标序列$\seq{y}$的生成过程是自回归的。神经机器翻译借用了这个概念,但是并不要求$y_j$与$\{y_1,\dots,y_{j-1}\}$构成线性关系。\ref{sec:14-2-1}节提到的自左向右翻译模型和自右向左翻译模型都属于自回归翻译模型。自回归模型在机器翻译任务上也有很好的表现,特别是配合束搜索往往能够有效地寻找近似最优译文。但是,由于解码器的每个步骤必须顺序地而不是并行地运行,自回归翻译模型会阻碍不同译文单词生成的并行化。特别是在GPU 上,翻译的自回归性会大大降低计算的并行度和设备利用率。
\parinterval 对于这个问题,研究人员也考虑移除翻译的自回归性,进行{\small\sffamily\bfseries{非自回归翻译}}\index{非自回归翻译}(Non-Autoregressive Translation,NAT)\index{Non-Autoregressive Translation}\upcite{Gu2017NonAutoregressiveNM}。一个简单的非自回归翻译模型将问题建模为公式\eqref{eq:14-9}
......@@ -440,9 +439,9 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\subsection{非自回归翻译模型的结构}
\parinterval 在介绍非自回归模型的具体结构之前,先来看看如何实现一个简单的非自回归翻译模型。这里用标准的Transformer来举例。首先为了一次性生成所有的词,需要丢弃解码器对未来信息屏蔽的矩阵,从而去掉模型的自回归性。此外,还要考虑生成译文的长度。自回归模型每步的输入是上一步解码出的结果,当预测到终止符<eos>时,序列的生成就自动停止了,然而非自回归模型却没有这样的特性,因此还需要一个长度预测器来预测出其长度,之后再用这个长度得到每个位置的表示,将其作为解码器的输入,进而完成整个序列的生成。
\parinterval 在介绍非自回归模型的具体结构之前,先来看看如何实现一个简单的非自回归翻译模型。这里用标准的Transformer来举例。首先为了一次性生成所有的词,需要丢弃解码器对未来信息屏蔽的矩阵,从而去掉模型的自回归性。此外,还要考虑生成译文的长度。在自回归模型中,每步的输入是上一步解码出的结果,当预测到终止符<eos>时,序列的生成就自动停止了,然而非自回归模型却没有这样的特性,因此还需要一个长度预测器来预测出其长度,之后再用这个长度得到每个位置的表示,将其作为解码器的输入,进而完成整个序列的生成。
\parinterval 图\ref{fig:14-12}对比了自回归翻译模型和简单的非自回归翻译模型。可以看到这种非自回归翻译模型可以一次性生成完整的译文。不过,高并行性也带来了翻译品质的下降。比如,在IWSLT 英德等数据上的BLEU[\%] 值只有个位数,而现在最好的自回归模型已经能够达到30左右的BLEU得分。这是因为每个位置词的预测只依赖于源语言句子$\seq{x}$,使得预测不准确。
\parinterval 图\ref{fig:14-12}对比了自回归翻译模型和简单的非自回归翻译模型。可以看到这种非自回归翻译模型可以一次性生成完整的译文。不过,高并行性也带来了翻译品质的下降。比如,在IWSLT 英德等数据上的BLEU[\%] 值只有个位数,而现在最好的自回归模型已经能够达到30左右的BLEU得分。这是因为每个位置词的预测只依赖于源语言句子$\seq{x}$,使得预测不准确。需要注意的是,图\ref{fig:14-12}(b)中将位置编码作为非自回归模型解码器的输入只是一个最简单的例子,在真实的系统中,非自回归解码器的输入一般是拷贝的源语言句子词嵌入与位置编码的融合。
%----------------------------------------------------------------------
\begin{figure}[htp]
......@@ -474,7 +473,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\parinterval 图\ref{fig:14-14}给出了基于繁衍率的Transformer非自回归模型的结构\upcite{Gu2017NonAutoregressiveNM},由三个模块组成:编码器,解码器,繁衍率预测器。类似于标准的Transformer模型,这里编码器和解码器都完全由前馈神经网络和多头注意力模块组成。唯一的不同是解码器中新增了位置注意力模块(图\ref{fig:14-14}中被红色虚线框住的模块),用于更好地捕捉目标语言端的位置信息。
\parinterval 繁衍率预测器的一个作用是预测整个译文句子的长度,以便并行地生成所有译文单词。可以通过对每个源语言单词计算繁衍率来估计最终译文的长度。具体来说,繁衍率指的是:根据每个源语言单词预测出其对应的目标语言单词的个数(见\chaptersix),如图\ref{fig:14-14}所示,翻译过程中英语单词“We”对应一个汉语单词“我们”,其繁衍率为1。于是,可以得到源语言句子对应的繁衍率序列(图\ref{fig:14-14}中的数字1\ 1\ 2\ 0\ 1),最终译文长度则由源语言单词的繁衍率之和决定。在模型训练阶段,繁衍率序列可以通过外部词对齐工具得到, 用于之后训练繁衍率预测器。但由于外部词对齐系统会出现错误,因此在模型收敛之后,可以对繁衍率预测器进行额外的微调。
\parinterval 繁衍率预测器的一个作用是预测整个译文句子的长度,以便并行地生成所有译文单词。可以通过对每个源语言单词计算繁衍率来估计最终译文的长度。具体来说,繁衍率指的是:根据每个源语言单词预测出其对应的目标语言单词的个数(见\chaptersix),如图\ref{fig:14-14}所示,翻译过程中英语单词“We”对应一个汉语单词“我们”,其繁衍率为1。于是,可以得到源语言句子对应的繁衍率序列(图\ref{fig:14-14}中的数字1\ 1\ 2\ 0\ 1),最终译文长度则由源语言单词的繁衍率之和决定。之后将源语言单词按该繁衍率序列进行拷贝,在图中的例子中,将“We”、“totally”、“.”拷贝一次,将“accept”、“it”分别拷贝两次和零次,就得到了最终解码器的输入“We totally accept accept .”。在模型训练阶段,繁衍率序列可以通过外部词对齐工具得到, 用于之后训练繁衍率预测器。但由于外部词对齐系统会出现错误,因此在模型收敛之后,可以对繁衍率预测器进行额外的微调。
%----------------------------------------------------------------------
\begin{figure}[htp]
......@@ -597,7 +596,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\section{多模型集成}\label{sec:14-5}
\parinterval 在机器学习领域,把多个模型融合成一个模型是提升系统性能的一种有效方法。比如,在经典的AdaBoost 方法中\upcite{DBLP:journals/jcss/FreundS97},用多个“弱” 分类器构建的“强” 分类器可以使模型在训练集上的分类错误率无限接近0。类似的思想也被应用到机器翻译中\upcite{DBLP:conf/acl/XiaoZZW10,DBLP:conf/icassp/SimBGSW07,DBLP:conf/acl/RostiMS07,DBLP:conf/wmt/RostiZMS08},被称为{\small\sffamily\bfseries{系统融合}}\index{系统融合}(System Combination)\index{System Combination}。在各种机器翻译比赛中,系统融合已经成为经常使用的技术之一。因为许多模型融合方法都是在推断阶段完成,因此此类方法开发的代价较低。
\parinterval 在机器学习领域,把多个模型融合成一个模型是提升系统性能的一种有效方法。比如,在经典的AdaBoost 方法中\upcite{DBLP:journals/jcss/FreundS97},用多个“弱” 分类器构建的“强” 分类器可以使模型在训练集上的分类错误率无限接近0。类似的思想也被应用到机器翻译中\upcite{DBLP:conf/acl/XiaoZZW10,DBLP:conf/icassp/SimBGSW07,DBLP:conf/acl/RostiMS07,DBLP:conf/wmt/RostiZMS08},被称为{\small\sffamily\bfseries{系统融合}}\index{系统融合}(System Combination)\index{System Combination}。在各种机器翻译比赛中,系统融合已经成为经常使用的技术之一。由于许多模型融合方法都是在推断阶段完成,因此此类方法开发的代价较低。
\parinterval 广义上来讲,使用多个特征组合的方式都可以被看作是一种模型的融合。融合多个神经机器翻译系统的方法有很多,可以分为假设选择、局部预测融合、译文重组三类,下面分别进行介绍。
......@@ -609,7 +608,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\parinterval {\small\sffamily\bfseries{假设选择}}\index{假设选择}(Hypothesis Selection)\index{Hypothesis Selection}是最简单的系统融合方法\upcite{DBLP:conf/emnlp/DuanLXZ09}。其思想是:给定一个翻译假设集合,综合多个模型对每一个翻译假设进行打分,之后选择得分最高的假设作为结果输出。
\parinterval 假设选择首先需要考虑的问题是假设生成。构建翻译假设集合是假设选择的第一步,也是最重要的一步。理想的情况下,这个集合应该尽可能包含更多高质量的翻译假设,这样后面有更大的几率选出更好的结果。不过,由于单个模型的性能是有上限的,因此无法期望这些翻译假设的品质超越单个模型的上限。研究人员更加关心的是翻译假设的多样性,因为已经证明多样的翻译假设非常有助于提升系统融合的性能\upcite{DBLP:journals/corr/LiMJ16,xiao2013bagging}。为了生成多样的翻译假设,通常有两种思路:1)使用不同的模型生成翻译假设;2)使用同一个模型的不同参数和设置生成翻译假设。图\ref{fig:14-8} 展示了二者的区别。比如,可以使用基于循环神经网络的模型和Transformer模型生成不同的翻译假设,之后都放入集合中;也可以只用Transformer 模型,但是用不同的模型参数构建多个系统,之后分别生成翻译假设。在神经机器翻译中,经常采用的是第二种方式,因为系统开发的成本更低。
\parinterval 假设选择首先需要考虑的问题是假设生成。构建翻译假设集合是假设选择的第一步,也是最重要的一步。理想的情况下,这个集合应该尽可能包含更多高质量的翻译假设,这样后面有更大的几率选出更好的结果。不过,由于单个模型的性能是有上限的,因此无法期望这些翻译假设的品质超越单个模型的上限。研究人员更加关心的是翻译假设的多样性,因为已经证明多样的翻译假设非常有助于提升系统融合的性能\upcite{DBLP:journals/corr/LiMJ16,xiao2013bagging}。为了生成多样的翻译假设,通常有两种思路:1)使用不同的模型生成翻译假设;2)使用同一个模型的不同参数和设置生成翻译假设。图\ref{fig:14-8} 展示了二者的区别。比如,可以使用基于循环神经网络的模型和Transformer模型生成不同的翻译假设,之后都放入集合中;也可以只用Transformer 模型,但是用不同的模型参数构建多个系统,之后分别生成翻译假设。在神经机器翻译中,经常采用的是第二种方式,因为系统开发的成本更低。
%----------------------------------------------
\begin{figure}[htp]
......@@ -622,7 +621,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\parinterval 此外,模型的选择也十分重要。所谓假设选择实际上就是要用一个更强的模型在候选中进行选择。这个“强” 模型一般是由更多、更复杂的子模型组合而成。常用的方法是直接使用翻译假设生成时的模型构建“强” 模型。比如,使用两个模型生成了翻译假设集合,之后对所有翻译假设都分别用这两个模型进行打分。最后,综合两个模型的打分(如线性插值)得到翻译假设的最终得分,并进行选择。当然,也可以使用更强大的统计模型对多个子模型进行组合,如使用更深、更宽的神经网络。
\parinterval 假设选择也可以被看作是一种简单的投票模型,对所有的候选用多个模型投票,选出最好的结果输出。包括重排序在内的很多方法也是假设选择的一种特例。比如,在重排序中,可以把生成$n$-best列表的过程看作是翻译假设生成过程,而重排序的过程可以被看作是融合多个子模型进行最终结果选择的过程。
\parinterval 假设选择也可以被看作是一种简单的投票模型,对所有的候选用多个模型投票,选出最好的结果输出。包括重排序在内的很多方法也是假设选择的一种特例。比如,在重排序中,可以把生成$n$-best列表的过程看作是翻译假设生成过程,而重排序的过程可以被看作是融合多个子模型进行最终结果选择的过程。
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
......@@ -703,7 +702,7 @@ b &=& \omega_{\textrm{high}}\cdot |\seq{x}| \label{eq:14-4}
\section{小结与扩展阅读}
\parinterval 推断系统(或解码系统)是神经机器翻译的重要组成部分。在神经机器翻译研究中,单独针对推断问题开展的讨论并不多见。更多的工作是将其与实践结合,常见于开源系统、评测比赛中。但是,从应用的角度看,研发高效的推断系统是机器翻译能够被大规模使用的前提。本章也从神经机器翻译推断的基本问题出发,重点探讨了推断系统的效率、非自回归翻译、多模型集成等问题。但是,由于推断问题涉及的问题十分广泛,因此本章也无法对其进行全面覆盖。关于神经机器翻译模型推断还有以下若干研究方向值得关注:
\parinterval 推断系统(或解码系统)是神经机器翻译的重要组成部分。在神经机器翻译研究中,单独针对推断问题开展的讨论并不多见。更多的工作是将其与实践结合,常见于开源系统、评测比赛中。但是,从应用的角度看,研发高效的推断系统是机器翻译能够被大规模使用的前提。本章也从神经机器翻译推断的基本问题出发,重点探讨了推断系统的效率、非自回归翻译、多模型集成等问题。但是,由于推断问题涉及的问题十分广泛,因此本章也无法对其进行全面覆盖。关于神经机器翻译模型推断还有以下若干研究方向值得关注:
\begin{itemize}
\vspace{0.5em}
......
......@@ -18,7 +18,7 @@
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!17,rounded corners=5pt,thick] (n8) at ([xshift=0em,yshift=-3em]n4.south) {层正则化};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=purple!17,rounded corners=5pt,thick] (n9) at ([xshift=0em,yshift=-1em]n8.south) {$\mathbi{X}\ \quad \mathbi{h}_1\ \quad \mathbi{h}_2\quad \ldots \quad\ \mathbi{h}_l$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=purple!17,rounded corners=5pt,thick] (n9) at ([xshift=0em,yshift=-1em]n8.south) {$\mathbi{X}\ \quad \mathbi{h}^1\ \quad \mathbi{h}^2\quad \ldots \quad\ \mathbi{h}^l$};
\node [anchor=north,rectangle,draw, inner sep=0mm,minimum height=1.2em,minimum width=15em,fill=teal!17,rounded corners=5pt,thick] (n10) at ([xshift=0em,yshift=-2em]n9.south) {权重累加};
......
......@@ -7,9 +7,9 @@
\tikzstyle{vlnode}=[rectangle,inner sep=0mm,minimum height=1em,minimum width=5em,rounded corners=2pt,draw]
\node [anchor=west,lnode] (n1) at (0, 0) {$\mathbi{h}_3$};
\node [anchor=north west,lnode] (n2) at ([xshift=0em,yshift=-0.5em]n1.south west) {$\mathbi{h}_2$};
\node [anchor=north west,lnode] (n3) at ([xshift=0em,yshift=-0.5em]n2.south west) {$\mathbi{h}_1$};
\node [anchor=west,lnode] (n1) at (0, 0) {$\mathbi{h}^3$};
\node [anchor=north west,lnode] (n2) at ([xshift=0em,yshift=-0.5em]n1.south west) {$\mathbi{h}^2$};
\node [anchor=north west,lnode] (n3) at ([xshift=0em,yshift=-0.5em]n2.south west) {$\mathbi{h}^1$};
\node [anchor=south] (d1) at ([xshift=0em,yshift=0.2em]n1.north) {1D};
......@@ -96,16 +96,16 @@
\draw [->,thick] ([xshift=0em,yshift=0em]ffn.north) -- ([xshift=0em,yshift=0em]fn.west);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]n3.south west) to node [midway,font=\small,align=center,xshift=0em,yshift=-0.8em] {$d$} ([xshift=0em]n3.south east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]n6.south west) to node [midway,font=\small,align=center,xshift=0em,yshift=-0.8em] {$d_a$} ([xshift=0em]n6.south east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]n7.north west) to node [midway,font=\small,align=center,xshift=-0.7em,yshift=-0em] {$d$} ([xshift=0em]n7.south west);
\draw [decorate,decoration={brace}] ([xshift=0em]n7.north west) to node [midway,font=\small,align=center,xshift=0em,yshift=0.7em] {$d$} ([xshift=0em]n7.north east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]n8.north west) to node [midway,font=\small,align=center,xshift=-0.8em,yshift=-0em] {$d_a$} ([xshift=0em]n8.south west);
\draw [decorate,decoration={brace}] ([xshift=0em]n8.north west) to node [midway,font=\small,align=center,xshift=0em,yshift=0.8em] {$n_{hop}$} ([xshift=0em]n8.north east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]nc31.south west) to node [midway,font=\small,align=center,xshift=0em,yshift=-0.8em] {$n_{hop}$} ([xshift=0em]nc35.south east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]ln5.south west) to node [midway,font=\small,align=center,xshift=0em,yshift=-0.8em] {$d$} ([xshift=0em]ln5.south east);
\draw [decorate] ([xshift=0em]ln5.south east) to node [midway,font=\footnotesize,align=center,xshift=1em,yshift=-0.5em] {$n_{hop}$} ([xshift=0em]ln1.south east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]fn.south east) to node [midway,font=\small,align=center,xshift=0.7em,yshift=-0em] {$d$} ([xshift=0em]fn.north east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]n3.south west) to node [midway,font=\small,align=center,xshift=0em,yshift=-0.8em] {$d_{\textrm{model}}$} ([xshift=0em]n3.south east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]n6.south west) to node [midway,font=\small,align=center,xshift=0em,yshift=-0.8em] {$d_{\rm a}$} ([xshift=0em]n6.south east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]n7.north west) to node [midway,font=\small,align=center,xshift=-1.4em,yshift=-0em] {$d_{\textrm{model}}$} ([xshift=0em]n7.south west);
\draw [decorate,decoration={brace}] ([xshift=0em]n7.north west) to node [midway,font=\small,align=center,xshift=0em,yshift=0.7em] {$d_{\textrm{model}}$} ([xshift=0em]n7.north east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]n8.north west) to node [midway,font=\small,align=center,xshift=-0.8em,yshift=-0em] {$d_{\rm a}$} ([xshift=0em]n8.south west);
\draw [decorate,decoration={brace}] ([xshift=0em]n8.north west) to node [midway,font=\small,align=center,xshift=0em,yshift=0.8em] {$n_{\rm hop}$} ([xshift=0em]n8.north east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]nc31.south west) to node [midway,font=\small,align=center,xshift=0em,yshift=-0.8em] {$n_{\rm hop}$} ([xshift=0em]nc35.south east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]ln5.south west) to node [midway,font=\small,align=center,xshift=0em,yshift=-0.8em] {$d_{\textrm{model}}$} ([xshift=0em]ln5.south east);
\draw [decorate] ([xshift=0em]ln5.south east) to node [midway,font=\footnotesize,align=center,xshift=1em,yshift=-0.5em] {$n_{\rm hop}$} ([xshift=0em]ln1.south east);
\draw [decorate,decoration={brace,mirror}] ([xshift=0em]fn.south east) to node [midway,font=\small,align=center,xshift=1.4em,yshift=-0em] {$d_{\textrm{model}}$} ([xshift=0em]fn.north east);
......
......@@ -9,15 +9,15 @@
\node [anchor=north,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n2) at ([xshift=0em,yshift=-0.2em]n1.south) {$\mathbi{X}$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n3) at ([xshift=1.5em,yshift=0em]n2.east) {$\mathbi{h}_0$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n3) at ([xshift=1.5em,yshift=0em]n2.east) {$\mathbi{h}^0$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n4) at ([xshift=1.5em,yshift=0em]n3.east) {$\mathbi{h}_1$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n4) at ([xshift=1.5em,yshift=0em]n3.east) {$\mathbi{h}^1$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n5) at ([xshift=1.5em,yshift=0em]n4.east) {$\mathbi{h}_2$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n5) at ([xshift=1.5em,yshift=0em]n4.east) {$\mathbi{h}^2$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n6) at ([xshift=1em,yshift=0em]n5.east) {$\ldots$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n7) at ([xshift=1em,yshift=0em]n6.east) {$\mathbi{h}_{L-1}$};
\node [anchor=west,encnode,draw=red!60!black!80,fill=red!20] (n7) at ([xshift=1em,yshift=0em]n6.east) {$\mathbi{h}^{L-1}$};
\node [anchor=north,rectangle,draw=teal!80, inner sep=0mm,minimum height=2em,minimum width=8em,fill=teal!17,rounded corners=5pt,thick] (n8) at ([xshift=3em,yshift=-1.5em]n4.south) {权重聚合$\mathbi{g}$};
......@@ -27,15 +27,15 @@
\node [anchor=north,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n10) at ([xshift=0em,yshift=-0.2em]n9.south) {$\mathbi{y}_{<j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n11) at ([xshift=1.5em,yshift=0em]n10.east) {$\mathbi{s}_{0,j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n11) at ([xshift=1.5em,yshift=0em]n10.east) {$\mathbi{s}^0_{j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n12) at ([xshift=1.5em,yshift=0em]n11.east) {$\mathbi{s}_{1,j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n12) at ([xshift=1.5em,yshift=0em]n11.east) {$\mathbi{s}^1_{j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n13) at ([xshift=1.5em,yshift=0em]n12.east) {$\mathbi{s}_{2,j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n13) at ([xshift=1.5em,yshift=0em]n12.east) {$\mathbi{s}^2_{j}$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n14) at ([xshift=1em,yshift=0em]n13.east) {$\ldots$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n15) at ([xshift=1em,yshift=0em]n14.east) {$\mathbi{s}_{M-1,j}$};
\node [anchor=west,decnode,draw=ublue,fill=blue!10] (n15) at ([xshift=1em,yshift=0em]n14.east) {$\mathbi{s}^{M-1}_{j}$};
\node [anchor=west,rectangle,minimum height=1.5em,minimum width=2.5em,rounded corners=5pt] (n16) at ([xshift=1.5em,yshift=0em]n15.east) {$\mathbi{y}_{j}$};
......
......@@ -4,7 +4,7 @@
\begin{axis}[
width=.50\textwidth,
height=.40\textwidth,
legend style={at={(0.60,0.08)}, anchor=south west},
legend style={at={(0.45,0.08)}, anchor=south west},
xlabel={\scriptsize{更新次数(10k)}},
% y-axis label: learning rate, expressed in units of 10^{-3}.
% The parenthesis opened before the scale factor is now closed inside the label text.
ylabel={\scriptsize{学习率 ($10^{-3}$)}},
ylabel style={yshift=-1em},xlabel style={yshift=0.0em},
......@@ -15,7 +15,7 @@
]
\addplot[red,line width=1.25pt] coordinates {(0,0) (1.6,2) (1.8,1.888) (2,1.787) (2.5,1.606) (3,1.462) (3.5,1.3549) (4,1.266) (4.5,1.193) (5,1.131)};
\addlegendentry{\scriptsize Base48}
\addlegendentry{\scriptsize 原始学习率}
%\addplot[red,line width=1.25pt] coordinates {(0,0) (8000,0.002) (10000,0.00179) (12000,0.00163) (12950,0.001572)};
\addplot[blue,line width=1.25pt] coordinates {(0,0) (0.8,2) (0.9906,1.7983)};
%\addplot[red,line width=1.25pt] coordinates {(0,0) (8000,0.002) (9906,0.0017983)};
......@@ -31,7 +31,7 @@
\addplot[blue,line width=1.25pt] coordinates {(2.9706,2) (3.1706,1.79) (3.3706,1.63) (3.4656,1.572) (3.6706,1.4602) (3.7136,1.44)};
\addplot[blue,dashed,line width=1.25pt] coordinates {(3.7136,1.44) (3.7136,2)};
\addplot[blue,line width=1.25pt] coordinates {(3.7136,2) (3.9136,1.79) (4.1136,1.63) (4.2086,1.572) (4.4136,1.4602) (4.4566,1.44) (4.7000,1.3574) (5.0000,1.2531)};
\addlegendentry{\scriptsize SDT48}
\addlegendentry{\scriptsize 调整后的学习率}
\end{axis}
}
......
......@@ -9,22 +9,22 @@
\begin{scope}[]
\node [anchor=west,ffnnode] (f1) at (0, 0){FFN};
\node [anchor=west,inner sep=0mm,minimum height=1.8em] (f1) at (0, 0){input};
\node [anchor=south,ebnode] (e1) at ([xshift=0em,yshift=1em]f1.north){Embedding};
\node [anchor=south west,manode] (a1) at ([xshift=0em,yshift=1em]e1.north west){Attention};
\node [anchor=south east,manode] (c1) at ([xshift=0em,yshift=1em]e1.north east){Conv};
\node [anchor=south west,ebnode] (e2) at ([xshift=0em,yshift=1em]a1.north west){Embedding};
\node [anchor=south,draw,circle,inner sep=4pt] (add1) at ([xshift=0em,yshift=0.5em]e2.north){};
\node [anchor=south,ffnnode] (f2) at ([xshift=0em,yshift=0.5em]add1.north){FFN};
\node [anchor=south,inner sep=0mm,minimum height=1.8em] (f2) at ([xshift=0em,yshift=0.5em]add1.north){output};
\draw[->,thick] ([xshift=0em,yshift=0em]f1.north)--([xshift=0em,yshift=0em]e1.south);
\draw[->,thick] ([xshift=0em,yshift=-0.3em]f1.north)--([xshift=0em,yshift=0em]e1.south);
\draw[->,thick] ([xshift=0em,yshift=-1em]a1.south)--([xshift=0em,yshift=0em]a1.south);
\draw[->,thick] ([xshift=0em,yshift=-1em]c1.south)--([xshift=0em,yshift=0em]c1.south);
\draw[->,thick] ([xshift=0em,yshift=0em]a1.north)--([xshift=0em,yshift=1em]a1.north);
\draw[->,thick] ([xshift=0em,yshift=0em]c1.north)--([xshift=0em,yshift=1em]c1.north);
\draw[-,thick] ([xshift=0em,yshift=0em]e2.north)--([xshift=0em,yshift=0em]add1.south);
\draw[->,thick] ([xshift=0em,yshift=0em]add1.north)--([xshift=0em,yshift=0em]f2.south);
\draw[->,thick] ([xshift=0em,yshift=0em]add1.north)--([xshift=0em,yshift=0.3em]f2.south);
\draw[-] ([xshift=0em,yshift=0em]add1.west)--([xshift=-0em,yshift=0em]add1.east);
\draw[-] ([xshift=0em,yshift=0em]add1.south)--([xshift=-0em,yshift=-0em]add1.north);
......@@ -33,11 +33,6 @@
\draw[->,thick,rectangle,rounded corners=5pt] ([xshift=0em,yshift=0.5em]f1.north)--([xshift=-6em,yshift=0.5em]f1.north)--([xshift=-5.45em,yshift=0em]add1.west)--([xshift=0em,yshift=0em]add1.west);
\node [anchor=north,inner sep=0mm,minimum height=1.5em] (ip) at ([xshift=0em,yshift=-1em]f1.south){input};
\node [anchor=south,inner sep=0mm,minimum height=1.5em] (op) at ([xshift=0em,yshift=1em]f2.north){output};
\draw[->,thick] ([xshift=0em,yshift=0em]ip.north)--([xshift=0em,yshift=0em]f1.south);
\draw[->,thick] ([xshift=0em,yshift=0em]f2.north)--([xshift=0em,yshift=0em]op.south);
\end{scope}
\end{tikzpicture}
\end{center}
\ No newline at end of file
\begin{tikzpicture}
\begin{scope}
\tikzstyle{cirnode}=[circle,minimum size=3em,font=\footnotesize,draw]
\tikzstyle{cirnode}=[rectangle,rounded corners=2pt,minimum height=2em,minimum width=3em,font=\footnotesize,draw=blue!50!white!50,fill=blue!20,thick]
\tikzstyle{recnode}=[rectangle,rounded corners=2pt,inner sep=0mm,minimum height=1.8em,minimum width=6em]
\node [anchor=west,cirnode] (n1) at (0, 0) {$\mathbi{h}_{i-2}^l$};
......@@ -11,24 +11,24 @@
\node [anchor=west,cirnode] (n5) at ([xshift=1.2em,yshift=0em]n4.east) {$\mathbi{h}_{i+2}^l$};
\begin{pgfonlayer}{background}
\node [anchor=center,red!30,minimum height=4.5em,minimum width=21em,very thick,draw] (c3) at ([xshift=0em,yshift=0em]n3.center) {};
\node [anchor=center,ugreen!30,minimum height=4em,minimum width=12.5em,very thick,draw] (c2) at ([xshift=0em,yshift=0em]n3.center) {};
\node [anchor=center,orange!30,minimum height=3.5em,minimum width=3.6em,very thick,draw] (c1) at ([xshift=0em,yshift=0em]n3.center) {};
\node [anchor=center,red!30,minimum height=3.5em,minimum width=21em,very thick,draw] (c3) at ([xshift=0em,yshift=0em]n3.center) {};
\node [anchor=center,ugreen!30,minimum height=3em,minimum width=13em,very thick,draw] (c2) at ([xshift=0em,yshift=0em]n3.center) {};
\node [anchor=center,orange!30,minimum height=2.5em,minimum width=4em,very thick,draw] (c1) at ([xshift=0em,yshift=0em]n3.center) {};
\end{pgfonlayer}
\node [anchor=south,recnode,fill=red!20] (r1) at ([xshift=-3.5em,yshift=2.5em]n2.north) {$\textrm{head}_1$};
\node [anchor=south,recnode,fill=orange!20] (r2) at ([xshift=0em,yshift=2.5em]n3.north) {$\textrm{head}_2$};
\node [anchor=south,recnode,fill=ugreen!20] (r3) at ([xshift=3.5em,yshift=2.5em]n4.north) {$\textrm{head}_3$};
\node [anchor=south,cirnode] (n6) at ([xshift=0em,yshift=1em]r2.north) {$\mathbi{h}_{i}^{l+1}$};
\node [anchor=south,cirnode] (n6) at ([xshift=0em,yshift=1.5em]r2.north) {$\mathbi{h}_{i}^{l+1}$};
\draw [->,very thick,orange!30] ([xshift=0em,yshift=0em]c1.north) -- ([xshift=0em,yshift=0em]r2.south);
\draw [->,very thick,ugreen!30] ([xshift=3em,yshift=0em]c2.north)..controls +(north:1.5em) and +(south:1.5em)..([xshift=0em,yshift=0em]r3.south);
\draw [->,very thick,red!30] ([xshift=-3em,yshift=0em]c3.north)..controls +(north:1.5em) and +(south:1.5em)..([xshift=0em,yshift=0em]r1.south);
\draw [->] ([xshift=0em,yshift=0em]r1.north) -- ([xshift=0em,yshift=0em]n6.south west);
\draw [->] ([xshift=0em,yshift=0em]r2.north) -- ([xshift=0em,yshift=0em]n6.south);
\draw [->] ([xshift=0em,yshift=0em]r3.north) -- ([xshift=0em,yshift=0em]n6.south east);
\draw [->,red!30,very thick] ([xshift=0em,yshift=0em]r1.north) -- ([xshift=-0.1em,yshift=-0.05em]n6.south);
\draw [->,orange!30,very thick] ([xshift=0em,yshift=0em]r2.north) -- ([xshift=0em,yshift=0em]n6.south);
\draw [->,ugreen!30,very thick] ([xshift=0em,yshift=0em]r3.north) -- ([xshift=0.1em,yshift=-0.05em]n6.south);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -11,8 +11,8 @@
\node [anchor=west,enode] (e1) at ([xshift=1.5em,yshift=0em]w1.east) {编码器};
\node [anchor=west,dnode] (d1) at ([xshift=3em,yshift=6em]e1.east) {翻译任务};
\node [anchor=north,dnode] (d2) at ([xshift=0em,yshift=-2em]d1.south) {文本处理任务};
\node [anchor=north,dnode] (d3) at ([xshift=0em,yshift=-2em]d2.south) {语言理解};
\node [anchor=north,dnode] (d2) at ([xshift=0em,yshift=-2em]d1.south) {句法分析任务};
\node [anchor=north,dnode] (d3) at ([xshift=0em,yshift=-2em]d2.south) {语言理解任务};
\node [anchor=north,dnode] (d4) at ([xshift=0em,yshift=-2em]d3.south) {其他任务};
\node [anchor=west] (w2) at ([xshift=2em,yshift=0em]d1.east) {英语(目标语言)};
......
......@@ -6,16 +6,16 @@
\begin{scope}[minimum height = 20pt]
\node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (F1) at ([xshift=2em]x1.east){\small{$F$}};
\node [anchor=west,circle,draw,minimum size=1em] (n1) at ([xshift=2em]F1.east) {};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (ln1) at ([xshift=2em]n1.east){\small{\textrm{LN}}};
\node [anchor=west] (x2) at ([xshift=2em]ln1.east) {$\mathbi{x}_{l+1}$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (F1) at ([xshift=4em]x1.east){\small{$F$}};
\node [anchor=west,circle,draw,minimum size=1em] (n1) at ([xshift=4em]F1.east) {};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (ln1) at ([xshift=4em]n1.east){\small{\textrm{LN}}};
\node [anchor=west] (x2) at ([xshift=4em]ln1.east) {$\mathbi{x}_{l+1}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (F2) at ([xshift=2em]x3.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (ln2) at ([xshift=2em]F2.east){\small{$F$}};
\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=2em]ln2.east){};
\node [anchor=west] (x4) at ([xshift=2em]n2.east) {$\mathbi{x}_{l+1}$};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt] (F2) at ([xshift=4em]x3.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt] (ln2) at ([xshift=4em]F2.east){\small{$F$}};
\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=4em]ln2.east){};
\node [anchor=west] (x4) at ([xshift=4em]n2.east) {$\mathbi{x}_{l+1}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(F1.west);
\draw[->, line width=1pt] ([xshift=-0.1em]F1.east)--(n1.west);
......
% Draws a 7x7 grid of square cells a<i><j> (i = column 0..6, j = row 0..6).
% Each cell is labelled w_z, where in the table below z equals (i - j) clipped
% to the range [-3, 3]; the fill style index k equals z + 4, so cells with the
% same z share a colour (green shades for negative z, white for 0, red for positive).
\begin{tikzpicture}
% Cell styles node1..node7: identical geometry, differing only in fill colour.
\tikzstyle{node1} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=green!80]
\tikzstyle{node2} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=green!40]
\tikzstyle{node3} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=green!20]
\tikzstyle{node4} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt]
\tikzstyle{node5} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=red!20]
\tikzstyle{node6} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=red!40]
\tikzstyle{node7} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=red!80]
\begin{scope}[scale=1.0]
% Tuple format: column \i / row \j / style index \k / subscript \z.
% Rows are listed from j=6 (top) down to j=0 (bottom); cells are spaced 2em apart.
\foreach \i / \j / \k / \z in
{0/6/1/-3, 1/6/1/-3, 2/6/1/-3, 3/6/1/-3, 4/6/2/-2, 5/6/3/-1, 6/6/4/0,
0/5/1/-3, 1/5/1/-3, 2/5/1/-3, 3/5/2/-2, 4/5/3/-1, 5/5/4/0, 6/5/5/1,
0/4/1/-3, 1/4/1/-3, 2/4/2/-2, 3/4/3/-1, 4/4/4/0, 5/4/5/1, 6/4/6/2,
0/3/1/-3, 1/3/2/-2, 2/3/3/-1, 3/3/4/0, 4/3/5/1, 5/3/6/2, 6/3/7/3,
0/2/2/-2, 1/2/3/-1, 2/2/4/0, 3/2/5/1, 4/2/6/2, 5/2/7/3, 6/2/7/3,
0/1/3/-1, 1/1/4/0, 2/1/5/1, 3/1/6/2, 4/1/7/3, 5/1/7/3, 6/1/7/3,
0/0/4/0, 1/0/5/1, 2/0/6/2, 3/0/7/3, 4/0/7/3, 5/0/7/3, 6/0/7/3}
\node[node\k] (a\i\j) at (2*1em*\i + 0*0em,2*1em*\j + 0*0em) {$\mathbi{w}_{\z}$};
% Row labels: the row index \j is printed to the left of each cell in column 0 (nodes n1..n7).
\foreach \i / \j / \k in
{0/6/1, 0/5/2, 0/4/3, 0/3/4, 0/2/5, 0/1/6, 0/0/7}
\node[anchor=east] (n\k) at ([xshift=-0.5em,yshift=0em]a\i\j.west) {\j};
% Column labels: the column index \i is printed below each cell in row 0 (nodes n8..n14).
\foreach \i / \j / \k in
{0/0/8, 1/0/9, 2/0/10, 3/0/11, 4/0/12, 5/0/13, 6/0/14}
\node[anchor=north] (n\k) at ([xshift=-0em,yshift=-0.5em]a\i\j.south) {\i};
%\node [anchor=north] (n1) at ([xshift=0em,yshift=0em]a00.south west) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% Overlays hand-placed axis labels on the pre-rendered bitmap DLCL-picture.png.
% All coordinates are tuned by eye against the image at width=0.62\textwidth;
% changing the image width requires re-tuning them.
\begin{tikzpicture}[node distance = 0,scale = 1]
\tikzstyle{every node}=[scale=1]
% Background image; the white border keeps the node frame invisible.
\node[draw=white] (input) at (0,0){\includegraphics[width=0.62\textwidth]{./Chapter15/Figures/DLCL-picture.png}};
% Numeric tick labels on the right-hand side (presumably the colour-bar scale of the image -- confirm against the bitmap).
\node[scale = 2] at (4.5,3.6){4};
\node[scale = 2] at (4.5,1.8){2};
\node[scale = 2] at (4.5,0){0};
\node[scale = 2] at (4.5,-1.8){-2};
\node[scale = 2] at (4.5,-3.6){-4};
% Labels down the left-hand side: x_1, x_6, ..., x_31.
\node[scale = 1.5] at (-4.5,3.75){$\mathrm{x}_{1}$};
\node[scale = 1.5] at (-4.5,2.5){$\mathrm{x}_{6}$};
\node[scale = 1.5] at (-4.5,1.4){$\mathrm{x}_{11}$};
\node[scale = 1.5] at (-4.5,0.1){$\mathrm{x}_{16}$};
\node[scale = 1.5] at (-4.5,-1.1){$\mathrm{x}_{21}$};
\node[scale = 1.5] at (-4.5,-2.3){$\mathrm{x}_{26}$};
\node[scale = 1.5] at (-4.5,-3.4){$\mathrm{x}_{31}$};
% Labels along the bottom: y_0, y_5, ..., y_30.
\node[scale = 1.5] at (-3.8,-4){$\mathrm{y}_{0}$};
\node[scale = 1.5] at (-2.7,-4){$\mathrm{y}_{5}$};
\node[scale = 1.5] at (-1.5,-4){$\mathrm{y}_{10}$};
\node[scale = 1.5] at (-0.3,-4){$\mathrm{y}_{15}$};
\node[scale = 1.5] at (0.9,-4){$\mathrm{y}_{20}$};
\node[scale = 1.5] at (2.1,-4){$\mathrm{y}_{25}$};
\node[scale = 1.5] at (3.3,-4){$\mathrm{y}_{30}$};
\end{tikzpicture}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -43,17 +43,17 @@
\end{pgfonlayer}
\draw [->,thick](node0-1.north)--(node1-1.south);
\draw [->,thick](node0-1.north)--(node1-2.south);
\draw [->,thick](node0-1.north)--(node1-4.south);
\draw [->,thick](node0-1.north)--([xshift=-0.15em,yshift=-0.05em]node1-2.south);
\draw [->,thick](node0-1.north)--([xshift=-0.4em,yshift=-0.05em]node1-4.south);
\draw [->,thick](node0-2.north)--(node1-2.south);
\draw [->,thick](node0-2.north)--(node1-4.south);
\draw [->,thick](node0-2.north)--([xshift=-0.15em,yshift=-0.05em]node1-4.south);
\draw [->,thick](node0-4.north)--(node1-4.south);
\draw [->,thick](node1-1.north)--(node2-1.south);
\draw [->,thick](node1-1.north)--(node2-2.south);
\draw [->,thick](node1-1.north)--(node2-4.south);
\draw [->,thick](node1-1.north)--([xshift=-0.15em,yshift=-0.05em]node2-2.south);
\draw [->,thick](node1-1.north)--([xshift=-0.4em,yshift=-0.05em]node2-4.south);
\draw [->,thick](node1-2.north)--(node2-2.south);
\draw [->,thick](node1-2.north)--(node2-4.south);
\draw [->,thick](node1-2.north)--([xshift=-0.15em,yshift=-0.05em]node2-4.south);
\draw [->,thick](node1-4.north)--(node2-4.south);
\node [anchor=south,embedding,fill=yellow!20](node3-1) at ([yshift=2em]node2-1.north){\footnotesize{$\seq{P}_1$}};
......@@ -77,23 +77,23 @@
\node [anchor=south,embedding,fill=yellow!20](node3-8) at ([yshift=2em]node2-8.north){\footnotesize{$\seq{P}_n$}};
\draw [->,thick](node0-5.north)--(node1-5.south);
\draw [->,thick](node0-5.north)--(node1-6.south);
\draw [->,thick](node0-5.north)--(node1-8.south);
\draw [->,thick](node0-6.north)--(node1-5.south);
\draw [->,thick](node0-5.north)--([xshift=-0.15em,yshift=-0.05em]node1-6.south);
\draw [->,thick](node0-5.north)--([xshift=-0.15em,yshift=-0.05em]node1-8.south);
\draw [->,thick](node0-6.north)--([xshift=0.15em,yshift=-0.05em]node1-5.south);
\draw [->,thick](node0-6.north)--(node1-6.south);
\draw [->,thick](node0-6.north)--(node1-8.south);
\draw [->,thick](node0-8.north)--(node1-5.south);
\draw [->,thick](node0-8.north)--(node1-6.south);
\draw [->,thick](node0-6.north)--([xshift=-0.4em,yshift=-0.05em]node1-8.south);
\draw [->,thick](node0-8.north)--([xshift=0.4em,yshift=-0.05em]node1-5.south);
\draw [->,thick](node0-8.north)--([xshift=0.15em,yshift=-0.05em]node1-6.south);
\draw [->,thick](node0-8.north)--(node1-8.south);
\draw [->,thick](node1-5.north)--(node2-5.south);
\draw [->,thick](node1-5.north)--(node2-6.south);
\draw [->,thick](node1-5.north)--(node2-8.south);
\draw [->,thick](node1-6.north)--(node2-5.south);
\draw [->,thick](node1-5.north)--([xshift=-0.15em,yshift=-0.05em]node2-6.south);
\draw [->,thick](node1-5.north)--([xshift=-0.15em,yshift=-0.05em]node2-8.south);
\draw [->,thick](node1-6.north)--([xshift=0.15em,yshift=-0.05em]node2-5.south);
\draw [->,thick](node1-6.north)--(node2-6.south);
\draw [->,thick](node1-6.north)--(node2-8.south);
\draw [->,thick](node1-8.north)--(node2-5.south);
\draw [->,thick](node1-8.north)--(node2-6.south);
\draw [->,thick](node1-6.north)--([xshift=-0.4em,yshift=-0.05em]node2-8.south);
\draw [->,thick](node1-8.north)--([xshift=0.4em,yshift=-0.05em]node2-5.south);
\draw [->,thick](node1-8.north)--([xshift=0.15em,yshift=-0.05em]node2-6.south);
\draw [->,thick](node1-8.north)--(node2-8.south);
\draw [<-,thick](node3-5.south)--(node2-5.north);
......
......@@ -7,7 +7,7 @@
\node [anchor=center,model,fill=blue!20] (decoder) at ([xshift=7.5em]ate.east) {\small{解码器}};
\node [anchor=north,word] (w1) at ([yshift=-1.5em,xshift=0em]decoder.south) {\small{$x_3$}};
\node [anchor=west,word] (w2) at ([xshift=0em]w1.east) {\small{$x_4$}};
\node [anchor=west,word] (w3) at ([xshift=0em]w2.east) {<M>};
\node [anchor=west,word] (w3) at ([xshift=-0.26em]w2.east) {<M>};
\node [anchor=east,word] (w4) at ([xshift=0em]w1.west) {<M>};
\node [anchor=east,word] (w5) at ([xshift=0em]w4.west) {<M>};
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -244,9 +244,9 @@
\end{figure}
%----------------------------------------------
\parinterval 在神经机器翻译中,应用多任务学习的主要策略是将翻译任务作为主任务,同时设置一些仅使用单语数据的子任务,通过这些子任务来捕捉单语数据中的语言知识\upcite{DBLP:conf/emnlp/DomhanH17,DBLP:conf/emnlp/ZhangZ16,DBLP:journals/corr/LuongLSVK15}。一种多任务学习的方法是利用源语言单语数据,通过单个编码器对源语言数据进行建模,再分别使用两个解码器来学习源语言排序和翻译任务。源语言排序任务是指利用预排序规则对源语言句子中词的顺序进行调整\upcite{DBLP:conf/emnlp/WangCK07},可以通过单语数据来构造训练数据,从而使编码器被训练得更加充分\upcite{DBLP:conf/emnlp/ZhangZ16},如图\ref{fig:16-7}所示,图中$y_{<}$表示当前时刻之前的译文$x_{<}$表示源语言句子中词的顺序调整后的句子。
\parinterval 在神经机器翻译中,应用多任务学习的主要策略是将翻译任务作为主任务,同时设置一些仅使用单语数据的子任务,通过这些子任务来捕捉单语数据中的语言知识\upcite{DBLP:conf/emnlp/DomhanH17,DBLP:conf/emnlp/ZhangZ16,DBLP:journals/corr/LuongLSVK15}。一种多任务学习的方法是利用源语言单语数据,通过单个编码器对源语言数据进行建模,再分别使用两个解码器来学习源语言排序和翻译任务。源语言排序任务是指利用预排序规则对源语言句子中词的顺序进行调整\upcite{DBLP:conf/emnlp/WangCK07},可以通过单语数据来构造训练数据,从而使编码器被训练得更加充分\upcite{DBLP:conf/emnlp/ZhangZ16},如图\ref{fig:16-7}所示,图中$y_{<}$表示当前时刻之前的单词序列,$x_{<}$表示源语言句子中词的顺序调整后的句子。
\parinterval 虽然神经机器翻译模型可以看作一种语言生成模型,但生成过程中却依赖于源语言信息,因此无法直接利用目标语言单语数据进行多任务学习。针对这个问题,可以对原有翻译模型结构进行修改,在解码器底层增加一个语言模型子层,这个子层用于学习语言模型任务,与编码器端是完全独立的,如图\ref{fig:16-8}所示\upcite{DBLP:conf/emnlp/DomhanH17},图中$y_{<}$表示当前时刻之前的译文$z_{<}$表示当前时刻之前的单语数据。在训练过程中,分别将双语数据和单语数据送入翻译模型和语言模型进行计算,双语数据训练产生的梯度用于对整个模型进行参数更新,而单语数据产生的梯度只对语言模型子层进行参数更新。
\parinterval 虽然神经机器翻译模型可以看作一种语言生成模型,但生成过程中却依赖于源语言信息,因此无法直接利用目标语言单语数据进行多任务学习。针对这个问题,可以对原有翻译模型结构进行修改,在解码器底层增加一个语言模型子层,这个子层用于学习语言模型任务,与编码器端是完全独立的,如图\ref{fig:16-8}所示\upcite{DBLP:conf/emnlp/DomhanH17},图中$y_{<}$表示当前时刻之前的单词序列,$z_{<}$表示当前时刻之前的单语数据。在训练过程中,分别将双语数据和单语数据送入翻译模型和语言模型进行计算,双语数据训练产生的梯度用于对整个模型进行参数更新,而单语数据产生的梯度只对语言模型子层进行参数更新。
%----------------------------------------------
\begin{figure}[htp]
......
......@@ -10,6 +10,9 @@
\node(figure)[draw=white,above of = decoder_right,yshift=6.5em,scale=0.25] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.jpg}};
\node [anchor=south,scale=1.2] (node1) at ([xshift=-2.5em,yshift=4.5em]y.north) {\small{$x$:源语言文本数据}};
\node [anchor=north,scale=1.2] (node2) at ([xshift=0.57em]node1.south){\small{$y$:目标语言文本数据}};
\draw[->,thick](x)to(encoder);
\draw[->,thick](encoder)to(decoder_left)node[right,xshift=-0.1cm,yshift=-1.25cm,scale=1.2]{\small{翻译}};
\draw[->,thick](decoder_left)to(y_hat);
......
\begin{tikzpicture}
%\tikzstyle{every node}=[scale=0.8]
\tikzstyle{prob}=[minimum width=0.4em, fill=blue!15,inner sep=0pt]
\node[draw,fill=yellow!15,inner sep=0pt,minimum width=3em,minimum height=5em](key) at (0,0){};
\tikzstyle{prob}=[minimum width=0.4em, fill=blue!20,inner sep=0pt]
\node[draw,fill=yellow!30,inner sep=0pt,minimum width=3em,minimum height=5em](key) at (0,0){};
\draw[] ([yshift=0.5em]key.180) -- ([yshift=0.5em]key.0);
\draw[] ([yshift=1.5em]key.180) -- ([yshift=1.5em]key.0);
\draw[] ([yshift=-0.5em]key.180) -- ([yshift=-0.5em]key.0);
\draw[] ([yshift=-1.5em]key.180) -- ([yshift=-1.5em]key.0);
\node[draw,fill=ugreen!15,inner sep=0pt,minimum width=3em,minimum height=5em](value) at (3em,0){};
\node[draw,fill=green!30!white,inner sep=0pt,minimum width=3em,minimum height=5em](value) at (3em,0){};
\draw[] ([yshift=0.5em]value.180) -- ([yshift=0.5em]value.0);
\draw[] ([yshift=1.5em]value.180) -- ([yshift=1.5em]value.0);
\draw[] ([yshift=-0.5em]value.180) -- ([yshift=-0.5em]value.0);
......@@ -17,10 +17,10 @@
\node[anchor=south,font=\footnotesize,inner sep=0pt] at ([yshift=0.2em]value.north){value};
\node[anchor=south,font=\footnotesize,inner sep=0pt] (cache)at ([yshift=2em,xshift=1.5em]key.north){\small\bfnew{缓存}};
\node[draw,anchor=east,thick,minimum size=1.8em,fill=orange!15] (dt) at ([yshift=2.1em,xshift=-4em]key.west){${\mathbi{d}}_{t}$};
\node[draw,anchor=east,thick,minimum size=1.8em,fill=orange!30] (dt) at ([yshift=2.1em,xshift=-4em]key.west){${\mathbi{d}}_{t}$};
\node[anchor=north,font=\footnotesize] (readlab) at ([xshift=2.8em,yshift=0.3em]dt.north){\red{读取}};
\node[draw,anchor=east,thick,minimum size=1.8em,fill=ugreen!15] (st) at ([xshift=-3.7em]dt.west){${\mathbi{s}}_{t}$};
\node[draw,anchor=east,thick,minimum size=1.8em,fill=red!15] (st2) at ([xshift=-0.85em,yshift=3.5em]dt.west){$ \widetilde{\mathbi{s}}_{t}$};
\node[draw,anchor=east,thick,minimum size=1.8em,fill=green!30!white] (st) at ([xshift=-3.7em]dt.west){${\mathbi{s}}_{t}$};
\node[draw,anchor=east,thick,minimum size=1.8em,fill=red!30] (st2) at ([xshift=-0.85em,yshift=3.5em]dt.west){$ \widetilde{\mathbi{s}}_{t}$};
%\node[draw,anchor=north,circle,inner sep=0pt, minimum size=1.2em,fill=yellow] (add) at ([yshift=-1em]st2.south){+};
\node[draw,thick,inner sep=0pt, minimum size=1.1em, circle] (add) at ([yshift=-1.5em]st2.south){};
......@@ -29,12 +29,12 @@
\node[anchor=north,inner sep=0pt,font=\footnotesize,text=red] at ([xshift=-0em,yshift=-0.5em]add.south){融合};
\node[draw,anchor=east,thick,minimum size=1.8em,fill=yellow!15] (ct) at ([xshift=-2em,yshift=-3.5em]st.west){$ {\mathbi{C}}_{t}$};
\node[draw,anchor=east,thick,minimum size=1.8em,fill=yellow!30] (ct) at ([xshift=-2em,yshift=-3.5em]st.west){$ {\mathbi{C}}_{t}$};
\node[anchor=north,font=\footnotesize] (matchlab) at ([xshift=6.7em,yshift=-0.1em]ct.north){\red{匹配}};
\node[anchor=east] (y) at ([xshift=-6em,yshift=1em]st.west){$\mathbi{y}_{t-1}$};
\node[draw,anchor=east,minimum width=7em,minimum height=1.4em,fill=blue!20] (output) at ([xshift=-2.6em,yshift=2.6em]st2.west){};
\node[draw,anchor=east,minimum width=7em,minimum height=1.4em,fill=blue!30] (output) at ([xshift=-2.6em,yshift=2.6em]st2.west){};
\node[anchor=south] (yt) at ([yshift=4.2em]output.north){$\mathbi{y}_{t}$};
\draw[] ([xshift=-0.7em]output.90) -- ([xshift=-0.7em]output.-90);
......
......@@ -10,7 +10,7 @@
\node(process_2)[process,fill=blue!20,right of = process_1,xshift=7.0cm,text width=4cm,align=center]{\baselineskip=4pt\LARGE{[[0.2,...,0.3], \qquad ..., \qquad 0.3,...,0.5]]}\par};
\node(text_2)[below of = process_2,yshift=-2cm,scale=1.5]{语音特征};
\node(process_3)[process,fill=orange!20,minimum width=6cm,minimum height=5cm,right of = process_2,xshift=8.2cm,text width=4cm,align=center]{};
\node(text_3)[below of = process_3,yshift=-3cm,scale=1.5]{源语文本及其词格};
\node(text_3)[below of = process_3,yshift=-3cm,scale=1.5]{源语文本及其词格};
\node(cir_s)[cir,very thick, below of = process_3,xshift=-2.2cm,yshift=1.1cm]{\LARGE S};
\node(cir_a)[cir,right of = cir_s,xshift=1cm,yshift=0.8cm]{\LARGE a};
\node(cir_c)[cir,right of = cir_a,xshift=1.2cm,yshift=0cm]{\LARGE c};
......
......@@ -3,38 +3,38 @@
\tikzstyle{every node}=[scale=0.8]
\foreach \x in {1,2,3,4}
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!30,rounded corners=1pt] (c1_\x) at (0em+2em*\x, 0em){};
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!35,rounded corners=1pt] (c1_\x) at (0em+2em*\x, 0em){};
\node[anchor=north] (hpre) at ([yshift=1.8em]c1_1.north) {${\mathbi{h}}^ {\textrm{pre}k}$};
\foreach \x in {1,2,3}
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!30,rounded corners=1pt] (c2_\x) at (11em+2em*\x, 0em){};
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!35,rounded corners=1pt] (c2_\x) at (11em+2em*\x, 0em){};
\node[anchor=north] (hpre) at ([yshift=1.8em]c2_1.north) {${\mathbi{h}}^ {\textrm{pre}1}$};
\foreach \x in {1,2,3,4,5}
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!30,rounded corners=1pt] (c3_\x) at (18.4em+2em*\x, 0em){};,minimum width=1em
% One cell per loop value \x, named c3_\x, placed 2em apart starting at x = 18.4em on the baseline row.
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=red!35,rounded corners=1pt] (c3_\x) at (18.4em+2em*\x, 0em){};
\foreach \x in {1,2,3,4,5}
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=blue!30,rounded corners=1pt] (c4_\x) at (18.4em+2em*\x, 10.4em){};
\node[draw,inner sep=0pt,minimum height=1em,minimum width=1.6em,fill=blue!35,rounded corners=1pt] (c4_\x) at (18.4em+2em*\x, 10.4em){};
%\node[inner sep=0pt,minimum size=1em,fill=ugreen,circle] (c5) at (9em, 7em){};
\node[draw,inner sep=0pt,minimum size=1.2em,fill=green!20,circle] (qs) at (18.6em, 6.4em){};
\node[draw,inner sep=0pt,minimum size=1.2em,fill=green!20,circle] (qw) at (18.6em, 4.4em){};
\node[anchor=north] (qslab) at ([xshift=-0.8em,yshift=1em]qs.north) {${\mathbi{q}}^s$};
\node[anchor=north] (qwlab) at ([xshift=-0.8em,yshift=1em]qw.north) {${\mathbi{q}}^w$};
%\node[inner sep=0pt,minimum size=1em,fill=ugreen,circle,thick] (c5) at (9em, 7em){};
\node[draw,inner sep=0pt,minimum size=1.2em,fill=green!30!white,circle,thick] (qs) at (18.6em, 6.4em){};
\node[draw,inner sep=0pt,minimum size=1.2em,fill=green!30!white,circle,thick] (qw) at (18.6em, 4.4em){};
\node[anchor=north] (qslab) at ([xshift=-0.8em,yshift=1em]qs.north) {${\mathbi{q}}_s$};
\node[anchor=north] (qwlab) at ([xshift=-0.8em,yshift=1em]qw.north) {${\mathbi{q}}_w$};
\node[draw,thick,inner sep=0pt, minimum size=1.2em, circle] (sigma) at (24.4em, 8em){};
\node[draw,thick,inner sep=0pt, minimum size=1.2em, circle,thick] (sigma) at (24.4em, 8em){};
\draw[-,thick] (sigma.0) -- (sigma.180);
\draw[-,thick] (sigma.90) -- (sigma.-90);
\node[draw,fill=orange!30,inner sep=0pt, minimum size=1.2em, circle] (add1) at (5em, 3em){};
\node[draw,fill=orange!30,inner sep=0pt, minimum size=1.2em, circle] (add2) at (15em, 3em){};
\node[draw,fill=orange!30,inner sep=0pt, minimum size=1.2em, circle] (add3) at (10em, 5.2em){};
\node[anchor=north] (cond) at ([xshift=-1em,yshift=0.5em]add3.north) {${\mathbi{d}}$};
\node[draw,fill=orange!35,inner sep=0pt, minimum size=1.2em, circle,thick] (add1) at (5em, 3em){};
\node[draw,fill=orange!35,inner sep=0pt, minimum size=1.2em, circle,thick] (add2) at (15em, 3em){};
\node[draw,fill=orange!35,inner sep=0pt, minimum size=1.2em, circle,thick] (add3) at (10em, 5.2em){};
\node[anchor=north] (cond) at ([xshift=-1.2em,yshift=0.5em]add3.north) {${\mathbi{d}_t}$};
\node[anchor=north] (cons1) at ([xshift=-1em,yshift=0.5em]add2.north) {${\mathbi{s}}^1$};
\node[anchor=north] (consj) at ([xshift=-1em,yshift=0.5em]add1.north) {${\mathbi{s}}^k$};
\begin{pgfonlayer}{background}
\node[draw,rounded corners=2pt,drop shadow,fill=white, minimum width=8.3em][fit=(c1_1)(c1_4)](box1){};
\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=6.4em][fit=(c2_1)(c2_3)](box2){};
\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=10.5em][fit=(c3_1)(c3_5)](box3){};
\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=10.3em][fit=(c4_1)(c4_5)](box4){};
\node[draw,rounded corners=2pt,drop shadow,fill=white, minimum width=8.3em,thick][fit=(c1_1)(c1_4)](box1){};
\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=6.4em,thick][fit=(c2_1)(c2_3)](box2){};
\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=10.5em,thick][fit=(c3_1)(c3_5)](box3){};
\node[draw,rounded corners=2pt,drop shadow,fill=white,minimum width=10.3em,thick][fit=(c4_1)(c4_5)](box4){};
%\node[draw,rounded corners=2pt,inner xsep=6pt,drop shadow,fill=white][fit=(c5)](box5){};
\end{pgfonlayer}
......@@ -44,15 +44,15 @@
\node[text=ublue] at (10.5em, 0em) {\small\bfnew{...}};
\node[text=ublue] (hh) at (-0.8em, 0em) {\small\bfnew{...}};
\draw[->,thick, out=70, in=-120] ([yshift=0.1em]c1_1.90) to ([yshift=-0.1em]add1.-90);
\draw[->,thick, out=70, in=-120] ([yshift=0.1em]c1_1.90) to ([yshift=-0.1em]add1.-120);
%node[xshift=-0.4em,yshift=1.2em]{$ \mathbi{h}^ {\textrm j}$}
\draw[->,thick, out=80, in=-100] ([yshift=0.1em]c1_2.90) to ([yshift=-0.1em]add1.-90);
\draw[->,thick, out=100, in=-80] ([yshift=0.1em]c1_3.90) to ([yshift=-0.1em]add1.-90);
\draw[->,thick, out=110, in=-60] ([yshift=0.1em]c1_4.90) to ([yshift=-0.1em]add1.-90);
\draw[->,thick, out=80, in=-100] ([yshift=0.1em]c1_2.90) to ([yshift=-0.1em]add1.-100);
\draw[->,thick, out=100, in=-80] ([yshift=0.1em]c1_3.90) to ([yshift=-0.1em]add1.-80);
\draw[->,thick, out=110, in=-60] ([yshift=0.1em]c1_4.90) to ([yshift=-0.1em]add1.-60);
\draw[->,thick, out=70, in=-110] ([yshift=0.1em]c2_1.90) to ([yshift=-0.1em]add2.-90);
\draw[->,thick, out=70, in=-110] ([yshift=0.1em]c2_1.90) to ([yshift=-0.1em]add2.-110);
\draw[->,thick, out=90, in=-90] ([yshift=0.1em]c2_2.90) to ([yshift=-0.1em]add2.-90);
\draw[->,thick, out=110, in=-70] ([yshift=0.1em]c2_3.90) to ([yshift=-0.1em]add2.-90);
\draw[->,thick, out=110, in=-70] ([yshift=0.1em]c2_3.90) to ([yshift=-0.1em]add2.-70);
\draw[->,thick, out=30, in=-130] ([yshift=0.1em]add1.90) to ([yshift=-0.1em]add3.-120);
......@@ -67,20 +67,20 @@
\draw[->,thick] ([yshift=0.1em]sigma.90) -- ([yshift=-0.1em]n2.-90);
\draw[->,thick] ([yshift=0.1em]n2.90) -- node[right]{$ \widetilde{\mathbi{h}}_{\textrm{t}}$}([yshift=2em]n2.90);
\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]hh.-180) -- node[font=\scriptsize,text=black,below]{前几个句子}([yshift=-2em]box2.0);
\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]box3.-180) -- node[font=\scriptsize,text=black,below]{当前句子}([yshift=-2em]box3.0);
\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]hh.-180) -- node[font=\footnotesize,text=black,below]{前几个句子}([yshift=-2em]box2.0);
\draw[decorate,decoration={brace, mirror},gray, thick] ([yshift=-2em]box3.-180) -- node[font=\footnotesize,text=black,below]{当前句子}([yshift=-2em]box3.0);
\draw[->, thick, rounded corners=2pt] ([yshift=0.1em]add3.90) -- ([yshift=2.1em]add3.90) -- ([xshift=-0.1em]sigma.180);
%annotation
\node[fill=red!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a1) at (2em,-4.5em) {};
\node[anchor=west,font=\footnotesize] (w1) at ([xshift=0.4em]a1.east) {编码表示};
%\node[fill=red!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a1) at (2em,-4.5em) {};
%\node[anchor=west,font=\footnotesize] (w1) at ([xshift=0.4em]a1.east) {编码表示};
\node[anchor=west,fill=orange!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a2) at ([xshift=2em]w1.east) {};
\node[anchor=west,font=\footnotesize] (w2)at ([xshift=0.4em]a2.east) {层次注意力};
%\node[anchor=west,fill=orange!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a2) at ([xshift=2em]w1.east) {};
%\node[anchor=west,font=\footnotesize] (w2)at ([xshift=0.4em]a2.east) {层次注意力};
\node[anchor=west,fill=blue!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a3) at ([xshift=2em]w2.east) {};
\node[anchor=west,font=\footnotesize] at ([xshift=0.4em]a3.east) {融合上下文信息的编码表示};
%\node[anchor=west,fill=blue!30,rounded corners=1pt,inner sep=0pt,minimum size=1em] (a3) at ([xshift=2em]w2.east) {};
%\node[anchor=west,font=\footnotesize] at ([xshift=0.4em]a3.east) {融合上下文信息的编码表示};
\end{tikzpicture}
......
\definecolor{color1}{rgb}{1,0.725,0.058}
\tikzstyle{coder} = [rectangle,thick,rounded corners,minimum width=2.8cm,minimum height=1.1cm,text centered,draw=black!70,fill=blue!10,drop shadow]
\tikzstyle{attention} = [rectangle,thick,rounded corners,minimum width=2.6cm,minimum height=0.9cm,text centered,draw=black!70,fill=green!25,drop shadow]
% Box style for the encoder nodes in this figure: rounded, thick black outline, blue fill, drop shadow.
\tikzstyle{coder} = [rectangle,thick,rounded corners,minimum width=2.8cm,minimum height=1.1cm,text centered,draw=black,fill=blue!30,drop shadow]
\tikzstyle{attention} = [rectangle,thick,rounded corners,minimum width=2.6cm,minimum height=0.9cm,text centered,draw=black,fill=green!30!white,drop shadow]
\begin{tikzpicture}[node distance = 0,scale = 0.7]
\tikzstyle{every node}=[scale=0.7]
\node(encoder_c)[coder]{\large{编码器}};
\node(encoder_s)[coder, right of = encoder_c, xshift=3.5cm, fill=red!25]{\large{编码器}};
\node(encoder_s)[coder, right of = encoder_c, xshift=3.5cm, fill=red!30]{\large{编码器}};
\node(h_pre)[above of = encoder_c, yshift=1.3cm,scale=1.3]{${\mathbi{h}}_{\rm pre}$};
\node(h)[above of = encoder_s, yshift=1.3cm,scale=1.3]{$\mathbi{h}$};
\node(cir)[circle,very thick, right of = h, draw=black!90,minimum width=0.5cm,xshift=1.1cm]{};
......@@ -15,7 +15,7 @@
\node(current)[below of = encoder_s, yshift=-1.3cm]{\large{当前句子}};
\node(attention_left)[attention, above of = encoder_c, xshift=2.4cm,yshift=3.1cm]{\large{注意力机制}};
\node(d)[above of = attention_left, yshift=1.1cm,scale=1.3]{$\mathbi{d}$};
\node(ground)[rectangle, thick, rounded corners, minimum width=5cm, minimum height=5.5cm, right of = encoder_s, xshift=4.4cm,yshift=2.2cm, draw=black!70, fill=gray!10]{};
\node(ground)[rectangle, thick, rounded corners, minimum width=5cm, minimum height=5.5cm, right of = encoder_s, xshift=4.4cm,yshift=2.2cm, draw=black, fill=gray!10]{};
\node(decoder)[above of = encoder_s, xshift=3.1cm]{\large{解码器}};
\node(attention_right)[attention, right of = attention_left, xshift=5.4cm,yshift=-0.4cm]{\large{注意力机制}};
\node(target)[right of = current, xshift=5.3cm]{\large{目标语言句子(位置$j$之前)}};
......
......@@ -12,7 +12,7 @@
\draw[->,very thick]([xshift=-1.4em]trans.west) to (trans.west);
\draw[->,very thick](trans.east) to ([xshift=1.4em]trans.east);
\node[anchor=east] (de1) at ([xshift=4.8cm,yshift=-0.1em]trans.east) {\begin{tabular}{l}{\normalsize{译文:}}{\normalsize{一个\ \;女孩\ \;\ \;{\red{河床}}}}\end{tabular}};
\node[anchor=south] (de2) at ([xshift=-0em,yshift=-1.5em]de1.south) {\begin{tabular}{l}{\normalsize{\ \;跳下来\ \;}} \end{tabular}};
\node[anchor=east] (de1) at ([xshift=4.5cm,yshift=-0.1em]trans.east) {\begin{tabular}{l}{\normalsize{译文:}}{\normalsize{一个/女孩/从/{\red{河床}}/}}\end{tabular}};
\node[anchor=south] (de2) at ([xshift=-0em,yshift=-1.5em]de1.south) {\begin{tabular}{l}{\normalsize{/跳下来/}} \end{tabular}};
\end {scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -5,7 +5,7 @@
\node(river)[word, right of = figure, xshift=5cm, yshift=0.35cm, fill=blue!45]{river};
\node(mountain)[word, above of = river, yshift=0.75cm, fill=blue!45]{mountain};
\node(child)[word, above of = mountain, yshift=0.75cm, fill=blue!15]{child};
\node(man)[word, above of = child, yshift=0.75cm, fill=blue!25]{girl};
\node(woman)[word, above of = child, yshift=0.75cm, fill=blue!25]{girl};
\node(jump)[word, below of = river, yshift=-0.75cm, fill=blue!30]{jump};
\node(bank)[word, below of = jump, yshift=-0.75cm, fill=blue!65]{bank};
\node(sky)[word, below of = bank, yshift=-0.75cm, fill=blue!30]{sky};
......@@ -22,12 +22,12 @@
\draw[->, thick](yn_1)to([yshift=-0.1cm]decoder.south);
\draw[->, thick]([yshift=0.1cm]decoder.north)to(yn_2);
\draw[->, thick, color=blue!45]([xshift=0.05cm]river.east)to([xshift=-0.05cm]cir.west);
\draw[->, thick, color=blue!45]([xshift=0.05cm]mountain.east)to([xshift=-0.05cm]cir.west);
\draw[->, thick, color=blue!15]([xshift=0.05cm]child.east)to([xshift=-0.05cm]cir.west);
\draw[->, thick, color=blue!25]([xshift=0.05cm]man.east)to([xshift=-0.05cm]cir.west);
\draw[->, thick, color=blue!30]([xshift=0.05cm]jump.east)to([xshift=-0.05cm]cir.west);
\draw[->, very thick, color=blue!65]([xshift=0.05cm]bank.east)to([xshift=-0.05cm]cir.west);
\draw[->, thick, color=blue!30]([xshift=0.05cm]sky.east)to([xshift=-0.05cm]cir.west);
\draw[->, thick, color=blue!15]([xshift=0.05cm]tree.east)to([xshift=-0.05cm]cir.west);
\draw[->, thick, color=blue!25]([xshift=0.05cm]woman.east)to([xshift=-0.05cm]cir.north);
\draw[->, thick, color=blue!15]([xshift=0.05cm]child.east)to([yshift=0.7em,xshift=0.4em]cir.west);
\draw[->, thick, color=blue!45]([xshift=0.05cm]mountain.east)to([yshift=0.5em,xshift=0.15em]cir.west);
\draw[->, thick, color=blue!45]([xshift=0.05cm]river.east)to([yshift=0.15em]cir.west);
\draw[->, thick, color=blue!30]([xshift=0.05cm]jump.east)to([yshift=-0.15em]cir.west);
\draw[->, very thick, color=blue!65]([xshift=0.05cm]bank.east)to([yshift=-0.5em,xshift=0.15em]cir.west);
\draw[->, thick, color=blue!30]([xshift=0.05cm]sky.east)to([yshift=-0.7em,xshift=0.4em]cir.west);
\draw[->, thick, color=blue!15]([xshift=0.05cm]tree.east)to([xshift=-0.05em]cir.south);
\end{tikzpicture}
\ No newline at end of file
\tikzstyle{encoder} = [rectangle,thick,rounded corners,minimum width=4.3em,minimum height=2.2em,text centered,draw=black!70,fill=red!25]
\tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=4.3em,minimum height=2.2em,text centered,draw=black!70,fill=blue!15]
\tikzstyle{attention} = [rectangle,thick,rounded corners,minimum width=2.6cm,minimum height=2.2em,text centered,draw=black!70,fill=green!25]
\tikzstyle{encoder} = [rectangle,thick,rounded corners,minimum width=4.3em,minimum height=2.2em,text centered,draw=black,fill=red!30]
\tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=4.3em,minimum height=2.2em,text centered,draw=black,fill=blue!25]
\tikzstyle{attention} = [rectangle,thick,rounded corners,minimum width=2.6cm,minimum height=2.2em,text centered,draw=black,fill=green!25]
\begin{tikzpicture}[node distance = 0,scale = 0.75]
\tikzstyle{every node}=[scale=0.75]
......@@ -17,6 +17,7 @@
\node(attention_below)[attention, right of = decoder_right, xshift=4.8cm]{\normalsize{注意力机制}};
\node(attention_above)[attention, above of = attention_below, yshift=1.6cm]{\normalsize{注意力机制}};
\node(ffn)[attention, above of = attention_above, yshift=1.6cm, fill=blue!8]{\normalsize{前馈神经网络}};
\node [right of = attention_above, xshift=2.35cm,yshift=2.5cm,scale=1.2]{\footnotesize{解码器}};
\node(n)[right of = attention_above, xshift=2.4cm,scale=1.5]{$\times N$};
\node(text_2)[above of = ffn, yshift=1.9cm]{\normalsize{基于上下文的修正结果}};
\node(title_2)[right of = title_1, xshift=6.3cm]{\large\bfnew{二阶段翻译}};
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -25,7 +25,7 @@
\parinterval 基于上下文的翻译是机器翻译的一个重要分支。传统方法中,机器翻译通常被定义为对一个句子进行翻译的问题。但是,现实中每句话往往不是独立出现的。比如,人们会使用语音进行表达,或者通过图片来传递信息,这些语音和图片内容都可以伴随着文字一起出现在翻译场景中。此外,句子往往存在于段落或者篇章之中,如果要理解这个句子,也需要整个段落或者篇章的信息。而这些上下文信息都是机器翻译可以利用的。
\parinterval 本章在句子级翻译的基础上将问题扩展为更大上下文中的翻译,具体包括语音翻译、图像翻译、篇章翻译三个主题。这些问题均为机器翻译应用中的真实需求。同时,使用多模态等信息也是当下自然语言处理的热点方向之一。
\parinterval 本章在句子级翻译的基础上将问题扩展为更大上下文中的翻译,具体包括语音翻译、图像翻译、篇章翻译三个主题。这些问题均为机器翻译应用中的真实需求。同时,使用多模态等信息也是当下自然语言处理的热点方向之一。
%----------------------------------------------------------------------------------------
% NEW SECTION
......@@ -33,9 +33,9 @@
\section{机器翻译需要更多的上下文}
\parinterval 长期以来,机器翻译都是指句子级翻译。主要原因在于,句子级的翻译建模可以大大简化问题,使得机器翻译方法更容易被实践和验证。但是人类使用语言的过程并不是孤立地在一个个句子上进行的。这个问题可以类比于人类学习语言的过程:小孩成长过程中会接受视觉、听觉、触觉等多种信号,这些信号的共同作用使得他们产生对客观世界的“认识”,同时促使他们使用“语言”进行表达。从这个角度说,语言能力并不是由单一因素形成的,它往往伴随着其他信息的相互作用,比如,当们翻译一句话的时候,会用到看到的画面、听到的语调、甚至前面说过的句子中的信息。
\parinterval 长期以来,机器翻译都是指句子级翻译。主要原因在于,句子级的翻译建模可以大大简化问题,使得机器翻译方法更容易被实践和验证。但是人类使用语言的过程并不是孤立地在一个个句子上进行的。这个问题可以类比于人类学习语言的过程:小孩成长过程中会接受视觉、听觉、触觉等多种信号,这些信号的共同作用使得他们产生对客观世界的“认识”,同时促使他们使用“语言”进行表达。从这个角度说,语言能力并不是由单一因素形成的,它往往伴随着其他信息的相互作用,比如,当人们翻译一句话的时候,会用到看到的画面、听到的语调、甚至前面说过的句子中的信息。
\parinterval 广义上,当前句子以外的信息都可以被看作一种上下文。比如,图\ref{fig:17-1}中,需要把英语句子“A girl jumps off a bank .”翻译为汉语。但是,其中的“bank”有多个含义,因此仅仅使用英语句子本身的信息可能会将其翻译为“银行”,而非正确的译文“河床”。但是,图\ref{fig:17-1}中也提供了这个英语句子所对应的图片,显然图片中直接展示了河床,这时“bank”是没有歧义的。通常也会把这种使用图片和文字一起进行机器翻译的任务称作{\small\bfnew{多模态机器翻译}}\index{多模态机器翻译}(Multi-Modal Machine Translation)\index{Multi-Modal Machine Translation}
\parinterval 广义上,当前句子以外的信息都可以被看作一种上下文。比如,图\ref{fig:17-1}中,需要把英语句子“A girl jumps off a bank .”翻译为汉语。但是,其中的“bank”有多个含义,因此仅仅使用英语句子本身的信息可能会将其翻译为“银行”,而非正确的译文“河床”。但是,图\ref{fig:17-1}中也提供了这个英语句子所对应的图片,显然图片中直接展示了河床,这时“bank”是没有歧义的。通常也会把这种使用图片和文字一起进行机器翻译的任务称作{\small\bfnew{多模态机器翻译}}\index{多模态机器翻译}(Multi-Modal Machine Translation)\index{Multi-Modal Machine Translation}。
%----------------------------------------------
\begin{figure}[htp]
......@@ -54,7 +54,7 @@
%----------------------------------------------------------------------------------------
\section{语音翻译}
\parinterval 语音,是人类交流中最常用的一种信息载体。从日常聊天、出国旅游,到国际会议、跨国合作,对于语音进行翻译的需求不断增加。甚至在有些场景下,用语音进行交互要比用文本进行交互频繁得多。因此,{\small\bfnew{语音翻译}}\index{语音翻译}(Speech Translation)\index{Speech Translation}也成为了语音处理和机器翻译相结合的重要产物。根据目标语言的载体类型,可以将语音翻译分为{\small\bfnew{语音到文本翻译}}\index{语音到文本翻译}(Speech-to-Text Translation)\index{Speech-to-Text Translation}{\small\bfnew{语音到语音翻译}}\index{语音到语音翻译}(Speech-to-Speech Translation)\index{Speech-to-Speech Translation};基于翻译的实时性,还可以分为{\small\bfnew{实时语音翻译}}\index{实时语音翻译}(即同声传译,Simultaneous Translation)\index{Simultaneous Translation}{\small\bfnew{离线语音翻译}}(Offline Speech Translation)\index{离线语音翻译}\index{Offline Speech Translation}。本节主要关注离线语音到文本翻译方法(简称为语音翻译),分别从音频处理、级联语音翻译和端到端语音翻译几个角度开展讨论。
\parinterval 语音,是人类交流中最常用的一种信息载体。从日常聊天、出国旅游,到国际会议、跨国合作,对于语音翻译的需求不断增加。甚至在有些场景下,用语音进行交互要比用文本进行交互频繁得多。因此,{\small\bfnew{语音翻译}}\index{语音翻译}(Speech Translation)\index{Speech Translation}也成为了语音处理和机器翻译相结合的重要产物。根据目标语言的载体类型,可以将语音翻译分为{\small\bfnew{语音到文本翻译}}\index{语音到文本翻译}(Speech-to-Text Translation)\index{Speech-to-Text Translation}和{\small\bfnew{语音到语音翻译}}\index{语音到语音翻译}(Speech-to-Speech Translation)\index{Speech-to-Speech Translation};基于翻译的实时性,还可以分为{\small\bfnew{实时语音翻译}}\index{实时语音翻译}(即同声传译,Simultaneous Translation)\index{Simultaneous Translation}和{\small\bfnew{离线语音翻译}}(Offline Speech Translation)\index{离线语音翻译}\index{Offline Speech Translation}。本节主要关注离线语音到文本翻译方法(简称为语音翻译),分别从音频处理、级联语音翻译和端到端语音翻译几个角度开展讨论。
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION
......@@ -62,7 +62,7 @@
\subsection{音频处理}
\parinterval 为了保证对相关内容描述的完整性,这里对语音处理的基本知识作简要介绍。不同于文本,音频本质上是经过若干信号处理之后的{\small\bfnew{波形}}(Waveform)\index{Waveform}。具体来说,声音是一种空气的震动,因此可以被转换为模拟信号。模拟信号是一段连续的信号,经过采样变为离散的数字信号。采样是每隔固定的时间记录一下声音的振幅,采样率表示每秒的采样点数,单位是赫兹(Hz)。采样率越高,结果的损失则越小。通常来说,采样的标准是能够通过离散化的数字信号重现原始语音。我们日常生活中使用的手机和电脑设备的采样率一般为16kHz,表示每秒16000个采样点;而音频CD的采样率可以达到44.1kHz。 经过进一步的量化,将采样点的值转换为整型数值保存,从而减少占用的存储空间,通常采用的是16位量化。将采样率和量化位数相乘,就可以得到{\small\bfnew{比特率}}\index{比特率}(Bits Per Second,BPS)\index{Bits Per Second},表示音频每秒占用的位数。例如,16kHz采样率和16位量化的音频,比特率为256kb/s。音频处理的整体流程如图\ref{fig:17-2}所示\upcite{洪青阳2020语音识别原理与应用,陈果果2020语音识别实战}
\parinterval 为了保证对相关内容描述的完整性,这里对语音处理的基本知识作简要介绍。不同于文本,音频本质上是经过若干信号处理之后的{\small\bfnew{波形}}(Waveform)\index{Waveform}。具体来说,声音是一种空气的震动,因此可以被转换为模拟信号。模拟信号是一段连续的信号,经过采样变为离散的数字信号。采样是每隔固定的时间记录一下声音的振幅,采样率表示每秒的采样点数,单位是赫兹(Hz)。采样率越高,结果的损失则越小。通常来说,采样的标准是能够通过离散化的数字信号重现原始语音。日常生活中使用的手机和电脑设备的采样率一般为16kHz,表示每秒16000个采样点;而音频CD的采样率可以达到44.1kHz。 经过进一步的量化,将采样点的值转换为整型数值保存,从而减少占用的存储空间,通常采用的是16位量化。将采样率和量化位数相乘,就可以得到{\small\bfnew{比特率}}\index{比特率}(Bits Per Second,BPS)\index{Bits Per Second},表示音频每秒占用的位数。例如,16kHz采样率和16位量化的音频,比特率为256kb/s。音频处理的整体流程如图\ref{fig:17-2}所示\upcite{洪青阳2020语音识别原理与应用,陈果果2020语音识别实战}
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......@@ -85,7 +85,7 @@
\end{figure}
%----------------------------------------------------------------------------------------------------
\parinterval 经过了上述的预处理操作,可以得到音频对应的帧序列,之后通过不同的操作来提取不同类型的声学特征。常用的声学特征包括{\small\bfnew{Mel频率倒谱系数}}\index{Mel频率倒谱系数}(Mel-Frequency Cepstral Coefficient,MFCC)\index{Mel-Frequency Cepstral Coefficient}{\small\bfnew{感知线性预测系数}}\index{感知线性预测系数}(Perceptual Lienar Predictive,PLP)\index{Perceptual Lienar Predictive}{\small\bfnew{滤波器组}}\index{滤波器组}(Filter-bank,Fbank)\index{Filter-bank}等。MFCC、PLP和Fbank特征都需要对预处理后的音频做{\small\bfnew{短时傅里叶变换}}\index{短时傅里叶变换}(Short-time Fourier Tranform,STFT)\index{Short-time Fourier Tranform},得到具有规律的线性分辨率。之后再经过特定的操作,得到各种声学特征。不同声学特征的特点是不同的,MFCC去相关性较好,PLP抗噪性强,FBank可以保留更多的语音原始特征。在语音翻译中,比较常用的声学特征为FBank或MFCC\upcite{洪青阳2020语音识别原理与应用}
\parinterval 经过了上述的预处理操作,可以得到音频对应的帧序列,之后通过不同的操作来提取不同类型的声学特征。常用的声学特征包括{\small\bfnew{Mel频率倒谱系数}}\index{Mel频率倒谱系数}(Mel-frequency Cepstral Coefficient,MFCC)\index{Mel-Frequency Cepstral Coefficient}、{\small\bfnew{感知线性预测系数}}\index{感知线性预测系数}(Perceptual Linear Predictive,PLP)\index{Perceptual Linear Predictive}、{\small\bfnew{滤波器组}}\index{滤波器组}(Filter-bank,Fbank)\index{Filter-bank}等。MFCC、PLP和Fbank特征都需要对预处理后的音频做{\small\bfnew{短时傅里叶变换}}\index{短时傅里叶变换}(Short-time Fourier Transform,STFT)\index{Short-time Fourier Transform},得到具有规律的线性分辨率。之后再经过特定的操作,得到各种声学特征。不同声学特征的特点是不同的,MFCC去相关性较好,PLP抗噪性强,Fbank可以保留更多的语音原始特征。在语音翻译中,比较常用的声学特征为Fbank或MFCC\upcite{洪青阳2020语音识别原理与应用}。
\parinterval 实际上,提取到的声学特征可以类比于计算机视觉中的像素特征,或者自然语言处理中的词嵌入表示。不同之处在于,声学特征更加复杂多变,可能存在着较多的噪声和冗余信息。此外,相比对应的文字序列,音频提取到的特征序列长度要大十倍以上。比如,人类正常交流中每秒钟一般可以说2-3个字,而每秒钟的语音可以提取得到100帧的特征序列。巨大的长度比差异也为声学特征建模带来了挑战。
......@@ -145,9 +145,9 @@
\end{figure}
%----------------------------------------------------------------------------------------------------
\parinterval 可以看出,词格可以保存多条搜索路径,路径中保存了输入序列的时间信息以及解码过程。翻译模型基于词格进行翻译,可以降低语音识别模型带来的误差\upcite{DBLP:conf/acl/ZhangGCF19,DBLP:conf/acl/SperberNPW19}。但在端到端语音识别模型中,一般基于束搜索方法进行解码,且解码序列的长度与输入序列并不匹配,相比传统声学模型解码丢失了语音的时间信息,因此这种基于词格的方法主要集中在传统语音识别系统上。
\parinterval 可以看出,词格可以保存多条搜索路径,路径中保存了输入序列的时间信息以及解码过程。翻译模型基于词格进行翻译,可以降低语音识别模型带来的误差\upcite{DBLP:conf/acl/ZhangGCF19,DBLP:conf/acl/SperberNPW19}。但在端到端语音识别模型中,一般使用基于束搜索的方法进行解码,解码序列的长度与输入序列并不匹配,相比传统声学模型解码丢失了语音的时间信息,因此这种基于词格的方法主要集中在传统语音识别系统上。
\parinterval 为了降低错误传播问题带来的影响,一种思路是通过一个后处理模型修正识别结果中的错误,再送给文本翻译模型进行翻译。也可以进一步对文本做{\small\bfnew{顺滑}}\index{顺滑}(Disfluency Detection\index{Disfluency Detection}),使得送给翻译系统的文本更加干净、流畅,比如除去一些导致停顿的语气词。这一做法在工业界得到了广泛应用,但由于每个模型只能串行地计算,也会带来额外的计算代价以及运算时间。另外一种思路是训练更加健壮的文本翻译模型,使其可以处理输入中存在的噪声或误差\upcite{DBLP:conf/acl/LiuTMCZ18}
\parinterval 为了降低错误传播问题带来的影响,一种思路是通过一个后处理模型修正识别结果中的错误,再送给文本翻译模型进行翻译。也可以进一步对文本做{\small\bfnew{顺滑}}\index{顺滑}(Disfluency Detection\index{Disfluency Detection})处理,使得送给翻译系统的文本更加干净、流畅,比如除去一些导致停顿的语气词。这一做法在工业界得到了广泛应用,但由于每个模型只能串行地计算,也会带来额外的计算代价以及运算时间。另外一种思路是训练更加健壮的文本翻译模型,使其可以处理输入中存在的噪声或误差\upcite{DBLP:conf/acl/LiuTMCZ18}。
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION
......@@ -162,7 +162,7 @@
\vspace{0.5em}
\item {\small\bfnew{错误传播问题}}。级联模型导致的一个很严重的问题在于,语音识别模型得到的文本如果存在错误,这些错误很可能在翻译过程中被放大,从而使最后翻译结果出现比较大的偏差。比如识别时在句尾少生成了个“吗”,会导致翻译模型将疑问句翻译为陈述句。
\vspace{0.5em}
\item {\small\bfnew{翻译效率问题}}。由于需要语音识别模型和文本标注模型只能串行地计算,翻译效率相对较低,而实际很多场景中都需要达到低延时的翻译。
\item {\small\bfnew{翻译效率问题}}。由于语音识别模型和文本标注模型只能串行地计算,翻译效率相对较低,而实际很多场景中都需要低延时的翻译。
\vspace{0.5em}
\item {\small\bfnew{语音中的副语言信息丢失}}。将语音识别为文本的过程中,语音中包含的语气、情感、音调等信息会丢失,而同一句话在不同的语气中表达的意思很可能是不同的。尤其是在实际应用中,由于语音识别结果通常并不包含标点,还需要额外的后处理模型将标点还原,也会带来额外的计算代价。
\vspace{0.5em}
......@@ -215,7 +215,7 @@
\parinterval 一种思路是进行多任务学习,让模型在训练过程中得到更多的监督信息。使用多个任务强化主任务(机器翻译),在本书的{\chapterfifteen}和{\chaptersixteen}也有所涉及。从这个角度说,机器翻译中很多问题的解决手段都是一致的。
\parinterval 语音语言中多任务学习主要借助语音对应的标注信息,也就是源语言文本。{\small\bfnew{连接时序分类}}\index{连接时序分类}(Connectionist Temporal Classification,CTC)\index{Connectionist Temporal Classification}\upcite{DBLP:conf/icml/GravesFGS06}是语音处理中最简单有效的一种多任务学习方法\upcite{DBLP:journals/jstsp/WatanabeHKHH17,DBLP:conf/icassp/KimHW17},也被广泛应用于文本识别任务中\upcite{DBLP:journals/pami/ShiBY17}。CTC可以将输入序列的每一位置都对应到标注文本中,学习语音和文字之间的软对齐关系。比如,对于下面的音频序列,CTC可以将每个位置分别对应到同一个词。需要注意的是,CTC会额外新增一个词$\epsilon$,类似于一个空白词,表示这个位置没有声音或者没有任何对应的预测结果。然后,将相同且连续的词合并,去除$\epsilon$,就可以得到预测结果,如图\ref{fig:17-8} 所示。
\parinterval 语音翻译中多任务学习主要借助语音对应的标注信息,也就是源语言文本。{\small\bfnew{连接时序分类}}\index{连接时序分类}(Connectionist Temporal Classification,CTC)\index{Connectionist Temporal Classification}\upcite{DBLP:conf/icml/GravesFGS06}是语音处理中最简单有效的一种多任务学习方法\upcite{DBLP:journals/jstsp/WatanabeHKHH17,DBLP:conf/icassp/KimHW17},也被广泛应用于文本识别任务中\upcite{DBLP:journals/pami/ShiBY17}。CTC可以将输入序列的每一位置都对应到标注文本中,学习语音和文字之间的软对齐关系。比如,对于下面的音频序列,CTC可以将每个位置分别对应到同一个词。需要注意的是,CTC会额外新增一个词$\epsilon$,类似于一个空白词,表示这个位置没有声音或者没有任何对应的预测结果。在对齐完成之后,将相同且连续的词合并,去除$\epsilon$,就可以得到预测结果,如图\ref{fig:17-8} 所示。
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......@@ -226,12 +226,12 @@
\end{figure}
%----------------------------------------------------------------------------------------------------
\parinterval CTC的一些特性使其可以很好的完成输入输出之间的对齐,例如
\parinterval CTC的一些特性使其可以很好地完成输入输出之间的对齐,例如:
%----------------------------------------------------------------------------------------------------
\begin{itemize}
\vspace{0.5em}
\item {\small\bfnew{输入和输出之间的对齐是单调的}}。也就是后面的输入只会预测与前面的序列相同或后面的输出内容。比如对于图\ref{fig:17-8}中的例子,如果输入的位置t已经预测了字符l,那么t之后的位置不会再预测前面的字符h和e
\item {\small\bfnew{输入和输出之间的对齐是单调的}}。也就是后面的输入只会预测与前面的序列相同或者更靠后的输出内容。比如对于图\ref{fig:17-8}中的例子,如果输入的位置$t$已经对齐了字符“l”,那么$t$之后的位置不会再对齐前面的字符“h”和“e”。
\vspace{0.5em}
\item {\small\bfnew{输入和输出之间是多对一的关系}}。也就是多个输入会对应到同一个输出上。这对于语音序列来说是非常自然的一件事情,由于输入的每个位置只包含非常短的语音特征,因此多个输入才可以对应到一个输出字符。
\vspace{0.5em}
......@@ -249,7 +249,7 @@
\end{figure}
%----------------------------------------------------------------------------------------------------
\parinterval 另外一种多任务学习的思想是通过两个解码器,分别预测语音对应的源语言句子和目标语言句子,具体有图\ref{fig:17-10}展示的三种方式\upcite{DBLP:conf/naacl/AnastasopoulosC18,DBLP:conf/asru/BaharBN19}。图\ref{fig:17-10}(a)中采用单编码器-双解码器的方式,两个解码器根据编码器的表示,分别预测源语言句子和目标语言句子,从而使编码器训练地更加充分。这种做法的好处在于源语言文的本生任务成可以辅助翻译过程,相当于为源语言语音提供了额外的“模态”信息。图\ref{fig:17-10}(b)则通过使用两个级联的解码器,先利用第一个解码器生成源语言句子,然后再利用第一个解码器的表示,通过第二个解码器生成目标语言句子。这种方法通过增加一个中间输出,降低了模型的训练难度,但同时也会带来额外的解码耗时,因为两个解码器需要串行地进行生成。图\ref{fig:17-10}(c) 中模型更进一步,第二个编码器联合编码器和第一个解码器的表示进行生成,更充分地利用了已有信息。
\parinterval 另外一种多任务学习的思想是通过两个解码器,分别预测语音对应的源语言句子和目标语言句子,具体有图\ref{fig:17-10}展示的三种方式\upcite{DBLP:conf/naacl/AnastasopoulosC18,DBLP:conf/asru/BaharBN19}。图\ref{fig:17-10}(a)中采用单编码器-双解码器的方式,两个解码器根据编码器的表示,分别预测源语言句子和目标语言句子,从而使编码器训练得更加充分。这种做法的好处在于源语言的文本生成任务可以辅助翻译过程,相当于为源语言语音提供了额外的“模态”信息。图\ref{fig:17-10}(b)则通过使用两个级联的解码器,先利用第一个解码器生成源语言句子,然后再利用第一个解码器的表示,通过第二个解码器生成目标语言句子。这种方法通过增加一个中间输出,降低了模型的训练难度,但同时也会带来额外的解码耗时,因为两个解码器需要串行地进行生成。图\ref{fig:17-10}(c) 中模型更进一步,第二个解码器联合编码器和第一个解码器的表示进行生成,更充分地利用了已有信息。
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
\centering
......@@ -283,7 +283,7 @@
\section{图像翻译}
\parinterval 在人类所接受的信息中,视觉信息的比重往往不亚于语音和文本信息,甚至更多。视觉信息通常以图像的形式存在,近几年,结合图像的多模态机器翻译受到了广泛的关注。多模态机器翻译(图\ref{fig:17-11} (a))简单来说就是结合源语言和其他模态(例如图像等)的信息生成目标语言的过程。这种结合图像的机器翻译还是一种狭义上的“翻译”,它本质上还是从源语言到目标语言或者说从文本到文本的翻译。那么从图像到文本(图\ref{fig:17-11}(b))的转换,即给定图像生成与图像内容相关的描述,也可以被称为广义上的“翻译”。例如,{\small\bfnew{图片描述生成}}\index{图片描述生成}(Image Captioning)\index{Image Captioning}就是一种典型的图像到文本的翻译。当然,这种广义上的翻译形式不仅仅包括图像到文本的转换,还可以包括从图像到图像的转换(图\ref{fig:17-11}(c)),甚至是从文本到图像的转换(图\ref{fig:17-11}(d))等等。这里将这些与图像相关的翻译任务统称为图像翻译。
\parinterval 在人类所接受的信息中,视觉信息的比重往往不亚于语音和文本信息,甚至更多。视觉信息通常以图像的形式存在,近几年,结合图像的多模态机器翻译受到了广泛的关注。多模态机器翻译(图\ref{fig:17-11} (a))简单来说就是结合源语言和其他模态(例如图像等)的信息生成目标语言的过程。这种结合图像的机器翻译还是一种狭义上的“翻译”,它本质上还是从源语言到目标语言或者说从文本到文本的翻译。事实上从图像到文本(图\ref{fig:17-11}(b))的转换,即给定图像,生成与图像内容相关的描述,也可以被称为广义上的“翻译”。例如,{\small\bfnew{图片描述生成}}\index{图片描述生成}(Image Captioning)\index{Image Captioning}就是一种典型的图像到文本的翻译。当然,这种广义上的翻译形式不仅仅包括图像到文本的转换,还可以包括从图像到图像的转换(图\ref{fig:17-11}(c)),甚至是从文本到图像的转换(图\ref{fig:17-11}(d))等等。这里将这些与图像相关的翻译任务统称为图像翻译。
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......@@ -301,7 +301,7 @@
\subsection{基于图像增强的文本翻译}
\label{sec:image-augmented-translation}
\parinterval 在文本翻译中引入图像信息是最典型的多模态机器翻译任务。虽然多模态机器翻译还是一种从源语言文本到目标语言文本的转换,但是在转换的过程中,融入了其他模态的信息减少了歧义的产生。例如前文提到的通过与源语言相关的图像信息,将“A medium sized child jumps off of a dusty bank”中“bank”翻译为“河岸”而不是“银行”,因为图像中出现了河岸,因此“bank”的歧义大大降低。换句话说,对于同一图像或者视觉场景的描述,源语言和目标语言描述的信息是一致的,只不过,体现在不同语言上会有表达方法上的差异。那么,图像就会存在一些源语言和目标语言的隐含对齐“约束”,而这种“约束”可以捕捉语言中不易表达的隐含信息。
\parinterval 在文本翻译中引入图像信息是最典型的多模态机器翻译任务。虽然多模态机器翻译还是一种从源语言文本到目标语言文本的转换,但是在转换的过程中,融入了其他模态的信息减少了歧义的产生。例如前文提到的通过与源语言相关的图像信息,将“A girl jumps off a bank .”中“bank”翻译为“河岸”而不是“银行”,因为图像中出现了河岸,因此“bank”的歧义大大降低。换句话说,对于同一图像或者视觉场景的描述,源语言和目标语言描述的信息是一致的,只不过,体现在不同语言上会有表达方法上的差异。那么,图像就会存在一些源语言和目标语言的隐含对齐“约束”,而这种“约束”可以捕捉语言中不易表达的隐含信息。
\parinterval 如何融入视觉信息,更好地理解多模态上下文语义是多模态机器翻译研究的重点\upcite{DBLP:conf/wmt/SpeciaFSE16,DBLP:conf/wmt/CaglayanABGBBMH17,DBLP:conf/wmt/LibovickyHTBP16},主要方向包括基于特征融合的方法\upcite{DBLP:conf/emnlp/CalixtoL17,DBLP:journals/corr/abs-1712-03449,DBLP:conf/wmt/HelclLV18}、基于联合模型的方法\upcite{DBLP:conf/ijcnlp/ElliottK17,DBLP:conf/acl/YinMSZYZL20}。下面是具体介绍。
......@@ -311,7 +311,7 @@
\subsubsection{1. 基于特征融合的方法}
\parinterval 早期,通常将图像信息作为输入句子的一部分\upcite{DBLP:conf/emnlp/CalixtoL17,DBLP:conf/wmt/HuangLSOD16},或者用其对编码器、解码器的状态进行初始化\upcite{DBLP:conf/emnlp/CalixtoL17,Elliott2015MultilingualID,DBLP:conf/wmt/MadhyasthaWS17}。如图\ref{fig:17-12}所示,对图像特征的提取通常是基于卷积神经网络,有关卷积神经网络的内容,可以参考{\chaptereleven}内容。通过卷积神经网络得到全局视觉特征,在进行维度变换后,将其作为源语言输入的一部分或者初始化状态引入到模型当中。但是,这种图像信息的引入方式有以下两个缺点:
\parinterval 早期,通常将图像信息作为输入句子的一部分\upcite{DBLP:conf/emnlp/CalixtoL17,DBLP:conf/wmt/HuangLSOD16},或者用其对编码器、解码器的状态进行初始化\upcite{DBLP:conf/emnlp/CalixtoL17,Elliott2015MultilingualID,DBLP:conf/wmt/MadhyasthaWS17}。如图\ref{fig:17-12}所示,图中$y_{<}$表示当前时刻之前的单词序列,对图像特征的提取通常是基于卷积神经网络,有关卷积神经网络的内容,可以参考{\chaptereleven}内容。通过卷积神经网络得到全局视觉特征,在进行维度变换后,将其作为源语言输入的一部分或者初始化状态引入到模型当中。但是,这种图像信息的引入方式有以下两个缺点:
\begin{itemize}
\vspace{0.5em}
......@@ -333,7 +333,7 @@
\parinterval 说到噪音问题就不得不提到注意力机制的引入,前面章节中提到过这样的一个例子:
\vspace{0.8em}
\centerline{中午\ 没\ 吃饭\ ,\ 又\ 刚\ 打\ 了\ 一\ 下午\ 篮球\ ,\ 我\ 现在\ 很\ 饿\ ,\ 我\ 想\underline{\quad \quad \quad}}
\centerline{中午/没/吃饭/,/又/刚/打/了/ 一/下午/篮球/,/我/现在/很/饿/ ,/我/想\underline{\quad \quad}}
\vspace{0.8em}
\parinterval 想在横线处填写“吃饭”,“吃东西”的原因是我们在读句子的过程中,关注到了“没/吃饭”,“很/饿”等关键信息。这是在语言生成中注意力机制所解决的问题,即对于要生成的目标语言单词,相关性更高的语言片段应该更加“重要”,而不是将所有单词一视同仁。同样的,注意力机制也应用在多模态机器翻译中,即在生成目标单词时,更应该关注与目标单词相关的图像部分,而弱化对其他部分的关注。另外,注意力机制的引入,也使图像信息更加直接地参与目标语言的生成,解决了在不使用注意力机制的方法中图像信息传递损失的问题。
......@@ -347,7 +347,7 @@
\end{figure}
%----------------------------------------------------------------------------------------------------
\parinterval 那么,多模态机器翻译是如何计算上下文向量的呢?这里仿照第十章的内容给出描述。假设编码器输出的状态序列为$\{\mathbi{h}_1,...\mathbi{h}_m\}$,需要注意的是,这里的状态序列不是源语言句子的状态序列,而是通过基于卷积等操作提取到的图像的状态序列。假设图像的特征维度是$16 \times 16 \times 512$,其中前两个维度分别表示图像的高和宽,这里会将图像映射为$256 \times 512$ 的状态序列,其中$512$为每个状态的维度。对于目标语位置$j$,上下文向量$\mathbi{C}_{j}$被定义为对序列的编码器输出进行加权求和,如下:
\parinterval 那么,多模态机器翻译是如何计算上下文向量的呢?这里仿照第十章的内容给出描述。假设编码器输出的状态序列为$\{\mathbi{h}_1,\ldots,\mathbi{h}_m\}$,需要注意的是,这里的状态序列不是源语言句子的状态序列,而是通过基于卷积等操作提取到的图像的状态序列。假设图像的特征维度是$16 \times 16 \times 512$,其中前两个维度分别表示图像的高和宽,这里会将图像映射为$256 \times 512$ 的状态序列,其中$512$为每个状态的维度。对于目标语言位置$j$,上下文向量$\mathbi{C}_{j}$被定义为对序列的编码器输出进行加权求和,如下:
\begin{eqnarray}
\mathbi{C}_{j}&=& \sum_{i}{{\alpha}_{i,j}{\mathbi{h}}_{i}}
\end{eqnarray}
......@@ -364,9 +364,9 @@
\parinterval 基于联合模型的方法通常是把翻译任务与其他视觉任务结合,进行联合训练。这种方法也可以被看做是一种多任务学习,只不过这里仅关注翻译和视觉任务。一种常见的方法是共享模型的部分参数来学习不同任务之间相似的部分,并通过特定的模块来学习每个任务特有的部分。
\parinterval 如图\ref{fig:17-14}所示,可以将多模态机器翻译任务分解为两个子任务:机器翻译和图片生成\upcite{DBLP:conf/ijcnlp/ElliottK17}。其中机器翻译作为主任务,图片生成作为子任务。这里的图片生成指的是从一个图片描述生成对应图片,对于图片生成任务在后面还会有描述。通过单个编码器对源语言数据进行建模,然后通过两个解码器(翻译解码器和图像解码器)来学习翻译任务和图像生成任务。顶层任务学习每个任务的独立特征,底层共享参数层能够学习到更丰富的文本表示。
\parinterval 如图\ref{fig:17-14}所示,图中$y_{<}$表示当前时刻之前的单词序列,可以将多模态机器翻译任务分解为两个子任务:机器翻译和图片生成\upcite{DBLP:conf/ijcnlp/ElliottK17}。其中机器翻译作为主任务,图片生成作为子任务。这里的图片生成指的是从一个图片描述生成对应图片,对于图片生成任务在后面还会有描述。通过单个编码器对源语言数据进行建模,然后通过两个解码器(翻译解码器和图像解码器)来学习翻译任务和图像生成任务。顶层任务学习每个任务的独立特征,底层共享参数层能够学习到更丰富的文本表示。
\parinterval 另外在视觉问答领域有研究表明,在多模态任务中,不宜引入过多层的注意力机制,因为过深的模型会导致多模态模型的过拟合\upcite{DBLP:conf/nips/LuYBP16}。这一方面是由于深模型本身对数据的拟合能力,另一方面也是由于多模态任务的数据普遍较小,容易造成复杂模型的过拟合。从另一角度来说,利用多任务学习的方式,提高模型的泛化能力,也是一种有效防止过拟合现象的方式。类似的思想,也大量使用在多模态自然语言处理中,例如图像描述生成、视觉问答等\upcite{DBLP:conf/iccv/AntolALMBZP15}
\parinterval 另外在视觉问答领域有研究表明,在多模态任务中,不宜引入过多层的注意力机制,因为过深的模型会导致多模态模型的过拟合\upcite{DBLP:conf/nips/LuYBP16}。这一方面是由于深模型本身对数据的拟合能力,另一方面也是由于多模态任务的数据普遍较小,容易造成复杂模型的过拟合。从另一角度来说,利用多任务学习的方式,提高模型的泛化能力,也是一种有效防止过拟合现象的方式。类似的思想,也大量使用在多模态自然语言处理任务中,例如图像描述生成、视觉问答等\upcite{DBLP:conf/iccv/AntolALMBZP15}
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......@@ -383,7 +383,7 @@
\subsection{图像到文本的翻译}
\parinterval 图像到文本的转换也可以看作是广义上的翻译,简单来说,就是把图像作为了源语言的唯一输入,而输出是文本。其中,图像描述生成是最典型的图像到文本的翻译任务\upcite{DBLP:conf/ijcai/BernardiCEEEIKM17}。虽然,这部分内容并不是本书的重点,不过为了保证多模态翻译内容的完整性,这里对相关技术进行简要介绍。图像描述有时也被称看图说话、图像字幕生成,它在图像检索、智能导盲、人机交互等领域有着广泛的应用场景。
\parinterval 图像到文本的转换也可以看作是广义上的翻译,简单来说,就是把图像作为唯一的输入,而输出是文本。其中,图像描述生成是最典型的图像到文本的翻译任务\upcite{DBLP:conf/ijcai/BernardiCEEEIKM17}。虽然,这部分内容并不是本书的重点,不过为了保证多模态翻译内容的完整性,这里对相关技术进行简要介绍。图像描述有时也被称作看图说话、图像字幕生成,它在图像检索、智能导盲、人机交互等领域有着广泛的应用场景。
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......@@ -402,7 +402,7 @@
\subsubsection{1. 基础框架}
\parinterval 在编码器-解码器框架中,编码器将输入的图像转换为一种新的“表示”形式,这种“表示”包含了输入图像的所有信息。之后解码器把这种“表示”转换为自然语言描述。比如,可以通过卷积神经网络提取图像特征到一个向量表示。然后,利用长短时记忆网络(LSTM)解码生成文字描述,这个过程中与机器翻译的解码过程类似。这种建模方式存在与\ref{sec:image-augmented-translation}描述一样的问题:生成的描述单词不一定需要所有的图像信息,将全局的图像信息送入模型中,可能会引入噪音。这时也可以使用注意力机制对其进行缓解\upcite{DBLP:conf/icml/XuBKCCSZB15}
\parinterval 在编码器-解码器框架中,编码器将输入的图像转换为一种新的“表示”形式,这种“表示”包含了输入图像的所有信息。之后解码器把这种“表示”转换为自然语言描述。比如,可以通过卷积神经网络提取图像特征为一个向量表示。然后,利用长短时记忆网络(LSTM)解码生成文字描述,这个过程与机器翻译的解码过程类似。这种建模方式存在与\ref{sec:image-augmented-translation}节描述一样的问题:生成的描述单词不一定需要所有的图像信息,将全局的图像信息送入模型中,可能会引入噪音。这时可以使用注意力机制来缓解该问题\upcite{DBLP:conf/icml/XuBKCCSZB15}。
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
......@@ -412,7 +412,7 @@
\parinterval 要想使编码器-解码器框架在图像描述生成中充分发挥作用,编码器也要更好地表示图像信息。对于编码器的改进,通常体现在向编码器中添加图像的语义信息\upcite{DBLP:conf/cvpr/YouJWFL16,DBLP:conf/cvpr/ChenZXNSLC17,DBLP:journals/pami/FuJCSZ17}和位置信息\upcite{DBLP:conf/cvpr/ChenZXNSLC17,DBLP:conf/ijcai/LiuSWWY17}。
\parinterval 图像的语义信息一般是指图像中存在的实体、属性、场景等等。如图\ref{fig:17-17}所示,从图像中利用属性或实体检测器提取出“girl”、“river”、“bank”等属性词和实体词,将他们作为图像的语义信息编码的一部分,再利用注意力机制计算目标语言单词与这些属性词或实体词之间的注意力权重\upcite{DBLP:conf/cvpr/YouJWFL16}。当然,除了图像中的实体和属性作为语义信息外,也可以将图片的场景信息加入到编码器当中\upcite{DBLP:journals/pami/FuJCSZ17}。有关如何做属性、实体和场景的检测,涉及到目标检测任务的工作,例如Faster-RCNN\upcite{DBLP:journals/pami/RenHG017}、YOLO\upcite{DBLP:journals/corr/abs-1804-02767,DBLP:journals/corr/abs-2004-10934}等等,这里不过多赘述。
\parinterval 图像的语义信息一般是指图像中存在的实体、属性、场景等等。如图\ref{fig:17-17}所示,从图像中利用属性或实体检测器提取出“girl”、“river”、“bank”等属性词和实体词,将它们作为图像的语义信息编码的一部分,再利用注意力机制计算目标语言单词与这些属性词或实体词之间的注意力权重\upcite{DBLP:conf/cvpr/YouJWFL16}。当然,除了图像中的实体和属性作为语义信息外,也可以将图片的场景信息加入到编码器当中\upcite{DBLP:journals/pami/FuJCSZ17}。有关如何做属性、实体和场景的检测,涉及到目标检测任务的工作,例如Faster-RCNN\upcite{DBLP:journals/pami/RenHG017}、YOLO\upcite{DBLP:journals/corr/abs-1804-02767,DBLP:journals/corr/abs-2004-10934}等等,这里不赘述。
%----------------------------------------------------------------------------------------------------
\begin{figure}[htp]
......@@ -453,7 +453,7 @@
\section{篇章级翻译}
\parinterval 目前大多数机器翻译系统是句子级的。由于缺少了对篇章上下文信息的建模,在需要依赖上下文的翻译场景中翻译效果总是不尽人意。篇章级翻译的目的就是对篇章上下文信息进行建模,进而改善机器翻译在整个篇章上的翻译质量。篇章级翻译的概念在很早就已经出现\upcite{DBLP:journals/ac/Bar-Hillel60},随着近几年神经机器翻译取得了巨大进展,篇章级神经机器翻译也成为了重要的方向\upcite{DBLP:journals/corr/abs-1912-08494,DBLP:journals/corr/abs-1901-09115}。基于此,本节将对篇章级神经机器翻译的若干问题展开讨论。
\parinterval 目前大多数机器翻译系统是句子级的。由于缺少了对篇章上下文信息的建模,在需要依赖上下文的翻译场景中,模型的翻译效果总是不尽如人意。篇章级翻译的目的就是对篇章上下文信息进行建模,进而改善机器翻译在整个篇章上的翻译质量。篇章级翻译的概念在很早就已经出现\upcite{DBLP:journals/ac/Bar-Hillel60},随着近几年神经机器翻译取得了巨大进展,篇章级神经机器翻译也成为了重要的方向\upcite{DBLP:journals/corr/abs-1912-08494,DBLP:journals/corr/abs-1901-09115}。基于此,本节将对篇章级神经机器翻译的若干问题展开讨论。
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION
......@@ -464,9 +464,9 @@
\parinterval “篇章”在这里是指一系列连续的段落或句子所构成的整体,其中各个句子间从形式和内容上都具有一定的连贯性和一致性\upcite{jurafsky2000speech}。这些联系主要体现在{\small\sffamily\bfseries{衔接}}\index{衔接}(Cohesion \index{Cohesion})以及连贯两个方面。其中衔接体现在显性的语言成分和结构上,包括篇章中句子间的语法和词汇的联系,而连贯体现在各个句子之间的逻辑和语义的联系上。因此,篇章级翻译就是要将这些上下文之间的联系考虑在内,从而生成比句子级翻译更连贯和准确的翻译结果。实例\ref{eg:17-1}就展示了一个使用篇章信息进行机器翻译的实例。
\begin{example}
上下文句子:我\ 上周\ 针对\ 这个\ 问题\ 做出\ 解释\ 并\ 咨询\ 了\ 他的\ 意见\ 。
上下文句子:我/上周/针对/这个/问题/做出/解释/并/咨询/了/他的/意见/。
\hspace{2em} 待翻译句子:他\ 也\ 同意\ 我的\ 看法\ 。
\hspace{2em} 待翻译句子:他/也/同意/我的/看法/。
\hspace{2em} 句子级翻译结果:He also agrees with me .
......@@ -477,9 +477,9 @@
\parinterval 不过由于不同语言的特性多种多样,上下文信息在篇章级翻译中的作用也不尽相同。比如,在德语中名词是分词性的,因此在代词翻译的过程中需要根据其先行词的词性进行区分,而这种现象在其它不区分名词词性的语言中是不存在的。这意味着篇章级翻译在不同的语种中可能对应不同的上下文现象。
\parinterval 正是由于这种上下文现象的多样性,使评价篇章级翻译模型的性能变得相对困难。目前篇章级机器翻译主要针对一些常见的上下文现象,比如代词翻译、省略、连接和词汇衔接等,而{\chapterfour}介绍的BLEU等通用自动评价指标通常对这些上下文依赖现象不敏感,篇章级翻译需要采用一些专用方法来对这些具体现象进行评价。
\parinterval 正是由于这种上下文现象的多样性,使评价篇章级翻译模型的性能变得相对困难。目前篇章级机器翻译主要针对一些常见的上下文现象进行优化,比如代词翻译、省略、连接和词汇衔接等,而{\chapterfour}介绍的BLEU等通用自动评价指标通常对这些上下文依赖现象不敏感,因此篇章级翻译需要采用一些专用方法来对这些具体现象进行评价。
\parinterval 从对篇章信息建模的角度看,在统计机器翻译时代就已经有大量的研究工作。这些工作大多针对某一具体的上下文现象,比如,篇章结构\upcite{DBLP:conf/anlp/MarcuCW00,foster2010translating,DBLP:conf/eacl/LouisW14}、代词回指\upcite{DBLP:conf/iwslt/HardmeierF10,DBLP:conf/wmt/NagardK10,DBLP:conf/eamt/LuongP16,}、词汇衔接\upcite{tiedemann2010context,DBLP:conf/emnlp/GongZZ11,DBLP:conf/ijcai/XiongBZLL13,xiao2011document}和篇章连接词\upcite{DBLP:conf/sigdial/MeyerPZC11,DBLP:conf/hytra/MeyerP12,}等。区别于篇章级统计机器翻译,篇章级神经机器翻译不需要针对某一具体的上下文现象构造相应的特征,而是通过翻译模型本身从上下文句子中抽取和融合的上下文信息。通常情况下,篇章级机器翻译可以采用局部建模的手段将前一句或者周围几句作为上下文送入模型。针对需要长距离上下文的情况,也可以使用全局建模的手段直接从篇章中所有句子中提取上下文信息。近几年多数研究工作都在探索更有效的局部建模或全局建模方法,主要包括改进输入\upcite{DBLP:conf/discomt/TiedemannS17,DBLP:conf/naacl/BawdenSBH18,DBLP:conf/wmt/GonzalesMS17,DBLP:journals/corr/abs-1910-07481}、多编码器结构\upcite{DBLP:journals/corr/JeanLFC17,DBLP:journals/corr/abs-1805-10163,DBLP:conf/emnlp/ZhangLSZXZL18}、层次结构\upcite{DBLP:conf/naacl/MarufMH19,DBLP:conf/acl/HaffariM18,DBLP:conf/emnlp/YangZMGFZ19,DBLP:conf/ijcai/ZhengYHCB20}以及基于缓存的方法\upcite{DBLP:conf/coling/KuangXLZ18,DBLP:journals/tacl/TuLSZ18}等。
\parinterval 在统计机器翻译时代就已经有大量的研究工作专注于篇章信息的建模,这些工作大多针对某一具体的上下文现象,比如,篇章结构\upcite{DBLP:conf/anlp/MarcuCW00,foster2010translating,DBLP:conf/eacl/LouisW14}、代词回指\upcite{DBLP:conf/iwslt/HardmeierF10,DBLP:conf/wmt/NagardK10,DBLP:conf/eamt/LuongP16}、词汇衔接\upcite{tiedemann2010context,DBLP:conf/emnlp/GongZZ11,DBLP:conf/ijcai/XiongBZLL13,xiao2011document}和篇章连接词\upcite{DBLP:conf/sigdial/MeyerPZC11,DBLP:conf/hytra/MeyerP12}等。区别于篇章级统计机器翻译,篇章级神经机器翻译不需要针对某一具体的上下文现象构造相应的特征,而是通过翻译模型本身从上下文句子中抽取和融合上下文信息。通常情况下,篇章级机器翻译可以采用局部建模的手段将前一句或者周围几句作为上下文送入模型。针对需要长距离上下文的情况,也可以使用全局建模的手段直接从篇章的所有句子中提取上下文信息。近几年多数研究工作都在探索更有效的局部建模或全局建模方法,主要包括改进输入\upcite{DBLP:conf/discomt/TiedemannS17,DBLP:conf/naacl/BawdenSBH18,DBLP:conf/wmt/GonzalesMS17,DBLP:journals/corr/abs-1910-07481}、多编码器结构\upcite{DBLP:journals/corr/JeanLFC17,DBLP:journals/corr/abs-1805-10163,DBLP:conf/emnlp/ZhangLSZXZL18}、层次结构\upcite{DBLP:conf/naacl/MarufMH19,DBLP:conf/acl/HaffariM18,DBLP:conf/emnlp/YangZMGFZ19,DBLP:conf/ijcai/ZhengYHCB20}以及基于缓存的方法\upcite{DBLP:conf/coling/KuangXLZ18,DBLP:journals/tacl/TuLSZ18}等。
\parinterval 此外,篇章级机器翻译面临的另外一个挑战是数据稀缺。篇章级机器翻译所需要的双语数据需要保留篇章边界,数量相比于句子级双语数据要少很多。除了在之前提到的端到端方法中采用预训练或者参数共享的手段(见{\chaptersixteen}),也可以采用新的建模手段来缓解数据稀缺问题。这类方法通常将篇章级翻译流程进行分离:先训练一个句子级的翻译模型,再通过一些额外的模块来引入上下文信息。比如,在句子级翻译模型的推断过程中,通过在目标端结合篇章级语言模型引入上下文信息\upcite{DBLP:conf/discomt/GarciaCE19,DBLP:journals/tacl/YuSSLKBD20,DBLP:journals/corr/abs-2010-12827},或者基于句子级的翻译结果,使用两阶段解码等手段引入上下文信息,进而对句子级翻译结果进行修正\upcite{DBLP:conf/aaai/XiongH0W19,DBLP:conf/acl/VoitaST19,DBLP:conf/emnlp/VoitaST19}
......@@ -491,7 +491,7 @@
\parinterval BLEU等自动评价指标能够在一定程度上反映译文的整体质量,但是并不能有效地评估篇章级翻译模型的性能。这是由于很多标准测试集中需要篇章上下文的情况比例相对较少。而且,$n$-gram的匹配很难检测到一些具体的语言现象,这使得研究人员很难通过BLEU得分来判断篇章级翻译模型的效果。
\parinterval 为此,研究人员总结了机器翻译任务中存在的上下文现象,并基于此设计了相应的自动评价指标。比如针对篇章中代词的翻译问题,首先借助词对齐工具确定源语言中的代词在译文和参考答案中的对应位置,然后通过计算译文中代词的准确率和召回率等指标对代词翻译质量进行评价\upcite{DBLP:conf/iwslt/HardmeierF10,DBLP:conf/discomt/WerlenP17}。针对篇章中的词汇衔接,使用{\small\sffamily\bfseries{词汇链}}\index{词汇链}(Lexical Chain\index{Lexical Chain}\footnote{词汇链指篇章中语义相关的词所构成的序列。}等来获取能够反映词汇衔接质量的分数,然后通过加权的方式与常规的BLEU或METEOR等指标结合在一起\upcite{DBLP:conf/emnlp/WongK12,DBLP:conf/discomt/GongZZ15}。针对篇章中的连接词,使用候选词典和词对齐工具对源中连接词的正确翻译结果进行计数,计算其准确率\upcite{DBLP:conf/cicling/HajlaouiP13}
\parinterval 为此,研究人员总结了机器翻译任务中存在的上下文现象,并基于此设计了相应的自动评价指标。比如针对篇章中代词的翻译问题,首先借助词对齐工具确定源语言中的代词在译文和参考答案中的对应位置,然后通过计算译文中代词的准确率和召回率等指标对代词翻译质量进行评价\upcite{DBLP:conf/iwslt/HardmeierF10,DBLP:conf/discomt/WerlenP17}。针对篇章中的词汇衔接,使用{\small\sffamily\bfseries{词汇链}}\index{词汇链}(Lexical Chain\index{Lexical Chain})\footnote{词汇链指篇章中语义相关的词所构成的序列。}等来获取能够反映词汇衔接质量的分数,然后通过加权的方式与常规的BLEU或METEOR等指标结合在一起\upcite{DBLP:conf/emnlp/WongK12,DBLP:conf/discomt/GongZZ15}。针对篇章中的连接词,使用候选词典和词对齐工具对源语言中连接词的正确翻译结果进行计数,计算其准确率\upcite{DBLP:conf/cicling/HajlaouiP13}。
\parinterval 除了直接对译文打分,也有一些工作针对特有的上下文现象手工构造了相应的测试套件用于评价翻译质量。测试套件中每一个测试样例都包含一个正确翻译的结果,以及多个错误结果,一个理想的翻译模型应该对正确的翻译结果评价最高,排名在所有错误结果之上,此时就可以根据模型是否能挑选出正确翻译结果来评估其性能。这种方法可以很好地衡量翻译模型在某一特定上下文现象上的处理能力,比如词义消歧\upcite{DBLP:conf/wmt/RiosMS18}、代词翻译\upcite{DBLP:conf/naacl/BawdenSBH18,DBLP:conf/wmt/MullerRVS18}和一些衔接问题\upcite{DBLP:conf/acl/VoitaST19}等。但是该方法也存在使用范围受限于测试集的语种和规模的缺点,因此扩展性较差。
......@@ -523,21 +523,21 @@
\begin{example}
传统模型训练输入:
\hspace{10em}源语言:你\ 看到\ 了\ 吗\ ?
\hspace{10em}源语言:你/看到/了/吗/?
\hspace{10em}目标语言:Do you see them ?
\vspace{0.5em}
\qquad\ 改进后模型训练输入:
\hspace{10em}源语言:{\red{他们\ 在\ 哪\ ?\ <sep>\ }}\ 你\ 看到\ 了\ 吗\ ?
\hspace{10em}源语言:{\red{他们/在/哪/?\ <sep>\ }}\ 你/看到/了/吗/?
\hspace{10em}目标语言:Do you see them ?
\label{eg:17-3-1}
\end{example}
\parinterval 其他改进输入的做法相比于拼接的方法要复杂一些,首先需要对篇章进行处理,得到词汇链\footnote{词汇链指篇章中语义相关的词所构成的序列。}或者篇章嵌入等信息\upcite{DBLP:conf/wmt/GonzalesMS17,DBLP:journals/corr/abs-1910-07481},然后将融入这些信息,与当前句子一起送入模型中。目前,这种预先提取篇章信息的方法是否适合机器翻译还有待论证。
\parinterval 其他改进输入的做法相比于拼接的方法要复杂一些,首先需要对篇章进行处理,得到词汇链\footnote{词汇链指篇章中语义相关的词所构成的序列。}或者篇章嵌入等信息\upcite{DBLP:conf/wmt/GonzalesMS17,DBLP:journals/corr/abs-1910-07481},然后将这些信息与当前句子一起送入模型中。目前,这种预先提取篇章信息的方法是否适合机器翻译还有待论证。
%----------------------------------------------------------------------------------------
% NEW SUBSUB-SECTION
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -13,32 +13,32 @@
{
\begin{scope}
\node[anchor=west,draw,very thick,minimum size=25pt] (s1) at (0,0) {{88}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s2) at ([xshift=0.2cm]s1.east) {{87}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s3) at ([xshift=0.2cm]s2.east) {{45}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s4) at ([xshift=0.2cm]s3.east) {{47}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.2cm]s4.east) {{100}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.2cm]s5.east) {{15}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s2) at ([xshift=0.7cm]s1.east) {{87}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s3) at ([xshift=0.7cm]s2.east) {{45}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s4) at ([xshift=0.7cm]s3.east) {{47}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.7cm]s4.east) {{100}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.7cm]s5.east) {{15}};
\end{scope}
\begin{scope}[yshift=-1cm]
\begin{scope}[yshift=-1.2cm]
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s1) at (0,0) {{5}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s2) at ([xshift=0.2cm]s1.east) {{230}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s3) at ([xshift=0.2cm]s2.east) {{7}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s4) at ([xshift=0.2cm]s3.east) {{234}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.2cm]s4.east) {{500}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.2cm]s5.east) {{39}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s7) at ([xshift=0.2cm]s6.east) {{100}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s8) at ([xshift=0.2cm]s7.east) {{15}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s2) at ([xshift=0.7cm]s1.east) {{230}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s3) at ([xshift=0.7cm]s2.east) {{7}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s4) at ([xshift=0.7cm]s3.east) {{234}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.7cm]s4.east) {{500}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.7cm]s5.east) {{39}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s7) at ([xshift=0.7cm]s6.east) {{100}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s8) at ([xshift=0.7cm]s7.east) {{15}};
\end{scope}
\begin{scope}[yshift=-2cm]
\begin{scope}[yshift=-2.4cm]
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s1) at (0,0) {{975}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s2) at ([xshift=0.2cm]s1.east) {{7}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s3) at ([xshift=0.2cm]s2.east) {{234}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s4) at ([xshift=0.2cm]s3.east) {{294}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.2cm]s4.east) {{69}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.2cm]s5.east) {{15}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s2) at ([xshift=0.7cm]s1.east) {{7}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s3) at ([xshift=0.7cm]s2.east) {{234}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s4) at ([xshift=0.7cm]s3.east) {{294}};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.7cm]s4.east) {{69}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.7cm]s5.east) {{15}};
\end{scope}
......
......@@ -9,34 +9,34 @@
\begin{tikzpicture}
\begin{scope}
\node[anchor=west,draw,very thick,minimum size=25pt] (s1) at (0,0) {};
\node[anchor=west,draw,very thick,minimum size=25pt] (s2) at ([xshift=0.2cm]s1.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt] (s3) at ([xshift=0.2cm]s2.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt] (s4) at ([xshift=0.2cm]s3.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.2cm]s4.east) {\small{数据}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.2cm]s5.east) {};
\node[anchor=west] (s7) at ([xshift=0.2cm]s6.east) {...};
\node[anchor=west,draw,very thick,minimum size=25pt] (s2) at ([xshift=0.55cm]s1.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt] (s3) at ([xshift=0.55cm]s2.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt] (s4) at ([xshift=0.55cm]s3.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.55cm]s4.east) {\small{数据}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.55cm]s5.east) {};
\node[anchor=west] (s7) at ([xshift=0.55cm]s6.east) {...};
\end{scope}
\begin{scope}[yshift=-1cm]
\begin{scope}[yshift=-1.2cm]
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s1) at (0,0) {现在};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s2) at ([xshift=0.2cm]s1.east) {已经};
\node[anchor=west,draw,very thick,minimum size=25pt] (s3) at ([xshift=0.2cm]s2.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s4) at ([xshift=0.2cm]s3.east) {不少};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.2cm]s4.east) {\small{}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.2cm]s5.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s7) at ([xshift=0.2cm]s6.east) {数据};
\node[anchor=west,draw,very thick,minimum size=25pt] (s8) at ([xshift=0.2cm]s7.east) {};
\node[anchor=west] (s9) at ([xshift=0.2cm]s8.east) {...};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s2) at ([xshift=0.55cm]s1.east) {已经};
\node[anchor=west,draw,very thick,minimum size=25pt] (s3) at ([xshift=0.55cm]s2.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s4) at ([xshift=0.55cm]s3.east) {不少};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.55cm]s4.east) {\small{}};
\node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.55cm]s5.east) {};
\node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s7) at ([xshift=0.55cm]s6.east) {数据};
\node[anchor=west,draw,very thick,minimum size=25pt] (s8) at ([xshift=0.55cm]s7.east) {};
\node[anchor=west] (s9) at ([xshift=0.55cm]s8.east) {...};
\end{scope}
% Second row of boxes, placed 2.4cm below the origin of the enclosing picture.
% Each box is anchored by its west side 0.55cm to the right of the previous box.
% NOTE(review): several boxes carry empty text here; their labels may have been lost -- confirm against the figure.
\begin{scope}[yshift=-2.4cm]
  \node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s1) at (0,0) {确实};
  \node[anchor=west,draw,very thick,minimum size=25pt] (s2) at ([xshift=0.55cm]s1.east) {};
  \node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s3) at ([xshift=0.55cm]s2.east) {};
  \node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s4) at ([xshift=0.55cm]s3.east) {};
  \node[anchor=west,draw,very thick,minimum size=25pt, inner sep=0] (s5) at ([xshift=0.55cm]s4.east) {疑问};
  \node[anchor=west,draw,very thick,minimum size=25pt] (s6) at ([xshift=0.55cm]s5.east) {};
  % Unframed trailing node showing that the row continues.
  \node[anchor=west] (s7) at ([xshift=0.55cm]s6.east) {...};
\end{scope}
\end{tikzpicture}
......
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{axis}[
% Overall size of the axis.
width=12cm, height=4.5cm,
% Categories on the x axis are symbolic names; one tick per data point.
symbolic x coords={未抽取词,do,want,what,am,people,look},
xtick=data,
ytick={0,0.05,0.1,0.15,0.2,0.25},
xlabel={低概率词汇},
ylabel={词汇概率},
legend pos=outer north east,
% Give `xlabel style` only once: a repeated key is appended to the label style,
% so a second xshift would add to the first instead of replacing it.
xlabel style={align=right,xshift=6.5cm,yshift=0.8cm,font=\footnotesize},
ylabel style={rotate=-90,yshift=2cm,xshift=1cm,font=\footnotesize},
% Make the tick marks on both axes fully transparent.
y tick style={opacity=0},
x tick style={opacity=0},
......@@ -24,7 +24,7 @@
ymin=0,
ymax=0.25]
% Two bar series drawn side by side at every x category; exactly one \addplot per legend entry.
% Blue series (first legend entry), shifted to the left of the category position.
\addplot [ybar,bar shift=-2mm,bar width=4mm,fill=blue!40,draw=blue!40,area legend] coordinates{(未抽取词,0) (do,0.05) (want,0.05) (what,0.05) (am,0.1) (people,0.15) (look,0.2)};
% Red series (second legend entry), shifted to the right of the category position.
\addplot [ybar,bar shift=2.15mm,bar width=4mm,fill=red!40,draw=red!40,area legend] coordinates{(未抽取词,0.03) (do,0.062) (want,0.062) (what,0.062) (am,0.09) (people,0.122) (look,0.138)};
\legend{未平滑,平滑后}
\end{axis}
\end{tikzpicture}
......
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{axis}[
% Overall size of the axis (width stated once).
width=14cm,
height=4cm,
xlabel={{$\funp{P}(x)$}},
ylabel={{$\funp{I}(x)$}},
......
......@@ -10,55 +10,55 @@
% Row for value 1: five boxed marks at x = 1.2, 2.4, ..., followed by the probability label on the right.
\begin{scope}
  {\footnotesize
    \foreach \i in {1,...,5}{
      \node [draw,thick,minimum size=10pt] at (1.2*\i,0) {1};
    }
  }
  \node [anchor=west] at (43em,0) {$\funp{P}(\text{1}) = 5/30$};
\end{scope}
% Row for value 2 (red): four boxed marks spaced by a factor of 1.2, followed by the probability label on the right.
\begin{scope}[yshift=-2.5em]
  {\footnotesize
    \foreach \i in {1,...,4}{
      \node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{red} 2}};
    }
  }
  \node [anchor=west] at (43em,0) {$\funp{P}(\text{2}) = 4/30$};
\end{scope}
% Row for value 3 (ublue): six boxed marks spaced by a factor of 1.2, followed by the probability label on the right.
\begin{scope}[yshift=-5.0em]
  {\footnotesize
    \foreach \i in {1,...,6}{
      \node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{ublue} 3}};
    }
  }
  \node [anchor=west] at (43em,0) {$\funp{P}(\text{3}) = 6/30$};
\end{scope}
% Row for value 4 (ugreen): twelve boxed marks spaced by a factor of 1.2, followed by the probability label on the right.
\begin{scope}[yshift=-7.5em]
  {\footnotesize
    \foreach \i in {1,...,12}{
      \node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{ugreen} 4}};
    }
  }
  \node [anchor=west] at (43em,0) {$\funp{P}(\text{4}) = 12/30$};
\end{scope}
% Row for value 5 (purple): two boxed marks spaced by a factor of 1.2, followed by the probability label on the right.
\begin{scope}[yshift=-10.0em]
  {\footnotesize
    \foreach \i in {1,...,2}{
      \node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{purple} 5}};
    }
  }
  \node [anchor=west] at (43em,0) {$\funp{P}(\text{5}) = 2/30$};
\end{scope}
% Row for value 6 (orange): a single boxed mark, followed by the probability label on the right.
\begin{scope}[yshift=-12.5em]
  {\footnotesize
    \foreach \i in {1,...,1}{
      \node [draw,thick,minimum size=10pt] at (1.2*\i,0) {{\color{orange} 6}};
    }
  }
  \node [anchor=west] at (43em,0) {$\funp{P}(\text{6}) = 1/30$};
\end{scope}
\end{tikzpicture}
......
......@@ -13,41 +13,41 @@
{
% First row of ten boxed digits; every "1" is set in red, the other digits in bold.
% Boxes are chained left to right with a 0.5cm gap between neighbours.
\begin{scope}
  \node[anchor=west,draw,very thick,minimum size=20pt] (s1) at (0,0) {\Large{\textbf{2}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s2) at ([xshift=0.5cm]s1.east) {\Large{\textbf{3}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s3) at ([xshift=0.5cm]s2.east) {\Large{{\color{red}{1}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s4) at ([xshift=0.5cm]s3.east) {\Large{\textbf{4}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s5) at ([xshift=0.5cm]s4.east) {\Large{\textbf{4}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s6) at ([xshift=0.5cm]s5.east) {\Large{{\color{red}{1}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s7) at ([xshift=0.5cm]s6.east) {\Large{\textbf{5}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s8) at ([xshift=0.5cm]s7.east) {\Large{{\color{red}{1}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s9) at ([xshift=0.5cm]s8.east) {\Large{\textbf{4}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s10) at ([xshift=0.5cm]s9.east) {\Large{\textbf{4}}};
\end{scope}
% Second row of ten boxed digits, 1cm below the first; every "1" is set in red.
% Boxes are chained left to right with a 0.5cm gap between neighbours.
\begin{scope}[yshift=-1cm]
  \node[anchor=west,draw,very thick,minimum size=20pt] (s1) at (0,0) {\Large{\textbf{5}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s2) at ([xshift=0.5cm]s1.east) {\Large{\textbf{6}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s3) at ([xshift=0.5cm]s2.east) {\Large{\textbf{4}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s4) at ([xshift=0.5cm]s3.east) {\Large{\textbf{4}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s5) at ([xshift=0.5cm]s4.east) {\Large{\textbf{3}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s6) at ([xshift=0.5cm]s5.east) {\Large{\textbf{2}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s7) at ([xshift=0.5cm]s6.east) {\Large{{\color{red}{1}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s8) at ([xshift=0.5cm]s7.east) {\Large{\textbf{4}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s9) at ([xshift=0.5cm]s8.east) {\Large{\textbf{5}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s10) at ([xshift=0.5cm]s9.east) {\Large{{\color{red}{1}}}};
\end{scope}
% Third row of ten boxed digits, 2cm below the first; every "1" is set in red.
% Boxes are chained left to right with a 0.5cm gap between neighbours.
\begin{scope}[yshift=-2cm]
  \node[anchor=west,draw,very thick,minimum size=20pt] (s1) at (0,0) {\Large{\textbf{4}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s2) at ([xshift=0.5cm]s1.east) {\Large{\textbf{2}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s3) at ([xshift=0.5cm]s2.east) {\Large{\textbf{2}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s4) at ([xshift=0.5cm]s3.east) {\Large{\textbf{3}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s5) at ([xshift=0.5cm]s4.east) {\Large{\textbf{4}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s6) at ([xshift=0.5cm]s5.east) {\Large{{\color{red}{1}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s7) at ([xshift=0.5cm]s6.east) {\Large{\textbf{5}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s8) at ([xshift=0.5cm]s7.east) {\Large{{\color{red}{1}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s9) at ([xshift=0.5cm]s8.east) {\Large{\textbf{3}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s10) at ([xshift=0.5cm]s9.east) {\Large{\textbf{4}}};
\end{scope}
}
\end{tikzpicture}
......
......@@ -12,41 +12,41 @@
% First row of ten boxed digits; each digit value has its own colour (1 uses the default text colour).
% Boxes are chained left to right with a 0.5cm gap between neighbours.
\begin{scope}
  \node[anchor=west,draw,very thick,minimum size=20pt] (s1) at (0,0) {\Large{\textbf{{\color{ublue} 3}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s2) at ([xshift=0.5cm]s1.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s3) at ([xshift=0.5cm]s2.east) {\Large{\textbf{{\color{red} 2}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s4) at ([xshift=0.5cm]s3.east) {\Large{\textbf{{\color{ublue} 3}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s5) at ([xshift=0.5cm]s4.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s6) at ([xshift=0.5cm]s5.east) {\Large{\textbf{{\color{purple} 5}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s7) at ([xshift=0.5cm]s6.east) {\Large{\textbf{1}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s8) at ([xshift=0.5cm]s7.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s9) at ([xshift=0.5cm]s8.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s10) at ([xshift=0.5cm]s9.east) {\Large{\textbf{{\color{ublue} 3}}}};
\end{scope}
% Second row of ten boxed, colour-coded digits, 1cm below the first.
% Boxes are chained left to right with a 0.5cm gap between neighbours.
\begin{scope}[yshift=-1cm]
  \node[anchor=west,draw,very thick,minimum size=20pt] (s1) at (0,0) {\Large{\textbf{{\color{red} 2}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s2) at ([xshift=0.5cm]s1.east) {\Large{\textbf{1}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s3) at ([xshift=0.5cm]s2.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s4) at ([xshift=0.5cm]s3.east) {\Large{\textbf{{\color{purple} 5}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s5) at ([xshift=0.5cm]s4.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s6) at ([xshift=0.5cm]s5.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s7) at ([xshift=0.5cm]s6.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s8) at ([xshift=0.5cm]s7.east) {\Large{\textbf{{\color{ublue} 3}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s9) at ([xshift=0.5cm]s8.east) {\Large{\textbf{1}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s10) at ([xshift=0.5cm]s9.east) {\Large{\textbf{{\color{ugreen} 4}}}};
\end{scope}
% Third row of ten boxed, colour-coded digits, 2cm below the first.
% Boxes are chained left to right with a 0.5cm gap between neighbours.
\begin{scope}[yshift=-2cm]
  \node[anchor=west,draw,very thick,minimum size=20pt] (s1) at (0,0) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s2) at ([xshift=0.5cm]s1.east) {\Large{\textbf{{\color{ublue} 3}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s3) at ([xshift=0.5cm]s2.east) {\Large{\textbf{{\color{red} 2}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s4) at ([xshift=0.5cm]s3.east) {\Large{\textbf{{\color{orange} 6}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s5) at ([xshift=0.5cm]s4.east) {\Large{\textbf{1}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s6) at ([xshift=0.5cm]s5.east) {\Large{\textbf{{\color{red} 2}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s7) at ([xshift=0.5cm]s6.east) {\Large{\textbf{{\color{ublue} 3}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s8) at ([xshift=0.5cm]s7.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s9) at ([xshift=0.5cm]s8.east) {\Large{\textbf{{\color{ugreen} 4}}}};
  \node[anchor=west,draw,very thick,minimum size=20pt] (s10) at ([xshift=0.5cm]s9.east) {\Large{\textbf{1}}};
\end{scope}
\end{tikzpicture}
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -2,11 +2,11 @@
% Style shared by the green boxes of the hidden-state sequence.
\tikzstyle{hide} = [draw,line width=1pt,inner sep=2pt,fill=green!30,minimum size=2em]
% Hidden states y_1 ... y_m, chained left to right with a 2.4em gap; `dots` is an unframed ellipsis in between.
\node[hide] (y1) at (0,0){$y_1$};
\node[anchor=west,hide](y2)at([xshift=2.4em]y1.east){$y_2$};
\node[anchor=west,hide](y3)at([xshift=2.4em]y2.east){$y_3$};
\node[anchor=west,line width=1pt,inner sep=2pt,minimum size=2em](dots)at([xshift=2.4em]y3.east){$\cdots$};
\node[anchor=west,hide](yn-1)at([xshift=2.4em]dots.east){$y_{m-1}$};
\node[anchor=west,hide](yn)at([xshift=2.4em]yn-1.east){$y_m$};
% Red box with the observed sequence, placed below y3 and nudged 2em to the right.
\node[anchor=north,draw,line width=1pt,inner sep=2pt,fill=red!30,minimum height=2em,minimum width=12em](see)at ([yshift=-3em,xshift=2em]y3.south){${x}=(x_1,x_2,\ldots,x_{m-1},x_m)$};
% Caption above the hidden-state row.
\node[anchor=south,font=\footnotesize] at ([yshift=1em,xshift=2em]y3.north){待预测的隐含状态序列};
......
......@@ -9,7 +9,7 @@
\begin{tikzpicture}
\begin{scope}
\begin{scope}[xshift=-0.3in]
{
{\small
\node [anchor=north west] (entry1) at (0,0) {{1:}};
......@@ -24,14 +24,14 @@
% Thick ublue frame fitted around the dictionary entries and their label, drawn on the background layer.
\begin{pgfonlayer}{background}
{
\node[rectangle,draw=ublue, inner sep=0.2em,thick] [fit = (entry1) (entry2) (entry3) (entry4) (entry5) (entry6) (dictionarylabel)] {};
}
\end{pgfonlayer}
\end{scope}
{
\begin{scope}[xshift=1.2in,yshift=1em]
\begin{scope}[xshift=1.5in,yshift=1em]
\node [anchor=west] (c1) at (0,0) {};
\node [anchor=west] (c2) at ([xshift=0em]c1.east) {};
\node [anchor=west] (c3) at ([xshift=0em]c2.east) {};
......
......@@ -53,10 +53,10 @@
% Label above the treebank, and a thick ublue frame fitted around the treebank on the background layer.
\node [anchor=south] (treebanklabel) at (t1n1.north) {{\color{ublue} 数据:树库}};
\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue, inner sep=0.2em,thick] [fit = (treebanklabel) (t1n1) (t2w1) (t2wn)] (treebank) {};
\end{pgfonlayer}
% Three stacked lines of the rule-probability formula, placed 5em to the right of the treebank frame.
\node [anchor=north west] (math1) at ([xshift=5em]treebank.north east) {$\funp{P}$(VP $\to$ VV NN)};
\node [anchor=north west] (math1part2) at ([xshift=-1em,yshift=0.2em]math1.south west) {$=\frac{\textrm{VP和VV NN同时出现的次数=1}}{\textrm{VP出现的次数}=4}$};
\node [anchor=north west] (math1part3) at ([yshift=0.2em]math1part2.south west){$=\frac{1}{4}$};
......
\begin{tikzpicture}
% `hide`: green square-ish box used for hidden states; `see`: red circle used for visible states.
\tikzstyle{hide} = [draw,inner sep=2pt,thick,align=center,drop shadow,fill=green!30,font=\footnotesize,minimum height=1.8em,minimum width=1.8em]
\tikzstyle{see} = [draw,inner sep=2pt,thick,align=center,drop shadow,fill=red!35,font=\footnotesize,minimum height=1.2em,minimum width=1.2em,circle]
% Hidden-state row, chained left to right with a 2.4em gap.
\node[hide] (h1) at (0,0){C};
\node[hide,anchor=west] (h2) at ([xshift=2.4em]h1.east){B};
\node[hide,anchor=west] (h3) at ([xshift=2.4em]h2.east){A};
\node[hide,anchor=west] (h4) at ([xshift=2.4em]h3.east){B};
\node[hide,anchor=west] (h5) at ([xshift=2.4em]h4.east){C};
\node[hide,anchor=west] (h6) at ([xshift=2.4em]h5.east){A};
% Visible-state nodes, each placed 1.6em below its hidden state.
\node[see,anchor=north] (s1) at ([yshift=-1.6em]h1.south){};
\node[see,anchor=north] (s2) at ([yshift=-1.6em]h2.south){};
......@@ -37,9 +37,9 @@
% Legend: captions for the sample hidden-state box (one_h) and the sample visible-state circle (one_s).
\node[anchor=west,font=\scriptsize] at ([xshift=0.2em]one_h.east){一个隐含状态};
\node[anchor=north,see] (one_s) at ([yshift=-1.4em]one_h.south){};
\node[anchor=west,font=\scriptsize] at ([xshift=0.2em]one_s.east){一个可见状态};
% Legend: horizontal black arrow with its caption to the right.
\draw[->,line width=1.4pt] ([xshift=8.4em]one_h.east) -- ([xshift=9.4em]one_h.east);
\node[anchor=west,align=left,font=\scriptsize] at ([xshift=9.6em]one_h.east){从一个隐含状态到下一个隐含状态的\\转换,该过程隐含着转移概率};
% Legend: downward blue arrow with its caption to the right.
\draw[->,line width=1.4pt,blue!60] ([yshift=-2em,xshift=8.9em]one_h.east) --([yshift=-3em,xshift=8.9em]one_h.east) ;
\node[anchor=west,align=left,font=\scriptsize] at ([yshift=-2.5em,xshift=9.6em]one_h.east){从一个隐含状态到可见状态的输出,\\该过程隐含着发射概率};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% `unit`: small green circle used for the cells of the diagram (defined once).
\tikzstyle{unit} = [draw,circle,thick,align=center,fill=green!30,minimum size=1em]
% Invisible reference box at the origin; the row and column labels are positioned relative to it.
\node[minimum width=3em,minimum height=1.8em] (o) at (0,0){};
\node[anchor=north,inner sep=1pt,font=\footnotesize] (state_A) at ([xshift=-0em,yshift=-1em]o.south){隐含状态$A$};
......@@ -7,10 +7,10 @@
% Remaining row labels, stacked 1.6em apart below state_B.
\node[anchor=north,inner sep=1pt,font=\footnotesize] (state_C) at ([yshift=-1.6em]state_B.south){隐含状态$C$};
\node[anchor=north,inner sep=1pt,font=\footnotesize] (state_D) at ([yshift=-1.6em]state_C.south){隐含状态$D$};
% Column headers c1..c4, chained left to right with a 6em gap (3em from the reference box to c1).
\node[anchor=west,inner sep=1pt,font=\footnotesize] (c1) at ([yshift=0.2em,xshift=3em]o.east){$T$};
\node[anchor=west,inner sep=1pt,font=\footnotesize] (c2) at ([xshift=6em]c1.east){$F$};
\node[anchor=west,inner sep=1pt,font=\footnotesize] (c3) at ([xshift=6em]c2.east){$F$};
\node[anchor=west,inner sep=1pt,font=\footnotesize] (c4) at ([xshift=6em]c3.east){$T$};
% Time-step captions sitting directly above the column headers.
\node[anchor=south,font=\scriptsize] (cl1) at (c1.north) {时刻1};
\node[anchor=south,font=\scriptsize] (cl2) at (c2.north) {时刻2};
\node[anchor=south,font=\scriptsize] (cl3) at (c3.north) {时刻3};
......
......@@ -24,14 +24,14 @@
% Thick ublue frame fitted around the dictionary entries and their label, drawn on the background layer.
\begin{pgfonlayer}{background}
{
\node[rectangle,draw=ublue, inner sep=0.2em,thick] [fit = (entry1) (entry2) (entry3) (entry4) (entry5) (entry6) (dictionarylabel)] {};
}
\end{pgfonlayer}
\end{scope}
{
\begin{scope}[xshift=1.2in,yshift=1em]
\begin{scope}[xshift=1.5in,yshift=1em]
\node [anchor=west] (c1) at (0,0) {};
\node [anchor=west] (c2) at ([xshift=0em]c1.east) {};
\node [anchor=west] (c3) at ([xshift=0em]c2.east) {};
......@@ -43,7 +43,7 @@
\end{scope}
}
\begin{scope}[xshift=1.2in,yshift=-4em]
\begin{scope}[xshift=1.5in,yshift=-4em]
{
\node [anchor=west] (bc1) at (0,0) {};
\node [anchor=west] (bc2) at ([xshift=0em]bc1.east) {};
......
......@@ -19,19 +19,19 @@
\end{pgfonlayer}
}
% Statistical-model box: the P(.) symbol with its label on top, framed by a white, shadowed box on the background layer.
\node [anchor=west,ugreen] (P) at ([xshift=6.5em,yshift=-0.8em]corpus.east){\large{$\funp{P}(\cdot)$}};
\node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计模型}}}};
\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (P) (modellabel)] (model) {};
\end{pgfonlayer}
% Arrow leaving the corpus box to the right, captioned above its midpoint.
\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=5.6em]corpus.east) node [pos=0.5, above] {\color{black}{\scriptsize{统计学习}}};
% Arrow leaving the model box to the right, captioned above its midpoint.
\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=5.4em]model.east) node [pos=0.5, above] {\color{black}{\scriptsize{预测}}};
% Two-line description of the resulting system, to the right of the model box.
{\scriptsize
\node [anchor=north west] (sentlabel) at ([xshift=7.8em,yshift=-1em]model.north east) {\color{black}{自动分词系统}};
\node [anchor=north west] (sent) at (sentlabel.south west) {\textbf{对任意句子进行分词}};
}
......@@ -43,19 +43,19 @@
{\footnotesize
% Heading line of the lower panel.
{
\node [anchor=west] (label1) at (0.7em,-7em) {\textbf{自动分词系统}:对任意的数据句子$S$,找到最佳的分词结果$W^{*}$输出};
}
% Example input sentence, directly below the heading.
{
\node [anchor=north west] (label2) at (label1.south west) {假设输入$S$=“确实现在数据很多”};
}
% Three framed step boxes laid out left to right, 4em apart.
{
\node [anchor=north west,draw,thick,inner sep=4pt] (data11) at (label2.south west) {枚举所有可能的切分};
}
{
\node [anchor=west,draw,thick,inner sep=4pt] (data12) at ([xshift=4em]data11.east) {计算每种切分的概率};
}
{
\node [anchor=west,draw,thick,inner sep=4pt] (data13) at ([xshift=4.0em]data12.east) {选择最佳结果};
}
{
\draw [->,thick] ([xshift=0.1em]data11.east) -- ([xshift=-0.1em]data12.west);
......@@ -100,7 +100,7 @@
% Light-blue backdrop fitted around the whole lower panel, drawn on the background layer.
\begin{pgfonlayer}{background}
{
\node[rectangle,fill=blue!10,thick,dotted,inner sep=0.3em] [fit = (label1) (data11) (data13) (data51) (data52) (data53)] (segcontent) {};
}
\end{pgfonlayer}
......
\begin{tikzpicture}
% `unit`: red shadowed box holding one token; `lab`: tag text rotated by -90 degrees.
\tikzstyle{unit} = [draw,inner sep=2pt,thick,align=center,drop shadow,fill=red!35,font=\footnotesize,minimum height=1.2em,minimum width=1.8em]
\tikzstyle{lab} = [inner sep=0pt,align=center,rotate=-90,font=\scriptsize]
% Token boxes n11..n18 chained left to right (1.6em gap), with a "/" separator drawn after each of n11..n17.
% NOTE(review): n12, n16 and n17 have empty text here; their tokens may have been lost -- confirm against the figure.
\node[unit] (n11) at (0,0){北京};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n11.east){\Large{/}};
\node[unit,anchor=west] (n12) at ([xshift=1.6em]n11.east){};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n12.east){\Large{/}};
\node[unit,anchor=west] (n13) at ([xshift=1.6em]n12.east){中华};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n13.east){\Large{/}};
\node[unit,anchor=west] (n14) at ([xshift=1.6em]n13.east){人民};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n14.east){\Large{/}};
\node[unit,anchor=west] (n15) at ([xshift=1.6em]n14.east){共和};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n15.east){\Large{/}};
\node[unit,anchor=west] (n16) at ([xshift=1.6em]n15.east){};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n16.east){\Large{/}};
\node[unit,anchor=west] (n17) at ([xshift=1.6em]n16.east){};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n17.east){\Large{/}};
\node[unit,anchor=west] (n18) at ([xshift=1.6em]n17.east){首都};
% Tags written below the corresponding token boxes.
\node[lab,anchor=north] at ([yshift=-1.4em,xshift=0.2em]n11.south){B-CIT};
\node[lab,anchor=north] at ([yshift=-0.8em,xshift=0.2em]n12.south){O};
......
\begin{tikzpicture}
% `unit`: red shadowed box holding one token; `lab`: tag text rotated by -90 degrees.
\tikzstyle{unit} = [draw,inner sep=2pt,thick,align=center,drop shadow,fill=red!35,font=\footnotesize,minimum height=1.2em,minimum width=1.8em]
\tikzstyle{lab} = [inner sep=0pt,align=center,rotate=-90,font=\scriptsize]
% Token boxes n11..n18 chained left to right (1.6em gap), with a "/" separator drawn after each of n11..n17.
% NOTE(review): n12, n16 and n17 have empty text here; their tokens may have been lost -- confirm against the figure.
\node[unit] (n11) at (0,0){北京};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n11.east){\Large{/}};
\node[unit,anchor=west] (n12) at ([xshift=1.6em]n11.east){};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n12.east){\Large{/}};
\node[unit,anchor=west] (n13) at ([xshift=1.6em]n12.east){中华};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n13.east){\Large{/}};
\node[unit,anchor=west] (n14) at ([xshift=1.6em]n13.east){人民};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n14.east){\Large{/}};
\node[unit,anchor=west] (n15) at ([xshift=1.6em]n14.east){共和};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n15.east){\Large{/}};
\node[unit,anchor=west] (n16) at ([xshift=1.6em]n15.east){};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n16.east){\Large{/}};
\node[unit,anchor=west] (n17) at ([xshift=1.6em]n16.east){};
\node[anchor=west,inner sep=0pt,font=\footnotesize] at ([xshift=0.6em]n17.east){\Large{/}};
\node[unit,anchor=west] (n18) at ([xshift=1.6em]n17.east){首都};
% Tags written below the corresponding token boxes.
\node[lab,anchor=north] at ([yshift=-1.4em,xshift=0.2em]n11.south){S-CIT};
\node[lab,anchor=north] at ([yshift=-0.8em,xshift=0.2em]n12.south){O};
......
......@@ -64,7 +64,7 @@
{
\node [anchor=west] (mtinputlabel) at ([xshift=0.35in]inputlabel.east) {{\scriptsize \color{red}{\textbf{实际的输入}}}};
\node [anchor=west] (mtoutputlabel) at ([xshift=1.0in]mtinputlabel.east) {{\scriptsize \color{red}{\textbf{实际的输出}}}};
\node[rectangle,draw=ublue, inner sep=2pt] [fit = (mtinputlabel) (mtoutputlabel) (inputmarking) (outputmarking)] {};
\node[rectangle,draw=ublue, inner sep=2pt, thick] [fit = (mtinputlabel) (mtoutputlabel) (inputmarking) (outputmarking)] {};
}
\end{scope}
......
\begin{tikzpicture}
% NOTE(review): `class` and `word` are each defined twice; the later definition of a style
% replaces the earlier one. Looks like a before/after diff pair -- confirm which should remain.
% First pair: 1pt border, green!20 (class, rotated -90 degrees) and red!30 (word) fills.
\tikzstyle{class} = [draw,inner sep=2pt,line width=1pt,align=center,drop shadow,fill=green!20,font=\footnotesize,minimum height=1.6em,minimum width=3.4em,rotate=-90]
\tikzstyle{word} = [draw,inner sep=2pt,line width=1pt,align=center,drop shadow,fill=red!30,font=\footnotesize,minimum height=1.4em,minimum width=1.6em]
% Second pair: same geometry with the `thick` border and stronger green!30 / red!35 fills.
\tikzstyle{class} = [draw,inner sep=2pt,thick,align=center,drop shadow,fill=green!30,font=\footnotesize,minimum height=1.6em,minimum width=3.4em,rotate=-90]
\tikzstyle{word} = [draw,inner sep=2pt,thick,align=center,drop shadow,fill=red!35,font=\footnotesize,minimum height=1.4em,minimum width=1.6em]
\coordinate (o) at (0,0);
\node[anchor=west,class] (c1) at ([xshift=0em]o.east){B-CIT};
......
......@@ -7,18 +7,18 @@
%-------------------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}[sibling distance=7pt,level distance=22pt]
\begin{scope}[sibling distance=7pt,level distance=28pt]
\Tree[.\node[inner sep=1pt,fill=red!20](sn0){IP\scriptsize{ 句子}};
[.\node[inner sep=1pt,fill=red!20](sn1){NP\scriptsize{}};
[.\node[inner sep=1pt,fill=blue!20](sn2){NN\tiny{ 名词}}; \node(sw1){}; ]
\Tree[.\node[inner sep=1pt,fill=red!30](sn0){IP\scriptsize{ 句子}};
[.\node[inner sep=1pt,fill=red!30](sn1){NP\scriptsize{}};
[.\node[inner sep=1pt,fill=blue!30](sn2){NN\tiny{ 名词}}; \node(sw1){}; ]
]
[.\node[inner sep=1pt,fill=red!20](sn3){VP\scriptsize{}};
[.\node[inner sep=1pt,fill=blue!20](sn4){VV\tiny{ 动词}}; \node(sw2){喜欢}; ]
[.\node[inner sep=1pt,fill=green!20](sn5){VP\scriptsize{}};
[.\node[inner sep=1pt,fill=green!20](sn6){VV\tiny{ 动词}}; \node(sw1){}; ]
[.\node[inner sep=1pt,fill=green!20](sn7){NN\tiny{ 名词}}; \node(sw1){}; ]
[.\node[inner sep=1pt,fill=red!30](sn3){VP\scriptsize{}};
[.\node[inner sep=1pt,fill=blue!30](sn4){VV\tiny{ 动词}}; \node(sw2){喜欢}; ]
[.\node[inner sep=1pt,fill=green!30](sn5){VP\scriptsize{}};
[.\node[inner sep=1pt,fill=green!30](sn6){VV\tiny{ 动词}}; \node(sw1){}; ]
[.\node[inner sep=1pt,fill=green!30](sn7){NN\tiny{ 名词}}; \node(sw1){}; ]
]
]
]
......@@ -27,7 +27,7 @@
\end{scope}
\begin{scope}[xshift=1.7in,yshift=-0.4in]
\begin{scope}[xshift=2in,yshift=-0.4in]
\node [,inner sep=2pt] (w1) at (0,0) {};
\node [anchor=west,inner sep=2pt] (w2) at ([xshift=0.8em,yshift=3em]w1.east) {喜欢};
......
......@@ -11,7 +11,7 @@
\node [] (sent) at (0,0) {\textbf{猫 喜欢 吃 鱼}};
\end{scope}
\begin{scope}[xshift=-8em,yshift=-9em,sibling distance=-5pt,level distance=17pt,grow'=up]
\begin{scope}[xshift=-10em,yshift=-9em,sibling distance=-5pt,level distance=17pt,grow'=up]
{\footnotesize
\Tree[.IP
[.VP
......@@ -64,17 +64,17 @@
}
\end{scope}
% Curved arrows fanning out from just below the `sent` node to four end points that lie
% 2em below sent.south; the fourth arrow is dotted.
% NOTE(review): two sets of four arrows follow with different end-point x offsets
% (-8/-2/6.5/12.5em, then -10/-2.3/6.3/13.5em) -- presumably the before/after lines of a diff.
\draw [->,thick,ublue] ([xshift=-2em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=-8em,yshift=-2em]sent.south);
\draw [->,thick,ublue] ([xshift=-1em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=-2em,yshift=-2em]sent.south);
\draw [->,thick,ublue] ([xshift=0em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=6.5em,yshift=-2em]sent.south);
\draw [->,thick,ublue,dotted] ([xshift=1em]sent.south) ..controls + (south:1.5em) and +(north:2.5em).. ([xshift=12.5em,yshift=-2em]sent.south);
\draw [->,thick,ublue] ([xshift=-2em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=-10em,yshift=-2em]sent.south);
\draw [->,thick,ublue] ([xshift=-1em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=-2.3em,yshift=-2em]sent.south);
\draw [->,thick,ublue] ([xshift=0em]sent.south) ..controls + (south:2em) and +(north:2em).. ([xshift=6.3em,yshift=-2em]sent.south);
\draw [->,thick,ublue,dotted] ([xshift=1em]sent.south) ..controls + (south:1.5em) and +(north:2.5em).. ([xshift=13.5em,yshift=-2em]sent.south);
% Ellipsis placeholder node, defined twice under the same name `others` (11.8em, then 12.8em).
\node [anchor=north west] (others) at (11.8em,-3em) {...};
\node [anchor=north west] (others) at (12.8em,-3em) {...};
% Labels d_1, d_2, d_3 on the line y=-10em; each node name (d1, d2, d3) carries the
% matching subscript so the three labels are distinct.
% NOTE(review): the three nodes appear twice with different x positions -- presumably the
% before/after lines of a diff; both copies are labelled consistently here.
\node [] (d1) at (-9em,-10em) {$d_1$};
\node [] (d2) at (0em,-10em) {$d_2$};
\node [] (d3) at (8.5em,-10em) {$d_3$};
\node [] (d1) at (-11.9em,-10em) {$d_1$};
\node [] (d2) at (-2.9em,-10em) {$d_2$};
\node [] (d3) at (6.2em,-10em) {$d_3$};
\node [anchor=east] (d1p) at ([xshift=0.4em]d1.west) {$\funp{P}($};
\node [anchor=west] (d1p2) at ([xshift=-0.4em]d1.east) {$)=0.0123$};
......
\begin{tikzpicture}
% unit: bordered circle with minimum size 1em; the second definition adds `thick` and,
% being later, replaces the first.
\tikzstyle{unit} = [draw,minimum size=1em,circle]
\tikzstyle{unit} = [draw,minimum size=1em,circle,thick]
% Row g1..g4, chained left to right with 1.8em gaps (ugreen!20 fill).
\node[unit,fill=ugreen!20] (g1) at (0,0){};
\node[anchor=west,unit,fill=ugreen!20] (g2)at([xshift=1.8em]g1.east){};
\node[anchor=west,unit,fill=ugreen!20] (g3)at([xshift=1.8em]g2.east){};
\node[anchor=west,unit,fill=ugreen!20] (g4)at([xshift=1.8em]g3.east){};
% Same row with the green!30 fill; g1 uses the same shade as g2..g4 so the row is uniform.
\node[unit,fill=green!30] (g1) at (0,0){};
\node[anchor=west,unit,fill=green!30] (g2)at([xshift=1.8em]g1.east){};
\node[anchor=west,unit,fill=green!30] (g3)at([xshift=1.8em]g2.east){};
\node[anchor=west,unit,fill=green!30] (g4)at([xshift=1.8em]g3.east){};
\node[anchor=north,unit,fill=red!30] (r1)at([yshift=-4em]g1.south){};
\node[anchor=north,unit,fill=red!30] (r2)at([yshift=-4em]g2.south){};
......
\begin{tikzpicture}
\tikzstyle{unit} = [draw,minimum size=1em,circle]
\tikzstyle{unit} = [draw,minimum size=1em,circle,thick]
\node[unit,fill=ugreen!20] (g1) at (0,0){};
\node[anchor=west,unit,fill=ugreen!20] (g2)at([xshift=1.8em]g1.east){};
\node[anchor=west,unit,fill=ugreen!20] (g3)at([xshift=1.8em]g2.east){};
\node[anchor=west,unit,fill=ugreen!20] (g4)at([xshift=1.8em]g3.east){};
\node[unit,fill=green!30] (g1) at (0,0){};
\node[anchor=west,unit,fill=green!30] (g2)at([xshift=1.8em]g1.east){};
\node[anchor=west,unit,fill=green!30] (g3)at([xshift=1.8em]g2.east){};
\node[anchor=west,unit,fill=green!30] (g4)at([xshift=1.8em]g3.east){};
\node[anchor=north,unit,fill=red!30] (r1)at([yshift=-1.8em,xshift=1.4em]g2.south){};
......
\begin{tikzpicture}
\tikzstyle{unit} = [draw,minimum size=1em,circle]
\tikzstyle{unit} = [draw,minimum size=1em,circle,thick]
\node[unit,fill=ugreen!20] (g1) at (0,0){};
\node[anchor=west,unit,fill=ugreen!20] (g2)at([xshift=1.8em]g1.east){};
\node[anchor=west,unit,fill=ugreen!20] (g3)at([xshift=1.8em]g2.east){};
\node[anchor=west,unit,fill=ugreen!20] (g4)at([xshift=1.8em]g3.east){};
\node[unit,fill=green!30] (g1) at (0,0){};
\node[anchor=west,unit,fill=green!30] (g2)at([xshift=1.8em]g1.east){};
\node[anchor=west,unit,fill=green!30] (g3)at([xshift=1.8em]g2.east){};
\node[anchor=west,unit,fill=green!30] (g4)at([xshift=1.8em]g3.east){};
\node[anchor=north,unit,fill=red!30] (r1)at([yshift=-1.8em]g1.south){};
\node[anchor=north,unit,fill=red!30] (r2)at([yshift=-1.8em]g2.south){};
......
......@@ -8,7 +8,7 @@
\begin{tikzpicture}
\small
\node [anchor=west,inner sep=2pt] (r1) at (0,0) {$r_1$: NN $\to$};
\node [anchor=west,inner sep=2pt] (r2) at ([xshift=6em]r1.east) {$r_2$: VV $\to$ 喜欢};
\node [anchor=west,inner sep=2pt] (r2) at ([xshift=9em]r1.east) {$r_2$: VV $\to$ 喜欢};
\node [anchor=north west,inner sep=2pt] (r3) at ([yshift=-0.2em]r1.south west) {$r_3$: VV $\to$};
\node [anchor=north west,inner sep=2pt] (r4) at ([yshift=-0.2em]r2.south west) {$r_4$: NN $\to$};
......@@ -19,24 +19,24 @@
\node [anchor=north west,inner sep=2pt] (r8) at ([yshift=-0.2em]r6.south west) {$r_8$: IP $\to$ NP VP};
% Filled copies of r1..r4, drawn over the unfilled nodes of the same names.
% blue!20 variant: r2 is offset 6em from r1.east.
\node [anchor=west,inner sep=2pt,fill=blue!20] (r1) at (0,0) {$r_1$: NN $\to$};
\node [anchor=west,inner sep=2pt,fill=blue!20] (r2) at ([xshift=6em]r1.east) {$r_2$: VV $\to$ 喜欢};
\node [anchor=north west,inner sep=2pt,fill=blue!20] (r3) at ([yshift=-0.2em]r1.south west) {$r_3$: VV $\to$};
\node [anchor=north west,inner sep=2pt,fill=blue!20] (r4) at ([yshift=-0.2em]r2.south west) {$r_4$: NN $\to$};
% blue!30 variant: r2 uses the 9em offset of the unfilled r2 placed at ([xshift=9em]r1.east),
% so the filled node lands exactly on top of it; r4 follows because it hangs off r2.south west.
\node [anchor=west,inner sep=2pt,fill=blue!30] (r1) at (0,0) {$r_1$: NN $\to$};
\node [anchor=west,inner sep=2pt,fill=blue!30] (r2) at ([xshift=9em]r1.east) {$r_2$: VV $\to$ 喜欢};
\node [anchor=north west,inner sep=2pt,fill=blue!30] (r3) at ([yshift=-0.2em]r1.south west) {$r_3$: VV $\to$};
\node [anchor=north west,inner sep=2pt,fill=blue!30] (r4) at ([yshift=-0.2em]r2.south west) {$r_4$: NN $\to$};
\node [anchor=north west,inner sep=2pt,fill=red!20] (r5) at ([yshift=-0.2em]r3.south west) {$r_5$: NP $\to$ NN};
\node [anchor=north west,inner sep=2pt,fill=red!30] (r5) at ([yshift=-0.2em]r3.south west) {$r_5$: NP $\to$ NN};
\node [anchor=north west,inner sep=2pt,fill=green!20] (r6) at ([yshift=-0.2em]r4.south west) {$r_6$: VP $\to$ VV NN};
\node [anchor=north west,inner sep=2pt,fill=green!20] (r7) at ([yshift=-0.2em]r5.south west) {$r_7$: VP $\to$ VV VP};
\node [anchor=north west,inner sep=2pt,fill=green!20] (r8) at ([yshift=-0.2em]r6.south west) {$r_8$: IP $\to$ NP VP};
\node [anchor=north west,inner sep=2pt,fill=green!30] (r6) at ([yshift=-0.2em]r4.south west) {$r_6$: VP $\to$ VV NN};
\node [anchor=north west,inner sep=2pt,fill=green!30] (r7) at ([yshift=-0.2em]r5.south west) {$r_7$: VP $\to$ VV VP};
\node [anchor=north west,inner sep=2pt,fill=green!30] (r8) at ([yshift=-0.2em]r6.south west) {$r_8$: IP $\to$ NP VP};
\node [anchor=north west,fill=blue!20] (sent1) at ([yshift=-0.4em]r7.south west) {$r_1,r_2,r_3,r_4$};
\node [anchor=north west,fill=blue!30] (sent1) at ([yshift=-0.4em]r7.south west) {$r_1,r_2,r_3,r_4$};
\node [anchor=west] (sent1part2) at (sent1.east) {为生成单词词性的规则};
\node [anchor=north west,fill=red!20] (sent2) at ([yshift=-0.2em]sent1.south west) {$r_5$};
\node [anchor=north west,fill=red!30] (sent2) at ([yshift=-0.2em]sent1.south west) {$r_5$};
\node [anchor=west] (sent2part2) at (sent2.east) {为单变量规则,它将词性NN进一步抽象为名词短语NP};
\node [anchor=north west,fill=green!20] (sent3) at ([yshift=-0.2em]sent2.south west){$r_6,r_7,r_8$};
\node [anchor=north west,fill=green!30] (sent3) at ([yshift=-0.2em]sent2.south west){$r_6,r_7,r_8$};
\node [anchor=west] (sent3part2) at (sent3.east) {为句法结构规则,比如$r_8$表示了主(NP)+谓(VP)结构};
\end{tikzpicture}
......
......@@ -7,32 +7,32 @@
\node[anchor=west,inner sep=0pt] (cc) at ([xshift=1.4em]cb.east){\small\sffamily\bfseries{硬币$\boldsymbol C$}};
\node[anchor=north,inner sep=0pt] (ra) at ([yshift=-0.6em,xshift=-0.4em]o.south){\small\sffamily\bfseries{硬币$\boldsymbol A$}};
\node[anchor=north,inner sep=0pt] (rb) at ([yshift=-1.4em]ra.south){\small\sffamily\bfseries{硬币$\boldsymbol B$}};
\node[anchor=north,inner sep=0pt] (rc) at ([yshift=-1.4em]rb.south){\small\sffamily\bfseries{硬币$\boldsymbol C$}};
\node[anchor=north,inner sep=0pt] (rb) at ([yshift=-1.5em]ra.south){\small\sffamily\bfseries{硬币$\boldsymbol B$}};
\node[anchor=north,inner sep=0pt] (rc) at ([yshift=-1.6em]rb.south){\small\sffamily\bfseries{硬币$\boldsymbol C$}};
\node[anchor=north,inner sep=0pt] (n11) at ([yshift=-0.9em]ca.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n11) at ([yshift=-1em]ca.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n21) at ([yshift=-1em]n11.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n31) at ([yshift=-1em]n21.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n31) at ([yshift=-1.2em]n21.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n12) at ([yshift=-0.9em]cb.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n12) at ([yshift=-1em]cb.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n22) at ([yshift=-1em]n12.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n32) at ([yshift=-1em]n22.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n32) at ([yshift=-1.2em]n22.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n13) at ([yshift=-0.9em]cc.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n13) at ([yshift=-1em]cc.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n23) at ([yshift=-1em]n13.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n33) at ([yshift=-1em]n23.south){\small{$\frac{1}{3}$}};
\node[anchor=north,inner sep=0pt] (n33) at ([yshift=-1.2em]n23.south){\small{$\frac{1}{3}$}};
\draw[thick] (o.north west) -- (o.south east);
\node[anchor=south west] at ([yshift=-1em,xshift=-1.4em]o.45){\tiny{$i+1$}};
\node[anchor=north east] at ([yshift=1em,xshift=1em]o.-135){\tiny{$i$}};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=2pt,fill=red!10] [fit = (o)(n33)(cc) ] (box0) {};
\node [rectangle,inner sep=0.8em,rounded corners=2pt,fill=red!25] [fit = (o)(n33)(cc) ] (box0) {};
\end{pgfonlayer}
\node[anchor=south] at (box0.north){\scriptsize{转移概率$\funp{P}$(第$i+1$次|第$i$次)}};
\end{scope}
\begin{scope}[xshift=8cm]
\begin{scope}[xshift=8.5cm]
\node[minimum width=3em,minimum height=1.5em] (o) at (0,0){};
\node[anchor=west,inner sep=0pt] (ca) at ([yshift=0.2em,xshift=1.4em]o.east){\small\sffamily\bfseries{正面}};
......@@ -42,11 +42,11 @@
\node[anchor=north,inner sep=0pt] (rb) at ([yshift=-1.5em]ra.south){\small\sffamily\bfseries{硬币$\boldsymbol B$}};
\node[anchor=north,inner sep=0pt] (rc) at ([yshift=-1.5em]rb.south){\small\sffamily\bfseries{硬币$\boldsymbol C$}};
\node[anchor=north,inner sep=0pt] (n11) at ([yshift=-1.2em]ca.south){\footnotesize{$0.3$}};
\node[anchor=north,inner sep=0pt] (n11) at ([yshift=-1.4em]ca.south){\footnotesize{$0.3$}};
\node[anchor=north,inner sep=0pt] (n21) at ([yshift=-1.7em]n11.south){\footnotesize{$0.5$}};
\node[anchor=north,inner sep=0pt] (n31) at ([yshift=-1.7em]n21.south){\footnotesize{$0.7$}};
\node[anchor=north,inner sep=0pt] (n31) at ([yshift=-1.8em]n21.south){\footnotesize{$0.7$}};
\node[anchor=north,inner sep=0pt] (n12) at ([yshift=-1.2em]cb.south){\footnotesize{$0.7$}};
\node[anchor=north,inner sep=0pt] (n12) at ([yshift=-1.4em]cb.south){\footnotesize{$0.7$}};
\node[anchor=north,inner sep=0pt] (n22) at ([yshift=-1.7em]n12.south){\footnotesize{$0.5$}};
\node[anchor=north,inner sep=0pt] (n32) at ([yshift=-1.7em]n22.south){\footnotesize{$0.3$}};
......@@ -55,7 +55,7 @@
\node[anchor=north east] at ([yshift=1em,xshift=1em]o.-135){\tiny{隐含}};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=2pt,fill=red!10] [fit = (o)(n32)(rc)(cb) ] (box1) {};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,fill=red!25] [fit = (o)(n32)(rc)(cb) ] (box1) {};
\end{pgfonlayer}
\node[anchor=south] at (box1.north){\scriptsize{发射概率$\funp{P}$(可见状态|隐含状态)}};
\end{scope}
......
......@@ -12,23 +12,23 @@
\node [anchor=west,inner sep=2pt] (u) at ([xshift=-4em,yshift=-4em]arule.west) {VV};
\node [anchor=west,inner sep=2pt,fill=orange!40] (u) at ([xshift=-4em,yshift=-4em]arule.west) {VV};
\node [anchor=west,inner sep=2pt,fill=orange!40,draw] (u) at ([xshift=-4em,yshift=-4em]arule.west) {VV};
\node [anchor=west] (upart2) at (u.east) {NN};
\node [anchor=east] (unumber) at (u.west) {$u:$};
\node [anchor=west,inner sep=2pt] (r) at ([xshift=-3em,yshift=2.5em]u.west) {VV};
\node [anchor=west,inner sep=2pt,fill=orange!40] (r) at ([xshift=-3em,yshift=2.5em]u.west) {VV};
\node [anchor=west,inner sep=2pt,fill=orange!40,draw] (r) at ([xshift=-3em,yshift=2.5em]u.west) {VV};
\node [anchor=west] (rpart2) at (r.east) {$\to$};
\node [anchor=west,inner sep=2pt] (rpart3) at (rpart2.east) {};
\node [anchor=east] (rnumber) at (r.west) {$r:$};
\node [anchor=west,inner sep=2pt,fill=red!20] (rpart3) at (rpart2.east) {};
\node [anchor=west,inner sep=2pt,fill=red!30,draw] (rpart3) at (rpart2.east) {};
\node [anchor=west] (v) at ([xshift=5.5em]u.east) {$v$:};
\node [anchor=west,inner sep=2pt,fill=red!20] (vpart2) at (v.east) {};
\node [anchor=west,inner sep=2pt,fill=red!30,draw] (vpart2) at (v.east) {};
\node [anchor=west] (vpart3) at (vpart2.east) {NN};
\node [anchor=west] (arrow) at ([xshift=3em]u.east) {$\Rightarrow$};
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
\begin{tikzpicture}[scale=0.5]
\tikzstyle{cand} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!15]
\tikzstyle{ref} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!15]
\tikzstyle{cand} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!30]
\tikzstyle{ref} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!30]
% Row label: borderless node, 6em wide and 1.6em high (a single `minimum width`; an earlier
% duplicate key was dead because the later value wins).
\node[align=center,minimum height=1.6em,minimum width=6em] (n11) at (0,0){\small{机器译文:}};
% First candidate token, attached directly to the right edge of the label.
\node[cand,anchor=west] (n12) at ([xshift=0.0em]n11.east){Can};
......
\begin{tikzpicture}[scale=0.5]
\tikzstyle{cand} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!15]
\tikzstyle{ref} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!15]
\tikzstyle{cand} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!30]
\tikzstyle{ref} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!30]
% Row label: borderless node, 6em wide and 1.6em high (a single `minimum width`; an earlier
% duplicate key was dead because the later value wins).
\node[align=center,minimum height=1.6em,minimum width=6em] (n11) at (0,0){\small{机器译文:}};
% First candidate token, attached directly to the right edge of the label.
\node[cand,anchor=west] (n12) at ([xshift=0.0em]n11.east){Can};
......
\definecolor{ugreen}{rgb}{0,0.5,0}
\begin{tikzpicture}[scale=0.5]
\tikzstyle{cand} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!15]
\tikzstyle{ref} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!15]
\tikzstyle{cand} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!30]
\tikzstyle{ref} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!30]
% Row label: borderless node, 6em wide and 1.6em high (a single `minimum width`; an earlier
% duplicate key was dead because the later value wins).
\node[align=center,minimum height=1.6em,minimum width=6em] (n11) at (0,0){\small{机器译文:}};
% First candidate token, attached directly to the right edge of the label.
\node[cand,anchor=west] (n12) at ([xshift=0.0em]n11.east){Can};
......
......@@ -22,8 +22,8 @@
\node [anchor=south,align=left] (humanevallabel) at ([yshift=1.0em]base.-90) {{\small\bfnew\footnotesize{人直接}}\\{\small\bfnew\footnotesize{进行评价}}};
% quality estimation
\node [anchor=north east,minimum width=10em,minimum height=10em,draw=black!60,very thick,fill=ugreen!20,drop shadow] (qebox) at ([xshift=-8em]base.90) {};
\node [draw,anchor=south,minimum width=10em,align=center,draw=black!60,very thick,fill=ugreen!20,drop shadow] (qelabel) at ([yshift=0.5em]qebox.north) {\small\footnotesize{需要较为复杂的建模,}\\\small\footnotesize{开发难度同机器翻译系统}};
\node [anchor=north east,minimum width=10em,minimum height=10em,draw=black!60,very thick,fill=ugreen!30,drop shadow] (qebox) at ([xshift=-8em]base.90) {};
\node [draw,anchor=south,minimum width=10em,align=center,draw=black!60,very thick,fill=ugreen!30,drop shadow] (qelabel) at ([yshift=0.5em]qebox.north) {\small\footnotesize{需要较为复杂的建模,}\\\small\footnotesize{开发难度同机器翻译系统}};
\node [anchor=north,minimum width=10em] (qetitle) at ([yshift=-0.2em]qebox.north) {{\small\bfnew\large{无参考答案的评价}}};
\draw [-] ([yshift=-2em]qebox.north west) -- ([yshift=-2em]qebox.north east);
\node [anchor=north] (qemethod1) at ([yshift=-0.3em]qetitle.south) {单词级评价};
......@@ -32,8 +32,8 @@
\node [anchor=north] (qemethod4) at ([yshift=-0.3em]qemethod3.south) {篇章级评价};
% auto evaluation
\node [anchor=north west,minimum width=10em,minimum height=10em,draw=black!60,very thick,fill=red!20,drop shadow] (aebox) at ([xshift=8em]base.90) {};
\node [draw,anchor=south,minimum width=10em,align=center,draw=black!60,very thick,fill=red!20,drop shadow] (aelabel) at ([yshift=0.5em]aebox.north) {\small\footnotesize{基于指标性公式和}\\\small\footnotesize{简单的建模}};
\node [anchor=north west,minimum width=10em,minimum height=10em,draw=black!60,very thick,fill=red!30,drop shadow] (aebox) at ([xshift=8em]base.90) {};
\node [draw,anchor=south,minimum width=10em,align=center,draw=black!60,very thick,fill=red!30,drop shadow] (aelabel) at ([yshift=0.5em]aebox.north) {\small\footnotesize{基于指标性公式和}\\\small\footnotesize{简单的建模}};
\node [anchor=north,minimum width=10em] (aetitle) at ([yshift=-0.2em]aebox.north) {{\small\bfnew\large{有参考答案的评价}}};
\draw [-] ([yshift=-2em]aebox.north west) -- ([yshift=-2em]aebox.north east);
\node [anchor=north] (aemethod1) at ([yshift=-0.5em]aetitle.south) {BLEU、NIST、};
......@@ -42,7 +42,7 @@
\node [anchor=north] (aemethod4) at ([yshift=-0.3em]aemethod3.south) {HTER ...};
% human evaluation
\node [anchor=north,minimum width=10em,minimum height=6em,draw=black!60,very thick,fill=yellow!20,drop shadow] (hebox) at ([yshift=-4em]base.-90) {};
\node [anchor=north,minimum width=10em,minimum height=6em,draw=black!60,very thick,fill=yellow!30,drop shadow] (hebox) at ([yshift=-4em]base.-90) {};
\node [anchor=north,minimum width=10em] (hetitle) at ([yshift=-0.2em]hebox.north) {{\small\bfnew\large{人工评价}}};
\draw [-] ([yshift=-2em]hebox.north west) -- ([yshift=-2em]hebox.north east);
\node [anchor=north] (hemethod1) at ([yshift=-0.5em]hetitle.south) {流畅度、忠实度、};
......@@ -50,18 +50,18 @@
% confidence estimation
\node [anchor=east,align=left] (conf) at ([xshift=-6em,yshift=0.6em]hebox.west) {\small\bfnew{用于估计同一个}\\\small\bfnew{系统不同输出的}\\\small\bfnew{可信度}};
\node [anchor=north,single arrow,minimum height=4.0em,fill=blue!30,rotate=-90] (arrow1) at ([yshift=-2.4em]qebox.south) {};
\node [anchor=north,single arrow,minimum height=4.0em,fill=blue!35,rotate=-90] (arrow1) at ([yshift=-2.4em]qebox.south) {};
% comparing different systems
\node [anchor=west,align=left] (com) at ([xshift=8em,yshift=0.6em]hebox.east) {\small\bfnew{用于对比}\\\small\bfnew{不同系统}\\\small\bfnew{性能差异}};
\node [anchor=west,single arrow,minimum height=7.5em,fill=blue!30] (arrow2) at ([yshift=-1.4em,xshift=0.5em]hebox.north east) {};
\node [anchor=west,single arrow,minimum height=7.5em,fill=blue!35] (arrow2) at ([yshift=-1.4em,xshift=0.5em]hebox.north east) {};
\node [anchor=north,fill=white] (arrow2label) at ([xshift=-0.5em]arrow2.south) {\footnotesize{{\color{blue} 成本高但精度高}}};
\node [anchor=north,single arrow,minimum height=4.0em,fill=blue!30,rotate=-90] (arrow3) at ([yshift=-2.4em,xshift=2.2em]aebox.south) {};
\node [anchor=north,single arrow,minimum height=4.0em,fill=blue!35,rotate=-90] (arrow3) at ([yshift=-2.4em,xshift=2.2em]aebox.south) {};
\node [anchor=west,fill=white,font=\footnotesize,align=left,text=blue,inner sep=0pt] (arrow3label) at ([yshift=2.6em,xshift=0.6em]arrow3.east) {成本低\\无人工\\有偏差};
% system optimization
\node [anchor=west,align=left] (optimization) at ([xshift=2em]aebox.east) {\small\bfnew{用于机器}\\\small\bfnew{翻译系统}\\\small\bfnew{调优}};
\node [anchor=west,single arrow,minimum height=1.8em,fill=blue!30] (arrow4) at ([xshift=0.4em]aebox.east) {};
\node [anchor=west,single arrow,minimum height=1.8em,fill=blue!35] (arrow4) at ([xshift=0.4em]aebox.east) {};
\begin{pgfonlayer}{background}
\draw [->,line width=0.3em,dotted,red] ([yshift=1em,xshift=0em]hebox.south east) -- ([yshift=1em,xshift=4em]hebox.south east) -- ([yshift=10em,xshift=4em]hebox.south east) node [pos=0.8,left] {\small{{\color{red} 评价标准}}};
......
\begin{tikzpicture}[scale=0.5]
\tikzstyle{cand} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!15]
\tikzstyle{ref} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!15]
\tikzstyle{cand} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!30]
\tikzstyle{ref} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!30]
% Row label: borderless node, 6em wide and 1.6em high (a single `minimum width`; an earlier
% duplicate key was dead because the later value wins).
\node[align=center,minimum height=1.6em,minimum width=6em] (n11) at (0,0){\small{机器译文:}};
% First candidate token, attached directly to the right edge of the label.
\node[cand,anchor=west] (n12) at ([xshift=0.0em]n11.east){Can};
......
%\usetikzlibrary{backgrounds}
%\usetikzlibrary{fit}
\begin{tikzpicture}[scale=0.5]
% NOTE(review): `unit`, `bad_tag` and `ok_tag` are each defined twice (\tiny, then \scriptsize);
% the later definition replaces the earlier one. Presumably a before/after diff pair -- confirm.
% unit: bordered token box with 1.2pt padding and a 1em minimum height.
\tikzstyle{unit} = [draw,inner sep=1.2pt,font=\tiny,minimum height=1em]
\tikzstyle{unit} = [draw,inner sep=1.2pt,font=\scriptsize,minimum height=1em]
% box: bordered rectangle with asymmetric padding (1.4pt horizontal, 3pt vertical).
\tikzstyle{box} = [draw,rectangle,inner xsep=1.4pt,inner ysep=3pt]
% bad_tag / ok_tag: borderless labels in red and ugreen text respectively (\tiny versions).
\tikzstyle{bad_tag} = [inner sep=1pt,align=center,font=\tiny,text=red,minimum height=0.8em]
\tikzstyle{ok_tag} = [inner sep=1pt,align=center,font=\tiny,text=ugreen,minimum height=0.8em]
% Same two tag styles at \scriptsize.
\tikzstyle{bad_tag} = [inner sep=1pt,align=center,font=\scriptsize,text=red,minimum height=0.8em]
\tikzstyle{ok_tag} = [inner sep=1pt,align=center,font=\scriptsize,text=ugreen,minimum height=0.8em]
\coordinate (o) at (0, 0);
% Source-sentence row: a bold "Source" caption 8em above the origin coordinate `o`, followed by
% one `unit` box per token, chained left to right (0.6em gaps, 1.2em at some boundaries).
% The ninth token is the German preposition "von" ("innerhalb von 24 Stunden").
% NOTE(review): the row appears twice (\tiny caption with green!20 fills, then \scriptsize
% caption with green!30 fills) -- presumably the before/after lines of a diff.
\node[anchor=west,inner sep=0pt,align=center,font=\tiny] (n1_1) at ([yshift=8em]o.east){\textbf{Source}};
\node[unit,anchor=west,fill=green!20](n1_2) at ([xshift=2.2em]n1_1.east){Nach};
\node[unit,anchor=west,fill=green!20](n1_3) at ([xshift=0.6em]n1_2.east){Zubereitung};
\node[unit,anchor=west,fill=green!20](n1_4) at ([xshift=1.2em]n1_3.east){im};
\node[unit,anchor=west,fill=green!20](n1_5) at ([xshift=0.6em]n1_4.east){Kühlschrank};
\node[unit,anchor=west,fill=green!20](n1_6) at ([xshift=0.6em]n1_5.east){aufbewahren};
\node[unit,anchor=west,fill=green!20](n1_7) at ([xshift=1.2em]n1_6.east){und};
\node[unit,anchor=west,fill=green!20](n1_8) at ([xshift=0.6em]n1_7.east){innerhalb};
\node[unit,anchor=west,fill=green!20](n1_9) at ([xshift=0.6em]n1_8.east){von};
\node[unit,anchor=west,fill=green!20](n1_10) at ([xshift=0.6em]n1_9.east){24};
\node[unit,anchor=west,fill=green!20](n1_11) at ([xshift=1.2em]n1_10.east){Stunden};
\node[unit,anchor=west,fill=green!20](n1_12) at ([xshift=0.6em]n1_11.east){aufbrauchen};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] (n1_1) at ([yshift=8em]o.east){\textbf{Source}};
\node[unit,anchor=west,fill=green!30](n1_2) at ([xshift=2.2em]n1_1.east){Nach};
\node[unit,anchor=west,fill=green!30](n1_3) at ([xshift=0.6em]n1_2.east){Zubereitung};
\node[unit,anchor=west,fill=green!30](n1_4) at ([xshift=1.2em]n1_3.east){im};
\node[unit,anchor=west,fill=green!30](n1_5) at ([xshift=0.6em]n1_4.east){Kühlschrank};
\node[unit,anchor=west,fill=green!30](n1_6) at ([xshift=0.6em]n1_5.east){aufbewahren};
\node[unit,anchor=west,fill=green!30](n1_7) at ([xshift=1.2em]n1_6.east){und};
\node[unit,anchor=west,fill=green!30](n1_8) at ([xshift=0.6em]n1_7.east){innerhalb};
\node[unit,anchor=west,fill=green!30](n1_9) at ([xshift=0.6em]n1_8.east){von};
\node[unit,anchor=west,fill=green!30](n1_10) at ([xshift=0.6em]n1_9.east){24};
\node[unit,anchor=west,fill=green!30](n1_11) at ([xshift=1.2em]n1_10.east){Stunden};
\node[unit,anchor=west,fill=green!30](n1_12) at ([xshift=0.6em]n1_11.east){aufbrauchen};
\node[anchor=west,inner sep=0pt,align=center,font=\tiny] (n2_1) at ([yshift=-2em]o.east){\textbf{MT}};
\node[unit,anchor=west,fill=red!20](n2_2) at ([xshift=5em]n2_1.east){After};
\node[unit,anchor=west,fill=red!20](n2_3) at ([xshift=0.6em]n2_2.east){reconstitution};
\node[unit,anchor=west,fill=red!20](n2_4) at ([xshift=1.2em]n2_3.east){in};
\node[unit,anchor=west,fill=red!20](n2_5) at ([xshift=0.6em]n2_4.east){the};
\node[unit,anchor=west,fill=red!20](n2_6) at ([xshift=0.6em]n2_5.east){refrigerator};
\node[unit,anchor=west,fill=red!20](n2_7) at ([xshift=1.2em]n2_6.east){and};
\node[unit,anchor=west,fill=red!20](n2_8) at ([xshift=0.6em]n2_7.east){used};
\node[unit,anchor=west,fill=red!20](n2_9) at ([xshift=0.6em]n2_8.east){within};
\node[unit,anchor=west,fill=red!20](n2_10) at ([xshift=0.6em]n2_9.east){24};
\node[unit,anchor=west,fill=red!20](n2_11) at ([xshift=1.2em]n2_10.east){hours};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] (n2_1) at ([yshift=-2em]o.east){\textbf{MT}};
\node[unit,anchor=west,fill=red!30](n2_2) at ([xshift=5em]n2_1.east){After};
\node[unit,anchor=west,fill=red!30](n2_3) at ([xshift=0.6em]n2_2.east){reconstitution};
\node[unit,anchor=west,fill=red!30](n2_4) at ([xshift=1.2em]n2_3.east){in};
\node[unit,anchor=west,fill=red!30](n2_5) at ([xshift=0.6em]n2_4.east){the};
\node[unit,anchor=west,fill=red!30](n2_6) at ([xshift=0.6em]n2_5.east){refrigerator};
\node[unit,anchor=west,fill=red!30](n2_7) at ([xshift=1.2em]n2_6.east){and};
\node[unit,anchor=west,fill=red!30](n2_8) at ([xshift=0.6em]n2_7.east){used};
\node[unit,anchor=west,fill=red!30](n2_9) at ([xshift=0.6em]n2_8.east){within};
\node[unit,anchor=west,fill=red!30](n2_10) at ([xshift=0.6em]n2_9.east){24};
\node[unit,anchor=west,fill=red!30](n2_11) at ([xshift=1.2em]n2_10.east){hours};
\begin{pgfonlayer}{background}
\node [box,fill=green!5] [fit = (n1_2) (n1_3)] (box1_1) {};
\node [box,fill=green!5] [fit = (n1_4) (n1_5) (n1_6)] (box1_2) {};
......@@ -57,9 +57,9 @@
\node[ok_tag,anchor=north] (tag_2) at ([xshift=2.5em,yshift=-3em]box2_4.south){OK};
\node[anchor=west,inner sep=0pt,align=center,font=\tiny] at ([xshift=3.2em]tag_1.east){\textbf{Phrase-target tags}};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] at ([xshift=5.2em]tag_1.east){\textbf{Phrase-target tags}};
\node[anchor=west,inner sep=0pt,align=center,font=\tiny] at ([xshift=5.0em]tag_2.east){\textbf{Gap tags}};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] at ([xshift=8.4em]tag_2.east){\textbf{Gap tags}};
\draw[blue!30,line width=1pt] (n1_2.south) -- (n2_2.north);
\draw[blue!30,line width=1pt] (n1_2.south) -- (n2_3.north);
......
\definecolor{ugreen}{rgb}{0,0.5,0}
\begin{tikzpicture}[scale=0.6]
% NOTE(review): each of the three styles is defined twice with a different font size; the
% later definition replaces the earlier one. Presumably a before/after diff pair -- confirm.
% unit: bordered token box with 3pt padding and a small offset drop shadow (\tiny text).
\tikzstyle{unit} = [draw,inner sep=3pt,font=\tiny,minimum height=1.2em,drop shadow={shadow xshift=0.1em,shadow yshift=-0.15em}]
% bad_tag / ok_tag: lightly filled labels in red and ugreen respectively (\tiny text).
\tikzstyle{bad_tag} = [fill=red!15,inner sep=1pt,align=center,font=\tiny,text=red!80]
\tikzstyle{ok_tag} = [fill=ugreen!15,inner sep=1pt,align=center,font=\tiny,text=ugreen!80]
% Same three styles with larger text: \footnotesize for unit, \scriptsize for the tags.
\tikzstyle{unit} = [draw,inner sep=3pt,font=\footnotesize,minimum height=1.2em,drop shadow={shadow xshift=0.1em,shadow yshift=-0.15em}]
\tikzstyle{bad_tag} = [fill=red!15,inner sep=1pt,align=center,font=\scriptsize,text=red!80]
\tikzstyle{ok_tag} = [fill=ugreen!15,inner sep=1pt,align=center,font=\scriptsize,text=ugreen!80]
\coordinate (o) at (0, 0);
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] (n1_1) at ([yshift=5.5em]o.east){\textbf{Source}};
\node[unit,anchor=west,fill=green!20](n1_2) at ([xshift=7.6em]n1_1.east){Draw};
\node[unit,anchor=west,fill=green!20](n1_3) at ([xshift=0.8em]n1_2.east){or};
\node[unit,anchor=west,fill=green!20](n1_4) at ([xshift=0.8em]n1_3.east){select};
\node[unit,anchor=west,fill=green!20](n1_5) at ([xshift=0.8em]n1_4.east){a};
\node[unit,anchor=west,fill=green!20](n1_6) at ([xshift=0.8em]n1_5.east){line};
\node[unit,anchor=west,fill=green!20](n1_7) at ([xshift=0.8em]n1_6.east){.};
\node[unit,anchor=west,fill=green!30](n1_2) at ([xshift=8.4em]n1_1.east){Draw};
\node[unit,anchor=west,fill=green!30](n1_3) at ([xshift=0.8em]n1_2.east){or};
\node[unit,anchor=west,fill=green!30](n1_4) at ([xshift=0.8em]n1_3.east){select};
\node[unit,anchor=west,fill=green!30](n1_5) at ([xshift=0.8em]n1_4.east){a};
\node[unit,anchor=west,fill=green!30](n1_6) at ([xshift=0.8em]n1_5.east){line};
\node[unit,anchor=west,fill=green!30](n1_7) at ([xshift=0.8em]n1_6.east){.};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] (n2_1) at (o.east){\textbf{PE}};
\node[unit,anchor=west,fill=red!20](n2_2) at ([xshift=1em]n2_1.east){Zeichnen};
\node[unit,anchor=west,fill=red!20](n2_3) at ([xshift=0.8em]n2_2.east){oder};
\node[unit,anchor=west,fill=red!20](n2_4) at ([xshift=0.8em]n2_3.east){Sie};
\node[unit,anchor=west,fill=red!20](n2_5) at ([xshift=0.8em]n2_4.east){eine};
\node[unit,anchor=west,fill=red!20](n2_6) at ([xshift=0.8em]n2_5.east){linie};
\node[unit,anchor=west,fill=red!20](n2_7) at ([xshift=0.8em]n2_6.east){,};
\node[unit,anchor=west,fill=red!20](n2_8) at ([xshift=0.8em]n2_7.east){order};
\node[unit,anchor=west,fill=red!20](n2_9) at ([xshift=0.8em]n2_8.east){wählen};
\node[unit,anchor=west,fill=red!20](n2_10) at ([xshift=0.8em]n2_9.east){Sie};
\node[unit,anchor=west,fill=red!20](n2_11) at ([xshift=0.8em]n2_10.east){eine};
\node[unit,anchor=west,fill=red!20](n2_12) at ([xshift=0.8em]n2_11.east){aus};
\node[unit,anchor=west,fill=red!20](n2_13) at ([xshift=0.8em]n2_12.east){.};
\node[unit,anchor=west,fill=red!30](n2_2) at ([xshift=1.8em]n2_1.east){Zeichnen};
\node[unit,anchor=west,fill=red!30](n2_3) at ([xshift=0.8em]n2_2.east){oder};
\node[unit,anchor=west,fill=red!30](n2_4) at ([xshift=0.8em]n2_3.east){Sie};
\node[unit,anchor=west,fill=red!30](n2_5) at ([xshift=0.8em]n2_4.east){eine};
\node[unit,anchor=west,fill=red!30](n2_6) at ([xshift=0.8em]n2_5.east){linie};
\node[unit,anchor=west,fill=red!30](n2_7) at ([xshift=0.8em]n2_6.east){,};
\node[unit,anchor=west,fill=red!30](n2_8) at ([xshift=0.8em]n2_7.east){order};
\node[unit,anchor=west,fill=red!30](n2_9) at ([xshift=0.8em]n2_8.east){wählen};
\node[unit,anchor=west,fill=red!30](n2_10) at ([xshift=0.8em]n2_9.east){Sie};
\node[unit,anchor=west,fill=red!30](n2_11) at ([xshift=0.8em]n2_10.east){eine};
\node[unit,anchor=west,fill=red!30](n2_12) at ([xshift=0.8em]n2_11.east){aus};
\node[unit,anchor=west,fill=red!30](n2_13) at ([xshift=0.8em]n2_12.east){.};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] (n3_1) at ([yshift=-5.5em]o.east){\textbf{MT}};
\node[unit,anchor=west,fill=blue!20](n3_2) at ([xshift=4.7em]n3_1.east){Zeichnen};
\node[unit,anchor=west,fill=blue!20](n3_3) at ([xshift=0.8em]n3_2.east){oder};
\node[unit,anchor=west,fill=blue!20](n3_4) at ([xshift=0.8em]n3_3.east){wählen};
\node[unit,anchor=west,fill=blue!20](n3_5) at ([xshift=0.8em]n3_4.east){Sie};
\node[unit,anchor=west,fill=blue!20](n3_6) at ([xshift=0.8em]n3_5.east){eine};
\node[unit,anchor=west,fill=blue!20](n3_7) at ([xshift=0.8em]n3_6.east){Linie};
\node[unit,anchor=west,fill=blue!20](n3_8) at ([xshift=0.8em]n3_7.east){aus};
\node[unit,anchor=west,fill=blue!20](n3_9) at ([xshift=0.8em]n3_8.east){.};
\node[unit,anchor=west,fill=blue!30](n3_2) at ([xshift=5.5em]n3_1.east){Zeichnen};
\node[unit,anchor=west,fill=blue!30](n3_3) at ([xshift=0.8em]n3_2.east){oder};
\node[unit,anchor=west,fill=blue!30](n3_4) at ([xshift=0.8em]n3_3.east){wählen};
\node[unit,anchor=west,fill=blue!30](n3_5) at ([xshift=0.8em]n3_4.east){Sie};
\node[unit,anchor=west,fill=blue!30](n3_6) at ([xshift=0.8em]n3_5.east){eine};
\node[unit,anchor=west,fill=blue!30](n3_7) at ([xshift=0.8em]n3_6.east){Linie};
\node[unit,anchor=west,fill=blue!30](n3_8) at ([xshift=0.8em]n3_7.east){aus};
\node[unit,anchor=west,fill=blue!30](n3_9) at ([xshift=0.8em]n3_8.east){.};
\node[bad_tag,anchor=south] at ([yshift=2pt]n1_2.north){BAD};
\node[bad_tag,anchor=south] at ([yshift=2pt]n1_3.north){BAD};
......@@ -93,7 +93,7 @@
\draw[line width=1pt,ugreen!60] (n2_12.south) -- (n3_8.north);
\draw[line width=1pt,ugreen!60] (n2_13.south) -- (n3_9.north);
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize](st) at ([xshift=8em]tag1.east){\textbf{Source tags}};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] at ([xshift=3.6em]tag2.east){\textbf{MT tags}};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] (gt) at ([xshift=1.6em]tag3.east){\textbf{Gap tags}};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize](st) at ([xshift=14.4em]tag1.east){\textbf{Source tags}};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] at ([xshift=6.6em]tag2.east){\textbf{MT tags}};
\node[anchor=west,inner sep=0pt,align=center,font=\scriptsize] (gt) at ([xshift=4.6em]tag3.east){\textbf{Gap tags}};
\end{tikzpicture}
\ No newline at end of file
\definecolor{ugreen}{rgb}{0,0.5,0}
\begin{tikzpicture}[scale=0.5]
\tikzstyle{cand} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!15]
\tikzstyle{ref} = [draw,line width=1pt,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!15]
\tikzstyle{cand} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=green!30]
\tikzstyle{ref} = [draw,thick,align=center,minimum width=2.6em,minimum height=1.6em,drop shadow={shadow xshift=0.15em},fill=red!30]
\node[align=center,minimum width=2.4em,minimum height=1.6em,minimum width=6em] (n11) at (0,0){\small{机器译文:}};
\node[cand,anchor=west] (n12) at ([xshift=0.0em]n11.east){Can};
......
......@@ -8,12 +8,12 @@
\draw[] (B) .. controls +(east:1.3em) and +(west:0.3em) .. ([xshift=1.5em,yshift=2em]B) .. controls +(east:0.3em) and +(west:1.3em) .. ([xshift=3em]B);
\draw[<->] ([yshift=2.4em]A) -- (A) -- ([xshift=3.7em]A);
\begin{pgfonlayer}{background}
\node [draw,thick,rectangle,inner sep=0.5em,rounded corners=2pt,fill=red!15,drop shadow] [fit = (overall)(hypo)] (box1) {};
\node [draw,thick,rectangle,inner sep=0.5em,rounded corners=2pt,fill=red!30,drop shadow] [fit = (overall)(hypo)] (box1) {};
\end{pgfonlayer}
\node[draw,fill=yellow!15,thick,anchor=west,font=\footnotesize,align=center,drop shadow](sample) at ([xshift=4em]box1.east){样本\\观察结果};
\node[anchor=west,draw,diamond,fill=ugreen!15,drop shadow,aspect=2,font=\scriptsize,align=center,inner sep=1pt,thick] (judge) at ([xshift=3em]sample.east){小概率事件\\发生?};
\node[draw,fill=blue!10,thick,drop shadow,anchor=west,font=\footnotesize,align=center,thick](refuse) at ([xshift=6em]judge.north){拒绝原假设};
\node[draw,fill=blue!10,thick,drop shadow,anchor=west,font=\footnotesize,align=center,thick](accept) at ([xshift=6em]judge.south){接受原假设};
\node[draw,fill=yellow!30,thick,anchor=west,font=\footnotesize,align=center,drop shadow](sample) at ([xshift=4em]box1.east){样本\\观察结果};
\node[anchor=west,draw,diamond,fill=green!30!white,drop shadow,aspect=2,font=\scriptsize,align=center,inner sep=1pt,thick] (judge) at ([xshift=3em]sample.east){小概率事件\\发生?};
\node[draw,fill=blue!30,thick,drop shadow,anchor=west,font=\footnotesize,align=center,thick](refuse) at ([xshift=6em]judge.north){拒绝原假设};
\node[draw,fill=blue!30,thick,drop shadow,anchor=west,font=\footnotesize,align=center,thick](accept) at ([xshift=6em]judge.south){接受原假设};
\draw[->,thick] (box1.east) -- node[above,font=\scriptsize]{抽样}(sample.west);
\draw[->,thick] (sample.east) -- node[above,font=\scriptsize]{检验}(judge.west);
\draw[->,thick] (judge.north) -- node[above,font=\scriptsize]{}(refuse.west);
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -40,13 +40,13 @@
\caption{汉译英译文质量评价实例}
{
\begin{tabular}{c|l|c}
源文 &那只敏捷的棕色狐狸跳过了那只懒惰的狗& 评价得分 \\
源文 &那/只/敏捷/的/棕色/狐狸/跳过/了/那/只/懒惰/的/狗/& 评价得分 \\
\hline
\rule{0pt}{10pt} 机器译文1 & The quick brown fox jumped over the lazy dog. & 5 \\
\rule{0pt}{10pt} 机器译文2 & The fast brown fox jumped over a sleepy dog. & 4 \\
\rule{0pt}{10pt} 机器译文3 & The fast brown fox jumps over the dog. & 3 \\
\rule{0pt}{10pt} 机器译文4 & The quick brown fox jumps over dog. & 2 \\
\rule{0pt}{10pt} 机器译文5 & A fast fox jump dog. & 1 \\
\rule{0pt}{10pt} 机器译文1 & The quick brown fox jumped over the lazy dog . & 5 \\
\rule{0pt}{10pt} 机器译文2 & The fast brown fox jumped over a sleepy dog . & 4 \\
\rule{0pt}{10pt} 机器译文3 & The fast brown fox jumps over the dog . & 3 \\
\rule{0pt}{10pt} 机器译文4 & The quick brown fox jumps over dog . & 2 \\
\rule{0pt}{10pt} 机器译文5 & A fast fox jump dog . & 1 \\
\end{tabular}
\label{tab:4-1}
}
......@@ -205,9 +205,9 @@
\noindent 其中,$\textrm{edit}(o,g)$表示系统生成的译文$o$和参考答案$g$之间的距离,$l$是归一化因子,通常为参考答案的长度。在距离计算中所有的操作的代价都为1。在计算距离时,优先考虑移位操作,再计算编辑距离(即增加、删除和替换操作的次数)。直到增加、移位操作无法减少编辑距离时,将编辑距离和移位操作的次数累加得到TER计算的距离。
\begin{example}
机器译文:A cat is standing in the ground.
机器译文:A cat is standing in the ground .
\qquad\ 参考答案:The cat is standing on the ground.
\qquad\ 参考答案:The cat is standing on the ground .
\label{eg:4-1}
\end{example}
......@@ -234,7 +234,7 @@
\begin{example}
机器译文:the the the the
\qquad \ 参考答案:The cat is standing on the ground.
\qquad \ 参考答案:The cat is standing on the ground .
\label{eg:4-bleu-example}
\end{example}
......@@ -385,31 +385,31 @@
\parinterval 基于检测点的评价根据事先定义好的语言学检测点对译文的相应部分进行打分。如下是几个英中翻译中的检测点实例:
\begin{example}
They got up at six this morning.
They got up at six this morning .
\qquad\ \ 他们今天早晨六点钟起床
\qquad\ \ 他们/今天/早晨/六点钟/起床/
\qquad\ \ 检测点:时间词的顺序
\label{eg:4-3}
\end{example}
\begin{example}
There are nine cows on the farm.
There are nine cows on the farm .
\qquad\ \ 农场里有九头牛
\qquad\ \ 农场/里/有/九/头/牛/
\qquad\ \ 检测点:量词“头”
\label{eg:4-4}
\end{example}
\begin{example}
His house is on the south bank of the river.
His house is on the south bank of the river .
\qquad\ \ 他的房子在河的南岸
\qquad\ \ 他/的/房子/在/河/的/南岸/
\qquad\ \ We keep our money in a bank.
\qquad\ \ We keep our money in a bank .
\qquad\ \ 我们在一家银行存钱
\qquad\ \ 我们/在/一家/银行/存钱/
\qquad\ \ 检测点:bank 的多义翻译
\label{eg:4-5}
......@@ -704,11 +704,11 @@ d&=&t \frac{s}{\sqrt{n}}
\begin{example}
单词级质量评估任务
源句(Source):Draw or select a line.(英语)
源句(Source):Draw or select a line .(英语)
机器译文(MT):Zeichnen oder wählen Sie eine Linie aus.(德语)
机器译文(MT):Zeichnen oder wählen Sie eine Linie aus .(德语)
后编辑结果(PE):Zeichnen oder Sie eine Linie, oder wählen Sie eine aus.(德语)
后编辑结果(PE):Zeichnen oder Sie eine Linie, oder wählen Sie eine aus .(德语)
\label{eg:4-7}
\end{example}
......@@ -751,7 +751,7 @@ d&=&t \frac{s}{\sqrt{n}}
源句(Source):Nach Zubereitung || im Kühlschrank aufbewahren || und innerha-
\hspace{7.3em}lb von 24 || Stunden aufbrauchen.(德语)
\hspace{7.3em}lb von 24 || Stunden aufbrauchen .(德语)
机器译文(MT):After reconstitution || in the refrigerator || and used within 24 ||
......@@ -817,11 +817,11 @@ d&=&t \frac{s}{\sqrt{n}}
上文信息:A {\red housewife} won the first prize in the supermarket's anniversary
\hspace{5em}celebration.
\hspace{5em}celebration .
机器译文:A few days ago, {\red he} contacted the News Channel and said that the
\hspace{5em}supermarket owner refused to give {\red him} the prize.
\hspace{5em}supermarket owner refused to give {\red him} the prize .
\label{eg:4-9}
\end{example}
......
......@@ -13,7 +13,7 @@
\draw [->,thick,] (s1.north east) .. controls +(north east:1em) and +(north west:1em).. (t.north west) node[pos=0.5,below] {\tiny{正确翻译}};
\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at ([xshift=13em,yshift=0em]s1.east) {\black{$\seq{s}$}};
\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at ([xshift=14em,yshift=0em]s1.east) {\black{$\seq{s}$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t1) at ([xshift=1in]s.east) {\black{$\seq{t}_1$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t2) at ([xshift=3em,yshift=2em]t1.north east) {\black{$\seq{t}_2$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t3) at ([xshift=1em,yshift=4em]t1.north east) {\black{$\seq{t}_3$}};
......@@ -29,7 +29,7 @@
\draw [->,thick] (s.south east) .. controls +(300:3em) and +(south west:1em).. (t4.south west) node[pos=0.5,below] {\tiny{$\funp{P}(\seq{t}_4|\seq{s})=0.1$}};
\node [anchor=center] (foot1) at ([xshift=3.8em,yshift=-3.5em]s1.south) {\small{(a) 人的翻译候选空间}};
\node [anchor=center] (foot2) at ([xshift=7em,yshift=-3.5em]s.south) {\small{(b) 机器的翻译候选空间}};
\node [anchor=center] (foot2) at ([xshift=8em,yshift=-3.5em]s.south) {\small{(b) 机器的翻译候选空间}};
\end{tikzpicture}
......
\begin{tikzpicture}
\begin{scope}[scale=0.8]
\begin{scope}[scale=1.0]
\draw [-,very thick] (0,0) sin (1,1) cos (2,0) sin (3,-1) cos (4,0) sin (7,-1);
\draw [-latex,thick] (-0.5,-1.2) -- (8,-1.2);
......
......@@ -4,32 +4,32 @@
\definecolor{ugreen}{rgb}{0,0.5,0}
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\node [anchor=west] (sent) at (0,0) {\scriptsize{源文:我对你感到满意}};
\node [anchor=west,draw,thick,minimum width=10.5em,minimum height=1.2em] (sent-1) at (0,0) {};
\node [anchor=west] (sent) at (0,0) {\small{源文:我对你感到满意}};
\node [anchor=west,draw,thick,minimum width=11.7em,minimum height=1.5em] (sent-1) at (0,0) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,red] (s1) at ([xshift=0.15em,yshift=-2.7em]sent.south west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ugreen] (s2) at ([xshift=1.3em]s1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,orange] (s3) at ([xshift=1.3em]s2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ublue] (s4) at ([xshift=1.3em]s3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,purple] (s5) at ([xshift=1.3em]s4.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,red] (s1) at ([xshift=0.15em,yshift=-3.0em]sent.south west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,ugreen] (s2) at ([xshift=1.5em]s1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,orange] (s3) at ([xshift=1.5em]s2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,ublue] (s4) at ([xshift=1.5em]s3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,purple] (s5) at ([xshift=1.5em]s4.east) {};
{
\node [anchor=west,draw,thick,circle,minimum size=0.3em,red,fill=red] (t1) at ([yshift=-2.5em]s1.west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ugreen,fill=ugreen] (t2) at ([xshift=1.3em]t1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,orange,fill=orange] (t3) at ([xshift=1.3em]t2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ublue,fill=ublue] (t4) at ([xshift=1.3em]t3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,purple,fill=purple] (t5) at ([xshift=1.3em]t4.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,red,fill=red] (t1) at ([yshift=-2.5em]s1.west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,ugreen,fill=ugreen] (t2) at ([xshift=1.5em]t1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,orange,fill=orange] (t3) at ([xshift=1.5em]t2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,ublue,fill=ublue] (t4) at ([xshift=1.5em]t3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,purple,fill=purple] (t5) at ([xshift=1.5em]t4.east) {};
}
{
\node [anchor=west,draw,thick,circle,minimum size=0.3em,red,fill=red] (ft1) at ([yshift=-2.5em]t1.west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ublue,fill=ublue] (ft2) at ([xshift=1.3em]ft1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,purple,fill=purple] (ft3) at ([xshift=1.3em]ft2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,ugreen,fill=ugreen] (ft4) at ([xshift=1.3em]ft3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.3em,orange,fill=orange] (ft5) at ([xshift=1.3em]ft4.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,red,fill=red] (ft1) at ([yshift=-2.5em]t1.west) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,ublue,fill=ublue] (ft2) at ([xshift=1.5em]ft1.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,purple,fill=purple] (ft3) at ([xshift=1.5em]ft2.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,ugreen,fill=ugreen] (ft4) at ([xshift=1.5em]ft3.east) {};
\node [anchor=west,draw,thick,circle,minimum size=0.5em,orange,fill=orange] (ft5) at ([xshift=1.5em]ft4.east) {};
}
\draw [->,thick,double] ([yshift=-0.1em]sent-1.south) -- ([yshift=-1.0em]sent-1.south);
\draw [->,thick,double] ([yshift=-0.2em]sent-1.south) -- ([yshift=-1.1em]sent-1.south);
{
\draw [->,thick] ([yshift=-0.1em]s1.south) -- ([yshift=0.1em]t1.north);
......@@ -46,38 +46,33 @@
\draw [->,thick] ([yshift=-0.1em]t5.south) -- ([yshift=0.1em]ft3.north);
}
{
\node [anchor=north west] (label1) at ([xshift=0.6em,yshift=0.0em]sent-1.south east) {{分析}};
\node [anchor=north west] (label2) at ([yshift=-1.5em]label1.south west) {{转换}};
\node [anchor=north west] (label3) at ([yshift=-1.1em]label2.south west) {{生成}};
\node [anchor=north west] (label1) at ([xshift=1em,yshift=-0.5em]sent-1.south east) {{分析}};
\node [anchor=north west] (label2) at ([yshift=-1.3em]label1.south west) {{转换}};
\node [anchor=north west] (label3) at ([yshift=-1.0em]label2.south west) {{生成}};
}
{\scriptsize
{\small
\begin{scope}
\node [anchor=west] (ss1) at ([xshift=-1.6em,yshift=1.5em]s1.east) {};
\node [anchor=west] (ss2) at ([xshift=1.4em]ss1.east) {};
\node [anchor=west] (ss3) at ([xshift=1.5em]ss2.east) {};
\node [anchor=west] (ss4) at ([xshift=1.0em]ss3.east) {感到};
\node [anchor=west] (ss5) at ([xshift=0.4em]ss4.east) {满意};
\node [anchor=west] (ss1) at ([xshift=-1.6em,yshift=1.2em]s1.east) {};
\node [anchor=west] (ss2) at ([xshift=1.2em]ss1.east) {};
\node [anchor=west] (ss3) at ([xshift=1.1em]ss2.east) {};
\node [anchor=west] (ss4) at ([xshift=0.5em]ss3.east) {感到};
\node [anchor=west] (ss5) at ([xshift=0.2em]ss4.east) {满意};
\end{scope}
\begin{scope}[yshift=-3em]
\node [anchor=west] (tt1) at ([xshift=-1.2em,yshift=-1.5em]ft1.east) {I};
\node [anchor=west] (tt2) at ([xshift=1.7em,yshift=-0.1em]tt1.east) {am};
\node [anchor=west] (tt3) at ([xshift=0.3em,yshift=0.1em]tt2.east) {satisfied};
\node [anchor=west] (tt4) at ([xshift=-0.1em]tt3.east) {with};
\node [anchor=west] (tt5) at ([xshift=0.7em,yshift=-0.2em]tt4.east) {you};
\node [anchor=west] (tt2) at ([xshift=1.4em,yshift=-0.1em]tt1.east) {am};
\node [anchor=west] (tt3) at ([xshift=-0.1em,yshift=0.1em]tt2.east) {satisfied};
\node [anchor=west] (tt4) at ([xshift=-0.5em]tt3.east) {with};
\node [anchor=west] (tt5) at ([xshift=0.3em,yshift=-0.2em]tt4.east) {you};
\end{scope}
}
\node [anchor=west] (sent2) at ([xshift=-3.0em,yshift=-1.8em]tt2.south) {\scriptsize{译文:I am satisfied with you }};
\node [anchor=west,draw,thick,minimum width=10.5em,minimum height=1.2em] (sent-2) at ([xshift=-3.0em,yshift=-1.8em]tt2.south) {};
\node [anchor=west] (sent2) at ([xshift=-3.2em,yshift=-2.0em]tt2.south) {\small{译文:I am satisfied with you }};
\node [anchor=west,draw,thick,minimum width=11.7em,minimum height=1.5em] (sent-2) at ([xshift=-3.2em,yshift=-2.0em]tt2.south) {};
\draw [->,thick,double] ([yshift=1.1em]sent-2.north) -- ([yshift=0.2em]sent-2.north);
\draw [->,thick,double] ([yshift=1.0em]sent-2.north) -- ([yshift=0.1em]sent-2.north);
\begin{pgfonlayer}{background}
{
%\node[rectangle,draw=ublue,thick, inner sep=0mm] [fit =(sent)] {};
%\node[rectangle,draw=ublue,thick, inner sep=0mm] [fit =(sent2)] {};
}
\end{pgfonlayer}
\end{tikzpicture}
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -200,11 +200,12 @@ IBM模型由Peter F. Brown等人于上世纪九十年代初提出\upcite{DBLP:jo
\parinterval 以汉译英为例,当翻译“我”这个单词时,可能直接会想到用“I”、“me”或“I'm”作为它的译文,而几乎不会选择“you”、“satisfied”等含义相差太远的译文。这是为什么呢?如果从统计学的角度来看,无论是何种语料,包括教材、新闻、小说等,绝大部分情况下“我”都翻译成了“I”、“me”等,几乎不会看到“我”被翻译成“you”或“satisfied”的情况。可以说“我”翻译成“I”、“me”等属于高频事件,而翻译成“you”、“satisfied”等属于低频或小概率事件。因此人在翻译时也是选择在统计意义上概率更大的译文,这也间接反映出统计模型可以在一定程度上描述人的翻译习惯和模式。
\parinterval 表\ref{tab:5-1}展示了汉语到英语的单词翻译实例及相应的翻译概率。可以看到,“我”翻译成“I”的概率最高,为0.5。这是符合人类对翻译的认知的。此外,这种概率化的模型避免了非0即1的判断,所有的译文都是可能的,只是概率不同。这也使得统计模型可以覆盖更多的翻译现象,甚至捕捉到一些人所忽略的情况。\\ \\ \\
\parinterval 表\ref{tab:5-1}展示了汉语到英语的单词翻译实例及相应的翻译概率。可以看到,“我”翻译成“I”的概率最高,为0.5。这是符合人类对翻译的认知的。此外,这种概率化的模型避免了非0即1的判断,所有的译文都是可能的,只是概率不同。这也使得统计模型可以覆盖更多的翻译现象,甚至捕捉到一些人所忽略的情况。
%----------------------------------------------
\begin{table}[htp]
\centering
\caption{汉译英单词翻译概率}
\begin{tabular}{c | c c}
源语言 & 目标语言 & 翻译概率 \\ \hline
& I & 0.50 \\
......@@ -214,7 +215,6 @@ IBM模型由Peter F. Brown等人于上世纪九十年代初提出\upcite{DBLP:jo
& am & 0.10 \\
... & ... & ... \\
\end{tabular}
\caption{汉译英单词翻译概率}
\label{tab:5-1}
\end{table}
%----------------------------------------------
......@@ -469,13 +469,14 @@ g(\seq{s},\seq{t}) & \equiv & \prod_{j,i \in \widehat{A}}{\funp{P}(s_j,t_i)} \ti
%----------------------------------------------
\parinterval 已经有工作证明机器翻译问题是NP难的\upcite{knight1999decoding}。对于如此巨大的搜索空间,需要一种十分高效的搜索算法才能实现机器翻译的解码。在{\chaptertwo}已经介绍一些常用的搜索方法。这里使用一种贪婪的搜索方法实现机器翻译的解码。它把解码分成若干步骤,每步只翻译一个单词,并保留当前“最好”的结果,直至所有源语言单词都被翻译完毕。
\vspace{0.3em}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter5/Figures/figure-greedy-mt-decoding-pseudo-code}
\caption{贪婪的机器翻译解码算法的伪代码}
\label{fig:5-10}
\subfigure{\input{./Chapter5/Figures/figure-greedy-mt-decoding-process-1}}
\subfigure{\input{./Chapter5/Figures/figure-greedy-mt-decoding-process-3}}
\caption{贪婪的机器翻译解码过程实例}
\label{fig:5-11}
\end{figure}
%----------------------------------------------
......@@ -484,14 +485,13 @@ g(\seq{s},\seq{t}) & \equiv & \prod_{j,i \in \widehat{A}}{\funp{P}(s_j,t_i)} \ti
%----------------------------------------------
\begin{figure}[htp]
\centering
\subfigure{\input{./Chapter5/Figures/figure-greedy-mt-decoding-process-1}}
\subfigure{\input{./Chapter5/Figures/figure-greedy-mt-decoding-process-3}}
\caption{贪婪的机器翻译解码过程实例}
\label{fig:5-11}
\input{./Chapter5/Figures/figure-greedy-mt-decoding-pseudo-code}
\caption{贪婪的机器翻译解码算法的伪代码}
\label{fig:5-10}
\end{figure}
%----------------------------------------------
该算法的核心在于,系统一直维护一个当前最好的结果,之后每一步考虑扩展这个结果的所有可能,并计算模型得分,然后再保留扩展后的最好结果。注意,在每一步中,只有排名第一的结果才会被保留,其他结果都会被丢弃。这也体现了贪婪的思想。显然这个方法不能保证搜索到全局最优的结果,但是由于每次扩展只考虑一个最好的结果,因此该方法速度很快。图\ref{fig:5-11}给出了算法执行过程的简单示例。当然,机器翻译的解码方法有很多,这里仅仅使用简单的贪婪搜索方法来解决机器翻译的解码问题,在后续章节会对更加优秀的解码方法进行介绍。
\parinterval 该算法的核心在于,系统一直维护一个当前最好的结果,之后每一步考虑扩展这个结果的所有可能,并计算模型得分,然后再保留扩展后的最好结果。注意,在每一步中,只有排名第一的结果才会被保留,其他结果都会被丢弃。这也体现了贪婪的思想。显然这个方法不能保证搜索到全局最优的结果,但是由于每次扩展只考虑一个最好的结果,因此该方法速度很快。图\ref{fig:5-11}给出了算法执行过程的简单示例。当然,机器翻译的解码方法有很多,这里仅仅使用简单的贪婪搜索方法来解决机器翻译的解码问题,在后续章节会对更加优秀的解码方法进行介绍。
%----------------------------------------------------------------------------------------
% NEW SECTION
......@@ -875,7 +875,7 @@ g(\seq{s},\seq{t}) & \equiv & \prod_{j,i \in \widehat{A}}{\funp{P}(s_j,t_i)} \ti
\begin{eqnarray}
& & \textrm{max}\Big(\frac{\varepsilon}{(l+1)^m}\prod_{j=1}^{m}\sum_{i=0}^{l}{f({s_j|t_i})}\Big) \nonumber \\
& \textrm{s.t.} & \textrm{任意单词} t_{y}:\;\sum_{s_x}{f(s_x|t_y)} = 1 \nonumber
\label{eq:5-31}
\label{eq:5-29-30}
\end{eqnarray}
\noindent 其中,$\textrm{max}(\cdot)$表示最大化,$\frac{\varepsilon}{(l+1)^m}\prod_{j=1}^{m}\sum_{i=0}^{l}{f({s_j|t_i})}$是目标函数,$f({s_j|t_i})$是模型的参数,$\sum_{s_x}{f(s_x|t_y)}=1$是优化的约束条件,以保证翻译概率满足归一化的要求。需要注意的是$\{f(s_x |t_y)\}$对应了很多参数,每个源语言单词和每个目标语单词的组合都对应一个参数$f(s_x |t_y)$
......@@ -916,42 +916,42 @@ L(f,\lambda)&=&\frac{\varepsilon}{(l+1)^m}\prod_{j=1}^{m}\sum_{i=0}^{l}{f(s_j|t_
\noindent 这里$s_u$和$t_v$分别表示源语言和目标语言词表中的某一个单词。为了求$\frac{\partial \big[ \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \big]}{\partial f(s_u|t_v)}$,这里引入一个辅助函数。令$g(z)=\alpha z^{\beta}$ 为变量$z$ 的函数,显然,
$\frac{\partial g(z)}{\partial z} = \alpha \beta z^{\beta-1} = \frac{\beta}{z}\alpha z^{\beta} = \frac{\beta}{z} g(z)$。这里可以把$\prod_{j=1}^{m} \sum_{i=0}^{l} f(s_j|t_i)$看做$g(z)=\alpha z^{\beta}$的实例。首先,令$z=\sum_{i=0}^{l}f(s_u|t_i)$,注意$s_u$为给定的源语单词。然后,把$\beta$定义为$\sum_{i=0}^{l}f(s_u|t_i)$在$\prod_{j=1}^{m} \sum_{i=0}^{l} f(s_j|t_i)$ 中出现的次数,即源语句子中与$s_u$相同的单词的个数。
\vspace{-1em}
\begin{eqnarray}
\beta &=& \sum_{j=1}^{m} \delta(s_j,s_u)
\label{eq:5-32}
\end{eqnarray}
\noindent 其中,当$x=y$时,$\delta(x,y)=1$,否则为0。
\parinterval 根据$\frac{\partial g(z)}{\partial z} = \frac{\beta}{z} g(z)$,可以得到
\vspace{-0.5em}
\begin{eqnarray}
\frac{\partial g(z)}{\partial z}& =& \frac{\partial \big[ \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \big]}{\partial \big[ \sum\limits_{i=0}^{l}f(s_u|t_i) \big]} \nonumber \\
& = &\frac{\sum\limits_{j=1}^{m} \delta(s_j,s_u)}{\sum\limits_{i=0}^{l}f(s_u|t_i)} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i)
\frac{\partial g(z)}{\partial z}& =& \frac{\partial \big[ \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \big]}{\partial \big[ \sum\limits_{i=0}^{l}f(s_u|t_i) \big]}\nonumber \\
&=& \frac{\sum\limits_{j=1}^{m} \delta(s_j,s_u)}{\sum\limits_{i=0}^{l}f(s_u|t_i)} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i)
\label{eq:5-33}
\end{eqnarray}
\parinterval 根据$\frac{\partial g(z)}{\partial z}$和$\frac{\partial z}{\partial f}$计算的结果,可以得到
\vspace{-0.5em}
\begin{eqnarray}
{\frac{\partial \big[ \prod_{j=1}^{m} \sum_{i=0}^{l} f(s_j|t_i) \big]}{\partial f(s_u|t_v)}}& =& {{\frac{\partial \big[ \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \big]}{\partial \big[ \sum\limits_{i=0}^{l}f(s_u|t_i) \big]}} \cdot{\frac{\partial \big[ \sum\limits_{i=0}^{l}f(s_u|t_i) \big]}{\partial f(s_u|t_v)}}} \nonumber \\
& = &{\frac{\sum\limits_{j=1}^{m} \delta(s_j,s_u)}{\sum\limits_{i=0}^{l}f(s_u|t_i)} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \cdot \sum\limits_{i=0}^{l} \delta(t_i,t_v)}
\label{eq:5-34}
\end{eqnarray}
\parinterval 将$\frac{\partial \big[ \prod_{j=1}^{m} \sum_{i=0}^{l} f(s_j|t_i) \big]}{\partial f(s_u|t_v)}$进一步代入$\frac{\partial L(f,\lambda)}{\partial f(s_u|t_v)}$,得到$L(f,\lambda)$的导数
\vspace{-0.5em}
\begin{eqnarray}
& &{\frac{\partial L(f,\lambda)}{\partial f(s_u|t_v)}}\nonumber \\
&=&{\frac{\varepsilon}{(l+1)^{m}} \cdot \frac{\partial \big[ \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \big]}{\partial f(s_u|t_v)} - \lambda_{t_v}}\nonumber \\
{\frac{\partial L(f,\lambda)}{\partial f(s_u|t_v)}} &=&{\frac{\varepsilon}{(l+1)^{m}} \cdot \frac{\partial \big[ \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \big]}{\partial f(s_u|t_v)} - \lambda_{t_v}}\nonumber \\
&=&{\frac{\varepsilon}{(l+1)^{m}} \frac{\sum_{j=1}^{m} \delta(s_j,s_u) \cdot \sum_{i=0}^{l} \delta(t_i,t_v)}{\sum_{i=0}^{l}f(s_u|t_i)} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) - \lambda_{t_v}}
\label{eq:5-35}
\end{eqnarray}
\parinterval 令$\frac{\partial L(f,\lambda)}{\partial f(s_u|t_v)}=0$,有
\vspace{-1em}
\begin{eqnarray}
f(s_u|t_v) &=& \frac{\lambda_{t_v}^{-1} \varepsilon}{(l+1)^{m}} \cdot \frac{\sum\limits_{j=1}^{m} \delta(s_j,s_u) \cdot \sum\limits_{i=0}^{l} \delta(t_i,t_v)}{\sum\limits_{i=0}^{l}f(s_u|t_i)} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \cdot f(s_u|t_v)
\label{eq:5-36}
\end{eqnarray}
\parinterval 将上式稍作调整得到下式:
\vspace{-1em}
\begin{eqnarray}
f(s_u|t_v) &=& \lambda_{t_v}^{-1} \frac{\varepsilon}{(l+1)^{m}} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) \sum\limits_{j=1}^{m} \delta(s_j,s_u) \sum\limits_{i=0}^{l} \delta(t_i,t_v) \frac{f(s_u|t_v) }{\sum\limits_{i=0}^{l}f(s_u|t_i)}
\label{eq:5-37}
......@@ -980,11 +980,13 @@ f(s_u|t_v) &=& \lambda_{t_v}^{-1} \frac{\varepsilon}{(l+1)^{m}} \prod\limits_{j=
%----------------------------------------------
\parinterval 期望频次是事件在其分布下出现次数的期望。令$c_{\mathbb{E}}(X)$为事件$X$的期望频次,其计算公式为:
\vspace{-0.5em}
\begin{eqnarray}
c_{\mathbb{E}}(X)&=&\sum_i c(x_i) \cdot \funp{P}(x_i)
\label{eq:5-38}
\end{eqnarray}
\vspace{-0.5em}
\noindent 其中$c(x_i)$表示$X$取$x_i$时出现的次数,$\funp{P}(x_i)$表示$X=x_i$出现的概率。图\ref{fig:5-26}展示了事件$X$的期望频次的详细计算过程。其中$x_1$、$x_2$和$x_3$分别表示事件$X$出现2次、1次和5次的情况。
%----------------------------------------------
......@@ -997,46 +999,66 @@ c_{\mathbb{E}}(X)&=&\sum_i c(x_i) \cdot \funp{P}(x_i)
\end{figure}
%----------------------------------------------
\vspace{-0.5em}
\parinterval 因为在$\funp{P}(\seq{s}|\seq{t})$中,$t_v$翻译(连接)到$s_u$的期望频次为:
\vspace{-0.5em}
\begin{eqnarray}
c_{\mathbb{E}}(s_u|t_v;\seq{s},\seq{t}) & \equiv & \sum\limits_{j=1}^{m} \delta(s_j,s_u) \cdot \sum\limits_{i=0}^{l} \delta(t_i,t_v) \cdot \frac {f(s_u|t_v)}{\sum\limits_{i=0}^{l}f(s_u|t_i)}
\label{eq:5-39}
\end{eqnarray}
\vspace{-0.5em}
\parinterval 所以公式\ref {eq:5-37}可重写为:
\vspace{-0.5em}
\begin{eqnarray}
f(s_u|t_v)&=&\lambda_{t_v}^{-1} \cdot \funp{P}(\seq{s}| \seq{t}) \cdot c_{\mathbb{E}}(s_u|t_v;\seq{s},\seq{t})
\label{eq:5-40}
\end{eqnarray}
\vspace{-0.5em}
\parinterval 在此如果令$\lambda_{t_v}^{'}=\frac{\lambda_{t_v}}{\funp{P}(\seq{s}| \seq{t})}$,可得:
\vspace{-0.5em}
\begin{eqnarray}
f(s_u|t_v) &= &\lambda_{t_v}^{-1} \cdot \funp{P}(\seq{s}| \seq{t}) \cdot c_{\mathbb{E}}(s_u|t_v;\seq{s},\seq{t}) \nonumber \\
&=&{(\lambda_{t_v}^{'})}^{-1} \cdot c_{\mathbb{E}}(s_u|t_v;\seq{s},\seq{t})
\label{eq:5-41}
\end{eqnarray}
\vspace{-0.5em}
\parinterval 又因为IBM模型对$f(\cdot|\cdot)$的约束如下:
\vspace{-0.5em}
\begin{eqnarray}
\forall t_y : \sum\limits_{s_x} f(s_x|t_y) &=& 1
\label{eq:5-42}
\end{eqnarray}
\vspace{-0.5em}
\parinterval 为了满足$f(\cdot|\cdot)$的概率归一化约束,易得$\lambda_{t_v}^{'}$为:
\vspace{-0.5em}
\begin{eqnarray}
\lambda_{t_v}^{'}&=&\sum\limits_{s'_u} c_{\mathbb{E}}(s'_u|t_v;\seq{s},\seq{t})
\label{eq:5-43}
\end{eqnarray}
\vspace{-0.5em}
\parinterval 因此,$f(s_u|t_v)$的计算式可再一步变换成下式:
\vspace{-0.5em}
\begin{eqnarray}
f(s_u|t_v)&=&\frac{c_{\mathbb{E}}(s_u|t_v;\seq{s},\seq{t})} { \sum\limits_{s'_u} c_{\mathbb{E}}(s'_u|t_v;\seq{s},\seq{t}) }
\label{eq:5-44}
\end{eqnarray}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter5/Figures/figure-calculation-formula&iterative-process-of-function}
\caption{$f(s_u|t_v)$的计算公式和迭代过程}
\label{fig:5-27}
\end{figure}
%----------------------------------------------
\vspace{-0.5em}
\parinterval 进一步,假设有$K$个互译的句对(称作平行语料):
\vspace{-0.5em}
$\{(\seq{s}^{[1]},\seq{t}^{[1]}),...,(\seq{s}^{[K]},\seq{t}^{[K]})\}$,$f(s_u|t_v)$的期望频次为:
\begin{eqnarray}
c_{\mathbb{E}}(s_u|t_v)&=&\sum\limits_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\seq{s}^{[k]},\seq{t}^{[k]})
......@@ -1048,15 +1070,6 @@ c_{\mathbb{E}}(s_u|t_v)&=&\sum\limits_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;s^{[k]},
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter5/Figures/figure-calculation-formula&iterative-process-of-function}
\caption{$f(s_u|t_v)$的计算公式和迭代过程}
\label{fig:5-27}
\end{figure}
%----------------------------------------------
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter5/Figures/figure-em-algorithm-flow-chart}
\caption{EM算法流程图(IBM模型1)}
\label{fig:5-28}
......@@ -1065,10 +1078,6 @@ c_{\mathbb{E}}(s_u|t_v)&=&\sum\limits_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;s^{[k]},
\parinterval 至此,本章完成了对IBM模型1训练方法的介绍。其可以通过图\ref{fig:5-28}所示的算法进行实现。算法最终的形式并不复杂,因为只需要遍历每个句对,之后计算$f(\cdot|\cdot)$的期望频次,最后估计新的$f(\cdot|\cdot)$,这个过程迭代直至$f(\cdot|\cdot)$收敛至稳定状态。
\vspace{-1.5em}
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
......
......@@ -27,7 +27,7 @@
\node[anchor=west,inner sep=0pt,font=\footnotesize,rotate=45] at([xshift=0.1cm+\bc*4,yshift=0.4em]o.east){you};
\node[anchor=east,inner sep=0pt,font=\small] at([xshift=\bc*4.5,yshift=-1.0cm-\bc*4]o.west){(a)对齐实例1};
\end{scope}
\begin{scope}[xshift=15.0em]
\begin{scope}[xshift=17.0em]
\filldraw [fill=white,drop shadow] (0,0) rectangle (\bc*8,\bc*6);
\filldraw [fill=black] (0,\bc*5) rectangle (\bc*1,\bc*6);
\filldraw [fill=black] (\bc*1,\bc*3) rectangle (\bc*2,\bc*4);
......
......@@ -6,19 +6,19 @@
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (s1) at (0,0) {\footnotesize{$s_1$}:我};
\node [anchor=west] (s2) at ([xshift=0.5em]s1.east) {\footnotesize{$s_2$}:对};
\node [anchor=west] (s3) at ([xshift=0.5em]s2.east) {\footnotesize{$s_3$}:你};
\node [anchor=west] (s4) at ([xshift=0.5em]s3.east) {\footnotesize{$s_4$}:感到};
\node [anchor=west] (s5) at ([xshift=0.5em]s4.east) {\footnotesize{$s_5$}:满意};
\node [anchor=west] (s1) at (0,0) {\small{$s_1$}:我};
\node [anchor=west] (s2) at ([xshift=0.5em]s1.east) {\small{$s_2$}:对};
\node [anchor=west] (s3) at ([xshift=0.5em]s2.east) {\small{$s_3$}:你};
\node [anchor=west] (s4) at ([xshift=0.5em]s3.east) {\small{$s_4$}:感到};
\node [anchor=west] (s5) at ([xshift=0.5em]s4.east) {\small{$s_5$}:满意};
\end{scope}
\begin{scope}[yshift=-3.0em]
\node [anchor=west] (t1) at (0.35em,0) {\footnotesize{$t_1$}:I};
\node [anchor=west] (t2) at ([xshift=1.0em,yshift=0.0em]t1.east) {\footnotesize{$t_2$}:am};
\node [anchor=west] (t3) at ([xshift=0.3em,yshift=0.0em]t2.east) {\footnotesize{$t_3$}:satisfied};
\node [anchor=west] (t4) at ([xshift=0.3em]t3.east) {\footnotesize{$t_4$}:with};
\node [anchor=west] (t5) at ([xshift=0.3em,yshift=-0.0em]t4.east) {\footnotesize{$t_5$}:you};
\begin{scope}[yshift=-3.5em]
\node [anchor=west] (t1) at (0.45em,0) {\small{$t_1$}:I};
\node [anchor=west] (t2) at ([xshift=1.0em,yshift=0.0em]t1.east) {\small{$t_2$}:am};
\node [anchor=west] (t3) at ([xshift=0.3em,yshift=0.0em]t2.east) {\small{$t_3$}:satisfied};
\node [anchor=west] (t4) at ([xshift=0.3em]t3.east) {\small{$t_4$}:with};
\node [anchor=west] (t5) at ([xshift=0.3em,yshift=-0.0em]t4.east) {\small{$t_5$}:you};
\end{scope}
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -154,8 +154,6 @@
\parinterval 需要注意的是,公式\eqref{eq:6-7}之所以被看作是一种隐马尔可夫模型,是由于其形式与标准的一阶隐马尔可夫模型无异。$\funp{P}(a_{j}|a_{j-1},l)$可以被看作是一种状态转移概率,$f(s_{j}|t_{a_j})$可以被看作是一种发射概率。关于隐马尔可夫模型具体的数学描述也可参考{\chapterthree}中的相关内容。
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
......@@ -175,6 +173,15 @@
\parinterval 这里将会给出另一个翻译模型,能在一定程度上解决上面提到的问题\upcite{DBLP:journals/coling/BrownPPM94,och2003systematic}。该模型把目标语言生成源语言的过程分解为如下几个步骤:首先,确定每个目标语言单词生成源语言单词的个数,这里把它称为{\small\sffamily\bfseries{繁衍率}}\index{繁衍率}或{\small\sffamily\bfseries{产出率}}\index{产出率}(Fertility)\index{Fertility};其次,决定目标语言句子中每个单词生成的源语言单词都是什么,即决定生成的第一个源语言单词是什么,生成的第二个源语言单词是什么,以此类推。这样每个目标语言单词就对应了一个源语言单词列表;最后把各组源语言单词列表中的每个单词都放置到合适的位置上,完成目标语言译文到源语言句子的生成。
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter6/Figures/figure-probability-translation-process}
\caption{基于产出率的翻译模型执行过程}
\label{fig:6-5}
\end{figure}
%----------------------------------------------
\parinterval 对于句对$(\seq{s},\seq{t})$,令$\varphi$表示产出率,同时令${\tau}$表示每个目标语言单词对应的源语言单词列表。图{\ref{fig:6-5}}描述了一个英语句子生成汉语句子的过程。
\begin{itemize}
......@@ -184,18 +191,8 @@
\item 其次,确定英语句子中每个单词生成的汉语单词列表。比如“Scientists”生成“科学家”和“们”两个汉语单词,可表示为${\tau}_1=\{{\tau}_{11}=\textrm{“科学家”},{\tau}_{12}=\textrm{“们”}\}$。 这里用特殊的空标记NULL表示翻译对空的情况;
\vspace{0.3em}
\item 最后,把生成的所有汉语单词放在合适的位置。比如“科学家”和“们”分别放在$\seq{s}$的位置1和位置2。可以用符号$\pi$记录生成的单词在源语言句子$\seq{s}$中的位置。比如“Scientists” 生成的汉语单词在$\seq{s}$ 中的位置表示为${\pi}_{1}=\{{\pi}_{11}=1,{\pi}_{12}=2\}$。
\vspace{0.3em}
\end{itemize}
%----------------------------------------------
\begin{figure}[htp]
\centering
\input{./Chapter6/Figures/figure-probability-translation-process}
\caption{基于产出率的翻译模型执行过程}
\label{fig:6-5}
\end{figure}
%----------------------------------------------
\parinterval 为了表述清晰,这里重新说明每个符号的含义。$\seq{s}$、$\seq{t}$、$m$和$l$分别表示源语言句子、目标语言译文、源语言单词数量以及译文单词数量。$\mathbf{\varphi}$、$\mathbf{\tau}$和$\mathbf{\pi}$分别表示产出率、生成的源语言单词以及它们在源语言句子中的位置。${\varphi}_{i}$表示第$i$个目标语言单词$t_i$的产出率。${\tau}_{i}$和${\pi}_i$ 分别表示$t_i$生成的源语言单词列表及其在源语言句子$\seq{s}$中的位置列表。
\parinterval 可以看出,一组$\tau$和$\pi$(记为$\langle\tau,\pi\rangle$)可以决定一个对齐$\seq{a}$和一个源语句子$\seq{s}$。
......@@ -232,7 +229,6 @@
%----------------------------------------------
\begin{itemize}
\vspace{0.5em}
\item 第一部分:对每个$i\in[1,l]$的目标语单词的产出率建模({\color{red!70} 红色}),即$\varphi_i$的生成概率。它依赖于$\seq{t}$和区间$[1,i-1]$的目标语单词的产出率$\varphi_1^{i-1}$\footnote{这里约定,当$i=1$ 时,$\varphi_1^0$ 表示空。};
\vspace{0.5em}
\item 第二部分:对$i=0$时的产出率建模({\color{blue!70} 蓝色}),即空标记$t_0$的产出率生成概率。它依赖于$\seq{t}$和区间$[1,l]$的目标语单词的产出率$\varphi_1^l$;
......@@ -251,7 +247,7 @@
\subsection{IBM 模型3}
\parinterval IBM模型3通过一些假设对图\ref{fig:6-7}所表示的基本模型进行了化简。具体来说,对于每个$i\in[1,l]$,假设$\funp{P}(\varphi_i |\varphi_1^{i-1},\seq{t})$仅依赖于$\varphi_i$和$t_i$,$\funp{P}(\pi_{ik}|\pi_{i1}^{k-1},\pi_1^{i-1},\tau_0^l,\varphi_0^l,\seq{t})$仅依赖于$\pi_{ik}$、$i$、$m$和$l$。而对于所有的$i\in[0,l]$,假设$\funp{P}(\tau_{ik}|\tau_{i1}^{k-1},\tau_1^{i-1},\varphi_0^l,\seq{t})$仅依赖于$\tau_{ik}$和$t_i$。这些假设的形式化描述为:
\vspace{-0.5em}
\begin{eqnarray}
\funp{P}(\varphi_i|\varphi_1^{i-1},\seq{t}) & = &{\funp{P}(\varphi_i|t_i)} \label{eq:6-10} \\
\funp{P}(\tau_{ik} = s_j |\tau_{i1}^{k-1},\tau_{1}^{i-1},\varphi_0^l,\seq{t}) & = & t(s_j|t_i) \label{eq:6-11} \\
......@@ -268,7 +264,6 @@
\end{eqnarray}
否则
\begin{eqnarray}
\funp{P}(\pi_{0k}=j|\pi_{01}^{k-1},\pi_1^l,\tau_0^l,\varphi_0^l,\seq{t}) & = & 0
\label{eq:6-14}
......@@ -311,7 +306,6 @@ m-\varphi_0\\
p_0+p_1 & = & 1 \label{eq:6-21}
\end{eqnarray}
}
%----------------------------------------------------------------------------------------
% NEW SUB-SECTION
%----------------------------------------------------------------------------------------
......
......@@ -5,9 +5,9 @@
\begin{scope}[minimum height = 18pt]
\node[anchor=east] (s0) at (-0.5em, 0) {$\seq{s}$};
\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\seq{t}$};
......@@ -16,16 +16,16 @@
\begin{scope}[xshift=15em,minimum height = 18pt]
\begin{scope}[xshift=17em,minimum height = 18pt]
\node[anchor=east] (s0) at (-0.5em, 0) {$\seq{s}$};
\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\seq{t}$};
{
\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{There is}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (t1) at (0, -1.5) {\footnotesize{There is}};
\path[<->, thick] (s2.south) edge (t1.north);
}
......@@ -37,17 +37,17 @@
\begin{scope}[yshift=-9.5em,minimum height = 18pt]
\node[anchor=east] (s0) at (-0.5em, 0) {$\seq{s}$};
\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=red!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\seq{t}$};
{
\node[anchor=west,fill=gray!20] (t1) at (0, -1.5) {\footnotesize{There is}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (t1) at (0, -1.5) {\footnotesize{There is}};
\path[<->, thick] (s2.south) edge (t1.north);
}
{
\node[anchor=west,fill=red!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
\path[<->, thick] (s3.south) edge (t2.north);
}
\node[anchor=north] (l) at ([xshift=7em,yshift=-0.5em]t0.south) {\small{(c)\ 找到译文第二个词}};
......@@ -56,24 +56,24 @@
\begin{scope}[xshift=15em,yshift=-9.5em,minimum height = 18pt]%[scale=0.5]
\begin{scope}[xshift=17em,yshift=-9.5em,minimum height = 18pt]%[scale=0.5]
\node[anchor=east] (s0) at (-0.5em, 0) {$\seq{s}$};
\node[anchor=west,fill=red!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\seq{t}$};
{
\node[anchor=west,fill=gray!20] (t1) at (0, -1.5) {\footnotesize{There is}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (t1) at (0, -1.5) {\footnotesize{There is}};
\path[<->, thick] (s2.south) edge (t1.north);
}
{
\node[anchor=west,fill=gray!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
\path[<->, thick] (s3.south) edge (t2.north);
}
{
\node[anchor=west,fill=red!20] (t3) at ([xshift=1em]t2.east) {\footnotesize{on the table}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (t3) at ([xshift=1em]t2.east) {\footnotesize{on the table}};
\path[<->, thick] (s1.south) edge (t3.north);
}
\node[anchor=north] (l) at ([xshift=7em,yshift=-0.5em]t0.south) {\small{(d)\ 找到译文第三个词}};
......
......@@ -87,9 +87,9 @@
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (lc3) at (c11) {};
\begin{pgfonlayer}{background}
\node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a03) (a11)] (phrase1) {};
\node [rectangle,draw=green,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a03) (a11)] (phrase1) {};
\node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (b03) (b12)] (phrase2) {};
\node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (c03) (c21)] (phrase3) {};
\node [rectangle,draw=green,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (c03) (c21)] (phrase3) {};
\end{pgfonlayer}
\end{scope}
......
......@@ -6,17 +6,17 @@
\begin{scope}[minimum height = 18pt]
{\small
\node[anchor=north,fill=green!20] (s1) at (0,0) {进口};
\node [anchor=north,fill=red!20] (s2) at ([xshift=4em,yshift=0em]s1.north) {大幅度};
\node[anchor=north,fill=blue!20] (s3) at ([xshift=4.5em,yshift=0em]s2.north) {下降 了};
\node[anchor=north,fill=green!20,draw,thick,rounded corners=0.3em] (s1) at (0,0) {进口};
\node [anchor=north,fill=red!20,draw,thick,rounded corners=0.3em] (s2) at ([xshift=4em,yshift=0em]s1.north) {大幅度};
\node[anchor=north,fill=blue!20,draw,thick,rounded corners=0.3em] (s3) at ([xshift=4.5em,yshift=0em]s2.north) {下降 了};
\node[anchor=west,fill=green!20] (t1) at ([xshift=0em,yshift=-4em]s1.west) {The imports have};
\node[anchor=north,fill=red!20] (t2) at ([xshift=8em,yshift=0em]t1.north) {drastically};
\node[anchor=north,fill=blue!20] (t3) at ([xshift=5.7em,yshift=0em]t2.north) {fallen};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (t1) at ([xshift=0em,yshift=-4em]s1.west) {The imports have};
\node[anchor=north,fill=red!20,draw,thick,rounded corners=0.3em] (t2) at ([xshift=8em,yshift=0em]t1.north) {drastically};
\node[anchor=north,fill=blue!20,draw,thick,rounded corners=0.3em] (t3) at ([xshift=5.7em,yshift=0em]t2.north) {fallen};
\path[<->, thick] (s1.south) edge (t1.north);
\path[<->, thick] (s2.south) edge (t2.north);
\path[<->, thick] (s3.south) edge (t3.north);
\path[<->, thick] ([yshift=-0.1em]s1.south) edge ([yshift=0.1em]t1.north);
\path[<->, thick] ([yshift=-0.1em]s2.south) edge ([yshift=0.1em]t2.north);
\path[<->, thick] ([yshift=-0.1em]s3.south) edge ([yshift=0.1em]t3.north);
}
\node[anchor=south] (s0) at ([xshift=-2em,yshift=0em]s1.south) {$\seq{s}$};
......
......@@ -66,10 +66,10 @@
}
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.1em,fill=ugreen!10] [fit = (line1)] (box1) {};
\node [rectangle,inner sep=0.1em,fill=green!10,draw,thick,rounded corners=0.3em] [fit = (line1)] (box1) {};
}
{
\node [rectangle,inner sep=0.1em,fill=red!10] [fit = (line2) (line3)] (box2) {};
\node [rectangle,inner sep=0.1em,fill=red!10,draw,thick,rounded corners=0.3em] [fit = (line2) (line3)] (box2) {};
}
\end{pgfonlayer}
......
......@@ -39,7 +39,7 @@
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la4) at (a41) {};
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la5) at (a30) {};
\node[anchor=west] (f1) at ([xshift=3em,yshift=0.8em]a43.east) {\small{$\funp{P}_{\textrm{lex}}(\bar{t}|\bar{s})=\sigma (t_1|s_1)\times$}};
\node[anchor=west] (f1) at ([xshift=4em,yshift=0.8em]a43.east) {\small{$\funp{P}_{\textrm{lex}}(\bar{t}|\bar{s})=\sigma (t_1|s_1)\times$}};
\node[anchor=north] (f2) at ([xshift=5.2em]f1.south) {\small{$\frac{1}{2}(\sigma (t_2|s_2)+\sigma (t_3|s_2))\times$}};
\node[anchor=north west] (f3) at (f2.south west) {\small{$\sigma (N|s_3)\times$}};
\node[anchor=north west] (f4) at (f3.south west) {\small{$\sigma (t_4|s_4)\times$}};
......
......@@ -6,20 +6,20 @@
\begin{scope}[minimum height = 18pt]
{\small
\node[anchor=north,fill=green!20] (s1) at (0,0) {进口};
\node [anchor=west,fill=red!20] (s2) at ([xshift=1em,yshift=0em]s1.east) {大幅度};
\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em,yshift=0em]s2.east) {下降\ \ \ };
\node[anchor=north,fill=green!20,draw,thick,rounded corners=0.3em] (s1) at (0,0) {进口};
\node [anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (s2) at ([xshift=1em,yshift=0em]s1.east) {大幅度};
\node[anchor=west,fill=blue!20,draw,thick,rounded corners=0.3em] (s3) at ([xshift=1em,yshift=0em]s2.east) {下降\ \ \ };
\node[anchor=west,fill=green!20] (t1) at ([xshift=0em,yshift=-4em]s1.west) {The imports have};
\node[anchor=west,fill=red!20] (t2) at ([xshift=1em,yshift=0em]t1.east) {drastically};
\node[anchor=west,fill=blue!20] (t3) at ([xshift=1em,yshift=0em]t2.east) {fallen};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (t1) at ([xshift=0em,yshift=-4em]s1.west) {The imports have};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (t2) at ([xshift=1em,yshift=0em]t1.east) {drastically};
\node[anchor=west,fill=blue!20,draw,thick,rounded corners=0.3em] (t3) at ([xshift=1em,yshift=0em]t2.east) {fallen};
\path[<->, thick] (s1.south) edge (t1.north);
\path[<->, thick] (s2.south) edge (t2.north);
\path[<->, thick] (s3.south) edge (t3.north);
\path[<->, thick] ([yshift=-0.1em]s1.south) edge ([yshift=0.1em]t1.north);
\path[<->, thick] ([yshift=-0.1em]s2.south) edge ([yshift=0.1em]t2.north);
\path[<->, thick] ([yshift=-0.1em]s3.south) edge ([yshift=0.1em]t3.north);
}
\node[anchor=south] (s0) at ([xshift=-3em,yshift=0em]s1.south) {源语言:};
\node[anchor=south] (s0) at ([xshift=-3.5em,yshift=0em]s1.south) {源语言:};
\node[anchor=east] (t0) at ([xshift=0em,yshift=-3.5em]s0.east) {目标语言:};
\end{scope}
......
......@@ -60,7 +60,7 @@
\end{scope}
\begin{scope}[xshift = 1.5in, yshift = 1.3in]
\begin{scope}[xshift = 1.7in, yshift = 1.3in]
{\scriptsize
\node (rules) {\textbf{抽取得到的短语:}};
\draw[-] (rules.south west)--([xshift=2.0in]rules.south west);
......
......@@ -50,19 +50,19 @@
{
\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a75.east) {\footnotesize{$\bar{s}_i$: 天气\ \ \ \ \ \ }};
\node [anchor=west] (p1line1) at ([xshift=5em,yshift=1em]a75.east) {\footnotesize{$\bar{s}_i$: 天气\ \ \ \ \ \ }};
\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\bar{t}_i$: The\ \ \ weather\ \ \ \ \ }};
\node [anchor=west] (p2line1) at ([xshift=4em]a72.east) {\footnotesize{$\bar{s}_j$: 真\ \ \ \ \ }};
\node [anchor=west] (p2line1) at ([xshift=5em]a72.east) {\footnotesize{$\bar{s}_j$: 真\ \ \ \ \ }};
\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: very\ \ \ good\ \ \ \ \ \ \ \ }};
\node [anchor=east] (p2line3) at ([xshift=0em,yshift=-4em]p1line2.east) {};
\begin{pgfonlayer}{background}
{
\node [rectangle,draw=red,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a04) (a14)] (phrase1) {};
\node [rectangle,draw=ugreen,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a33) (a42)] (phrase2) {};
\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line2)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=green!10] [fit = (p2line1) (p2line2) (p2line3)] (box2) {};
\node [rectangle,draw=green,thick,inner sep=0.4em,fill=white,drop shadow] [fit = (a33) (a42)] (phrase2) {};
\node [rectangle,inner sep=0.2em,fill=red!15,draw=red,thick] [fit = (p1line1) (p1line2)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=green!15,draw=green,thick] [fit = (p2line1) (p2line2) (p2line3)] (box2) {};
}
\end{pgfonlayer}
......
......@@ -4,9 +4,9 @@
\begin{tikzpicture}
\begin{scope}
\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=red!20,rounded corners=0.3em];
\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=blue!20,rounded corners=0.3em];
\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=green!20,rounded corners=0.3em];
\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=red!20,rounded corners=0.3em,draw,thick];
\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=blue!20,rounded corners=0.3em,draw,thick];
\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=green!20,rounded corners=0.3em,draw,thick];
\node [datanode,anchor=north west,minimum height=1.7em,minimum width=8em] (bitext) at (0,0) {{ \small{训练用双语数据}}};
\node [modelnode,anchor=north,minimum height=1.7em,minimum width=8em] (phrase) at ([yshift=-1.5em]bitext.south) {{ \small{短语抽取及打分}}};
......@@ -20,16 +20,16 @@
\node [decodingnode,anchor=north,minimum height=1.7em,minimum width=8em] (decoding) at ([yshift=-2em]reordertable.south) {{ \small{解码器}}};
\draw [->,very thick] ([yshift=-0.1em]bitext.south) -- ([yshift=0.1em]phrase.north);
\draw [->,very thick] (bitext.south east) -- ([yshift=0.1em]reorder.north west);
\draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);
\draw [->,very thick] ([yshift=-0.1em]phrase.south) -- ([yshift=0.1em]phrasetable.north);
\draw [->,very thick] ([yshift=-0.1em]reorder.south) -- ([yshift=0.1em]reordertable.north);
\draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmtable.north);
\draw [->,thick] ([yshift=-0.1em]bitext.south) -- ([yshift=0.1em]phrase.north);
\draw [->,thick] (bitext.south east) -- ([yshift=0.1em]reorder.north west);
\draw [->,thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);
\draw [->,thick] ([yshift=-0.1em]phrase.south) -- ([yshift=0.1em]phrasetable.north);
\draw [->,thick] ([yshift=-0.1em]reorder.south) -- ([yshift=0.1em]reordertable.north);
\draw [->,thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmtable.north);
\draw [->,very thick] ([yshift=-0.1em]phrasetable.south east) -- ([yshift=0.1em,xshift=-3em]decoding.north);
\draw [->,very thick] ([yshift=-0.1em]reordertable.south) -- ([yshift=0.1em,xshift=0em]decoding.north);
\draw [->,very thick] ([yshift=-0.1em]lmtable.south west) -- ([yshift=0.1em,xshift=3em]decoding.north);
\draw [->,thick] ([yshift=-0.1em]phrasetable.south east) -- ([yshift=0.1em,xshift=-3em]decoding.north);
\draw [->,thick] ([yshift=-0.1em]reordertable.south) -- ([yshift=0.1em,xshift=0em]decoding.north);
\draw [->,thick] ([yshift=-0.1em]lmtable.south west) -- ([yshift=0.1em,xshift=3em]decoding.north);
\end{scope}
\end{tikzpicture}
......
......@@ -6,17 +6,17 @@
\begin{scope}[minimum height = 20pt]
\node[anchor=east] (s0) at (-0.5em, 0) {$\seq{s}$};
\node[anchor=west,fill=green!20] (s1) at (0, 0) {\small{\ \ 桌子\ \ \ \ \;}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s1) at (0, 0) {\small{\ \ 桌子\ \ \ \ \;}};
\node[anchor=south] (n1) at ([xshift=-2.5em,yshift=-0.5em]s1.north) {\small{1}};
\node[anchor=south] (n2) at ([xshift=-0.7em,yshift=-0.5em]s1.north) {\small{2}};
\node[anchor=south] (n3) at ([xshift=1.2em,yshift=-0.5em]s1.north) {\small{3}};
\node[anchor=south] (n4) at ([xshift=2.7em,yshift=-0.5em]s1.north) {\small{4}};
\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\small{苹果}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (s2) at ([xshift=1em]s1.east) {\small{苹果}};
\node[anchor=south] (n5) at ([yshift=-0.5em]s2.north) {\small{5}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\seq{t}$};
\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\small{the apple}};
\node[anchor=west,fill=green!20] (t2) at ([xshift=1.3em]t1.east) {\small{on the table}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (t1) at (0, -1.5) {\small{the apple}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (t2) at ([xshift=1.3em]t1.east) {\small{on the table}};
\path[<->, thick] (s1.south) edge (t2.north);
\path[<->, thick] (s2.south) edge (t1.north);
......@@ -34,8 +34,8 @@
\node[anchor=north] (d1) at ([xshift=-0.1em,yshift=-0.1em]distance.south) {+4};
\node[anchor=north] (d2) at ([yshift=-1.8em]d1.south) {-5};
\node[anchor=north west,fill=blue!20] (m1) at ([xshift=-1em,yshift=-0.0em]t1.south west) {\small{$\rm{start}_1\ \ -\ \ \rm{end}_{0}\ \ -\ \ 1$\quad =\quad 5\ -\ 0\ -\ 1}};
\node[anchor=north west,fill=blue!20] (m2) at ([xshift=-1em,yshift=-0.0em]t2.south west) {\small{$\rm{start}_2\ \ -\ \ \rm{end}_{1}\ \ -\ \ 1$\quad =\quad 1\ -\ 5\ -\ 1}};
\node[anchor=north west,fill=blue!20,draw,thick,rounded corners=0.3em] (m1) at ([xshift=-1em,yshift=-0.0em]t1.south west) {\small{$\rm{start}_1\ \ -\ \ \rm{end}_{0}\ \ -\ \ 1$\quad =\quad 5\ -\ 0\ -\ 1}};
\node[anchor=north west,fill=blue!20,draw,thick,rounded corners=0.3em] (m2) at ([xshift=-1em,yshift=-0.0em]t2.south west) {\small{$\rm{start}_2\ \ -\ \ \rm{end}_{1}\ \ -\ \ 1$\quad =\quad 1\ -\ 5\ -\ 1}};
\draw[-] ([xshift=0.08in]target.south west)--([xshift=2.4in]target.south west);
......
......@@ -6,15 +6,15 @@
\begin{scope}[minimum height = 20pt]
\node[anchor=east] (s0) at (-0.5em, 0) {$\seq{s}$};
\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{在 桌子 上 的}};
\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{苹果}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (s1) at (0, 0) {\footnotesize{在 桌子 上 的}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (s2) at ([xshift=2em]s1.east) {\footnotesize{苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\seq{t}$};
\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{the apple}};
\node[anchor=west,fill=green!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{on the table}};
\node[anchor=west,fill=red!20,draw,thick,rounded corners=0.3em] (t1) at (0, -1.5) {\footnotesize{the apple}};
\node[anchor=west,fill=green!20,draw,thick,rounded corners=0.3em] (t2) at ([xshift=2em]t1.east) {\footnotesize{on the table}};
\path[<->, thick] (s1.south) edge (t2.north);
\path[<->, thick] (s2.south) edge (t1.north);
\path[<->, thick] ([yshift=-0.1em]s1.south) edge ([yshift=0.1em]t2.north);
\path[<->, thick] ([yshift=-0.1em]s2.south) edge ([yshift=0.1em]t1.north);
\end{scope}
\end{tikzpicture}
......
......@@ -27,7 +27,7 @@
\node[anchor=north] (label3) at ([xshift=0em,yshift=-2.5em]label2.north) {取值};
}
\node[anchor=north] (l1) at ([xshift=0em,yshift=-2.5em]x3.south) {\small{(a)搜索空间}};
\node[anchor=north] (l1) at ([xshift=0em,yshift=-1.0em]x3.south) {\small{(a)搜索空间}};
\end{scope}
\begin{scope}[scale=0.55,xshift=3.2in]
......@@ -68,7 +68,7 @@
\node[anchor=north] (e4) at ([xshift=0,yshift=-0.2em]e3.south) {$w_M = 1.00$};
}
\node[anchor=north] (l1) at ([xshift=0em,yshift=-2.5em]x3.south) {\small{(b)一条搜索路径}};
\node[anchor=north] (l1) at ([xshift=0em,yshift=-1.0em]x3.south) {\small{(b)一条搜索路径}};
\end{scope}
\begin{scope}[scale=0.55,xshift=6.8in]
......@@ -119,6 +119,6 @@
\node[anchor=north] (label2) at ([xshift=0em,yshift=-2.5em]label1.north) {种组合};
}
\node[anchor=north] (l1) at ([xshift=0em,yshift=-2.5em]x3.south) {\small{(c)多条搜索路径}};
\node[anchor=north] (l1) at ([xshift=0em,yshift=-1.0em]x3.south) {\small{(c)多条搜索路径}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -55,14 +55,11 @@
\node [anchor=west] (p1line1) at ([xshift=3.5em,yshift=0.5em]a75.east) {\footnotesize{M(monotone):单调调序}};
\node [anchor=north west] (p1line2) at ([xshift=0,yshift=-1em]p1line1.south west) {\footnotesize{S(swap): 与前面一个短语}};
\node [anchor=north west] (p1line3) at ([xshift=3.8em]p1line2.south west) {\footnotesize{位置进行交换}};
\node [anchor=north west] (p1line4) at ([xshift=-3.5em,yshift=-1em]p1line3.south west) {\footnotesize{D(discontinuous):非连续调序}};
\node [anchor=north west] (p1line4) at ([xshift=-3.8em,yshift=-1em]p1line3.south west) {\footnotesize{D(discontinuous):非连续调序}};
\node [anchor=east] (p1line5) at ([xshift=0em,yshift=3em]p1line4.east) {};
\node [anchor=east] (p1line6) at ([xshift=0em,yshift=7em]p1line4.east) {};
%\node [anchor=west] (p2line1) at ([xshift=4em]a73.east) {\footnotesize{$\bar{s}_j$: 真\ \ \ 好 \ \ }};
%\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: very\ \ \ good\ \ \ \ \ \ \ \ }};
\begin{pgfonlayer}{background}
{
\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a05)] (phrase1) {};
......@@ -70,9 +67,9 @@
\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a31) (a32)] (phrase3) {};
\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a43) (a63)] (phrase4) {};
\node [rectangle,thick,inner sep=0.3em,fill=blue!40,drop shadow,fill opacity=0.85] [fit = (a70)] (phrase5) {};
\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line6)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=ugreen!10] [fit = (p1line2) (p1line3) (p1line5)] (box2) {};
\node [rectangle,inner sep=0.2em,fill=orange!10] [fit = (p1line4)] (box3) {};
\node [rectangle,inner sep=0.2em,fill=red!10,draw,thick,rounded corners=0.3em] [fit = (p1line1) (p1line6)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=green!10,draw,thick,rounded corners=0.3em] [fit = (p1line2) (p1line3) (p1line5)] (box2) {};
\node [rectangle,inner sep=0.2em,fill=orange!10,draw,thick,rounded corners=0.3em] [fit = (p1line4)] (box3) {};
}
\end{pgfonlayer}
......
......@@ -41,18 +41,18 @@
\node[tgtnode] (tgt7) at ([yshift=-0.5*1.0cm]tgt6.north east) {\scriptsize{?}};
\node[tgtnode] (tgt8) at ([yshift=-0.5*1.0cm]tgt7.north east) {\scriptsize{EOS}};
\node [anchor=west] (p1line1) at ([xshift=4em,yshift=1em]a57.east) {\footnotesize{$\bar{s}_i$: 什么\ \ \ \ \ \ }};
\node [anchor=west] (p1line1) at ([xshift=5em,yshift=1em]a57.east) {\footnotesize{$\bar{s}_i$: 什么\ \ \ \ \ \ }};
\node [anchor=north west] (p1line2) at ([xshift=0]p1line1.south west) {\footnotesize{$\bar{t}_i$: learned\ \ \ nothing\ \ \ ? \ \ \ \ \ \ \ \ \ \ \ \ }};
\node [anchor=west] (p2line1) at ([xshift=4em]a53.east) {\footnotesize{$\bar{s}_j$: 到\ \ \ ?}};
\node [anchor=west] (p2line1) at ([xshift=5em]a53.east) {\footnotesize{$\bar{s}_j$: 到\ \ \ ?}};
\node [anchor=north west] (p2line2) at ([xshift=0]p2line1.south west) {\footnotesize{$\bar{t}_j$: Have\ \ \ you\ \ \ learned\ \ \ nothing}};
\node [anchor=east] (p1line3) at ([xshift=0em,yshift=2.9cm]p2line2.east) {};
\begin{pgfonlayer}{background}
\node [rectangle,draw=red,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a26) (a44)] (phrase1) {};
\node [rectangle,draw=ugreen,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a01) (a32)] (phrase2) {};
\node [rectangle,inner sep=0.2em,fill=red!10] [fit = (p1line1) (p1line2) (p1line3)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=green!10] [fit = (p2line1) (p2line2)] (box2) {};
\node [rectangle,draw=green,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (a01) (a32)] (phrase2) {};
\node [rectangle,inner sep=0.2em,fill=red!15,draw=red,thick] [fit = (p1line1) (p1line2) (p1line3)] (box1) {};
\node [rectangle,inner sep=0.2em,fill=green!15,draw=green,thick] [fit = (p2line1) (p2line2)] (box2) {};
\end{pgfonlayer}
\draw [->,thick,dotted] ([yshift=-0.8em]phrase1.east) .. controls +(east:1.5) and +(west:1) .. (box1.west);
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -28,7 +28,7 @@
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=blue!10!white] [fit = (c1) (c21) (c3) (c6) (c7) (c8) (c11)] (gl1) {};
\node [rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (c1) (c21) (c3) (c6) (c7) (c8) (c11)] (gl1) {};
\end{pgfonlayer}
\end{scope}
......
......@@ -4,23 +4,23 @@
\begin{tikzpicture}
\begin{scope}
\tikzstyle{cnode} = [minimum width=7.0em,minimum height=2.5em,rounded corners=0.2em];
\tikzstyle{xnode} = [minimum width=4.5em,minimum height=2.5em,rounded corners=0.2em];
\tikzstyle{cnode} = [minimum width=7.0em,minimum height=2.5em,rounded corners=0.2em,draw,thick];
\tikzstyle{xnode} = [minimum width=4.5em,minimum height=2.5em,rounded corners=0.2em,draw,thick];
\node[cnode,anchor=south,minimum width=10.0em,fill=green!25,align=center] (cat0) at (0,0) {\footnotesize{(广义上)}\\\footnotesize{基于句法的模型}};
\node[cnode,anchor=north,fill=red!25,align=left] (cat1) at ([xshift=-6.5em,yshift=-2em]cat0.south) {\footnotesize{基于形式文法}\\\footnotesize{的模型}};
\node[cnode,anchor=north,fill=blue!25,align=left] (cat2) at ([xshift=6.5em,yshift=-2em]cat0.south) {\footnotesize{基于语言学}\\\footnotesize{句法的模型}};
\node[cnode,anchor=north,fill=red!25,align=left] (cat1) at ([xshift=-7.5em,yshift=-2em]cat0.south) {\footnotesize{基于形式文法}\\\footnotesize{的模型}};
\node[cnode,anchor=north,fill=blue!25,align=left] (cat2) at ([xshift=7.5em,yshift=-2em]cat0.south) {\footnotesize{基于语言学}\\\footnotesize{句法的模型}};
\node[xnode,anchor=north,fill=red!25,align=left] (itg) at ([xshift=-2.5em,yshift=-2.0em]cat1.south) {\footnotesize{反向转录}\\\footnotesize{文法}};
\node[xnode,anchor=north,fill=red!25,align=left] (hiero) at ([xshift=2.5em,yshift=-2.0em]cat1.south) {\footnotesize{层次短语}\\\footnotesize{模型}};
\node[xnode,anchor=north,fill=blue!25,align=left] (s2t) at ([xshift=-5.0em,yshift=-2.0em]cat2.south) {\footnotesize{串到树}\\\footnotesize{模型}};
\node[xnode,anchor=north,fill=red!25,align=left] (itg) at ([xshift=-3.5em,yshift=-2.0em]cat1.south) {\footnotesize{反向转录}\\\footnotesize{文法}};
\node[xnode,anchor=north,fill=red!25,align=left] (hiero) at ([xshift=3.5em,yshift=-2.0em]cat1.south) {\footnotesize{层次短语}\\\footnotesize{模型}};
\node[xnode,anchor=north,fill=blue!25,align=left] (s2t) at ([xshift=-5.5em,yshift=-2.0em]cat2.south) {\footnotesize{串到树}\\\footnotesize{模型}};
\node[xnode,anchor=north,fill=blue!25,align=left] (t2s) at ([xshift=0.0em,yshift=-2.0em]cat2.south) {\footnotesize{树到串}\\\footnotesize{模型}};
\node[xnode,anchor=north,fill=blue!25,align=left] (t2t) at ([xshift=5.0em,yshift=-2.0em]cat2.south) {\footnotesize{树到树}\\\footnotesize{模型}};
\node[xnode,anchor=north,fill=blue!25,align=left] (t2t) at ([xshift=5.5em,yshift=-2.0em]cat2.south) {\footnotesize{树到树}\\\footnotesize{模型}};
\draw [-,thick] ([yshift=0.1em,xshift=1em]cat0.south) -- ([xshift=-1.5em,yshift=-0.1em]cat2.north);
\draw [-,thick] ([yshift=0.1em,xshift=-1em]cat0.south) -- ([xshift=1.5em,yshift=-0.1em]cat1.north);
\draw [-,thick] ([yshift=-0.1em,xshift=1em]cat0.south) -- ([xshift=-1.5em,yshift=0.1em]cat2.north);
\draw [-,thick] ([yshift=-0.1em,xshift=-1em]cat0.south) -- ([xshift=1.5em,yshift=0.1em]cat1.north);
\draw [-,thick] ([yshift=0.1em]itg.north) -- ([xshift=-0.5em,yshift=-0.1em]cat1.south);
\draw [-,thick] ([yshift=0.1em]hiero.north) -- ([xshift=0.5em,yshift=-0.1em]cat1.south);
\draw [-,thick] ([yshift=0.1em]s2t.north) -- ([xshift=-0.8em,yshift=-0.1em]cat2.south);
......
......@@ -39,7 +39,7 @@
}
\end{scope}
\begin{scope}[xshift = 1.8in, yshift = 0.1in]
\begin{scope}[xshift = 2.0in, yshift = 0.1in]
\node (rules) {\textbf{层次短语翻译规则:}};
\draw[-] (rules.south west)--([xshift=1.8in]rules.south west);
......
......@@ -6,16 +6,16 @@
\Tree[.S [.NN ] [.VP [.AD ] [.VP [.VV ] [.AS ] ] ] ]
\end{scope}
\begin{scope}[xshift=1.25in,yshift=0.25in]
\begin{scope}[xshift=1.5in,yshift=0.25in]
\node [anchor=north west,align=left] (string1) at (0,0) {[S \\\hspace{1em}NN\\\hspace{1em}VP[\\\hspace{2.5em}AD \\\hspace{2.5em}VP[\\\hspace{4em}VV \\\hspace{4em}AS]]]};
\end{scope}
\begin{scope}[xshift=2.78in,yshift=-0.80in]
\begin{scope}[xshift=3.3in,yshift=-0.80in]
\node [anchor=west,align=left] (string2) at (0,0) {(S NN VP(AD \\ VP(VV AS)))};
\end{scope}
\node [anchor=north west] (cap1) at (-1.5em,-1in) {{(a) 树状表示}};
\node [anchor=west] (cap2) at ([xshift=0.5in]cap1.east) {{(b) 序列表示(缩进)}};
\node [anchor=west] (cap3) at ([xshift=0.3in]cap2.east) {{(c) 序列表示}};
\node [anchor=west] (cap2) at ([xshift=0.7in]cap1.east) {{(b) 序列表示(缩进)}};
\node [anchor=west] (cap3) at ([xshift=0.7in]cap2.east) {{(c) 序列表示}};
}
\end{tikzpicture}
\ No newline at end of file
......@@ -4,9 +4,9 @@
\begin{tikzpicture}
\begin{scope}
\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=blue!20,rounded corners=0.3em];
\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=red!20,rounded corners=0.3em];
\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=green!20,rounded corners=0.3em];
\tikzstyle{datanode} = [minimum width=7em,minimum height=1.7em,fill=blue!20,rounded corners=0.3em,draw,thick];
\tikzstyle{modelnode} = [minimum width=7em,minimum height=1.7em,fill=red!20,rounded corners=0.3em,draw,thick];
\tikzstyle{decodingnode} = [minimum width=7em,minimum height=1.7em,fill=green!20,rounded corners=0.3em,draw,thick];
\node [datanode,anchor=north west,minimum height=1.7em,minimum width=8em] (bitext) at (0,0) {{ \small{训练用双语数据}}};
\node [modelnode, anchor=north west,minimum height=1.7em,minimum width=8em] (gi) at ([xshift=2em,yshift=-0.2em]bitext.south east) {{ \small{文法(规则)抽取}}};
......@@ -21,18 +21,18 @@
\node [modelnode,anchor=south west,minimum height=1.7em,minimum width=8em] (lm) at ([xshift=2em,yshift=0.2em]tuning.north east) {{ \small{$n$-gram语言建模}}};
\node [datanode,anchor=south west,minimum height=1.7em,minimum width=8em] (lmmodel) at ([xshift=2em,yshift=0.2em]decoding.north east) {{ \small{语言模型}}};
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]bitext.east) -- ([yshift=0.1em]gi.north west);
\draw [->,very thick] ([yshift=-0.1em]gi.south west) -- ([yshift=0.3em,xshift=0.1em]birules.east);
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]birules.east) -- ([yshift=0.1em]training.north west);
\draw [->,very thick] ([yshift=-0.1em]training.south west) -- ([yshift=0.3em,xshift=0.1em]model.east);
\draw [->,very thick] ([yshift=-0.3em,xshift=0.1em]model.east) -- ([yshift=0.1em]tuning.north west);
\draw [->,very thick] ([yshift=-0.1em]tuning.south) -- ([yshift=0.1em]decoding.north);
\draw [->,very thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west);
\draw [->,thick] ([yshift=-0.3em,xshift=0.1em]bitext.east) -- ([yshift=0.1em]gi.north west);
\draw [->,thick] ([yshift=-0.1em]gi.south west) -- ([yshift=0.3em,xshift=0.1em]birules.east);
\draw [->,thick] ([yshift=-0.3em,xshift=0.1em]birules.east) -- ([yshift=0.1em]training.north west);
\draw [->,thick] ([yshift=-0.1em]training.south west) -- ([yshift=0.3em,xshift=0.1em]model.east);
\draw [->,thick] ([yshift=-0.3em,xshift=0.1em]model.east) -- ([yshift=0.1em]tuning.north west);
\draw [->,thick] ([yshift=-0.1em]tuning.south) -- ([yshift=0.1em]decoding.north);
\draw [->,thick] ([yshift=0.3em,xshift=0.1em]tuningdata.east) -- ([yshift=-0.1em]tuning.south west);
\draw [->,very thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);
\draw [->,very thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmmodel.north);
\draw [->,very thick] ([yshift=0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=-0.1em]tuning.south east);
\draw [->,very thick] ([yshift=-0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=0.1em]decoding.north east);
\draw [->,thick] ([yshift=-0.1em]monotext.south) -- ([yshift=0.1em]lm.north);
\draw [->,thick] ([yshift=-0.1em]lm.south) -- ([yshift=0.1em]lmmodel.north);
\draw [->,thick] ([yshift=0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=-0.1em]tuning.south east);
\draw [->,thick] ([yshift=-0.3em,xshift=-0.1em]lmmodel.west) -- ([yshift=0.1em]decoding.north east);
\end{scope}
\end{tikzpicture}
\end{center}
......@@ -32,8 +32,8 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,draw=red,inner sep=1pt,thick,fill=white,drop shadow] [fit = (rule1s1) (rule1s4)] (rule1s) {};
\node [rectangle,draw=red,inner sep=2pt,thick,fill=white,drop shadow] [fit = (rule1t1) (rule1t4)] (rule1t) {};
\node [rectangle,draw=red,inner sep=1pt,thick,fill=white,drop shadow,rounded corners=0.3em] [fit = (rule1s1) (rule1s4)] (rule1s) {};
\node [rectangle,draw=red,inner sep=2pt,thick,fill=white,drop shadow,rounded corners=0.3em] [fit = (rule1t1) (rule1t4)] (rule1t) {};
\draw [<->,dotted,thick,red] ([xshift=0.1em]rule1s.east) -- ([xshift=-0.1em]rule1t.west);
}
\end{pgfonlayer}
......@@ -70,8 +70,8 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,draw=blue,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule2s1) (rule2s3)] (rule2s) {};
\node [rectangle,draw=blue,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule2t1) (rule2t3)] (rule2t) {};
\node [rectangle,draw=blue,inner sep=4pt,thick,fill=white,drop shadow,rounded corners=0.3em] [fit = (rule2s1) (rule2s3)] (rule2s) {};
\node [rectangle,draw=blue,inner sep=4pt,thick,fill=white,drop shadow,rounded corners=0.3em] [fit = (rule2t1) (rule2t3)] (rule2t) {};
\draw [<->,dotted,thick,blue] ([xshift=0.1em]rule2s.east) -- ([xshift=-0.1em]rule2t.west);
}
\end{pgfonlayer}
......@@ -92,8 +92,8 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,draw=ugreen,inner sep=1pt,thick,fill=white,drop shadow] [fit = (rule3s1) (rule3s2)] (rule3s) {};
\node [rectangle,draw=ugreen,inner sep=2pt,thick,fill=white,drop shadow] [fit = (rule3t1) (rule3t2)] (rule3t) {};
\node [rectangle,draw=ugreen,inner sep=1pt,thick,fill=white,drop shadow,rounded corners=0.3em] [fit = (rule3s1) (rule3s2)] (rule3s) {};
\node [rectangle,draw=ugreen,inner sep=2pt,thick,fill=white,drop shadow,rounded corners=0.3em] [fit = (rule3t1) (rule3t2)] (rule3t) {};
\draw [<->,dotted,thick,ugreen] ([xshift=0.1em]rule3s.east) -- ([xshift=-0.1em]rule3t.west);
}
\end{pgfonlayer}
......@@ -122,8 +122,8 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,draw=orange,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule4s1) (rule4s3)] (rule4s) {};
\node [rectangle,draw=orange,inner sep=4pt,thick,fill=white,drop shadow] [fit = (rule4t1) (rule4t3)] (rule4t) {};
\node [rectangle,draw=orange,inner sep=4pt,thick,fill=white,drop shadow,rounded corners=0.3em] [fit = (rule4s1) (rule4s3)] (rule4s) {};
\node [rectangle,draw=orange,inner sep=4pt,thick,fill=white,drop shadow,rounded corners=0.3em] [fit = (rule4t1) (rule4t3)] (rule4t) {};
\draw [<->,dotted,thick,orange] ([xshift=0.1em]rule4s.east) -- ([xshift=-0.1em]rule4t.west);
}
\end{pgfonlayer}
......
......@@ -65,7 +65,7 @@
\end{scope}
\begin{scope} [yshift = -1.87in, xshift = 1.9in]
\begin{scope} [yshift = -1.87in, xshift = 2.2in]
{
\begin{scope}[level distance=20pt]
\node[scale=0.8, inner sep=0.1cm,align=center,draw] (cfrag1) at
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -2,18 +2,18 @@
\begin{tikzpicture}
\begin{scope}
\node [anchor=west] (w0) at (0,0) {\small{${\mathbi{o}}_{i-3}$}};
\node [anchor=west] (w1) at ([xshift=2.8em]w0.east) {\small{${\mathbi{o}}_{i-2}$}};
\node [anchor=west] (w2) at ([xshift=2.8em]w1.east) {\small{${\mathbi{o}}_{i-1}$}};
\node [anchor=west] (w1) at ([xshift=3.5em]w0.east) {\small{${\mathbi{o}}_{i-2}$}};
\node [anchor=west] (w2) at ([xshift=3.5em]w1.east) {\small{${\mathbi{o}}_{i-1}$}};
\node [anchor=north] (index0) at ([yshift=0.5em]w0.south) {\footnotesize(index)};
\node [anchor=north] (index1) at ([yshift=0.5em]w1.south) {\footnotesize(index)};
\node [anchor=north] (index2) at ([yshift=0.5em]w2.south) {\footnotesize(index)};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e0) at ([yshift=1em]w0.north) {\footnotesize{${\mathbi{e}}_1={\mathbi{o}}_{i-3} {\mathbi{C}}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e1) at ([yshift=1em]w1.north) {\footnotesize{${\mathbi{e}}_2={\mathbi{o}}_{i-2} {\mathbi{C}}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!20!white] (e2) at ([yshift=1em]w2.north) {\footnotesize{${\mathbi{e}}_3={\mathbi{o}}_{i-1} {\mathbi{C}}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!30!white] (e0) at ([yshift=1em]w0.north) {\footnotesize{${\mathbi{e}}_1={\mathbi{o}}_{i-3} {\mathbi{C}}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!30!white] (e1) at ([yshift=1em]w1.north) {\footnotesize{${\mathbi{e}}_2={\mathbi{o}}_{i-2} {\mathbi{C}}$}};
\node [anchor=south,draw,inner sep=3pt,fill=blue!30!white] (e2) at ([yshift=1em]w2.north) {\footnotesize{${\mathbi{e}}_3={\mathbi{o}}_{i-1} {\mathbi{C}}$}};
\node [anchor=south,draw,minimum width=11em,inner sep=3pt,fill=orange!20!white] (h0) at ([yshift=1.5em]e1.north) {\footnotesize{${\mathbi{h}}_0=\textrm{Tanh}([{\mathbi{e}}_1,{\mathbi{e}}_2,{\mathbi{e}}_3]{\mathbi{H}} + {\mathbi{d}})$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!20!white] (h1) at ([yshift=1.5em]h0.north) {\footnotesize{${\mathbi{y}}=\textrm{Softmax}({\mathbi{h}}_0 {\mathbi{U}})$}};
\node [anchor=south,draw,minimum width=11em,inner sep=3pt,fill=orange!40!white] (h0) at ([yshift=1.5em]e1.north) {\footnotesize{${\mathbi{h}}_0=\textrm{Tanh}([{\mathbi{e}}_1,{\mathbi{e}}_2,{\mathbi{e}}_3]{\mathbi{H}} + {\mathbi{d}})$}};
\node [anchor=south,draw,minimum width=9em,inner sep=3pt,fill=orange!40!white] (h1) at ([yshift=1.5em]h0.north) {\footnotesize{${\mathbi{y}}=\textrm{Softmax}({\mathbi{h}}_0 {\mathbi{U}})$}};
\node [anchor=south] (ylabel) at ([yshift=1em]h1.north) {\small{$\funp{P}(w_i|w_{i-3}w_{i-2}w_{i-1})$}};
\draw [->,line width=1pt] ([yshift=0.1em]w0.north) -- ([yshift=-0.1em]e0.south);
......@@ -35,9 +35,9 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,draw,inner sep=0.1em,fill=ugreen!20!white] [fit = (w0) (index0)] (wordbox0) {};
\node [rectangle,draw,inner sep=0.1em,fill=ugreen!20!white] [fit = (w1) (index1)] (wordbox1) {};
\node [rectangle,draw,inner sep=0.1em,fill=ugreen!20!white] [fit = (w2) (index2)] (wordbox2) {};
\node [rectangle,draw,inner sep=0.1em,fill=green!20!white] [fit = (w0) (index0)] (wordbox0) {};
\node [rectangle,draw,inner sep=0.1em,fill=green!20!white] [fit = (w1) (index1)] (wordbox1) {};
\node [rectangle,draw,inner sep=0.1em,fill=green!20!white] [fit = (w2) (index2)] (wordbox2) {};
}
\end{pgfonlayer}
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}[yscale=0.2,xscale=0.8]
\begin{scope}[yscale=0.2,xscale=1]
\draw[-,very thick,ublue,domain=-4.2:3.5,samples=100] plot (\x,{ - 1/14 * (\x + 4) * (\x + 1) * (\x - 1) * (\x - 3)});
{
\draw[-,very thick,ugreen,domain=-3.8:3.0,samples=100] plot (\x,{ - 1/14 * (4*\x*\x*\x + 3*\x*\x - 26*\x - 1)});
......
......@@ -11,7 +11,7 @@
\end{scope}
%%%------------------------------------------------------------------------------------------------------------
\begin{scope}[xshift=1.6in]
\begin{scope}[xshift=1.7in]
\draw[->, line width=1pt](-1.4,0)--(1.4,0)node[left,below,font=\scriptsize]{$x$};
\draw[->, line width=1pt](0,-1.4)--(0,1.4)node[right,font=\scriptsize]{$y$};
......@@ -26,7 +26,7 @@
\end{scope}
%%%------------------------------------------------------------------------------------------------------------
\begin{scope}[xshift=3.2in]
\begin{scope}[xshift=3.4in]
\draw[->, line width=1pt](-1.4,0)--(1.4,0)node[left,below,font=\scriptsize]{$x$};
\draw[->, line width=1pt](0,-1.4)--(0,1.4)node[right,font=\scriptsize]{$y$};
\draw[dashed](0,1)--(1.4,1);
......@@ -51,7 +51,7 @@
\end{scope}
%%%------------------------------------------------------------------------------------------------------------
\begin{scope}[yshift=-1.8in,xshift=1.6in]
\begin{scope}[yshift=-1.8in,xshift=1.7in]
\draw[->, line width=1pt](-1.4,0)--(1.4,0)node[left,below,font=\scriptsize]{$x$};
\draw[->, line width=1pt](0,-1.4)--(0,1.4)node[right,font=\scriptsize]{$y$};
\foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\scriptsize]at(\x,0){\x};}
......@@ -62,7 +62,7 @@
\end{scope}
%%%------------------------------------------------------------------------------------------------------------
\begin{scope}[yshift=-1.8in,xshift=3.2in]
\begin{scope}[yshift=-1.8in,xshift=3.4in]
\draw[->, line width=1pt](-1.4,0)--(1.4,0)node[left,below,font=\scriptsize]{$x$};
\draw[->, line width=1pt](0,-1.4)--(0,1.4)node[right,font=\scriptsize]{$y$};
\foreach \x in {-1.0,-0.5,0.0,0.5,1.0}{\draw(\x,0)--(\x,0.05)node[below,outer sep=2pt,font=\scriptsize]at(\x,0){\x};}
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,draw,fill=red!20,minimum height=1.8em,minimum width=2.5em] (h) at (0,0) {${\mathbi{h}}^{k-1}$};
\node [anchor=west,draw,fill=blue!20,minimum height=1.8em,minimum width=2.5em] (s) at ([xshift=6em]h.east) {${\mathbi{s}}^{k}$};
\node [anchor=west,draw,fill=green!20,minimum height=1.8em,minimum width=2.5em] (h2) at ([xshift=6em]s.east) {${\mathbi{h}}^{k}$};
\node [anchor=center,draw,fill=red!30,minimum height=1.8em,minimum width=2.5em] (h) at (0,0) {${\mathbi{h}}^{k-1}$};
\node [anchor=west,draw,fill=blue!30,minimum height=1.8em,minimum width=2.5em] (s) at ([xshift=6em]h.east) {${\mathbi{s}}^{k}$};
\node [anchor=west,draw,fill=green!30,minimum height=1.8em,minimum width=2.5em] (h2) at ([xshift=6em]s.east) {${\mathbi{h}}^{k}$};
\node [anchor=east] (prev) at ([xshift=-2em]h.west) {...};
\node [anchor=west] (next) at ([xshift=2em]h2.east) {...};
\draw [->,thick] ([xshift=0.1em]prev.east) -- ([xshift=-0.1em]h.west);
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at (0,0) {$\mathbi{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\mathbi{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\node [anchor=west,minimum height=1.7em,fill=blue!30,draw] (s) at (0,0) {$\mathbi{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!30,draw] (h2) at ([xshift=5.5em]s.east) {$\mathbi{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!30,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west);
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {${\mathbi{h}}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=6.0em]h.east) {${\mathbi{s}}^{K}$};
\node [anchor=center,minimum height=1.7em,fill=yellow!30,draw] (h) at (0,0) {${\mathbi{h}}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!30,draw] (s) at ([xshift=6.0em]h.east) {${\mathbi{s}}^{K}$};
\draw [->] (h.east) -- (s.west);
......
......@@ -16,7 +16,7 @@
{\draw [-,very thick,ublue,rounded corners=0.1em] (-1.5,0) -- (0,0) -- (0,1) -- (1.5,1);}
\end{scope}
%---------------------------------------------------------------------------------------------
\begin{scope}[xshift=1.6in]
\begin{scope}[xshift=1.8in]
{
\draw [->,thick] (-1.8,0) -- (1.8,0);
\draw [->,thick] (0,0) -- (0,2);
......@@ -31,7 +31,7 @@
{\draw [-,very thick,ublue,rounded corners=0.1em] (-1.5,0) -- (0.25,0) -- (0.25,1) -- (1.5,1);}
\end{scope}
%-----------------------------------------------------------------------------------------------
\begin{scope}[xshift=3.2in]
\begin{scope}[xshift=3.6in]
{
\draw [->,thick] (-1.8,0) -- (1.8,0);
\draw [->,thick] (0,0) -- (0,2);
......
......@@ -2,10 +2,10 @@
\begin{tikzpicture}
\begin{scope}[xshift=0.6in]
\setcounter{mycount1}{1}
\draw[step=0.5cm,color=orange!70,thick] (-1,-0.5) grid (1,0.5);
\draw[step=0.5cm,thick] (-1,-0.5) grid (1,0.5);
\foreach \y in {+0.25,-0.25}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=orange!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\node [fill=orange!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\addtocounter{mycount1}{1};
}
\node [anchor=south] (varlabel) at (0,0.6) {$\mathbi{s}$};
......@@ -14,10 +14,10 @@
\begin{scope}[xshift=2.1in]
\setcounter{mycount1}{1}
\draw[step=0.5cm,color=ugreen!70,thick] (-1,-0.5) grid (1,0);
\draw[step=0.5cm,thick] (-1,-0.5) grid (1,0);
\foreach \y in {-0.25}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=green!15,inner sep=0pt,minimum height=0.48cm,minimum width=0.48cm] at (\x,\y) {$1$};
\node [fill=green!30,inner sep=0pt,minimum height=0.48cm,minimum width=0.48cm] at (\x,\y) {$1$};
\addtocounter{mycount1}{1};
}
\node [anchor=south] (varlabel) at (0,0.1) {$\mathbi{b}$};
......@@ -28,40 +28,40 @@
\begin{scope}[yshift=-1in]
\setcounter{mycount1}{1}
\draw[step=0.5cm,color=orange!70,thick] (-1,-0.5) grid (1,0.5);
\draw[step=0.5cm,thick] (-1,-0.5) grid (1,0.5);
\foreach \y in {+0.25,-0.25}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=orange!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\node [fill=orange!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\addtocounter{mycount1}{1};
}
\node [anchor=south] (varlabel) at (0,0.6) {$\mathbi{s}$};
\end{scope}
\begin{scope}[yshift=-1in,xshift=1.5in]
\setcounter{mycount1}{1}
\draw[step=0.5cm,color=ugreen!70,thick] (-1,-0.5) grid (1,0.5);
\draw[step=0.5cm,thick] (-1,-0.5) grid (1,0.5);
\foreach \y in {+0.25}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=green!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$1$};
\node [fill=green!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$1$};
\addtocounter{mycount1}{1};
}
\foreach \y in {-0.25}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=purple!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$1$};
\node [fill=purple!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$1$};
\addtocounter{mycount1}{1};
}
\node [anchor=center] (plabel) at (-4.5em,0) {\huge{$\mathbf{+}$}};
\node [anchor=center] (plabel) at (-4.8em,0) {\huge{$\mathbf{+}$}};
\node [anchor=south] (varlabel) at (0,0.6) {$\mathbi{b}$};
\node [anchor=north] (labelc) at (0,-0.7) {\small{(c)张量的单元加运算}};
\end{scope}
\begin{scope}[yshift=-1in,xshift=3in]
\setcounter{mycount1}{2}
\draw[step=0.5cm,color=orange!70,thick] (-1,-0.5) grid (1,0.5);
\draw[step=0.5cm,thick] (-1,-0.5) grid (1,0.5);
\foreach \y in {+0.25,-0.25}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=orange!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\node [fill=orange!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\addtocounter{mycount1}{1};
}
\node [anchor=center] (plabel) at (-4.5em,0) {\huge{$\mathbf{=}$}};
\node [anchor=center] (plabel) at (-4.8em,0) {\huge{$\mathbf{=}$}};
\node [anchor=south] (varlabel) at (0,0.6) {$\mathbi{s}+\mathbi{b}$};
\end{scope}
......
......@@ -9,7 +9,7 @@
\node [anchor=north] (xlabel) at (6.5em, 0em) {\scriptsize{距离(km)}};
\end{scope}
\begin{scope}[xshift=10em]
\begin{scope}[xshift=12em]
\draw [->,thick] (0,0) -- (3.1,0);
\draw [->,thick] (0,0) -- (0, 2.1);
\draw [-,very thick,ublue,domain=0.3:2.6,samples=100] plot (\x,{ 0.5/\x});
......@@ -17,7 +17,7 @@
\node [anchor=north] (xlabel) at (6.5em, 0em) {\scriptsize{票价(元)}};
\end{scope}
\begin{scope}[xshift=20em]
\begin{scope}[xshift=24em]
\draw [->,thick] (0,0) -- (3.1,0);
\draw [->,thick] (0,0) -- (0, 2.1);
\node [anchor=east] (ylabel) at (0, 4.4em) {\footnotesize{$x_3$}};
......
......@@ -5,7 +5,7 @@
\node [anchor=west,inner sep=2pt] (c) at (e.east) {\small{$\mathbi{C}$}};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.4em,draw,fill=blue!20!white] [fit = (e) (c)] (box) {};
\node [rectangle,inner sep=0.4em,draw,fill=blue!30!white] [fit = (e) (c)] (box) {};
\end{pgfonlayer}
\draw [->,thick] ([yshift=-1em]box.south)--([yshift=-0.1em]box.south) node [pos=0,below] (bottom1) {\small{单词$w$的One-hot表示}};
......@@ -14,13 +14,13 @@
\node [anchor=south] (top2) at ([yshift=-0.3em]top1.north) {\small{单词$w$的分布式表示}};
{
\node [anchor=north west,fill=red!20!white] (cmatrix) at ([xshift=3em,yshift=1.0em]c.north east) {\scriptsize{$\begin{pmatrix} 1 & 0.2 & -0.2 & 8 & ... & 0 \\ 0.6 & 0.8 & -2 & 1 & ... & -0.2 \\ 8 & 0.2 & -1 & 0.9 & ... & 2.3 \\ 1 & 1.2 & -0.9 & 3 & ... & 0.2 \\ ... & ... & ... & ... & ... & ... \\ 1 & 0.3 & 3 & 0.9 & ... & 5.1 \end{pmatrix}$}};
\node [anchor=west,inner sep=2pt,fill=red!30!white] (c) at (e.east) {\small{$\mathbi{C}$}};
\draw [<-,thick] (c.east) -- ([xshift=3em]c.east);
\node [anchor=north west,fill=red!30!white] (cmatrix) at ([xshift=6em,yshift=1.0em]c.north east) {\scriptsize{$\begin{pmatrix} 1 & 0.2 & -0.2 & 8 & ... & 0 \\ 0.6 & 0.8 & -2 & 1 & ... & -0.2 \\ 8 & 0.2 & -1 & 0.9 & ... & 2.3 \\ 1 & 1.2 & -0.9 & 3 & ... & 0.2 \\ ... & ... & ... & ... & ... & ... \\ 1 & 0.3 & 3 & 0.9 & ... & 5.1 \end{pmatrix}$}};
\node [anchor=west,inner sep=2pt,fill=red!35!white] (c) at (e.east) {\small{$\mathbi{C}$}};
\draw [<-,thick] (c.east) -- ([xshift=6em]c.east);
}
{
\node [anchor=south,draw,fill=green!20!white] (e2) at ([yshift=1.5em]cmatrix.north) {\scriptsize{外部词嵌入系统得到的$\mathbi{C}$}};
\node [anchor=south,draw,fill=green!30!white] (e2) at ([yshift=1.5em]cmatrix.north) {\scriptsize{外部词嵌入系统得到的$\mathbi{C}$}};
\draw [->,very thick,dashed] (e2.south) -- (cmatrix.north);
}
......
......@@ -3,12 +3,12 @@
{
\begin{scope}[xshift=2in]
\node [anchor=north west] (o1) at (0,0) {\footnotesize{$\begin{bmatrix} 0.1 \\ -1 \\ 2 \\ ... \\ 0 \end{bmatrix}$}};
\node [anchor=north west] (o2) at ([xshift=1em]o1.north east) {\footnotesize{$\begin{bmatrix} 1 \\ 2 \\ 0.2 \\ ... \\ -1 \end{bmatrix}$}};
\node [anchor=north east] (v) at ([xshift=-0em]o1.north west) {\footnotesize{$\begin{matrix} \textrm{\ \ \ 属性}_1 \\ \textrm{\ \ \ 属性}_2 \\ \textrm{\ \ \ 属性}_3 \\ ... \\ \textrm{属性}_{512} \end{matrix}$}};
\node [anchor=north west] (o2) at ([xshift=2em]o1.north east) {\footnotesize{$\begin{bmatrix} 1 \\ 2 \\ 0.2 \\ ... \\ -1 \end{bmatrix}$}};
\node [anchor=north east] (v) at ([xshift=-2em]o1.north west) {\footnotesize{$\begin{matrix} \textrm{\ \ \ 属性}_1 \\ \textrm{\ \ \ 属性}_2 \\ \textrm{\ \ \ 属性}_3 \\ ... \\ \textrm{属性}_{512} \end{matrix}$}};
\node [anchor=south] (w1) at (o1.north) {\footnotesize{桌子}};
\node [anchor=south] (w2) at (o2.north) {\footnotesize{椅子}};
{
\node [anchor=south,fill=red!20!white] (cosine) at (w1.north) {\footnotesize{$\textrm{cos}(\textrm{‘桌子’},\textrm{‘椅子’})=0.5$}};
\node [anchor=south,fill=red!30!white,minimum width=12em] (cosine) at (w1.north) {\footnotesize{$\textrm{cos}(\textrm{‘桌子’},\textrm{‘椅子’})=0.5$}};
}
\end{scope}
}
......
......@@ -50,7 +50,7 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.2em,fill=blue!20,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
\node [rectangle,inner sep=0.2em,fill=blue!30,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
}
\end{pgfonlayer}
}
......@@ -149,7 +149,7 @@
\draw [->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. ([xshift=-0.2em]flabel.west) ;
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.2em,fill=blue!20,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
\node [rectangle,inner sep=0.2em,fill=blue!30,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
}
\end{pgfonlayer}
}
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,draw,fill=red!20,minimum height=1.8em,minimum width=2.5em] (h) at (0,0) {${\mathbi{h}}^{k-1}$};
\node [anchor=west,draw,fill=blue!20,minimum height=1.8em,minimum width=2.5em] (s) at ([xshift=6em]h.east) {${\mathbi{s}}^{k}$};
\node [anchor=west,draw,fill=green!20,minimum height=1.8em,minimum width=2.5em] (h2) at ([xshift=6em]s.east) {${\mathbi{h}}^{k}$};
\node [anchor=center,draw,fill=red!30,minimum height=1.8em,minimum width=2.5em] (h) at (0,0) {${\mathbi{h}}^{k-1}$};
\node [anchor=west,draw,fill=blue!30,minimum height=1.8em,minimum width=2.5em] (s) at ([xshift=6em]h.east) {${\mathbi{s}}^{k}$};
\node [anchor=west,draw,fill=green!30,minimum height=1.8em,minimum width=2.5em] (h2) at ([xshift=6em]s.east) {${\mathbi{h}}^{k}$};
\node [anchor=east] (prev) at ([xshift=-2em]h.west) {...};
\node [anchor=west] (next) at ([xshift=2em]h2.east) {...};
\draw [->,thick] ([xshift=0.1em]prev.east) -- ([xshift=-0.1em]h.west);
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\mathbi{h}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=5.5em]h.east) {$\mathbi{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!20,draw] (h2) at ([xshift=5.5em]s.east) {$\mathbi{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!20,draw] (l) at ([xshift=5.5em]h2.east) {$L$};
\node [anchor=center,minimum height=1.7em,fill=yellow!30,draw] (h) at (0,0) {$\mathbi{h}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!30,draw] (s) at ([xshift=6.5em]h.east) {$\mathbi{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=green!30,draw] (h2) at ([xshift=6.5em]s.east) {$\mathbi{h}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=orange!30,draw] (l) at ([xshift=6.5em]h2.east) {$L$};
\draw [->] (h.east) -- (s.west);
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west) node [pos=0.5,above] {\tiny{损失}};
\node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.2em,yshift=-0.8em]h.north east) {\tiny{$\mathbi{s}^K = \mathbi{h}^{K-1} \mathbi{W}^K$}};
\node [anchor=south west,inner sep=2pt] (step101) at ([xshift=1em]step100.north west) {\tiny{线性变换}};
\node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.7em,yshift=-0.8em]h.north east) {\tiny{$\mathbi{s}^K = \mathbi{h}^{K-1} \mathbi{W}^K$}};
\node [anchor=south west,inner sep=2pt] (step101) at ([xshift=1.1em]step100.north west) {\tiny{线性变换}};
\node [anchor=south west,inner sep=2pt] (step200) at ([xshift=0.5em,yshift=-0.8em]s.north east) {\tiny{$\mathbi{h}^K = f^K(\mathbi{s}^K)$}};
\node [anchor=south west,inner sep=2pt] (step200) at ([xshift=0.9em,yshift=-0.8em]s.north east) {\tiny{$\mathbi{h}^K = f^K(\mathbi{s}^K)$}};
\node [anchor=south west,inner sep=2pt] (step201) at ([xshift=1em]step200.north west) {\tiny{激活函数}};
\node [anchor=south,inner sep=1pt] (outputlabel) at ([yshift=0.0em]h2.north) {\tiny{\textbf{输出层}}};
......
......@@ -22,7 +22,7 @@
\node [anchor=west] (w1label) at ([xshift=-0.5em,yshift=0.8em]x5.north east) {${\mathbi{W}}^{[1]}$};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron05)] (layer01) {};
\node [rectangle,inner sep=0.2em,fill=red!30] [fit = (neuron01) (neuron05)] (layer01) {};
\end{pgfonlayer}
\node [anchor=west] (layer00label) at ([xshift=1.4em]x5.east) {\footnotesize{第0层}};
......@@ -51,7 +51,7 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron12) (neuron14)] (layer02) {};
\node [rectangle,inner sep=0.2em,fill=ugreen!30] [fit = (neuron12) (neuron14)] (layer02) {};
}
\end{pgfonlayer}
......@@ -81,7 +81,7 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron25)] (layer03) {};
\node [rectangle,inner sep=0.2em,fill=blue!30] [fit = (neuron21) (neuron25)] (layer03) {};
}
\end{pgfonlayer}
......
%%%------------------------------------------------------------------------------------------------------------
\pgfplotsset{
colormap={whitered}{color(-1cm)=(orange!75!red);color(1cm)=(white)}
colormap={whitered}{color(-1cm)=(orange!85!red);color(1cm)=(white)}
}
\begin{tikzpicture}[
declare function = {mu1=1;},
......@@ -14,7 +14,7 @@
\begin{scope}
\begin{axis}[
colormap name = whitered,
width = 8cm,
width = 10cm,
height = 5cm,
view = {20}{45},
enlargelimits = false,
......
......@@ -3,11 +3,11 @@
\begin{scope}
\node [anchor=west,draw,thick,minimum width=4em,minimum height=1.7em,fill=blue!20] (encoder) at (0,0) {模块};
\node [anchor=west,draw,thick,minimum width=4em,minimum height=1.7em,fill=blue!30] (encoder) at (0,0) {模块};
\node [anchor=south,minimum width=4em,minimum height=1.7em] (space) at ([yshift=0.3em]encoder.north) {\footnotesize{目标系统}};
\begin{pgfonlayer}{background}
\node [rectangle,draw,thick,fill=red!20] [fit = (encoder) (space)] (system) {};
\node [rectangle,draw,thick,fill=red!30] [fit = (encoder) (space)] (system) {};
\end{pgfonlayer}
\node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{目标任务有标注数据}}};
......@@ -18,13 +18,13 @@
\begin{scope}[xshift=2.8in]
\node [anchor=west,draw,dashed,thick,minimum width=4em,minimum height=1.7em,fill=blue!20] (encoder) at (0,0) {模块};
\node [anchor=west,draw,dashed,thick,minimum width=4em,minimum height=1.7em,fill=blue!30] (encoder) at (0,0) {模块};
\node [anchor=south,minimum width=4em,minimum height=1.7em] (space) at ([yshift=0.3em]encoder.north) {\footnotesize{目标系统}};
\node [anchor=center,draw,thick,minimum width=4em,minimum height=1.7em,fill=green!20] (encoderpre) at ([xshift=-7em]encoder.center) {\footnotesize{语言模型}};
\node [anchor=center,draw,thick,minimum width=4em,minimum height=1.7em,fill=green!30] (encoderpre) at ([xshift=-7em]encoder.center) {\footnotesize{语言模型}};
\draw [->,thick] (encoderpre.east) -- (encoder.west);
\begin{pgfonlayer}{background}
\node [rectangle,draw,thick,fill=red!20] [fit = (encoder) (space)] (system) {};
\node [rectangle,draw,thick,fill=red!30] [fit = (encoder) (space)] (system) {};
\end{pgfonlayer}
\node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{目标任务有标注数据}}};
......
......@@ -2,7 +2,7 @@
\begin{tikzpicture}
\begin{scope}
% Horizontal scale factor for this figure: visible node coordinates use it as an
% x multiplier (e.g. "2 * \neuronsep"). When both \def lines are present the later one wins.
\def\neuronsep{1.35}
\def\neuronsep{1.6}
% Shared look for neuron nodes: white-filled circle, very thick ublue outline, small drop shadow.
\tikzstyle{neuronnode} = [minimum size=1.9em,circle,draw,ublue,very thick,inner sep=1.75pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}];
%%% layer 1
......@@ -13,7 +13,7 @@
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron04)] (layer01) {};
\node [rectangle,inner sep=0.2em,fill=red!30] [fit = (neuron01) (neuron04)] (layer01) {};
\node [anchor=east] (layer01label) at (layer01.west) {\footnotesize{$k-1$}};
\end{pgfonlayer}
......@@ -29,7 +29,7 @@
}
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron11) (neuron14)] (layer02) {};
\node [rectangle,inner sep=0.2em,fill=ugreen!30] [fit = (neuron11) (neuron14)] (layer02) {};
\node [anchor=east] (layer02label) at (layer02.west) {\footnotesize{$k$}};
\end{pgfonlayer}
......@@ -46,7 +46,7 @@
}
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=blue!20] [fit = (neuron21) (neuron24)] (layer03) {};
\node [rectangle,inner sep=0.2em,fill=blue!30] [fit = (neuron21) (neuron24)] (layer03) {};
\node [anchor=east] (layer03label) at (layer03.west) {\footnotesize{$k+1$}};
\end{pgfonlayer}
......@@ -61,12 +61,12 @@
}
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,fill=ugreen!20] [fit = (neuron31) (neuron34)] (layer04) {};
\node [rectangle,inner sep=0.2em,fill=ugreen!30] [fit = (neuron31) (neuron34)] (layer04) {};
\node [anchor=east] (layer04label) at (layer04.west) {\footnotesize{$K$(输出)}};
\end{pgfonlayer}
{
\node [neuronnode,draw=red,fill=red!20!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,4em) {};
\node [neuronnode,draw=red,fill=red!30!white,inner sep=1pt] (neuron12new) at (2 * \neuronsep,4em) {};
\node [anchor=east] (neuronsamplelabel) at ([yshift=-1em]layer02label.south east) {{\textbf{\scriptsize{$k$层, 第$i$个神经元}}}};
\draw [->,dashed,very thick,red] ([xshift=-0.2em,yshift=0.2em]neuronsamplelabel.east) .. controls +(30:1) and +(220:1) .. ([xshift=-0em,yshift=-0em]neuron12new.210);
}
......
......@@ -2,12 +2,12 @@
\begin{tikzpicture}
\begin{scope}
\node [anchor=north west] (o1) at (0,0) {\footnotesize{$\begin{bmatrix} 0 \\ 1 \\ 0 \\ 0 \\ 0 \\ ... \\ 0 \end{bmatrix}$}};
\node [anchor=north west] (o2) at ([xshift=1em]o1.north east) {\footnotesize{$\begin{bmatrix} 0 \\ 0 \\ 0 \\ 1 \\ 0 \\ ... \\ 0 \end{bmatrix}$}};
\node [anchor=north east] (v) at ([xshift=-0em]o1.north west) {\footnotesize{$\begin{matrix} \textrm{\ \ \ \ \ }_1 \\ \textrm{\ \ 桌子}_2 \\ \textrm{\ \ \ \ \ }_3 \\ \textrm{\ \ 椅子}_4 \\ \textrm{\ \ 我们}_5 \\ ... \\ \textrm{你好}_{10k} \end{matrix}$}};
\node [anchor=north west] (o2) at ([xshift=2em]o1.north east) {\footnotesize{$\begin{bmatrix} 0 \\ 0 \\ 0 \\ 1 \\ 0 \\ ... \\ 0 \end{bmatrix}$}};
\node [anchor=north east] (v) at ([xshift=-2em]o1.north west) {\footnotesize{$\begin{matrix} \textrm{\ \ \ \ \ }_1 \\ \textrm{\ \ 桌子}_2 \\ \textrm{\ \ \ \ \ }_3 \\ \textrm{\ \ 椅子}_4 \\ \textrm{\ \ 我们}_5 \\ ... \\ \textrm{你好}_{10k} \end{matrix}$}};
\node [anchor=south] (w1) at (o1.north) {\footnotesize{桌子}};
\node [anchor=south] (w2) at (o2.north) {\footnotesize{椅子}};
{
\node [anchor=south,fill=red!20!white] (cosine) at (w1.north) {\footnotesize{$\textrm{cos}(\textrm{‘桌子’},\textrm{‘椅子’})=0$}};
\node [anchor=south,fill=red!30!white,minimum width=12em] (cosine) at (w1.north) {\footnotesize{$\textrm{cos}(\textrm{‘桌子’},\textrm{‘椅子’})=0$}};
}
\end{scope}
......
......@@ -10,9 +10,9 @@
% Style shared by the parameter shard boxes drawn below.
\tikzstyle{parametershard} = [draw,thick,minimum width=4em,align=left,rounded corners=2pt]
{
% param1 and param3 are the visible shards; param2 is an empty node placed between
% them, and the server label further down is anchored above param2.
\node[parametershard,anchor=west,fill=yellow!10] (param1) at (0,0) {${\bm \theta}_o$};
\node[parametershard,anchor=west,fill=yellow!20] (param1) at (0,0) {${\bm \theta}_o$};
\node (param2) at ([xshift=1em]param1.east) {};
\node[parametershard,anchor=west,fill=red!10] (param3) at ([xshift=1em]param2.east) {${\bm \theta}_h$};
\node[parametershard,anchor=west,fill=red!20] (param3) at ([xshift=1em]param2.east) {${\bm \theta}_h$};
% Label carrying the parameter update formula, centred above the spacer node.
\node[anchor=south,inner sep=1pt] (serverlabel) at ([yshift=0.8em]param2.north) {\small{\textbf{参数服务器}: ${\bm \theta}_{\textrm {new}} = {\bm \theta} - \alpha\cdot \frac{\partial J}{\partial {\bm \theta}}$}};
}
......@@ -22,11 +22,11 @@
}
\end{pgfonlayer}
\tikzstyle{processor} = [draw,thick,fill=orange!15,minimum width=4em,align=left,rounded corners=2pt]
\tikzstyle{processor} = [draw,thick,fill=orange!25,minimum width=4em,align=left,rounded corners=2pt]
{
\node [processor,anchor=north,align=center] (processor2) at ([yshift=-1.2in]serverlabel.south) {\footnotesize{处理器 2}\\\footnotesize{(G2)}};
\node [anchor=north] (labela) at ([xshift=6.5em,yshift=-2em]processor2.south) {\small {(a)同步更新}};
\node [anchor=north] (labela) at ([xshift=9.5em,yshift=-2em]processor2.south) {\small {(a)同步更新}};
\node [processor,anchor=east,align=center] (processor1) at ([xshift=-1em]processor2.west) {\footnotesize{处理器 1}\\\footnotesize{(G1)}};
\node [processor,anchor=west,align=center] (processor3) at ([xshift=1em]processor2.east) {\footnotesize{处理器 3}\\\footnotesize{(G3)}};
}
......@@ -47,15 +47,15 @@
\footnotesize{
{
\node[job,anchor=south west,fill=blue!30] (fetch11) at ([xshift=6em,yshift=-0.2em]processor3.east) {\textbf{F}};
\node[job,anchor=west,fill=orange!25] (minibatch11) at ([yshift=1pt]fetch11.east) {\scriptsize{minibatch3}};
\node[job,anchor=west,fill=red!30] (push11) at ([yshift=1pt]minibatch11.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!30] (fetch12) at ([xshift=0.8em]fetch11.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!25] (minibatch12) at ([yshift=1pt]fetch12.east) {\scriptsize{minibatch2}};
\node[job,anchor=west,fill=red!30] (push12) at ([yshift=1pt]minibatch12.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!30] (fetch13) at ([xshift=0.8em]fetch12.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!25,minimum width=8.2em] (minibatch13) at ([yshift=1pt]fetch13.east) {\footnotesize{minibatch1}};
\node[job,anchor=west,fill=red!30] (push13) at ([yshift=1pt]minibatch13.east) {\textbf{P}};
% Job timeline for panel (a): three rows (11, 12, 13), each an F box, a minibatch
% box and a P box chained west-to-east. Each later row hangs from the previous
% row's fetch node (south west corner, shifted 0.8em right), giving a staircase.
% All F boxes share one fill (blue!35), matching rows 12/13 and the panel (b) timeline.
\node[job,anchor=south west,fill=blue!35] (fetch11) at ([xshift=10em,yshift=-0.2em]processor3.east) {\textbf{F}};
\node[job,anchor=west,fill=orange!35] (minibatch11) at ([yshift=1pt]fetch11.east) {\scriptsize{minibatch3}};
\node[job,anchor=west,fill=red!35] (push11) at ([yshift=1pt]minibatch11.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!35] (fetch12) at ([xshift=0.8em]fetch11.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!35] (minibatch12) at ([yshift=1pt]fetch12.east) {\scriptsize{minibatch2}};
\node[job,anchor=west,fill=red!35] (push12) at ([yshift=1pt]minibatch12.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!35] (fetch13) at ([xshift=0.8em]fetch12.south west) {\textbf{F}};
% The last row's minibatch box is given an explicit wider minimum width.
\node[job,anchor=west,fill=orange!35,minimum width=8.2em] (minibatch13) at ([yshift=1pt]fetch13.east) {\footnotesize{minibatch1}};
\node[job,anchor=west,fill=red!35] (push13) at ([yshift=1pt]minibatch13.east) {\textbf{P}};
\node[anchor=south west,draw,fill=gray!20,minimum width=7.7em] (update11) at ([yshift=3.82em]push11.north east) {更新};
\node[anchor=north] (G11) at (fetch11.west) {\small{G3}};
......@@ -67,13 +67,13 @@
}
{
\draw [<->,thin,dotted] ([xshift=-1pt]minibatch11.north) .. controls +(west:3em) and +(east:3em) .. ([xshift=1pt]processor3.east);
\draw [<->,thin,dotted] ([xshift=-1pt]fetch11.north) .. controls +(west:4em) and +(east:4em) .. ([xshift=0em,yshift=0.3in]processor3.north);
\draw [<->,thin,dotted] ([xshift=-1pt]push11.north) -- ([xshift=-2.2em,yshift=0.8in]processor3.north);
\draw [<->,thick,dotted] ([xshift=-1pt]minibatch11.north) .. controls +(west:3em) and +(east:3em) .. ([xshift=1pt]processor3.east);
\draw [<->,thick,dotted] ([xshift=-1pt]fetch11.north) .. controls +(west:4em) and +(east:4em) .. ([xshift=0em,yshift=0.3in]processor3.north);
\draw [<->,thick,dotted] ([xshift=-1pt]push11.north) -- ([xshift=-2.2em,yshift=0.8in]processor3.north);
}
{
\draw [<->,thin,dotted] ([xshift=-1pt]update11.west) -- ([xshift=1pt,yshift=-1.28em]serverbox.north east);
\draw [<->,thick,dotted] ([xshift=-1pt]update11.west) -- ([xshift=1pt,yshift=-1.28em]serverbox.north east);
}
......@@ -88,9 +88,9 @@
\tikzstyle{parametershard} = [draw,thick,minimum width=4em,align=left,rounded corners=2pt]
{
\node[parametershard,anchor=west,fill=yellow!10] (param1) at (0,0) {${\bm \theta}_o$};
\node[parametershard,anchor=west,fill=yellow!20] (param1) at (0,0) {${\bm \theta}_o$};
\node (param2) at ([xshift=1em]param1.east) {};
\node[parametershard,anchor=west,fill=red!10] (param3) at ([xshift=1em]param2.east) {${\bm \theta}_h$};
\node[parametershard,anchor=west,fill=red!20] (param3) at ([xshift=1em]param2.east) {${\bm \theta}_h$};
\node[anchor=south,inner sep=1pt] (serverlabel) at ([yshift=0.8em]param2.north) {\small{\textbf{参数服务器}: ${\bm \theta}_{\textrm {new}} = {\bm \theta} - \alpha\cdot \frac{\partial J}{\partial {\bm \theta}}$}};
}
......@@ -100,11 +100,11 @@
}
\end{pgfonlayer}
\tikzstyle{processor} = [draw,thick,fill=orange!15,minimum width=4em,align=left,rounded corners=2pt]
\tikzstyle{processor} = [draw,thick,fill=orange!25,minimum width=4em,align=left,rounded corners=2pt]
{
\node [processor,anchor=north,align=center] (processor2) at ([yshift=-1.2in]serverlabel.south) {\footnotesize{处理器 2}\\\footnotesize{(G2)}};
\node [anchor=north] (label) at ([xshift=6.5em,yshift=-2em]processor2.south) {\small {(b)异步更新}};
\node [anchor=north] (label) at ([xshift=9.5em,yshift=-2em]processor2.south) {\small {(b)异步更新}};
\node [processor,anchor=east,align=center] (processor1) at ([xshift=-1em]processor2.west) {\footnotesize{处理器 1}\\\footnotesize{(G1)}};
\node [processor,anchor=west,align=center] (processor3) at ([xshift=1em]processor2.east) {\footnotesize{处理器 3}\\\footnotesize{(G3)}};
}
......@@ -125,15 +125,15 @@
\footnotesize{
{
\node[job,anchor=south west,fill=blue!30] (fetch21) at ([xshift=6em,yshift=-0.3em]processor3.east) {\textbf{F}};
\node[job,anchor=west,fill=orange!25] (minibatch21) at ([yshift=1pt]fetch21.east) {\scriptsize{minibatch3}};
\node[job,anchor=west,fill=red!30] (push21) at ([yshift=1pt]minibatch21.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!30] (fetch22) at ([xshift=0.8em]fetch21.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!25] (minibatch22) at ([yshift=1pt]fetch22.east) {\scriptsize{minibatch2}};
\node[job,anchor=west,fill=red!30] (push22) at ([yshift=1pt]minibatch22.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!30] (fetch23) at ([xshift=0.8em]fetch22.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!25,minimum width=8.25em] (minibatch23) at ([yshift=1pt]fetch23.east) {\footnotesize{minibatch1}};
\node[job,anchor=west,fill=red!30] (push23) at ([yshift=1pt]minibatch23.east) {\textbf{P}};
\node[job,anchor=south west,fill=blue!35] (fetch21) at ([xshift=10em,yshift=-0.3em]processor3.east) {\textbf{F}};
\node[job,anchor=west,fill=orange!35] (minibatch21) at ([yshift=1pt]fetch21.east) {\scriptsize{minibatch3}};
\node[job,anchor=west,fill=red!35] (push21) at ([yshift=1pt]minibatch21.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!35] (fetch22) at ([xshift=0.8em]fetch21.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!35] (minibatch22) at ([yshift=1pt]fetch22.east) {\scriptsize{minibatch2}};
\node[job,anchor=west,fill=red!35] (push22) at ([yshift=1pt]minibatch22.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!35] (fetch23) at ([xshift=0.8em]fetch22.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!35,minimum width=8.25em] (minibatch23) at ([yshift=1pt]fetch23.east) {\footnotesize{minibatch1}};
\node[job,anchor=west,fill=red!35] (push23) at ([yshift=1pt]minibatch23.east) {\textbf{P}};
\node[anchor=south west,draw,fill=gray!20,minimum width=0.6in] (update21) at ([yshift=2pt]push21.north east) {更新};
\node[anchor=south west,draw,fill=gray!20,minimum width=0.25in] (update22) at ([yshift=2.8pt]push23.north east) {\tiny{更新}};
......@@ -143,8 +143,8 @@
\node[anchor=north,align=center] (synlabel) at (G22.south) {\small{\textbf{异步更新}}};
\draw[->,thick] ([xshift=1em]G23.east) -- ([xshift=1em,yshift=1.4in]G23.east) node [pos=0.5,rotate=90,yshift=-1em] {\small{时间轴}};
\draw [<->,thin,dotted] ([xshift=-1pt]update21.west) -- ([xshift=1pt,yshift=-1.55em]serverbox.north east);
\draw [<->,thin,dotted] ([xshift=-1pt]update22.west) -- ([xshift=1pt,yshift=-1.5em]serverbox.north east);
\draw [<->,thick,dotted] ([xshift=-1pt,yshift=-4pt]update21.west) -- ([xshift=1pt,yshift=-1.55em]serverbox.north east);
\draw [<->,thick,dotted] ([xshift=-1pt]update22.west) -- ([xshift=1pt,yshift=-1.5em]serverbox.north east);
}
}
......
......@@ -2,7 +2,7 @@
\begin{tikzpicture}
%% a two-layer neural network
\begin{scope}[xshift=2in]
\begin{scope}[xshift=2.8in]
\tikzstyle{neuronnode} = [minimum size=1.7em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
%% output illustration
\begin{scope}[xshift=2.8in,yshift=0.1in]
......
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!20!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
{
\node [anchor=west,rnnnode] (node11) at (0,0) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=3.5em]node11.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=3.5em]node12.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=3.5em]node13.east) {\scriptsize{RNN Cell}};
}
\node [anchor=north,rnnnode,fill=red!20!white] (e1) at ([yshift=-1.2em]node11.south) {\tiny{${\mathbi{e}}_1={\mathbi{o}}_1{\mathbi{C}}$}};
\node [anchor=north,rnnnode,fill=red!20!white] (e2) at ([yshift=-1.2em]node12.south) {\tiny{${\mathbi{e}}_2={\mathbi{o}}_2{\mathbi{C}}$}};
\node [anchor=north,rnnnode,fill=red!20!white] (e3) at ([yshift=-1.2em]node13.south) {\tiny{${\mathbi{e}}_3={\mathbi{o}}_3{\mathbi{C}}$}};
\node [anchor=north,rnnnode,fill=red!20!white] (e4) at ([yshift=-1.2em]node14.south) {\tiny{${\mathbi{e}}_4={\mathbi{o}}_4{\mathbi{C}}$}};
\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\tiny{${\mathbi{e}}_1={\mathbi{o}}_1{\mathbi{C}}$}};
\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\tiny{${\mathbi{e}}_2={\mathbi{o}}_2{\mathbi{C}}$}};
\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\tiny{${\mathbi{e}}_3={\mathbi{o}}_3{\mathbi{C}}$}};
\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\tiny{${\mathbi{e}}_4={\mathbi{o}}_4{\mathbi{C}}$}};
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{${\mathbi{o}}_1$}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{${\mathbi{o}}_2$}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{${\mathbi{o}}_3$}};
......@@ -32,10 +32,10 @@
\node [anchor=south,rnnnode] (node23) at ([yshift=1.5em]node13.north) {\scriptsize{RNN Cell}};
\node [anchor=south,rnnnode] (node24) at ([yshift=1.5em]node14.north) {\scriptsize{RNN Cell}};
\node [anchor=south,rnnnode,fill=blue!20!white] (node31) at ([yshift=1.5em]node21.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!20!white] (node32) at ([yshift=1.5em]node22.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!20!white] (node33) at ([yshift=1.5em]node23.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!20!white] (node34) at ([yshift=1.5em]node24.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node31) at ([yshift=1.5em]node21.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node32) at ([yshift=1.5em]node22.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node33) at ([yshift=1.5em]node23.north) {\scriptsize{Softmax($\cdot$)}};
\node [anchor=south,rnnnode,fill=blue!30!white] (node34) at ([yshift=1.5em]node24.north) {\scriptsize{Softmax($\cdot$)}};
}
{
......
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!20!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\tikzstyle{rnnnode} = [draw,inner sep=5pt,minimum width=4em,minimum height=1.5em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}]
\node [anchor=west,rnnnode] (node11) at (0,0) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=2em]node11.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=2em]node12.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=2em]node13.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node12) at ([xshift=3.5em]node11.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node13) at ([xshift=3.5em]node12.east) {\scriptsize{RNN Cell}};
\node [anchor=west,rnnnode] (node14) at ([xshift=3.5em]node13.east) {\scriptsize{RNN Cell}};
\node [anchor=north,rnnnode,fill=red!20!white] (e1) at ([yshift=-1.2em]node11.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!20!white] (e2) at ([yshift=-1.2em]node12.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!20!white] (e3) at ([yshift=-1.2em]node13.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!20!white] (e4) at ([yshift=-1.2em]node14.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e1) at ([yshift=-1.2em]node11.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e2) at ([yshift=-1.2em]node12.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e3) at ([yshift=-1.2em]node13.south) {\scriptsize{embedding}};
\node [anchor=north,rnnnode,fill=red!30!white] (e4) at ([yshift=-1.2em]node14.south) {\scriptsize{embedding}};
\node [anchor=north] (w1) at ([yshift=-1em]e1.south) {\footnotesize{亚伦}};
\node [anchor=north] (w2) at ([yshift=-1em]e2.south) {\footnotesize{任职}};
\node [anchor=north] (w3) at ([yshift=-1em]e3.south) {\footnotesize{}};
......@@ -62,11 +62,11 @@
\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
{
\node [anchor=south] (toplabel1) at ([yshift=2em,xshift=-1.3em]node32new.north) {\footnotesize{“苹果”的表示:}};
\node [anchor=west,fill=blue!20!white,minimum width=3em] (toplabel2) at (toplabel1.east) {\footnotesize{上下文}};
\node [anchor=south] (toplabel1) at ([yshift=2em,xshift=-0.2em]node32new.north) {\footnotesize{“苹果”的表示:}};
\node [anchor=west,fill=blue!30!white,minimum width=3em] (toplabel2) at (toplabel1.east) {\footnotesize{上下文}};
}
{
\node [anchor=west,fill=red!20!white,minimum width=3em] (toplabel3) at (toplabel2.east) {\footnotesize{}};
\node [anchor=west,fill=red!30!white,minimum width=3em] (toplabel3) at (toplabel2.east) {\footnotesize{}};
}
\begin{pgfonlayer}{background}
......
......@@ -4,7 +4,7 @@
% 1st-order tensor: a 1x3 strip of 0.5cm cells. The counter mycount1 starts at 1
% and supplies the number printed in each cell.
\setcounter{mycount1}{1}
\draw[step=0.5cm,thick] (0,-0) grid (1.5,0.5);
% Put one filled 0.49cm square at each cell centre, print the current counter
% value in it, then advance the counter. Every node reuses the name vector1,
% so the name ends up referring to the last node drawn.
\foreach \x in {0.25,0.75,1.25}{
\node [fill=green!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm](vector1) at (\x,0.25) {$\number\value{mycount1}$};
\node [fill=green!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm](vector1) at (\x,0.25) {$\number\value{mycount1}$};
\addtocounter{mycount1}{1};
}
\node [anchor=north] (labela) at ([xshift=-1.2em,yshift=-0.3em]vector1.south) {\small{(a)1阶张量 }};
......@@ -14,11 +14,11 @@
\draw[step=0.5cm,thick] (0,-0) grid (3.0,0.5);
\setcounter{mycount2}{1}
\foreach \x in {0.25,0.75,1.25}{
\node [fill=green!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] (vector2)at (\x,0.25) {$\number\value{mycount2}$};
\node [fill=green!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] (vector2)at (\x,0.25) {$\number\value{mycount2}$};
\addtocounter{mycount2}{1};
}
\foreach \x in {1.75,2.25,2.75}{
\node [fill=red!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount2}$};
\node [fill=red!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount2}$};
\addtocounter{mycount2}{1};
}
\node [anchor=north] (labelb) at ([xshift=0.3em,yshift=-0.3em]vector2.south) {\small{(b)2阶张量 }};
......@@ -28,19 +28,19 @@
\draw[step=0.5cm,thick] (0,-0) grid (6.0,0.5);
\setcounter{mycount3}{1}
\foreach \x in {0.25,0.75,1.25}{
\node [fill=green!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount3}$};
\node [fill=green!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount3}$};
\addtocounter{mycount3}{1};
}
\foreach \x in {1.75,2.25,2.75}{
\node [fill=red!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount3}$};
\node [fill=red!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount3}$};
\addtocounter{mycount3}{1};
}
\foreach \x in {3.25,3.75,4.25}{
\node [fill=green!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount3}$};
\node [fill=green!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount3}$};
\addtocounter{mycount3}{1};
}
\foreach \x in {4.75,5.25,5.75}{
\node [fill=red!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount3}$};
\node [fill=red!30,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,0.25) {$\number\value{mycount3}$};
\addtocounter{mycount3}{1};
}
\draw[decorate,thick,decoration={brace,mirror,raise=0.2em}] (0,-0.2) -- (2.95,-0.2);
......
......@@ -23,21 +23,21 @@
\draw [-,ublue] (0,0) .. controls (2,0) and (3,-1.0)..(3,-1.5) .. controls (3,-2.2) and (2,-1.75)..(1.5,-1.65)..controls (1.5,-1.65) and (0.5,-1.45)..(0,-1.45)..controls (-0.5,-1.45) and (-1.5,-1.65)..(-1.5,-1.65)..controls (-2,-1.75)and (-3,-2.2).. (-3,-1.5)..controls (-3,-1.0) and (-2,0)..(0,0);
\draw [-,ublue,thick] (0,0) .. controls (2,0) and (3,-1.0)..(3,-1.5) .. controls (3,-2.2) and (2,-1.75)..(1.5,-1.65)..controls (1.5,-1.65) and (0.5,-1.45)..(0,-1.45)..controls (-0.5,-1.45) and (-1.5,-1.65)..(-1.5,-1.65)..controls (-2,-1.75)and (-3,-2.2).. (-3,-1.5)..controls (-3,-1.0) and (-2,0)..(0,0);
\draw [-,ublue] (0,0.5)..controls (2,0.5) and (4,-1.0).. (4,-1.7)..controls(4,-2.6)and (3,-2.3)..(2,-2.05)..controls (2,-2.05) and (1,-1.80)..(0,-1.80)..controls (-1,-1.80)and (-2,-2.05)..(-2,-2.05)..controls(-3,-2.3)and(-4,-2.6)..(-4,-1.7)..controls(-4,-1.0)and (-2,0.5)..(0,0.5);
\draw [-,ublue,thick] (0,0.5)..controls (2,0.5) and (4,-1.0).. (4,-1.7)..controls(4,-2.6)and (3,-2.3)..(2,-2.05)..controls (2,-2.05) and (1,-1.80)..(0,-1.80)..controls (-1,-1.80)and (-2,-2.05)..(-2,-2.05)..controls(-3,-2.3)and(-4,-2.6)..(-4,-1.7)..controls(-4,-1.0)and (-2,0.5)..(0,0.5);
\draw[-,ublue](0,1.0)..controls(3,1.0) and (5,-1.0)..(5,-1.9)..controls (5,-3.2)and (4,-2.7)..(3,-2.5)..controls (3,-2.5) and (2,-2.20)..(0,-2.15)..controls (-2,-2.20)and (-3,-2.5)..(-3,-2.5)..controls (-4,-2.7) and (-5,-3.2) ..(-5,-1.9)..controls (-5,-1.0) and (-3,1.0)..(0,1.0);
\draw[-,ublue,thick](0,1.0)..controls(3,1.0) and (5,-1.0)..(5,-1.9)..controls (5,-3.2)and (4,-2.7)..(3,-2.5)..controls (3,-2.5) and (2,-2.20)..(0,-2.15)..controls (-2,-2.20)and (-3,-2.5)..(-3,-2.5)..controls (-4,-2.7) and (-5,-3.2) ..(-5,-1.9)..controls (-5,-1.0) and (-3,1.0)..(0,1.0);
\draw[-,ublue] (0,-0.3)..controls (1.5,-0.3)and (2.5,-1.0)..(2.5,-1.4)..controls(2.5,-1.8)and (2,-1.55)..(1.5,-1.45) ..controls (1.5,-1.45) and (0.5,-1.25)..(0,-1.25) .. controls(-0.5,-1.25)and (-1.5,-1.45)..(-1.5,-1.45)..controls(-2,-1.55)and (-2.5,-1.8) ..(-2.5,-1.4)..controls(-2.5,-1.0) and (-1.5,-0.3)..(0,-0.3);
\draw[-,ublue,thick] (0,-0.3)..controls (1.5,-0.3)and (2.5,-1.0)..(2.5,-1.4)..controls(2.5,-1.8)and (2,-1.55)..(1.5,-1.45) ..controls (1.5,-1.45) and (0.5,-1.25)..(0,-1.25) .. controls(-0.5,-1.25)and (-1.5,-1.45)..(-1.5,-1.45)..controls(-2,-1.55)and (-2.5,-1.8) ..(-2.5,-1.4)..controls(-2.5,-1.0) and (-1.5,-0.3)..(0,-0.3);
\draw[-,ublue](0,-0.5)..controls (1.0,-0.5) and (1.9,-0.8)..(1.9,-1.3)..controls(1.9,-1.5)and (1.5,-1.3)..(1.0,-1.2) ..controls(1.0,-1.2) and (0.5,-1.1)..(0,-1.1)..controls(-0.5,-1.1) and (-1.0,-1.2)..(-1.0,-1.2)..controls (-1.5,-1.3)and (-1.9,-1.5)..(-1.9,-1.3) ..controls(-1.9,-0.8)and (-1.0,-0.5) ..(0,-0.5);
\draw[-,ublue,thick](0,-0.5)..controls (1.0,-0.5) and (1.9,-0.8)..(1.9,-1.3)..controls(1.9,-1.5)and (1.5,-1.3)..(1.0,-1.2) ..controls(1.0,-1.2) and (0.5,-1.1)..(0,-1.1)..controls(-0.5,-1.1) and (-1.0,-1.2)..(-1.0,-1.2)..controls (-1.5,-1.3)and (-1.9,-1.5)..(-1.9,-1.3) ..controls(-1.9,-0.8)and (-1.0,-0.5) ..(0,-0.5);
\draw[-,ublue](0,-0.7)..controls(1.0,-0.7) and (1.4,-0.9)..(1.4,-1.1) .. controls(1.4,-1.25) and (1.2,-1.15)..(1.0,-1.1)..controls(1.0,-1.1) and (0.5,-0.95)..(0,-0.95)..controls(-0.5,-0.95)and (-1.0,-1.1) ..(-1.0,-1.1)..controls(-1.2,-1.15) and (-1.4,-1.25)..(-1.4,-1.1)..controls(-1.4,-0.9) and (-1.0,-0.7)..(0,-0.7);
\draw[-,ublue,thick](0,-0.7)..controls(1.0,-0.7) and (1.4,-0.9)..(1.4,-1.1) .. controls(1.4,-1.25) and (1.2,-1.15)..(1.0,-1.1)..controls(1.0,-1.1) and (0.5,-0.95)..(0,-0.95)..controls(-0.5,-0.95)and (-1.0,-1.1) ..(-1.0,-1.1)..controls(-1.2,-1.15) and (-1.4,-1.25)..(-1.4,-1.1)..controls(-1.4,-0.9) and (-1.0,-0.7)..(0,-0.7);
\draw[-,ublue](0,-0.75)..controls(0.7,-0.75)and (1.0,-0.9)..(1.0,-1.0)..controls(1.0,-1.05) and (0.9,-1.05)..(0.7,-1.0)..controls(0.5,-0.95)and (0.3,-0.9)..(0,-0.9)..controls(-0.3,-0.9)and (-0.5,-0.95)..(-0.7,-1.0)..controls(-0.9,-1.05)and (-1.0,-1.05)..(-1.0,-1.0) ..controls(-1.0,-0.9)and (-0.7,-0.75)..(0,-0.75);
\draw[-,ublue,thick](0,-0.75)..controls(0.7,-0.75)and (1.0,-0.9)..(1.0,-1.0)..controls(1.0,-1.05) and (0.9,-1.05)..(0.7,-1.0)..controls(0.5,-0.95)and (0.3,-0.9)..(0,-0.9)..controls(-0.3,-0.9)and (-0.5,-0.95)..(-0.7,-1.0)..controls(-0.9,-1.05)and (-1.0,-1.05)..(-1.0,-1.0) ..controls(-1.0,-0.9)and (-0.7,-0.75)..(0,-0.75);
\draw[-,ublue](0,-0.8)..controls(0.5,-0.8) and (0.6,-0.85)..(0.6,-0.9)..controls(0.6,-0.93)and (0.5,-0.91)..(0.3,-0.88)..controls(0.2,-0.87)and (0.1,-0.86)..(0,-0.86)..controls(-0.1,-0.86)and(-0.2,-0.87)..(-0.3,-0.88)..controls(-0.5,-0.91) and(-0.6,-0.93) ..(-0.6,-0.9)..controls(-0.6,-0.85)and (-0.5,-0.8)..(0,-0.8);
\draw[-,ublue,thick](0,-0.8)..controls(0.5,-0.8) and (0.6,-0.85)..(0.6,-0.9)..controls(0.6,-0.93)and (0.5,-0.91)..(0.3,-0.88)..controls(0.2,-0.87)and (0.1,-0.86)..(0,-0.86)..controls(-0.1,-0.86)and(-0.2,-0.87)..(-0.3,-0.88)..controls(-0.5,-0.91) and(-0.6,-0.93) ..(-0.6,-0.9)..controls(-0.6,-0.85)and (-0.5,-0.8)..(0,-0.8);
\node [anchor=north] (labela) at (0,-2.7) {\small{(a)梯度下降算法中的``锯齿''现象}};
......@@ -62,21 +62,21 @@
\draw [-,ublue] (0,0) .. controls (2,0) and (3,-1.0)..(3,-1.5) .. controls (3,-2.2) and (2,-1.75)..(1.5,-1.65)..controls (1.5,-1.65) and (0.5,-1.45)..(0,-1.45)..controls (-0.5,-1.45) and (-1.5,-1.65)..(-1.5,-1.65)..controls (-2,-1.75)and (-3,-2.2).. (-3,-1.5)..controls (-3,-1.0) and (-2,0)..(0,0);
\draw [-,ublue,thick] (0,0) .. controls (2,0) and (3,-1.0)..(3,-1.5) .. controls (3,-2.2) and (2,-1.75)..(1.5,-1.65)..controls (1.5,-1.65) and (0.5,-1.45)..(0,-1.45)..controls (-0.5,-1.45) and (-1.5,-1.65)..(-1.5,-1.65)..controls (-2,-1.75)and (-3,-2.2).. (-3,-1.5)..controls (-3,-1.0) and (-2,0)..(0,0);
\draw [-,ublue] (0,0.5)..controls (2,0.5) and (4,-1.0).. (4,-1.7)..controls(4,-2.6)and (3,-2.3)..(2,-2.05)..controls (2,-2.05) and (1,-1.80)..(0,-1.80)..controls (-1,-1.80)and (-2,-2.05)..(-2,-2.05)..controls(-3,-2.3)and(-4,-2.6)..(-4,-1.7)..controls(-4,-1.0)and (-2,0.5)..(0,0.5);
\draw [-,ublue,thick] (0,0.5)..controls (2,0.5) and (4,-1.0).. (4,-1.7)..controls(4,-2.6)and (3,-2.3)..(2,-2.05)..controls (2,-2.05) and (1,-1.80)..(0,-1.80)..controls (-1,-1.80)and (-2,-2.05)..(-2,-2.05)..controls(-3,-2.3)and(-4,-2.6)..(-4,-1.7)..controls(-4,-1.0)and (-2,0.5)..(0,0.5);
\draw[-,ublue](0,1.0)..controls(3,1.0) and (5,-1.0)..(5,-1.9)..controls (5,-3.2)and (4,-2.7)..(3,-2.5)..controls (3,-2.5) and (2,-2.20)..(0,-2.15)..controls (-2,-2.20)and (-3,-2.5)..(-3,-2.5)..controls (-4,-2.7) and (-5,-3.2) ..(-5,-1.9)..controls (-5,-1.0) and (-3,1.0)..(0,1.0);
\draw[-,ublue,thick](0,1.0)..controls(3,1.0) and (5,-1.0)..(5,-1.9)..controls (5,-3.2)and (4,-2.7)..(3,-2.5)..controls (3,-2.5) and (2,-2.20)..(0,-2.15)..controls (-2,-2.20)and (-3,-2.5)..(-3,-2.5)..controls (-4,-2.7) and (-5,-3.2) ..(-5,-1.9)..controls (-5,-1.0) and (-3,1.0)..(0,1.0);
\draw[-,ublue] (0,-0.3)..controls (1.5,-0.3)and (2.5,-1.0)..(2.5,-1.4)..controls(2.5,-1.8)and (2,-1.55)..(1.5,-1.45) ..controls (1.5,-1.45) and (0.5,-1.25)..(0,-1.25) .. controls(-0.5,-1.25)and (-1.5,-1.45)..(-1.5,-1.45)..controls(-2,-1.55)and (-2.5,-1.8) ..(-2.5,-1.4)..controls(-2.5,-1.0) and (-1.5,-0.3)..(0,-0.3);
\draw[-,ublue,thick] (0,-0.3)..controls (1.5,-0.3)and (2.5,-1.0)..(2.5,-1.4)..controls(2.5,-1.8)and (2,-1.55)..(1.5,-1.45) ..controls (1.5,-1.45) and (0.5,-1.25)..(0,-1.25) .. controls(-0.5,-1.25)and (-1.5,-1.45)..(-1.5,-1.45)..controls(-2,-1.55)and (-2.5,-1.8) ..(-2.5,-1.4)..controls(-2.5,-1.0) and (-1.5,-0.3)..(0,-0.3);
\draw[-,ublue](0,-0.5)..controls (1.0,-0.5) and (1.9,-0.8)..(1.9,-1.3)..controls(1.9,-1.5)and (1.5,-1.3)..(1.0,-1.2) ..controls(1.0,-1.2) and (0.5,-1.1)..(0,-1.1)..controls(-0.5,-1.1) and (-1.0,-1.2)..(-1.0,-1.2)..controls (-1.5,-1.3)and (-1.9,-1.5)..(-1.9,-1.3) ..controls(-1.9,-0.8)and (-1.0,-0.5) ..(0,-0.5);
\draw[-,ublue,thick](0,-0.5)..controls (1.0,-0.5) and (1.9,-0.8)..(1.9,-1.3)..controls(1.9,-1.5)and (1.5,-1.3)..(1.0,-1.2) ..controls(1.0,-1.2) and (0.5,-1.1)..(0,-1.1)..controls(-0.5,-1.1) and (-1.0,-1.2)..(-1.0,-1.2)..controls (-1.5,-1.3)and (-1.9,-1.5)..(-1.9,-1.3) ..controls(-1.9,-0.8)and (-1.0,-0.5) ..(0,-0.5);
\draw[-,ublue](0,-0.7)..controls(1.0,-0.7) and (1.4,-0.9)..(1.4,-1.1) .. controls(1.4,-1.25) and (1.2,-1.15)..(1.0,-1.1)..controls(1.0,-1.1) and (0.5,-0.95)..(0,-0.95)..controls(-0.5,-0.95)and (-1.0,-1.1) ..(-1.0,-1.1)..controls(-1.2,-1.15) and (-1.4,-1.25)..(-1.4,-1.1)..controls(-1.4,-0.9) and (-1.0,-0.7)..(0,-0.7);
\draw[-,ublue,thick](0,-0.7)..controls(1.0,-0.7) and (1.4,-0.9)..(1.4,-1.1) .. controls(1.4,-1.25) and (1.2,-1.15)..(1.0,-1.1)..controls(1.0,-1.1) and (0.5,-0.95)..(0,-0.95)..controls(-0.5,-0.95)and (-1.0,-1.1) ..(-1.0,-1.1)..controls(-1.2,-1.15) and (-1.4,-1.25)..(-1.4,-1.1)..controls(-1.4,-0.9) and (-1.0,-0.7)..(0,-0.7);
\draw[-,ublue](0,-0.75)..controls(0.7,-0.75)and (1.0,-0.9)..(1.0,-1.0)..controls(1.0,-1.05) and (0.9,-1.05)..(0.7,-1.0)..controls(0.5,-0.95)and (0.3,-0.9)..(0,-0.9)..controls(-0.3,-0.9)and (-0.5,-0.95)..(-0.7,-1.0)..controls(-0.9,-1.05)and (-1.0,-1.05)..(-1.0,-1.0) ..controls(-1.0,-0.9)and (-0.7,-0.75)..(0,-0.75);
\draw[-,ublue,thick](0,-0.75)..controls(0.7,-0.75)and (1.0,-0.9)..(1.0,-1.0)..controls(1.0,-1.05) and (0.9,-1.05)..(0.7,-1.0)..controls(0.5,-0.95)and (0.3,-0.9)..(0,-0.9)..controls(-0.3,-0.9)and (-0.5,-0.95)..(-0.7,-1.0)..controls(-0.9,-1.05)and (-1.0,-1.05)..(-1.0,-1.0) ..controls(-1.0,-0.9)and (-0.7,-0.75)..(0,-0.75);
\draw[-,ublue](0,-0.8)..controls(0.5,-0.8) and (0.6,-0.85)..(0.6,-0.9)..controls(0.6,-0.93)and (0.5,-0.91)..(0.3,-0.88)..controls(0.2,-0.87)and (0.1,-0.86)..(0,-0.86)..controls(-0.1,-0.86)and(-0.2,-0.87)..(-0.3,-0.88)..controls(-0.5,-0.91) and(-0.6,-0.93) ..(-0.6,-0.9)..controls(-0.6,-0.85)and (-0.5,-0.8)..(0,-0.8);
\draw[-,ublue,thick](0,-0.8)..controls(0.5,-0.8) and (0.6,-0.85)..(0.6,-0.9)..controls(0.6,-0.93)and (0.5,-0.91)..(0.3,-0.88)..controls(0.2,-0.87)and (0.1,-0.86)..(0,-0.86)..controls(-0.1,-0.86)and(-0.2,-0.87)..(-0.3,-0.88)..controls(-0.5,-0.91) and(-0.6,-0.93) ..(-0.6,-0.9)..controls(-0.6,-0.85)and (-0.5,-0.8)..(0,-0.8);
\node [anchor=north] (labelb) at (0,-3) {\small{(b)Momentum梯度下降算法更加``平滑''地更新}};
......
......@@ -37,15 +37,15 @@
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.4em,fill=red!20] [fit = (neuron00) (neuron01) (neuron02)] (layer) {};
\node [rectangle,inner sep=0.4em,fill=red!30] [fit = (neuron00) (neuron01) (neuron02)] (layer) {};
\node [anchor=south] (layerlabel) at ([yshift=0.2em]layer.north) {\scriptsize{一层神经元}};
\node [rectangle,inner sep=0.1em,fill=ugreen!20] [fit = (x0) (x1)] (inputshadow) {};
\node [rectangle,inner sep=0.1em,fill=ugreen!30] [fit = (x0) (x1)] (inputshadow) {};
\node [rectangle,inner sep=0.1em,fill=blue!20] [fit = (y0) (y1) (y2)] (outputshadow) {};
\node [rectangle,inner sep=0.1em,fill=blue!30] [fit = (y0) (y1) (y2)] (outputshadow) {};
\end{pgfonlayer}
......
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{axis}[
width=8cm, height=5cm,
width=10cm, height=5cm,
xtick={-6,-4,...,6},
ytick={0,0.5,1},
xlabel={\small{$x$}},
......
......@@ -7,7 +7,7 @@
\draw[step=0.5cm,color=orange!70,thick] (-1,-1) grid (1,1);
\foreach \y in {+0.75,+0.25,-0.25,-0.75}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=orange!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\node [fill=orange!25,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\addtocounter{mycount1}{1};
}
}
......@@ -20,7 +20,7 @@
\draw[step=0.5cm,color=blue!70,thick] (-1,-1) grid (1,1);
\foreach \y in {+0.75,+0.25,-0.25,-0.75}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=blue!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount2}$};
\node [fill=blue!25,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount2}$};
\addtocounter{mycount2}{1};
}
}
......@@ -33,7 +33,7 @@
\draw[step=0.5cm,color=ugreen!70,thick] (-1,-1) grid (1,1);
\foreach \y in {+0.75,+0.25,-0.25,-0.75}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=green!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount3}$};
\node [fill=green!25,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount3}$};
\addtocounter{mycount3}{1};
}
}
......@@ -46,7 +46,7 @@
\draw[step=0.5cm,color=red!70,thick] (-1,-1) grid (1,1);
\foreach \y in {+0.75,+0.25,-0.25,-0.75}
\foreach \x in {-0.75,-0.25,0.25,0.75}{
\node [fill=red!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount4}$};
\node [fill=red!25,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount4}$};
\addtocounter{mycount4}{1};
}
\node [anchor=north] (xlabel) at (0,-1.2) {$\mathbi{x}$};
......@@ -54,7 +54,7 @@
\node [anchor=south west] (label14) at (-1.3,0.9) {\footnotesize{\ding{175}}};
\end{scope}
\begin{scope}[yshift=5em,xshift=1.5in]
\begin{scope}[yshift=5em,xshift=1.8in]
{
\draw[step=0.5cm,thick] (-0.5,-1) grid (0.5,1.0);
\node [fill=black!20,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (-0.25,0.75) {\small{$-1$}};
......@@ -68,19 +68,19 @@
\node [anchor=north] (xlabel) at (0,-1.2) {$\mathbi{W}$};
}
{\draw [->,thick,dashed] (-1.5in+2em+1.5em,-0.3) .. controls +(east:2) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{矩阵乘}}};}
{\draw [->,thick,dashed] (-1.5in+2em+1.0em,-0.5) .. controls +(east:2) and +(west:1) .. (-0.55,0.8) ;}
{\draw [->,thick,dashed] (-1.5in+2em+0.5em,-0.7) .. controls +(east:2.5) and +(west:1) .. (-0.55,0.8) ;}
{\draw [->,thick,dashed] (-1.5in+2em,-0.9) .. controls +(east:3) and +(west:1) .. (-0.55,0.8);}
{\draw [->,thick,dashed] (-1.8in+2em+1.5em,-0.3) .. controls +(east:2) and +(west:1) .. (-0.55,0.8) node [pos=0.5,left] {\scriptsize{\textbf{矩阵乘}}};}
{\draw [->,thick,dashed] (-1.8in+2em+1.0em,-0.5) .. controls +(east:2) and +(west:1) .. (-0.55,0.8) ;}
{\draw [->,thick,dashed] (-1.8in+2em+0.5em,-0.7) .. controls +(east:2.5) and +(west:1) .. (-0.55,0.8) ;}
{\draw [->,thick,dashed] (-1.8in+2em,-0.9) .. controls +(east:3) and +(west:1) .. (-0.55,0.8);}
\end{scope}
\begin{scope}[yshift=6.5em,xshift=1em+3in]
\begin{scope}[yshift=6.5em,xshift=1em+3.6in]
{
\draw[step=0.5cm,color=orange!70,thick] (-0.5,-1) grid (0.5,1.0);
\foreach \y in {+0.75,+0.25,-0.25,-0.75}{
\setcounter{mycount1}{2}
\foreach \x in {-0.25,0.25}{
\node [fill=orange!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\node [fill=orange!25,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\addtocounter{mycount1}{-1};
}
}
......@@ -88,13 +88,13 @@
\node [anchor=south west] (label21) at (-0.8,0.9) {\footnotesize{\ding{172}}};
\end{scope}
\begin{scope}[yshift=6em,xshift=0.5em+3in]
\begin{scope}[yshift=6em,xshift=0.5em+3.6in]
{
\draw[step=0.5cm,color=blue!70,thick] (-0.5,-1) grid (0.5,1.0);
\foreach \y in {+0.75,+0.25,-0.25,-0.75}{
\setcounter{mycount1}{2}
\foreach \x in {-0.25,0.25}{
\node [fill=blue!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\node [fill=blue!25,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\addtocounter{mycount1}{-1};
}
}
......@@ -102,13 +102,13 @@
\node [anchor=south west] (label22) at (-0.8,0.9) {\footnotesize{\ding{173}}};
\end{scope}
\begin{scope}[yshift=5.5em,xshift=0em+3in]
\begin{scope}[yshift=5.5em,xshift=0em+3.6in]
{
\draw[step=0.5cm,color=ugreen!70,thick] (-0.5,-1) grid (0.5,1.0);
\foreach \y in {+0.75,+0.25,-0.25,-0.75}{
\setcounter{mycount1}{2}
\foreach \x in {-0.25,0.25}{
\node [fill=green!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\node [fill=green!25,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\addtocounter{mycount1}{-1};
}
}
......@@ -116,13 +116,13 @@
\node [anchor=south west] (label23) at (-0.8,0.9) {\footnotesize{\ding{174}}};
\end{scope}
\begin{scope}[yshift=5.0em,xshift=-0.5em+3in]
\begin{scope}[yshift=5.0em,xshift=-0.5em+3.6in]
{
\draw[step=0.5cm,color=red!70,thick] (-0.5,-1) grid (0.5,1.0);
\foreach \y in {+0.75,+0.25,-0.25,-0.75}{
\setcounter{mycount1}{2}
\foreach \x in {-0.25,0.25}{
\node [fill=red!15,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\node [fill=red!25,inner sep=0pt,minimum height=0.49cm,minimum width=0.49cm] at (\x,\y) {$\number\value{mycount1}$};
\addtocounter{mycount1}{-1};
}
}
......@@ -130,7 +130,7 @@
\node [anchor=south west] (label24) at (-0.8,0.9) {\footnotesize{\ding{175}}};
{
\node [anchor=north] (xlabel) at (0,-1.2) {${\mathbi{x}} \cdot {\mathbi{W}}$};
\node [anchor=center] (elabel) at (-0.7in,0) {\Huge{$\textbf{=}$}};
\node [anchor=center] (elabel) at (-0.9in,0) {\Huge{$\textbf{=}$}};
}
\end{scope}
......
......@@ -10,7 +10,7 @@
\draw[step=0.5cm,color=orange!70,line width=0.4mm] (-2,-2) grid (1,1);
\foreach \y in {+0.5,-0.5,-1.5}
\foreach \x in {-1.5,-0.5,0.5}{
\node [fill=orange!15,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount1}};
\node [fill=orange!25,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount1}};
\addtocounter{mycount1}{1};
}
\end{scope}
......@@ -20,7 +20,7 @@
\draw[step=0.5cm,color=blue!70,line width=0.4mm] (-2,-2) grid (1,1);
\foreach \y in {+0.5,-0.5,-1.5}
\foreach \x in {-1.5,-0.5,0.5}{
\node [fill=blue!15,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount2}};
\node [fill=blue!25,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount2}};
\addtocounter{mycount2}{1};
}
\end{scope}
......@@ -30,7 +30,7 @@
\draw[step=0.5cm,color=ugreen!70,line width=0.4mm] (-2,-2) grid (1,1);
\foreach \y in {+0.5,-0.5,-1.5}
\foreach \x in {-1.5,-0.5,0.5}{
\node [fill=ugreen!15,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount3}};
\node [fill=ugreen!25,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount3}};
\addtocounter{mycount3}{1};
}
\end{scope}
......@@ -40,7 +40,7 @@
\draw[step=0.5cm,color=red!70,line width=0.4mm] (-2,-2) grid (1,1);
\foreach \y in {+0.5,-0.5,-1.5}
\foreach \x in {-1.5,-0.5,0.5}{
\node [fill=red!15,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount4}};
\node [fill=red!25,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount4}};
\addtocounter{mycount4}{1};
}
\end{scope}
......
......@@ -2,7 +2,7 @@
\begin{tikzpicture}
\scriptsize{
\begin{semilogyaxis}[
width=.75\textwidth,
width=.8\textwidth,
height=.30\textwidth,
yticklabel style={/pgf/number format/precision=1,/pgf/number format/fixed zerofill},
xticklabel style={/pgf/number format/1000 sep=},
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!25,red!30,minimum height=2em,minimum width=2em,font=\small]
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!35,red!30,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a1) at (0,0) {};
\draw[->,thick] ([xshift=-2em,yshift=0em]a1.south) to ([xshift=3em,yshift=0em]a1.south);
\draw[->,thick] ([xshift=0em,yshift=-4em]a1.west) to ([xshift=0em,yshift=2em]a1.west);
......@@ -13,8 +13,8 @@
\node [anchor=west] (x) at ([xshift=-0.7em,yshift=1em]a1.south) {\Large{$\textbf{F}$}};
{
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!25,red!30,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a2) at ([xshift=10em,yshift=0em]a1.south) {};
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!35,red!30,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a2) at ([xshift=12em,yshift=0em]a1.south) {};
\draw[->,thick] ([xshift=-2em,yshift=0em]a2.north) to ([xshift=3em,yshift=0em]a2.north);
\draw[->,thick] ([xshift=0em,yshift=-2em]a2.west) to ([xshift=0em,yshift=4em]a2.west);
\node [anchor=south] (heng1) at ([xshift=2.5em,yshift=1.25em]a2.south) {\scriptsize{$x$}};
......@@ -37,8 +37,8 @@
}
{
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!25,red!30,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a3) at ([xshift=11em,yshift=2.05em]a2.south) {};
\tikzstyle{neuron} = [rectangle,draw,thick,fill=red!35,red!30,minimum height=2em,minimum width=2em,font=\small]
\node[neuron,anchor=north] (a3) at ([xshift=13em,yshift=2.05em]a2.south) {};
\draw[->,thick] ([xshift=-3em,yshift=0em]a3.north) to ([xshift=2em,yshift=0em]a3.north);
\draw[->,thick] ([xshift=-1em,yshift=-2em]a3.west) to ([xshift=-1em,yshift=4em]a3.west);
\node [anchor=south] (heng1) at ([xshift=1.5em,yshift=1.2em]a3.south) {\scriptsize{$x$}};
......
......@@ -52,7 +52,7 @@
\begin{pgfonlayer}{background}
{
\node [rectangle,inner sep=0.2em,fill=blue!20,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
\node [rectangle,inner sep=0.2em,fill=blue!30,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] [fit = (flabel) (flabel2) (flabel3)] (funcbox) {};
}
\end{pgfonlayer}
}
......
......@@ -16,7 +16,7 @@
{\draw [-,very thick,ublue,rounded corners=0.1em] (-1.5,0) -- (0.5,0) -- (0.5,1) -- (1.5,1);}
\end{scope}
%---------------------------------------------------------------------------------------------
\begin{scope}[xshift=1.6in]
\begin{scope}[xshift=1.8in]
{
\draw [->,thick] (-1.8,0) -- (1.8,0);
\draw [->,thick] (0,0) -- (0,2);
......@@ -32,7 +32,7 @@
\end{scope}
%-----------------------------------------------------------------------------------------------
\begin{scope}[xshift=3.2in]
\begin{scope}[xshift=3.6in]
{
\draw [->,thick] (-1.8,0) -- (1.8,0);
\draw [->,thick] (0,0) -- (0,2);
......
......@@ -16,7 +16,7 @@
{\draw [-,very thick,ublue,rounded corners=0.1em] (-1.5,0) -- (0.5,0) -- (0.5,0.7) -- (1.5,0.7);}
\end{scope}
%---------------------------------------------------------------------------------------------
\begin{scope}[xshift=1.6in]
\begin{scope}[xshift=1.8in]
{
\draw [->,thick] (-1.8,0) -- (1.8,0);
\draw [->,thick] (0,0) -- (0,2);
......@@ -32,7 +32,7 @@
\end{scope}
%-----------------------------------------------------------------------------------------------
\begin{scope}[xshift=3.2in]
\begin{scope}[xshift=3.6in]
{
\draw [->,thick] (-1.8,0) -- (1.8,0);
\draw [->,thick] (0,0) -- (0,2);
......
......@@ -3,33 +3,33 @@
\tikzstyle{neuronnode} = [minimum size=2.2em,circle,draw,ublue,very thick,inner sep=1pt, fill=white,align=center,drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}]
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias10) at (0,0.05) {\footnotesize{${\mathbi{b}}^{[1]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias11) at ([xshift=-1.5em,yshift=-0.3em]bias10.south) {\footnotesize{偏置1}};
\node [anchor=center,rotate=13,fill=white,inner sep=1pt] (b11) at ([yshift=1.0em,xshift=1.8em]bias10.north) {\scriptsize{$b_{11}$}};
\node [anchor=center,rotate=13,fill=white,inner sep=1pt] (b11) at ([yshift=1.2em,xshift=4.2em]bias10.north) {\scriptsize{$b_{11}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input10) at (2,0) {\footnotesize {$x_1$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input10) at (3,0) {\footnotesize {$x_1$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input11) at ([xshift=-2.1em,yshift=-0.3em]input10.south) {\footnotesize {天空状况}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input20) at (4,0) {\footnotesize {$x_2$}};
\node [anchor=center,rotate=38,fill=white,inner sep=1pt] (w12) at ([yshift=1.25em,xshift=0.85em]input20.north) {\scriptsize{$w_{22}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input20) at (6,0) {\footnotesize {$x_2$}};
\node [anchor=center,rotate=32,fill=white,inner sep=1pt] (w12) at ([yshift=1.25em,xshift=1.4em]input20.north) {\scriptsize{$w_{22}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input21) at ([xshift=-2.1em,yshift=-0.3em]input20.south) {\footnotesize {低空气温}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input30) at (6,0) {\footnotesize {$x_3$}};
\node [anchor=center,rotate=-35,fill=white,inner sep=1pt] (w13) at ([yshift=1.2em,xshift=-1.0em]input30.north) {\scriptsize{$w_{32}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input30) at (9,0) {\footnotesize {$x_3$}};
\node [anchor=center,rotate=-20,fill=white,inner sep=1pt] (w13) at ([yshift=1.2em,xshift=-1.4em]input30.north) {\scriptsize{$w_{32}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (input31) at ([xshift=-2.1em,yshift=-0.3em]input30.south) {\footnotesize {水平气压}};
\node [neuronnode] (n10) at ([xshift=1.5em,yshift=4em]input10.east) {\tiny{Tanh}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n10) at ([xshift=3em,yshift=4em]input10.east) {\tiny{Tanh}\\[-1ex] \tiny{$\sum$}};
\node [anchor=east,minimum width=2.0em,minimum height=1.5em] (a1) at ([xshift=2.3em,yshift=0em]n10.east) {\footnotesize {温度}};
\node [anchor=center,rotate=0,fill=white,inner sep=1pt] (w21) at ([xshift=0.8em,yshift=0.8em]n10.north) {\scriptsize{$w'_{11}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias20) at ([xshift=-5em,yshift=0.3em]n10.west) {\footnotesize {$b^{[2]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias20) at ([xshift=-7.2em,yshift=0.3em]n10.west) {\footnotesize {$b^{[2]}$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (bias21) at ([xshift=-1.5em,yshift=-0.3em]bias20.south) {\footnotesize {偏置2}};
\node [anchor=center,rotate=25,fill=white,inner sep=1pt] (b21) at ([yshift=1.1em,xshift=1.9em]bias20.north) {\scriptsize{$b'_{11}$}};
\node [neuronnode] (n11) at ([xshift=1.5em,yshift=4em]input20.east){\tiny{Tanh}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n11) at ([xshift=2.8em,yshift=4em]input20.east){\tiny{Tanh}\\[-1ex] \tiny{$\sum$}};
\node [anchor=east,minimum width=2.0em,minimum height=1.5em] (a1) at ([xshift=2.3em,yshift=0em]n11.east) {\footnotesize {风速}};
\node [anchor=center,rotate=-15,fill=white,inner sep=1pt] (w22) at ([yshift=1.05em,xshift=-1.8em]n11.north) {\scriptsize{$w'_{21}$}};
\draw [-,ublue] (n10.west) -- (n10.east);
\draw [-,ublue] (n11.west) -- (n11.east);
\node [neuronnode] (n20) at ([xshift=1.5em,yshift=8em]input10.east) {\tiny{Sigmoid}\\[-1ex] \tiny{$\sum$}};
\node [neuronnode] (n20) at ([xshift=3em,yshift=8em]input10.east) {\tiny{Sigmoid}\\[-1ex] \tiny{$\sum$}};
\node [anchor=east,minimum width=2.0em,minimum height=1.5em] (a1) at ([xshift=3.9em,yshift=0em]n20.east) {\footnotesize {穿衣指数}};
\draw [-,ublue] (n20.west) -- (n20.east);
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (output) at ([xshift=0.5em,yshift=12em]input10.east) {\footnotesize {$y$}};
\node [anchor=west,minimum width=2.0em,minimum height=1.5em] (output) at ([xshift=2em,yshift=12em]input10.east) {\footnotesize {$y$}};
\draw [->,thick,ublue!70,line width=0.33mm] (input10.north) -- (n10.south);
\draw [->,thick,ugreen,line width=0.33mm] (input20.north) -- (n10.south);
......@@ -41,9 +41,9 @@
\draw [->,thick,line width=0.33mm] (n20.north) -- (output.south);
\draw [->,thick,red!80,line width=0.33mm] (bias10.north) -- (n10.south);
\draw [->,thick,orange,line width=0.33mm] (bias10.north) -- (n11.south);
\node [anchor=center,rotate=10,fill=white,inner sep=1pt] (b12) at ([yshift=1.0em,xshift=4em]bias10.north) {\scriptsize{$b_{12}$}};
\node [anchor=center,rotate=40,fill=white,inner sep=1pt] (w11) at ([yshift=0.55em,xshift=1.5em]input10.north) {\scriptsize{$w_{11}$}};
\node [anchor=center,rotate=-30,fill=white,inner sep=1pt] (w12) at ([yshift=0.5em,xshift=-1.4em]input20.north) {\scriptsize{$w_{21}$}};
\node [anchor=center,rotate=10,fill=white,inner sep=1pt] (b12) at ([yshift=0.6em,xshift=7em]bias10.north) {\scriptsize{$b_{12}$}};
\node [anchor=center,rotate=30,fill=white,inner sep=1pt] (w11) at ([yshift=0.4em,xshift=1.7em]input10.north) {\scriptsize{$w_{11}$}};
\node [anchor=center,rotate=-25,fill=white,inner sep=1pt] (w12) at ([yshift=0.4em,xshift=-1.7em]input20.north) {\scriptsize{$w_{21}$}};
\end{tikzpicture}
%%%------------------------------------------------------------------------------------------------------------
......
......@@ -16,7 +16,7 @@
{\draw [-,very thick,ublue,domain=-1.5:1.5,samples=100] plot (\x,{1/(1+exp(-2*\x))});}
\end{scope}
%---------------------------------------------------------------------------------------------
\begin{scope}[xshift=1.6in]
\begin{scope}[xshift=1.8in]
{
\draw [->,thick] (-1.8,0) -- (1.8,0);
\draw [->,thick] (0,0) -- (0,2);
......@@ -31,7 +31,7 @@
{\draw [-,very thick,ublue,domain=-1.5:1.5,samples=100] plot (\x,{1/(1+exp(-4*\x))});}
\end{scope}
%-----------------------------------------------------------------------------------------------
\begin{scope}[xshift=3.2in]
\begin{scope}[xshift=3.6in]
{
\draw [->,thick] (-1.8,0) -- (1.8,0);
\draw [->,thick] (0,0) -- (0,2);
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -2074,13 +2074,13 @@ z_t&=&\gamma z_{t-1}+(1-\gamma) \frac{\partial J}{\partial {\theta}_t} \cdot \f
\parinterval 那么,分布式表示中每个维度的含义是什么?可以把每一维度都理解为一种属性,比如一个人的身高、体重等。但是,神经网络模型更多的是把每个维度看作是单词的一种抽象“刻画”,是一种统计意义上的“语义”,而非简单的人工归纳的事物的一个个属性。使用这种连续空间的表示的好处在于,表示的内容(实数向量)可以进行计算和学习,因此可以通过模型训练得到更适用于自然语言处理的单词表示结果。
\parinterval 为了方便理解,看一个简单的例子。假如现在有个“预测下一个单词”的任务:有这样一个句子“屋里 要 摆放 一个 \rule[-3pt]{1cm}{0.05em}”,其中下划线的部分表示需要预测的下一个单词。如果模型在训练数据中看到过类似于“摆放 一个 桌子”这样的片段,那么就可以很自信的预测出“桌子”。另一方面,很容易知道,实际上与“桌子”相近的单词,如“椅子”,也是可以预测的单词的。但是,“椅子”恰巧没有出现在训练数据中,这时如果用One-hot编码来表示单词,显然无法把“椅子”填到下划线处;而如果使用单词的分布式表示,很容易就知道 “桌子”与“椅子”是相似的,因此预测“ 椅子”在一定程度上也是合理的。
\parinterval 为了方便理解,看一个简单的例子。假如现在有个“预测下一个单词”的任务:有这样一个句子“屋里/要/摆放/一个/\rule[-3pt]{1cm}{0.05em}”,其中下划线的部分表示需要预测的下一个单词。如果模型在训练数据中看到过类似于“摆放/一个/桌子”这样的片段,那么就可以很自信地预测出“桌子”。另一方面,很容易知道,实际上与“桌子”相近的单词,如“椅子”,也是可以预测的单词。但是,“椅子”恰巧没有出现在训练数据中,这时如果用One-hot编码来表示单词,显然无法把“椅子”填到下划线处;而如果使用单词的分布式表示,很容易就知道“桌子”与“椅子”是相似的,因此预测“椅子”在一定程度上也是合理的。
\begin{example}
屋里\ \ 摆放\ 一个 \_\_\_\_\_ \hspace{0.5em} \quad \quad 预测下个词
屋里/要/摆放/一个/\_\_\_\_ \hspace{0.5em} \quad \quad 预测下个词
\hspace{2em} 屋里\ \ 摆放\ 一个\ { \red{桌子}} \hspace{3.2em}见过
\hspace{2em} 屋里/要/摆放/一个/{\red{桌子}} \hspace{3.2em}见过
\hspace{2em} 屋里\ \ 摆放\ 一个\ { \blue{椅子}} \hspace{3.2em}没见过,但是仍然是合理预测
\hspace{2em} 屋里/要/摆放/一个/{\blue{椅子}} \hspace{3.2em}没见过,但是仍然是合理预测
\end{example}
\parinterval 关于单词的分布式表示还有一个经典的例子,通过词嵌入可以得到如下关系:$\textrm{“国王”}=\textrm{“女王”}-\textrm{“女人”} +\textrm{“男人”}$。从这个例子可以看出,词嵌入也具有一些代数性质,比如,词的分布式表示可以通过加、减等代数运算相互转换。图\ref{fig:9-66}展示了词嵌入在一个二维平面上的投影,不难发现,含义相近的单词分布比较邻近。
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......
......@@ -5090,7 +5090,7 @@ author = {Yoshua Bengio and
Tobias Weyand and
Marco Andreetto and
Hartwig Adam},
journal={CoRR},
publisher ={CoRR},
year={2017},
}
@inproceedings{sifre2014rigid,
......@@ -7130,7 +7130,7 @@ author = {Yoshua Bengio and
@inproceedings{Katharopoulos2020TransformersAR,
title={Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention},
author={Angelos Katharopoulos and Apoorv Vyas and Nikolaos Pappas and Fran{\c{c}}ois Fleuret},
publisher={CoRR},
publisher={International Conference on Machine Learning},
year={2020},
volume={abs/2006.16236}
}
......@@ -9243,6 +9243,108 @@ author = {Zhuang Liu and
publisher = {Asian Conference on Machine Learning},
year = {2018}
}
@inproceedings{DBLP:journals/corr/abs-1810-02525,
author = {Peter Henderson and
Joshua Romoff and
Joelle Pineau},
title = {Where Did My Optimum Go?: An Empirical Analysis of Gradient Descent
Optimization in Policy Gradient Methods},
publisher = {CoRR},
volume = {abs/1810.02525},
year = {2018}
}
@inproceedings{DBLP:conf/nips/Kakade01,
author = {Sham M. Kakade},
title = {A Natural Policy Gradient},
pages = {1531--1538},
publisher = {Advances in Neural Information Processing Systems},
year = {2001}
}
@inproceedings{DBLP:conf/icml/KoolHW19,
author = {Wouter Kool and
Herke van Hoof and
Max Welling},
title = {Stochastic Beams and Where To Find Them: The Gumbel-Top-k Trick for
Sampling Sequences Without Replacement},
series = {Proceedings of Machine Learning Research},
volume = {97},
pages = {3499--3508},
publisher = {International Conference on Machine Learning},
year = {2019}
}
@inproceedings{DBLP:journals/corr/abs-2012-13866,
author = {Bei Li and
Ziyang Wang and
Hui Liu and
Quan Du and
Tong Xiao and
Chunliang Zhang and
Jingbo Zhu},
title = {Learning Light-Weight Translation Models from Deep Transformer},
publisher = {CoRR},
volume = {abs/2012.13866},
year = {2020}
}
@inproceedings{DBLP:conf/aclnmt/HuLLLLWXZ20,
author = {Chi Hu and
Bei Li and
Yinqiao Li and
Ye Lin and
Yanyang Li and
Chenglong Wang and
Tong Xiao and
Jingbo Zhu},
title = {The NiuTrans System for {WNGT} 2020 Efficiency Task},
pages = {204--210},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/acl/WeiYHZWL20,
author = {Xiangpeng Wei and
Heng Yu and
Yue Hu and
Yue Zhang and
Rongxiang Weng and
Weihua Luo},
title = {Multiscale Collaborative Deep Models for Neural Machine Translation},
pages = {414--426},
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2020}
}
@inproceedings{DBLP:conf/emnlp/DeNeefeKWM07,
author = {Steve DeNeefe and
Kevin Knight and
Wei Wang and
Daniel Marcu},
title = {What Can Syntax-Based {MT} Learn from Phrase-Based {MT}?},
pages = {755--763},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2007}
}
@inproceedings{DBLP:conf/emnlp/ShiPK16,
author = {Xing Shi and
Inkit Padhi and
Kevin Knight},
title = {Does String-Based Neural {MT} Learn Source Syntax?},
pages = {1526--1534},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2016}
}
@inproceedings{tu2017neural,
title={Neural machine translation with reconstruction},
author={Tu, Zhaopeng and Liu, Yang and Shang, Lifeng and Liu, Xiaohua and Li, Hang},
publisher={AAAI Conference on Artificial Intelligence},
volume={31},
number={1},
year={2017}
}
@inproceedings{li2020train,
title={Train large, then compress: Rethinking model size for efficient training and inference of transformers},
author={Li, Zhuohan and Wallace, Eric and Shen, Sheng and Lin, Kevin and Keutzer, Kurt and Klein, Dan and Gonzalez, Joseph E},
publisher={arXiv preprint arXiv:2002.11794},
year={2020}
}
%%%%% chapter 15------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......@@ -11520,28 +11622,28 @@ author = {Zhuang Liu and
publisher = {International Conference on Machine Learning},
year = {2018}
}
@article{zhao2020dual,
@inproceedings{zhao2020dual,
title={Dual Learning: Theoretical Study and an Algorithmic Extension},
author={Zhao, Zhibing and Xia, Yingce and Qin, Tao and Xia, Lirong and Liu, Tie-Yan},
journal={arXiv preprint arXiv:2005.08238},
publisher ={arXiv preprint arXiv:2005.08238},
year={2020}
}
%%%%% chapter 16------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 17------------------------------------------------------
@article{DBLP:journals/ac/Bar-Hillel60,
@inproceedings{DBLP:journals/ac/Bar-Hillel60,
author = {Yehoshua Bar-Hillel},
title = {The Present Status of Automatic Translation of Languages},
journal = {Advances in computers},
publisher = {Advances in computers},
volume = {1},
pages = {91--163},
year = {1960}
}
@article{DBLP:journals/corr/abs-1901-09115,
@inproceedings{DBLP:journals/corr/abs-1901-09115,
author = {Andrei Popescu-Belis},
title = {Context in Neural Machine Translation: {A} Review of Models and Evaluations},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1901.09115},
year = {2019}
}
......@@ -11596,7 +11698,7 @@ author = {Zhuang Liu and
@inproceedings{tiedemann2010context,
title={Context adaptation in statistical machine translation using models with exponentially decaying cache},
author={Tiedemann, J{\"o}rg},
publisher={Domain Adaptation for Natural Language Processing},
publisher={Annual Meeting of the Association for Computational Linguistics},
pages={8--15},
year={2010}
}
......@@ -11645,7 +11747,7 @@ author = {Zhuang Liu and
title = {Using Sense-labeled Discourse Connectives for Statistical Machine
Translation},
pages = {129--138},
publisher = {Annual Meeting of the Association for Computational Linguistics},
publisher = {Annual Conference of the European Association for Machine Translation},
year = {2012}
}
@inproceedings{DBLP:conf/emnlp/LaubliS018,
......@@ -11658,12 +11760,12 @@ author = {Zhuang Liu and
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2018}
}
@article{DBLP:journals/corr/abs-1912-08494,
@inproceedings{DBLP:journals/corr/abs-1912-08494,
author = {Sameen Maruf and
Fahimeh Saleh and
Gholamreza Haffari},
title = {A Survey on Document-level Machine Translation: Methods and Evaluation},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1912.08494},
year = {2019}
}
......@@ -11675,21 +11777,20 @@ author = {Zhuang Liu and
publisher = {Association for Computational Linguistics},
year = {2017}
}
@article{DBLP:journals/corr/abs-1910-07481,
@inproceedings{DBLP:journals/corr/abs-1910-07481,
author = {Valentin Mac{\'{e}} and
Christophe Servan},
title = {Using Whole Document Context in Neural Machine Translation},
journal = {CoRR},
volume = {abs/1910.07481},
publisher = {The International Workshop on Spoken Language Translation},
year = {2019}
}
@article{DBLP:journals/corr/JeanLFC17,
@inproceedings{DBLP:journals/corr/JeanLFC17,
author = {S{\'{e}}bastien Jean and
Stanislas Lauly and
Orhan Firat and
Kyunghyun Cho},
title = {Does Neural Machine Translation Benefit from Larger Context?},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1704.05135},
year = {2017}
}
......@@ -11721,12 +11822,12 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2019}
}
@article{DBLP:journals/corr/abs-2010-12827,
@inproceedings{DBLP:journals/corr/abs-2010-12827,
author = {Amane Sugiyama and
Naoki Yoshinaga},
title = {Context-aware Decoder for Neural Machine Translation using a Target-side
Document-Level Language Model},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/2010.12827},
year = {2020}
}
......@@ -11868,14 +11969,13 @@ author = {Zhuang Liu and
publisher = {International Joint Conference on Artificial Intelligence},
year = {2020}
}
@article{DBLP:journals/tacl/TuLSZ18,
@inproceedings{DBLP:journals/tacl/TuLSZ18,
author = {Zhaopeng Tu and
Yang Liu and
Shuming Shi and
Tong Zhang},
title = {Learning to Remember Translation History with a Continuous Cache},
publisher = {Transactions of the Association for Computational Linguistics},
volume = {6},
pages = {407--420},
year = {2018}
}
......@@ -11969,7 +12069,7 @@ author = {Zhuang Liu and
publisher = {{AAAI} Press},
year = {2019}
}
@article{DBLP:journals/tacl/YuSSLKBD20,
@inproceedings{DBLP:journals/tacl/YuSSLKBD20,
author = {Lei Yu and
Laurent Sartran and
Wojciech Stokowiec and
......@@ -11978,16 +12078,16 @@ author = {Zhuang Liu and
Phil Blunsom and
Chris Dyer},
title = {Better Document-Level Machine Translation with Bayes' Rule},
journal = {Transactions of the Association for Computational Linguistics},
publisher = {Transactions of the Association for Computational Linguistics},
volume = {8},
pages = {346--360},
year = {2020}
}
@article{DBLP:journals/corr/abs-1903-04715,
@inproceedings{DBLP:journals/corr/abs-1903-04715,
author = {S{\'{e}}bastien Jean and
Kyunghyun Cho},
title = {Context-Aware Learning for Neural Machine Translation},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1903.04715},
year = {2019}
}
......@@ -12009,7 +12109,7 @@ author = {Zhuang Liu and
publisher = {Annual Conference of the European Association for Machine Translation},
year = {2019}
}
@article{DBLP:journals/corr/abs-1911-03110,
@inproceedings{DBLP:journals/corr/abs-1911-03110,
author = {Liangyou Li and
Xin Jiang and
Qun Liu},
......@@ -12047,33 +12147,28 @@ author = {Zhuang Liu and
publisher = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
year = {2012}
}
@article{DBLP:journals/ftsig/GalesY07,
@inproceedings{DBLP:journals/ftsig/GalesY07,
author = {Mark J. F. Gales and
Steve J. Young},
title = {The Application of Hidden Markov Models in Speech Recognition},
journal = {Foundations and Trends in Signal Processing},
volume = {1},
number = {3},
publisher = {Foundations and Trends in Signal Processing},
pages = {195--304},
year = {2007}
}
@article{DBLP:journals/taslp/MohamedDH12,
@inproceedings{DBLP:journals/taslp/MohamedDH12,
author = {Abdel-rahman Mohamed and
George E. Dahl and
Geoffrey E. Hinton},
title = {Acoustic Modeling Using Deep Belief Networks},
journal = {IEEE Transactions on Audio, Speech, and Language Processing},
volume = {20},
number = {1},
publisher = {IEEE Transactions on Audio, Speech, and Language Processing},
pages = {14--22},
year = {2012}
}
@article{DBLP:journals/spm/X12a,
@inproceedings{DBLP:journals/spm/X12a,
author = {G Hinton and L Deng and D Yu and GE Dahl and B Kingsbury},
title = {Deep Neural Networks for Acoustic Modeling in Speech Recognition:
The Shared Views of Four Research Groups},
journal = {IEEE Signal Processing Magazine},
volume = {29},
number = {6},
publisher = {IEEE Signal Processing Magazine},
pages = {82--97},
year = {2012}
}
......@@ -12130,14 +12225,14 @@ author = {Zhuang Liu and
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2016}
}
@article{DBLP:journals/corr/BerardPSB16,
@inproceedings{DBLP:journals/corr/BerardPSB16,
author = {Alexandre Berard and
Olivier Pietquin and
Christophe Servan and
Laurent Besacier},
title = {Listen and Translate: A Proof of Concept for End-to-End Speech-to-Text
Translation},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1612.01744},
year = {2016}
}
......@@ -12175,16 +12270,14 @@ author = {Zhuang Liu and
publisher = {International Conference on Machine Learning},
year = {2006}
}
@article{DBLP:journals/jstsp/WatanabeHKHH17,
@inproceedings{DBLP:journals/jstsp/WatanabeHKHH17,
author = {Shinji Watanabe and
Takaaki Hori and
Suyoun Kim and
John R. Hershey and
Tomoki Hayashi},
title = {Hybrid CTC/Attention Architecture for End-to-End Speech Recognition},
journal = {IEEE Journal of Selected Topics in Signal Processing},
volume = {11},
number = {8},
publisher = {IEEE Journal of Selected Topics in Signal Processing},
pages = {1240--1253},
year = {2017}
}
......@@ -12198,15 +12291,13 @@ author = {Zhuang Liu and
publisher = {IEEE Transactions on Acoustics, Speech, and Signal Processing},
year = {2017}
}
@article{DBLP:journals/pami/ShiBY17,
@inproceedings{DBLP:journals/pami/ShiBY17,
author = {Baoguang Shi and
Xiang Bai and
Cong Yao},
title = {An End-to-End Trainable Neural Network for Image-Based Sequence Recognition
and Its Application to Scene Text Recognition},
journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
volume = {39},
number = {11},
publisher = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
pages = {2298--2304},
year = {2017}
}
......@@ -12294,16 +12385,16 @@ author = {Zhuang Liu and
title = {Effectively pretraining a speech translation decoder with Machine
Translation data},
pages = {8014--8020},
publisher = {Annual Meeting of the Association for Computational Linguistics},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2020}
}
@article{DBLP:journals/corr/abs-1802-06003,
@inproceedings{DBLP:journals/corr/abs-1802-06003,
author = {Takatomo Kano and
Sakriani Sakti and
Satoshi Nakamura},
title = {Structured-based Curriculum Learning for End-to-end English-Japanese
Speech Translation},
journal = {CoRR},
publisher = {CoRR},
volume = {abs/1802.06003},
year = {2018}
}
......@@ -12322,7 +12413,6 @@ author = {Zhuang Liu and
author = {Lawrence R. Rabiner and
Biing-Hwang Juang},
title = {Fundamentals of speech recognition},
series = {Prentice Hall signal processing series},
publisher = {Prentice Hall},
year = {1993}
}
......@@ -12459,12 +12549,12 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2016}
}
@article{Elliott2015MultilingualID,
@inproceedings{Elliott2015MultilingualID,
title={Multilingual Image Description with Neural Sequence Models},
author={Desmond Elliott and
Stella Frank and
Eva Hasler},
journal={arXiv: Computation and Language},
publisher ={arXiv: Computation and Language},
year={2015}
}
@inproceedings{DBLP:conf/wmt/MadhyasthaWS17,
......@@ -12477,13 +12567,12 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
@article{DBLP:journals/corr/CaglayanBB16,
@inproceedings{DBLP:journals/corr/CaglayanBB16,
author = {Ozan Caglayan and
Lo{\"{\i}}c Barrault and
Fethi Bougares},
title = {Multimodal Attention for Neural Machine Translation},
journal = {CoRR},
volume = {abs/1609.03976},
publisher = {CoRR},
year = {2016}
}
@inproceedings{DBLP:conf/acl/CalixtoLC17,
......@@ -12495,13 +12584,12 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
@article{DBLP:journals/corr/DelbrouckD17,
@inproceedings{DBLP:journals/corr/DelbrouckD17,
author = {Jean-Benoit Delbrouck and
St{\'{e}}phane Dupont},
title = {Multimodal Compact Bilinear Pooling for Multimodal Neural Machine
Translation},
journal = {CoRR},
volume = {abs/1703.08084},
publisher = {CoRR},
year = {2017}
}
@inproceedings{DBLP:conf/acl/LibovickyH17,
......@@ -12512,22 +12600,20 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017}
}
@article{DBLP:journals/corr/abs-1712-03449,
@inproceedings{DBLP:journals/corr/abs-1712-03449,
author = {Jean-Benoit Delbrouck and
St{\'{e}}phane Dupont},
title = {Modulating and attending the source image during encoding improves
Multimodal Translation},
journal = {CoRR},
volume = {abs/1712.03449},
publisher = {CoRR},
year = {2017}
}
@article{DBLP:journals/corr/abs-1807-11605,
@inproceedings{DBLP:journals/corr/abs-1807-11605,
author = {Hasan Sait Arslan and
Mark Fishel and
Gholamreza Anbarjafari},
title = {Doubly Attentive Transformer Machine Translation},
journal = {CoRR},
volume = {abs/1807.11605},
publisher = {CoRR},
year = {2018}
}
@inproceedings{DBLP:conf/wmt/HelclLV18,
......@@ -12619,7 +12705,6 @@ author = {Zhuang Liu and
Yoshua Bengio},
title = {Show, Attend and Tell: Neural Image Caption Generation with Visual
Attention},
volume = {37},
pages = {2048--2057},
publisher = {International Conference on Machine Learning},
year = {2015}
......@@ -12649,7 +12734,7 @@ author = {Zhuang Liu and
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
year = {2017}
}
@article{DBLP:journals/pami/FuJCSZ17,
@inproceedings{DBLP:journals/pami/FuJCSZ17,
author = {Kun Fu and
Junqi Jin and
Runpeng Cui and
......@@ -12657,9 +12742,7 @@ author = {Zhuang Liu and
Changshui Zhang},
title = {Aligning Where to See and What to Tell: Image Captioning with Region-Based
Attention and Scene-Specific Contexts},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {39},
number = {12},
publisher = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
pages = {2321--2334},
year = {2017}
}
......@@ -12670,8 +12753,6 @@ author = {Zhuang Liu and
Tao Mei},
title = {Exploring Visual Relationship for Image Captioning},
series = {Lecture Notes in Computer Science},
volume = {11218},
pages = {711--727},
publisher = {European Conference on Computer Vision},
year = {2018}
}
......@@ -12686,21 +12767,19 @@ author = {Zhuang Liu and
publisher = {International Joint Conference on Artificial Intelligence},
year = {2017}
}
@article{DBLP:journals/corr/abs-1804-02767,
@inproceedings{DBLP:journals/corr/abs-1804-02767,
author = {Joseph Redmon and
Ali Farhadi},
title = {YOLOv3: An Incremental Improvement},
journal = {CoRR},
volume = {abs/1804.02767},
publisher = {CoRR},
year = {2018}
}
@article{DBLP:journals/corr/abs-2004-10934,
@inproceedings{DBLP:journals/corr/abs-2004-10934,
author = {Alexey Bochkovskiy and
Chien-Yao Wang and
Hong-Yuan Mark Liao},
title = {YOLOv4: Optimal Speed and Accuracy of Object Detection},
journal = {CoRR},
volume = {abs/2004.10934},
publisher = {CoRR},
year = {2020}
}
@inproceedings{DBLP:conf/cvpr/LuXPS17,
......@@ -12738,15 +12817,13 @@ author = {Zhuang Liu and
publisher = {ACM Multimedia},
year = {2017}
}
@article{DBLP:journals/mta/FangWCT18,
@inproceedings{DBLP:journals/mta/FangWCT18,
author = {Fang Fang and
Hanli Wang and
Yihao Chen and
Pengjie Tang},
title = {Looking deeper and transferring attention for image captioning},
journal = {Multimedia Tools and Applications},
volume = {77},
number = {23},
publisher = {Multimedia Tools and Applications},
pages = {31159--31175},
year = {2018}
}
......@@ -12759,12 +12836,11 @@ author = {Zhuang Liu and
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
year = {2018}
}
@article{DBLP:journals/corr/abs-1805-09019,
@inproceedings{DBLP:journals/corr/abs-1805-09019,
author = {Qingzhong Wang and
Antoni B. Chan},
title = {{CNN+CNN:} Convolutional Decoders for Image Captioning},
journal = {CoRR},
volume = {abs/1805.09019},
publisher = {CoRR},
year = {2018}
}
@inproceedings{DBLP:conf/eccv/DaiYL18,
......@@ -12772,8 +12848,6 @@ author = {Zhuang Liu and
Deming Ye and
Dahua Lin},
title = {Rethinking the Form of Latent States in Image Captioning},
volume = {11209},
pages = {294--310},
publisher = {European Conference on Computer Vision},
year = {2018}
}
......@@ -12798,28 +12872,24 @@ author = {Zhuang Liu and
Alexander Kirillov and
Sergey Zagoruyko},
title = {End-to-End Object Detection with Transformers},
volume = {12346},
pages = {213--229},
publisher = {European Conference on Computer Vision},
year = {2020}
}
@article{DBLP:journals/tcsv/YuLYH20,
@inproceedings{DBLP:journals/tcsv/YuLYH20,
author = {Jun Yu and
Jing Li and
Zhou Yu and
Qingming Huang},
title = {Multimodal Transformer With Multi-View Visual Representation for Image
Captioning},
journal = {IEEE Transactions on Circuits and Systems for Video Technology},
volume = {30},
number = {12},
publisher = {IEEE Transactions on Circuits and Systems for Video Technology},
pages = {4467--4480},
year = {2020}
}
@article{Huasong2020SelfAdaptiveNM,
@inproceedings{Huasong2020SelfAdaptiveNM,
title={Self-Adaptive Neural Module Transformer for Visual Question Answering},
author={Huasong Zhong and Jingyuan Chen and Chen Shen and Hanwang Zhang and Jianqiang Huang and Xian-Sheng Hua},
journal={IEEE Transactions on Multimedia},
publisher ={IEEE Transactions on Multimedia},
year={2020},
pages={1-1}
}
......@@ -12842,7 +12912,6 @@ author = {Zhuang Liu and
Xiaokang Yang},
title = {Semantic Equivalent Adversarial Data Augmentation for Visual Question
Answering},
volume = {12364},
pages = {437--453},
publisher = {European Conference on Computer Vision},
year = {2020}
......@@ -12861,7 +12930,6 @@ author = {Zhuang Liu and
Yejin Choi and
Jianfeng Gao},
title = {Oscar: Object-Semantics Aligned Pre-training for Vision-Language Tasks},
volume = {12375},
pages = {121--137},
publisher = {European Conference on Computer Vision},
year = {2020}
......@@ -12903,23 +12971,21 @@ author = {Zhuang Liu and
pages = {465--476},
year = {2017}
}
@article{DBLP:journals/corr/abs-1908-06616,
@inproceedings{DBLP:journals/corr/abs-1908-06616,
author = {Hajar Emami and
Majid Moradi Aliabadi and
Ming Dong and
Ratna Babu Chinnam},
title = {{SPA-GAN:} Spatial Attention {GAN} for Image-to-Image Translation},
journal = {CoRR},
volume = {abs/1908.06616},
publisher = {IEEE Transactions on Multimedia},
year = {2019}
}
@article{DBLP:journals/access/XiongWG19,
@inproceedings{DBLP:journals/access/XiongWG19,
author = {Feng Xiong and
Qianqian Wang and
Quanxue Gao},
title = {Consistent Embedded {GAN} for Image-to-Image Translation},
journal = {IEEE Access},
volume = {7},
publisher = {IEEE Access},
pages = {126651--126661},
year = {2019}
}
......@@ -12961,12 +13027,11 @@ author = {Zhuang Liu and
Bernt Schiele and
Honglak Lee},
title = {Generative Adversarial Text to Image Synthesis},
volume = {48},
pages = {1060--1069},
publisher = {International Conference on Machine Learning},
year = {2016}
}
@article{DBLP:journals/corr/DashGALA17,
@inproceedings{DBLP:journals/corr/DashGALA17,
author = {Ayushman Dash and
John Cristian Borges Gamboa and
Sheraz Ahmed and
......@@ -12974,8 +13039,7 @@ author = {Zhuang Liu and
Muhammad Zeshan Afzal},
title = {{TAC-GAN} - Text Conditioned Auxiliary Classifier Generative Adversarial
Network},
journal = {CoRR},
volume = {abs/1703.06412},
publisher = {CoRR},
year = {2017}
}
@inproceedings{DBLP:conf/nips/ReedAMTSL16,
......@@ -13040,12 +13104,11 @@ author = {Zhuang Liu and
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
year = {2018}
}
@article{DBLP:journals/corr/ChoE16,
@inproceedings{DBLP:journals/corr/ChoE16,
author = {Kyunghyun Cho and
Masha Esipova},
title = {Can neural machine translation do simultaneous translation?},
journal = {CoRR},
volume = {abs/1606.02012},
publisher = {CoRR},
year = {2016}
}
@inproceedings{DBLP:conf/eacl/NeubigCGL17,
......@@ -13124,7 +13187,7 @@ author = {Zhuang Liu and
Desmond Elliott and
Aykut Erdem and
Erkut Erdem and
Nazli Ikizler{-}Cinbis and
Nazli Ikizler-Cinbis and
Frank Keller and
Adrian Muscat and
Barbara Plank},
......@@ -13191,7 +13254,6 @@ author = {Zhuang Liu and
title = {A Reinforcement Learning Approach to Interactive-Predictive Neural
Machine Translation},
publisher = {CoRR},
volume = {abs/1805.01553},
year = {2018}
}
@inproceedings{DBLP:journals/mt/DomingoPC17,
......@@ -13200,8 +13262,6 @@ author = {Zhuang Liu and
Francisco Casacuberta},
title = {Segment-based interactive-predictive machine translation},
publisher = {Machine Translation},
volume = {31},
number = {4},
pages = {163--185},
year = {2017}
}
......@@ -13219,8 +13279,6 @@ author = {Zhuang Liu and
Juan Miguel Vilar},
title = {Statistical Approaches to Computer-Assisted Translation},
publisher = {Computational Linguistics},
volume = {35},
number = {1},
pages = {3--28},
year = {2009}
}
......@@ -13249,7 +13307,6 @@ author = {Zhuang Liu and
title = {TurboTransformers: An Efficient {GPU} Serving System For Transformer
Models},
publisher = {CoRR},
volume = {abs/2010.05680},
year = {2020}
}
@inproceedings{DBLP:conf/iclr/HuangCLWMW18,
......
......@@ -2,8 +2,8 @@
% !TEX encoding = UTF-8 Unicode
%----------------------------------------------------------------------------------------
% 机器翻译:统计建模与深度学习方法
% Machine Translation: Statistical Modeling and Deep Learning Methods
% 机器翻译:基础与模型
% Machine Translation: Foundations and Models
%
% Copyright 2020
% 肖桐(xiaotong@mail.neu.edu.cn) 朱靖波 (zhujingbo@mail.neu.edu.cn)
......@@ -105,9 +105,9 @@
\thispagestyle{empty}
{\large
\noindent {\color{red} 在此感谢为本书做出贡献的小牛团队(部分)成员} \\
\noindent {\color{red} 在此感谢为本书做出贡献的} \\
\noindent 曹润柘、曾信、孟霞、单韦乔、周涛、周书含、许诺、李北、许晨、林野、李垠桥、王子扬、刘辉、张裕浩、冯凯、罗应峰、魏冰浩、王屹超、李炎洋、姜雨帆、田丰宁、刘继强、张哲旸、陈贺轩、刘晓倩、牛蕊、杜权、胡驰、王泽洋、刘腾博、刘兴宇、徐萍、赵闯、高博、张春良、王会珍、张俐、杨木润、宁义明、李洋、秦浩、胡明涵、马安香 \\
\noindent 曹润柘、曾信、孟霞、单韦乔、周涛、周书含、许诺、李北、许晨、林野、李垠桥、王子扬、刘辉、张裕浩、冯凯、罗应峰、魏冰浩、王屹超、李炎洋、胡驰、姜雨帆、田丰宁、刘继强、张哲旸、陈贺轩、牛蕊、杜权、张春良、王会珍、张俐、马安香、胡明涵 \\
}
......@@ -144,14 +144,14 @@
%\include{Chapter10/chapter10}
%\include{Chapter11/chapter11}
%\include{Chapter12/chapter12}
%\include{Chapter13/chapter13}
\include{Chapter13/chapter13}
%\include{Chapter14/chapter14}
%\include{Chapter15/chapter15}
%\include{Chapter16/chapter16}
%\include{Chapter17/chapter17}
%\include{Chapter18/chapter18}
\include{ChapterPostscript/postscript}
\include{ChapterAcknowledgement/acknowledgement}
%\include{ChapterPostscript/postscript}
%\include{ChapterAcknowledgement/acknowledgement}
%\include{ChapterAppend/chapterappend}
......
......@@ -600,6 +600,7 @@ addtohook={%
%----------------------------------------------------------------------------------------
\usepackage{pgffor}%图片中使用\foreach语句
%\usepackage{ulem}%使用\sout
\usepackage{soul}
%----------------------------------------------------------------------------------------
% Chapter 6
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论