Commit 529bf1eb by 孟霞

合并分支 'master' 到 'mengxia'

Master

查看合并请求 !492
parents d1ad458a 534447a8
......@@ -99,7 +99,7 @@
\end{figure}
%------------------------------------------
\parinterval 在此之后,更多的翻译工作在文化和知识传播中开展。其中一个典型代表是宗教文献的翻译。宗教是人类意识形态的一个重要载体,为了宣传教义,人们编写了大量的宗教文献。在西方,最早被记录的一项翻译活动是将旧约圣经(希伯来文及亚兰文)翻译为希腊文版本。迄今为止人类历史上翻译版本最多的书就是圣经。在中国唐代,有一位世界性的文化人物\ \dash \ 玄奘,他不仅是佛学家、旅行家,还是翻译家。玄奘西行求法归来后把全部的心血和智慧奉献给了译经事业,在助手们的帮助下,共翻译佛教经论74部,1335卷,每卷万字左右,合计1335万字,占去整个唐代译经总数的一半以上\upcite{慧立彦宗1983大慈恩寺三藏法师传},树立了我国古代翻译思想的光辉典范。
\parinterval 在此之后,更多的翻译工作在文化和知识传播中开展。其中一个典型代表是宗教文献的翻译。宗教是人类意识形态的一个重要载体,为了宣传教义,人们编写了大量的宗教文献。在西方,最早被记录的一项翻译活动是将旧约圣经(希伯来文及亚兰文)翻译为希腊文版本。迄今为止人类历史上翻译版本最多的书就是圣经。在中国唐代,有一位世界性的文化人物\ \dash \ 玄奘,他不仅是佛学家、旅行家,还是翻译家。玄奘西行求法归来后把全部的心血和智慧奉献给了译经事业,在助手们的帮助下,共翻译佛教经论74部,1335卷,每卷万字左右,合计1335万字,占去整个唐代译经总数的一半以上\upcite{慧立2000大慈恩寺三藏法師傳},树立了我国古代翻译思想的光辉典范。
\parinterval 翻译在人类历史长河中起到了重要的作用。一方面,语言文字、文化和地理位置的差异性使得翻译成为一个重要的需求;另一方面,翻译也加速了不同文明的融会贯通,促进了世界的发展。今天,翻译已经成为重要的行业之一,各个高校也都设立了翻译及相关专业,相关人才不断涌现。据《2019年中国语言服务行业发展报告》\upcite{2019cns}统计:全球语言服务产值预计将首次接近500亿美元;中国涉及语言服务的在营企业360,000余家,以语言服务为主营业务的在营企业近万家,总产值超过300亿元,年增长3\%以上;全国开设外语类专业的高校数量多达上千所,其中设立有翻译硕士(MTI)和翻译本科(BTI)专业的院校分别有250余所和280余所,仅MTI的累计招生数就高达6万余人\upcite{赵军峰2019深化改革}。当然,面对着巨大的需求,如何使用机器辅助翻译等技术手段提高人工翻译效率,也是人工翻译和机器翻译领域需要共同探索的方向。
......
......@@ -12,7 +12,7 @@
% RNN Encoder
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$e_x()$}};
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$\textrm{e}_x()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.3\base]eemb\x.north) {};
\node[] (enclabel1) at (enc1) {\tiny{$\mathbi{h}_{m-2}$}};
......@@ -27,7 +27,7 @@
% RNN Decoder
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$e_y()$}};
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$\textrm{e}_y()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.3\base]demb\x.north) {{\tiny{$\mathbi{s}_\x$}}};
\foreach \x in {1,2,...,3}
......@@ -80,10 +80,10 @@
}
{
\node [anchor=north west] (line11) at ([yshift=-1.8em]line4.west) {\scriptsize{每个词的one-hot}};
\node [anchor=north west] (line11) at ([yshift=-1.8em]line4.west) {\scriptsize{每个词的One-hot}};
\node [anchor=north west] (line12) at ([yshift=0.3em]line11.south west) {\scriptsize{离散化表示都被转化为\ \ \ \ }};
\node [anchor=north west] (line13) at ([yshift=0.3em]line12.south west) {\scriptsize{实数向量,即词嵌入}};
\node [anchor=north west] (line14) at ([yshift=0.3em]line13.south west) {\scriptsize{($e_x()$$e_y()$函数)}};
\node [anchor=north west] (line14) at ([yshift=0.3em]line13.south west) {\scriptsize{($\textrm{e}_x()$$\textrm{e}_y()$函数)}};
}
{
......
......@@ -43,7 +43,7 @@
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$eos$\rangle$};
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
......
......@@ -83,10 +83,10 @@
\end{scope}
\node[] (tanh) at (aux46){};
\node[] (Tanh) at (aux46){};
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (Tanh)] (GRU) {};
\end{pgfonlayer}
......
......@@ -96,10 +96,10 @@
\end{scope}
\node[] (tanh) at (aux46){};
\node[] (Tanh) at (aux46){};
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (Tanh)] (GRU) {};
\end{pgfonlayer}
......
......@@ -99,7 +99,7 @@
\draw[-latex,emph] (aux71) -| (aux87);
\draw[-latex,emph] (aux71) -| (aux53) -- (aux23) -| (aux46) -- (z76);
\draw[emph] (aux12) |- (aux23) -| (aux46);
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] (tanh) at (aux46) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] (Tanh) at (aux46) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (a1) at (aux53) {};
\node[opnode,circle,draw=red,thick] (a2) at (aux56) {};
\node[opnode,circle,draw=red,thick] (a3) at (aux75) {};
......@@ -118,7 +118,7 @@
\end{scope}
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\node[draw,very thick,rectangle,fill=blue!10!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (Tanh)] (GRU) {};
\end{pgfonlayer}
%%new
......
......@@ -93,7 +93,7 @@
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {};
}
\end{scope}
......
......@@ -92,7 +92,7 @@
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {X};
}
{
......@@ -100,7 +100,7 @@
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
......
......@@ -93,7 +93,7 @@
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {X};
}
{
......@@ -101,7 +101,7 @@
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
......@@ -122,7 +122,7 @@
\draw[-latex,emph] (aux21) -- (o27);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{Tanh}$};
\draw[-latex,emph] (o27) -- (aux29);
\draw[-latex,emph] (o27) -| (aux68);
......
......@@ -94,7 +94,7 @@
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {};
}
{
......@@ -102,7 +102,7 @@
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{Tanh}$};
\node[opnode,circle] (i45) at (aux45) {};
}
% cell update
......@@ -123,7 +123,7 @@
\draw[-latex,emph] (aux21) -- (o27);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{Tanh}$};
\draw[-latex,emph] (o27) -- (aux29);
\draw[-latex,emph] (o27) -| (aux68);
......@@ -133,7 +133,7 @@
\draw[-latex,standard] (u55) -| (o27);
\draw[-latex,standard] (aux21) -- (o27);
\node[opnode,circle] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux37) {$\mathrm{tanh}$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux37) {$\mathrm{Tanh}$};
\draw[-latex,standard] (o27) -- (aux29);
\draw[-latex,standard] (o27) -| (aux68);
......@@ -174,7 +174,7 @@
}
{
% input gate formula
\node[formulanode,anchor=north east,text width=10em] () at ([shift={(4\base,-1.5\base)}]aux21) {输入门\\$\mathbi{i}_t=\sigma(\mathbi{W}_i[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_i)$\\$\hat{\mathbi{c}}_t=\mathrm{tanh}(\mathbi{W}_c[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_c)$};
\node[formulanode,anchor=north east,text width=10em] () at ([shift={(4\base,-1.5\base)}]aux21) {输入门\\$\mathbi{i}_t=\sigma(\mathbi{W}_i[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_i)$\\$\hat{\mathbi{c}}_t=\mathrm{Tanh}(\mathbi{W}_c[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_c)$};
}
{
% cell update formula
......@@ -182,7 +182,7 @@
}
{
% output gate formula
\node[formulanode,anchor=north west,text width=10em] () at ([shift={(-4\base,-1.5\base)}]aux29) {输出门\\$\mathbi{o}_t=\sigma(\mathbi{W}_o[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_o)$\\$\mathbi{h}_{t}=\mathbi{o}_t\cdot \mathrm{tanh}(\mathbi{c}_{t})$};
\node[formulanode,anchor=north west,text width=10em] () at ([shift={(-4\base,-1.5\base)}]aux29) {输出门\\$\mathbi{o}_t=\sigma(\mathbi{W}_o[\mathbi{h}_{t-1},\mathbi{x}_t]+\mathbi{b}_o)$\\$\mathbi{h}_{t}=\mathbi{o}_t\cdot \mathrm{Tanh}(\mathbi{c}_{t})$};
}
\end{scope}
\end{tikzpicture}
......

245 KB | W: | H:

245 KB | W: | H:

Chapter10/Figures/mt-history.png
Chapter10/Figures/mt-history.png
Chapter10/Figures/mt-history.png
Chapter10/Figures/mt-history.png
  • 2-up
  • Swipe
  • Onion skin
......@@ -2,29 +2,32 @@
\begin{tikzpicture}[node distance = 0cm]
\node(num1)[num,fill=red!10]{0};
\node(num2)[num,below of = num1,yshift= -0.6cm,fill=red!10]{0};
\node(num1)[num,fill=red!10]{1};
\node(num2)[num,below of = num1,yshift= -0.6cm,fill=red!10]{5};
\node(num3)[num,right of = num1,xshift= 0.6cm,fill=red!10]{0};
\node(num4)[num,below of = num3,yshift= -0.6cm,fill=red!10]{0};
\node(num5)[num,right of = num3,xshift= 0.6cm,fill=green!10]{1};
\node(num6)[num,below of = num5,yshift= -0.6cm,fill=green!10]{3};
\node(num7)[num,right of = num5,xshift= 0.6cm,fill=green!10]{3};
\node(num8)[num,below of = num7,yshift= -0.6cm,fill=green!10]{1};
\node(num9)[num,below of = num2,yshift= -0.6cm,fill=yellow!10]{7};
\node(num10)[num,below of = num9,yshift= -0.6cm,fill=yellow!10]{7};
\node(num11)[num,right of = num9,xshift= 0.6cm,fill=yellow!10]{6};
\node(num12)[num,below of = num11,yshift= -0.6cm,fill=yellow!10]{8};
\node(num13)[num,right of = num11,xshift= 0.6cm,fill=blue!10]{3};
\node(num4)[num,below of = num3,yshift= -0.6cm,fill=red!10]{6};
\node(num5)[num,right of = num3,xshift= 0.6cm,fill=green!10]{4};
\node(num6)[num,below of = num5,yshift= -0.6cm,fill=green!10]{7};
\node(num7)[num,right of = num5,xshift= 0.6cm,fill=green!10]{5};
\node(num8)[num,below of = num7,yshift= -0.6cm,fill=green!10]{8};
\node(num9)[num,below of = num2,yshift= -0.6cm,fill=yellow!10]{3};
\node(num10)[num,below of = num9,yshift= -0.6cm,fill=yellow!10]{1};
\node(num11)[num,right of = num9,xshift= 0.6cm,fill=yellow!10]{2};
\node(num12)[num,below of = num11,yshift= -0.6cm,fill=yellow!10]{2};
\node(num13)[num,right of = num11,xshift= 0.6cm,fill=blue!10]{1};
\node(num14)[num,below of = num13,yshift= -0.6cm,fill=blue!10]{3};
\node(num10)[num,right of = num13,xshift= 0.6cm,fill=blue!10]{2};
\node(num10)[num,right of = num13,xshift= 0.6cm,fill=blue!10]{0};
\node(num16)[num,below of = num10,yshift= -0.6cm,fill=blue!10]{4};
\draw[->,thick]([xshift=0.4cm,yshift=-0.4cm]num8.east)--([xshift=1.5cm,yshift=-0.4cm]num8.east);
\node(num17)[num,right of = num8,xshift= 2.5cm,fill=red!10]{0};
\node(num18)[num,right of = num17,xshift= 0.6cm,fill=green!10]{2};
\node(num19)[num,below of = num17,yshift=-0.6cm,fill=yellow!10]{7};
\node(num20)[num,below of = num18,yshift= -0.6cm,fill=blue!10]{3};
\node(num17)[num,right of = num8,xshift= 2.5cm,fill=red!10]{3};
\node(num18)[num,right of = num17,xshift= 0.6cm,fill=green!10]{6};
\node(num19)[num,below of = num17,yshift=-0.6cm,fill=yellow!10]{2};
\node(num20)[num,below of = num18,yshift= -0.6cm,fill=blue!10]{2};
\node [right of = num2,xshift= -0.7cm]{};
......
......@@ -43,7 +43,7 @@
\node [draw=ugreen!30,rectangle,inner ysep=5pt,inner xsep=1.8em,rounded corners=4pt,line width=2pt,fill=ugreen!10] [fit = (tgt_1) (i_0)(tgt_2)(i_5) ] (group1_2) {};
\end{pgfonlayer}
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{目标语词嵌入}};
\node[anchor=south, word] (l_1) at ([yshift=2em]l_0.north){\sffamily\bfnew{卷积}};
\node[anchor=south, word] (l_2) at ([yshift=2.4em]l_1.north){\sffamily\bfnew{门控}};
\node[anchor=south, word] (l_3) at ([yshift=0.06em]l_2.north){\sffamily\bfnew{线性单元}};
......@@ -144,7 +144,7 @@
\node[anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north){$<$p$>$};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{源语词嵌入}};
\node[anchor=north, word] (t_2) at ([yshift=-2em]t_1.south){\sffamily\bfnew{卷积}};
\node[anchor=north, word] (t_3) at ([yshift=-2.8em]t_2.south){\sffamily\bfnew{门控}};
\node[anchor=north, word] (t_4) at ([yshift=-0.06em]t_3.south){\sffamily\bfnew{线性单元}};
......
......@@ -39,7 +39,7 @@
\node[anchor=north,word] at ([yshift=-0.4em]i_4.south){to};
\node[anchor=north,word] at ([yshift=-0.4em]i_5.south){school};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{目标语词嵌入}};
\node[anchor=south, word] (l_1) at ([yshift=2em]l_0.north){\sffamily\bfnew{卷积}};
\node[anchor=south, word] (l_2) at ([yshift=2.4em]l_1.north){\sffamily\bfnew{门控}};
\node[anchor=south, word] (l_3) at ([yshift=0.06em]l_2.north){\sffamily\bfnew{线性单元}};
......@@ -139,7 +139,7 @@
\node[anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north){$<$p$>$};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{源语词嵌入}};
\node[anchor=north, word] (t_2) at ([yshift=-2em]t_1.south){\sffamily\bfnew{卷积}};
\node[anchor=north, word] (t_3) at ([yshift=-2.8em]t_2.south){\sffamily\bfnew{门控}};
\node[anchor=north, word] (t_4) at ([yshift=-0.06em]t_3.south){\sffamily\bfnew{线性单元}};
......
......@@ -39,7 +39,7 @@
\node[anchor=north,word] at ([yshift=-0.4em]i_4.south){to};
\node[anchor=north,word] at ([yshift=-0.4em]i_5.south){school};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (l_0) at ([xshift=-2em,yshift=-0.5em]i_0.west){\sffamily\bfnew{目标语词嵌入}};
\node[anchor=south, word] (l_1) at ([yshift=2em]l_0.north){\sffamily\bfnew{卷积}};
\node[anchor=south, word] (l_2) at ([yshift=2.4em]l_1.north){\sffamily\bfnew{门控}};
\node[anchor=south, word] (l_3) at ([yshift=0.06em]l_2.north){\sffamily\bfnew{线性单元}};
......@@ -140,7 +140,7 @@
\node[anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north){$<$p$>$};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{词嵌入}};
\node[anchor=east, word] (t_1) at ([xshift=-4em,yshift=0.5em]r_0.west){\sffamily\bfnew{源语词嵌入}};
\node[anchor=north, word] (t_2) at ([yshift=-2em]t_1.south){\sffamily\bfnew{卷积}};
\node[anchor=north, word] (t_3) at ([yshift=-2.8em]t_2.south){\sffamily\bfnew{门控}};
\node[anchor=north, word] (t_4) at ([yshift=-0.06em]t_3.south){\sffamily\bfnew{线性单元}};
......
......@@ -25,9 +25,9 @@
\node[data,inner sep=2pt,fill=cyan!40] at (14.4em,2.4em) {19};
\node[data,inner sep=2pt] at (16em,2.4em) {25};
\node[font=\footnotesize] (in) at (1.6em,4.8em) {输入};
\node[font=\footnotesize] at (8.8em,4.8em) {卷积核};
\node[font=\footnotesize] (out) at (15.2em,4.8em) {输出};
\node[font=\footnotesize] (in) at (1.6em,-1.8em) {输入:$3\times 3$};
\node[font=\footnotesize] at (8.8em,-1.8em) {卷积核:$2\times 2$};
\node[font=\footnotesize] (out) at (15.2em,-1.8em) {输出:$2\times 2$};
%\node[font=\footnotesize,dashed,draw=cyan,very thick,fill=cyan!5,align=center] at ([yshift=-0.3cm,xshift=1.8cm]out.east) {*\ \ :表示\\卷积计算};
%\node[] at ([yshift=-0.3cm,xshift=-2.3cm]in.east) {\ \ \ \ \ };
......
......@@ -4,16 +4,19 @@
\begin{tikzpicture}[node distance = 0cm]
\node(num1)[num,fill=red!10]{1};
\node(num2)[num,below of = num1,yshift= -0.6cm,fill=red!10]{5};
\node(num3)[num,right of = num1,xshift= 0.6cm,fill=red!10]{1};
\node(num3)[num,right of = num1,xshift= 0.6cm,fill=red!10]{0};
\node(num4)[num,below of = num3,yshift= -0.6cm,fill=red!10]{6};
\node(num5)[num,right of = num3,xshift= 0.6cm,fill=green!10]{2};
\node(num5)[num,right of = num3,xshift= 0.6cm,fill=green!10]{4};
\node(num6)[num,below of = num5,yshift= -0.6cm,fill=green!10]{7};
\node(num7)[num,right of = num5,xshift= 0.6cm,fill=green!10]{4};
\node(num7)[num,right of = num5,xshift= 0.6cm,fill=green!10]{5};
\node(num8)[num,below of = num7,yshift= -0.6cm,fill=green!10]{8};
\node(num9)[num,below of = num2,yshift= -0.6cm,fill=yellow!10]{3};
\node(num10)[num,below of = num9,yshift= -0.6cm,fill=yellow!10]{1};
\node(num11)[num,right of = num9,xshift= 0.6cm,fill=yellow!10]{2};
\node(num12)[num,below of = num11,yshift= -0.6cm,fill=yellow!10]{2};
\node(num13)[num,right of = num11,xshift= 0.6cm,fill=blue!10]{1};
\node(num14)[num,below of = num13,yshift= -0.6cm,fill=blue!10]{3};
\node(num10)[num,right of = num13,xshift= 0.6cm,fill=blue!10]{0};
......
......@@ -8,56 +8,54 @@
\tikzstyle{cir} = [thin,fill=blue!8,draw,circle,minimum size =0.5em,drop shadow={shadow xshift=0.15em, shadow yshift=-0.1em}]
\tikzstyle{word} = [inner sep=0pt, font=\footnotesize,minimum height=\bcc]
\draw[fill=blue!8,xshift=0.3cm,yshift=0.5cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+6*\bcc,0cm+9*\bcc);
\draw[ugreen!60,step=\bcc,xshift=0.3cm,yshift=0.5cm,gray] (0cm,0cm) grid (0cm+6*\bcc,0cm+9*\bcc);
%\draw[line width=0.7pt,xshift=0.3cm,yshift=0.5cm] (0cm,0cm) rectangle (0cm+6*\bcc,0cm+9*\bcc);
\draw[red!60,line width=2pt,xshift=0.3cm,yshift=0.5cm] (0cm,0cm+2*\bcc) rectangle (0cm+6*\bcc,0cm+4*\bcc);
%\draw[fill=blue!8,xshift=0.3cm,yshift=0.5cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+6*\bcc,0cm+9*\bcc);
%\draw[ugreen!60,step=\bcc,xshift=0.3cm,yshift=0.5cm,gray] (0cm,0cm) grid (0cm+6*\bcc,0cm+9*\bcc);
%\draw[red!60,line width=2pt,xshift=0.3cm,yshift=0.5cm] (0cm,0cm+2*\bcc) rectangle (0cm+6*\bcc,0cm+4*\bcc);
% 输入矩阵
\draw[thick,fill=blue!8,line width=0.6pt] (0cm,0cm) rectangle (0cm+6*\bcc,0cm+9*\bcc);
\draw[step=\bcc,gray] (0cm,0cm) grid (0cm+6*\bcc,0cm+9*\bcc);
%\draw[line width=0.7pt] (0cm,0cm) rectangle (0cm+6*\bcc,0cm+9*\bcc);
\draw[red!60,line width=2pt] (0cm,0cm) rectangle (0cm+6*\bcc,0cm+2*\bcc);
\draw[ugreen!60,line width=2pt] (0cm,0cm+3*\bcc) rectangle (0cm+6*\bcc,0cm+6*\bcc);
\draw[red!60,line width=2pt] (0cm,0cm+7*\bcc) rectangle (0cm+6*\bcc,0cm+9*\bcc);
\draw[fill=blue!8,xshift=5.0cm,yshift=1.3cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+6*\bcc);
\draw[step=\bcc,gray,xshift=5.0cm,yshift=1.3cm] (0cm,0cm) grid (0cm+1*\bcc,0cm+6*\bcc);
%\draw[xshift=5.0cm,yshift=1.3cm,line width=0.7pt] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+6*\bcc);
% 特征图
\draw[fill=blue!8,xshift=5.0cm,yshift=1.3cm,line width=0.6pt] (0cm,0cm-1*\bcc) rectangle (0cm+1*\bcc,0cm+6*\bcc);
\draw[step=\bcc,gray,xshift=5.0cm,yshift=1.3cm] (0cm,0cm-1*\bcc) grid (0cm+1*\bcc,0cm+6*\bcc);
\draw[ugreen!60,line width=2pt,xshift=5.0cm,yshift=1.3cm] (0cm,0cm+2*\bcc) rectangle (0cm+1*\bcc,0cm+3*\bcc);
\draw [gray,fill=blue!8,line width=0.6pt](8cm,2.6cm) -- (8.4cm, 2.6cm) -- (9cm,1cm) -- (8.6cm, 1cm) -- (8cm,2.6cm);
\draw [gray](8.15cm,2.2cm) -- (8.55cm,2.2cm);
\draw [gray](8.3cm,1.8cm) -- (8.7cm,1.8cm);
\draw [gray](8.45cm,1.4cm) -- (8.85cm,1.4cm);
%最大池化
\draw [gray,fill=blue!8,line width=0.6pt](8cm,2.2cm) -- (8.4cm, 2.2cm) -- (8.7cm,1.4cm) -- (8.3cm, 1.4cm) -- (8cm,2.2cm);
\draw [gray](8.15cm,1.8cm) -- (8.55cm,1.8cm);
%\draw [gray](8.3cm,1.8cm) -- (8.7cm,1.8cm);
%\draw [gray](8.45cm,1.4cm) -- (8.85cm,1.4cm);
\draw [gray,fill=blue!8,line width=0.6pt](11cm,2.2cm) -- (11.4cm, 2.2cm) -- (11.7cm,1.4cm) -- (11.3cm, 1.4cm) -- (11cm,2.2cm);
\draw [gray](11.15cm,1.8cm) -- (11.55cm,1.8cm);
%全连接层
\draw [gray,fill=blue!8,line width=0.6pt](11cm,2.2cm) -- (11.4cm, 2.2cm) -- (11.7cm,1.8cm) -- (11.3cm, 1.8cm) -- (11cm,2.2cm);
%\draw [gray](11.15cm,1.8cm) -- (11.55cm,1.8cm);
\draw[ugreen!60,line] ([xshift=5.0cm,yshift=1.3cm]0cm+1*\bcc,0cm+6*\bcc) -- (8cm,2.6cm);
\draw[ugreen!60,line] ([xshift=5.0cm,yshift=1.3cm]0cm+1*\bcc,0cm) -- (8.15cm,2.2cm);
%最大池化
\draw[ugreen!60,line] ([xshift=5.0cm,yshift=1.3cm]0cm+1*\bcc,0cm+6*\bcc) -- (8cm,2.2cm);
\draw[ugreen!60,line] ([xshift=5.0cm,yshift=1.3cm]0cm+1*\bcc,0cm-1*\bcc) -- (8.15cm,1.8cm);
\draw[fill=blue!8,xshift=5.2cm,yshift=1.0cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+6*\bcc);
\draw[step=\bcc,gray,xshift=5.2cm,yshift=1.0cm] (0cm,0cm) grid (0cm+1*\bcc,0cm+6*\bcc);
%\draw[line width=0.7pt,xshift=5.2cm,yshift=1.0cm] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+6*\bcc);
%特征图
%\draw[fill=blue!8,xshift=5.2cm,yshift=1.0cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+6*\bcc);
%\draw[step=\bcc,gray,xshift=5.2cm,yshift=1.0cm] (0cm,0cm) grid (0cm+1*\bcc,0cm+6*\bcc);
\draw[fill=blue!8,xshift=5.4cm,yshift=0.3cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+7*\bcc);
\draw[step=\bcc,gray,xshift=5.4cm,yshift=0.3cm] (0cm,0cm) grid (0cm+1*\bcc,0cm+7*\bcc);
%\draw[line width=0.7pt,xshift=5.4cm,yshift=0.3cm] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+7*\bcc);
%\draw[fill=blue!8,xshift=5.4cm,yshift=0.3cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+7*\bcc);
%\draw[step=\bcc,gray,xshift=5.4cm,yshift=0.3cm] (0cm,0cm) grid (0cm+1*\bcc,0cm+7*\bcc);
\draw[fill=blue!8,xshift=5.6cm,yshift=0cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+7*\bcc);
\draw[step=\bcc,gray,xshift=5.6cm,yshift=0cm] (0cm,0cm) grid (0cm+1*\bcc,0cm+7*\bcc);
%\draw[line width=0.7pt,xshift=5.6cm,yshift=0cm] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+7*\bcc);
\draw[fill=blue!8,xshift=5.6cm,yshift=0cm,line width=0.6pt] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+8*\bcc);
\draw[step=\bcc,gray,xshift=5.6cm,yshift=0cm] (0cm,0cm) grid (0cm+1*\bcc,0cm+8*\bcc);
\draw[red!60,line width=2pt,xshift=5.6cm,yshift=0cm] (0cm,0cm) rectangle (0cm+1*\bcc,0cm+1*\bcc);
\draw[red!60,line width=2pt,xshift=5.6cm,yshift=0cm] (0cm,0cm+2*\bcc) rectangle (0cm+1*\bcc,0cm+3*\bcc);
\draw[red!60,line width=2pt,xshift=5.6cm,yshift=0cm] (0cm,0cm+6*\bcc) rectangle (0cm+1*\bcc,0cm+7*\bcc);
\draw[red!60,line width=2pt,xshift=5.6cm,yshift=0cm] (0cm,0cm+7*\bcc) rectangle (0cm+1*\bcc,0cm+8*\bcc);
\draw[line] (8.4cm, 2.6cm) -- (11cm,2.2cm);
\draw[line] (9cm,1cm) -- (11.3cm, 1.4cm);
% 全连接线
\draw[line] (8.4cm, 2.2cm) -- (11.2cm,2.2cm);
\draw[line] (8.7cm,1.4cm) -- (11.3cm, 1.8cm);
\draw[red!60,line] ([xshift=5.6cm,yshift=0cm]0cm+1*\bcc,0cm+7*\bcc) -- (8.45cm,1.4cm);
\draw[red!60,line] ([xshift=5.6cm,yshift=0cm]0cm+1*\bcc,0cm) -- (8.6cm, 1cm);
\draw[red!60,line] ([xshift=5.6cm,yshift=0cm]0cm+1*\bcc,0cm+7*\bcc) -- (8.15cm,1.8cm);
\draw[red!60,line] ([xshift=5.6cm,yshift=0cm]0cm+1*\bcc,0cm) -- (8.25cm, 1.4cm);
\draw[red!60,line] (0cm+6*\bcc,0cm+9*\bcc) -- ([xshift=5.6cm,yshift=0cm]0cm,0cm+7*\bcc);
\draw[red!60,line] (0cm+6*\bcc,0cm+7*\bcc) -- ([xshift=5.6cm,yshift=0cm]0cm,0cm+6*\bcc);
......@@ -65,8 +63,8 @@
\draw[red!60,line] (0cm+6*\bcc,0cm) -- ([xshift=5.6cm,yshift=0cm]0cm,0cm);
\draw[ugreen!60,line] (0cm+6*\bcc,0cm+6*\bcc) -- ([xshift=5.0cm,yshift=1.3cm]0cm,0cm+3*\bcc);
\draw[ugreen!60,line] (0cm+6*\bcc,0cm+3*\bcc) -- ([xshift=5.0cm,yshift=1.3cm]0cm,0cm+2*\bcc);
\draw[red!60,line] ([xshift=0.3cm,yshift=0.5cm]0cm+6*\bcc,0cm+4*\bcc) -- ([xshift=5.6cm,yshift=0cm]0cm,0cm+3*\bcc);
\draw[red!60,line] ([xshift=0.3cm,yshift=0.5cm]0cm+6*\bcc,0cm+2*\bcc) -- ([xshift=5.6cm,yshift=0cm]0cm,0cm+2*\bcc);
%\draw[red!60,line] ([xshift=0.3cm,yshift=0.5cm]0cm+6*\bcc,0cm+4*\bcc) -- ([xshift=5.6cm,yshift=0cm]0cm,0cm+3*\bcc);
%\draw[red!60,line] ([xshift=0.3cm,yshift=0.5cm]0cm+6*\bcc,0cm+2*\bcc) -- ([xshift=5.6cm,yshift=0cm]0cm,0cm+2*\bcc);
\node[word] (w1) at (-0.5cm, 3.4cm) {wait};
\node[word] (w2) at ([yshift=-\bcc]w1) {for};
......@@ -81,18 +79,13 @@
\node[draw,rectangle callout,callout relative pointer={(0.28,-0.6)}] at (-0.3cm,4.6cm) {\textrm{卷积核}};
\node[draw,rectangle callout,callout relative pointer={(0.1,-0.5)}] at (5cm,4.6cm) {\textrm{特征图}};
%\draw [thick] (0cm, -0.3cm) -- (0cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{$m \times k$ representation of \\ sentence with static and \\ non-static channels} (2.4cm,-0.5cm) -- (2.4cm, -0.3cm);
%\draw [thick] (3.6cm, -0.3cm) -- (3.6cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{Convolutional layer with \\ multiple filter widths and \\ feature maps} (6cm,-0.5cm) -- (6cm, -0.3cm);
%\draw [thick] (7.2cm, -0.3cm) -- (7.2cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{Max-over-time\\ pooling} (9cm,-0.5cm) -- (9cm, -0.3cm);
%\draw [thick] (10cm, -0.3cm) -- (10cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{Fully connected layer \\ with dropout and \\ softmax output} (11.7cm,-0.5cm) -- (11.7cm, -0.3cm);
\draw [thick] (0cm, -0.3cm) -- (0cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{维度大小为 $m \times K$ \\ 的静态与非静态通道\\的句子表示} (2.4cm,-0.5cm) -- (2.4cm, -0.3cm);
\draw [thick] (0cm, -0.3cm) -- (0cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{维度大小为 $m \times O$ \\ 的句子表示} (2.4cm,-0.5cm) -- (2.4cm, -0.3cm);
\draw [thick] (3.6cm, -0.3cm) -- (3.6cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{具有多个不同大小\\的卷积核和特征图\\的卷积层} (6cm,-0.5cm) -- (6cm, -0.3cm);
\draw [thick] (7.2cm, -0.3cm) -- (7.2cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{最大池化} (9cm,-0.5cm) -- (9cm, -0.3cm);
\draw [thick] (10cm, -0.3cm) -- (10cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{带有Dropout\\和Softmax输出\\的全连接层} (11.7cm,-0.5cm) -- (11.7cm, -0.3cm);
%\node [font=\Large] at (5.2cm,-2cm){$h_i = dot(F,x_{i:i+l-1})+b$};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -16,8 +16,9 @@
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em,xshift=-0.5em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa1.south west) {$+$};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em,xshift=0.5em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
......@@ -35,8 +36,9 @@
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em,xshift=-0.5em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa2.south west) {$+$};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em,xshift=0.5em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
......
......@@ -14,8 +14,9 @@
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em,xshift=-0.5em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa1.south west) {$+$};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em,xshift=0.5em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
......@@ -33,8 +34,9 @@
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em,xshift=-0.5em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa2.south west) {$+$};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em,xshift=0.5em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
......
......@@ -15,8 +15,9 @@
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em,xshift=-0.5em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa1.south west) {$+$};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em,xshift=0.5em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
......@@ -34,8 +35,9 @@
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em,xshift=-0.5em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa2.south west) {$+$};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em,xshift=0.5em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
......
......@@ -14,8 +14,9 @@
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em,xshift=-0.5em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa1.south west) {$+$};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em,xshift=0.5em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
......@@ -33,8 +34,9 @@
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em,xshift=-0.5em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa2.south west) {$+$};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em,xshift=0.5em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
......
......@@ -14,8 +14,9 @@
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em,xshift=-0.5em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa1.south west) {$+$};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em,xshift=0.5em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\scriptsize{$\textbf{编码器输入: 我\ \ \ \ }$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
......@@ -33,8 +34,9 @@
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em,xshift=-0.5em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [] (add) at ([yshift=-1.6em,xshift=3.5em]sa2.south west) {$+$};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em,xshift=0.5em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\scriptsize{$\textbf{解码器输入: $<$sos$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\scriptsize{$\textbf{解码器输出: I am fine $<$eos$>$ }$}};
......
......@@ -123,7 +123,7 @@
%----------------------------------------------
\begin{table}[htp]
\centering
\caption{ 不同翻译模型性能对比\upcite{NIPS2017_7181}}
\caption{ 不同翻译模型性能对比\upcite{vaswani2017attention}}
\label{tab:12-12}
\begin{tabular}{l l l l}
\multicolumn{1}{l|}{\multirow{2}{*}{系统}} & \multicolumn{2}{c}{BLEU[\%]} & \multirow{2}{*}{\parbox{6em}{模型训练代价 (FLOPs)}} \\
......@@ -324,11 +324,11 @@
\begin{itemize}
\vspace{0.5em}
\item 首先,将$\mathbi{Q}$$\mathbi{K}$$\mathbi{V}$分别通过线性(Linear)变换的方式映射为$h$个子集。即$\mathbi{Q}_i = \mathbi{Q}\mathbi{W}_i^Q $$\mathbi{K}_i = \mathbi{K}\mathbi{W}_i^K $$\mathbi{V}_i = \mathbi{V}\mathbi{W}_i^V $,其中$i$表示第$i$个头, $\mathbi{W}_i^Q \in \mathbb{R}^{d_{model} \times d_k}$, $\mathbi{W}_i^K \in \mathbb{R}^{d_{model} \times d_k}$, $\mathbi{W}_i^V \in \mathbb{R}^{d_{model} \times d_v}$是参数矩阵; $d_k=d_v=d_{model} / h$,对于不同的头采用不同的变换矩阵,这里$d_{model}$表示每个隐层向量的维度;
\item 首先,将$\mathbi{Q}$、$\mathbi{K}$、$\mathbi{V}$分别通过线性(Linear)变换的方式映射为$h$个子集。即$\mathbi{Q}_i = \mathbi{Q}\mathbi{W}_i^{\,Q} $、$\mathbi{K}_i = \mathbi{K}\mathbi{W}_i^{\,K} $、$\mathbi{V}_i = \mathbi{V}\mathbi{W}_i^{\,V} $,其中$i$表示第$i$个头, $\mathbi{W}_i^{\,Q} \in \mathbb{R}^{d_{\textrm{model}} \times d_k}$, $\mathbi{W}_i^{\,K} \in \mathbb{R}^{d_{\textrm{model}} \times d_k}$, $\mathbi{W}_i^{\,V} \in \mathbb{R}^{d_{\textrm{model}} \times d_v}$是参数矩阵; $d_k=d_v=d_{\textrm{model}} / h$,对于不同的头采用不同的变换矩阵,这里$d_{\textrm{model}}$表示每个隐层向量的维度;
\vspace{0.5em}
\item 其次,对每个头分别执行点乘注意力操作,并得到每个头的注意力操作的输出$\mathbi{head}_i$;
\vspace{0.5em}
\item 最后,将$h$个头的注意力输出在最后一维$d_v$进行拼接(Concat)重新得到维度为$h \times d_v$的输出,并通过对其左乘一个权重矩阵$\mathbi{W}^o$进行线性变换,从而对多头计算得到的信息进行融合,且将多头注意力输出的维度映射为模型的隐层大小(即$d_{model}$),这里参数矩阵$\mathbi{W}^o \in \mathbb{R}^{h \times d_v \times d_{model}}$
\item 最后,将$h$个头的注意力输出在最后一维$d_v$进行拼接(Concat)重新得到维度为$h \times d_v$的输出,并通过对其右乘一个权重矩阵$\mathbi{W}^{\,o}$进行线性变换,从而对多头计算得到的信息进行融合,且将多头注意力输出的维度映射为模型的隐层大小(即$d_{\textrm{model}}$),这里参数矩阵$\mathbi{W}^{\,o} \in \mathbb{R}^{h d_v \times d_{\textrm{model}}}$。
\vspace{0.5em}
\end{itemize}
......@@ -343,8 +343,8 @@
\parinterval 多头机制可以被形式化描述为如下公式:
\begin{eqnarray}
\textrm{MultiHead}(\mathbi{Q}, \mathbi{K} , \mathbi{V})& = & \textrm{Concat} (\mathbi{head}_1, ... , \mathbi{head}_h ) \mathbi{W}^o \label{eq:12-48} \\
\mathbi{head}_i & = &\textrm{Attention} (\mathbi{Q}\mathbi{W}_i^Q , \mathbi{K}\mathbi{W}_i^K , \mathbi{V}\mathbi{W}_i^V )
\textrm{MultiHead}(\mathbi{Q}, \mathbi{K} , \mathbi{V})& = & \textrm{Concat} (\mathbi{head}_1, ... , \mathbi{head}_h ) \mathbi{W}^{\,o} \label{eq:12-48} \\
\mathbi{head}_i & = &\textrm{Attention} (\mathbi{Q}\mathbi{W}_i^{\,Q} , \mathbi{K}\mathbi{W}_i^{\,K} , \mathbi{V}\mathbi{W}_i^{\,V} )
\label{eq:12-49}
\end{eqnarray}
......@@ -383,7 +383,7 @@
\section{残差网络和层正则化}
\parinterval Transformer编码器、解码器分别由多层网络组成(通常为6层),每层网络又包含多个子层(自注意力网络、前馈神经网络)。因此Transformer实际上是一个很深的网络结构。再加上点乘注意力机制中包含很多线性和非线性变换;且注意力函数Attention($\cdot$)的计算也涉及多层网络,整个网络的信息传递非常复杂。从反向传播的角度来看,每次回传的梯度都会经过若干步骤,容易产生梯度爆炸或者消失。解决这个问题的一种办法就是使用残差连接\upcite{DBLP:journals/corr/HeZRS15},此部分内容已经在{\chaptereleven}进行了介绍,这里不再赘述。
\parinterval Transformer编码器、解码器分别由多层网络组成(通常为6层),每层网络又包含多个子层(自注意力网络、前馈神经网络)。因此Transformer实际上是一个很深的网络结构。再加上点乘注意力机制中包含很多线性和非线性变换;且注意力函数Attention($\cdot$)的计算也涉及多层网络,整个网络的信息传递非常复杂。从反向传播的角度来看,每次回传的梯度都会经过若干步骤,容易产生梯度爆炸或者消失。解决这个问题的一种办法就是使用残差连接\upcite{DBLP:journals/corr/HeZRS15},此部分内容已经在{\chapternine}进行了介绍,这里不再赘述。
%\parinterval 解决这个问题的一种办法就是使用残差连接\upcite{DBLP:journals/corr/HeZRS15}。残差连接是一种用来训练深层网络的技术,其结构如图\ref{fig:12-49},即在子层之前通过增加直接连接的方式,将底层信息直接传递给上层。
......@@ -416,13 +416,13 @@
\parinterval 在Transformer的训练过程中,由于引入了残差操作,将前面所有层的输出加到一起,如公式:
\begin{eqnarray}
%x_{l+1} = x_l + F (x_l)
\mathbi{h}^{l+1} = F (\mathbi{h}^l) + \mathbi{h}^l
\mathbi{x}^{l+1} = F (\mathbi{x}^l) + \mathbi{x}^l
\label{eq:12-50}
\end{eqnarray}
\noindent 其中$\mathbi{h}^l$表示第$l$层网络的输入向量,$F (\mathbi{h}^l)$是子层运算,这样会导致不同层(或子层)的结果之间的差异性很大,造成训练过程不稳定、训练时间较长。为了避免这种情况,在每层中加入了层正则化操作\upcite{Ba2016LayerN}。图\ref{fig:12-50} 中的红色方框展示了Transformer中残差和层正则化的位置。层正则化的计算公式如下:
\noindent 其中$\mathbi{x}^l$表示第$l$层网络的输入向量,$F (\mathbi{x}^l)$是子层运算,这样会导致不同层(或子层)的结果之间的差异性很大,造成训练过程不稳定、训练时间较长。为了避免这种情况,在每层中加入了层正则化操作\upcite{Ba2016LayerN}。图\ref{fig:12-50} 中的红色方框展示了Transformer中残差和层正则化的位置。层正则化的计算公式如下:
\begin{eqnarray}
\textrm{LN}(\mathbi{h}) = g \cdot \frac{\mathbi{h}- \mu} {\sigma} + b
\textrm{LN}(\mathbi{x}) = g \cdot \frac{\mathbi{x}- \mu} {\sigma} + b
\label{eq:12-51}
\end{eqnarray}
......@@ -527,7 +527,7 @@ lrate = d_{\textrm{model}}^{-0.5} \cdot \textrm{min} (\textrm{step}^{-0.5} , \te
\vspace{0.5em}
\end{itemize}
\parinterval 不同的Transformer可以适应不同的任务,常见的Transformer模型有Transformer Base、Transformer Big和Transformer Deep\upcite{NIPS2017_7181,WangLearning},具体设置如下:
\parinterval 不同的Transformer可以适应不同的任务,常见的Transformer模型有Transformer Base、Transformer Big和Transformer Deep\upcite{vaswani2017attention,WangLearning},具体设置如下:
\begin{itemize}
\vspace{0.5em}
......
......@@ -4,11 +4,12 @@
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%% chapter 1------------------------------------------------------
@book{慧立彦宗1983大慈恩寺三藏法师传,
title={大慈恩寺三藏法师传},
author={慧立彦宗},
publisher={中华书局},
year={1983},
@book{慧立2000大慈恩寺三藏法師傳,
title={大慈恩寺三藏法師傳},
author={慧立 and 彦悰 and 道宣},
volume={2},
year={2000},
publisher={中华书局}
}
@book{2019cns,
......@@ -5219,7 +5220,7 @@ author = {Yoshua Bengio and
@article{Waibel1989PhonemeRU,
title={Phoneme recognition using time-delay neural networks},
author={Alexander Waibel and Toshiyuki Hanazawa and Geoffrey Hinton and Kiyohiro Shikano and K.J. Lang},
author={Alexander Waibel and Toshiyuki Hanazawa and Geoffrey Everest Hinton and Kiyohiro Shikano and K.J. Lang},
journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
year={1989},
volume={37},
......@@ -5228,7 +5229,7 @@ author = {Yoshua Bengio and
@article{LeCun1989BackpropagationAT,
title={Backpropagation Applied to Handwritten Zip Code Recognition},
author={Yann LeCun and Bernhard Boser and John Denker and Don Henderson and R. Howard and W.E. Hubbard and Larry Jackel},
author={Yann LeCun and Bernhard Boser and John Denker and Don Henderson and R.E. Howard and W.E. Hubbard and Larry Jackel},
journal={Neural Computation},
year={1989},
volume={1},
......@@ -5236,7 +5237,7 @@ author = {Yoshua Bengio and
}
@article{726791,
author={Yann {Lecun} and Leon {Bottou} and Y. {Bengio} and Patrick {Haffner}},
author={Yann {Lecun} and Leon {Bottou} and Yoshua {Bengio} and Patrick {Haffner}},
journal={Proceedings of the IEEE},
title={Gradient-based learning applied to document recognition},
year={1998},
......@@ -5448,7 +5449,7 @@ author = {Yoshua Bengio and
@inproceedings{Islam2020HowMP,
author = {Md. Amirul Islam and
Sen Jia and
Neil D. B. Bruce},
Neil Bruce},
title = {How much Position Information Do Convolutional Neural Networks Encode?},
publisher = {International Conference on Learning Representations},
year = {2020},
......@@ -5458,7 +5459,7 @@ author = {Yoshua Bengio and
author = {Ilya Sutskever and
James Martens and
George E. Dahl and
Geoffrey E. Hinton},
Geoffrey Everest Hinton},
publisher = {International Conference on Machine Learning},
pages = {1139--1147},
year={2013}
......@@ -5473,7 +5474,7 @@ author = {Yoshua Bengio and
}
@article{JMLR:v15:srivastava14a,
author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
author = {Nitish Srivastava and Geoffrey Everest Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
journal = {Journal of Machine Learning Research},
year = {2014},
......@@ -5491,7 +5492,7 @@ author = {Yoshua Bengio and
@article{Howard2017MobileNetsEC,
title={MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications},
author = {Andrew G. Howard and
author = {Andrew Howard and
Menglong Zhu and
Bo Chen and
Dmitry Kalenichenko and
......@@ -5522,7 +5523,7 @@ author = {Yoshua Bengio and
title={Locally-connected and convolutional neural networks for small footprint speaker recognition},
author = {Yu-hsin Chen and
Ignacio Lopez-Moreno and
Tara N. Sainath and
Tara Sainath and
Mirk{\'{o}} Visontai and
Raziel Alvarez and
Carolina Parada},
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论