Commit e329e9d6 by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !1064
parents c90d70e8 6e13b60f
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
}; };
\node[font=\footnotesize,anchor=north] (l1) at ([xshift=0em,yshift=-1em]top.south) {(a) 符号合并表}; \node[font=\footnotesize,anchor=north] (l1) at ([xshift=0em,yshift=-1em]top.south) {(a) 符号合并表};
\node[font=\scriptsize,anchor=west] (n1) at ([xshift=-4.5em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$}; \node[font=\scriptsize,anchor=west] (n1) at ([xshift=-3em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (n2) at ([xshift=2.6em]n1.east) {l\ o\ w\ e\ {\red r$<$e$>$}}; \node[font=\scriptsize,anchor=west] (n2) at ([xshift=2.6em]n1.east) {l\ o\ w\ e\ {\red r$<$e$>$}};
\node[font=\scriptsize,anchor=west] (n3) at ([xshift=2.6em]n2.east) {{\red lo}\ w\ e\ r$<$e$>$}; \node[font=\scriptsize,anchor=west] (n3) at ([xshift=2.6em]n2.east) {{\red lo}\ w\ e\ r$<$e$>$};
\node[font=\scriptsize,anchor=west] (n4) at ([xshift=2.6em]n3.east) {{\red low}\ e\ r$<$e$>$}; \node[font=\scriptsize,anchor=west] (n4) at ([xshift=2.6em]n3.east) {{\red low}\ e\ r$<$e$>$};
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
\node[font=\scriptsize,anchor=north east] (s1) at ([yshift=0.1em]n1.north west) {样例1:}; \node[font=\scriptsize,anchor=north east] (s1) at ([yshift=0.1em]n1.north west) {样例1:};
\node[font=\scriptsize,anchor=north east] (s1) at ([yshift=0.1em]t1.north west) {样例2:}; \node[font=\scriptsize,anchor=north east] (s1) at ([yshift=0.1em]t1.north west) {样例2:};
\node[font=\footnotesize,anchor=north] (l2) at ([xshift=2em,yshift=-1em]t3.south) {(b) 合并样例}; \node[font=\footnotesize,anchor=north] (l2) at ([xshift=0.5em,yshift=-1em]t3.south) {(b) 合并样例};
\draw[->,thick](n1.east) -- (n2.west); \draw[->,thick](n1.east) -- (n2.west);
\draw[->,thick](n2.east) -- (n3.west); \draw[->,thick](n2.east) -- (n3.west);
......
...@@ -38,7 +38,7 @@ ...@@ -38,7 +38,7 @@
\end{scope} \end{scope}
\begin{scope}[xshift=1.85in] \begin{scope}[xshift=1.75in]
\node [anchor=west,stnode] (r1) at (0, 0) {第1层}; \node [anchor=west,stnode] (r1) at (0, 0) {第1层};
\node [anchor=south,tnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层}; \node [anchor=south,tnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层};
...@@ -60,11 +60,11 @@ ...@@ -60,11 +60,11 @@
\draw[->,thick] ([xshift=0em,yshift=0em]r3.north)--([xshift=0em,yshift=0em]r4.south); \draw[->,thick] ([xshift=0em,yshift=0em]r3.north)--([xshift=0em,yshift=0em]r4.south);
\draw[->,thick] ([xshift=0em,yshift=0em]r4.north)--([xshift=0em,yshift=0em]output.south); \draw[->,thick] ([xshift=0em,yshift=0em]r4.north)--([xshift=0em,yshift=0em]output.south);
\node [anchor=north,font=\small] (label) at ([xshift=-1.5em,yshift=-0.7em]input.south) {(b)原始Transformer模型}; \node [anchor=north,font=\small] (label) at ([xshift=-1.7em,yshift=-0.7em]input.south) {(b)原始Transformer模型};
\end{scope} \end{scope}
\begin{scope}[xshift=3.9in] \begin{scope}[xshift=3.85in]
\node [anchor=west,stnode] (r1) at (0, 0) {第1层}; \node [anchor=west,stnode] (r1) at (0, 0) {第1层};
\node [anchor=south,stnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层}; \node [anchor=south,stnode] (r2) at ([xshift=0em,yshift=1em]r1.north){第2层};
...@@ -89,7 +89,7 @@ ...@@ -89,7 +89,7 @@
\draw[->,thick] ([xshift=0em,yshift=0em]wr2.east)--([xshift=0em,yshift=0em]r2.west); \draw[->,thick] ([xshift=0em,yshift=0em]wr2.east)--([xshift=0em,yshift=0em]r2.west);
\draw[->,thick] ([xshift=0em,yshift=0em]wr3.east)--([xshift=0em,yshift=0em]r4.west); \draw[->,thick] ([xshift=0em,yshift=0em]wr3.east)--([xshift=0em,yshift=0em]r4.west);
\node [anchor=north,font=\small,align=left] (label) at ([xshift=-3em,yshift=-0.7em]input.south) {(c)共享权重的\\ Transformer模型}; \node [anchor=north,font=\small] (label) at ([xshift=-3em,yshift=-0.7em]input.south) {(c)共享权重的Transformer模型};
\end{scope} \end{scope}
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
\node [anchor=south west,manode] (a1) at ([xshift=0em,yshift=1em]e1.north west){Attention}; \node [anchor=south west,manode] (a1) at ([xshift=0em,yshift=1em]e1.north west){Attention};
\node [anchor=south east,manode] (c1) at ([xshift=0em,yshift=1em]e1.north east){Conv}; \node [anchor=south east,manode] (c1) at ([xshift=0em,yshift=1em]e1.north east){Conv};
\node [anchor=south west,ebnode] (e2) at ([xshift=0em,yshift=1em]a1.north west){Embedding}; \node [anchor=south west,ebnode] (e2) at ([xshift=0em,yshift=1em]a1.north west){Embedding};
\node [anchor=south,draw,circle,inner sep=4pt] (add1) at ([xshift=0em,yshift=0.5em]e2.north){}; \node [anchor=south,draw,circle,inner sep=4pt,thick] (add1) at ([xshift=0em,yshift=0.5em]e2.north){};
\node [anchor=south,ffnnode] (f2) at ([xshift=0em,yshift=0.5em]add1.north){FFN}; \node [anchor=south,ffnnode] (f2) at ([xshift=0em,yshift=0.5em]add1.north){FFN};
\node [anchor=south,inner sep=0mm,minimum height=1.8em] (op) at ([xshift=0em,yshift=0.5em]f2.north){output}; \node [anchor=south,inner sep=0mm,minimum height=1.8em] (op) at ([xshift=0em,yshift=0.5em]f2.north){output};
...@@ -29,8 +29,8 @@ ...@@ -29,8 +29,8 @@
\draw[->,thick] ([xshift=0em,yshift=0em]f2.north)--([xshift=0em,yshift=0.3em]op.south); \draw[->,thick] ([xshift=0em,yshift=0em]f2.north)--([xshift=0em,yshift=0.3em]op.south);
\draw[-] ([xshift=0em,yshift=0em]add1.west)--([xshift=-0em,yshift=0em]add1.east); \draw[-,thick] ([xshift=0em,yshift=0em]add1.west)--([xshift=-0em,yshift=0em]add1.east);
\draw[-] ([xshift=0em,yshift=0em]add1.south)--([xshift=-0em,yshift=-0em]add1.north); \draw[-,thick] ([xshift=0em,yshift=0em]add1.south)--([xshift=-0em,yshift=-0em]add1.north);
\draw[->,thick,rectangle,rounded corners=5pt] ([xshift=0em,yshift=0.5em]f1.north)--([xshift=-6em,yshift=0.5em]f1.north)--([xshift=-5.45em,yshift=0em]add1.west)--([xshift=0em,yshift=0em]add1.west); \draw[->,thick,rectangle,rounded corners=5pt] ([xshift=0em,yshift=0.5em]f1.north)--([xshift=-6em,yshift=0.5em]f1.north)--([xshift=-5.45em,yshift=0em]add1.west)--([xshift=0em,yshift=0em]add1.west);
......
...@@ -10,10 +10,10 @@ ...@@ -10,10 +10,10 @@
\begin{scope}[] \begin{scope}[]
\node [anchor=east,circle,fill=black,inner sep = 2pt] (n1) at (-0, 0) {}; \node [anchor=east,circle,fill=black,inner sep = 2pt] (n1) at (-0, 0) {};
\node [anchor=west,draw,circle,inner sep=5pt] (n2) at ([xshift=13em,yshift=0em]n1.east){}; \node [anchor=west,draw,circle,inner sep=5pt,thick] (n2) at ([xshift=13em,yshift=0em]n1.east){};
\node [anchor=west,lnnode] (n3) at ([xshift=1.5em,yshift=0em]n2.east){LN}; \node [anchor=west,lnnode] (n3) at ([xshift=1.5em,yshift=0em]n2.east){LN};
\node [anchor=west,circle,fill=black,inner sep=2pt] (n4) at ([xshift=1.5em,yshift=0em]n3.east){}; \node [anchor=west,circle,fill=black,inner sep=2pt] (n4) at ([xshift=1.5em,yshift=0em]n3.east){};
\node [anchor=west,draw,circle,inner sep=5pt] (n5) at ([xshift=5em,yshift=0em]n4.east){}; \node [anchor=west,draw,circle,inner sep=5pt,thick] (n5) at ([xshift=5em,yshift=0em]n4.east){};
\node [anchor=west,lnnode] (n6) at ([xshift=1.5em,yshift=0em]n5.east){LN}; \node [anchor=west,lnnode] (n6) at ([xshift=1.5em,yshift=0em]n5.east){LN};
\node [anchor=west,manode] (a1) at ([xshift=1.5em,yshift=2em]n1.east){Multi-Head Attention}; \node [anchor=west,manode] (a1) at ([xshift=1.5em,yshift=2em]n1.east){Multi-Head Attention};
......
...@@ -7,14 +7,14 @@ ...@@ -7,14 +7,14 @@
\node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$}; \node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt,thick] (F1) at ([xshift=4em]x1.east){\small{$F$}}; \node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt,thick] (F1) at ([xshift=4em]x1.east){\small{$F$}};
\node [anchor=west,circle,draw,minimum size=1em] (n1) at ([xshift=4em]F1.east) {}; \node [anchor=west,circle,draw,minimum size=1em,thick] (n1) at ([xshift=4em]F1.east) {};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt,thick] (ln1) at ([xshift=4em]n1.east){\small{\textrm{LN}}}; \node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt,thick] (ln1) at ([xshift=4em]n1.east){\small{\textrm{LN}}};
\node [anchor=west] (x2) at ([xshift=4em]ln1.east) {$\mathbi{x}_{l+1}$}; \node [anchor=west] (x2) at ([xshift=4em]ln1.east) {$\mathbi{x}_{l+1}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$}; \node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt,thick] (F2) at ([xshift=4em]x3.east){\small{\textrm{LN}}}; \node [anchor=west,draw,fill=green!20,inner xsep=5pt,rounded corners=2pt,thick] (F2) at ([xshift=4em]x3.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt,thick] (ln2) at ([xshift=4em]F2.east){\small{$F$}}; \node [anchor=west,draw,fill=red!20,inner xsep=5pt,rounded corners=2pt,thick] (ln2) at ([xshift=4em]F2.east){\small{$F$}};
\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=4em]ln2.east){}; \node [anchor=west,circle,draw,,minimum size=1em,thick] (n2) at ([xshift=4em]ln2.east){};
\node [anchor=west] (x4) at ([xshift=4em]n2.east) {$\mathbi{x}_{l+1}$}; \node [anchor=west] (x4) at ([xshift=4em]n2.east) {$\mathbi{x}_{l+1}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(F1.west); \draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(F1.west);
...@@ -27,10 +27,10 @@ ...@@ -27,10 +27,10 @@
\draw[->, line width=1pt] (n2.east)--(x4.west); \draw[->, line width=1pt] (n2.east)--(x4.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north); \draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north); \draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
\draw[-] (n1.west)--(n1.east); \draw[-,thick] (n1.west)--(n1.east);
\draw[-] (n1.north)--(n1.south); \draw[-,thick] (n1.north)--(n1.south);
\draw[-] (n2.west)--(n2.east); \draw[-,thick] (n2.west)--(n2.east);
\draw[-] (n2.north)--(n2.south); \draw[-,thick] (n2.north)--(n2.south);
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){}; \node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){}; \node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
......
...@@ -8,11 +8,11 @@ ...@@ -8,11 +8,11 @@
\node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$}; \node [anchor=east] (x1) at (-0.5em, 0) {$\mathbi{x}_l$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln1) at ([xshift=1em]x1.east){\small{\textrm{LN}}}; \node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln1) at ([xshift=1em]x1.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f1) at ([xshift=0.6em]ln1.east){\small{$F$}}; \node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f1) at ([xshift=0.6em]ln1.east){\small{$F$}};
\node [anchor=west,circle,draw,,minimum size=1em] (n1) at ([xshift=3em]f1.east){}; \node [anchor=west,circle,draw,,minimum size=1em,thick] (n1) at ([xshift=3em]f1.east){};
\node [anchor=west] (x2) at ([xshift=1em]n1.east) {$\mathbi{x}_{l+1}$}; \node [anchor=west] (x2) at ([xshift=1em]n1.east) {$\mathbi{x}_{l+1}$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln12) at ([xshift=1em]x2.east){\small{\textrm{LN}}}; \node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln12) at ([xshift=1em]x2.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f12) at ([xshift=0.6em]ln12.east){\small{$F$}}; \node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f12) at ([xshift=0.6em]ln12.east){\small{$F$}};
\node [anchor=west,circle,draw,,minimum size=1em] (n12) at ([xshift=3em]f12.east){}; \node [anchor=west,circle,draw,,minimum size=1em,thick] (n12) at ([xshift=3em]f12.east){};
\node [anchor=west] (x22) at ([xshift=1em]n12.east) {$\mathbi{x}_{l+2}$}; \node [anchor=west] (x22) at ([xshift=1em]n12.east) {$\mathbi{x}_{l+2}$};
\node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$}; \node [anchor=north] (x3) at ([yshift=-5em]x1.south) {$\mathbi{x}_l$};
...@@ -20,13 +20,13 @@ ...@@ -20,13 +20,13 @@
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f2) at ([xshift=0.6em]ln2.east){\small{$F$}}; \node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f2) at ([xshift=0.6em]ln2.east){\small{$F$}};
\node [anchor=west,minimum size=1em] (p1) at ([xshift=1em]f2.east){}; \node [anchor=west,minimum size=1em] (p1) at ([xshift=1em]f2.east){};
\node [anchor=north] (m1) at ([yshift=0.6em]p1.south){\footnotesize{\red{Mask=1}}}; \node [anchor=north] (m1) at ([yshift=0.6em]p1.south){\footnotesize{\red{Mask=1}}};
\node [anchor=west,circle,draw,,minimum size=1em] (n2) at ([xshift=3em]f2.east){}; \node [anchor=west,circle,draw,,minimum size=1em,thick] (n2) at ([xshift=3em]f2.east){};
\node [anchor=west] (x4) at ([xshift=1em]n2.east) {$\mathbi{x}_{l+1}$}; \node [anchor=west] (x4) at ([xshift=1em]n2.east) {$\mathbi{x}_{l+1}$};
\node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln22) at ([xshift=1em]x4.east){\small{\textrm{LN}}}; \node [anchor=west,draw,fill=red!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (ln22) at ([xshift=1em]x4.east){\small{\textrm{LN}}};
\node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f22) at ([xshift=0.6em]ln22.east){\small{$F$}}; \node [anchor=west,draw,fill=green!30,inner xsep=5pt,rounded corners=2pt,draw,thick] (f22) at ([xshift=0.6em]ln22.east){\small{$F$}};
\node [anchor=west,minimum size=1em] (p2) at ([xshift=1em]f22.east){}; \node [anchor=west,minimum size=1em] (p2) at ([xshift=1em]f22.east){};
\node [anchor=north] (m2) at ([yshift=0.6em]p2.south){\footnotesize{\red{Mask=0}}}; \node [anchor=north] (m2) at ([yshift=0.6em]p2.south){\footnotesize{\red{Mask=0}}};
\node [anchor=west,circle,draw,,minimum size=1em] (n22) at ([xshift=3em]f22.east){}; \node [anchor=west,circle,draw,,minimum size=1em,thick] (n22) at ([xshift=3em]f22.east){};
\node [anchor=west] (x42) at ([xshift=1em]n22.east) {$\mathbi{x}_{l+2}$}; \node [anchor=west] (x42) at ([xshift=1em]n22.east) {$\mathbi{x}_{l+2}$};
\draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(ln1.west); \draw[->, line width=1pt] ([xshift=-0.1em]x1.east)--(ln1.west);
...@@ -41,10 +41,10 @@ ...@@ -41,10 +41,10 @@
\draw[->, line width=1pt] (n2.east)--(x4.west); \draw[->, line width=1pt] (n2.east)--(x4.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north); \draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x1.north) -- ([yshift=1em]x1.north) -- ([yshift=1.4em]n1.north) -- (n1.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north); \draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x3.north) -- ([yshift=1em]x3.north) -- ([yshift=1.4em]n2.north) -- (n2.north);
\draw[-] (n1.west)--(n1.east); \draw[-,thick] (n1.west)--(n1.east);
\draw[-] (n1.north)--(n1.south); \draw[-,thick] (n1.north)--(n1.south);
\draw[-] (n2.west)--(n2.east); \draw[-,thick] (n2.west)--(n2.east);
\draw[-] (n2.north)--(n2.south); \draw[-,thick] (n2.north)--(n2.south);
\draw[->, line width=1pt] ([xshift=-0.1em]x2.east)--(ln12.west); \draw[->, line width=1pt] ([xshift=-0.1em]x2.east)--(ln12.west);
\draw[->, line width=1pt] ([xshift=-0.1em]ln12.east)--(f12.west); \draw[->, line width=1pt] ([xshift=-0.1em]ln12.east)--(f12.west);
...@@ -58,10 +58,10 @@ ...@@ -58,10 +58,10 @@
\draw[->, line width=1pt] (n22.east)--(x42.west); \draw[->, line width=1pt] (n22.east)--(x42.west);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x2.north) -- ([yshift=1em]x2.north) -- ([yshift=1.4em]n12.north) -- (n12.north); \draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x2.north) -- ([yshift=1em]x2.north) -- ([yshift=1.4em]n12.north) -- (n12.north);
\draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x4.north) -- ([yshift=1em]x4.north) -- ([yshift=1.4em]n22.north) -- (n22.north); \draw[->,rounded corners,line width=1pt] ([yshift=-0.2em]x4.north) -- ([yshift=1em]x4.north) -- ([yshift=1.4em]n22.north) -- (n22.north);
\draw[-] (n12.west)--(n12.east); \draw[-,thick] (n12.west)--(n12.east);
\draw[-] (n12.north)--(n12.south); \draw[-,thick] (n12.north)--(n12.south);
\draw[-] (n22.west)--(n22.east); \draw[-,thick] (n22.west)--(n22.east);
\draw[-] (n22.north)--(n22.south); \draw[-,thick] (n22.north)--(n22.south);
\node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){}; \node [anchor=south] (k1) at ([yshift=-0.1em]x1.north){};
\node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){}; \node [anchor=south] (k2) at ([yshift=-0.1em]x3.north){};
......
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
\node [anchor=east,font=\small] (r1) at ([xshift=-2em,yshift=0em]box1.west) {混合RNN}; \node [anchor=east,font=\small] (r1) at ([xshift=-2em,yshift=0em]box1.west) {混合RNN};
{\small {\small
\node [anchor=south west,wnode] (l1) at ([xshift=1em,yshift=5em]r1.north west) {先序遍历句法树,得到序列:}; \node [anchor=south west,wnode] (l1) at ([xshift=0em,yshift=5em]r1.north west) {先序遍历句法树,得到序列:};
\node [anchor=north west,wnode,align=center] (l2) at ([xshift=0.5em,yshift=-0.6em]l1.north east) {S\\[0.5em]$l_1$}; \node [anchor=north west,wnode,align=center] (l2) at ([xshift=0.5em,yshift=-0.6em]l1.north east) {S\\[0.5em]$l_1$};
\node [anchor=north west,wnode,align=center] (l3) at ([xshift=0.5em,yshift=0em]l2.north east) {NP\\[0.5em]$l_2$}; \node [anchor=north west,wnode,align=center] (l3) at ([xshift=0.5em,yshift=0em]l2.north east) {NP\\[0.5em]$l_2$};
\node [anchor=north west,wnode,align=center] (l4) at ([xshift=0.5em,yshift=0em]l3.north east) {PRN\\[0.5em]$l_3$}; \node [anchor=north west,wnode,align=center] (l4) at ([xshift=0.5em,yshift=0em]l3.north east) {PRN\\[0.5em]$l_3$};
......
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
\node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){worried}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw13) at ([yshift=0.1em,xshift=0.5em]tw12.east){worried};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){about}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw14) at ([xshift=0.5em]tw13.east){about};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw15) at ([xshift=0.5em]tw14.east){the};
\node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([yshift=-0.1em,xshift=0.5em]tw15.east){situation}; \node[scale=0.9,anchor=west,minimum size=18pt] (tw16) at ([xshift=0.5em]tw15.east){situation};
\draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north); \draw[dashed] ([xshift=-0.3em]cfrag1.south) -- ([yshift=-0.3em]tw11.north);
\draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north); \draw[dashed] (cfrag2.south) -- ([yshift=-0.4em]tw14.north);
......
...@@ -5402,8 +5402,7 @@ author = {Yoshua Bengio and ...@@ -5402,8 +5402,7 @@ author = {Yoshua Bengio and
@inproceedings{garcia-martinez2016factored, @inproceedings{garcia-martinez2016factored,
title={Factored Neural Machine Translation Architectures}, title={Factored Neural Machine Translation Architectures},
author={Mercedes {Garcia-Martinez} and Loïc {Barrault} and Fethi {Bougares}}, author={Mercedes {Garcia-Martinez} and Loïc {Barrault} and Fethi {Bougares}},
publisher={International Workshop on Spoken Language Translation (IWSLT'16)}, publisher={International Workshop on Spoken Language Translation},
notes={Sourced from Microsoft Academic - https://academic.microsoft.com/paper/2949810612},
year={2016} year={2016}
} }
@inproceedings{DBLP:conf/acl/Kudo18, @inproceedings{DBLP:conf/acl/Kudo18,
...@@ -5695,7 +5694,7 @@ author = {Yoshua Bengio and ...@@ -5695,7 +5694,7 @@ author = {Yoshua Bengio and
@inproceedings{britz2017effective, @inproceedings{britz2017effective,
title={Effective domain mixing for neural machine translation}, title={Effective domain mixing for neural machine translation},
author={Britz, Denny and Le, Quoc and Pryzant, Reid}, author={Britz, Denny and Le, Quoc and Pryzant, Reid},
publisher={Proceedings of the Second Conference on Machine Translation}, publisher={Annual Meeting of the Association for Computational Linguistics},
pages={118--126}, pages={118--126},
year={2017} year={2017}
} }
...@@ -5770,7 +5769,7 @@ author = {Yoshua Bengio and ...@@ -5770,7 +5769,7 @@ author = {Yoshua Bengio and
Wolfgang Menzel}, Wolfgang Menzel},
title = {Automatic Threshold Detection for Data Selection in Machine Translation}, title = {Automatic Threshold Detection for Data Selection in Machine Translation},
pages = {483--488}, pages = {483--488},
publisher = {Proceedings of the Second Conference on Machine Translation}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2017} year = {2017}
} }
@inproceedings{DBLP:conf/wmt/BiciciY11, @inproceedings{DBLP:conf/wmt/BiciciY11,
...@@ -5778,7 +5777,7 @@ author = {Yoshua Bengio and ...@@ -5778,7 +5777,7 @@ author = {Yoshua Bengio and
Deniz Yuret}, Deniz Yuret},
title = {Instance Selection for Machine Translation using Feature Decay Algorithms}, title = {Instance Selection for Machine Translation using Feature Decay Algorithms},
pages = {272--283}, pages = {272--283},
publisher = {Proceedings of the Sixth Workshop on Statistical Machine Translation}, publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2011} year = {2011}
} }
@inproceedings{poncelas2018feature, @inproceedings{poncelas2018feature,
...@@ -6047,7 +6046,7 @@ author = {Yoshua Bengio and ...@@ -6047,7 +6046,7 @@ author = {Yoshua Bengio and
@inproceedings{hoang2018iterative, @inproceedings{hoang2018iterative,
title={Iterative back-translation for neural machine translation}, title={Iterative back-translation for neural machine translation},
author={Hoang, Vu Cong Duy and Koehn, Philipp and Haffari, Gholamreza and Cohn, Trevor}, author={Hoang, Vu Cong Duy and Koehn, Philipp and Haffari, Gholamreza and Cohn, Trevor},
publisher={Proceedings of the 2nd Workshop on Neural Machine Translation and Generation}, publisher={Annual Meeting of the Association for Computational Linguistics},
pages={18--24}, pages={18--24},
year={2018} year={2018}
} }
...@@ -6257,7 +6256,7 @@ author = {Yoshua Bengio and ...@@ -6257,7 +6256,7 @@ author = {Yoshua Bengio and
Jingbo Zhu}, Jingbo Zhu},
title = {Dynamic Curriculum Learning for Low-Resource Neural Machine Translation}, title = {Dynamic Curriculum Learning for Low-Resource Neural Machine Translation},
pages = {3977--3989}, pages = {3977--3989},
publisher = {International Committee on Computational Linguistics}, publisher = {International Conference on Computational Linguistics},
year = {2020} year = {2020}
} }
@inproceedings{DBLP:conf/acl/ZhouYWWC20, @inproceedings{DBLP:conf/acl/ZhouYWWC20,
...@@ -6307,7 +6306,7 @@ author = {Yoshua Bengio and ...@@ -6307,7 +6306,7 @@ author = {Yoshua Bengio and
Andrew McCallum}, Andrew McCallum},
title = {Active Bias: Training More Accurate Neural Networks by Emphasizing title = {Active Bias: Training More Accurate Neural Networks by Emphasizing
High Variance Samples}, High Variance Samples},
publisher = {Annual Conference on Neural Information Processing Systems}, publisher = {Conference on Neural Information Processing Systems},
pages = {1002--1012}, pages = {1002--1012},
year = {2017} year = {2017}
} }
...@@ -6360,7 +6359,7 @@ author = {Yoshua Bengio and ...@@ -6360,7 +6359,7 @@ author = {Yoshua Bengio and
title = {Investigating Catastrophic Forgetting During Continual Training for title = {Investigating Catastrophic Forgetting During Continual Training for
Neural Machine Translation}, Neural Machine Translation},
pages = {4315--4326}, pages = {4315--4326},
publisher = {International Committee on Computational Linguistics}, publisher = {International Conference on Computational Linguistics},
year = {2020} year = {2020}
} }
@inproceedings{DBLP:conf/cvpr/RebuffiKSL17, @inproceedings{DBLP:conf/cvpr/RebuffiKSL17,
...@@ -6392,7 +6391,7 @@ author = {Yoshua Bengio and ...@@ -6392,7 +6391,7 @@ author = {Yoshua Bengio and
Oriol Vinyals and Oriol Vinyals and
Navdeep Jaitly and Navdeep Jaitly and
Noam Shazeer}, Noam Shazeer},
publisher = {Annual Conference on Neural Information Processing Systems}, publisher = {Conference on Neural Information Processing Systems},
pages = {1171--1179}, pages = {1171--1179},
year = {2015} year = {2015}
} }
...@@ -6835,7 +6834,7 @@ author = {Yoshua Bengio and ...@@ -6835,7 +6834,7 @@ author = {Yoshua Bengio and
@inproceedings{Gu2019LevenshteinT, @inproceedings{Gu2019LevenshteinT,
title={Levenshtein Transformer}, title={Levenshtein Transformer},
author={Jiatao Gu and Changhan Wang and Jake Zhao}, author={Jiatao Gu and Changhan Wang and Jake Zhao},
publisher = {Annual Conference on Neural Information Processing Systems}, publisher = {Conference on Neural Information Processing Systems},
pages = {11179--11189}, pages = {11179--11189},
year = {2019}, year = {2019},
} }
...@@ -6963,7 +6962,7 @@ author = {Yoshua Bengio and ...@@ -6963,7 +6962,7 @@ author = {Yoshua Bengio and
@inproceedings{Jiang2012LearnedPF, @inproceedings{Jiang2012LearnedPF,
title={Learned Prioritization for Trading Off Accuracy and Speed}, title={Learned Prioritization for Trading Off Accuracy and Speed},
author={Jiarong Jiang and Adam R. Teichert and Hal Daum{\'e} and Jason Eisner}, author={Jiarong Jiang and Adam R. Teichert and Hal Daum{\'e} and Jason Eisner},
publisher={Annual Conference on Neural Information Processing Systems}, publisher={Conference on Neural Information Processing Systems},
pages={1340--1348}, pages={1340--1348},
year= {2012} year= {2012}
} }
...@@ -7123,7 +7122,7 @@ author = {Yoshua Bengio and ...@@ -7123,7 +7122,7 @@ author = {Yoshua Bengio and
author = {Paul Michel and author = {Paul Michel and
Omer Levy and Omer Levy and
Graham Neubig}, Graham Neubig},
publisher = {Annual Conference on Neural Information Processing Systems}, publisher = {Conference on Neural Information Processing Systems},
pages = {14014--14024}, pages = {14014--14024},
year = {2019} year = {2019}
} }
...@@ -7157,7 +7156,7 @@ author = {Yoshua Bengio and ...@@ -7157,7 +7156,7 @@ author = {Yoshua Bengio and
title={Generative Neural Machine Translation}, title={Generative Neural Machine Translation},
author={Harshil Shah and author={Harshil Shah and
David Barber}, David Barber},
publisher={Annual Conference on Neural Information Processing Systems}, publisher={Conference on Neural Information Processing Systems},
pages={1353--1362}, pages={1353--1362},
year={2018} year={2018}
} }
...@@ -7246,7 +7245,7 @@ author = {Yoshua Bengio and ...@@ -7246,7 +7245,7 @@ author = {Yoshua Bengio and
Jeff Pool and Jeff Pool and
John Tran and John Tran and
William J. Dally}, William J. Dally},
publisher={Annual Conference on Neural Information Processing Systems}, publisher={Conference on Neural Information Processing Systems},
pages={1135--1143}, pages={1135--1143},
year={2015} year={2015}
} }
...@@ -7281,9 +7280,8 @@ author = {Yoshua Bengio and ...@@ -7281,9 +7280,8 @@ author = {Yoshua Bengio and
Tinghui Zhou and Tinghui Zhou and
Gao Huang and Gao Huang and
Trevor Darrell}, Trevor Darrell},
publisher={ArXiv}, publisher={International Conference on Learning Representations},
year={2019}, year={2019}
volume={abs/1810.05270}
} }
@inproceedings{Liu2017LearningEC, @inproceedings{Liu2017LearningEC,
author = {Zhuang Liu and author = {Zhuang Liu and
...@@ -7341,8 +7339,8 @@ author = {Zhuang Liu and ...@@ -7341,8 +7339,8 @@ author = {Zhuang Liu and
Luke Zettlemoyer and Luke Zettlemoyer and
Omer Levy}, Omer Levy},
title = {Aligned Cross Entropy for Non-Autoregressive Machine Translation}, title = {Aligned Cross Entropy for Non-Autoregressive Machine Translation},
publisher = {CoRR}, publisher = { International Conference on Machine Learning},
volume = {abs/2004.01655}, volume = {119},
year = {2020}, year = {2020},
} }
@inproceedings{Shao2020MinimizingTB, @inproceedings{Shao2020MinimizingTB,
...@@ -7394,9 +7392,8 @@ author = {Zhuang Liu and ...@@ -7394,9 +7392,8 @@ author = {Zhuang Liu and
@inproceedings{Zhou2020UnderstandingKD, @inproceedings{Zhou2020UnderstandingKD,
title={Understanding Knowledge Distillation in Non-autoregressive Machine Translation}, title={Understanding Knowledge Distillation in Non-autoregressive Machine Translation},
author={Chunting Zhou and Graham Neubig and Jiatao Gu}, author={Chunting Zhou and Graham Neubig and Jiatao Gu},
publisher={ArXiv}, publisher={International Conference on Learning Representations},
year={2020}, year={2020}
volume={abs/1911.02727}
} }
@inproceedings{Wang2019NonAutoregressiveMT, @inproceedings{Wang2019NonAutoregressiveMT,
title={Non-Autoregressive Machine Translation with Auxiliary Regularization}, title={Non-Autoregressive Machine Translation with Auxiliary Regularization},
...@@ -7456,16 +7453,16 @@ author = {Zhuang Liu and ...@@ -7456,16 +7453,16 @@ author = {Zhuang Liu and
@inproceedings{Ho2016GenerativeAI, @inproceedings{Ho2016GenerativeAI,
title={Generative Adversarial Imitation Learning}, title={Generative Adversarial Imitation Learning},
author={Jonathan Ho and Stefano Ermon}, author={Jonathan Ho and Stefano Ermon},
publisher={Annual Conference on Neural Information Processing Systems}, publisher={Conference on Neural Information Processing Systems},
pages={4565--4573}, pages={4565--4573},
year={2016} year={2016}
} }
@inproceedings{Duan2017OneShotIL, @inproceedings{Duan2017OneShotIL,
title={One-Shot Imitation Learning}, title={One-Shot Imitation Learning},
author={Yan Duan and Marcin Andrychowicz and Bradly C. Stadie and Jonathan Ho and Jonas Schneider and Ilya Sutskever and Pieter Abbeel and Wojciech Zaremba}, author={Yan Duan and Marcin Andrychowicz and Bradly C. Stadie and Jonathan Ho and Jonas Schneider and Ilya Sutskever and Pieter Abbeel and Wojciech Zaremba},
publisher={CoRR}, publisher={Conference on Neural Information Processing Systems},
year={2017}, year={2017},
volume={abs/1703.07326} pages= {1087--1098}
} }
@inproceedings{Wang2018SemiAutoregressiveNM, @inproceedings{Wang2018SemiAutoregressiveNM,
title={Semi-Autoregressive Neural Machine Translation}, title={Semi-Autoregressive Neural Machine Translation},
...@@ -7486,7 +7483,7 @@ author = {Zhuang Liu and ...@@ -7486,7 +7483,7 @@ author = {Zhuang Liu and
@inproceedings{Kasai2020NonAutoregressiveMT, @inproceedings{Kasai2020NonAutoregressiveMT,
title={Non-Autoregressive Machine Translation with Disentangled Context Transformer}, title={Non-Autoregressive Machine Translation with Disentangled Context Transformer},
author={Jungo Kasai and J. Cross and Marjan Ghazvininejad and Jiatao Gu}, author={Jungo Kasai and J. Cross and Marjan Ghazvininejad and Jiatao Gu},
publisher={arXiv: Computation and Language}, publisher={International Conference on Machine Learning},
year={2020} year={2020}
} }
@inproceedings{Zhou2019SynchronousBN, @inproceedings{Zhou2019SynchronousBN,
...@@ -7517,7 +7514,7 @@ author = {Zhuang Liu and ...@@ -7517,7 +7514,7 @@ author = {Zhuang Liu and
@inproceedings{Xiao2016ALA, @inproceedings{Xiao2016ALA,
title={A Loss-Augmented Approach to Training Syntactic Machine Translation Systems}, title={A Loss-Augmented Approach to Training Syntactic Machine Translation Systems},
author={Tong Xiao and Derek F. Wong and Jingbo Zhu}, author={Tong Xiao and Derek F. Wong and Jingbo Zhu},
publisher={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, publisher={IEEE Transactions on Audio, Speech, and Language Processing},
year={2016}, year={2016},
volume={24}, volume={24},
pages={2069-2083} pages={2069-2083}
...@@ -7571,7 +7568,7 @@ author = {Zhuang Liu and ...@@ -7571,7 +7568,7 @@ author = {Zhuang Liu and
Ran El-Yaniv and Ran El-Yaniv and
Yoshua Bengio}, Yoshua Bengio},
title = {Binarized Neural Networks}, title = {Binarized Neural Networks},
publisher = {Annual Conference on Neural Information Processing Systems}, publisher = {Conference on Neural Information Processing Systems},
pages = {4107--4115}, pages = {4107--4115},
year = {2016} year = {2016}
} }
...@@ -7702,7 +7699,7 @@ author = {Zhuang Liu and ...@@ -7702,7 +7699,7 @@ author = {Zhuang Liu and
author = {Alexei Baevski and author = {Alexei Baevski and
Michael Auli}, Michael Auli},
title = {Adaptive Input Representations for Neural Language Modeling}, title = {Adaptive Input Representations for Neural Language Modeling},
publisher = {arXiv preprint arXiv:1809.10853}, publisher = {International Conference on Learning Representations},
year = {2019} year = {2019}
} }
@inproceedings{DBLP:journals/corr/abs-2006-04768, @inproceedings{DBLP:journals/corr/abs-2006-04768,
...@@ -7736,8 +7733,7 @@ author = {Zhuang Liu and ...@@ -7736,8 +7733,7 @@ author = {Zhuang Liu and
Dawei Song and Dawei Song and
Ming Zhou}, Ming Zhou},
title = {A Tensorized Transformer for Language Modeling}, title = {A Tensorized Transformer for Language Modeling},
publisher = {CoRR}, publisher = {Conference on Neural Information Processing Systems},
volume = {abs/1906.09777},
year = {2019} year = {2019}
} }
@inproceedings{DBLP:conf/nips/YangLSL19, @inproceedings{DBLP:conf/nips/YangLSL19,
...@@ -7816,7 +7812,7 @@ author = {Zhuang Liu and ...@@ -7816,7 +7812,7 @@ author = {Zhuang Liu and
Zhongjun He and Zhongjun He and
Hua Wu and Hua Wu and
Haifeng Wang}, Haifeng Wang},
publisher={arXiv preprint arXiv:1909.01101}, publisher={Conference on Empirical Methods in Natural Language Processing},
year={2019} year={2019}
} }
@inproceedings{DBLP:conf/aclnmt/KoehnK17, @inproceedings{DBLP:conf/aclnmt/KoehnK17,
...@@ -7876,7 +7872,7 @@ author = {Zhuang Liu and ...@@ -7876,7 +7872,7 @@ author = {Zhuang Liu and
@inproceedings{Eisner2011LearningST, @inproceedings{Eisner2011LearningST,
title={Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms}, title={Learning Speed-Accuracy Tradeoffs in Nondeterministic Inference Algorithms},
author={J. Eisner and Hal Daum{\'e}}, author={J. Eisner and Hal Daum{\'e}},
publisher={Annual Conference on Neural Information Processing Systems}, publisher={Conference on Neural Information Processing Systems},
year={2011} year={2011}
} }
@inproceedings{Kazimi2017CoverageFC, @inproceedings{Kazimi2017CoverageFC,
...@@ -8929,7 +8925,7 @@ author = {Zhuang Liu and ...@@ -8929,7 +8925,7 @@ author = {Zhuang Liu and
Jesse Bettencourt and Jesse Bettencourt and
David Duvenaud}, David Duvenaud},
title = {Neural Ordinary Differential Equations}, title = {Neural Ordinary Differential Equations},
publisher = {Annual Conference on Neural Information Processing Systems}, publisher = {Conference on Neural Information Processing Systems},
pages = {6572--6583}, pages = {6572--6583},
year = {2018} year = {2018}
} }
...@@ -9189,7 +9185,7 @@ author = {Zhuang Liu and ...@@ -9189,7 +9185,7 @@ author = {Zhuang Liu and
Michael Wilber and Michael Wilber and
Serge Belongie}, Serge Belongie},
title = {Residual Networks Behave Like Ensembles of Relatively Shallow Networks}, title = {Residual Networks Behave Like Ensembles of Relatively Shallow Networks},
publisher = {Annual Conference on Neural Information Processing Systems}, publisher = {Conference on Neural Information Processing Systems},
pages = {550--558}, pages = {550--558},
year = {2016} year = {2016}
} }
......
Markdown 格式
0%
您添加了 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论