Commit 186a8657 by zengxin

合并分支 'caorunzhe' 到 'zengxin'

Caorunzhe

查看合并请求 !774
parents a23d073a dc4cb935
\begin{tikzpicture}
\tikzstyle{node1}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!10!blue!10]
\tikzstyle{node2}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!20!blue!20]
\tikzstyle{node3}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!30!blue!30]
\tikzstyle{node4}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!40!blue!40]
\tikzstyle{node5}=[inner sep=0mm,minimum height=1em,minimum width=3em,fill=ugreen!50!blue!50]
\begin{scope}
\node[anchor=north west] (l1) at (0,0) {};
\node[anchor=west] (l2) at ([xshift=10.3em,yshift=0em]l1.east) {};
\draw [->,thick] ([xshift=-2em,yshift=-1em]l1.south) -- ([xshift=2em,yshift=-1em]l2.south);
\node[anchor=north,node1] (c1) at ([xshift=0em,yshift=-2em]l1.south) {};
\node[anchor=west,node2] (c2) at ([xshift=0em,yshift=0em]c1.east) {};
\node[anchor=west,node3] (c3) at ([xshift=0em,yshift=0em]c2.east) {};
\node[anchor=west,node4] (c4) at ([xshift=0em,yshift=0em]c3.east) {};
\node[anchor=west,node5] (c5) at ([xshift=0em,yshift=0em]c4.east) {};
\node[anchor=north] (lb1) at ([xshift=0em,yshift=-1.5em]c3.south) {(a)样本难易程度图示};
\end{scope}
\begin{scope}[yshift = -1.7in]
\foreach \i / \j / \z in
{0/0/node1, 1/0/node1, 2/0/node1, 3/0/node1, 4/0/node1, 5/0/node1, 6/0/node1,
1/1/node2, 2/1/node2, 3/1/node2, 4/1/node2, 5/1/node2, 6/1/node2,
2/2/node3, 3/2/node3, 4/2/node3, 5/2/node3, 6/2/node3,
3/3/node4, 4/3/node4, 5/3/node4, 6/3/node4,
4/4/node5, 5/4/node5, 6/4/node5,
}
\node[anchor=north west,\z,draw=white](n\i\j) at (1.0*3em*\j + 0*0em,-1.0*1em*\i + 0*0em){};
\node[anchor=north west,node1,draw=white](nc) at (0,0){};
\draw [->,very thick] ([xshift=-1em,yshift=1em]n00.north west) -- ([xshift=16em,yshift=1em]n00.north west);
\draw [->,very thick] ([xshift=-1em,yshift=1em]n00.north west) -- ([xshift=-1em,yshift=-9em]n00.north west);
\node[anchor=west] (x1) at ([xshift=12em,yshift=2em]n00.north west) {数据块};
\node[anchor=east] (y1) at ([xshift=-1em,yshift=-7.5em]n00.north west) {轮次};
\node[anchor=west,font=\small] (t1) at ([xshift=0em,yshift=-1em]n60.south west) {$\ldots$直到模型收敛};
\node[anchor=north] (lb2) at ([xshift=0em,yshift=-3em]n62.south) {(b)不同训练阶段使用的数据};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
......@@ -17,11 +17,11 @@
\draw[->,line width=.03cm] ([yshift=0em]node5.south) -- ([yshift=0em]node6.north);
\node[node,anchor = west] (node8) at ([xshift = 2em,yshift = 2em]node7.east) {对于词表外的词lowest};
\node[node,anchor = north west] (node9) at ([yshift = 0.3em]node8.south west) {可以被分割为low est};
\node[node,anchor = north west] (node9) at ([yshift = 0.3em]node8.south west) {可以被合并为low est};
\node[node,font=\scriptsize,anchor = north,fill=ugreen!5,drop shadow] (dict) at ([xshift = 5em,yshift = -5em]node6.south){\begin{tabular}{llllll}
\multirow{3}{*}{符号合并表:} & ('e','s') & ('es','t') & ('est','$<$e$>$') & ('l','o') & ('lo','w') \\
& ('low','$<$e$>$') & ('n','e') & ('ne','w') & ('new','est$<$e$>$') & ('w','i') \\
& ('n','e') & ('ne','w') & ('new','est$<$e$>$') & ('low','$<$e$>$') & ('w','i') \\
& ('wi','d') & ('wid','est$<$e$>$') & ('low','e') & ('lowe','r') & ('lower','$<$e$>$')
\end{tabular}};
......
......@@ -7,7 +7,7 @@
\tikzstyle{neuronnode} = [minimum size=1.8em,circle,draw,very thick,ublue,inner sep=0pt, fill=white,align=center]
%standard
\node [neuronnode] (neuron_b) at (0,0) {\scriptsize{$b_{i}^{l}$}};
\node [neuronnode] (neuron_b) at (0,0) {\scriptsize{$b^{l}$}};
\node [neuronnode] (neuron_y3) at (0,-1*\neuronsep) {\scriptsize{$x_{3}^{l}$}};
\node [neuronnode] (neuron_y2) at (0,-2*\neuronsep) {\scriptsize{$x_{2}^{l}$}};
\node [neuronnode] (neuron_y1) at (0,-3*\neuronsep) {\scriptsize{$x_{1}^{l}$}};
......@@ -25,7 +25,7 @@
\draw [->,line width=0.3mm] (neuron_z.east) -- (neuron_y'.west);
%dropout
\node [neuronnode] (drop_neuron_b) at (5*\nodespace,0) {\scriptsize{$b_{i}^{l}$}};
\node [neuronnode] (drop_neuron_b) at (5*\nodespace,0) {\scriptsize{$b^{l}$}};
\node [neuronnode] (drop_neuron_y3') at (5*\nodespace,-1*\neuronsep) {\scriptsize{$\tilde{x}_{3}^{l}$}};
\node [neuronnode] (drop_neuron_y2') at (5*\nodespace,-2*\neuronsep) {\scriptsize{$\tilde{x}_{2}^{l}$}};
\node [neuronnode] (drop_neuron_y1') at (5*\nodespace,-3*\neuronsep) {\scriptsize{$\tilde{x}_{1}^{l}$}};
......@@ -60,12 +60,12 @@
%equ
\node [anchor=west,inner sep = 2pt] (line1) at (9*\nodespace,0) {未应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \mathbf{x}^{l} + b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \mathbf{x}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\node [anchor=north west,inner sep = 2pt] (line4) at (line3.south west) {应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{j}^{l} \sim$ Bernoulli $(1-p)$};
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbf{x}}=\mathbf{r} * \mathbf{x}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \widetilde{\mathbf{x}}^{l} + b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \widetilde{\mathbf{x}}^{l} + b^{l}$};
\node [anchor=north west,inner sep = 2pt] (line8) at (line7.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\tikzstyle{node}=[inner sep=0mm,minimum height=3em,minimum width=6em,rounded corners=5pt]
\node[anchor=west,node,fill=ugreen!30] (n1) at (0,0) {训练集};
\node[anchor=west,node,fill=yellow!30] (n2) at ([xshift=2em,yshift=0em]n1.east) {难度评估器};
\node[anchor=west,node,fill=red!30] (n3) at ([xshift=4em,yshift=0em]n2.east) {训练调度器};
\node[anchor=west,node,fill=blue!30] (n4) at ([xshift=4em,yshift=0em]n3.east) {模型训练器};
\draw [->,very thick] ([xshift=0em,yshift=0em]n1.east) -- ([xshift=0em,yshift=0em]n2.west);
\draw [->,very thick] ([xshift=0em,yshift=0em]n2.east) -- ([xshift=0em,yshift=0em]n3.west);
\draw [->,very thick] ([xshift=0em,yshift=0em]n3.east) -- ([xshift=0em,yshift=0em]n4.west);
\node[anchor=west,inner sep=0mm,minimum height=2em,minimum width=3em,font=\small,align=left] (n5) at ([xshift=0.5em,yshift=-1.5em]n2.east) {排序过\\的数据};
\node[anchor=west,inner sep=0mm,minimum height=2em,minimum width=3em,font=\small,align=left] (n6) at ([xshift=1em,yshift=-1.5em]n3.east) {采样\\批次$t$};
\node[anchor=south west,inner sep=0mm,minimum height=2em,minimum width=4em,font=\small] (n7) at ([xshift=2.5em,yshift=1em]n3.north) {如果模型收敛};
\node[anchor=south west,inner sep=0mm] (n8) at ([xshift=0em,yshift=1.5em]n4.north) {批次$t$};
\node[anchor=north east,inner sep=0mm] (n9) at ([xshift=0em,yshift=-1em]n3.south east) {课程设计};
\draw [->,dotted,very thick] ([xshift=0em,yshift=0em]n4.north) -- ([xshift=0em,yshift=1em]n4.north) -- ([xshift=0em,yshift=1em]n3.north) -- (n3.north);
\begin{pgfonlayer}{background}
\node[rectangle,inner sep=5pt,rounded corners=5pt,fill=gray!30] [fit = (n3) (n4) (n6) (n8) ] (g2) {};
\node[rectangle,inner sep=5pt,rounded corners=5pt,fill=orange!30] [fit = (n2) (n3) (n9) ] (g1) {};
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%------------------------------------------------------------
\begin{tikzpicture}
\tikzstyle{rnnnode} = [draw,inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt,fill=yellow!20]
\tikzstyle{snode} = [draw,inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt,fill=red!20]
\tikzstyle{wode} = [inner sep=0pt,minimum width=4em,minimum height=2em,rounded corners=0pt]
\node [anchor=west,wode] (n1) at (0,0) {${y}_1,{y}_2,\ldots,{y}_n$};
\node [anchor=north west,wode] (n2) at ([xshift=1em,yshift=0.5em]n1.south east) {${x}_1,{x}_2,\ldots,{x}_m$};
\node [anchor=south west,rnnnode] (n3) at ([xshift=8em,yshift=0.5em]n2.north east) {生成模型G};
\node [anchor=south east,wode] (n4) at ([xshift=-2em,yshift=0em]n3.north west) {$\tilde{{y}}_{1},\tilde{{y}}_{2},...,\tilde{{y}}_{J}$};
\node [anchor=south,snode] (n5) at ([xshift=0em,yshift=6em]n2.north) {判别网络D};
\node [anchor=west,align=left,font=\small] (n6) at ([xshift=15em,yshift=-3em]n5.east) {根据$(\seq{x},\seq{\tilde{y}})$\\成奖励信号};
\draw [->,thick] ([xshift=0em,yshift=0em]n1.north)--([xshift=0em,yshift=0em]n5.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n2.north)--([xshift=0em,yshift=0em]n5.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.west)--([xshift=0em,yshift=0em]n5.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.north)--([xshift=0em,yshift=1em]n3.north)--([xshift=0em,yshift=0em]n4.east);
\draw [->,thick] ([xshift=0em,yshift=0em]n5.east) -- ([xshift=14.5em,yshift=0em]n5.east) -- ([xshift=1em,yshift=0em]n3.east) -- ([xshift=0em,yshift=0em]n3.east);
\draw [->,thick] ([xshift=0em,yshift=0em]n2.east) -- ([xshift=0em,yshift=-1.5em]n3.south) -- ([xshift=0em,yshift=0em]n3.south);
\begin{pgfonlayer}{background}
%\node [rectangle,inner sep=0.5em,rounded corners=5pt,very thick,dotted,draw=yellow] [fit = (n3) (n4)] (b1) {};
%\node [rectangle,inner sep=0.5em,rounded corners=5pt,very thick,dotted,draw=red] [fit = (n5)] (b2) {};
\end{pgfonlayer}
%\draw [->,thick] ([xshift=0em,yshift=0em]n3.south) .. controls +(south:1em) and +(north:1em) .. ([xshift=0em,yshift=0em]n5.north);
\end{tikzpicture}
%------------------------------------------------------------
\begin{tikzpicture}
\tikzstyle{rnnnode} = [draw,inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt,fill=red!20]
\tikzstyle{snode} = [draw,inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt,fill=blue!20]
\tikzstyle{ynode} = [inner sep=2pt,minimum width=4em,minimum height=2em,rounded corners=1pt]
\node [anchor=west,rnnnode] (n1) at (0,0) {$\mathbi{h}_{1}$};
\node [anchor=west] (n2) at ([xshift=3em,yshift=0em]n1.east) {$\cdots$};
\node [anchor=west,rnnnode] (n3) at ([xshift=3em,yshift=0em]n2.east) {$\mathbi{h}_{j-1}$};
\node [anchor=west,rnnnode] (n4) at ([xshift=3em,yshift=0em]n3.east) {$\mathbi{h}_{j}$};
\node [anchor=south,snode] (n5) at ([xshift=0em,yshift=1em]n3.north) {Softmax};
\node [anchor=south,ynode] (n6) at ([xshift=0em,yshift=1em]n5.north) {$\tilde{{y}}_{j-1}$};
\node [anchor=south,snode] (n7) at ([xshift=0em,yshift=1em]n4.north) {Softmax};
\node [anchor=south,ynode] (n8) at ([xshift=0em,yshift=1em]n7.north) {$\tilde{{y}}_{j}$};
\node [anchor=south,snode,font=\footnotesize] (n13) at ([xshift=0em,yshift=1em]n1.north) {Softmax};
\node [anchor=south,ynode] (n14) at ([xshift=0em,yshift=1em]n13.north) {$\tilde{{y}}_{1}$};
\node [anchor=north] (x1) at ([xshift=0em,yshift=-1em]n1.south) {$\langle$sos$\rangle$};
\node [anchor=north,font=\small] (x2) at ([xshift=-1.3em,yshift=-2.3em]n3.south) {$\tilde{{y}}_{j-2}$};
\node [anchor=north,font=\small] (x3) at ([xshift=1.3em,yshift=-2.5em]n3.south) {${y}_{j-2}$};
\node [anchor=north,font=\small] (x4) at ([xshift=1.3em,yshift=-2.5em]n4.south) {${y}_{j-1}$};
\node [anchor=north,font=\small] (x5) at ([xshift=-1.3em,yshift=-2.3em]n4.south) {$\tilde{{y}}_{j-1}$};
\node [anchor=south,inner sep=2pt] (st1) at (n6.north) {\scriptsize{\textbf{[step $j-1$]}}};
\node [anchor=south,inner sep=2pt] (st2) at (n8.north) {\scriptsize{\textbf{[step $j$]}}};
\node [anchor=south,inner sep=2pt] (st3) at (n14.north) {\scriptsize{\textbf{[step $1$]}}};
\node [anchor=north,font=\tiny,rotate=90] (e1) at ([xshift=-2.7em,yshift=-1.1em]n3.south) {${(1-\epsilon_i)}^2$};
%\node [anchor=north,font=\scriptsize] (e2) at ([xshift=2em,yshift=-0.1em]n3.south) {$\funp{P}=\epsilon_i$};
%\node [anchor=north,font=\scriptsize] (e3) at ([xshift=-2em,yshift=-1em]n4.south) {$\funp{P}={(1-\epsilon_i)}^2$};
\node [anchor=north,font=\tiny,rotate=90] (e4) at ([xshift=1.5em,yshift=-1.2em]n4.south) {$\epsilon_i$};
%\node [anchor=south east,font=\small] (l1) at ([xshift=-1em,yshift=0.5em]n5.north west) {Loss};
%\node [anchor=south west,font=\small] (l2) at ([xshift=1em,yshift=0.5em]n7.north east) {Loss};
\draw [->,thick] ([xshift=0em,yshift=0em]x1.north)--([xshift=0em,yshift=0em]n1.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n1.east)--([xshift=0em,yshift=0em]n2.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n2.east)--([xshift=0em,yshift=0em]n3.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.east)--([xshift=0em,yshift=0em]n4.west);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.east)--([xshift=3em,yshift=0em]n4.east);
\draw [->,thick] ([xshift=0em,yshift=0em]n3.north)--([xshift=0em,yshift=0em]n5.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n5.north)--([xshift=0em,yshift=0em]n6.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n4.north)--([xshift=0em,yshift=0em]n7.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n7.north)--([xshift=0em,yshift=0em]n8.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n1.north)--([xshift=0em,yshift=0em]n13.south);
\draw [->,thick] ([xshift=0em,yshift=0em]n13.north)--([xshift=0em,yshift=0em]n14.south);
%\draw [->,thick] ([xshift=0em,yshift=0em]l1.south) .. controls +(south:1em) and +(west:0.1em) .. ([xshift=0em,yshift=0em]n5.west);
%\draw [->,thick] ([xshift=0em,yshift=0em]l2.south) .. controls +(south:1em) and +(east:0.1em) .. ([xshift=0em,yshift=0em]n7.east);
\node [circle,fill=black,minimum size=1pt,inner sep=1.2pt] (c1) at ([xshift=0em,yshift=0.6em]x2.north) {};
\node [circle,fill=black,minimum size=1pt,inner sep=1.2pt] (c2) at ([xshift=0em,yshift=0.8em]x3.north) {};
\node [circle,fill=black,minimum size=1pt,inner sep=1.2pt] (c3) at ([xshift=0em,yshift=0.8em]x4.north) {};
\node [circle,fill=black,minimum size=1pt,inner sep=1.2pt] (c4) at ([xshift=0em,yshift=0.6em]x5.north) {};
\draw [-,thick] ([xshift=0em,yshift=-0em]x2.north)-- ([xshift=-1.3em,yshift=0em]n3.south);
\draw [-,thick] ([xshift=0em,yshift=0.2em]x3.north)-- ([xshift=0em,yshift=0em]c2.south);
\draw [-,thick] ([xshift=0em,yshift=0em]c2.north)-- ([xshift=0.3em,yshift=0.6em]c2.north);
\draw [-,thick] ([xshift=0em,yshift=0.6em]c2.north)-- ([xshift=1.3em,yshift=0em]n3.south);
\draw [-,thick] ([xshift=-0em,yshift=-0em]x5.north)-- ([xshift=0em,yshift=0em]c4.south);
\draw [-,thick] ([xshift=-0em,yshift=0em]c4.north)-- ([xshift=0.3em,yshift=0.6em]c4.north);
\draw [-,thick] ([xshift=-0em,yshift=0.6em]c4.north)-- ([xshift=-1.3em,yshift=0em]n4.south);
\draw [-,thick] ([xshift=0em,yshift=0.2em]x4.north)-- ([xshift=1.3em,yshift=0em]n4.south);
\draw [->,thick,dotted] ([xshift=-2.5em,yshift=1em]x2.north) .. controls +(south:2em) and +(west:0.1em) .. ([xshift=0.2em,yshift=0em]x2.west);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n6.east) .. controls ([xshift=2em,yshift=1em]n6.east) and ([xshift=-2.5em,yshift=-4em]n4.south west) .. ([xshift=-0em,yshift=-0em]x5.west);
\draw [->,thick,dotted] ([xshift=0em,yshift=0em]n14.east) .. controls +(east:0.3em) and +(north:2em) .. ([xshift=3em,yshift=-0.5em]n14.south);
\end{tikzpicture}
\begin{tikzpicture}
\node[anchor=west,inner sep=0mm,minimum height=4em,minimum width=5.5em,rounded corners=15pt,align=left,draw] (n1) at (0,0) {Decoder\\Encoder};
\node[anchor=west,inner sep=0mm,minimum height=4em,minimum width=5.5em,rounded corners=15pt,align=left,draw] (n2) at ([xshift=10em,yshift=0em]n1.east) {Decoder\\Encoder};
\node[anchor=south,inner sep=0mm,font=\small] (a1) at ([xshift=0em,yshift=1em]n1.north) {演员$p_{\theta}$};
\node[anchor=north,inner sep=0mm] (a2) at ([xshift=0em,yshift=-1em]n1.south) {${x}_1,{x}_2,\ldots,{x}_m$};
\node[anchor=south,inner sep=0mm,font=\small] (c1) at ([xshift=0em,yshift=1em]n2.north) {评论家$Q$};
\node[anchor=north,inner sep=0mm] (c2) at ([xshift=0em,yshift=-1em]n2.south) {${y}_1,{y}_2,\ldots,{y}_J$};
% \node[anchor=west,inner sep=0mm] (n3) at ([xshift=2.1em,yshift=2em]n1.east) {$Q_1,Q_2,\ldots,Q_J$};
% \node[anchor=west,inner sep=0mm] (n4) at ([xshift=2.9em,yshift=-0.4em]n1.east) {$\hat{\mathbi{y}}_1,\hat{\mathbi{y}}_2,\ldots,\hat{\mathbi{y}}_J$};
% \node[anchor=west,inner sep=0mm,font=\small] (n5) at ([xshift=3em,yshift=-3em]n1.east) {演员状态};
\draw [-,thick] ([xshift=0em,yshift=0em]n1.west) -- ([xshift=0em,yshift=0em]n1.east);
\draw [-,thick] ([xshift=0em,yshift=0em]n2.west) -- ([xshift=0em,yshift=0em]n2.east);
%\draw [->,thick] ([xshift=0em,yshift=1em]n2.west) -- ([xshift=0em,yshift=1em]n1.east);
%\draw [->,thick] ([xshift=0em,yshift=0.5em]n1.east) -- ([xshift=0em,yshift=0.5em]n2.west);
%\draw [->,dotted,very thick] ([xshift=0em,yshift=0em]n1.east) .. controls ([xshift=3em,yshift=-1em]n1.-90) and ([xshift=-3em,yshift=-1em]n2.-90) .. (n2.west);
\node[anchor=west,inner sep=0mm] (n3) at ([xshift=2.1em,yshift=1em]n1.east) {$Q_1,Q_2,\ldots,Q_J$};
\node[anchor=west,inner sep=0mm] (n4) at ([xshift=2.9em,yshift=-1em]n1.east) {$\tilde{{y}}_1,\tilde{{y}}_2,\ldots,\tilde{{y}}_J$};
\draw [->,thick] ([xshift=0em,yshift=0.2em]n2.west) -- ([xshift=0em,yshift=0.2em]n1.east);
\draw [->,thick] ([xshift=0em,yshift=-0.2em]n1.east) -- ([xshift=0em,yshift=-0.2em]n2.west);
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
\node[rounded corners=3pt,minimum width=1.0em,minimum height=2.0em,font=\scriptsize,fill=green!5,drop shadow,thick,draw](top) at (0,0) {
\begin{tabular}{ll}
\multicolumn{2}{c}{BPE词表:} \\
errrr$<$e$>$ & tain$<$e$>$ \\
moun & est$<$e$>$ \\
high & the$<$e$>$ \\
a$<$e$>$ &
\node[rounded corners=3pt,minimum width=1.0em,minimum height=2.0em,font=\scriptsize,fill=green!5,drop shadow,thick](top) at (0,0) {
\begin{tabular}{lllllll}
\multicolumn{7}{c}{符号合并表} \\
r\ $<$e$>$, & e\ s, & l\ o, & es\ t, & lo\ w, & est\ $<$e$>$, & e\ r$<$e$>$,
\end{tabular}
};
\node[font=\scriptsize,anchor=west] (node1) at ([xshift=0.5em,yshift=1em]top.east) {原始序列:};
\node[font=\scriptsize,anchor=west] (this) at (node1.east) {"this$<$e$>$" ,};
\node[font=\scriptsize,anchor=west] (highest) at (this.east) {"highest$<$e$>$",};
\node[font=\scriptsize,anchor=west] (mountain) at (highest.east) { "mountain$<$e$>$"};
\node[font=\footnotesize,anchor=north] (l1) at ([xshift=0em,yshift=-1em]top.south) {(a) 符号合并表};
\node[font=\scriptsize,anchor=west] (n1) at ([xshift=-6em,yshift=-6em]top.west) {l\ o\ w\ e\ r\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (n2) at ([xshift=2.6em]n1.east) {l\ o\ w\ e\ {\red r$<$e$>$}};
\node[font=\scriptsize,anchor=west] (n3) at ([xshift=2.6em]n2.east) {{\red lo}\ w\ e\ r$<$e$>$};
\node[font=\scriptsize,anchor=west] (n4) at ([xshift=2.6em]n3.east) {{\red low}\ e\ r$<$e$>$};
\node[font=\scriptsize,anchor=west] (n5) at ([xshift=2.6em]n4.east) {low\ {\red er$<$e$>$}};
\node[font=\scriptsize,anchor=west] (node2) at ([yshift=-1.5em]node1.south west) {BPE切分:};
\node[font=\scriptsize,anchor=west] (unk) at (node2.east) {"$<$unk$>$",};
\node[font=\scriptsize,anchor=west] (high) at (unk.east) {"high",};
\node[font=\scriptsize,anchor=west] (est) at (high.east) {"est$<$e$>$",};
\node[font=\scriptsize,anchor=west] (moun) at (est.east) {"moun",};
\node[font=\scriptsize,anchor=west] (tain) at (moun.east) {"tain$<$e$>$"};
\node[font=\scriptsize,anchor=west] (t1) at ([yshift=-1.5em]n1.south west) {l\ o\ w\ e\ s\ t\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (t2) at ([xshift=0.8em]t1.east) {l\ o\ w\ {\red es}\ t\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (t3) at ([xshift=0.8em]t2.east) {{\red lo}\ w\ es\ t\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (t4) at ([xshift=0.8em]t3.east) {lo\ w\ {\red est}\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (t5) at ([xshift=0.8em]t4.east) {{\red low}\ est\ $<$e$>$};
\node[font=\scriptsize,anchor=west] (t6) at ([xshift=0.8em]t5.east) {low\ {\red est$<$e$>$}};
\node[font=\footnotesize,anchor=north] (l2) at ([xshift=1.5em,yshift=-1em]t3.south) {(b) 合并样例};
%\draw[->,thick](node1.south) -- ([xshift=-1.0em]node2.north);
\draw[->,thick]([xshift=-0.2em]this.south) -- (unk);
\draw[->,thick](highest.south) -- (high);
\draw[->,thick](highest.south) -- (est);
\draw[->,thick](mountain.south) -- (moun);
\draw[->,thick](mountain.south) -- (tain);
\draw[->,thick](n1.east) -- (n2.west);
\draw[->,thick](n2.east) -- (n3.west);
\draw[->,thick](n3.east) -- (n4.west);
\draw[->,thick](n4.east) -- (n5.west);
\draw[->,thick](t1.east) -- (t2.west);
\draw[->,thick](t2.east) -- (t3.west);
\draw[->,thick](t3.east) -- (t4.west);
\draw[->,thick](t4.east) -- (t5.west);
\draw[->,thick](t5.east) -- (t6.west);
\end{tikzpicture}
\ No newline at end of file
......@@ -11,8 +11,8 @@
\node[anchor = north] (new_root) at ([yshift = -1.5em]newer.south) {new};
\draw [->] ([yshift=0.2em]do_root.north) .. controls +(north:0.4) and +(south:0.6) ..(do.south);
\draw [->] (do_root.north) -- (does.south);
\draw [->] ([yshift=0.2em]do_root.north) .. controls +(north:0.4) and +(south:0.6) ..([yshift=0.1em]doing.south);
\draw [->] ([yshift=0.2em]do_root.north) .. controls +(north:0.4) and +(south:0.6) ..(doing.south);
\draw [->] ([yshift=0.2em]new_root.north) .. controls +(north:0.4) and +(south:0.6) ..(new.south);
\draw [->] (new_root.north) -- (newer.south);
\draw [->] ([yshift=0.2em]new_root.north) .. controls +(north:0.4) and +(south:0.6) ..([yshift=0.08em]newest.south);
\draw [->] ([yshift=0.2em]new_root.north) .. controls +(north:0.4) and +(south:0.6) ..(newest.south);
\end{tikzpicture}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
\tikzstyle{yy} = [circle,minimum height=1cm,text centered,draw=black,thick,drop shadow={shadow xshift=0.3em,yshift=0.8em},fill=white]
\begin{tikzpicture}[node distance = 0,scale = 1]
\begin{scope}[xshift=0.2in]
\tikzstyle{every node}=[scale=1]
\node (y1)[yy]{\large$y_1$};
\node (y2)[yy,right of = y1,xshift=1.5cm]{\large$y_2$};
\node (y3)[yy,right of = y2,xshift=1.5cm]{\large$y_3$};
\node (y4)[yy,right of = y3,xshift=1.5cm]{\large$y_4$};
\node (y5)[yy,right of = y4,xshift=1.5cm]{\large$y_5$};
\node (y6)[yy,right of = y5,xshift=1.5cm]{\large$y_6$};
\node [anchor=north,font=\scriptsize] (labela) at ([xshift=1.8em,yshift=-2em]y3.south) {(a) 自回归模型};
\draw[->,thick] (y1.north) .. controls ([yshift=1.5em]y1.north) and ([yshift=1.5em]y3.north).. (y3.north);
\draw[->,thick] (y1.north) .. controls ([yshift=2em]y1.north) and ([yshift=2em]y4.north).. (y4.north);
\draw[->,thick] (y1.south) .. controls ([yshift=-1.5em]y1.south) and ([yshift=-1.5em]y5.south).. (y5.south);
\draw[->,thick] (y1.south) .. controls ([yshift=-2em]y1.south) and ([yshift=-2em]y6.south).. (y6.south);
\draw[->,thick] (y2.north) .. controls ([yshift=1.5em]y2.north) and ([yshift=1.5em]y4.north).. (y4.north);
\draw[->,thick] (y2.south) .. controls ([yshift=-1.5em]y2.south) and ([yshift=-1.6em]y5.south).. (y5.south);
\draw[->,thick] (y2.south) .. controls ([yshift=-2em]y2.south) and ([yshift=-2em]y6.south).. (y6.south);
\draw[->,thick] (y3.south) .. controls ([yshift=-1.5em]y3.south) and ([yshift=-1.5em]y5.south).. (y5.south);
\draw[->,thick] (y3.south) .. controls ([yshift=-2em]y3.south) and ([yshift=-2em]y6.south).. (y6.south);
\draw[->,thick] (y4.south) .. controls ([yshift=-1.5em]y4.south) and ([yshift=-1.5em]y6.south).. (y6.south);
\draw[->,red,very thick](y1.east)to(y2.west);
\draw[->,red,very thick](y2.east)to(y3.west);
\draw[->,red,very thick](y3.east)to(y4.west);
\draw[->,red,very thick](y4.east)to(y5.west);
\draw[->,red,very thick](y5.east)to(y6.west);
\begin{tikzpicture}
\tikzstyle{decoder} = [rectangle,thick,rounded corners,minimum width=5cm,minimum height=0.6cm,text centered,draw=black,fill=blue!15]
\begin{scope}
\node (aa)[decoder] at (0,0) {};
\node (y2b)[anchor=south] at ([yshift=-2.5em]aa.south) {$y_2$};
\node (label)[anchor=south] at ([yshift=-1.8em]y2b.south) {\small{(a) 自回归解码}};
\node (y1b)[anchor=east] at ([xshift=-2.5em]y2b.east) {$y_1$};
\node (sos)[anchor=east] at ([xshift=-4.3em]y2b.east) {\small{<sos>}};
\node (y3b)[anchor=west] at ([xshift=2.5em]y2b.west) {$y_3$};
\node (y4b)[anchor=west] at ([xshift=5em]y2b.west) {$y_4$};
\node (y3a)[anchor=north] at ([yshift=2.5em]aa.north) {$y_3$};
\node (y2a)[anchor=east] at ([xshift=-2.5em]y3a.east) {$y_2$};
\node (y1a)[anchor=east] at ([xshift=-5em]y3a.east) {$y_1$};
\node (y4a)[anchor=west] at ([xshift=2.5em]y3a.west) {$y_4$};
\node (eos)[anchor=west] at ([xshift=4.3em]y3a.west) {\small{<eos>}};
\draw [->,very thick,dotted] ([xshift=-0.3em]y1a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0.3em]y1b.west);
\draw [->,very thick,dotted] ([xshift=-0.3em]y2a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0.3em]y2b.west);
\draw [->,very thick,dotted] ([xshift=-0.3em]y3a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0.3em]y3b.west);
\draw [->,very thick,dotted] ([xshift=-0.3em]y4a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0.3em]y4b.west);
\node (autodecoder)[decoder] at (0,0) {自回归编码器};
\draw [->,thick]([yshift=0em]y1b.north) to ([yshift=1.15em]y1b.north);
\draw [->,thick]([yshift=0em]y2b.north) to ([yshift=1.15em]y2b.north);
\draw [->,thick]([yshift=0em]y3b.north) to ([yshift=1.15em]y3b.north);
\draw [->,thick]([yshift=0em]y4b.north) to ([yshift=1.15em]y4b.north);
\draw [->,thick]([yshift=0em]sos.north) to ([yshift=1.15em]sos.north);
\draw [->,thick]([yshift=-1.15em]y1a.south) to (y1a.south);
\draw [->,thick]([yshift=-1.15em]y2a.south) to (y2a.south);
\draw [->,thick]([yshift=-1.15em]y3a.south) to (y3a.south);
\draw [->,thick]([yshift=-1.15em]y4a.south) to (y4a.south);
\draw [->,thick]([yshift=-1.2em]eos.south) to (eos.south);
\end{scope}
\begin{scope}[yshift=-1.45in]
\tikzstyle{rec} = [rectangle,minimum width=2.8cm,minimum height=1.5cm,text centered,draw=black,dashed]
\tikzstyle{every node}=[scale=1]
\node (y1)[yy]{\large$y_1$};
\node (y2)[yy,right of = y1,xshift=1.5cm]{\large$y_2$};
\node (y3)[yy,right of = y2,xshift=2cm]{\large$y_3$};
\node (y4)[yy,right of = y3,xshift=1.5cm]{\large$y_4$};
\node (y5)[yy,right of = y4,xshift=2cm]{\large$y_5$};
\node (y6)[yy,right of = y5,xshift=1.5cm]{\large$y_6$};
\node (rec1)[rec,right of = y1,xshift=0.75cm]{};
\node (rec2)[rec,right of = y3,xshift=0.75cm]{};
\node (rec3)[rec,right of = y5,xshift=0.75cm]{};
\node [anchor=north,font=\scriptsize] (labelb) at ([xshift=2.2em,yshift=-1em]y3.south) {(b) 半自回归模型};
\draw[->,red,very thick](rec1.east)to(rec2.west);
\draw[->,red,very thick](rec2.east)to(rec3.west);
\draw[->,thick] (rec1.north) .. controls ([yshift=2.5em]rec1.north) and ([yshift=2.5em]rec3.north).. (rec3.north);
\begin{scope}[yshift=-1.55in]
\node (aa) [decoder] at (0,0) {};
\node (y1y2b)[rectangle,anchor=south,inner sep=0.25em,densely dashed,draw] at ([yshift=-2.6em]aa.south) {$y_1\;y_2$};
\node (label)[anchor=south] at ([yshift=-2.1em]y1y2b.south) {\small{(b) 半自回归解码}};
\node (sos)[anchor=east] at ([xshift=-4.55em]y1y2b.east) {\small{<sos>}};
\node (y3y4b)[rectangle,anchor=west,inner sep=0.25em,densely dashed,draw] at ([xshift=4.7em]y1y2b.west) {$y_3\;y_4$};
\node (y3y4a)[rectangle,anchor=north,inner sep=0.25em,densely dashed,draw] at ([yshift=2.6em]aa.north) {$y_3\;y_4$};
\node (y1y2a)[rectangle,anchor=west,inner sep=0.25em,densely dashed,draw] at ([xshift=-4.7em]y3y4a.west) {$y_1\;y_2$};
\node (eos)[anchor=east] at ([xshift=4.85em]y3y4a.east) {\small{<eos>}};
\draw [->,very thick,dotted] ([xshift=-0em]y1y2a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]y1y2b.west);
\draw [->,very thick,dotted] ([xshift=-0em]y3y4a.east) .. controls +(east:0.5) and +(west:0.5) ..([xshift=0em]y3y4b.west);
\node (autodecoder)[decoder] at (0,0) {半自回归编码器};
\draw [->,thick]([yshift=0.05em]sos.north) to ([yshift=1.38em]sos.north);
\draw [->,thick]([yshift=0em]y1y2b.north) to ([yshift=1.38em]y1y2b.north);
\draw [->,thick]([yshift=0em]y3y4b.north) to ([yshift=1.38em]y3y4b.north);
\draw [->,thick]([yshift=-1.5em]y1y2a.south) to ([yshift=-0.02em]y1y2a.south);
\draw [->,thick]([yshift=-1.5em]y3y4a.south) to ([yshift=-0.02em]y3y4a.south);
\draw [->,thick]([yshift=-1.4em]eos.south) to ([yshift=-0.05em]eos.south);
\end{scope}
\begin{scope}[xshift=0.3in,yshift=-2.35in]
\tikzstyle{every node}=[scale=1]
\node (y1)[yy]{\large$y_1$};
\node (y2)[yy,right of = y1,xshift=1.5cm]{\large$y_2$};
\node (y3)[yy,right of = y2,xshift=1.5cm]{\large$y_3$};
\node (y4)[yy,right of = y3,xshift=1.5cm]{\large$y_4$};
\node (y5)[yy,right of = y4,xshift=1.5cm]{\large$y_5$};
\node (y6)[yy,right of = y5,xshift=1.5cm]{\large$y_6$};
\node [anchor=north,font=\scriptsize] (labelc) at ([xshift=1.5em,yshift=-0.5em]y3.south) {(c) 非自回归模型};
\begin{scope}[yshift=-3.1in]
\node (aa) [decoder]at (0,0) {非自回归模型};
\node (y2b)[anchor=south] at ([xshift=-1.5em,yshift=-2.5em]aa.south) {$y_2$};
\node (label)[anchor=south] at ([yshift=-4.3em]aa.south) {\small{(c) 非自回归解码}};
\node (y1b)[anchor=east] at ([xshift=-3em]y2b.east) {$y_1$};
\node (y3b)[anchor=west] at ([xshift=3em]y2b.west) {$y_3$};
\node (y4b)[anchor=west] at ([xshift=6em]y2b.west) {$y_4$};
\node (y2a)[anchor=north] at ([xshift=-1.5em,yshift=2.5em]aa.north) {$y_2$};
\node (y1a)[anchor=east] at ([xshift=-3em]y2a.east) {$y_1$};
\node (y3a)[anchor=west] at ([xshift=3em]y2a.west) {$y_3$};
\node (y4a)[anchor=west] at ([xshift=6em]y2a.west) {$y_4$};
\draw [->,thick]([yshift=0em]y1b.north) to ([yshift=1.15em]y1b.north);
\draw [->,thick]([yshift=0em]y2b.north) to ([yshift=1.15em]y2b.north);
\draw [->,thick]([yshift=0em]y3b.north) to ([yshift=1.15em]y3b.north);
\draw [->,thick]([yshift=0em]y4b.north) to ([yshift=1.15em]y4b.north);
\draw [->,thick]([yshift=-1.2em]y1a.south) to (y1a.south);
\draw [->,thick]([yshift=-1.2em]y2a.south) to (y2a.south);
\draw [->,thick]([yshift=-1.2em]y3a.south) to (y3a.south);
\draw [->,thick]([yshift=-1.2em]y4a.south) to (y4a.south);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -103,7 +103,7 @@
\node [anchor=north] (pos1) at ([xshift=1.5em,yshift=-1.0em]node0-2.south) {\small{(a) GPT模型结构}};
\node [anchor=north] (pos2) at ([xshift=1.5em,yshift=-1.0em]node0-6.south) {\small{(b) BERT模型结构}};
\node [anchor=south] (ex) at ([xshift=2.1em,yshift=0.5em]node3-1.north) {\small{TRM:transformer}};
\node [anchor=south] (ex) at ([xshift=2.1em,yshift=0.5em]node3-1.north) {\small{TRM:Transformer}};
......
......@@ -60,7 +60,7 @@
\node [anchor=west,fill=red!20,minimum width=1.5em](d2-1) at ([xshift=-0.0em]d2.east){};
\node [anchor=west,fill=yellow!20,minimum width=1.5em](d3-1) at ([xshift=-0.0em]d3.east){};
\node [anchor=north] (d4) at ([xshift=1em]d1.south) {\small{训练:}};
\node [anchor=north] (d5) at ([xshift=0.5em]d2.south) {\small{}};
\node [anchor=north] (d5) at ([xshift=0.5em]d2.south) {\small{}};
\draw [->,thick] ([xshift=0em]d4.east)--([xshift=1.5em]d4.east);
\draw [->,thick,dashed] ([xshift=0em]d5.east)--([xshift=1.5em]d5.east);
......
\begin{tikzpicture}
\begin{scope}
\node [anchor=center] (node1) at (0,0) {\textbf{Machine translation}, sometiomes referred to by the abbreviation \textbf{MT} (not to be };
\node [anchor=north] (node2) at (node1.south) {confused with computer-aided translation,,machine-aided human translation inter};
\node [anchor=center] (node1) at (0,0) {\textbf{Machine Translation}, sometimes referred to by the abbreviation \textbf{MT} (not to be };
\node [anchor=north] (node2) at (node1.south) {confused with computer-aided translation,machine-aided human translation inter};
\node [anchor=north] (node3) at (node2.south) {-active translation), is a subfield of computational linguistics that investigates the};
\node [anchor=north] (node4) at ([xshift=-1.8em]node3.south) {use of software to translate text or speech from one language to another.};
\node [anchor=south] (node5) at ([xshift=-12.8em,yshift=0.5em]node1.north) {\Large{WIKIPEDIA}};
......
......@@ -12,8 +12,8 @@
\node[node,anchor=west,minimum width=6em,minimum height=2.4em,fill=blue!20,line width=0.6pt] (decoder2) at ([xshift=4em,yshift=0em]decoder1.east){\small 解码器};
\node[node,anchor=west,minimum width=6em,minimum height=2.4em,fill=blue!30,line width=0.6pt] (decoder3) at ([xshift=3em]decoder2.east){\small 解码器};
\node[anchor=north,font=\scriptsize,fill=yellow!20] (w1) at ([yshift=-1.6em]decoder1.south){知识 \ 就是 \ 力量 \ \ <EOS>};
\node[anchor=north,font=\scriptsize,fill=green!20] (w3) at ([yshift=-1.6em]decoder3.south){Wissen \ ist \ Machit \ . \ <EOS>};
\node[anchor=north,font=\scriptsize,fill=yellow!20] (w1) at ([yshift=-1.6em]decoder1.south){知识 \ 就是 \ 力量 \ \ <eos>};
\node[anchor=north,font=\scriptsize,fill=green!20] (w3) at ([yshift=-1.6em]decoder3.south){Wissen \ ist \ Machit \ . \ <eos>};
\node[anchor=south,font=\scriptsize,fill=orange!20] (w2) at ([yshift=1.6em]encoder1.north){Knowledge \ is \ power \ . };
\node[anchor=south,font=\scriptsize,fill=orange!20] (w4) at ([yshift=1.6em]encoder3.north){Knowledge \ is \ power \ . };
......
This source diff could not be displayed because it is too large. You can view the blob instead.
\tikzstyle{coder} = [rectangle,rounded corners,minimum height=2.2em,minimum width=4.3em,text centered,draw=black,fill=red!25]
\begin{tikzpicture}[node distance = 0,scale = 0.75]
\tikzstyle{every node}=[scale=0.75]
\node(x)[]{x};
\node(x)[]{$x$};
\node(encoder)[coder, above of = x,yshift=4em]{{编码器}};
\node(decoder_left)[coder, above of = encoder, yshift=6em,fill=blue!25]{{解码器}};
\node(y_hat)[above of = decoder_left, yshift=4em]{{$\rm y'$}};
\node(y)[above of = decoder_left, xshift=-6em]{{$\rm y$}};
\node(y_hat)[above of = decoder_left, yshift=4em]{{$y'$}};
\node(y)[above of = decoder_left, xshift=-6em]{{$y$}};
\node(decoder_right)[coder, above of = encoder, xshift=11em,fill=yellow!25]{{解码器}};
\node(figure)[draw=white,above of = decoder_right,yshift=6.5em,scale=0.25] {\includegraphics[width=0.62\textwidth]{./Chapter17/Figures/figure-bank-without-attention.png}};
......
......@@ -18,7 +18,7 @@
\node[anchor=south,font=\footnotesize,inner sep=0pt] (cache)at ([yshift=2em,xshift=1.5em]key.north){\small\bfnew{Cache}};
\node[draw,anchor=east,minimum size=1.8em,fill=orange!15] (dt) at ([yshift=2.1em,xshift=-4em]key.west){${\mathbi{d}}_{t}$};
\node[anchor=north,font=\footnotesize] (readlab) at ([xshift=2.8em,yshift=0.3em]dt.north){\red{reading}};
\node[anchor=north,font=\footnotesize] (readlab) at ([xshift=2.8em,yshift=0.3em]dt.north){\red{读取}};
\node[draw,anchor=east,minimum size=1.8em,fill=ugreen!15] (st) at ([xshift=-3.7em]dt.west){${\mathbi{s}}_{t}$};
\node[draw,anchor=east,minimum size=1.8em,fill=red!15] (st2) at ([xshift=-0.85em,yshift=3.5em]dt.west){$ \widetilde{\mathbi{s}}_{t}$};
......@@ -27,10 +27,10 @@
\draw[-,thick] (add.0) -- (add.180);
\draw[-,thick] (add.90) -- (add.-90);
\node[anchor=north,inner sep=0pt,font=\footnotesize,text=red] at ([xshift=-0.08em,yshift=-1em]add.south){combining};
\node[anchor=north,inner sep=0pt,font=\footnotesize,text=red] at ([xshift=-0em,yshift=-0.5em]add.south){融合};
\node[draw,anchor=east,minimum size=1.8em,fill=yellow!15] (ct) at ([xshift=-2em,yshift=-3.5em]st.west){$ {\mathbi{C}}_{t}$};
\node[anchor=north,font=\footnotesize] (matchlab) at ([xshift=6.7em,yshift=-0.1em]ct.north){\red{mathching}};
\node[anchor=north,font=\footnotesize] (matchlab) at ([xshift=6.7em,yshift=-0.1em]ct.north){\red{匹配}};
\node[anchor=east] (y) at ([xshift=-6em,yshift=1em]st.west){$\mathbi{y}_{t-1}$};
......
......@@ -3,7 +3,7 @@
\begin{tikzpicture}[node distance = 0,scale = 1]
\tikzstyle{every node}=[scale=1]
\node [anchor=center](ori) at (-0.2,-0.2) {$O$};
\node [anchor=center](ori) at (-0.2,-0.2) {0};
\draw[->,thick](-0.5,0)--(5,0)node[below,scale=0.8]{时间};
\draw[->,thick](0,-2)--(0,2)node[left,scale=0.8]{量化值};
\draw[-,thick](0,0)sin(0.7,1.5)cos(1.4,0)sin(2.1,-1.5)cos(2.8,0)sin(3.5,1.5)cos(4.2,0);
......
......@@ -26,7 +26,7 @@
\draw[->, thick,color=black!60](figure.east)to([xshift=-0.1cm]dog.west)node[left,xshift=-0.2cm,yshift=-0.1cm,color=black]{图片检测};
\draw[->, thick,color=black!60]([yshift=-0.1cm]hat.south)to([yshift=0.1cm]ground.north)node[right,xshift=-0.2cm,yshift=0.5cm,color=black]{模板填充};
\node [anchor=north](pos1)at ([xshift=-3.8em,yshift=-0.5em]ground-1.south){(a) 基于检索的图像描述生成范式};
\node [anchor=north](pos2)at ([xshift=-3.8em,yshift=-0.5em]ground.south){(b) 基于模板的图像描述生成范式};
\node [anchor=north](pos1)at ([xshift=-3.8em,yshift=-0.5em]ground-1.south){(a) 基于检索的图像描述生成};
\node [anchor=north](pos2)at ([xshift=-3.8em,yshift=-0.5em]ground.south){(b) 基于模板的图像描述生成};
\end{tikzpicture}
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -39,7 +39,7 @@
\draw [->,dashed,line width=0.7pt] ([yshift=0.5em,xshift=0.5em]three.north) .. controls +(north:3.5em) and +(south:4.5em) .. ([yshift=-0.2em]mthree.south);
\draw [->,dashed,line width=0.7pt] ([yshift=0.5em]four.north) .. controls +(north:4.5em) and +(south:4.5em) .. ([yshift=-0.2em]mfour.south);
\node [word] at ([yshift=-6em]two.south) {(a)显存不复用};
\node [word] at ([yshift=-6em]two.south) {(a) 显存不复用};
%占位
\node[word] at ([xshift=1em]four.east) {};
......@@ -95,7 +95,7 @@
\node [word] at ([xshift=1.5em,yshift=5.6em]one.north) {\scriptsize 显存};
\node [word] at ([yshift=-6em]two.south) {(b)显存复用};
\node [word] at ([yshift=-6em]two.south) {(b) 显存复用};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=1pt,minimum width=10em,minimum height=3.6em,fill=gray!10,drop shadow] at ([yshift=6.6em,xshift=1em]two.north) {};
......
......@@ -519,7 +519,7 @@ His house is on the south bank of the river.
\parinterval {\small\bfnew{词嵌入}}\index{词嵌入}(Word Embedding\index{Word Embedding})技术是近些年自然语言处理中的重要成果,其思想是把每个单词映射为多维实数空间中的一个点(具体表现为一个实数向量),这种技术也被称作单词的{\small\bfnew{分布式表示}}\index{分布式表示}(Distributed Representation\index{Distributed Representation})。在这项技术中,单词之间的关系可以通过空间的几何性质进行刻画,意义相近的单词之间的欧式距离也十分相近(单词分布式表示的具体内容,将在书的{\chapternine} 详细介绍,在此不再赘述)。
\parinterval 受词嵌入技术的启发,研究人员尝试借助参考答案和机器译文的分布式表示来进行译文质量评价,为译文质量评价提供了新思路。在自然语言的上下文中,表示是与每个单词、句子或文档相关联的数学对象。这个对象通常是一个向量,其中每个元素的值在某种程度上描述了相关单词、句子或文档的语义或句法属性。基于这个想法,研究人员提出了{\small\sffamily\bfseries{分布式表示评价度量}}\index{分布式表示评价度量}(Distributed Representations Evaluation Metrics,DREEM)\index{Distributed Representations Evaluation Metrics}\upcite{DBLP:conf/acl/ChenG15}。这种方法将单词或句子的分布式表示映射到连续的低维空间,发现在该空间中,具有相似句法和语义属性的单词彼此接近,类似的结论也出现在相关工作中,如参考文献\cite{bengio2003a,DBLP:conf/emnlp/SocherPHNM11,DBLP:conf/emnlp/SocherPWCMNP13}所示。而这个特点可以被应用到译文质量评估中。
\parinterval 受词嵌入技术的启发,研究人员尝试借助参考答案和机器译文的分布式表示来进行译文质量评价,为译文质量评价提供了新思路。在自然语言的上下文中,表示是与每个单词、句子或文档相关联的数学对象。这个对象通常是一个向量,其中每个元素的值在某种程度上描述了相关单词、句子或文档的语义或句法属性。基于这个想法,研究人员提出了{\small\sffamily\bfseries{分布式表示评价度量}}\index{分布式表示评价度量}(Distributed Representations Evaluation Metrics,DREEM)\index{DREEM}\upcite{DBLP:conf/acl/ChenG15}。这种方法将单词或句子的分布式表示映射到连续的低维空间,发现在该空间中,具有相似句法和语义属性的单词彼此接近,类似的结论也出现在相关工作中,如参考文献\cite{bengio2003a,DBLP:conf/emnlp/SocherPHNM11,DBLP:conf/emnlp/SocherPWCMNP13}所示。而这个特点可以被应用到译文质量评估中。
\parinterval 在DREEM中,分布式表示的选取是一个十分关键的问题,理想的情况下,分布式表示应该涵盖句子在词汇、句法、语法、语义、依存关系等各个方面的信息。目前常见的分布式表示方式如表\ref{tab:4-2}所示。除此之外,还可以通过词袋模型、循环神经网络等将词向量表示转换为句子向量表示。
......@@ -532,9 +532,9 @@ His house is on the south bank of the river.
\hline
\rule{0pt}{10pt} One-hot词向量 & RAE编码\upcite{DBLP:conf/emnlp/SocherPHNM11} \\
\rule{0pt}{10pt} Word2Vec词向量\upcite{DBLP:journals/corr/abs-1301-3781} & Doc2Vec向量\upcite{DBLP:conf/icml/LeM14} \\
\rule{0pt}{10pt} Prob-fasttext词向量\upcite{DBLP:conf/acl/AthiwaratkunW17} & ELMO预训练句子表示\upcite{DBLP:conf/naacl/PetersNIGCLZ18} \\
\rule{0pt}{10pt} Prob-fasttext词向量\upcite{DBLP:conf/acl/AthiwaratkunW17} & ELMO预训练句子表示\upcite{Peters2018DeepCW} \\
\rule{0pt}{10pt} GloVe词向量\upcite{DBLP:conf/emnlp/PenningtonSM14} & GPT句子表示\upcite{radford2018improving} \\
\rule{0pt}{10pt} ELMO预训练词向量\upcite{DBLP:conf/naacl/PetersNIGCLZ18} & BERT预训练句子表示\upcite{devlin2019bert} \\
\rule{0pt}{10pt} ELMO预训练词向量\upcite{Peters2018DeepCW} & BERT预训练句子表示\upcite{devlin2019bert} \\
\rule{0pt}{10pt} BERT预训练词向量\upcite{devlin2019bert} & Skip-thought向量\upcite{DBLP:conf/nips/KirosZSZUTF15} \\
\end{tabular}
\label{tab:4-2}
......@@ -874,7 +874,7 @@ d&=&t \frac{s}{\sqrt{n}}
\vspace{0.5em}
\end{itemize}
\parinterval 随着深度学习技术的发展,另一种思路是使用表示学习技术生成句子的分布式表示,并在此基础上利用神经网络自动提取高度抽象的句子特征\upcite{DBLP:conf/wmt/KreutzerSR15,DBLP:conf/wmt/MartinsAHK16,DBLP:conf/wmt/ChenTZXZLW17},这样就避免了人工设计特征所带来的时间以及人工代价,同时表示学习所得到的分布式表示可以涵盖更多人工设计难以捕获到的特征,更加全面地反映句子的特点,因此在质量评估任务上也取得了很好的效果\upcite{kreutzer2015quality,DBLP:conf/wmt/ShahLPBBBS15,DBLP:conf/wmt/ScartonBSSS16,DBLP:conf/wmt/AbdelsalamBE16,DBLP:conf/wmt/BasuPN18,DBLP:conf/wmt/Lo19,DBLP:conf/wmt/YankovskayaTF19}。比如,最近的一些工作中大量使用了神经机器翻译模型来获得双语句子的表示结果,并用于质量评估\upcite{DBLP:conf/wmt/Qi19,DBLP:conf/wmt/ZhouZH19,DBLP:conf/wmt/Hokamp17,wang2019niutrans}。这样做的好处在于,质量评估可以直接复用机器翻译的模型,从某种意义上降低了质量评估系统开发的代价。此外,随着近几年各种预训练模型的出现,使用预训练模型来获取用于质量评估的句子表示也成为一大流行趋势,这种方法大大减少了质量评估模型自身的训练时间,在该领域内的表现也十分亮眼\upcite{kepler2019unbabel,DBLP:conf/wmt/YankovskayaTF19,DBLP:conf/wmt/KimLKN19}。关于表示学习、神经机器翻译、预训练模型的内容在第九章和第十章会有进一步介绍。
\parinterval 随着深度学习技术的发展,另一种思路是使用表示学习技术生成句子的分布式表示,并在此基础上利用神经网络自动提取高度抽象的句子特征\upcite{DBLP:conf/wmt/KreutzerSR15,DBLP:conf/wmt/MartinsAHK16,DBLP:conf/wmt/ChenTZXZLW17},这样就避免了人工设计特征所带来的时间以及人工代价,同时表示学习所得到的分布式表示可以涵盖更多人工设计难以捕获到的特征,更加全面地反映句子的特点,因此在质量评估任务上也取得了很好的效果\upcite{kreutzer2015quality,DBLP:conf/wmt/ShahLPBBBS15,DBLP:conf/wmt/ScartonBSSS16,DBLP:conf/wmt/AbdelsalamBE16,DBLP:conf/wmt/BasuPN18}。比如,最近的一些工作中大量使用了神经机器翻译模型来获得双语句子的表示结果,并用于质量评估\upcite{DBLP:conf/wmt/Qi19,DBLP:conf/wmt/ZhouZH19,DBLP:conf/wmt/Hokamp17,wang2019niutrans}。这样做的好处在于,质量评估可以直接复用机器翻译的模型,从某种意义上降低了质量评估系统开发的代价。此外,随着近几年各种预训练模型的出现,使用预训练模型来获取用于质量评估的句子表示也成为一大流行趋势,这种方法大大减少了质量评估模型自身的训练时间,在该领域内的表现也十分亮眼\upcite{kepler2019unbabel,DBLP:conf/wmt/YankovskayaTF19,DBLP:conf/wmt/KimLKN19}。关于表示学习、神经机器翻译、预训练模型的内容在第九章和第十章会有进一步介绍。
\parinterval 在得到句子表示之后,可以使用质量评估模块对译文质量进行预测。质量评估模型通常由回归算法或分类算法实现:
......
......@@ -1884,7 +1884,7 @@ z_t&=&\gamma z_{t-1}+(1-\gamma) \frac{\partial J}{\partial {\theta}_t} \cdot \f
\subsubsection{1. 模型结构}
\parinterval 最具代表性的神经语言模型是{\small\sffamily\bfseries{前馈神经网络语言模型}}\index{前馈神经网络语言模型}(Feed-forward Neural Network Language Model\index{Feed-forward Neural Network Language Model},简称FNNLM)。这种语言模型的目标是用神经网络计算$ \funp{P}(w_m|w_{m-n+1}\dots w_{m-1}) $,之后将多个$n$-gram的概率相乘得到整个序列的概率\upcite{bengio2003a}
\parinterval 最具代表性的神经语言模型是{\small\sffamily\bfseries{前馈神经网络语言模型}}\index{前馈神经网络语言模型}(Feed-forward Neural Network Language Model,FNNLM\index{FNNLM})。这种语言模型的目标是用神经网络计算$ \funp{P}(w_m|w_{m-n+1}\dots w_{m-1}) $,之后将多个$n$-gram的概率相乘得到整个序列的概率\upcite{bengio2003a}
%----------------------------------------------
\begin{figure}[htp]
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论