Commit 191df4aa by 孟霞

合并分支 'mengxia' 到 'master'

4.27 section5 before 5.1

查看合并请求 !20
parents a96f10b4 a8d9a067
......@@ -9,7 +9,7 @@
\node [anchor=west,inner sep=4pt] (sep) at ([xshift=1em]Trm3.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm4) at ([xshift=1em]sep.east) {\scriptsize{TRM}};
\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm5) at ([yshift=1em]Trm0.north) {\scriptsize{TRM}};
\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm5) at ([yshift=1.2em]Trm0.north) {\scriptsize{TRM}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm6) at ([xshift=1em]Trm5.east) {\scriptsize{TRM}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm7) at ([xshift=1em]Trm6.east) {\scriptsize{TRM}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm8) at ([xshift=1em]Trm7.east) {\scriptsize{TRM}};
......@@ -18,22 +18,28 @@
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Trm0) (Trm4) (Trm5) (Trm9)] (inputshadow) {};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e1) at ([yshift=-1em]Trm0.south) {\scriptsize{$\textbf{e}_1$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([yshift=-1em]Trm1.south) {\scriptsize{$\textbf{e}_2$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([yshift=-1em]Trm2.south) {\scriptsize{$\textbf{e}_3$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1em]Trm3.south) {\scriptsize{$\textbf{e}_4$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e1) at ([yshift=-1.2em]Trm0.south) {\scriptsize{$\textbf{e}_1$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([yshift=-1.2em]Trm1.south) {\scriptsize{$\textbf{e}_2$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([yshift=-1.2em]Trm2.south) {\scriptsize{$\textbf{e}_3$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1.2em]Trm3.south) {\scriptsize{$\textbf{e}_4$}};
\node [anchor=north,inner sep=4pt] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$\textbf{e}_m$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1.2em]Trm4.south) {\scriptsize{$\textbf{e}_m$}};
\node [anchor=south] (word1) at ([yshift=-1.5em]e1.south) {\footnotesize {Once}};
\node [anchor=south] (word2) at ([yshift=-1.7em]e2.south) {\footnotesize {[MASK]}};
\node [anchor=south] (word3) at ([yshift=-1.5em]e3.south) {\footnotesize {a}};
\node [anchor=south] (word4) at ([yshift=-1.5em]e4.south) {\footnotesize {time}};
\node [anchor=south] (wordseq) at ([yshift=-2.0em]sep5.south) {\footnotesize{...}};
\node [anchor=south] (word4) at ([yshift=-1.5em]e5.south) {\footnotesize {island}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1em]Trm5.north) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1em]Trm6.north) {\scriptsize{$\textbf{h}_2$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t3) at ([yshift=1em]Trm7.north) {\scriptsize{$\textbf{h}_3$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t4) at ([yshift=1em]Trm8.north) {\scriptsize{$\textbf{h}_4$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1.2em]Trm5.north) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1.2em]Trm6.north) {\scriptsize{$\textbf{h}_2$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t3) at ([yshift=1.2em]Trm7.north) {\scriptsize{$\textbf{h}_3$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t4) at ([yshift=1.2em]Trm8.north) {\scriptsize{$\textbf{h}_4$}};
\node [anchor=south,inner sep=4pt] (sep6) at ([yshift=1em]sep1.north) {\scriptsize{...}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t5) at ([yshift=1em]Trm9.north) {\scriptsize{$\textbf{h}_m$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t5) at ([yshift=1.2em]Trm9.north) {\scriptsize{$\textbf{h}_m$}};
\node [anchor=west,draw,inner sep=3pt,fill=blue!20!white,minimum width=1em] (Lt1) at ([yshift=1.5em]t1.west) {\tiny{TRM}};
\node [anchor=west] (Lt2) at ([xshift=-0.1em]Lt1.east) {\scriptsize{: Transformer Block}};
\node [anchor=west,draw,inner sep=3pt,fill=blue!20!white,minimum width=1em] (Lt1) at ([yshift=1.2em]t1.west) {\tiny{TRM}};
\node [anchor=west] (Lt2) at ([xshift=-0.1em]Lt1.east) {\tiny{: Transformer}};
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm1.south);
......@@ -51,7 +57,6 @@
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm5.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm6.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm7.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm8.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm9.south);
\draw [->] ([yshift=0.1em]Trm1.north) -- ([yshift=-0.1em]Trm6.south);
......
......@@ -51,11 +51,6 @@
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\scriptsize{h2 = Relu(h1 * w2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\scriptsize{h3 = h2 + h1}};
{\draw [->,thick] (h1.north) -- (h2.south);}
{\draw [->,thick] (h2.north) -- (h3.south);}
{\draw [->,thick] (h3.north) -- (h4.south);}
{\draw [->,thick,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);}
{\draw [<-,very thick,red] (h1.north) -- (h2.south);}
{\draw [<-,very thick,red] (h2.north) -- (h3.south);}
{\draw [<-,very thick,red] (h3.north) -- (h4.south);}
......@@ -64,11 +59,11 @@
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8.0em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1.5em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}};
\node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}};
{\draw [->,thick] (h4.north) -- (slayer.south);}
{\draw [<-,very thick,red] (h4.north) -- (slayer.south);}
\end{tikzpicture}
\end{center}
\end{tcolorbox}
%%%------------------------------------------------------------------------------------------------------------
%%%------------------------------------------------------------------------------------------------------------
\begin{tcolorbox}
\begin{tcolorbox}
[bicolor,sidebyside,width=13cm,righthand width=4cm,size=title,frame engine=empty,
colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
\texttt{XTensor x, y, gold, h[5], w[5], s[5];} \\
\texttt{XTensor dh[5], dw[5], ds[5];} \\
\texttt{...} // 前向过程 \\
\texttt{h[0] = x;} \\
\texttt{y = h[4];} \\
\texttt{XTensor x, loss, gold, h[5], w[5], b[5];} \\
\texttt{...} \\
\texttt{} \\
\texttt{CrossEntropyBackward(dh[4], y, gold);} \\
\texttt{SoftmaxBackward(y, s[4], dh[4], ds[4]);}\\
\texttt{MMul(h[3], {\scriptsize X\_TRANS}, ds[4], {\scriptsize X\_NOTRANS}, dw[4]);}\\
\texttt{MMul(ds[4], {\scriptsize X\_NOTRANS}, w[4], {\scriptsize X\_RANS}, dh[3]);}\\
\texttt{h[1] = Relu(MMul(x, w[1]) + b[1]);} \\
\texttt{h[2] = Relu(MMul(h[1], w[2]) + b[2]);} \\
\texttt{h[3] = HardTanH(h[2]);} \\
\texttt{h[4] = Softmax(MMul(h[3], w[3]));} \\
\texttt{loss = CrossEntropy(h[4], gold);} \\
\texttt{} \\
\texttt{dh[2] = dh[3];}\\
\texttt{ReluBackward(h[2], s[2], dh[2], ds[2]);}\\
\texttt{MMul(h[1], {\scriptsize X\_TRANS}, ds[2], {\scriptsize X\_NOTRANS}, dw[2]);}\\
\texttt{MMul(ds[2], {\scriptsize X\_NOTRANS}, w[2], {\scriptsize X\_TRANS}, dh[2]);}\\
\texttt{XNet net;}\\
{\texttt{net.Backward(loss);} //一行代码实现自动微分}\\
\texttt{} \\
\texttt{dh[1] = dh[1] + dh[3];}\\
\texttt{...} // 继续反向传播 \\
\texttt{} \\
\texttt{for(unsigned i = 0; i < 5; i++)\{} \\
\texttt{} \ \ \ \ ... // 通过{\texttt{dw[i]}}访问参数的梯度\\
\texttt{} \ \ \ \ ... // 通过{\texttt{w[i].grad}}访问参数的梯度\\
\texttt{\}}
\end{tabbing}
}
\tcblower
......@@ -46,26 +30,19 @@
\begin{tikzpicture}
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at (0,0) {\scriptsize{x (input)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1.5em]h1.north) {\scriptsize{h1 = Relu(x * w1)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\scriptsize{h2 = Relu(h1 * w2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\scriptsize{h3 = h2 + h1}};
{\draw [->,thick] (h1.north) -- (h2.south);}
{\draw [->,thick] (h2.north) -- (h3.south);}
{\draw [->,thick] (h3.north) -- (h4.south);}
{\draw [->,thick,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);}
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at (0,0) {\tiny{x (input)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1.0em]h1.north) {\tiny{h1 = Relu(x * w1 + b1)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.0em]h2.north) {\tiny{h2 = Relu(h1 * w2 + b2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.0em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.0em]h3.north) {\tiny{h3 = HardTanh(h2)}};
{\draw [<-,very thick,red] (h1.north) -- (h2.south);}
{\draw [<-,very thick,red] (h2.north) -- (h3.south);}
{\draw [<-,very thick,red] (h3.north) -- (h4.south);}
{\draw [<-,very thick,red,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);}
\draw [->,thick] (h1.north) -- (h2.south);
\draw [->,thick] (h2.north) -- (h3.south);
\draw [->,thick] (h3.north) -- (h4.south);
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8.0em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1.5em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8.0em,minimum height=1.0em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1.0em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}};
\node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}};
{\draw [->,thick] (h4.north) -- (slayer.south);}
{\draw [<-,very thick,red] (h4.north) -- (slayer.south);}
\draw [->,thick] (h4.north) -- (slayer.south);
\end{tikzpicture}
\end{center}
......
......@@ -18,6 +18,10 @@
\node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([xshift=1em]e1.east) {\scriptsize{$\textbf{e}_2$}};
\node [anchor=west,inner sep=4pt] (sep5) at ([xshift=1em]e2.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([xshift=1em]sep5.east) {\scriptsize{$\textbf{e}_m$}};
\node [anchor=south] (word1) at ([yshift=-1.5em]e1.south) {\footnotesize {Once}};
\node [anchor=south] (word2) at ([yshift=-1.6em]e2.south) {\footnotesize {upon}};
\node [anchor=south] (wordseq) at ([yshift=-1.5em]sep5.south) {\footnotesize{...}};
\node [anchor=south] (word3) at ([yshift=-1.5em]e3.south) {\footnotesize {island}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([xshift=-2em,yshift=1em]Lstm5.north) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=west,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([xshift=1em]t1.east) {\scriptsize{$\textbf{h}_2$}};
......@@ -48,31 +52,31 @@
\draw [->] ([yshift=0.1em]Lstm1.north) -- ([yshift=-0.1em]Lstm4.south);
\draw [->] ([yshift=0.1em]Lstm2.north) -- ([yshift=-0.1em]Lstm5.south);
\draw [->] ([xshift=0.1em]Lstm6.east) -- ([xshift=-0.1em]Lstm7.west);
\draw [->] ([xshift=0.1em]Lstm7.east) -- ([xshift=0.1em]sep3.west);
\draw [->] ([xshift=-0.1em]sep3.east) -- ([xshift=-0.1em]Lstm8.west);
\draw [->] ([xshift=-0.1em]Lstm7.west) -- ([xshift=0.1em]Lstm6.east);
\draw [->] ([xshift=0.1em]sep3.west) -- ([xshift=0.1em]Lstm7.east);
\draw [->] ([xshift=-0.1em]Lstm8.west) -- ([xshift=-0.1em]sep3.east);
\draw [->] ([xshift=0.1em]Lstm9.east) -- ([xshift=-0.1em]Lstm10.west);
\draw [->] ([xshift=0.1em]Lstm10.east) -- ([xshift=0.1em]sep4.west);
\draw [->] ([xshift=-0.1em]sep4.east) -- ([xshift=-0.1em]Lstm11.west);
\draw [->] ([xshift=-0.1em]Lstm10.west) -- ([xshift=0.1em]Lstm9.east);
\draw [->] ([xshift=0.1em]sep4.west) -- ([xshift=0.1em]Lstm10.east);
\draw [->] ([xshift=-0.1em]Lstm11.west) -- ([xshift=-0.1em]sep4.east);
\draw [->] ([yshift=0.1em]Lstm6.north) -- ([yshift=-0.1em]Lstm9.south);
\draw [->] ([yshift=0.1em]Lstm7.north) -- ([yshift=-0.1em]Lstm10.south);
\draw [->] ([yshift=0.1em]Lstm8.north) -- ([yshift=-0.1em]Lstm11.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Lstm0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Lstm6.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Lstm1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Lstm7.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Lstm2.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Lstm8.south);
\draw [->] ([yshift=0.1em]Lstm3.north) -- ([xshift=-0.05em,yshift=-0.1em]t1.south);
\draw [->] ([yshift=0.1em]Lstm9.north) -- ([yshift=-0.1em]t1.south);
\draw [->] ([yshift=0.1em]Lstm4.north) -- ([xshift=-0.05em,yshift=-0.1em]t2.south);
\draw [->] ([yshift=0.1em]Lstm10.north) -- ([yshift=-0.1em]t2.south);
\draw [->] ([yshift=0.1em]Lstm5.north) -- ([xshift=-0.05em,yshift=-0.1em]t3.south);
\draw [->] ([yshift=0.1em]Lstm11.north) -- ([yshift=-0.1em]t3.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.12em]Lstm0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.12em]Lstm6.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.12em]Lstm1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.12em]Lstm7.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.12em]Lstm2.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.12em]Lstm8.south);
\draw [->] ([yshift=0.1em]Lstm3.north) -- ([xshift=-0.05em,yshift=-0.12em]t1.south);
\draw [->] ([yshift=0.1em]Lstm9.north) -- ([yshift=-0.12em]t1.south);
\draw [->] ([yshift=0.1em]Lstm4.north) -- ([xshift=-0.05em,yshift=-0.12em]t2.south);
\draw [->] ([yshift=0.1em]Lstm10.north) -- ([yshift=-0.12em]t2.south);
\draw [->] ([yshift=0.1em]Lstm5.north) -- ([xshift=-0.05em,yshift=-0.12em]t3.south);
\draw [->] ([yshift=0.1em]Lstm11.north) -- ([yshift=-0.12em]t3.south);
\end{scope}
\end{tikzpicture}
......
......@@ -32,12 +32,12 @@
\draw [->,thick] ([yshift=0.1em]n20.north) -- (y.south);
%% weight and bias
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north) {\tiny{$b=-6$}};}
{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north) {\tiny{$w=100$}};}
{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=-0.7$}};}
{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.7$}};}
{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-4$}};}
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=100$}};}
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north) {\tiny{$b_1=-6$}};}
{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north) {\tiny{$w_1=100$}};}
{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=5.1em,xshift=2.3em]b.north) {\tiny{$b_2=-4$}};}
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w_2=100$}};}
{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=1.8em,xshift=0.2em]n10.north) {\tiny{$w'_1=-0.7$}};}
{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=1.8em,xshift=-0.2em]n11.north) {\tiny{$w'_2=0.7$}};}
%% sigmoid box
\begin{scope}
......@@ -120,18 +120,18 @@
\node [] (y) at ([yshift=3em]n20.north) {$y$};
\draw [->,thick] ([yshift=0.1em]n20.north) -- (y.south);
{
\draw [->,thick] ([yshift=0.1em]n12.north) -- ([yshift=-0.1em]n20.310);
\draw [->,thick] ([yshift=0.1em]n13.north) -- ([yshift=-0.1em]n20.330);
\draw [->,thick] ([yshift=0.1em]n12.north) -- ([yshift=-0.1em]n20.330);
\draw [->,thick] ([yshift=0.1em]n13.north) -- ([yshift=-0.1em]n20.340);
}
%% weight and bias
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north) {\tiny{$b=-6$}};}
{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north) {\tiny{$w=100$}};}
{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=4.9em,xshift=2.2em]b.north) {\tiny{$b=-4$}};}
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w=100$}};}
{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=2em,xshift=0.5em]n10.north) {\tiny{$w'=-0.7$}};}
{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=2em,xshift=-0.5em]n11.north) {\tiny{$w'=0.7$}};}
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=3em,xshift=-0.5em]b.north) {\tiny{$b_1=-6$}};}
{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1.2em,xshift=-1.2em]x1.north) {\tiny{$w_1=100$}};}
{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=5.1em,xshift=2.3em]b.north) {\tiny{$b_2=-4$}};}
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=3em,xshift=0.5em]x1.north) {\tiny{$w_2=100$}};}
{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=1.8em,xshift=0.2em]n10.north) {\tiny{$w'_1=-0.7$}};}
{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=1.8em,xshift=-0.2em]n11.north) {\tiny{$w'_2=0.7$}};}
%% sigmoid box
\begin{scope}
......@@ -179,6 +179,8 @@
\end{scope}
\end{tikzpicture}
%%%------------------------------------------------------------------------------------------------------------
......
......@@ -14,7 +14,7 @@
\foreach \n in {1,...,5}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron0\m.south) -- ([yshift=-1.8em]neuron0\n.south);
\draw [<-] ([yshift=-0.1em]neuron0\m.south) -- ([yshift=-1.8em]neuron0\n.south);
}
\node [anchor=north] (x\n) at ([yshift=-1.8em]neuron0\n.south) {$x_\n$};
}
......@@ -25,13 +25,13 @@
\node [rectangle,inner sep=0.2em,fill=red!20] [fit = (neuron01) (neuron05)] (layer01) {};
\end{pgfonlayer}
\node [anchor=west] (layer00label) at ([xshift=1.25em]x5.east) {\footnotesize{{输入层}}};
\node [anchor=west] (layer00label) at ([xshift=1.25em]x5.east) {\footnotesize{\red{{输入层}}}};
{
\node [anchor=west] (layer01label) at ([xshift=1em]layer01.east) {\footnotesize{第二层}};
}
{
\node [anchor=west] (layer01label2) at (layer01label.east) {\footnotesize{(隐层)}};
\node [anchor=west] (layer01label2) at (layer01label.east) {\footnotesize{\red{({隐层})}}};
}
%%% layer 2
......@@ -43,7 +43,7 @@
\foreach \n in {2,...,4}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron1\n.south) -- (neuron0\m.north);
\draw [<-] ([yshift=-0.1em]neuron1\n.south) -- (neuron0\m.north);
}
}
......@@ -57,7 +57,7 @@
\node [anchor=west] (layer02label) at ([xshift=4.5em]layer02.east) {\footnotesize{第三层}};
{
\node [anchor=west] (layer02label2) at (layer02label.east) {\footnotesize{({隐层})}};
\node [anchor=west] (layer02label2) at (layer02label.east) {\footnotesize{\red{({隐层})}}};
}
}
......@@ -70,7 +70,7 @@
\foreach \n in {1,...,5}{
\foreach \m in {2,...,4}{
\draw [<-] (neuron2\n.south) -- (neuron1\m.north);
\draw [<-] ([yshift=-0.1em]neuron2\n.south) -- (neuron1\m.north);
}
\node [anchor=south] (y\n) at ([yshift=1.2em]neuron2\n.north) {$y_\n$};
......@@ -87,7 +87,7 @@
\node [anchor=west] (layer03label) at ([xshift=1em]layer03.east) {\footnotesize{第四层}};
{
\node [anchor=west] (layer03label2) at (layer03label.east) {\footnotesize{({输出层})}};
\node [anchor=west] (layer03label2) at (layer03label.east) {\footnotesize{\red{({输出层})}}};
}
}
......
......@@ -9,7 +9,7 @@
\node [anchor=west,inner sep=4pt] (sep) at ([xshift=1em]Trm3.east) {\scriptsize{...}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm4) at ([xshift=1em]sep.east) {\scriptsize{TRM}};
\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm5) at ([yshift=1em]Trm0.north) {\scriptsize{TRM}};
\node [anchor=south,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm5) at ([yshift=1.2em]Trm0.north) {\scriptsize{TRM}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm6) at ([xshift=1em]Trm5.east) {\scriptsize{TRM}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm7) at ([xshift=1em]Trm6.east) {\scriptsize{TRM}};
\node [anchor=west,draw,inner sep=4pt,fill=blue!20!white,minimum width=3em] (Trm8) at ([xshift=1em]Trm7.east) {\scriptsize{TRM}};
......@@ -18,44 +18,51 @@
\node [rectangle,rounded corners,draw=black!50,densely dashed,inner sep=0.4em] [fit = (Trm0) (Trm4) (Trm5) (Trm9)] (inputshadow) {};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e1) at ([yshift=-1em]Trm0.south) {\scriptsize{$\textbf{e}_1$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([yshift=-1em]Trm1.south) {\scriptsize{$\textbf{e}_2$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([yshift=-1em]Trm2.south) {\scriptsize{$\textbf{e}_3$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1em]Trm3.south) {\scriptsize{$\textbf{e}_4$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e1) at ([yshift=-1.2em]Trm0.south) {\scriptsize{$\textbf{e}_1$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e2) at ([yshift=-1.2em]Trm1.south) {\scriptsize{$\textbf{e}_2$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e3) at ([yshift=-1.2em]Trm2.south) {\scriptsize{$\textbf{e}_3$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e4) at ([yshift=-1.2em]Trm3.south) {\scriptsize{$\textbf{e}_4$}};
\node [anchor=north,inner sep=4pt] (sep5) at ([yshift=-1em]sep.south) {\scriptsize{...}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1em]Trm4.south) {\scriptsize{$\textbf{e}_m$}};
\node [anchor=north,draw,inner sep=4pt,fill=ugreen!20!white,minimum width=2em] (e5) at ([yshift=-1.2em]Trm4.south) {\scriptsize{$\textbf{e}_m$}};
\node [anchor=south] (word1) at ([yshift=-1.5em]e1.south) {\footnotesize {Once}};
\node [anchor=south] (word2) at ([yshift=-1.6em]e2.south) {\footnotesize {upon}};
\node [anchor=south] (word3) at ([yshift=-1.5em]e3.south) {\footnotesize {a}};
\node [anchor=south] (word4) at ([yshift=-1.5em]e4.south) {\footnotesize {time}};
\node [anchor=south] (wordseq) at ([yshift=-2.0em]sep5.south) {\footnotesize{...}};
\node [anchor=south] (word4) at ([yshift=-1.5em]e5.south) {\footnotesize {island}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1em]Trm5.north) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1em]Trm6.north) {\scriptsize{$\textbf{h}_2$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t3) at ([yshift=1em]Trm7.north) {\scriptsize{$\textbf{h}_3$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t4) at ([yshift=1em]Trm8.north) {\scriptsize{$\textbf{h}_4$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t1) at ([yshift=1.2em]Trm5.north) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t2) at ([yshift=1.2em]Trm6.north) {\scriptsize{$\textbf{h}_2$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t3) at ([yshift=1.2em]Trm7.north) {\scriptsize{$\textbf{h}_3$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t4) at ([yshift=1.2em]Trm8.north) {\scriptsize{$\textbf{h}_4$}};
\node [anchor=south,inner sep=4pt] (sep6) at ([yshift=1em]sep1.north) {\scriptsize{...}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t5) at ([yshift=1em]Trm9.north) {\scriptsize{$\textbf{h}_m$}};
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t5) at ([yshift=1.2em]Trm9.north) {\scriptsize{$\textbf{h}_m$}};
\node [anchor=west,draw,inner sep=3pt,fill=blue!20!white,minimum width=1em] (Lt1) at ([yshift=1.5em]t1.west) {\tiny{TRM}};
\node [anchor=west] (Lt2) at ([xshift=-0.1em]Lt1.east) {\scriptsize{: Transformer Block}};
\node [anchor=west,draw,inner sep=3pt,fill=blue!20!white,minimum width=1em] (Lt1) at ([yshift=1.2em]t1.west) {\tiny{TRM}};
\node [anchor=west] (Lt2) at ([xshift=-0.1em]Lt1.east) {\tiny{: Transformer}};
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm1.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm2.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm3.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm4.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm1.260);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm2.260);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm3.260);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm4.260);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]Trm1.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm2.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm3.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm4.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm3.260);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]Trm4.260);
\draw [->] ([yshift=0.1em]e4.north) -- ([yshift=-0.1em]Trm3.south);
\draw [->] ([yshift=0.1em]e5.north) -- ([yshift=-0.1em]Trm4.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm5.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm6.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm7.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm8.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm9.south);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm6.260);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm7.260);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm8.260);
\draw [->] ([yshift=0.1em]Trm0.north) -- ([yshift=-0.1em]Trm9.260);
\draw [->] ([yshift=0.1em]Trm1.north) -- ([yshift=-0.1em]Trm6.south);
\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm7.south);
\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm8.south);
\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm9.south);
\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm8.260);
\draw [->] ([yshift=0.1em]Trm2.north) -- ([yshift=-0.1em]Trm9.260);
\draw [->] ([yshift=0.1em]Trm3.north) -- ([yshift=-0.1em]Trm8.south);
\draw [->] ([yshift=0.1em]Trm4.north) -- ([yshift=-0.1em]Trm9.south);
......
......@@ -11,7 +11,7 @@
}
\foreach \n in {1,...,5}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron0\m.south) -- ([yshift=-1.8em]neuron0\n.south);
\draw [<-] ([yshift=-0.1em]neuron0\m.south) -- ([yshift=-1.8em]neuron0\n.south);
}
\node [anchor=north] (x\n) at ([yshift=-1.8em]neuron0\n.south) {$x_\n$};
{
......@@ -45,7 +45,7 @@
\foreach \n in {1,...,5}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron0\m.south) -- ([yshift=-1.8em]neuron0\n.south);
\draw [<-] ([yshift=-0.1em]neuron0\m.south) -- ([yshift=-1.8em]neuron0\n.south);
}
\node [anchor=north] (x\n) at ([yshift=-1.8em]neuron0\n.south) {$x_\n$};
}
......@@ -65,7 +65,7 @@
\foreach \n in {2,...,4}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron1\n.south) -- (neuron0\m.north);
\draw [<-] ([yshift=-0.1em]neuron1\n.south) -- (neuron0\m.north);
}
\draw [<-,thick] ([yshift=1.1em]neuron1\n.north) -- (neuron1\n.north);
\node [anchor=south] (y\n) at ([yshift=1.25em]neuron1\n.north) {$y_\n$};
......@@ -98,7 +98,7 @@
\foreach \n in {1,...,5}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron0\m.south) -- ([yshift=-1.8em]neuron0\n.south);
\draw [<-] ([yshift=-0.1em]neuron0\m.south) -- ([yshift=-1.8em]neuron0\n.south);
}
\node [anchor=north] (x\n) at ([yshift=-1.8em]neuron0\n.south) {$x_\n$};
}
......@@ -118,7 +118,7 @@
\foreach \n in {2,...,4}{
\foreach \m in {1,...,5}{
\draw [<-] (neuron1\n.south) -- (neuron0\m.north);
\draw [<-] ([yshift=-0.1em]neuron1\n.south) -- (neuron0\m.north);
}
}
......@@ -142,7 +142,7 @@
\foreach \n in {1,...,5}{
\foreach \m in {2,...,4}{
\draw [<-] (neuron2\n.south) -- (neuron1\m.north);
\draw [<-] ([yshift=-0.1em]neuron2\n.south) -- (neuron1\m.north);
}
\node [anchor=south] (y\n) at ([yshift=1.25em]neuron2\n.north) {$y_\n$};
......
......@@ -24,7 +24,7 @@
\foreach \n in {1,...,4}{
\foreach \m in {1,...,4}{
\draw [<-] (neuron1\n.south) -- (neuron0\m.north);
\draw [<-] ([yshift=-0.1em]neuron1\n.south) -- (neuron0\m.north);
}
}
......@@ -41,7 +41,7 @@
\foreach \n in {1,...,4}{
\foreach \m in {1,...,4}{
\draw [<-] (neuron2\n.south) -- (neuron1\m.north);
\draw [<-] ([yshift=-0.1em]neuron2\n.south) -- (neuron1\m.north);
}
}
......@@ -85,5 +85,6 @@
\end{scope}
\end{tikzpicture}
%%%------------------------------------------------------------------------------------------------------------
......@@ -3,26 +3,25 @@
\begin{scope}
\node [anchor=center] (node1) at (0,0) {};
\node [anchor=north,draw,thick](node2)at ([yshift=-1.5em]node1.south){\small{\ \ layer\ \ }};
\draw[->,thick](node1.south)--(node2.north);
\node [anchor=center] (node6) at (0,0) {};
\node[anchor=west](node6-1) at ([xshift=-0.2em,yshift=-0.6em]node6.east) {\footnotesize{$\rm{ReLU}$}};
\node [anchor=north](node3)at ([yshift=-1.2em]node6.south){$\bigoplus$};
\draw[->,thick]([yshift=-0.32em]node3.north)--(node6.south);
\node [anchor=north](node3)at ([yshift=-1.2em]node2.south){$\bigoplus$};
\draw[->,thick](node2.south)--([yshift=-0.3em]node3.north);
\node [anchor=north,draw,thick](node2)at ([yshift=-1.2em]node3.south){\small{weight layer}};
\draw[->,thick](node2.north)--([yshift=0.35em]node3.south);
\node[anchor=west](node2-1) at ([xshift=2.1em,yshift=1.2em]node2.east) {$\mathbf{x}$};
\node[anchor=north](node2-2) at ([xshift=0.2em,yshift=-0.3em]node2-1.south) {\footnotesize{$\rm{identity}$}};
\node [anchor=east](node4) at ([xshift=0.0em]node2.west) {$\textrm{F}(\mathbf{x})$};
\node [anchor=east](node5) at ([xshift=0.3em]node3.west) {$\textrm{F}(\mathbf{x})+\mathbf{x}$};
\node [anchor=east](node4) at ([xshift=-0.2em]node2.west) {$\textrm{F}(\mathbf{x})$};
\node [anchor=east](node5) at ([xshift=-0.3em]node3.west) {$\textrm{F}(\mathbf{x})+\mathbf{x}$};
\node [anchor=east](node1-1) at ([xshift=0.5em,yshift=-1.0em]node1.west) {$\mathbf{x}$};
\draw[->,thick]([xshift=-0.1em]node1-1.east)--([xshift=4.0em]node1-1.east)--([xshift=4.0em,yshift=-4.45em]node1-1.east)--([xshift=-0.35em]node3.east);
\node [anchor=north](node1) at ([yshift=-1.8em]node2.south) {};
\draw[->,thick]([yshift=0.0em]node1.north)--(node2.south);
\node [anchor=east](node1-1) at ([xshift=1em,yshift=0.4em]node1.east) {$\mathbf{x}$};
\draw[->,thick]([xshift=-1.3em,yshift=0.8em]node1-1.east)--([xshift=2.7em,yshift=0.8em]node1-1.east)--([xshift=2.7em,yshift=5.35em]node1-1.east)--([xshift=-0.4em]node3.east);
\node[anchor=west](node2-1) at ([xshift=2.3em]node2.east) {$\mathbf{x}$};
\node[anchor=north](node2-2) at ([xshift=0.2em,yshift=-0.5em]node2-1.south) {\footnotesize{$\rm{identity}$}};
\node [anchor=north](node6) at ([yshift=-1.2em]node3.south) {};
\draw[->,thick]([yshift=0.3em]node3.south)--([yshift=0.0em]node6.north);
\node[anchor=west](node6-1) at ([xshift=-0.2em,yshift=0.6em]node6.east) {\footnotesize{$\rm{Relu}$}};
\end{scope}
......
......@@ -13,12 +13,12 @@
\node [anchor=north,color=red] (node9) at ([xshift=0.6em,yshift=-1.7em]node1.south) {\large{$\bullet$}};
\node [anchor=north,color=red] (node10) at ([xshift=0.0em,yshift=-1.2em]node1.south) {\large{$\bullet$}};
\draw[-,ublue]([xshift=0.5em,yshift=0.46em]node4.south west)--([xshift=-0.5em,yshift=-0.4em]node5.north east);
\draw[-,ublue]([xshift=-0.45em,yshift=0.52em]node6.south east)--([xshift=0.47em,yshift=-0.43em]node5.north west);
\draw[-,ublue]([xshift=0.5em,yshift=0.46em]node6.south west)--([xshift=-0.5em,yshift=-0.4em]node7.north east);
\draw[-,ublue]([xshift=-0.45em,yshift=0.52em]node8.south east)--([xshift=0.47em,yshift=-0.43em]node7.north west);
\draw[-,ublue,line width=0.3mm]([xshift=0.5em,yshift=0.46em]node4.south west)--([xshift=-0.5em,yshift=-0.4em]node5.north east);
\draw[-,ublue,line width=0.3mm]([xshift=-0.45em,yshift=0.52em]node6.south east)--([xshift=0.47em,yshift=-0.43em]node5.north west);
\draw[-,ublue,line width=0.3mm]([xshift=0.5em,yshift=0.46em]node6.south west)--([xshift=-0.5em,yshift=-0.4em]node7.north east);
\draw[-,ublue,line width=0.3mm]([xshift=-0.45em,yshift=0.52em]node8.south east)--([xshift=0.47em,yshift=-0.43em]node7.north west);
\draw[-,ublue]([xshift=0.5em,yshift=0.46em]node8.south west)--([xshift=-0.5em,yshift=-0.4em]node9.north east);
\draw[-,ublue]([xshift=-0.78em,yshift=0.77em]node9.south east)--([xshift=0.78em,yshift=-0.68em]node10.north west);
\draw[-,ublue,line width=0.3mm]([xshift=-0.78em,yshift=0.77em]node9.south east)--([xshift=0.78em,yshift=-0.68em]node10.north west);
......@@ -39,6 +39,47 @@
\draw[-,ublue](0,-0.8)..controls(0.5,-0.8) and (0.6,-0.85)..(0.6,-0.9)..controls(0.6,-0.93)and (0.5,-0.91)..(0.3,-0.88)..controls(0.2,-0.87)and (0.1,-0.86)..(0,-0.86)..controls(-0.1,-0.86)and(-0.2,-0.87)..(-0.3,-0.88)..controls(-0.5,-0.91) and(-0.6,-0.93) ..(-0.6,-0.9)..controls(-0.6,-0.85)and (-0.5,-0.8)..(0,-0.8);
\node [anchor=north] (labela) at (0,-2.7) {\footnotesize{(a)梯度下降算法中的``锯齿''现象}};
\end{scope}
%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{scope}[yshift=-2in]
\node [anchor=center,color=red] (node1) at (0,0) {};
\node [anchor=north,color=red] (node2) at ([xshift=0.0em,yshift=-1.2em]node1.south) {\large{$\bullet$}};
\node [anchor=north,color=red] (node3) at ([xshift=3.55em,yshift=-0.981em]node1.south) {\large{$\bullet$}};
\node [anchor=north,color=red] (node4) at ([xshift=7.75em,yshift=-2.91em]node1.south) {\large{$\bullet$}};
\node [anchor=north,color=red] (node5) at ([xshift=11.38em,yshift=-1.11em]node1.south) {\large{$\bullet$}};
\draw[-,ublue,line width=0.3mm]([xshift=0.79em,yshift=-0.59em]node2.north west)--([xshift=-0.75em,yshift=0.6em]node3.south east);
\draw[-,ublue,line width=0.3mm]([xshift=0.79em,yshift=0.66em]node3.south west)--([xshift=-0.76em,yshift=-0.5em]node4.north east);
\draw[-,ublue,line width=0.3mm]([xshift=0.79em,yshift=-0.59em]node4.north west)--([xshift=-0.75em,yshift=0.6em]node5.south east);
\draw [-,ublue] (0,0) .. controls (2,0) and (3,-1.0)..(3,-1.5) .. controls (3,-2.2) and (2,-1.75)..(1.5,-1.65)..controls (1.5,-1.65) and (0.5,-1.45)..(0,-1.45)..controls (-0.5,-1.45) and (-1.5,-1.65)..(-1.5,-1.65)..controls (-2,-1.75)and (-3,-2.2).. (-3,-1.5)..controls (-3,-1.0) and (-2,0)..(0,0);
\draw [-,ublue] (0,0.5)..controls (2,0.5) and (4,-1.0).. (4,-1.7)..controls(4,-2.6)and (3,-2.3)..(2,-2.05)..controls (2,-2.05) and (1,-1.80)..(0,-1.80)..controls (-1,-1.80)and (-2,-2.05)..(-2,-2.05)..controls(-3,-2.3)and(-4,-2.6)..(-4,-1.7)..controls(-4,-1.0)and (-2,0.5)..(0,0.5);
\draw[-,ublue](0,1.0)..controls(3,1.0) and (5,-1.0)..(5,-1.9)..controls (5,-3.2)and (4,-2.7)..(3,-2.5)..controls (3,-2.5) and (2,-2.20)..(0,-2.15)..controls (-2,-2.20)and (-3,-2.5)..(-3,-2.5)..controls (-4,-2.7) and (-5,-3.2) ..(-5,-1.9)..controls (-5,-1.0) and (-3,1.0)..(0,1.0);
\draw[-,ublue] (0,-0.3)..controls (1.5,-0.3)and (2.5,-1.0)..(2.5,-1.4)..controls(2.5,-1.8)and (2,-1.55)..(1.5,-1.45) ..controls (1.5,-1.45) and (0.5,-1.25)..(0,-1.25) .. controls(-0.5,-1.25)and (-1.5,-1.45)..(-1.5,-1.45)..controls(-2,-1.55)and (-2.5,-1.8) ..(-2.5,-1.4)..controls(-2.5,-1.0) and (-1.5,-0.3)..(0,-0.3);
\draw[-,ublue](0,-0.5)..controls (1.0,-0.5) and (1.9,-0.8)..(1.9,-1.3)..controls(1.9,-1.5)and (1.5,-1.3)..(1.0,-1.2) ..controls(1.0,-1.2) and (0.5,-1.1)..(0,-1.1)..controls(-0.5,-1.1) and (-1.0,-1.2)..(-1.0,-1.2)..controls (-1.5,-1.3)and (-1.9,-1.5)..(-1.9,-1.3) ..controls(-1.9,-0.8)and (-1.0,-0.5) ..(0,-0.5);
\draw[-,ublue](0,-0.7)..controls(1.0,-0.7) and (1.4,-0.9)..(1.4,-1.1) .. controls(1.4,-1.25) and (1.2,-1.15)..(1.0,-1.1)..controls(1.0,-1.1) and (0.5,-0.95)..(0,-0.95)..controls(-0.5,-0.95)and (-1.0,-1.1) ..(-1.0,-1.1)..controls(-1.2,-1.15) and (-1.4,-1.25)..(-1.4,-1.1)..controls(-1.4,-0.9) and (-1.0,-0.7)..(0,-0.7);
\draw[-,ublue](0,-0.75)..controls(0.7,-0.75)and (1.0,-0.9)..(1.0,-1.0)..controls(1.0,-1.05) and (0.9,-1.05)..(0.7,-1.0)..controls(0.5,-0.95)and (0.3,-0.9)..(0,-0.9)..controls(-0.3,-0.9)and (-0.5,-0.95)..(-0.7,-1.0)..controls(-0.9,-1.05)and (-1.0,-1.05)..(-1.0,-1.0) ..controls(-1.0,-0.9)and (-0.7,-0.75)..(0,-0.75);
\draw[-,ublue](0,-0.8)..controls(0.5,-0.8) and (0.6,-0.85)..(0.6,-0.9)..controls(0.6,-0.93)and (0.5,-0.91)..(0.3,-0.88)..controls(0.2,-0.87)and (0.1,-0.86)..(0,-0.86)..controls(-0.1,-0.86)and(-0.2,-0.87)..(-0.3,-0.88)..controls(-0.5,-0.91) and(-0.6,-0.93) ..(-0.6,-0.9)..controls(-0.6,-0.85)and (-0.5,-0.8)..(0,-0.8);
\node [anchor=north] (labelb) at (0,-3) {\footnotesize{(b)Momentum梯度下降算法更加``平滑''地更新}};
\end{scope}
\end{tikzpicture}
%%%------------------------------------------------------------------------------------------------------------
......
......@@ -33,6 +33,14 @@
\node [] (y) at ([yshift=2.5em]n20.north) {$y$};
\draw [->,thick] ([yshift=0.1em]n20.north) -- (y.south);
%% weight and bias
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (b0) at ([yshift=2em,xshift=-0.5em]b.north) {\tiny{$b_1$}};}
{\node [anchor=center,rotate=-59,fill=white,inner sep=1pt] (w2) at ([yshift=1em,xshift=-1.0em]x1.north) {\tiny{$w_1$}};}
{\node [anchor=center,rotate=62,fill=white,inner sep=1pt] (w21) at ([yshift=1.2em,xshift=0em]n10.north) {\tiny{$w'_1$}};}
{\node [anchor=center,rotate=-62,fill=white,inner sep=1pt] (w22) at ([yshift=1.2em,xshift=-0em]n11.north) {\tiny{$w'_2$}};}
{\node [anchor=center,rotate=59,fill=white,inner sep=1pt] (b1) at ([yshift=3.4em,xshift=1.5em]b.north) {\tiny{$b_2$}};}
{\node [anchor=center,rotate=90,fill=white,inner sep=1pt] (w1) at ([yshift=2em,xshift=0.5em]x1.north) {\tiny{$w_2$}};}
%% sigmoid box
\begin{scope}
{
......
......@@ -2,19 +2,19 @@
\begin{tikzpicture}
\node [anchor=west,minimum width=1.5em,minimum height=1.5em] (part1) at (0,0) {\footnotesize{$y$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part1-2) at ([xshift=-1.6em,yshift=-0.3em]part1.south) {\scriptsize {$\rm {shape(1)}$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,fill=orange!20] (part2) at ([yshift=-1.5em]part1.south) {\footnotesize {$\rm{sigmoid}$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part1-2) at ([xshift=-1.2em,yshift=-0.3em]part1.south) {\scriptsize {$1\times1$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,fill=orange!20] (part2) at ([yshift=-1.5em]part1.south) {\footnotesize {$\rm{Sigmoid}$}};
\draw [-,thick](part1.south)--(part2.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part2-2) at ([xshift=-1.6em,yshift=-0.3em]part2.south) {\scriptsize {$\rm{shape(1)}$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part2-2) at ([xshift=-1.2em,yshift=-0.3em]part2.south) {\scriptsize {$1\times1$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,fill=green!20] (part3) at ([yshift=-1.5em]part2.south) {\footnotesize {$\rm{ADD}$}};
\draw [-,thick](part2.south)--(part3.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part3-2) at ([xshift=-1.6em,yshift=-0.3em]part3.south) {\scriptsize {$\rm {shape(1)}$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part3-2) at ([xshift=-1.2em,yshift=-0.3em]part3.south) {\scriptsize {$1\times1$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,fill=blue!20] (part4) at ([yshift=-1.5em]part3.south) {\footnotesize {$\rm{MUL}$}};
\draw [-,thick](part3.south)--(part4.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part4-2) at ([xshift=-1.6em,yshift=-0.2em]part4.south) {\scriptsize {$\rm {shape(2)}$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part4-2) at ([xshift=-1.2em,yshift=-0.2em]part4.south) {\scriptsize {$1\times 2$}};
\node [anchor=north,minimum width=4.0em,minimum height=1.5em] (part5) at ([yshift=-1.4em]part4.south) {\footnotesize {$\mathbf a$}};
\draw [-,thick](part4.south)--([yshift=-0.1em]part5.north);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......@@ -22,22 +22,22 @@
\node [anchor=west,minimum width=2.0em,minimum height=1.5em,draw,fill=orange!40] (part5-4) at ([xshift=2.0em,yshift=0.0em]part5-3.east) {\footnotesize {$\mathbf b^2$}};
\draw[-,thick](part4.south)--(part5-3.north);
\draw[-,thick](part3.south)--(part5-4.north);
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part5-3-1) at ([xshift=1.3em,yshift=-0.45em]part5-3.north) {\scriptsize {$\rm{shape(2)}$}};
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part5-4-1) at ([xshift=1.3em,yshift=-0.45em]part5-4.north) {\scriptsize {$\rm{shape(1)}$}};
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part5-3-1) at ([xshift=1.1em,yshift=-0.45em]part5-3.north) {\scriptsize {$1\times 2$}};
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part5-4-1) at ([xshift=1.1em,yshift=-0.45em]part5-4.north) {\scriptsize {$1\times1$}};
%%%%%%%%%%%%%%%%%%%%%%%%%%
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part5-2) at ([xshift=-1.6em,yshift=-0.2em]part5.south) {\scriptsize {$\rm{shape(2)}$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,fill=yellow!20] (part6) at ([yshift=-1.4em]part5.south) {\footnotesize {$\rm{tanh}$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part5-2) at ([xshift=-1.2em,yshift=-0.2em]part5.south) {\scriptsize {$1\times 2$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,fill=yellow!20] (part6) at ([yshift=-1.4em]part5.south) {\footnotesize {$\rm{Tanh}$}};
\draw [-,thick]([yshift=0.1em]part5.south)--(part6.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part6-2) at ([xshift=-1.6em,yshift=-0.3em]part6.south) {\scriptsize {$\rm{shape(2)}$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part6-2) at ([xshift=-1.2em,yshift=-0.3em]part6.south) {\scriptsize {$1\times 2$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,fill=green!20] (part7) at ([yshift=-1.5em]part6.south) {\footnotesize {$\rm{ADD}$}};
\draw [-,thick](part6.south)--(part7.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part7-2) at ([xshift=-1.6em,yshift=-0.3em]part7.south) {\scriptsize {$\rm{shape(2)}$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part7-2) at ([xshift=-1.2em,yshift=-0.3em]part7.south) {\scriptsize {$1\times 2$}};
\node [anchor=north,draw,minimum width=4.0em,minimum height=1.5em,fill=blue!20] (part8) at ([yshift=-1.5em]part7.south) {\footnotesize {$\rm{MUL}$}};
\draw [-,thick](part7.south)--(part8.north);
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part8-2) at ([xshift=-1.6em,yshift=-0.2em]part8.south) {\scriptsize{$\rm{shape(2)}$}};
\node [anchor=north,minimum width=1.5em,minimum height=1.5em] (part8-2) at ([xshift=-1.2em,yshift=-0.2em]part8.south) {\scriptsize{$1\times 2$}};
\node [anchor=north,minimum width=4.0em,minimum height=1.5em] (part9) at ([yshift=-1.4em]part8.south) {\footnotesize {$\mathbf x$}};
\draw [-,thick](part8.south)--([yshift=-0.1em]part9.north);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......@@ -45,8 +45,8 @@
\node [anchor=west,minimum width=2.0em,minimum height=1.5em,draw,fill=orange!40] (part9-4) at ([xshift=2.0em,yshift=0.0em]part9-3.east) {\footnotesize {$\mathbf b^1$}};
\draw[-,thick](part8.south)--(part9-3.north);
\draw[-,thick](part7.south)--(part9-4.north);
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part9-3-1) at ([xshift=1.5em,yshift=-0.45em]part9-3.north) {\scriptsize {$\rm{shape(3,2)}$}};
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part9-4-1) at ([xshift=1.3em,yshift=-0.45em]part9-4.north) {\scriptsize {$\rm{shape(2)}$}};
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part9-3-1) at ([xshift=1.1em,yshift=-0.45em]part9-3.north) {\scriptsize {$3\times 2$}};
\node [anchor=south,minimum width=1.5em,minimum height=1.5em] (part9-4-1) at ([xshift=1.1em,yshift=-0.45em]part9-4.north) {\scriptsize {$1\times 2$}};
%%%%%%%%%%%%%%%%%%%%%%%%%%
\end{tikzpicture}
......
......@@ -2706,6 +2706,87 @@ year ={2008},
//biburl = {https://dblp.org/rec/conf/wmt/ZollmannV06.bib},
//bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{brown1992class,
title={Class-based n-gram models of natural language},
author={Brown and
Peter F and
Desouza and
Peter V and
Mercer amd
Robert L
and Pietra and
Vincent J Della
and Lai and
Jenifer C},
journal={Computational linguistics},
volume={18},
number={4},
pages={467--479},
year={1992},
publisher={MIT Press}
}
@article{bengio2003a,
title={A neural probabilistic language model},
author={Bengio and
Yoshua and
Ducharme and
Rejean and
Vincent and
Pascal and
Janvin and
Christian},
journal={Journal of Machine Learning Research},
volume={3},
number={6},
pages={1137--1155},
year={2003}
}
@inproceedings{mikolov2012context,
title={Context dependent recurrent neural network language model},
author={Mikolov and
Tomas and
Zweig and
Geoffrey},
booktitle={2012 IEEE Spoken Language Technology Workshop (SLT)},
pages={234--239},
year={2012},
organization={IEEE}
}
@article{zaremba2014recurrent,
title={Recurrent Neural Network Regularization},
author={Zaremba and
Wojciech and
Sutskever and
Ilya and
Vinyals and
Oriol},
journal={arXiv: Neural and Evolutionary Computing},
year={2014}
}
@article{zilly2016recurrent,
title={Recurrent Highway Networks},
author={Zilly and
Julian and
Srivastava and
Rupesh Kumar and
Koutnik and
Jan and
Schmidhuber and
Jurgen},
journal={arXiv: Learning},
year={2016}
}
@article{merity2017regularizing,
title={Regularizing and optimizing LSTM language models},
author={Merity and
tephen and
Keskar and
Nitish Shirish and
Socher and
Richard},
journal={arXiv: Computation and Language},
year={2017}
}
%%%%% chapter 5------------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论