Commit 28b2ae5b by xiaotong

updates of section 5

parent bd69d653
......@@ -7,20 +7,20 @@
\node [anchor=east] (prev) at ([xshift=-2em]h.west) {...};
\node [anchor=west] (next) at ([xshift=2em]h2.east) {...};
\draw [->,thick] ([xshift=0.1em]prev.east) -- ([xshift=-0.1em]h.west);
\draw [->,thick] ([xshift=0.1em]h.east) -- ([xshift=-0.1em]s.west) node [pos=0.5,below] {\tiny{$\textbf{s}^k = \textbf{h}^{k-1}\textbf{w}^k$}};
\draw [->,thick] ([xshift=0.1em]s.east) -- ([xshift=-0.1em]h2.west) node [pos=0.5,below] {\tiny{$\textbf{h}^k = f^k(\textbf{s}^{k})$}};
\draw [->,thick] ([xshift=0.1em]h.east) -- ([xshift=-0.1em]s.west) node [pos=0.5,below] {\scriptsize{$\textbf{s}^k = \textbf{h}^{k-1}\textbf{w}^k$}};
\draw [->,thick] ([xshift=0.1em]s.east) -- ([xshift=-0.1em]h2.west) node [pos=0.5,below] {\scriptsize{$\textbf{h}^k = f^k(\textbf{s}^{k})$}};
\draw [->,thick] ([xshift=0.1em]h2.east) -- ([xshift=-0.1em]next.west);
{
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]h2.east) -- ([xshift=-0.1em,yshift=0.4em]next.west) node [pos=0.8,above] {\tiny{反向传播}};
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]h2.east) -- ([xshift=-0.1em,yshift=0.4em]next.west) node [pos=0.8,above] {\scriptsize{反向传播}};
}
{
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]s.east) -- ([xshift=-0.1em,yshift=0.4em]h2.west) node [pos=0.5,above] {\tiny{反向传播}};
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]s.east) -- ([xshift=-0.1em,yshift=0.4em]h2.west) node [pos=0.5,above] {\scriptsize{反向传播}};
}
{
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]h.east) -- ([xshift=-0.1em,yshift=0.4em]s.west) node [pos=0.5,above] {\tiny{反向传播}};
\draw [<-,thick,red] ([xshift=0.1em,yshift=0.4em]h.east) -- ([xshift=-0.1em,yshift=0.4em]s.west) node [pos=0.5,above] {\scriptsize{反向传播}};
}
{
......@@ -33,7 +33,7 @@
}
{
\node [anchor=south] (slabel) at (s.north) {$\pi^k = \frac{\partial L}{\partial \textbf{s}^{k}}$};
\node [anchor=south] (slabel) at (s.north) {$\frac{\partial L}{\partial \textbf{s}^{k}}$};
}
{
......
......@@ -7,8 +7,8 @@
\draw [->] (s.east) -- (h2.west);
\draw [->] (h2.east) -- (l.west);
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]l.north) -- ([yshift=1em,xshift=0.1em]h2.north) node [pos=0.5,above] {\tiny{求梯度{$\frac{\partial L}{\partial \textbf{h}^K} = ?$}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]h2.north) -- ([yshift=1em,xshift=0.1em]s.north) node [pos=0.5,above] {\tiny{求梯度{$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} = ?$}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]l.north) -- ([yshift=1em,xshift=0.1em]h2.north) node [pos=0.5,above] {\scriptsize{求梯度{$\frac{\partial L}{\partial \textbf{h}^K} = ?$}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]h2.north) -- ([yshift=1em,xshift=0.1em]s.north) node [pos=0.5,above] {\scriptsize{求梯度{$\frac{\partial f^K(\textbf{s}^K)}{\partial \textbf{s}^K} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]l.north) -- ([yshift=1.5em]l.north);
\draw [-,very thick,red] ([yshift=0.5em]h2.north) -- ([yshift=1.5em]h2.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
......
......@@ -2,19 +2,19 @@
\begin{tikzpicture}
\begin{scope}
\node [anchor=center,minimum height=1.7em,fill=yellow!20,draw] (h) at (0,0) {$\textbf{h}^{K-1}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=5.5em]h.east) {$\textbf{s}^{K}$};
\node [anchor=west,minimum height=1.7em,fill=blue!20,draw] (s) at ([xshift=6.0em]h.east) {$\textbf{s}^{K}$};
\draw [->] (h.east) -- (s.west);
\node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\tiny{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}};
\node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\scriptsize{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}};
\node [anchor=south west] (slabel) at ([yshift=1em,xshift=0.3em]s.north) {\scriptsize{\red{\textbf{{已经得到:$\pi^K = \frac{\partial L}{\partial \textbf{s}^K}$}}}}};
\draw [->,red] ([yshift=0.3em]slabel.south) .. controls +(south:0.5) and +(north:0.5) .. ([xshift=0.5em]s.north);
{
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]s.north) -- ([yshift=1em,xshift=0.1em]h.north) node [pos=0.5,above] {\tiny{{$\frac{\partial L}{\partial \textbf{w}^K} = ?$, $\frac{\partial L}{\partial \textbf{h}^{K-1}} = ?$}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]s.north) -- ([yshift=1em,xshift=0.1em]h.north) node [pos=0.5,above] {\scriptsize{{$\frac{\partial L}{\partial \textbf{w}^K} = ?$, $\frac{\partial L}{\partial \textbf{h}^{K-1}} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]h.north) -- ([yshift=1.5em]h.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
}
......
......@@ -33,7 +33,7 @@
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t5) at ([yshift=1em]Trm9.north) {\scriptsize{$\textbf{h}_m$}};
\node [anchor=west,draw,inner sep=3pt,fill=blue!20!white,minimum width=1em] (Lt1) at ([yshift=1.5em]t1.west) {\tiny{TRM}};
\node [anchor=west] (Lt2) at ([xshift=-0.1em]Lt1.east) {\tiny{: Transformer}};
\node [anchor=west] (Lt2) at ([xshift=-0.1em]Lt1.east) {\scriptsize{: Transformer Block}};
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm1.south);
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tcolorbox}
[bicolor,sidebyside,width=12cm,righthand width=4cm,size=title,frame engine=empty,
[bicolor,sidebyside,width=13cm,righthand width=4cm,size=title,frame engine=empty,
colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
......@@ -14,16 +14,16 @@
\texttt{} \\
\texttt{CrossEntropyBackward(dh[4], y, gold);} \\
\texttt{SoftmaxBackward(y, s[4], dh[4], ds[4]);}\\
\texttt{MMul(h[3], {\tiny X\_TRANS}, ds[4], {\tiny X\_NOTRANS}, dw[4]);}\\
\texttt{MMul(ds[4], {\tiny X\_NOTRANS}, w[4], {\tiny X\_RANS}, dh[3]);}\\
\texttt{MMul(h[3], {\scriptsize X\_TRANS}, ds[4], {\scriptsize X\_NOTRANS}, dw[4]);}\\
\texttt{MMul(ds[4], {\scriptsize X\_NOTRANS}, w[4], {\scriptsize X\_RANS}, dh[3]);}\\
\texttt{} \\
\texttt{dh[2] = dh[3];}\\
\texttt{ReluBackward(h[2], s[2], dh[2], ds[2]);}\\
\texttt{MMul(h[1], {\tiny X\_TRANS}, ds[2], {\tiny X\_NOTRANS}, dw[2]);}\\
\texttt{MMul(ds[2], {\tiny X\_NOTRANS}, w[2], {\tiny X\_TRANS}, dh[2]);}\\
\texttt{MMul(h[1], {\scriptsize X\_TRANS}, ds[2], {\scriptsize X\_NOTRANS}, dw[2]);}\\
\texttt{MMul(ds[2], {\scriptsize X\_NOTRANS}, w[2], {\scriptsize X\_TRANS}, dh[2]);}\\
......@@ -46,10 +46,10 @@
\begin{tikzpicture}
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at (0,0) {\tiny{x (input)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1.5em]h1.north) {\tiny{h1 = Relu(x * w1)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\tiny{h2 = Relu(h1 * w2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\tiny{h3 = h2 + h1}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at (0,0) {\scriptsize{x (input)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1.5em]h1.north) {\scriptsize{h1 = Relu(x * w1)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\scriptsize{h2 = Relu(h1 * w2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\scriptsize{h3 = h2 + h1}};
{\draw [->,thick] (h1.north) -- (h2.south);}
{\draw [->,thick] (h2.north) -- (h3.south);}
......
%%%------------------------------------------------------------------------------------------------------------
\begin{tcolorbox}
[bicolor,sidebyside,width=12cm,righthand width=4cm,size=title,frame engine=empty,
[bicolor,sidebyside,width=13cm,righthand width=4cm,size=title,frame engine=empty,
colback=blue!10!white,colbacklower=black!5!white]
{\scriptsize
\begin{tabbing}
......@@ -14,16 +14,16 @@
\texttt{} \\
\texttt{CrossEntropyBackward(dh[4], y, gold);} \\
\texttt{SoftmaxBackward(y, s[4], dh[4], ds[4]);}\\
\texttt{MMul(h[3], {\tiny X\_TRANS}, ds[4], {\tiny X\_NOTRANS}, dw[4]);}\\
\texttt{MMul(ds[4], {\tiny X\_NOTRANS}, w[4], {\tiny X\_RANS}, dh[3]);}\\
\texttt{MMul(h[3], {\scriptsize X\_TRANS}, ds[4], {\scriptsize X\_NOTRANS}, dw[4]);}\\
\texttt{MMul(ds[4], {\scriptsize X\_NOTRANS}, w[4], {\scriptsize X\_RANS}, dh[3]);}\\
\texttt{} \\
\texttt{dh[2] = dh[3];}\\
\texttt{ReluBackward(h[2], s[2], dh[2], ds[2]);}\\
\texttt{MMul(h[1], {\tiny X\_TRANS}, ds[2], {\tiny X\_NOTRANS}, dw[2]);}\\
\texttt{MMul(ds[2], {\tiny X\_NOTRANS}, w[2], {\tiny X\_TRANS}, dh[2]);}\\
\texttt{MMul(h[1], {\scriptsize X\_TRANS}, ds[2], {\scriptsize X\_NOTRANS}, dw[2]);}\\
\texttt{MMul(ds[2], {\scriptsize X\_NOTRANS}, w[2], {\scriptsize X\_TRANS}, dh[2]);}\\
......@@ -46,10 +46,10 @@
\begin{tikzpicture}
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at (0,0) {\tiny{x (input)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1.5em]h1.north) {\tiny{h1 = Relu(x * w1)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\tiny{h2 = Relu(h1 * w2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\tiny{h3 = h2 + h1}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h1) at (0,0) {\scriptsize{x (input)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h2) at ([yshift=1.5em]h1.north) {\scriptsize{h1 = Relu(x * w1)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\scriptsize{h2 = Relu(h1 * w2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\scriptsize{h3 = h2 + h1}};
{\draw [->,thick] (h1.north) -- (h2.south);}
{\draw [->,thick] (h2.north) -- (h3.south);}
......
......@@ -7,8 +7,8 @@
\node [anchor=east] (prev) at ([xshift=-2em]h.west) {...};
\node [anchor=west] (next) at ([xshift=2em]h2.east) {...};
\draw [->,thick] ([xshift=0.1em]prev.east) -- ([xshift=-0.1em]h.west);
\draw [->,thick] ([xshift=0.1em]h.east) -- ([xshift=-0.1em]s.west) node [pos=0.5,below] {\tiny{$\textbf{s}^k = \textbf{h}^{k-1}\textbf{w}^k$}};
\draw [->,thick] ([xshift=0.1em]s.east) -- ([xshift=-0.1em]h2.west) node [pos=0.5,below] {\tiny{$\textbf{h}^k = f^k(\textbf{s}^{k})$}};
\draw [->,thick] ([xshift=0.1em]h.east) -- ([xshift=-0.1em]s.west) node [pos=0.5,below] {\scriptsize{$\textbf{s}^k = \textbf{h}^{k-1}\textbf{w}^k$}};
\draw [->,thick] ([xshift=0.1em]s.east) -- ([xshift=-0.1em]h2.west) node [pos=0.5,below] {\scriptsize{$\textbf{h}^k = f^k(\textbf{s}^{k})$}};
\draw [->,thick] ([xshift=0.1em]h2.east) -- ([xshift=-0.1em]next.west);
......
......@@ -33,7 +33,7 @@
\node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2em] (t5) at ([yshift=1em]Trm9.north) {\scriptsize{$\textbf{h}_m$}};
\node [anchor=west,draw,inner sep=3pt,fill=blue!20!white,minimum width=1em] (Lt1) at ([yshift=1.5em]t1.west) {\tiny{TRM}};
\node [anchor=west] (Lt2) at ([xshift=-0.1em]Lt1.east) {\tiny{: Transformer}};
\node [anchor=west] (Lt2) at ([xshift=-0.1em]Lt1.east) {\scriptsize{: Transformer Block}};
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm0.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]Trm1.south);
......
......@@ -112,13 +112,13 @@
% CHAPTERS
%----------------------------------------------------------------------------------------
%\include{Chapter1/chapter1}
%\include{Chapter2/chapter2}
%\include{Chapter3/chapter3}
%\include{Chapter4/chapter4}
\include{Chapter1/chapter1}
\include{Chapter2/chapter2}
\include{Chapter3/chapter3}
\include{Chapter4/chapter4}
\include{Chapter5/chapter5}
%\include{Chapter6/chapter6}
%\include{ChapterAppend/chapterappend}
\include{Chapter6/chapter6}
\include{ChapterAppend/chapterappend}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论