Commit 6f47bff6 by 曹润柘

合并分支 'master' 到 'caorunzhe'

Master

查看合并请求 !191
parents d93b88a7 bc6ad7b2
......@@ -742,6 +742,20 @@ c_{\textrm{KN}}(\cdot) = \left\{\begin{array}{ll}
\parinterval Kneser-Ney平滑是很多语言模型工具的基础\upcite{heafield2011kenlm,stolcke2002srilm}。还有很多以此为基础衍生出来的算法,感兴趣的读者可以通过参考文献自行了解\upcite{parsing2009speech,ney1994structuring,chen1999empirical}
%----------------------------------------------------------------------------------------
% NEW SSUB-SECTION
%----------------------------------------------------------------------------------------
\subsection{语言模型的评价}
\parinterval 在使用语言模型时,往往需要知道模型的质量。{\small\sffamily\bfseries{困惑度}}\index{困惑度}(Perplexity\index{Perplexity},PPL)是一种衡量语言模型的好坏的指标。对于一个真实的词序列$ w_1\dots w_m $,困惑度被定义为
\begin{eqnarray}
{\rm{PPL}}&=&{\rm P}{(w_1\dots w_m)}^{- \frac{1}{m}}
\label{eq:5-65}
\end{eqnarray}
\parinterval 本质上,PPL反映了语言模型对序列可能性预测能力的一种评估。如果$ w_1\dots w_m $\\是真实的自然语言,``完美''的模型会得到$ {\rm P} (w_1\dots w_m)=1 $,它对应了最低的困惑度$ {\rm{PPL}}=1$,这说明模型可以完美地对词序列出现的可能性进行预测。当然,真实的语言模型是无法达到$ {\rm{PPL}}=1$的,比如,在著名的Penn Treebank(PTB)数据上最好的语言模型的PPL值也只能到达35左右。可见自然语言处理任务的困难程度。
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
......
......@@ -10,11 +10,11 @@
\node [anchor=south west,inner sep=2pt] (step100) at ([xshift=0.5em,yshift=-0.8em]h.north east) {\scriptsize{$\textbf{s}^K = \textbf{h}^{K-1} \textbf{w}^K$}};
\node [anchor=south west] (slabel) at ([yshift=1em,xshift=0.3em]s.north) {\scriptsize{\red{\textbf{{已经得到:$\pi^K = \frac{\partial L}{\partial \textbf{s}^K}$}}}}};
\draw [->,red] ([yshift=0.3em]slabel.south) .. controls +(south:0.5) and +(north:0.5) .. ([xshift=0.5em]s.north);
\node [anchor=south west] (slabel) at ([yshift=1em,xshift=0.3em]s.north) {\scriptsize{\textbf{{已经得到:$\pi^K = \frac{\partial L}{\partial \textbf{s}^K}$}}}};
\draw [->] ([yshift=0.3em]slabel.south) .. controls +(south:0.5) and +(north:0.5) .. ([xshift=0.5em,yshift=0.1em]s.north);
{
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]s.north) -- ([yshift=1em,xshift=0.1em]h.north) node [pos=0.5,above] {\scriptsize{{$\frac{\partial L}{\partial \textbf{w}^K} = ?$, $\frac{\partial L}{\partial \textbf{h}^{K-1}} = ?$}}};
\draw [->,very thick,red] ([yshift=1em,xshift=-0.1em]s.north) -- ([yshift=1.0em,xshift=0.1em]h.north) node [pos=0.5,above] {\scriptsize{{$\frac{\partial L}{\partial \textbf{w}^K} = ?$, $\frac{\partial L}{\partial \textbf{h}^{K-1}} = ?$}}};
\draw [-,very thick,red] ([yshift=0.5em]h.north) -- ([yshift=1.5em]h.north);
\draw [-,very thick,red] ([yshift=0.5em]s.north) -- ([yshift=1.5em]s.north);
}
......
......@@ -51,15 +51,15 @@
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h3) at ([yshift=1.5em]h2.north) {\scriptsize{h2 = Relu(h1 * w2)}};
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8em,minimum height=1.2em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (h4) at ([yshift=1.5em]h3.north) {\scriptsize{h3 = h2 + h1}};
{\draw [<-,very thick,red] (h1.north) -- (h2.south);}
{\draw [<-,very thick,red] (h2.north) -- (h3.south);}
{\draw [<-,very thick,red] (h3.north) -- (h4.south);}
{\draw [<-,very thick,red,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);}
{\draw [->,very thick] (h1.north) -- (h2.south);}
{\draw [->,very thick] (h2.north) -- (h3.south);}
{\draw [->,very thick] (h3.north) -- (h4.south);}
{\draw [->,very thick,rounded corners] (h2.east) -- ([xshift=0.5em]h2.east) -- ([xshift=0.5em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=0.5em]h3.north east) -- ([xshift=-2em,yshift=1.5em]h3.north east);}
\node [anchor=south,draw,rounded corners,inner sep=2pt,minimum width=8.0em,minimum height=1.2em,fill=red!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}] (slayer) at ([yshift=1.5em]h4.north) {\tiny{h4 = Softmax(h3 * w4) (output)}};
\node [anchor=south] (losslabel) at (slayer.north) {\scriptsize{\textbf{Cross Entropy Loss}}};
{\draw [<-,very thick,red] (h4.north) -- (slayer.south);}
{\draw [->,very thick] (h4.north) -- (slayer.south);}
\end{tikzpicture}
\end{center}
......
......@@ -42,10 +42,10 @@
%% sigmoid box
\begin{scope}
{
\node [anchor=west] (flabel) at ([xshift=0.5in]y.east) {\scriptsize{sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\scriptsize{sum:}};
\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\scriptsize{$f(s)=1/(1+e^{-s})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\scriptsize{$s=x_1 \cdot w + b$}};
\node [anchor=west] (flabel) at ([xshift=0.5in]y.east) {\scriptsize{Sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\scriptsize{Sum:}};
\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\scriptsize{$f(s_2)=1/(1+e^{-s_2})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\scriptsize{$s_2=x_1 \cdot w_2 + b$}};
\draw [->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. ([xshift=-0.2em]flabel.west) ;
\begin{pgfonlayer}{background}
......@@ -136,10 +136,10 @@
%% sigmoid box
\begin{scope}
{
\node [anchor=west] (flabel) at ([xshift=0.8in]y.east) {\scriptsize{sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\scriptsize{sum:}};
\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\scriptsize{$f(s)=1/(1+e^{-s})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\scriptsize{$s=x_1 \cdot w + b$}};
\node [anchor=west] (flabel) at ([xshift=0.8in]y.east) {\scriptsize{Sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\scriptsize{Sum:}};
\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\scriptsize{$f(s_2)=1/(1+e^{-s_2})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\scriptsize{$s_2=x_1 \cdot w_2 + b$}};
\draw [->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. ([xshift=-0.2em]flabel.west) ;
\begin{pgfonlayer}{background}
{
......
......@@ -26,12 +26,12 @@
\end{pgfonlayer}
\node [anchor=west] (layer00label) at ([xshift=1.3em]x5.east) {\footnotesize{第0层}};
\node [anchor=west] (layer00label2) at (layer00label.east) {\footnotesize{\red{(输入层)}}};
\node [anchor=west] (layer00label2) at (layer00label.east) {\footnotesize{(输入层)}};
{
\node [anchor=west] (layer01label) at ([xshift=1em]layer01.east) {\footnotesize{第1层}};
}
{
\node [anchor=west] (layer01label2) at (layer01label.east) {\footnotesize{\red{({隐层})}}};
\node [anchor=west] (layer01label2) at (layer01label.east) {\footnotesize{({隐层})}};
}
%%% layer 2
......@@ -57,7 +57,7 @@
\node [anchor=west] (layer02label) at ([xshift=4.4em]layer02.east) {\footnotesize{第2层}};
{
\node [anchor=west] (layer02label2) at (layer02label.east) {\footnotesize{\red{({隐层})}}};
\node [anchor=west] (layer02label2) at (layer02label.east) {\footnotesize{({隐层})}};
}
}
......@@ -87,7 +87,7 @@
\node [anchor=west] (layer03label) at ([xshift=1em]layer03.east) {\footnotesize{第3层}};
{
\node [anchor=west] (layer03label2) at (layer03label.east) {\footnotesize{\red{({输出层})}}};
\node [anchor=west] (layer03label2) at (layer03label.east) {\footnotesize{({输出层})}};
}
}
......
......@@ -4,21 +4,21 @@ $$
\begin{smallmatrix} \underbrace{
\left\{
\begin{smallmatrix}
\left[
\left(
\begin{array}{cccc}
1& 0 &0 \\
0& 1 &0 \\
0& 0 &1
\end{array}
\right ]
\right )
\cdots
\left[
\left(
\begin{array}{cccc}
1& 0 &0 \\
0& 1 &0 \\
0& 0 &1
\end{array}
\right]
\right)
\end{smallmatrix}
\right\}
}\\5
......@@ -37,21 +37,21 @@ $$
\begin{smallmatrix} \underbrace{
\left\{
\begin{smallmatrix}
\left[
\left(
\begin{array}{cccc}
1 \\
1 \\
1
\end{array}
\right ]
\right)
\cdots
\left[
\left(
\begin{array}{cccc}
1 \\
1 \\
1
\end{array}
\right]
\right)
\end{smallmatrix}
\right\}
}\\5
......
......@@ -12,7 +12,7 @@
\node [anchor=north] (data) at ([yshift=-1em]system.south) {\scriptsize{\textbf{目标任务有标注数据}}};
\draw [->,thick] (data.north) -- ([yshift=-0.1em]system.south);
\node [anchor=north] (label) at ([yshift=-0em]data.south) {\scriptsize{(a) standard method}};
\node [anchor=north] (label) at ([yshift=-0em]data.south) {\scriptsize{(a) 标准方法}};
\end{scope}
......@@ -31,7 +31,7 @@
\draw [->,thick] (data.north) -- ([yshift=-0.1em]system.south);
\node [anchor=north] (data2) at ([yshift=-1em,xshift=-7em]system.south) {\scriptsize{\textbf{大规模无标注数据}}};
\draw [->,thick] (data2.north) -- ([yshift=-0.1em]encoderpre.south);
\node [anchor=north] (label) at ([yshift=-0em,xshift=-4em]data.south) {\scriptsize{(b) pre-training + fine-tuning}};
\node [anchor=north] (label) at ([yshift=-0em,xshift=-4em]data.south) {\scriptsize{(b) 预训练 + 微调}};
\end{scope}
......
......@@ -13,7 +13,7 @@
\node[parametershard,anchor=west,fill=yellow!10] (param1) at (0,0) {$W_o$};
\node (param2) at ([xshift=1em]param1.east) {};
\node[parametershard,anchor=west,fill=red!10] (param3) at ([xshift=1em]param2.east) {$W_h$};
\node[anchor=south,inner sep=1pt] (serverlabel) at ([yshift=0.2em]param2.north) {\footnotesize{\textbf{parameter server}: $\mathbf w_{new} = \mathbf w - \alpha\cdot \frac{\partial L}{\partial \mathbf w}$}};
\node[anchor=south,inner sep=1pt] (serverlabel) at ([yshift=0.2em]param2.north) {\footnotesize{\textbf{parameter server}: $\mathbf w_{\textrm{new}} = \mathbf w - \alpha\cdot \frac{\partial L}{\partial \mathbf w}$}};
}
\begin{pgfonlayer}{background}
......@@ -33,7 +33,7 @@
{
\draw[->,very thick,red] ([xshift=-0.5em,yshift=2pt]processor2.north) -- ([xshift=-0.5em,yshift=-2pt]serverbox.south) node [pos=0.5,align=right,xshift=-2em] (pushlabel) {\scriptsize{$\frac{\partial L}{\partial \mathbf w}$}};;
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor2.north) -- ([xshift=0.5em,yshift=-2pt]serverbox.south) node [pos=0.5,align=left,xshift=2.2em] (fetchlabel) {\scriptsize{$\mathbf w_{new}$}};;;
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor2.north) -- ([xshift=0.5em,yshift=-2pt]serverbox.south) node [pos=0.5,align=left,xshift=2.2em] (fetchlabel) {\scriptsize{$\mathbf w_{\textrm{new}}$}};;;
\draw[->,very thick,red] ([xshift=-0.5em,yshift=2pt]processor3.north) --
([xshift=3em,yshift=-2pt]serverbox.south);
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor3.north) -- ([xshift=4em,yshift=-2pt]serverbox.south) node [pos=0.5,align=left,xshift=2.2em] (fetchlabel) {\scriptsize{fetch (F)}};
......
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
\begin{axis}[
width=8cm, height=5cm,
xtick={-6,-4,...,6},
ytick={0,0.5,1},
xlabel={\small{$x$}},
ylabel={\small{Softmax($x$)}},
xlabel style={xshift=3.0cm,yshift=1cm},
axis y line=middle,
ylabel style={xshift=-2.4cm,yshift=-0.2cm},
x axis line style={->},
axis line style={very thick},
% ymajorgrids,
%xmajorgrids,
axis x line*=bottom,
xmin=-6,
xmax=6,
ymin=0,
ymax=1]
\addplot[draw=ublue,very thick]{(tanh(x/2) + 1)/2};
\end{axis}
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
......@@ -7,7 +7,7 @@
\begin{tikzpicture}
\begin{scope}[yshift=6.5em,xshift=1em]
\setcounter{mycount1}{1}
\draw[step=0.5cm,color=orange,line width=0.2mm] (-2,-2) grid (1,1);
\draw[step=0.5cm,color=orange,line width=0.4mm] (-2,-2) grid (1,1);
\foreach \y in {+0.5,-0.5,-1.5}
\foreach \x in {-1.5,-0.5,0.5}{
\node [fill=orange!20,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount1}};
......@@ -17,7 +17,7 @@
\begin{scope}[yshift=5.5em,xshift=0em]
\setcounter{mycount2}{2}
\draw[step=0.5cm,color=blue,line width=0.2mm] (-2,-2) grid (1,1);
\draw[step=0.5cm,color=blue,line width=0.4mm] (-2,-2) grid (1,1);
\foreach \y in {+0.5,-0.5,-1.5}
\foreach \x in {-1.5,-0.5,0.5}{
\node [fill=blue!20,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount2}};
......@@ -27,7 +27,7 @@
\begin{scope}[yshift=4.5em,xshift=-1em]
\setcounter{mycount3}{3}
\draw[step=0.5cm,color=ugreen,line width=0.2mm] (-2,-2) grid (1,1);
\draw[step=0.5cm,color=ugreen,line width=0.4mm] (-2,-2) grid (1,1);
\foreach \y in {+0.5,-0.5,-1.5}
\foreach \x in {-1.5,-0.5,0.5}{
\node [fill=green!20,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount3}};
......@@ -37,7 +37,7 @@
\begin{scope}[yshift=3.5em,xshift=-2em]
\setcounter{mycount4}{4}
\draw[step=0.5cm,color=red,line width=0.2mm] (-2,-2) grid (1,1);
\draw[step=0.5cm,color=red,line width=0.4mm] (-2,-2) grid (1,1);
\foreach \y in {+0.5,-0.5,-1.5}
\foreach \x in {-1.5,-0.5,0.5}{
\node [fill=red!20,inner sep=0pt,minimum height=0.98cm,minimum width=0.98cm] at (\x,\y) {\number\value{mycount4}};
......@@ -45,4 +45,4 @@
}
\end{scope}
\end{tikzpicture}
%%%------------------------------------------------------------------------------------------------------------
\ No newline at end of file
%%%------------------------------------------------------------------------------------------------------------
......@@ -44,10 +44,10 @@
%% sigmoid box
\begin{scope}
{
\node [anchor=west] (flabel) at ([xshift=1in]y.east) {\footnotesize{sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\footnotesize{sum:}};
\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\footnotesize{$f(s)=1/(1+e^{-s})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\footnotesize{$s=x_1 \cdot w + b$}};
\node [anchor=west] (flabel) at ([xshift=1in]y.east) {\footnotesize{Sigmoid:}};
\node [anchor=north east] (slabel) at ([xshift=0]flabel.south east) {\footnotesize{Sum:}};
\node [anchor=west,inner sep=2pt] (flabel2) at (flabel.east) {\footnotesize{$f(s_2)=1/(1+e^{-s_2})$}};
\node [anchor=west,inner sep=2pt] (flabel3) at (slabel.east) {\footnotesize{$s_2=x_1 \cdot w_2 + b$}};
\draw [->,thick,dotted] ([yshift=-0.3em,xshift=-0.1em]n11.60) .. controls +(east:1) and +(west:2) .. ([xshift=-0.2em]flabel.west) ;
\begin{pgfonlayer}{background}
......
......@@ -136,10 +136,11 @@
%\include{Chapter3/chapter3}
%\include{Chapter4/chapter4}
%\include{Chapter5/chapter5}
\include{Chapter6/chapter6}
%\include{Chapter6/chapter6}
%\include{Chapter7/chapter7}
%\include{Chapter8/chapter8}
%\include{Chapter9/chapter9}
\include{Chapter9/chapter9}
%\include{Chapter10/chapter10}
%\include{Chapter11/chapter11}
%\include{Chapter12/chapter12}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论