Commit dd9acda0 by 曹润柘

合并分支 'master' 到 'caorunzhe'

Master

查看合并请求 !49
parents 1a304483 7ae53c52
......@@ -1033,7 +1033,7 @@ a(i|j,m,l) &=\frac{\sum_{k=0}^{K}c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^
\begin{itemize}
\item 对每个$i\in[1,l]$的目标语单词的产出率建模({\color{red!70} 红色}),即$\varphi_i$的概率。它依赖于$\mathbf{t}$和区间$[1,i-1]$的目标语单词的产出率$\varphi_1^{i-1}$\footnote{这里约定,当$i=1$ 时,$\varphi_1^0$ 表示一个空}
\item 对每个$i\in[1,l]$的目标语单词的产出率建模({\color{red!70} 红色}),即$\varphi_i$的概率。它依赖于$\mathbf{t}$和区间$[1,i-1]$的目标语单词的产出率$\varphi_1^{i-1}$\footnote{这里约定,当$i=1$ 时,$\varphi_1^0$ 表示空。}
\item $i=0$时的产出率建模({\color{blue!70} 蓝色}),即空标记$t_0$的产出率的概率。它依赖于$\mathbf{t}$和区间$[1,i-1]$的目标语单词的产出率$\varphi_1^l$
\item 词汇翻译建模({\color{green!70} 绿色}),目标语言单词$t_i$生成第$k$个源语言单词$\tau_{ik}$时的概率,依赖于$\mathbf{t}$、所有目标语言单词的产出率$\varphi_0^l$、区间$i\in[1,l]$的目标语言单词生成的源语言单词$\tau_1^{i-1}$和目标语单词$t_i$生成的前$k$个源语言单词$\tau_{i1}^{k-1}$
\item 对于每个$i\in[1,l]$的目标语言单词生成的源语言单词的{\small\bfnew{扭曲度}}\index{扭曲度}(Distortion)\index{Distortion}建模({\color{yellow!70!black} 黄色}),即第$i$个译文单词生成的第$k$个源语言单词在源文中的位置$\pi_{ik}$ 的概率。其中$\pi_1^{i-1}$$\pi_{i1}^{k-1}$分别表示区间$[1,i-1]$的目标语言单词生成的源语言单词的扭曲度和第$i$译文单词生成的前$k$个源语言单词的扭曲度。
......
......@@ -2,7 +2,7 @@
%-------------------------------------------------------------------------
\begin{tabular}{| l | l |}
\hline
& {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$} \color{red}{{\footnotesize{$\times\textrm{P}_{lm}(\mathbf{t})$}}}} \\ \hline
& {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$} \color{red}{{\footnotesize{$\times\textrm{P}_{\textrm{lm}}(\mathbf{t})$}}}} \\ \hline
\begin{tikzpicture}
......
......@@ -5,9 +5,9 @@
\begin{scope}[minimum height = 18pt]
\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
......@@ -19,9 +19,9 @@
\begin{scope}[xshift=15em,minimum height = 18pt]
\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
{
......@@ -37,17 +37,17 @@
\begin{scope}[yshift=-9.5em,minimum height = 18pt]
\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=west,fill=gray!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=red!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
{
\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{There is}};
\node[anchor=west,fill=gray!20] (t1) at (0, -1.5) {\footnotesize{There is}};
\path[<->, thick] (s2.south) edge (t1.north);
}
{
\node[anchor=west,fill=blue!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
\node[anchor=west,fill=red!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
\path[<->, thick] (s3.south) edge (t2.north);
}
\node[anchor=north] (l) at ([xshift=7em,yshift=-0.5em]t0.south) {\footnotesize{(c)\ }};
......@@ -59,21 +59,21 @@
\begin{scope}[xshift=15em,yshift=-9.5em,minimum height = 18pt]%[scale=0.5]
\node[anchor=east] (s0) at (-0.5em, 0) {$\textbf{s}$:};
\node[anchor=west,fill=green!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=red!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=blue!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=west,fill=red!20] (s1) at (0, 0) {\footnotesize{桌子 上}};
\node[anchor=west,fill=gray!20] (s2) at ([xshift=1em]s1.east) {\footnotesize{}};
\node[anchor=west,fill=gray!20] (s3) at ([xshift=1em]s2.east) {\footnotesize{一个 苹果}};
\node[anchor=east] (t0) at (-0.5em, -1.5) {$\textbf{t}$:};
{
\node[anchor=west,fill=red!20] (t1) at (0, -1.5) {\footnotesize{There is}};
\node[anchor=west,fill=gray!20] (t1) at (0, -1.5) {\footnotesize{There is}};
\path[<->, thick] (s2.south) edge (t1.north);
}
{
\node[anchor=west,fill=blue!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
\node[anchor=west,fill=gray!20] (t2) at ([xshift=1em]t1.east) {\footnotesize{an apple}};
\path[<->, thick] (s3.south) edge (t2.north);
}
{
\node[anchor=west,fill=green!20] (t3) at ([xshift=1em]t2.east) {\footnotesize{on the table}};
\node[anchor=west,fill=red!20] (t3) at ([xshift=1em]t2.east) {\footnotesize{on the table}};
\path[<->, thick] (s1.south) edge (t3.north);
}
\node[anchor=north] (l) at ([xshift=7em,yshift=-0.5em]t0.south) {\footnotesize{(d)\ }};
......
......@@ -10,7 +10,7 @@
\node [anchor=west] (s7) at ([yshift=-1.1em]s5.west) {\small{...}};
\node [anchor=west] (s6) at ([yshift=1.0em]s1.west) {\small{...}};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.3em,fill=red!20] [fit = (s1) (s3) (s4) (s6) (s7)] (box1) {};
\node [rectangle,inner sep=0.3em,fill=red!10] [fit = (s1) (s3) (s4) (s6) (s7)] (box1) {};
\end{pgfonlayer}
\end{tikzpicture}
\end{center}
\ No newline at end of file
......@@ -19,8 +19,8 @@
\path[<->, thick] (s3.south) edge (t3.north);
}
\node[anchor=south] (s0) at ([xshift=-3em,yshift=0em]s1.south) {源语:};
\node[anchor=east] (t0) at ([xshift=0em,yshift=-3.5em]s0.east) {目标语:};
\node[anchor=south] (s0) at ([xshift=-3em,yshift=0em]s1.south) {源语:};
\node[anchor=east] (t0) at ([xshift=0em,yshift=-3.5em]s0.east) {目标语:};
\end{scope}
\end{tikzpicture}
......
......@@ -7,9 +7,9 @@
\node[anchor=west, fill=red!30, inner sep=0.05cm] (sp3) at (14em, 0) {有了\ 大幅度\ 下降};
\draw[->] (sp1) edge [out=15, in=170] (sp3);
\node[anchor=west, fill=blue!30, inner sep=0.05cm] (tp1) at (0, -0.8) {The imports};
\node[anchor=west, fill=red!30, inner sep=0.05cm] (tp2) at (5.3em, -0.8) {drastically fell};
\node[anchor=west] (tp3) at (11.3em, -0.8) {in the past five to ten years};
\node[anchor=west, fill=blue!30, inner sep=0.05cm] (tp1) at (0, -1.2) {The imports};
\node[anchor=west, fill=red!30, inner sep=0.05cm] (tp2) at (5.3em, -1.2) {drastically fell};
\node[anchor=west] (tp3) at (11.3em, -1.2) {in the past five to ten years};
\path[->] (tp1) edge [out=30, in=150] (tp2);
\end{tikzpicture}
\ No newline at end of file
......@@ -1393,8 +1393,8 @@ $M=0$莉」陦ィ隸・蟄仙アり「ォ荳「蠑シ瑚$M=1$莉」陦ィ豁」蟶ク霑幄。悟ス灘燕蟄仙アら噪隶。邂励
\parinterval 除此之外,有研究者已经发现残差网络中底层的子网络通过对输入进行抽象得到的表示对最终的输出有很大的影响,上层网络是通过对底层网络得到的表示不断修正来拟合训练目标\cite{journals/corr/GreffSS16}。该结论同样适用于Transformer模型,比如,在训练中,残差支路以及底层的梯度范数通常比较大,这也间接表明底层网络在整个优化的过程中需要更大的更新。考虑到这个因素,在设计每一个子层被丢弃的概率时可以采用自底向上线性增大的策略,保证底层的网络相比于顶层更容易保留下来。这里用$L$来代表编码端块的个数,$l$代表当前的子层的编号,那么$M$可以通过以下的方式得到:
\begin{eqnarray}
M = \left\{\begin{array}{ll}
0&\textrm{p} \leqslant p_l\\
1&\textrm{p} > p_l
0&P \leqslant p_l\\
1&P > p_l
\end{array}\right.
\label{eq:7.5-11}
\end{eqnarray}
......
......@@ -22,7 +22,7 @@
\fill [black] (h) circle(1pt);
% y=0.73x + 2.54
\draw [thick,red] (-1*0.3,1.81*0.2) to (10*0.3,9.84*0.2);
\draw [thick,red] (0.5*0.3,1.81*0.2) to (10*0.3,9.84*0.2);
\node [font=\footnotesize] at (1.5,-0.5) {欠拟合};
\end{tikzpicture}
......
......@@ -3,7 +3,7 @@
\node [anchor=center] (node1) at (-2.9,1) {\small{训练:}};
\node [anchor=center] (node11) at (-2.5,1) {};
\node [anchor=center] (node12) at (-1.7,1) {};
\node [anchor=center] (node2) at (-2.9,0.5) {\small{}};
\node [anchor=center] (node2) at (-2.9,0.5) {\small{}};
\node [anchor=center] (node21) at (-2.5,0.5) {};
\node [anchor=center] (node22) at (-1.7,0.5) {};
\node [anchor=west,draw=black,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}};
......
......@@ -3,7 +3,7 @@
\node [anchor=center] (node1) at (-2.3,1) {\small{训练:}};
\node [anchor=center] (node11) at (-2.0,1) {};
\node [anchor=center] (node12) at (-1.1,1) {};
\node [anchor=center] (node2) at (-2.3,0.5) {\small{}};
\node [anchor=center] (node2) at (-2.3,0.5) {\small{}};
\node [anchor=center] (node21) at (-2.0,0.5) {};
\node [anchor=center] (node22) at (-1.1,0.5) {};
\node [anchor=west,draw=black,minimum width=5.6em,minimum height=2.2em,fill=blue!20,rounded corners=2pt] (node1-1) at (0,0) {\footnotesize{双语数据}};
......
\begin{center}
\begin{tikzpicture}[scale=1.3]
\begin{tikzpicture}[scale=1.0]
\footnotesize{
\begin{axis}[
width=.40\textwidth,
height=.30\textwidth,
width=.50\textwidth,
height=.40\textwidth,
legend style={at={(0.60,0.08)}, anchor=south west},
xlabel={\scriptsize{更新次数(10k)}},
ylabel={\scriptsize{学习率 ($10^{-3}$}},
......
......@@ -623,9 +623,9 @@
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.4.1}推断优化}{374}{subsection.7.4.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断系统的架构}{374}{section*.457}
\contentsline {subsubsection}{推断系统的架构}{375}{section*.457}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{375}{section*.459}
\contentsline {subsubsection}{自左向右推断 vs 自右向左推断}{376}{section*.459}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{推断加速}{376}{section*.460}
\defcounter {refsection}{0}\relax
......@@ -649,7 +649,7 @@
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.1}深层模型}{390}{subsection.7.5.1}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{390}{section*.477}
\contentsline {subsubsection}{Post-Norm vs Pre-Norm}{391}{section*.477}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{层聚合}{393}{section*.480}
\defcounter {refsection}{0}\relax
......@@ -663,17 +663,17 @@
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{深层模型的鲁棒性训练}{397}{section*.489}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{399}{subsection.7.5.2}
\contentsline {subsection}{\numberline {7.5.2}单语数据的使用}{398}{subsection.7.5.2}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{伪数据}{399}{section*.492}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{预训练}{400}{section*.495}
\contentsline {subsubsection}{预训练}{401}{section*.495}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{联合训练}{402}{section*.498}
\defcounter {refsection}{0}\relax
\contentsline {subsection}{\numberline {7.5.3}知识精炼}{403}{subsection.7.5.3}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{什么是知识精炼}{403}{section*.500}
\contentsline {subsubsection}{什么是知识精炼}{404}{section*.500}
\defcounter {refsection}{0}\relax
\contentsline {subsubsection}{知识精炼的基本方法}{405}{section*.501}
\defcounter {refsection}{0}\relax
......
......@@ -76,13 +76,13 @@
~\vfill
\thispagestyle{empty}
\noindent Copyright \copyright\ 2020 肖桐\ \ 朱靖波\\ % Copyright notice
\noindent Copyright \copyright\ 2020 肖桐\ \ 朱靖波\\
\noindent \textsc{东北大学自然语言处理实验室\ /\ 小牛翻译}\\ % Publisher
\noindent \textsc{东北大学自然语言处理实验室\ /\ 小牛翻译}\\
\noindent \textsc{\url{https://github.com/NiuTrans/MTBook}}\\ % URL
\noindent \textsc{\url{https://github.com/NiuTrans/MTBook}}\\
\noindent {\red{Licensed under the Creative Commons Attribution-NonCommercial 4.0 Unported License (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{http://creativecommons.org/licenses/by-nc/4.0}. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \textsc{``as is'' basis, without warranties or conditions of any kind}, either express or implied. See the License for the specific language governing permissions and limitations under the License.}}\\ % License information, replace this with your own license (if any)
\noindent {\red{Licensed under the Creative Commons Attribution-NonCommercial 4.0 Unported License (the ``License''). You may not use this file except in compliance with the License. You may obtain a copy of the License at \url{http://creativecommons.org/licenses/by-nc/4.0}. Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \textsc{``as is'' basis, without warranties or conditions of any kind}, either express or implied. See the License for the specific language governing permissions and limitations under the License.}}\\
\noindent \textit{First Edition, April 2020}
......@@ -121,14 +121,14 @@
% CHAPTERS
%----------------------------------------------------------------------------------------
%\include{Chapter1/chapter1}
%\include{Chapter2/chapter2}
%\include{Chapter3/chapter3}
%\include{Chapter4/chapter4}
%\include{Chapter5/chapter5}
%\include{Chapter6/chapter6}
%\include{Chapter7/chapter7}
%\include{ChapterAppend/chapterappend}
\include{Chapter1/chapter1}
\include{Chapter2/chapter2}
\include{Chapter3/chapter3}
\include{Chapter4/chapter4}
\include{Chapter5/chapter5}
\include{Chapter6/chapter6}
\include{Chapter7/chapter7}
\include{ChapterAppend/chapterappend}
%----------------------------------------------------------------------------------------
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论