Commit 895e3ef6 by 孟霞

修改一致

parent 855b1b50
......@@ -11,11 +11,11 @@
\node [anchor=west,tnode] (n3) at ([xshift=5em,yshift=0em]n2.east) {};
\node [anchor=north west,align=left,font=\footnotesize] (n31) at ([xshift=0.2em,yshift=-0.2em]n3.north west) {{\small 词表}\\low\\lower\\newest\\widest\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n32) at ([xshift=-0.2em,yshift=-0.2em]n3.north east) {{\small}\\6\\2\\6\\3\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n32) at ([xshift=-0.2em,yshift=-0.2em]n3.north east) {{\small}\\6\\2\\6\\3\\$\ldots$};
\node [anchor=west,tnode] (n4) at ([xshift=5em,yshift=0em]n3.east) {};
\node [anchor=north west,align=left,font=\footnotesize] (n41) at ([xshift=0.2em,yshift=-0.2em]n4.north west) {{\small 词表}\\l/o/w\\l/o/w/e/r\\n/e/w/e/s/t\\w/i/d/e/s/t\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n42) at ([xshift=-0.2em,yshift=-0.2em]n4.north east) {{\small}\\6\\2\\6\\3\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n42) at ([xshift=-0.2em,yshift=-0.2em]n4.north east) {{\small}\\6\\2\\6\\3\\$\ldots$};
\begin{pgfonlayer}{background}
......@@ -32,13 +32,13 @@
\node [anchor=north west,tnode] (n6) at ([xshift=0em,yshift=-0.5em]n5.south west) {};
\node [anchor=north west,align=left,font=\footnotesize] (n61) at ([xshift=0.2em,yshift=-0.2em]n6.north west) {{\small 词表}\\l/o/w\\l/o/w/e/r\\n/e/w/e/s/t\\w/i/d/e/s/t\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n62) at ([xshift=-0.2em,yshift=-0.2em]n6.north east) {{\small}\\6\\2\\6\\3\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n62) at ([xshift=-0.2em,yshift=-0.2em]n6.north east) {{\small}\\6\\2\\6\\3\\$\ldots$};
\draw [->,thick,ublue] ([xshift=-0em,yshift=-0em]n4.south) .. controls +(south:4em) and +(north:4em) .. ([xshift=1em,yshift=-0em]n6.north);
\node [anchor=west,pnode] (n7) at ([xshift=5em,yshift=0em]n6.east) {};
\node [anchor=north west,align=left,font=\footnotesize] (n71) at ([xshift=0.2em,yshift=-0.2em]n7.north west) {{\small 二元组}\\(e,s)\\(s,t)\\(l,o)\\(o,w)\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n72) at ([xshift=-0.2em,yshift=-0.2em]n7.north east) {{\small}\\9\\9\\8\\8\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n72) at ([xshift=-0.2em,yshift=-0.2em]n7.north east) {{\small}\\9\\9\\8\\8\\$\ldots$};
\node [anchor=west,mnode] (n8) at ([xshift=5em,yshift=0em]n7.east) {};
\node [anchor=north,align=center,font=\footnotesize] (n81) at ([xshift=0em,yshift=-0.2em]n8.north) {{\small 符号合并表}\\(e,s)};
......@@ -51,14 +51,14 @@
%第三排
\node [anchor=north,tnode] (n9) at ([xshift=0em,yshift=-2.5em]n6.south) {};
\node [anchor=north west,align=left,font=\footnotesize] (n91) at ([xshift=0.2em,yshift=-0.2em]n9.north west) {{\small 词表}\\l/o/w\\l/o/w/e/r\\n/e/w/{\red es}/t\\w/i/d/{\red es}/t\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n92) at ([xshift=-0.2em,yshift=-0.2em]n9.north east) {{\small}\\6\\2\\6\\3\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n92) at ([xshift=-0.2em,yshift=-0.2em]n9.north east) {{\small}\\6\\2\\6\\3\\$\ldots$};
\draw [->,thick,ublue] ([xshift=-0em,yshift=-0em]n8.south) .. controls +(south:3em) and +(north:3em) .. ([xshift=1em,yshift=-0em]n9.north);
\node [anchor=north west,ublue,font=\footnotesize,align=left] (l1) at ([xshift=1em,yshift=-0em]n7.south east) {在词表中\\[0.8ex]合并(e,s)};
\node [anchor=west,pnode] (n10) at ([xshift=5em,yshift=0em]n9.east) {};
\node [anchor=north west,align=left,font=\footnotesize] (n101) at ([xshift=0.2em,yshift=-0.2em]n10.north west) {{\small 二元组}\\(es,t)\\(l,o)\\(o,w)\\(n,e)\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n102) at ([xshift=-0.2em,yshift=-0.2em]n10.north east) {{\small}\\9\\8\\8\\6\\$\ldots$};
\node [anchor=north east,align=center,font=\footnotesize] (n102) at ([xshift=-0.2em,yshift=-0.2em]n10.north east) {{\small}\\9\\8\\8\\6\\$\ldots$};
\node [anchor=west,mnode] (n11) at ([xshift=5em,yshift=0em]n10.east) {};
\node [anchor=north,align=center,font=\footnotesize] (n111) at ([xshift=0em,yshift=-0.2em]n11.north) {{\small 符号合并表}\\(e,s)\\(es,t)};
......
......@@ -109,7 +109,7 @@
\parinterval 字节对编码或双字节编码(BPE)是一种常用的子词词表构建方法。BPE方法最早用于数据压缩,该方法将数据中常见的连续字符串替换为一个不存在的字符,之后通过构建一个替换关系的对应表,对压缩后的数据进行还原\upcite{Gage1994ANA}。机器翻译借用了这种思想,把子词切分看作是学习对自然语言句子进行压缩编码表示的问题\upcite{DBLP:conf/acl/SennrichHB16a}。其目的是,保证编码(即子词切分)后的结果占用的字节尽可能少。这样,子词单元会尽可能被不同单词复用,同时又不会因为使用过小的单元造成子词切分序列过长。
\parinterval 使用BPE算法进行子词切分包含两个步骤。首先,通过统计的方法构造符号合并表,\ref{fig:13-3}给出了BPE算法中符号合并表的构造过程。在得到了符号合并表后,使用符号合并表对用字符表示的单词进行合并,得到以子词形式表示的文本。BPE算法最开始将单词切分为以字符表示的符号序列,并在尾部加上终结符(为了便于理解,图\ref{fig:13-3}中没有包含终结符)。然后按照符号合并表的顺序依次遍历,找到存在于符号合并表的2-gram符号组合,则对其进行合并,直至遍历结束。图\ref{fig:13-4}给出了一个使用字符合并表对单词进行子词切分的实例。红色单元为每次合并后得到的新符号,直至无法合并,或遍历结束,得到最终的合并结果。其中每一个单元为一个子词。
\parinterval 使用BPE算法进行子词切分包含两个步骤。首先,通过统计的方法构造符号合并表,具体的方式为:先对分过词的文本进行统计,得到词表和词频,同时将词表中的单词分割为字符表示;其次统计词表中所有出现的二元组的频次,选择当前频次最高的二元组加入符号合并表,并将所有词表中出现的该二元组合并为一个单元;不断地重复上述过程,直到合并表的大小达到预先设定的大小,或者无法继续合并。图\ref{fig:13-4}给出了一个使用字符合并表对单词进行子词切分的实例。红色单元为每次合并后得到的新符号,直至无法合并,或遍历结束,得到最终的合并结果。其中每一个单元为一个子词。
%----------------------------------------------
\begin{figure}[htp]
......
\begin{tikzpicture}
\tikzstyle{node1} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=green!80]
\tikzstyle{node2} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=green!50]
\tikzstyle{node3} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=green!20]
\tikzstyle{node1} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=ugreen!80]
\tikzstyle{node2} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=ugreen!50]
\tikzstyle{node3} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=ugreen!20]
\tikzstyle{node4} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt]
\tikzstyle{node5} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=red!20]
\tikzstyle{node6} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=red!50]
\tikzstyle{node7} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=red!80]
\tikzstyle{node5} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=teal!20]
\tikzstyle{node6} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=teal!50]
\tikzstyle{node7} = [anchor=center,draw,minimum height=2em,minimum width=2em,inner sep=0pt,fill=teal!80]
\begin{scope}[scale=1.0]
\foreach \i / \j / \k / \z in
......
......@@ -113,7 +113,7 @@
\begin{figure}[htp]
\centering
\input{./Chapter15/Figures/figure-relative-position-weight}
\caption{相对位置权重$\mathbi{a}_{ij}$}
\caption{相对位置权重$\mathbi{a}_{ij}$\upcite{DBLP:conf/emnlp/HuangLXX20}}
\label{fig:15-2}
\end{figure}
%-------------------------------------------
......@@ -168,11 +168,7 @@ A_{ij}^{\rm rel} &=& \underbrace{\mathbi{E}_{x_i}\mathbi{W}_Q\mathbi{W}_{K}^{T}\
\begin{itemize}
\vspace{0.5em}
\item {\small\bfnew{引入高斯约束}}\upcite{Yang2018ModelingLF}。如图\ref{fig:15-3}所示,这类方法的核心思想是引入可学习的高斯分布$\mathbi{G}$作为局部约束,与注意力权重进行融合,具体的形式如下:
\begin{eqnarray}
\mathbi{e}_{ij} &=& \frac{(\mathbi{x}_i \mathbi{W}_Q){(\mathbi{x}_j \mathbi{W}_K)}^{T}}{\sqrt{d_k}} + \mathbi{G}
\label{eq:15-15}
\end{eqnarray}
\item {\small\bfnew{引入高斯约束}}\upcite{Yang2018ModelingLF}。如图\ref{fig:15-3}所示,这类方法的核心思想是引入可学习的高斯分布$\mathbi{G}$作为局部约束,与注意力权重进行融合。
%----------------------------------------------
\begin{figure}[htp]
......@@ -183,6 +179,12 @@ A_{ij}^{\rm rel} &=& \underbrace{\mathbi{E}_{x_i}\mathbi{W}_Q\mathbi{W}_{K}^{T}\
\end{figure}
%-------------------------------------------
\noindent 具体的形式如下:
\begin{eqnarray}
\mathbi{e}_{ij} &=& \frac{(\mathbi{x}_i \mathbi{W}_Q){(\mathbi{x}_j \mathbi{W}_K)}^{T}}{\sqrt{d_k}} + \mathbi{G}
\label{eq:15-15}
\end{eqnarray}
\noindent 其中,$\mathbi{G} \in \mathbb{R}^{m\times m}$$\mathbi{G}$中的每个元素$G_{ij}$表示位置$j$和预测的中心位置$P_i$之间的关联程度,计算公式如下:
\begin{eqnarray}
G_{ij} &=& - \frac{{(j - P_i)}^2}{2\sigma_i^2}
......@@ -719,7 +721,6 @@ C(\mathbi{x}_j \mathbi{W}_K,\omega) &=& (\mathbi{x}_{j-\omega},\ldots,\mathbi{x}
{\bm \omega_{l+1}} &=& \sqrt{\sum_{j<l}\textrm{Var}[F_{l+1}(\mathbi{x}_l)]}
\label{eq:15-48}
\end{eqnarray}
\vspace{0.5em}
\end{itemize}
\parinterval 这种动态的参数初始化方法不受限于具体的模型结构,方法有较好的通用性。
......@@ -937,7 +938,7 @@ lr &=& d_{\textrm{model}}^{-0.5}\cdot step\_num^{-0.5}
\noindent 可以翻译成:
\begin{equation}
\textrm{“私は緑茶を飲んでいます。”}{\color{red} 日语单词没有切分???可以问一下张妍}\nonumber
\textrm{“私/は/緑茶/を/飲んでいます。”} \nonumber
\end{equation}
\parinterval 在标准的英语到日语的翻译中,英语短语“a cup of green tea”只会被翻译为“緑茶”一词。在加入句法树后,“a cup of green tea”会作为树中一个节点,这样可以更容易把英语短语作为一个整体进行翻译。
......
......@@ -23,7 +23,7 @@
\draw [->,thick] ([yshift=-0.2em]new.south) .. controls +(south:2.5em) and +(north:2.5em) .. ([xshift=0.2em,yshift=0.2em]all.north);
\draw [->,thick] ([yshift=-0.2em]all.south) -- ([yshift=0.2em]final_model.north)node[pos=0.5,right,align=center,font=\footnotesize] {训练};
\node[word] at ([yshift=-2em]final_model.south){(a)数据混合};
\node[word] at ([yshift=-2em]final_model.south){(a)数据混合};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=1pt,minimum width=4.5em,minimum height=3.2em,draw=red!50,dotted,very thick] at (0.37,0.15) {};
......@@ -54,7 +54,7 @@
\node[word] at ([yshift=2em]final_model.north) {插值};
\node[word] at ([yshift=-2em]final_model.south){(b)模型插值};
\node[word] at ([yshift=-2em]final_model.south){(b)模型插值};
%空白占位
\node[word] at ([xshift=-3em]old.west) {};
......@@ -76,7 +76,7 @@
\draw [->,thick] ([yshift=-0.2em]new.south) -- ([xshift=0.2em,yshift=0.2em]final_model.north) node[pos=0.5,right,align=center,font=\footnotesize] {目标\\函数1};
\draw [->,thick,dashed] ([yshift=-0.2em]old.south) .. controls +(south:4.5em) and +(north:4.5em) .. ([xshift=-0.2em,yshift=0.2em]final_model.north) node[align=center,font=\footnotesize] at ([xshift=-0.1em,yshift=-4em]old.south) {目标\\函数2};
\node[word] at ([yshift=-2em,xshift=-2.55em]final_model.south){(c)多目标训练};
\node[word] at ([yshift=-2em,xshift=-2.55em]final_model.south){(c)多目标训练};
\end{scope}
\end{tikzpicture}
......
\relax
\providecommand\zref@newlabel[2]{}
\providecommand\hyper@newdestlabel[2]{}
\@setckpt{ChapterPreface/ChapterPreface}{
\setcounter{page}{8}
\setcounter{equation}{0}
\setcounter{enumi}{0}
\setcounter{enumii}{0}
\setcounter{enumiii}{0}
\setcounter{enumiv}{0}
\setcounter{footnote}{0}
\setcounter{mpfootnote}{0}
\setcounter{part}{0}
\setcounter{chapter}{0}
\setcounter{section}{0}
\setcounter{subsection}{0}
\setcounter{subsubsection}{0}
\setcounter{paragraph}{0}
\setcounter{subparagraph}{0}
\setcounter{figure}{0}
\setcounter{table}{0}
\setcounter{tabx@nest}{0}
\setcounter{listtotal}{0}
\setcounter{listcount}{0}
\setcounter{liststart}{0}
\setcounter{liststop}{0}
\setcounter{citecount}{0}
\setcounter{citetotal}{0}
\setcounter{multicitecount}{0}
\setcounter{multicitetotal}{0}
\setcounter{instcount}{0}
\setcounter{maxnames}{3}
\setcounter{minnames}{1}
\setcounter{maxitems}{3}
\setcounter{minitems}{1}
\setcounter{citecounter}{0}
\setcounter{maxcitecounter}{0}
\setcounter{savedcitecounter}{0}
\setcounter{uniquelist}{0}
\setcounter{uniquename}{0}
\setcounter{refsection}{0}
\setcounter{refsegment}{0}
\setcounter{maxextratitle}{0}
\setcounter{maxextratitleyear}{0}
\setcounter{maxextraname}{0}
\setcounter{maxextradate}{0}
\setcounter{maxextraalpha}{0}
\setcounter{abbrvpenalty}{50}
\setcounter{highnamepenalty}{50}
\setcounter{lownamepenalty}{25}
\setcounter{maxparens}{3}
\setcounter{parenlevel}{0}
\setcounter{mincomprange}{10}
\setcounter{maxcomprange}{100000}
\setcounter{mincompwidth}{1}
\setcounter{afterword}{0}
\setcounter{savedafterword}{0}
\setcounter{annotator}{0}
\setcounter{savedannotator}{0}
\setcounter{author}{0}
\setcounter{savedauthor}{0}
\setcounter{bookauthor}{0}
\setcounter{savedbookauthor}{0}
\setcounter{commentator}{0}
\setcounter{savedcommentator}{0}
\setcounter{editor}{0}
\setcounter{savededitor}{0}
\setcounter{editora}{0}
\setcounter{savededitora}{0}
\setcounter{editorb}{0}
\setcounter{savededitorb}{0}
\setcounter{editorc}{0}
\setcounter{savededitorc}{0}
\setcounter{foreword}{0}
\setcounter{savedforeword}{0}
\setcounter{holder}{0}
\setcounter{savedholder}{0}
\setcounter{introduction}{0}
\setcounter{savedintroduction}{0}
\setcounter{namea}{0}
\setcounter{savednamea}{0}
\setcounter{nameb}{0}
\setcounter{savednameb}{0}
\setcounter{namec}{0}
\setcounter{savednamec}{0}
\setcounter{translator}{0}
\setcounter{savedtranslator}{0}
\setcounter{shortauthor}{0}
\setcounter{savedshortauthor}{0}
\setcounter{shorteditor}{0}
\setcounter{savedshorteditor}{0}
\setcounter{labelname}{0}
\setcounter{savedlabelname}{0}
\setcounter{institution}{0}
\setcounter{savedinstitution}{0}
\setcounter{lista}{0}
\setcounter{savedlista}{0}
\setcounter{listb}{0}
\setcounter{savedlistb}{0}
\setcounter{listc}{0}
\setcounter{savedlistc}{0}
\setcounter{listd}{0}
\setcounter{savedlistd}{0}
\setcounter{liste}{0}
\setcounter{savedliste}{0}
\setcounter{listf}{0}
\setcounter{savedlistf}{0}
\setcounter{location}{0}
\setcounter{savedlocation}{0}
\setcounter{organization}{0}
\setcounter{savedorganization}{0}
\setcounter{origlocation}{0}
\setcounter{savedoriglocation}{0}
\setcounter{origpublisher}{0}
\setcounter{savedorigpublisher}{0}
\setcounter{publisher}{0}
\setcounter{savedpublisher}{0}
\setcounter{language}{0}
\setcounter{savedlanguage}{0}
\setcounter{origlanguage}{0}
\setcounter{savedoriglanguage}{0}
\setcounter{pageref}{0}
\setcounter{savedpageref}{0}
\setcounter{textcitecount}{0}
\setcounter{textcitetotal}{0}
\setcounter{textcitemaxnames}{0}
\setcounter{biburlbigbreakpenalty}{100}
\setcounter{biburlbreakpenalty}{200}
\setcounter{biburlnumpenalty}{0}
\setcounter{biburlucpenalty}{0}
\setcounter{biburllcpenalty}{0}
\setcounter{smartand}{1}
\setcounter{bbx:relatedcount}{0}
\setcounter{bbx:relatedtotal}{0}
\setcounter{parentequation}{0}
\setcounter{notation}{0}
\setcounter{dummy}{0}
\setcounter{problem}{0}
\setcounter{exerciseT}{0}
\setcounter{exampleT}{0}
\setcounter{vocabulary}{0}
\setcounter{definitionT}{0}
\setcounter{mdf@globalstyle@cnt}{0}
\setcounter{mdfcountframes}{0}
\setcounter{mdf@env@i}{0}
\setcounter{mdf@env@ii}{0}
\setcounter{mdf@zref@counter}{0}
\setcounter{Item}{0}
\setcounter{Hfootnote}{0}
\setcounter{Hy@AnnotLevel}{0}
\setcounter{bookmark@seq@number}{0}
\setcounter{caption@flags}{0}
\setcounter{continuedfloat}{0}
\setcounter{cp@cnt}{0}
\setcounter{cp@tempcnt}{0}
\setcounter{subfigure}{0}
\setcounter{lofdepth}{1}
\setcounter{subtable}{0}
\setcounter{lotdepth}{1}
\setcounter{@pps}{0}
\setcounter{@ppsavesec}{0}
\setcounter{@ppsaveapp}{0}
\setcounter{tcbbreakpart}{0}
\setcounter{tcblayer}{0}
\setcounter{tcolorbox@number}{0}
\setcounter{section@level}{0}
}
......@@ -9345,6 +9345,16 @@ author = {Zhuang Liu and
publisher={arXiv preprint arXiv:2002.11794},
year={2020}
}
@inproceedings{DBLP:conf/emnlp/HuangLXX20,
author = {Zhiheng Huang and
Davis Liang and
Peng Xu and
Bing Xiang},
title = {Improve Transformer Models with Better Relative Position Embeddings},
pages = {3327--3335},
publisher = {Conference on Empirical Methods in Natural Language Processing},
year = {2020}
}
%%%%% chapter 15------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论