\node[anchor=north,rectangle,inner sep=0mm,minimum height=2.6em,minimum width=11em,rounded corners=5pt,fill=blue!30] (mr1) at ([xshift=0em,yshift=-0.5em]m3.south){};
\node[anchor=north,rectangle,inner sep=0mm,minimum height=2.6em,minimum width=11em,rounded corners=5pt,fill=blue!30,draw,thick,drop shadow] (mr1) at ([xshift=0em,yshift=-0.5em]m3.south){};
\end{pgfonlayer}
...
...
@@ -31,22 +31,22 @@
% Caption for box m3, inset 0.3em from its top-left corner.
% Text gloss: "deep learning and network architecture search".
\node[anchor=north west,wnode,align=left] (w3) at ([xshift=0.3em,yshift=-0.3em]m3.north west){深度学习和网\\络结构搜索};
{%subfigure-left
% Left sub-figure. wnode/cnode are styles defined outside this excerpt.
% Titles wl1-wl3 sit on the top edge of boxes ml1-ml3
% ("training data", "feature information", "model structure");
% sub-captions wl4-wl6 sit on the bottom edge
% ("manual/automatic collection", "manual design", "manual design").
% First variant: labels flush with the box edges (yshift=0em).
\node[anchor=north,wnode,font=\footnotesize] (wl1) at ([xshift=0em,yshift=0em]ml1.north){训练数据};
\node[anchor=north,wnode,font=\footnotesize] (wl2) at ([xshift=0em,yshift=0em]ml2.north){特征信息};
\node[anchor=north,wnode,font=\footnotesize] (wl3) at ([xshift=0em,yshift=0em]ml3.north){模型结构};
\node[anchor=south,wnode,font=\tiny] (wl4) at ([xshift=0em,yshift=0em]ml1.south){人工/自动收集};
\node[anchor=south,wnode] (wl5) at ([xshift=0em,yshift=0em]ml2.south){人工设计};
\node[anchor=south,wnode] (wl6) at ([xshift=0em,yshift=0em]ml3.south){人工设计};
% Second variant of the same six labels, moved 0.15em inwards from the box edges.
% NOTE(review): wl1-wl6 are declared twice in this excerpt. Together with the
% "@@" hunk headers nearby this looks like the old and new sides of a diff
% whose -/+ markers were stripped; confirm which set the real file keeps.
\node[anchor=north,wnode,font=\footnotesize] (wl1) at ([xshift=0em,yshift=-0.15em]ml1.north){训练数据};
\node[anchor=north,wnode,font=\footnotesize] (wl2) at ([xshift=0em,yshift=-0.15em]ml2.north){特征信息};
\node[anchor=north,wnode,font=\footnotesize] (wl3) at ([xshift=0em,yshift=-0.15em]ml3.north){模型结构};
\node[anchor=south,wnode,font=\tiny] (wl4) at ([xshift=0em,yshift=0.15em]ml1.south){人工/自动收集};
\node[anchor=south,wnode] (wl5) at ([xshift=0em,yshift=0.15em]ml2.south){人工设计};
\node[anchor=south,wnode] (wl6) at ([xshift=0em,yshift=0.15em]ml3.south){人工设计};
% Two empty, white-filled cnode markers positioned relative to box m1:
% cl1 is offset from m1.south, cl2 from m1.north.
\node[anchor=south,cnode,fill=white] (cl1) at ([xshift=-4em,yshift=1.5em]m1.south){};
\node[anchor=north,cnode,fill=white] (cl2) at ([xshift=0em,yshift=-1em]m1.north){};
% Annotations placed 0.5em to the right of the markers.
%   wl7: "use features to extract the information in the data"
%   wl8: "use the extracted information to train the parameters in the model"
% First variant: keywords coloured with ugreen!60 / blue!60 / red!50.
\node[anchor=south west,wnode,align=left,font=\tiny] (wl7) at ([xshift=0.5em,yshift=0em]cl1.east){使用{\color{ugreen!60}特征}对{\color{blue!60}数据}\\中信息进行提取};
\node[anchor=west,wnode,align=right,font=\tiny] (wl8) at ([xshift=0.5em,yshift=0em]cl2.east){使用提取的信息对\\{\color{red!50}模型}中的参数\\进行训练};
% Second variant of wl7/wl8: keywords are additionally wrapped in \bfnew,
% and wl7 uses plain ugreen/blue instead of the !60 tints.
\node[anchor=south west,wnode,align=left,font=\tiny] (wl7) at ([xshift=0.5em,yshift=0em]cl1.east){使用{\color{ugreen}\bfnew{特征}}对{\color{blue}\bfnew{数据}}\\中信息进行提取};
\node[anchor=west,wnode,align=right,font=\tiny] (wl8) at ([xshift=0.5em,yshift=0em]cl2.east){使用提取的信息对\\{\color{red!50}\bfnew{模型}}中的参数\\进行训练};
% Curved arrows from the top of ml1 (start shifted 1.5em left) and the top of
% ml2 into the west and east sides of cl1. The first control point lies 3em
% above the start; the second (+(west:0em)) coincides with the end point.
\draw[->,thick] ([xshift=-1.5em,yshift=-0em]ml1.north)..controls +(north:3em) and +(west:0em)..([xshift=-0em,yshift=-0em]cl1.west) ;
\draw[->,thick] ([xshift=0em,yshift=-0em]ml2.north)..controls +(north:3em) and +(west:0em)..([xshift=-0em,yshift=-0em]cl1.east) ;
...
...
@@ -57,19 +57,19 @@
}
{%subfigure-center
% Centre sub-figure. wnode/cnode are styles defined outside this excerpt.
% Titles wc1-wc2 sit on the top edge of boxes mc1-mc2
% ("training data", "model structure"); sub-captions wc3-wc4 sit on the
% bottom edge ("manual/automatic collection", "manual design").
% First variant: labels flush with the box edges (yshift=0em).
\node[anchor=north,wnode,font=\footnotesize] (wc1) at ([xshift=0em,yshift=0em]mc1.north){训练数据};
\node[anchor=north,wnode,font=\footnotesize] (wc2) at ([xshift=0em,yshift=0em]mc2.north){模型结构};
\node[anchor=south,wnode] (wc3) at ([xshift=0em,yshift=0em]mc1.south){人工/自动收集};
\node[anchor=south,wnode] (wc4) at ([xshift=0em,yshift=0em]mc2.south){人工设计};
% Second variant of the same four labels, moved 0.15em inwards from the box edges.
% NOTE(review): wc1-wc4 are declared twice in this excerpt; this looks like
% the old and new sides of a diff with the -/+ markers stripped. Confirm
% which set the real file keeps.
\node[anchor=north,wnode,font=\footnotesize] (wc1) at ([xshift=0em,yshift=-0.15em]mc1.north){训练数据};
\node[anchor=north,wnode,font=\footnotesize] (wc2) at ([xshift=0em,yshift=-0.15em]mc2.north){模型结构};
\node[anchor=south,wnode] (wc3) at ([xshift=0em,yshift=0.15em]mc1.south){人工/自动收集};
\node[anchor=south,wnode] (wc4) at ([xshift=0em,yshift=0.15em]mc2.south){人工设计};
% Two empty, white-filled cnode markers positioned relative to box m2:
% cc1 is offset from m2.south, cc2 from m2.north.
\node[anchor=south,cnode,fill=white] (cc1) at ([xshift=-4em,yshift=1.5em]m2.south){};
\node[anchor=north,cnode,fill=white] (cc2) at ([xshift=0em,yshift=-1em]m2.north){};
% Annotations placed 0.5em to the right of cc1 / cc2.
%   first node:  "use the model to extract the information in the data"
%   second node: "use the extracted information to train the parameters in the model"
% NOTE(review): these nodes are named wl7/wl8, the same names the left
% sub-figure uses for its annotations -- presumably a copy-paste leftover.
% Check for references to wl7/wl8 elsewhere in the file before renaming.
% First variant: keywords coloured with red!60 / blue!60.
\node[anchor=south west,wnode,align=left,font=\tiny] (wl7) at ([xshift=0.5em,yshift=0em]cc1.east){使用{\color{red!60}模型}对{\color{blue!60}数据}\\中信息进行提取};
\node[anchor=west,wnode,align=right,font=\tiny] (wl8) at ([xshift=0.5em,yshift=0em]cc2.east){使用提取的信息对\\{\color{red!60}模型}中的参数\\进行训练};
% Second variant: keywords wrapped in \bfnew, colours changed to red!50 / blue.
\node[anchor=south west,wnode,align=left,font=\tiny] (wl7) at ([xshift=0.5em,yshift=0em]cc1.east){使用{\color{red!50}\bfnew{模型}}对{\color{blue}\bfnew{数据}}\\中信息进行提取};
\node[anchor=west,wnode,align=right,font=\tiny] (wl8) at ([xshift=0.5em,yshift=0em]cc2.east){使用提取的信息对\\{\color{red!50}\bfnew{模型}}中的参数\\进行训练};
% Curved arrows from the top of mc1 (start shifted 2em left) and the top of
% mc2 into the west and east sides of cc1. The first control point lies 3em
% (mc1) or 2em (mc2) above the start; the second coincides with the end point.
\draw[->,thick] ([xshift=-2em,yshift=-0em]mc1.north)..controls +(north:3em) and +(west:0em)..([xshift=-0em,yshift=-0em]cc1.west) ;
\draw[->,thick] ([xshift=0em,yshift=-0em]mc2.north)..controls +(north:2em) and +(west:0em)..([xshift=-0em,yshift=-0em]cc1.east) ;
...
...
@@ -80,21 +80,21 @@
}
{%subfigure-right
% Right sub-figure. wnode/cnode are styles defined outside this excerpt.
% Title wr1 ("training data") sits on the top edge of box mr1 and
% sub-caption wr2 ("manual/automatic collection") on its bottom edge.
% First variant: labels flush with the box edges (yshift=0em).
\node[anchor=north,wnode,font=\footnotesize] (wr1) at ([xshift=0em,yshift=0em]mr1.north){训练数据};
\node[anchor=south,wnode] (wr2) at ([xshift=0em,yshift=0em]mr1.south){人工/自动收集};
% Second variant of the same two labels, moved 0.15em inwards from the box edges.
% NOTE(review): wr1/wr2 (and cr3, wr3-wr5 below) are declared twice in this
% excerpt; this looks like the old and new sides of a diff with the -/+
% markers stripped. Confirm which set the real file keeps.
\node[anchor=north,wnode,font=\footnotesize] (wr1) at ([xshift=0em,yshift=-0.15em]mr1.north){训练数据};
\node[anchor=south,wnode] (wr2) at ([xshift=0em,yshift=0.15em]mr1.south){人工/自动收集};
% Empty, white-filled cnode markers positioned relative to box m3:
% cr1 and cr3 are offset from m3.south, cr2 from m3.north.
\node[anchor=south,cnode,fill=white] (cr1) at ([xshift=-2.5em,yshift=2.8em]m3.south){};
\node[anchor=north,cnode,fill=white] (cr2) at ([xshift=0em,yshift=-1em]m3.north){};
% cr3 appears twice: first at xshift=-5.8em, then at xshift=-6.2em (0.4em further left).
\node[anchor=south,cnode,fill=white] (cr3) at ([xshift=-5.8em,yshift=0.7em]m3.south){};
\node[anchor=south,cnode,fill=white] (cr3) at ([xshift=-6.2em,yshift=0.7em]m3.south){};
% Annotations attached to the markers.
%   wr3 (below cr2):    "use the model to extract the information in the data"
%   wr4 (right of cr2): "use the extracted information to train the parameters in the model"
%   wr5 (right of cr3): "use the data to search over the structure of the model"
% First variant: keywords coloured with red!60 / blue!60.
\node[anchor=north,wnode,align=right,font=\tiny] (wr3) at ([xshift=1em,yshift=-0.5em]cr2.south){使用{\color{red!60}模型}提\\取{\color{blue!60}数据}\\中的\\信息};
\node[anchor=west,wnode,align=right,font=\tiny] (wr4) at ([xshift=0.5em,yshift=0em]cr2.east){使用提取的信息对\\{\color{red!60}模型}中的参数\\进行训练};
\node[anchor=west,wnode,align=left,font=\tiny] (wr5) at ([xshift=0.2em,yshift=0em]cr3.east){使用{\color{blue!60}数据}对{\color{red!60}模型}\\的结构进行搜索};
% Second variant: keywords wrapped in \bfnew, colours changed to red!50 / blue.
\node[anchor=north,wnode,align=right,font=\tiny] (wr3) at ([xshift=1em,yshift=-0.5em]cr2.south){使用{\color{red!50}\bfnew{模型}}提\\取{\color{blue}\bfnew{数据}}\\中的\\信息};
\node[anchor=west,wnode,align=right,font=\tiny] (wr4) at ([xshift=0.5em,yshift=0em]cr2.east){使用提取的信息对\\{\color{red!50}\bfnew{模型}}中的参数\\进行训练};
\node[anchor=west,wnode,align=left,font=\tiny] (wr5) at ([xshift=0.2em,yshift=0em]cr3.east){使用{\color{blue}\bfnew{数据}}对{\color{red!50}\bfnew{模型}}\\的结构进行搜索};
\item{\small\bfnew{结构化位置编码}}\index{基于结构化位置编码}(Structural Position Representations)\index{Structural Position Representations}\upcite{DBLP:conf/emnlp/WangTWS19a}。 例如,可以通过对输入句子进行依存句法分析得到句法树,根据叶子结点在句法树中的深度来表示其绝对位置,并在此基础上利用相对位置编码的思想计算结点之间的相对位置信息。
\parinterval 尽管窄而深的神经网络比宽网络有更快的收敛速度\upcite{WangLearning},但伴随着训练数据的增加,以及模型进一步的加深,训练代价成为不可忽视的问题。例如,在几千万甚至上亿的双语平行句对上训练一个48层的Transformer模型需要几周的时间才能达到收敛\footnote[5]{训练时间的估算是在单台8卡Titan V GPU服务器上得到的。}。因此,在保证模型性能不变的前提下,高效地完成深层模型的训练也是至关重要的。
\parinterval 尽管窄而深的神经网络比宽网络有更快的收敛速度\upcite{WangLearning},但伴随着训练数据的增加,以及模型进一步的加深,训练代价成为不可忽视的问题。例如,在几千万甚至上亿的双语平行句对上训练一个48层的Transformer模型需要几周的时间才能达到收敛\footnote[5]{训练时间的估算是在单台8卡Titan V GPU服务器上得到的。}。因此,在保证模型性能不变的前提下,高效地完成深层模型的训练也是至关重要的{\red (概括性不足,三种方法是并列还是依赖?)}。