Commit 899ee1b2 by 孟霞

合并分支 'caorunzhe' 到 'mengxia'

Caorunzhe

查看合并请求 !741
parents f0dd831a afebd90b
......@@ -92,7 +92,7 @@
}
{
\draw[double,->,thick,ublue] (e3.south)--([yshift=-1.2em]e3.south) node[pos=0.5,right,xshift=0.2em,yshift=0.2em] (step1) {\color{red}{\tiny{‘你’替换‘他’}}};
\draw[double,->,thick,ublue] (e3.south)--([yshift=-1.2em]e3.south) node[pos=0.5,right,xshift=0.2em,yshift=0.2em] (step1) {\color{red}{\tiny{“你”替换“他”}}};
\draw[->,dotted,thick,red] ([xshift=-0.1em]entry2.east)..controls +(east:4) and +(west:4)..([yshift=-0.6em,xshift=-0.5em]e3.south) ;
}
......@@ -122,7 +122,7 @@
}
{
\draw[double,->,thick,ublue] (e3.south)--([yshift=-1.2em]e3.south) node[pos=0.5,right,xshift=0.2em,yshift=0.2em] (step1) {\color{red}{\tiny{‘满意’替换‘高兴’}}};
\draw[double,->,thick,ublue] (e3.south)--([yshift=-1.2em]e3.south) node[pos=0.5,right,xshift=0.2em,yshift=0.2em] (step1) {\color{red}{\tiny{“满意”替换“高兴”}}};
\draw[->,dotted,thick,red] ([xshift=-1.2em,yshift=-0.6em]entry3.north east)..controls +(east:2) and +(west:3)..([yshift=-0.6em,xshift=-0.5em]e3.south) ;
}
......
......@@ -21,7 +21,7 @@
\node[node,font=\scriptsize,anchor = north,fill=ugreen!5,drop shadow] (dict) at ([xshift = 5em,yshift = -5em]node6.south){\begin{tabular}{llllll}
\multirow{3}{*}{符号合并表:} & ('e','s') & ('es','t') & ('est','$<$e$>$') & ('l','o') & ('lo','w') \\
& ('n','e') & ('ne','w') & ('new','est$<$e$>$') & ('low','$<$e$>$') & 'w','i') \\
& ('low','$<$e$>$') & ('n','e') & ('ne','w') & ('new','est$<$e$>$') & ('w','i') \\
& ('wi','d') & ('wid','est$<$e$>$') & ('low','e') & ('lowe','r') & ('lower','$<$e$>$')
\end{tabular}};
......
......@@ -60,12 +60,12 @@
%equ
\node [anchor=west,inner sep = 2pt] (line1) at (9*\nodespace,0) {未应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}_{i}^{l} \mathbf{x}+b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(x_{i}^{l}\right)$};
\node [anchor=north west,inner sep = 2pt] (line2) at (line1.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \mathbf{x}^{l} + b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line3) at (line2.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\node [anchor=north west,inner sep = 2pt] (line4) at (line3.south west) {应用Dropout:};
\node [anchor=north west,inner sep = 2pt] (line5) at (line4.south west) {$r_{j}^{l} \sim$ Bernoulli $(1-p)$};
\node [anchor=north west,inner sep = 2pt] (line6) at (line5.south west) {$\tilde{\mathbf{x}}=\mathbf{r} * \mathbf{x}$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}_{i}^{l} \widetilde{\mathbf{x}}+b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line8) at (line7.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l}\right)$};
\node [anchor=north west,inner sep = 2pt] (line7) at (line6.south west) {$z_{i}^{l+1}=\mathbf{w}^{l} \widetilde{\mathbf{x}}^{l} + b_{i}^{l}$};
\node [anchor=north west,inner sep = 2pt] (line8) at (line7.south west) {$x_{i}^{l+1}=f\left(z_{i}^{l+1}\right)$};
\end{tikzpicture}
\ No newline at end of file
......@@ -2,12 +2,12 @@
\begin{tikzpicture}
\node[] (do) at (0,0) {{\red do}};
\node[anchor = west] (does) at ([xshift = 1em]do.east) {{\red do}es};
\node[anchor = west] (doing) at ([xshift = 0.7em]does.east) {{\red do}ing};
\node[anchor = west] (doing) at ([xshift = 0.7em,yshift = -0.1em]does.east) {{\red do}ing};
\node[anchor = north] (do_root) at ([yshift = -1.5em]does.south) {do};
\node[anchor = west] (new) at ([xshift = 2em]doing.east) {{\red new}};
\node[anchor = west] (newer) at ([xshift = 1em]new.east) {{\red new}er};
\node[anchor = west] (newest) at ([xshift = 0.7em]newer.east) {{\red new}est};
\node[anchor = west] (newest) at ([xshift = 0.7em,yshift = 0.08em]newer.east) {{\red new}est};
\node[anchor = north] (new_root) at ([yshift = -1.5em]newer.south) {new};
\draw [->] ([yshift=0.2em]do_root.north) .. controls +(north:0.4) and +(south:0.6) ..(do.south);
\draw [->] (do_root.north) -- (does.south);
......
\begin{tikzpicture}
\tikzstyle{word} = [font=\scriptsize,minimum height=1.4em]
\tikzstyle{model} = [rectangle,line width=0.7pt,draw,minimum height=3em,minimum width=13em,rounded corners=4pt,fill=red!20]
\tikzstyle{model} = [rectangle,line width=0.7pt,draw,minimum height=2.2em,minimum width=13em,rounded corners=4pt,fill=red!20]
\node [anchor=center] (ate) at (0,0) {};
%decoder
\node [anchor=center,model,fill=blue!20] (decoder) at ([xshift=7.5em]ate.east) {解码器};
\node [anchor=center,model,fill=blue!20] (decoder) at ([xshift=7.5em]ate.east) {\small{解码器}};
\node [anchor=north,word] (w1) at ([yshift=-1.5em,xshift=0em]decoder.south) {\small{$x_3$}};
\node [anchor=west,word] (w2) at ([xshift=0em]w1.east) {\small{$x_4$}};
\node [anchor=west,word] (w3) at ([xshift=0em]w2.east) {[M]};
......@@ -31,7 +31,7 @@
\draw [->,thick] ([yshift=-1.4em]w10.south) -- (w10.south);
%encoder
\node [model] (encoder) at ([xshift=-7.5em]ate.west) {编码器};
\node [model] (encoder) at ([xshift=-7.5em]ate.west) {\small{编码器}};
\node [anchor=north,word] (we1) at ([yshift=-1.5em,xshift=0em]encoder.south) {[M]};
\node [anchor=west,word] (we2) at ([xshift=0em]we1.east) {[M]};
......
......@@ -632,16 +632,7 @@
\parinterval\ref{unsupervised-dictionary-induction}节所述,无监督词典归纳的方法依赖于词的分布式表示,也就是词嵌入。因此当把无监督词典归纳拓展到短语上时,首先需要获得短语的分布式表示。比较简单的方法是把词换成短语,然后借助与无监督词典归纳相同的算法得到短语的分布式表示。最后直接应用无监督词典归纳方法,得到源语言短语与目标语言短语之间的对应。
\parinterval 尽管已经得到了短语的翻译,短语表的另外一个重要的组成部分,也就是短语对的得分(概率)无法由词典归纳方法直接给出,而这些得分在统计机器翻译模型中非常重要。在无监督词典归纳中,在推断词典的时候会为一对源语言单词和目标语言单词打分(词嵌入之间的相似度),再根据打分来决定哪一个目标语言单词更有可能是当前源语言单词的翻译。在无监督短语归纳中,这样一个打分已经提供了对短语对质量的度量,因此经过适当的归一化处理后就可以得到短语对的得分:
\begin{eqnarray}
P(\mathbi{y}|\mathbi{x}) & = & \frac{\mathrm{cos}(\mathbi{x},\mathbi{y})/\tau}{\sum_{\mathbi{y}^{'}}\mathrm{cos}(\mathbi{x},\mathbi{y}^{'})/\tau}
\label{eq:16-11}
\end{eqnarray}
\noindent 其中,$\mathrm{cos}$是余弦相似度,$\mathbi{x}$是经过无监督词典归纳里$\mathbi{W}$转换的源语言短语嵌入,$\mathbi{y}$是目标语言短语嵌入,$\tau$是控制分布尖锐程度的一个超参数。
\parinterval 一个问题是在无监督的情景下没有任何双语数据,那么如何得到最优的$\tau$?这里,可以寻找一个$\tau$使得所有$P(\mathbi{y}|\mathbi{x})$ 最大\upcite{DBLP:conf/emnlp/ArtetxeLA18}。通常,选取与一个给定的$\mathbi{y}$最接近的$\mathbi{x}$ 而不是选取与一个给定的$\mathbi{x}$最接近的$\mathbi{y}$来计算$P(\mathbi{y}|\mathbi{x})$,因为选取与一个给定的$\mathbi{x}$最接近的$\mathbi{y}$总是$P(\mathbi{y}|\mathbi{x})$里概率最大的元素,这时候总是可以调整$\tau$使得所有$P$的取值都接近1。实际中为了选取最优$\tau$会为$P(\mathbi{y}|\mathbi{x})$$P(\mathbi{x}|\mathbi{y})$ 同时优化$\tau$
\parinterval 尽管已经得到了短语的翻译,短语表的另外一个重要的组成部分,也就是短语对的得分(概率)无法由词典归纳方法直接给出,而这些得分在统计机器翻译模型中非常重要。在无监督词典归纳中,在推断词典的时候会为一对源语言单词和目标语言单词打分(词嵌入之间的相似度),再根据打分来决定哪一个目标语言单词更有可能是当前源语言单词的翻译。在无监督短语归纳中,这样一个打分已经提供了对短语对质量的度量,因此经过适当的归一化处理后就可以得到短语对的得分。
%----------------------------------------------------------------------------------------
% NEW SUB-SUB-SECTION
......
......@@ -24,7 +24,7 @@
\draw[-, very thick,fill=black] ([xshift=-0.6em,yshift=-1.2em]B\x) -- ([xshift=-0.3em,yshift=-1em]B\x) -- ([yshift=-1.2em]B\x) --([xshift=0.3em,yshift=-1em]B\x) -- ([xshift=0.6em,yshift=-1.2em]B\x) -- (D\x) -- (C\x) -- ([xshift=-0.6em,yshift=-1.2em]B\x);
\draw[-, very thick,fill=black] (E\x) -- ([xshift=0.2em,yshift=0.3em]E\x) -- ([xshift=0.33em]F\x) -- (F\x) -- (E\x);
\node[circle,inner sep=0pt,minimum size=0.4em,fill=black] at ([xshift=-0.7em,yshift=-0.2em]B\x){};
\node[draw,rounded corners=2pt,fill=yellow!20,minimum width=2.3cm,minimum height=2.2em](cnn\x) at ([xshift=1.8em,yshift=3.6em]A\x){CNN};
\node[draw,rounded corners=2pt,fill=yellow!20,minimum width=4.3em,minimum height=2.2em](cnn\x) at ([xshift=1.8em,yshift=3.6em]A\x){CNN};
}
\node[draw,anchor=south,rounded corners=2pt,minimum width=4.0cm,minimum height=2.2em,fill=red!20](encoder) at ([yshift=2.6em,xshift=2.2em]cnn1.north){编码器};
\node[anchor=north,font=\Large](x) at ([xshift=2.5em,yshift=-3.4em]encoder.south){$\seq{x}$};
......
......@@ -26,7 +26,7 @@
{\color{white}}
\vspace{1em}
\begin{center}
{\Huge \bfnew{后记}}
{\Huge \bfnew{\ \ \ }}
\end{center}
\vspace{2em}
......
......@@ -159,18 +159,6 @@
& & & 据规模较小 & \\
\rule{0pt}{15pt}NIST & Zh-En等 & 新闻翻译 & 评测集包括4句参 & {https://www.ldc.upenn.edu/coll} \\
& Cs Zh等 & & 考译文,质量较高 & aborations/evaluations/nist \\
\end{tabular}
\end{center}
}\end{table}
%----------------------------------------------
%----------------------------------------------
\begin{table}[htp]{
\footnotesize
\begin{center}
\begin{tabular}{p{1.6cm} | p{1.2cm} p{1.6cm} p{2.6cm} p{3.9cm}}
\rule{0pt}{15pt}{任务} & {语种} &{领域} &{描述} &{数据集地址} \\
\hline
\rule{0pt}{15pt}TVsub & Zh-En & 字幕翻译 & 数据抽取自电视剧 & {https://github.com/longyuewan} \\
& & & 字幕,用于对话中 & gdcu/tvsub \\
& & & 长距离上下文研究 & \\
......@@ -189,6 +177,7 @@
}\end{table}
%----------------------------------------------
%----------------------------------------------------------------------------------------
% NEW SECTION
%----------------------------------------------------------------------------------------
......
......@@ -6929,6 +6929,25 @@ year={2012}
publisher={Conference on Empirical Methods in Natural Language Processing},
year={2020}
}
@inproceedings{DBLP:conf/coling/ZhuWH08,
author = {Jingbo Zhu and
Huizhen Wang and
Eduard H. Hovy},
title = {Multi-Criteria-Based Strategy to Stop Active Learning for Data Annotation},
publisher = {International Conference on Computational Linguistics},
pages = {1129--1136},
year = {2008}
}
@inproceedings{DBLP:conf/iccv/TrikiABT17,
author = {Amal Rannen Triki and
Rahaf Aljundi and
Matthew B. Blaschko and
Tinne Tuytelaars},
title = {Encoder Based Lifelong Learning},
pages = {1329--1337},
publisher = {{IEEE} International Conference on Computer Vision},
year = {2017}
}
%%%%% chapter 13------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......@@ -10262,6 +10281,37 @@ author = {Zhuang Liu and
publisher = {Annual Meeting of the Association for Computational Linguistics},
year = {2018}
}
@inproceedings{DBLP:conf/naacl/GuoQLSXZ19,
author = {Qipeng Guo and
Xipeng Qiu and
Pengfei Liu and
Yunfan Shao and
Xiangyang Xue and
Zheng Zhang},
title = {Star-Transformer},
publisher = {Annual Conference of the North American Chapter of the Association for Computational Linguistics},
pages = {1315--1325},
year = {2019}
}
@inproceedings{DBLP:conf/nips/VeitWB16,
author = {Andreas Veit and
Michael J. Wilber and
Serge J. Belongie},
title = {Residual Networks Behave Like Ensembles of Relatively Shallow Networks},
publisher = {Annual Conference on Neural Information Processing Systems},
pages = {550--558},
year = {2016}
}
@inproceedings{DBLP:conf/iclr/ChangMHTB18,
author = {Bo Chang and
Lili Meng and
Eldad Haber and
Frederick Tung and
David Begert},
title = {Multi-level Residual Networks from Dynamical Systems View},
publisher = {International Conference on Learning Representations},
year = {2018}
}
%%%%% chapter 15------------------------------------------------------
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论