Commit 59f68819 by 曹润柘

add chapter6

parent 5356d8ce
......@@ -485,27 +485,28 @@ His house is on the south bank of the river.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\vspace{0.5em}
\begin{itemize}
\item NiuTrans.SMT:NiuTrans\cite{Tong2012NiuTrans}是由东北大学自然语言处理实验室自主研发的SMT系统,该系统可支持基于短语的模型、基于层次结构的模型以及基于句法树结构的模型。由于使用C++语言开发,所以该系统运行时间快,所占存储空间少且易于上手。系统中内嵌有$n$-gram语言模型,故无需使用其它的系统即可对语言进行建模。网址:\url{http://www.niutrans.com/}
\item NiuTrans.SMT:NiuTrans\cite{Tong2012NiuTrans}是由东北大学自然语言处理实验室自主研发的SMT系统,该系统可支持基于短语的模型、基于层次结构的模型以及基于句法树结构的模型。由于使用C++语言开发,所以该系统运行时间快,所占存储空间少且易于上手。系统中内嵌有$n$-gram语言模型,故无需使用其它的系统即可对语言进行建模。\url{http://www.niutrans.com/}
\vspace{0.5em}
\item Moses:Moses\cite{Koehn2007Moses}是统计机器翻译时代最著名的系统之一,(主要)由爱丁堡大学的机器翻译团队开发。最新的Moses系统支持很多的功能,例如,它既支持基于短语的模型,也支持基于句法的模型。Moses 提供因子化翻译模型(factored translation model),该模型可以在不同的层次中使用不同的信息。此外,它允许将混淆网络和字格(word lattices)作为输入,可缓解系统的1-best 输出中的错误。Moses 还提供了很多有用的脚本和工具来支持其他的功能。网址:\url{http://www.statmt.org/moses/}
\item Moses:Moses\cite{Koehn2007Moses}是统计机器翻译时代最著名的系统之一,(主要)由爱丁堡大学的机器翻译团队开发。最新的Moses系统支持很多的功能,例如,它既支持基于短语的模型,也支持基于句法的模型。Moses 提供因子化翻译模型(factored translation model),该模型可以在不同的层次中使用不同的信息。此外,它允许将混淆网络和字格(word lattices)作为输入,可缓解系统的1-best 输出中的错误。Moses 还提供了很多有用的脚本和工具来支持其他的功能。\url{http://www.statmt.org/moses/}
\vspace{0.5em}
\item Joshua:Joshua\cite{Li2010Joshua}是由约翰霍普金斯大学的语言和语音处理中心开发的层次短语翻译系统。由于Joshua是由Java语言开发,所以它在不同的平台上运行或开发时具有良好的可扩展性和可移植性。Joshua也是使用非常广泛的通机器翻译系统之一。网址:\url{http://joshua.sourceforge.net/Joshua/Welcome.html}
\item Joshua:Joshua\cite{Li2010Joshua}是由约翰霍普金斯大学的语言和语音处理中心开发的层次短语翻译系统。由于Joshua是由Java语言开发,所以它在不同的平台上运行或开发时具有良好的可扩展性和可移植性。Joshua也是使用非常广泛的通机器翻译系统之一。\url{http://joshua.sourceforge.net/Joshua/Welcome.html}
\vspace{0.5em}
\item SilkRoad:SilkRoad是由中国五个机构(中科院计算所、中科院软件所、中科院自动化所、厦门大学和哈尔滨工业大学)联合开发的,基于短语的统计机器翻译系统。该系统是中国乃至亚洲地区第一个开源的统计机器翻译系统。SilkRoad支持多解码器和规则提取并为不同组合的子系统提供了不同的实验选择。网址:\url{http://www.nlp.org.cn/project/project.php?projid=14}
\item SilkRoad:SilkRoad是由中国五个机构(中科院计算所、中科院软件所、中科院自动化所、厦门大学和哈尔滨工业大学)联合开发的,基于短语的统计机器翻译系统。该系统是中国乃至亚洲地区第一个开源的统计机器翻译系统。SilkRoad支持多解码器和规则提取并为不同组合的子系统提供了不同的实验选择。\url{http://www.nlp.org.cn/project/project.php?projid=14}
\\{\color{red} 关于silkroad系统,找了5个大学的nlp官网以及对相关论文搜索,也问了学长学姐和曹润柘,总共只找到了该系统的使用说明https://www.doc88.com/p-4174403220161.html以及一个提及它的文章《Machine Translation in China》}
\vspace{0.5em}
\item SAMT:SAMT\cite{zollmann2007the}是由卡内基梅隆大学机器翻译团队开发的语法增强的统计机器翻译系统。SAMT在解码的时候使用目标树来生成翻译规则,而不严格遵守目标语言的语法。SAMT 的一个亮点是它提供了简单但高效的方式来利用在机器翻译中句法信息。由于SAMT在hadoop中实现,它可受益于跨计算机群的大数据集的分布式处理。网址:\url{http://www.cs.cmu.edu/zollmann/samt/}
\item SAMT:SAMT\cite{zollmann2007the}是由卡内基梅隆大学机器翻译团队开发的语法增强的统计机器翻译系统。SAMT在解码的时候使用目标树来生成翻译规则,而不严格遵守目标语言的语法。SAMT 的一个亮点是它提供了简单但高效的方式来利用在机器翻译中句法信息。由于SAMT在hadoop中实现,它可受益于跨计算机群的大数据集的分布式处理。\url{http://www.cs.cmu.edu/zollmann/samt/}
\vspace{0.5em}
\item cdec:cdec\cite{Dyer2010cdec}是一个强大的解码器,是由Chris Dyer 和他的合作者们一起开发。cdec的主要的功能是它使用了翻译模型的一个统一的内部表示,并为实验结构预测问题的各种模型和算法提供了框架。所以,cdec也可以被用来做一个对齐系统或者一个更通用的学习框架。此外,cdec由于使用高效的C++语言编写,运行速度较快。网址:\url{http://cdec-decoder.org/index.php?title=MainPage}
\item cdec:cdec\cite{Dyer2010cdec}是一个强大的解码器,是由Chris Dyer 和他的合作者们一起开发。cdec的主要的功能是它使用了翻译模型的一个统一的内部表示,并为实验结构预测问题的各种模型和算法提供了框架。所以,cdec也可以被用来做一个对齐系统或者一个更通用的学习框架。此外,cdec由于使用高效的C++语言编写,运行速度较快。\url{http://cdec-decoder.org/index.php?title=MainPage}
\vspace{0.5em}
\item Phrasal:Phrasal\cite{Cer2010Phrasal}是由斯坦福自然语言处理小组开发的系统。除了传统的基于短语的模型,Phrasal还支持了基于非层次短语的模型,这种模型将基于短语的翻译延伸到非连续的短语翻译(phrasal discontinues translation),增加了模型的泛化能力。网址:\url{http://nlp.stanford.edu/phrasal/}
\item Phrasal:Phrasal\cite{Cer2010Phrasal}是由斯坦福自然语言处理小组开发的系统。除了传统的基于短语的模型,Phrasal还支持了基于非层次短语的模型,这种模型将基于短语的翻译延伸到非连续的短语翻译(phrasal discontinues translation),增加了模型的泛化能力。\url{http://nlp.stanford.edu/phrasal/}
\vspace{0.5em}
\item Jane:Jane\cite{VilarJane}是一个基于短语和基于层次短语的机器翻译系统,由亚琛工业大学的人类语言技术与模式识别小组开发。Jane提供了系统融合模块,因此可以非常方便的对多个系统进行融合。网址:\url{http://www-i6.informatik.rwth-aachen.de/jane/}
\item Jane:Jane\cite{VilarJane}是一个基于短语和基于层次短语的机器翻译系统,由亚琛工业大学的人类语言技术与模式识别小组开发。Jane提供了系统融合模块,因此可以非常方便的对多个系统进行融合。\url{http://www-i6.informatik.rwth-aachen.de/jane/}
\vspace{0.5em}
\item GIZA++:GIZA++\cite{Junczysdowmunt2012SyMGiza}是Franz Och研发的用于训练IBM模型1-5和HMM单词对齐模型的工具包。在早期,GIZA++是所有统计机器翻译系统中词对齐的标配工具。网址:\url{https://github.com/moses-smt/giza-pp}
\item GIZA++:GIZA++\cite{Junczysdowmunt2012SyMGiza}是Franz Och研发的用于训练IBM模型1-5和HMM单词对齐模型的工具包。在早期,GIZA++是所有统计机器翻译系统中词对齐的标配工具。\url{https://github.com/moses-smt/giza-pp}
\vspace{0.5em}
\item HiFST:HiFST\cite{pino2010the}是剑桥大学开发的统计机器翻译系统。该系统完全基于有限状态自动机实现,因此非常适合对搜索空间进行有效的表示。网址:\\ \url{http://ucam-smt.github.io/}
\item HiFST:HiFST\cite{pino2010the}是剑桥大学开发的统计机器翻译系统。该系统完全基于有限状态自动机实现,因此非常适合对搜索空间进行有效的表示。\\ \url{http://ucam-smt.github.io/}
\vspace{0.5em}
\item FastAlign:FastAlign\cite{dyer2013a}是一个快速,无监督的词对齐工具,由卡内基梅隆大学开发。网址:\url{https://github.com/clab/fast_align}
\item FastAlign:FastAlign\cite{dyer2013a}是一个快速,无监督的词对齐工具,由卡内基梅隆大学开发。\url{https://github.com/clab/fast_align}
\end{itemize}
\vspace{0.5em}
......@@ -513,33 +514,34 @@ His house is on the south bank of the river.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\vspace{0.5em}
\begin{itemize}
\item GroundHog:GroundHog\cite{bahdanau2015neural}基于Theano框架,由蒙特利尔大学LISA 实验室使用Python语言编写的一个框架,旨在提供灵活而高效的方式来实现复杂的循环神经网络模型。它提供了包括LSTM在内的多种模型。Bahdanau等人在此框架上又编写了GroundHog神经机器翻译系统。该系统被当作很多论文的基线系统。网址:\url{https://github.com/lisa-groundhog/GroundHog}
\item GroundHog:GroundHog\cite{bahdanau2015neural}基于Theano框架,由蒙特利尔大学LISA 实验室使用Python语言编写的一个框架,旨在提供灵活而高效的方式来实现复杂的循环神经网络模型。它提供了包括LSTM在内的多种模型。Bahdanau等人在此框架上又编写了GroundHog神经机器翻译系统。该系统被当作很多论文的基线系统。\url{https://github.com/lisa-groundhog/GroundHog}
\vspace{0.5em}
\item Nematus:Nematus\cite{SennrichNematus}是英国爱丁堡大学开发的,基于Theano框架的神经机器翻译系统。该系统使用GRU作为隐层单元,支持多层网络。Nematus 编码端有正向和反向的编码方式,可以同时提取源语句子中的上下文信息。该系统的一个优点是,它可以支持输入端有多个特征的输入(例如词的词性等)。网址:\url{https://github.com/EdinburghNLP/nematus}
\item Nematus:Nematus\cite{SennrichNematus}是英国爱丁堡大学开发的,基于Theano框架的神经机器翻译系统。该系统使用GRU作为隐层单元,支持多层网络。Nematus 编码端有正向和反向的编码方式,可以同时提取源语句子中的上下文信息。该系统的一个优点是,它可以支持输入端有多个特征的输入(例如词的词性等)。\url{https://github.com/EdinburghNLP/nematus}
\vspace{0.5em}
\item ZophRNN:ZophRNN\cite{zoph2016simple}是由南加州大学的Barret Zoph 等人使用C++语言开发的系统。Zoph在多个GPU 上既可以训练序列模型(如语言模型),也可以训练序列到序列的模型(如神经机器翻译模型)。当训练神经机器翻译系统时,Zoph也支持了多源输入,即在输入源语句子时可同时输入其一种译文。该系统由于使用C++语言,运行速度快。网址:\url{https://github.com/isi-nlp/Zoph_RNN}
\item ZophRNN:ZophRNN\cite{zoph2016simple}是由南加州大学的Barret Zoph 等人使用C++语言开发的系统。Zoph在多个GPU 上既可以训练序列模型(如语言模型),也可以训练序列到序列的模型(如神经机器翻译模型)。当训练神经机器翻译系统时,Zoph也支持了多源输入,即在输入源语句子时可同时输入其一种译文。该系统由于使用C++语言,运行速度快。\url{https://github.com/isi-nlp/Zoph_RNN}
\vspace{0.5em}
\item Fairseq:Fairseq\cite{Ottfairseq}是由Facebook开发的,基于PyTorch框架的用以解决序列到序列问题的工具包,其中包括基于卷积神经网络、基于循环神经网络、基于Transformer的模型等。Fairseq是当今使用最广泛的神经机器翻译开源系统之一。\url{https://github.com/facebookresearch/fairseq}
\vspace{0.5em}
\item Tensor2Tensor:Tensor2Tensor\cite{VaswaniTensor2Tensor}是由谷歌推出的,基于TensorFlow框架的开源系统。该系统基于Transformer模型,因此可以支持大多数序列到序列任务。得益于Transformer 的网络结构,系统的训练速度较快。现在,Tensor2Te-\\nsor也是机器翻译领域广泛使用的开源系统之一。网址:\url{https://github.com/tensorflow/tensor2tensor}
\item Tensor2Tensor:Tensor2Tensor\cite{VaswaniTensor2Tensor}是由谷歌推出的,基于TensorFlow框架的开源系统。该系统基于Transformer模型,因此可以支持大多数序列到序列任务。得益于Transformer 的网络结构,系统的训练速度较快。现在,Tensor2Te-\\nsor也是机器翻译领域广泛使用的开源系统之一。\url{https://github.com/tensorflow/tensor2tensor}
\vspace{0.5em}
\item OpenNMT:OpenNMT\cite{KleinOpenNMT}系统是由哈佛大学自然语言处理研究组开源的,基于Torch框架的神经机器翻译系统。OpenNMT系统的早期版本使用Lua 语言编写,现在也扩展到了TensorFlow和PyTorch,设计简单易用,易于扩展,同时保持效率和翻译精度。网址:\url{https://github.com/OpenNMT/OpenNMT}
\item OpenNMT:OpenNMT\cite{KleinOpenNMT}系统是由哈佛大学自然语言处理研究组开源的,基于Torch框架的神经机器翻译系统。OpenNMT系统的早期版本使用Lua 语言编写,现在也扩展到了TensorFlow和PyTorch,设计简单易用,易于扩展,同时保持效率和翻译精度。\url{https://github.com/OpenNMT/OpenNMT}
\vspace{0.5em}
\item 斯坦福神经机器翻译开源代码库:斯坦福大学自然语言处理组(Stanford NLP)发布了一篇教程,介绍了该研究组在神经机器翻译上的研究信息,同时实现了多种翻译模型\cite{luong2016acl_hybrid}。网址:\url{https://nlp.stanford.edu/projects/nmt/}
\item 斯坦福神经机器翻译开源代码库:斯坦福大学自然语言处理组(Stanford NLP)发布了一篇教程,介绍了该研究组在神经机器翻译上的研究信息,同时实现了多种翻译模型\cite{luong2016acl_hybrid}。\url{https://nlp.stanford.edu/projects/nmt/}
\\{\color{red} 此处也问了学长学姐,可能该处的原意是引用了教程\\https://sites.google.com/site/acl16nmt/,所以按照自己的理解改了一下,另外在斯坦福nlp的官网上发现他们实现的三种结构中,只有两个开源了代码库,并且这两个连接给出的引用是一致的,所以暂时选用了它给出的引用}
\vspace{0.5em}
\item THUMT:清华大学NLP团队实现的神经机器翻译系统,支持Transformer等模型\cite{ZhangTHUMT}。该系统主要基于Tensorflow和Theano实现,其中Theano版本包含了RNNsearch模型,训练方式包括MLE (Maximum Likelihood Estimate), MRT\\(minimum risk training), SST(semi-supervised training)。Tensorflow 版本实现了Seq2Seq, RNNsearch, Transformer三种基本模型。网址:\url{https://github.com/THUNLP-MT/THUMT}
\item THUMT:清华大学NLP团队实现的神经机器翻译系统,支持Transformer等模型\cite{ZhangTHUMT}。该系统主要基于Tensorflow和Theano实现,其中Theano版本包含了RNNsearch模型,训练方式包括MLE (Maximum Likelihood Estimate), MRT\\(minimum risk training), SST(semi-supervised training)。Tensorflow 版本实现了Seq2Seq, RNNsearch, Transformer三种基本模型。\url{https://github.com/THUNLP-MT/THUMT}
\vspace{0.5em}
\item NiuTrans.NMT:由小牛翻译团队基于NiuTensor工具集实现,支持循环神经网络、Transformer等结构,并支持语言建模、序列标注、机器翻译等任务。支持机器翻译GPU与CPU 训练及解码。其小巧易用,为开发人员提供快速二次开发基础。此外,NiuTrans.NMT已经得到了大规模应用,形成了支持187种语言翻译的小牛翻译系统。网址:\url{http://niutrans.com/opensource/}
\item NiuTrans.NMT:由小牛翻译团队基于NiuTensor工具集实现,支持循环神经网络、Transformer等结构,并支持语言建模、序列标注、机器翻译等任务。支持机器翻译GPU与CPU 训练及解码。其小巧易用,为开发人员提供快速二次开发基础。此外,NiuTrans.NMT已经得到了大规模应用,形成了支持187种语言翻译的小牛翻译系统。\url{http://niutrans.com/opensource/}
\vspace{0.5em}
\item MARIANNMT:主要由微软翻译团队搭建\cite{JunczysMarian},其使用纯C++实现的用于GPU\\/CPU训练和解码的引擎,支持多GPU训练和批量解码,最小限度依赖第三方库,静态编译一次之后,复制其二进制文件就能在其他平台使用。网址:\url{https://marian-nmt.github.io/}
\item MARIANNMT:主要由微软翻译团队搭建\cite{JunczysMarian},其使用纯C++实现的用于GPU\\/CPU训练和解码的引擎,支持多GPU训练和批量解码,最小限度依赖第三方库,静态编译一次之后,复制其二进制文件就能在其他平台使用。\url{https://marian-nmt.github.io/}
\vspace{0.5em}
\item Sockeye:由Awslabs开发的神经机器翻译框架\cite{hieber2017sockeye}。其中支持RNNSearch、Tra-nsformer、CNN等翻译模型,同时还提供了从图片翻译到文字的模块。提供了WMT 德英新闻翻译、领域适应任务、多语言零资源翻译任务的教程。网址:\url{https://awslabs.github.io/sockeye/}
\item Sockeye:由Awslabs开发的神经机器翻译框架\cite{hieber2017sockeye}。其中支持RNNSearch、Tra-nsformer、CNN等翻译模型,同时还提供了从图片翻译到文字的模块。提供了WMT 德英新闻翻译、领域适应任务、多语言零资源翻译任务的教程。\url{https://awslabs.github.io/sockeye/}
\vspace{0.5em}
\item CytonMT:由NICT开发的一种用C++实现的高效神经机器翻译开源工具包\cite{WangCytonMT}。主要支持Transformer模型,并支持一些常用的训练方法以及解码方法。网址:\url{https://github.com/arthurxlw/cytonMt}
\item CytonMT:由NICT开发的一种用C++实现的高效神经机器翻译开源工具包\cite{WangCytonMT}。主要支持Transformer模型,并支持一些常用的训练方法以及解码方法。\url{https://github.com/arthurxlw/cytonMt}
\vspace{0.5em}
\item OpenSeq2Seq:由NVIDIA团队开发的\cite{KuchaievMixed}基于Tensorflow的模块化架构,用于序列到序列的模型,允许从可用组件中组装新模型,支持混合精度训练,利用NVIDIA Volta Turing GPU中的Tensor核心,基于Horovod的快速分布式训练,支持多GPU,多节点多模式。网址:\url{https://nvidia.github.io/OpenSeq2Seq/html/index.html}
\item OpenSeq2Seq:由NVIDIA团队开发的\cite{KuchaievMixed}基于Tensorflow的模块化架构,用于序列到序列的模型,允许从可用组件中组装新模型,支持混合精度训练,利用NVIDIA Volta Turing GPU中的Tensor核心,基于Horovod的快速分布式训练,支持多GPU,多节点多模式。\url{https://nvidia.github.io/OpenSeq2Seq/html/index.html}
\vspace{0.5em}
\item NMTPyTorch:由勒芒大学语言实验室发布的基于序列到序列框架的神经网络翻译模型\cite{nmtpy2017},Nmtpytorch的核心部分依赖于Numpy,Pytorch和tqdm。其允许训练各种端到端神经体系结构,包括但不限于神经机器翻译,图像字幕和自动语音识别系统。网址:\url{https://github.com/lium-lst/nmtpytorch}
\item NMTPyTorch:由勒芒大学语言实验室发布的基于序列到序列框架的神经网络翻译模型\cite{nmtpy2017},Nmtpytorch的核心部分依赖于Numpy,Pytorch和tqdm。其允许训练各种端到端神经体系结构,包括但不限于神经机器翻译,图像字幕和自动语音识别系统。\url{https://github.com/lium-lst/nmtpytorch}
\end{itemize}
\vspace{0.5em}
......
......@@ -1275,211 +1275,6 @@ p_0+p_1 & = & 1 \label{eqC3.62-new}
\item 除了在机器翻译建模上的开创性工作,IBM模型的另一项重要贡献是建立了统计词对齐的基础模型。在训练IBM模型的过程中,除了学习到模型参数,我们还可以得到双语数据上的词对齐结果。也就是说词对齐标注是IBM模型训练的间接产物。这也使得IBM模型成为了自动词对齐的重要方法。包括GIZA++在内的很多工作,实际上更多的是被用于自动词对齐任务,而非简单的训练IBM模型参数。随着词对齐概念的不断深入,这个任务逐渐成为了自然语言处理中的重要分支,比如,对IBM模型的结果进行对称化\cite{och2003systematic},也可以直接使用判别式模型利用分类模型解决词对齐问题\cite{ittycheriah2005maximum},甚至可以把利用对齐的思想用于短语和句法结构的双语对应\cite{xiao2013unsupervised}。除了GIZA++,研究人员也开发了很多优秀的自动词对齐工具,比如,FastAlign(\url{https://github.com/clab/fast_align})、Berkeley Aligner(\url{https://github.com/mhajiloo/berkeleyaligner})等,这些工具现在也有很广泛的应用。
\end{itemize}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%第一章附录A
\begin{appendices}
\chapter{附录A}
\label{appendix-A}
\end{appendices}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%第三章附录B
\begin{appendices}
\chapter{附录B}
\label{appendix-B}
\section{IBM模型3训练方法}
\parinterval 模型3的参数估计与模型1和模型2采用相同的方法。这里直接给出辅助函数。
\begin{eqnarray}
h(\mathbf{t},d,n,p, \lambda,\mu, \nu, \zeta) & = & \textrm{P}_{\theta}(\mathbf{s}|\mathbf{t})-\sum_{e}\lambda_{e}(\sum_{s}\mathbf{t}(\mathbf{s}|\mathbf{t})-1)-\sum_{i}\mu_{iml}(\sum_{j}d(j|i,m,l)-1) \nonumber \\
& & -\sum_{e}\nu_{e}(\sum_{\varphi}n(\varphi|e)-1)-\zeta(p_0+p_1-1)
\label{eq:1.1}
\end{eqnarray}
%----------------------------------------------
\parinterval 由于篇幅所限这里略去了推导步骤直接给出一些用于参数估计的等式。
\begin{eqnarray}
c(\mathbf{s}|\mathbf{t},\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{p}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \sum_{i=1}^{m} (\delta(s_i,\mathbf{s}) \cdot \delta(t_{a_{i}},\mathbf{t})))
\label{eq:1.2}
\end{eqnarray}
\begin{eqnarray}
c(i|j,m,l;\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{p}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \delta(j,a_i))
\label{eq:1.3}
\end{eqnarray}
\begin{eqnarray}
c(\varphi|e;\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{p}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \sum_{j=1}^{l}\delta(\varphi,\varphi_{j})\delta(e,e_j))
\label{eq:1.4}
\end{eqnarray}
\begin{eqnarray}
c(0|\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{p}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times (m-2\varphi_0) )
\label{eq:1.5}
\end{eqnarray}
\begin{eqnarray}
c(1|\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{p}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \varphi_0)
\label{eq:1.6}
\end{eqnarray}
%----------------------------------------------
\parinterval 进一步,
\begin{eqnarray}
\mathbf{t}(\mathbf{s}|\mathbf{t}) = \lambda_{t}^{-1} \times \sum_{k=1}^{S}c(\mathbf{s}|\mathbf{t};\mathbf{s}(k),\mathbf{t}(k))
\label{eq:1.7}
\end{eqnarray}
\begin{eqnarray}
d(i|j,m,l) = \mu_{jml}^{-1} \times \sum_{k=1}^{S}c(i|j,m,l;\mathbf{s}(k),\mathbf{t}(k))
\label{eq:1.8}
\end{eqnarray}
\begin{eqnarray}
n(\varphi|\mathbf{t}) = \nu_{t}^{-1} \times \sum_{k=1}^{S}c(\varphi |t;\mathbf{s}(k),\mathbf{t}(k))
\label{eq:1.9}
\end{eqnarray}
\begin{eqnarray}
p_k = \zeta^{-1} \sum_{s=1}^{S}c(k;\mathbf{s}(s),\mathbf{t}(s))
\label{eq:1.10}
\end{eqnarray}
%----------------------------------------------
\parinterval 在模型3中,因为产出率的引入,我们并不能像在模型1和模型2中那样,在保证正确性的情况下加速参数估计的过程。这就使得每次迭代过程中,我们都不得不面对大小为$(l+1)^m$的词对齐空间。遍历所有$(l+1)^m$个词对齐所带来的高时间复杂度显然是不能被接受的。因此就要考虑是不是可以仅利用词对齐空间中的部分词对齐对这些参数进行估计。比较简单且直接的方法就是仅利用Viterbi对齐来进行参数估计。遗憾的是,在模型3中我们没有方法直接获得Viterbi对齐。这样只能采用一种折中的方法,即仅考虑那些使得$\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t})$值较高的词对齐。这里把这部分词对齐组成的集合记为S。式(\ref{eq:1.2})可以被修改为,
\begin{eqnarray}
c(\mathbf{s}|\mathbf{t},\mathbf{s},\mathbf{t}) \approx \sum_{\mathbf{a} \in \mathbf{S}}(\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times \sum_{i=1}^{m}(\delta(s_i,\mathbf{s}) \cdot \delta(t_{a_{i}},\mathbf{t})))
\label{eq:1.11}
\end{eqnarray}
%----------------------------------------------
\parinterval 同理可以获得式(\ref{eq:1.3})、式(\ref{eq:1.4})、式(\ref{eq:1.5})和式(\ref{eq:1.6})的修改结果。
\parinterval 在模型3中,可以如下定义\textrm{S}
\begin{eqnarray}
\textrm{S} = N(b^{\infty}(V(\mathbf{s}|\mathbf{t};2))) \cup (\mathop{\cup}\limits_{ij} N(b_{i \leftrightarrow j}^{\infty}(V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},2))))
\label{eq:1.12}
\end{eqnarray}
%----------------------------------------------
\parinterval 其中 $b^{\infty}(V(\mathbf{s}|\mathbf{t};2))$和$b_{i \leftrightarrow j}^{\infty}(V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},2))$ 分别是对 $V(\mathbf{s}|\mathbf{t};3)$和$V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},3)$ 的估计。在计算\textrm{S}的过程中,我们需要知道一个对齐$\mathbf{a}$的邻居$\mathbf{a}'$的概率,即如何通过$\textrm{p}_{\theta}(\mathbf{a},\mathbf{s}|\mathbf{t})$计算$\textrm{p}_{\theta}(\mathbf{a}',\mathbf{s}|\mathbf{t})$。在模型3中,如果$\mathbf{a}$和$\mathbf{a}'$区别于某个源语单词的对齐到的目标位置上($a_j$不等于$a_{j}'$),那么
\begin{small}
\begin{eqnarray}
\textrm{p}_{\theta}(\mathbf{a}',\mathbf{s}|\mathbf{t}) = \textrm{p}_{\theta}(\mathbf{a},\mathbf{s}|\mathbf{t}) \cdot \frac{\varphi_{j'}+1}{\varphi_j} \cdot \frac{n(\varphi_{j'}+1|t_{j'})}{n(\varphi_{j'}|t_{j'})} \cdot \frac{n(\varphi_{j-1}|t_{j})}{n(\varphi_{j}|t_{j})} \cdot \frac{t(s_i|t_{j'})}{t(s_{i}|t_{j})} \cdot \frac{d(i|j',m,l)}{d(i|j,m,l)}
\label{eq:1.13}
\end{eqnarray}
\end{small}
%----------------------------------------------
\parinterval 如果$\mathbf{a}$和$\mathbf{a}'$区别于两个位置$i_1$和$i_2$的对齐上,$a_{i_{1}}=a_{i_{2}}'$且$a_{i_{2}}=a_{i_{1}}'$,那么
\begin{eqnarray}
\textrm{P}_{\theta}(\mathbf{a'},\mathbf{s}|\mathbf{t}) = \textrm{P}_{\theta}(\mathbf{a},\mathbf{s}|\mathbf{t}) \cdot \frac{t(s_{i_{2}}|t_{a_{i_{2}}})}{t(s_{i_{1}}|t_{a_{i_{1}}})} \cdot \frac{d(i_{2}|a_{i_{2}},m,l)}{d(i_{1}|a_{i_{1}},m,l)}
\label{eq:1.14}
\end{eqnarray}
%----------------------------------------------
\parinterval 这样每次迭代就可以仅在\textrm{S}上进行计数。相比整个词对齐空间,\textrm{S}只是一个非常小的子集,因此运算复杂度可以大大被降低。本质上说,这里定义\textrm{S}是为了用模型2的Viterbi对齐来估计模型3的Viterbi对齐。
\parinterval 对于模型3的参数估计过程,实际上是建立在模型1和模型2的参数估计结果上的。这不仅是因为模型3要利用模型2的Viterbi对齐,而且还因为模型3参数的初值也要直接利用模型2的参数。从这个角度说,模型1,2,3是有序的且向前依赖的。单独的对模型3的参数进行估计是极其困难的。实际上IBM的模型4和模型5也具有这样的性质,即他们都可以利用前一个模型参数估计的结果作为自身参数的初始值。
\section{IBM模型4训练方法}
\parinterval 模型4的参数估计基本与模型3一致。需要修改的是扭曲度的估计公式,如下:
\begin{eqnarray}
c_1(\Delta_i|ca,cb;\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{P}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times s_1(\Delta_i|ca,cb;\mathbf{a},\mathbf{s},\mathbf{t}))
\label{eq:1.15}
\end{eqnarray}
\begin{small}
\begin{eqnarray}
s_1(\Delta_i|ca,cb;\mathbf{a},\mathbf{s},\mathbf{t}) = \sum_{p=1}^l (\varepsilon(\phi_p) \cdot \delta(\pi_{p1}-\odot _{[p]},\Delta_i) \cdot \delta(A(e_{p-1}),ca) \cdot \delta(B(\tau_{p1}),cb))
\label{eq:1.16}
\end{eqnarray}
\end{small}
\begin{eqnarray}
d_1(\Delta_i|ca,cb;\mathbf{s},\mathbf{t}) = \mu_{1cacb}^{-1} \times \sum_{s=1}^{S}c_1(\Delta_i|ca,cb;\mathbf{s}(s),\mathbf{t}(s))
\label{eq:1.17}
\end{eqnarray}
\begin{eqnarray}
c_{>1}(\Delta_i|cb;\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{p}_{\theta}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times s_{>1}(\Delta_i|cb;\mathbf{a},\mathbf{s},\mathbf{t}))
\label{eq:1.18}
\end{eqnarray}
\begin{eqnarray}
s_{>1}(\Delta_i|cb;\mathbf{a},\mathbf{s},\mathbf{t}) = \sum_{p=1}^l(\varepsilon(\phi_p-1)\sum_{k=2}^{\phi_p}\delta(p-\pi_{[p]k-1},\Delta_i) \cdot \delta(B(\tau_{[p]k}),cb))
\label{eq:1.19}
\end{eqnarray}
\begin{eqnarray}
d_{>1}(\Delta_i|cb;\mathbf{s},\mathbf{t}) = \mu_{>1cb}^{-1} \times \sum_{s=1}^{S}c_{>1}(\Delta_i|cb;\mathbf{s}(s),\mathbf{t}(s))
\label{eq:1.20}
\end{eqnarray}
%----------------------------------------------
\parinterval 其中,
\begin{eqnarray}
\varepsilon(x) = \begin{cases}
0 & x \leq 0 \\
1 & x > 0
\end{cases}
\label{eq:1.21}
\end{eqnarray}
%----------------------------------------------
\parinterval $ca$和$cb$分别表示目标语和源语的某个词类。
\parinterval 模型4需要像模型3一样,通过定义一个词对齐集合\textrm{S},使得每次迭代都在\textrm{S}上进行,进而降低运算量。模型4中\textrm{S}的定义为,
\begin{eqnarray}
\textrm{S} = N(\tilde{b}^{\infty}(V(\mathbf{s}|\mathbf{t};2))) \cup (\mathop{\cup}\limits_{ij} N(\tilde{b}_{i \leftrightarrow j}^{\infty}(V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},2))))
\label{eq:1.22}
\end{eqnarray}
%----------------------------------------------
\parinterval 对于一个对齐$\mathbf{a}$,可用模型3对它的邻居进行排名,即按$\textrm{p}_{\theta}(b(\mathbf{a})|\mathbf{s},\mathbf{t};3)$排序。$\tilde{b}(\mathbf{a})$ \\ 表示这个排名表中满足$\textrm{p}_{\theta}(\mathbf{a}'|\mathbf{s},\mathbf{t};4) > \textrm{P}_{\theta}(\mathbf{a}|\mathbf{s},\mathbf{t};4)$的最高排名的$\mathbf{a}'$。同理可知$\tilde{b}_{i \leftrightarrow j}^{\infty}(\mathbf{a})$ \\ 的意义。这里之所以不用模型3中采用的方法直接利用$b^{\infty}(\mathbf{a})$得到模型4中高概率的对齐,是因为模型4中,要想获得某个对齐$\mathbf{a}$的邻居$\mathbf{a}'$,必须做很大调整,比如:调整$\tau_{[j]1}$和$\odot_{[j]}$等等。这个过程要比模型3的相应过程复杂得多。因此在模型4中只能借助于模型3的中间步骤来进行估计。
\setlength{\belowdisplayskip}{3pt}%调整空白大小
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{IBM模型5训练方法}
\parinterval 模型5的参数估计过程也与模型3的过程基本一致,二者的区别在于扭曲度的估计公式。在模型5中,
\begin{eqnarray}
c_1(\Delta_i|cb,v1,v2;\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{P}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times s_1(\Delta_i|cb,v1,v2;\mathbf{a},\mathbf{s},\mathbf{t}))
\label{eq:1.23}
\end{eqnarray}
\begin{eqnarray}
s_1(\Delta_i|cb,v1,v2;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{p=1}^l (\varepsilon(\phi_p) \cdot \delta(v_{\pi_{p1}},\Delta_i) \cdot \delta(X_{\{p-1\}},v1) \nonumber \\
& & \cdot \delta(v_m-\phi_p+1,v2) \cdot \delta(v_{\pi_{p1}},v_{\pi_{p1-1}}))
\label{eq:1.24}
\end{eqnarray}
\begin{eqnarray}
d_1(\Delta_i|cb;\mathbf{s},\mathbf{t}) = \mu_{1cb}^{-1} \times \sum_{s=1}^{S}c_1(\Delta_i|cb;\mathbf{s}(s),\mathbf{t}(s))
\label{eq:1.25}
\end{eqnarray}
\begin{eqnarray}
c_{>1}(\Delta_i|cb,v;\mathbf{s},\mathbf{t}) = \sum_{\mathbf{a}}(\textrm{P}(\mathbf{s},\mathbf{a}|\mathbf{t}) \times s_{>1}(\Delta_i|cb,v;\mathbf{a},\mathbf{s},\mathbf{t}))
\label{eq:1.26}
\end{eqnarray}
%\begin{small}
\begin{eqnarray}
s_{>1}(\Delta_i|cb,v;\mathbf{a},\mathbf{s},\mathbf{t}) & = & \sum_{p=1}^l(\varepsilon(\phi_p-1)\sum_{k=2}^{\phi_p}(\delta(v_{\pi_{pk}}-v_{\pi_{[p]k-1}},\Delta_i) \nonumber \\
& & \cdot \delta(B(\tau_{[p]k}) ,cb) \cdot \delta(v_m-v_{\pi_{p(k-1)}}-\phi_p+k,v) \nonumber \\
& & \cdot \delta(v_{\pi_{p1}},v_{\pi_{p1-1}})))
\label{eq:1.27}
\end{eqnarray}
%\end{small}
\begin{eqnarray}
d_{>1}(\Delta_i|cb,v;\mathbf{s},\mathbf{t}) = \mu_{>1cb}^{-1} \times \sum_{s=1}^{S}c_{>1}(\Delta_i|cb,v;\mathbf{s}(s),\mathbf{t}(s))
\label{eq:1.28}
\end{eqnarray}
%----------------------------------------------
\vspace{0.5em}
\noindent 这里$X_{\{p-1\}}$表示在位置小于$p$的非空对的目标语单词对应的源语单词的平均位置。从式(\ref{eq:1.24})中可以看出因子$\delta(v_{\pi_{p1}},v_{\pi_{p1-1}})$保证了,即使对齐$\mathbf{a}$不合理(一个源语位置对应多个目标语位置)也可以避免在这个不合理的对齐上计算结果。需要注意的是因子$\delta(v_{\pi_{p1}},v_{\pi_{p1-1}})$,只能保证$\mathbf{a}$中不合理的部分不产生坏的影响,而$\mathbf{a}$中其它正确的部分仍会参与迭代。
\parinterval 不过上面的参数估计过程与前面4个模型中参数估计过程并不完全一样。前面四个模型在每次迭代中,可以在给定$\mathbf{s}$、$\mathbf{t}$和一个对齐$\mathbf{a}$的情况下直接计算并更新参数。但是在模型5的参数估计过程中,如公式(\ref{eq:1.24})中,需要模拟出由$\mathbf{t}$生成$\mathbf{s}$的过程才能得到正确的结果,因为从$\mathbf{t}$、$\mathbf{s}$和$\mathbf{a}$中是不能直接得到正确结果的。具体说,就是要从目标语句子的第一个单词开始到最后一个单词结束,依次生成每个目标语单词对应的源语单词,每处理完一个目标语单词就要暂停,然后才能计算式(\ref{eq:1.24})中求和符号里面的内容。这也就是说即使给定了$\mathbf{s}$、$\mathbf{t}$和一个对齐$\mathbf{a}$,也不能直接在它们上计算,必须重新模拟$\mathbf{t}$到$\mathbf{s}$的生成过程。
\parinterval 从前面的分析可以看出,虽然模型5比模型4更精确,但是模型5过于复杂以至于给参数估计增加了巨大的计算量(对于每组$\mathbf{t}$、$\mathbf{s}$和$\mathbf{a}$都要模拟$\mathbf{t}$生成$\mathbf{s}$的翻译过程,时间复杂度成指数增加)。因此模型5并不具有很强的实际意义。
\parinterval 在模型5中同样需要定义一个词对齐集合\textrm{S},使得每次迭代都在\textrm{S}上进行。这里对\textrm{S}进行如下定义
\begin{eqnarray}
\textrm{S} = N(\tilde{\tilde{b}}^{\infty}(V(\mathbf{s}|\mathbf{t};2))) \cup (\mathop{\cup}\limits_{ij} N(\tilde{\tilde{b}}_{i \leftrightarrow j}^{\infty}(V_{i \leftrightarrow j}(\mathbf{s}|\mathbf{t},2))))
\label{eq:1.29}
\end{eqnarray}
\vspace{0.5em}
%----------------------------------------------
\parinterval 这里$\tilde{\tilde{b}}(\mathbf{a})$借用了模型4中$\tilde{b}(\mathbf{a})$的概念。不过$\tilde{\tilde{b}}(\mathbf{a})$表示在利用模型3进行排名的列表中满足$\textrm{p}_{\theta}(\mathbf{a}'|\mathbf{s},\mathbf{t};5) > \textrm{p}_{\theta}(\mathbf{a}|\mathbf{s},\mathbf{t};5)$的最高排名的词对齐。
\end{appendices}
......
......@@ -158,7 +158,7 @@
\begin{scope}
{\footnotesize
\node [anchor=east] (label4) at ([yshift=0.8em]ft11.west) {翻译就是一条};
\node [anchor=east] (label4) at ([yshift=0.4em]ft11.west) {翻译就是一条};
\node [anchor=north west] (label4part2) at ([yshift=0.7em]label4.south west) {译文选择路径};
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
% Encoder-decoder sketch: the source sentence (bottom) is mapped to a row of
% numeric cells inside a rounded box (middle), and the target sentence (top)
% is produced from that box.
\begin{tikzpicture}
\begin{scope}
\small{
  % Source sentence at the origin. The words "对" and "你" are restored so that
  % the source matches the target "with you" (the original gap was three
  % 4-space separators wide).
  \node [anchor=south west,minimum width=15em] (source) at (0,0) {\textbf{source}: 我\ \ \ \ 对\ \ \ \ 你\ \ \ \ 感到\ \ \ \ 满意};
  {
    % Target sentence, placed 12em above the source node.
    \node [anchor=south west,minimum width=15em] (target) at ([yshift=12em]source.north west) {\textbf{target}: I\ \ am\ \ \ satisfied\ \ \ with\ \ \ you};
  }
  {
    % Rounded frame midway between source and target, filled with six cells
    % whose shade of ugreen varies with the displayed value.
    \node [anchor=center,minimum width=9.6em,minimum height=1.8em,draw,rounded corners=0.3em] (hidden) at ([yshift=6em]source.north) {};
    \node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!20] (cell01) at ([xshift=0.2em]hidden.west) {\footnotesize{.2}};
    \node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!10] (cell02) at (cell01.east) {\footnotesize{-1}};
    \node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!70] (cell03) at (cell02.east) {\footnotesize{6}};
    \node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!50] (cell04) at (cell03.east) {\footnotesize{5}};
    \node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!30] (cell05) at (cell04.east) {\footnotesize{.7}};
    \node [anchor=west,minimum width=1.5em,minimum size=1.5em,fill=ugreen!10] (cell06) at (cell05.east) {\footnotesize{-2}};
  }
  % Placeholder node 9em to the left of the first cell; it only widens the
  % picture's bounding box on the left.
  \node (cell010) at ([xshift=-9em,yshift=0em]cell01.west){\quad};
  %\rightarrow {}
  % Text arrow to the right of the cells, followed by a two-line caption
  % ("source sentence" / "representation").
  \node [anchor=west,minimum width=1.5em,minimum size=1.5em] (cell07) at (cell06.east) {\hspace{0.07em}\footnotesize{--->}};
  \node [anchor=west,minimum width=1.5em,minimum size=1.5em] (cell08) at (cell06.east){\small{
    \hspace{0.6em}
    \begin{tabular}{l}
    源语言句\\
    {\red ``表示''}
    \end{tabular}
    }
  };
  {
    % Red trapezoid from the source sentence up to the bottom edge of the box.
    \filldraw [fill=red!20,draw=white] (source.north west) -- (source.north east) -- ([xshift=-0.2em,yshift=-0.1em]hidden.south east) -- ([xshift=0.2em,yshift=-0.1em]hidden.south west);
  }
  {
    % Blue trapezoid from the target sentence down to the top edge of the box.
    \filldraw [fill=blue!20,draw=white] (target.south west) -- (target.south east) -- ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- ([xshift=0.2em,yshift=0.1em]hidden.north west);
  }
  {
    % Arrows along the two slanted sides of the red trapezoid (source -> box).
    \draw [->,thick] (source.north west) -- ([xshift=0.2em,yshift=-0.1em]hidden.south west);
    \draw [->,thick] (source.north east) -- ([xshift=-0.2em,yshift=-0.1em]hidden.south east);
  }
  {
    % Arrows along the two slanted sides of the blue trapezoid (box -> target).
    \draw [->,thick] ([xshift=0.2em,yshift=0.1em]hidden.north west) -- (target.south west);
    \draw [->,thick] ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- (target.south east);
  }
}
{
  % "Encoder" / "Decoder" labels drawn over the two trapezoids.
  \node [anchor=south] (enclabel) at ([yshift=2em]source.north) {\small{\textbf{Encoder}}};
  \node [anchor=north] (declabel) at ([yshift=-2em]target.south) {\small{\textbf{Decoder}}};
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%------------------------------------------------------------------------
% RNN encoder-decoder translation model: three encoder steps (bottom), three
% decoder steps with Softmax outputs (top), and three annotation boxes that
% explain the Softmax layer, the embedding layers and the sentence vector C.
\begin{tikzpicture}
% \base is the unit length all node sizes and offsets are expressed in.
% NOTE(review): \base must be declared as a length outside this picture.
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum height=0.5\base,minimum width=1\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
  % RNN Encoder
  % eemb0 is a bare coordinate so that eemb1 can be positioned relative to it.
  \coordinate (eemb0) at (0,0);
  \foreach \x [count=\y from 0] in {1,2,...,3}
    \node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$e_x()$}};
  \foreach \x in {1,2,...,3}
    \node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.3\base]eemb\x.north) {};
  % Hidden-state labels are overlaid on the (empty) encoder cells; the last
  % one is redrawn in purple to mark it as the sentence representation.
  \node[] (enclabel1) at (enc1) {\tiny{$\textbf{h}_{m-2}$}};
  \node[] (enclabel2) at (enc2) {\tiny{$\textbf{h}_{m-1}$}};
  \node[rnnnode,fill=purple!30!white] (enclabel3) at (enc3) {\tiny{$\textbf{h}_{m}$}};
  \node[wordnode,left=0.4\base of enc1] (init1) {$\cdots$};
  \node[wordnode,left=0.4\base of eemb1] (init2) {$\cdots$};
  % Encoder input words.
  % NOTE(review): the first two word nodes are empty; presumably source words
  % were intended here -- TODO confirm against the original figure.
  \node[wordnode,below=0pt of eemb1] () {};
  \node[wordnode,below=0pt of eemb2] () {};
  \node[wordnode,below=0pt of eemb3] () {$\langle$eos$\rangle$};
  % RNN Decoder
  \foreach \x in {1,2,...,3}
    \node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$e_y()$}};
  \foreach \x in {1,2,...,3}
    \node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.3\base]demb\x.north) {{\tiny{$\textbf{s}_\x$}}};
  \foreach \x in {1,2,...,3}
    \node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.3\base]dec\x.north) {\tiny{Softmax}};
  \node[wordnode,right=0.4\base of demb3] (end1) {$\cdots$};
  \node[wordnode,right=0.4\base of dec3] (end2) {$\cdots$};
  \node[wordnode,right=0.4\base of softmax3] (end3) {$\cdots$};
  % Decoder input words
  % The x coordinate comes from the embedding node and the y coordinate from
  % the first word's baseline, so all words share one baseline.
  \node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
  \ExtractX{$(demb2.south)$}
  \ExtractY{$(decwordin.base)$}
  \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
  \ExtractX{$(demb3.south)$}
  \ExtractY{$(decwordin.base)$}
  \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
  % Decoder output words
  \node[wordnode,above=0pt of softmax1] (decwordout) {Do};
  \ExtractX{$(softmax2.north)$}
  \ExtractY{$(decwordout.base)$}
  \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
  \ExtractX{$(softmax3.north)$}
  \ExtractY{$(decwordout.base)$}
  \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
  % Connections
  \draw[-latex'] (init1.east) to (enc1.west);
  \draw[-latex'] (dec3.east) to (end2.west);
  \foreach \x in {1,2,...,3}
    \draw[-latex'] (eemb\x) to (enc\x);
  \foreach \x in {1,2,...,3}
    \draw[-latex'] (demb\x) to (dec\x);
  \foreach \x in {1,2,...,3}
    \draw[-latex'] (dec\x.north) to (softmax\x.south);
  \foreach \x [count=\y from 2] in {1,2}
  {
    \draw[-latex'] (enc\x.east) to (enc\y.west);
    \draw[-latex'] (dec\x.east) to (dec\y.west);
  }
  % Curved edge from the last encoder state to the first decoder state; it is
  % routed through the helper coordinate "bridge" above enc2.
  \coordinate (bridge) at ([yshift=0.4\base]enc2.north west);
  \draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west);
  {
    % Annotation 1 (left of the Softmax row): output-probability explanation.
    \node [anchor=east] (line1) at ([xshift=-3em,yshift=0.5em]softmax1.west) {\scriptsize{基于RNN的隐层状态$\textbf{s}_i$}};
    \node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{预测目标词的概率}};
    \node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{通常,用Softmax函数}};
    \node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{实现 $\textrm{P}(y_i|\ldots)$}};
  }
  {
    % Annotation 2 (below annotation 1): word-embedding explanation.
    \node [anchor=north west] (line11) at ([yshift=-1.8em]line4.west) {\scriptsize{每个词的one-hot}};
    \node [anchor=north west] (line12) at ([yshift=0.3em]line11.south west) {\scriptsize{离散化表示都被转化为}};
    \node [anchor=north west] (line13) at ([yshift=0.3em]line12.south west) {\scriptsize{实数向量,即词嵌入}};
    \node [anchor=north west] (line14) at ([yshift=0.3em]line13.south west) {\scriptsize{($e_x()$和$e_y()$函数)}};
  }
  {
    % Annotation 3 (right of the last encoder cell): sentence representation C.
    \node [anchor=west] (line21) at ([xshift=1.3em,yshift=1.5em]enc3.east) {\scriptsize{源语编码器最后一个}};
    \node [anchor=north west] (line22) at ([yshift=0.3em]line21.south west) {\scriptsize{循环单元的输出被}};
    \node [anchor=north west] (line23) at ([yshift=0.3em]line22.south west) {\scriptsize{看作是句子的表示,}};
    \node [anchor=north west] (line24) at ([yshift=0.3em]line23.south west) {\scriptsize{记为$\textbf{C}$}};
  }
  % Coloured boxes behind each annotation, dotted frames around the nodes it
  % refers to, and dotted arrows linking the two; all on the background layer.
  \begin{pgfonlayer}{background}
  {
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=red!10,drop shadow,draw=red] [fit = (line1) (line2) (line3) (line4)] (box1) {};
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=red] [fit = (softmax1) (softmax2) (softmax3)] (box4) {};
    \draw [->,dotted,very thick,red] ([yshift=1em,xshift=2.5em]box1.east) -- ([yshift=1em,xshift=0.1em]box1.east);
  }
  {
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=green!10,drop shadow,draw=ugreen] [fit = (line11) (line12) (line13) (line14)] (box2) {};
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (eemb1) (eemb2) (eemb3)] (box5) {};
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=ugreen] [fit = (demb1) (demb2) (demb3)] (box6) {};
    \draw [->,dotted,very thick,ugreen] ([yshift=-1.3em,xshift=2.5em]box2.east) -- ([yshift=-1.3em,xshift=0.1em]box2.east);
    \draw [->,dotted,very thick,ugreen] ([xshift=0.1em]box6.west) .. controls +(west:1) and +(east:1) .. ([yshift=1.0em]box2.east) ;
  }
  {
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=purple!10,drop shadow,draw=purple] [fit = (line21) (line22) (line23) (line24)] (box3) {};
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,draw=purple] [fit = (enc3)] (box7) {};
    \draw [->,dotted,very thick,purple] ([xshift=0.1em]box7.east) -- ([xshift=0.8em]box7.east) ;
  }
  \end{pgfonlayer}
\end{scope}
\end{tikzpicture}
% Figure: every word embedding e(.) is paired with a positional encoding PE(.)
% (joined by an \oplus mark); the embeddings are then connected to the
% representations h(.) drawn in the row above, with all five feeding h3.
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=3.5em,minimum height=1.2em]

% Embedding row, left to right.
\node [rnode,anchor=south west,fill=red!20!white] (e1) at (0,0) {\scriptsize{$\textbf{e}(\textrm{``沈阳''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (e2) at ([xshift=1em]e1.south east) {\scriptsize{$\textbf{e}(\textrm{``到''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (e3) at ([xshift=1em]e2.south east) {\scriptsize{$\textbf{e}(\textrm{``广州''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (e4) at ([xshift=1em]e3.south east) {\scriptsize{$\textbf{e}(\textrm{``的''})$}};
\node [rnode,anchor=south west,fill=red!20!white] (e5) at ([xshift=1em]e4.south east) {\scriptsize{$\textbf{e}(\textrm{``机票''})$}};

% Representation row, 1.5em above the embeddings.
\node [rnode,anchor=south west,fill=green!20!white] (h1) at ([yshift=1.5em]e1.north west) {\scriptsize{$\textbf{h}(\textrm{``沈阳''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h2) at ([yshift=1.5em]e2.north west) {\scriptsize{$\textbf{h}(\textrm{``到''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h3) at ([yshift=1.5em]e3.north west) {\scriptsize{$\textbf{h}(\textrm{``广州''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h4) at ([yshift=1.5em]e4.north west) {\scriptsize{$\textbf{h}(\textrm{``的''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (h5) at ([yshift=1.5em]e5.north west) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};

% An \oplus mark directly below each embedding.
\foreach \x in {1,2,3,4,5}{
\node [anchor=north] (plus\x) at ([yshift=-0em]e\x.south) {\scriptsize{$\mathbf{\oplus}$}};
}

% Positional encodings PE(1)..PE(5), one under each embedding.
\foreach \x in {1,2,3,4,5}{
\node [rnode,anchor=north,fill=yellow!20!white] (pos\x) at ([yshift=-1.1em]e\x.south) {\scriptsize{$\textbf{PE}(\x)$}};
}

% Dotted frame around each embedding / positional-encoding pair.
\foreach \x in {1,2,3,4,5}{
\node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (e\x) (pos\x)] (box\x) {};
}

% Input words. Positions 2 and 4 had empty labels; they must name the same
% words as the embeddings e2 ("到") and e4 ("的") above them.
\node [anchor=north] (inputs1) at ([yshift=-1em]pos1.south) {\scriptsize{沈阳}};
\node [anchor=north] (inputs2) at ([yshift=-1em]pos2.south) {\scriptsize{到}};
\node [anchor=north] (inputs3) at ([yshift=-1em]pos3.south) {\scriptsize{广州}};
\node [anchor=north] (inputs4) at ([yshift=-1em]pos4.south) {\scriptsize{的}};
\node [anchor=north] (inputs5) at ([yshift=-1em]pos5.south) {\scriptsize{机票}};

% All five embeddings feed h3: curved arrows from e1, e2, e4, e5 and a
% straight one from e3, fanned out along the bottom edge of h3.
\draw [->] ([yshift=0.1em]e1.north) .. controls +(north:0.5) and +(south:0.5) .. ([xshift=-1em,yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e2.north) .. controls +(north:0.3) and +(south:0.6) .. ([xshift=-0.5em,yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e4.north) .. controls +(north:0.3) and +(south:0.6) .. ([xshift=0.5em,yshift=-0.1em]h3.south);
\draw [->] ([yshift=0.1em]e5.north) .. controls +(north:0.5) and +(south:0.5) .. ([xshift=1em,yshift=-0.1em]h3.south);

% The other representations only show the straight arrow from their own
% embedding; the "..." nodes further down stand in for the omitted arrows.
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e2.north) -- ([yshift=-0.1em]h2.south);
\draw [->] ([yshift=0.1em]e4.north) -- ([yshift=-0.1em]h4.south);
\draw [->] ([yshift=0.1em]e5.north) -- ([yshift=-0.1em]h5.south);

% Arrow from each input word up to its positional-encoding box.
\foreach \x in {1,2,3,4,5}{
\draw [->] ([yshift=-0.1em]inputs\x.north) -- ([yshift=-0.2em]pos\x.south);
}

\node [anchor=north] (dot1) at ([xshift=0.4em,yshift=-0.2em]h1.south) {\tiny{...}};
\node [anchor=north] (dot2) at ([xshift=0.4em,yshift=-0.2em]h2.south) {\tiny{...}};
\node [anchor=north] (dot4) at ([xshift=-0.4em,yshift=-0.2em]h4.south) {\tiny{...}};
\node [anchor=north] (dot5) at ([xshift=-0.4em,yshift=-0.2em]h5.south) {\tiny{...}};
\end{scope}
\end{tikzpicture}
% \ No newline at end of file
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Figure: RNN encoder-decoder. The bottom half reads the source words, the
% last encoder state (highlighted in purple) is labelled as the source
% sentence representation, and the top half emits "I am fine <eos>".
% \base, \ExtractX/\ExtractY and \XCoord/\YCoord are not defined in this block.
\begin{tikzpicture}
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder: embedding cells (green) with recurrent cells (blue) above them
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,4}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {};
\foreach \x in {1,2,...,4}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.5\base]eemb\x.north) {};
% Initial state "0"; init2 is an empty node placed 3em further left.
\node[wordnode,left=0.4\base of enc1,font=\scriptsize] (init) {0};
\node[wordnode,anchor=east] (init2) at ([xshift=-3.0em]init.west){};
% Highlight the final encoder state and label it.
\node[rnnnode,fill=purple] (repr) at (enc4) {};
\node[wordnode] (label) at ([xshift=3.5em]enc4.east) {源语言句子表示};
\draw[->,dashed,thick] (label.west) -- (enc4.east);
% Encoder input words. The three word labels were empty; restored so the
% source sentence matches the decoder output "I am fine".
\node[wordnode,below=0pt of eemb1,font=\scriptsize] (encwordin1) {我};
\node[wordnode,below=0pt of eemb2,font=\scriptsize] (encwordin2) {很};
\node[wordnode,below=0pt of eemb3,font=\scriptsize] (encwordin3) {好};
\node[wordnode,below=0pt of eemb4,font=\scriptsize] (encwordin4) {$\langle$eos$\rangle$};
% Brace under the encoder
\draw[decorate,thick,decoration={mirror,brace}]([xshift=0.0em,yshift=-1.5em]eemb1.south west) --([xshift=0.0em,yshift=-1.5em]eemb4.south east) node [font=\scriptsize,xshift=-3.8em,yshift=-1.0em,align=center](label2) {编码器};
% RNN Decoder: embedding (green), recurrent (blue) and softmax (red) cells
\foreach \x in {1,2,...,4}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([xshift=0.0em,yshift=3.0em]enc\x.north) {};
\foreach \x in {1,2,...,4}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.5\base]demb\x.north) {};
\foreach \x in {1,2,...,4}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.5\base]dec\x.north) {};
% Decoder input words, all placed on the baseline of the first one
\node[wordnode,below=0pt of demb1,font=\scriptsize] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base,font=\scriptsize] () at (\XCoord,\YCoord) {I};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base,font=\scriptsize] () at (\XCoord,\YCoord) {am};
\ExtractX{$(demb4.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base,font=\scriptsize] () at (\XCoord,\YCoord) {fine};
% Decoder output words, all placed on the baseline of the first one
\node[wordnode,above=0pt of softmax1,font=\scriptsize] (decwordout1) {I};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout1.base)$}
\node[wordnode,anchor=base,font=\scriptsize] (decwordout2) at (\XCoord,\YCoord) {am};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout1.base)$}
\node[wordnode,anchor=base,font=\scriptsize] (decwordout3) at (\XCoord,\YCoord) {fine};
\ExtractX{$(softmax4.north)$}
\ExtractY{$(decwordout1.base)$}
\node[wordnode,anchor=base,font=\scriptsize] (decwordout4) at (\XCoord,\YCoord) {$\langle$eos$\rangle$};
% Brace over the decoder
\draw[decorate,thick,decoration={brace}]([xshift=0.0em,yshift=1.3em]softmax1.north west) --([xshift=0.0em,yshift=1.3em]softmax4.north east) node [font=\scriptsize,xshift=-3.8em,yshift=1.0em,align=center](label1) {解码器};
% Connections: vertical links inside each column, then the recurrent links
\draw[-latex'] (init.east) to (enc1.west);
\foreach \x in {1,2,...,4}
\draw[-latex'] (eemb\x) to (enc\x);
\foreach \x in {1,2,...,4}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,4}
\draw[-latex'] (dec\x.north) to ([yshift=0.5\base]dec\x.north);
\foreach \x [count=\y from 2] in {1,2,...,3}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
% Curved link from the last encoder state to the first decoder state,
% routed through the (bridge) coordinate between the two halves.
\coordinate (bridge) at ([yshift=-1.15\base]demb2);
\draw[-latex'] (enc4.north) .. controls +(north:0.4\base) and +(east:0.5\base) .. (bridge) .. controls +(west:2.4\base) and +(west:0.5\base) .. (dec1.west);
\end{scope}
\end{tikzpicture}
%
%---------------------------------------
% Figure: a six-token source sentence enters an "Encoder" bar, passes through
% a "表示" (representation) box into a "Decoder" bar, and comes out as
% "This is a very long sentence". Blue double-headed curves link the source
% token 很长 to "very" and "long".
\begin{tikzpicture}
\begin{scope}
%\newlength{\mystep}
%\setlength{\mystep}{1.6em}
% Source-side anchor points, 1.6em apart.
\foreach \x in {1,2,...,6}
\node[] (s\x) at (\x * 1.6em,0) {};
% Source tokens. Four single-character tokens had empty labels; restored so
% the source reads as the counterpart of the English sentence below.
\node [] (ws1) at (s1) {\scriptsize{这}};
\node [] (ws2) at (s2) {\scriptsize{是}};
\node [] (ws3) at (s3) {\scriptsize{个}};
\node [] (ws4) at (s4) {\scriptsize{很长}};
\node [] (ws5) at (s5) {\scriptsize{的}};
\node [] (ws6) at (s6) {\scriptsize{句子}};
% Target-side anchor points, offset 2.4in to the right.
\foreach \x in {1,2,...,6}
\node[] (t\x) at (\x * 1.6em + 2.4in,0) {};
% Target tokens; the small shifts are manual position adjustments.
\node [] (wt1) at (t1) {\scriptsize{This}};
\node [] (wt2) at (t2) {\scriptsize{is}};
\node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
\node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
\node [] (wt5) at (t5) {\scriptsize{long}};
\node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}};
% Encoder -> representation -> decoder pipeline above the tokens.
\node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}};
\node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}};
\node [anchor=west,fill=green!30,minimum height=1.5em] (representation) at ([xshift=1em]encoder.east) {\footnotesize{表示}};
\draw [->,thick] ([xshift=1pt]encoder.east)--([xshift=-1pt]representation.west);
\draw [->,thick] ([xshift=1pt]representation.east)--([xshift=-1pt]decoder.west);
% Arrows from the source tokens up into the encoder bar ...
\foreach \x in {1,2,...,6}
\draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
% ... and from the decoder bar down to the target tokens (t6 is shifted 1em
% right to follow the shifted "sentence" label).
\foreach \x in {1,2,...,5}
\draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
\draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
% Links between the source token ws4 and the target words wt4 / wt5.
\draw [<->,ublue,thick] ([xshift=0.3em]ws4.south) .. controls +(-60:1) and +(south:1) .. (wt4.south);
\draw [<->,ublue,thick] (ws4.south) .. controls +(south:1.0) and +(south:1.5) .. (wt5.south);
% Label under the representation box, with a dotted arrow pointing up at it.
\node [anchor=north,fill=green!30] (attentionlabel) at ([yshift=-3.4em]representation.south) {\footnotesize{词语的关注度}};
\draw [->,dotted,very thick,ublue] ([yshift=0.1em]attentionlabel.north)--([yshift=-0.1em]representation.south);
\end{scope}
\end{tikzpicture}
% \ No newline at end of file
% Figure: encoder -> decoder used for acrostic poem generation. The four
% input characters (五 星 红 旗) become the first character of the four
% output verse lines, highlighted in red.
\begin{tikzpicture}
\begin{scope}
\tikzstyle{lnode} = [minimum height=2em,minimum width=8em,inner sep=3pt,rounded corners=2pt,draw,fill=red!20];
\tikzstyle{standard} = [rounded corners=3pt]
\node [lnode,anchor=west] (l1) at (0,0) {编码器};
\node [lnode,anchor=west,fill=blue!20] (l2) at ([xshift=3em]l1.east) {解码器};
\node [anchor=north] (inputs) at ([xshift=-1.5em,yshift=-1em]l1.south) {Inputs: 五 星 红 旗};
% The red head character of each verse was missing, leaving empty
% {\color{red}} groups and six-character lines; restored from the inputs.
\node [anchor=south] (outputs) at ([xshift=-3.5em,yshift=2em]l2.north) {Outputs: {\color{red}五}云深处小蓬莱 {\color{red}星}斗阑干次第开};
\node [anchor=south] (outputs1) at ([xshift=-1.5em,yshift=1em]l2.north) {{\color{red}红}旆壁幢春色里 {\color{red}旗}亭鼓吹乐声来};
% Data flow: inputs -> encoder -> decoder -> outputs.
\draw [->,very thick] ([yshift=-1em]l1.south) -- ([yshift=-0.1em]l1.south);
\draw [->,very thick] ([yshift=0.1em]l2.north) -- ([yshift=1em]l2.north);
\draw [->,very thick] ([xshift=0.1em]l1.east) -- ([xshift=-0.1em]l2.west);
% Dotted frame grouping encoder and decoder.
\node [rectangle,inner sep=0.5em,rounded corners=1pt,very thick,dotted,draw=ugreen!80] [fit = (l1) (l2)] (box0) {};
\end{scope}
\end{tikzpicture}
% \ No newline at end of file
% Figure: five couplets in two columns. Odd-numbered nodes (l1, l3, ..., l9)
% hold the first lines in red; even-numbered nodes (l2, l4, ..., l10) hold the
% matching second lines in blue, 1em to the right of their partner.
\begin{tikzpicture}
\begin{scope}
\tikzstyle{lnode} = [minimum height=2.5em,minimum width=12em,inner sep=3pt,very thick,rounded corners=2pt,draw=red!75!black,fill=red!5];
\tikzstyle{rnode} = [minimum height=2.5em,minimum width=12em,inner sep=3pt,very thick,rounded corners=2pt,draw=blue!75!black,fill=blue!5];
\tikzstyle{standard} = [rounded corners=3pt]

% Left column: l1 is pinned at the origin, every later box hangs 0.8em
% below the previous one. Each item is <node above>/<new node>/<text>.
\node [lnode,anchor=west] (l1) at (0,0) {上联:翠竹千支歌盛世};
\foreach \srcid/\newid/\couplet in {%
1/3/上联:一帆风顺年年好,
3/5/上联:佳节迎春春生笑脸,
5/7/上联:腊梅吐芳迎红日,
7/9/上联:雪兆丰年丛岭翠}
{
\node [lnode,anchor=north] (l\newid) at ([yshift=-0.8em]l\srcid.south) {\couplet};
}

% Right column: each box sits beside its left-hand partner.
% Each item is <left partner>/<new node>/<text>.
\foreach \srcid/\newid/\couplet in {%
1/2/下联:红梅万点报新春,
3/4/下联:万事如意步步高,
5/6/下联:新年纳福富华满堂,
7/8/下联:绿柳展枝舞春风,
9/10/下联:春回大地满园红}
{
\node [rnode,anchor=west] (l\newid) at ([xshift=1em]l\srcid.east) {\couplet};
}
\end{scope}
\end{tikzpicture}
% \ No newline at end of file
%-----------------------------------------
% Figure: beam search with beam width 3 over three decoding steps. Each step
% is a column of target embedding e_y() -> state s_j -> softmax. The top-3
% words of one step become the (stacked) inputs of the next step. On the left,
% the full distribution from the first softmax and its top-3 entries are shown.
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=3.5em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\tikzstyle{wnode} = [minimum height=1.0em,minimum width=3.5em,inner sep=2pt,rounded corners=1pt,draw,fill=white];

% Embedding layer; steps 2 and 3 are marked "x3".
\node [rnnnode,anchor=west,fill=green!20] (t1) at (0,0) {\scriptsize{$e_y()$}};
\node [rnnnode,anchor=west,fill=green!20] (t2) at ([xshift=2.2em]t1.east) {\scriptsize{$e_y()$ ($\times 3$)}};
\node [rnnnode,anchor=west,fill=green!20] (t3) at ([xshift=2.2em]t2.east) {\scriptsize{$e_y()$ ($\times 3$)}};
\node [anchor=west,inner sep=2pt] (t4) at ([xshift=0.3em]t3.east) {\scriptsize{...}};

% Decoder states.
\node [rnnnode,anchor=south] (s1) at ([yshift=1em]t1.north) {\scriptsize{$\textbf{s}_1$}};
\node [rnnnode,anchor=south] (s2) at ([yshift=1em]t2.north) {\scriptsize{$\textbf{s}_2$ ($\times 3$)}};
\node [rnnnode,anchor=south] (s3) at ([yshift=1em]t3.north) {\scriptsize{$\textbf{s}_3$ ($\times 3$)}};
\node [anchor=west,inner sep=2pt] (s4) at ([xshift=0.3em]s3.east) {\scriptsize{...}};

% Softmax layer.
\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\scriptsize{softmax}};
\node [rnnnode,anchor=south,fill=blue!20] (o2) at ([yshift=1em]s2.north) {\scriptsize{softmax ($\times 3$)}};
\node [rnnnode,anchor=south,fill=blue!20] (o3) at ([yshift=1em]s3.north) {\scriptsize{softmax ($\times 3$)}};
\node [anchor=west,inner sep=2pt] (o4) at ([xshift=0.3em]o3.east) {\scriptsize{...}};

% Input words. The first input is the <eos> token, written with
% \langle/\rangle so the brackets are not typeset as math relations.
\node [wnode,anchor=north] (wt1) at ([yshift=-0.8em]t1.south) {\scriptsize{$\langle$eos$\rangle$}};
% Steps 2 and 3 take three hypotheses: two offset copies give a stacked look.
\node [wnode,anchor=north] (wt2) at ([yshift=-0.8em]t2.south) {\scriptsize{Have}};
\node [wnode,anchor=north] (wt2copy1) at ([xshift=-0.2em,yshift=-0.2em]wt2.north) {\scriptsize{Have}};
\node [wnode,anchor=north] (wt2copy2) at ([xshift=-0.4em,yshift=-0.4em]wt2.north) {\scriptsize{Have}};
\node [wnode,anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\scriptsize{you}};
\node [wnode,anchor=north] (wt3copy1) at ([xshift=-0.2em,yshift=-0.2em]wt3.north) {\scriptsize{you}};
\node [wnode,anchor=north] (wt3copy2) at ([xshift=-0.4em,yshift=-0.4em]wt3.north) {\scriptsize{you}};

% Output words, again drawn as stacks of three.
\node [wnode,anchor=center,inner sep=2pt] (wo1) at ([xshift=0.4em,yshift=1.8em]o1.north) {\scriptsize{Have}};
\node [wnode,anchor=north] (wo1copy1) at ([xshift=-0.2em,yshift=-0.2em]wo1.north) {\scriptsize{Have}};
\node [wnode,anchor=north] (wo1copy2) at ([xshift=-0.4em,yshift=-0.4em]wo1.north) {\scriptsize{Have}};
\node [wnode,anchor=center,inner sep=2pt] (wo2) at ([xshift=0.4em,yshift=1.8em]o2.north) {\scriptsize{you}};
\node [wnode,anchor=north] (wo2copy1) at ([xshift=-0.2em,yshift=-0.2em]wo2.north) {\scriptsize{you}};
\node [wnode,anchor=north] (wo2copy2) at ([xshift=-0.4em,yshift=-0.4em]wo2.north) {\scriptsize{you}};
\node [wnode,anchor=center,inner sep=2pt] (wo3) at ([xshift=0.4em,yshift=1.8em]o3.north) {\scriptsize{learned}};
\node [wnode,anchor=north] (wo3copy1) at ([xshift=-0.2em,yshift=-0.2em]wo3.north) {\scriptsize{learned}};
\node [wnode,anchor=north] (wo3copy2) at ([xshift=-0.4em,yshift=-0.4em]wo3.north) {\scriptsize{learned}};

% Vertical arrows of every column: word -> embedding -> state -> softmax,
% then the "top-3" arrow leaving the softmax.
\foreach \x in {1,2,3}{
\draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
\draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
\draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
\draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\scriptsize{top-3}};
}

% Recurrent links; the first state receives the initial value "0".
\draw [->] ([xshift=-0.5em]s1.west) -- ([xshift=-0.1em]s1.west) node [pos=0,left,inner sep=1pt] {\scriptsize{0}};
\draw [->] ([xshift=0.1em]s1.east) -- ([xshift=-0.1em]s2.west);
\draw [->] ([xshift=0.1em]s2.east) -- ([xshift=-0.1em]s3.west);

% Dotted feedback: the output stack of one step is the next step's input.
\draw [->,very thick,dotted] (wo1.east) .. controls +(east:0.6) and +(west:0.8) ..(wt2copy2.west);
\draw [->,very thick,dotted] (wo2.east) .. controls +(east:0.6) and +(west:0.8) ..(wt3copy2.west);

% C_2 and C_3 circles (stacked like the words), feeding s_2 and s_3.
\node [circle,draw,anchor=north,inner sep=2pt,fill=orange!20] (c2) at ([yshift=-2.5em]t1.south) {\scriptsize{$\textbf{C}_2$}};
\node [circle,draw,inner sep=2pt,fill=orange!20] (c2copy1) at ([yshift=-0.1em,xshift=-0.1em]c2) {\scriptsize{$\textbf{C}_2$}};
\node [circle,draw,inner sep=2pt,fill=orange!20] (c2copy2) at ([yshift=-0.2em,xshift=-0.2em]c2) {\scriptsize{$\textbf{C}_2$}};
\draw [->] ([xshift=-0.9em]c2.west) -- ([xshift=-0.3em]c2.west);
\draw [->] ([xshift=0.1em]c2.east) .. controls +(east:1.5) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]s2.west);
\node [circle,draw,anchor=north,inner sep=2pt,fill=orange!20] (c3) at ([yshift=-2.5em]t2.south) {\scriptsize{$\textbf{C}_3$}};
\node [circle,draw,inner sep=2pt,fill=orange!20] (c3copy1) at ([yshift=-0.1em,xshift=-0.1em]c3) {\scriptsize{$\textbf{C}_3$}};
\node [circle,draw,inner sep=2pt,fill=orange!20] (c3copy2) at ([yshift=-0.2em,xshift=-0.2em]c3) {\scriptsize{$\textbf{C}_3$}};
\draw [->] ([xshift=-0.9em]c3.west) -- ([xshift=-0.3em]c3.west);
\draw [->] ([xshift=0.1em]c3.east) .. controls +(east:1.5) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]s3.west);

% Word probability distribution linked to the first softmax.
\node [anchor=east] (vocab) at ([xshift=-5em]s1.west) {\tiny{$\begin{bmatrix} \textrm{Have} & 0.50 \\ \textrm{I} & 0.02 \\ \textrm{it} & 0.03 \\ \textrm{has} & 0.30 \\ \textrm{you} & 0.01 \\ \textrm{the} & 0.01 \\ \textrm{a} & 0.01 \\ \textrm{an} & 0.02 \\ \textrm{he} & 0.03 \\ \textrm{she} & 0.01 \\ \textrm{are} & 0.00 \\ \textrm{am} & 0.01 \\ ... & ... \end{bmatrix}$}};
\node [anchor=south] (vocablabel) at (vocab.north) {\scriptsize{单词的概率分布}};
\draw [->,red,very thick,dotted] (o1.west) .. controls +(west:1) and +(east:2) .. ([yshift=1em]vocab.south east);

% The top-3 entries taken from that distribution, placed next to the first
% output stack, plus the caption of the whole figure.
\node [anchor=east,inner sep=1pt] (vocabtopn) at ([xshift=-0.5em,yshift=-0.5em]wo1.west) {\scriptsize{$\begin{bmatrix} \textrm{Have} \\ \textrm{has} \\ \textrm{it} \end{bmatrix}$}};
\draw [->] ([yshift=-1.6em,xshift=-0.4em]vocab.north east) .. controls +(east:1) and +(west:1) .. ([xshift=0.1em,yshift=0.4em]vocabtopn.west) node [pos=0.3,below] (topnlabel) {\scriptsize{top-3}};
\node [anchor=north] (cap) at (vocab.south east) {\scriptsize{\textbf{束搜索($b=3$)}}};
\end{scope}
\end{tikzpicture}
%-------------------------------------------
% \ No newline at end of file
% Figure: attention weights from the unfilled query box h("机票") to the
% filled boxes left of it, drawn for two word orders. The upper row swaps
% 沈阳 and 广州 relative to the lower row, and its weights alpha_1 and
% alpha_3 differ accordingly.
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnode} = [draw,minimum width=3.5em,minimum height=1.2em]

% Lower row: four filled boxes, a "..." gap after the third, then the query.
\node [rnode,anchor=south west,fill=green!20!white] (key1) at (0,0) {\scriptsize{$\textbf{h}(\textrm{``沈阳''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key2) at ([xshift=1em]key1.south east) {\scriptsize{$\textbf{h}(\textrm{``到''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key3) at ([xshift=1em]key2.south east) {\scriptsize{$\textbf{h}(\textrm{``广州''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key4) at ([xshift=2em]key3.south east) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\node [rnode,anchor=south west] (key5) at ([xshift=1em]key4.south east) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\node [anchor=west] (sep1) at ([xshift=0.3em]key3.east) {\scriptsize{$\textbf{...}$}};
% Arcs from the query to each filled box; farther targets get taller arcs.
\draw [->] ([yshift=1pt,xshift=-3pt]key5.north) .. controls +(90:1em) and +(90:0.7em) .. ([yshift=1pt]key4.north);
\draw [->] ([yshift=1pt,xshift=0pt]key5.north) .. controls +(90:1.4em) and +(90:1.4em) .. ([yshift=1pt]key3.north);
\draw [->] ([yshift=1pt,xshift=3pt]key5.north) .. controls +(90:1.8em) and +(90:1.8em) .. ([yshift=1pt]key2.north);
\draw [->] ([yshift=1pt,xshift=6pt]key5.north) .. controls +(90:2.2em) and +(90:2.2em) .. ([yshift=1pt]key1.north);
\node [anchor=south west] (alpha1) at ([xshift=-1em]key1.north west) {\scriptsize{$\alpha_1=.2$}};
\node [anchor=south west] (alpha2) at ([xshift=-1em]key2.north west) {\scriptsize{$\alpha_2=.3$}};
\node [anchor=south west] (alpha3) at ([xshift=-1em]key3.north west) {\scriptsize{$\alpha_3=.1$}};
\node [anchor=south west] (alpha4) at ([xshift=-1em]key4.north west) {\scriptsize{$\alpha_4=.3$}};

% Upper row, 2em above the lower one. A \vspace that used to sit here was
% removed: the gap between the rows comes from the yshift below. The
% separator node is named sep2 so it no longer redefines sep1.
\node [rnode,anchor=south west,fill=green!20!white] (key6) at ([yshift=2em]key1.north west) {\scriptsize{$\textbf{h}(\textrm{``广州''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key7) at ([yshift=2em]key2.north west) {\scriptsize{$\textbf{h}(\textrm{``到''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key8) at ([yshift=2em]key3.north west) {\scriptsize{$\textbf{h}(\textrm{``沈阳''})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key9) at ([yshift=2em]key4.north west) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\node [rnode,anchor=south west] (key10) at ([yshift=2em]key5.north west) {\scriptsize{$\textbf{h}(\textrm{``机票''})$}};
\node [anchor=west] (sep2) at ([xshift=0.3em]key8.east) {\scriptsize{$\textbf{...}$}};
\draw [->] ([yshift=1pt,xshift=-3pt]key10.north) .. controls +(90:1em) and +(90:0.7em) .. ([yshift=1pt]key9.north);
\draw [->] ([yshift=1pt,xshift=0pt]key10.north) .. controls +(90:1.4em) and +(90:1.4em) .. ([yshift=1pt]key8.north);
\draw [->] ([yshift=1pt,xshift=3pt]key10.north) .. controls +(90:1.8em) and +(90:1.8em) .. ([yshift=1pt]key7.north);
\draw [->] ([yshift=1pt,xshift=6pt]key10.north) .. controls +(90:2.2em) and +(90:2.2em) .. ([yshift=1pt]key6.north);
\node [anchor=south west] (alpha5) at ([xshift=-1em]key6.north west) {\scriptsize{$\alpha_1=.1$}};
\node [anchor=south west] (alpha6) at ([xshift=-1em]key7.north west) {\scriptsize{$\alpha_2=.3$}};
\node [anchor=south west] (alpha7) at ([xshift=-1em]key8.north west) {\scriptsize{$\alpha_3=.2$}};
\node [anchor=south west] (alpha8) at ([xshift=-1em]key9.north west) {\scriptsize{$\alpha_4=.3$}};
\end{scope}
\end{tikzpicture}
% Context vector for "机票" written out as a weighted sum of the h(.) vectors.
% The opening quote is now `` (it was '', which set a closing quote on both
% sides), and the ellipsis uses \dots instead of three literal periods.
\vspace{-1.0em}
\footnotesize{
\begin{eqnarray}
\textbf{C}(\textrm{``机票''}) & = & 0.2 \times \textbf{h}(\textrm{``沈阳''}) + 0.3 \times \textbf{h}(\textrm{``到''}) + \nonumber \\
& & 0.1 \times \textbf{h}(\textrm{``广州''}) + \dots + 0.3 \times \textbf{h}(\textrm{``机票''}) \nonumber
\end{eqnarray}
}
% \ No newline at end of file
% Figure: computing the context vector C_j. The encoder outputs h_1 ... h_m
% are weighted by alpha_{i,j} and merged in a sum node, and the result C_j
% enters decoder state s_j. The formulas on the right are tied to the
% diagram by dotted arrows.
\begin{tikzpicture}
\begin{scope}
% Encoder outputs h_1, h_2, ..., h_m.
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h1) at (0,0) {\scriptsize{$\textbf{h}_1$}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h2) at ([xshift=1em]h1.east) {\scriptsize{$\textbf{h}_2$}};
\node [anchor=west,inner sep=0pt,minimum width=3em] (h3) at ([xshift=0.5em]h2.east) {\scriptsize{...}};
\node [anchor=west,draw,fill=red!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (h4) at ([xshift=0.5em]h3.east) {\scriptsize{$\textbf{h}_m$}};
% Sum node: a circle with a cross drawn through it.
\node [anchor=south,circle,minimum size=1.0em,draw,ublue,thick] (sum) at ([yshift=2em]h2.north east) {};
\draw [thick,-,ublue] (sum.north) -- (sum.south);
\draw [thick,-,ublue] (sum.west) -- (sum.east);
% Decoder states s_{j-1} and s_j.
\node [anchor=south,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th1) at ([yshift=2em,xshift=-1em]sum.north west) {\scriptsize{$\textbf{s}_{j-1}$}};
\node [anchor=west,draw,fill=green!20!white,inner sep=3pt,minimum width=2em,minimum height=1.2em] (th2) at ([xshift=2em]th1.east) {\scriptsize{$\textbf{s}_{j}$}};
% Weighted edges from the encoder outputs into the sum node.
\draw [->] (h1.north) .. controls +(north:0.8) and +(west:1) .. (sum.190) node [pos=0.3,left] {\scriptsize{$\alpha_{1,j}$}};
\draw [->] (h2.north) .. controls +(north:0.6) and +(220:0.2) .. (sum.220) node [pos=0.2,right] {\scriptsize{$\alpha_{2,j}$}};
\draw [->] (h4.north) .. controls +(north:0.8) and +(east:1) .. (sum.-10) node [pos=0.1,left] (alphan) {\scriptsize{$\alpha_{m,j}$}};
% Decoder recurrence, and C_j entering s_j from the sum node.
\draw [->] ([xshift=-1.5em]th1.west) -- ([xshift=-0.1em]th1.west);
\draw [->] ([xshift=0.1em]th1.east) -- ([xshift=-0.1em]th2.west);
\draw [->] ([xshift=0.1em]th2.east) -- ([xshift=1.5em]th2.east);
\draw [->] (sum.north) .. controls +(north:0.8) and +(west:0.2) .. ([yshift=-0.4em,xshift=-0.1em]th2.west) node [pos=0.2,right] (ci) {\scriptsize{$\textbf{C}_{j}$}};
\node [anchor=south,inner sep=1pt] (output) at ([yshift=0.8em]th2.north) {\scriptsize{输出层}};
\draw [->] ([yshift=0.1em]th2.north) -- ([yshift=-0.1em]output.south);
% Two-line captions under the encoder outputs.
\node [anchor=north] (enc1) at (h1.south west) {\scriptsize{编码器输出}};
\node [anchor=north] (enc12) at ([yshift=0.5em]enc1.south) {\scriptsize{(位置$1$)}};
\node [anchor=north] (enc2) at (h2.south) {\scriptsize{编码器输出}};
\node [anchor=north] (enc22) at ([yshift=0.5em]enc2.south) {\scriptsize{(位置$2$)}};
\node [anchor=north] (enc4) at (h4.south) {\scriptsize{编码器输出}};
% The last box shows h_m, so its caption must read position m (it said 4).
\node [anchor=north] (enc42) at ([yshift=0.5em]enc4.south) {\scriptsize{(位置$m$)}};
% Formulas to the right of the diagram.
\node [anchor=west] (math1) at ([xshift=5em,yshift=1em]th2.east) {$\textbf{C}_j = \sum_{i} \alpha_{i,j} \textbf{h}_i \ \ $};
\node [anchor=north west] (math2) at ([yshift=-2em]math1.south west) {$\alpha_{i,j} = \frac{\exp(\beta_{i,j})}{\sum_{i'} \exp(\beta_{i',j})}$};
\node [anchor=north west] (math3) at ([yshift=-0em]math2.south west) {$\beta_{i,j} = a(\textbf{s}_{j-1}, \textbf{h}_i)$};
% Shaded boxes behind the formulas, drawn on the background layer.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=blue!10,drop shadow] [fit = (math1)] (box1) {};
\node [rectangle,inner sep=0.4em,rounded corners=1pt,fill=orange!10,drop shadow] [fit = (math2) (math3)] (box2) {};
\end{pgfonlayer}
% Dotted arrows from each formula box to the quantity it defines.
\draw [->,dotted,thick,blue] (box1.west) .. controls +(west:1.2) and +(east:2.0) .. ([xshift=-0.3em]ci.east);
\draw [->,dotted,thick,orange] ([yshift=1em]box2.west) .. controls +(west:1.2) and +(east:1.0) .. ([xshift=-0.35em]alphan.east);
\end{scope}
\end{tikzpicture}
% \ No newline at end of file
% Figure: two groups of three bars. Filled green bars (snode) come in
% different widths and unfilled bars (pnode) sit to their right. The left
% group is labelled "Shuffle:" and the right group "Sorted:".
\begin{tikzpicture}
\tikzstyle{snode} = [draw,inner sep=1pt,minimum width=3em,minimum height=0.5em,rounded corners=1pt,fill=green!30!white]
\tikzstyle{pnode} = [draw,inner sep=1pt,minimum width=1em,minimum height=0.5em,rounded corners=1pt]
% Left group: filled bars 3em, 6.3em and 2em wide.
\node [anchor=west,snode] (s1) at (0,0) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (s2) at ([yshift=-0.3em]s1.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=2em] (s3) at ([yshift=-0.3em]s2.south west) {\tiny{}};
\node [anchor=east] (label1) at ([xshift=-0.8em,yshift=0.6em]s1.west) {\scriptsize{Shuffle:}};
\node [anchor=west,pnode,minimum width=3em] (p1) at ([xshift=0.3em]s1.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=4em] (p3) at ([xshift=0.3em]s3.east) {\tiny{}};
% Right group: filled bars 5em, 5em and 6.3em wide.
\node [anchor=west,snode,minimum width=5em] (s4) at ([xshift=6em]p1.east) {\tiny{}};
\node [anchor=north west,snode,minimum width=5em] (s5) at ([yshift=-0.3em]s4.south west) {\tiny{}};
\node [anchor=north west,snode,minimum width=6.3em] (s6) at ([yshift=-0.3em]s5.south west) {\tiny{}};
\node [anchor=east] (label2) at ([xshift=-0.8em,yshift=0.6em]s4.west) {\scriptsize{Sorted:}};
\node [anchor=west,pnode,minimum width=1em] (p4) at ([xshift=0.3em]s4.east) {\tiny{}};
\node [anchor=west,pnode,minimum width=1em] (p5) at ([xshift=0.3em]s5.east) {\tiny{}};
% Dotted frames around each group. The second frame is named box1 so that
% it no longer redefines box0.
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (s1) (s3) (p1) (p3)] (box0) {};
\node [rectangle,inner sep=0.5em,rounded corners=2pt,very thick,dotted,draw=ugreen!80] [fit = (s4) (s6) (p4) (p5)] (box1) {};
\end{tikzpicture}
% \ No newline at end of file
%----------------------------------
% Figure: a stack of three sentence boxes (a batch) is split into individual
% sentences, and each sentence is routed to its own dashed box labelled
% 设备1..设备3 that contains a short unrolled chain of RNN cells.
% The length \base is set here but is not declared in this block.
\begin{tikzpicture}
\setlength{\base}{1.5em}
\tikzstyle{samplenode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=3pt,outer sep=0pt,fill=green!30!white]
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white]
\tikzstyle{wordnode} = [font=\footnotesize,align=center]
\begin{scope}
% Stacked batch: each box is offset by (-1em,-0.5em) from the previous one.
% \k = 4-\i, so batch1..batch3 carry the labels 句子3, 句子2, 句子1.
\coordinate (batch0) at (0,0);
\foreach \i [count=\j from 0,evaluate=\i as \k using int(4-\i)] in {1,2,3}
\node [samplenode,anchor=south west,font=\scriptsize] (batch\i) at ([shift={(-1em,-0.5em)}]batch\j.south west) {句子\k};
% Brace along the lower-right corners of the stack, labelled with the batch size.
\draw [decorate,decoration={brace}] (batch1.south east) to node [auto,rotate=30,anchor=north,font=\scriptsize] {batch大小} (batch3.south east);
% The individual sentences, placed in a column to the right of the stack.
\node [samplenode,anchor=west,font=\scriptsize] (sample2) at ([xshift=4em]batch2.east) {句子2};
\node [samplenode,anchor=south,font=\scriptsize] (sample3) at ([yshift=3em]sample2.north) {句子3};
\node [samplenode,anchor=north,font=\scriptsize] (sample1) at ([yshift=-3em]sample2.south) {句子1};
% Fan-out arrows from a point right of batch2 to every sentence.
\foreach \i in {1,2,3}
\draw [->,thick] ([xshift=1.5em]batch2.east) -- ([xshift=-3pt]sample\i.west);
% One RNN chain per sentence. The node names used inside the loop (start,
% rnn0..rnn4, in*, out*, rnn) are redefined on every iteration, so after the
% loop they refer to the last chain only.
\foreach \i in {1,2,3}
{
\coordinate (start) at ([xshift=2em]sample\i.east);
\node [wordnode,anchor=west] (rnn0) at (start) {$0$};
% Three cells chained left to right; each cell gets an incoming arrow from
% below and an outgoing arrow upwards. \k is the index of the previous cell.
\foreach \j [count=\k from 0] in {1,2,3}
{
\node [rnnnode,anchor=west] (rnn\j) at ([xshift=1em]rnn\k.east) {};
\draw [-latex'] (rnn\k) to (rnn\j);
\coordinate (in\j) at ([yshift=-1em]rnn\j.south);
\draw [-latex'] (in\j) to (rnn\j.south);
\coordinate (out\j) at ([yshift=1em]rnn\j.north);
\draw [-latex'] (rnn\j.north) to (out\j);
}
% The chain continues beyond the third cell.
\node [wordnode,anchor=west] (rnn4) at ([xshift=1em]rnn3.east) {$\cdots$};
\draw [-latex'] (rnn3) to (rnn4);
% Dashed frame around the chain, with a rotated label on its right side.
\node [draw,densely dashed,thick,rounded corners=0.3em,fit=(start) (in3) (out3) (rnn4),label={[font=\footnotesize,rotate=90,anchor=north]0:设备\i}] (rnn) {};
% Double arrow from the sentence to its frame.
\draw [->,double] ([xshift=3pt]sample\i.east) -- ([xshift=-3pt]rnn.west);
}
\end{scope}
\end{tikzpicture}
% \ No newline at end of file
% Figure: a single decoding step (input word -> e_y() -> s_1 -> softmax)
% together with the word probability distribution it produces and the top-1
% entry ("Have") taken from that distribution.
\begin{tikzpicture}
\begin{scope}
\tikzstyle{rnnnode} = [minimum height=1.1em,minimum width=3.5em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20];
\tikzstyle{wnode} = [minimum height=1.0em,minimum width=3.5em,inner sep=2pt,rounded corners=1pt,draw,fill=white];

% The decoding column, bottom to top.
\node [rnnnode,anchor=west,fill=green!20] (t1) at (0,0) {\scriptsize{$e_y()$}};
\node [rnnnode,anchor=south] (s1) at ([yshift=1em]t1.north) {\scriptsize{$\textbf{s}_1$}};
% Input token <eos>, written with \langle/\rangle so the brackets are not
% typeset as math relations.
\node [wnode,anchor=north] (wt1) at ([yshift=-0.8em]t1.south) {\scriptsize{$\langle$eos$\rangle$}};
\node [rnnnode,anchor=south,fill=blue!20] (o1) at ([yshift=1em]s1.north) {\scriptsize{softmax}};

% Vertical arrows: word -> embedding -> state -> softmax.
\draw [->] ([yshift=-0.7em]t1.south) -- ([yshift=-0.1em]t1.south);
\draw [->] ([yshift=0.1em]t1.north) -- ([yshift=-0.1em]s1.south);
\draw [->] ([yshift=0.1em]s1.north) -- ([yshift=-0.1em]o1.south);

% The state receives the initial value "0" from the left.
\draw [->] ([xshift=-0.5em]s1.west) -- ([xshift=-0.1em]s1.west) node [pos=0,left,inner sep=1pt] {\scriptsize{0}};

% Empty node above the softmax; only used to position vocabtopn below.
\node [anchor=center,inner sep=2pt] (wo1) at ([yshift=1.2em]o1.north) {};

% Word probability distribution linked to the softmax.
\node [anchor=east] (vocab) at ([xshift=-5em]s1.west) {\tiny{$\begin{bmatrix} \textrm{Have} & 0.50 \\ \textrm{I} & 0.02 \\ \textrm{it} & 0.03 \\ \textrm{has} & 0.30 \\ \textrm{you} & 0.01 \\ \textrm{the} & 0.01 \\ \textrm{a} & 0.01 \\ \textrm{an} & 0.02 \\ \textrm{he} & 0.03 \\ \textrm{she} & 0.01 \\ \textrm{are} & 0.00 \\ \textrm{am} & 0.01 \\ ... & ... \end{bmatrix}$}};
\node [anchor=south] (vocablabel) at (vocab.north) {\scriptsize{单词的概率分布}};
\draw [->,red,very thick,dotted] (o1.west) .. controls +(west:1) and +(east:2) .. ([yshift=1em]vocab.south east);

% The top-1 entry taken from that distribution.
\node [anchor=east,inner sep=1pt] (vocabtopn) at ([xshift=-0.5em,yshift=-0.5em]wo1.west) {\scriptsize{$\begin{bmatrix} \textrm{Have} \end{bmatrix}$}};
\draw [->] ([yshift=-1.6em,xshift=-0.4em]vocab.north east) .. controls +(east:1) and +(west:1) .. ([xshift=0.1em,yshift=0.4em]vocabtopn.west) node [pos=0.3,below] (topnlabel) {\scriptsize{top-1}};
\end{scope}
\end{tikzpicture}
% \ No newline at end of file
\begin{tikzpicture}
% Encoder-decoder RNN with attention.
% Left: encoder states h_1 ... h_m on top of source embeddings e_x().
% Right: four decoder steps (embedding e_y() -> state s_j -> softmax -> top-1 word).
% Context vectors C_2, C_3, C_4 feed the decoder states s_2, s_3, s_4.
\begin{scope}
\tikzset{rnnnode/.style={minimum height=1.1em,minimum width=2.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20}}

% ---------------- encoder ----------------
% The "..." placeholders are kept as literal text: later nodes are chained off
% their east anchors, so changing their width would shift the whole decoder.
\node [rnnnode,anchor=west] (h1) at (0,0) {\tiny{$\textbf{h}_1$}};
\node [anchor=west] (h2) at ([xshift=1em]h1.east) {\tiny{...}};
\node [rnnnode,anchor=west] (h3) at ([xshift=1em]h2.east) {\tiny{$\textbf{h}_m$}};
\node [rnnnode,anchor=north,fill=green!20] (e1) at ([yshift=-1em]h1.south) {\tiny{$e_x()$}};
\node [anchor=west] (e2) at ([xshift=1em]e1.east) {\tiny{...}};
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
% NOTE(review): the first source-word label is empty in this file -- confirm whether its text was lost.
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.8em]e2.south) {\tiny{...}};
% \langle/\rangle instead of math-mode "<" and ">", which are typeset as
% relations around an italic "eos".
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{$\langle$eos$\rangle$}};
\draw [->] (w1.north) -- ([yshift=-0.1em]e1.south);
\draw [->] (w3.north) -- ([yshift=-0.1em]e3.south);
\draw [->] ([yshift=0.1em]e1.north) -- ([yshift=-0.1em]h1.south);
\draw [->] ([yshift=0.1em]e3.north) -- ([yshift=-0.1em]h3.south);
\draw [->] ([xshift=0.1em]h1.east) -- ([xshift=-0.1em]h2.west);
\draw [->] ([xshift=0.1em]h2.east) -- ([xshift=-0.1em]h3.west);
\draw [->] ([xshift=-0.8em]h1.west) -- ([xshift=-0.1em]h1.west) node [pos=0,left,inner sep=2pt] {\tiny{0}};
\node [anchor=south] (encoder) at ([xshift=-0.2em]h1.north west) {\scriptsize{\textbf{编码器}}};

% ---------------- decoder: embeddings, states, softmax ----------------
\node [rnnnode,anchor=west,fill=green!20] (t1) at ([xshift=3em]h3.east) {\tiny{$e_y()$}};
\foreach \cur [count=\prev from 1] in {2,3,4}
  \node [rnnnode,anchor=west,fill=green!20] (t\cur) at ([xshift=1.5em]t\prev.east) {\tiny{$e_y()$}};
\node [anchor=west,inner sep=2pt] (t5) at ([xshift=0.3em]t4.east) {\tiny{...}};
\foreach \x in {1,...,4} {
  \node [rnnnode,anchor=south] (s\x) at ([yshift=1em]t\x.north) {\tiny{$\textbf{s}_{\x}$}};
  \node [rnnnode,anchor=south,fill=blue!20] (o\x) at ([yshift=1em]s\x.north) {\tiny{softmax}};
}
\node [anchor=west,inner sep=2pt] (s5) at ([xshift=0.3em]s4.east) {\tiny{...}};
\node [anchor=west,inner sep=2pt] (o5) at ([xshift=0.3em]o4.east) {\tiny{...}};
\node [anchor=east] (decoder) at ([xshift=-0.5em]o1.north west) {\scriptsize{\textbf{解码器}}};

% ---------------- decoder input words ----------------
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{$\langle$eos$\rangle$}};
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{Have}};
\node [anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\tiny{you}};
\node [anchor=north,inner sep=2pt] (wt4) at ([yshift=-0.6em]t4.south) {\tiny{learned}};

% ---------------- decoder output words with step markers ----------------
\foreach \x/\w in {1/Have,2/you,3/learned,4/nothing} {
  \node [anchor=center,inner sep=2pt] (wo\x) at ([yshift=1.2em]o\x.north) {\tiny{\w}};
  \node [anchor=south,inner sep=2pt] (wos\x) at (wo\x.north) {\tiny{\textbf{[step \x]}}};
}

% ---------------- vertical arrows inside every decoder step ----------------
\foreach \x in {1,...,4} {
  \draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
  \draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
  \draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]o\x.south);
  \draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
}

% ---------------- recurrent links between decoder states ----------------
\draw [->] ([xshift=-0.8em]s1.west) -- ([xshift=-0.1em]s1.west) node [pos=0,left] {\tiny{0}};
\foreach \x [count=\y from 2] in {1,2,3}
  \draw [->] ([xshift=0.1em]s\x.east) -- ([xshift=-0.1em]s\y.west);

% ---------------- predicted word fed back as the next input ----------------
\draw [->,thick,dotted] (wo1.east) .. controls +(east:1.0) and +(west:1.0) ..(wt2.west);
\draw [->,thick,dotted] (wo2.east) .. controls +(east:1.3) and +(west:1.1) ..(wt3.west);
\draw [->,thick,dotted] (wo3.east) .. controls +(east:0.9) and +(west:0.9) ..(wt4.west);

% ---------------- attention: context vector C_2 from h_1, h_m and s_1 ----------------
\node [circle,draw,anchor=south,inner sep=3pt,fill=orange!20] (c2) at ([yshift=2em]h2.north) {\tiny{$\textbf{C}_2$}};
\node [anchor=south] (c2label) at (c2.north) {\tiny{\textbf{注意力机制:上下文}}};
\node [anchor=south] (c2more) at ([yshift=-1.5em]c2.south) {\tiny{...}};
\draw [->] (h1.north) .. controls +(north:0.6) and +(250:0.9) .. (c2.250);
\draw [->] (h3.north) .. controls +(north:0.6) and +(290:0.9) .. (c2.290);
\draw [->] ([yshift=-0.3em]s1.west) .. controls +(west:2) and +(-50:0.3) .. (c2.-40);
% C_2 -> s_2. NOTE(review): the 1.358in run is a hand-tuned offset; it presumably
% depends on the current node widths -- re-check if fonts or node sizes change.
\draw [->] (c2.0) -- ([xshift=1.358in]c2.0) -- ([yshift=0.3em,xshift=-1.2em]s2.west) -- ([yshift=0.3em,xshift=-0.1em]s2.west);

% ---------------- context vectors C_3 and C_4 (inputs abbreviated to a short stub arrow) ----------------
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c3) at ([yshift=-2em]t2.south) {\tiny{$\textbf{C}_3$}};
\draw [->] ([xshift=-0.7em]c3.west) -- ([xshift=-0.1em]c3.west);
\draw [->] ([xshift=0.1em]c3.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]s3.west);
\node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c4) at ([yshift=-2em]t3.south) {\tiny{$\textbf{C}_4$}};
\draw [->] ([xshift=-0.7em]c4.west) -- ([xshift=-0.1em]c4.west);
\draw [->] ([xshift=0.1em]c4.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]s4.west);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Information passing along a word sequence: every red arc runs from one
% position to its immediate left neighbour, starting at the highlighted w_m.
\begin{scope}
\tikzset{hop/.style={->,thick,red}}

% Word sequence w_1 ... w_m (the last word is highlighted).
\node [anchor=west] (w0) at (0,0) {$w_1$};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$};
\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$};
\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$};
\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};

% Arc endpoints: the north anchor of each node; the endpoint on the ellipsis
% node is lifted by 0.2em.
\foreach \k in {0,1,2,4,5}
  \coordinate (top\k) at (w\k.north);
\coordinate (top3) at ([yshift=0.2em]w3.north);

% One arc per neighbouring pair, drawn left to right.
\foreach \src [count=\dst from 0] in {1,...,5}
  \draw [hop] (top\src) .. controls +(130:0.5) and +(50:0.5) .. (top\dst);

% Legend: a leftward arrow with its caption.
\draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{信息传递}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Information passing with direct connections: the highlighted last word w_m
% is linked to earlier positions by a single arc each.
\begin{scope}
\tikzset{hop/.style={->,thick,red}}

% Word sequence w_1 ... w_m (the last word is highlighted).
\node [anchor=west] (w0) at (0,-2) {$w_1$};
\node [anchor=west] (w1) at ([xshift=0.5em]w0.east) {$w_2$};
\node [anchor=west] (w2) at ([xshift=0.5em]w1.east) {$w_3$};
\node [anchor=west] (w3) at ([xshift=0.5em]w2.east) {$...$};
\node [anchor=west] (w4) at ([xshift=0.5em]w3.east) {$w_{m-1}$};
\node [anchor=west,fill=green!20!white] (w5) at ([xshift=0.5em]w4.east) {$w_{m}$};

% The endpoint on the ellipsis node is lifted by 0.2em.
\coordinate (w3top) at ([yshift=0.2em]w3.north);

% Arcs from w_m: departure angle / control-point distance / target.
% Longer jumps leave more steeply and bulge higher.
% NOTE(review): no arc targets node w2 ($w_3$) -- confirm this omission is intentional.
\foreach \ang/\reach/\tgt in {100/0.85/w0.north, 110/0.75/w1.north, 120/0.6/w3top, 130/0.5/w4.north}
  \draw [hop] (w5.north).. controls +(\ang:\reach) and +(50:\reach) .. (\tgt);

% Legend: a leftward arrow with its caption.
\draw [->,very thick,red] ([xshift=-5em]w0.west) -- ([xshift=-6.5em]w0.west) node [pos=0,right] {\scriptsize{信息传递}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two placements of the normalisation box relative to a sub-layer and its
% residual addition: (a) sub-layer -> add -> norm, (b) norm -> sub-layer -> add.
\begin{scope}
\tikzset{
  lnode/.style={minimum height=1.5em,minimum width=3em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20},
  standard/.style={rounded corners=3pt},
  flow/.style={->,thick},
}

% ---- (a): sub-layer, residual add, normalisation ----
\node [lnode,anchor=west] (l1) at (0,0) {\scriptsize{子层n}};
\node [anchor=west] (plus1) at ([xshift=0.9em]l1.east) {\scriptsize{$\mathbf{\oplus}$}};
\node [lnode,anchor=west] (l2) at ([xshift=3em]l1.east) {\scriptsize{层正则化}};
\node [anchor=north] (label1) at ([xshift=3em,yshift=-0.5em]l1.south) {\scriptsize{(a)后正则化}};

% ---- (b): normalisation, sub-layer, residual add ----
\node [lnode,anchor=west] (l3) at ([xshift=4em]l2.east) {\scriptsize{层正则化}};
\node [lnode,anchor=west] (l4) at ([xshift=1.5em]l3.east) {\scriptsize{子层n}};
\node [anchor=west] (plus2) at ([xshift=0.9em]l4.east) {\scriptsize{$\mathbf{\oplus}$}};
\node [anchor=north] (label2) at ([xshift=3em,yshift=-0.5em]l3.south) {\scriptsize{(b)前正则化}};

% Main left-to-right data path of (a).
\draw [flow] ([xshift=-1.5em]l1.west) -- ([xshift=-0.1em]l1.west);
\draw [flow] ([xshift=0.1em]l1.east) -- ([xshift=0.2em]plus1.west);
\draw [flow] ([xshift=-0.2em]plus1.east) -- ([xshift=-0.1em]l2.west);
\draw [flow] ([xshift=0.1em]l2.east) -- ([xshift=1em]l2.east);

% Main left-to-right data path of (b).
\draw [flow] ([xshift=-1.5em]l3.west) -- ([xshift=-0.1em]l3.west);
\draw [flow] ([xshift=0.1em]l3.east) -- ([xshift=-0.1em]l4.west);
\draw [flow] ([xshift=0.1em]l4.east) -- ([xshift=0.2em]plus2.west);
\draw [flow] ([xshift=-0.2em]plus2.east) -- ([xshift=1em]plus2.east);

% Residual connections: branch off before the first box, run 2em above the
% row, and drop into the addition symbol.
\foreach \first/\add in {l1/plus1,l3/plus2}
  \draw[->,standard,thick] ([xshift=-0.8em]\first.west) -- ([xshift=-0.8em,yshift=2em]\first.west) -- ([yshift=2em]\add.center) -- ([yshift=-0.2em]\add.north);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%--------------------------------------------------------------------------------
\begin{tikzpicture}
% Stacked (two-layer) RNN translation model over ten time steps.
% Bottom half: encoder (embedding row plus two recurrent rows).
% Top half: decoder (embedding row, two recurrent rows, output row).
% \base is a length declared outside this figure; it is the layout unit here.
\setlength{\base}{0.9cm}
\tikzset{
  rnnnode/.style={rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt},
  wordnode/.style={font=\tiny},
}

\begin{scope}[local bounding box=RNNMT]
  % ---- grid of cells, built column by column from the bottom up ----
  \coordinate (eemb0) at (0,0);
  \foreach \col [count=\prev from 0] in {1,...,10} {
    % encoder column: embedding, layer 1, layer 2
    \node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\col) at ([xshift=0.4\base]eemb\prev.east) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (enc1\col) at ([yshift=0.3\base]eemb\col.north) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (enc2\col) at ([yshift=0.5\base]enc1\col.north) {};
    % decoder column: embedding, layer 1, layer 2, output
    \node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\col) at ([yshift=1.5\base]enc2\col.north) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (dec1\col) at ([yshift=0.3\base]demb\col.north) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (dec2\col) at ([yshift=0.5\base]dec1\col.north) {};
    \node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\col) at ([yshift=0.5\base]dec2\col.north) {};
  }

  % ---- zero initial states of both encoder layers ----
  \node[wordnode,left=0.4\base of enc11] (init1) {$0$};
  \node[wordnode,left=0.4\base of enc21] (init2) {$0$};

  % ---- encoder input words ----
  % NOTE(review): several labels below are empty in this file -- confirm whether their text was lost.
  \node[wordnode,below=0pt of eemb1] () {};
  \node[wordnode,below=0pt of eemb2] () {知道};
  \node[wordnode,below=0pt of eemb3] () {};
  \node[wordnode,below=0pt of eemb4] () {北京站};
  \node[wordnode,below=0pt of eemb5] () {};
  \node[wordnode,below=0pt of eemb6] () {};
  \node[wordnode,below=0pt of eemb7] () {怎么};
  \node[wordnode,below=0pt of eemb8] () {};
  \node[wordnode,below=0pt of eemb9] () {};
  \node[wordnode,below=0pt of eemb10] () {EOS};

  % ---- decoder input words ----
  % The first word fixes the baseline; each later word takes its x from its own
  % column and its y from that baseline (via \ExtractX / \ExtractY, which are
  % defined outside this figure and provide \XCoord / \YCoord).
  \node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
  \foreach \word [count=\col from 2] in {Do,you,know,the,way,to,Beijing,Railway,Station} {
    \ExtractX{$(demb\col.south)$}
    \ExtractY{$(decwordin.base)$}
    \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {\word};
  }

  % ---- decoder output words (same baseline trick above the output row) ----
  \node[wordnode,above=0pt of softmax1] (decwordout) {Do};
  \foreach \word [count=\col from 2] in {you,know,the,way,to,Beijing,Railway,Station,EOS} {
    \ExtractX{$(softmax\col.north)$}
    \ExtractY{$(decwordout.base)$}
    \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {\word};
  }

  % ---- connections ----
  \draw[-latex'] (init1.east) to (enc11.west);
  \draw[-latex'] (init2.east) to (enc21.west);
  % vertical links inside every column
  \foreach \col in {1,...,10} {
    \draw[-latex'] (eemb\col) to (enc1\col);
    \draw[-latex'] (enc1\col) to (enc2\col);
    \draw[-latex'] (demb\col) to (dec1\col);
    \draw[-latex'] (dec1\col) to (dec2\col);
    \draw[-latex'] (dec2\col.north) to ([yshift=0.5\base]dec2\col.north);
  }
  % horizontal recurrent links between neighbouring columns, one per recurrent row
  \foreach \lft [count=\rgt from 2] in {1,...,9}
    \foreach \row in {enc1,enc2,dec1,dec2}
      \draw[-latex'] (\row\lft.east) to (\row\rgt.west);
  % last encoder state of each layer -> first decoder state of the same layer,
  % routed through a point above the sixth encoder column
  \draw[-latex'] (enc210.north) .. controls +(north:0.4\base) and +(east:1.5\base) .. ([yshift=1.4\base]enc16.north west) .. controls +(west:8.0\base) and +(south west:0.8\base) .. (dec21.west);
  \draw[-latex'] (enc110.east) .. controls +(east:0.5\base) and +(east:8\base) .. ([yshift=1.6\base]enc16.north west) .. controls +(west:7.5\base) and +(south west:0.1\base) .. (dec11.west);

  % ---- dashed frames around the second layer of encoder and decoder ----
  \begin{pgfonlayer}{background}
    \node[draw=red,thick,densely dashed,inner sep=5pt,fit=(init2) (enc21) (enc210)] (enc2) {};
    \node[draw=red,thick,densely dashed,inner sep=5pt,fit=(dec21) (dec210)] (dec2) {};
  \end{pgfonlayer}
  \node[font=\scriptsize,anchor=west] (label) at ([xshift=0.4\base]demb10.east) {堆叠RNN};
  \draw[->,dashed] (label.north) to (dec2.east);
  \draw[->,dashed] (label.south) to (enc2.east);
\end{scope}
\end{tikzpicture}
%---------------------------------------
\begin{frame}{}
% Two classical-Chinese / modern-Chinese sentence pairs.
% Red boxes hold the classical source, blue boxes the modern rendering;
% the negative \vspace pulls each pair together.

% ---- pair 1 ----
\begin{tcolorbox}[size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black]
{\small 古文:侍卫步军都指挥使、彰信节度使李继勋营于寿州城南,唐刘仁赡伺继勋无备,出兵击之,杀士卒数百人,焚其攻具。}
\end{tcolorbox}
\vspace{-0.4em}
\begin{tcolorbox}[size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black]
% The sentence used to stop at "攻城器" with no closing "。"; it is completed here.
% NOTE(review): "器械" is an assumed completion -- verify against the original slide text.
{\small 现代文:侍卫步军都指挥使、彰信节度使李继勋在寿州城南扎营,唐刘仁赡窥伺李继勋没有防备,出兵攻打他,杀死士兵几百人,烧毁李继勋的攻城器械。}
\end{tcolorbox}
\vspace{0.2em}

% ---- pair 2 ----
\begin{tcolorbox}[size=normal,left=2mm,right=1mm,colback=red!5!white,colframe=red!75!black]
{\small 古文:其后人稍稍识之,多延至其家,使为弟子论学。}
\end{tcolorbox}
\vspace{-0.4em}
\begin{tcolorbox}[size=normal,left=2mm,right=1mm,colback=blue!5!white,colframe=blue!75!black]
{\small 现代文:后来的人渐渐认识他,多把他请到家里,让他为弟子讲授学问。}
\end{tcolorbox}
\vspace{-0.8em}
\end{frame}
\ No newline at end of file
%-------------------------------------------
\begin{tikzpicture}
% Attention-weight matrix between a Chinese sentence (rows) and an English
% sentence (columns). The first two columns are framed, their weights are shown
% as bar charts, and each bar chart is summed into a context vector C_1 / C_2.

% Allocate the helper lengths only if they do not exist yet: a bare \newlength
% raises "Command ... already defined" when this figure is processed twice or
% when another figure has declared the same lengths.
\ifdefined\mystep\else\newlength{\mystep}\fi
\ifdefined\wseg\else\newlength{\wseg}\fi
\ifdefined\hseg\else\newlength{\hseg}\fi
\ifdefined\wnode\else\newlength{\wnode}\fi
\ifdefined\hnode\else\newlength{\hnode}\fi
\setlength{\wseg}{1.5cm}
\setlength{\hseg}{1.0cm}
\setlength{\wnode}{3.75cm}
% \hnode is the layout unit for everything below.
\setlength{\hnode}{1.0cm}
\tikzset{
  elementnode/.style={rectangle,text=white,anchor=center},
  srcnode/.style={rotate=45,font=\small,anchor=south west},
  tgtnode/.style={left,font=\small,anchor=north east},
  alignmentnode/.style={rectangle,draw,minimum height=3.6\hnode,minimum width=0.36\hnode},
  probnode/.style={fill=blue!30,minimum width=0.4\hnode},
  labelnode/.style={above},
}

% ---- weight matrix ----
\begin{scope}[scale=0.9,yshift=0.12in]
% Each entry is column/row/size: cell (a<column><row>) is a blue square whose
% side length is proportional to the third value.
\foreach \i / \j / \c in
{0/7/0.2, 1/7/0.45, 2/7/0.15, 3/7/0.15, 4/7/0.15, 5/7/0.15,
0/6/0.35, 1/6/0.45, 2/6/0.15, 3/6/0.15, 4/6/0.15, 5/6/0.15,
0/5/0.25, 1/5/0.15, 2/5/0.15, 3/5/0.35, 4/5/0.15, 5/5/0.15,
0/4/0.15, 1/4/0.25, 2/4/0.2, 3/4/0.30, 4/4/0.15, 5/4/0.15,
0/3/0.15, 1/3/0.15, 2/3/0.8, 3/3/0.25, 4/3/0.15, 5/3/0.25,
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.8, 5/1/0.15,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.60}
\node[elementnode,minimum size=0.6*\hnode*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*\hnode*\i-5.4*0.5*\hnode,0.5*\hnode*\j-1.05*\hnode) {};

% White weight labels printed on the larger cells. Each label node is named
% after its cell (previously the names l17/l26 were reused for different cells).
\node[align=center] (l17) at (a17) {\scriptsize{{\color{white} .4}}};
\node[align=center] (l06) at (a06) {\scriptsize{{\color{white} .3}}};
\node[align=center] (l16) at (a16) {\scriptsize{{\color{white} .4}}};
\node[align=center] (l35) at (a35) {\scriptsize{{\color{white} .3}}};
\node[align=center] (l34) at (a34) {\tiny{{\color{white} .3}}};
\node[align=center] (l23) at (a23) {\small{{\color{white} .8}}};
\node[align=center] (l41) at (a41) {\small{{\color{white} .8}}};
\node[align=center] (l50) at (a50) {\small{{\color{white} .7}}};

% Column headers: English words, rotated by 45 degrees.
\node[srcnode] (src1) at (-5.4*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{Have}};
\node[srcnode] (src2) at ([xshift=0.5\hnode]src1.south west) {\scriptsize{you}};
\node[srcnode] (src3) at ([xshift=0.5\hnode]src2.south west) {\scriptsize{learned}};
\node[srcnode] (src4) at ([xshift=0.5\hnode]src3.south west) {\scriptsize{nothing}};
\node[srcnode] (src5) at ([xshift=0.5\hnode]src4.south west) {\scriptsize{?}};
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{EOS}};

% Row headers: Chinese words, top to bottom. Rows 1, 3 and 4 are restored from
% the C_2 formula further down, which names the first four words as
% 你 / 什么 / 都 / 没.
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{你}};
\node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{什么}};
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{都}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{没}};
% NOTE(review): the labels of rows 5 and 6 are empty in this file and nothing in
% this figure states them -- confirm whether their text was lost.
\node[tgtnode] (tgt5) at ([yshift=-0.5\hnode]tgt4.north east) {\scriptsize{}};
\node[tgtnode] (tgt6) at ([yshift=-0.5\hnode]tgt5.north east) {\scriptsize{}};
\node[tgtnode] (tgt7) at ([yshift=-0.5\hnode]tgt6.north east) {\scriptsize{?}};
\node[tgtnode] (tgt8) at ([yshift=-0.5\hnode]tgt7.north east) {\scriptsize{EOS}};
\end{scope}

% ---- frames around the first (green) and second (red) matrix column ----
% ugreen is a colour defined outside this figure.
\node[alignmentnode, ugreen, anchor=north west] (alignment1) at ([xshift=-0.3em,yshift=0.4em]a07.north west) {};
\node[alignmentnode, red, anchor=north west] (alignment2) at ([xshift=-0.1em,yshift=0.2em]a17.north west) {};

% ---- bar chart of the second column's weights (red) ----
\node[probnode,anchor=south west,minimum height=0.4\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0.4$}] (attn21) at ([xshift=2.3\hnode,yshift=0.5\hnode]alignment2.east) {};
\node[probnode,anchor=south west,minimum height=0.4\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0.4$}] (attn22) at ([xshift=1pt]attn21.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0$}] (attn23) at ([xshift=1pt]attn22.south east) {};
\node[probnode,anchor=south west,minimum height=0.1\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0.1$}] (attn24) at ([xshift=1pt]attn23.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$0$}] (attn25) at ([xshift=1pt]attn24.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=red!40,label=below:\scriptsize{$...$}] (attn26) at ([xshift=1pt]attn25.south east) {};

% ---- bar chart of the first column's weights (green) ----
\node[probnode,anchor=south,minimum height=0.2\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0.2$}] (attn11) at ([xshift=2.5\hnode,yshift=-1em]alignment2.north east) {};
\node[probnode,anchor=south west,minimum height=0.3\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0.3$}] (attn12) at ([xshift=1pt]attn11.south east) {};
\node[probnode,anchor=south west,minimum height=0.2\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0.2$}] (attn13) at ([xshift=1pt]attn12.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0$}] (attn14) at ([xshift=1pt]attn13.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$0$}] (attn15) at ([xshift=1pt]attn14.south east) {};
\node[probnode,anchor=south west,minimum height=0.05\hnode,inner sep=0.1pt,fill=ugreen!40,label=below:\scriptsize{$...$}] (attn16) at ([xshift=1pt]attn15.south east) {};

% ---- caption and worked example for C_2 ----
\node [anchor=north west] (formula) at ([xshift=-0.3\hnode,yshift=-1.5\hnode]attn11.south) {\small{不同$\textbf{C}_i$所对应的源语言词的权重是不同的}};
\node [anchor=north west] (example) at (formula.south west) {\footnotesize{$\textbf{C}_2=0.4 \times \textbf{h}(\textrm{``你''}) + 0.4 \times \textbf{h}(\textrm{``什么''}) +$}};
\node [anchor=north west] (example2) at ([yshift=0.4em]example.south west) {\footnotesize{$\ \ \ \ \ \ \ \ 0 \times \textbf{h}(\textrm{``都''}) + 0.1 \times \textbf{h}(\textrm{``没''}) + \dots$}};

% ---- arrows from the framed columns to their bar charts ----
\draw[->,red] ([xshift=0.1em,yshift=2.3em]alignment2.east).. controls +(east:1.9cm) and +(west:1.0cm) ..([xshift=-0.15\hnode,yshift=-1em]attn21.north west);
\draw[->,ugreen] ([xshift=0.1em,yshift=-1.2em]alignment1.north east)--([xshift=2.2\hnode,yshift=-1.2em]alignment2.north east);

% ---- summation arrows from each bar chart to its context vector ----
\draw[->] ([xshift=0.2\hnode,yshift=0.0\hnode]attn26.east)--([xshift=0.7\hnode,yshift=0]attn26.east) node[pos=0.5,above] (sum2) {\small{$\sum$}};
\draw[->] ([xshift=0.2\hnode]attn16.east)--([xshift=0.7\hnode]attn16.east) node[pos=0.5,above] (sum1) {\small{$\sum$}};

% ---- context-vector formulas ----
\node[anchor=west] (sc1) at ([xshift=0.9\hnode]attn16.east) {$\textbf{C}_1 = \sum_{i=1}^{8} \alpha_{i1} \textbf{h}_{i}$};
\node[anchor=west] (sc2) at ([xshift=0.9\hnode,yshift=0.0\hnode]attn26.east) {$\textbf{C}_2 = \sum_{i=1}^{8} \alpha_{i2} \textbf{h}_{i}$};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Attention drawn as a lookup: one query box on the left, a row of key boxes
% (green) stacked on a row of value boxes (red), and a weight above every key.
\begin{scope}
\tikzset{rnode/.style={draw,minimum width=3.5em,minimum height=1.2em}}

% Value row, laid out left to right with 1em gaps.
\node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{$\textbf{h}(\textrm{``你''})$}};
\foreach \prev/\cur/\word in {1/2/什么,2/3/也,3/4/没}
  \node [rnode,anchor=south west,fill=red!20!white] (value\cur) at ([xshift=1em]value\prev.south east) {\scriptsize{$\textbf{h}(\textrm{``\word''})$}};

% Key row: each key sits 0.2em above the value with the same word.
\foreach \word [count=\idx] in {你,什么,也,没}
  \node [rnode,anchor=south west,fill=green!20!white] (key\idx) at ([yshift=0.2em]value\idx.north west) {\scriptsize{$\textbf{h}(\textrm{``\word''})$}};

% Query box and its caption.
\node [rnode,anchor=east] (query) at ([xshift=-2em]key1.west) {\scriptsize{$\textbf{s}(\textrm{``you''})$}};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};

% One arc from the query to every key: start offset / arc height.
% Farther keys start further left on the query and arch higher.
\foreach \dx/\reach [count=\idx] in {6pt/1em,3pt/1.5em,0pt/2em,-3pt/2.5em}
  \draw [->] ([yshift=1pt,xshift=\dx]query.north) .. controls +(90:\reach) and +(90:\reach) .. ([yshift=1pt]key\idx.north);

% Attention weight printed above each key.
\foreach \val [count=\idx] in {.4,.4,0,.1}
  \node [anchor=south east] (alpha\idx) at ([xshift=1em]key\idx.north east) {\scriptsize{$\alpha_{\idx}=\val$}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%----------------------------------------------------------------------
\begin{tikzpicture}
% GRU cell figure, first stage: only the reset-gate wiring is drawn, in red.
% \base (a length declared outside this figure) is the layout unit.
\setlength{\base}{0.6cm}
% auxnode: fully transparent marker (opacity=0) placed on each skeleton coordinate.
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{wordnode} = [font=\scriptsize]
% opnode: box/circle used for gate operators.
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
% standard / emph: neutral vs. highlighted (red) wires. standard and formulanode
% are defined but not used in this picture.
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton: named coordinates aux<NN> on which all wires and operators are placed.
% Each coordinate gets an invisible marker whose (also invisible) label repeats
% the number. Coordinates are defined relative to earlier ones, so the order of
% the statements below matters.
% \ExtractX / \ExtractY are defined outside this figure; they are used here to
% combine the x of one point with the y of another through \XCoord / \YCoord.
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux23) at ([xshift=0.5\base]aux22);
\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux32) at ([yshift=0.4\base]aux22);
\node[auxnode,label={-45:32}] () at (aux32) {};
\ExtractX{$([xshift=\base]aux23)$}
\ExtractY{$([yshift=\base]aux32)$}
\coordinate (aux44) at (\XCoord,\YCoord);
\node[auxnode,label={-45:44}] () at (aux44) {};
\coordinate (aux45) at ([xshift=\base]aux44);
\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux46) at ([xshift=1.3\base]aux45);
\node[auxnode,label={-45:46}] () at (aux46) {};
\ExtractX{$(aux23)$}
\ExtractY{$([yshift=\base]aux44)$}
\coordinate (aux53) at (\XCoord,\YCoord);
\node[auxnode,label={-45:53}] () at (aux53) {};
\coordinate (aux56) at ([yshift=\base]aux46);
\node[auxnode,label={-45:56}] () at (aux56) {};
\ExtractX{$(aux45)$}
\ExtractY{$([yshift=0.5\base]aux56)$}
\coordinate (aux65) at (\XCoord,\YCoord);
\node[auxnode,label={-45:65}] () at (aux65) {};
\ExtractX{$([xshift=-\base]aux12)$}
\ExtractY{$([yshift=\base]aux65)$}
\coordinate (aux71) at (\XCoord,\YCoord);
\node[auxnode,label={-45:71}] () at (aux71) {};
\coordinate (aux75) at ([yshift=\base]aux65);
\node[auxnode,label={-45:75}] () at (aux75) {};
\ExtractX{$(aux56)$}
\ExtractY{$(aux75)$}
\coordinate (aux76) at (\XCoord,\YCoord);
\node[auxnode,label={-45:76}] () at (aux76) {};
\coordinate (aux78) at ([xshift=1.7\base]aux76);
\node[auxnode,label={-45:78}] () at (aux78) {};
\coordinate (aux87) at ([shift={(0.7\base,1.3\base)}]aux76);
\node[auxnode,label={-45:87}] () at (aux87) {};
\end{scope}
\begin{scope}
% Invisible operator-sized placeholders: r53 is the end point of the reset-gate
% wire, z76 is part of the fit box below; z56 and z75 are not referenced again
% in this picture.
\node[opnode,circle,opacity=0] (r53) at (aux53) {};
\node[opnode,circle,opacity=0] (z56) at (aux56) {};
\node[opnode,circle,opacity=0] (z75) at (aux75) {};
\node[opnode,circle,opacity=0] (z76) at (aux76) {};
% Reset gate, highlighted: wires from aux12 (labelled x_t below) and aux71
% (labelled h_{t-1} below) meet at the sigma node on aux44 and continue to r53.
{
\draw[-latex,emph] (aux12) -- (aux32) -| (aux44) |- (r53);
\draw[emph] (aux71) -| (aux32) -| (aux44);
\node[opnode,circle,draw=red,thick] () at (aux44) {$\sigma$};
}
\end{scope}
% Input labels.
\begin{scope}
\node[wordnode,anchor=south] () at (aux71) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\end{scope}
% Empty placeholder node at aux46, used only by the fit box below.
\node[] (tanh) at (aux46){};
% Cell body: a rounded blue box on the background layer, sized to enclose the
% listed anchors.
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!30!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%----------------------------------------------------------------------
\begin{tikzpicture}
% GRU cell figure, second stage: the reset gate is drawn in the neutral style
% and the update-gate wiring is highlighted in red.
% \base (a length declared outside this figure) is the layout unit.
\setlength{\base}{0.6cm}
% auxnode: fully transparent marker (opacity=0) placed on each skeleton coordinate.
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{wordnode} = [font=\scriptsize]
% opnode: box/circle used for gate operators.
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
% standard / emph: neutral vs. highlighted (red) wires.
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton: named coordinates aux<NN> on which all wires and operators are placed.
% Each coordinate gets an invisible marker whose (also invisible) label repeats
% the number. Coordinates are defined relative to earlier ones, so the order of
% the statements below matters.
% \ExtractX / \ExtractY are defined outside this figure; they are used here to
% combine the x of one point with the y of another through \XCoord / \YCoord.
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux23) at ([xshift=0.5\base]aux22);
\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux32) at ([yshift=0.4\base]aux22);
\node[auxnode,label={-45:32}] () at (aux32) {};
\ExtractX{$([xshift=\base]aux23)$}
\ExtractY{$([yshift=\base]aux32)$}
\coordinate (aux44) at (\XCoord,\YCoord);
\node[auxnode,label={-45:44}] () at (aux44) {};
\coordinate (aux45) at ([xshift=\base]aux44);
\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux46) at ([xshift=1.3\base]aux45);
\node[auxnode,label={-45:46}] () at (aux46) {};
\ExtractX{$(aux23)$}
\ExtractY{$([yshift=\base]aux44)$}
\coordinate (aux53) at (\XCoord,\YCoord);
\node[auxnode,label={-45:53}] () at (aux53) {};
\coordinate (aux56) at ([yshift=\base]aux46);
\node[auxnode,label={-45:56}] () at (aux56) {};
\ExtractX{$(aux45)$}
\ExtractY{$([yshift=0.5\base]aux56)$}
\coordinate (aux65) at (\XCoord,\YCoord);
\node[auxnode,label={-45:65}] () at (aux65) {};
\ExtractX{$([xshift=-\base]aux12)$}
\ExtractY{$([yshift=\base]aux65)$}
\coordinate (aux71) at (\XCoord,\YCoord);
\node[auxnode,label={-45:71}] () at (aux71) {};
\coordinate (aux75) at ([yshift=\base]aux65);
\node[auxnode,label={-45:75}] () at (aux75) {};
\ExtractX{$(aux56)$}
\ExtractY{$(aux75)$}
\coordinate (aux76) at (\XCoord,\YCoord);
\node[auxnode,label={-45:76}] () at (aux76) {};
\coordinate (aux78) at ([xshift=1.7\base]aux76);
\node[auxnode,label={-45:78}] () at (aux78) {};
\coordinate (aux87) at ([shift={(0.7\base,1.3\base)}]aux76);
\node[auxnode,label={-45:87}] () at (aux87) {};
\end{scope}
\begin{scope}
% Invisible operator-sized placeholders used as wire end points and by the fit box.
\node[opnode,circle,opacity=0] (r53) at (aux53) {};
\node[opnode,circle,opacity=0] (z56) at (aux56) {};
\node[opnode,circle,opacity=0] (z75) at (aux75) {};
\node[opnode,circle,opacity=0] (z76) at (aux76) {};
% Reset gate, neutral style only. The highlighted (red) copy that used to be
% drawn underneath was removed: the neutral copy covered its wires, but its
% thick red sigma border was wider than the thin neutral border on top and
% showed through as a red rim.
{
\draw[-latex,standard] (aux12) -- (aux32) -| (aux44) |- (r53);
\draw[standard] (aux71) -| (aux32) -| (aux44);
\node[opnode,circle] () at (aux44) {$\sigma$};
}
% Update gate, highlighted: both inputs run to the sigma node on aux45; one
% branch continues through the "1-" box on aux65 to z75, the other goes to z56.
{
\draw[-latex,emph] (aux12) -- (aux32) -| (aux45) -- (z75);
\draw[-latex,emph] (aux71) -| (aux32) -| (aux45) |- (z56);
\node[opnode,circle,draw=red,thick] () at (aux45) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,font=\tiny,draw=red,thick] () at (aux65) {$1-$};
}
\end{scope}
% Input labels.
\begin{scope}
\node[wordnode,anchor=south] () at (aux71) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\end{scope}
% Empty placeholder node at aux46, used only by the fit box below.
\node[] (tanh) at (aux46){};
% Cell body: a rounded blue box on the background layer, sized to enclose the
% listed anchors.
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!30!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%----------------------------------------------------------------------
\begin{tikzpicture}
  % GRU cell diagram: the reset gate and the update gate are drawn in the
  % neutral `standard` style, the hidden-state update is highlighted in red
  % (`emph`). A rounded blue box on the background layer frames the cell.
  %
  % External dependencies (defined outside this figure):
  %   \base                          length register, set locally below
  %   \ExtractX, \ExtractY           project macros; presumably they store the
  %   \XCoord, \YCoord               x / y part of the given point -- confirm
  %   layer `background`, `fit`, `calc`, `drop shadow` support from the preamble
  \setlength{\base}{0.6cm}
  \tikzset{
    % invisible anchor marker (opacity=0); only used to pin the skeleton grid
    auxnode/.style={inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt},
    wordnode/.style={font=\scriptsize},
    % operator box/circle (sigma, tanh, X, +, 1-)
    opnode/.style={inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt},
    % neutral vs. highlighted connection style
    standard/.style={rounded corners=5pt,thick},
    emph/.style={rounded corners=5pt,thick,draw=red},
    % not used inside this picture
    formulanode/.style={font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow},
  }
  % Skeleton: named coordinates auxRC (R = row, C = column) that every later
  % path refers to. Markers and their labels are fully transparent.
  \begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
    \coordinate (aux12) at (0,0);
    \node[auxnode,label={-45:12}] () at (aux12) {};
    \coordinate (aux22) at ([yshift=\base]aux12);
    \node[auxnode,label={-45:22}] () at (aux22) {};
    \coordinate (aux23) at ([xshift=0.5\base]aux22);
    \node[auxnode,label={-45:23}] () at (aux23) {};
    \coordinate (aux32) at ([yshift=0.4\base]aux22);
    \node[auxnode,label={-45:32}] () at (aux32) {};
    \ExtractX{$([xshift=\base]aux23)$}
    \ExtractY{$([yshift=\base]aux32)$}
    \coordinate (aux44) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:44}] () at (aux44) {};
    \coordinate (aux45) at ([xshift=\base]aux44);
    \node[auxnode,label={-45:45}] () at (aux45) {};
    \coordinate (aux46) at ([xshift=1.3\base]aux45);
    \node[auxnode,label={-45:46}] () at (aux46) {};
    \ExtractX{$(aux23)$}
    \ExtractY{$([yshift=\base]aux44)$}
    \coordinate (aux53) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:53}] () at (aux53) {};
    \coordinate (aux56) at ([yshift=\base]aux46);
    \node[auxnode,label={-45:56}] () at (aux56) {};
    \ExtractX{$(aux45)$}
    \ExtractY{$([yshift=0.5\base]aux56)$}
    \coordinate (aux65) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:65}] () at (aux65) {};
    \ExtractX{$([xshift=-\base]aux12)$}
    \ExtractY{$([yshift=\base]aux65)$}
    \coordinate (aux71) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:71}] () at (aux71) {};
    \coordinate (aux75) at ([yshift=\base]aux65);
    \node[auxnode,label={-45:75}] () at (aux75) {};
    \ExtractX{$(aux56)$}
    \ExtractY{$(aux75)$}
    \coordinate (aux76) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:76}] () at (aux76) {};
    \coordinate (aux78) at ([xshift=1.7\base]aux76);
    \node[auxnode,label={-45:78}] () at (aux78) {};
    \coordinate (aux87) at ([shift={(0.7\base,1.3\base)}]aux76);
    \node[auxnode,label={-45:87}] () at (aux87) {};
  \end{scope}
  \begin{scope}
    % Invisible operator-sized stand-ins: arrows that end on them stop at the
    % rim of the operator node that is drawn at the same place further down.
    \node[opnode,circle,opacity=0] (r53) at (aux53) {};
    \node[opnode,circle,opacity=0] (z56) at (aux56) {};
    \node[opnode,circle,opacity=0] (z75) at (aux75) {};
    \node[opnode,circle,opacity=0] (z76) at (aux76) {};
    % reset gate -- not the focus of this figure, so it is drawn once, in the
    % neutral style only (a red layer underneath would leak out as a thin rim
    % because highlighted node borders are `thick`).
    {
      \draw[-latex,standard] (aux12) -- (aux32) -| (aux44) |- (r53);
      \draw[standard] (aux71) -| (aux32) -| (aux44);
      \node[opnode,circle] () at (aux44) {$\sigma$};
    }
    % update gate -- neutral style only, same reasoning as above
    {
      \draw[-latex,standard] (aux12) -- (aux32) -| (aux45) -- (z75);
      \draw[-latex,standard] (aux71) -| (aux32) -| (aux45) |- (z56);
      \node[opnode,circle] () at (aux45) {$\sigma$};
      \node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,font=\tiny] () at (aux65) {$1-$};
    }
    % hidden update -- the highlighted part of this figure
    {
      \draw[-latex,emph] (aux71) -- (aux78);
      \draw[-latex,emph] (aux71) -| (aux87);
      \draw[-latex,emph] (aux71) -| (aux53) -- (aux23) -| (aux46) -- (z76);
      \draw[emph] (aux12) |- (aux23) -| (aux46);
      % (tanh) is also referenced by the background `fit` node below
      \node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] (tanh) at (aux46) {$\mathrm{tanh}$};
      \node[opnode,circle,draw=red,thick] () at (aux53) {X};
      \node[opnode,circle,draw=red,thick] () at (aux56) {X};
      \node[opnode,circle,draw=red,thick] () at (aux75) {X};
      \node[opnode,circle,draw=red,thick] () at (aux76) {\textbf{+}};
    }
  \end{scope}
  % Input / output labels
  \begin{scope}
    \node[wordnode,anchor=south] () at (aux71) {$\mathbf{h}_{t-1}$};
    \node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
    {
      \node[wordnode,anchor=east] () at (aux87) {$\mathbf{h}_{t}$};
      \node[wordnode,anchor=south] () at (aux78) {$\mathbf{h}_{t}$};
    }
  \end{scope}
  % Cell frame, placed on the background layer so it sits behind all paths
  \begin{pgfonlayer}{background}
    \node[draw,very thick,rectangle,fill=blue!30!white,rounded corners=5pt,inner sep=6pt,fit=(aux22) (aux76) (z76) (tanh)] (GRU) {};
  \end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%%% outline
%-------------------------------------------------------------------------
\begin{tikzpicture}
% Text-summarisation example: a long input passage (top) and the
% system-generated summary (bottom), each with a Chinese caption to its left,
% enclosed in one thin frame drawn on the background layer.
% First passage ----------------------------------------------
% Source text -------------
% Caption "input text:" and the passage itself. Neither node has an `at`
% clause, so both are positioned purely by their xshift/yshift values.
% NOTE(review): `pos=0.4` is given although these nodes are not placed on a
% path -- presumably it has no effect here; confirm before removing.
\node [pos=0.4,left,xshift=-32em,yshift=10em,font=\small] (original0) {输入文本:};
\node [pos=0.4,left,xshift=-2em,yshift=2.3em,font=\small] (original1) {
\begin{tabular}[t]{l}
\parbox{32em}{
Jenson Button was denied his 100th race for McLaren after an ERS prevented him from making it to the startline. It capped a miserable weekend for the Briton; his time in Bahrain plagued by reliability issues. Button spent much of the race on Twitter delivering his verdict as the action unfolded. ’Kimi is the man to watch,’ and ’loving the sparks’, were among his pearls of wisdom, but the tweet which courted the most attention was a rather mischievous one: ’Ooh is Lewis backing his team mate into Vettel?’ he quizzed after Rosberg accused Hamilton of pulling off such a manoeuvre in China. Jenson Button waves to the crowd ahead of the Bahrain Grand Prix which he failed to start Perhaps a career in the media beckons... Lewis Hamilton has out-qualified and finished ahead of Nico Rosberg at every race this season. Indeed Rosberg has now beaten his Mercedes team-mate only once in the 11 races since the pair infamously collided in Belgium last year. Hamilton secured the 36th win of his career in Bahrain and his 21st from pole position. Only Michael Schumacher (40), Ayrton Senna (29) and Sebastian Vettel (27) have more. (...)
}
\end{tabular}
};
% System output 1 (generated summary) -------------- mt1
% The caption is split over two nodes (mt1, mt-2) stacked below the
% input caption; the summary text (ts1) hangs below the input passage.
\node[font=\small] (mt1) at ([xshift=0em,yshift=-16.8em]original0.south) {系统生成\quad};
\node[font=\small] (mt-2) at ([xshift=0em,yshift=-0.5em]mt1.south) {\quad 的摘要:};
\node[font=\small] (ts1) at ([xshift=0em,yshift=-3em]original1.south) {
\begin{tabular}[t]{l}
\parbox{32em}{
Button was denied his 100th race for McLaren. The ERS prevented him from making it to the start-line. Button was his team mate in the 11 races in Bahrain. He quizzed after Nico Rosberg accused Lewis Hamilton of pulling off such a manoeuvre in China.
}
\end{tabular}
};
% Disabled connector; it refers to nodes (mt8, ht8) that do not exist in this picture.
%{
%\draw[dotted,thick,ublue] ([xshift=10.3em,yshift=0.3em]mt8.south west)--%([xshift=-5.2em,yshift=-0.3em]ht8.north);
%}
% Frame around all five nodes. The colour `ublue` and the `background` layer
% are not defined in this picture and must come from the including document.
\begin{pgfonlayer}{background}
{
\node[rectangle,draw=ublue, inner sep=0mm] [fit =(original0)(mt1)(mt-2)(ts1)(original1)] {};
}
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------
\begin{tikzpicture}
  % LSTM cell diagram in which only the forget-gate path is drawn, in the
  % highlighted (red) style. The blue rounded box behind it frames the cell.
  %
  % Relies on names defined outside this figure: the length \base, the macros
  % \ExtractX / \ExtractY with their results \XCoord / \YCoord (presumably the
  % x and y parts of the given point -- confirm), and the `background` layer.
  \setlength{\base}{0.6cm}
  \tikzset{
    wordnode/.style={font=\scriptsize},
    % fully transparent grid marker
    auxnode/.style={inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt},
    % operator node (sigma, tanh, X, +)
    opnode/.style={inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt},
    standard/.style={rounded corners=5pt,thick},
    emph/.style={rounded corners=5pt,thick,draw=red},
    formulanode/.style={font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow},
  }
  % Skeleton: grid coordinates auxRC (row R, column C), each with a
  % transparent marker and label so the layout can be debugged by raising
  % the opacity.
  \begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
    \coordinate (aux12) at (0,0);
    \node[auxnode,label={-45:12}] () at (aux12) {};
    \coordinate (aux22) at ([yshift=\base]aux12);
    \node[auxnode,label={-45:22}] () at (aux22) {};
    \coordinate (aux21) at ([xshift=-2\base]aux22);
    \node[auxnode,label={-45:21}] () at (aux21) {};
    % row 2: columns 3..8, one \base apart
    \foreach \src/\dst in {22/23,23/24,24/25,25/26,26/27,27/28} {
      \coordinate (aux\dst) at ([xshift=\base]aux\src);
      \node[auxnode,label={-45:\dst}] () at (aux\dst) {};
    }
    \coordinate (aux29) at ([xshift=2\base]aux28);
    \node[auxnode,label={-45:29}] () at (aux29) {};
    % rows 3..5: each point sits one \base above the point it is derived from
    \foreach \src/\dst in {23/33,24/34,25/35,27/37,35/45,45/55} {
      \coordinate (aux\dst) at ([yshift=\base]aux\src);
      \node[auxnode,label={-45:\dst}] () at (aux\dst) {};
    }
    % remaining row-5 points: x from a row-2 point, y from aux55
    \ExtractX{$(aux21)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux51) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:51}] () at (aux51) {};
    \ExtractX{$(aux23)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux53) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:53}] () at (aux53) {};
    \ExtractX{$(aux28)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux58) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:58}] () at (aux58) {};
    \ExtractX{$(aux29)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux59) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:59}] () at (aux59) {};
    \coordinate (aux68) at ([yshift=\base]aux58);
    \node[auxnode,label={-45:68}] () at (aux68) {};
  \end{scope}
  \begin{scope}
    % Transparent operator-sized placeholders; (f53) is the arrow target
    % below and (u55) is needed by the frame's `fit` list.
    \node[opnode,circle,opacity=0] (f53) at (aux53) {};
    \node[opnode,circle,opacity=0] (u55) at (aux55) {};
    % forget gate (highlighted)
    \draw[emph] (aux21) -- (aux23) -- (aux33);
    \draw[-latex,emph] (aux12) -- (aux22) -- (aux23) -- (f53);
    \node[opnode,circle,draw=red,thick] () at (aux33) {$\sigma$};
  \end{scope}
  % Input labels
  \begin{scope}
    \node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
    \node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
    \node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
  \end{scope}
  % Placeholder named (o27) so the frame's `fit` list can refer to it
  \node (o27) at (aux27) { };
  % Cell frame behind everything else
  \begin{pgfonlayer}{background}
    \node[draw,very thick,rectangle,fill=blue!30!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
  \end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------
\begin{tikzpicture}
  % LSTM cell diagram: forget gate in the neutral `standard` style, input gate
  % highlighted in red (`emph`). A blue rounded box on the background layer
  % frames the cell.
  %
  % Relies on names defined outside this figure: the length \base, the macros
  % \ExtractX / \ExtractY with their results \XCoord / \YCoord (presumably the
  % x and y parts of the given point -- confirm), and the `background` layer.
  \setlength{\base}{0.6cm}
  \tikzset{
    wordnode/.style={font=\scriptsize},
    % fully transparent grid marker
    auxnode/.style={inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt},
    % operator node (sigma, tanh, X, +)
    opnode/.style={inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt},
    % neutral vs. highlighted connection style
    standard/.style={rounded corners=5pt,thick},
    emph/.style={rounded corners=5pt,thick,draw=red},
    % not used inside this picture
    formulanode/.style={font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow},
  }
  % Skeleton: grid coordinates auxRC (row R, column C) with transparent markers
  \begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
    \coordinate (aux12) at (0,0);
    \node[auxnode,label={-45:12}] () at (aux12) {};
    \coordinate (aux22) at ([yshift=\base]aux12);
    \node[auxnode,label={-45:22}] () at (aux22) {};
    \coordinate (aux21) at ([xshift=-2\base]aux22);
    \node[auxnode,label={-45:21}] () at (aux21) {};
    \coordinate (aux23) at ([xshift=\base]aux22);
    \node[auxnode,label={-45:23}] () at (aux23) {};
    \coordinate (aux24) at ([xshift=\base]aux23);
    \node[auxnode,label={-45:24}] () at (aux24) {};
    \coordinate (aux25) at ([xshift=\base]aux24);
    \node[auxnode,label={-45:25}] () at (aux25) {};
    \coordinate (aux26) at ([xshift=\base]aux25);
    \node[auxnode,label={-45:26}] () at (aux26) {};
    \coordinate (aux27) at ([xshift=\base]aux26);
    \node[auxnode,label={-45:27}] () at (aux27) {};
    \coordinate (aux28) at ([xshift=\base]aux27);
    \node[auxnode,label={-45:28}] () at (aux28) {};
    \coordinate (aux29) at ([xshift=2\base]aux28);
    \node[auxnode,label={-45:29}] () at (aux29) {};
    \coordinate (aux33) at ([yshift=\base]aux23);
    \node[auxnode,label={-45:33}] () at (aux33) {};
    \coordinate (aux34) at ([yshift=\base]aux24);
    \node[auxnode,label={-45:34}] () at (aux34) {};
    \coordinate (aux35) at ([yshift=\base]aux25);
    \node[auxnode,label={-45:35}] () at (aux35) {};
    \coordinate (aux37) at ([yshift=\base]aux27);
    \node[auxnode,label={-45:37}] () at (aux37) {};
    \coordinate (aux45) at ([yshift=\base]aux35);
    \node[auxnode,label={-45:45}] () at (aux45) {};
    \coordinate (aux55) at ([yshift=\base]aux45);
    \node[auxnode,label={-45:55}] () at (aux55) {};
    \ExtractX{$(aux21)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux51) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:51}] () at (aux51) {};
    \ExtractX{$(aux23)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux53) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:53}] () at (aux53) {};
    \ExtractX{$(aux28)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux58) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:58}] () at (aux58) {};
    \ExtractX{$(aux29)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux59) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:59}] () at (aux59) {};
    \coordinate (aux68) at ([yshift=\base]aux58);
    \node[auxnode,label={-45:68}] () at (aux68) {};
  \end{scope}
  \begin{scope}
    % Transparent operator-sized placeholders used as arrow targets
    \node[opnode,circle,opacity=0] (f53) at (aux53) {};
    \node[opnode,circle,opacity=0] (u55) at (aux55) {};
    % forget gate -- not the focus here, so it is drawn once, in the neutral
    % style only (a red layer underneath would leak out as a thin rim because
    % highlighted node borders are `thick`).
    {
      \draw[standard] (aux21) -- (aux23) -- (aux33);
      \draw[-latex,standard] (aux12) -- (aux22) -- (aux23) -- (f53);
      \node[opnode,circle] () at (aux33) {$\sigma$};
    }
    % input gate -- the highlighted part of this figure
    {
      % empty (i45) first so the arrow below stops at its rim; the visible
      % "X" node re-uses the name afterwards and is painted over the arrow tip
      \node[opnode,circle] (i45) at (aux45) {};
      \draw[-latex,emph] (aux21) -- (aux24) |- (i45);
      \draw[-latex,emph] (aux21) -- (aux25) -- (u55);
      \draw[emph] (aux12) -- (aux22) -- (aux23);
      \node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
      \node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
      \node[opnode,circle,draw=red,thick] (i45) at (aux45) {X};
    }
  \end{scope}
  % Input labels
  \begin{scope}
    \node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
    \node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
    \node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
  \end{scope}
  % Placeholder named (o27) so the frame's `fit` list can refer to it
  \node[ ] (o27) at (aux27) { };
  % Cell frame behind everything else
  \begin{pgfonlayer}{background}
    \node[draw,very thick,rectangle,fill=blue!30!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
  \end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------
\begin{tikzpicture}
  % LSTM cell diagram: forget gate and input gate in the neutral `standard`
  % style, cell-state update highlighted in red (`emph`). A blue rounded box
  % on the background layer frames the cell.
  %
  % Relies on names defined outside this figure: the length \base, the macros
  % \ExtractX / \ExtractY with their results \XCoord / \YCoord (presumably the
  % x and y parts of the given point -- confirm), and the `background` layer.
  \setlength{\base}{0.6cm}
  \tikzset{
    wordnode/.style={font=\scriptsize},
    % fully transparent grid marker
    auxnode/.style={inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt},
    % operator node (sigma, tanh, X, +)
    opnode/.style={inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt},
    % neutral vs. highlighted connection style
    standard/.style={rounded corners=5pt,thick},
    emph/.style={rounded corners=5pt,thick,draw=red},
    % not used inside this picture
    formulanode/.style={font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow},
  }
  % Skeleton: grid coordinates auxRC (row R, column C) with transparent markers
  \begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
    \coordinate (aux12) at (0,0);
    \node[auxnode,label={-45:12}] () at (aux12) {};
    \coordinate (aux22) at ([yshift=\base]aux12);
    \node[auxnode,label={-45:22}] () at (aux22) {};
    \coordinate (aux21) at ([xshift=-2\base]aux22);
    \node[auxnode,label={-45:21}] () at (aux21) {};
    \coordinate (aux23) at ([xshift=\base]aux22);
    \node[auxnode,label={-45:23}] () at (aux23) {};
    \coordinate (aux24) at ([xshift=\base]aux23);
    \node[auxnode,label={-45:24}] () at (aux24) {};
    \coordinate (aux25) at ([xshift=\base]aux24);
    \node[auxnode,label={-45:25}] () at (aux25) {};
    \coordinate (aux26) at ([xshift=\base]aux25);
    \node[auxnode,label={-45:26}] () at (aux26) {};
    \coordinate (aux27) at ([xshift=\base]aux26);
    \node[auxnode,label={-45:27}] () at (aux27) {};
    \coordinate (aux28) at ([xshift=\base]aux27);
    \node[auxnode,label={-45:28}] () at (aux28) {};
    \coordinate (aux29) at ([xshift=2\base]aux28);
    \node[auxnode,label={-45:29}] () at (aux29) {};
    \coordinate (aux33) at ([yshift=\base]aux23);
    \node[auxnode,label={-45:33}] () at (aux33) {};
    \coordinate (aux34) at ([yshift=\base]aux24);
    \node[auxnode,label={-45:34}] () at (aux34) {};
    \coordinate (aux35) at ([yshift=\base]aux25);
    \node[auxnode,label={-45:35}] () at (aux35) {};
    \coordinate (aux37) at ([yshift=\base]aux27);
    \node[auxnode,label={-45:37}] () at (aux37) {};
    \coordinate (aux45) at ([yshift=\base]aux35);
    \node[auxnode,label={-45:45}] () at (aux45) {};
    \coordinate (aux55) at ([yshift=\base]aux45);
    \node[auxnode,label={-45:55}] () at (aux55) {};
    \ExtractX{$(aux21)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux51) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:51}] () at (aux51) {};
    \ExtractX{$(aux23)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux53) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:53}] () at (aux53) {};
    \ExtractX{$(aux28)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux58) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:58}] () at (aux58) {};
    \ExtractX{$(aux29)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux59) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:59}] () at (aux59) {};
    \coordinate (aux68) at ([yshift=\base]aux58);
    \node[auxnode,label={-45:68}] () at (aux68) {};
  \end{scope}
  \begin{scope}
    % Transparent operator-sized placeholders used as arrow targets
    \node[opnode,circle,opacity=0] (f53) at (aux53) {};
    \node[opnode,circle,opacity=0] (u55) at (aux55) {};
    % forget gate -- not the focus here, so it is drawn once, in the neutral
    % style only (a red layer underneath would leak out as a thin rim because
    % highlighted node borders are `thick`).
    {
      \draw[standard] (aux21) -- (aux23) -- (aux33);
      \draw[-latex,standard] (aux12) -- (aux22) -- (aux23) -- (f53);
      \node[opnode,circle] () at (aux33) {$\sigma$};
    }
    % input gate -- neutral style only. The x_t feed (aux12)--(aux22)--(aux23)
    % is already drawn in black by the forget-gate paths above, so no part of
    % the input stays highlighted in this figure.
    {
      % empty (i45) first so the arrow below stops at its rim; the visible
      % "X" node re-uses the name afterwards
      \node[opnode,circle] (i45) at (aux45) {};
      \draw[-latex,standard] (aux21) -- (aux24) |- (i45);
      \draw[-latex,standard] (aux21) -- (aux25) -- (u55);
      \node[opnode,circle] () at (aux34) {$\sigma$};
      \node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
      \node[opnode,circle] (i45) at (aux45) {X};
    }
    % cell update -- the highlighted part of this figure
    {
      \draw[-latex,emph] (aux51) -- (aux59);
      \node[opnode,circle,draw=red,thick] (f53) at (aux53) {X};
      \node[opnode,circle,draw=red,thick] (u55) at (aux55) {\textbf{+}};
    }
  \end{scope}
  % Input / output labels
  \begin{scope}
    \node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
    \node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
    \node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
    {
      \node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux59) {$\mathbf{c}_{t}$};
    }
  \end{scope}
  % Placeholder named (o27) so the frame's `fit` list can refer to it
  \node[ ] (o27) at (aux27) { };
  % Cell frame behind everything else
  \begin{pgfonlayer}{background}
    \node[draw,very thick,rectangle,fill=blue!30!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
  \end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------
\begin{tikzpicture}
  % LSTM cell diagram: forget gate, input gate and cell-state update in the
  % neutral `standard` style, output gate highlighted in red (`emph`). A blue
  % rounded box on the background layer frames the cell.
  %
  % Relies on names defined outside this figure: the length \base, the macros
  % \ExtractX / \ExtractY with their results \XCoord / \YCoord (presumably the
  % x and y parts of the given point -- confirm), and the `background` layer.
  \setlength{\base}{0.6cm}
  \tikzset{
    wordnode/.style={font=\scriptsize},
    % fully transparent grid marker
    auxnode/.style={inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt},
    % operator node (sigma, tanh, X, +)
    opnode/.style={inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt},
    % neutral vs. highlighted connection style
    standard/.style={rounded corners=5pt,thick},
    emph/.style={rounded corners=5pt,thick,draw=red},
    % not used inside this picture
    formulanode/.style={font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow},
  }
  % Skeleton: grid coordinates auxRC (row R, column C) with transparent markers
  \begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
    \coordinate (aux12) at (0,0);
    \node[auxnode,label={-45:12}] () at (aux12) {};
    \coordinate (aux22) at ([yshift=\base]aux12);
    \node[auxnode,label={-45:22}] () at (aux22) {};
    \coordinate (aux21) at ([xshift=-2\base]aux22);
    \node[auxnode,label={-45:21}] () at (aux21) {};
    \coordinate (aux23) at ([xshift=\base]aux22);
    \node[auxnode,label={-45:23}] () at (aux23) {};
    \coordinate (aux24) at ([xshift=\base]aux23);
    \node[auxnode,label={-45:24}] () at (aux24) {};
    \coordinate (aux25) at ([xshift=\base]aux24);
    \node[auxnode,label={-45:25}] () at (aux25) {};
    \coordinate (aux26) at ([xshift=\base]aux25);
    \node[auxnode,label={-45:26}] () at (aux26) {};
    \coordinate (aux27) at ([xshift=\base]aux26);
    \node[auxnode,label={-45:27}] () at (aux27) {};
    \coordinate (aux28) at ([xshift=\base]aux27);
    \node[auxnode,label={-45:28}] () at (aux28) {};
    \coordinate (aux29) at ([xshift=2\base]aux28);
    \node[auxnode,label={-45:29}] () at (aux29) {};
    \coordinate (aux33) at ([yshift=\base]aux23);
    \node[auxnode,label={-45:33}] () at (aux33) {};
    \coordinate (aux34) at ([yshift=\base]aux24);
    \node[auxnode,label={-45:34}] () at (aux34) {};
    \coordinate (aux35) at ([yshift=\base]aux25);
    \node[auxnode,label={-45:35}] () at (aux35) {};
    \coordinate (aux37) at ([yshift=\base]aux27);
    \node[auxnode,label={-45:37}] () at (aux37) {};
    \coordinate (aux45) at ([yshift=\base]aux35);
    \node[auxnode,label={-45:45}] () at (aux45) {};
    \coordinate (aux55) at ([yshift=\base]aux45);
    \node[auxnode,label={-45:55}] () at (aux55) {};
    \ExtractX{$(aux21)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux51) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:51}] () at (aux51) {};
    \ExtractX{$(aux23)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux53) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:53}] () at (aux53) {};
    \ExtractX{$(aux28)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux58) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:58}] () at (aux58) {};
    \ExtractX{$(aux29)$}
    \ExtractY{$(aux55)$}
    \coordinate (aux59) at (\XCoord,\YCoord);
    \node[auxnode,label={-45:59}] () at (aux59) {};
    \coordinate (aux68) at ([yshift=\base]aux58);
    \node[auxnode,label={-45:68}] () at (aux68) {};
  \end{scope}
  \begin{scope}
    % Transparent operator-sized placeholders used as arrow targets until the
    % visible operator nodes of the same name are drawn
    \node[opnode,circle,opacity=0] (f53) at (aux53) {};
    \node[opnode,circle,opacity=0] (u55) at (aux55) {};
    % forget gate -- not the focus here, so it is drawn once, in the neutral
    % style only (a red layer underneath would leak out as a thin rim because
    % highlighted node borders are `thick`).
    {
      \draw[standard] (aux21) -- (aux23) -- (aux33);
      \draw[-latex,standard] (aux12) -- (aux22) -- (aux23) -- (f53);
      \node[opnode,circle] () at (aux33) {$\sigma$};
    }
    % input gate -- neutral style only
    {
      % empty (i45) first so the arrow below stops at its rim; the visible
      % "X" node re-uses the name afterwards
      \node[opnode,circle] (i45) at (aux45) {};
      \draw[-latex,standard] (aux21) -- (aux24) |- (i45);
      \draw[-latex,standard] (aux21) -- (aux25) -- (u55);
      \node[opnode,circle] () at (aux34) {$\sigma$};
      \node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
      \node[opnode,circle] (i45) at (aux45) {X};
    }
    % cell update -- neutral style only; (u55) defined here is the start of
    % the highlighted output path below
    {
      \draw[-latex,standard] (aux51) -- (aux59);
      \node[opnode,circle] (f53) at (aux53) {X};
      \node[opnode,circle] (u55) at (aux55) {\textbf{+}};
    }
    % output gate -- the highlighted part of this figure
    {
      \node[opnode,circle,draw=red,thick] (o27) at (aux27) {X};
      \draw[-latex,emph] (u55) -| (o27);
      \draw[-latex,emph] (aux21) -- (o27);
      \draw[emph] (aux12) -- (aux22) -- (aux23);
      \node[opnode,circle,draw=red,thick] () at (aux26) {$\sigma$};
      \node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{tanh}$};
      \draw[-latex,emph] (o27) -- (aux29);
      \draw[-latex,emph] (o27) -| (aux68);
    }
  \end{scope}
  % Input / output labels
  \begin{scope}
    \node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
    \node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
    \node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
    {
      \node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux59) {$\mathbf{c}_{t}$};
    }
    {
      \node[wordnode,anchor=east] () at (aux68) {$\mathbf{h}_{t}$};
      \node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux29) {$\mathbf{h}_{t}$};
    }
  \end{scope}
  % Cell frame behind everything else
  \begin{pgfonlayer}{background}
    \node[draw,very thick,rectangle,fill=blue!30!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
  \end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
  % Two 6x6 weight matrices over the tokens "Have you learned nothing ? EOS"
  % (same tokens on both axes). Left: every cell is drawn and a translucent
  % triangle labelled "Masked" covers the upper-right part. Right: only the
  % cells on and below the diagonal are drawn.
  % Each cell is a filled blue square whose side is proportional to the third
  % list field \c. The cell pitch is hard-coded as 0.5*1.0cm.
  \begin{scope}
    \tikzset{
      attnode/.style={minimum size=1.5em,inner sep=0pt,rounded corners=1pt,draw},   % unused here
      srcnode/.style={rotate=45,font=\small,anchor=south west},
      tgtnode/.style={left,font=\small,anchor=north east},
      masknode/.style={minimum size=5.8em,inner sep=0pt,rounded corners=1pt,draw},  % unused here
      elementnode/.style={rectangle,text=white,anchor=center},
    }
    %\setlength{\hnode}{1.0cm}
    %\node [anchor=west,attnode] (node1) at (0,0) {\tiny{}};
    %\node [anchor=west,attnode] (node2) at ([xshift=1em]node1.east) {\tiny{}};
    {
      % ---- left matrix: all weights; \i = column, \j = row (0 = bottom) ----
      \foreach \i / \j / \c in
        {0/5/0.25, 1/5/0.15, 2/5/0.15, 3/5/0.35, 4/5/0.25, 5/5/0.15,
         0/4/0.15, 1/4/0.25, 2/4/0.2, 3/4/0.30, 4/4/0.15, 5/4/0.15,
         0/3/0.15, 1/3/0.15, 2/3/0.5, 3/3/0.25, 4/3/0.15, 5/3/0.25,
         0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
         0/1/0.25, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.5, 5/1/0.15,
         0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.40}
        \node[elementnode,minimum size=0.6*1.0cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.0cm*\i-5.4*0.5*1.0cm,0.5*1.0cm*\j-1.05*1.0cm) {};
      % source (column headers, rotated 45 degrees)
      \node[srcnode] (src1) at (-5.4*0.5*1.0cm,-1.05*1.0cm+5.5*0.5*1.0cm) {\scriptsize{Have}};
      \node[srcnode] (src2) at ([xshift=0.5cm]src1.south west) {\scriptsize{you}};
      \node[srcnode] (src3) at ([xshift=0.5cm]src2.south west) {\scriptsize{learned}};
      \node[srcnode] (src4) at ([xshift=0.5cm]src3.south west) {\scriptsize{nothing}};
      \node[srcnode] (src5) at ([xshift=0.5cm]src4.south west) {\scriptsize{?}};
      \node[srcnode] (src6) at ([xshift=0.5cm]src5.south west) {\scriptsize{EOS}};
      % target (row headers, top to bottom)
      \node[tgtnode] (tgt1) at (-6.0*0.5*1.0cm,-1.05*1.0cm+5.5*0.5*1.0cm) {\scriptsize{Have}};
      \node[tgtnode] (tgt2) at ([yshift=-0.5cm]tgt1.north east) {\scriptsize{you}};
      \node[tgtnode] (tgt3) at ([yshift=-0.5cm]tgt2.north east) {\scriptsize{learned}};
      \node[tgtnode] (tgt4) at ([yshift=-0.5cm]tgt3.north east) {\scriptsize{nothing}};
      \node[tgtnode] (tgt5) at ([yshift=-0.5cm]tgt4.north east) {\scriptsize{?}};
      \node[tgtnode] (tgt6) at ([yshift=-0.5cm]tgt5.north east) {\scriptsize{EOS}};
      {
        % Mask triangle over the upper-right part of the LEFT matrix. It must
        % stay before the second \foreach, which re-uses the names a<i><j>.
        % The outline is closed with `cycle` so that the first corner gets a
        % proper line join instead of two loose line ends.
        \filldraw [fill=blue!20,draw,thick,fill opacity=0.85] ([xshift=-0.9em,yshift=0.5em]a15.north west) -- ([xshift=0.5em,yshift=-0.9em]a51.south east) -- ([xshift=0.5em,yshift=0.5em]a55.north east) -- cycle;
        \node[anchor=west] (labelmask) at ([xshift=0.3em,yshift=0.5em]a23.north east) {Masked};
      }
      {
        % ---- right matrix: only cells with column index <= 5 - row index ----
        \foreach \i / \j / \c in
          {0/5/0.25,
           0/4/0.15, 1/4/0.25,
           0/3/0.15, 1/3/0.15, 2/3/0.5,
           0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15,
           0/1/0.25, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.5,
           0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.40}
          \node[elementnode,minimum size=0.6*1.0cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.0cm*\i+6*0.5*1.0cm,0.5*1.0cm*\j-1.05*1.0cm) {};
        % source (column headers)
        \node[srcnode] (src1) at (6*0.5*1.0cm,-1.05*1.0cm+5.5*0.5*1.0cm) {\scriptsize{Have}};
        \node[srcnode] (src2) at ([xshift=0.5cm]src1.south west) {\scriptsize{you}};
        \node[srcnode] (src3) at ([xshift=0.5cm]src2.south west) {\scriptsize{learned}};
        \node[srcnode] (src4) at ([xshift=0.5cm]src3.south west) {\scriptsize{nothing}};
        \node[srcnode] (src5) at ([xshift=0.5cm]src4.south west) {\scriptsize{?}};
        \node[srcnode] (src6) at ([xshift=0.5cm]src5.south west) {\scriptsize{EOS}};
        % target (row headers)
        \node[tgtnode] (tgt1) at (5.4*0.5*1.0cm,-1.05*1.0cm+5.5*0.5*1.0cm) {\scriptsize{Have}};
        \node[tgtnode] (tgt2) at ([yshift=-0.5cm]tgt1.north east) {\scriptsize{you}};
        \node[tgtnode] (tgt3) at ([yshift=-0.5cm]tgt2.north east) {\scriptsize{learned}};
        \node[tgtnode] (tgt4) at ([yshift=-0.5cm]tgt3.north east) {\scriptsize{nothing}};
        \node[tgtnode] (tgt5) at ([yshift=-0.5cm]tgt4.north east) {\scriptsize{?}};
        \node[tgtnode] (tgt6) at ([yshift=-0.5cm]tgt5.north east) {\scriptsize{EOS}};
      }
    }
  \end{scope}
\end{tikzpicture}
\ No newline at end of file
%-------------------------------------------
\begin{tikzpicture}
  % Attention weight matrix between the English source sentence (columns,
  % rotated headers) and the Chinese target sentence (rows, top to bottom).
  % Each cell is a filled blue square whose side is proportional to the third
  % list field \c; the largest cells carry a white numeric label.
  % The cell pitch is hard-coded as 0.5*1.2cm (see the disabled \hnode below).
  %\setlength{\hnode}{1.2cm}
  \tikzset{
    elementnode/.style={rectangle,text=white,anchor=center},
    srcnode/.style={rotate=45,font=\small,anchor=south west},
    tgtnode/.style={left,font=\small,anchor=north east},
    alignmentnode/.style={rectangle,draw,minimum height=3.6cm,minimum width=0.36cm},  % unused here
    probnode/.style={fill=blue!30,minimum width=0.4cm},                                % unused here
    labelnode/.style={above},                                                          % unused here
  }
  % alignment matrix: \i = column (source word), \j = row (7 = top row)
  \begin{scope}[scale=0.9,yshift=0.12in]
    \foreach \i / \j / \c in
      {0/7/0.2, 1/7/0.45, 2/7/0.15, 3/7/0.15, 4/7/0.15, 5/7/0.15,
       0/6/0.35, 1/6/0.45, 2/6/0.15, 3/6/0.15, 4/6/0.15, 5/6/0.15,
       0/5/0.25, 1/5/0.15, 2/5/0.15, 3/5/0.35, 4/5/0.15, 5/5/0.15,
       0/4/0.15, 1/4/0.25, 2/4/0.2, 3/4/0.30, 4/4/0.15, 5/4/0.15,
       0/3/0.15, 1/3/0.15, 2/3/0.8, 3/3/0.25, 4/3/0.15, 5/3/0.25,
       0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
       0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.8, 5/1/0.15,
       0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.60}
      \node[elementnode,minimum size=0.6*1.2cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.2cm*\i-5.4*0.5*1.2cm,0.5*1.2cm*\j-1.05*1.2cm) {};
    % attention score labels; each label node is named l<i><j> after the cell
    % a<i><j> it sits on, so that every name is unique
    \node[align=center] (l17) at (a17) {\scriptsize{{\color{white} .4}}};
    \node[align=center] (l06) at (a06) {\scriptsize{{\color{white} .3}}};
    \node[align=center] (l16) at (a16) {\scriptsize{{\color{white} .4}}};
    \node[align=center] (l35) at (a35) {\scriptsize{{\color{white} .3}}};
    \node[align=center] (l34) at (a34) {\tiny{{\color{white} .3}}};
    \node[align=center] (l23) at (a23) {\small{{\color{white} .8}}};
    \node[align=center] (l41) at (a41) {\small{{\color{white} .8}}};
    % NOTE(review): this label reads .7 while the cell size for a50 in the
    % list above is 0.60 -- confirm which value is intended.
    \node[align=center] (l50) at (a50) {\small{{\color{white} .7}}};
    % source (column headers)
    \node[srcnode] (src1) at (-5.4*0.5*1.2cm,-1.05*1.2cm+7.5*0.5*1.2cm) {\scriptsize{Have}};
    \node[srcnode] (src2) at ([xshift=0.6cm]src1.south west) {\scriptsize{you}};
    \node[srcnode] (src3) at ([xshift=0.6cm]src2.south west) {\scriptsize{learned}};
    \node[srcnode] (src4) at ([xshift=0.6cm]src3.south west) {\scriptsize{nothing}};
    \node[srcnode] (src5) at ([xshift=0.6cm]src4.south west) {\scriptsize{?}};
    \node[srcnode] (src6) at ([xshift=0.6cm]src5.south west) {\scriptsize{EOS}};
    % target (row headers, top to bottom)
    % NOTE(review): rows 1 and 3-6 had empty label text. The tokens below are
    % inferred from the source sentence and from where each row's largest
    % weight falls (row 1 -> "you", rows 3/4 -> "nothing", row 5 -> "learned");
    % confirm against the intended target sentence.
    \node[tgtnode] (tgt1) at (-6.0*0.5*1.2cm,-1.05*1.2cm+7.5*0.5*1.2cm) {\scriptsize{你}};
    \node[tgtnode] (tgt2) at ([yshift=-0.6cm]tgt1.north east) {\scriptsize{什么}};
    \node[tgtnode] (tgt3) at ([yshift=-0.6cm]tgt2.north east) {\scriptsize{都}};
    \node[tgtnode] (tgt4) at ([yshift=-0.6cm]tgt3.north east) {\scriptsize{没}};
    \node[tgtnode] (tgt5) at ([yshift=-0.6cm]tgt4.north east) {\scriptsize{学}};
    \node[tgtnode] (tgt6) at ([yshift=-0.6cm]tgt5.north east) {\scriptsize{到}};
    \node[tgtnode] (tgt7) at ([yshift=-0.6cm]tgt6.north east) {\scriptsize{?}};
    \node[tgtnode] (tgt8) at ([yshift=-0.6cm]tgt7.north east) {\scriptsize{EOS}};
  \end{scope}
\end{tikzpicture}
%-------------------------------------------
\ No newline at end of file
%--------------------------------------------------------------------------
\begin{tikzpicture}
% Unrolled RNN encoder-decoder over a ten-position sentence pair, with a
% legend on the right.  The length \base and the latex' arrow tip are
% provided outside this picture.
\setlength{\base}{0.9cm}
\tikzset{
  rnnnode/.style={rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt},
  wordnode/.style={font=\tiny}
}
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
  % Encoder: a row of embedding cells (green) with a recurrent cell (blue)
  % above each one.  eemb0 is a bare coordinate that starts the chain.
  \coordinate (eemb0) at (0,0);
  \foreach \col [count=\prev from 0] in {1,2,...,10}
  {
    \node[rnnnode,fill=green!30!white,anchor=west] (eemb\col) at ([xshift=0.4\base]eemb\prev.east) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (enc\col) at ([yshift=0.5\base]eemb\col.north) {};
  }
  % Initial hidden state fed into the first recurrent cell.
  \node[wordnode,left=0.4\base of enc1] (init) {$0$};
  % Source tokens, one below each embedding cell.
  % NOTE(review): the single-character tokens (positions 1, 3, 5, 6, 8, 9)
  % were blank in the copy under review; they are filled in here to match the
  % English side "Do you know the way to Beijing Railway Station" -- confirm.
  \node[wordnode,below=0pt of eemb1] {你};
  \node[wordnode,below=0pt of eemb2] {知道};
  \node[wordnode,below=0pt of eemb3] {去};
  \node[wordnode,below=0pt of eemb4] {北京站};
  \node[wordnode,below=0pt of eemb5] {的};
  \node[wordnode,below=0pt of eemb6] {路};
  \node[wordnode,below=0pt of eemb7] {怎么};
  \node[wordnode,below=0pt of eemb8] {走};
  \node[wordnode,below=0pt of eemb9] {吗};
  \node[wordnode,below=0pt of eemb10] {$\langle$eos$\rangle$};
  % Decoder: embedding (green), recurrent cell (blue) and output layer (red),
  % stacked above the encoder column with the same index.
  \foreach \col in {1,2,...,10}
  {
    \node[rnnnode,fill=green!30!white,anchor=south] (demb\col) at ([yshift=2\base]enc\col.north) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (dec\col) at ([yshift=0.5\base]demb\col.north) {};
    \node[rnnnode,fill=red!30!white,anchor=south] (softmax\col) at ([yshift=0.5\base]dec\col.north) {};
  }
  % Decoder input words.  <sos> fixes the baseline; every other word takes
  % its x from the cell above it and its y from that baseline through the
  % perpendicular coordinate (a |- b), so no \ExtractX/\ExtractY round trip
  % through \XCoord/\YCoord is needed.
  \node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
  \foreach \word [count=\col from 2] in {Do,you,know,the,way,to,Beijing,Railway,Station}
    \node[wordnode,anchor=base] at (demb\col.south |- decwordin.base) {\word};
  % Decoder output words, aligned on the baseline of the first output "Do".
  \node[wordnode,above=0pt of softmax1] (decwordout) {Do};
  \foreach \word [count=\col from 2] in {you,know,the,way,to,Beijing,Railway,Station,{$\langle$eos$\rangle$}}
    \node[wordnode,anchor=base] at (softmax\col.north |- decwordout.base) {\word};
  % Connections: vertical data flow inside every column ...
  \draw[-latex'] (init.east) to (enc1.west);
  \foreach \col in {1,2,...,10}
  {
    \draw[-latex'] (eemb\col) to (enc\col);
    \draw[-latex'] (demb\col) to (dec\col);
    % softmax\col.south lies 0.5\base above dec\col.north (outer sep is 0pt)
    \draw[-latex'] (dec\col.north) to (softmax\col.south);
  }
  % ... and the recurrent links between neighbouring columns.
  \foreach \col [count=\nxt from 2] in {1,2,...,9}
  {
    \draw[-latex'] (enc\col.east) to (enc\nxt.west);
    \draw[-latex'] (dec\col.east) to (dec\nxt.west);
  }
  % The last encoder state is routed back to the first decoder cell.
  \coordinate (bridge) at ([yshift=-1.2\base]demb2);
  \draw[-latex'] (enc10.north) .. controls +(north:\base) and +(east:1.5\base) .. (bridge) .. controls +(west:2.5\base) and +(west:0.6\base) .. (dec1.west);
\end{scope}
% legend
\begin{scope}[shift={(10\base,2.5\base)}]
  \node[rnnnode,fill=green!30!white,label={[label distance=3pt,font=\scriptsize]0:词嵌入层}] (emb) at (0,0) {};
  \node[rnnnode,fill=blue!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:循环单元}] (rnn) at ([yshift=2\base]emb.south west) {};
  \node[rnnnode,fill=red!30!white,anchor=north west,label={[label distance=3pt,font=\scriptsize]0:输出层}] (softmax) at ([yshift=2\base]rnn.south west) {};
  % The note nodes get their own names so that they do not overwrite the
  % decoder cell softmax2 defined above.
  \node [anchor=north west] (softmaxnote) at ([xshift=0.6\base]softmax.south west) {\scriptsize{Softmax}};
  \node [anchor=north west] (rnnnote) at ([xshift=0.6\base]rnn.south west) {\scriptsize{LSTM}};
  % Callout for the final encoder state, which is repainted in purple.
  \node [anchor=west] (reprlabel) at ([xshift=1em]enc10.east) {\scriptsize{句子表示}};
  \draw [->,dashed] (reprlabel.west) -- ([xshift=0.1em]enc10.east);
  \node [rnnnode,fill=purple!30!white] at (enc10) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Multi-head attention: Q, K and V each pass through a stack of three Linear
% projections, then a stack of three scaled dot-product attention heads,
% followed by Concat and a final Linear layer.
% The colour ugreen is defined outside this picture.
\begin{scope}
  \tikzset{
    proj/.style={inner sep=4pt,fill=ugreen!20!white},
    head/.style={inner sep=4pt,fill=blue!20!white,minimum width=9em}
  }
  % Rearmost projection of each stack; these fix the horizontal layout.
  \node [proj,anchor=west,draw=black!30] (Linear0) at (0,0) {\tiny{Linear}};
  \node [proj,anchor=west,draw=black!30] (Linear1) at ([xshift=1.5em]Linear0.east) {\tiny{Linear}};
  \node [proj,anchor=west,draw=black!30] (Linear2) at ([xshift=1.5em]Linear1.east) {\tiny{Linear}};
  % Middle and front projections (each offset down-left by 0.2em) and the
  % input symbol below the front one.  Painted back to front.
  \foreach \stack/\lbl in {Linear0/Q,Linear1/K,Linear2/V}
  {
    \node [proj,anchor=south west,draw=black!50] (\stack1) at ([shift={(-0.2em,-0.2em)}]\stack.south west) {\tiny{Linear}};
    \node [proj,anchor=south west,draw] (\stack2) at ([shift={(-0.2em,-0.2em)}]\stack1.south west) {\tiny{Linear}};
    \node [anchor=north] (\lbl) at ([yshift=-1em]\stack2.south) {\footnotesize{$\mathbf{\lbl}$}};
  }
  % Attention heads, stacked the same way above the middle projection stack.
  \node [head,anchor=south,draw=black!30] (Scale) at ([yshift=1em]Linear1.north) {\tiny{Scaled Dot-Product Attention}};
  \node [head,anchor=south west,draw=black!50] (Scale1) at ([shift={(-0.2em,-0.2em)}]Scale.south west) {\tiny{Scaled Dot-Product Attention}};
  \node [head,anchor=south west,draw] (Scale2) at ([shift={(-0.2em,-0.2em)}]Scale1.south west) {\tiny{Scaled Dot-Product Attention}};
  % Merge of the heads and the output projection.
  \node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=yellow!30] (Concat) at ([yshift=1em]Scale2.north) {\tiny{Concat}};
  \node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=ugreen!20!white] (Linear) at ([yshift=1em]Concat.north) {\tiny{Linear}};
  % Edges are painted after all boxes.  Each input fans out to the three
  % layers of its stack (black arrow to the front, grey lines to the layers
  % behind), and each layer sends a line upwards towards the heads.
  \foreach \stack/\lbl in {Linear0/Q,Linear1/K,Linear2/V}
  {
    \draw [->] ([yshift=0.1em]\lbl.north) -- ([yshift=-0.1em]\stack2.south);
    \draw [-,draw=black!50] ([yshift=0.1em]\lbl.north) -- ([xshift=0.2em,yshift=-0.1em]\stack2.south);
    \draw [-,draw=black!30] ([yshift=0.1em]\lbl.north) -- ([xshift=0.4em,yshift=-0.1em]\stack2.south);
    \draw [->] (\stack2.north) -- ([yshift=1em]\stack2.north);
    \draw [-,draw=black!50] (\stack1.north) -- ([yshift=0.8em]\stack1.north);
    \draw [-,draw=black!30] (\stack.north) -- ([yshift=0.6em]\stack.north);
  }
  % Heads -> Concat -> Linear -> output.
  \draw [->] (Scale2.north) -- (Concat.south);
  \draw [-,draw=black!50] (Scale1.north) -- ([yshift=0.8em]Scale1.north);
  \draw [-,draw=black!30] (Scale.north) -- ([yshift=0.6em]Scale.north);
  \draw [->] (Concat.north) -- (Linear.south);
  \draw [->] (Linear.north) -- ([yshift=1em]Linear.north);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
% not compatible with [scale=?]
%-----------------------------------------------------------------
\begin{tikzpicture}
% Left: the last three encoder steps and the first three decoder steps of an
% RNN translation model.  Right: a close-up of the output layer (hidden
% vector -> linear map -> softmax -> word distribution), framed in red and
% linked to the third decoder output.
% The length \base, the colours ugreen/ublue and the latex' arrow tip are
% provided outside this picture.
\setlength{\base}{0.9cm}
\tikzset{
  rnnnode/.style={rounded corners=1pt,minimum height=0.5\base,minimum width=1\base,draw,inner sep=0pt,outer sep=0pt},
  wordnode/.style={font=\tiny},
  vcell/.style={anchor=west,minimum size=1em},
  bar/.style={anchor=south,minimum width=1em,fill=ublue!80,inner sep=0pt},
  barword/.style={anchor=west,rotate=90,font=\tiny}
}
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
  % Encoder: embedding cells with the recurrent cells above them.
  \coordinate (eemb0) at (0,0);
  \foreach \col [count=\prev from 0] in {1,2,3}
  {
    \node[rnnnode,fill=green!30!white,anchor=west] (eemb\col) at ([xshift=0.4\base]eemb\prev.east) {\tiny{$e_x()$}};
    \node[rnnnode,fill=blue!30!white,anchor=south] (enc\col) at ([yshift=0.3\base]eemb\col.north) {};
  }
  % Hidden-state names; the final state is repainted in purple.
  \node (enclabel1) at (enc1) {\tiny{$\textbf{h}_{m-2}$}};
  \node (enclabel2) at (enc2) {\tiny{$\textbf{h}_{m-1}$}};
  \node[rnnnode,fill=purple!30!white] (enclabel3) at (enc3) {\tiny{$\textbf{h}_{m}$}};
  % Ellipses standing for the earlier encoder steps.
  \node[wordnode,left=0.4\base of enc1] (init1) {$\cdots$};
  \node[wordnode,left=0.4\base of eemb1] (init2) {$\cdots$};
  % Source tokens under the embedding cells.
  % NOTE(review): the two single-character tokens were blank in the copy
  % under review; they are filled in as the last two source words of the
  % running example ("... 怎么 走 吗") -- confirm.
  \node[wordnode,below=0pt of eemb1] {走};
  \node[wordnode,below=0pt of eemb2] {吗};
  \node[wordnode,below=0pt of eemb3] {$\langle$eos$\rangle$};
  % Decoder: embedding, recurrent state s_i and softmax layer per step.
  \foreach \col in {1,2,3}
  {
    \node[rnnnode,fill=green!30!white,anchor=south] (demb\col) at ([yshift=\base]enc\col.north) {\tiny{$e_y()$}};
    \node[rnnnode,fill=blue!30!white,anchor=south] (dec\col) at ([yshift=0.3\base]demb\col.north) {\tiny{$\textbf{s}_\col$}};
    \node[rnnnode,fill=red!30!white,anchor=south] (softmax\col) at ([yshift=0.3\base]dec\col.north) {\tiny{Softmax}};
  }
  % Ellipses standing for the later decoder steps.
  \node[wordnode,right=0.4\base of demb3] (end1) {$\cdots$};
  \node[wordnode,right=0.4\base of dec3] (end2) {$\cdots$};
  \node[wordnode,right=0.4\base of softmax3] (end3) {$\cdots$};
  % Decoder input words share the baseline of <sos>.  The perpendicular
  % coordinate (a |- b) takes x from the cell and y from that baseline, so
  % no \ExtractX/\ExtractY round trip through \XCoord/\YCoord is needed.
  \node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
  \foreach \word [count=\col from 2] in {Do,you}
    \node[wordnode,anchor=base] at (demb\col.south |- decwordin.base) {\word};
  % Decoder output words share the baseline of the first output "Do".
  \node[wordnode,above=0pt of softmax1] (decwordout) {Do};
  \foreach \word [count=\col from 2] in {you,know}
    \node[wordnode,anchor=base] at (softmax\col.north |- decwordout.base) {\word};
  % Connections
  \draw[-latex'] (init1.east) to (enc1.west);
  \draw[-latex'] (dec3.east) to (end2.west);
  \foreach \col in {1,2,3}
  {
    \draw[-latex'] (eemb\col) to (enc\col);
    \draw[-latex'] (demb\col) to (dec\col);
    \draw[-latex'] (dec\col.north) to (softmax\col.south);
  }
  \foreach \col [count=\nxt from 2] in {1,2}
  {
    \draw[-latex'] (enc\col.east) to (enc\nxt.west);
    \draw[-latex'] (dec\col.east) to (dec\nxt.west);
  }
  % The last encoder state is routed back to the first decoder cell.
  \coordinate (bridge) at ([yshift=0.4\base]enc2.north west);
  \draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west);
\end{scope}
% Close-up of the output layer
\begin{scope}
  % Hidden vector s: a rounded frame holding four sample cells.
  \coordinate (start) at (8.5\base,0.1\base);
  \node [anchor=center,minimum width=5.7em,minimum height=1.3em,draw,rounded corners=0.3em] (hidden) at (start) {};
  \node [vcell,fill=ugreen!20] (cell01) at ([xshift=0.2em]hidden.west) {\scriptsize{.2}};
  \node [vcell,fill=ugreen!10] (cell02) at (cell01.east) {\scriptsize{-1}};
  \node [vcell,fill=white] (cell03) at (cell02.east) {\scriptsize{$\cdots$}};
  \node [vcell,fill=ugreen!50] (cell04) at (cell03.east) {\scriptsize{5}};
  % Projected vector \hat{s}: a wider frame holding eight cells.
  \node [anchor=south,minimum width=10.9em,minimum height=1.3em,draw,rounded corners=0.3em] (target) at ([yshift=1.5em]hidden.north) {};
  \node [vcell,fill=ugreen!10] (cell11) at ([xshift=0.2em]target.west) {\scriptsize{-2}};
  \node [vcell,fill=ugreen!10] (cell12) at (cell11.east) {\scriptsize{-1}};
  \node [vcell,fill=ugreen!30] (cell13) at (cell12.east) {\scriptsize{.7}};
  \node [vcell,fill=white] (cell14) at (cell13.east) {\scriptsize{$\cdots$}};
  \node [vcell,fill=ugreen!70] (cell15) at (cell14.east) {\scriptsize{6}};
  \node [vcell,fill=ugreen!10] (cell16) at (cell15.east) {\scriptsize{-3}};
  \node [vcell,fill=ugreen!10] (cell17) at (cell16.east) {\scriptsize{-1}};
  \node [vcell,fill=ugreen!20] (cell18) at (cell17.east) {\scriptsize{.2}};
  % Probability bars 2.5em above the cells, each with a rotated word on top.
  % Column 4 is an ellipsis; the word over the tallest bar is set in red.
  \node [bar,minimum height=0.2em] (label1) at ([yshift=2.5em]cell11.north) {};
  \node [barword] (w1) at (label1.north) {$\langle$eos$\rangle$};
  \node [bar,minimum height=0.3em] (label2) at ([yshift=2.5em]cell12.north) {};
  \node [barword] (w2) at (label2.north) {$\langle$sos$\rangle$};
  \node [bar,minimum height=0.5em] (label3) at ([yshift=2.5em]cell13.north) {};
  \node [barword] (w3) at (label3.north) {Do};
  \node [anchor=south,font=\scriptsize] (w4) at ([yshift=2.5em]cell14.north) {$\cdots$};
  \node [bar,minimum height=1em] (label5) at ([yshift=2.5em]cell15.north) {};
  \node [barword] (w5) at (label5.north) {{\color{red} know}};
  \node [bar,minimum height=0.1em] (label6) at ([yshift=2.5em]cell16.north) {};
  \node [barword] (w6) at (label6.north) {you};
  \node [bar,minimum height=0.3em] (label7) at ([yshift=2.5em]cell17.north) {};
  \node [barword] (w7) at (label7.north) {?};
  \node [bar,minimum height=0.4em] (label8) at ([yshift=2.5em]cell18.north) {};
  \node [barword] (w8) at (label8.north) {have};
  % Linear map: a shaded trapezoid from the hidden vector up to the wider one.
  \filldraw [fill=red!20,draw=white] (target.south west) -- (target.south east) -- ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- ([xshift=0.2em,yshift=0.1em]hidden.north west);
  \draw [->,thick] ([xshift=0.2em,yshift=0.1em]hidden.north west) -- (target.south west);
  \draw [->,thick] ([xshift=-0.2em,yshift=0.1em]hidden.north east) -- (target.south east);
  \node [anchor=south] at ([yshift=0.3em]hidden.north) {\scriptsize{$\hat{s}=Ws$}};
  % Softmax band.  The formula node is placed first only so that its west
  % and east anchors can shape the filled band; the band then covers it, so
  % the same node is placed a second time to put the formula on top.
  \node [rounded corners=0.3em] (softmax) at ([yshift=1.25em]target.north) {\scriptsize{$p(\hat{s}_i)=\frac{e^{\hat{s}_i}}{\sum_j e^{\hat{s}_j}}$}};
  \filldraw [fill=blue!20,draw=white] ([yshift=0.1em]cell11.north west) {[rounded corners=0.3em] -- (softmax.west)} -- (label1.south west) -- (label8.south east) {[rounded corners=0.3em] -- (softmax.east)} -- ([yshift=0.1em]cell18.north east) -- ([yshift=0.1em]cell11.north west);
  \node [rounded corners=0.3em] (softmax) at ([yshift=1.25em]target.north) {\scriptsize{$p(\hat{s}_i)=\frac{e^{\hat{s}_i}}{\sum_j e^{\hat{s}_j}}$}};
  % Arrow into the hidden vector and arrow out of the selected word.
  \draw [-latex'] ([yshift=-0.3cm]hidden.south) to (hidden.south);
  \draw [-latex'] (w5.east) to ([yshift=0.3cm]w5.east);
  % Dashed red frame around the whole close-up; tmp pulls the frame's top
  % edge to 3pt below the end of the tallest word label.
  \coordinate (tmp) at ([yshift=-3pt]w5.east);
  \node [draw=red,thick,densely dashed,rounded corners=3pt,inner sep=5pt,fit=(cell01) (cell11) (label1) (label8) (target) (hidden) (tmp)] (output) {};
\end{scope}
% Link from the third decoder softmax cell to the close-up.
\draw [->,thick,densely dashed,red] ([yshift=-0.2em]softmax3.east) .. controls +(east:2\base) and +(west:\base) .. (output.west);
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Scaled dot-product attention drawn as a vertical pipeline
% (MatMul -> Scale -> Mask -> SoftMax -> MatMul) with explanatory callouts
% on both sides.  The colour ugreen is defined outside this picture; the
% callout frames use the fit and shadows libraries and a "background" layer.
\begin{scope}
  \tikzset{callout/.style={rectangle,inner sep=0.2em,rounded corners=1pt}}
  % Pipeline boxes and the three inputs.
  \node [anchor=south west,draw,inner sep=4pt,minimum width=4em,fill=blue!20!white] (MatMul) at (0,0) {\tiny{MatMul}};
  \node [anchor=north] (Q1) at ([xshift=-1.4em,yshift=-1em]MatMul.south) {\footnotesize{$\mathbf{Q}$}};
  \node [anchor=north] (K1) at ([xshift=1.4em,yshift=-1em]MatMul.south) {\footnotesize{$\mathbf{K}$}};
  \node [anchor=south,draw,inner sep=4pt,fill=yellow!30,minimum width=2.5em] (Scale3) at ([yshift=1em]MatMul.north) {\tiny{Scale}};
  \node [anchor=south,draw,inner sep=4pt,fill=purple!20,minimum width=3.5em] (Mask) at ([yshift=0.8em]Scale3.north) {\tiny{Mask(opt.)}};
  \node [anchor=south,draw,inner sep=4pt,fill=ugreen!20!white] (SoftMax) at ([yshift=1em]Mask.north) {\tiny{SoftMax}};
  \node [anchor=south,draw,minimum width=4em,inner sep=4pt,fill=blue!20!white] (MatMul1) at ([xshift=1.7em,yshift=1em]SoftMax.north) {\tiny{MatMul}};
  \node [anchor=north] (V1) at ([xshift=2em]K1.north) {\footnotesize{$\mathbf{V}$}};
  % Empty node above the last box (it only takes up space).
  \node [anchor=north] (null) at ([yshift=0.8em]MatMul1.north) {};
  % Data flow.  V bypasses the lower boxes with a fixed-length arrow.
  \draw [->] ([yshift=0.1em]Q1.north) -- ([xshift=-1.4em,yshift=-0.1em]MatMul.south);
  \draw [->] ([yshift=0.1em]K1.north) -- ([xshift=1.4em,yshift=-0.1em]MatMul.south);
  \draw [->] ([yshift=0.1em]MatMul.north) -- ([yshift=-0.1em]Scale3.south);
  \draw [->] ([yshift=0.1em]Scale3.north) -- ([yshift=-0.1em]Mask.south);
  \draw [->] ([yshift=0.1em]Mask.north) -- ([yshift=-0.1em]SoftMax.south);
  \draw [->] ([yshift=0.1em]SoftMax.north) -- ([yshift=0.9em]SoftMax.north);
  \draw [->] ([yshift=0.1em]V1.north) -- ([yshift=9.1em]V1.north);
  \draw [->] ([yshift=0.1em]MatMul1.north) -- ([yshift=0.8em]MatMul1.north);
  % Callout text, one node per line; each following line hangs 0.3em into
  % the south-west corner of the line before it.
  % Callout 1 (left): the inputs.
  \node [anchor=east] (line1) at ([xshift=-3em,yshift=1em]MatMul.west) {\scriptsize{自注意力机制的Query}};
  \node [anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {\scriptsize{Key和Value均来自同一句子}};
  \node [anchor=north west] (line3) at ([yshift=0.3em]line2.south west) {\scriptsize{编码-解码注意力机制}};
  \node [anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {\scriptsize{与前面讲的一样}};
  % Callout 2 (right): the first MatMul.
  \node [anchor=west] (line11) at ([xshift=3em]MatMul.east) {\scriptsize{Query和Key的转置}};
  \node [anchor=north west] (line12) at ([yshift=0.3em]line11.south west) {\scriptsize{进行点积,得到句子内部}};
  \node [anchor=north west] (line13) at ([yshift=0.3em]line12.south west) {\scriptsize{各个位置的相关性}};
  % Callout 3 (right): Scale.
  \node [anchor=west] (line21) at ([yshift=5em]line11.west) {\scriptsize{相关性矩阵在训练中}};
  \node [anchor=north west] (line22) at ([yshift=0.3em]line21.south west) {\scriptsize{方差变大,不利于训练}};
  \node [anchor=north west] (line23) at ([yshift=0.3em]line22.south west) {\scriptsize{所以对其进行缩放}};
  % Callout 4 (left): Mask.
  \node [anchor=west] (line31) at ([yshift=6em]line1.west) {\scriptsize{在编码端,对句子补齐}};
  \node [anchor=north west] (line32) at ([yshift=0.3em]line31.south west) {\scriptsize{填充的部分进行屏蔽}};
  \node [anchor=north west] (line33) at ([yshift=0.3em]line32.south west) {\scriptsize{解码时看不到未来的信息}};
  \node [anchor=north west] (line34) at ([yshift=0.3em]line33.south west) {\scriptsize{需要对未来的信息进行屏蔽}};
  % Callout 5 (right): the second MatMul.
  \node [anchor=west] (line41) at ([yshift=4em]line21.west) {\scriptsize{用归一化的相关性打分}};
  \node [anchor=north west] (line42) at ([yshift=0.3em]line41.south west) {\scriptsize{对Value进行加权求和}};
  % Frames and pointers go on the background layer so that they are painted
  % underneath the text placed above.
  \begin{pgfonlayer}{background}
    % Callout 1, plus a dotted frame around Q, K and V.
    \node [callout,fill=green!10,drop shadow,draw=ugreen,fit=(line1) (line2) (line3) (line4)] (box1) {};
    \node [rectangle,inner sep=0.1em,rounded corners=1pt,very thick,dotted,draw=ugreen,fit=(Q1) (K1) (V1)] (box0) {};
    \draw [->,dotted,very thick,ugreen] ([yshift=-1.5em,xshift=0.8em]box1.east) -- ([yshift=-1.5em,xshift=0.1em]box1.east);
    % Callout 2
    \node [callout,fill=blue!20!white,drop shadow,draw=blue,fit=(line11) (line12) (line13)] (box2) {};
    \draw [->,dotted,very thick,blue] ([yshift=1em,xshift=-2.8em]box2.west) -- ([yshift=1em,xshift=-0.1em]box2.west);
    % Callout 3
    \node [callout,fill=yellow!20,drop shadow,draw=black,fit=(line21) (line22) (line23)] (box3) {};
    \draw [->,dotted,very thick,black] ([xshift=0.1em]Scale3.east) .. controls +(east:1) and +(west:1) .. ([yshift=1.0em]box3.west);
    % Callout 4
    \node [callout,fill=red!10,drop shadow,draw=red,fit=(line31) (line32) (line33) (line34)] (box4) {};
    \draw [->,dotted,very thick,red] ([yshift=-1.5em,xshift=1.5em]box4.east) -- ([yshift=-1.5em,xshift=0.1em]box4.east);
    % Callout 5
    \node [callout,fill=blue!20!white,drop shadow,draw=blue,fit=(line41) (line42)] (box5) {};
    \draw [->,dotted,very thick,blue] ([yshift=-0.3em,xshift=-1em]box5.west) -- ([yshift=-0.3em,xshift=-0.1em]box5.west);
  \end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Transformer encoder (left) and decoder (right) layer stacks, with four of
% the "Add & LayerNorm" blocks highlighted on the background layer.
\begin{scope}
  % Every block shares one rounded, framed box; the variants only change
  % the size or the fill colour.
  \tikzset{
    layer/.style={minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw},
    Sanode/.style={layer,fill=orange!20},
    Resnode/.style={layer,minimum height=1.1em,fill=yellow!20},
    ffnnode/.style={layer},
    outputnode/.style={layer},
    inputnode/.style={layer,minimum width=3.5em,fill=red!10},
    posnode/.style={layer,minimum width=3.5em,fill=black!5!white},
    standard/.style={rounded corners=3pt}
  }
  % ---- Encoder stack, bottom to top ----
  \node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
  \node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  \node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
  \node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  % Word embedding and position encoding sit side by side under the stack.
  \node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
  \node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
  \node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\tiny{$\textbf{编码器输入: 我 很 好}$}};
  \node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
  \draw [->] (sa1.north) -- (res1.south);
  \draw [->] (res1.north) -- (ffn1.south);
  \draw [->] (ffn1.north) -- (res2.south);
  \draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
  \draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
  % ---- Decoder stack, bottom to top ----
  \node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
  \node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  \node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
  \node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  \node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
  \node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  \node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
  \node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
  \node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
  \node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}};
  \node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
  \node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}};
  \draw [->] (sa2.north) -- (res3.south);
  \draw [->] (res3.north) -- (ed1.south);
  \draw [->] (ed1.north) -- (res4.south);
  \draw [->] (res4.north) -- (ffn2.south);
  \draw [->] (ffn2.north) -- (res5.south);
  \draw [->] (res5.north) -- (o1.south);
  \draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
  \draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
  \draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
  % ---- Skip connections: branch off below a sub-layer, run around its
  % outer side (left for the encoder, right for the decoder) and re-enter
  % at the Add & LayerNorm block above it. ----
  \draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
  \draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
  \draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
  \draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
  \draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
  % Encoder output routed across to the decoder side.
  \draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
  %\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
  %\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
  % ---- Highlight: a red backing behind res1..res4, painted on the
  % background layer so the blocks themselves stay on top. ----
  \begin{pgfonlayer}{background}
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(res1)] (box1) {};
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(res2)] (box2) {};
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(res3)] (box3) {};
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(res4)] (box4) {};
  \end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Transformer encoder (left) and decoder (right) layer stacks, with the two
% feed-forward blocks highlighted on the background layer.
\begin{scope}
  % Every block shares one rounded, framed box; the variants only change
  % the size or the fill colour.
  \tikzset{
    layer/.style={minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw},
    Sanode/.style={layer,fill=orange!20},
    Resnode/.style={layer,minimum height=1.1em,fill=yellow!20},
    ffnnode/.style={layer,fill=blue!20},
    outputnode/.style={layer},
    inputnode/.style={layer,minimum width=3.5em,fill=red!10},
    posnode/.style={layer,minimum width=3.5em,fill=black!5!white},
    standard/.style={rounded corners=3pt}
  }
  % ---- Encoder stack, bottom to top ----
  \node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
  \node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  \node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
  \node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  % Word embedding and position encoding sit side by side under the stack.
  \node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
  \node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
  \node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\tiny{$\textbf{编码器输入: 我 很 好}$}};
  \node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
  \draw [->] (sa1.north) -- (res1.south);
  \draw [->] (res1.north) -- (ffn1.south);
  \draw [->] (ffn1.north) -- (res2.south);
  \draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
  \draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
  % ---- Decoder stack, bottom to top ----
  \node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
  \node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  \node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
  \node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  \node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
  \node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
  \node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
  \node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
  \node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
  \node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}};
  \node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
  \node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}};
  \draw [->] (sa2.north) -- (res3.south);
  \draw [->] (res3.north) -- (ed1.south);
  \draw [->] (ed1.north) -- (res4.south);
  \draw [->] (res4.north) -- (ffn2.south);
  \draw [->] (ffn2.north) -- (res5.south);
  \draw [->] (res5.north) -- (o1.south);
  \draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
  \draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
  \draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
  % ---- Skip connections: branch off below a sub-layer, run around its
  % outer side (left for the encoder, right for the decoder) and re-enter
  % at the Add & LayerNorm block above it. ----
  \draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
  \draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
  \draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
  \draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
  \draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
  % Encoder output routed across to the decoder side.
  \draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
  %\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
  %\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
  % ---- Highlight: a red backing behind both feed-forward blocks, painted
  % on the background layer so the blocks themselves stay on top. ----
  \begin{pgfonlayer}{background}
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(ffn1)] (box1) {};
    \node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(ffn2)] (box2) {};
  \end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Transformer overview: encoder stack on the left, decoder stack on the right.
% Filled boxes on the background layer are placed behind the two
% self-attention sub-layers and the encoder-decoder attention sub-layer.
\begin{scope}
% Node styles, local to this scope.
\tikzset{
  Sanode/.style={minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20},
  Resnode/.style={minimum height=1.1em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw},
  ffnnode/.style={minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw},
  outputnode/.style={minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw},
  inputnode/.style={minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=red!10},
  posnode/.style={minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=black!5!white},
  standard/.style={rounded corners=3pt},
}
% ---- Encoder: sub-layers stacked bottom-up, then input boxes and labels ----
\node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\tiny{$\textbf{编码器输入: 我 很 好}$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
% Main data path through the encoder.
\foreach \src/\dst in {sa1/res1,res1/ffn1,ffn1/res2} {
  \draw [->] (\src.north) -- (\dst.south);
}
\draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
\draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
% ---- Decoder: placed 3em to the right of the encoder self-attention box ----
\node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
\node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}};
% Main data path through the decoder.
\foreach \src/\dst in {sa2/res3,res3/ed1,ed1/res4,res4/ffn2,ffn2/res5,res5/o1} {
  \draw [->] (\src.north) -- (\dst.south);
}
\draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
\draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
\draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
% Skip (residual) paths: routed around the left side of the encoder ...
\draw[->,standard] ([yshift=-0.5em]sa1.south)
  -- ([xshift=-4em,yshift=-0.5em]sa1.south)
  -- ([xshift=-4em,yshift=2.3em]sa1.south)
  -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
\draw[->,standard] ([yshift=0.5em]res1.north)
  -- ([xshift=-4em,yshift=0.5em]res1.north)
  -- ([xshift=-4em,yshift=3.3em]res1.north)
  -- ([xshift=-3.5em,yshift=3.3em]res1.north);
% ... and around the right side of the decoder.
\draw[->,standard] ([yshift=-0.5em]sa2.south)
  -- ([xshift=4em,yshift=-0.5em]sa2.south)
  -- ([xshift=4em,yshift=2.3em]sa2.south)
  -- ([xshift=3.5em,yshift=2.3em]sa2.south);
\draw[->,standard] ([yshift=0.5em]res3.north)
  -- ([xshift=4em,yshift=0.5em]res3.north)
  -- ([xshift=4em,yshift=3.3em]res3.north)
  -- ([xshift=3.5em,yshift=3.3em]res3.north);
\draw[->,standard] ([yshift=0.5em]res4.north)
  -- ([xshift=4em,yshift=0.5em]res4.north)
  -- ([xshift=4em,yshift=3.3em]res4.north)
  -- ([xshift=3.5em,yshift=3.3em]res4.north);
% Path leaving the top of the encoder and bending right towards the decoder.
\draw[->,standard] (res2.north)
  -- ([yshift=0.5em]res2.north)
  -- ([xshift=5em,yshift=0.5em]res2.north)
  -- ([xshift=5em,yshift=-2.2em]res2.north)
  -- ([xshift=6.5em,yshift=-2.2em]res2.north);
% Disabled: dotted frames around the whole encoder / decoder stacks.
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
% Highlight boxes; they are on the background layer so the nodes stay on top.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(sa1)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(sa2)] (box2) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40,fit=(ed1)] (box3) {};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Two panels. (a): two units in a red-framed space point at a boxed pair of
% units inside the same frame. (b): two units in the left frame point at a
% single circular unit placed in a second frame to the right.
\tikzset{
  repspace/.style={anchor=south west,draw,thick,red,minimum width=0.9in,minimum height=0.7in},
  repunit/.style={anchor=south west,minimum width=0.1in,minimum height=0.1in},
}
% ---- panel (a) ----
\begin{scope}
\node [repspace] (spaceA) at (0,0) {};
\node [repunit,fill=blue] (srcA1) at (0.2,0.8) {};
\node [repunit,fill=ugreen] (srcA2) at (0.7,0.3) {};
\node [repunit,fill=blue] (pairL) at (1.3,1.3) {};
\node [repunit,fill=ugreen] (pairR) at ([xshift=0.1em]pairL.south east) {};
% Frame around the pair, kept behind the two squares.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=2pt,rounded corners=1pt,draw,thick,fit=(pairL) (pairR)] (pairbox) {};
\end{pgfonlayer}
\draw [->] ([yshift=1pt]srcA1.north)
  .. controls +(north:0.23) and +(west:0.2) ..
  ([yshift=0.2em,xshift=-1pt]pairbox.west);
\draw [->] ([xshift=1pt]srcA2.east)
  .. controls +(east:0.5) and +(south:0.2) ..
  ([xshift=0.2em,yshift=-1pt]pairbox.south);
\node [anchor=south] (spacelabelA) at (spaceA.north) {\scriptsize{离散表示空间}};
\node [anchor=north] (captionA) at ([yshift=-0.5em]spaceA.south) {\scriptsize{(a) \textbf{统计机器翻译}}};
\end{scope}
% ---- panel (b), shifted right by 1.3in ----
\begin{scope}[xshift=1.3in]
\node [repspace] (spaceB1) at (0,0) {};
\node [repunit,fill=blue] (srcB1) at (0.2,0.8) {};
\node [repunit,fill=ugreen] (srcB2) at (0.7,0.3) {};
\node [repspace] (spaceB2) at (1.1in,0) {};
\node [repunit,circle,fill=orange] (target) at (1.5in,1.3) {};
\draw [->] ([yshift=1pt]srcB1.north)
  .. controls +(north:0.4) and +(west:2) ..
  ([yshift=0.0em,xshift=-1pt]target.west);
\draw [->] ([xshift=1pt]srcB2.east)
  .. controls +(east:1.5) and +(south:1) ..
  ([xshift=0.0em,yshift=-1pt]target.south);
\node [anchor=south] (spacelabelB1) at (spaceB1.north) {\scriptsize{离散表示空间}};
\node [anchor=south] (spacelabelB2) at (spaceB2.north) {\scriptsize{连续表示空间}};
\node [anchor=north] (captionB) at ([yshift=-0.5em,xshift=1em]spaceB1.south east) {\scriptsize{(b) \textbf{神经机器翻译}}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% A query box s("you") on the left points at four key boxes; each key box
% sits directly above a value box with the same h(...) label, and a weight
% alpha_i is printed above each key.
\begin{scope}
\tikzset{rnode/.style={draw,minimum width=3.5em,minimum height=1.2em}}
% Bottom row: value boxes, chained left to right with a 1em gap.
\node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{$\textbf{h}(\textrm{``你''})$}};
\foreach \idx/\prev/\word in {2/1/什么,3/2/也,4/3/没} {
  \node [rnode,anchor=south west,fill=red!20!white] (value\idx) at ([xshift=1em]value\prev.south east) {\scriptsize{$\textbf{h}(\textrm{``\word''})$}};
}
% Top row: key boxes, each 0.2em above its value box.
\foreach \idx/\word in {1/你,2/什么,3/也,4/没} {
  \node [rnode,anchor=south west,fill=green!20!white] (key\idx) at ([yshift=0.2em]value\idx.north west) {\scriptsize{$\textbf{h}(\textrm{``\word''})$}};
}
\node [rnode,anchor=east] (query) at ([xshift=-2em]key1.west) {\scriptsize{$\textbf{s}(\textrm{``you''})$}};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
% One arc per key; start points are staggered along the top of the query box
% and farther keys get taller arcs.
\foreach \idx/\startshift/\reach in {1/6pt/1em,2/3pt/1.5em,3/0pt/2em,4/-3pt/2.5em} {
  \draw [->] ([yshift=1pt,xshift=\startshift]query.north) .. controls +(90:\reach) and +(90:\reach) .. ([yshift=1pt]key\idx.north);
}
% NOTE(review): the printed weights add up to 0.9, not 1 -- confirm whether
% that is intended for this illustration.
\foreach \idx/\weight in {1/.4,2/.4,3/0,4/.1} {
  \node [anchor=south east] (alpha\idx) at ([xshift=1em]key\idx.north east) {\scriptsize{$\alpha_\idx=\weight$}};
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------------
\begin{tikzpicture}
% Panel (a): the query box carries the same fill pattern as key_3, a single
% arrow labelled "匹配" joins them, and the result label sits under value_3
% (the only value box filled red).
\begin{scope}
\tikzset{rnode/.style={draw,minimum width=3em,minimum height=1.2em}}
% Value row.
\node [rnode,anchor=south west,fill=blue!20!white] (value1) at (0,0) {\scriptsize{value$_1$}};
\node [rnode,anchor=south west,fill=blue!20!white] (value2) at ([xshift=1em]value1.south east) {\scriptsize{value$_2$}};
\node [rnode,anchor=south west,fill=red!20!white] (value3) at ([xshift=1em]value2.south east) {\scriptsize{value$_3$}};
\node [rnode,anchor=south west,fill=blue!20!white] (value4) at ([xshift=1em]value3.south east) {\scriptsize{value$_4$}};
% Key row: empty patterned boxes, one above each value.
\node [rnode,anchor=south west,pattern=north east lines] (key1) at ([yshift=0.2em]value1.north west) {};
\node [rnode,anchor=south west,pattern=dots] (key2) at ([yshift=0.2em]value2.north west) {};
\node [rnode,anchor=south west,pattern=horizontal lines] (key3) at ([yshift=0.2em]value3.north west) {};
\node [rnode,anchor=south west,pattern=crosshatch dots] (key4) at ([yshift=0.2em]value4.north west) {};
% Key captions on a white patch so the pattern does not run through the text.
\foreach \idx in {1,...,4} {
  \node [fill=white,inner sep=1pt] (key\idx label) at (key\idx) {\scriptsize{key$_\idx$}};
}
\node [rnode,anchor=east,pattern=horizontal lines] (query) at ([xshift=-3em]key1.west) {};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
\draw [->] ([yshift=1pt]query.north)
  .. controls +(90:2em) and +(90:2em) ..
  ([yshift=1pt]key3.north)
  node [pos=0.5,below,yshift=0.2em] {\scriptsize{匹配}};
\node [anchor=north] (result) at (value3.south) {\scriptsize{ {\red 返回结果} }};
\node [anchor=north] (result2) at ([xshift=-2em,yshift=-2em]value2.south) {\footnotesize{ { (a)索引的查询过程} }};
\end{scope}
\end{tikzpicture}
\begin{tikzpicture}
% Panel (b): the query box points at all four keys, a weight alpha_i is
% printed above each key, and the result line below the values spells out the
% alpha-weighted sum of the four values.
\begin{scope}
\tikzset{rnode/.style={draw,minimum width=3em,minimum height=1.2em}}
% Value row, chained left to right with a 1em gap.
\node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{value$_1$}};
\foreach \idx/\prev in {2/1,3/2,4/3} {
  \node [rnode,anchor=south west,fill=red!20!white] (value\idx) at ([xshift=1em]value\prev.south east) {\scriptsize{value$_\idx$}};
}
% Key row: empty patterned boxes, one above each value.
\node [rnode,anchor=south west,pattern=north east lines] (key1) at ([yshift=0.2em]value1.north west) {};
\node [rnode,anchor=south west,pattern=dots] (key2) at ([yshift=0.2em]value2.north west) {};
\node [rnode,anchor=south west,pattern=horizontal lines] (key3) at ([yshift=0.2em]value3.north west) {};
\node [rnode,anchor=south west,pattern=crosshatch dots] (key4) at ([yshift=0.2em]value4.north west) {};
% Key captions on a white patch so the pattern does not run through the text.
\foreach \idx in {1,...,4} {
  \node [fill=white,inner sep=1pt] (key\idx label) at (key\idx) {\scriptsize{key$_\idx$}};
}
\node [rnode,anchor=east,pattern=vertical lines] (query) at ([xshift=-3em]key1.west) {};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
% One arc per key; start points are staggered along the top of the query box
% and farther keys get taller arcs.
\foreach \idx/\startshift/\reach in {1/6pt/1em,2/3pt/1.5em,3/0pt/2em,4/-3pt/2.5em} {
  \draw [->] ([yshift=1pt,xshift=\startshift]query.north) .. controls +(90:\reach) and +(90:\reach) .. ([yshift=1pt]key\idx.north);
}
\foreach \idx in {1,...,4} {
  \node [anchor=south east] (alpha\idx) at (key\idx.north east) {\scriptsize{$\alpha_\idx$}};
}
\node [anchor=north] (result) at ([xshift=-1.5em]value2.south east) {\scriptsize{{\red 返回结果}=$\alpha_1 \cdot \textrm{value}_1 + \alpha_2 \cdot \textrm{value}_2 + \alpha_3 \cdot \textrm{value}_3 + \alpha_4 \cdot \textrm{value}_4$}};
\node [anchor=north] (result2) at ([xshift=-1em,yshift=-2.5em]value2.south) {\footnotesize{ { (b)注意力机制查询过程} }};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------------------
\begin{tikzpicture}
% The query box points at all four keys, a weight alpha_i is printed above
% each key, and the result line below the values spells out the
% alpha-weighted sum of the four values.
\begin{scope}
\tikzset{rnode/.style={draw,minimum width=3em,minimum height=1.2em}}
% Value row, chained left to right with a 1em gap.
\node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{value$_1$}};
\foreach \idx/\prev in {2/1,3/2,4/3} {
  \node [rnode,anchor=south west,fill=red!20!white] (value\idx) at ([xshift=1em]value\prev.south east) {\scriptsize{value$_\idx$}};
}
% Key row: empty patterned boxes, one above each value.
\node [rnode,anchor=south west,pattern=north east lines] (key1) at ([yshift=0.2em]value1.north west) {};
\node [rnode,anchor=south west,pattern=dots] (key2) at ([yshift=0.2em]value2.north west) {};
\node [rnode,anchor=south west,pattern=horizontal lines] (key3) at ([yshift=0.2em]value3.north west) {};
\node [rnode,anchor=south west,pattern=crosshatch dots] (key4) at ([yshift=0.2em]value4.north west) {};
% Key captions on a white patch so the pattern does not run through the text.
\foreach \idx in {1,...,4} {
  \node [fill=white,inner sep=1pt] (key\idx label) at (key\idx) {\scriptsize{key$_\idx$}};
}
\node [rnode,anchor=east,pattern=vertical lines] (query) at ([xshift=-3em]key1.west) {};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
% One arc per key; start points are staggered along the top of the query box
% and farther keys get taller arcs.
\foreach \idx/\startshift/\reach in {1/6pt/1em,2/3pt/1.5em,3/0pt/2em,4/-3pt/2.5em} {
  \draw [->] ([yshift=1pt,xshift=\startshift]query.north) .. controls +(90:\reach) and +(90:\reach) .. ([yshift=1pt]key\idx.north);
}
\foreach \idx in {1,...,4} {
  \node [anchor=south east] (alpha\idx) at (key\idx.north east) {\scriptsize{$\alpha_\idx$}};
}
\node [anchor=north] (result) at ([xshift=-1.5em]value2.south east) {\scriptsize{{\red 返回结果}=$\alpha_1 \cdot \textrm{value}_1 + \alpha_2 \cdot \textrm{value}_2 + \alpha_3 \cdot \textrm{value}_3 + \alpha_4 \cdot \textrm{value}_4$}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Learning rate against number of updates, drawn without tick marks.
% The plotted points rise to 0.001, stay there, then drop in steps to
% 0.0005, 0.00025 and 0.000125.
\footnotesize{
\begin{axis}[
  width=.60\textwidth,
  height=.40\textwidth,
  xlabel={\scriptsize{更新次数}},
  xlabel style={yshift=1.5em},
  ylabel={\scriptsize{学习率}},
  ylabel style={yshift=-2.5em},
  xtick=\empty,
  ytick=\empty,
  legend style={at={(0.60,0.08)}, anchor=south west, yshift=-6pt, legend plot pos=right, font=\scriptsize, cells={anchor=west}},
]
\addplot[orange,line width=1.25pt] coordinates {
  (329,0.000045)
  (447,0.000078)
  (540,0.00012)
  (661,0.0002)
  (752,0.00032)
  (856,0.00051)
  (975,0.00089)
  (996,0.001)
  (6599,0.001)
  (6624,0.0005)
  (7200,0.0005)
  (7218,0.00025)
  (7784,0.00025)
  (7821,0.000125)
  (8398,0.000125)
};
\end{axis}
}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% A row of sub-layer boxes with skip connections around sub-layers 1 and 2.
% Black arrows are labelled "前向计算" in the legend, red arrows "反向传播";
% the red arrows run in the opposite direction.
\begin{scope}
\tikzset{
  lnode/.style={minimum height=1.5em,minimum width=3em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20},
  standard/.style={rounded corners=3pt},
}
% Sub-layer boxes; an ellipsis stands between sub-layer 3 and sub-layer n.
\node [lnode,anchor=west] (l1) at (0,0) {\scriptsize{子层1}};
\node [lnode,anchor=west] (l2) at ([xshift=3em]l1.east) {\scriptsize{子层2}};
\node [lnode,anchor=west] (l3) at ([xshift=3em]l2.east) {\scriptsize{子层3}};
\node [anchor=west,inner sep=2pt] (dot1) at ([xshift=1em]l3.east) {\scriptsize{$\textbf{...}$}};
\node [lnode,anchor=west] (l4) at ([xshift=1em]dot1.east) {\scriptsize{子层$n$}};
% Addition symbols after sub-layers 1 and 2.
\foreach \idx in {1,2} {
  \node [anchor=west] (plus\idx) at ([xshift=0.9em]l\idx.east) {\scriptsize{$\mathbf{\oplus}$}};
}
% Left-to-right (black) main path.
\draw [->,thick] ([xshift=-1.5em]l1.west) -- ([xshift=-0.1em]l1.west);
\foreach \idx/\nxt in {1/2,2/3} {
  \draw [->,thick] ([xshift=0.1em]l\idx.east) -- ([xshift=0.2em]plus\idx.west);
  \draw [->,thick] ([xshift=-0.2em]plus\idx.east) -- ([xshift=-0.1em]l\nxt.west);
}
\draw [->,thick] ([xshift=0.1em]l3.east) -- ([xshift=-0.1em]dot1.west);
\draw [->,thick] ([xshift=0.1em]dot1.east) -- ([xshift=-0.1em]l4.west);
\draw [->,thick] ([xshift=0.1em]l4.east) -- ([xshift=1.5em]l4.east);
% Black skip paths: branch off in front of a sub-layer and rejoin at its plus.
\foreach \idx in {1,2} {
  \draw[->,standard,thick] ([xshift=-0.8em]l\idx.west) -- ([xshift=-0.8em,yshift=2em]l\idx.west) -- ([yshift=2em]plus\idx.center) -- ([yshift=-0.2em]plus\idx.north);
}
% Right-to-left (red) path, drawn 0.3em below the black one.
\draw [->,very thick,red] ([xshift=1.5em,yshift=-0.3em]l4.east) -- ([xshift=0.1em,yshift=-0.3em]l4.east);
\draw [->,very thick,red] ([xshift=-0.1em,yshift=-0.3em]l4.west) -- ([xshift=0.1em,yshift=-0.3em]dot1.east);
\draw [->,very thick,red] ([xshift=-0.1em,yshift=-0.3em]dot1.west) -- ([xshift=0.1em,yshift=-0.3em]l3.east);
% Red skip paths: from a plus back to the front of its sub-layer.
\foreach \idx in {2,1} {
  \draw[->,standard,very thick,red] ([xshift=-0.3em,yshift=-0.2em]plus\idx.north) -- ([xshift=-0.3em,yshift=1.8em]plus\idx.center) -- ([xshift=-0.5em,yshift=1.8em]l\idx.west) -- ([xshift=-0.5em,yshift=0.2em]l\idx.west);
}
% Legend.
\node [anchor=west] (label1) at ([xshift=1em,yshift=1.5em]l3.north) {\tiny{前向计算}};
\draw [->,thick] ([xshift=-1.5em]label1.west) -- ([xshift=-0.1em]label1.west);
\node [anchor=west] (label2) at ([xshift=2.5em]label1.east) {\tiny{反向传播}};
\draw [->,thick,red] ([xshift=-1.5em]label2.west) -- ([xshift=-0.1em]label2.west);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%------------------------------------------------------------
\begin{tikzpicture}
% Four columns, bottom to top: word label, empty blue box, green "RNN Cell"
% box, empty blue box, empty output placeholder. The RNN cells are also
% chained left to right.
\begin{scope}[scale=0.7]
\tikzset{
  rnnnode/.style={draw,inner sep=5pt,minimum width=3em,minimum height=0.8em,fill=green!30!white,blur shadow={shadow xshift=1pt,shadow yshift=-1pt}},
}
% Row of RNN cells.
\node [anchor=west,rnnnode] (node11) at (0,0) {\tiny{RNN Cell}};
\foreach \idx/\prev in {2/1,3/2,4/3} {
  \node [anchor=west,rnnnode] (node1\idx) at ([xshift=1em]node1\prev.east) {\tiny{RNN Cell}};
}
% Empty blue boxes below the cells.
\foreach \idx in {1,...,4} {
  \node [anchor=north,rnnnode,fill=blue!30!white] (e\idx) at ([yshift=-1em]node1\idx.south) {\scriptsize{}};
}
% Word labels below the blue boxes.
% NOTE(review): w2 has no text -- confirm whether a word is missing here.
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-1em]e1.south) {\scriptsize{$<$eos$>$}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-1em]e2.south) {\scriptsize{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-1em]e3.south) {\scriptsize{我们}};
\node [anchor=north,inner sep=2pt] (w4) at ([yshift=-1em]e4.south) {\scriptsize{开始}};
% Upward arrows: word -> blue box, then blue box -> cell.
\foreach \idx in {1,...,4} {
  \draw [->,thick] ([yshift=0.1em]w\idx.north)--([yshift=-0.1em]e\idx.south);
}
\foreach \idx in {1,...,4} {
  \draw [->,thick] ([yshift=0.1em]e\idx.north)--([yshift=-0.1em]node1\idx.south);
}
% Empty blue boxes above the cells and empty output placeholders above those.
\foreach \idx in {1,...,4} {
  \node [anchor=south,rnnnode,fill=blue!30!white] (node2\idx) at ([yshift=1.0em]node1\idx.north) {\scriptsize{}};
}
\foreach \idx in {1,...,4} {
  \node [anchor=south] (output\idx) at ([yshift=1em]node2\idx.north) {\Large{\textbf{}}};
}
% Upward arrows: upper box -> output, then cell -> upper box.
\foreach \idx in {1,...,4} {
  \draw [->,thick] ([yshift=0.1em]node2\idx.north)--([yshift=-0.1em]output\idx.south);
}
\foreach \idx in {1,...,4} {
  \draw [->,thick] ([yshift=0.1em]node1\idx.north)--([yshift=-0.1em]node2\idx.south);
}
% Left-to-right chain through the cells, with an open arrow at each end.
\draw [->,thick] ([xshift=-1em]node11.west)--([xshift=-0.1em]node11.west);
\foreach \idx/\nxt in {1/2,2/3,3/4} {
  \draw [->,thick] ([xshift=0.1em]node1\idx.east)--([xshift=-0.1em]node1\nxt.west);
}
\draw [->,thick] ([xshift=0.1em]node14.east)--([xshift=1em]node14.east);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Encoder (h_1..h_3 over e_x() boxes) on the left; decoder columns on the
% right, each stacked as e_y() -> s_j -> f_j -> softmax, under a shared
% "Cross Entropy Loss" bar. Orange circles C_1..C_4 are context nodes; the
% caption above C_1 names them as encoder-decoder attention context.
\begin{scope}
\tikzset{
  rnnnode/.style={minimum height=1.1em,minimum width=2.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20},
}
% ---- Encoder ----
\node [rnnnode,anchor=west] (h1) at (0,0) {\tiny{$\textbf{h}_1$}};
\node [rnnnode,anchor=west] (h2) at ([xshift=1em]h1.east) {\tiny{$\textbf{h}_2$}};
\node [rnnnode,anchor=west] (h3) at ([xshift=1em]h2.east) {\tiny{$\textbf{h}_3$}};
\node [rnnnode,anchor=north,fill=green!20] (e1) at ([yshift=-1em]h1.south) {\tiny{$e_x()$}};
\node [rnnnode,anchor=west,fill=green!20] (e2) at ([xshift=1em]e1.east) {\tiny{$e_x()$}};
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
% NOTE(review): w1 and w2 carry no text -- confirm whether source words are
% missing here.
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.6em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{$<$eos$>$}};
\node [anchor=south] (dot1) at ([xshift=0.4em,yshift=-0.7em]h1.south) {\tiny{...}};
\node [anchor=south] (dot2) at ([xshift=-0.4em,yshift=-0.7em]h3.south) {\tiny{...}};
\foreach \idx in {1,2,3} {
  \draw [->] (w\idx.north) -- ([yshift=-0.1em]e\idx.south);
}
\foreach \idx in {1,2,3} {
  \draw [->] ([yshift=0.1em]e\idx.north) -- ([yshift=-0.1em]h\idx.south);
}
% Neighbouring embeddings also feed h_2.
\draw [->] ([xshift=0.2em,yshift=0.1em]e1.north) .. controls +(north:0.3) and +(south:0.4) .. ([xshift=-0.3em,yshift=-0.1em]h2.south);
\draw [->] ([xshift=-0.2em,yshift=0.1em]e3.north) .. controls +(north:0.3) and +(south:0.4) .. ([xshift=0.3em,yshift=-0.1em]h2.south);
\node [anchor=south] (encoder) at ([xshift=-0.2em]h1.north west) {\scriptsize{\textbf{编码器}}};
% ---- Decoder: embedding row ----
\node [rnnnode,anchor=west,fill=green!20] (t1) at ([xshift=3em]e3.east) {\tiny{$e_y()$}};
\foreach \idx/\prev in {2/1,3/2,4/3} {
  \node [rnnnode,anchor=west,fill=green!20] (t\idx) at ([xshift=1.5em]t\prev.east) {\tiny{$e_y()$}};
}
% ---- Decoder: s row ----
\foreach \idx in {1,2,3,4} {
  \node [rnnnode,anchor=south] (s\idx) at ([yshift=1em]t\idx.north) {\tiny{$\textbf{s}_\idx$}};
}
\node [anchor=south] (dot3) at ([xshift=-0.4em,yshift=-0.7em]s3.south) {\tiny{...}};
\node [anchor=south] (dot4) at ([xshift=-0.4em,yshift=-0.7em]s4.south) {\tiny{...}};
% ---- Decoder: f row, softmax row, loss bar ----
\foreach \idx in {1,2,3,4} {
  \node [rnnnode,anchor=south] (f\idx) at ([yshift=1em]s\idx.north) {\tiny{$\textbf{f}_\idx$}};
}
\foreach \idx in {1,2,3,4} {
  \node [rnnnode,anchor=south,fill=blue!20] (o\idx) at ([yshift=1em]f\idx.north) {\tiny{softmax}};
}
\node [anchor=east] (decoder) at ([xshift=-0.3em,yshift=0.5em]o1.north west) {\scriptsize{\textbf{解码器}}};
\node [anchor=south,fill=black!5!white,minimum height=1.1em,minimum width=13em,inner sep=2pt,rounded corners=1pt,draw] (loss) at ([xshift=1.8em,yshift=1em]o2.north) {\scriptsize{\textbf{Cross Entropy Loss}}};
% ---- Decoder input words (the original uses a larger drop for wt3/wt4) ----
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{$<$eos$>$}};
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{How}};
\node [anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\tiny{are}};
\node [anchor=north,inner sep=2pt] (wt4) at ([yshift=-0.8em]t4.south) {\tiny{you}};
% ---- Decoder arrows, column by column ----
\foreach \x in {1,2,3,4} {
  \draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
  \draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
}
% This curve does not depend on the loop variable, so it is drawn once here
% instead of once per column.
\draw [->] ([xshift=0.2em,yshift=0.1em]t1.north) .. controls +(north:0.3) and +(south:0.3) .. ([xshift=-0.3em,yshift=-0.1em]s2.south);
\foreach \x in {1,2,3,4} {
  \draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]f\x.south);
  \draw [->] ([yshift=0.1em]f\x.north) -- ([yshift=-0.1em]o\x.south);
  \draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north);
}
% ---- Context C_1: fed by h_1..h_3 and s_1, feeding f_1 ----
\node [circle,draw,anchor=south,inner sep=3pt,fill=orange!20] (c1) at ([yshift=2em]h2.north) {\tiny{$\textbf{C}_1$}};
\node [anchor=south] (c1label) at (c1.north) {\tiny{\textbf{编码-解码注意力机制:上下文}}};
\draw [->] (h1.north) .. controls +(north:0.6) and +(250:0.9) .. (c1.250);
\draw [->] (h2.north) .. controls +(north:0.6) and +(270:0.9) .. (c1.270);
\draw [->] (h3.north) .. controls +(north:0.6) and +(290:0.9) .. (c1.290);
\draw [->] ([yshift=0.3em]s1.west) .. controls +(west:1) and +(east:1) .. (c1.-30);
\draw [->] (c1.0) .. controls +(east:1) and +(west:1) .. ([yshift=0em]f1.west);
% ---- Contexts C_2..C_4: drawn below t_1..t_3, each feeding the next f ----
\foreach \idx/\col in {2/1,3/2,4/3} {
  \node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c\idx) at ([yshift=-2em]t\col.south) {\tiny{$\textbf{C}_\idx$}};
  \draw [->] ([xshift=-0.7em]c\idx.west) -- ([xshift=-0.1em]c\idx.west);
  \draw [->] ([xshift=0.1em]c\idx.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f\idx.west);
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Transformer overview: an encoder stack (left column) and a decoder stack
% (right column), each built bottom-up from labelled boxes joined by arrows.
% Needs the TikZ `fit' and `backgrounds' libraries (fit=..., pgfonlayer).
\begin{scope}
% Box styles; they differ only in minimum size and fill colour.
\tikzstyle{Sanode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{Resnode} = [minimum height=1.1em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{ffnnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{outputnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw];
\tikzstyle{inputnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=red!10];
\tikzstyle{posnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=black!5!white];
% `standard' only rounds the corners of the elbow-shaped bypass arrows below.
\tikzstyle{standard} = [rounded corners=3pt]
% ---- Encoder column (anchored at the origin) ----
\node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
% Two half-width input boxes sit side by side under the self-attention box.
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
% Label text: "encoder input" followed by the example source words.
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\tiny{$\textbf{编码器输入: 我 很 好}$}};
% Label text: "encoder".
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
\draw [->] (sa1.north) -- (res1.south);
\draw [->] (res1.north) -- (ffn1.south);
\draw [->] (ffn1.north) -- (res2.south);
\draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
\draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
% ---- Decoder column (3em to the right of the encoder) ----
\node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
\node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
% Label texts: "decoder input ...", "decoder", "decoder output ...".
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}};
\draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south);
\draw [->] (ed1.north) -- (res4.south);
\draw [->] (res4.north) -- (ffn2.south);
\draw [->] (ffn2.north) -- (res5.south);
\draw [->] (res5.north) -- (o1.south);
\draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
\draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
\draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
% ---- Bypass arrows ----
% Each path leaves the vertical flow just below/above a box, runs 4em out to the
% side (left for the encoder, right for the decoder), climbs past one sublayer
% and comes back in at 3.5em, i.e. at the side edge of the 7em-wide box above.
\draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
\draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
\draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
\draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
\draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
% Encoder top output: up, across into the gap between the columns, down, then
% right to x = 6.5em from res2's centre (the decoder column's left edge).
% NOTE(review): the end height looks tuned to hit the Encoder-Decoder Attention
% box -- confirm visually after any size change.
\draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
%\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
% Red backdrops behind each Embedding/Position pair, drawn on the background layer.
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (input1) (pos1)] (box1) {};
\node [rectangle,inner sep=0.2em,rounded corners=1pt,very thick,dotted,fill=red!40] [fit = (input2) (pos2)] (box2) {};
\end{pgfonlayer}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%------------------------------------------------------
\begin{tikzpicture}
% Two-part figure: on the left a three-position RNN encoder-decoder, on the
% right a worked example of a one-hot vector next to a vocabulary column and
% an embedding matrix. A dashed arrow ties the third decoder embedding box to
% the right-hand example.
% Defined outside this picture: the length \base, the colour ugreen, and the
% macros \ExtractX / \ExtractY.
% NOTE(review): \ExtractX / \ExtractY presumably store the x / y coordinate of
% their argument in \XCoord / \YCoord -- confirm against their definition.
\setlength{\base}{0.9cm}
\tikzstyle{rnnnode} = [rounded corners=1pt,minimum height=0.5\base,minimum width=1\base,draw,inner sep=0pt,outer sep=0pt]
\tikzstyle{wordnode} = [font=\tiny]
% RNN translation model
\begin{scope}[local bounding box=RNNMT]
% RNN Encoder
% eemb0 is a bare coordinate so that the first embedding box can be placed
% relative to "the previous one" like all the others.
\coordinate (eemb0) at (0,0);
\foreach \x [count=\y from 0] in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\x) at ([xshift=0.4\base]eemb\y.east) {\tiny{$e_x()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (enc\x) at ([yshift=0.3\base]eemb\x.north) {};
% Hidden-state labels are separate nodes placed on top of the empty enc boxes;
% the last one is a full purple box that covers enc3.
\node[] (enclabel1) at (enc1) {\tiny{$\textbf{h}_{m-2}$}};
\node[] (enclabel2) at (enc2) {\tiny{$\textbf{h}_{m-1}$}};
\node[rnnnode,fill=purple!30!white] (enclabel3) at (enc3) {\tiny{$\textbf{h}_{m}$}};
\node[wordnode,left=0.4\base of enc1] (init1) {$\cdots$};
\node[wordnode,left=0.4\base of eemb1] (init2) {$\cdots$};
% Source words under the encoder; the first two nodes have empty text here.
\node[wordnode,below=0pt of eemb1] () {};
\node[wordnode,below=0pt of eemb2] () {};
\node[wordnode,below=0pt of eemb3] () {$\langle$eos$\rangle$};
% RNN Decoder
% Stacked above the encoder: embedding box, state box, softmax box per position.
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\x) at ([yshift=\base]enc\x.north) {\tiny{$e_y()$}};
\foreach \x in {1,2,...,3}
\node[rnnnode,fill=blue!30!white,anchor=south] (dec\x) at ([yshift=0.3\base]demb\x.north) {{\tiny{$\textbf{s}_\x$}}};
\foreach \x in {1,2,...,3}
\node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\x) at ([yshift=0.3\base]dec\x.north) {\tiny{Softmax}};
\node[wordnode,right=0.4\base of demb3] (end1) {$\cdots$};
\node[wordnode,right=0.4\base of dec3] (end2) {$\cdots$};
\node[wordnode,right=0.4\base of softmax3] (end3) {$\cdots$};
% Decoder input words
% The first word fixes the text baseline; the others are placed at
% (\XCoord,\YCoord) with anchor=base, after \ExtractX is given the box's south
% point and \ExtractY the first word's base.
\node[wordnode,below=0pt of demb1] (decwordin) {$\langle$sos$\rangle$};
\ExtractX{$(demb2.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {Do};
\ExtractX{$(demb3.south)$}
\ExtractY{$(decwordin.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
% Decoder output words
% Same placement scheme, above the softmax boxes.
\node[wordnode,above=0pt of softmax1] (decwordout) {Do};
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax3.north)$}
\ExtractY{$(decwordout.base)$}
\node[wordnode,anchor=base] () at (\XCoord,\YCoord) {know};
% Connections
\draw[-latex'] (init1.east) to (enc1.west);
\draw[-latex'] (dec3.east) to (end2.west);
\foreach \x in {1,2,...,3}
\draw[-latex'] (eemb\x) to (enc\x);
\foreach \x in {1,2,...,3}
\draw[-latex'] (demb\x) to (dec\x);
\foreach \x in {1,2,...,3}
\draw[-latex'] (dec\x.north) to (softmax\x.south);
% Left-to-right links between neighbouring states (\y = \x + 1).
\foreach \x [count=\y from 2] in {1,2}
{
\draw[-latex'] (enc\x.east) to (enc\y.west);
\draw[-latex'] (dec\x.east) to (dec\y.west);
}
% One curved arrow from the last encoder state to the first decoder state,
% routed through the waypoint `bridge' above enc2 and around the left side.
\coordinate (bridge) at ([yshift=0.4\base]enc2.north west);
\draw[-latex'] (enc3.north) .. controls +(north:0.3\base) and +(east:\base) .. (bridge) .. controls +(west:2.7\base) and +(west:0.3\base) .. (dec1.west);
\end{scope}
% Right-hand example: one-hot column vector, vocabulary column, embedding matrix.
\begin{scope}
\coordinate (start) at (5.8\base,0.3\base);
{
% One-hot column with a single green 1, plus a small "T" at its top-right corner.
\node [anchor=south west] (one) at (start) {\scriptsize{$\begin{bmatrix} 0 \\ 0 \\ 0 \\ \vdots \\ 0 \\ {\color{ugreen} 1} \\ 0 \\ 0 \end{bmatrix}$}};
\node [anchor=south west,inner sep=0pt] (T) at ([yshift=-0.5em,xshift=-0.5em]one.north east) {\tiny{T}};
}
{
% Empty green bar at a fixed offset from `start'.
% NOTE(review): the offset looks tuned to overlay the matrix row beside the
% green 1 -- confirm visually.
\node [draw=ugreen,fill=green!20!white,rounded corners=0.3em,minimum width=3.8cm,minimum height=0.9em,anchor=south west] (emb) at ([shift={(1.25cm,0.8cm)}]start) {};
}
\node [anchor=north] (w) at ([yshift=3pt]one.south) {\scriptsize{\color{ugreen} you}};
% Vocabulary column, listed in the same order as the one-hot entries.
\node [anchor=north west] (words) at ([xshift=10pt]one.north east) {\scriptsize{$\begin{matrix} \langle\textrm{eos}\rangle \\ \langle\textrm{sos}\rangle \\ \textrm{Do} \\ \vdots \\ \textrm{know} \\ \textrm{you} \\ \textrm{?} \\ \textrm{have} \end{matrix}$}};
\node [anchor=north west] (mat) at ([xshift=-6pt]words.north east) {\scriptsize{$
\begin{bmatrix}
.1 & -4 & \cdots & 2 \\
5 & 2 & \cdots & .2 \\
2 & .1 & \cdots & .3 \\
\vdots & \vdots & \ddots & \vdots \\
0 & .8 & \cdots & 4 \\
-1 & -2 & \cdots & -3 \\
.7 & .5 & \cdots & 3 \\
-2 & .3 & \cdots & .1
\end{bmatrix}
$}};
% Brace under the matrix; its label reads "word embedding matrix".
\draw [decorate,decoration={brace,mirror}] ([shift={(6pt,2pt)}]mat.south west) to node [auto,swap,font=\scriptsize] {词嵌入矩阵} ([shift={(-6pt,2pt)}]mat.south east);
{
% Short arrow from the one-hot column to the vocabulary column, 0.65cm below centre.
\draw [-latex'] ([xshift=-2pt,yshift=-0.65cm]one.east) to ([yshift=-0.65cm]words.west);
}
{
% Arrow out of the green bar, ending above the matrix corner; label reads "RNN input".
\draw [-latex'] (emb.east) -| ([yshift=0.4cm]mat.north east) node [pos=1,above] {\scriptsize{RNN输入}};
}
\draw [-latex'] ([yshift=-0.4cm]w.south) to ([yshift=2pt]w.south);
% Label reads "input word".
\node [anchor=north] (wlabel) at ([yshift=-0.6em]w.south) {\scriptsize{输入的单词}};
% Dashed frame around the whole example; the arrow below targets it.
\node [draw=ugreen,densely dashed,thick,rounded corners=3pt,fit=(one) (words) (mat) (w)] (input) {};
\end{scope}
% Dashed green link from the third decoder embedding box to the example frame.
\draw [->,thick,densely dashed,ugreen] ([yshift=-0.2em]demb3.east) to [out=0,in=180] ([yshift=-1cm]input.west);
\end{tikzpicture}
\ No newline at end of file
%%% outline
%-------------------------------------------------------------------------
\begin{tikzpicture}
% Framed two-row example: a tokenised Chinese source sentence (row 1) and its
% tokenised English translation (row 2). Each row is a label node plus a
% 14em-wide text node. The colour ublue is defined outside this picture.
% Needs the TikZ `fit' and `backgrounds' libraries.
% ---- Row 1: source sentence (label reads "source language (Chinese) input") ----
\node [pos=0.4,left,xshift=-36em,yshift=7em,font=\small] (original0) {\quad 源语(中文)输入:};
\node [pos=0.4,left,xshift=-22em,yshift=7em,font=\small] (original1) {
\begin{tabular}[t]{l}
% Opening quotes are written as `` so that each token gets an opening and a
% closing mark instead of two closing ones.
\parbox{14em}{``我''、``很''、``好''、``<eos>'' }
\end{tabular}
};
% ---- Row 2: translation (label reads "target language (English) output") ----
\node[font=\small] (mt1) at ([xshift=0em,yshift=-1em]original0.south) {目标语(英文)输出:};
\node[font=\small] (ts1) at ([xshift=0em,yshift=-1em]original1.south) {
\begin{tabular}[t]{l}
\parbox{14em}{``I''、``am''、``fine''、``<eos>''}
\end{tabular}
};
% Thin frame around all four nodes, drawn on the background layer.
\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue, inner sep=0mm] [fit =(original0)(mt1)(ts1)(original1)] {};
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
%--------------------------------------------------------------------------------
\begin{tikzpicture}
% Encoder-decoder RNN unrolled over ten positions, with a second (backward)
% encoder row framed by a dashed red box.
% Defined outside this picture: the length \base and the macros
% \ExtractX / \ExtractY (used together with \XCoord / \YCoord).
%\newlength{\base}
\setlength{\base}{0.9cm}
\tikzset{
  rnnnode/.style={rounded corners=1pt,minimum size=0.5\base,draw,inner sep=0pt,outer sep=0pt},
  wordnode/.style={font=\tiny},
}
\begin{scope}[local bounding box=RNNMT]
  % ---- Encoder: per position an embedding box, a backward state, a forward state ----
  \coordinate (eemb0) at (0,0);
  \foreach \cur [count=\prev from 0] in {1,...,10}
  {
    \node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=west] (eemb\cur) at ([xshift=0.4\base]eemb\prev.east) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (backenc\cur) at ([yshift=0.5\base]eemb\cur.north) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (enc\cur) at ([yshift=0.5\base]backenc\cur.north) {};
  }
  % Initial states: the forward row starts at the left, the backward row at the right.
  \node[wordnode,left=0.4\base of enc1] (init) {$0$};
  \node[wordnode,right=0.4\base of backenc10] (backinit) {$0$};
  % Purple overlays on the final state of each direction.
  \node [rnnnode,fill=purple!30!white] at (enc10) {};
  \node [rnnnode,fill=purple!30!white] at (backenc1) {};
  % Source words under the embedding boxes (several slots carry empty text).
  \node[wordnode,below=0pt of eemb1] () {};
  \node[wordnode,below=0pt of eemb2] () {知道};
  \node[wordnode,below=0pt of eemb3] () {};
  \node[wordnode,below=0pt of eemb4] () {北京站};
  \node[wordnode,below=0pt of eemb5] () {};
  \node[wordnode,below=0pt of eemb6] () {};
  \node[wordnode,below=0pt of eemb7] () {怎么};
  \node[wordnode,below=0pt of eemb8] () {};
  \node[wordnode,below=0pt of eemb9] () {};
  \node[wordnode,below=0pt of eemb10] () {$\langle$eos$\rangle$};
  % ---- Decoder: embedding box, state box and output box above each encoder column ----
  \foreach \cur in {1,...,10}
  {
    \node[rnnnode,minimum height=0.5\base,fill=green!30!white,anchor=south] (demb\cur) at ([yshift=1.5\base]enc\cur.north) {};
    \node[rnnnode,fill=blue!30!white,anchor=south] (dec\cur) at ([yshift=0.5\base]demb\cur.north) {};
    \node[rnnnode,minimum height=0.5\base,fill=red!30!white,anchor=south] (softmax\cur) at ([yshift=0.5\base]dec\cur.north) {};
  }
  % Decoder input words: the first one fixes the baseline, the remaining nine
  % are placed at (\XCoord,\YCoord) under boxes 2..10.
  \node[wordnode,below=0pt of demb1] (decwordin) {EOS};
  \foreach \tok [count=\idx from 2] in {Do,you,know,the,way,to,Beijing,Railway,Station}
  {
    \ExtractX{$(demb\idx.south)$}
    \ExtractY{$(decwordin.base)$}
    \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {\tok};
  }
  % Decoder output words: same scheme above the output boxes.
  \node[wordnode,above=0pt of softmax1] (decwordout) {Do};
  \foreach \tok [count=\idx from 2] in {you,know,the,way,to,Beijing,Railway,Station,EOS}
  {
    \ExtractX{$(softmax\idx.north)$}
    \ExtractY{$(decwordout.base)$}
    \node[wordnode,anchor=base] () at (\XCoord,\YCoord) {\tok};
  }
  % ---- Connections ----
  \draw[-latex'] (init.east) to (enc1.west);
  \draw[-latex'] (backinit.west) to (backenc10.east);
  % Vertical links inside each column; the embedding-to-forward-state arrow
  % bends to the right on its way past the backward state.
  \foreach \cur in {1,...,10}
  {
    \draw[-latex'] (eemb\cur) to (backenc\cur);
    \draw[-latex'] (eemb\cur.north) to [out=15,in=-15] (enc\cur.south);
    \draw[-latex'] (demb\cur) to (dec\cur);
    \draw[-latex'] (dec\cur.north) to ([yshift=0.5\base]dec\cur.north);
  }
  % Left-to-right recurrence in the forward encoder row and in the decoder row.
  \foreach \cur [count=\nxt from 2] in {1,...,9}
  {
    \draw[-latex'] (enc\cur.east) to (enc\nxt.west);
    \draw[-latex'] (dec\cur.east) to (dec\nxt.west);
  }
  % Right-to-left recurrence in the backward encoder row.
  \foreach \cur [evaluate=\cur as \prv using int(\cur-1)] in {10,9,...,2}
    \draw[-latex'] (backenc\cur.west) to (backenc\prv.east);
  % Both final encoder states feed the first decoder state.
  \coordinate (bridge) at ([yshift=-1.2\base]demb2);
  \draw[-latex'] (enc10.north) .. controls +(north:0.7\base) and +(east:1.5\base) .. (bridge) .. controls +(west:2.5\base) and +(west:0.6\base) .. (dec1.west);
  \draw[-latex'] (backenc1) to [out=180,in=180] (dec1.west);
  % Dashed red frame around the backward row; the label reads "backward RNN".
  \begin{pgfonlayer}{background}
    \node[draw=red,thick,densely dashed,inner sep=5pt] [fit = (backinit) (backenc1) (backenc10)] (backrnn) {};
  \end{pgfonlayer}
  \node[font=\scriptsize,anchor=south] (backrnnlabel) at ([xshift=-0.5\base,yshift=\base]backrnn.north east) {反向RNN};
  \draw[->,dashed] (backrnnlabel.south) to ([xshift=-0.5\base]backrnn.north east);
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
% Two side-by-side panels showing the same dashed parabola x^2/4 with a red dot
% at (0,0). The left panel overlays ten short arrows along the curve; the right
% panel overlays four long arrows (two \addplot calls with two samples each).
%
% Axis options common to both panels, collected once. Tick marks and tick
% labels are made fully transparent; only the arrow-tipped axis lines and the
% two axis labels are visible.
\pgfplotsset{
  losspanel/.style={
    width=7cm, height=4cm,
    xtick={-4,-3,-2,-1,0,1,2,3,4},
    ytick={0,1,...,4},
    xticklabel style={opacity=0},
    yticklabel style={opacity=0},
    xlabel={\textbf{$\textrm{W}_t$}},
    ylabel={\textbf{L($\textrm{W}_t$)}},
    axis line style={->},
    xlabel style={xshift=2.2cm,yshift=1.2cm},
    ylabel style={rotate=-90,xshift=1.5cm,yshift=1.6cm},
    tick align=inside,
    axis y line*=left,
    axis x line*=bottom,
    tick style={opacity=0},
    xmin=-4,
    xmax=4,
    ymin=0,
    ymax=4,
  },
}
% Left panel: arrows point along (1, x/2), the tangent direction of x^2/4.
\begin{axis}[name=s1, losspanel]
  \addplot [dashed,ublue,thick] {x^2/4};
  \addplot [quiver={u=1,v=x/2,scale arrows = 0.25},domain=-4:-0.3,->,samples=10,red!60,ultra thick] {x^2/4};
  \addplot [draw=ublue,fill=red,mark=*] coordinates{(0,0)};
\end{axis}
% Right panel: placed 6cm to the right of the left panel's south anchor.
\begin{axis}[
  at={(s1.south)},
  anchor=south,
  xshift=6cm,
  yshift=0cm,
  losspanel]
  \addplot [dashed,ublue,thick] {x^2/4};
  % With samples=2 only the two domain end points are evaluated, so each of
  % these plots contributes exactly two arrows.
  \addplot [quiver={u=-x-(x/abs(x))*(1+x^2-4)^(1/2),v=-0.7},domain=-4:3.6,->,samples=2,red!60,ultra thick] {x^2/4};
  \addplot [quiver={u=-x-(x/abs(x))*(1+x^2-4)^(1/2),v=-0.7},domain=-3.13:2.6,->,samples=2,red!60,ultra thick] {x^2/4};
  \addplot [draw=ublue,fill=red,mark=*] coordinates{(0,0)};
\end{axis}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% RNN encoder-decoder with attention: a three-state encoder (h1..h3) on the
% left and a four-step decoder on the right. Every decoder column stacks an
% embedding box t, a state s, a box f and a softmax box o; each output word is
% fed back (dotted arrow) as the next column's input word, and an orange
% context node C_i feeds into f_i.
\begin{scope}
\tikzset{rnnnode/.style={minimum height=1.1em,minimum width=2.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20}}
% ---- Encoder ----
\node [rnnnode,anchor=west] (h1) at (0,0) {\tiny{$\textbf{h}_1$}};
\node [rnnnode,anchor=west] (h2) at ([xshift=1em]h1.east) {\tiny{$\textbf{h}_2$}};
\node [rnnnode,anchor=west] (h3) at ([xshift=1em]h2.east) {\tiny{$\textbf{h}_3$}};
\node [rnnnode,anchor=north,fill=green!20] (e1) at ([yshift=-1em]h1.south) {\tiny{$e_x()$}};
\node [rnnnode,anchor=west,fill=green!20] (e2) at ([xshift=1em]e1.east) {\tiny{$e_x()$}};
\node [rnnnode,anchor=west,fill=green!20] (e3) at ([xshift=1em]e2.east) {\tiny{$e_x()$}};
% Source words (the first two carry empty text).
\node [anchor=north,inner sep=2pt] (w1) at ([yshift=-0.6em]e1.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w2) at ([yshift=-0.6em]e2.south) {\tiny{}};
\node [anchor=north,inner sep=2pt] (w3) at ([yshift=-0.6em]e3.south) {\tiny{<eos>}};
\foreach \x in {1,2,3}{
  \draw [->] (w\x.north) -- ([yshift=-0.1em]e\x.south);
  \draw [->] ([yshift=0.1em]e\x.north) -- ([yshift=-0.1em]h\x.south);
}
% h2 additionally receives both neighbouring embeddings ...
\draw [->] ([xshift=0.2em,yshift=0.1em]e1.north) .. controls +(north:0.3) and +(south:0.4) .. ([xshift=-0.3em,yshift=-0.1em]h2.south);
\draw [->] ([xshift=-0.2em,yshift=0.1em]e3.north) .. controls +(north:0.3) and +(south:0.4) .. ([xshift=0.3em,yshift=-0.1em]h2.south);
% ... while h1 and h3 only get short stub arrows hinting at further inputs.
\draw [->] ([xshift=0.4em,yshift=-0.4em]h1.south) -- ([xshift=0.3em,yshift=-0.1em]h1.south);
\draw [->] ([xshift=0.8em,yshift=-0.4em]h1.south) -- ([xshift=0.6em,yshift=-0.1em]h1.south);
\draw [->] ([xshift=-0.4em,yshift=-0.4em]h3.south) -- ([xshift=-0.3em,yshift=-0.1em]h3.south);
\draw [->] ([xshift=-0.8em,yshift=-0.4em]h3.south) -- ([xshift=-0.6em,yshift=-0.1em]h3.south);
% Label reads "encoder".
\node [anchor=south] (encoder) at ([xshift=-0.2em]h1.north west) {\scriptsize{\textbf{编码器}}};
% ---- Decoder boxes ----
\node [rnnnode,anchor=west,fill=green!20] (t1) at ([xshift=3em]e3.east) {\tiny{$e_y()$}};
\foreach \x [count=\p] in {2,3,4}
  \node [rnnnode,anchor=west,fill=green!20] (t\x) at ([xshift=1.5em]t\p.east) {\tiny{$e_y()$}};
\foreach \x in {1,...,4}{
  \node [rnnnode,anchor=south] (s\x) at ([yshift=1em]t\x.north) {\tiny{$\textbf{s}_{\x}$}};
  \node [rnnnode,anchor=south] (f\x) at ([yshift=1em]s\x.north) {\tiny{$\textbf{f}_{\x}$}};
  \node [rnnnode,anchor=south,fill=blue!20] (o\x) at ([yshift=1em]f\x.north) {\tiny{softmax}};
}
\node [anchor=south] (dot4) at ([xshift=-0.4em,yshift=-0.7em]s4.south) {\tiny{...}};
% Label reads "decoder".
\node [anchor=east] (decoder) at ([xshift=-0.3em,yshift=0.5em]o1.north west) {\scriptsize{\textbf{解码器}}};
% ---- Decoder input words (columns 3 and 4 sit 0.2em lower than 1 and 2) ----
\node [anchor=north,inner sep=2pt] (wt1) at ([yshift=-0.6em]t1.south) {\tiny{<eos>}};
\node [anchor=north,inner sep=2pt] (wt2) at ([yshift=-0.6em]t2.south) {\tiny{How}};
\node [anchor=north,inner sep=2pt] (wt3) at ([yshift=-0.8em]t3.south) {\tiny{are}};
\node [anchor=north,inner sep=2pt] (wt4) at ([yshift=-0.8em]t4.south) {\tiny{you}};
% ---- Decoder output words with their step captions ----
\foreach \tok [count=\x] in {How,are,you,<eos>}{
  \node [anchor=center,inner sep=2pt] (wo\x) at ([yshift=1.2em]o\x.north) {\tiny{\tok}};
  \node [anchor=south,inner sep=2pt] (wos\x) at (wo\x.north) {\tiny{\textbf{[step \x]}}};
}
% ---- Vertical arrows of every decoder column ----
\foreach \x in {1,...,4}{
  \draw [->] ([yshift=-0.7em]t\x.south) -- ([yshift=-0.1em]t\x.south);
  \draw [->] ([yshift=0.1em]t\x.north) -- ([yshift=-0.1em]s\x.south);
  \draw [->] ([yshift=0.1em]s\x.north) -- ([yshift=-0.1em]f\x.south);
  \draw [->] ([yshift=0.1em]f\x.north) -- ([yshift=-0.1em]o\x.south);
  \draw [->] ([yshift=0.1em]o\x.north) -- ([yshift=0.8em]o\x.north) node [pos=0.5,right] {\tiny{top1}};
}
% s2 also receives the first decoder embedding.
\draw [->] ([xshift=0.2em,yshift=0.1em]t1.north) .. controls +(north:0.3) and +(south:0.3) .. ([xshift=-0.3em,yshift=-0.1em]s2.south);
% Two stub arrows into s3. They do not depend on the column index, so they are
% drawn here once rather than inside the per-column loop.
\draw [->] ([xshift=-0.6em,yshift=-0.5em]s3.south) .. controls +(north:0) and +(south:0.2) .. ([xshift=-0.3em,yshift=-0.1em]s3.south);
\draw [->] ([xshift=-1.5em,yshift=-0.5em]s3.south) .. controls +(north:0) and +(south:0.15) .. ([xshift=-0.6em,yshift=-0.1em]s3.south);
% ---- Feedback: each output word becomes the next column's input word ----
\draw [->,thick,dotted] (wo1.east) .. controls +(east:1.0) and +(west:1.0) ..(wt2.west);
\draw [->,thick,dotted] (wo2.east) .. controls +(east:1.3) and +(west:1.1) ..(wt3.west);
\draw [->,thick,dotted] (wo3.east) .. controls +(east:1.1) and +(west:0.9) ..(wt4.west);
% ---- Context node C_1: fed by h1..h3 and s1, feeding f1 ----
% Label reads "encoder-decoder attention mechanism: context".
\node [circle,draw,anchor=south,inner sep=3pt,fill=orange!20] (c1) at ([yshift=2em]h2.north) {\tiny{$\textbf{C}_1$}};
\node [anchor=south] (c1label) at (c1.north) {\tiny{\textbf{编码-解码注意力机制:上下文}}};
\draw [->] (h1.north) .. controls +(north:0.6) and +(250:0.9) .. (c1.250);
\draw [->] (h2.north) .. controls +(north:0.6) and +(270:0.9) .. (c1.270);
\draw [->] (h3.north) .. controls +(north:0.6) and +(290:0.9) .. (c1.290);
\draw [->] ([yshift=0.3em]s1.west) .. controls +(west:1) and +(east:1) .. (c1.-30);
\draw [->] (c1.0) .. controls +(east:1) and +(west:1) .. ([yshift=0em]f1.west);
% ---- Context nodes C_2..C_4: placed below the previous column, feeding f_i ----
\foreach \x [count=\p] in {2,3,4}{
  \node [circle,draw,anchor=north,inner sep=3pt,fill=orange!20] (c\x) at ([yshift=-2em]t\p.south) {\tiny{$\textbf{C}_{\x}$}};
  \draw [->] ([xshift=-0.7em]c\x.west) -- ([xshift=-0.1em]c\x.west);
  \draw [->] ([xshift=0.1em]c\x.east) .. controls +(east:0.6) and +(west:0.8) ..([yshift=-0.3em,xshift=-0.1em]f\x.west);
}
\end{scope}
\end{tikzpicture}
\ No newline at end of file
%---------------------------------------------------------
\begin{tikzpicture}
%\setlength{\mystep}{1.6em}
% Two stacked panels: (a) a plain encoder-decoder pipeline with a single
% "representation" box, (b) the same pipeline with several per-step
% representations C_1, C_2, ..., C_i routed from source to target positions.
%
% The parts shared by both panels live in one picture-local macro: six source
% word slots, six target word slots, the Encoder and Decoder bars, and the
% short arrows between words and bars. Node names are reused by the second
% panel on purpose.
\newcommand*{\EncDecSkeleton}{%
  \foreach \x in {1,...,6}
    \node[] (s\x) at (\x * 1.6em,0) {};
  \node [] (ws1) at (s1) {\scriptsize{}};
  \node [] (ws2) at (s2) {\scriptsize{}};
  \node [] (ws3) at (s3) {\scriptsize{}};
  \node [] (ws4) at (s4) {\scriptsize{很长}};
  \node [] (ws5) at (s5) {\scriptsize{}};
  \node [] (ws6) at (s6) {\scriptsize{句子}};
  \foreach \x in {1,...,6}
    \node[] (t\x) at (\x * 1.6em + 2.4in,0) {};
  \node [] (wt1) at (t1) {\scriptsize{This}};
  \node [] (wt2) at (t2) {\scriptsize{is}};
  \node [] (wt3) at ([yshift=-1pt]t3) {\scriptsize{a}};
  \node [] (wt4) at ([yshift=-0.1em]t4) {\scriptsize{very}};
  \node [] (wt5) at (t5) {\scriptsize{long}};
  \node [] (wt6) at ([xshift=1em]t6) {\scriptsize{sentence}};
  \node [anchor=south west,fill=red!30,minimum width=1.6in,minimum height=1.5em] (encoder) at ([yshift=1.0em]ws1.north west) {\footnotesize{Encoder}};
  \node [anchor=west,fill=blue!30,minimum width=1.9in,minimum height=1.5em] (decoder) at ([xshift=4.5em]encoder.east) {\footnotesize{Decoder}};
  \foreach \x in {1,...,6}
    \draw[->] ([yshift=0.1em]s\x.north) -- ([yshift=1.2em]s\x.north);
  \foreach \x in {1,...,5}
    \draw[<-] ([yshift=0.1em]t\x.north) -- ([yshift=1.2em]t\x.north);
  \draw[<-] ([yshift=0.1em,xshift=1em]t6.north) -- ([yshift=1.2em,xshift=1em]t6.north);
}
%%% (a) a simple encoder-decoder model
\begin{scope}
  \EncDecSkeleton
  % Single green box (text: "representation") between the two bars.
  \node [anchor=west,fill=green!30,minimum height=1.5em] (representation) at ([xshift=1em]encoder.east) {\footnotesize{表示}};
  \draw [->,thick] ([xshift=1pt]encoder.east)--([xshift=-1pt]representation.west);
  \draw [->,thick] ([xshift=1pt]representation.east)--([xshift=-1pt]decoder.west);
  % Caption reads "(a) simple encoder-decoder framework".
  \node [anchor=north] (cap) at ([xshift=2em,yshift=-2.5em]encoder.south east) {\small{(a) 简单的编码器-解码器框架}};
\end{scope}
%%% (b) an encoder-decoder model with attention
\begin{scope}[yshift=-1.7in]
  \EncDecSkeleton
  % Three bracket-shaped paths at increasing heights, each carrying a green
  % label (text: "representation C_k") on its horizontal segment.
  \draw [->] ([yshift=3em]s6.north) -- ([yshift=4em]s6.north) -- ([yshift=4em]t1.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c1) {\scriptsize{表示$\textbf{C}_1$}} -- ([yshift=3em]t1.north) ;
  \draw [->] ([yshift=3em]s5.north) -- ([yshift=5.3em]s5.north) -- ([yshift=5.3em]t2.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c2) {\scriptsize{表示$\textbf{C}_2$}} -- ([yshift=3em]t2.north) ;
  \draw [->] ([yshift=3.5em]s3.north) -- ([yshift=6.6em]s3.north) -- ([yshift=6.6em]t4.north) node [pos=0.5,fill=green!30,inner sep=2pt] (c3) {\scriptsize{表示$\textbf{C}_i$}} -- ([yshift=3.5em]t4.north) ;
  % Ellipses under the two ends of the outermost path.
  \node [anchor=north] (smore) at ([yshift=3.5em]s3.north) {...};
  \node [anchor=north] (tmore) at ([yshift=3.5em]t4.north) {...};
  % Caption reads "(b) encoder-decoder framework with attention".
  \node [anchor=north] (cap) at ([xshift=2em,yshift=-2.5em]encoder.south east) {\small{(b) 引入注意力机制的编码器-解码器框架}};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Framed three-row text box: an English source passage followed by two Chinese
% translations of it. Each row is a short label node plus a 36em-wide parbox.
% The colour ublue is defined outside this picture.
% ---- Row 1: source passage (label reads "source text") ----
\node [pos=0.4,left,xshift=-36em,yshift=7.3em,font=\small] (original0) {原文:};
\node [pos=0.4,left,xshift=-2em,yshift=3.3em,font=\small] (original1) {
\begin{tabular}[t]{l}
\parbox{36em}{During Soviet times, if a city’s population topped one million, it would become eligible for its own metro. Planners wanted to brighten the lives of everyday Soviet citizens, and saw the metros, with their tens of thousands of daily passengers, as a singular opportunity to do so. In 1977, Tashkent, the capital of Uzbekistan, became the seventh Soviet city to have a metro built. Grand themes celebrating the history of Uzbekistan and the Soviet Union were brought to life, as art was commissioned and designers set to work. The stations reflected different themes, some with domed ceilings and painted tiles reminiscent of Uzbekistan’s Silk Road mosques, while others ...}
\end{tabular}
};
% ---- Row 2: first translation (label reads "translation 1") ----
\node [font=\small] (mt1) at ([yshift=-9.1em]original0.south) {译文1:};
\node [font=\small] (ts1) at ([yshift=-4em]original1.south) {
\begin{tabular}[t]{l}
\parbox{36em}{在苏联时代,如果一个城市的人口突破一百万,这将成为合资格为自己的地铁。规划者想去照亮每天的苏联公民的生命,看到地铁,与他们的数十每天数千乘客,作为一个独特的机会来这样做。1977年,塔什干,乌兹别克斯坦的首都,成了苏联第七城市建有地铁。宏大主题,庆祝乌兹别克斯坦和苏联的历史被带到生活,因为艺术是委托和设计师开始工作。车站反映了不同的主题,有的圆顶天花板和绘瓷砖让人想起乌兹别克斯坦是丝绸之路的清真寺,而另一些则装饰着...}
\end{tabular}
};
% ---- Row 3: second translation (label reads "translation 2") ----
% Note the naming: the label node is mt2, the text node is mt3.
\node [font=\small] (mt2) at ([yshift=-6.7em]mt1.south) {译文2:};
\node [font=\small] (mt3) at ([yshift=-4em]ts1.south) {
\begin{tabular}[t]{l}
\parbox{36em}{在苏联时期,如果一个城市的人口超过一百万,它就有资格拥有自己的地铁。 规划者想要照亮日常苏联公民的生活,并把拥有数万名每日乘客的地铁看作是这样做的一个绝佳机会。 1977年,乌兹别克斯坦首都塔什干成为苏联第七个修建地铁的城市。 随着艺术的委托和设计师们的工作,乌兹别克斯坦和苏联历史的宏伟主题被赋予了生命力。 这些电台反映了不同的主题,有的有穹顶和彩砖,让人想起乌兹别克斯坦的丝绸之路清真寺,有的则用...}
\end{tabular}
};
%{
%\draw[dotted,thick,ublue] ([xshift=10.3em,yshift=0.3em]mt8.south west)--%([xshift=-5.2em,yshift=-0.3em]ht8.north);
%}
% Thin frame around all six nodes, drawn on the background layer.
\begin{pgfonlayer}{background}
\node [rectangle,draw=ublue, inner sep=0mm] [fit =(original0)(original1)(mt1)(ts1)(mt2)(mt3)] {};
\end{pgfonlayer}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
  % Learning-rate curve: a single orange polyline that rises linearly from
  % (0,0) to its peak at (4,0.7) and then decays towards (12,0.4).
  % x is labelled in units of 10k updates, y in units of 10^{-3}.
  \footnotesize{
    \begin{axis}[
        width=.60\textwidth,
        height=.40\textwidth,
        % No legend entries are defined in this picture, so the two
        % `legend style` settings below are currently inert.
        legend style={at={(0.60,0.08)}, anchor=south west},
        xlabel={\footnotesize{num update (10k)}},
        % The scale factor is set one size smaller than the label text; both
        % parentheses stay outside the \scriptsize group so they match in size.
        ylabel={\footnotesize{Learn rate ({\scriptsize $10^{-3}$})}},
        ylabel style={yshift=-1em},xlabel style={yshift=0.0em},
        % Print y tick labels with two fixed decimals (0.20, 0.40, ...).
        yticklabel style={/pgf/number format/precision=2,/pgf/number format/fixed zerofill},
        ymin=0,ymax=0.9, ytick={0.2, 0.4, 0.6, 0.8},
        xmin=0,xmax=12,xtick={2,4,6,8,10},
        legend style={yshift=-6pt, legend plot pos=right,font=\scriptsize,cells={anchor=west}}
      ]
      \addplot[orange,line width=1.25pt] coordinates {(0,0) (4,0.7) (5,0.63) (6,0.57) (7,0.525) (8,0.49) (9,0.465) (10,0.44) (11,0.42) (12,0.4)};
    \end{axis}
  }
\end{tikzpicture}
% not compatible with [scale=?]
%----------------------------------------------------
\begin{tikzpicture}[
    % Geometry shared by every bar; the height is supplied per bar.
    bar/.style={minimum width=0.5cm,thick,draw,inner sep=0pt,outer sep=0pt,anchor=south west},
    smtbar/.style={bar,fill=blue!30!white},
    nmtbar/.style={bar,fill=red!30!white},
    yearlabel/.style={font=\normalsize,anchor=north},
    swatch/.style={minimum width=0.5cm,rectangle,draw,anchor=north west}
  ]
  % Paired bar chart for the years 2015-2019: a blue bar (legend text
  % "统计机器翻译") next to a red bar (legend text "神经机器翻译").
  % Bar heights are multiples of 0.2cm; the y axis is labelled "数量".
  \begin{scope}[local bounding box=WMT]
    % axes
    \draw[->,thick] (0.4,0) to (9.5,0);
    \draw[->,thick] (0.4,-0) to (0.4,3.5);

    % 2015 anchors the row; every later pair is placed relative to the previous one.
    \node[smtbar,minimum height=7*0.2cm] (smt2015) at (1.5*0.7,0.5pt) {};
    \node[nmtbar,minimum height=2*0.2cm] (nmt2015) at (smt2015.south east) {};
    \node[yearlabel] () at (smt2015.south east) {2015};

    % year / previous year / blue-bar height / red-bar height
    \foreach \yr/\prevyr/\smth/\nmth in {%
        2016/2015/3*0.2cm/8*0.2cm,%
        2017/2016/3*0.2cm/13*0.2cm,%
        2018/2017/0cm/14*0.2cm,%
        2019/2018/0cm/15*0.2cm} {
      \node[smtbar,minimum height=\smth] (smt\yr) at ($(nmt\prevyr.south east)+(0.7,0)$) {};
      \node[nmtbar,minimum height=\nmth] (nmt\yr) at (smt\yr.south east) {};
      \node[yearlabel] () at (smt\yr.south east) {\yr};
    }
  \end{scope}

  % Legend swatches share the y coordinate of the chart's top edge.
  % \ExtractX / \ExtractY / \XCoord / \YCoord are defined outside this picture.
  \ExtractX{$(nmt2015.west)$}
  \ExtractY{$(WMT.north)$}
  \node[swatch,fill=blue!30!white,label={[label distance=1pt,font=\scriptsize]0:统计机器翻译}] () at (\XCoord,\YCoord) {};
  \ExtractX{$(nmt2017.west)$}
  \node[swatch,fill=red!30!white,label={[label distance=1pt,font=\scriptsize]0:神经机器翻译}] () at (\XCoord,\YCoord) {};

  % y-axis title
  \node[font=\normalsize,rotate=90] () at ([xshift=-1em]WMT.west) {数量};
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
  % Complete 3-layer x 4-step grid of recurrent cells with the last cell
  % (rnn34) highlighted as the one being computed. Row labels: devices 1 and 2
  % idle, device 3 in use (red). This variant uses \base = 1.0em and tiny "0"
  % initial-state nodes.
  % \base is a length register declared outside this picture.
  \setlength{\base}{1.0em}
  \tikzset{
    rnnnode/.style={rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white},
    wordnode/.style={font=\footnotesize,align=center},
    toknode/.style={wordnode,font=\scriptsize},
    devlabel/.style={wordnode,align=right,left,font=\scriptsize}
  }
  \begin{scope}
    % Naming scheme: rnn[layer][step]. Column 0 holds the initial-state nodes
    % ("0"), row 0 holds the input coordinates; the spacing is 2\base both ways.
    \coordinate (rnn00) at (0,0);
    \foreach \prev [count=\cur] in {0,1,2}
      \node[wordnode,font=\tiny] (rnn\cur0) at ([yshift=2\base]rnn\prev0) {$0$};
    \foreach \prev [count=\cur] in {0,1,2,3}
      \coordinate (rnn0\cur) at ([xshift=2\base]rnn0\prev);

    % step 1
    \node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
    \draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
    \draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);

    % step 2
    \node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
    \node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
    \draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
    \draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
    \foreach \dst in {rnn12,rnn21}
      \draw[-latex'] (rnn11) to (\dst);

    % step 3
    {\footnotesize
      \node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
      \node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
      \node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
      % NOTE(review): this output node has empty text -- confirm whether a token label is missing.
      \node[toknode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
      \draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
      \draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
      \foreach \src/\dst in {rnn12/rnn13,rnn21/rnn31,rnn12/rnn22,rnn21/rnn22,rnn31/o1}
        \draw[-latex'] (\src) to (\dst);
    }

    % step 4
    {\footnotesize
      \node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
      \node[rnnnode] (rnn23) at ([xshift=2\base]rnn22) {};
      \node[rnnnode] (rnn32) at ([xshift=2\base]rnn31) {};
      \node[toknode,anchor=south] (o2) at ([yshift=\base]rnn32.north) {不错};
      \draw[-latex'] ([yshift=0.5\base]rnn04) to (rnn14);
      \foreach \src/\dst in {rnn13/rnn14,rnn13/rnn23,rnn22/rnn23,rnn22/rnn32,rnn31/rnn32,rnn32/o2}
        \draw[-latex'] (\src) to (\dst);
    }

    % step 5
    {\footnotesize
      \node[rnnnode] (rnn24) at ([xshift=2\base]rnn23) {};
      \node[rnnnode] (rnn33) at ([xshift=2\base]rnn32) {};
      \node[toknode,anchor=south] (o3) at ([yshift=\base]rnn33.north) {};
      \foreach \src/\dst in {rnn14/rnn24,rnn23/rnn24,rnn23/rnn33,rnn32/rnn33,rnn33/o3}
        \draw[-latex'] (\src) to (\dst);
    }

    % step 6
    \node[rnnnode] (rnn34) at ([xshift=2\base]rnn33) {};
    \node[toknode,anchor=south] (o4) at ([yshift=\base]rnn34.north) {EOS};
    \foreach \src/\dst in {rnn33/rnn34,rnn24/rnn34,rnn34/o4}
      \draw[-latex'] (\src) to (\dst);

    % frontier: recolour the active cell and wrap it in a rotated red frame
    \node[rnnnode,fill=purple] at (rnn34) {};
    \node[draw=red,thick,inner sep=7pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn34)] {};

    % device labels: one brace per layer row, red text marks the device in use
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[devlabel] {空闲的\\设备1} ([yshift=\base]rnn10.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[devlabel] {空闲的\\设备2} ([yshift=\base]rnn20.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[devlabel,text=red] {正在使用的\\设备3} ([yshift=\base]rnn30.west);

    % layer indices next to the initial-state nodes
    \foreach \layer in {1,2,3}
      \node[wordnode,font=\tiny,anchor=south west] at ([yshift=-0.25em]rnn\layer0.north west) {\layer};

    % input tokens along the bottom row (three of the four are empty text)
    \node[toknode] at (rnn01) {};
    \node[toknode] at (rnn02) {};
    \node[toknode] at (rnn03) {不错};
    \node[toknode] at ([xshift=0.25em]rnn04) {};
  \end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
  % Pipeline snapshot after step 1: only cell rnn11 exists and it is the
  % highlighted frontier. Row labels: device 1 in use (red), devices 2 and 3 idle.
  % \base is a length register declared outside this picture.
  \setlength{\base}{0.9em}
  \tikzset{
    rnnnode/.style={rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white},
    wordnode/.style={font=\footnotesize,align=center},
    toknode/.style={wordnode,font=\scriptsize},
    devlabel/.style={wordnode,align=right,left,font=\scriptsize}
  }
  \begin{scope}
    % Naming scheme: rnn[layer][step]. Column 0 holds the initial-state nodes
    % ("0"), row 0 holds the input coordinates; the spacing is 2\base both ways.
    \coordinate (rnn00) at (0,0);
    \foreach \prev [count=\cur] in {0,1,2}
      \node[wordnode] (rnn\cur0) at ([yshift=2\base]rnn\prev0) {$0$};
    \foreach \prev [count=\cur] in {0,1,2,3}
      \coordinate (rnn0\cur) at ([xshift=2\base]rnn0\prev);

    % step 1
    \node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
    \draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
    \draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);

    % frontier: recolour the active cell and wrap it in a rotated red frame
    \node[rnnnode,fill=purple] at (rnn11) {};
    \node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn11)] {};

    % device labels: one brace per layer row, red text marks the device in use
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[devlabel,text=red] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[devlabel] {空闲的\\设备2} ([yshift=\base]rnn20.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[devlabel] {空闲的\\设备3} ([yshift=\base]rnn30.west);

    % layer indices next to the initial-state nodes
    \foreach \layer in {1,2,3}
      \node[wordnode,font=\tiny,anchor=south west] at ([yshift=-0.25em]rnn\layer0.north west) {\layer};

    % input tokens along the bottom row
    % NOTE(review): three of the four token nodes have empty text -- confirm nothing was lost.
    \node[toknode] at (rnn01) {};
    \node[toknode] at (rnn02) {};
    \node[toknode] at (rnn03) {不错};
    \node[toknode] at ([xshift=0.25em]rnn04) {};
  \end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
  % Pipeline snapshot after step 2: cells rnn12 and rnn21 form the highlighted
  % frontier. Row labels: devices 1 and 2 in use (red), device 3 idle.
  % \base is a length register declared outside this picture.
  \setlength{\base}{0.9em}
  \tikzset{
    rnnnode/.style={rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white},
    wordnode/.style={font=\footnotesize,align=center},
    toknode/.style={wordnode,font=\scriptsize},
    devlabel/.style={wordnode,align=right,left,font=\scriptsize}
  }
  \begin{scope}
    % Naming scheme: rnn[layer][step]. Column 0 holds the initial-state nodes
    % ("0"), row 0 holds the input coordinates; the spacing is 2\base both ways.
    \coordinate (rnn00) at (0,0);
    \foreach \prev [count=\cur] in {0,1,2}
      \node[wordnode] (rnn\cur0) at ([yshift=2\base]rnn\prev0) {$0$};
    \foreach \prev [count=\cur] in {0,1,2,3}
      \coordinate (rnn0\cur) at ([xshift=2\base]rnn0\prev);

    % step 1
    \node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
    \draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
    \draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);

    % step 2
    \node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
    \node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
    \draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
    \draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
    \foreach \dst in {rnn12,rnn21}
      \draw[-latex'] (rnn11) to (\dst);

    % frontier: recolour the active cells and wrap them in a rotated red frame
    \foreach \cell in {rnn12,rnn21}
      \node[rnnnode,fill=purple] at (\cell) {};
    \node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn12) (rnn21)] {};

    % device labels: one brace per layer row, red text marks the devices in use
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[devlabel,text=red] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[devlabel,text=red] {正在使用的\\设备2} ([yshift=\base]rnn20.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[devlabel] {空闲的\\设备3} ([yshift=\base]rnn30.west);

    % layer indices next to the initial-state nodes
    \foreach \layer in {1,2,3}
      \node[wordnode,font=\tiny,anchor=south west] at ([yshift=-0.25em]rnn\layer0.north west) {\layer};

    % input tokens along the bottom row
    % NOTE(review): three of the four token nodes have empty text -- confirm nothing was lost.
    \node[toknode] at (rnn01) {};
    \node[toknode] at (rnn02) {};
    \node[toknode] at (rnn03) {不错};
    \node[toknode] at ([xshift=0.25em]rnn04) {};
  \end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
  % Pipeline snapshot after step 3: cells rnn13, rnn22 and rnn31 form the
  % highlighted frontier, and all three row labels read "in use" (red).
  % \base is a length register declared outside this picture.
  \setlength{\base}{0.9em}
  \tikzset{
    rnnnode/.style={rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white},
    wordnode/.style={font=\footnotesize,align=center},
    toknode/.style={wordnode,font=\scriptsize},
    devlabel/.style={wordnode,align=right,left,font=\scriptsize}
  }
  \begin{scope}
    % Naming scheme: rnn[layer][step]. Column 0 holds the initial-state nodes
    % ("0"), row 0 holds the input coordinates; the spacing is 2\base both ways.
    \coordinate (rnn00) at (0,0);
    \foreach \prev [count=\cur] in {0,1,2}
      \node[wordnode] (rnn\cur0) at ([yshift=2\base]rnn\prev0) {$0$};
    \foreach \prev [count=\cur] in {0,1,2,3}
      \coordinate (rnn0\cur) at ([xshift=2\base]rnn0\prev);

    % step 1
    \node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
    \draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
    \draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);

    % step 2
    \node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
    \node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
    \draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
    \draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
    \foreach \dst in {rnn12,rnn21}
      \draw[-latex'] (rnn11) to (\dst);

    % step 3
    \node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
    \node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
    \node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
    % NOTE(review): this output node has empty text -- confirm whether a token label is missing.
    \node[wordnode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
    \draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
    \draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
    \foreach \src/\dst in {rnn12/rnn13,rnn21/rnn31,rnn12/rnn22,rnn21/rnn22,rnn31/o1}
      \draw[-latex'] (\src) to (\dst);

    % frontier: recolour the active cells and wrap them in a rotated red frame
    \foreach \cell in {rnn13,rnn31,rnn22}
      \node[rnnnode,fill=purple] at (\cell) {};
    \node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn13) (rnn31) (rnn22)] {};

    % device labels: one brace per layer row, red text marks the devices in use
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[devlabel,text=red] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[devlabel,text=red] {正在使用的\\设备2} ([yshift=\base]rnn20.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[devlabel,text=red] {正在使用的\\设备3} ([yshift=\base]rnn30.west);

    % layer indices next to the initial-state nodes
    \foreach \layer in {1,2,3}
      \node[wordnode,font=\tiny,anchor=south west] at ([yshift=-0.25em]rnn\layer0.north west) {\layer};

    % input tokens along the bottom row
    % NOTE(review): three of the four token nodes have empty text -- confirm nothing was lost.
    \node[toknode] at (rnn01) {};
    \node[toknode] at (rnn02) {};
    \node[toknode] at (rnn03) {不错};
    \node[toknode] at ([xshift=0.25em]rnn04) {};
  \end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
  % Pipeline snapshot after step 4: cells rnn14, rnn23 and rnn32 form the
  % highlighted frontier, and all three row labels read "in use" (red).
  % \base is a length register declared outside this picture.
  \setlength{\base}{0.9em}
  \tikzset{
    rnnnode/.style={rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white},
    wordnode/.style={font=\footnotesize,align=center},
    toknode/.style={wordnode,font=\scriptsize},
    devlabel/.style={wordnode,align=right,left,font=\scriptsize}
  }
  \begin{scope}
    % Naming scheme: rnn[layer][step]. Column 0 holds the initial-state nodes
    % ("0"), row 0 holds the input coordinates; the spacing is 2\base both ways.
    \coordinate (rnn00) at (0,0);
    \foreach \prev [count=\cur] in {0,1,2}
      \node[wordnode] (rnn\cur0) at ([yshift=2\base]rnn\prev0) {$0$};
    \foreach \prev [count=\cur] in {0,1,2,3}
      \coordinate (rnn0\cur) at ([xshift=2\base]rnn0\prev);

    % step 1
    \node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
    \draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
    \draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);

    % step 2
    \node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
    \node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
    \draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
    \draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
    \foreach \dst in {rnn12,rnn21}
      \draw[-latex'] (rnn11) to (\dst);

    % step 3
    \node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
    \node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
    \node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
    % NOTE(review): this output node has empty text -- confirm whether a token label is missing.
    \node[wordnode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
    \draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
    \draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
    \foreach \src/\dst in {rnn12/rnn13,rnn21/rnn31,rnn12/rnn22,rnn21/rnn22,rnn31/o1}
      \draw[-latex'] (\src) to (\dst);

    % step 4
    \node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
    \node[rnnnode] (rnn23) at ([xshift=2\base]rnn22) {};
    \node[rnnnode] (rnn32) at ([xshift=2\base]rnn31) {};
    \node[wordnode,anchor=south] (o2) at ([yshift=\base]rnn32.north) {不错};
    \draw[-latex'] ([yshift=0.5\base]rnn04) to (rnn14);
    \foreach \src/\dst in {rnn13/rnn14,rnn13/rnn23,rnn22/rnn23,rnn22/rnn32,rnn31/rnn32,rnn32/o2}
      \draw[-latex'] (\src) to (\dst);

    % frontier: recolour the active cells and wrap them in a rotated red frame
    \foreach \cell in {rnn14,rnn23,rnn32}
      \node[rnnnode,fill=purple] at (\cell) {};
    \node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn14) (rnn23) (rnn32)] {};

    % device labels: one brace per layer row, red text marks the devices in use
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[devlabel,text=red] {正在使用的\\设备1} ([yshift=\base]rnn10.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[devlabel,text=red] {正在使用的\\设备2} ([yshift=\base]rnn20.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[devlabel,text=red] {正在使用的\\设备3} ([yshift=\base]rnn30.west);

    % layer indices next to the initial-state nodes
    \foreach \layer in {1,2,3}
      \node[wordnode,font=\tiny,anchor=south west] at ([yshift=-0.25em]rnn\layer0.north west) {\layer};

    % input tokens along the bottom row; the node at rnn04 is now emitted once
    % (it used to be duplicated on two consecutive lines).
    % NOTE(review): three of the four token nodes have empty text -- confirm nothing was lost.
    \node[toknode] at (rnn01) {};
    \node[toknode] at (rnn02) {};
    \node[toknode] at (rnn03) {不错};
    \node[toknode] at ([xshift=0.25em]rnn04) {};
  \end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
  % Pipeline snapshot after step 5: cells rnn24 and rnn33 form the highlighted
  % frontier. Row labels: device 1 idle, devices 2 and 3 in use (red).
  % \base is a length register declared outside this picture.
  \setlength{\base}{0.9em}
  \tikzset{
    rnnnode/.style={rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white},
    wordnode/.style={font=\footnotesize,align=center},
    toknode/.style={wordnode,font=\scriptsize},
    devlabel/.style={wordnode,align=right,left,font=\scriptsize}
  }
  \begin{scope}
    % Naming scheme: rnn[layer][step]. Column 0 holds the initial-state nodes
    % ("0"), row 0 holds the input coordinates; the spacing is 2\base both ways.
    \coordinate (rnn00) at (0,0);
    \foreach \prev [count=\cur] in {0,1,2}
      \node[wordnode] (rnn\cur0) at ([yshift=2\base]rnn\prev0) {$0$};
    \foreach \prev [count=\cur] in {0,1,2,3}
      \coordinate (rnn0\cur) at ([xshift=2\base]rnn0\prev);

    % step 1
    \node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
    \draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
    \draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);

    % step 2
    \node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
    \node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
    \draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
    \draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
    \foreach \dst in {rnn12,rnn21}
      \draw[-latex'] (rnn11) to (\dst);

    % step 3
    \node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
    \node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
    \node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
    % NOTE(review): this output node has empty text -- confirm whether a token label is missing.
    \node[wordnode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
    \draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
    \draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
    \foreach \src/\dst in {rnn12/rnn13,rnn21/rnn31,rnn12/rnn22,rnn21/rnn22,rnn31/o1}
      \draw[-latex'] (\src) to (\dst);

    % step 4
    \node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
    \node[rnnnode] (rnn23) at ([xshift=2\base]rnn22) {};
    \node[rnnnode] (rnn32) at ([xshift=2\base]rnn31) {};
    \node[wordnode,anchor=south] (o2) at ([yshift=\base]rnn32.north) {不错};
    \draw[-latex'] ([yshift=0.5\base]rnn04) to (rnn14);
    \foreach \src/\dst in {rnn13/rnn14,rnn13/rnn23,rnn22/rnn23,rnn22/rnn32,rnn31/rnn32,rnn32/o2}
      \draw[-latex'] (\src) to (\dst);

    % step 5
    \node[rnnnode] (rnn24) at ([xshift=2\base]rnn23) {};
    \node[rnnnode] (rnn33) at ([xshift=2\base]rnn32) {};
    \node[wordnode,anchor=south] (o3) at ([yshift=\base]rnn33.north) {};
    \foreach \src/\dst in {rnn14/rnn24,rnn23/rnn24,rnn23/rnn33,rnn32/rnn33,rnn33/o3}
      \draw[-latex'] (\src) to (\dst);

    % frontier: recolour the active cells and wrap them in a rotated red frame
    \foreach \cell in {rnn24,rnn33}
      \node[rnnnode,fill=purple] at (\cell) {};
    \node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn24) (rnn33)] {};

    % device labels: one brace per layer row, red text marks the devices in use
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[devlabel] {空闲的\\设备1} ([yshift=\base]rnn10.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[devlabel,text=red] {正在使用的\\设备2} ([yshift=\base]rnn20.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[devlabel,text=red] {正在使用的\\设备3} ([yshift=\base]rnn30.west);

    % layer indices next to the initial-state nodes
    \foreach \layer in {1,2,3}
      \node[wordnode,font=\tiny,anchor=south west] at ([yshift=-0.25em]rnn\layer0.north west) {\layer};

    % input tokens along the bottom row
    % NOTE(review): three of the four token nodes have empty text -- confirm nothing was lost.
    \node[toknode] at (rnn01) {};
    \node[toknode] at (rnn02) {};
    \node[toknode] at (rnn03) {不错};
    \node[toknode] at ([xshift=0.25em]rnn04) {};
  \end{scope}
\end{tikzpicture}
\ No newline at end of file
%-----------------------------------------
\begin{tikzpicture}
  % Pipeline snapshot after step 6: the grid is complete and cell rnn34 is the
  % highlighted frontier. Row labels: devices 1 and 2 idle, device 3 in use (red).
  % \base is a length register declared outside this picture.
  \setlength{\base}{0.9em}
  \tikzset{
    rnnnode/.style={rounded corners=1pt,minimum size=1\base,draw,inner sep=0pt,outer sep=0pt,fill=blue!30!white},
    wordnode/.style={font=\footnotesize,align=center},
    toknode/.style={wordnode,font=\scriptsize},
    devlabel/.style={wordnode,align=right,left,font=\scriptsize}
  }
  \begin{scope}
    % Naming scheme: rnn[layer][step]. Column 0 holds the initial-state nodes
    % ("0"), row 0 holds the input coordinates; the spacing is 2\base both ways.
    \coordinate (rnn00) at (0,0);
    \foreach \prev [count=\cur] in {0,1,2}
      \node[wordnode] (rnn\cur0) at ([yshift=2\base]rnn\prev0) {$0$};
    \foreach \prev [count=\cur] in {0,1,2,3}
      \coordinate (rnn0\cur) at ([xshift=2\base]rnn0\prev);

    % step 1
    \node[rnnnode] (rnn11) at ([xshift=2\base]rnn10) {};
    \draw[-latex'] ([yshift=0.5\base]rnn01) to (rnn11);
    \draw[-latex'] ([xshift=0.5\base]rnn10) to (rnn11);

    % step 2
    \node[rnnnode] (rnn12) at ([xshift=2\base]rnn11) {};
    \node[rnnnode] (rnn21) at ([yshift=2\base]rnn11) {};
    \draw[-latex'] ([yshift=0.5\base]rnn02) to (rnn12);
    \draw[-latex'] ([xshift=0.5\base]rnn20) to (rnn21);
    \foreach \dst in {rnn12,rnn21}
      \draw[-latex'] (rnn11) to (\dst);

    % step 3
    \node[rnnnode] (rnn13) at ([xshift=2\base]rnn12) {};
    \node[rnnnode] (rnn31) at ([yshift=2\base]rnn21) {};
    \node[rnnnode] (rnn22) at ([xshift=2\base]rnn21) {};
    % NOTE(review): this output node has empty text -- confirm whether a token label is missing.
    \node[wordnode,anchor=south] (o1) at ([yshift=\base]rnn31.north) {};
    \draw[-latex'] ([yshift=0.5\base]rnn03) to (rnn13);
    \draw[-latex'] ([xshift=0.5\base]rnn30) to (rnn31);
    \foreach \src/\dst in {rnn12/rnn13,rnn21/rnn31,rnn12/rnn22,rnn21/rnn22,rnn31/o1}
      \draw[-latex'] (\src) to (\dst);

    % step 4
    \node[rnnnode] (rnn14) at ([xshift=2\base]rnn13) {};
    \node[rnnnode] (rnn23) at ([xshift=2\base]rnn22) {};
    \node[rnnnode] (rnn32) at ([xshift=2\base]rnn31) {};
    \node[wordnode,anchor=south] (o2) at ([yshift=\base]rnn32.north) {不错};
    \draw[-latex'] ([yshift=0.5\base]rnn04) to (rnn14);
    \foreach \src/\dst in {rnn13/rnn14,rnn13/rnn23,rnn22/rnn23,rnn22/rnn32,rnn31/rnn32,rnn32/o2}
      \draw[-latex'] (\src) to (\dst);

    % step 5
    \node[rnnnode] (rnn24) at ([xshift=2\base]rnn23) {};
    \node[rnnnode] (rnn33) at ([xshift=2\base]rnn32) {};
    \node[wordnode,anchor=south] (o3) at ([yshift=\base]rnn33.north) {};
    \foreach \src/\dst in {rnn14/rnn24,rnn23/rnn24,rnn23/rnn33,rnn32/rnn33,rnn33/o3}
      \draw[-latex'] (\src) to (\dst);

    % step 6
    \node[rnnnode] (rnn34) at ([xshift=2\base]rnn33) {};
    \node[wordnode,anchor=south] (o4) at ([yshift=\base]rnn34.north) {EOS};
    \foreach \src/\dst in {rnn33/rnn34,rnn24/rnn34,rnn34/o4}
      \draw[-latex'] (\src) to (\dst);

    % frontier: recolour the active cell and wrap it in a rotated red frame
    \node[rnnnode,fill=purple] at (rnn34) {};
    \node[draw=red,thick,inner sep=5pt,rounded corners=0.3em,rotate fit=-45,label={[font=\scriptsize,align=center]90:正在运算的\\{\color{red} 循环单元}},fit=(rnn34)] {};

    % device labels: one brace per layer row, red text marks the device in use
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn10.west) to node[devlabel] {空闲的\\设备1} ([yshift=\base]rnn10.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn20.west) to node[devlabel] {空闲的\\设备2} ([yshift=\base]rnn20.west);
    \draw[decorate,decoration={brace}] ([yshift=-\base]rnn30.west) to node[devlabel,text=red] {正在使用的\\设备3} ([yshift=\base]rnn30.west);

    % layer indices next to the initial-state nodes
    \foreach \layer in {1,2,3}
      \node[wordnode,font=\tiny,anchor=south west] at ([yshift=-0.25em]rnn\layer0.north west) {\layer};

    % input tokens along the bottom row
    % NOTE(review): three of the four token nodes have empty text -- confirm nothing was lost.
    \node[toknode] at (rnn01) {};
    \node[toknode] at (rnn02) {};
    \node[toknode] at (rnn03) {不错};
    \node[toknode] at ([xshift=0.25em]rnn04) {};
  \end{scope}
\end{tikzpicture}
\ No newline at end of file
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
  % Line chart of mTER (%) over four sentence-length buckets, one polyline
  % per system (legend entries NMT, SPB, HPB, PBSY).
  \begin{axis}[
      % canvas
      width=10cm, height=6cm,
      % x axis: categorical buckets, one tick per data point
      symbolic x coords={1-15,16-25,26-35,>35},
      xtick=data,
      xlabel={Sentence Length(range)},
      xlabel style={align=center},
      x tick label style={font=\small},
      % y axis: 10..28 in steps of 2, tick marks hidden, dashed grid lines
      ymin=10,
      ymax=28,
      ytick={10,12,...,28},
      ylabel={$\%$\footnotesize{mTER}},
      ylabel style={},
      y tick style={opacity=0},
      y tick label style={font=\small},
      tick align=inside,
      ymajorgrids,
      major grid style={draw=ublue,dashed},
      % legend outside the plot on the right, re-anchored and pushed down
      legend pos=outer north east,
      legend style={anchor=north west,yshift=-2.5cm}
    ]
    \addplot [sharp plot,very thick,red!60,mark=diamond*] coordinates{(1-15,11.3) (16-25,16.4) (26-35,17) (>35,19.8)};
    \addlegendentry{\scriptsize{NMT}}
    \addplot [sharp plot,very thick,purple!60,mark=triangle*] coordinates{(1-15,14.4) (16-25,22.6) (26-35,23.8) (>35,25.9)};
    \addlegendentry{\scriptsize{SPB}}
    \addplot [sharp plot,very thick,green!60,mark=square*] coordinates{(1-15,14.9) (16-25,23.7) (26-35,24.7) (>35,26.4)};
    \addlegendentry{\scriptsize{HPB}}
    \addplot [sharp plot,very thick,blue!60,mark=*] coordinates{(1-15,17.5) (16-25,24) (26-35,25) (>35,27)};
    \addlegendentry{\scriptsize{PBSY}}
  \end{axis}
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
\begin{tikzpicture}[
    % pink box naming an attention module
    attnbox/.style={rounded corners=1pt,minimum height=2.0em,fill=pink!30,draw=black},
    % dotted green frame around a "representation" description
    reprframe/.style={rounded corners=1pt,draw=ugreen!70,very thick,dotted},
    % one text line inside such a frame
    reprtext/.style={anchor=north,rounded corners=1pt},
    % solid arrow from Q/K/V into the module
    intoattn/.style={->,thick},
    % dashed arrow linking Q/K/V to the representation it is drawn with
    fromrepr/.style={->,thick,dashed}
  ]
  % Two panels showing the inputs of the two attention modules:
  %   (a) Self-Attention: Q, K and V are all linked to one frame
  %       ("解码端每个位置的表示").
  %   (b) Encoder-Decoder Attention: two frames, "解码端..." on the left and
  %       "编码端..." on the right.
  % The colour ugreen is defined outside this picture.

  % ---------------- panel (a) ----------------
  \node[attnbox,minimum width=11.0em] (p1) at (0,0) {\small{Self-Attention}};
  \node[anchor=north] (word1) at ([xshift=0.0em,yshift=-2.0em]p1.south) {\small \textbf{K}};
  \node[anchor=west] (word2) at ([xshift=2.2em]word1.east) {\small \textbf{V}};
  \node[anchor=east] (word3) at ([xshift=-2.2em]word1.west) {\small \textbf{Q}};
  % K (centre), then the two outer columns at +-3.6em
  \foreach \dx in {0em,-3.6em,3.6em}
    \draw[intoattn] ([xshift=\dx]word1.north) -- ([xshift=\dx]p1.south);
  \node[reprframe,anchor=north,minimum width=11.0em,minimum height=3.5em] (p1-1) at ([yshift=-5.2em]p1.south) {\small{解码端每个位置的表示}};
  \draw[fromrepr] (word3.south) .. controls +(south:1em) and +(north:1em) .. (p1-1.north);
  \draw[fromrepr] (word1.south) -- (p1-1.north);
  \draw[fromrepr] (word2.south) .. controls +(south:1em) and +(north:1em) .. (p1-1.north);
  \node[anchor=north] (caption1) at ([xshift=0.0em,yshift=-9.5em]p1.south) {\small{(a)Self-Attention的输入}};

  % ---------------- panel (b) ----------------
  \node[attnbox,anchor=west,minimum width=14.0em] (p2) at ([xshift=5.0em]p1.east) {\small{Encoder-Decoder Attention}};
  \node[anchor=north] (word1-2) at ([xshift=0.0em,yshift=-2.0em]p2.south) {\small \textbf{K}};
  \node[anchor=west] (word2-2) at ([xshift=2.2em]word1-2.east) {\small \textbf{V}};
  \node[anchor=east] (word3-2) at ([xshift=-2.2em]word1-2.west) {\small \textbf{Q}};
  \foreach \dx in {0em,-3.6em,3.6em}
    \draw[intoattn] ([xshift=\dx]word1-2.north) -- ([xshift=\dx]p2.south);

  % two-line captions; each pair is framed on the background layer below
  \node[reprtext] (p2-1) at ([xshift=-3.55em,yshift=-5.5em]p2.south) {\small{解码端每个}};
  \node[reprtext] (p2-2) at ([xshift=-3.55em,yshift=-6.8em]p2.south) {\small{位置的表示}};
  \node[reprtext] (p2-3) at ([xshift=3.55em,yshift=-5.5em]p2.south) {\small{编码端每个}};
  \node[reprtext] (p2-4) at ([xshift=3.55em,yshift=-6.8em]p2.south) {\small{位置的表示}};
  \begin{pgfonlayer}{background}
    \node[reprframe,fit=(p2-1) (p2-2)] (p2-12) {};
    \node[reprframe,fit=(p2-3) (p2-4)] (p2-34) {};
  \end{pgfonlayer}

  % outer columns: short dashed segments below Q and V (arrow tip at the start point)
  \foreach \dx in {-3.6em,3.6em}
    \draw[<-,thick,dashed] ([xshift=\dx,yshift=-3.2em]word1-2.north) -- ([xshift=\dx,yshift=-3.2em]p2.south);
  % K is linked to the right-hand frame with a curved arrow
  \draw[fromrepr] (word1-2.south) .. controls +(south:1em) and +(north:1em) .. ([yshift=0.3em]p2-3.north);
  \node[anchor=north] (caption2) at ([xshift=0.0em,yshift=-9.5em]p2.south) {\small{(b)Encoder-Decoder Attention的输入}};
\end{tikzpicture}
\ No newline at end of file
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\begin{tikzpicture}
  % S-shaped curve on x in [-6,6], y in [0,1], with the y axis drawn through
  % the middle and the x axis along the bottom with an arrow tip.
  % NOTE(review): the plotted expression (tanh(x/2)+1)/2 is the logistic
  % sigmoid, while the axis label reads "Softmax(x)" -- confirm the intended name.
  \begin{axis}[
      width=8cm, height=5cm,
      xtick={-6,-4,...,6},
      ytick={0,0.5,1},
      xlabel={\small{\textbf{x}}},
      ylabel={\small{\textbf{Softmax(x)}}},
      % axis titles are positioned by hand
      xlabel style={xshift=3.0cm,yshift=1cm},
      axis y line=middle,
      ylabel style={xshift=-2.4cm,yshift=-0.2cm},
      x axis line style={->},
      axis line style={very thick},
      % ymajorgrids,
      %xmajorgrids,
      axis x line*=bottom,
      xmin=-6,
      xmax=6,
      ymin=0,
      ymax=1]
    % An explicit domain makes the curve span the whole visible x range
    % (without it pgfplots samples only its default -5:5); more samples give
    % a smooth line.
    \addplot[draw=ublue,thick,domain=-6:6,samples=100]{(tanh(x/2) + 1)/2};
  \end{axis}
\end{tikzpicture}
%---------------------------------------------------------------------
\ No newline at end of file
%-------------------------------------------------------------------
\begin{tikzpicture}
% Stacked recurrent encoder (left) and decoder (right) joined by a single attention bar.
% \base is a length register declared outside this figure; it is the gap between neighbouring cells.
\setlength{\base}{0.25cm}
% rnnnode: small rounded cell. wnode: borderless text box used for words and captions.
\tikzset{rnnnode/.style={minimum height=1.1em,minimum width=1.1em,inner sep=2pt,rounded corners=1pt,draw,fill=red!20,font=\scriptsize}}
\tikzset{wnode/.style={minimum height=1.2em,inner sep=3pt,rounded corners=1pt,font=\scriptsize}}
% Encoder
\begin{scope}
% embedding row; column 3 is a borderless "..." cell in every row of the grid
\node[rnnnode,fill=green!20] (encemb1) at (0,0) {};
\node[rnnnode,fill=green!20,right=\base of encemb1] (encemb2) {};
\node[rnnnode,draw=white,fill=white,right=\base of encemb2] (encemb3) {$\cdots$};
\node[rnnnode,fill=green!20,right=\base of encemb3] (encemb4) {};
% rows 1-4: each row sits \base above the row whose name prefix is \beneath
\foreach \beneath/\cur in {encemb/1,enc1/2,enc2/3,enc3/4}
{
\foreach \col in {1,2,4}
{
\node[rnnnode,above=\base of \beneath\col] (enc\cur\col) {};
}
\node[rnnnode,draw=white,fill=white,above=\base of \beneath3] (enc\cur3) {$\cdots$};
}
% row 5 is a row of borderless cells: "..." in columns 1, 2, 4 and a blank in column 3
\node[rnnnode,draw=white,fill=white,above=\base of enc41] (enc51) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of enc42] (enc52) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of enc43] (enc53) {};
\node[rnnnode,draw=white,fill=white,above=\base of enc44] (enc54) {$\cdots$};
% top row of the encoder stack
\node[rnnnode,above=\base of enc51] (enc61) {};
\node[rnnnode,above=\base of enc52] (enc62) {};
\node[rnnnode,draw=white,fill=white,above=\base of enc53] (enc63) {$\cdots$};
\node[rnnnode,above=\base of enc54] (enc64) {};
% words
% NOTE(review): encword1 has empty text, so nothing is printed under the first embedding -- confirm this is intended
\node[wnode,below=0pt of encemb1] (encword1) {};
\node[wnode,below=0pt of encemb2] (encword2) {什么};
\node[wnode,below=0pt of encemb4] (encword4) {EOS};
% connections
% rows 1, 3, 4 and 6 are chained left to right
\foreach \layer in {1,3,4,6}
{
\draw[-latex'] (enc\layer1) to (enc\layer2);
\draw[-latex'] (enc\layer2) to (enc\layer3);
\draw[-latex'] (enc\layer3) to (enc\layer4);
}
% row 2 is chained right to left
\draw[-latex'] (enc24) to (enc23);
\draw[-latex'] (enc23) to (enc22);
\draw[-latex'] (enc22) to (enc21);
% vertical wiring, identical for the three drawn columns
\foreach \col in {1,2,4}
{
% the embedding feeds row 1 directly and row 2 through a curved arrow
\draw[-latex'] (encemb\col) to (enc1\col);
\draw[-latex'] ([xshift=2pt]encemb\col.north) to [out=30,in=-30] ([xshift=2pt]enc2\col.south);
% curved arrows that skip one row: row 1 into row 3, row 3 into row 5
\draw[-latex'] ([xshift=-2pt]enc1\col.north) to [out=150,in=-150] ([xshift=-2pt]enc3\col.south);
\draw[-latex'] ([xshift=-2pt]enc3\col.north) to [out=150,in=-150] ([xshift=-2pt]enc5\col.south);
% straight arrows between consecutive rows 2 -> 3 -> 4 -> 5 -> 6
\foreach \cur [count=\prev from 2] in {3,...,6}
{
\draw[-latex'] (enc\prev\col) to (enc\cur\col);
}
% short arrow leaving the top row upwards, towards the attention bar
\draw[-latex'] (enc6\col) to ([yshift=\base]enc6\col.north);
}
\end{scope}
% attention bar placed \base above the top-left encoder cell
\node[rnnnode,fill=orange!20,minimum width=3.5cm,anchor=south west] (attention) at ([yshift=\base]enc61.north west) {注意力机制};
% Decoder
\begin{scope}
% embedding row, 2.5cm to the right of the encoder
\node[rnnnode,fill=green!20,right=2.5cm of encemb4] (decemb1) {};
\node[rnnnode,fill=green!20,right=\base of decemb1] (decemb2) {};
\node[rnnnode,draw=white,fill=white,right=\base of decemb2] (decemb3) {$\cdots$};
\node[rnnnode,fill=green!20,right=\base of decemb3] (decemb4) {};
% rows 1-3, built the same way as the encoder rows
\foreach \beneath/\cur in {decemb/1,dec1/2,dec2/3}
{
\foreach \col in {1,2,4}
{
\node[rnnnode,above=\base of \beneath\col] (dec\cur\col) {};
}
\node[rnnnode,draw=white,fill=white,above=\base of \beneath3] (dec\cur3) {$\cdots$};
}
% row 4 is a row of borderless cells: "..." in columns 1, 2, 4 and a blank in column 3
\node[rnnnode,draw=white,fill=white,above=\base of dec31] (dec41) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of dec32] (dec42) {$\cdots$};
\node[rnnnode,draw=white,fill=white,above=\base of dec33] (dec43) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec34] (dec44) {$\cdots$};
% top row of the decoder stack
\node[rnnnode,above=\base of dec41] (dec51) {};
\node[rnnnode,above=\base of dec42] (dec52) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec43] (dec53) {$\cdots$};
\node[rnnnode,above=\base of dec44] (dec54) {};
% output row (nodes named softmax*)
\node[rnnnode,fill=blue!20,above=\base of dec51] (softmax1) {};
\node[rnnnode,fill=blue!20,above=\base of dec52] (softmax2) {};
\node[rnnnode,draw=white,fill=white,above=\base of dec53] (softmax3) {$\cdots$};
\node[rnnnode,fill=blue!20,above=\base of dec54] (softmax4) {};
% words
\node[wnode,below=0pt of decemb1] (decinword1) {SOS};
\node[wnode,below=0pt of decemb2] (decinword2) {Have};
\node[wnode,below=0pt of decemb4] (decinword4) {?};
\node[wnode,above=0pt of softmax1] (decoutword1) {Have};
% the remaining output words reuse the y coordinate of the first word's base anchor
\ExtractX{$(softmax2.north)$}
\ExtractY{$(decoutword1.base)$}
\node[wnode,anchor=base] (decoutword2) at (\XCoord,\YCoord) {you};
\ExtractX{$(softmax4.north)$}
\ExtractY{$(decoutword1.base)$}
\node[wnode,anchor=base] (decoutword4) at (\XCoord,\YCoord) {EOS};
% connections
% rows 1, 2, 3 and 5 are chained left to right
\foreach \layer in {1,2,3,5}
{
\draw[-latex'] (dec\layer1) to (dec\layer2);
\draw[-latex'] (dec\layer2) to (dec\layer3);
\draw[-latex'] (dec\layer3) to (dec\layer4);
}
% vertical wiring, identical for the three drawn columns
\foreach \col in {1,2,4}
{
\draw[-latex'] (decemb\col) to (dec1\col);
% straight arrows between consecutive rows 1 -> 2 -> 3 -> 4 -> 5
\foreach \cur [count=\prev from 1] in {2,...,5}
{
\draw[-latex'] (dec\prev\col) to (dec\cur\col);
}
% curved arrow that skips row 3: row 2 into row 4
\draw[-latex'] ([xshift=-2pt]dec2\col.north) to [out=150,in=-150] ([xshift=-2pt]dec4\col.south);
\draw[-latex'] (dec5\col) to (softmax\col);
}
\end{scope}
% attention connections
% dec11 feeds the attention bar from below
\draw[-latex',rounded corners=2pt] (dec11) -| ([xshift=-0.4cm]attention.south east);
% The attention bar feeds column 2 of decoder rows 1, 2, 3 and 5. Each route leaves the bar's east side
% at its own height, drops in its own lane (9/11/13/15pt right of the bar), runs just above the
% column-1 cell of the target row and enters the column-2 cell from the west.
\ExtractX{$([xshift=9pt]attention.east)$}
\ExtractY{$([yshift=2pt]dec11.north)$}
\coordinate (tmp1) at (\XCoord,\YCoord);
\ExtractX{$([xshift=-5pt]dec12.west)$}
\coordinate (tmp2) at (\XCoord,\YCoord);
\draw[-latex',rounded corners=2pt] ([yshift=-3pt]attention.east) -| (tmp1) -- (tmp2) |- ([yshift=3pt]dec12.west);
\ExtractX{$([xshift=11pt]attention.east)$}
\ExtractY{$([yshift=2pt]dec21.north)$}
\coordinate (tmp1) at (\XCoord,\YCoord);
\ExtractX{$([xshift=-5pt]dec22.west)$}
\coordinate (tmp2) at (\XCoord,\YCoord);
\draw[-latex',rounded corners=2pt] ([yshift=-1pt]attention.east) -| (tmp1) -- (tmp2) |- ([yshift=3pt]dec22.west);
\ExtractX{$([xshift=13pt]attention.east)$}
\ExtractY{$([yshift=2pt]dec31.north)$}
\coordinate (tmp1) at (\XCoord,\YCoord);
\ExtractX{$([xshift=-5pt]dec32.west)$}
\coordinate (tmp2) at (\XCoord,\YCoord);
\draw[-latex',rounded corners=2pt] ([yshift=1pt]attention.east) -| (tmp1) -- (tmp2) |- ([yshift=3pt]dec32.west);
\ExtractX{$([xshift=15pt]attention.east)$}
\ExtractY{$([yshift=2pt]dec51.north)$}
\coordinate (tmp1) at (\XCoord,\YCoord);
\ExtractX{$([xshift=-5pt]dec52.west)$}
\coordinate (tmp2) at (\XCoord,\YCoord);
\draw[-latex',rounded corners=2pt] ([yshift=3pt]attention.east) -| (tmp1) -- (tmp2) |- ([yshift=3pt]dec52.west);
% label
% braces spanning each stack from its first to its top row
\draw[decorate,decoration={brace}] ([xshift=-5pt]enc11.south west) to node [auto,font=\scriptsize,name=label1] {8层} ([xshift=-5pt]enc61.north west);
\draw[decorate,decoration={brace,mirror}] ([xshift=5pt]dec14.south east) to node [auto,swap,font=\scriptsize,name=label2] {8层} ([xshift=5pt]dec54.north east);
% dashed frames around both halves, drawn on the background layer; (tmp) adds a little padding to each fit
\begin{pgfonlayer}{background}
\coordinate (tmp) at ([xshift=-4pt]label1.west);
\node[draw,densely dashed,rounded corners=2pt,inner sep=2pt,fit=(label1) (encword1) (attention) (tmp)] (encoder) {};
\ExtractX{$([xshift=4pt]label2.east)$}
\ExtractY{$([yshift=6pt]decoutword4.north)$}
\coordinate (tmp) at (\XCoord,\YCoord);
\node[draw,densely dashed,rounded corners=2pt,inner sep=2pt,fit=(label2) (decinword1) (decoutword4) (tmp)] (decoder) {};
\end{pgfonlayer}
% frame captions
\node[wnode,anchor=north west] () at (encoder.north west) {编码器};
\node[wnode,anchor=north east] () at (decoder.north east) {解码器};
\end{tikzpicture}
\ No newline at end of file
%--------------------------------------------------------------------------------
\begin{tikzpicture}
% LSTM cell diagram: an invisible coordinate skeleton, the four gate circuits drawn on top of it,
% the input/output symbols, a filled background box and four formula call-outs.
% \base is a length register declared outside this figure; here it is the grid step of the skeleton.
\setlength{\base}{0.6cm}
% wordnode: small text; auxnode: skeleton marker (opacity=0, so never visible); opnode: operator cell;
% standard / emph: normal and red line styles; formulanode: red-framed call-out with a drop shadow.
\tikzstyle{wordnode} = [font=\scriptsize]
\tikzstyle{auxnode} = [inner sep=0pt,outer sep=0pt,opacity=0,draw=red,fill=red,circle,minimum size=3pt]
\tikzstyle{opnode} = [inner sep=0pt,outer sep=0pt,draw,fill=green!30!white,font=\scriptsize,minimum size=10pt]
\tikzstyle{standard} = [rounded corners=5pt,thick]
\tikzstyle{emph} = [rounded corners=5pt,thick,draw=red]
\tikzstyle{formulanode} = [font=\scriptsize,align=left,draw=red,rectangle,fill=red!10!white,rounded corners=2pt,drop shadow]
% Skeleton
% Coordinates are named aux<row><column>. Every coordinate also gets an auxnode with a numeric label;
% both the marker and the label have opacity=0, so they only serve as a debugging aid.
\begin{scope}[every label/.append style={label distance=1pt,font=\tiny,inner sep=0pt,opacity=0}]
\coordinate (aux12) at (0,0);
\node[auxnode,label={-45:12}] () at (aux12) {};
\coordinate (aux22) at ([yshift=\base]aux12);
\node[auxnode,label={-45:22}] () at (aux22) {};
\coordinate (aux21) at ([xshift=-2\base]aux22);
\node[auxnode,label={-45:21}] () at (aux21) {};
\coordinate (aux23) at ([xshift=\base]aux22);
\node[auxnode,label={-45:23}] () at (aux23) {};
\coordinate (aux24) at ([xshift=\base]aux23);
\node[auxnode,label={-45:24}] () at (aux24) {};
\coordinate (aux25) at ([xshift=\base]aux24);
\node[auxnode,label={-45:25}] () at (aux25) {};
\coordinate (aux26) at ([xshift=\base]aux25);
\node[auxnode,label={-45:26}] () at (aux26) {};
\coordinate (aux27) at ([xshift=\base]aux26);
\node[auxnode,label={-45:27}] () at (aux27) {};
\coordinate (aux28) at ([xshift=\base]aux27);
\node[auxnode,label={-45:28}] () at (aux28) {};
\coordinate (aux29) at ([xshift=2\base]aux28);
\node[auxnode,label={-45:29}] () at (aux29) {};
\coordinate (aux33) at ([yshift=\base]aux23);
\node[auxnode,label={-45:33}] () at (aux33) {};
\coordinate (aux34) at ([yshift=\base]aux24);
\node[auxnode,label={-45:34}] () at (aux34) {};
\coordinate (aux35) at ([yshift=\base]aux25);
\node[auxnode,label={-45:35}] () at (aux35) {};
\coordinate (aux37) at ([yshift=\base]aux27);
\node[auxnode,label={-45:37}] () at (aux37) {};
\coordinate (aux45) at ([yshift=\base]aux35);
\node[auxnode,label={-45:45}] () at (aux45) {};
\coordinate (aux55) at ([yshift=\base]aux45);
\node[auxnode,label={-45:55}] () at (aux55) {};
% row 5 reuses the x coordinates of row 2 and the y coordinate of aux55
\ExtractX{$(aux21)$}
\ExtractY{$(aux55)$}
\coordinate (aux51) at (\XCoord,\YCoord);
\node[auxnode,label={-45:51}] () at (aux51) {};
\ExtractX{$(aux23)$}
\ExtractY{$(aux55)$}
\coordinate (aux53) at (\XCoord,\YCoord);
\node[auxnode,label={-45:53}] () at (aux53) {};
\ExtractX{$(aux28)$}
\ExtractY{$(aux55)$}
\coordinate (aux58) at (\XCoord,\YCoord);
\node[auxnode,label={-45:58}] () at (aux58) {};
\ExtractX{$(aux29)$}
\ExtractY{$(aux55)$}
\coordinate (aux59) at (\XCoord,\YCoord);
\node[auxnode,label={-45:59}] () at (aux59) {};
\coordinate (aux68) at ([yshift=\base]aux58);
\node[auxnode,label={-45:68}] () at (aux68) {};
\end{scope}
% Gate circuits.
% NOTE(review): every gate below is drawn twice over the same coordinates -- first in red (`emph`),
% then again in `standard`. This looks like leftover per-gate highlighting; the later pass paints over
% the earlier one wherever the paths coincide. Confirm whether the red pass is still wanted.
\begin{scope}
% invisible stand-ins for f53 and u55 so that arrows can end on them before the visible nodes are drawn
\node[opnode,circle,opacity=0] (f53) at (aux53) {};
\node[opnode,circle,opacity=0] (u55) at (aux55) {};
% forget gate
{
\draw[emph] (aux21) -- (aux23) -- (aux33);
\draw[-latex,emph] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle,draw=red,thick] () at (aux33) {$\sigma$};
}
{
\draw[standard] (aux21) -- (aux23) -- (aux33);
\draw[-latex,standard] (aux12) -- (aux22) -- (aux23) -- (f53);
\node[opnode,circle] () at (aux33) {$\sigma$};
}
% input gate
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,emph] (aux21) -- (aux24) |- (i45);
\draw[-latex,emph] (aux21) -- (aux25) -- (u55);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle,draw=red,thick] (i45) at (aux45) {X};
}
{
\node[opnode,circle] (i45) at (aux45) {};
\draw[-latex,standard] (aux21) -- (aux24) |- (i45);
\draw[-latex,standard] (aux21) -- (aux25) -- (u55);
\node[opnode,circle] () at (aux34) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux35) {$\mathrm{tanh}$};
\node[opnode,circle] (i45) at (aux45) {X};
}
% cell update
{
\draw[-latex,emph] (aux51) -- (aux59);
\node[opnode,circle,draw=red,thick] (f53) at (aux53) {X};
\node[opnode,circle,draw=red,thick] (u55) at (aux55) {\textbf{+}};
}
{
\draw[-latex,standard] (aux51) -- (aux59);
\node[opnode,circle] (f53) at (aux53) {X};
\node[opnode,circle] (u55) at (aux55) {\textbf{+}};
}
% output gate
{
\node[opnode,circle,draw=red,thick] (o27) at (aux27) {X};
\draw[-latex,emph] (u55) -| (o27);
\draw[-latex,emph] (aux21) -- (o27);
\draw[emph] (aux12) -- (aux22) -- (aux23);
\node[opnode,circle,draw=red,thick] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt,draw=red,thick] () at (aux37) {$\mathrm{tanh}$};
\draw[-latex,emph] (o27) -- (aux29);
\draw[-latex,emph] (o27) -| (aux68);
}
{
\node[opnode,circle] (o27) at (aux27) {X};
\draw[-latex,standard] (u55) -| (o27);
\draw[-latex,standard] (aux21) -- (o27);
\node[opnode,circle] () at (aux26) {$\sigma$};
\node[opnode,rectangle,rounded corners=2pt,inner sep=2pt] () at (aux37) {$\mathrm{tanh}$};
\draw[-latex,standard] (o27) -- (aux29);
\draw[-latex,standard] (o27) -| (aux68);
}
\end{scope}
% Symbols at the ends of the wires: h_{t-1}, x_t and c_{t-1} on the input side, c_t and h_t on the output side.
\begin{scope}
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux21) {$\mathbf{h}_{t-1}$};
\node[wordnode,anchor=west] () at (aux12) {$\mathbf{x}_t$};
\node[wordnode,anchor=south] () at ([xshift=0.5\base]aux51) {$\mathbf{c}_{t-1}$};
{
\node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux59) {$\mathbf{c}_{t}$};
}
{
\node[wordnode,anchor=east] () at (aux68) {$\mathbf{h}_{t}$};
\node[wordnode,anchor=south] () at ([xshift=-0.5\base]aux29) {$\mathbf{h}_{t}$};
}
\end{scope}
% Filled box behind the circuit, fitted around four anchor points and drawn on the background layer.
\begin{pgfonlayer}{background}
\node[draw,very thick,rectangle,fill=blue!30!white,rounded corners=5pt,inner sep=4pt,fit=(aux22) (aux58) (u55) (o27)] (LSTM) {};
\end{pgfonlayer}
% Formula call-outs, one per gate, anchored at offsets from the corner coordinates aux51/aux21/aux59/aux29.
\begin{scope}
{
% forget gate formula
\node[formulanode,anchor=south east,text width=3.4cm] () at ([shift={(4\base,1.5\base)}]aux51) {遗忘门\\$\mathbf{f}_t=\sigma(\mathbf{W}_f[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_f)$};
}
{
% input gate formula
\node[formulanode,anchor=north east] () at ([shift={(4\base,-1.5\base)}]aux21) {输入门\\$\mathbf{i}_t=\sigma(\mathbf{W}_i[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_i)$\\$\hat{\mathbf{c}}_t=\mathrm{tanh}(\mathbf{W}_c[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_c)$};
}
{
% cell update formula
\node[formulanode,anchor=south west,text width=3.02cm] () at ([shift={(-4\base,1.5\base)}]aux59) {记忆更新\\$\mathbf{c}_{t}=\mathbf{f}_t\cdot \mathbf{c}_{t-1}+\mathbf{i}_t\cdot \hat{\mathbf{c}}_t$};
}
{
% output gate formula
\node[formulanode,anchor=north west] () at ([shift={(-4\base,-1.5\base)}]aux29) {输出门\\$\mathbf{o}_t=\sigma(\mathbf{W}_o[\mathbf{h}_{t-1},\mathbf{x}_t]+\mathbf{b}_o)$\\$\mathbf{h}_{t}=\mathbf{o}_t\cdot \mathrm{tanh}(\mathbf{c}_{t})$};
}
\end{scope}
\end{tikzpicture}
\begin{tikzpicture}
% Transformer overview: encoder stack on the left, decoder stack on the right.
\begin{scope}
% Box styles: attention (orange), Add & LayerNorm (yellow), feed-forward (light blue),
% output layer (blue), embedding input (red), position input (grey).
\tikzstyle{Sanode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=orange!20];
\tikzstyle{Resnode} = [minimum height=1.1em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=yellow!20];
\tikzstyle{ffnnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=blue!10];
\tikzstyle{outputnode} = [minimum height=1.4em,minimum width=7em,inner sep=3pt,rounded corners=1.5pt,draw,fill=blue!30];
\tikzstyle{inputnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=red!10];
\tikzstyle{posnode} = [minimum height=1.4em,minimum width=3.5em,inner sep=3pt,rounded corners=1.5pt,draw,fill=black!5!white];
\tikzstyle{standard} = [rounded corners=3pt]
% Encoder stack, bottom to top: self-attention, Add & LayerNorm, feed-forward, Add & LayerNorm.
\node [Sanode,anchor=west] (sa1) at (0,0) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res1) at ([yshift=0.3em]sa1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn1) at ([yshift=1em]res1.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res2) at ([yshift=0.3em]ffn1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
% Encoder inputs: embedding box and position box side by side under the stack (label typo "Postion" corrected).
\node [inputnode,anchor=north west] (input1) at ([yshift=-1em]sa1.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos1) at ([yshift=-1em]sa1.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (inputs) at ([yshift=-3em]sa1.south) {\tiny{$\textbf{编码器输入: 我 很 好}$}};
\node [anchor=south] (encoder) at ([xshift=0.2em,yshift=0.6em]res2.north west) {\scriptsize{\textbf{编码器}}};
% Upward arrows through the encoder stack and from the input caption into the input boxes.
\draw [->] (sa1.north) -- (res1.south);
\draw [->] (res1.north) -- (ffn1.south);
\draw [->] (ffn1.north) -- (res2.south);
\draw [->] ([yshift=-1em]sa1.south) -- (sa1.south);
\draw [->] ([yshift=-0.3em]inputs.north) -- ([yshift=0.6em]inputs.north);
% Decoder stack, bottom to top: self-attention, encoder-decoder attention and feed-forward,
% each followed by Add & LayerNorm, then the output layer.
\node [Sanode,anchor=west] (sa2) at ([xshift=3em]sa1.east) {\tiny{$\textbf{Self-Attention}$}};
\node [Resnode,anchor=south] (res3) at ([yshift=0.3em]sa2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [Sanode,anchor=south] (ed1) at ([yshift=1em]res3.north) {\tiny{$\textbf{Encoder-Decoder Attention}$}};
\node [Resnode,anchor=south] (res4) at ([yshift=0.3em]ed1.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [ffnnode,anchor=south] (ffn2) at ([yshift=1em]res4.north) {\tiny{$\textbf{Feed Forward Network}$}};
\node [Resnode,anchor=south] (res5) at ([yshift=0.3em]ffn2.north) {\tiny{$\textbf{Add \& LayerNorm}$}};
\node [outputnode,anchor=south] (o1) at ([yshift=1em]res5.north) {\tiny{$\textbf{Output layer}$}};
% Decoder inputs and captions (label typo "Postion" corrected here as well).
\node [inputnode,anchor=north west] (input2) at ([yshift=-1em]sa2.south west) {\tiny{$\textbf{Embedding}$}};
\node [posnode,anchor=north east] (pos2) at ([yshift=-1em]sa2.south east) {\tiny{$\textbf{Position}$}};
\node [anchor=north] (outputs) at ([yshift=-3em]sa2.south) {\tiny{$\textbf{解码器输入: $<$SOS$>$ I am fine}$}};
\node [anchor=east] (decoder) at ([xshift=-1em,yshift=-1.5em]o1.west) {\scriptsize{\textbf{解码器}}};
\node [anchor=north] (decoutputs) at ([yshift=1.5em]o1.north) {\tiny{$\textbf{解码器输出: I am fine $<$EOS$>$ }$}};
% Upward arrows through the decoder stack.
\draw [->] (sa2.north) -- (res3.south);
\draw [->] (res3.north) -- (ed1.south);
\draw [->] (ed1.north) -- (res4.south);
\draw [->] (res4.north) -- (ffn2.south);
\draw [->] (ffn2.north) -- (res5.south);
\draw [->] (res5.north) -- (o1.south);
\draw [->] (o1.north) -- ([yshift=0.5em]o1.north);
\draw [->] ([yshift=-1em]sa2.south) -- (sa2.south);
\draw [->] ([yshift=-0.3em]outputs.north) -- ([yshift=0.6em]outputs.north);
% Bypass arrows: each leaves the main line below a sub-layer, runs around it on the outer side and
% ends at the side of the following Add & LayerNorm box (presumably the residual connections).
\draw[->,standard] ([yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=-0.5em]sa1.south) -- ([xshift=-4em,yshift=2.3em]sa1.south) -- ([xshift=-3.5em,yshift=2.3em]sa1.south);
\draw[->,standard] ([yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=0.5em]res1.north) -- ([xshift=-4em,yshift=3.3em]res1.north) -- ([xshift=-3.5em,yshift=3.3em]res1.north);
\draw[->,standard] ([yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=-0.5em]sa2.south) -- ([xshift=4em,yshift=2.3em]sa2.south) -- ([xshift=3.5em,yshift=2.3em]sa2.south);
\draw[->,standard] ([yshift=0.5em]res3.north) -- ([xshift=4em,yshift=0.5em]res3.north) -- ([xshift=4em,yshift=3.3em]res3.north) -- ([xshift=3.5em,yshift=3.3em]res3.north);
\draw[->,standard] ([yshift=0.5em]res4.north) -- ([xshift=4em,yshift=0.5em]res4.north) -- ([xshift=4em,yshift=3.3em]res4.north) -- ([xshift=3.5em,yshift=3.3em]res4.north);
% Arrow from the top of the encoder stack across to the decoder column.
\draw[->,standard] (res2.north) -- ([yshift=0.5em]res2.north) -- ([xshift=5em,yshift=0.5em]res2.north) -- ([xshift=5em,yshift=-2.2em]res2.north) -- ([xshift=6.5em,yshift=-2.2em]res2.north);
% Dotted frames around the encoder stack (green) and the decoder stack (red).
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=ugreen!70] [fit = (sa1) (res1) (ffn1) (res2)] (box0) {};
\node [rectangle,inner sep=0.7em,rounded corners=1pt,very thick,dotted,draw=red!60] [fit = (sa2) (res3) (res5)] (box1) {};
\end{scope}
\end{tikzpicture}
\ No newline at end of file
\begin{tikzpicture}
% Attention(Q,K,V) drawn as coloured matrices in three rows:
% row 1: Attention(Q,K,V) = softmax(Q x K^T over sqrt(d_k), + Mask) V
% row 2: = softmax(pink matrix) V
% row 3: = red matrix x V = grey matrix
% Each matrix is a tabular with empty coloured cells, wrapped in a TikZ node.
\node(atten) at (0,0){Attention(};
%%%% Q
\node(tbq) at ([xshift=0.5em,yshift=0]atten.east){
\begin{tabular}{|c|}
\hline
\rowcolor{yellow!20} \\ \hline
\rowcolor{yellow!20} \\ \hline
\rowcolor{yellow!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbq.north){$\mathbf{Q}$};
\node(comma1) at ([xshift=0.15em,yshift=-2em]tbq.east){,};
%%%% k
\node(tbk) at ([xshift=1em,yshift=0]tbq.east){
\begin{tabular}{|c|}
\hline
\rowcolor{blue!20} \\ \hline
\rowcolor{blue!20} \\ \hline
\rowcolor{blue!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbk.north){$\mathbf{K}$};
\node(comma2) at ([xshift=0.15em,yshift=-2em]tbk.east){,};
%%%% v
\node(tbv) at ([xshift=1em,yshift=0]tbk.east){
\begin{tabular}{|c|}
\hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbv.north){$\mathbf{V}$};
\node(bra) at ([xshift=0.3em,yshift=0]tbv.east){)};
\node(eq1) at ([xshift=0.5em,yshift=0]bra.east){=};
\node(sof1) at ([xshift=2em,yshift=0]eq1.east){softmax(};
%-----------------------------------------------------------
%QK+MASK
% numerator of the fraction: Q (column) times K^T (row), raised 2em above the baseline of row 1
\node(tbq2) at ([xshift=0.5em,yshift=2em]sof1.east){
\begin{tabular}{|c|}
\hline
\rowcolor{yellow!20} \\ \hline
\rowcolor{yellow!20} \\ \hline
\rowcolor{yellow!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbq2.north){$\mathbf{Q}$};
% x
\node (times) at ([xshift=1em,yshift=0em]tbq2.east){$\times$};
%k
\node(tbk2) at ([xshift=2em,yshift=0em]times.east){
\begin{tabular}{|l|l|l|}
\hline
\cellcolor{blue!20} & \cellcolor{blue!20} &\cellcolor{blue!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbk2.north){$\mathbf{K}^{\mathrm{T}}$};
% fraction bar and denominator
% NOTE(review): the bar uses absolute canvas coordinates while everything else is placed relative
% to named nodes, so it will not follow if the nodes to its left change width -- confirm alignment.
\draw [-] (5.6,-0.2) -- (8,-0.2);
\node at ([xshift=0em,yshift=-3em]times.south){$\sqrt{d_k}$};
% MASK
\node(mask) at ([xshift=3em,yshift=-2em]tbk2.east){
\begin{tabular}{|l|l|l|}
\hline
\cellcolor{green!20} &\cellcolor{green!20} &\cellcolor{green!20} \\ \hline
\cellcolor{green!20} &\cellcolor{green!20} &\cellcolor{green!20} \\ \hline
\cellcolor{green!20} &\cellcolor{green!20} &\cellcolor{green!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]mask.north){$\mathbf{Mask}$};
%+
\node at ([xshift=-0.6em,yshift=0em]mask.west){$+$};
%)
\node at ([xshift=0.2em,yshift=0em]mask.east){)};
%%%% v
\node(tbv2) at ([xshift=1.2em,yshift=0]mask.east){
\begin{tabular}{|c|}
\hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbv2.north){$\mathbf{V}$};
%------------------------------
% second row: placed 6em below the first "="
\node(eq2) at ([xshift=0em,yshift=-6em]eq1.south){=};
\node(sof2) at ([xshift=2em,yshift=0]eq2.east){softmax(};
% pink matrix in the middle
\node(mid) at ([xshift=1.5em,yshift=0em]sof2.east){
\begin{tabular}{|l|l|l|}
\hline
\cellcolor{pink!30} &\cellcolor{pink!30} &\cellcolor{pink!30} \\ \hline
\cellcolor{pink!30} &\cellcolor{pink!30} &\cellcolor{pink!30} \\ \hline
\cellcolor{pink!30} &\cellcolor{pink!30} &\cellcolor{pink!30} \\ \hline
\end{tabular}
};
% )
\node(bra2) at ([xshift=0.2em,yshift=0]mid.east){)};
% red frame, centred 1.0em below the top edge of the pink matrix (presumably outlining its first row)
\node[rectangle,minimum width=4.0em,minimum height=1.5em,draw=red](p222) at([xshift=0em,yshift=-1.0em]mid.north) {};
%%%% v
\node(tbv3) at ([xshift=0.5em,yshift=0]bra2.east){
\begin{tabular}{|c|}
\hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbv3.north){$\mathbf{V}$};
%------------------------------------
% third row: placed 6em below the second "="
\node(eq3) at ([xshift=0em,yshift=-6em]eq2.south){=};
%%%% softmax result, red matrix
\node(result) at ([xshift=2em,yshift=0]eq3.east){
\begin{tabular}{|l|l|l|}
\hline
\cellcolor{red!20} &\cellcolor{red!20} &\cellcolor{red!20} \\ \hline
\cellcolor{red!20}&\cellcolor{red!20} &\cellcolor{red!20} \\ \hline
\cellcolor{red!20} &\cellcolor{red!20} &\cellcolor{red!20} \\ \hline
\end{tabular}
};
% x
% the node name `times` is reused here; from this point on it refers to this node, not the one in row 1
\node (times) at ([xshift=0.5em,yshift=0em]result.east){$\times$};
%%%% v
\node(tbv4) at ([xshift=0.5em,yshift=0]times.east){
\begin{tabular}{|c|}
\hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\rowcolor{orange!20} \\ \hline
\end{tabular}
};
\node at ([xshift=0em,yshift=0.5em]tbv4.north){$\mathbf{V}$};
%=
\node(eq4) at ([xshift=0.5em,yshift=0em]tbv4.east){=};
%%%% grey matrix (final result)
\node(gre) at ([xshift=0.5em,yshift=0]eq4.east){
\begin{tabular}{|c|}
\hline
\rowcolor{black!15} \\ \hline
\rowcolor{black!15} \\ \hline
\rowcolor{black!15} \\ \hline
\end{tabular}
};
\end{tikzpicture}
\ No newline at end of file
......@@ -581,3 +581,753 @@ year={2013}}
month = {August},
year = {2016}
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%section6
@article{DBLP:journals/corr/abs-1905-13324,
author = {Biao Zhang and
Rico Sennrich},
title = {A Lightweight Recurrent Network for Sequence Modeling},
journal = {CoRR},
volume = {abs/1905.13324},
year = {2019},
url = {http://arxiv.org/abs/1905.13324},
archivePrefix = {arXiv},
eprint = {1905.13324},
timestamp = {Mon, 03 Jun 2019 13:42:33 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1905-13324.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Wu2016GooglesNM,
title={Google's Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
author={Yonghui Wu and Mike Schuster and Zhifeng Chen and Quoc V. Le and Mohammad Norouzi and Wolfgang Macherey and Maxim Krikun and Yuan Cao and Qin Gao and Klaus Macherey and Jeff Klingner and Apurva Shah and Melvin Johnson and Xiaobing Liu and Lukasz Kaiser and Stephan Gouws and Yoshikiyo Kato and Taku Kudo and Hideto Kazawa and Keith Stevens and George Kurian and Nishant Patil and Wei Wang and Cliff Young and Jason Smith and Jason Riesa and Alex Rudnick and Oriol Vinyals and Gregory S. Corrado and Macduff Hughes and Jeffrey Dean},
journal={ArXiv},
year={2016},
volume={abs/1609.08144}
}
@inproceedings{li-etal-2018-simple,
title = "A Simple and Effective Approach to Coverage-Aware Neural Machine Translation",
author = "Li, Yanyang and
Xiao, Tong and
Li, Yinqiao and
Wang, Qiang and
Xu, Changming and
Zhu, Jingbo",
booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)",
month = jul,
year = "2018",
address = "Melbourne, Australia",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/P18-2047",
doi = "10.18653/v1/P18-2047",
pages = "292--297",
abstract = "We offer a simple and effective method to seek a better balance between model confidence and length preference for Neural Machine Translation (NMT). Unlike the popular length normalization and coverage models, our model does not require training nor reranking the limited n-best outputs. Moreover, it is robust to large beam sizes, which is not well studied in previous work. On the Chinese-English and English-German translation tasks, our approach yields +0.4$\sim$1.5 BLEU improvements over the state-of-the-art baselines.",
}
@article{DBLP:journals/corr/LinFSYXZB17,
author = {Zhouhan Lin and
Minwei Feng and
C{\'{\i}}cero Nogueira dos Santos and
Mo Yu and
Bing Xiang and
Bowen Zhou and
Yoshua Bengio},
title = {A Structured Self-attentive Sentence Embedding},
journal = {CoRR},
volume = {abs/1703.03130},
year = {2017},
url = {http://arxiv.org/abs/1703.03130},
archivePrefix = {arXiv},
eprint = {1703.03130},
timestamp = {Mon, 13 Aug 2018 16:46:06 +0200},
biburl = {https://dblp.org/rec/journals/corr/LinFSYXZB17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/ChenLCL17,
author = {Yun Chen and
Yang Liu and
Yong Cheng and
Victor O. K. Li},
title = {A Teacher-Student Framework for Zero-Resource Neural Machine Translation},
journal = {CoRR},
volume = {abs/1705.00753},
year = {2017},
url = {http://arxiv.org/abs/1705.00753},
archivePrefix = {arXiv},
eprint = {1705.00753},
timestamp = {Thu, 04 Jul 2019 16:25:18 +0200},
biburl = {https://dblp.org/rec/journals/corr/ChenLCL17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-00631,
author = {Biao Zhang and
Deyi Xiong and
Jinsong Su},
title = {Accelerating Neural Transformer via an Average Attention Network},
journal = {CoRR},
volume = {abs/1805.00631},
year = {2018},
url = {http://arxiv.org/abs/1805.00631},
archivePrefix = {arXiv},
eprint = {1805.00631},
timestamp = {Mon, 13 Aug 2018 16:46:01 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1805-00631.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/CourbariauxB16,
author = {Matthieu Courbariaux and
Yoshua Bengio},
title = {BinaryNet: Training Deep Neural Networks with Weights and Activations
Constrained to +1 or -1},
journal = {CoRR},
volume = {abs/1602.02830},
year = {2016},
url = {http://arxiv.org/abs/1602.02830},
archivePrefix = {arXiv},
eprint = {1602.02830},
timestamp = {Mon, 13 Aug 2018 16:46:57 +0200},
biburl = {https://dblp.org/rec/journals/corr/CourbariauxB16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1905-09418,
author = {Elena Voita and
David Talbot and
Fedor Moiseev and
Rico Sennrich and
Ivan Titov},
title = {Analyzing Multi-Head Self-Attention: Specialized Heads Do the Heavy
Lifting, the Rest Can Be Pruned},
journal = {CoRR},
volume = {abs/1905.09418},
year = {2019},
url = {http://arxiv.org/abs/1905.09418},
archivePrefix = {arXiv},
eprint = {1905.09418},
timestamp = {Wed, 29 May 2019 11:27:50 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1905-09418.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Hand-written entry: no exported citation record was found for this paper.
% Authors must be separated by " and " so that BibTeX sees three names instead of one.
@article{liumodel,
title = {层次短语翻译的神经网络调序模型},
author = {李鹏 and 刘洋 and 孙茂松},
journal = {清华大学学报(自然科学版)},
pages = {1529--1533},
year = {2014}
}
@incollection{NIPS2017_7181,
title = {Attention is All you Need},
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
booktitle = {Advances in Neural Information Processing Systems 30},
editor = {I. Guyon and U. V. Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett},
pages = {5998--6008},
year = {2017},
publisher = {Curran Associates, Inc.},
url = {http://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf}
}
@article{DBLP:journals/corr/ZhangZ16c,
author = {Jiajun Zhang and
Chengqing Zong},
title = {Bridging Neural Machine Translation and Bilingual Dictionaries},
journal = {CoRR},
volume = {abs/1610.07272},
year = {2016},
url = {http://arxiv.org/abs/1610.07272},
archivePrefix = {arXiv},
eprint = {1610.07272},
timestamp = {Mon, 13 Aug 2018 16:47:14 +0200},
biburl = {https://dblp.org/rec/journals/corr/ZhangZ16c.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/SeeLM16,
author = {Abigail See and
Minh{-}Thang Luong and
Christopher D. Manning},
title = {Compression of Neural Machine Translation Models via Pruning},
journal = {CoRR},
volume = {abs/1606.09274},
year = {2016},
url = {http://arxiv.org/abs/1606.09274},
archivePrefix = {arXiv},
eprint = {1606.09274},
timestamp = {Mon, 13 Aug 2018 16:48:35 +0200},
biburl = {https://dblp.org/rec/journals/corr/SeeLM16.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1805-10163,
author = {Elena Voita and
Pavel Serdyukov and
Rico Sennrich and
Ivan Titov},
title = {Context-Aware Neural Machine Translation Learns Anaphora Resolution},
journal = {CoRR},
volume = {abs/1805.10163},
year = {2018},
url = {http://arxiv.org/abs/1805.10163},
archivePrefix = {arXiv},
eprint = {1805.10163},
timestamp = {Mon, 13 Aug 2018 16:49:01 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1805-10163.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/GehringAGYD17,
author = {Jonas Gehring and
Michael Auli and
David Grangier and
Denis Yarats and
Yann N. Dauphin},
title = {Convolutional Sequence to Sequence Learning},
journal = {CoRR},
volume = {abs/1705.03122},
year = {2017},
url = {http://arxiv.org/abs/1705.03122},
archivePrefix = {arXiv},
eprint = {1705.03122},
timestamp = {Mon, 13 Aug 2018 16:48:03 +0200},
biburl = {https://dblp.org/rec/journals/corr/GehringAGYD17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Ba2016LayerN,
title={Layer Normalization},
author={Jimmy Ba and Jamie Ryan Kiros and Geoffrey E. Hinton},
journal={ArXiv},
year={2016},
volume={abs/1607.06450}
}
@article{DBLP:journals/corr/HeZRS15,
author = {Kaiming He and
Xiangyu Zhang and
Shaoqing Ren and
Jian Sun},
title = {Deep Residual Learning for Image Recognition},
journal = {CoRR},
volume = {abs/1512.03385},
year = {2015},
url = {http://arxiv.org/abs/1512.03385},
archivePrefix = {arXiv},
eprint = {1512.03385},
timestamp = {Wed, 17 Apr 2019 17:23:45 +0200},
biburl = {https://dblp.org/rec/journals/corr/HeZRS15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Hinton2015Distilling,
title={Distilling the Knowledge in a Neural Network},
author={Hinton, Geoffrey and Vinyals, Oriol and Dean, Jeff},
journal={Computer Science},
volume={14},
number={7},
pages={38-39},
year={2015},
}
@InProceedings{pmlr-v9-glorot10a,
title = {Understanding the difficulty of training deep feedforward neural networks},
author = {Xavier Glorot and Yoshua Bengio},
booktitle = {Proceedings of the Thirteenth International Conference on Artificial Intelligence and Statistics},
pages = {249--256},
year = {2010},
editor = {Yee Whye Teh and Mike Titterington},
volume = {9},
series = {Proceedings of Machine Learning Research},
address = {Chia Laguna Resort, Sardinia, Italy},
month = {13--15 May},
publisher = {PMLR},
pdf = {http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf},
url = {http://proceedings.mlr.press/v9/glorot10a.html},
abstract = {Whereas before 2006 it appears that deep multi-layer neural networks were not successfully trained, since then several algorithms have been shown to successfully train them, with experimental results showing the superiority of deeper vs less deep architectures. All these experimental results were obtained with new initialization or training mechanisms. Our objective here is to understand better why standard gradient descent from random initialization is doing so poorly with deep neural networks, to better understand these recent relative successes and help design better algorithms in the future. We first observe the influence of the non-linear activations functions. We find that the logistic sigmoid activation is unsuited for deep networks with random initialization because of its mean value, which can drive especially the top hidden layer into saturation. Surprisingly, we find that saturated units can move out of saturation by themselves, albeit slowly, and explaining the plateaus sometimes seen when training neural networks. We find that a new non-linearity that saturates less can often be beneficial. Finally, we study how activations and gradients vary across layers and during training, with the idea that training may be more difficult when the singular values of the Jacobian associated with each layer are far from 1. Based on these considerations, we propose a new initialization scheme that brings substantially faster convergence.}
}
@article{DBLP:journals/corr/LuongPM15,
author = {Minh{-}Thang Luong and
Hieu Pham and
Christopher D. Manning},
title = {Effective Approaches to Attention-based Neural Machine Translation},
journal = {CoRR},
volume = {abs/1508.04025},
year = {2015},
url = {http://arxiv.org/abs/1508.04025},
archivePrefix = {arXiv},
eprint = {1508.04025},
timestamp = {Mon, 13 Aug 2018 16:46:14 +0200},
biburl = {https://dblp.org/rec/journals/corr/LuongPM15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1906-00532,
author = {Aishwarya Bhandare and
Vamsi Sripathi and
Deepthi Karkada and
Vivek Menon and
Sun Choi and
Kushal Datta and
Vikram Saletore},
title = {Efficient 8-Bit Quantization of Transformer Neural Machine Language
Translation Model},
journal = {CoRR},
volume = {abs/1906.00532},
year = {2019},
url = {http://arxiv.org/abs/1906.00532},
archivePrefix = {arXiv},
eprint = {1906.00532},
timestamp = {Thu, 13 Jun 2019 13:36:00 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1906-00532.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@InProceedings{Liu_2019_CVPR,
author = {Liu, Shikun and Johns, Edward and Davison, Andrew J.},
title = {End-To-End Multi-Task Learning With Attention},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2019}
}
@article{MoradiInterrogating,
title={Interrogating the Explanatory Power of Attention in Neural Machine Translation},
author={Moradi, Pooya and Kambhatla, Nishant and Sarkar, Anoop},
}
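@inproceedings{WangLearning,
title={Learning Deep Transformer Models for Machine Translation},
author={Wang, Qiang and Li, Bei and Xiao, Tong and Zhu, Jingbo and Li, Changliang and Wong, Derek F. and Chao, Lidia S.},
booktitle={Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
year={2019},
}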
@article{JMLR:v15:srivastava14a,
author = {Nitish Srivastava and Geoffrey Hinton and Alex Krizhevsky and Ilya Sutskever and Ruslan Salakhutdinov},
title = {Dropout: A Simple Way to Prevent Neural Networks from Overfitting},
journal = {Journal of Machine Learning Research},
year = {2014},
volume = {15},
pages = {1929-1958},
url = {http://jmlr.org/papers/v15/srivastava14a.html}
}
@InProceedings{Szegedy_2016_CVPR,
author = {Szegedy, Christian and Vanhoucke, Vincent and Ioffe, Sergey and Shlens, Jon and Wojna, Zbigniew},
title = {Rethinking the Inception Architecture for Computer Vision},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2016}
}
@article{BENGIO1994Learning,
title={Learning Long-term Dependencies With Gradient Descent is Difficult},
author={Bengio, Yoshua and Simard, Patrice and Frasconi, Paolo},
journal={IEEE Trans Neural Netw},
volume={5},
year={1994},
}
@article{Cho2014Learning,
title={Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation},
author={Cho, Kyunghyun and Van Merrienboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
journal={Computer Science},
year={2014},
}
@article{WangNeural,
title={Neural Machine Translation Advised by Statistical Machine Translation},
author={Wang, Xing and Lu, Zhengdong and Tu, Zhaopeng and Li, Hang and Xiong, Deyi and Zhang, Min},
}
@article{HochreiterLong,
title={Long Short-Term Memory},
author={Hochreiter, S and Schmidhuber, J},
journal={Neural Computation},
volume={9},
number={8},
pages={1735--1780},
year={1997},
}
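@inproceedings{TuModeling,
title={Modeling Coverage for Neural Machine Translation},
author={Tu, Zhaopeng and Lu, Zhengdong and Liu, Yang and Liu, Xiaohua and Li, Hang},
booktitle={Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
year={2016},
}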
@inproceedings{devlin-etal-2014-fast,
title = "Fast and Robust Neural Network Joint Models for Statistical Machine Translation",
author = "Devlin, Jacob and
Zbib, Rabih and
Huang, Zhongqiang and
Lamar, Thomas and
Schwartz, Richard and
Makhoul, John",
booktitle = "Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
month = jun,
year = "2014",
address = "Baltimore, Maryland",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/P14-1129",
doi = "10.3115/v1/P14-1129",
pages = "1370--1380",
}
@inproceedings{wang-etal-2018-multi-layer,
title = "Multi-layer Representation Fusion for Neural Machine Translation",
author = "Wang, Qiang and
Li, Fuxue and
Xiao, Tong and
Li, Yanyang and
Li, Yinqiao and
Zhu, Jingbo",
booktitle = "Proceedings of the 27th International Conference on Computational Linguistics",
month = aug,
year = "2018",
address = "Santa Fe, New Mexico, USA",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/C18-1255",
pages = "3015--3026",
abstract = "Neural machine translation systems require a number of stacked layers for deep models. But the prediction depends on the sentence representation of the top-most layer with no access to low-level representations. This makes it more difficult to train the model and poses a risk of information loss to prediction. In this paper, we propose a multi-layer representation fusion (MLRF) approach to fusing stacked layers. In particular, we design three fusion functions to learn a better representation from the stack. Experimental results show that our approach yields improvements of 0.92 and 0.56 BLEU points over the strong Transformer baseline on IWSLT German-English and NIST Chinese-English MT tasks respectively. The result is new state-of-the-art in German-English translation.",
}
@article{DBLP:journals/corr/abs-1811-00498,
author = {Ra{\'{u}}l V{\'{a}}zquez and
Alessandro Raganato and
J{\"{o}}rg Tiedemann and
Mathias Creutz},
title = {Multilingual {NMT} with a language-independent attention bridge},
journal = {CoRR},
volume = {abs/1811.00498},
year = {2018},
url = {http://arxiv.org/abs/1811.00498},
archivePrefix = {arXiv},
eprint = {1811.00498},
timestamp = {Thu, 22 Nov 2018 17:58:30 +0100},
biburl = {https://dblp.org/rec/journals/corr/abs-1811-00498.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{bahdanau2014neural,
title={Neural machine translation by jointly learning to align and translate},
author={Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
journal={arXiv preprint arXiv:1409.0473},
year={2014}
}
@article{DBLP:journals/corr/SennrichFCBHHJL17,
author = {Rico Sennrich and
Orhan Firat and
Kyunghyun Cho and
Alexandra Birch and
Barry Haddow and
Julian Hitschler and
Marcin Junczys{-}Dowmunt and
Samuel L{\"{a}}ubli and
Antonio Valerio Miceli Barone and
Jozef Mokry and
Maria Nadejde},
title = {Nematus: a Toolkit for Neural Machine Translation},
journal = {CoRR},
volume = {abs/1703.04357},
year = {2017},
url = {http://arxiv.org/abs/1703.04357},
archivePrefix = {arXiv},
eprint = {1703.04357},
timestamp = {Mon, 13 Aug 2018 16:46:45 +0200},
biburl = {https://dblp.org/rec/journals/corr/SennrichFCBHHJL17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{Li2020NeuralMT,
title={Neural Machine Translation with Joint Representation},
author={YanYang Li and Qiang Wang and Tong Xiao and T Liu and Jingbo Zhu},
journal={ArXiv},
year={2020},
volume={abs/2002.06546}
}
@article{Hassan2018AchievingHP,
title={Achieving Human Parity on Automatic Chinese to English News Translation},
author={Hany Hassan and Anthony Aue and Chang Chen and Vishal Chowdhary and Jonathan Clark and Christian Federmann and Xuedong Huang and Marcin Junczys-Dowmunt and William Lewis and Mengnan Li and Shujie Liu and Tie-Yan Liu and Renqian Luo and Arul Menezes and Tao Qin and Frank Seide and Xu Tan and Fei Tian and Lijun Wu and Shuangzhi Wu and Yingce Xia and Dongdong Zhang and Zhirui Zhang and Ming Zhou},
journal={ArXiv},
year={2018},
volume={abs/1803.05567}
}
@article{StahlbergNeural,
title={Neural Machine Translation: A Review},
author={Stahlberg, Felix},
}
@inproceedings{Bentivogli2016NeuralVP,
title={Neural versus Phrase-Based Machine Translation Quality: a Case Study},
author={Luisa Bentivogli and Arianna Bisazza and Mauro Cettolo and Marcello Federico},
booktitle={EMNLP},
year={2016}
}
@article{Gu2017NonAutoregressiveNM,
title={Non-Autoregressive Neural Machine Translation},
author={Jiatao Gu and James Bradbury and Caiming Xiong and Victor O. K. Li and Richard Socher},
journal={ArXiv},
year={2017},
volume={abs/1711.02281}
}
@inproceedings{li-etal-2019-word,
title = "On the Word Alignment from Neural Machine Translation",
author = "Li, Xintong and
Li, Guanlin and
Liu, Lemao and
Meng, Max and
Shi, Shuming",
booktitle = "Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics",
month = jul,
year = "2019",
address = "Florence, Italy",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/P19-1124",
doi = "10.18653/v1/P19-1124",
pages = "1293--1303",
abstract = "Prior researches suggest that neural machine translation (NMT) captures word alignment through its attention mechanism, however, this paper finds attention may almost fail to capture word alignment for some NMT models. This paper thereby proposes two methods to induce word alignment which are general and agnostic to specific NMT models. Experiments show that both methods induce much better word alignment than attention. This paper further visualizes the translation through the word alignment induced by NMT. In particular, it analyzes the effect of alignment errors on translation errors at word level and its quantitative analysis over many testing examples consistently demonstrate that alignment errors are likely to lead to translation errors measured by different metrics.",
}
@inproceedings{Sun2019PatientKD,
title={Patient Knowledge Distillation for BERT Model Compression},
author={Siqi Sun and Yu Cheng and Zhe Gan and Jingjing Liu},
booktitle={EMNLP/IJCNLP},
year={2019}
}
@article{Wu2019PayLA,
title={Pay Less Attention with Lightweight and Dynamic Convolutions},
author={Felix Wu and Angela Fan and Alexei Baevski and Yann Dauphin and Michael Auli},
journal={ArXiv},
year={2019},
volume={abs/1901.10430}
}
@inproceedings{Zhang2017PriorKI,
title={Prior Knowledge Integration for Neural Machine Translation using Posterior Regularization},
author={Jiacheng Zhang and Yang Liu and Huanbo Luan and Jingfang Xu and Maosong Sun},
booktitle={ACL},
year={2017}
}
@article{Shaw2018SelfAttentionWR,
title={Self-Attention with Relative Position Representations},
author={Peter Shaw and Jakob Uszkoreit and Ashish Vaswani},
journal={ArXiv},
year={2018},
volume={abs/1803.02155}
}
@article{DBLP:journals/corr/abs-1904-03107,
author = {Baosong Yang and
Longyue Wang and
Derek F. Wong and
Lidia S. Chao and
Zhaopeng Tu},
title = {Convolutional Self-Attention Networks},
journal = {CoRR},
volume = {abs/1904.03107},
year = {2019},
url = {http://arxiv.org/abs/1904.03107},
archivePrefix = {arXiv},
eprint = {1904.03107},
timestamp = {Wed, 24 Apr 2019 12:21:25 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1904-03107.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@incollection{NIPS2014_5346,
title = {Sequence to Sequence Learning with Neural Networks},
author = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V},
booktitle = {Advances in Neural Information Processing Systems 27},
editor = {Z. Ghahramani and M. Welling and C. Cortes and N. D. Lawrence and K. Q. Weinberger},
pages = {3104--3112},
year = {2014},
publisher = {Curran Associates, Inc.},
url = {http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf}
}
@inproceedings{Xiao2019SharingAW,
title={Sharing Attention Weights for Fast Transformer},
author={Tong Xiao and Yinqiao Li and Jingbo Zhu and Zheng-tao Yu and T Liu},
booktitle={IJCAI},
year={2019}
}
@article{DBLP:journals/corr/PaulusXS17,
author = {Romain Paulus and
Caiming Xiong and
Richard Socher},
title = {A Deep Reinforced Model for Abstractive Summarization},
journal = {CoRR},
volume = {abs/1705.04304},
year = {2017},
url = {http://arxiv.org/abs/1705.04304},
archivePrefix = {arXiv},
eprint = {1705.04304},
timestamp = {Mon, 13 Aug 2018 16:48:58 +0200},
biburl = {https://dblp.org/rec/journals/corr/PaulusXS17.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/RushCW15,
author = {Alexander M. Rush and
Sumit Chopra and
Jason Weston},
title = {A Neural Attention Model for Abstractive Sentence Summarization},
journal = {CoRR},
volume = {abs/1509.00685},
year = {2015},
url = {http://arxiv.org/abs/1509.00685},
archivePrefix = {arXiv},
eprint = {1509.00685},
timestamp = {Mon, 13 Aug 2018 16:46:49 +0200},
biburl = {https://dblp.org/rec/journals/corr/RushCW15.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{kalchbrenner-blunsom-2013-recurrent,
title = "Recurrent Continuous Translation Models",
author = "Kalchbrenner, Nal and
Blunsom, Phil",
booktitle = "Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing",
month = oct,
year = "2013",
address = "Seattle, Washington, USA",
publisher = "Association for Computational Linguistics",
url = "https://www.aclweb.org/anthology/D13-1176",
pages = "1700--1709",
}
%//Simple Recurrent Units for Highly Parallelizable Recurrence
@article{Lei2017TrainingRA,
title={Training RNNs as Fast as CNNs},
author={Tao Lei and Yu Zhang and Yoav Artzi},
journal={ArXiv},
year={2017},
volume={abs/1709.02755}
}
@inproceedings{Zhang2018SimplifyingNM,
title={Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated Recurrent Networks},
author={Biao Zhang and Deyi Xiong and Jinsong Su and Qian Lin and Huiji Zhang},
booktitle={EMNLP},
year={2018}
}
@inproceedings{Zhang2018SpeedingUN,
title={Speeding Up Neural Machine Translation Decoding by Cube Pruning},
author={Wen Zhang and Liang Huang and Yang Feng and Lei Shen and Qun Liu},
booktitle={EMNLP},
year={2018}
}
@article{Chen2018TheBO,
title={The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation},
author={Mia Xu Chen and Orhan Firat and Ankur Bapna and Melvin Johnson and Wolfgang Macherey and George Foster and Llion Jones and Niki Parmar and Michael Schuster and Zhi-Feng Chen and Yonghui Wu and Macduff Hughes},
journal={ArXiv},
year={2018},
volume={abs/1804.09849}
}
@article{HochreiterThe,
title={The Vanishing Gradient Problem During Learning Recurrent Neural Nets and Problem Solutions},
author={Hochreiter, S.},
journal={International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems},
volume={6},
number={2},
pages={107--116},
year={1998},
}
@inproceedings{Yang2017TowardsBH,
title={Towards Bidirectional Hierarchical Representations for Attention-based Neural Machine Translation},
author={Baosong Yang and Derek F. Wong and Tong Xiao and Lidia S. Chao and Jingbo Zhu},
booktitle={EMNLP},
year={2017}
}
@inproceedings{Dai2019TransformerXLAL,
title={Transformer-XL: Attentive Language Models Beyond a Fixed-Length Context},
author={Zihang Dai and Zhilin Yang and Yiming Yang and Jaime G. Carbonell and Quoc V. Le and Ruslan Salakhutdinov},
booktitle={ACL},
year={2019}
}
@article{DBLP:journals/corr/abs-1808-09374,
author = {Xinyi Wang and
Hieu Pham and
Pengcheng Yin and
Graham Neubig},
title = {A Tree-based Decoder for Neural Machine Translation},
journal = {CoRR},
volume = {abs/1808.09374},
year = {2018},
url = {http://arxiv.org/abs/1808.09374},
archivePrefix = {arXiv},
eprint = {1808.09374},
timestamp = {Mon, 03 Sep 2018 13:36:40 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1808-09374.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-1809-01854,
author = {Jetic Gu and
Hassan Shavarani and
Anoop Sarkar},
title = {Top-down Tree Structured Decoding with Syntactic Connections for Neural
Machine Translation and Parsing},
journal = {CoRR},
volume = {abs/1809.01854},
year = {2018},
url = {http://arxiv.org/abs/1809.01854},
archivePrefix = {arXiv},
eprint = {1809.01854},
timestamp = {Fri, 05 Oct 2018 11:34:52 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-1809-01854.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
@inproceedings{Wang2019TreeTI,
title={Tree Transformer: Integrating Tree Structures into Self-Attention},
author={Yau-Shian Wang and Hung-yi Lee and Yun-Nung Chen},
booktitle={EMNLP/IJCNLP},
year={2019}
}
@inproceedings{Werlen2018DocumentLevelNM,
title={Document-Level Neural Machine Translation with Hierarchical Attention Networks},
author={Lesly Miculicich Werlen and Dhananjay Ram and Nikolaos Pappas and James Henderson},
booktitle={EMNLP},
year={2018}
}
@inproceedings{Zhang2017Fast,
title={Fast Parallel Training of Neural Language Models},
author={Zhang, Chunliang and Xiao, Tong and Zhu, Jingbo and Liu, Tongran},
year={2017},
}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -55,7 +55,7 @@
\IfFileExists{C:/WINDOWS/win.ini}
{\newcommand{\mycfont}{song}}
{\newcommand{\mycfont}{gbsn}}
%{\newcommand{\mycfont}{gbsn}}
\begin{CJK}{UTF8}{\mycfont}
\end{CJK}
......@@ -105,7 +105,7 @@
\include{Chapter1/chapter1}
\include{Chapter2/chapter2}
\include{Chapter3/chapter3}
%\include{Chapter6/chapter6}
\include{Chapter6/chapter6}
%----------------------------------------------------------------------------------------
......
......@@ -559,4 +559,34 @@ addtohook={%
\usepackage{multirow}
\usepackage{tcolorbox}
% \dash: typesets the hyphen run `------` raised by 0.5mm.
\newcommand{\dash}{\raisebox{0.5mm}{------}}% Chinese dash
\usepackage{colortbl} % table coloring
% Length register; its use is not visible in this part of the file.
\newlength{\base}
% Dimension registers written by the \Extract... helpers below.
% \TMP receives the component a helper does not want to keep.
\newdimen\XCoord
\newdimen\YCoord
\newdimen\TMP
% \ExtractCoordinate{<coordinate>}: visits the coordinate with an undrawn \path,
% then hands \XCoord and \YCoord to \pgfgetlastxy to receive the x and y of
% that last path position.
% NOTE(review): \path is a TikZ command, so these helpers presumably have to be
% called inside a tikzpicture -- confirm against the call sites.
\newcommand*{\ExtractCoordinate}[1]{\path (#1); \pgfgetlastxy{\XCoord}{\YCoord};}%
% \ExtractX{<coordinate>}: same as above, but keeps only x in \XCoord
% (y goes to the scratch register \TMP, so \YCoord is left untouched).
\newcommand*{\ExtractX}[1]{\path (#1); \pgfgetlastxy{\XCoord}{\TMP};}%
% \ExtractY{<coordinate>}: same as above, but keeps only y in \YCoord
% (x goes to the scratch register \TMP, so \XCoord is left untouched).
\newcommand*{\ExtractY}[1]{\path (#1); \pgfgetlastxy{\TMP}{\YCoord};}%
% \specialcell[<pos>]{<colspec>}{<content>}: wraps <content> in a nested tabular.
%   #1 (optional, default c) is the tabular's position argument,
%   #2 is the column specification, placed between @{} on both sides,
%   #3 is the tabular body.
\newcommand{\specialcell}[3][c]{%
\begin{tabular}[#1]{@{}#2@{}}#3\end{tabular}}
\usetikzlibrary{calc,intersections}
\usetikzlibrary{matrix}
\usetikzlibrary{patterns}
\usetikzlibrary{shadows.blur}
\usepgflibrary{arrows}
%\usetikzlibrary{arrows}
%\usetikzlibrary{decorations}
\usetikzlibrary{arrows,shapes}
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论