NiuTrans / mtbookv2 · Commits · b9b1020b
Commit b9b1020b authored Nov 25, 2020 by zengxin

Merge branch 'zengxin' into 'caorunzhe'

Zengxin

See merge request !470

parents 12cb6a07 cf9b4cdb
Showing 10 changed files with 145 additions and 49 deletions (+145 -49)
Chapter11/Figures/figure-fairseq-0.tex                            +3   -3
Chapter11/Figures/figure-fairseq-2.tex                            +3   -3
Chapter11/Figures/figure-fairseq-3.tex                            +3   -3
Chapter11/Figures/figure-max-pooling.tex                          +2   -2
Chapter11/Figures/figure-single-glu.tex                           +5   -4
Chapter11/Figures/figure-standard.tex                             +1   -1
Chapter11/Figures/figure-use-cnn-in-sentence-classification.tex   +2   -2
Chapter11/chapter11.tex                                           +0   -0
Chapter12/chapter12.tex                                           +1   -1
bibliography.bib                                                  +125 -30
Chapter11/Figures/figure-fairseq-0.tex
View file @ b9b1020b
@@ -34,7 +34,7 @@
 \node [anchor=north,word] (tgt_1) at ([yshift=-0.4em]i_0.south) {$<$p$>$};
 \node [anchor=north,word] at ([yshift=-0.4em]i_1.south) {$<$p$>$};
-\node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {$<$s$>$};
+\node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {$<$sos$>$};
 \node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {go};
 \node [anchor=north,word] at ([yshift=-0.4em]i_4.south) {to};
 \node [anchor=north,word] (tgt_2) at ([yshift=-0.4em]i_5.south) {school};
@@ -103,7 +103,7 @@
 \node [anchor=north,word] at ([yshift=-0.4em]i_0.south) {go};
 \node [anchor=north,word] at ([yshift=-0.4em]i_1.south) {to};
 \node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {school};
-\node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {$<$/s$>$};
+\node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {$<$eos$>$};
 \foreach \point in {0,1,2,3}{
 \node [cir,font=\fontsize{6}{6}\selectfont,inner sep=0.8pt] (c_\point) at (8.2cm+\point*2em,7.5cm-1em*\point) {\bm{$\sum$}};
@@ -140,7 +140,7 @@
 \node [anchor=south,word] (src_1) at ([xshift=-2em,yshift=0.4em]r_0.north) {$<$p$>$};
 \node [anchor=south,word] at ([yshift=0.4em]r_0.north) {去};
 \node [anchor=south,word] at ([yshift=0.4em]r_1.north) {上学};
-\node [anchor=south,word] at ([yshift=0.4em]r_2.north) {$<$s$>$};
+\node [anchor=south,word] at ([yshift=0.4em]r_2.north) {$<$sos$>$};
 \node [anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north) {$<$p$>$};
Chapter11/Figures/figure-fairseq-2.tex
View file @ b9b1020b
@@ -34,7 +34,7 @@
 \node [anchor=north,word] at ([yshift=-0.4em]i_0.south) {$<$p$>$};
 \node [anchor=north,word] at ([yshift=-0.4em]i_1.south) {$<$p$>$};
-\node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {$<$s$>$};
+\node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {$<$sos$>$};
 \node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {go};
 \node [anchor=north,word] at ([yshift=-0.4em]i_4.south) {to};
 \node [anchor=north,word] at ([yshift=-0.4em]i_5.south) {school};
@@ -98,7 +98,7 @@
 \node [anchor=north,word] at ([yshift=-0.4em]i_0.south) {go};
 \node [anchor=north,word] at ([yshift=-0.4em]i_1.south) {to};
 \node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {school};
-\node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {$<$/s$>$};
+\node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {$<$eos$>$};
 \foreach \point in {0,1,2,3}{
 \node [cir,font=\fontsize{6}{6}\selectfont,inner sep=0.8pt] (c_\point) at (8.2cm+\point*2em,7.5cm-1em*\point) {\bm{$\sum$}};
@@ -135,7 +135,7 @@
 \node [anchor=south,word] (src_1) at ([xshift=-2em,yshift=0.4em]r_0.north) {$<$p$>$};
 \node [anchor=south,word] at ([yshift=0.4em]r_0.north) {去};
 \node [anchor=south,word] at ([yshift=0.4em]r_1.north) {上学};
-\node [anchor=south,word] at ([yshift=0.4em]r_2.north) {$<$s$>$};
+\node [anchor=south,word] at ([yshift=0.4em]r_2.north) {$<$sos$>$};
 \node [anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north) {$<$p$>$};
Chapter11/Figures/figure-fairseq-3.tex
View file @ b9b1020b
@@ -34,7 +34,7 @@
 \node [anchor=north,word] at ([yshift=-0.4em]i_0.south) {$<$p$>$};
 \node [anchor=north,word] at ([yshift=-0.4em]i_1.south) {$<$p$>$};
-\node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {$<$s$>$};
+\node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {$<$sos$>$};
 \node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {go};
 \node [anchor=north,word] at ([yshift=-0.4em]i_4.south) {to};
 \node [anchor=north,word] at ([yshift=-0.4em]i_5.south) {school};
@@ -99,7 +99,7 @@
 \node [anchor=north,word] at ([yshift=-0.4em]i_0.south) {go};
 \node [anchor=north,word] at ([yshift=-0.4em]i_1.south) {to};
 \node [anchor=north,word] at ([yshift=-0.4em]i_2.south) {school};
-\node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {$<$/s$>$};
+\node [anchor=north,word] at ([yshift=-0.4em]i_3.south) {$<$eos$>$};
 \foreach \point in {0,1,2,3}{
 \node [cir,font=\fontsize{6}{6}\selectfont,inner sep=0.8pt] (c_\point) at (8.2cm+\point*2em,7.5cm-1em*\point) {\bm{$\sum$}};
@@ -136,7 +136,7 @@
 \node [anchor=south,word] (src_1) at ([xshift=-2em,yshift=0.4em]r_0.north) {$<$p$>$};
 \node [anchor=south,word] at ([yshift=0.4em]r_0.north) {去};
 \node [anchor=south,word] at ([yshift=0.4em]r_1.north) {上学};
-\node [anchor=south,word] at ([yshift=0.4em]r_2.north) {$<$s$>$};
+\node [anchor=south,word] at ([yshift=0.4em]r_2.north) {$<$sos$>$};
 \node [anchor=south,word] (src_2) at ([xshift=2em,yshift=0.4em]r_2.north) {$<$p$>$};
Chapter11/Figures/figure-max-pooling.tex
View file @ b9b1020b
@@ -22,8 +22,8 @@
 \draw [->,thick] ([xshift=0.4cm,yshift=-0.4cm]num8.east)--([xshift=1.5cm,yshift=-0.4cm]num8.east);
 \node (num17)[num,right of = num8,xshift= 2.5cm,fill=red!10]{6};
-\node (num18)[num,right of = num17,xshift= 0.6cm,fill=green!10]{3};
-\node (num19)[num,below of = num17,yshift=-0.6cm,fill=yellow!10]{8};
+\node (num18)[num,right of = num17,xshift= 0.6cm,fill=green!10]{8};
+\node (num19)[num,below of = num17,yshift=-0.6cm,fill=yellow!10]{3};
 \node (num20)[num,below of = num18,yshift= -0.6cm,fill=blue!10]{4};
 \node [right of = num20,xshift= 0.7cm]{};
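(Context for this fix: each of these cells is the output of one max-pooling window, so the swapped values simply put each maximum back in its own window's cell. A minimal NumPy sketch of non-overlapping 2x2 max pooling; the input matrix below is hypothetical, chosen only so the outputs match the corrected figure values 6, 8, 3, 4.)

import numpy as np

def max_pool_2x2(x):
    """Non-overlapping 2x2 max pooling over a 2D feature map."""
    h, w = x.shape
    out = np.zeros((h // 2, w // 2))
    for i in range(0, h, 2):
        for j in range(0, w, 2):
            out[i // 2, j // 2] = x[i:i + 2, j:j + 2].max()
    return out

# Hypothetical 4x4 feature map (not taken from the figure source).
feature_map = np.array([[1, 6, 2, 8],
                        [5, 3, 7, 4],
                        [0, 3, 1, 4],
                        [2, 1, 0, 2]])
print(max_pool_2x2(feature_map))  # [[6. 8.]
                                  #  [3. 4.]]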
Chapter11/Figures/figure-single-glu.tex
View file @ b9b1020b
@@ -63,9 +63,9 @@ $\otimes$: & 按位乘运算 \\
 \draw [-latex,thick] (b.east) -- (c2.west);
 \draw [-latex,thick] (c2.east) -- ([xshift=0.4cm]c2.east);
-\node [inner sep=0pt, font=\tiny] at (0.75cm, -0.4cm) {$\mathbi{X}$};
-\node [inner sep=0pt, font=\tiny] at ([yshift=-0.8cm]a.south) {$\mathbi{B}=\mathbi{X}*\mathbi{V}+\mathbi{b}_{\mathbi{W}}$};
-\node [inner sep=0pt, font=\tiny] at ([yshift=-0.8cm]b.south) {$\mathbi{A}=\mathbi{X}*\mathbi{W}+\mathbi{b}_{\mathbi{V}}$};
-\node [inner sep=0pt, font=\tiny] at (8.2cm, -0.4cm) {$\mathbi{Y}=\mathbi{A}\otimes\sigma(\mathbi{B})$};
+\node [inner sep=0pt, font=\tiny] at (0.75cm, -0.4cm) {$\mathbi{x}$};
+\node [inner sep=0pt, font=\tiny] at ([yshift=-0.8cm]a.south) {$\mathbi{B}=\mathbi{x}*\mathbi{V}+\mathbi{b}_{\mathbi{W}}$};
+\node [inner sep=0pt, font=\tiny] at ([yshift=-0.8cm]b.south) {$\mathbi{A}=\mathbi{x}*\mathbi{W}+\mathbi{b}_{\mathbi{V}}$};
+\node [inner sep=0pt, font=\tiny] at (8.2cm, -0.4cm) {$\mathbi{y}=\mathbi{A}\otimes\sigma(\mathbi{B})$};
 \end{tikzpicture}
\ No newline at end of file
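(The relabeled annotations describe a gated linear unit: y = A ⊗ σ(B), with A = x∗W + b_V and B = x∗V + b_W, where ∗ denotes a convolution in the figure. A minimal NumPy sketch of the gating, treating ∗ as a plain matrix product for brevity; this illustrates the formula, not the book's implementation.)

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def glu(x, W, V, b_V, b_W):
    """Gated linear unit: the linear branch A is gated elementwise by sigmoid(B)."""
    A = x @ W + b_V        # linear branch
    B = x @ V + b_W        # gate branch
    return A * sigmoid(B)  # elementwise product plays the role of ⊗ in the figure

# Hypothetical shapes: two input vectors of dimension 4, output dimension 3.
rng = np.random.default_rng(0)
x = rng.normal(size=(2, 4))
W, V = rng.normal(size=(4, 3)), rng.normal(size=(4, 3))
b_V, b_W = np.zeros(3), np.zeros(3)
print(glu(x, W, V, b_V, b_W).shape)  # (2, 3)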
Chapter11/Figures/figure-standard.tex
View file @ b9b1020b
@@ -40,7 +40,7 @@
 \node [vuale] at ([xshift=0.9em]r3_1.east) {$\mathbi{z}_1$};
 \node (t1) at (2.5em, -1em) {\large{$\cdots$}};
-\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t1.south) {(a) 传统卷积};
+\node [anchor=north,font=\tiny] at ([yshift=-0.2em]t1.south) {(a) 标准卷积};
 \end{scope}
 \begin{scope}[xshift=4cm]
Chapter11/Figures/figure-use-cnn-in-sentence-classification.tex
View file @ b9b1020b
@@ -85,10 +85,10 @@
 %\draw [thick] (3.6cm, -0.3cm) -- (3.6cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{Convolutional layer with \\ multiple filter widths and \\ feature maps} (6cm,-0.5cm) -- (6cm, -0.3cm);
 %\draw [thick] (7.2cm, -0.3cm) -- (7.2cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{Max-over-time\\ pooling} (9cm,-0.5cm) -- (9cm, -0.3cm);
 %\draw [thick] (10cm, -0.3cm) -- (10cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{Fully connected layer \\ with dropout and \\ softmax output} (11.7cm,-0.5cm) -- (11.7cm, -0.3cm);
-\draw [thick] (0cm, -0.3cm) -- (0cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{维度大小为$m \times k$ \\ 的静态与非静态通道 \\ 的句子表示} (2.4cm,-0.5cm) -- (2.4cm, -0.3cm);
+\draw [thick] (0cm, -0.3cm) -- (0cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{维度大小为$m \times K$ \\ 的静态与非静态通道 \\ 的句子表示} (2.4cm,-0.5cm) -- (2.4cm, -0.3cm);
 \draw [thick] (3.6cm, -0.3cm) -- (3.6cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{具有多个不同大小 \\ 的卷积核和特征图 \\ 的卷积层} (6cm,-0.5cm) -- (6cm, -0.3cm);
 \draw [thick] (7.2cm, -0.3cm) -- (7.2cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{最大池化} (9cm,-0.5cm) -- (9cm, -0.3cm);
-\draw [thick] (10cm, -0.3cm) -- (10cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{带有dropout \\ 和softmax输出 \\ 的全连接层} (11.7cm,-0.5cm) -- (11.7cm, -0.3cm);
+\draw [thick] (10cm, -0.3cm) -- (10cm, -0.5cm) -- node[font=\tiny, align=center,yshift=-0.5cm]{带有Dropout \\ 和Softmax输出 \\ 的全连接层} (11.7cm,-0.5cm) -- (11.7cm, -0.3cm);
 %\node [font=\Large] at (5.2cm,-2cm){$h_i = dot(F,x_{i:i+l-1})+b$};
Chapter11/chapter11.tex
View file @ b9b1020b
Diff collapsed. Click to expand.
Chapter12/chapter12.tex
View file @ b9b1020b
@@ -102,7 +102,7 @@
 \parinterval 首先再来回顾一下{\chapterten}介绍的循环神经网络,虽然它很强大,但是也存在一些弊端。其中比较突出的问题是,循环神经网络每个循环单元都有向前依赖性,也就是当前时间步的处理依赖前一时间步处理的结果。这个性质可以使序列的“历史”信息不断被传递,但是也造成模型运行效率的下降。特别是对于自然语言处理任务,序列往往较长,无论是传统的RNN结构,还是更为复杂的LSTM结构,都需要很多次循环单元的处理才能够捕捉到单词之间的长距离依赖。由于需要多个循环单元的处理,距离较远的两个单词之间的信息传递变得很复杂。
-\parinterval 针对这些问题,研究人员提出了一种全新的模型$\ \dash\ $Transformer\index{Transformer}\upcite{vaswani2017attention}。与循环神经网络等传统模型不同,Transformer模型仅仅使用自注意力机制和标准的前馈神经网络,完全不依赖任何循环单元或者卷积操作。自注意力机制的优点在于可以直接对序列中任意两个单元之间的关系进行建模,这使得长距离依赖等问题可以更好地被求解。此外,自注意力机制非常适合在GPU 上进行并行化,因此模型训练的速度更快。表\ref{tab:12-11}对比了RNN、CNN和Transformer层类型的复杂度\footnote{顺序操作数指序列中的位置按顺序操作的次数,由于Transformer和CNN都可以并行计算,所以是1;路径长度指序列中的一个位置和另外任意一个位置在网络中的距离。}。
+\parinterval 针对这些问题,研究人员提出了一种全新的模型$\ \dash\ $Transformer\index{Transformer}\upcite{vaswani2017attention}。与循环神经网络等传统模型不同,Transformer模型仅仅使用自注意力机制和标准的前馈神经网络,完全不依赖任何循环单元或者卷积操作。自注意力机制的优点在于可以直接对序列中任意两个单元之间的关系进行建模,这使得长距离依赖等问题可以更好地被求解。此外,自注意力机制非常适合在GPU 上进行并行化,因此模型训练的速度更快。表\ref{tab:12-11}对比了RNN、CNN和Transformer层类型的复杂度\footnote{顺序操作数指模型处理一个序列所需要的操作数,由于Transformer和CNN都可以并行计算,所以是1;路径长度指序列中任意两个单词在网络中的距离。}。
 %----------------------------------------------
 \begin{table}[htp]
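(The footnote's terms, sequential operations and maximum path length, follow the per-layer comparison in Vaswani et al. (2017); tab:12-11 presumably reports the same figures. A sketch of that comparison, with n the sequence length, d the representation dimension, and k the convolution kernel width:)

\begin{tabular}{l|c|c|c}
Layer type     & Per-layer complexity     & Sequential operations & Maximum path length \\ \hline
Self-attention & $O(n^2 \cdot d)$         & $O(1)$                & $O(1)$              \\
Recurrent      & $O(n \cdot d^2)$         & $O(n)$                & $O(n)$              \\
Convolutional  & $O(k \cdot n \cdot d^2)$ & $O(1)$                & $O(\log_k n)$       \\
\end{tabular}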
bibliography.bib
View file @ b9b1020b
@@ -4975,6 +4975,94 @@ author = {Yoshua Bengio and
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%% chapter 11------------------------------------------------------
+@article{DBLP:journals/pami/RenHG017,
+  author    = {Shaoqing Ren and
+               Kaiming He and
+               Ross Girshick and
+               Jian Sun},
+  title     = {Faster {R-CNN:} Towards Real-Time Object Detection with Region Proposal
+               Networks},
+  journal   = {Institute of Electrical and Electronics Engineers},
+  volume    = {39},
+  number    = {6},
+  pages     = {1137--1149},
+  year      = {2017}
+}
+@inproceedings{DBLP:conf/eccv/LiuAESRFB16,
+  author    = {Wei Liu and
+               Dragomir Anguelov and
+               Dumitru Erhan and
+               Christian Szegedy and
+               Scott Reed and
+               Cheng-Yang Fu and
+               Alexander C. Berg},
+  title     = {{SSD:} Single Shot MultiBox Detector},
+  publisher = {European Conference on Computer Vision},
+  volume    = {9905},
+  pages     = {21--37},
+  publisher = {Springer},
+  year      = {2016}
+}
+@inproceedings{devlin-etal-2014-fast,
+  author    = {Jacob Devlin and
+               Rabih Zbib and
+               Zhongqiang Huang and
+               Thomas Lamar and
+               Richard M. Schwartz and
+               John Makhoul},
+  title     = {Fast and Robust Neural Network Joint Models for Statistical Machine
+               Translation},
+  pages     = {1370--1380},
+  publisher = {Annual Meeting of the Association for Computational Linguistics},
+  year      = {2014}
+}
+@inproceedings{DBLP:conf/acl/WangLLJL15,
+  author    = {Mingxuan Wang and
+               Zhengdong Lu and
+               Hang Li and
+               Wenbin Jiang and
+               Qun Liu},
+  title     = {genCNN: {A} Convolutional Architecture for Word Sequence Prediction},
+  pages     = {1567--1576},
+  publisher = {The Association for Computer Linguistics},
+  year      = {2015}
+}
+@inproceedings{DBLP:conf/icassp/ZhangCJ17,
+  author    = {Yu Zhang and
+               William Chan and
+               Navdeep Jaitly},
+  title     = {Very deep convolutional networks for end-to-end speech recognition},
+  pages     = {4845--4849},
+  publisher = {Institute of Electrical and Electronics Engineers},
+  year      = {2017}
+}
+@inproceedings{DBLP:conf/icassp/DengAY13,
+  author    = {Li Deng and
+               Ossama Abdel-Hamid and
+               Dong Yu},
+  title     = {A deep convolutional neural network using heterogeneous pooling for
+               trading acoustic invariance with phonetic confusion},
+  pages     = {6669--6673},
+  publisher = {Institute of Electrical and Electronics Engineers},
+  year      = {2013}
+}
+@inproceedings{DBLP:journals/corr/LuongPM15,
+  author    = {Thang Luong and
+               Hieu Pham and
+               Christopher D. Manning},
+  title     = {Effective Approaches to Attention-based Neural Machine Translation},
+  publisher = {Conference on Empirical Methods in Natural Language Processing},
+  pages     = {1412--1421},
+  year      = {2015}
+}
+@inproceedings{DBLP:conf/acl-codeswitch/WangCK18,
+  author    = {Changhan Wang and
+               Kyunghyun Cho and
+               ...
@@ -5112,11 +5200,12 @@ author = {Yoshua Bengio and
 }
 @article{Sennrich2016ImprovingNM,
-title={Improving Neural Machine Translation Models with Monolingual Data},
-author={Rico Sennrich and B. Haddow and Alexandra Birch},
-journal={ArXiv},
-year={2016},
-volume={abs/1511.06709}
+  author    = {Rico Sennrich and
+               Barry Haddow and
+               Alexandra Birch},
+  title     = {Improving Neural Machine Translation Models with Monolingual Data},
+  publisher = {The Association for Computer Linguistics},
+  year      = {2016}
 }
 @inproceedings{bahdanau2014neural,
@@ -5130,7 +5219,7 @@ author = {Yoshua Bengio and
 @article{Waibel1989PhonemeRU,
 title={Phoneme recognition using time-delay neural networks},
-author={Alexander H. Waibel and Toshiyuki Hanazawa and Geoffrey E. Hinton and K. Shikano and K. Lang},
+author={Alexander Waibel and Toshiyuki Hanazawa and Geoffrey Hinton and Kiyohiro Shikano and K.J. Lang},
 journal={IEEE Transactions on Acoustics, Speech, and Signal Processing},
 year={1989},
 volume={37},
@@ -5139,7 +5228,7 @@ author = {Yoshua Bengio and
 @article{LeCun1989BackpropagationAT,
 title={Backpropagation Applied to Handwritten Zip Code Recognition},
-author={Y. LeCun and B. Boser and J. Denker and D. Henderson and R. Howard and W. Hubbard and L. Jackel},
+author={Yann LeCun and Bernhard Boser and John Denker and Don Henderson and R. Howard and W.E. Hubbard and Larry Jackel},
 journal={Neural Computation},
 year={1989},
 volume={1},
@@ -5147,7 +5236,7 @@ author = {Yoshua Bengio and
 }
 @article{726791,
-author={Y. {Lecun} and L. {Bottou} and Y. {Bengio} and P. {Haffner}},
+author={Yann {Lecun} and Leon {Bottou} and Y. {Bengio} and Patrick {Haffner}},
 journal={Proceedings of the IEEE},
 title={Gradient-based learning applied to document recognition},
 year={1998},
@@ -5180,7 +5269,7 @@ author = {Yoshua Bengio and
 @article{Girshick2015FastR,
 title={Fast R-CNN},
-author={Ross B. Girshick},
+author={Ross Girshick},
 journal={International Conference on Computer Vision},
 year={2015},
 pages={1440-1448}
@@ -5197,7 +5286,7 @@ author = {Yoshua Bengio and
 @inproceedings{Kalchbrenner2014ACN,
 title={A Convolutional Neural Network for Modelling Sentences},
-author={Nal Kalchbrenner and Edward Grefenstette and P. Blunsom},
+author={Nal Kalchbrenner and Edward Grefenstette and Phil Blunsom},
 publisher={Annual Meeting of the Association for Computational Linguistics},
 pages={655--665},
 year={2014}
@@ -5414,26 +5503,11 @@ author = {Yoshua Bengio and
 year={2017},
 }
-@article{Minaee2020DeepLB,
-title={Deep Learning Based Text Classification: A Comprehensive Review},
-author = {Shervin Minaee and
-          Nal Kalchbrenner and
-          Erik Cambria and
-          Narjes Nikzad and
-          Meysam Chenaghlu and
-          Jianfeng Gao},
-journal={CoRR},
-year={2020},
-volume={abs/2004.03705}
-}
-@article{Sifre2013RotationSA,
-title={Rotation, Scaling and Deformation Invariant Scattering for Texture Discrimination},
-author = {Laurent Sifre and
-          St{\'{e}}phane Mallat},
-journal={IEEE Conference on Computer Vision and Pattern Recognition},
-year={2013},
-pages={1233-1240}
+@article{sifre2014rigid,
+title={Rigid-motion scattering for image classification},
+author={Sifre, Laurent and Mallat, St{\'e}phane},
+year={2014},
+publisher={Citeseer}
 }
 @article{Taigman2014DeepFaceCT,
@@ -5475,6 +5549,27 @@ author = {Yoshua Bengio and
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%%% chapter 12------------------------------------------------------
+@inproceedings{DBLP:conf/coling/ZengLLZZ14,
+  author    = {Daojian Zeng and
+               Kang Liu and
+               Siwei Lai and
+               Guangyou Zhou and
+               Jun Zhao},
+  title     = {Relation Classification via Convolutional Deep Neural Network},
+  pages     = {2335--2344},
+  publisher = {International Conference on Computational Linguistics},
+  year      = {2014}
+}
+@inproceedings{DBLP:conf/acl/JohnsonZ17,
+  author    = {Rie Johnson and
+               Tong Zhang},
+  title     = {Deep Pyramid Convolutional Neural Networks for Text Categorization},
+  pages     = {562--570},
+  publisher = {Association for Computational Linguistics},
+  year      = {2017}
+}
+@inproceedings{DBLP:conf/interspeech/GulatiQCPZYHWZW20,
+  author    = {Anmol Gulati and
+               James Qin and
+               ...