Commit 662af9b1 by 孟霞

合并分支 'caorunzhe' 到 'mengxia'

Caorunzhe

查看合并请求 !243
parents 8c41b8c3 f8cd8e1d
...@@ -147,7 +147,7 @@ ...@@ -147,7 +147,7 @@
\draw [->] ([yshift=-1.6em,xshift=-0.4em]vocab.north east) .. controls +(east:1) and +(west:1) .. ([xshift=0.1em,yshift=0.4em]vocabtopn.west) node [pos=0.3,below] (topnlabel) {\scriptsize{top-3}}; \draw [->] ([yshift=-1.6em,xshift=-0.4em]vocab.north east) .. controls +(east:1) and +(west:1) .. ([xshift=0.1em,yshift=0.4em]vocabtopn.west) node [pos=0.3,below] (topnlabel) {\scriptsize{top-3}};
{ {
\node [anchor=north] (cap) at (vocab.south east) {\scriptsize{\textbf{束搜索($b=3$)}}}; \node [anchor=north] (cap) at (vocab.south east) {\scriptsize{\textbf{束搜索($k=3$)}}};
} }
} }
......

113 KB | W: | H:

170 KB | W: | H:

Chapter10/Figures/mt-history.png
Chapter10/Figures/mt-history.png
Chapter10/Figures/mt-history.png
Chapter10/Figures/mt-history.png
  • 2-up
  • Swipe
  • Onion skin
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
\node [anchor=south west] (alpha1) at ([xshift=-1em]key1.north west) {\scriptsize{$\alpha_1=.2$}}; \node [anchor=south west] (alpha1) at ([xshift=-1em]key1.north west) {\scriptsize{$\alpha_1=.2$}};
\node [anchor=south west] (alpha2) at ([xshift=-1em]key2.north west) {\scriptsize{$\alpha_2=.3$}}; \node [anchor=south west] (alpha2) at ([xshift=-1em]key2.north west) {\scriptsize{$\alpha_2=.3$}};
\node [anchor=south west] (alpha3) at ([xshift=-1em]key3.north west) {\scriptsize{$\alpha_3=.1$}}; \node [anchor=south west] (alpha3) at ([xshift=-1em]key3.north west) {\scriptsize{$\alpha_3=.1$}};
\node [anchor=south west] (alpha4) at ([xshift=-1em]key4.north west) {\scriptsize{$\alpha_4=.3$}}; \node [anchor=south west] (alpha4) at ([xshift=-1em]key4.north west) {\scriptsize{$\alpha_i=.3$}};
\vspace{0.5em} \vspace{0.5em}
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
\node [rnode,anchor=south west,fill=green!20!white] (key7) at ([yshift=2em]key2.north west) {\scriptsize{$h(\textrm{})$}}; \node [rnode,anchor=south west,fill=green!20!white] (key7) at ([yshift=2em]key2.north west) {\scriptsize{$h(\textrm{})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key8) at ([yshift=2em]key3.north west) {\scriptsize{$h(\textrm{沈阳})$}}; \node [rnode,anchor=south west,fill=green!20!white] (key8) at ([yshift=2em]key3.north west) {\scriptsize{$h(\textrm{沈阳})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key9) at ([yshift=2em]key4.north west) {\scriptsize{$h(\textrm{机票})$}}; \node [rnode,anchor=south west,fill=green!20!white] (key9) at ([yshift=2em]key4.north west) {\scriptsize{$h(\textrm{机票})$}};
\node [rnode,anchor=south west] (key10) at ([yshift=2em]key5.north west) {\scriptsize{$h(\textrm{``机票''})$}}; \node [rnode,anchor=south west] (key10) at ([yshift=2em]key5.north west) {\scriptsize{$h(\textrm{机票})$}};
\node [anchor=west] (sep1) at ([xshift=0.3em]key8.east) {\scriptsize{$\textbf{...}$}}; \node [anchor=west] (sep1) at ([xshift=0.3em]key8.east) {\scriptsize{$\textbf{...}$}};
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
\node [anchor=south west] (alpha5) at ([xshift=-1em]key6.north west) {\scriptsize{$\alpha_1=.1$}}; \node [anchor=south west] (alpha5) at ([xshift=-1em]key6.north west) {\scriptsize{$\alpha_1=.1$}};
\node [anchor=south west] (alpha6) at ([xshift=-1em]key7.north west) {\scriptsize{$\alpha_2=.3$}}; \node [anchor=south west] (alpha6) at ([xshift=-1em]key7.north west) {\scriptsize{$\alpha_2=.3$}};
\node [anchor=south west] (alpha7) at ([xshift=-1em]key8.north west) {\scriptsize{$\alpha_3=.2$}}; \node [anchor=south west] (alpha7) at ([xshift=-1em]key8.north west) {\scriptsize{$\alpha_3=.2$}};
\node [anchor=south west] (alpha8) at ([xshift=-1em]key9.north west) {\scriptsize{$\alpha_4=.3$}}; \node [anchor=south west] (alpha8) at ([xshift=-1em]key9.north west) {\scriptsize{$\alpha_i=.3$}};
\end{scope} \end{scope}
\end{tikzpicture} \end{tikzpicture}
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
\node [anchor=north west,snode,minimum width=6.5em] (s2) at ([yshift=-0.3em]s1.south west) {}; \node [anchor=north west,snode,minimum width=6.5em] (s2) at ([yshift=-0.3em]s1.south west) {};
\node [anchor=north west,snode,minimum width=2em] (s3) at ([yshift=-0.3em]s2.south west) {}; \node [anchor=north west,snode,minimum width=2em] (s3) at ([yshift=-0.3em]s2.south west) {};
\node [anchor=east] (label1) at ([xshift=-0.8em,yshift=0.6em]s1.west) {\scriptsize{Shuffled:}}; \node [anchor=east] (label1) at ([xshift=-0.8em,yshift=0.6em]s1.west) {\scriptsize{随机:}};
\node [anchor=west,pnode,minimum width=3em] (p1) at ([xshift=0.3em]s1.east) {}; \node [anchor=west,pnode,minimum width=3em] (p1) at ([xshift=0.3em]s1.east) {};
\node [anchor=west,pnode,minimum width=4em] (p3) at ([xshift=0.3em]s3.east) {}; \node [anchor=west,pnode,minimum width=4em] (p3) at ([xshift=0.3em]s3.east) {};
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
\node [anchor=north west,snode,minimum width=5em] (s5) at ([yshift=-0.3em]s4.south west) {}; \node [anchor=north west,snode,minimum width=5em] (s5) at ([yshift=-0.3em]s4.south west) {};
\node [anchor=north west,snode,minimum width=6.5em] (s6) at ([yshift=-0.3em]s5.south west) {}; \node [anchor=north west,snode,minimum width=6.5em] (s6) at ([yshift=-0.3em]s5.south west) {};
\node [anchor=east] (label2) at ([xshift=-0.8em,yshift=0.6em]s4.west) {\scriptsize{Sorted:}}; \node [anchor=east] (label2) at ([xshift=-0.8em,yshift=0.6em]s4.west) {\scriptsize{按句长排序:}};
\node [anchor=west,pnode,minimum width=1em] (p4) at ([xshift=0.3em]s4.east) {}; \node [anchor=west,pnode,minimum width=1em] (p4) at ([xshift=0.3em]s4.east) {};
\node [anchor=west,pnode,minimum width=1em] (p5) at ([xshift=0.3em]s5.east) {}; \node [anchor=west,pnode,minimum width=1em] (p5) at ([xshift=0.3em]s5.east) {};
......
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
\node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{$\langle$eos$\rangle$}}; \node[srcnode] (src6) at ([xshift=0.5\hnode]src5.south west) {\scriptsize{$\langle$eos$\rangle$}};
% target % target
\node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{}}; \node[tgtnode] (tgt1) at (-6.0*0.5*\hnode,-1.05*\hnode+7.5*0.5*\hnode) {\scriptsize{}};
\node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{什么}}; \node[tgtnode] (tgt2) at ([yshift=-0.5\hnode]tgt1.north east) {\scriptsize{什么}};
\node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{}}; \node[tgtnode] (tgt3) at ([yshift=-0.5\hnode]tgt2.north east) {\scriptsize{}};
\node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{}}; \node[tgtnode] (tgt4) at ([yshift=-0.5\hnode]tgt3.north east) {\scriptsize{}};
...@@ -105,8 +105,8 @@ ...@@ -105,8 +105,8 @@
{ {
% coverage score formula node % coverage score formula node
\node [anchor=north west] (formula) at ([xshift=-0.3\hnode,yshift=-1.5\hnode]attn11.south) {\small{不同$\vectorn{\emph{C}}_j$所对应的源语言词的权重是不同的}}; \node [anchor=north west] (formula) at ([xshift=-0.3\hnode,yshift=-1.5\hnode]attn11.south) {\small{不同$\vectorn{\emph{C}}_j$所对应的源语言词的权重是不同的}};
\node [anchor=north west] (example) at (formula.south west) {\footnotesize{$\vectorn{\emph{C}}_2=0.4 \times \vectorn{\emph{h}}(\textrm{“你”}) + 0.4 \times \vectorn{\emph{h}}(\textrm{“什么”}) +$}}; \node [anchor=north west] (example) at (formula.south west) {\footnotesize{$\vectorn{\emph{C}}_2=0.4 \times \vectorn{\emph{h}}(\textrm{}) + 0.4 \times \vectorn{\emph{h}}(\textrm{什么}) +$}};
\node [anchor=north west] (example2) at ([yshift=0.4em]example.south west) {\footnotesize{$\ \ \ \ \ \ \ \ 0 \times \vectorn{\emph{h}}(\textrm{“都”}) + 0.1 \times \vectorn{\emph{h}}(\textrm{“ 没”}) + ..$}}; \node [anchor=north west] (example2) at ([yshift=0.4em]example.south west) {\footnotesize{$\ \ \ \ \ \ \ \ 0 \times \vectorn{\emph{h}}(\textrm{}) + 0.1 \times \vectorn{\emph{h}}(\textrm{}) + ..$}};
} }
%\visible<3-> %\visible<3->
......
%------------------------------------------- %-------------------------------------------
\begin{tikzpicture} \begin{tikzpicture}
...@@ -17,7 +13,7 @@ ...@@ -17,7 +13,7 @@
\tikzstyle{labelnode} = [above] \tikzstyle{labelnode} = [above]
% alignment matrix % alignment matrix
\begin{scope}[scale=0.9,yshift=0.12in] \begin{scope}[scale=1.2,yshift=0.12in]
\foreach \i / \j / \c in \foreach \i / \j / \c in
{0/7/0.2, 1/7/0.45, 2/7/0.15, 3/7/0.15, 4/7/0.15, 5/7/0.15, {0/7/0.2, 1/7/0.45, 2/7/0.15, 3/7/0.15, 4/7/0.15, 5/7/0.15,
0/6/0.35, 1/6/0.45, 2/6/0.15, 3/6/0.15, 4/6/0.15, 5/6/0.15, 0/6/0.35, 1/6/0.45, 2/6/0.15, 3/6/0.15, 4/6/0.15, 5/6/0.15,
...@@ -27,7 +23,7 @@ ...@@ -27,7 +23,7 @@
0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3, 0/2/0.15, 1/2/0.15, 2/2/0.15, 3/2/0.15, 4/2/0.25, 5/2/0.3,
0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.8, 5/1/0.15, 0/1/0.15, 1/1/0.15, 2/1/0.15, 3/1/0.15, 4/1/0.8, 5/1/0.15,
0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.60} 0/0/0.15, 1/0/0.15, 2/0/0.15, 3/0/0.15, 4/0/0.25, 5/0/0.60}
\node[elementnode,minimum size=0.6*1.2cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.2cm*\i-5.4*0.5*1.2cm,0.5*1.2cm*\j-1.05*1.2cm) {}; \node[elementnode,minimum size=0.8*1.2cm*\c,inner sep=0.1pt,fill=blue] (a\i\j) at (0.5*1.2cm*\i-5.4*0.5*1.2cm,0.5*1.2cm*\j-1.05*1.2cm) {};
%attention score labels %attention score labels
\node[align=center] (l17) at (a17) {\scriptsize{{\color{white} .4}}}; \node[align=center] (l17) at (a17) {\scriptsize{{\color{white} .4}}};
...@@ -40,22 +36,22 @@ ...@@ -40,22 +36,22 @@
\node[align=center] (l17) at (a50) {\small{{\color{white} .7}}}; \node[align=center] (l17) at (a50) {\small{{\color{white} .7}}};
% source % source
\node[srcnode] (src1) at (-5.4*0.5*1.2cm,-1.05*1.2cm+7.5*0.5*1.2cm) {\scriptsize{Have}}; \node[srcnode] (src1) at (-5.4*0.5*1.2cm,-1.05*1.2cm+7.5*0.5*1.2cm) {\small{Have}};
\node[srcnode] (src2) at ([xshift=0.6cm]src1.south west) {\scriptsize{you}}; \node[srcnode] (src2) at ([xshift=0.6cm]src1.south west) {\small{you}};
\node[srcnode] (src3) at ([xshift=0.6cm]src2.south west) {\scriptsize{learned}}; \node[srcnode] (src3) at ([xshift=0.6cm]src2.south west) {\small{learned}};
\node[srcnode] (src4) at ([xshift=0.6cm]src3.south west) {\scriptsize{nothing}}; \node[srcnode] (src4) at ([xshift=0.6cm]src3.south west) {\small{nothing}};
\node[srcnode] (src5) at ([xshift=0.6cm]src4.south west) {\scriptsize{?}}; \node[srcnode] (src5) at ([xshift=0.6cm]src4.south west) {\small{?}};
\node[srcnode] (src6) at ([xshift=0.6cm]src5.south west) {\scriptsize{$\langle$eos$\rangle$}}; \node[srcnode] (src6) at ([xshift=0.6cm]src5.south west) {\small{$\langle$eos$\rangle$}};
% target % target
\node[tgtnode] (tgt1) at (-6.0*0.5*1.2cm,-1.05*1.2cm+7.5*0.5*1.2cm) {\scriptsize{}}; \node[tgtnode] (tgt1) at (-6.0*0.5*1.2cm,-1.05*1.2cm+7.5*0.5*1.2cm) {\small{}};
\node[tgtnode] (tgt2) at ([yshift=-0.6cm]tgt1.north east) {\scriptsize{什么}}; \node[tgtnode] (tgt2) at ([yshift=-0.6cm]tgt1.north east) {\small{什么}};
\node[tgtnode] (tgt3) at ([yshift=-0.6cm]tgt2.north east) {\scriptsize{}}; \node[tgtnode] (tgt3) at ([yshift=-0.6cm]tgt2.north east) {\small{}};
\node[tgtnode] (tgt4) at ([yshift=-0.6cm]tgt3.north east) {\scriptsize{}}; \node[tgtnode] (tgt4) at ([yshift=-0.6cm]tgt3.north east) {\small{}};
\node[tgtnode] (tgt5) at ([yshift=-0.6cm]tgt4.north east) {\scriptsize{}}; \node[tgtnode] (tgt5) at ([yshift=-0.6cm]tgt4.north east) {\small{}};
\node[tgtnode] (tgt6) at ([yshift=-0.6cm]tgt5.north east) {\scriptsize{}}; \node[tgtnode] (tgt6) at ([yshift=-0.6cm]tgt5.north east) {\small{}};
\node[tgtnode] (tgt7) at ([yshift=-0.6cm]tgt6.north east) {\scriptsize{?}}; \node[tgtnode] (tgt7) at ([yshift=-0.6cm]tgt6.north east) {\small{?}};
\node[tgtnode] (tgt8) at ([yshift=-0.6cm]tgt7.north east) {\scriptsize{$\langle$eos$\rangle$}}; \node[tgtnode] (tgt8) at ([yshift=-0.6cm]tgt7.north east) {\small{$\langle$eos$\rangle$}};
\end{scope} \end{scope}
......
...@@ -12,17 +12,17 @@ ...@@ -12,17 +12,17 @@
\tikzstyle{rnode} = [draw,minimum width=3.5em,minimum height=1.2em] \tikzstyle{rnode} = [draw,minimum width=3.5em,minimum height=1.2em]
\node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{$\vectorn{\emph{h}}(\textrm{“你”})$}}; \node [rnode,anchor=south west,fill=red!20!white] (value1) at (0,0) {\scriptsize{${{h}}(\textrm{})$}};
\node [rnode,anchor=south west,fill=red!20!white] (value2) at ([xshift=1em]value1.south east) {\scriptsize{$\vectorn{\emph{h}}(\textrm{“什么”})$}}; \node [rnode,anchor=south west,fill=red!20!white] (value2) at ([xshift=1em]value1.south east) {\scriptsize{${{h}}(\textrm{什么})$}};
\node [rnode,anchor=south west,fill=red!20!white] (value3) at ([xshift=1em]value2.south east) {\scriptsize{$\vectorn{\emph{h}}(\textrm{“也”})$}}; \node [rnode,anchor=south west,fill=red!20!white] (value3) at ([xshift=1em]value2.south east) {\scriptsize{${{h}}(\textrm{})$}};
\node [rnode,anchor=south west,fill=red!20!white] (value4) at ([xshift=1em]value3.south east) {\scriptsize{$\vectorn{\emph{h}}(\textrm{“没”})$}}; \node [rnode,anchor=south west,fill=red!20!white] (value4) at ([xshift=1em]value3.south east) {\scriptsize{${{h}}(\textrm{})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key1) at ([yshift=0.2em]value1.north west) {\scriptsize{$\vectorn{\emph{h}}(\textrm{“你”})$}}; \node [rnode,anchor=south west,fill=green!20!white] (key1) at ([yshift=0.2em]value1.north west) {\scriptsize{${{h}}(\textrm{})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key2) at ([yshift=0.2em]value2.north west) {\scriptsize{$\vectorn{\emph{h}}(\textrm{“什么”})$}}; \node [rnode,anchor=south west,fill=green!20!white] (key2) at ([yshift=0.2em]value2.north west) {\scriptsize{${{h}}(\textrm{什么})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key3) at ([yshift=0.2em]value3.north west) {\scriptsize{$\vectorn{\emph{h}}(\textrm{“也”})$}}; \node [rnode,anchor=south west,fill=green!20!white] (key3) at ([yshift=0.2em]value3.north west) {\scriptsize{${{h}}(\textrm{})$}};
\node [rnode,anchor=south west,fill=green!20!white] (key4) at ([yshift=0.2em]value4.north west) {\scriptsize{$\vectorn{\emph{h}}(\textrm{“没”})$}}; \node [rnode,anchor=south west,fill=green!20!white] (key4) at ([yshift=0.2em]value4.north west) {\scriptsize{${{h}}(\textrm{})$}};
\node [rnode,anchor=east] (query) at ([xshift=-2em]key1.west) {\scriptsize{$\vectorn{\emph{s}}(\textrm{“you”})$}}; \node [rnode,anchor=east] (query) at ([xshift=-2em]key1.west) {\scriptsize{${{s}}(\textrm{you})$}};
\node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}}; \node [anchor=east] (querylabel) at ([xshift=-0.2em]query.west) {\scriptsize{query}};
\draw [->] ([yshift=1pt,xshift=6pt]query.north) .. controls +(90:1em) and +(90:1em) .. ([yshift=1pt]key1.north); \draw [->] ([yshift=1pt,xshift=6pt]query.north) .. controls +(90:1em) and +(90:1em) .. ([yshift=1pt]key1.north);
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
\node[rounded corners=1pt,minimum width=11.0em,minimum height=2.0em,fill=pink!30,draw=black](p1) at (0,0) {\small{Self-Attention}}; \node[rounded corners=1pt,minimum width=11.0em,minimum height=2.0em,fill=pink!30,draw=black](p1) at (0,0) {\small{Self-Attention}};
\node[anchor=north](word1) at ([xshift=0.0em,yshift=-2.0em]p1.south) {\small \vectorn{\emph{K}}}; \node[anchor=north](word1) at ([xshift=0.0em,yshift=-2.0em]p1.south) {\small \vectorn{\emph{K}}};
\node[anchor=west](word2) at ([xshift=2.2em]word1.east) {\small \vectorn{\emph{Q}}}; \node[anchor=west](word2) at ([xshift=2.2em]word1.east) {\small \vectorn{\emph{V}}};
\node[anchor=east](word3) at ([xshift=-2.2em]word1.west) {\small \vectorn{\emph{Q}}}; \node[anchor=east](word3) at ([xshift=-2.2em]word1.west) {\small \vectorn{\emph{Q}}};
\draw[->,thick](word1.north)--(p1.south); \draw[->,thick](word1.north)--(p1.south);
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
\node[anchor=west,rounded corners=1pt,minimum width=14.0em,minimum height=2.0em,fill=pink!30,draw=black](p2) at ([xshift=5.0em]p1.east){\small{Encoder-Decoder Attention}}; \node[anchor=west,rounded corners=1pt,minimum width=14.0em,minimum height=2.0em,fill=pink!30,draw=black](p2) at ([xshift=5.0em]p1.east){\small{Encoder-Decoder Attention}};
\node[anchor=north](word1-2) at ([xshift=0.0em,yshift=-2.0em]p2.south) {\small \vectorn{\emph{K}}}; \node[anchor=north](word1-2) at ([xshift=0.0em,yshift=-2.0em]p2.south) {\small \vectorn{\emph{K}}};
\node[anchor=west](word2-2) at ([xshift=2.2em]word1-2.east) {\small \vectorn{\emph{Q}}}; \node[anchor=west](word2-2) at ([xshift=2.2em]word1-2.east) {\small \vectorn{\emph{V}}};
\node[anchor=east](word3-2) at ([xshift=-2.2em]word1-2.west) {\small \vectorn{\emph{Q}}}; \node[anchor=east](word3-2) at ([xshift=-2.2em]word1-2.west) {\small \vectorn{\emph{Q}}};
\draw[->,thick](word1-2.north)--(p2.south); \draw[->,thick](word1-2.north)--(p2.south);
......
...@@ -51,7 +51,7 @@ ...@@ -51,7 +51,7 @@
\parinterval {\small\bfnew{概率}}\index{概率}(Probability)\index{Probability}是度量随机事件呈现其每个可能状态的可能性的数值,本质上它是一个测度函数\upcite{mao-prob-book-2011,kolmogorov2018foundations}。概率的大小表征了随机事件在一次试验中发生的可能性大小。用$\funp{P}(\cdot )$表示一个随机事件的可能性,即事件发生的概率。比如$\funp{P}(\textrm{太阳从东方升起})$表示“太阳从东方升起”的可能性,同理,$\funp{P}(A=B)$ 表示的就是“$A=B$”这件事的可能性。 \parinterval {\small\bfnew{概率}}\index{概率}(Probability)\index{Probability}是度量随机事件呈现其每个可能状态的可能性的数值,本质上它是一个测度函数\upcite{mao-prob-book-2011,kolmogorov2018foundations}。概率的大小表征了随机事件在一次试验中发生的可能性大小。用$\funp{P}(\cdot )$表示一个随机事件的可能性,即事件发生的概率。比如$\funp{P}(\textrm{太阳从东方升起})$表示“太阳从东方升起”的可能性,同理,$\funp{P}(A=B)$ 表示的就是“$A=B$”这件事的可能性。
\parinterval 在实际问题中,往往需要得到随机变量的概率值。但是,真实的概率值可能是无法准确知道的,这时就需要对概率进行{\small\sffamily\bfseries{估计}}\index{估计},得到的结果是概率的{\small\sffamily\bfseries{估计值}}\index{估计值}(Estimate)\index{Estimate}。概率值的估计是概率论和统计学中的经典问题,有十分多样的方法可以选择。比如,一个很简单的方法是利用相对频次作为概率的估计值。如果$\{x_1,x_2,\dots,x_n \}$ 是一个试验的样本空间,在相同情况下重复试验$N$次,观察到样本$x_i (1\leq{i}\leq{n})$的次数为$n (x_i )$,那么$x_i$在这$N$次试验中的相对频率是$\frac{n(x_i )}{N}$。 当$N$越来越大时,相对概率也就越来越接近真实概率$\funp{P}(x_i)$,即$\lim_{N \to \infty}\frac{n(x_i )}{N}=\funp{P}(x_i)$。 实际上,很多概率模型都等同于相对频次估计。比如,对于一个服从多项式分布的变量,它的极大似然估计就可以用相对频次估计实现。 \parinterval 在实际问题中,往往需要得到随机变量的概率值。但是,真实的概率值可能是无法准确知道的,这时就需要对概率进行{\small\sffamily\bfseries{估计}}\index{估计}(Estimation\index{Estimation},得到的结果是概率的{\small\sffamily\bfseries{估计值}}\index{估计值}(Estimate)\index{Estimate}。概率值的估计是概率论和统计学中的经典问题,有十分多样的方法可以选择。比如,一个很简单的方法是利用相对频次作为概率的估计值。如果$\{x_1,x_2,\dots,x_n \}$ 是一个试验的样本空间,在相同情况下重复试验$N$次,观察到样本$x_i (1\leq{i}\leq{n})$的次数为$n (x_i )$,那么$x_i$在这$N$次试验中的相对频率是$\frac{n(x_i )}{N}$。 当$N$越来越大时,相对概率也就越来越接近真实概率$\funp{P}(x_i)$,即$\lim_{N \to \infty}\frac{n(x_i )}{N}=\funp{P}(x_i)$。 实际上,很多概率模型都等同于相对频次估计。比如,对于一个服从多项式分布的变量,它的极大似然估计就可以用相对频次估计实现。
\parinterval 概率函数是用函数形式给出离散变量每个取值发生的概率,其实就是将变量的概率分布转化为数学表达形式。如果把$A$看做一个离散变量,$a$看做变量$A$的一个取值,那么$\funp{P}(A)$被称作变量$A$的概率函数,$\funp{P}(A=a)$被称作$A = a$的概率值,简记为$\funp{P}(a)$。例如,在相同条件下掷一个骰子50次,用$A$表示投骰子出现的点数这个离散变量,$a_i$表示点数的取值,$\funp{P}_i$表示$A=a_i$的概率值。表\ref{tab:2-1}$A$的概率分布,给出了$A$的所有取值及其概率。 \parinterval 概率函数是用函数形式给出离散变量每个取值发生的概率,其实就是将变量的概率分布转化为数学表达形式。如果把$A$看做一个离散变量,$a$看做变量$A$的一个取值,那么$\funp{P}(A)$被称作变量$A$的概率函数,$\funp{P}(A=a)$被称作$A = a$的概率值,简记为$\funp{P}(a)$。例如,在相同条件下掷一个骰子50次,用$A$表示投骰子出现的点数这个离散变量,$a_i$表示点数的取值,$\funp{P}_i$表示$A=a_i$的概率值。表\ref{tab:2-1}$A$的概率分布,给出了$A$的所有取值及其概率。
...@@ -470,7 +470,7 @@ F(x)=\int_{-\infty}^x f(x)\textrm{d}x ...@@ -470,7 +470,7 @@ F(x)=\int_{-\infty}^x f(x)\textrm{d}x
\parinterval 这样,$w_1 w_2 \ldots w_m$的生成可以被看作是逐个生成每个单词的过程,即首先生成$w_1$,然后根据$w_1$再生成$w_2$,然后根据$w_1 w_2$再生成$w_3$,以此类推,直到根据所有前$m-1$个词生成序列的最后一个单词$w_m$。这个模型把联合概率$\funp{P}(w_1 w_2 \ldots w_m)$分解为多个条件概率的乘积,虽然对生成序列的过程进行了分解,但是模型的复杂度和以前是一样的,比如,$\funp{P}(w_m|w_1 w_2 \ldots w_{m-1})$ 仍然不好计算。 \parinterval 这样,$w_1 w_2 \ldots w_m$的生成可以被看作是逐个生成每个单词的过程,即首先生成$w_1$,然后根据$w_1$再生成$w_2$,然后根据$w_1 w_2$再生成$w_3$,以此类推,直到根据所有前$m-1$个词生成序列的最后一个单词$w_m$。这个模型把联合概率$\funp{P}(w_1 w_2 \ldots w_m)$分解为多个条件概率的乘积,虽然对生成序列的过程进行了分解,但是模型的复杂度和以前是一样的,比如,$\funp{P}(w_m|w_1 w_2 \ldots w_{m-1})$ 仍然不好计算。
\parinterval 换一个角度看,$\funp{P}(w_m|w_1 w_2 \ldots w_{m-1})$体现了一种基于“历史”的单词生成模型,也就是把前面生成的所有单词作为“历史”,并参考这个“历史”生成当前单词。但是这个“历史”的长度和整个序列长度是相关的,也是一种长度变化的历史序列。为了化简问题,一种简单的想法是使用定长历史,比如,每次只考虑前面$n-1$个历史单词来生成当前单词。这就是$n$-gram语言模型,其中$n$-gram 表示$n$个连续单词构成的单元,也被称作{\small\bfnew{n元语法单元}}\index{n元语法单元}。这个模型的数学描述如下: \parinterval 换一个角度看,$\funp{P}(w_m|w_1 w_2 \ldots w_{m-1})$体现了一种基于“历史”的单词生成模型,也就是把前面生成的所有单词作为“历史”,并参考这个“历史”生成当前单词。但是这个“历史”的长度和整个序列长度是相关的,也是一种长度变化的历史序列。为了化简问题,一种简单的想法是使用定长历史,比如,每次只考虑前面$n-1$个历史单词来生成当前单词。这就是$n$-gram语言模型,其中$n$-gram 表示$n$个连续单词构成的单元,也被称作{\small\bfnew{$n$元语法单元}}\index{$n$元语法单元}。这个模型的数学描述如下:
\begin{eqnarray} \begin{eqnarray}
\funp{P}(w_m|w_1 w_2 \ldots w_{m-1}) = \funp{P}(w_m|w_{m-n+1} \ldots w_{m-1}) \funp{P}(w_m|w_1 w_2 \ldots w_{m-1}) = \funp{P}(w_m|w_{m-n+1} \ldots w_{m-1})
\label{eq:2-23} \label{eq:2-23}
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
} }
{ {
\node [anchor=south west,inner sep=2pt] (label2) at (eq5.north west) {{\scriptsize{配对的总次数}}}; \node [anchor=south west,inner sep=2pt] (label2) at (eq5.north west) {{\scriptsize{配对的总次数}}};
\node [anchor=south west,inner sep=2pt] (label2part2) at ([yshift=-3pt]label2.north west) {{\scriptsize{$(s_u,t_v)$在句对$(\mathbf{s},\mathbf{t})$}}}; \node [anchor=south west,inner sep=2pt] (label2part2) at ([yshift=-3pt]label2.north west) {{\scriptsize{$(s_u,t_v)$在句对$(\seq{s},\seq{t})$}}};
} }
{ {
\node [anchor=south west,inner sep=2pt] (label3) at (eq6.north west) {{\scriptsize{有的$t_i$的相对值}}}; \node [anchor=south west,inner sep=2pt] (label3) at (eq6.north west) {{\scriptsize{有的$t_i$的相对值}}};
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
{ {
\node [anchor=east,rotate=90] (neweq1) at ([yshift=-0em]eq4.south) {=}; \node [anchor=east,rotate=90] (neweq1) at ([yshift=-0em]eq4.south) {=};
\node [anchor=north,inner sep=1pt] (neweq1full) at (neweq1.west) {\large{$\textrm{P}(\mathbf{s}|\mathbf{t})$}}; \node [anchor=north,inner sep=1pt] (neweq1full) at (neweq1.west) {\large{$\funp{P}(\seq{s}|\seq{t})$}};
} }
{ {
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
\draw [-] (s1.south) -- (t0.north); \draw [-] (s1.south) -- (t0.north);
\draw [-] (s2.south) -- (t0.north); \draw [-] (s2.south) -- (t0.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}};
} }
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
\draw [-] (s1.south) -- (t0.north); \draw [-] (s1.south) -- (t0.north);
\draw [-] (s2.south) -- (t1.north); \draw [-] (s2.south) -- (t1.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}};
} }
...@@ -45,7 +45,7 @@ ...@@ -45,7 +45,7 @@
\draw [-] (s1.south) -- (t0.north); \draw [-] (s1.south) -- (t0.north);
\draw [-] (s2.south) -- (t2.north); \draw [-] (s2.south) -- (t2.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}};
} }
...@@ -61,7 +61,7 @@ ...@@ -61,7 +61,7 @@
\draw [-] (s1.south) -- ([yshift=-0.2em]t1.north); \draw [-] (s1.south) -- ([yshift=-0.2em]t1.north);
\draw [-] (s2.south) -- (t0.north); \draw [-] (s2.south) -- (t0.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}};
} }
...@@ -77,7 +77,7 @@ ...@@ -77,7 +77,7 @@
\draw [-] (s1.south) -- ([yshift=-0.2em]t1.north); \draw [-] (s1.south) -- ([yshift=-0.2em]t1.north);
\draw [-] (s2.south) -- ([yshift=-0.2em]t1.north); \draw [-] (s2.south) -- ([yshift=-0.2em]t1.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}};
} }
...@@ -93,7 +93,7 @@ ...@@ -93,7 +93,7 @@
\draw [-] (s1.south) -- ([yshift=-0.2em]t1.north); \draw [-] (s1.south) -- ([yshift=-0.2em]t1.north);
\draw [-] (s2.south) -- (t2.north); \draw [-] (s2.south) -- (t2.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}};
} }
...@@ -109,7 +109,7 @@ ...@@ -109,7 +109,7 @@
\draw [-] (s1.south) -- (t2.north); \draw [-] (s1.south) -- (t2.north);
\draw [-] (s2.south) -- (t0.north); \draw [-] (s2.south) -- (t0.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}};
} }
...@@ -125,7 +125,7 @@ ...@@ -125,7 +125,7 @@
\draw [-] (s1.south) -- (t2.north); \draw [-] (s1.south) -- (t2.north);
\draw [-] (s2.south) -- (t1.north); \draw [-] (s2.south) -- (t1.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\small{+}};
} }
...@@ -141,9 +141,9 @@ ...@@ -141,9 +141,9 @@
\draw [-] (s1.south) -- (t2.north); \draw [-] (s1.south) -- (t2.north);
\draw [-] (s2.south) -- (t2.north); \draw [-] (s2.south) -- (t2.north);
{ {
\node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} P\;(}}}; \node [anchor=south east,inner sep=0pt] (p) at (t0.north west) {\small{{\color{ugreen} $\funp{P}$\;(}}};
\node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}}; \node [anchor=south west,inner sep=0pt] (p2) at ([yshift=0.2em]t2.north east) {\small{{\color{ugreen} )}}};
\node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\normalsize{= \ P\,($\mathbf{s}|\mathbf{t}$)}}; \node [anchor=west] (eq) at ([xshift=0.7em]p2.east) {\normalsize{= \ $\funp{P}$\,($\seq{s}|\seq{t}$)}};
} }
} }
\end{scope} \end{scope}
......
...@@ -3,14 +3,14 @@ ...@@ -3,14 +3,14 @@
%------------------------------------------------------------------------- %-------------------------------------------------------------------------
\begin{tikzpicture} \begin{tikzpicture}
\node [anchor=west] (e1) at (0,0) {$g(\mathbf{s},\mathbf{t})$}; \node [anchor=west] (e1) at (0,0) {$g(\seq{s},\seq{t})$};
\node [anchor=west] (e2) at (e1.east) {$=$}; \node [anchor=west] (e2) at (e1.east) {$=$};
\node [anchor=west,inner sep=2pt,fill=red!20] (e3) at (e2.east) {$\prod\nolimits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$}; \node [anchor=west,inner sep=2pt,fill=red!20] (e3) at (e2.east) {$\prod\nolimits_{(j,i) \in \hat{A}} \funp{P}(s_j,t_i)$};
\node [anchor=west,inner sep=1pt] (e4) at (e3.east) {$\times$}; \node [anchor=west,inner sep=1pt] (e4) at (e3.east) {$\times$};
\node [anchor=west,inner sep=3pt,fill=blue!20] (e5) at (e4.east) {$\textrm{P}_{\textrm{lm}}(\mathbf{t})$}; \node [anchor=west,inner sep=3pt,fill=blue!20] (e5) at (e4.east) {$\funp{P}_{\textrm{lm}}(\seq{t})$};
\node [anchor=north west,inner sep=1pt] (n1) at ([xshift=7.0em,yshift=-0.5em]e1.south west) {$\textrm{P}(\mathbf{s}|\mathbf{t})$}; \node [anchor=north west,inner sep=1pt] (n1) at ([xshift=7.0em,yshift=-0.5em]e1.south west) {$\funp{P}(\seq{s}|\seq{t})$};
\node [anchor=north] (n1part2) at ([yshift=0.3em]n1.south) {\scriptsize{{翻译模型}}}; \node [anchor=north] (n1part2) at ([yshift=0.3em]n1.south) {\scriptsize{{翻译模型}}};
\node [anchor=west,inner sep=1pt] (n2) at ([xshift=4.0em]n1.east) {$\textrm{P}(\mathbf{t})$}; \node [anchor=west,inner sep=1pt] (n2) at ([xshift=4.0em]n1.east) {$\funp{P}(\seq{t})$};
\node [anchor=north] (n2part2) at ([yshift=0.3em]n2.south) {\scriptsize{{语言模型}}}; \node [anchor=north] (n2part2) at ([yshift=0.3em]n2.south) {\scriptsize{{语言模型}}};
\end{tikzpicture} \end{tikzpicture}
......
...@@ -7,26 +7,26 @@ ...@@ -7,26 +7,26 @@
\begin{tikzpicture} \begin{tikzpicture}
\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s1) at (0,0) {\black{$\mathbf{s}$}}; \node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s1) at (0,0) {\black{$\seq{s}$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t) at ([xshift=1in]s1.east) {\black{$\widehat{\mathbf{t}}$}}; \node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t) at ([xshift=1in]s1.east) {\black{$\widehat{\seq{t}}$}};
\draw [->,thick,] (s1.north east) .. controls +(north east:1em) and +(north west:1em).. (t.north west) node[pos=0.5,below] {\tiny{正确翻译}}; \draw [->,thick,] (s1.north east) .. controls +(north east:1em) and +(north west:1em).. (t.north west) node[pos=0.5,below] {\tiny{正确翻译}};
\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at ([xshift=13em,yshift=0em]s1.east) {\black{$\mathbf{s}$}}; \node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at ([xshift=13em,yshift=0em]s1.east) {\black{$\seq{s}$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t1) at ([xshift=1in]s.east) {\black{$\mathbf{t}_1$}}; \node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t1) at ([xshift=1in]s.east) {\black{$\seq{t}_1$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t2) at ([xshift=3em,yshift=2em]t1.north east) {\black{$\mathbf{t}_2$}}; \node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t2) at ([xshift=3em,yshift=2em]t1.north east) {\black{$\seq{t}_2$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t3) at ([xshift=1em,yshift=4em]t1.north east) {\black{$\mathbf{t}_3$}}; \node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t3) at ([xshift=1em,yshift=4em]t1.north east) {\black{$\seq{t}_3$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t4) at ([xshift=3em,yshift=-1.5em]t1.north east) {\black{$\mathbf{t}_4$}}; \node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=2pt] (t4) at ([xshift=3em,yshift=-1.5em]t1.north east) {\black{$\seq{t}_4$}};
\node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t5) at ([xshift=3em]t3.east) {}; \node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t5) at ([xshift=3em]t3.east) {};
\node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t6) at ([xshift=3em]t2.east) {}; \node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t6) at ([xshift=3em]t2.east) {};
\node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t7) at ([xshift=3em]t4.east) {}; \node [draw,dashed,ublue,fill=blue!10,thick,anchor=center,circle,minimum size=18pt] (t7) at ([xshift=3em]t4.east) {};
\draw [->,thick,] (s.north east) .. controls +(north east:1em) and +(north west:1em).. (t1.north west) node[pos=0.5,below] {\tiny{P ($\mathbf{t}_1|\mathbf{s}$)=0.1}}; \draw [->,thick,] (s.north east) .. controls +(north east:1em) and +(north west:1em).. (t1.north west) node[pos=0.5,below] {\tiny{P ($\seq{t}_1|\seq{s}$)=0.1}};
\draw [->,thick,] (s.60) .. controls +(50:4em) and +(west:1em).. (t2.west) node[pos=0.5,below] {\tiny{P($\mathbf{t}_2|\mathbf{s}$)=0.2}}; \draw [->,thick,] (s.60) .. controls +(50:4em) and +(west:1em).. (t2.west) node[pos=0.5,below] {\tiny{P($\seq{t}_2|\seq{s}$)=0.2}};
\draw [->,thick,] (s.north) .. controls +(70:4em) and +(west:1em).. (t3.west) node[pos=0.5,above,xshift=-1em] {\tiny{P($\mathbf{t}_3|\mathbf{s}$)=0.3}}; \draw [->,thick,] (s.north) .. controls +(70:4em) and +(west:1em).. (t3.west) node[pos=0.5,above,xshift=-1em] {\tiny{P($\seq{t}_3|\seq{s}$)=0.3}};
\draw [->,thick,] (s.south east) .. controls +(300:3em) and +(south west:1em).. (t4.south west) node[pos=0.5,below] {\tiny{P($\mathbf{t}_4|\mathbf{s}$)=0.1}}; \draw [->,thick,] (s.south east) .. controls +(300:3em) and +(south west:1em).. (t4.south west) node[pos=0.5,below] {\tiny{P($\seq{t}_4|\seq{s}$)=0.1}};
\node [anchor=center] (foot1) at ([xshift=3.8em,yshift=-3em]s1.south) {\footnotesize{人的翻译候选空间}}; \node [anchor=center] (foot1) at ([xshift=3.8em,yshift=-3em]s1.south) {\footnotesize{人的翻译候选空间}};
\node [anchor=center] (foot2) at ([xshift=7em,yshift=-3em]s.south) {\footnotesize{机器的翻译候选空间}}; \node [anchor=center] (foot2) at ([xshift=7em,yshift=-3em]s.south) {\footnotesize{机器的翻译候选空间}};
......
...@@ -6,17 +6,17 @@ ...@@ -6,17 +6,17 @@
%------------------------------------------------------------------------- %-------------------------------------------------------------------------
\begin{tikzpicture} \begin{tikzpicture}
\node [anchor=north west] (line1) at (0,0) {\small\sffamily\bfseries{IBM模型1的训练(EM算法)}}; \node [anchor=north west] (line1) at (0,0) {\small\sffamily\bfseries{IBM模型1的训练(EM算法)}};
\node [anchor=north west] (line2) at ([yshift=-0.3em]line1.south west) {输入: 平行语料${(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})}$}; \node [anchor=north west] (line2) at ([yshift=-0.3em]line1.south west) {输入: 平行语料${(\seq{s}^{[1]},\seq{t}^{[1]}),...,(\seq{s}^{[N]},\seq{t}^{[N]})}$};
\node [anchor=north west] (line3) at ([yshift=-0.1em]line2.south west) {输出: 参数$f(\cdot|\cdot)$的最优值}; \node [anchor=north west] (line3) at ([yshift=-0.1em]line2.south west) {输出: 参数$f(\cdot|\cdot)$的最优值};
\node [anchor=north west] (line4) at ([yshift=-0.1em]line3.south west) {1: \textbf{Function} \textsc{EM}($\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$) }; \node [anchor=north west] (line4) at ([yshift=-0.1em]line3.south west) {1: \textbf{Function} \textsc{EM}($\{(\seq{s}^{[1]},\seq{t}^{[1]}),...,(\seq{s}^{[N]},\seq{t}^{[N]})\}$) };
\node [anchor=north west] (line5) at ([yshift=-0.1em]line4.south west) {2: \ \ Initialize $f(\cdot|\cdot)$ \hspace{5em} $\rhd$ 比如给$f(\cdot|\cdot)$一个均匀分布}; \node [anchor=north west] (line5) at ([yshift=-0.1em]line4.south west) {2: \ \ Initialize $f(\cdot|\cdot)$ \hspace{5em} $\rhd$ 比如给$f(\cdot|\cdot)$一个均匀分布};
\node [anchor=north west] (line6) at ([yshift=-0.1em]line5.south west) {3: \ \ Loop until $f(\cdot|\cdot)$ converges}; \node [anchor=north west] (line6) at ([yshift=-0.1em]line5.south west) {3: \ \ Loop until $f(\cdot|\cdot)$ converges};
\node [anchor=north west] (line7) at ([yshift=-0.1em]line6.south west) {4: \ \ \ \ \textbf{foreach} $k = 1$ to $N$ \textbf{do}}; \node [anchor=north west] (line7) at ([yshift=-0.1em]line6.south west) {4: \ \ \ \ \textbf{foreach} $k = 1$ to $N$ \textbf{do}};
\node [anchor=north west] (line8) at ([yshift=-0.1em]line7.south west) {5: \ \ \ \ \ \ \ \footnotesize{$c_{\mathbb{E}}(\mathbf{s}_u|\mathbf{t}_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) = \sum\limits_{j=1}^{|\mathbf{s}^{[k]}|} \delta(s_j,s_u) \sum\limits_{i=0}^{|\mathbf{t}^{[k]}|} \delta(t_i,t_v) \cdot \frac{f(s_u|t_v)}{\sum_{i=0}^{l}f(s_u|t_i)}$}\normalsize{}}; \node [anchor=north west] (line8) at ([yshift=-0.1em]line7.south west) {5: \ \ \ \ \ \ \ \footnotesize{$c_{\mathbb{E}}(\seq{s}_u|\seq{t}_v;\seq{s}^{[k]},\seq{t}^{[k]}) = \sum\limits_{j=1}^{|\seq{s}^{[k]}|} \delta(s_j,s_u) \sum\limits_{i=0}^{|\seq{t}^{[k]}|} \delta(t_i,t_v) \cdot \frac{f(s_u|t_v)}{\sum_{i=0}^{l}f(s_u|t_i)}$}\normalsize{}};
\node [anchor=north west] (line9) at ([yshift=-0.1em]line8.south west) {6: \ \ \ \ \textbf{foreach} $t_v$ appears at least one of $\{\mathbf{t}^{[1]},...,\mathbf{t}^{[N]}\}$ \textbf{do}}; \node [anchor=north west] (line9) at ([yshift=-0.1em]line8.south west) {6: \ \ \ \ \textbf{foreach} $t_v$ appears at least one of $\{\seq{t}^{[1]},...,\seq{t}^{[N]}\}$ \textbf{do}};
\node [anchor=north west] (line10) at ([yshift=-0.1em]line9.south west) {7: \ \ \ \ \ \ \ $\lambda_{t_v}^{'} = \sum_{s_u} \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$}; \node [anchor=north west] (line10) at ([yshift=-0.1em]line9.south west) {7: \ \ \ \ \ \ \ $\lambda_{t_v}^{'} = \sum_{s_u} \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\seq{s}^{[k]},\seq{t}^{[k]})$};
\node [anchor=north west] (line11) at ([yshift=-0.1em]line10.south west) {8: \ \ \ \ \ \ \ \textbf{foreach} $s_u$ appears at least one of $\{\mathbf{s}^{[1]},...,\mathbf{s}^{[N]}\}$ \textbf{do}}; \node [anchor=north west] (line11) at ([yshift=-0.1em]line10.south west) {8: \ \ \ \ \ \ \ \textbf{foreach} $s_u$ appears at least one of $\{\seq{s}^{[1]},...,\seq{s}^{[N]}\}$ \textbf{do}};
\node [anchor=north west] (line12) at ([yshift=-0.1em]line11.south west) {9: \ \ \ \ \ \ \ \ \ $f(s_u|t_v) = \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) \cdot (\lambda_{t_v}^{'})^{-1}$}; \node [anchor=north west] (line12) at ([yshift=-0.1em]line11.south west) {9: \ \ \ \ \ \ \ \ \ $f(s_u|t_v) = \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\seq{s}^{[k]},\seq{t}^{[k]}) \cdot (\lambda_{t_v}^{'})^{-1}$};
\node [anchor=north west] (line13) at ([yshift=-0.1em]line12.south west) {10: \ \textbf{return} $f(\cdot|\cdot)$}; \node [anchor=north west] (line13) at ([yshift=-0.1em]line12.south west) {10: \ \textbf{return} $f(\cdot|\cdot)$};
\begin{pgfonlayer}{background} \begin{pgfonlayer}{background}
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
\begin{tabular}{| l | l |} \begin{tabular}{| l | l |}
\hline \hline
& {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$} } \\ \hline & {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \funp{P}(s_j,t_i)$} } \\ \hline
\begin{tikzpicture} \begin{tikzpicture}
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
\node [anchor=west] (s3) at ([xshift=3.2em]s2.east) {$_3$}; \node [anchor=west] (s3) at ([xshift=3.2em]s2.east) {$_3$};
\node [anchor=west] (s4) at ([xshift=3.6em]s3.east) {感到$_4$}; \node [anchor=west] (s4) at ([xshift=3.6em]s3.east) {感到$_4$};
\node [anchor=west] (s5) at ([xshift=1.9em]s4.east) {满意$_5$}; \node [anchor=west] (s5) at ([xshift=1.9em]s4.east) {满意$_5$};
\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$}; \node [anchor=east] (s) at (s1.west) {$\seq{s}=$};
\end{scope} \end{scope}
\begin{scope}[yshift=-3.6em] \begin{scope}[yshift=-3.6em]
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
\node [anchor=west] (t3) at ([xshift=2.3em,yshift=0.1em]t2.east) {satisfied$_3$}; \node [anchor=west] (t3) at ([xshift=2.3em,yshift=0.1em]t2.east) {satisfied$_3$};
\node [anchor=west] (t4) at ([xshift=2.3em]t3.east) {with$_4$}; \node [anchor=west] (t4) at ([xshift=2.3em]t3.east) {with$_4$};
\node [anchor=west] (t5) at ([xshift=2.3em,yshift=-0.2em]t4.east) {you$_5$}; \node [anchor=west] (t5) at ([xshift=2.3em,yshift=-0.2em]t4.east) {you$_5$};
\node [anchor=east] (t) at ([xshift=-0.35em]t1.west) {$\mathbf{t}'=$}; \node [anchor=east] (t) at ([xshift=-0.35em]t1.west) {$\seq{t}'=$};
\end{scope} \end{scope}
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
\node [anchor=west] (s3) at ([xshift=2.5em]s2.east) {$_3$}; \node [anchor=west] (s3) at ([xshift=2.5em]s2.east) {$_3$};
\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {感到$_4$}; \node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {感到$_4$};
\node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {满意$_5$}; \node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {满意$_5$};
\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$}; \node [anchor=east] (s) at (s1.west) {$\seq{s}=$};
\end{scope} \end{scope}
\begin{scope}[yshift=-3.6em] \begin{scope}[yshift=-3.6em]
...@@ -65,7 +65,7 @@ ...@@ -65,7 +65,7 @@
\node [anchor=center] (t3) at ([yshift=-1.7em]s3.south) {you$_3$}; \node [anchor=center] (t3) at ([yshift=-1.7em]s3.south) {you$_3$};
\node [anchor=center] (t4) at ([yshift=-1.7em]s4.south) {am$_4$}; \node [anchor=center] (t4) at ([yshift=-1.7em]s4.south) {am$_4$};
\node [anchor=center] (t5) at ([yshift=-1.6em]s5.south) {satisfied$_5$}; \node [anchor=center] (t5) at ([yshift=-1.6em]s5.south) {satisfied$_5$};
\node [anchor=center] (t) at ([xshift=-1.3em]t1.west) {$\mathbf{t}''=$}; \node [anchor=center] (t) at ([xshift=-1.3em]t1.west) {$\seq{t}''=$};
\end{scope} \end{scope}
......
...@@ -233,7 +233,7 @@ ...@@ -233,7 +233,7 @@
\draw [-] (glabel.south west) -- ([xshift=3.5in]glabel.south west); \draw [-] (glabel.south west) -- ([xshift=3.5in]glabel.south west);
\node [anchor=center,rotate=90] (hlabel2) at ([xshift=-1.3em,yshift=-8.5em]glabel.west) {\tiny{$h$存放临时翻译结果}}; \node [anchor=center,rotate=90] (hlabel2) at ([xshift=-1.3em,yshift=-8.5em]glabel.west) {\tiny{$h$存放临时翻译结果}};
\node [anchor=north west] (foot2) at ([xshift=0.0em,yshift=-23.0em]translabel.south west) {\scriptsize{(b)\; 6: \textbf{if} $used[j]=$ \textbf{true} \textbf{then}}}; \node [anchor=north west] (foot2) at ([xshift=0.0em,yshift=-23.0em]translabel.south west) {\scriptsize{(b)\; 6: \textbf{if} $used[j]=$ \textbf{false} \textbf{then}}};
} }
{%大大的join {%大大的join
\node [anchor=center,draw=ublue,circle,thick,fill=white,inner sep=2.5pt,circular drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (join) at ([xshift=4em,yshift=-1em]hlabel.north east) {\tiny{\textsc{Join}}}; \node [anchor=center,draw=ublue,circle,thick,fill=white,inner sep=2.5pt,circular drop shadow={shadow xshift=0.1em,shadow yshift=-0.1em}] (join) at ([xshift=4em,yshift=-1em]hlabel.north east) {\tiny{\textsc{Join}}};
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
\node [anchor=north west,inner sep=2pt,align=left] (line11) at ([yshift=-1pt]line10.south west) {\textrm{10: \textbf{return} $best.translatoin$}}; \node [anchor=north west,inner sep=2pt,align=left] (line11) at ([yshift=-1pt]line10.south west) {\textrm{10: \textbf{return} $best.translatoin$}};
\node [anchor=south west,inner sep=2pt,align=left] (head1) at ([yshift=1pt]line1.north west) {输出: 找的最佳译文}; \node [anchor=south west,inner sep=2pt,align=left] (head1) at ([yshift=1pt]line1.north west) {输出: 找的最佳译文};
\node [anchor=south west,inner sep=2pt,align=left] (head2) at ([yshift=1pt]head1.north west) {输入: 源语句子$\mathbf{s}=s_1...s_m$}; \node [anchor=south west,inner sep=2pt,align=left] (head2) at ([yshift=1pt]head1.north west) {输入: 源语句子$\seq{s}=s_1...s_m$};
} }
......
...@@ -6,8 +6,8 @@ ...@@ -6,8 +6,8 @@
\begin{tikzpicture} \begin{tikzpicture}
\node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at (0,0) {\black{$\mathbf{s}$}}; \node [draw,red,fill=red!10,thick,anchor=center,circle,inner sep=3.5pt] (s) at (0,0) {\black{$\seq{s}$}};
\node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=3.3pt] (t) at ([xshift=1.5in]s.east) {\black{$\mathbf{t}$}}; \node [draw,ublue,fill=blue!10,thick,anchor=center,circle,inner sep=3.3pt] (t) at ([xshift=1.5in]s.east) {\black{$\seq{t}$}};
\draw [->,thick,] (s.east) -- (t.west) node [pos=0.5,draw,fill=white] {噪声信道}; \draw [->,thick,] (s.east) -- (t.west) node [pos=0.5,draw,fill=white] {噪声信道};
\draw[->,thick](s.east) -- ([xshift=2.2em]s.east); \draw[->,thick](s.east) -- ([xshift=2.2em]s.east);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
%------------------------------------------------------------------------- %-------------------------------------------------------------------------
\begin{tabular}{| l | l |} \begin{tabular}{| l | l |}
\hline \hline
& {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \textrm{P}(s_j,t_i)$} \color{red}{{\footnotesize{$\times\textrm{P}_{\textrm{lm}}(\mathbf{t})$}}}} \\ \hline & {\footnotesize{$\prod\limits_{(j,i) \in \hat{A}} \funp{P}(s_j,t_i)$} \color{red}{{\footnotesize{$\times\funp{P}_{\textrm{lm}}(\mathbf{t})$}}}} \\ \hline
\begin{tikzpicture} \begin{tikzpicture}
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
\node [anchor=west] (s3) at ([xshift=3.2em]s2.east) {$_3$}; \node [anchor=west] (s3) at ([xshift=3.2em]s2.east) {$_3$};
\node [anchor=west] (s4) at ([xshift=3.6em]s3.east) {感到$_4$}; \node [anchor=west] (s4) at ([xshift=3.6em]s3.east) {感到$_4$};
\node [anchor=west] (s5) at ([xshift=1.9em]s4.east) {满意$_5$}; \node [anchor=west] (s5) at ([xshift=1.9em]s4.east) {满意$_5$};
\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$}; \node [anchor=east] (s) at (s1.west) {$\seq{s}=$};
\end{scope} \end{scope}
\begin{scope}[yshift=-3.6em] \begin{scope}[yshift=-3.6em]
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
\node [anchor=west] (t3) at ([xshift=2.3em,yshift=0.1em]t2.east) {satisfied$_3$}; \node [anchor=west] (t3) at ([xshift=2.3em,yshift=0.1em]t2.east) {satisfied$_3$};
\node [anchor=west] (t4) at ([xshift=2.3em]t3.east) {with$_4$}; \node [anchor=west] (t4) at ([xshift=2.3em]t3.east) {with$_4$};
\node [anchor=west] (t5) at ([xshift=2.3em,yshift=-0.2em]t4.east) {you$_5$}; \node [anchor=west] (t5) at ([xshift=2.3em,yshift=-0.2em]t4.east) {you$_5$};
\node [anchor=east] (t) at (t1.west) {$\mathbf{t}'=$}; \node [anchor=east] (t) at (t1.west) {$\seq{t}'=$};
\end{scope} \end{scope}
...@@ -52,7 +52,7 @@ ...@@ -52,7 +52,7 @@
\node [anchor=west] (s3) at ([xshift=2.5em]s2.east) {$_3$}; \node [anchor=west] (s3) at ([xshift=2.5em]s2.east) {$_3$};
\node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {感到$_4$}; \node [anchor=west] (s4) at ([xshift=2.5em]s3.east) {感到$_4$};
\node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {满意$_5$}; \node [anchor=west] (s5) at ([xshift=2.5em]s4.east) {满意$_5$};
\node [anchor=east] (s) at (s1.west) {$\mathbf{s}=$}; \node [anchor=east] (s) at (s1.west) {$\seq{s}=$};
\end{scope} \end{scope}
\begin{scope}[yshift=-3.6em] \begin{scope}[yshift=-3.6em]
...@@ -61,7 +61,7 @@ ...@@ -61,7 +61,7 @@
\node [anchor=center] (t3) at ([yshift=-1.7em]s3.south) {you$_3$}; \node [anchor=center] (t3) at ([yshift=-1.7em]s3.south) {you$_3$};
\node [anchor=center] (t4) at ([yshift=-1.7em]s4.south) {am$_4$}; \node [anchor=center] (t4) at ([yshift=-1.7em]s4.south) {am$_4$};
\node [anchor=center] (t5) at ([yshift=-1.6em]s5.south) {satisfied$_5$}; \node [anchor=center] (t5) at ([yshift=-1.6em]s5.south) {satisfied$_5$};
\node [anchor=center] (t) at ([xshift=-1.3em]t1.west) {$\mathbf{t}''=$}; \node [anchor=center] (t) at ([xshift=-1.3em]t1.west) {$\seq{t}''=$};
\end{scope} \end{scope}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
\node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la5) at (a30) {}; \node[align=center,elementnode,minimum size=0.3cm,inner sep=0.1pt,fill=blue!50] (la5) at (a30) {};
\node[anchor=west] (f1) at ([xshift=3em,yshift=0.8em]a43.east) {\small{$\funp{P}_{\textrm{lex}}(\bar{t}|\bar{s})=\sigma (t_1|s_1)\times$}}; \node[anchor=west] (f1) at ([xshift=3em,yshift=0.8em]a43.east) {\small{$\funp{P}_{\textrm{lex}}(\bar{t}|\bar{s})=\sigma (t_1|s_1)\times$}};
\node[anchor=north] (f2) at ([xshift=5.2em]f1.south) {\small{$\frac{1}{2}(\sigma (t_2|s_2)+\sigma (t_4|s_2))\times$}}; \node[anchor=north] (f2) at ([xshift=5.2em]f1.south) {\small{$\frac{1}{2}(\sigma (t_2|s_2)+\sigma (t_3|s_2))\times$}};
\node[anchor=north west] (f3) at (f2.south west) {\small{$\sigma (N|s_3)\times$}}; \node[anchor=north west] (f3) at (f2.south west) {\small{$\sigma (N|s_3)\times$}};
\node[anchor=north west] (f4) at (f3.south west) {\small{$\sigma (t_4|s_4)\times$}}; \node[anchor=north west] (f4) at (f3.south west) {\small{$\sigma (t_4|s_4)\times$}};
......
...@@ -334,7 +334,7 @@ d = {r_1} \circ {r_2} \circ {r_3} \circ {r_4} ...@@ -334,7 +334,7 @@ d = {r_1} \circ {r_2} \circ {r_3} \circ {r_4}
\begin{definition} 与词对齐相兼容的层次短语规则 \begin{definition} 与词对齐相兼容的层次短语规则
{\small {\small
对于句对$(\vectorn{\emph{s}},\vectorn{\emph{t}})$和它们之间的词对齐$\vectorn{\emph{a}}$,令$\Phi$表示在句对$(\vectorn{\emph{s}},\vectorn{\emph{t}})$上与$\vectorn{\emph{a}}$相兼容的双语短语集合。则: 对于句对$(\seq{s},\seq{t})$和它们之间的词对齐$\seq{a}$,令$\Phi$表示在句对$(\seq{s},\seq{t})$上与$\seq{a}$相兼容的双语短语集合。则:
\begin{enumerate} \begin{enumerate}
\item 如果$(x,y)\in \Phi$,则$\textrm{X} \to \langle x,y,\phi \rangle$是与词对齐相兼容的层次短语规则。 \item 如果$(x,y)\in \Phi$,则$\textrm{X} \to \langle x,y,\phi \rangle$是与词对齐相兼容的层次短语规则。
\item 对于$(x,y)\in \Phi$,存在$m$个双语短语$(x_i,y_j)\in \Phi$,同时存在(1,$...$,$m$)上面的一个排序$\sim = \{\pi_1 , ... ,\pi_m\}$,且: \item 对于$(x,y)\in \Phi$,存在$m$个双语短语$(x_i,y_j)\in \Phi$,同时存在(1,$...$,$m$)上面的一个排序$\sim = \{\pi_1 , ... ,\pi_m\}$,且:
...@@ -382,7 +382,7 @@ y&=&\beta_0 y_{\pi_1} \beta_1 y_{\pi_2} ... \beta_{m-1} y_{\pi_m} \beta_m ...@@ -382,7 +382,7 @@ y&=&\beta_0 y_{\pi_1} \beta_1 y_{\pi_2} ... \beta_{m-1} y_{\pi_m} \beta_m
\subsection{翻译特征} \subsection{翻译特征}
\parinterval 在层次短语模型中,每个翻译推导都有一个模型得分$\textrm{score}(d,\vectorn{\emph{s}},\vectorn{\emph{t}})$$\textrm{score}(d,\vectorn{\emph{s}},\vectorn{\emph{t}})$是若干特征的线性加权之和:$\textrm{score}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{i=1}^M\lambda_i\cdot h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})$,其中$\lambda_i$是特征权重,$h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})$是特征函数。层次短语模型的特征包括与规则相关的特征和语言模型特征,如下: \parinterval 在层次短语模型中,每个翻译推导都有一个模型得分$\textrm{score}(d,\seq{s},\seq{t})$$\textrm{score}(d,\seq{s},\seq{t})$是若干特征的线性加权之和:$\textrm{score}(d,\seq{t},\seq{s})=\sum_{i=1}^M\lambda_i\cdot h_i (d,\seq{t},\seq{s})$,其中$\lambda_i$是特征权重,$h_i (d,\seq{t},\seq{s})$是特征函数。层次短语模型的特征包括与规则相关的特征和语言模型特征,如下:
\parinterval 对于每一条翻译规则LHS$\to \langle \alpha, \beta ,\sim \rangle$,有: \parinterval 对于每一条翻译规则LHS$\to \langle \alpha, \beta ,\sim \rangle$,有:
...@@ -402,19 +402,19 @@ y&=&\beta_0 y_{\pi_1} \beta_1 y_{\pi_2} ... \beta_{m-1} y_{\pi_m} \beta_m ...@@ -402,19 +402,19 @@ y&=&\beta_0 y_{\pi_1} \beta_1 y_{\pi_2} ... \beta_{m-1} y_{\pi_m} \beta_m
\parinterval 这些特征可以被具体描述为: \parinterval 这些特征可以被具体描述为:
\begin{eqnarray} \begin{eqnarray}
h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{r \in d}h_i (r) h_i (d,\seq{t},\seq{s})=\sum_{r \in d}h_i (r)
\label{eq:8-4} \label{eq:8-4}
\end{eqnarray} \end{eqnarray}
\parinterval 公式\eqref{eq:8-4}中,$r$表示推导$d$中的一条规则,$h_i (r)$表示规则$r$上的第$i$个特征。可以看出,推导$d$的特征值就是所有包含在$d$中规则的特征值的和。进一步,可以定义 \parinterval 公式\eqref{eq:8-4}中,$r$表示推导$d$中的一条规则,$h_i (r)$表示规则$r$上的第$i$个特征。可以看出,推导$d$的特征值就是所有包含在$d$中规则的特征值的和。进一步,可以定义
\begin{eqnarray} \begin{eqnarray}
\textrm{rscore}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{i=1}^7 \lambda_i \cdot h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}}) \textrm{rscore}(d,\seq{t},\seq{s})=\sum_{i=1}^7 \lambda_i \cdot h_i (d,\seq{t},\seq{s})
\label{eq:8-5} \label{eq:8-5}
\end{eqnarray} \end{eqnarray}
\parinterval 最终,模型得分被定义为: \parinterval 最终,模型得分被定义为:
\begin{eqnarray} \begin{eqnarray}
\textrm{score}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\textrm{rscore}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})+ \lambda_8 \textrm{log}⁡(\textrm{P}_{\textrm{lm}}(\vectorn{\emph{t}}))+\lambda_9 \mid \vectorn{\emph{t}} \mid \textrm{score}(d,\seq{t},\seq{s})=\textrm{rscore}(d,\seq{t},\seq{s})+ \lambda_8 \textrm{log}⁡(\textrm{P}_{\textrm{lm}}(\seq{t}))+\lambda_9 \mid \seq{t} \mid
\label{eq:8-6} \label{eq:8-6}
\end{eqnarray} \end{eqnarray}
...@@ -438,14 +438,14 @@ h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{r \in d}h_i (r) ...@@ -438,14 +438,14 @@ h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{r \in d}h_i (r)
\parinterval 层次短语模型解码的目标是找到模型得分最高的推导,即: \parinterval 层次短语模型解码的目标是找到模型得分最高的推导,即:
\begin{eqnarray} \begin{eqnarray}
\hat{d} = \argmax_{d}\ \textrm{score}(d,\vectorn{\emph{s}},\vectorn{\emph{t}}) \hat{d} = \argmax_{d}\ \textrm{score}(d,\seq{s},\seq{t})
\label{eq:8-7} \label{eq:8-7}
\end{eqnarray} \end{eqnarray}
\noindent 这里,$\hat{d}$的目标语部分即最佳译文$\hat{\vectorn{\emph{t}}}$。令函数$t(\cdot)$返回翻译推导的目标语词串,于是有: \noindent 这里,$\hat{d}$的目标语部分即最佳译文$\hat{\seq{t}}$。令函数$t(\cdot)$返回翻译推导的目标语词串,于是有:
\begin{eqnarray} \begin{eqnarray}
\hat{\vectorn{\emph{t}}}=t(\hat{d}) \hat{\seq{t}}=t(\hat{d})
\label{eq:8-8} \label{eq:8-8}
\end{eqnarray} \end{eqnarray}
...@@ -841,7 +841,7 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q ...@@ -841,7 +841,7 @@ span\textrm{[0,4]}&=&\textrm{“猫} \quad \textrm{喜欢} \quad \textrm{吃} \q
\subsubsection{2. 基于树结构的翻译推导} \subsubsection{2. 基于树结构的翻译推导}
\parinterval 规则中的变量预示着一种替换操作,即变量可以被其他树结构替换。实际上,上面的树到树翻译规则就是一种{\small\bfnew{同步树替换文法规则}}\index{同步树替换文法规则}(Synchronous Tree Substitution Grammar Rule)\index{Synchronous Tree Substitution Grammar Rule}。不论是源语言端还是目标语言端,都可以通过这种替换操作不断生成更大的树结构,也就是通过树片段的组合得到更大的树片段。图\ref{fig:8-20}就展示了树替换操作的一个实例。 \parinterval 规则中的变量预示着一种替换操作,即变量可以被其他树结构替换。实际上,上面的树到树翻译规则就是一种{\small\bfnew{同步树替换文法}}\index{同步树替换文法}(Synchronous Tree-Substitution Grammar)\index{Synchronous Tree-Substitution Grammar}规则。不论是源语言端还是目标语言端,都可以通过这种替换操作不断生成更大的树结构,也就是通过树片段的组合得到更大的树片段。图\ref{fig:8-20}就展示了树替换操作的一个实例。
%---------------------------------------------- %----------------------------------------------
\begin{figure}[htp] \begin{figure}[htp]
...@@ -1308,7 +1308,7 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex ...@@ -1308,7 +1308,7 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex
\subsection{句法翻译模型的特征} \subsection{句法翻译模型的特征}
\parinterval 基于语言学句法的翻译模型使用判别式模型对翻译推导进行建模({\chapterseven}数学建模小节)。给定双语句对($\vectorn{\emph{s}}$,$\vectorn{\emph{t}}$),由$M$个特征经过线性加权,得到每个翻译推导$d$的得分,记为$\textrm{score(}d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{i=1}^{M} \lambda_i \cdot h_{i}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})$,其中$\lambda_i$表示特征权重,$h_{i}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})$表示特征函数。翻译的目标就是要找到使$\textrm{score(}d,\vectorn{\emph{t}},\vectorn{\emph{s}})$达到最高的推导$d$ \parinterval 基于语言学句法的翻译模型使用判别式模型对翻译推导进行建模({\chapterseven}数学建模小节)。给定双语句对($\seq{s}$,$\seq{t}$),由$M$个特征经过线性加权,得到每个翻译推导$d$的得分,记为$\textrm{score(}d,\seq{t},\seq{s})=\sum_{i=1}^{M} \lambda_i \cdot h_{i}(d,\seq{t},\seq{s})$,其中$\lambda_i$表示特征权重,$h_{i}(d,\seq{t},\seq{s})$表示特征函数。翻译的目标就是要找到使$\textrm{score(}d,\seq{t},\seq{s})$达到最高的推导$d$
\parinterval 这里,可以使用最小错误率训练对特征权重进行调优({\chapterseven}最小错误率训练小节)。而特征函数可参考如下定义: \parinterval 这里,可以使用最小错误率训练对特征权重进行调优({\chapterseven}最小错误率训练小节)。而特征函数可参考如下定义:
...@@ -1349,9 +1349,9 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex ...@@ -1349,9 +1349,9 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex
\begin{itemize} \begin{itemize}
\vspace{0.5em} \vspace{0.5em}
\item (h8)语言模型得分(取对数),即$\log(\textrm{P}_{\textrm{lm}}(\vectorn{\emph{t}}))$,用于度量译文的流畅度; \item (h8)语言模型得分(取对数),即$\log(\textrm{P}_{\textrm{lm}}(\seq{t}))$,用于度量译文的流畅度;
\vspace{0.5em} \vspace{0.5em}
\item (h9)译文长度,即$|\vectorn{\emph{t}}|$,用于避免模型过于倾向生成短译文(因为短译文语言模型分数高); \item (h9)译文长度,即$|\seq{t}|$,用于避免模型过于倾向生成短译文(因为短译文语言模型分数高);
\vspace{0.5em} \vspace{0.5em}
\item (h10)翻译规则数量,学习对使用规则数量的偏好。比如,如果这个特征的权重较高,则表明系统更喜欢使用数量多的规则; \item (h10)翻译规则数量,学习对使用规则数量的偏好。比如,如果这个特征的权重较高,则表明系统更喜欢使用数量多的规则;
\vspace{0.5em} \vspace{0.5em}
...@@ -1458,7 +1458,7 @@ d_1 = {d'} \circ {r_5} ...@@ -1458,7 +1458,7 @@ d_1 = {d'} \circ {r_5}
\parinterval 解码的目标是找到得分score($d$)最高的推导$d$。这个过程通常被描述为: \parinterval 解码的目标是找到得分score($d$)最高的推导$d$。这个过程通常被描述为:
\begin{eqnarray} \begin{eqnarray}
\hat{d} = \argmax_d\ \textrm{score} (d,\vectorn{\emph{s}},\vectorn{\emph{t}}) \hat{d} = \argmax_d\ \textrm{score} (d,\seq{s},\seq{t})
\label{eq:8-13} \label{eq:8-13}
\end{eqnarray} \end{eqnarray}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论