Commit dd40b862 by xiaotong

bug fixes (sec 3)

parent 02fe77a7
...@@ -909,12 +909,12 @@ ...@@ -909,12 +909,12 @@
\begin{itemize} \begin{itemize}
\item 很多时候,我们有多个互译句对$(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[n]},\mathbf{t}^{[n]})$,称之为\alert{双语平行数据(语料)}。翻译概率可以被定义为 \item 如果有多个互译句对$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[K]},\mathbf{t}^{[K]})\}$,称之为\alert{双语平行数据(语料)}。翻译概率可以被定义为
\vspace{-1em} \vspace{-1em}
\begin{eqnarray} \begin{eqnarray}
\textrm{P}(x,y) & = & \frac{\sum_{i=1}^{n}c(x,y;\mathbf{s}^{[i]},\mathbf{t}^{[i]})}{\sum_{i=1}^{n} \sum_{x',y'} c(x',y';\mathbf{s}^{[i]},\mathbf{t}^{[i]})} \nonumber \textrm{P}(x,y) & = & \frac{\sum_{k=1}^{K}c(x,y;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{k=1}^{K} \sum_{x',y'} c(x',y';\mathbf{s}^{[k]},\mathbf{t}^{[k]})} \nonumber
\end{eqnarray} \end{eqnarray}
\item<2-> 说白了就是计算$(x,y)$的频次时,在每个句子上累加 \item<2-> 说白了就是计算$(x,y)$的频次时,在每个句子上累加
...@@ -1414,7 +1414,7 @@ $m$ & $n$ & $n^m \cdot m!$ \\ \hline ...@@ -1414,7 +1414,7 @@ $m$ & $n$ & $n^m \cdot m!$ \\ \hline
\node [anchor=north west,inner sep=2pt,align=left] (line4) at ([yshift=-1pt]line3.south west) {\textrm{3: \textbf{for} $i$ in $[1,m]$ \textbf{do}}}; \node [anchor=north west,inner sep=2pt,align=left] (line4) at ([yshift=-1pt]line3.south west) {\textrm{3: \textbf{for} $i$ in $[1,m]$ \textbf{do}}};
\node [anchor=north west,inner sep=2pt,align=left] (line5) at ([yshift=-1pt]line4.south west) {\textrm{4: \hspace{1em} $h = \phi$}}; \node [anchor=north west,inner sep=2pt,align=left] (line5) at ([yshift=-1pt]line4.south west) {\textrm{4: \hspace{1em} $h = \phi$}};
\node [anchor=north west,inner sep=2pt,align=left] (line6) at ([yshift=-1pt]line5.south west) {\textrm{5: \hspace{1em} \textbf{foreach} $j$ in $[1,m]$ \textbf{do}}}; \node [anchor=north west,inner sep=2pt,align=left] (line6) at ([yshift=-1pt]line5.south west) {\textrm{5: \hspace{1em} \textbf{foreach} $j$ in $[1,m]$ \textbf{do}}};
\node [anchor=north west,inner sep=2pt,align=left] (line7) at ([yshift=-1pt]line6.south west) {\textrm{6: \hspace{2em} \textbf{if} $used[j]=$ \textbf{true} \textbf{then}}}; \node [anchor=north west,inner sep=2pt,align=left] (line7) at ([yshift=-1pt]line6.south west) {\textrm{6: \hspace{2em} \textbf{if} $used[j]=$ \textbf{false} \textbf{then}}};
\node [anchor=north west,inner sep=2pt,align=left] (line8) at ([yshift=-1pt]line7.south west) {\textrm{7: \hspace{3em} $h = h \cup \textrm{\textsc{Join}}(best,\pi[j])$}}; \node [anchor=north west,inner sep=2pt,align=left] (line8) at ([yshift=-1pt]line7.south west) {\textrm{7: \hspace{3em} $h = h \cup \textrm{\textsc{Join}}(best,\pi[j])$}};
\node [anchor=north west,inner sep=2pt,align=left] (line9) at ([yshift=-1pt]line8.south west) {\textrm{8: \hspace{1em} $best = \textrm{\textsc{PruneForTop1}}(h)$}}; \node [anchor=north west,inner sep=2pt,align=left] (line9) at ([yshift=-1pt]line8.south west) {\textrm{8: \hspace{1em} $best = \textrm{\textsc{PruneForTop1}}(h)$}};
\node [anchor=north west,inner sep=2pt,align=left] (line10) at ([yshift=-1pt]line9.south west) {\textrm{9: \hspace{1em} $used[best.j] = \textrm{\textsc{\textbf{true}}}$}}; \node [anchor=north west,inner sep=2pt,align=left] (line10) at ([yshift=-1pt]line9.south west) {\textrm{9: \hspace{1em} $used[best.j] = \textrm{\textsc{\textbf{true}}}$}};
...@@ -2395,7 +2395,7 @@ $m$ & $n$ & $n^m \cdot m!$ \\ \hline ...@@ -2395,7 +2395,7 @@ $m$ & $n$ & $n^m \cdot m!$ \\ \hline
\item \textbf{翻译模型参数估计} - 计算$\textrm{P}(\mathbf{s}|\mathbf{t})$所需的参数 \item \textbf{翻译模型参数估计} - 计算$\textrm{P}(\mathbf{s}|\mathbf{t})$所需的参数
\end{itemize} \end{itemize}
\vspace{0.5em} \vspace{0.5em}
\item<2-> \textbf{IBM模型的假设}:$\mathbf{s}=s_1...s_m$和$\mathbf{t}=t_1...t_n$之间有单词一级的对应,称作\alert{单词对齐}或者\alert{词对齐}。此外: \item<2-> \textbf{IBM模型的假设}:$\mathbf{s}=s_1...s_m$和$\mathbf{t}=t_1...t_l$之间有单词一级的对应,称作\alert{单词对齐}或者\alert{词对齐}。此外:
\begin{itemize} \begin{itemize}
\item \textbf{约束}:一个源语言单词只能对应一个目标语单词 \item \textbf{约束}:一个源语言单词只能对应一个目标语单词
\vspace{0.5em} \vspace{0.5em}
...@@ -2792,11 +2792,11 @@ $\mathbf{s}$ = 在 桌子 上 \ \ \ \ \ $\mathbf{t}$ = $t_0$ on the table \ \ \ ...@@ -2792,11 +2792,11 @@ $\mathbf{s}$ = 在 桌子 上 \ \ \ \ \ $\mathbf{t}$ = $t_0$ on the table \ \ \
\textrm{P}(\mathbf{s},\mathbf{a}|\mathbf{t}) & = & \textrm{P}(m|\mathbf{t}) \prod\limits_{j=1}^{m} \textrm{P}(a_j|a_{1}^{j-1},s_{1}^{j-1},m,\mathbf{t}) \textrm{P}(s_j|a_{1}^{j},s_{1}^{j-1},m,\mathbf{t}) \nonumber \\ \textrm{P}(\mathbf{s},\mathbf{a}|\mathbf{t}) & = & \textrm{P}(m|\mathbf{t}) \prod\limits_{j=1}^{m} \textrm{P}(a_j|a_{1}^{j-1},s_{1}^{j-1},m,\mathbf{t}) \textrm{P}(s_j|a_{1}^{j},s_{1}^{j-1},m,\mathbf{t}) \nonumber \\
& \visible<2->{=} & \visible<2->{\textrm{P}(m=3 \mid \textrm{'$t_0$ on the table'})} \visible<3->{\times} \nonumber \\ & \visible<2->{=} & \visible<2->{\textrm{P}(m=3 \mid \textrm{'$t_0$ on the table'})} \visible<3->{\times} \nonumber \\
& & \visible<3->{\textrm{P}(a_1=0 \mid \phi,\phi,3,\textrm{'$t_0$ on the table'})} \visible<4->{\times} \nonumber \\ & & \visible<3->{\textrm{P}(a_1=0 \mid \phi,\phi,3,\textrm{'$t_0$ on the table'})} \visible<4->{\times} \nonumber \\
& & \visible<4->{\textrm{P}(f_1=\textrm{在} \mid \textrm{\{1-0\}},\phi,3,\textrm{'$t_0$ on the table'})} \visible<5->{\times} \nonumber \\ & & \visible<4->{\textrm{P}(s_1=\textrm{在} \mid \textrm{\{1-0\}},\phi,3,\textrm{'$t_0$ on the table'})} \visible<5->{\times} \nonumber \\
& & \visible<5->{\textrm{P}(a_2=3 \mid \textrm{\{1-0\}},\textrm{'在'},3,\textrm{'$t_0$ on the table'})} \visible<6->{\times} \nonumber \\ & & \visible<5->{\textrm{P}(a_2=3 \mid \textrm{\{1-0\}},\textrm{'在'},3,\textrm{'$t_0$ on the table'})} \visible<6->{\times} \nonumber \\
& & \visible<6->{\textrm{P}(f_2=\textrm{桌子} \mid \textrm{\{1-0,2-3\}},\textrm{'在'},3,\textrm{'$t_0$ on the table'})} \visible<7->{\times} \nonumber \\ & & \visible<6->{\textrm{P}(s_2=\textrm{桌子} \mid \textrm{\{1-0,2-3\}},\textrm{'在'},3,\textrm{'$t_0$ on the table'})} \visible<7->{\times} \nonumber \\
& & \visible<7->{\textrm{P}(a_3=1 \mid \textrm{\{1-0,2-3\}},\textrm{'在 桌子'},3,\textrm{'$t_0$ on the table'})} \visible<8->{\times} \nonumber \\ & & \visible<7->{\textrm{P}(a_3=1 \mid \textrm{\{1-0,2-3\}},\textrm{'在 桌子'},3,\textrm{'$t_0$ on the table'})} \visible<8->{\times} \nonumber \\
& & \visible<8->{\textrm{P}(f_3=\textrm{上} \mid \textrm{\{1-0,2-3,3-1\}},\textrm{'在 桌子'},3,\textrm{'$t_0$ on the table'})} \nonumber & & \visible<8->{\textrm{P}(s_3=\textrm{上} \mid \textrm{\{1-0,2-3,3-1\}},\textrm{'在 桌子'},3,\textrm{'$t_0$ on the table'})} \nonumber
\end{eqnarray} \end{eqnarray}
} }
...@@ -3730,7 +3730,7 @@ $\mathbf{s}$ = 在 桌子 上 \ \ \ \ \ $\mathbf{t}$ = $t_0$ on the table \ \ \ ...@@ -3730,7 +3730,7 @@ $\mathbf{s}$ = 在 桌子 上 \ \ \ \ \ $\mathbf{t}$ = $t_0$ on the table \ \ \
{\small {\small
\begin{eqnarray} \begin{eqnarray}
L(f,\lambda) & = & \frac{\epsilon}{(l+1)^{m}} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} \prod\limits_{j=1}^{m} f(s_j|t_i) - \nonumber \\ L(f,\lambda) & = & \frac{\epsilon}{(l+1)^{m}} \prod\limits_{j=1}^{m} \sum\limits_{i=0}^{l} f(s_j|t_i) - \nonumber \\
& & \sum_{t_y} \lambda_{t_y} (\sum_{s_x} f(s_x|t_y) -1) \nonumber & & \sum_{t_y} \lambda_{t_y} (\sum_{s_x} f(s_x|t_y) -1) \nonumber
\end{eqnarray} \end{eqnarray}
} }
...@@ -4190,9 +4190,9 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo ...@@ -4190,9 +4190,9 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo
%%% scale it up to the full corpus %%% scale it up to the full corpus
\begin{frame}{在整个数据集上计算} \begin{frame}{在整个数据集上计算}
\begin{itemize} \begin{itemize}
\item \textbf{更真实的情况}:我们拥有一系列互译的句对(称作\alert{平行语料}),记为$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),(\mathbf{s}^{[2]},\mathbf{t}^{[2]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$。对于这$N$个训练用句对,定义$f(s_u|t_v)$的期望频次为 \item \textbf{更真实的情况}:我们拥有一系列互译的句对(称作\alert{平行语料}),记为$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),(\mathbf{s}^{[2]},\mathbf{t}^{[2]}),...,(\mathbf{s}^{[K]},\mathbf{t}^{[K]})\}$。对于这$K$个训练用句对,定义$f(s_u|t_v)$的期望频次为
\begin{displaymath} \begin{displaymath}
c_{\mathbb{E}}(s_u|t_v) = \sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[i]},\mathbf{t}^{[i]}) c_{\mathbb{E}}(s_u|t_v) = \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})
\end{displaymath} \end{displaymath}
\item<2-> \textbf{于是} \item<2-> \textbf{于是}
\begin{center} \begin{center}
...@@ -4200,8 +4200,8 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo ...@@ -4200,8 +4200,8 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo
\node [anchor=west,inner sep=2pt] (eq1) at (0,0) {$f(s_u|t_v)$}; \node [anchor=west,inner sep=2pt] (eq1) at (0,0) {$f(s_u|t_v)$};
\node [anchor=west] (eq2) at (eq1.east) {$=$\ }; \node [anchor=west] (eq2) at (eq1.east) {$=$\ };
\draw [-] ([xshift=0.3em]eq2.east) -- ([xshift=11.6em]eq2.east); \draw [-] ([xshift=0.3em]eq2.east) -- ([xshift=11.6em]eq2.east);
\node [anchor=south west] (eq3) at ([xshift=1em]eq2.east) {$\sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[i]},\mathbf{t}^{[i]})$}; \node [anchor=south west] (eq3) at ([xshift=1em]eq2.east) {$\sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$};
\node [anchor=north west] (eq4) at (eq2.east) {$\sum_{s_u} \sum_{i=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[i]},\mathbf{t}^{[i]})$}; \node [anchor=north west] (eq4) at (eq2.east) {$\sum_{s_u} \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$};
\visible<4->{ \visible<4->{
\node [anchor=south] (label1) at ([yshift=-6em,xshift=3em]eq1.north west) {利用这个公式计算}; \node [anchor=south] (label1) at ([yshift=-6em,xshift=3em]eq1.north west) {利用这个公式计算};
...@@ -4250,16 +4250,16 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo ...@@ -4250,16 +4250,16 @@ f(s_u|t_v) & = & \lambda_{t_v}^{-1} \cdot \textrm{P}(\mathbf{s}|\mathbf{t}) \cdo
\label{ibmtraining} \label{ibmtraining}
\begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{IBM模型1的训练(EM算法)} \begin{beamerboxesrounded}[upper=uppercolblue,lower=lowercolblue,shadow=true]{IBM模型1的训练(EM算法)}
输入: 平行语料$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$\\ 输入: 平行语料$\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[K]},\mathbf{t}^{[K]})\}$\\
输出:参数$f(\cdot|\cdot)$的最优值\\ 输出:参数$f(\cdot|\cdot)$的最优值\\
1: \textbf{Function} \textsc{TrainItWithEM}($\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[N]},\mathbf{t}^{[N]})\}$) \\ 1: \textbf{Function} \textsc{TrainItWithEM}($\{(\mathbf{s}^{[1]},\mathbf{t}^{[1]}),...,(\mathbf{s}^{[K]},\mathbf{t}^{[K]})\}$) \\
2: \ \ Initialize $f(\cdot|\cdot)$ \hspace{5em} $\rhd$ 比如给$f(\cdot|\cdot)$一个均匀分布\\ 2: \ \ Initialize $f(\cdot|\cdot)$ \hspace{5em} $\rhd$ 比如给$f(\cdot|\cdot)$一个均匀分布\\
3: \ \ Loop until $f(\cdot|\cdot)$ converges\\ 3: \ \ Loop until $f(\cdot|\cdot)$ converges\\
4: \ \ \ \ \textbf{foreach} $k = 1$ to $N$ \textbf{do}\\ 4: \ \ \ \ \textbf{foreach} $k = 1$ to $K$ \textbf{do}\\
5: \ \ \ \ \ \ \ \footnotesize{$c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) = \sum\limits_{j=1}^{|\mathbf{s}^{[k]}|} \delta(s_j,s_u) \sum\limits_{i=0}^{|\mathbf{t}^{[k]}|} \delta(t_i,t_v) \cdot \frac{f(s_u|t_v)}{\sum_{i=0}^{l}f(s_u|t_i)}$}\normalsize{}\\ 5: \ \ \ \ \ \ \ \footnotesize{$c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) = \sum\limits_{j=1}^{|\mathbf{s}^{[k]}|} \delta(s_j,s_u) \sum\limits_{i=0}^{|\mathbf{t}^{[k]}|} \delta(t_i,t_v) \cdot \frac{f(s_u|t_v)}{\sum_{i=0}^{l}f(s_u|t_i)}$}\normalsize{}\\
6: \ \ \ \ \textbf{foreach} $t_v$ appearing in at least one of $\{\mathbf{t}^{[1]},...,\mathbf{t}^{[N]}\}$ \textbf{do}\\ 6: \ \ \ \ \textbf{foreach} $t_v$ appearing in at least one of $\{\mathbf{t}^{[1]},...,\mathbf{t}^{[K]}\}$ \textbf{do}\\
7: \ \ \ \ \ \ \ $\lambda_{t_v}^{'} = \sum_{s_u} \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$\\ 7: \ \ \ \ \ \ \ $\lambda_{t_v}^{'} = \sum_{s_u} \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})$\\
8: \ \ \ \ \ \ \ \textbf{foreach} $s_u$ appearing in at least one of $\{\mathbf{s}^{[1]},...,\mathbf{s}^{[N]}\}$ \textbf{do}\\ 8: \ \ \ \ \ \ \ \textbf{foreach} $s_u$ appearing in at least one of $\{\mathbf{s}^{[1]},...,\mathbf{s}^{[K]}\}$ \textbf{do}\\
9: \ \ \ \ \ \ \ \ \ $f(s_u|t_v) = \sum_{k=1}^{N} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) \cdot (\lambda_{t_v}^{'})^{-1}$\\ 9: \ \ \ \ \ \ \ \ \ $f(s_u|t_v) = \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]}) \cdot (\lambda_{t_v}^{'})^{-1}$\\
10: \ \textbf{return} $f(\cdot|\cdot)$ 10: \ \textbf{return} $f(\cdot|\cdot)$
\end{beamerboxesrounded} \end{beamerboxesrounded}
...@@ -4287,8 +4287,8 @@ c_{\mathbb{E}}(i|j,m,l;\mathbf{s},\mathbf{t}) & = & \frac{f(s_j|t_i)a(i|j,m,l)}{ ...@@ -4287,8 +4287,8 @@ c_{\mathbb{E}}(i|j,m,l;\mathbf{s},\mathbf{t}) & = & \frac{f(s_j|t_i)a(i|j,m,l)}{
\end{eqnarray} \end{eqnarray}
\item \textbf{M-Step} \item \textbf{M-Step}
\begin{eqnarray} \begin{eqnarray}
f(s_u|t_v) & = & \frac{\sum_{k=0}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{s_u} \sum_{k=0}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})} \nonumber \\ f(s_u|t_v) & = & \frac{\sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{s_u} \sum_{k=1}^{K} c_{\mathbb{E}}(s_u|t_v;\mathbf{s}^{[k]},\mathbf{t}^{[k]})} \nonumber \\
a(i|j,m,l) & = & \frac{\sum_{k=0}^{K} c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{i} \sum_{k=0}^{K} c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^{[k]})} \nonumber a(i|j,m,l) & = & \frac{\sum_{k=1}^{K} c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^{[k]})}{\sum_{i} \sum_{k=1}^{K} c_{\mathbb{E}}(i|j;\mathbf{s}^{[k]},\mathbf{t}^{[k]})} \nonumber
\end{eqnarray} \end{eqnarray}
\end{enumerate} \end{enumerate}
\end{frame} \end{frame}
......
Markdown 格式
0%
您将添加 0 人到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 登录 后发表评论