Commit 19545a0c by 曹润柘

合并分支 'caorunzhe' 到 'master'

Caorunzhe

查看合并请求 !183
parents 1197c874 efa27b49
......@@ -13,7 +13,7 @@
\node [ugreen] (input) at (0,0) {猫喜欢吃鱼};
\node [draw,thick,anchor=west,ublue] (preprocessing) at ([xshift=1em]input.east) {分词系统};
\node [ugreen,anchor=west] (mtinput) at ([xshift=1em]preprocessing.east) {猫/喜欢/吃/鱼};
\node [draw,thick,anchor=west,ublue] (smt) at ([xshift=1em]mtinput.east) {MT系统};
\node [draw,thick,anchor=west,ublue] (smt) at ([xshift=1em]mtinput.east) {机器翻译系统};
\node [anchor=west] (mtoutput) at ([xshift=1em]smt.east) {...};
\draw [->,thick,ublue] ([xshift=0.1em]input.east) -- ([xshift=-0.2em]preprocessing.west);
\draw [->,thick,ublue] ([xshift=0.2em]preprocessing.east) -- ([xshift=-0.1em]mtinput.west);
......
......@@ -5,10 +5,10 @@
\node[anchor=west,hide](y2)at([xshift=2em]y1.east){$y_2$};
\node[anchor=west,hide](y3)at([xshift=2em]y2.east){$y_3$};
\node[anchor=west,line width=1pt,inner sep=2pt,minimum size=2em](dots)at([xshift=2em]y3.east){$\cdots$};
\node[anchor=west,hide](yn-1)at([xshift=2em]dots.east){$y_{n-1}$};
\node[anchor=west,hide](yn)at([xshift=2em]yn-1.east){$y_n$};
\node[anchor=west,hide](yn-1)at([xshift=2em]dots.east){$y_{m-1}$};
\node[anchor=west,hide](yn)at([xshift=2em]yn-1.east){$y_m$};
\node[anchor=north,draw,line width=1pt,inner sep=2pt,fill=red!30,minimum height=2em,minimum width=12em](see)at ([yshift=-3em,xshift=2em]y3.south){$\mathbf{X}=(x_1,x_2,\ldots,x_{n-1},x_n)$};
\node[anchor=north,draw,line width=1pt,inner sep=2pt,fill=red!30,minimum height=2em,minimum width=12em](see)at ([yshift=-3em,xshift=2em]y3.south){$\mathbf{X}=(x_1,x_2,\ldots,x_{m-1},x_m)$};
\node[anchor=south,font=\footnotesize] at ([yshift=1em,xshift=2em]y3.north){待预测的隐藏状态序列};
\node[anchor=north,font=\footnotesize] at ([yshift=-1em]see.south){可见状态序列};
......
......@@ -56,18 +56,18 @@
\node[rectangle,draw=ublue, inner sep=0.2em] [fit = (treebanklabel) (t1n1) (t2w1) (t2wn)] (treebank) {};
\end{pgfonlayer}
\node [anchor=north west] (math1) at ([xshift=2em]treebank.north east) {P(VP $\to$ VV NN)};
\node [anchor=north west] (math1part2) at ([xshift=-1em,yshift=0.2em]math1.south west) {$=\frac{\textrm{``VP''和``VV NN''同时出现的次数=1}}{\textrm{``VP''出现的次数}=4}$};
\node [anchor=north west] (math1) at ([xshift=2em]treebank.north east) {$\funp{P}$(VP $\to$ VV NN)};
\node [anchor=north west] (math1part2) at ([xshift=-1em,yshift=0.2em]math1.south west) {$=\frac{\textrm{“VP”和“VV NN”同时出现的次数=1}}{\textrm{“VP”出现的次数}=4}$};
\node [anchor=north west] (math1part3) at ([yshift=0.2em]math1part2.south west){$=\frac{1}{4}$};
\node [anchor=north west] (math2) at ([yshift=-6em]math1.north west) {P(NP $\to$ NN)};
\node [anchor=north west] (math2part2) at ([xshift=-1em,yshift=0.2em]math2.south west) {$=\frac{\textrm{``NP''和``NN''同时出现的次数=2}}{\textrm{``NP''出现的次数}=3}$};
\node [anchor=north west] (math2) at ([yshift=-6em]math1.north west) {$\funp{P}$(NP $\to$ NN)};
\node [anchor=north west] (math2part2) at ([xshift=-1em,yshift=0.2em]math2.south west) {$=\frac{\textrm{“NP”和“NN”同时出现的次数=2}}{\textrm{“NP”出现的次数}=3}$};
\node [anchor=north west] (math2part3) at ([yshift=0.2em]math2part2.south west){$=\frac{2}{3}$};
\node [anchor=north west] (math3) at ([yshift=-6em]math2.north west) {P(IP $\to$ NP NP)};
\node [anchor=north west] (math3part2) at ([xshift=-1em,yshift=0.2em]math3.south west) {$=\frac{\textrm{``IP''和``NP NP''同时出现的次数=0}}{\textrm{``IP''出现的次数}=3}$};
\node [anchor=north west] (math3) at ([yshift=-6em]math2.north west) {$\funp{P}$(IP $\to$ NP NP)};
\node [anchor=north west] (math3part2) at ([xshift=-1em,yshift=0.2em]math3.south west) {$=\frac{\textrm{“IP”和“NP NP”同时出现的次数=0}}{\textrm{“IP”出现的次数}=3}$};
\node [anchor=north west] (math3part3) at ([yshift=0.2em]math3part2.south west){$=\frac{0}{3}$};
\begin{pgfonlayer}{background}
......
......@@ -19,7 +19,7 @@
\end{pgfonlayer}
}
\node [anchor=west,ugreen] (P) at ([xshift=5.2em,yshift=-0.8em]corpus.east){\large{\funp{P}($\cdot$)}};
\node [anchor=west,ugreen] (P) at ([xshift=5.2em,yshift=-0.8em]corpus.east){\large{$\funp{P}(\cdot)$}};
\node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计模型}}}};
\begin{pgfonlayer}{background}
......@@ -41,9 +41,9 @@
{\footnotesize
{
\node [anchor=west] (label1) at (0,6em) {实际上,通过学习我们得到了一个分词模型\funp{P}($\cdot$),给定任意的分词结果};
\node [anchor=north west] (label1part2) at ([yshift=0.5em]label1.south west) {$W=w_1 w_2...w_n$,都能通过\funp{P}($W$)=$\funp{P}(w_1) \cdot \funp{P}(w_2) \cdot ... \cdot \funp{P}(w_n)$ 计算这种分词的\hspace{0.13em} };
\node [anchor=north west] (label1part3) at ([yshift=0.5em]label1part2.south west) {概率值};
\node [anchor=west] (label1) at (0,6em) {实际上,通过学习我们得到了一个分词模型$\funp{P}(\cdot)$,给定任意的分词结果};
\node [anchor=north west] (label1part2) at ([yshift=0.5em]label1.south west) {$W=w_1 w_2...w_n$,都能通过$\funp{P}(W)=\funp{P}(w_1) \cdot \funp{P}(w_2) \cdot ... \cdot \funp{P}(w_n)$ 计算这种分\hspace{0.13em} };
\node [anchor=north west] (label1part3) at ([yshift=0.5em]label1part2.south west) {词的概率值};
}
\begin{pgfonlayer}{background}
......@@ -96,13 +96,13 @@
\node [anchor=north west,minimum height=1.6em] (data33) at ([yshift=0.3em]data23.south west) {};
{
\node [anchor=north west] (data41) at (data31.south west) {确实/现在/数据/很多};
\node [anchor=north west] (data41) at (data31.south west) {确实/现在/数据/很/};
}
{
\node [anchor=north west] (data42) at (data32.south west) {$\funp{P}(\textrm{确实}) \cdot \funp{P}(\textrm{现在}) \cdot \funp{P}(\textrm{数据}) \cdot $};
}
{
\node [anchor=north west] (data43) at ([yshift=-0.2em,xshift=-2em]data33.south west) {\color{red}{\textbf{输出}}};
\node [anchor=north west] (data43) at ([yshift=-0.4em,xshift=-1.4em]data33.south west) {\color{red}{\textbf{输出}}};
\draw [->,red,thick] (data43.west)--([xshift=-1em]data43.west);
}
{
......
......@@ -35,7 +35,7 @@
}
{
\node[rectangle,fill=ublue,inner sep=2pt] [fit = (mtinputlabel) (mtoutputlabel) (inputmarking) (outputmarking)] {{\color{white} \textbf{\Large{MT 系统}}}};
\node[rectangle,fill=ublue,inner sep=2pt] [fit = (mtinputlabel) (mtoutputlabel) (inputmarking) (outputmarking)] {{\color{white} \textbf{\Large{机器翻译系统}}}};
}
......
......@@ -25,7 +25,7 @@
}
\end{scope}
\node [anchor=west,draw,thick,inner sep=3pt,ublue] (mtengine) at ([xshift=1.05in]input.east) {{\scriptsize MT系统}};
\node [anchor=west,draw,thick,inner sep=3pt,ublue] (mtengine) at ([xshift=1.0in]input.east) {{\scriptsize 机器翻译系统}};
\begin{scope}[scale=0.8,xshift=3.0in,yshift=-0.87in,level distance=20pt,sibling distance=-3pt,grow'=up]
{\scriptsize
......@@ -49,8 +49,8 @@
\draw[->,thick] ([xshift=-6pt]output.west) -- ([xshift=2pt]output.west);
{
\draw[->,thick] ([xshift=-12pt]mtengine.west) -- ([xshift=-2pt]mtengine.west);
\draw[->,thick] ([xshift=2pt]mtengine.east) -- ([xshift=12pt]mtengine.east);
\draw[->,thick] ([xshift=-10pt]mtengine.west) -- ([xshift=-2pt]mtengine.west);
\draw[->,thick] ([xshift=2pt]mtengine.east) -- ([xshift=10pt]mtengine.east);
}
{
......
......@@ -72,9 +72,9 @@
\\
语言学家: & 不对 && 不对 \\
我们: & 似乎对了 & 比较肯定 & 不太可能 \\
分析器: & $\textrm{P}=0.2$ & $\textrm{P}=0.6$ & $\textrm{P}=0.1$
语言学家 & 不对 && 不对 \\
我们 & 似乎对了 & 比较肯定 & 不太可能 \\
分析器 & $\textrm{P}=0.2$ & $\textrm{P}=0.6$ & $\textrm{P}=0.1$
\end{tabular}
%---------------------------------------------------------------------
......
......@@ -76,11 +76,11 @@
\node [] (d2) at (0em,-10em) {$d_2$};
\node [] (d3) at (8.5em,-10em) {$d_3$};
\node [anchor=east] (d1p) at ([xshift=0.4em]d1.west) {$\textrm{P}($};
\node [anchor=east] (d1p) at ([xshift=0.4em]d1.west) {$\funp{P}($};
\node [anchor=west] (d1p2) at ([xshift=-0.4em]d1.east) {$)=0.0123$};
\node [anchor=east] (d2p) at ([xshift=0.4em]d2.west) {$\textrm{P}($};
\node [anchor=east] (d2p) at ([xshift=0.4em]d2.west) {$\funp{P}($};
\node [anchor=west] (d2p2) at ([xshift=-0.4em]d2.east) {$)=0.4031$};
\node [anchor=east] (d3p) at ([xshift=0.4em]d3.west) {$\textrm{P}($};
\node [anchor=east] (d3p) at ([xshift=0.4em]d3.west) {$\funp{P}($};
\node [anchor=west] (d3p2) at ([xshift=-0.4em]d3.east) {$)=0.0056$};
\end{tikzpicture}
......
......@@ -46,7 +46,7 @@
\end{pgfonlayer}
}
\node [anchor=west,ugreen] (P) at ([xshift=5.95em,yshift=-0.8em]corpus.east){\large{P($\cdot$)}};
\node [anchor=west,ugreen] (P) at ([xshift=5.95em,yshift=-0.8em]corpus.east){\large{$\funp{P}(\cdot)$}};
\node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计分析模型}}}};
\begin{pgfonlayer}{background}
......
......@@ -52,7 +52,7 @@
\draw[thick] (o.north west) -- (o.south east);
\node[anchor=south west] at ([yshift=-1em,xshift=-1.4em]o.45){\tiny{可见}};
\node[anchor=north east] at ([yshift=1em,xshift=1em]o.-135){\tiny{}};
\node[anchor=north east] at ([yshift=1em,xshift=1em]o.-135){\tiny{}};
\begin{pgfonlayer}{background}
\node [rectangle,inner sep=0.5em,rounded corners=2pt,fill=red!10] [fit = (o)(n32)(rc)(cb) ] (box1) {};
......
......@@ -29,7 +29,7 @@
}
{
\node [anchor=west,ugreen] (P) at ([xshift=5.2em,yshift=-0.8em]corpus.east){\large{\funp{P}($\cdot$)}};
\node [anchor=west,ugreen] (P) at ([xshift=5.2em,yshift=-0.8em]corpus.east){{$\funp{P}(\cdot)$}};
\node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计模型}}}};
}
......@@ -59,16 +59,16 @@
}
{
\node [anchor=north west] (seg4) at ([xshift=-1.0em,yshift=0.4em]seg3.south west) {...};
\node [anchor=east,ugreen] (p1seg1) at ([xshift=0.5em]seg1.west) {P(};
\node [anchor=east,ugreen] (p1seg1) at ([xshift=0.5em]seg1.west) {$\funp{P}($};
\node [anchor=west,ugreen] (p2seg1) at ([xshift=-0.5em]seg1.east) {)=0.1};
\node [anchor=east,ugreen] (p1seg2) at ([xshift=0.5em]seg2.west) {P(};
\node [anchor=east,ugreen] (p1seg2) at ([xshift=0.5em]seg2.west) {$\funp{P}($};
\node [anchor=west,ugreen] (p2seg2) at ([xshift=-0.5em]seg2.east) {)=0.6};
\node [anchor=east,ugreen] (p1seg3) at ([xshift=0.5em]seg3.west) {P(};
\node [anchor=east,ugreen] (p1seg3) at ([xshift=0.5em]seg3.west) {$\funp{P}($};
\node [anchor=west,ugreen] (p2seg3) at ([xshift=-0.5em]seg3.east) {)=0.2};
}
{
\node [anchor=east,draw,dashed,red,thick,minimum width=13em,minimum height=1.4em] (final) at (p2seg2.east) {};
\node [anchor=east,draw,dashed,red,thick,minimum width=13.2em,minimum height=1.4em] (final) at (p2seg2.east) {};
\node [anchor=west,red] (finallabel) at ([xshift=3.1em]sentlabel.east) {输出概率最大的结果};
%\node [anchor=north east,red] (finallabel2) at ([yshift=0.5em]finallabel.south east) {的结果};
\draw [->,thick,red] ([xshift=0.0em,yshift=-0.5em]final.north east) ..controls +(east:0.2) and +(south:1.0).. ([xshift=2.0em]finallabel.south);
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -328,7 +328,7 @@ d = {r_1} \circ {r_2} \circ {r_3} \circ {r_4}
\begin{definition} 与词对齐相兼容的层次短语规则
{\small
对于句对$(\vectorn{s},\vectorn{t})$和它们之间的词对齐$\vectorn{a}$,令$\Phi$表示在句对$(\vectorn{s},\vectorn{t})$上与$\vectorn{a}$相兼容的双语短语集合。则:
对于句对$(\vectorn{\emph{s}},\vectorn{\emph{t}})$和它们之间的词对齐$\vectorn{\emph{a}}$,令$\Phi$表示在句对$(\vectorn{\emph{s}},\vectorn{\emph{t}})$上与$\vectorn{\emph{a}}$相兼容的双语短语集合。则:
\begin{enumerate}
\item 如果$(x,y)\in \Phi$,则$\textrm{X} \to \langle x,y,\phi \rangle$是与词对齐相兼容的层次短语规则。
\item 对于$(x,y)\in \Phi$,存在$m$个双语短语$(x_i,y_j)\in \Phi$,同时存在(1,$...$,$m$)上面的一个排序$\sim = \{\pi_1 , ... ,\pi_m\}$,且:
......@@ -376,7 +376,7 @@ y&=&\beta_0 y_{\pi_1} \beta_1 y_{\pi_2} ... \beta_{m-1} y_{\pi_m} \beta_m
\subsection{翻译特征}
\parinterval 在层次短语模型中,每个翻译推导都有一个模型得分$\textrm{score}(d,\vectorn{s},\vectorn{t})$$\textrm{score}(d,\vectorn{s},\vectorn{t})$是若干特征的线性加权之和:$\textrm{score}(d,\vectorn{t},\vectorn{s})=\sum_{i=1}^M\lambda_i\cdot h_i (d,\vectorn{t},\vectorn{s})$,其中$\lambda_i$是特征权重,$h_i (d,\vectorn{t},\vectorn{s})$是特征函数。层次短语模型的特征包括与规则相关的特征和语言模型特征,如下:
\parinterval 在层次短语模型中,每个翻译推导都有一个模型得分$\textrm{score}(d,\vectorn{\emph{s}},\vectorn{\emph{t}})$。$\textrm{score}(d,\vectorn{\emph{s}},\vectorn{\emph{t}})$是若干特征的线性加权之和:$\textrm{score}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{i=1}^M\lambda_i\cdot h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})$,其中$\lambda_i$是特征权重,$h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})$是特征函数。层次短语模型的特征包括与规则相关的特征和语言模型特征,如下:
\parinterval 对于每一条翻译规则LHS$\to \langle \alpha, \beta ,\sim \rangle$,有:
......@@ -396,19 +396,19 @@ y&=&\beta_0 y_{\pi_1} \beta_1 y_{\pi_2} ... \beta_{m-1} y_{\pi_m} \beta_m
\parinterval 这些特征可以被具体描述为:
\begin{eqnarray}
h_i (d,\vectorn{t},\vectorn{s})=\sum_{r \in d}h_i (r)
h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{r \in d}h_i (r)
\label{eq:8-4}
\end{eqnarray}
\parinterval 公式\ref{eq:8-4}中,$r$表示推导$d$中的一条规则,$h_i (r)$表示规则$r$上的第$i$个特征。可以看出,推导$d$的特征值就是所有包含在$d$中规则的特征值的和。进一步,可以定义:
\begin{eqnarray}
\textrm{rscore}(d,\vectorn{t},\vectorn{s})=\sum_{i=1}^7 \lambda_i \cdot h_i (d,\vectorn{t},\vectorn{s})
\textrm{rscore}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{i=1}^7 \lambda_i \cdot h_i (d,\vectorn{\emph{t}},\vectorn{\emph{s}})
\label{eq:8-5}
\end{eqnarray}
\parinterval 最终,模型得分被定义为:
\begin{eqnarray}
\textrm{score}(d,\vectorn{t},\vectorn{s})=\textrm{rscore}(d,\vectorn{t},\vectorn{s})+ \lambda_8 \textrm{log}⁡(\textrm{P}_{\textrm{lm}}(\vectorn{t}))+\lambda_9 \mid \vectorn{t} \mid
\textrm{score}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\textrm{rscore}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})+ \lambda_8 \log(\textrm{P}_{\textrm{lm}}(\vectorn{\emph{t}}))+\lambda_9 |\vectorn{\emph{t}}|
\label{eq:8-6}
\end{eqnarray}
......@@ -432,14 +432,14 @@ h_i (d,\vectorn{t},\vectorn{s})=\sum_{r \in d}h_i (r)
\parinterval 层次短语模型解码的目标是找到模型得分最高的推导,即:
\begin{eqnarray}
\hat{d} = \argmax_{d}\ \textrm{score}(d,\vectorn{s},\vectorn{t})
\hat{d} = \argmax_{d}\ \textrm{score}(d,\vectorn{\emph{s}},\vectorn{\emph{t}})
\label{eq:8-7}
\end{eqnarray}
\noindent 这里,$\hat{d}$的目标语部分即最佳译文$\hat{\vectorn{t}}$。令函数$t(\cdot)$返回翻译推导的目标语词串,于是有:
\noindent 这里,$\hat{d}$的目标语部分即最佳译文$\hat{\vectorn{\emph{t}}}$。令函数$t(\cdot)$返回翻译推导的目标语词串,于是有:
\begin{eqnarray}
\hat{\vectorn{t}}=t(\hat{d})
\hat{\vectorn{\emph{t}}}=t(\hat{d})
\label{eq:8-8}
\end{eqnarray}
......@@ -1305,7 +1305,7 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex
\subsection{句法翻译模型的特征}
\parinterval 基于语言学句法的翻译模型使用判别式模型对翻译推导进行建模({\chapterseven}数学建模小节)。给定双语句对($\vectorn{s}$,$\vectorn{t}$),由$M$个特征经过线性加权,得到每个翻译推导$d$的得分,记为$\textrm{score(}d,\vectorn{t},\vectorn{s})=\sum_{i=1}^{M} \lambda_i \cdot h_{i}(d,\vectorn{t},\vectorn{s})$,其中$\lambda_i$表示特征权重,$h_{i}(d,\vectorn{t},\vectorn{s})$表示特征函数。翻译的目标就是要找到使$\textrm{score(}d,\vectorn{t},\vectorn{s})$达到最高的推导$d$
\parinterval 基于语言学句法的翻译模型使用判别式模型对翻译推导进行建模({\chapterseven}数学建模小节)。给定双语句对($\vectorn{\emph{s}}$,$\vectorn{\emph{t}}$),由$M$个特征经过线性加权,得到每个翻译推导$d$的得分,记为$\textrm{score}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})=\sum_{i=1}^{M} \lambda_i \cdot h_{i}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})$,其中$\lambda_i$表示特征权重,$h_{i}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})$表示特征函数。翻译的目标就是要找到使$\textrm{score}(d,\vectorn{\emph{t}},\vectorn{\emph{s}})$达到最高的推导$d$。
\parinterval 这里,可以使用最小错误率训练对特征权重进行调优({\chapterseven}最小错误率训练小节)。而特征函数可参考如下定义:
......@@ -1346,9 +1346,9 @@ r_9: \quad \textrm{IP(}\textrm{NN}_1\ \textrm{VP}_2) \rightarrow \textrm{S(}\tex
\begin{itemize}
\vspace{0.5em}
\item (h8)语言模型得分(取对数),即$\log(\textrm{P}_{\textrm{lm}}(\vectorn{t}))$,用于度量译文的流畅度;
\item (h8)语言模型得分(取对数),即$\log(\textrm{P}_{\textrm{lm}}(\vectorn{\emph{t}}))$,用于度量译文的流畅度;
\vspace{0.5em}
\item (h9)译文长度,即$|\vectorn{t}|$,用于避免模型过于倾向生成短译文(因为短译文语言模型分数高);
\item (h9)译文长度,即$|\vectorn{\emph{t}}|$,用于避免模型过于倾向生成短译文(因为短译文语言模型分数高);
\vspace{0.5em}
\item (h10)翻译规则数量,学习对使用规则数量的偏好。比如,如果这个特征的权重较高,则表明系统更喜欢使用数量多的规则;
\vspace{0.5em}
......@@ -1455,7 +1455,7 @@ d_1 = {d'} \circ {r_5}
\parinterval 解码的目标是找到得分score($d$)最高的推导$d$。这个过程通常被描述为:
\begin{eqnarray}
\hat{d} = \argmax_d\ \textrm{score} (d,\vectorn{s},\vectorn{t})
\hat{d} = \argmax_d\ \textrm{score} (d,\vectorn{\emph{s}},\vectorn{\emph{t}})
\label{eq:8-13}
\end{eqnarray}
......
Markdown 格式
0%
您添加了 0 到此讨论。请谨慎行事。
请先完成此评论的编辑!
注册 或者 后发表评论