figure-examples-of-chinese-word-segmentation-based-on-1-gram-model.tex 4.42 KB
Newer Older
曹润柘 committed
1 2 3 4 5 6 7 8 9 10
\definecolor{ublue}{rgb}{0.152,0.250,0.545}
\definecolor{ugreen}{rgb}{0,0.5,0}


%%% outline
%-------------------------------------------------------------------------

\begin{tikzpicture}

{\scriptsize
孟霞 committed
11 12 13
\node [anchor=north west] (entry1) at (0,0) {1: 这 / 是 / 数据};
\node [anchor=north west] (entry2) at ([yshift=0.1em]entry1.south west) {2: 现在 / 已经 / 实现};
\node [anchor=north west] (entry3) at ([yshift=0.1em]entry2.south west) {3: 确实 / 有 / 很 / 多};
曹润柘 committed
14 15 16 17 18 19 20 21
\node [anchor=north west] (entry4) at ([yshift=0.1em]entry3.south west) {...};
\node [anchor=south west] (corpuslabel) at (entry1.north west) {{\color{ublue} \textbf{学习用数据}}};

\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (entry1) (entry2) (entry3) (entry4) (corpuslabel)] (corpus) {};
\end{pgfonlayer}
}

zhoutao committed
22
\node [anchor=west,ugreen] (P) at ([xshift=5.2em,yshift=-0.8em]corpus.east){\large{$\funp{P}(\cdot)$}};
曹润柘 committed
23 24 25 26 27 28
\node [anchor=south] (modellabel) at (P.north) {{\color{ublue} {\scriptsize \textbf{统计模型}}}};

\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (P) (modellabel)] (model) {};
\end{pgfonlayer}

29
\draw [->,very thick,ublue] ([xshift=0.2em]corpus.east) -- ([xshift=4.2em]corpus.east)  node [pos=0.5, above] {\color{black}{\scriptsize{统计学习}}};
曹润柘 committed
30

31
\draw [->,very thick,ublue] ([xshift=0.2em]model.east) -- ([xshift=4.2em]model.east)  node [pos=0.5, above] {\color{black}{\scriptsize{预测}}};
曹润柘 committed
32 33

{\scriptsize
34
\node [anchor=north west] (sentlabel) at ([xshift=6.2em,yshift=-1em]model.north east) {\color{black}{自动分词系统}};
曹润柘 committed
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
\node [anchor=north west] (sent) at (sentlabel.south west) {\textbf{对任意句子进行分词}};
}

\begin{pgfonlayer}{background}
\node[rectangle,draw=ublue,thick,inner sep=0.2em,fill=white,drop shadow] [fit = (sentlabel) (sent)] (segsystem) {};
\end{pgfonlayer}



{\footnotesize
{
\node [anchor=west] (label1) at (0,-6.8em) {\textbf{自动分词系统}:对任意的数据句子$S$,找到最佳的分词结果$W^{*}$输出};
}
{
\node [anchor=north west] (label2) at (label1.south west) {假设输入$S$=“确实现在数据很多”};
}
{
\node [anchor=north west,draw,thick,inner sep=2pt] (data11) at (label2.south west) {枚举所有可能的切分};
}
{
\node [anchor=west,draw,thick,inner sep=2pt] (data12) at ([xshift=4em]data11.east) {计算每种切分的概率};
}
{
\node [anchor=west,draw,thick,inner sep=2pt] (data13) at ([xshift=4.0em]data12.east) {选择最佳结果};
}
{
\draw [->,thick] ([xshift=0.1em]data11.east) -- ([xshift=-0.1em]data12.west);
}
{
\draw [->,thick] ([xshift=0.1em]data12.east) -- ([xshift=-0.1em]data13.west);
}

{\scriptsize
{
\node [anchor=north west] (data21) at (data11.south west) {确/实现/在/数/据很/多};
}
{
\node [anchor=north west] (data22) at (data12.south west) {$\funp{P}(\textrm{}) \cdot \funp{P}(\textrm{实现}) \cdot \funp{P}(\textrm{}) \cdot \funp{P}(\textrm{}) \cdot $};
}
\node [anchor=north west,minimum height=1.6em] (data23) at (data13.south west) {};
\node [anchor=north west,minimum height=1.6em] (data31) at ([yshift=0.3em]data21.south west) {};
{
\node [anchor=north west] (data32) at ([yshift=0.3em]data22.south west) {$\funp{P}(\textrm{据很}) \cdot  \funp{P}(\textrm{}) = 2.13 \times 10^{-45}$};
}
\node [anchor=north west,minimum height=1.6em] (data33) at ([yshift=0.3em]data23.south west) {};

{
zhoutao committed
82
\node [anchor=north west] (data41) at (data31.south west) {确实/现在/数据/很/多};
曹润柘 committed
83 84 85 86 87
}
{
\node [anchor=north west] (data42) at (data32.south west) {$\funp{P}(\textrm{确实}) \cdot \funp{P}(\textrm{现在}) \cdot \funp{P}(\textrm{数据}) \cdot $};
}
{
zhoutao committed
88
\node [anchor=north west] (data43) at ([yshift=-0.4em,xshift=-1.4em]data33.south west) {\color{red}{\textbf{输出}}};
曹润柘 committed
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
\draw [->,red,thick] (data43.west)--([xshift=-1em]data43.west);
}
{
\node [anchor=north west] (data51) at (data41.south west) {...};
}
{
\node [anchor=north west] (data52) at ([yshift=0.3em]data42.south west) {$\funp{P}(\textrm{}) \cdot  \funp{P}(\textrm{}) = 1.54 \times 10^{-25}$};
}
\node [anchor=north west] (data53) at ([yshift=0.3em]data43.south west) {};
}
}

\begin{pgfonlayer}{background}
{
\node[rectangle,fill=blue!10,thick,dotted,inner sep=0.1em] [fit = (label1) (data11) (data13) (data51) (data52) (data53)] (segcontent) {};
}
\end{pgfonlayer}

{
xiaotong committed
108
\draw [-,thick,dotted] (segcontent.north) ..controls +(north:0.7) and +(south:0.7).. (segsystem.south);
曹润柘 committed
109 110 111 112 113 114 115
}

\end{tikzpicture}