% Byte pair encoding (BPE) walk-through on a four-word toy vocabulary.
% Left column, top to bottom: the word list, the character-split words with
% the end symbol <e> and their counts, then three successive merge steps
% (es -> est -> est<e>, highlighted via \red), an ellipsis and the stop rule.
% Right column: short explanations pointing at the two dotted groups.
% Bottom: the symbol merge table and a note on segmenting an unseen word.
%
% Not defined in this file (must be provided by the including document):
% the `fit` and `shadows` TikZ libraries, a declared `background` layer,
% the multirow package, the \red macro and the `ugreen` colour.
%
% NOTE: keep one statement per line. The disabled node inside the
% pgfonlayer block is switched off with `%`, which comments out everything up
% to the end of the physical line; joining lines would swallow the code that
% follows it, including \end{pgfonlayer} and \end{tikzpicture}.
\begin{tikzpicture}
  % Picture-local styles (\tikzset replaces the deprecated \tikzstyle).
  \tikzset{
    node/.style={font=\scriptsize},
    sentence/.style={font=\scriptsize,fill=blue!5!white},
    % Shared geometry of every fitted background/outline box.
    groupbox/.style={rectangle,inner sep=0.2em,rounded corners=1pt},
  }

  % ---- Left column: the data at each stage ---------------------------------
  % Raw word list.
  \node[sentence] (node1) at (0,0) {['low', 'lower', 'newest', 'widest']};
  % Words split into characters, end symbol appended, with frequencies.
  \node[sentence,anchor = north] (node2) at ([yshift = -1em]node1.south)
    {['l o w $<$e$>$':5, 'l o w e r $<$e$>$':2, 'n e w e s t $<$e$>$':6, 'w i d e s t $<$e$>$':3]};
  % Merge step 1: 'e' + 's'.
  \node[sentence,anchor = north] (node3) at ([yshift = -1.5em]node2.south)
    {['l o w $<$e$>$':5, 'l o w e r $<$e$>$':2, 'n e w {\red es} t $<$e$>$':6, 'w i d {\red es} t $<$e$>$':3]};
  % Merge step 2: 'es' + 't'.
  \node[sentence,anchor = north] (node4) at ([yshift = -1em]node3.south)
    {['l o w $<$e$>$':5, 'l o w e r $<$e$>$':2, 'n e w {\red est} $<$e$>$':6, 'w i d {\red est} $<$e$>$':3]};
  % Merge step 3: 'est' + '<e>'.
  \node[sentence,anchor = north] (node5) at ([yshift = -1em]node4.south)
    {['l o w $<$e$>$':5, 'l o w e r $<$e$>$':2, 'n e w {\red est$<$e$>$}':6, 'w i d {\red est$<$e$>$}':3]};
  % Further merge steps are elided.
  \node[sentence,anchor = north] (node6) at ([yshift = -1em]node5.south) {$\cdots$};
  % Stop rule (target vocabulary size reached, or best pair has frequency 1).
  \node[node,anchor = north] (node7) at ([yshift = -1.6em]node6.south)
    {直到达到预设的子词词表大小或下一个最高频的字节对出现频率为1。};

  % Arrows between consecutive stages. The node2 -> node3 link is drawn
  % further down as box1 -> box2, once those fitted boxes exist.
  \draw[->,line width=.03cm] (node1.south) -- (node2.north);
  \draw[->,line width=.03cm] (node3.south) -- (node4.north);
  \draw[->,line width=.03cm] (node4.south) -- (node5.north);
  \draw[->,line width=.03cm] (node5.south) -- (node6.north);

  % ---- Note on an out-of-vocabulary word (lowest -> low est) ---------------
  \node[node,anchor = west] (node8) at ([xshift = 2em,yshift = 2em]node7.east) {对于词表外的词lowest};
  \node[node,anchor = north west] (node9) at ([yshift = 0.3em]node8.south west) {可以被分割为low est};

  % ---- Symbol merge table ---------------------------------------------------
  % The `node` style already supplies font=\scriptsize.
  \node[node,anchor = north,fill=ugreen!5,drop shadow] (dict) at ([xshift = 5em,yshift = -5em]node6.south){%
    \begin{tabular}{llllll}
      \multirow{3}{*}{符号合并表:} & ('e','s') & ('es','t') & ('est','$<$e$>$') & ('l','o') & ('lo','w') \\
      & ('low','$<$e$>$') & ('n','e') & ('ne','w') & ('new','est$<$e$>$') & ('w','i') \\
      & ('wi','d') & ('wid','est$<$e$>$') & ('low','e') & ('lowe','r') & ('lower','$<$e$>$')
    \end{tabular}%
  };

  % ---- Right column: explanations -------------------------------------------
  % Explanation of the first group (split into characters, add <e>, count).
  \node[node,anchor=west] (line1) at ([xshift = 8em]node1.south east) {按字符拆分,并添加};
  \node[node,anchor=north west] (line2) at ([yshift=0.3em]line1.south west) {终结符$<$e$>$,统计词频。};
  % Explanation of the second group (count adjacent pairs, merge the most frequent).
  \node[node,anchor=north west] (line3) at ([yshift=-4em]line2.south west) {统计每一个连续字节对};
  \node[node,anchor=north west] (line4) at ([yshift=0.3em]line3.south west) {的出现频率,选择最高};
  \node[node,anchor=north west] (line5) at ([yshift=0.3em]line4.south west) {频者合并成新的子词};

  % ---- Fitted boxes, drawn behind the content -------------------------------
  \begin{pgfonlayer}{background}
    % Disabled earlier variant of box1, kept for reference:
    %\node [rectangle,inner sep=0.2em,rounded corners=1pt,fill=red!10,drop shadow,draw=red] [fit = (line1) (line2) (line3) (line4)] (box1) {};
    % Dotted outlines grouping the preparation stage and the merge stage.
    \node[groupbox,very thick,dotted,draw=purple,fit = (node1) (node2)] (box1) {};
    \node[groupbox,very thick,dotted,draw=teal,fit = (node3) (node4) (node5) (node6)] (box2) {};
    % Shaded backgrounds behind the explanatory texts.
    \node[groupbox,fill=purple!5,drop shadow,fit = (line1) (line2)] (box3) {};
    \node[groupbox,fill=ugreen!5,drop shadow,fit = (line3) (line4) (line5)] (box4) {};
    \node[groupbox,fill=purple!5,drop shadow,fit = (node7)] (box5) {};
    \node[groupbox,fill=blue!5,drop shadow,fit = (node8) (node9)] (box6) {};
  \end{pgfonlayer}

  % ---- Connections that depend on the fitted boxes --------------------------
  \draw[->,line width=.03cm] (box2.south) -- ([yshift=0.2em]node7.north);
  \draw[->,line width=.03cm] (box1.south) -- (box2.north);
  % Short dotted pointers from each explanation towards its group.
  \draw [->,dotted,very thick,purple] (box3.west) -- ([xshift=-1.5em]box3.west);
  \draw [->,dotted,very thick,teal] (box4.west) -- ([xshift=-1.7em]box4.west);
  % Curved dotted arrow from the merge table up to the out-of-vocabulary note.
  \draw [->,dotted,very thick] ([xshift=6em]dict.north) .. controls +(north:1) and +(south:1) .. (box6.south);
\end{tikzpicture}