%%%------------------------------------------------------------------------------------------------------------
\begin{tikzpicture}

%%%%%%%%%%%%%%%%
% parameter server + processor
\begin{scope}[]

{\scriptsize

\tikzstyle{parametershard} = [draw,thick,minimum width=4em,align=left,rounded corners=2pt]

{
\node[parametershard,anchor=west,fill=yellow!10] (param1) at (0,0) {$W_o$, $b_o$};
\node (param2) at ([xshift=1em]param1.east) {};
\node[parametershard,anchor=west,fill=red!10] (param3) at ([xshift=1em]param2.east) {$W_h$, $b_h$};
\node[anchor=south,inner sep=1pt] (serverlabel) at ([yshift=0.2em]param2.north) {\footnotesize{\textbf{parameter server}: $\mathbf w_{new} = \mathbf w - \alpha\cdot \frac{\partial L}{\partial \mathbf w}$}};
}

\begin{pgfonlayer}{background}
{
\node[rectangle,draw,thick,inner sep=2pt,fill=gray!20] [fit = (param1) (param2) (param3) (serverlabel)] (serverbox) {};
}
\end{pgfonlayer}

\tikzstyle{processor} = [draw,thick,fill=orange!20,minimum width=4em,align=left,rounded corners=2pt]

{
\node [processor,anchor=north,align=center] (processor2) at ([yshift=-1.2in]serverlabel.south) {\scriptsize{Processor 2}\\\scriptsize{on GPU2 (G2)}};
\node [anchor=north] (labela) at ([xshift=4em,yshift=-1em]processor2.south) {\footnotesize {(a)同步更新}};
\node [processor,anchor=east,align=center] (processor1) at ([xshift=-1em]processor2.west) {\scriptsize{Processor 1}\\\scriptsize{on GPU1 (G1)}};
\node [processor,anchor=west,align=center] (processor3) at ([xshift=1em]processor2.east) {\scriptsize{Processor 3}\\\scriptsize{on GPU3 (G3)}};
}

{
\draw[->,very thick,red] ([xshift=-0.5em,yshift=2pt]processor2.north) -- ([xshift=-0.5em,yshift=-2pt]serverbox.south) node [pos=0.5,align=right,xshift=-2em] (pushlabel) {\scriptsize{$\frac{\partial L}{\partial \mathbf w}$}};;
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor2.north) -- ([xshift=0.5em,yshift=-2pt]serverbox.south) node [pos=0.5,align=left,xshift=2.2em] (fetchlabel) {\scriptsize{$\mathbf w_{new}$}};;;
\draw[->,very thick,red] ([xshift=-0.5em,yshift=2pt]processor3.north) --
 ([xshift=3em,yshift=-2pt]serverbox.south);
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor3.north) -- ([xshift=4em,yshift=-2pt]serverbox.south) node [pos=0.5,align=left,xshift=2.2em] (fetchlabel) {\scriptsize{fetch (F)}};
\draw[->,very thick,red] ([xshift=-0.5em,yshift=2pt]processor1.north) -- ([xshift=-4em,yshift=-2pt]serverbox.south) node [pos=0.5,align=right,xshift=-2em] (pushlabel) {\scriptsize{push (P)}};
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor1.north) -- ([xshift=-3em,yshift=-2pt]serverbox.south);
}

%%%%%%%%%%%
% synchronous mode
\tikzstyle{job} = [draw,rotate=90,minimum height=0.25in]

\scriptsize{
{
\node[job,anchor=south west,fill=blue!50] (fetch11) at ([xshift=6em,yshift=1em]processor3.east) {\textbf{F}};
\node[job,anchor=west,fill=orange!30] (minibatch11) at ([yshift=1pt]fetch11.east) {\tiny{minibatch3}};
\node[job,anchor=west,fill=red!50] (push11) at ([yshift=1pt]minibatch11.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!50] (fetch12) at ([xshift=0.8em]fetch11.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!30] (minibatch12) at ([yshift=1pt]fetch12.east) {\tiny{minibatch2}};
\node[job,anchor=west,fill=red!50] (push12) at ([yshift=1pt]minibatch12.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!50] (fetch13) at ([xshift=0.8em]fetch12.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!30,minimum width=8em] (minibatch13) at ([yshift=1pt]fetch13.east) {\scriptsize{minibatch1}};
\node[job,anchor=west,fill=red!50] (push13) at ([yshift=1pt]minibatch13.east) {\textbf{P}};
\node[anchor=south west,draw,fill=gray!20,minimum width=8.0em] (update11) at ([yshift=4.0em]push11.north east) {Update};

\node[anchor=north] (G11) at (fetch11.west) {\small{G3}};
\node[anchor=north] (G12) at (fetch12.west) {\small{G2}};
\node[anchor=north] (G13) at (fetch13.west) {\small{G1}};
\node[anchor=north,align=center] (synlabel) at (G12.south) {\small{\textbf{Synchronous}}\\\small{\textbf{Training}}};
\draw[->,thick] ([xshift=1em]G13.east) -- ([xshift=1em,yshift=1.4in]G13.east) node [pos=0.5,rotate=90,yshift=-1em] {\small{time line}};
}
}

{
\draw [<->,thin,dotted] ([xshift=-1pt]minibatch11.north) .. controls +(west:3em) and +(east:3em) .. ([xshift=1pt]processor3.east);
\draw [<->,thin,dotted] ([xshift=-1pt]fetch11.north) .. controls +(west:4em) and +(east:4em) .. ([xshift=-0.5em,yshift=0.3in]processor3.north);
\draw [<->,thin,dotted] ([xshift=-1pt]push11.north) -- ([xshift=-4em,yshift=0.8in]processor3.north);
}

{
\draw [<->,thin,dotted] ([xshift=-1pt]update11.west) -- ([xshift=1pt,yshift=-1.5em]serverbox.north east);
}


}

\end{scope}

\begin{scope}[yshift=-2.5in]

{\scriptsize

\tikzstyle{parametershard} = [draw,thick,minimum width=4em,align=left,rounded corners=2pt]

{
\node[parametershard,anchor=west,fill=yellow!10] (param1) at (0,0) {$W_o$, $b_o$};
\node (param2) at ([xshift=1em]param1.east) {};
\node[parametershard,anchor=west,fill=red!10] (param3) at ([xshift=1em]param2.east) {$W_h$, $b_h$};
\node[anchor=south,inner sep=1pt] (serverlabel) at ([yshift=0.2em]param2.north) {\footnotesize{\textbf{parameter server}: $\mathbf w_{new} = \mathbf w - \alpha\cdot \frac{\partial L}{\partial \mathbf w}$}};
}

\begin{pgfonlayer}{background}
{
\node[rectangle,draw,thick,inner sep=2pt,fill=gray!20] [fit = (param1) (param2) (param3) (serverlabel)] (serverbox) {};
}
\end{pgfonlayer}

\tikzstyle{processor} = [draw,thick,fill=orange!20,minimum width=4em,align=left,rounded corners=2pt]

{
\node [processor,anchor=north,align=center] (processor2) at ([yshift=-1.2in]serverlabel.south) {\scriptsize{Processor 2}\\\scriptsize{on GPU2 (G2)}};
\node [anchor=north] (label) at ([xshift=4em,yshift=-1em]processor2.south) {\footnotesize {(b)异步更新}};
\node [processor,anchor=east,align=center] (processor1) at ([xshift=-1em]processor2.west) {\scriptsize{Processor 1}\\\scriptsize{on GPU1 (G1)}};
\node [processor,anchor=west,align=center] (processor3) at ([xshift=1em]processor2.east) {\scriptsize{Processor 3}\\\scriptsize{on GPU3 (G3)}};
}

{
\draw[->,very thick,red] ([xshift=-0.5em,yshift=2pt]processor2.north) -- ([xshift=-0.5em,yshift=-2pt]serverbox.south) node [pos=0.5,align=right,xshift=-2em] (pushlabel) {\scriptsize{$\frac{\partial L}{\partial \mathbf w}$}};;
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor2.north) -- ([xshift=0.5em,yshift=-2pt]serverbox.south) node [pos=0.5,align=left,xshift=2.2em] (fetchlabel) {\scriptsize{$\mathbf w_{new}$}};;;
\draw[->,very thick,red] ([xshift=-0.5em,yshift=2pt]processor3.north) --
 ([xshift=3em,yshift=-2pt]serverbox.south);
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor3.north) -- ([xshift=4em,yshift=-2pt]serverbox.south) node [pos=0.5,align=left,xshift=2.2em] (fetchlabel) {\scriptsize{fetch (F)}};
\draw[->,very thick,red] ([xshift=-0.5em,yshift=2pt]processor1.north) -- ([xshift=-4em,yshift=-2pt]serverbox.south) node [pos=0.5,align=right,xshift=-2em] (pushlabel) {\scriptsize{push (P)}};
\draw[<-,very thick,blue] ([xshift=0.5em,yshift=2pt]processor1.north) -- ([xshift=-3em,yshift=-2pt]serverbox.south);
}

%%%%%%%%%%%
% synchronous mode
\tikzstyle{job} = [draw,rotate=90,minimum height=0.25in]

\scriptsize{
{
\node[job,anchor=south west,fill=blue!50] (fetch21) at ([xshift=6em,yshift=1em]processor3.east) {\textbf{F}};
\node[job,anchor=west,fill=orange!30] (minibatch21) at ([yshift=1pt]fetch21.east) {\tiny{minibatch3}};
\node[job,anchor=west,fill=red!50] (push21) at ([yshift=1pt]minibatch21.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!50] (fetch22) at ([xshift=0.8em]fetch21.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!30] (minibatch22) at ([yshift=1pt]fetch22.east) {\tiny{minibatch2}};
\node[job,anchor=west,fill=red!50] (push22) at ([yshift=1pt]minibatch22.east) {\textbf{P}};
\node[job,anchor=north west,fill=blue!50] (fetch23) at ([xshift=0.8em]fetch22.south west) {\textbf{F}};
\node[job,anchor=west,fill=orange!30,minimum width=8em] (minibatch23) at ([yshift=1pt]fetch23.east) {\scriptsize{minibatch1}};
\node[job,anchor=west,fill=red!50] (push23) at ([yshift=1pt]minibatch23.east) {\textbf{P}};
\node[anchor=south west,draw,fill=gray!20,minimum width=0.59in] (update21) at ([yshift=2pt]push21.north east) {Update};
\node[anchor=south west,draw,fill=gray!20,minimum width=0.25in] (update22) at ([yshift=2pt]push23.north east) {\tiny{Upd.}};

\node[anchor=north] (G21) at (fetch21.west) {\small{G3}};
\node[anchor=north] (G22) at (fetch22.west) {\small{G2}};
\node[anchor=north] (G23) at (fetch23.west) {\small{G1}};
\node[anchor=north,align=center] (synlabel) at (G22.south) {\small{\textbf{Asynchronous}}\\\small{\textbf{Training}}};
\draw[->,thick] ([xshift=1em]G23.east) -- ([xshift=1em,yshift=1.4in]G23.east) node [pos=0.5,rotate=90,yshift=-1em] {\small{time line}};

\draw [<->,thin,dotted] ([xshift=-1pt]update21.west) -- ([xshift=1pt,yshift=-1.5em]serverbox.north east);
\draw [<->,thin,dotted] ([xshift=-1pt]update22.west) -- ([xshift=1pt,yshift=-1.5em]serverbox.north east);
}
}

}

\end{scope}

\end{tikzpicture}
%%%------------------------------------------------------------------------------------------------------------