\chapter{ANN first approximations} \section{ANN Representations for One-Dimensional Identity and some associated properties} \begin{definition}[One Dimensional Identity Neural Network]\label{7.2.1} For every $d \in \N$ we denote by $\id_d \in \neu$ the neural network satisfying: \begin{enumerate}[label = (\roman*)] \item \begin{align} \id_1 = \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 \quad -1 \end{bmatrix},\begin{bmatrix} 0\end{bmatrix}\rp \rp \in \lp \lp \R^{2 \times 1} \times \R^2 \rp \times \lp \R^{1\times 2} \times \R^1 \rp \rp \end{align} \item for $d>1$: \begin{align}\label{7.2.2} \id_d = \boxminus^d_{i=1} \id_1 \end{align} \end{enumerate} \end{definition} \begin{lemma}\label{idprop} Let $d \in \N$. It is then the case that: \begin{enumerate}[label = (\roman*)] \item $\lay(\id_d) = \lp d, 2d, d \rp \in \N^3$, \item $\real_{\rect} \lp \id_d \rp \in C \lp \R^d, \R^d \rp$, \item for all $x \in \R^d$ it is the case that: \begin{align} \lp \real_{\rect} \lp \id_d \rp \rp \lp x \rp = x \nonumber \end{align} \item $\dep\lp \id_d\rp = 2$. \end{enumerate} \end{lemma} \begin{proof} Note that (\ref{7.2.1}) ensures that $\lay(\id_1) = \lp 1,2,1 \rp$. Furthermore, ($\ref{7.2.2}$) and Remark \ref{5.3.5} prove that $\lay(\id_d) = \lp d,2d,d \rp$, which in turn proves Item (i). Note now that Remark \ref{5.3.5} tells us that: \begin{align} \id_d = \boxminus^d_{i=1}\lp \id_1 \rp \in \lp \bigtimes^L_{i=1}\lb \R^{dl_i \times dl_{i-1}} \times \R^{dl_i} \rb \rp = \lp \lp \R^{2d \times d} \times \R^{2d}\rp \times \lp \R^{d \times 2d} \times \R^d\rp \rp \end{align} where $L = 2$ and $\lp l_0,l_1,l_2 \rp = \lp 1,2,1 \rp = \lay\lp \id_1 \rp$. Note that (\ref{7.2.1}) ensures that for all $x \in \R$ it is the case that: \begin{align} \lp \real_{\rect} \lp \id_1 \rp \rp \lp x \rp = \rect(x) - \rect(-x) = \max\{x,0\} - \max\{-x,0\} = x \end{align} And Lemma \ref{5.3.4} shows us that for all $x = \lp x_1,x_2,...,x_d\rp \in \R^d$ it is the case that $\real_{\rect}\lp \id_d \rp \in C \lp \R^d, \R^d \rp $ and that: \begin{align} \lp \real_{\rect} \lp \id_d \rp \rp \lp x \rp &= \lp \real_{\rect} \lp \boxminus_{i=1}^d \lp \id_1\rp \rp \rp \lp x_1,x_2,...,x_d \rp \nonumber \\ &= \lp \lp \real_{\rect} \lp \id_1 \rp \rp \lp x_1 \rp, \lp \real_{\rect} \lp \id_1 \rp \rp \lp x_2 \rp,..., \lp \real_{\rect} \lp \id_1 \rp \rp \lp x_d \rp \rp \nonumber \\ &= \lp x_1, x_2,...,x_d \rp = x \end{align} This proves Items (ii)\textemdash(iii). Item (iv) follows straightforwardly from Item (i). This establishes the lemma. \end{proof} \begin{remark} Note here the difference between Definition \ref{actnn} and Definition \ref{7.2.1}. \end{remark} \begin{lemma}[R\textemdash, 2023]\label{id_param} Let $d \in \N$. It is then the case that $\param\lp \id_d\rp = 4d^2+3d$. \end{lemma} \begin{proof} By observation we have that $\param \lp \id_1\rp = 4(1)^2+3(1) = 7$. For the induction step, suppose that for some $n \in \N$ it is the case that $\param \lp \id_n\rp = 4n^2+3n$. Note then that $\id_{n+1} = \id_n \boxminus \id_1$. Stacking enlarges the two weight matrices $W_1$ and $W_2$ from $\R^{2n \times n}$ and $\R^{n \times 2n}$ to $\R^{2(n+1) \times (n+1)}$ and $\R^{(n+1) \times 2(n+1)}$ respectively, adding a combined extra $8n+4$ parameters, and enlarges the two bias vectors $b_1$ and $b_2$ by a combined extra $3$ parameters. Thus, we have the following: \begin{align} 4n^2+3n + 8n+4 + 3 &= 4(n+1)^2+3(n+1) \end{align} This completes the induction and hence proves the lemma. \end{proof} \begin{lemma}\label{7.2.3} Let $\nu \in \neu$ with end-widths $d$.
It is then the case that $ \real_{\rect} \lp \id_d \bullet \nu \rp = \real_{\rect} \lp \nu \bullet \id_d \rp = \real_{\rect} \lp \nu\rp $, i.e. $\id_d$ acts as a compositional identity. \end{lemma} \begin{proof} From (\ref{5.2.1}) and Definition \ref{7.2.1} we have eight cases. Case 1 where $d=1$ and subcases: \begin{enumerate}[label = (1.\roman*)] \item $\id_d \bullet \nu$ where $\dep(\nu) = 1$ \item $\id_d \bullet \nu$ where $\dep(\nu) > 1$ \item $\nu \bullet \id_d$ where $\dep(\nu) =1$ \item $\nu \bullet \id_d$ where $\dep(\nu) > 1$ \end{enumerate} Case 2 where $d>1$ and subcases: \begin{enumerate}[label = (2.\roman*)] \item $\id_d \bullet \nu$ where $\dep(\nu) = 1$ \item $\id_d \bullet \nu$ where $\dep(\nu) > 1$ \item $\nu \bullet \id_d$ where $\dep(\nu) =1$ \item $\nu \bullet \id_d$ where $\dep(\nu) > 1$ \end{enumerate} \textit{Case 1.i:} Let $\nu = \lp \lp W_1,b_1 \rp \rp$. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have that: \begin{align} \id_1 \bullet \nu &=\lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix} W_1, \begin{bmatrix} 1 \\ -1 \end{bmatrix}b_1 + \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 \quad -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix} \rp \rp \\ &= \lp \lp \begin{bmatrix} W_1 \\-W_{1} \end{bmatrix}, \begin{bmatrix} b_1 \\ -b_1 \end{bmatrix} \rp,\lp \begin{bmatrix} 1 \quad -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix} \rp \rp \end{align} Let $x \in \R$. Upon instantiation with $\rect$ and $d=1$ we have: \begin{align} \lp \real_{\rect}\lp \id_1\bullet \nu \rp \rp \lp x \rp &= \rect(W_1x+b_1)-\rect(-W_1x - b_1) \nonumber\\ &= \max\{W_1x+b_1,0\}-\max\{-W_1x-b_1,0\} \nonumber \\ &= W_1x+b_1 \nonumber\\ &= \lp \real_{\rect}(\nu) \rp \lp x \rp \nonumber \end{align} \textit{Case 1.ii:} Let $\nu = \lp \lp W_1,b_1 \rp, \lp W_2,b_2 \rp, ..., \lp W_L, b_L \rp \rp $. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have that: \begin{align} &\id_1\bullet \nu \nonumber\\ &= \lp \lp W_1,b_1\rp,\lp W_2,b_2 \rp,...,\lp W_{L-1},b_{L-1} \rp, \lp \begin{bmatrix} 1 \\-1 \end{bmatrix} W_L, \begin{bmatrix} 1 \\ -1 \end{bmatrix}b_L + \begin{bmatrix} 0 \\ 0 \end{bmatrix} \rp, \lp \begin{bmatrix} 1 \quad -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix} \rp \rp \nonumber \\ &= \lp \lp W_1,b_1\rp, \lp W_2, b_2 \rp,...,\lp W_{L-1},b_{L-1} \rp, \lp \begin{bmatrix} W_L \\ -W_L \end{bmatrix} ,\begin{bmatrix} b_L \\ -b_L \end{bmatrix} \rp ,\lp \begin{bmatrix} 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix} \rp \rp \nonumber \end{align} Let $x \in \R$ denote the input to the final affine layer. Note that upon instantiation with $\rect$, the last two layers compute: \begin{align} &\rect(W_Lx+b_L)-\rect(-W_Lx - b_L) \nonumber\\ &=\max\{W_Lx+b_L,0\}-\max\{-W_Lx-b_L,0\} \nonumber \\ &= W_Lx+b_L \label{7.2.8} \end{align} This, along with Case 1.i, implies that under instantiation the last two layers act as the single layer $(W_L,b_L)$, whence $\real_{\rect}\lp \id_1\bullet \nu \rp = \real_{\rect}\lp \nu \rp$. \textit{Case 1.iii:} Let $\nu = \lp \lp W_1,b_1\rp \rp$. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have: \begin{align} \nu \bullet \id_1 &= \lp \lp \begin{bmatrix} 1 \\-1 \end{bmatrix}, \begin{bmatrix} 0 \\0 \end{bmatrix}\rp, \lp W_1\begin{bmatrix} 1 \quad -1 \end{bmatrix},W_1 \begin{bmatrix} 0 \end{bmatrix} + b_1\rp \rp \nonumber \\ &= \lp \lp \begin{bmatrix} 1 \\-1 \end{bmatrix}, \begin{bmatrix} 0 \\0 \end{bmatrix}\rp, \lp \begin{bmatrix} W_1 \quad -W_1 \end{bmatrix}, b_1\rp \rp \nonumber \end{align} Let $x \in \R$.
Upon instantiation with $\rect$ we have that: \begin{align} \lp \real_{\rect} \lp \nu \bullet \id_1 \rp \rp \lp x \rp &= \begin{bmatrix} W_1 \quad -W_1 \end{bmatrix} \rect \lp \begin{bmatrix} x \\ -x \end{bmatrix} \rp +b_1 \nonumber \\ &= W_1\rect(x)-W_1\rect(-x) + b_1 \nonumber \\ &=W_1 \lp \rect(x) - \rect(-x) \rp +b_1 \nonumber \\ &=W_1x+b_1 = \lp \real_{\rect} \lp \nu \rp \rp \lp x \rp \end{align} \textit{Case 1.iv:} Let $\nu = \lp \lp W_1,b_1\rp , \lp W_2,b_2 \rp,...,\lp W_L, b_L \rp \rp $. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have that: \begin{align} \nu \bullet \id_1 = \lp \lp \begin{bmatrix} 1 \\-1 \end{bmatrix}, \begin{bmatrix} 0 \\0 \end{bmatrix}\rp, \lp \begin{bmatrix} W_1 \quad -W_1 \end{bmatrix}, b_1\rp, \lp W_2,b_2 \rp ,...,\lp W_L,b_L \rp \rp \end{align} Let $x \in \R$. Upon instantiation with $\rect$, the first two layers compute: \begin{align} &\begin{bmatrix} W_1 \quad -W_1 \end{bmatrix} \rect \lp \begin{bmatrix} x \\ -x \end{bmatrix} \rp +b_1 \nonumber \\ &= W_1\rect(x)-W_1\rect(-x) + b_1 \nonumber \\ &=W_1 \lp \rect(x) - \rect(-x) \rp + b_1 \nonumber \\ &= W_1x+b_1 \end{align} This, along with Case 1.iii, implies that under instantiation the first two layers act as the single layer $(W_1,b_1)$, whence we have that $\real_{\rect}\lp \nu \bullet \id_1 \rp = \real_{\rect}\lp \nu \rp$. Observe that Definitions \ref{5.2.5} and \ref{7.2.1} tell us that: \begin{align} &\boxminus^d_{i=1} \id_1\\ &= \lp \lp \overbrace{\begin{bmatrix} \we_{\id_1,1} \\ &&\ddots \\ &&& \we_{\id_1,1} \end{bmatrix}}^{d-many} , \mymathbb{0}_{2d}\rp, \lp \overbrace{\begin{bmatrix} \we_{\id_1,2} \\ && \ddots \\ &&& \we_{\id_1,2} \end{bmatrix}}^{d-many}, \mymathbb{0}_d\rp \rp \nonumber \end{align} \textit{Case 2.i:} Let $d \in \N \cap [2,\infty)$. Let $\nu = \lp \lp W_1,b_1 \rp \rp \in \neu$ with end-widths $d$. Deriving from Definitions \ref{5.2.1} and \ref{7.2.1} we have: \begin{align} \id_d \bullet \nu = \lp \lp \begin{bmatrix} \we_{\id_1,1} \\ &&\ddots \\ &&& \we_{\id_1,1} \end{bmatrix}W_1 , \begin{bmatrix} \we_{\id_1,1} \\ &&\ddots \\ &&& \we_{\id_1,1} \end{bmatrix} b_1\rp, \right. \nonumber\\ \left. \lp \begin{bmatrix} \we_{\id_1,2} \\ && \ddots \\ &&& \we_{\id_1,2} \end{bmatrix}, \mymathbb{0}_d\rp \rp \nonumber \\ = \lp \lp \begin{bmatrix} [W_1]_{1,*} \\ -[W_1]_{1,*} \\ \vdots \\ [W_1]_{d,*}\\ -[W_1]_{d,*} \end{bmatrix}, \begin{bmatrix} [b_1]_1\\ -[b_1]_1 \\ \vdots \\ [b_1]_d \\ -[b_1]_d \end{bmatrix} \rp, \lp \begin{bmatrix} \we_{\id_1,2} \\ && \ddots \\ &&& \we_{\id_1,2} \end{bmatrix}, \mymathbb{0}_d\rp \rp \nonumber \end{align} Let $x \in \R^d$. Upon instantiation with $\rect$ we have that: \begin{align} &\lp \real_{\rect} \lp \id_d \bullet \nu \rp \rp \lp x \rp \nonumber \\ &= \begin{bmatrix} \rect([W_1]_{1,*} \cdot x + [b_1]_1)-\rect(-[W_1]_{1,*}\cdot x -[b_1]_1) \\ \vdots \\ \rect([W_1]_{d,*}\cdot x+[b_1]_d)-\rect (-[W_1]_{d,*}\cdot x-[b_1]_d) \end{bmatrix} = \begin{bmatrix} [W_1]_{1,*}\cdot x + [b_1]_1 \\ \vdots \\ [W_1]_{d,*}\cdot x + [b_1]_d \end{bmatrix} \nonumber \\ &= W_1x + b_1 = \lp \real_{\rect} \lp \nu \rp \rp \lp x \rp \nonumber \end{align} \textit{Case 2.ii:} Let $\nu = \lp \lp W_1,b_1 \rp, \lp W_2,b_2 \rp, ..., \lp W_L, b_L \rp \rp $.
Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have that: \begin{align} \id_d \bullet \nu =\lp \lp W_1,b_1\rp, \lp W_2, b_2 \rp,...,\lp W_{L-1},b_{L-1} \rp, \lp \begin{bmatrix} [W_L]_{1,*} \\ -[W_L]_{1,*}\\ \vdots \\ [W_L]_{d,*} \\ -[W_L]_{d,*} \end{bmatrix} ,\begin{bmatrix} [b_L]_1 \\ -[b_L]_1 \\ \vdots \\ [b_L]_d \\ -[b_L]_d \end{bmatrix} \rp ,\lp \begin{bmatrix} \we_{\id_1,2} \\ && \ddots \\ &&& \we_{\id_1,2} \end{bmatrix}, \mymathbb{0}_d \rp \rp \nonumber \end{align} Let $x \in \R^d$ denote the input to the last affine layer of $\nu$. Note that upon instantiation with $\rect$, the last two layers compute: \begin{align} &\begin{bmatrix} \rect([W_L]_{1,*} \cdot x + [b_L]_1)-\rect(-[W_L]_{1,*}\cdot x -[b_L]_1) \\ \vdots \\ \rect([W_L]_{d,*}\cdot x+[b_L]_d)-\rect (-[W_L]_{d,*}\cdot x-[b_L]_d) \end{bmatrix} = \begin{bmatrix} [W_L]_{1,*}\cdot x + [b_L]_1 \\ \vdots \\ [W_L]_{d,*}\cdot x + [b_L]_d \end{bmatrix} \nonumber \\ &= W_Lx + b_L \end{align} This, along with Case 2.i, implies that under instantiation the last two layers act as the single layer $(W_L,b_L)$, whence $\real_{\rect}\lp \id_d\bullet \nu \rp = \real_{\rect}\lp \nu \rp$. \textit{Case 2.iii:} Let $\nu = \lp \lp W_1,b_1\rp \rp$. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have: \begin{align} &\nu \bullet \id_d \nonumber\\ &= \lp \lp \begin{bmatrix} \we_{\id_1,1} \\ &&\ddots \\ &&& \we_{\id_1,1} \end{bmatrix}, \mymathbb{0}_{2d}\rp, \lp W_1\begin{bmatrix} \we_{\id_1,2} \\ &&\ddots \\ &&& \we_{\id_1,2} \end{bmatrix}, b_1\rp \rp \nonumber \end{align} Let $x \in \R^d$. Upon instantiation with $\rect$ we have that: \begin{align} &\lp \real_{\rect} \lp \nu \bullet \id_d \rp \rp \lp x \rp \nonumber \\ &= \begin{bmatrix} [W_1]_{*,1} \ -[W_1]_{*,1} \ \cdots \ [W_1]_{*,d} \ -[W_1]_{*,d} \end{bmatrix}\rect \lp \begin{bmatrix} [x]_1 \\ -[x]_1 \\ \vdots \\ [x]_d \\ -[x]_d \end{bmatrix}\rp + b_1 \nonumber \\ &= [W_1]_{*,1} \rect([x]_1) - [W_1]_{*,1} \rect(-[x]_1)+ \cdots +[W_1]_{*,d}\rect([x]_d)-[W_1]_{*,d}\rect(-[x]_d) + b_1 \nonumber \\ &= [W_1]_{*,1}\cdot [x]_1 + \cdots + [W_1]_{*,d} \cdot [x]_d + b_1 \nonumber \\ &= W_1x+b_1 = \lp \real_{\rect}(\nu) \rp \lp x \rp \end{align} \textit{Case 2.iv:} Let $\nu = \lp \lp W_1,b_1 \rp, \lp W_2,b_2 \rp ,...,\lp W_L,b_L \rp \rp $. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have: \begin{align} &\nu \bullet \id_d \nonumber \\ = &\lp \lp \begin{bmatrix} \we_{\id_1,1} \\ &&\ddots \\ &&& \we_{\id_1,1} \end{bmatrix}, \mymathbb{0}_{2d}\rp, \lp \begin{bmatrix} [W_1]_{*,1} \ -[W_1]_{*,1} \ \cdots \ [W_1]_{*,d} \ -[W_1]_{*,d} \end{bmatrix}, b_1\rp, \right. \nonumber \\ &\left.\lp W_2,b_2 \rp ,...,\lp W_L,b_L \rp \rp \nonumber \end{align} Let $x \in \R^d$. Upon instantiation with $\rect$, the first two layers compute: \begin{align} &\begin{bmatrix} [W_1]_{*,1} \ -[W_1]_{*,1} \ \cdots \ [W_1]_{*,d} \ -[W_1]_{*,d} \end{bmatrix}\rect \lp \begin{bmatrix} [x]_1 \\ -[x]_1 \\ \vdots \\ [x]_d \\ -[x]_d \end{bmatrix}\rp + b_1 \nonumber \\ &= [W_1]_{*,1} \rect([x]_1) - [W_1]_{*,1} \rect(-[x]_1)+ \cdots +[W_1]_{*,d}\rect([x]_d)-[W_1]_{*,d}\rect(-[x]_d) + b_1 \nonumber \\ &= [W_1]_{*,1}\cdot [x]_1 + \cdots + [W_1]_{*,d} \cdot [x]_d + b_1 \nonumber \\ &= W_1x+b_1 \end{align} This, along with Case 2.iii, implies that under instantiation the first two layers act as the single layer $(W_1,b_1)$, whence $\real_{\rect}\lp \nu \bullet \id_d \rp = \real_{\rect}\lp \nu \rp$. This completes the proof.
\end{proof} \begin{definition}[Monoid] Given a set $X$ with a binary operation $*$, we say that $X$ is a monoid under the operation $*$ if: \begin{enumerate}[label = (\roman*)] \item For all $x,y \in X$ it is the case that $x*y \in X$ \item For all $x,y,z \in X$ it is the case that $(x *y)*z = x*(y*z)$ \item There exists a unique element $e \in X$ such that for all $x \in X$ it is the case that $e*x=x*e = x$ \end{enumerate} \end{definition} \begin{theorem} Let $d\in \N$. For a fixed $d$, the set of all neural networks $\nu \in \neu$ with end-widths $d$, considered under instantiation with $\rect$, forms a monoid under the operation $\bullet$. \end{theorem} \begin{proof} This is a consequence of Lemma \ref{7.2.3} and Lemma \ref{5.2.3}. \end{proof} \begin{remark} By analogy with matrices, we may find it helpful to refer to neural networks of end-widths $d$ as ``square neural networks of size $d$''. \end{remark} %\section{Modulus of Continuity} %\begin{definition} % Let $A\subseteq \R$ and let $f:A \rightarrow \R$. We denote the modulus of continuity $\omega_f: \lb 0,\infty \rb \rightarrow \lb 0,\infty \rb$ as the function given for all $h \in \lb 0,\infty \rb$ as: % \begin{align}\label{9.3.1} % \omega_f \lp h \rp = \sup \lp \left\{\left| f(x) - f(y)\right| \in \lb 0 ,\infty \rp : \lp x,y \in A, \left| x-y\right| \les h\rp \right\} \cup \left\{ 0\right\} \rp % \end{align} %\end{definition} %\begin{lemma} % Let $\alpha \in \lb -\infty, \infty \rb$, $b \in \lb a, \infty \rb$, and let $f: \lb a,b \rb \cap \R \rightarrow \R$ be a function. It is then the case that for all all $x,y \in \lb a,b\rb \cap \R$ that $\left| f(x) -f(y)\right| \les \omega_f \lp \left| x-y \right| \rp$. %\end{lemma} %\begin{proof} % Note that (\ref{9.3.1}) implies the lemma. %\end{proof} %\begin{lemma}\label{lem:9.3.3} % Let $A\subseteq \R$, $L \in \lb 0,\infty \rp$, and let $f:A \rightarrow \R$ satisfy for all $x,y \in A$ that $\left| f(x) - f(y)\right| \les L \left|x-y \right|$. It is then the case for all $h \in \lb 0,\infty \rp$ that $\omega_f(h) \les Lh$. %\end{lemma} %\begin{proof} % Since it holds for all $x,y \in \R$ that $\left| f(x) - f(y)\right| \les L \left|x-y \right|$, it then, with (\ref{9.3.1}) imply for all $h \in \lb 0,\infty \rp$ that: % \begin{align} % \omega_f \lp h \rp &= \sup \lp \left\{\left| f(x) - f(y)\right| \in \lb 0 ,\infty \rp : \lp x,y \in A, \left| x-y\right| \les h\rp \right\} \cup \left\{ 0\right\} \rp \nonumber\\ % &\les \sup \lp \left\{L\left|x -y\right| \in \lb 0 ,\infty \rp : \lp x,y \in A, \left| x-y\right| \les h\rp \right\} \cup \left\{ 0\right\} \rp \nonumber \\ % &\les \sup \lp \left\{Lh,0 \right\} \rp = Lh % \end{align} % This completes the proof of the lemma. %\end{proof} %\section{Linear Interpolation of Real-Valued Functions} %Note that we need a framework for approximating generic 1-dimensional continuous functions to approximate more complex functions. We introduce the linear interpolation operator and later see how neural networks can approximate 1-dimensional continuous functions to arbitrary precision. % %\subsection{The Linear Interpolation Operator} %\begin{definition}[Linear Interpolation Operator]\label{lio} % Let $n \in \N$, $x_0,x_1,...,x_n, y_0,y_1,...,y_n \in \R$. Let it also be the case that $x_0 \leqslant x_1 \leqslant \cdots \leqslant x_n$.
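To make Definition \ref{7.2.1} and Lemmas \ref{idprop} and \ref{id_param} concrete, the following minimal NumPy sketch (the helper names \texttt{id\_network} and \texttt{realize} are ours and purely illustrative; the instantiation routine simply applies $\rect$ after every affine layer except the last) builds the weight matrices of $\id_d$, checks numerically that its realization is the identity on $\R^d$, and recovers the parameter count $4d^2+3d$.
\begin{verbatim}
import numpy as np

def relu(z):
    return np.maximum(z, 0.0)

def id_network(d):
    """Layers of Id_d: block-diagonal stacking of d copies of Id_1."""
    W1 = np.kron(np.eye(d), np.array([[1.0], [-1.0]]))  # shape (2d, d)
    b1 = np.zeros(2 * d)
    W2 = np.kron(np.eye(d), np.array([[1.0, -1.0]]))    # shape (d, 2d)
    b2 = np.zeros(d)
    return [(W1, b1), (W2, b2)]

def realize(network, x):
    """ReLU realization: ReLU after every affine layer except the last."""
    for W, b in network[:-1]:
        x = relu(W @ x + b)
    W, b = network[-1]
    return W @ x + b

d = 3
net = id_network(d)
x = np.random.randn(d)
assert np.allclose(realize(net, x), x)                            # identity realization
assert sum(W.size + b.size for W, b in net) == 4 * d**2 + 3 * d   # parameter count
\end{verbatim}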
We denote by $\lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n}: \R \rightarrow \R$, the function that satisfies for $i \in \{1,2,...,n\}$, and for all $w \in \lp -\infty, x_0 \rp$, $x \in [ x_{i-1},x_i )$, $z \in [ x_n, \infty)$ that: % \begin{enumerate}[label = (\roman*)] % \item $\lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n}\lp w \rp = y_0$ % \item $\lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n}\lp x \rp = y_{i-1} + \frac{y_i-y_{i-1}}{x_i-x_{i-1}}\lp x- x_{i-1} \rp $ % \item $\lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n}\lp z \rp = y_n$ % \end{enumerate} %\end{definition} %\begin{lemma} % Let $n\in \N$, $x_0,x_1,...,x_n,y_0,y_1,...,y_n \in \R$ with $x_0 \les x_1 \les \cdots \les x_n$, it is then the case that: % \begin{enumerate}[label = (\roman*)] % \item for all $i \in \{0,1,...,n\}$ that: % \begin{align}\label{7.3.1} % \lp \lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n} \rp \lp x_i \rp = y_i % \end{align} % \item for all $i\in \{0,1,...,n\}$ and $x \in [x_{i-1},x_{i}]$ that: % \begin{align}\label{7.3.2} % \lp \lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n} \rp \lp x \rp = \lp \frac{x_i-x}{x_i - x_{i-1}} \rp y_{i-1} + \lp \frac{x-x_{i-1}}{x_i-x_{i-1}} \rp y_i % \end{align} % \end{enumerate} %\end{lemma} %\begin{proof} % Note that (\ref{7.3.1}) is a direct consequence of Definition \ref{lio}. Item (i) then implies for all $i \in \{1,2,...,n\}$ $x \in [x_{i-1},x_i]$ that: % \begin{align} % \lp \lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n} \rp \lp x \rp &= \lb \lp \frac{x_i-x_{i-1}}{x_i-x_{i-1}} \rp - \lp \frac{x-x_{i-1}}{x_i-x_{i-1}} \rp \rb y_{i-1} + \lp \frac{x-x_{i-1}}{x_i-x_{i-1}}\rp y_i \nonumber \\ % &= \lp \frac{x_i-x}{x_i-x_{i-1}} \rp y_{i-1} + \lp \frac{x-x_{i-1}}{x_i-x_{i-1}} \rp y_i \nonumber % \end{align} %\end{proof} %\begin{lemma}\label{lem:9.4.3} % Let $N\in \N$, $L,x_0,x_1,...,x_N \in \R$ satisfy $x_0 < x_1 < \cdots < x_N$, and set let $f:\lb x_0,x_N \rb \rightarrow \R$ satisfy for all $x,y \in \lb x_0,x_N\rb$ that $\left| f(x)-f(y)\right| \les L \left| x-y\right|$, it is then the case that: % \begin{enumerate}[label = (\roman*)] % \item for all $x,y \in \R$ that: % \begin{align} % \left| \lp \lin^{f(x_0),f(x_1),...,f(x_N)}_{x_0,x_1,...,x_N}\rp \lp x \rp - \lp \lin^{f(x_0),f(x_1),...,f(x_N)}_{x_0,x_1,...,x_N}\rp \lp y \rp \right| \les L \left| x-y \right| % \end{align}, and % \item that: % \begin{align} % \sup_{x \in \lb x_0,x_N \rb }\left| \lp \lin^{f(x_0),f(x_1),...,f(x_N)}_{x_1,x_2,...,x_N}\rp \lp x \rp -f\lp x \rp\right| \les L \lp \max_{i \in \{ 1,2,...N\}} \left| x_i-x_{i-1}\right|\rp % \end{align} % \end{enumerate} %\end{lemma} %\begin{proof} % The assumption that for all $x,y \in \lb x_0, x_k \rb$ it is the case that $\left| f(x) - f(y) \right| \les L \left| x-y\right|$ and Lemma \ref{lem:9.3.3} prove Item (i) and Item (ii). %\end{proof} %\subsection{Neural Networks to Approximate the $\lin$ Operator} %\begin{lemma}\label{7.3.3} % Let $\alpha,\beta,h \in \R$. Denote by $\relu \in \neu$ the neural network given by $\relu = h \circledast \lp \mathsf{i}_1 \bullet \aff_{\alpha,\beta}\rp $. It is then the case that: % \begin{enumerate}[label = (\roman*)] % \item $\relu = \lp \lp \alpha, \beta \rp , \lp h,0 \rp \rp$ % \item $\lay(\relu) = \lp 1,1,1 \rp \in \N^3$. 
% \item $\real_{\rect}\lp \relu \rp \in C \lp \R, \R \rp$ % \item for all $x \in \R$ that $\lp \real_{\rect} \lp \relu \rp \rp \lp x \rp = h\max \{\alpha x+\beta ,0\}$ % \end{enumerate} %\end{lemma} %\begin{proof} % Note that by Definition \ref{5.3.1} we know that $\aff_{\alpha,\beta} = \lp \lp \alpha,\beta \rp \rp$, this with Definition \ref{actnn}, and Definition \ref{5.2.1} together tell us that $\mathfrak{i}_1\bullet \aff_{\alpha,\beta} = \lp \alpha,\beta \rp$. A further application of Definition \ref{5.2.1}, and an application of Definition \ref{slm} yields that $h \circledast \lp \mathfrak{i}_1 \bullet \aff_{\alpha,\beta} \rp = \lp \lp \alpha,\beta \rp, \lp h ,0 \rp \rp$. This proves Item (i). % % Note that $\lay(\aff_{\alpha,\beta})= (1,1)$, $\lay(\mathfrak{i}_1) = \lp 1,1,1 \rp $, and $\lay(h)=1$. Item (i) of Lemma \ref{6.0.3} therefore tells us that $\lay (\relu) = \lay \lp h \circledast \lp \mathfrak{i}_1 \bullet \aff_{\alpha,\beta}\rp \rp$. This proves Item (ii). % % Note that Lemmas \ref{7.1.2} and \ref{6.0.3} tell us that: % \begin{align} % \forall x\in \R: \lp \real_{\rect}\lp \mathfrak{i}_1 \bullet \aff_{\alpha,\beta} \rp \rp \lp x \rp = \rect \lp \real_{\rect} \rp \lp x \rp = \max\{ \alpha x+ \beta \} % \end{align} % This and Lemma \ref{slm} ensures that $\real_{\rect}\lp \relu \rp \in C\lp \R, \R \rp$ and further that: % \begin{align} % \lp \real_{\rect} \lp \relu \rp \rp \lp x \rp = h \lp \lp \real_{\rect}\lp \mathfrak{i}_1\bullet \aff_{\alpha,\beta} \rp \rp \lp x\rp \rp = h\max\{\alpha x+\beta,0 \} % \end{align} % This proves Item (iii)-(iv). This completes the proof of the lemma. %\end{proof} %\begin{lemma}\label{9.3.4} % Let $N\in \N$, $x_0,x_1,...,x_N,y_0,y_1,...,y_N \in \R$ and further that $x_0 \les x_2 \les \cdots \les x_N$. Let $\Phi \in \neu$ satisfy that: % \begin{align}\label{7.3.5} % \Phi = \aff_{1,y_0} \bullet \lp \bigoplus^N_{i=0} \lb \lp \frac{y_{\min\{i+1,N\}}-y_i}{x_{\min\{i+1,N\}}-x_{\min\{i,N-1\}}}- \frac{y_i-y_{\max\{i-1,0\}}}{x_{\max\{i,1\}}-x_{\max\{i-1,0\}}}\rp \circledast \lp \mathfrak{i}_1\bullet \aff_{1,-x_i} \rp \rb \rp % \end{align} % It is then the case that: % \begin{enumerate}[label=(\roman*)] % \item $\lay(\Phi)= \lp 1,N+1,1 \rp \in \N^3$ % \item $\real_{\rect} \lp \Phi \rp \in C \lp \R, \R \rp$ % \item $\lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp= \lin ^{y_0,y_1,...,y_N}_{x_0,x_1,...,x_N}\lp x \rp$ % \item $\param(\Phi) = 3N+4$ % \end{enumerate} %\end{lemma} %\begin{proof} % For notational convenience, let it be the case that for all $i \in \{0,1,2,..., N\}$: % \begin{align}\label{7.3.6} % h_i = \frac{y_{\min\{i+1,N\}}-y_i}{x_{\min\{i+1,N\}}-x_{\min\{i,N-1\}}}- \frac{y_i-y_{\max\{i-1,0\}}}{x_{\max\{i,1\}}-x_{\max\{i-1,0\}}} % \end{align} % Note that $\lay \lp \mathfrak{i}_1 \bullet \aff_{1,-x_0} \rp= \lp1,1,1 \rp$, and further that for all $i\in \{0,1,2,...,N\}$, $h_i \in \R$. Lemma \ref{7.3.3} then tells us that for all $i \in \{0,1,2,...,N\}$, $\lay \lp h_i \circledast \lp \mathfrak{i}_1 \bullet \aff_{1,-x_i} \rp \rp = \lp 1,1,1 \rp $, $\real_{\rect}\lp h_i \circledast \lp \mathfrak{i}_1 \bullet \aff_{1,-x_i} \rp \rp \in C \lp \R,\R \rp$, and that $ \lp \real_{\act} \lp h_i \circledast \lp \mathfrak{i}_1 \bullet \aff_{1,-x_i} \rp \rp \rp \lp x \rp = h_i \max\{x-x_k,0 \}$. This, (\ref{7.3.5}), Lemma \ref{5.3.3}, and \cite[Lemma~3.28]{Grohs_2022} ensure that $\lay(\Phi) = \lp 1,N+1,1 \rp \in \N^3$ and that $\real_{\rect} \lp \Phi \rp \in C \lp \R, \R \rp$ establishing Items (i)--(ii). 
% % In addition, note that Item (i) and (\ref{widthdef}), tell us that: % \begin{align} %% NOTE: Ask Dr. P about this parameter % \param(\Phi) = \overbrace{(N+1)}^{W_1}+\underbrace{(N+1)}_{b_1}+\overbrace{(N+1)}^{W_2}+\underbrace{1}_{b_2} =3N+4 % \end{align} % Which proves Item (iv). For all $i \in \{0,1,2,...,N\}$, let $\phi_i$ be $\phi_i = h_i \circledast \lp \mathfrak{i} \bullet \aff_{1,-x_i} \rp $. Next note that \ref{7.3.6}, Lemma \ref{5.3.3}, and \cite[Lemma~3.28]{Grohs_2022} then tell us that: % \begin{align}\label{7.3.8} % \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = y_0 + \sum^n_{i=1} \lp \real_{\act} \lp \phi_i \rp \rp\lp x \rp = y_0 + \sum^n_{i=1}h_i \max\{x-x_i,0\} % \end{align} % Since $x_0 \les x_i$ for all $i\in\{1,2,...,n\}$, it then is the case for all $x \in (\infty, x_0]$ that: % \begin{align}\label{7.3.10.2} % \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = y_0+0 = y_0 % \end{align} % \begin{claim} % For all $i \in \{1,2,...,N\}$ it is the case that : % \begin{align}\label{7.3.10} % \sum_{j=0}^{i-1}h_j = \frac{y_{i}-y_{i-1}}{x_i-x_{i-1}} % \end{align} % \end{claim} % We prove this claim by induction. For the base case of $i=1$, we have: % \begin{align} % \sum^0_{j=0} h_0 = h_0 = \frac{y_{1}-y_0}{x_{1}-x_{0}}- \frac{y_0-y_{0}}{x_{1}-x_{0}} =\frac{y_1-y_0}{x_1-x_0} % \end{align} % This proves the base base for (\ref{7.3.10}). Assume next that this holds for $k$, for the $(k+1)$-th induction step we have: % \begin{align} % \sum^{k+1}_{j=0}h_j = \sum^k_{j=0}h_j + h_{k+1} &=\frac{y_k-y_{k-1}}{x_k-x_{k-1}}+h_{k+1} \nonumber\\ % &= \frac{y_k-y_{k-1}}{x_k-x_{k-1}} + \frac{y_{k+2}-y_{k-1}}{x_{k+2}-x_{k+1}} - \frac{y_{k+1}-y_{k}}{x_{k+1} - x_k} \nonumber\\ % &= \frac{y_{k+1}-y_k}{x_{k+1}-x_k} % \end{align} %%TODO: Double-check this proof %This proves (\ref{7.3.10}). In addition, note that (\ref{7.3.8}), (\ref{7.3.10}), and the fact that for all $i \in \{1,2,...,n\}$ it is the case that $x_{i-1} \les x_{i}$ tells us that for all $i \in \{1,2,...,n\}$ and $x \in [x_{i-1},x_i]$ it is the case that: % \begin{align}\label{7.3.13} % &\lp \real_{\rect}\lp \Phi \rp \rp \lp x \rp - \lp \real_{\act}\lp \Phi \rp \rp \lp x_{i-1}\rp = \sum^n_{j=0} h_j \lp \max \{ x-x_j,0 \}-\max \{x_{i-1}-x_j,0\} \rp \nonumber\\ % &= \sum^{i-1}_{j=0}c_j \lb \lp x-x_j \rp -\lp x_{i-1}-x_j \rp \rb = \sum^{i-1}_{j=0} c_j \lp x - x_{i-1} \rp = \lp \frac{y_i-y_{i-1}}{x_i-x_{i-1}}\rp \lp x-x_{i-1} \rp % \end{align} % \begin{claim} % For all $i \in \{1,2,...,N\}$, $x\in [x_{i-1},x_i]$ it is the case that: % \begin{align}\label{7.3.14} % \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = y_{i-1}+ \lp \frac{y_i-y_{i-1}}{x_i-x_{i-1}} \rp \lp x - x_{i-1} \rp % \end{align} % \end{claim} % We will prove this claim by induction. 
For the base case of $i=1$, (\ref{7.3.13}) and (\ref{7.3.10}) tell us that: % \begin{align} % \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp &=\lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp - \lp \real_{\rect}\lp \Phi \rp \rp \lp x_{i-1} \rp + \lp \real_{\rect}\lp \Phi \rp \rp \lp x_{i-1} \rp \nonumber \\ % &= y_0 + \lp \frac{y_1-y_0}{x_i-x_{i-1}} \rp \lp x - x_{i-1} \rp % \end{align} % For the induction step notice that (\ref{7.3.13}) implies that for all $i \in \{2,3,...,N\}$, $x \in [x_{i-1},x_i]$, with the instantiation that $\forall x \in [x_{i-2},x_{i-1}]: \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = y_{i-2} + \lp \frac{y_{i-1}-y_{i-2}}{x_{i-1}-x_{i-2}} \rp \lp x-x_{i-2} \rp $, it is then the case that: % \begin{align} % \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp &= \lp \real_{\rect} \lp \Phi \rp \rp \lp x_{i-1}\rp + \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp - \lp \real_{\rect} \lp \Phi \rp \rp \lp x_{i-1} \rp \nonumber\\ % &=y_{i-2} + \lp \frac{y_{i-1}-y_{i-2}}{x_{i-1}-x_{i-2}} \rp \lp x_{i-1}+x_{i-2} \rp + \lp \frac{y_i-y_{i-1}}{x_i-x_{i-1}} \rp \lp x - x_{i-1} \rp \nonumber\\ % &= y_{i-1} + \lp \frac{y_i-y_{i-1}}{x_i-x_{i-1}} \rp \lp x-x_{i-1} \rp % \end{align} % Thus induction proves (\ref{7.3.14}). Furthermore note that (\ref{7.3.10}) and (\ref{7.3.6}) tell us that: % \begin{align} % \sum^N_{i=0} h_i = c_N +\sum^{N-1}_{i=0}h_i = -\frac{y_N-y_{N-1}}{x_N-x_{N-1}}+\frac{y_N-y_{N-1}}{x_N-x_{N-1}} = 0 % \end{align} % The fact that $\forall i \in \{0,1,...,N\}:x_i \les x_N$, together with (\ref{7.3.8}) imply for all $x \in [x_N,\infty)$ that: % \begin{align} % \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp - \lp \real_{\rect} \lp \Phi \rp \rp \lp x_N \rp &= \lb \sum^N_{i=0} h_i \lp \max\{x-x_i,0\}-\max\{x_N-x_i,0\} \rp \rb \nonumber\\ % &= \sum^N_{i=0} h_i \lb \lp x- x_i \rp - \lp x_N - r_i \rp \rb = \sum^N_{i=0} h_i \lp x - x_N \rp =0 \nonumber % \end{align} % This and (\ref{7.3.14}) tells us that for all $x \in [x_N,\infty)$ we have: % \begin{align} % \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = \lp \real_{\rect} \lp \Phi \rp \rp \lp x_N \rp = y_{N-1}+ \lp \frac{y_N-y_{N-1}}{x_N - x_{N-1}} \rp \lp x_N-x_{N-1} \rp = x_N % \end{align} % Together with (\ref{7.3.10.2}), (\ref{7.3.14}), and Definition \ref{lio} establishes Item (iii) thus proving the lemma. % \end{proof} %\section{Neural Network Approximations of 1-dimensional Functions.} % %\begin{lemma}\label{lem:9.5.1} % Let $N\in \N$, $L. a. x_0,x_1,...,x_N \in \R$, $b\in \lp a,\infty \rp$, satisfy for all $i \in \left\{0,1,...,N\right\}$ that $x_i = a+ \frac{i(b-a)}{N}$. 
Let $f:\lb a,b\rb \rightarrow \R$ satisfy for all $x,y \in \lb a,b\rb$ that $\left|f(x) - f(y) \right| \les L\left|x-y\right|$ and let $\mathsf{F} \in \neu$ satisfy: % \begin{align} % \F = \aff_{1,f(x_0)}\bullet \lb\bigoplus^N_{i=0} \lp \lp \frac{N \lp f \lp x_{\min \{i+1,N\}}\rp-2f\lp x_i\rp + f \lp x_{\max \{ i-1,0 \}}\rp\rp}{b-a}\rp \circledast \lp \mathsf{i}_1 \bullet \aff_{1,-x_k} \rp\rp \rb % \end{align} % It is then the case that: % \begin{enumerate}[label = (\roman*)] % \item $\lay \lp \F \rp = \lp 1, N+1,1\rp$ % \item $\real_{\rect} \lp \F \rp\in C\lp \R, \R \rp$ % \item $\real_{\rect} \lp \F \rp = \lin ^{f(x_0),f(x_1),...,f(x_N)}_{x_1,x_2,...,x_N}$ % \item it holds that for all $x,y \in \R$ that $\left| \lp \real_{\rect} \lp \F \rp \rp \lp x \rp -\lp \real_{\rect} \lp \F \rp\rp\lp y \rp \right| \les L \left| x-y \right|$ % \item it holds that $\sup_{x \in \lb a,b \rb} \left| \lp \real_{\rect} \lp \F \rp \rp\lp x\rp -f(x)\right| \les \frac{L \lp b-a\rp}{N}$, and % \item $\param\lp \F \rp = 3N+4$. % \end{enumerate} %\end{lemma} %\begin{proof} % Note that since it is the case that for all $i \in \left\{0,1,...,N \right\}: x_{\min \{i+1,N\}} - x_{\min \{i, N-1\}} = x_{\max\{i,1\}} - x_{\max \{i-1,0\}} = \frac{b-a}{N}$, we have that: % \begin{align} % \frac{f\lp x_{\min\{i+1,N\}}\rp- f \lp x_i \rp}{x_{\min \{ i+1,N\}}-x_{\min \{i,N-1\}}} - \frac{f(x_i)-f\lp x_{\max\{i-1,0\}}\rp}{x_{\max \{ i,1\}}-x_{\max \{i-1,0\}}} = \frac{N \lp f \lp x_{\min \{i+1,N\}}\rp -2f\lp x_i \rp +f\lp x_{\max \{ i-1,0\}}\rp\rp}{b-a} % \end{align} % Thus Items (i)-(iv) of Lemma \ref{9.3.4} prove Items (i)-(iii), and (vi) of this lemma. Item (iii) combined with the assumption that for all $x,y \in \lb a,b \rb: \left| f(x) - f(y) \right| \les \left| x-y \right|$ and Item (i) in Lemma \ref{lem:9.4.3} establish Item (iv). Furthermore, note that Item (iii), the assumption that for all $x,y \in \lb a,b \rb: \left| f(x) -f(y)\right| \les L\left| x-y\right|$, Item (ii) in Lemma \ref{lem:9.4.3} and the fact that for all $i \in \{1,2,..., N\}: x_i-x_{i-1} = \frac{b-a}{N}$ demonstrate for all $x \in \lb a,b \rb$ it holds that: % \begin{align} % \left| \lp \real_{\rect} \lp \F \rp\rp \lp x \rp -f\lp x \rp \right| \les L \lp \max_{i \in \{1,2,...,N\}} \left| x_i - x_{i-1}\right|\rp = \frac{L(b-a)}{N} % \end{align} %\end{proof} %\begin{lemma}\label{lem:9.5.2} % Let $L,a \in \R$, $b\in \lb a, \infty \rp$, $\xi \in \lb a,b \rb$, let $f: \lb a,b \rb \rightarrow \R$ satisfy for all $x,y \in \lb a,b \rb$ that $\left| f(x) - f(y) \right| \les L\left|x-y \right|$, and let $\F \in \neu$ satisfy $\F = \aff_{1,f(\xi)} \bullet \lp 0 \circledast \lp \mathsf{i}_1 \bullet \aff_{1,-\xi} \rp \rp $, it is then the case that: % \begin{enumerate}[label = (\roman*)] % \item $\lay \lp \F \rp = \lp 1,1,1 \rp$ % \item $\real_{\rect} \lp \F \rp \in C \lp \R, \R \rp$ % \item for all $x \in \R$, we have $\lp \real_{\rect}\lp \F \rp \rp \lp x \rp = f \lp \xi \rp$ % \item $\sup_{x \in \lb a,b\rb} \left| \lp \real_{\rect} \lp \F \rp \rp\lp x \rp -f(x)\right| \les L \max \{ \xi -a, b-\xi\}$ % \item $\param \lp \F \rp = 4$ % \end{enumerate} %\end{lemma} % %\begin{proof} % Note that Item (i) is a consequence of the fact that $\aff_{1,-\xi}$ is a neural network with a real number as weight and a real number as a bias and the fact that $\lay \lp \mathsf{i}_1 \rp = \lp 1,1,1 \rp$. Note also that Item (iii) of Lemma \ref{7.3.3} proves Item (iii). 
% % Note that from the construction of $\aff$ we have that: % \begin{align}\label{(9.5.4)} % \lp \real_{\rect} \lp \F \rp\rp \lp x \rp &= \lp \real_{\rect} \lp 0 \circledast \lp \mathsf{i}_1 \bullet \aff_{1,-\xi}\rp\rp \rp \lp x \rp + f \lp \xi \rp \nonumber \\ % &= 0 \lp \lp \real_{\rect} \lp \mathsf{i}_1 \bullet \aff_{1,-\xi} \rp\rp \lp x \rp \rp + f \lp \xi \rp = f \lp \xi \rp % \end{align} % Which establishes Item (iii). Note that (\ref{(9.5.4)}), the fact that $\xi \in \lb a,b\rb$ and the fact that for all $x,y \in \lb a,b \rb$ it is the case that $\left| f(x) - f(y) \right| \les \left| x-y \right|$ give us that for all $x \in \lb a,b \rb$ it holds that: % \begin{align} % \left| \lp \real_{\rect} \lp \F \rp\rp \lp x \rp - f\lp x \rp\right| = \left| f\lp \xi \rp - f \lp x \rp\right| \les L \left| x- \xi \right| \les L \max\left\{ \xi -a, b-\xi \right\} % \end{align} % This establishes Item (iv). Note a simple parameter count yields the following: % \begin{align} % \param \lp \F \rp = 1(1+1)+1(1+1) = 4 % \end{align} % Establishing Item (v) and hence the lemma. This completes the proof. %\end{proof} %\begin{corollary} % Let $\ve \in (0,\infty)$, $L,a \in \R$, $b \in \lp a,\infty \rp$, $N \in \N_0 \cap \lb \frac{L(b-a)}{\ve}, \frac{L(b-a)}{\ve}+1\rb$, $x_0, x_1,...,x_N \in \R$ satisfy for all $i \in \{ 0,1,...,N\}$ that $x_i = a + \frac{i(b-a)}{\max\{N,1\}}$, let $f: \lb a,b \rb \rightarrow \R$ satisfy for all $x,y \in \lb a,b \rb$ that $\left| f(x) - f(y) \rb \les L\left| x-y \right|$, and let $\F \in \neu$ satisfy: % \begin{align} % \F = \aff_{1,f(x_0)}\bullet \lb\bigoplus^N_{i=0} \lp \lp \frac{N \lp f \lp x_{\min \{i+1,N\}}\rp-2f\lp x_i\rp + f \lp x_{\max \{ i-1,0 \}}\rp\rp}{b-a}\rp \circledast \lp \mathsf{i}_1 \bullet \aff_{1,-x_k} \rp\rp \rb % \end{align} % It is then the case that: % \begin{enumerate}[label = (\roman*)] % \item $\lay\lp \F \rp = \lp 1,N+1,1 \rp$ % \item $\real_{\rect} \lp \F \rp \in C \lp \R, \R \rp$ % \item for all $x,y \in \R$ that $\left| \lp \real_{\rect} \lp \F \rp \rp \lp x \rp - \lp \real_{\rect} \lp \F \rp \rp \lp x \rp \right| \les L \left| x-y \right|$ % \item $\sup_{x \in \lb a,b \rb} \left| \lp \real_{\rect} \lp \F \rp \rp \lp x \rp -f(x) \right| \les \frac{L(b-a)}{\max \{N,1\}} \les \ve$, and % \item $\param \lp \F \rp = 3N+4 \les 3L \lb \frac{b-a}{\ve} \rb +7$. % \end{enumerate} %\end{corollary} %\begin{proof} % The fact that $N \in \N_0 \cap \lb \frac{L(b-a)}{\ve}, \frac{L(b-a)}{\ve}+1 \rb$ ensures that $\frac{L(b-a)}{\max\{ K,1\}} \les \ve$. This and Items (i),(ii),(iv), and (v) in Lemma \ref{lem:9.5.1} and Items (i)-(iii), and (iv) of Lemma $\ref{lem:9.5.2}$ establishes Items (i)-(iv). Furthermore, note that since $N\les 1 + \frac{L(b-a)}{\ve}$, Item (vi) in Lemma \ref{lem:9.5.1} and Item (v) in Lemma \ref{lem:9.5.2} tells us that: % \begin{align} % \param \lp \F\rp = 3N+4 \les \frac{3L\lp b-a \rp}{\ve} + 7. % \end{align} % Which establishes Item (v) and proves the result. %\end{proof} \section{$\trp^h$, $\etr^{n,h}$ and Neural Network Approximations For the Trapezoidal Rule.} \begin{definition}[The $\trp$ neural network] Let $h \in \R_{\ges 0}$. We define the $\trp^h \in \neu$ neural network as: \begin{align} \trp^h \coloneqq \aff_{\lb \frac{h}{2} \: \frac{h}{2}\rb,0} \end{align} \end{definition} \begin{lemma} Let $h\in \lp -\infty, \infty\rp$. 
It is then the case that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \trp^h \rp \in C \lp \R^2, \R \rp$ \item for all $x = \lp x_1,x_2 \rp \in \R^2$ it is the case that $\lp \real_{\rect} \lp \trp^h \rp \rp \lp x \rp = \frac{1}{2}h \lp x_1+x_2 \rp$ \item $\dep \lp \trp^h \rp = 1$ \item $\param\lp \trp^h \rp = 3$ \item $\lay \lp \trp^h \rp = \lp 2,1 \rp$ \end{enumerate} \end{lemma} \begin{proof} This is a straightforward consequence of Lemma \ref{5.3.1}. \end{proof} \begin{definition}[The $\etr$ neural network] Let $n\in \N$ and $h \in \R_{\ges 0}$. We define the neural network $\etr^{n,h} \in \neu$ as: \begin{align} \etr^{n,h} \coloneqq \aff_{\underbrace{\lb \frac{h}{2} \ h \ h\ ... \ h \ \frac{h}{2}\rb}_{n+1-many},0} \end{align} \end{definition} \begin{lemma}\label{etr_prop} Let $n\in \N$, $x_0 \in \lp -\infty, \infty \rp$, and $h\in \lp 0,\infty\rp$. Let $ x = \lb x_0 \: x_1 \:...\: x_n\rb \in \R^{n+1}$ be such that for all $i \in \{0,1,...,n\}$ it is the case that $x_i = x_0+i\cdot h$. Then: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \etr^{n,h} \rp \in C \lp \R^{n+1}, \R \rp$ \item $\lp \real_{\rect} \lp \etr^{n,h} \rp \rp \lp x \rp = \frac{h}{2} \cdot x_0+h\cdot x_1 + \cdots + h\cdot x_{n-1} + \frac{h}{2}\cdot x_n$ \item $\dep \lp \etr^{n,h} \rp = 1$ \item $\param\lp \etr^{n,h} \rp = n+2$ \item $\lay \lp \etr^{n,h} \rp = \lp n+1,1 \rp$ \end{enumerate} \end{lemma} \begin{proof} This is a straightforward consequence of Lemma \ref{5.3.1}. \end{proof} \begin{remark} Let $h \in \lp 0,\infty\rp$. Note then that $\trp^h$ is simply $\etr^{1,h}$. \end{remark} %\begin{lemma} % Let $f \in C \lp \R, \R \rp$, $a\in \R, b \in \lb a,\infty\rp$, $N\in \N$, and let $h = \frac{b-a}{N}$. Assume also that $f$ has first and second derivatives almost everywhere. Let $ x = \lb x_0 \: x_1 \:...\: x_n\rb \in \R^{n+1}$ such that for all $i \in \{0,1,...,n\}$ it is the case that $x_i = x_0+i\cdot h$, as such let it also be the case that $f\lp \lb x \rb_{*,*}\rp = \lb f(x_0)\: f(x_1) \: \cdots f(x_n) \rb$. Let $a = x_0$ and $b = x_n$. It is then the case that: % \begin{align}\label{(9.6.3)} % \left| \int^b_a f\lp x \rp dx - \lp \real_{\rect}\lp \etr^{n,h} \rp\rp \lp f\lp \lb x \rb_{*,*}\rp\rp \right| \les \frac{\lp b-a \rp^3}{12N^2} f''\lp \xi \rp % \end{align} % Where $\xi \in \lb a,b \rb$.
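Because $\etr^{n,h}$ consists of a single affine layer, its realization applied to a vector of function samples is exactly the composite trapezoidal rule. The following minimal NumPy sketch illustrates this (the helper name \texttt{etr\_weights} is ours and purely illustrative; the realization of a depth-one affine network is simply the affine map itself).
\begin{verbatim}
import numpy as np

def etr_weights(n, h):
    """Weight row of Etr^{n,h}: [h/2, h, ..., h, h/2] with n+1 entries (bias 0)."""
    w = np.full(n + 1, h)
    w[0] = w[-1] = h / 2.0
    return w

# Approximate the integral of sin over [0, pi] with n sub-intervals.
a, b, n = 0.0, np.pi, 200
h = (b - a) / n
grid = a + h * np.arange(n + 1)
samples = np.sin(grid)

trapezoid = etr_weights(n, h) @ samples   # realization of Etr^{n,h} at the sample vector
assert abs(trapezoid - 2.0) < 1e-3        # exact value of the integral is 2
\end{verbatim}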
%\end{lemma} %\begin{proof} % Consider the fact that we may express the left-hand side of (\ref{(9.6.3)}) as: % \begin{align} % \left| \int^b_af dx - \lp \real_{\rect}\lp \etr^{n,h} \rp\rp \lp x \rp \right| = \left| \sum_{i=1}^n \lb \int^{x_i}_{x_{i-1}} f\lp x \rp dx-\frac{h}{2}\lp f\lp x_{i-1} \rp + f\lp x_i\rp\rp \rb \right| % \end{align} % We then denote by $L_i$ the error at sub-interval $\lb x_{i-1},x_i \rb$ as given by: % \begin{align} % L_i = \left| \int^{x_i}_{x_{i-1}}f\lp x \rp dx - \frac{h}{2}\lp f\lp x_{i-1}\rp -f\lp x_i \rp \rp \right| % \end{align} % Furthermore, we denote $c_i = \frac{x_{i-1}+x_i}{2}$ as the midpoint of the interval $\lb x_{i-1}, x_i\rb$, which yields the observation that: % \begin{align}\label{(9.6.6)} % c_i-x_{i-1} = x_i - c_i = \frac{b-a}{2N} % \end{align} % Integration by parts and (\ref{(9.6.6)}) then yields that: % \begin{align} % \int^{x_i}_{x_{i-1}}\lp t- c_i\rp f' \lp t \rp dt &= \int^{x_i}_{x_{i-1}} \lp t-c_i\rp df\lp t \rp \nonumber \\ % &= \lp x_i -c_i \rp f\lp x_i \rp - \lp x_{i-1} - c_i\rp f \lp x_{i-1}\rp - \int^{x_i}_{x_{i-1}}f \lp t \rp dt \nonumber \\ % &= \frac{b-a}{2N} \lp f\lp x_{i}\rp - f\lp x_{i-1}\rp\rp - \int^{x_i}_{x_{i-1}}f\lp t \rp dt = L_i % \end{align} % Whence we have: % \begin{align} % L_i = \int^{x_i}_{x_{i-1}}\lp t-c_i\rp f'\lp t\rp dt % \end{align} % Integration by parts, (\ref{(9.6.6)}), and the Fundamental Theorem of Calculus then gives us: % \begin{align} % L_i &= \int^{x_i}_{x_{i-1}} f' \lp t \rp d \frac{\lp t-c_i \rp^2}{2} \nonumber\\ % &= \frac{\lp x_i - c_i\rp^2}{2} f' \lp x_i \rp - \frac{\lp x_{i-1} - c_i\rp^2}{2} f' \lp x_{i-1} \rp - \frac{1}{2} \int^{x_i}_{x_{i-1}} \lp t-c_i \rp^2 f'' \lp t\rp \nonumber\\ % &= \frac{1}{2}\lb \frac{b-a}{2N}\rb^2 \lp f'\lp x_i \rp - f' \lp x_{i-1}\rp \rp - \frac{1}{2} \int^{x_i}_{x_{i-1}} \lp t-c_i\rp^2 f'' \lp t \rp dt \nonumber\\ % &= \frac{1}{2} \int^{x_i}_{x_{i-1}} f'' \lp t \rp dt - \frac{1}{2} \int^{x_i}_{x_{i-1}} \lp t-c_i\rp^2 f'' \lp t\rp dt \nonumber \\ % &= \frac{1}{2} \int^{x_i}_{x_{i-1}}\lp \lb \frac{b-a}{2N} \rb^2 - \lp t-c_i\rp^2 \rp f'' \lp t\rp dt % \end{align} % Assuming that $f''\lp x \rp \les M$ within $\lb a,b \rb$ we then have that: % \begin{align} % \left| \int^b_af dx - \lp \real_{\rect}\lp \etr^{n,h} \rp\rp \lp x \rp \right| &\les \sum_{i=1}^N \left| L_i\right| \nonumber\\ % &\les \frac{1}{2}\sum^N_{i=1} \int^{x_i}_{x_{i-1}} \left| \lp \lb \frac{b-a}{2N} \rb^2 - \lp t-c_i\rp^2 \rp \right| \left| f'' \lp t\rp dt\right| \nonumber \\ % &\les \frac{M}{2} \sum_{i=1}^N \int^{x_i}_{x_{i-1}} \lb \frac{b-a}{2N}\rb^2 - \lp t-c_i\rp^2 dt \nonumber \\ % &= \frac{M}{2} \lp \lb \frac{b-2}{2N}\rb^2\lp b-a \rp - \frac{2n}{3} \lb \frac{b-a}{2N}\rb^3 \rp \nonumber\\ % &= \frac{M \lp b-a \rp^3}{12N^2} % \end{align} % This completes the proof of the lemma. %\end{proof} \section{Maximum Convolution Approximations for Multi-Dimensional Functions} We will present here an approximation scheme for continuous functions called maximum convolution approximation. This derives mainly from Chapter 4 of \cite{bigbook}, and our contribution is mainly to show parameter bounds, and convergence in the case of $1$-D approximation. 
\subsection{The $\nrm^d_1$ Networks} \begin{definition}[The $\nrm_1^d$ neural network] We denote by $\lp \nrm_1^d \rp _{d\in \N} \subseteq \neu$ the family of neural networks that satisfy: \begin{enumerate}[label = (\roman*)] \item for $d=1$:\begin{align}\label{(9.7.1)} \nrm^1_1 = \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix}\rp \rp \in \lp \R^{2 \times 1} \times \R^2 \rp \times \lp \R^{1 \times 2} \times \R^1 \rp \end{align} \item for $d \in \{2,3,...\}$: \begin{align} \nrm_1^d = \sm_{d,1} \bullet \lb \boxminus_{i=1}^d \nrm_1^1 \rb \end{align} \end{enumerate} \end{definition} \begin{lemma}\label{9.7.2}\label{lem:nrm_prop} Let $d \in \N$. It is then the case that: \begin{enumerate}[label = (\roman*)] \item $\lay \lp \nrm^d_1 \rp = \lp d,2d,1 \rp$ \item $\real_{\rect} \lp \nrm^d_1\rp \in C \lp \R^d,\R \rp$ \item for all $x \in \R^d$ it is the case that $\lp \real_{\rect}\lp \nrm^d_1 \rp \rp \lp x \rp = \left\| x \right\|_1$ \item it holds that $\hid\lp \nrm^d_1\rp=1$ \item it holds that $\param \lp \nrm_1^d \rp \les 7d^2$ \item it holds that $\dep\lp \nrm^d_1\rp =2 $ \end{enumerate} \end{lemma} \begin{proof} Note that by observation, it is the case that $\lay\lp \nrm^1_1 \rp = \lp 1,2,1\rp$. This and Remark \ref{5.3.2} tell us that for all $d \in \{2,3,...\}$ it is the case that $\lay \lp \boxminus_{i=1}^d \nrm^1_1 \rp = \lp d,2d,d\rp$. This, Lemma \ref{comp_prop}, and Lemma \ref{5.3.2} ensure that for all $d \in \{2,3,4,...\}$ it is the case that $\lay\lp \nrm^d_1 \rp = \lp d,2d,1 \rp$, which in turn establishes Item (i). Notice now that (\ref{(9.7.1)}) ensures that for all $x \in \R$ it is the case that: \begin{align} \lp \real_{\rect} \lp \nrm^1_1 \rp \rp \lp x \rp = \rect \lp x \rp + \rect \lp -x \rp = \max \{x,0 \} + \max \{ -x,0\} = \left| x \right| = \| x \|_1 \end{align} This along with \cite[Proposition~2.19]{grohs2019spacetime} tells us that for all $d \in \{2,3,4,...\}$ and $x = \lp x_1,x_2,...,x_d\rp \in \R^d$ it is the case that: \begin{align} \lp \real_{\rect} \lb \boxminus^d_{i=1} \nrm^1_1\rb\rp \lp x \rp = \lp \left| x_1 \right|, \left| x_2\right|,..., \left| x_d \right| \rp \end{align} This together with Lemma \ref{depthofcomposition} tells us that: \begin{align} \lp \real_{\rect} \lp \nrm^d_1 \rp \rp \lp x \rp &= \lp \real_{\rect} \lp \sm_{d,1} \bullet \lb \boxminus_{i=1}^d \nrm^1_1\rb\rp \rp \lp x \rp \nonumber\\ &= \lp \real_{\rect} \lp \sm_{d,1} \rp \rp \lp |x_1|,|x_2|,...,|x_d|\rp = \sum^d_{i=1} |x_i| =\|x\|_1 \end{align} This establishes Items (ii)\textemdash(iii). Note next that by observation $\hid\lp \nrm^1_1 \rp = 1$. Remark \ref{5.3.2} then tells us that since the number of layers remains unchanged under stacking, it is the case that $\hid \lp \nrm^1_1 \rp = \hid \lp \boxminus_{i=1}^d \nrm_1^1\rp = 1$. Note next that Lemma \ref{5.2.3} tells us that $\hid \lp \sm_{d,1} \rp = 0$, whence Lemma \ref{comp_prop} tells us that: \begin{align} \hid \lp \nrm^d_1 \rp &= \hid \lp \sm_{d,1}\bullet \lb \boxminus_{i=1}^d \nrm^1_1 \rb \rp \nonumber \\ &= \hid \lp \sm_{d,1} \rp + \hid \lp \lb \boxminus_{i=1}^d \nrm^1_1 \rb \rp = 0+1=1 \end{align} This establishes Item (iv). Note next that: \begin{align} \nrm^1_1 = \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix}\rp \rp \in \lp \R^{2 \times 1} \times \R^2 \rp \times \lp \R^{1 \times 2} \times \R^1 \rp \end{align} and as such $\param\lp \nrm^1_1 \rp = 7$.
This, combined with Corollary \ref{cor:sameparal} and the fact that we are stacking identical neural networks, then tells us that: \begin{align} \param \lp \lb \boxminus_{i=1}^d \nrm_1^1 \rb \rp &\les 7d^2 \end{align} Corollary \ref{affcor}, Lemma \ref{lem:5.5.4}, and Lemma \ref{comp_prop} then tell us that: \begin{align} \param \lp \nrm^d_1 \rp &= \param \lp \sm_{d,1} \bullet \lb \boxminus_{i=1}^d \nrm_1^1 \rb\rp \nonumber \\ &\les \param \lp \lb \boxminus_{i=1}^d \nrm_1^1 \rb \rp \les 7d^2 \end{align} This establishes Item (v). Finally, by observation $\dep \lp \nrm^1_1\rp = 2$. Since $\nrm^d_1$ is built by stacking copies of the same neural network, and stacking has no effect on depth by Definition \ref{def:stacking}, Lemma \ref{comp_prop} gives $\dep \lp \sm_{d,1} \bullet \lb \boxminus^d_{i=1} \nrm_1^1\rb \rp = \dep \lp \boxminus^d_{i=1} \nrm^1_1\rp$. Thus we may conclude that $\dep \lp \nrm^d_1\rp = \dep \lp \nrm_1^1\rp =2$. This concludes the proof of the lemma. \end{proof} \subsection{The $\mxm^d$ Neural Networks} Given $x\in \R$, it is straightforward to find the maximum: $x$ itself is the maximum. For $x \in \R^2$ we may find the maximum via the network in (\ref{9.7.6}), i.e. $\mxm^2$. The strategy is to take maxima of pairs of entries, thereby halving the number of entries, and to repeat this until only one maximum remains. For $x \in \R^d$ where $d$ is even we may stack $\frac{d}{2}$ copies of $\mxm^2$ to halve, and for $x \in \R^d$ where $d$ is odd and at least $3$ we may introduce ``padding'' via the $\id_1$ network and thus require $\frac{d-1}{2}$ copies of $\mxm^2$ to halve. \begin{definition}[Maxima ANN representations] Let $\lp \mxm ^d\rp_{d \in \N} \subseteq \neu$ represent the neural networks that satisfy: \begin{enumerate}[label = (\roman*)] \item for all $d \in \N$ that $\inn \lp \mxm^d \rp = d$ \item for all $d \in \N$ that $\out\lp \mxm^d \rp = 1$ \item that $\mxm^1 = \aff_{1,0} \in \R^{1 \times 1} \times \R^1$ \item that: \begin{align}\label{9.7.6} \mxm^2 = \lp \lp \begin{bmatrix} 1 & -1 \\ 0 & 1 \\ 0 & -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1&1&-1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix}\rp\rp \end{align} \item it holds for all $d \in \{2,3,...\}$ that $\mxm^{2d} = \mxm^d \bullet \lb \boxminus_{i=1}^d \mxm^2\rb$, and \item it holds for all $d \in \{ 2,3,...\}$ that $\mxm^{2d-1} = \mxm^d \bullet \lb \lp \boxminus^{d-1}_{i=1} \mxm^2 \rp \boxminus \id_1\rb$.
\end{enumerate} \end{definition} \begin{remark} Diagrammatically, this can be represented as: \begin{figure}[h] \begin{center} \tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt \begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1] %uncomment if require: \path (0,498); %set diagram left start at 0, and has height of 498 %Shape: Rectangle [id:dp977616844446347] \draw (560,138) -- (630,138) -- (630,178) -- (560,178) -- cycle ; %Shape: Rectangle [id:dp8371611327934396] \draw (560,206) -- (630,206) -- (630,246) -- (560,246) -- cycle ; %Shape: Rectangle [id:dp900733317366978] \draw (562,274) -- (632,274) -- (632,314) -- (562,314) -- cycle ; %Shape: Rectangle [id:dp2381571768613] \draw (565,350) -- (635,350) -- (635,390) -- (565,390) -- cycle ; %Shape: Rectangle [id:dp47165779567431265] \draw (568,425) -- (638,425) -- (638,465) -- (568,465) -- cycle ; %Shape: Rectangle [id:dp26730884303141045] \draw (438,175) -- (508,175) -- (508,215) -- (438,215) -- cycle ; %Shape: Rectangle [id:dp43479154744962956] \draw (439,310) -- (509,310) -- (509,350) -- (439,350) -- cycle ; %Shape: Rectangle [id:dp14664308815255211] \draw (302,234) -- (372,234) -- (372,274) -- (302,274) -- cycle ; %Straight Lines [id:da5196233580766983] \draw (437,196.5) -- (374.51,251.18) ; \draw [shift={(373,252.5)}, rotate = 318.81] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da6126209944613533] \draw (559,155.5) -- (509.66,188.88) ; \draw [shift={(508,190)}, rotate = 325.92] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da5768174542895418] \draw (558,224.5) -- (512.81,203.35) ; \draw [shift={(511,202.5)}, rotate = 25.08] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da12927244412488015] \draw (560,290.5) -- (510.66,323.88) ; \draw [shift={(509,325)}, rotate = 325.92] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da9640125892832212] \draw (566,372.5) -- (511.72,340.03) ; \draw [shift={(510,339)}, rotate = 30.89] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da6747547273640673] \draw (437,331.5) -- (373.38,264.95) ; \draw [shift={(372,263.5)}, rotate = 46.29] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. 
(10.93,3.29) ; %Shape: Rectangle [id:dp14455818861310843] \draw (445,425) -- (515,425) -- (515,465) -- (445,465) -- cycle ; %Shape: Rectangle [id:dp03375582603009031] \draw (301,367) -- (371,367) -- (371,407) -- (301,407) -- cycle ; %Shape: Rectangle [id:dp0789527597033911] \draw (163,296) -- (233,296) -- (233,336) -- (163,336) -- cycle ; %Straight Lines [id:da6246849218035846] \draw (302,252.5) -- (236.47,313.14) ; \draw [shift={(235,314.5)}, rotate = 317.22] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da5611532984284957] \draw (299,390.5) -- (235.38,323.95) ; \draw [shift={(234,322.5)}, rotate = 46.29] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da6903134547643467] \draw (162,315.5) -- (108,315.5) ; \draw [shift={(106,315.5)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da647770723003481] \draw (567,447.5) -- (518,447.5) ; \draw [shift={(516,447.5)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da1985911653622896] \draw (443,448.5) -- (373.61,397.68) ; \draw [shift={(372,396.5)}, rotate = 36.22] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da06349763555732901] \draw (437,342) -- (373.67,383.41) ; \draw [shift={(372,384.5)}, rotate = 326.82] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. 
(10.93,3.29) ; % Text Node \draw (574,150.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (574,214.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (576,283.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (579,358.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (585,428.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (453,185.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (456,322.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (316,242.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (470,434.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Id}_{1}$}; % Text Node \draw (317,377.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; % Text Node \draw (177,305.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$}; \end{tikzpicture} \end{center} \caption{Neural network diagram for $\mxm^5$.} \end{figure} \end{remark} \begin{lemma}\label{9.7.4}\label{lem:mxm_prop} Let $d \in \N$. It is then the case that: \begin{enumerate}[label = (\roman*)] \item $\hid \lp \mxm^d \rp = \lceil \log_2 \lp d \rp \rceil $ \item for all $i \in \N$ it is the case that $\wid_i \lp \mxm^d \rp \les 3 \left\lceil \frac{d}{2^i} \right\rceil$ \item $\real_{\rect} \lp \mxm^d\rp \in C \lp \R^d, \R \rp$ and \item for all $x = \lp x_1,x_2,...,x_d \rp \in \R^d$ we have that $\lp \real_{\rect} \lp \mxm^d \rp \rp \lp x \rp = \max \{x_1,x_2,...,x_d \}$. \item $\param \lp \mxm^d \rp \les \lp \frac{4}{3}d^2+3d\rp \lp 1+\lp \frac{1}{2}\rp^{\left\lceil \log_2\lp d\rp\right\rceil+1}\rp$ \item $\dep \lp \mxm^d\rp = \left\lceil \log_2 \lp d\rp \right\rceil + 1$ \end{enumerate} \end{lemma} \begin{proof} Assume w.l.o.g. that $d > 1$. Note that (\ref{9.7.6}) ensures that $\hid \lp \mxm^2 \rp = 1$.
This and (\ref{5.2.5}) then tell us that for all $d \in \{2,3,4,...\}$ it is the case that: \begin{align} \hid \lp \boxminus_{i=1}^d \mxm^2\rp = \hid \lp \lb \boxminus_{i=1}^{d-1} \mxm^2 \rb \boxminus \id_1 \rp = \hid \lp \mxm^2 \rp = 1 \nonumber \end{align} This and Lemma \ref{comp_prop} tell us that for all $d \in \{3,4,5,...\}$ it holds that: \begin{align}\label{9.7.7} \hid \lp \mxm^d \rp = \hid \lp \mxm^{\left\lceil \frac{d}{2} \right\rceil}\rp + 1 \end{align} And for $d \in \{4,6,8,...\}$ with $\hid \lp \mxm^{\left\lceil \frac{d}{2} \right\rceil} \rp = \left\lceil \log_2 \lp \frac{d}{2} \rp\right\rceil$ it holds that: \begin{align}\label{9.7.8} \hid \lp \mxm^d \rp = \left\lceil \log_2 \lp \frac{d}{2} \rp\right\rceil + 1 = \left\lceil \log_2 \lp d \rp -1 \right\rceil +1 = \left\lceil \log_2 \lp d \rp \right\rceil \end{align} Moreover, (\ref{9.7.7}) and the fact that for all $d \in \{3,5,7,...\}$ it holds that $\left\lceil \log_2 \lp d+1 \rp \right\rceil = \left\lceil \log_2 \lp d \rp \right\rceil$ ensure that for all $d \in \{3,5,7,...\}$ with $\hid \lp \mxm^{\left\lceil \frac{d}{2}\right\rceil}\rp = \left\lceil \log_2 \lp \left\lceil \frac{d}{2} \right\rceil\rp \right\rceil$ it holds that: \begin{align} \hid \lp \mxm^d\rp &= \left\lceil \log_2 \lp \left\lceil \frac{d}{2} \right\rceil\rp \right\rceil + 1 = \left\lceil \log_2 \lp \left\lceil \frac{d+1}{2} \right\rceil\rp \right\rceil + 1 \nonumber\\ &= \left\lceil \log_2 \lp d+1\rp-1 \right\rceil + 1 = \left\lceil \log_2 \lp d+1 \rp \right\rceil = \left\lceil \log_2 \lp d \rp \right\rceil \end{align} This and (\ref{9.7.8}) demonstrate that for all $d \in \{3,4,5,...\}$ with $\forall k \in \{2,3,...,d-1\}: \hid \lp \mxm^k\rp = \left\lceil \log_2 \lp k \rp \right\rceil$ it holds that $\hid \lp \mxm^d \rp = \left\lceil \log_2 \lp d \rp \right\rceil$. The fact that $\hid \lp \mxm^2 \rp =1$ and induction establish Item (i). We next note that $\lay \lp \mxm^2 \rp = \lp 2,3,1 \rp$. This then indicates for all $i\in \N$ that: \begin{align}\label{9.7.10} \wid_i \lp \mxm^2 \rp \les 3 = 3 \left\lceil \frac{2}{2^i} \right\rceil.
\end{align} Note then that Lemma \ref{comp_prop} and Remark \ref{5.3.2} tell us that: \begin{align}\label{9.7.11} \wid_i \lp \mxm^{2d} \rp = \begin{cases} 3d &:i=1 \\ \wid_{i-1}\lp \mxm^d \rp &:i\ges 2 \end{cases} \end{align} And: \begin{align}\label{9.7.12} \wid_i \lp \mxm^{2d-1}\rp = \begin{cases} 3d-1 &:i=1 \\ \wid_{i-1}\lp \mxm^d \rp &:i \ges 2 \end{cases} \end{align} This in turn assures us that for all $d \in \{ 2,4,6,...\}$ it holds that: \begin{align}\label{9.7.13} \wid_1 \lp \mxm^d \rp = 3\lp \frac{d}{2} \rp \les 3 \left\lceil \frac{d}{2} \right\rceil \end{align} Moreover, note that (\ref{9.7.12}) tells us that for all $d \in \{3,5,7,...\}$ it holds that: \begin{align} \wid_1 \lp \mxm^d \rp = 3\left\lceil \frac{d}{2}\right\rceil -1 \les 3 \left\lceil \frac{d}{2} \right\rceil \end{align} This and (\ref{9.7.13}) show that for all $d \in \{2,3,...\}$ it holds that: \begin{align}\label{9.7.15} \wid_1 \lp \mxm^d\rp \les 3 \left\lceil \frac{d}{2}\right\rceil \end{align} Additionally, note that (\ref{9.7.11}) demonstrates that for all $d \in \{ 4,6,8,...\}$, $i \in \{2,3,...\}$ with $\wid_{i-1} \lp \mxm^{\frac{d}{2}} \rp \les 3 \left\lceil \lp \frac{d}{2}\rp \frac{1}{2^{i-1}}\right\rceil$ it holds that: \begin{align}\label{9.7.16} \wid_i \lp \mxm^d \rp = \wid_{i-1}\lp \mxm^{\frac{d}{2}}\rp \les 3 \left\lceil \lp \frac{d}{2}\rp \frac{1}{2^{i-1}} \right\rceil = 3 \left\lceil \frac{d}{2^i} \right\rceil \end{align} Furthermore, the fact that for all $d \in \{3,5,7,...\}$, $i \in \N$ it holds that $\left\lceil \frac{d+1}{2^i} \right\rceil = \left\lceil \frac{d}{2^i}\right\rceil$ and (\ref{9.7.12}) assure us that for all $d \in \{3,5,7,...\}$, $i\in \{2,3,...\}$ with $\wid_{i-1} \lp \mxm^{\left\lceil \frac{d}{2}\right\rceil}\rp \les 3 \left\lceil \left\lceil \frac{d}{2}\right\rceil \frac{1}{2^{i-1}}\right\rceil$ it holds that: \begin{align} \wid_i \lp \mxm^d \rp = \wid_{i-1} \lp \mxm^{\left\lceil \frac{d}{2}\right\rceil}\rp \les 3 \left\lceil \left\lceil \frac{d}{2} \right\rceil \frac{1}{2^{i-1}} \right\rceil = 3 \left\lceil \frac{d+1}{2^i}\right\rceil = 3 \left\lceil \frac{d}{2^i} \right\rceil \end{align} This and (\ref{9.7.16}) tell us that for all $d \in \{3,4,...\}$, $i \in \{2,3,...\}$ with $\forall k \in \{2,3,...,d-1\}$, $j \in \{1,2,...,i-1\}: \wid_j \lp \mxm^k \rp \les 3 \left\lceil \frac{k}{2^j} \right\rceil$ it holds that: \begin{align} \wid_i \lp \mxm^d \rp \les 3 \left\lceil \frac{d}{2^i}\right\rceil \end{align} This, combined with (\ref{9.7.10}) and (\ref{9.7.15}), establishes Item (ii) by induction. Next observe that (\ref{9.7.6}) tells us that for $x = \begin{bmatrix} x_1 \\ x_2 \end{bmatrix} \in \R^2$ it is the case that: \begin{align} \lp\real_{\rect} \lp \mxm^2 \rp \rp \lp x \rp &= \max \{x_1-x_2,0\} + \max\{x_2,0 \} - \max\{ -x_2,0\} \nonumber \\ &= \max \{x_1-x_2,0\} + x_2 = \max\{x_1,x_2\} \end{align} Note next that Lemma \ref{idprop}, Lemma \ref{comp_prop}, and \cite[Proposition~2.19]{grohs2019spacetime} then imply that for all $d \in \{2,3,4,...\}$ and $x = \lp x_1,x_2,...,x_d\rp \in \R^d$ it holds that $\real_{\rect} \lp \mxm^d \rp \in C \lp \R^d,\R \rp$ and $\lp \real_{\rect} \lp \mxm^d \rp \rp \lp x \rp = \max\{ x_1,x_2,...,x_d \}$. This establishes Items (iii)\textemdash(iv).
Consider now the fact that Item (ii) implies that the hidden-layer widths are bounded by a geometric series with ratio $\frac{1}{2}$, whence the number of bias parameters is bounded by: \begin{align} \frac{\frac{3d}{2} \lp 1 - \lp \frac{1}{2} \rp^{\left\lceil \log_2 \lp d\rp\right\rceil +1} \rp }{\frac{1}{2}} &= 3d \lp 1 - \lp \frac{1}{2}\rp^{\left\lceil \log_2 \lp d \rp \right\rceil +1}\rp \nonumber \\ &\les \left\lceil 3d \lp 1 - \lp \frac{1}{2}\rp^{\left\lceil \log_2 \lp d \rp \right\rceil +1}\rp \right\rceil \end{align} For the weight parameters, note that the widths follow a geometric series with ratio $\frac{1}{2}$, that Item (i) bounds the number of hidden layers by $\left\lceil \log_2 \lp d \rp \right\rceil$, and that $\wid_0 \lp \mxm^d\rp = d$. Together these facts tell us that the number of weight parameters is bounded by: \begin{align} &\sum^{\left\lceil \log_2\lp d\rp\right \rceil}_{i=0} \lb \lp \frac{1}{2}\rp ^i \cdot \wid_0\lp \mxm^d\rp \cdot \lp \frac{1}{2}\rp^{i+1}\cdot \wid_0 \lp \mxm^d\rp\rb \nonumber \\ &= \sum^{\left\lceil \log_2\lp d\rp\right\rceil}_{i=0} \lb \lp \frac{1}{2}\rp^{2i+1}\lp \wid_0 \lp \mxm^d\rp\rp^2\rb \nonumber \\ &= \frac{1}{2} \sum^{\left\lceil \log_2 \lp d\rp \right\rceil}_{i=0} \lb \lp \lp \frac{1}{2}\rp^{i} \wid_0\lp \mxm^d\rp\rp^2\rb = \frac{1}{2} \sum^{\left\lceil \log_2\lp d\rp\right\rceil}_{i=0} \lb \lp \frac{1}{4}\rp^id^2\rb \end{align} Notice that this is a geometric series with ratio $\frac{1}{4}$, which reveals that: \begin{align} \frac{1}{2} \sum^{\left\lceil \log_2\lp d\rp\right\rceil}_{i=0} \lb \lp \frac{1}{4}\rp^id^2\rb \les \frac{2}{3} d^2\lp 1- \lp\frac{1}{2}\rp^{2\lp \left\lceil \log_2(d)\right\rceil + 1\rp}\rp \end{align} Thus, we get that: \begin{align} \param \lp \mxm^d\rp &\les \frac{2}{3} d^2\lp 1- \lp\frac{1}{2}\rp^{2\lp \left\lceil \log_2(d)\right\rceil + 1\rp}\rp + \left\lceil 3d \lp 1 - \lp\frac{1}{2}\rp^{\left\lceil \log_2 \lp d \rp \right\rceil +1}\rp \right\rceil \nonumber\\ &\les \frac{2}{3} d^2\lp 1- \lp\frac{1}{2}\rp^{2\lp \left\lceil \log_2(d)\right\rceil + 1\rp}\rp + \left\lceil 3d \lp 1 - \lp\frac{1}{2}\rp^{2\lp\left\lceil \log_2 \lp d \rp \right\rceil +1\rp}\rp\right\rceil\\ &\les \left\lceil \lp \frac{2}{3}d^2+3d\rp \lp 1+\lp\frac{1}{2}\rp^{2\lp \left\lceil \log_2\lp d\rp\right\rceil+1 \rp}\rp + 1 \right\rceil \end{align} This proves Item (v). Item (vi) is a straightforward consequence of Item (i). This completes the proof of the lemma. \end{proof} \subsection{The $\mathsf{MC}$ Neural Network and Approximations via Maximum Convolutions} Let $f: [a,b] \rightarrow \R$ be a continuous bounded function with Lipschitz constant $L$. Let $x_0 \les x_1 \les \cdots \les x_N$ be sample points in $[a,b]$; it may, for instance, be the case that for all $i \in \{0,1,\hdots, N\}$, $x_i \sim \unif([a,b])$. For each $i \in \{0,1,\hdots, N\}$, define the function $f_i: [a,b] \rightarrow \R$ by: \begin{align} f_i\lp x \rp = f(x_i) - L \cdot \left| x-x_i\right| \end{align} We will call the approximant $\max_{i \in \{0,1,\hdots, N\}}\{ f_i\}$ the \textit{maximum convolution approximation}.
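For concreteness, the following short Python sketch evaluates the maximum convolution approximant $\max_{i \in \{0,1,\hdots,N\}}\{f_i\}$ for a given Lipschitz constant $L$ and a given collection of sample points. It is an illustration only: the name \texttt{max\_conv\_approx} and the use of NumPy are our own choices and are not part of the formal framework developed here.
\begin{verbatim}
import numpy as np

def max_conv_approx(f, sample_points, L):
    """Return the maximum convolution approximant of f.

    The approximant is x |-> max_i ( f(x_i) - L * |x - x_i| ),
    where the x_i are the sample points and L is a Lipschitz
    constant of f on the interval of interest.
    """
    xs = np.asarray(sample_points, dtype=float)
    ys = np.array([f(xi) for xi in xs])   # f(x_i) at each sample point

    def approximant(x):
        # max_i ( f(x_i) - L * |x - x_i| )
        return float(np.max(ys - L * np.abs(x - xs)))

    return approximant

# Example: f(x) = |x| on [-1, 1], Lipschitz constant L = 1,
# with five equidistant sample points.
F = max_conv_approx(abs, np.linspace(-1.0, 1.0, 5), L=1.0)
print(F(0.3), abs(0.3))
\end{verbatim}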
This approximant converges to $f$ as the sample points become dense in the domain; this is made precise in Lemma \ref{lem:maxconv_accuracy} below. We first realize the maximum convolution approximation as a neural network and collect its elementary properties. \begin{lemma}\label{(9.7.5)}\label{lem:mc_prop} Let $d,N\in \N$, $L\in \lb 0,\infty \rp$, $x_1,x_2,\hdots, x_N \in \R^d$, $y = \lp y_1,y_2,\hdots,y_N \rp \in \R^N$ and $\mathsf{MC}^{N,d}_{x,y} \in \neu$ satisfy: \begin{align}\label{9.7.20} \mathsf{MC}^{N,d}_{x,y} = \mxm^N \bullet \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus_{i=1}^N \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x_i} \rb \rp \bullet \cpy_{N,d} \end{align} It is then the case that: \begin{enumerate}[label = (\roman*)] \item $\inn \lp \mathsf{MC}^{N,d}_{x,y} \rp = d$ \item $\out\lp \mathsf{MC}^{N,d}_{x,y} \rp = 1$ \item $\hid \lp \mathsf{MC}^{N,d}_{x,y} \rp = \left\lceil \log_2 \lp N \rp \right\rceil +1$ \item $\wid_1 \lp \mathsf{MC}^{N,d}_{x,y} \rp = 2dN$ \item for all $i \in \{ 2,3,...\}$ we have $\wid_i \lp \mathsf{MC}^{N,d}_{x,y} \rp \les 3 \left\lceil \frac{N}{2^{i-1}} \right\rceil$ \item it holds for all $x \in \R^d$ that $\lp \real_{\rect} \lp \mathsf{MC}^{N,d}_{x,y} \rp \rp \lp x \rp = \max_{i \in \{1,2,...,N\}} \lp y_i - L \left\| x-x_i \right\|_1\rp$ \item it holds that $\param \lp \mathsf{MC}^{N,d}_{x,y} \rp \les \left\lceil \lp \frac{2}{3}d^2+3d\rp \lp 1+\lp\frac{1}{2}\rp^{2\lp \left\lceil \log_2\lp d\rp\right\rceil+1 \rp}\rp + 1 \right\rceil + 7N^2d^2 + 3\left\lceil \frac{N}{2}\right\rceil \cdot 2dN$ \end{enumerate} \end{lemma} \begin{proof} Throughout this proof let $\mathsf{S}_i \in \neu$ satisfy for all $i \in \{1,2,...,N\}$ that $\mathsf{S}_i = \nrm_1^d \bullet \aff_{\mathbb{I}_d,-x_i}$ and let $\mathsf{X} \in \neu$ satisfy: \begin{align} \mathsf{X} = \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus_{i=1}^N \mathsf{S}_i \rp \bullet \cpy_{N,d} \end{align} Note that (\ref{9.7.20}) and Lemma \ref{comp_prop} tell us that $\out \lp \mathsf{MC}^{N,d}_{x,y} \rp = \out \lp \mxm^N \rp = 1$ and $\inn \lp \mathsf{MC}^{N,d}_{x,y} \rp = \inn \lp \cpy_{N,d} \rp =d $. This proves Items (i)--(ii). Next observe that, since $\hid \lp \cpy_{N,d} \rp = \hid \lp \aff_{-L\mathbb{I}_N,y} \rp = 0$ and $\hid \lp \nrm^d_1 \rp = 1$, Lemma \ref{comp_prop} tells us that: \begin{align} \hid \lp \mathsf{X} \rp = \hid \lp\aff_{-L\mathbb{I}_N,y} \rp + \hid \lp \boxminus_{i=1}^N \mathsf{S}_i\rp + \hid \lp \cpy_{N,d} \rp = 1 \end{align} Thus Lemma \ref{comp_prop} and Lemma \ref{9.7.4} tell us that: \begin{align} \hid \lp \mathsf{MC}^{N,d}_{x,y} \rp = \hid \lp \mxm^N \bullet \mathsf{X}\rp = \hid \lp \mxm^N \rp + \hid \lp \mathsf{X}\rp = \left\lceil \log_2 \lp N \rp \right\rceil +1 \end{align} This in turn establishes Item (iii). Note next that Lemma \ref{comp_prop} and \cite[Proposition~2.20]{grohs2019spacetime} tell us that: \begin{align}\label{8.3.33} \wid_1 \lp \mathsf{MC}^{N,d}_{x,y} \rp = \wid_1 \lp \mathsf{X} \rp = \wid_1 \lp \boxminus^N_{i=1} \mathsf{S}_i\rp = \sum^N_{i=1} \wid_1 \lp \mathsf{S}_i \rp = \sum^N_{i=1} \wid_1 \lp \nrm^d_1 \rp = 2dN \end{align} This establishes Item (iv). Next observe that the fact that $\hid \lp \mathsf{X} \rp=1$, Lemma \ref{comp_prop}, and Lemma \ref{9.7.4} tell us that for all $i \in \{2,3,...\}$ it is the case that: \begin{align} \wid_i \lp \mathsf{MC}^{N,d}_{x,y} \rp = \wid_{i-1} \lp \mxm^N \rp \les 3 \left\lceil \frac{N}{2^{i-1}} \right\rceil \end{align} This establishes Item (v).
Next observe that Lemma \ref{9.7.2} and Lemma \ref{5.3.3} tell us that for all $x \in \R^d$, $i \in \{1,2,...,N\}$ it holds that: \begin{align} \lp \real_{\rect} \lp \mathsf{S}_i \rp \rp \lp x \rp = \lp \real_{\rect}\lp \nrm^d_1 \rp \circ \real_{\rect}\lp \aff_{\mathbb{I}_d,-x_i} \rp \rp \lp x \rp = \left\| x-x_i \right\|_1 \end{align} This and \cite[Proposition~2.20]{grohs2019spacetime} combined establish that for all $x \in \R^d$ it holds that: \begin{align} \lp \real_{\rect} \lp \lb \boxminus_{i=1}^N \mathsf{S}_i \rb \bullet \cpy_{N,d} \rp \rp \lp x \rp = \lp \| x-x_1 \|_1, \|x-x_2\|_1,...,\|x-x_N\|_1\rp \nonumber \\ \end{align} This and Lemma \ref{5.3.3} establish that for all $x \in \R^d$ it holds that: \begin{align} \lp \real_{\rect}\lp \mathsf{X}\rp \rp \lp x \rp &= \lp \real_{\rect}\lp \aff_{-L\mathbb{I}_N,y}\rp\rp \circ \lp\real_{\rect} \lp \lb \boxminus_{i=1}^N \mathsf{S}_i\rb \bullet \cpy_{N,d}\rp \rp \lp x \rp \nonumber\\ &= \lp y_1-L \|x-x_1 \|_1, y_2-L\|x-x_2\|_1,...,y_N-L \| x-x_N \|_1\rp \end{align} Then Lemma \ref{comp_prop} and Lemma \ref{9.7.4} tell us that for all $x\in \R^d$ it holds that: \begin{align} \lp \real_{\rect} \lp \mathsf{MC}^{N,d}_{x,y} \rp \rp \lp x \rp &= \lp \real_{\rect}\lp \mxm^N \rp \circ \lp \real_{\rect}\lp \mathsf{X} \rp \rp \rp \lp x \rp \nonumber \\ &= \lp \real_{\rect}\lp \mxm^N \rp \rp \lp y_1-L \|x-x_1\|_1,y_2-L\|x-x_2\|_1,...,y_N-L\|x-x_N\|_1\rp \nonumber\\ &=\max_{i\in \{1,2,...,N\} } \lp y_i - L \|x-x_i\|_1\rp \end{align} This establishes Item (vi). For Item (vii) note that Lemma \ref{lem:nrm_prop}, Remark \ref{rem:stk_remark}, and Corollary \ref{affcor} tell us that for all $d\in \N$ and $x \in \R^d$ it is the case that: \begin{align} \param \lp \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x}\rp \les \param \lp \nrm_1^d\rp \les 7d^2 \end{align} This, along with Corollary \ref{cor:sameparal} and the fact that we are stacking neural networks of identical architecture, then tells us that for all $N \in \N$ it is the case that: \begin{align} \param \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \les 7N^2d^2 \end{align} Observe next that Corollary \ref{affcor} tells us that for all $d,N \in \N$ and $x \in \R^d$ it is the case that: \begin{align}\label{8.3.38} \param \lp \lp \boxminus^N_{i=1} \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x}\rb\rp \bullet \cpy_{N,d}\rp \les \param \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \les 7N^2d^2 \end{align} Now, let $d,N \in \N$, $L \in [0,\infty)$, let $x_1,x_2,\hdots, x_N \in \R^d$ and let $y = \lp y_1,y_2,\hdots, y_N \rp \in \R^N$.
Observe next that Corollary \ref{affcor} and (\ref{8.3.38}) tell us that: \begin{align} &\param\lp \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus_{i=1}^N \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x_i} \rb \rp \bullet \cpy_{N,d}\rp \nonumber\\ &\les \param \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \les 7N^2d^2 \nonumber \end{align} Finally, Lemma \ref{comp_prop}, (\ref{8.3.33}), and Lemma \ref{lem:mxm_prop} yield that: \begin{align} \param(\mathsf{MC}^{N,d}_{x,y}) &= \param \lp \mxm^N \bullet \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus_{i=1}^N \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x_i} \rb \rp \bullet \cpy_{N,d} \rp \nonumber\\ &\les \param \lp \mxm^N \bullet \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb \rp \rp \nonumber\\ &\les \param \lp \mxm^N \rp + \param \lp \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \rp + \nonumber\\ &\wid_1\lp \mxm^N\rp \cdot \wid_{\hid \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp} \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \nonumber \\ &\les \left\lceil \lp \frac{2}{3}d^2+3d\rp \lp 1+\lp\frac{1}{2}\rp^{2\lp \left\lceil \log_2\lp d\rp\right\rceil+1 \rp}\rp + 1 \right\rceil + 7N^2d^2 + 3\left\lceil \frac{N}{2}\right\rceil \cdot 2dN \end{align} \end{proof} \begin{remark} We may represent the neural network diagram for $\mathsf{MC}^{N,d}_{x,y}$ as follows: \end{remark} \begin{figure}[h] \begin{center} \tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt
\begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-0.9,xscale=0.9]
%uncomment if require: \path (0,560); %set diagram left start at 0, and has height of 560
%Shape: Rectangle [id:dp1438938274656144]
\draw (574,235) -- (644,235) -- (644,275) -- (574,275) -- cycle ;
%Straight Lines [id:da7383135897500558]
\draw (574,241) -- (513.72,84.37) ; \draw [shift={(513,82.5)}, rotate = 68.95] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da09141712653411305]
\draw (572,251) -- (514.14,168.14) ; \draw [shift={(513,166.5)}, rotate = 55.08] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da19953508691566213]
\draw (573,259) -- (515.07,350.81) ; \draw [shift={(514,352.5)}, rotate = 302.25] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da5900315817761441]
\draw (575,268) -- (515.66,436.61) ; \draw [shift={(515,438.5)}, rotate = 289.39] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) ..
(10.93,3.29) ; %Shape: Rectangle [id:dp9847868081693099] \draw (421,59) -- (512,59) -- (512,99) -- (421,99) -- cycle ; %Shape: Rectangle [id:dp2868551357079474] \draw (419,330) -- (510,330) -- (510,370) -- (419,370) -- cycle ; %Shape: Rectangle [id:dp9383613429980815] \draw (420,150) -- (511,150) -- (511,190) -- (420,190) -- cycle ; %Shape: Rectangle [id:dp5827241951133133] \draw (420,420) -- (511,420) -- (511,460) -- (420,460) -- cycle ; %Shape: Rectangle [id:dp7299058955170046] \draw (290,60) -- (381,60) -- (381,100) -- (290,100) -- cycle ; %Shape: Rectangle [id:dp08440877870624452] \draw (290,150) -- (381,150) -- (381,190) -- (290,190) -- cycle ; %Shape: Rectangle [id:dp7098854649776141] \draw (290,330) -- (381,330) -- (381,370) -- (290,370) -- cycle ; %Shape: Rectangle [id:dp6165394921489369] \draw (290,420) -- (381,420) -- (381,460) -- (290,460) -- cycle ; %Straight Lines [id:da37215648665173995] \draw (420,80) -- (401,80) -- (382,80) ; \draw [shift={(380,80)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da2316129338082229] \draw (420,170) -- (401,170) -- (382,170) ; \draw [shift={(380,170)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da40267704179559083] \draw (419,350) -- (400,350) -- (381,350) ; \draw [shift={(379,350)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da5321116904741454] \draw (420,440) -- (401,440) -- (382,440) ; \draw [shift={(380,440)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Shape: Rectangle [id:dp9652335934440622] \draw (170,60) -- (250,60) -- (250,460) -- (170,460) -- cycle ; %Straight Lines [id:da2568661285688787] \draw (170,240) -- (132,240) ; \draw [shift={(130,240)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da9075024165320872] \draw (290,80) -- (252,80) ; \draw [shift={(250,80)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da36733568107592385] \draw (290,170) -- (252,170) ; \draw [shift={(250,170)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da027221622247677213] \draw (290,350) -- (252,350) ; \draw [shift={(250,350)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. 
(10.93,3.29) ;
%Straight Lines [id:da456971589533403]
\draw (290,440) -- (252,440) ; \draw [shift={(250,440)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Shape: Rectangle [id:dp5834320101871477]
\draw (60,220) -- (130,220) -- (130,260) -- (60,260) -- cycle ;
%Straight Lines [id:da39697402951042593]
\draw (60,240) -- (22,240) ; \draw [shift={(20,240)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da09195032177210305]
\draw (690,250) -- (652,250) ; \draw [shift={(650,250)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
% Text Node
\draw (583,245.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Cpy}_{N,d}$};
% Text Node
\draw (441,66.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mathbb{I}_{d},-x_{i}}$};
% Text Node
\draw (442,158.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mathbb{I}_{d},-x_{i}}$};
% Text Node
\draw (442,338.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mathbb{I}_{d},-x_{i}}$};
% Text Node
\draw (442,428.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mathbb{I}_{d},-x_{i}}$};
% Text Node
\draw (318,72.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Nrm}_{1}^{d}$};
% Text Node
\draw (318,159.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Nrm}_{1}^{d}$};
% Text Node
\draw (318,339.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Nrm}_{1}^{d}$};
% Text Node
\draw (321,427.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Nrm}_{1}^{d}$};
% Text Node
\draw (322,237.4) node [anchor=north west][inner sep=0.75pt] [font=\LARGE] {$\vdots $};
% Text Node
\draw (462,232.4) node [anchor=north west][inner sep=0.75pt] [font=\LARGE] {$\vdots $};
% Text Node
\draw (181,238.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{-L\mathbb{I}_{N},y}$};
% Text Node
\draw (71,231.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{N}$};
\end{tikzpicture} \end{center} \caption{Neural network diagram for the $\mathsf{MC}^{N,d}_{x,y}$ network} \end{figure} \subsection{Lipschitz Function Approximations}\label{(9.7.6)} \begin{lemma}%TODO: Should we stipulate compact sets?
Let $\lp E,d \rp$ be a metric space. Let $L \in \lb 0,\infty \rp$, $D \subseteq E$, $\varnothing \neq C \subseteq D$.
Let $f:D \rightarrow \R$ satisfy for all $x\in D$, $y \in C$ that $\left| f(x) -f(y)\right| \les L d \lp x,y \rp$, and let $F:E \rightarrow \R \cup \{\infty\}$ satisfy for all $x\in E$ that: \begin{align}\label{9.7.30} F\lp x \rp = \sup_{y\in C} \lb f\lp y \rp - Ld\lp x,y \rp \rb \end{align} It is then the case that: \begin{enumerate}[label = (\roman*)] \item it holds for all $x \in C$ that $F(x) = f(x)$ \item it holds for all $x \in D$ that $F(x) \les f(x)$ \item it holds for all $x\in E$ that $F\lp x \rp < \infty$ \item it holds for all $x,y \in E$ that $\left| F(x)-F(y)\right| \les Ld\lp x,y \rp$ and \item it holds for all $x \in D$ that: \begin{align}\label{9.7.31} \left| F\lp x \rp - f \lp x \rp \right| \les 2L \lb \inf_{y\in C} d \lp x,y \rp\rb \end{align} \end{enumerate} \end{lemma} \begin{proof} The assumption that $\forall x \in D, y \in C: \left| f(x) - f(y)\right| \les Ld\lp x,y \rp$ ensures that for all $x \in D$, $y \in C$ it holds that: \begin{align}\label{9.7.32} f(y) - Ld\lp x,y\rp \les f\lp x \rp \les f(y) + Ld\lp x,y \rp \end{align} Taking the supremum over $y \in C$ in the first inequality then yields that for all $x\in D$ it holds that: \begin{align}\label{9.7.33} f(x) \ges \sup_{y \in C} \lb f(y) - Ld\lp x,y \rp \rb = F\lp x \rp \end{align} This establishes Item (ii). Note that (\ref{9.7.30}) also tells us that for all $x\in C$ it holds that: \begin{align} F\lp x \rp \ges f(x) - Ld\lp x,x \rp = f\lp x \rp \end{align} This with (\ref{9.7.33}) then yields Item (i). Note next that (\ref{9.7.32}) (with $x \curvearrowleft y$ and $y \curvearrowleft z$) and the triangle inequality ensure that for all $x \in E$, $y,z \in C$ it holds that: \begin{align} f(y) - Ld\lp x,y\rp \les f(z)+Ld\lp y,z \rp - Ld\lp x,y \rp \les f(z) + Ld\lp x,z \rp \end{align} We then obtain that for all $x\in E$, $z\in C$ it holds that: \begin{align} F\lp x \rp = \sup_{y\in C} \lb f(y) - Ld\lp x,y \rp \rb \les f\lp z \rp + Ld\lp x,z \rp < \infty \end{align} This proves Item (iii). Item (iii), (\ref{9.7.30}), and the triangle inequality then show that for all $x,y \in E$ it holds that: \begin{align} F(x) - F(y) &= \lb \sup_{v \in C} \lp f(v) - Ld\lp x,v \rp \rp \rb - \lb \sup_{w\in C} \lp f(w)-Ld\lp y,w \rp \rp\rb \nonumber \\ &= \sup_{v \in C}\lb f(v) - Ld\lp x,v \rp -\sup_{w\in C} \lp f(w) - L d\lp y,w \rp \rp\rb \nonumber\\ &\les \sup_{v \in C}\lb f(v) - Ld\lp x,v \rp - \lp f(v) - Ld\lp y,v \rp \rp\rb \nonumber\\ &= \sup_{v\in C} \lp Ld\lp y,v \rp - Ld\lp x,v\rp \rp \les Ld \lp x,y \rp \end{align} Interchanging the roles of $x$ and $y$ then establishes Item (iv). Finally, note that Items (i) and (iv), the triangle inequality, and the assumption that $\forall x \in D, y\in C: \left| f(x) - f(y) \right| \les Ld\lp x,y \rp$ ensure that for all $x\in D$ it holds that: \begin{align} \left| F(x) - f(x) \right| &= \inf_{y\in C} \left| F(x) - F(y) +f(y) - f(x)\right| \nonumber \\ &\les \inf_{y\in C} \lp \left| F(x) - F(y) \right| + \left| f(y) - f(x) \right|\rp \nonumber\\ &\les \inf_{y\in C} \lp 2Ld\lp x,y \rp\rp = 2L \lb \inf_{y\in C} d \lp x,y \rp \rb \end{align} This establishes Item (v) and hence establishes the Lemma.
\end{proof} \begin{corollary}\label{9.7.6.1} Let $\lp E,d \rp$ be a metric space, let $L \in \lb 0,\infty \rp$, $\varnothing \neq C \subseteq E$, let $f: E \rightarrow \R$ satisfy for all $x\in E$, $y \in C$ that $\left| f(x) - f(y) \right| \les Ld \lp x,y \rp$, and let $F:E \rightarrow \R \cup \{\infty\}$ satisfy for all $x\in E$ that: \begin{align} F \lp x \rp = \sup_{y\in C} \lb f(y) - Ld \lp x,y \rp\rb \end{align} It is then the case that: \begin{enumerate}[label = (\roman*)] \item for all $x\in C$ that $F(x) = f(x)$ \item for all $x\in E$ that $F(x) \les f(x)$ \item for all $x,y \in E$ that $\left| F(x) - F(y) \right| \les L d \lp x,y \rp$ and \item for all $x\in E$ that: \begin{align} \left| F\lp x \rp - f\lp x \rp \right| \les 2L \lb \inf_{y\in C} d \lp x,y \rp \rb \end{align} \end{enumerate} \end{corollary} \begin{proof} Note that Lemma \ref{(9.7.6)} applied with $D \curvearrowleft E$ establishes Items (i)\textemdash(iv). \end{proof} \subsection{Explicit ANN Approximations} \begin{lemma}\label{lem:maxconv_accuracy} Let $d,N \in \N$, $L \in \lb 0,\infty \rp$. Let $E \subseteq \R^d$. Let $x_1,x_2,...,x_N \in E$, let $f:E \rightarrow \R$ satisfy for all $u,v \in E$ that $\left| f(u) -f(v)\right| \les L \left\| u-v \right\|_1$, and let $y = \lp f\lp x_1 \rp, f \lp x_2 \rp,...,f\lp x_N \rp\rp$ and $\mathsf{MC}^{N,d}_{x,y} \in \neu$ satisfy: \begin{align} \mathsf{MC}^{N,d}_{x,y} = \mxm^N \bullet \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus^N_{i=1} \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x_i} \rb \rp \bullet \cpy_{N,d} \end{align} It is then the case that: \begin{align}\label{(9.7.42)} \sup_{x\in E} \left| \lp \real_{\rect}\lp \mathsf{MC}^{N,d}_{x,y} \rp \rp \lp x \rp -f\lp x \rp \right| \les 2L \lb \sup _{x\in E} \lp \min_{i\in \{1,2,...,N\}} \left\| x-x_i\right\|_1\rp\rb \end{align} \end{lemma} \begin{proof} Throughout this proof let $F: \R^d \rightarrow \R$ satisfy for all $x \in \R^d$ that: \begin{align}\label{9.7.43} F\lp x \rp = \max_{i \in \{1,2,...,N\}} \lp f\lp x_i \rp- L \left\| x-x_i \right\|_1 \rp \end{align} Note then that Corollary \ref{9.7.6.1} (applied with $C \curvearrowleft \{x_1,x_2,...,x_N\}$ and $d\lp x,y \rp \curvearrowleft \left\| x-y\right\|_1$), (\ref{9.7.43}), and the assumption that for all $x,y \in E$ it holds that $\left| f(x) - f(y)\right| \les L \left\|x-y \right\|_1$ assure that: \begin{align}\label{(9.7.44)} \sup_{x\in E} \left| F(x) - f(x) \right| \les 2L \lb \sup_{x\in E} \lp \min_{i \in \{1,2,...,N\}} \left\| x-x_i\right\|_1\rp\rb \end{align} Then Lemma \ref{(9.7.5)} tells us that for all $x\in E$ it holds that $F(x) = \lp \real_{\rect} \lp \mathsf{MC}^{N,d}_{x,y} \rp \rp \lp x \rp$. This combined with (\ref{(9.7.44)}) establishes (\ref{(9.7.42)}). \end{proof} \begin{lemma} Let $N \in \N$, $L \in \lb 0,\infty \rp$. Let $\lb a,b\rb \subsetneq \R$.
Let $x_1,x_2,...,x_N \in \lb a,b\rb$, let $f:\lb a,b\rb \rightarrow \R$ satisfy for all $u,v \in \lb a,b\rb$ that $\left| f(u) -f(v)\right| \les L \left| u-v \right|$, and let $y = f\lp \lb x \rb_*\rp = \lp f\lp x_1\rp, f\lp x_2\rp,...,f\lp x_N\rp\rp$ and $\mathsf{MC}^{N,1}_{x,y} \in \neu$ satisfy: \begin{align} \mathsf{MC}^{N,1}_{x,y} = \mxm^N \bullet \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus^N_{i=1} \lb \nrm^1_1 \bullet \aff_{1,-x_i} \rb \rp \bullet \cpy_{N,1} \end{align} It is then the case for the approximant $\mathsf{MC}^{N,1}_{x,y}$ that: \begin{enumerate}[label = (\roman*)] \item $\inn \lp \mathsf{MC}^{N,1}_{x,y} \rp = 1$ \item $\out\lp \mathsf{MC}^{N,1}_{x,y} \rp = 1$ \item $\hid \lp \mathsf{MC}^{N,1}_{x,y} \rp = \left\lceil \log_2 \lp N \rp \right\rceil +1$ \item $\wid_1 \lp \mathsf{MC}^{N,1}_{x,y} \rp = 2N$ \item for all $i \in \{ 2,3,...\}$ we have $\wid_i \lp \mathsf{MC}^{N,1}_{x,y} \rp \les 3 \left\lceil \frac{N}{2^{i-1}} \right\rceil$ \item it holds for all $x \in \R$ that $\lp \real_{\rect} \lp \mathsf{MC}^{N,1}_{x,y} \rp \rp \lp x \rp = \max_{i \in \{1,2,...,N\}} \lp y_i - L \left| x-x_i \right|\rp$ \item it holds that $\param \lp \mathsf{MC}^{N,1}_{x,y} \rp \les 6 + 7N^2 + 3\left\lceil \frac{N}{2}\right\rceil \cdot 2N$ \item if, in addition, the sample points satisfy $\sup_{x\in \lb a,b\rb} \lp \min_{i \in \{1,2,...,N\}} \left| x-x_i\right|\rp \les \frac{\left| a-b\right|}{N}$ (as is the case, for instance, for equidistantly spaced sample points), then it holds that $\sup_{x\in \lb a,b\rb} \left| \lp \real_{\rect} \lp \mathsf{MC}^{N,1}_{x,y} \rp \rp \lp x \rp - f(x) \right| \les \frac{2L\left| a-b\right|}{N}$ \end{enumerate} \end{lemma} \begin{proof} Items (i)\textemdash(vii) are assertions of Lemma \ref{lem:mc_prop} applied with $d \curvearrowleft 1$. Item (viii) follows from Lemma \ref{lem:maxconv_accuracy} applied with $d \curvearrowleft 1$ and $E \curvearrowleft \lb a,b \rb$. \end{proof}
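As a purely illustrative numerical check of Item (viii), the following sketch compares the realization $\max_{i \in \{1,2,\hdots,N\}}\lp y_i - L\left| x-x_i\right|\rp$ of $\mathsf{MC}^{N,1}_{x,y}$ with $f$ on a fine grid. It assumes equidistantly spaced sample points, and the name \texttt{mc\_realization} as well as the choice $f = \sin$ with $L = 1$ are our own illustrative assumptions rather than part of the formal development. The printed observed error should remain below the bound $\frac{2L\left| a-b\right|}{N}$ of Item (viii).
\begin{verbatim}
import numpy as np

# Illustrative check of Item (viii), assuming equidistant sample
# points on [a, b].  f(x) = sin(x) is 1-Lipschitz, so L = 1 works.
a, b, N, L = 0.0, 2.0 * np.pi, 16, 1.0
f = np.sin

xs = np.linspace(a, b, N)        # sample points x_1, ..., x_N
ys = f(xs)                       # y_i = f(x_i)

def mc_realization(t):
    # Realization of MC^{N,1}_{x,y}: max_i ( y_i - L * |t - x_i| )
    return np.max(ys - L * np.abs(t - xs))

grid = np.linspace(a, b, 2001)
observed = max(abs(mc_realization(t) - f(t)) for t in grid)
print(observed, 2.0 * L * abs(a - b) / N)   # observed error vs. bound
\end{verbatim}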