\chapter{ANN first approximations}
\section{ANN Representations for the One-Dimensional Identity and Some Associated Properties}
\begin{definition}[One Dimensional Identity Neural Network]\label{7.2.1}
For every $d \in \N$ we will denote by $\id_d \in \neu$ the neural network satisfying:
\begin{enumerate}[label = (\roman*)]
\item \begin{align}
\id_1 = \lp \lp \begin{bmatrix}
1 \\
-1
\end{bmatrix}, \begin{bmatrix}
0 \\
0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 \quad -1
\end{bmatrix},\begin{bmatrix} 0\end{bmatrix}\rp \rp \in \lp \lp \R^{2 \times 1} \times \R^2 \rp \times \lp \R^{1\times 2} \times \R^1 \rp \rp
\end{align}
\item \begin{align}\label{7.2.2}
\id_d = \boxminus^d_{i=1} \id_1
\end{align}
for $d>1$.
\end{enumerate}
\end{definition}
\begin{lemma}\label{idprop}
Let $d \in \N$. It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item $\lay(\id_d) = \lp d, 2d, d \rp \in \N^3$.
\item $\real_{\rect} \lp \id_d \rp \in C \lp \R^d, \R^d \rp$.
\item For all $x \in \R^d$ it is the case that:
\begin{align}
\lp \real_{\rect} \lp \id_d \rp \rp \lp x \rp = x \nonumber
\end{align}
\item It is the case that $\dep\lp \id_d\rp = 2$
\end{enumerate}
\end{lemma}
\begin{proof}
Note that (\ref{7.2.1}) ensures that $\lay(\id_1) = \lp 1,2,1 \rp$. Furthermore, (\ref{7.2.2}) and Remark \ref{5.3.5} prove that $\lay(\id_d) = \lp d,2d,d \rp$, which in turn proves Item (i). Note now that Remark \ref{5.3.5} tells us that:
\begin{align}
\id_d = \boxminus^d_{i=1}\lp \id_1 \rp \in \lp \bigtimes^L_{i=1}\lb \R^{dl_i \times dl_{i-1}} \times \R^{dl_i} \rb \rp = \lp \lp \R^{2d \times d} \times \R^{2d}\rp \times \lp \R^{d \times 2d} \times \R^d\rp \rp
\end{align}
Note that (\ref{7.2.1}) ensures that for all $x \in \R$ it is the case that:
\begin{align}
\lp \real_{\rect} \lp \id_1 \rp \rp \lp x \rp = \rect(x) - \rect(-x) = \max\{x,0\} - \max\{-x,0\} = x
\end{align}
Moreover, Lemma \ref{5.3.4} shows that $\real_{\rect}\lp \id_d \rp \in C \lp \R^d, \R^d \rp$ and that for all $x = \lp x_1,x_2,...,x_d\rp \in \R^d$ it is the case that:
\begin{align}
\lp \real_{\rect} \lp \id_d \rp \rp \lp x \rp &= \lp \real_{\rect} \lp \boxminus_{i=1}^d \lp \id_1\rp \rp \rp \lp x_1,x_2,...,x_d \rp \nonumber \\
&= \lp \lp \real_{\rect} \lp \id_1 \rp \rp \lp x_1 \rp, \lp \real_{\rect} \lp \id_1 \rp \rp \lp x_2 \rp,..., \lp \real_{\rect} \lp \id_1 \rp \rp \lp x_d \rp \rp \nonumber \\
&= \lp x_1, x_2,...,x_d \rp = x
\end{align}
This proves Items (ii) and (iii). Item (iv) follows straightforwardly from Item (i). This establishes the lemma.
\end{proof}
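The computation in the proof above admits a short numerical sanity check. The following is a minimal sketch (not part of the formal development), assuming NumPy is available; the helper names \texttt{id\_d} and \texttt{realize} are ours and simply mirror Definition \ref{7.2.1} together with the identity $\rect(x)-\rect(-x)=x$.
\begin{verbatim}
import numpy as np

def relu(z):
    return np.maximum(z, 0.0)

def id_d(d):
    # Block-diagonal stacking of d copies of Id_1 = (([1,-1]^T, 0), ([1 -1], 0)).
    W1 = np.kron(np.eye(d), np.array([[1.0], [-1.0]]))   # shape (2d, d)
    b1 = np.zeros(2 * d)
    W2 = np.kron(np.eye(d), np.array([[1.0, -1.0]]))      # shape (d, 2d)
    b2 = np.zeros(d)
    return (W1, b1), (W2, b2)

def realize(net, x):
    # ReLU instantiation of a two-layer network ((W1, b1), (W2, b2)).
    (W1, b1), (W2, b2) = net
    return W2 @ relu(W1 @ x + b1) + b2

d = 4
x = np.random.randn(d)
assert np.allclose(realize(id_d(d), x), x)   # (R_relu(Id_d))(x) = x
\end{verbatim}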
\begin{remark}
Note here the difference between Definition \ref{actnn} and Definition \ref{7.2.1}.
\end{remark}
\begin{lemma}[R\textemdash, 2023]\label{id_param}
Let $d \in \N$. It is then the case that $\param\lp \id_d\rp = 4d^2+3d$.
\end{lemma}
\begin{proof}
By observation we have that $\param \lp \id_1\rp = 4(1)^2+3(1) = 7$. For the induction step, suppose that for all natural numbers up to and including $n$ it is the case that $\param \lp \id_n\rp = 4n^2+3n$. Note then that $\id_{n+1} = \id_n \boxminus \id_1$. For $W_1$ and $W_2$ of this new network, this adds a combined extra $8n+4$ parameters. For $b_1$ and $b_2$ of this new network, this adds a combined extra $3$ parameters. Thus, we have the following:
\begin{align}
4n^2+3n + 8n+4 + 3 &= 4(n+1)^2+3(n+1)
\end{align}
This completes the induction and hence proves the Lemma.
\end{proof}
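As a quick check on the count $4d^2+3d$, the following sketch (our own illustration, not part of the formal argument) tallies the entries of the dense weight matrices and bias vectors of $\id_d$ directly.
\begin{verbatim}
def id_d_params(d):
    # Dense parameter count of Id_d: W1 in R^{2d x d}, b1 in R^{2d},
    # W2 in R^{d x 2d}, b2 in R^{d}.
    return (2 * d) * d + 2 * d + d * (2 * d) + d

for d in range(1, 10):
    assert id_d_params(d) == 4 * d ** 2 + 3 * d   # param(Id_d) = 4d^2 + 3d
\end{verbatim}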
\begin{lemma}\label{7.2.3}
Let $d \in \N$ and let $\nu \in \neu$ have end-widths $d$. It is then the case that $\real_{\rect} \lp \id_d \bullet \nu \rp = \real_{\rect} \lp \nu \bullet \id_d \rp = \real_{\rect} \lp \nu\rp$, i.e. $\id_d$ acts as a compositional identity.
\end{lemma}
\begin{proof} From (\ref{5.2.1}) and Definition \ref{7.2.1} we have eight cases.
Case 1 where $d=1$ and subcases:
\begin{enumerate}[label = (1.\roman*)]
\item $\id_d \bullet \nu$ where $\dep(\nu) = 1$
\item $\id_d \bullet \nu$ where $\dep(\nu) > 1$
\item $\nu \bullet \id_d$ where $\dep(\nu) =1$
\item $\nu \bullet \id_d$ where $\dep(\nu) > 1$
\end{enumerate}
Case 2 where $d>1$ and subcases:
\begin{enumerate}[label = (2.\roman*)]
\item $\id_d \bullet \nu$ where $\dep(\nu) = 1$
\item $\id_d \bullet \nu$ where $\dep(\nu) > 1$
\item $\nu \bullet \id_d$ where $\dep(\nu) =1$
\item $\nu \bullet \id_d$ where $\dep(\nu) > 1$
\end{enumerate}
\textit{Case 1.i:} Let $\nu = \lp \lp W_1,b_1 \rp \rp$. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have that:
\begin{align}
\id_1 \bullet \nu &=\lp \lp \begin{bmatrix}
1 \\
-1
\end{bmatrix} W_1, \begin{bmatrix}
1 \\
-1
\end{bmatrix}b_1 + \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 \quad -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix} \rp \rp \\
&= \lp \lp \begin{bmatrix}
W_1 \\-W_{1}
\end{bmatrix}, \begin{bmatrix}
b_1 \\ -b_1
\end{bmatrix} \rp,\lp \begin{bmatrix}
1 \quad -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix} \rp \rp
\end{align}
Let $x \in \R$. Upon instantiation with $\rect$ and $d=1$ we have:
\begin{align}
\lp \real_{\rect}\lp \id_1\bullet \nu \rp \rp \lp x \rp &= \rect(W_1x+b_1)-\rect(-W_1x - b_1) \nonumber\\
&= \max\{W_1x+b_1,0\}-\max\{-W_1x-b_1,0\} \nonumber \\
&= W_1x+b_1 \nonumber\\
&= \lp \real_{\rect}\lp \nu \rp \rp \lp x \rp \nonumber
\end{align}
\textit{Case 1.ii:} Let $\nu = \lp \lp W_1,b_1 \rp, \lp W_2,b_2 \rp, ..., \lp W_L, b_L \rp \rp $. Deriving from Definition \ref{7.2.1} and \ref{5.2.1} we have that:
\begin{align}
\id_1\bullet \nu &= \lp \lp W_1,b_1\rp,\lp W_2,b_2 \rp,...,\lp W_{L-1},b_{L-1} \rp, \lp \begin{bmatrix}
1 \\-1
\end{bmatrix} W_L, \begin{bmatrix}
1 \\ -1
\end{bmatrix}b_L + \begin{bmatrix}
0 \\ 0
\end{bmatrix} \rp, \lp \begin{bmatrix}
1 \quad -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix} \rp \rp \nonumber \\
&= \lp \lp W_1,b_1\rp, \lp W_2, b_2 \rp,...,\lp W_{L-1},b_{L-1} \rp, \lp \begin{bmatrix}
W_L \\ -W_L
\end{bmatrix} ,\begin{bmatrix}
b_L \\ -b_L
\end{bmatrix} \rp ,\lp \begin{bmatrix}
1 & -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix} \rp \rp \nonumber
\end{align}
Let $x \in \R$. Note that upon instantiation with $\rect$, the last two layers are:
\begin{align}
&\rect(W_Lx+b_L)-\rect(-W_Lx - b_L) \nonumber\\
&=\max\{W_Lx+b_L,0\}-\max\{-W_Lx-b_L,0\} \nonumber \\
&= W_Lx+b_L \label{7.2.8}
\end{align}
This, along with Case 1.i, shows that upon instantiation the last two layers of $\id_1 \bullet \nu$ compute the same map as the last layer of $\nu$, whence $\real_{\rect}\lp \id_1\bullet \nu \rp = \real_{\rect}\lp \nu \rp$.
\textit{Case 1.iii:} Let $\nu = \lp \lp W_1,b_1\rp \rp$. Deriving from Definition \ref{7.2.1} and \ref{5.2.1} we have:
\begin{align}
\nu \bullet \id_1 &= \lp \lp \begin{bmatrix}
1 \\-1
\end{bmatrix}, \begin{bmatrix}
0 \\0
\end{bmatrix}\rp, \lp W_1\begin{bmatrix}
1 \quad -1
\end{bmatrix},W_1 \begin{bmatrix}
0
\end{bmatrix} + b_1\rp \rp \nonumber \\
&= \lp \lp \begin{bmatrix}
1 \\-1
\end{bmatrix}, \begin{bmatrix}
0 \\0
\end{bmatrix}\rp, \lp \begin{bmatrix}
W_1 \quad -W_1
\end{bmatrix}, b_1\rp \rp \nonumber
\end{align}
Let $x \in \R$. Upon instantiation with $\rect$ we have that:
\begin{align}
\lp \real_{\rect} \lp \nu \bullet \id_1 \rp \rp \lp x \rp &= \begin{bmatrix}
W_1 \quad -W_1
\end{bmatrix} \rect \lp \begin{bmatrix}
x \\ -x
\end{bmatrix} \rp +b_1 \nonumber \\
&= W_1\rect(x)-W_1\rect(-x) + b_1 \nonumber \\
&=W_1 \lp \rect(x) - \rect(-x) \rp +b_1 \nonumber \\
&=W_1x+b_1 = \lp \real_{\rect} \lp \nu \rp \rp \lp x \rp
\end{align}
\textit{Case 1.iv:} Let $\nu = \lp \lp W_1,b_1\rp , \lp W_2,b_2 \rp,...,\lp W_L, b_L \rp \rp $. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have that:
\begin{align}
\nu \bullet \id_1 = \lp \lp \begin{bmatrix}
1 \\-1
\end{bmatrix}, \begin{bmatrix}
0 \\0
\end{bmatrix}\rp, \lp \begin{bmatrix}
W_1 \quad -W_1
\end{bmatrix}, b_1\rp, \lp W_2,b_2 \rp ,...,\lp W_L,b_L \rp \rp
\end{align}
Let $x \in \R$. Upon instantiation with $\rect$, we have that the first two layers are:
\begin{align}
&\begin{bmatrix}
W_1 \quad -W_1
\end{bmatrix} \rect \lp \begin{bmatrix}
x \\ -x
\end{bmatrix} \rp +b_1 \nonumber \\
&= W_1\rect(x)-W_1\rect(-x) + b_1 \nonumber \\
&=W_1 \lp \rect(x) - \rect(-x) \rp + b_1 \nonumber \\
&= W_1x+b_1
\end{align}
This, along with Case 1.iii, shows that upon instantiation the first two layers of $\nu \bullet \id_1$ compute the same map $x \mapsto W_1x+b_1$ as the first layer of $\nu$, whence $\real_{\rect}\lp \nu \bullet \id_1 \rp = \real_{\rect}\lp \nu \rp$.
Observe that Definitions \ref{5.2.5} and \ref{7.2.1} tell us that:
\begin{align}
\boxminus^d_{i=1} \id_1 = \lp \lp \overbrace{\begin{bmatrix}
\we_{\id_1,1} \\
&&\ddots \\
&&& \we_{\id_1,1}
\end{bmatrix}}^{d\text{-many}} , \mymathbb{0}_{2d}\rp, \lp \overbrace{\begin{bmatrix}
\we_{\id_1,2} \\
&& \ddots \\
&&& \we_{\id_1,2}
\end{bmatrix}}^{d\text{-many}}, \mymathbb{0}_d\rp \rp \nonumber
\end{align}
\textit{Case 2.i:} Let $d \in \N \cap [2,\infty)$ and let $\nu = \lp \lp W_1,b_1 \rp \rp \in \neu$ have end-widths $d$. Deriving from Definitions \ref{5.2.1} and \ref{7.2.1} we have:
\begin{align}
\id_d \bullet \nu = \lp \lp \begin{bmatrix}
\we_{\id_1,1} \\
&&\ddots \\
&&& \we_{\id_1,1}
\end{bmatrix}W_1 , \begin{bmatrix}
\we_{\id_1,1} \\
&&\ddots \\
&&& \we_{\id_1,1}
\end{bmatrix} b_1\rp, \right. \nonumber\\ \left. \lp \begin{bmatrix}
\we_{\id_1,2} \\
&& \ddots \\
&&& \we_{\id_1,2}
\end{bmatrix}, \mymathbb{0}_d\rp \rp \nonumber \\
= \lp \lp \begin{bmatrix}
[W_1]_{1,*} \\
-[W_1]_{1,*} \\
\vdots \\
[W_1]_{d,*}\\
-[W_1]_{d,*}
\end{bmatrix}, \begin{bmatrix}
[b_1]_1\\
-[b_1]_1 \\
\vdots \\
[b_1]_d \\
-[b_1]_d
\end{bmatrix} \rp, \lp \begin{bmatrix}
\we_{\id_1,2} \\
&& \ddots \\
&&& \we_{\id_1,2}
\end{bmatrix}, \mymathbb{0}_d\rp \rp \nonumber
\end{align}
Let $x \in \R^d$. Upon instantiation with $\rect$ we have that:
\begin{align}
&\lp \real_{\rect} \lp \id_d \bullet \nu \rp \rp \lp x \rp \nonumber \\ &= \begin{bmatrix}
\rect\lp [W_1]_{1,*} \cdot x + [b_1]_1\rp-\rect\lp -[W_1]_{1,*}\cdot x -[b_1]_1\rp \\
\vdots \\
\rect\lp [W_1]_{d,*}\cdot x+[b_1]_d\rp-\rect \lp -[W_1]_{d,*}\cdot x-[b_1]_d\rp
\end{bmatrix} \nonumber \\
&= \begin{bmatrix}
[W_1]_{1,*}\cdot x + [b_1]_1 \\
\vdots \\
[W_1]_{d,*}\cdot x + [b_1]_d
\end{bmatrix} = W_1x + b_1 = \lp \real_{\rect} \lp \nu \rp \rp \lp x \rp \nonumber
\end{align}
\textit{Case 2.ii:} Let $\nu = \lp \lp W_1,b_1 \rp, \lp W_2,b_2 \rp, ..., \lp W_L, b_L \rp \rp $. Deriving from Definition \ref{7.2.1} and \ref{5.2.1} we have that:
\begin{align}
\id_d \bullet \nu =\lp \lp W_1,b_1\rp, \lp W_2, b_2 \rp,...,\lp W_{L-1},b_{L-1} \rp, \lp \begin{bmatrix}
[W_L]_{1,*} \\
-[W_L]_{1,*}\\
\vdots \\
[W_L]_{d,*} \\
-[W_L]_{d,*}
\end{bmatrix} ,\begin{bmatrix}
[b_L]_1 \\
-[b_L]_1 \\
\vdots \\
[b_L]_d \\
-[b_L]_d
\end{bmatrix} \rp ,\lp \begin{bmatrix}
\we_{\id_1,2} \\
&& \ddots \\
&&& \we_{\id_1,2}
\end{bmatrix}, \mymathbb{0}_d \rp \rp \nonumber
\end{align}
Note that, writing $x$ for the input to these layers, upon instantiation with $\rect$ the last two layers yield:
\begin{align}
&\begin{bmatrix}
\rect\lp [W_L]_{1,*} \cdot x + [b_L]_1\rp-\rect\lp -[W_L]_{1,*}\cdot x -[b_L]_1\rp \\
\vdots \\
\rect\lp [W_L]_{d,*}\cdot x+[b_L]_d\rp-\rect \lp -[W_L]_{d,*}\cdot x-[b_L]_d\rp
\end{bmatrix} \nonumber \\
&=\begin{bmatrix}
[W_L]_{1,*}\cdot x + [b_L]_1 \\
\vdots \\
[W_L]_{d,*}\cdot x + [b_L]_d
\end{bmatrix} \nonumber \\
&= W_Lx + b_L
\end{align}
This, along with Case 2.i, shows that upon instantiation the last two layers of $\id_d \bullet \nu$ compute the same map as the last layer of $\nu$, whence $\real_{\rect}\lp \id_d\bullet \nu \rp = \real_{\rect}\lp \nu \rp$.
\textit{Case 2.iii:} Let $\nu = \lp \lp W_1,b_1\rp \rp$. Deriving from Definition \ref{7.2.1} and \ref{5.2.1} we have:
\begin{align}
&\nu \bullet \id_d \nonumber\\ &= \lp \lp \begin{bmatrix}
\we_{\id_1,1} \\
&&\ddots \\
&&& \we_{\id_1,1}
\end{bmatrix}, \mymathbb{0}_{2d}\rp, \lp W_1\begin{bmatrix}
\we_{\id_1,2} \\
&&\ddots \\
&&& \we_{\id_1,2}
\end{bmatrix}, b_1\rp \rp \nonumber
\end{align}
Let $x \in \R^d$. Upon instantiation with $\rect$ we have that:
\begin{align}
&\lp \real_{\rect} \lp \nu \bullet \id_d \rp \rp \lp x \rp \nonumber \\ &= \begin{bmatrix}
[W_1]_{*,1} \ -[W_1]_{*,1} \ \cdots \ [W_1]_{*,d} \ -[W_1]_{*,d}
\end{bmatrix}\rect \lp \begin{bmatrix}
[x]_1 \\
-[x]_1 \\
\vdots \\
[x]_d \\
-[x]_d
\end{bmatrix}\rp + b_1 \nonumber \\
&= [W_1]_{*,1} \rect([x]_1) - [W_1]_{*,1} \rect(-[x]_1)+ \cdots +[W_1]_{*,d}\rect([x]_d)-[W_1]_{*,d}\rect(-[x]_d) + b_1 \nonumber \\
&= [W_1]_{*,1}\cdot [x]_1 + \cdots + [W_1]_{*,d} \cdot [x]_d + b_1 \nonumber \\
&= W_1x+b_1 = \real_{\rect}(\nu)
\end{align}
\textit{Case 2.iv:} Let $\nu = \lp \lp W_1,b_1 \rp, \lp W_2,b_2 \rp ,...,\lp W_L,b_L \rp \rp $. Deriving from Definitions \ref{7.2.1} and \ref{5.2.1} we have:
\begin{align}
&\nu \bullet \id_d \nonumber \\ = &\lp \lp \begin{bmatrix}
\we_{\id_1,1} \\
&&\ddots \\
&&& \we_{\id_1,1}
\end{bmatrix}, \mymathbb{0}_{2d}\rp, \lp \begin{bmatrix}
[W_1]_{*,1} \ -[W_1]_{*,1} \ \cdots \ [W_1]_{*,d} \ -[W_1]_{*,d}
\end{bmatrix}, b_1\rp,... \right. \nonumber \\ &\left.\lp W_2,b_2 \rp ,...,\lp W_L,b_L \rp \rp \nonumber
\end{align}
Let $x \in \R^d$. Upon instantiation with $\rect$, the first two layers yield:
\begin{align}
&\begin{bmatrix}
[W_1]_{*,1} \ -[W_1]_{*,1} \ \cdots \ [W_1]_{*,d} \ -[W_1]_{*,d}
\end{bmatrix}\rect \lp \begin{bmatrix}
[x]_1 \\
-[x]_1 \\
\vdots \\
[x]_d \\
-[x]_d
\end{bmatrix}\rp + b_1 \nonumber \\
&= [W_1]_{*,1} \rect([x]_1) - [W_1]_{*,1} \rect(-[x]_1)+ \cdots +[W_1]_{*,d}\rect([x]_d)-[W_1]_{*,d}\rect(-[x]_d) + b_1 \nonumber \\
&= [W_1]_{*,1}\cdot [x]_1 + \cdots + [W_1]_{*,d} \cdot [x]_d + b_1 \nonumber \\
&= W_1x+b_1
\end{align}
This, along with Case 2.iii, shows that upon instantiation the first two layers of $\nu \bullet \id_d$ compute the same map as the first layer of $\nu$, whence $\real_{\rect}\lp \nu \bullet \id_d \rp = \real_{\rect}\lp \nu \rp$.
This completes the proof.
\end{proof}
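Since the realization of a composition is the composition of the realizations, the lemma also admits a simple numerical sanity check: composing the realization of $\id_d$ with that of an arbitrary $\nu$ of end-widths $d$, in either order, should leave the computed function unchanged. The sketch below (our own helper names, assuming NumPy) illustrates this for a randomly chosen two-layer $\nu$ with $d=3$.
\begin{verbatim}
import numpy as np

relu = lambda z: np.maximum(z, 0.0)

def realize(layers, x):
    # ReLU realization: activation between layers, affine output layer.
    for W, b in layers[:-1]:
        x = relu(W @ x + b)
    W, b = layers[-1]
    return W @ x + b

def id_d(d):
    # The network Id_d of Definition 7.2.1 (d-fold stacking of Id_1).
    return [(np.kron(np.eye(d), [[1.0], [-1.0]]), np.zeros(2 * d)),
            (np.kron(np.eye(d), [[1.0, -1.0]]), np.zeros(d))]

d = 3
nu = [(np.random.randn(5, d), np.random.randn(5)),
      (np.random.randn(d, 5), np.random.randn(d))]   # arbitrary nu with end-widths d
x = np.random.randn(d)
# R(Id_d . nu) = R(Id_d) o R(nu)  and  R(nu . Id_d) = R(nu) o R(Id_d)
assert np.allclose(realize(id_d(d), realize(nu, x)), realize(nu, x))
assert np.allclose(realize(nu, realize(id_d(d), x)), realize(nu, x))
\end{verbatim}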
\begin{definition}[Monoid]
Given a set $X$ with binary operation $*$, we say that $X$ is a monoid under the operation $*$ if:
\begin{enumerate}[label = (\roman*)]
\item For all $x,y \in X$ it is the case that $x*y \in X$
\item For all $x,y,z \in X$ it is the case that $(x *y)*z = x*(y*z)$
\item There exists a unique element $e \in X$ such that for all $x \in X$ it is the case that $e*x=x*e = x$
\end{enumerate}
\end{definition}
\begin{theorem}
Let $d\in \N$ be fixed. The set of all neural networks $\nu \in \neu$ with end-widths $d$, under instantiation with $\rect$, forms a monoid under the operation $\bullet$.
\end{theorem}
\begin{proof}
This is a consequence of Lemma \ref{7.2.3} and Lemma \ref{5.2.3}.
\end{proof}
\begin{remark}
By analogy with matrices, we may find it helpful to refer to neural networks of end-widths $d$ as ``square neural networks of size $d$''.
\end{remark}
%\section{Modulus of Continuity}
%\begin{definition}
% Let $A\subseteq \R$ and let $f:A \rightarrow \R$. We denote the modulus of continuity $\omega_f: \lb 0,\infty \rb \rightarrow \lb 0,\infty \rb$ as the function given for all $h \in \lb 0,\infty \rb$ as:
% \begin{align}\label{9.3.1}
% \omega_f \lp h \rp = \sup \lp \left\{\left| f(x) - f(y)\right| \in \lb 0 ,\infty \rp : \lp x,y \in A, \left| x-y\right| \les h\rp \right\} \cup \left\{ 0\right\} \rp
% \end{align}
%\end{definition}
%\begin{lemma}
% Let $\alpha \in \lb -\infty, \infty \rb$, $b \in \lb a, \infty \rb$, and let $f: \lb a,b \rb \cap \R \rightarrow \R$ be a function. It is then the case that for all all $x,y \in \lb a,b\rb \cap \R$ that $\left| f(x) -f(y)\right| \les \omega_f \lp \left| x-y \right| \rp$.
%\end{lemma}
%\begin{proof}
% Note that (\ref{9.3.1}) implies the lemma.
%\end{proof}
%\begin{lemma}\label{lem:9.3.3}
% Let $A\subseteq \R$, $L \in \lb 0,\infty \rp$, and let $f:A \rightarrow \R$ satisfy for all $x,y \in A$ that $\left| f(x) - f(y)\right| \les L \left|x-y \right|$. It is then the case for all $h \in \lb 0,\infty \rp$ that $\omega_f(h) \les Lh$.
%\end{lemma}
%\begin{proof}
% Since it holds for all $x,y \in \R$ that $\left| f(x) - f(y)\right| \les L \left|x-y \right|$, it then, with (\ref{9.3.1}) imply for all $h \in \lb 0,\infty \rp$ that:
% \begin{align}
% \omega_f \lp h \rp &= \sup \lp \left\{\left| f(x) - f(y)\right| \in \lb 0 ,\infty \rp : \lp x,y \in A, \left| x-y\right| \les h\rp \right\} \cup \left\{ 0\right\} \rp \nonumber\\
% &\les \sup \lp \left\{L\left|x -y\right| \in \lb 0 ,\infty \rp : \lp x,y \in A, \left| x-y\right| \les h\rp \right\} \cup \left\{ 0\right\} \rp \nonumber \\
% &\les \sup \lp \left\{Lh,0 \right\} \rp = Lh
% \end{align}
% This completes the proof of the lemma.
%\end{proof}
%\section{Linear Interpolation of Real-Valued Functions}
%Note that we need a framework for approximating generic 1-dimensional continuous functions to approximate more complex functions. We introduce the linear interpolation operator and later see how neural networks can approximate 1-dimensional continuous functions to arbitrary precision.
%
%\subsection{The Linear Interpolation Operator}
%\begin{definition}[Linear Interpolation Operator]\label{lio}
% Let $n \in \N$, $x_0,x_1,...,x_n, y_0,y_1,...,y_n \in \R$. Let it also be the case that $x_0 \leqslant x_1 \leqslant \cdots \leqslant x_n$. We denote by $\lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n}: \R \rightarrow \R$, the function that satisfies for $i \in \{1,2,...,n\}$, and for all $w \in \lp -\infty, x_0 \rp$, $x \in [ x_{i-1},x_i )$, $z \in [ x_n, \infty)$ that:
% \begin{enumerate}[label = (\roman*)]
% \item $\lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n}\lp w \rp = y_0$
% \item $\lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n}\lp x \rp = y_{i-1} + \frac{y_i-y_{i-1}}{x_i-x_{i-1}}\lp x- x_{i-1} \rp $
% \item $\lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n}\lp z \rp = y_n$
% \end{enumerate}
%\end{definition}
%\begin{lemma}
% Let $n\in \N$, $x_0,x_1,...,x_n,y_0,y_1,...,y_n \in \R$ with $x_0 \les x_1 \les \cdots \les x_n$, it is then the case that:
% \begin{enumerate}[label = (\roman*)]
% \item for all $i \in \{0,1,...,n\}$ that:
% \begin{align}\label{7.3.1}
% \lp \lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n} \rp \lp x_i \rp = y_i
% \end{align}
% \item for all $i\in \{0,1,...,n\}$ and $x \in [x_{i-1},x_{i}]$ that:
% \begin{align}\label{7.3.2}
% \lp \lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n} \rp \lp x \rp = \lp \frac{x_i-x}{x_i - x_{i-1}} \rp y_{i-1} + \lp \frac{x-x_{i-1}}{x_i-x_{i-1}} \rp y_i
% \end{align}
% \end{enumerate}
%\end{lemma}
%\begin{proof}
% Note that (\ref{7.3.1}) is a direct consequence of Definition \ref{lio}. Item (i) then implies for all $i \in \{1,2,...,n\}$ $x \in [x_{i-1},x_i]$ that:
% \begin{align}
% \lp \lin^{y_0,y_1,...,y_n}_{x_0,x_1,...,x_n} \rp \lp x \rp &= \lb \lp \frac{x_i-x_{i-1}}{x_i-x_{i-1}} \rp - \lp \frac{x-x_{i-1}}{x_i-x_{i-1}} \rp \rb y_{i-1} + \lp \frac{x-x_{i-1}}{x_i-x_{i-1}}\rp y_i \nonumber \\
% &= \lp \frac{x_i-x}{x_i-x_{i-1}} \rp y_{i-1} + \lp \frac{x-x_{i-1}}{x_i-x_{i-1}} \rp y_i \nonumber
% \end{align}
%\end{proof}
%\begin{lemma}\label{lem:9.4.3}
% Let $N\in \N$, $L,x_0,x_1,...,x_N \in \R$ satisfy $x_0 < x_1 < \cdots < x_N$, and set let $f:\lb x_0,x_N \rb \rightarrow \R$ satisfy for all $x,y \in \lb x_0,x_N\rb$ that $\left| f(x)-f(y)\right| \les L \left| x-y\right|$, it is then the case that:
% \begin{enumerate}[label = (\roman*)]
% \item for all $x,y \in \R$ that:
% \begin{align}
% \left| \lp \lin^{f(x_0),f(x_1),...,f(x_N)}_{x_0,x_1,...,x_N}\rp \lp x \rp - \lp \lin^{f(x_0),f(x_1),...,f(x_N)}_{x_0,x_1,...,x_N}\rp \lp y \rp \right| \les L \left| x-y \right|
% \end{align}, and
% \item that:
% \begin{align}
% \sup_{x \in \lb x_0,x_N \rb }\left| \lp \lin^{f(x_0),f(x_1),...,f(x_N)}_{x_1,x_2,...,x_N}\rp \lp x \rp -f\lp x \rp\right| \les L \lp \max_{i \in \{ 1,2,...N\}} \left| x_i-x_{i-1}\right|\rp
% \end{align}
% \end{enumerate}
%\end{lemma}
%\begin{proof}
% The assumption that for all $x,y \in \lb x_0, x_k \rb$ it is the case that $\left| f(x) - f(y) \right| \les L \left| x-y\right|$ and Lemma \ref{lem:9.3.3} prove Item (i) and Item (ii).
%\end{proof}
%\subsection{Neural Networks to Approximate the $\lin$ Operator}
%\begin{lemma}\label{7.3.3}
% Let $\alpha,\beta,h \in \R$. Denote by $\relu \in \neu$ the neural network given by $\relu = h \circledast \lp \mathsf{i}_1 \bullet \aff_{\alpha,\beta}\rp $. It is then the case that:
% \begin{enumerate}[label = (\roman*)]
% \item $\relu = \lp \lp \alpha, \beta \rp , \lp h,0 \rp \rp$
% \item $\lay(\relu) = \lp 1,1,1 \rp \in \N^3$.
% \item $\real_{\rect}\lp \relu \rp \in C \lp \R, \R \rp$
% \item for all $x \in \R$ that $\lp \real_{\rect} \lp \relu \rp \rp \lp x \rp = h\max \{\alpha x+\beta ,0\}$
% \end{enumerate}
%\end{lemma}
%\begin{proof}
% Note that by Definition \ref{5.3.1} we know that $\aff_{\alpha,\beta} = \lp \lp \alpha,\beta \rp \rp$, this with Definition \ref{actnn}, and Definition \ref{5.2.1} together tell us that $\mathfrak{i}_1\bullet \aff_{\alpha,\beta} = \lp \alpha,\beta \rp$. A further application of Definition \ref{5.2.1}, and an application of Definition \ref{slm} yields that $h \circledast \lp \mathfrak{i}_1 \bullet \aff_{\alpha,\beta} \rp = \lp \lp \alpha,\beta \rp, \lp h ,0 \rp \rp$. This proves Item (i).
%
% Note that $\lay(\aff_{\alpha,\beta})= (1,1)$, $\lay(\mathfrak{i}_1) = \lp 1,1,1 \rp $, and $\lay(h)=1$. Item (i) of Lemma \ref{6.0.3} therefore tells us that $\lay (\relu) = \lay \lp h \circledast \lp \mathfrak{i}_1 \bullet \aff_{\alpha,\beta}\rp \rp$. This proves Item (ii).
%
% Note that Lemmas \ref{7.1.2} and \ref{6.0.3} tell us that:
% \begin{align}
% \forall x\in \R: \lp \real_{\rect}\lp \mathfrak{i}_1 \bullet \aff_{\alpha,\beta} \rp \rp \lp x \rp = \rect \lp \real_{\rect} \rp \lp x \rp = \max\{ \alpha x+ \beta \}
% \end{align}
% This and Lemma \ref{slm} ensures that $\real_{\rect}\lp \relu \rp \in C\lp \R, \R \rp$ and further that:
% \begin{align}
% \lp \real_{\rect} \lp \relu \rp \rp \lp x \rp = h \lp \lp \real_{\rect}\lp \mathfrak{i}_1\bullet \aff_{\alpha,\beta} \rp \rp \lp x\rp \rp = h\max\{\alpha x+\beta,0 \}
% \end{align}
% This proves Item (iii)-(iv). This completes the proof of the lemma.
%\end{proof}
%\begin{lemma}\label{9.3.4}
% Let $N\in \N$, $x_0,x_1,...,x_N,y_0,y_1,...,y_N \in \R$ and further that $x_0 \les x_2 \les \cdots \les x_N$. Let $\Phi \in \neu$ satisfy that:
% \begin{align}\label{7.3.5}
% \Phi = \aff_{1,y_0} \bullet \lp \bigoplus^N_{i=0} \lb \lp \frac{y_{\min\{i+1,N\}}-y_i}{x_{\min\{i+1,N\}}-x_{\min\{i,N-1\}}}- \frac{y_i-y_{\max\{i-1,0\}}}{x_{\max\{i,1\}}-x_{\max\{i-1,0\}}}\rp \circledast \lp \mathfrak{i}_1\bullet \aff_{1,-x_i} \rp \rb \rp
% \end{align}
% It is then the case that:
% \begin{enumerate}[label=(\roman*)]
% \item $\lay(\Phi)= \lp 1,N+1,1 \rp \in \N^3$
% \item $\real_{\rect} \lp \Phi \rp \in C \lp \R, \R \rp$
% \item $\lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp= \lin ^{y_0,y_1,...,y_N}_{x_0,x_1,...,x_N}\lp x \rp$
% \item $\param(\Phi) = 3N+4$
% \end{enumerate}
%\end{lemma}
%\begin{proof}
% For notational convenience, let it be the case that for all $i \in \{0,1,2,..., N\}$:
% \begin{align}\label{7.3.6}
% h_i = \frac{y_{\min\{i+1,N\}}-y_i}{x_{\min\{i+1,N\}}-x_{\min\{i,N-1\}}}- \frac{y_i-y_{\max\{i-1,0\}}}{x_{\max\{i,1\}}-x_{\max\{i-1,0\}}}
% \end{align}
% Note that $\lay \lp \mathfrak{i}_1 \bullet \aff_{1,-x_0} \rp= \lp1,1,1 \rp$, and further that for all $i\in \{0,1,2,...,N\}$, $h_i \in \R$. Lemma \ref{7.3.3} then tells us that for all $i \in \{0,1,2,...,N\}$, $\lay \lp h_i \circledast \lp \mathfrak{i}_1 \bullet \aff_{1,-x_i} \rp \rp = \lp 1,1,1 \rp $, $\real_{\rect}\lp h_i \circledast \lp \mathfrak{i}_1 \bullet \aff_{1,-x_i} \rp \rp \in C \lp \R,\R \rp$, and that $ \lp \real_{\act} \lp h_i \circledast \lp \mathfrak{i}_1 \bullet \aff_{1,-x_i} \rp \rp \rp \lp x \rp = h_i \max\{x-x_k,0 \}$. This, (\ref{7.3.5}), Lemma \ref{5.3.3}, and \cite[Lemma~3.28]{Grohs_2022} ensure that $\lay(\Phi) = \lp 1,N+1,1 \rp \in \N^3$ and that $\real_{\rect} \lp \Phi \rp \in C \lp \R, \R \rp$ establishing Items (i)--(ii).
%
% In addition, note that Item (i) and (\ref{widthdef}), tell us that:
% \begin{align}
%% NOTE: Ask Dr. P about this parameter
% \param(\Phi) = \overbrace{(N+1)}^{W_1}+\underbrace{(N+1)}_{b_1}+\overbrace{(N+1)}^{W_2}+\underbrace{1}_{b_2} =3N+4
% \end{align}
% Which proves Item (iv). For all $i \in \{0,1,2,...,N\}$, let $\phi_i$ be $\phi_i = h_i \circledast \lp \mathfrak{i} \bullet \aff_{1,-x_i} \rp $. Next note that \ref{7.3.6}, Lemma \ref{5.3.3}, and \cite[Lemma~3.28]{Grohs_2022} then tell us that:
% \begin{align}\label{7.3.8}
% \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = y_0 + \sum^n_{i=1} \lp \real_{\act} \lp \phi_i \rp \rp\lp x \rp = y_0 + \sum^n_{i=1}h_i \max\{x-x_i,0\}
% \end{align}
% Since $x_0 \les x_i$ for all $i\in\{1,2,...,n\}$, it then is the case for all $x \in (\infty, x_0]$ that:
% \begin{align}\label{7.3.10.2}
% \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = y_0+0 = y_0
% \end{align}
% \begin{claim}
% For all $i \in \{1,2,...,N\}$ it is the case that :
% \begin{align}\label{7.3.10}
% \sum_{j=0}^{i-1}h_j = \frac{y_{i}-y_{i-1}}{x_i-x_{i-1}}
% \end{align}
% \end{claim}
% We prove this claim by induction. For the base case of $i=1$, we have:
% \begin{align}
% \sum^0_{j=0} h_0 = h_0 = \frac{y_{1}-y_0}{x_{1}-x_{0}}- \frac{y_0-y_{0}}{x_{1}-x_{0}} =\frac{y_1-y_0}{x_1-x_0}
% \end{align}
% This proves the base base for (\ref{7.3.10}). Assume next that this holds for $k$, for the $(k+1)$-th induction step we have:
% \begin{align}
% \sum^{k+1}_{j=0}h_j = \sum^k_{j=0}h_j + h_{k+1} &=\frac{y_k-y_{k-1}}{x_k-x_{k-1}}+h_{k+1} \nonumber\\
% &= \frac{y_k-y_{k-1}}{x_k-x_{k-1}} + \frac{y_{k+2}-y_{k-1}}{x_{k+2}-x_{k+1}} - \frac{y_{k+1}-y_{k}}{x_{k+1} - x_k} \nonumber\\
% &= \frac{y_{k+1}-y_k}{x_{k+1}-x_k}
% \end{align}
%%TODO: Double-check this proof
%This proves (\ref{7.3.10}). In addition, note that (\ref{7.3.8}), (\ref{7.3.10}), and the fact that for all $i \in \{1,2,...,n\}$ it is the case that $x_{i-1} \les x_{i}$ tells us that for all $i \in \{1,2,...,n\}$ and $x \in [x_{i-1},x_i]$ it is the case that:
% \begin{align}\label{7.3.13}
% &\lp \real_{\rect}\lp \Phi \rp \rp \lp x \rp - \lp \real_{\act}\lp \Phi \rp \rp \lp x_{i-1}\rp = \sum^n_{j=0} h_j \lp \max \{ x-x_j,0 \}-\max \{x_{i-1}-x_j,0\} \rp \nonumber\\
% &= \sum^{i-1}_{j=0}c_j \lb \lp x-x_j \rp -\lp x_{i-1}-x_j \rp \rb = \sum^{i-1}_{j=0} c_j \lp x - x_{i-1} \rp = \lp \frac{y_i-y_{i-1}}{x_i-x_{i-1}}\rp \lp x-x_{i-1} \rp
% \end{align}
% \begin{claim}
% For all $i \in \{1,2,...,N\}$, $x\in [x_{i-1},x_i]$ it is the case that:
% \begin{align}\label{7.3.14}
% \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = y_{i-1}+ \lp \frac{y_i-y_{i-1}}{x_i-x_{i-1}} \rp \lp x - x_{i-1} \rp
% \end{align}
% \end{claim}
% We will prove this claim by induction. For the base case of $i=1$, (\ref{7.3.13}) and (\ref{7.3.10}) tell us that:
% \begin{align}
% \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp &=\lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp - \lp \real_{\rect}\lp \Phi \rp \rp \lp x_{i-1} \rp + \lp \real_{\rect}\lp \Phi \rp \rp \lp x_{i-1} \rp \nonumber \\
% &= y_0 + \lp \frac{y_1-y_0}{x_i-x_{i-1}} \rp \lp x - x_{i-1} \rp
% \end{align}
% For the induction step notice that (\ref{7.3.13}) implies that for all $i \in \{2,3,...,N\}$, $x \in [x_{i-1},x_i]$, with the instantiation that $\forall x \in [x_{i-2},x_{i-1}]: \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = y_{i-2} + \lp \frac{y_{i-1}-y_{i-2}}{x_{i-1}-x_{i-2}} \rp \lp x-x_{i-2} \rp $, it is then the case that:
% \begin{align}
% \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp &= \lp \real_{\rect} \lp \Phi \rp \rp \lp x_{i-1}\rp + \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp - \lp \real_{\rect} \lp \Phi \rp \rp \lp x_{i-1} \rp \nonumber\\
% &=y_{i-2} + \lp \frac{y_{i-1}-y_{i-2}}{x_{i-1}-x_{i-2}} \rp \lp x_{i-1}+x_{i-2} \rp + \lp \frac{y_i-y_{i-1}}{x_i-x_{i-1}} \rp \lp x - x_{i-1} \rp \nonumber\\
% &= y_{i-1} + \lp \frac{y_i-y_{i-1}}{x_i-x_{i-1}} \rp \lp x-x_{i-1} \rp
% \end{align}
% Thus induction proves (\ref{7.3.14}). Furthermore note that (\ref{7.3.10}) and (\ref{7.3.6}) tell us that:
% \begin{align}
% \sum^N_{i=0} h_i = c_N +\sum^{N-1}_{i=0}h_i = -\frac{y_N-y_{N-1}}{x_N-x_{N-1}}+\frac{y_N-y_{N-1}}{x_N-x_{N-1}} = 0
% \end{align}
% The fact that $\forall i \in \{0,1,...,N\}:x_i \les x_N$, together with (\ref{7.3.8}) imply for all $x \in [x_N,\infty)$ that:
% \begin{align}
% \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp - \lp \real_{\rect} \lp \Phi \rp \rp \lp x_N \rp &= \lb \sum^N_{i=0} h_i \lp \max\{x-x_i,0\}-\max\{x_N-x_i,0\} \rp \rb \nonumber\\
% &= \sum^N_{i=0} h_i \lb \lp x- x_i \rp - \lp x_N - r_i \rp \rb = \sum^N_{i=0} h_i \lp x - x_N \rp =0 \nonumber
% \end{align}
% This and (\ref{7.3.14}) tells us that for all $x \in [x_N,\infty)$ we have:
% \begin{align}
% \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp = \lp \real_{\rect} \lp \Phi \rp \rp \lp x_N \rp = y_{N-1}+ \lp \frac{y_N-y_{N-1}}{x_N - x_{N-1}} \rp \lp x_N-x_{N-1} \rp = x_N
% \end{align}
% Together with (\ref{7.3.10.2}), (\ref{7.3.14}), and Definition \ref{lio} establishes Item (iii) thus proving the lemma.
% \end{proof}
%\section{Neural Network Approximations of 1-dimensional Functions.}
%
%\begin{lemma}\label{lem:9.5.1}
% Let $N\in \N$, $L. a. x_0,x_1,...,x_N \in \R$, $b\in \lp a,\infty \rp$, satisfy for all $i \in \left\{0,1,...,N\right\}$ that $x_i = a+ \frac{i(b-a)}{N}$. Let $f:\lb a,b\rb \rightarrow \R$ satisfy for all $x,y \in \lb a,b\rb$ that $\left|f(x) - f(y) \right| \les L\left|x-y\right|$ and let $\mathsf{F} \in \neu$ satisfy:
% \begin{align}
% \F = \aff_{1,f(x_0)}\bullet \lb\bigoplus^N_{i=0} \lp \lp \frac{N \lp f \lp x_{\min \{i+1,N\}}\rp-2f\lp x_i\rp + f \lp x_{\max \{ i-1,0 \}}\rp\rp}{b-a}\rp \circledast \lp \mathsf{i}_1 \bullet \aff_{1,-x_k} \rp\rp \rb
% \end{align}
% It is then the case that:
% \begin{enumerate}[label = (\roman*)]
% \item $\lay \lp \F \rp = \lp 1, N+1,1\rp$
% \item $\real_{\rect} \lp \F \rp\in C\lp \R, \R \rp$
% \item $\real_{\rect} \lp \F \rp = \lin ^{f(x_0),f(x_1),...,f(x_N)}_{x_1,x_2,...,x_N}$
% \item it holds that for all $x,y \in \R$ that $\left| \lp \real_{\rect} \lp \F \rp \rp \lp x \rp -\lp \real_{\rect} \lp \F \rp\rp\lp y \rp \right| \les L \left| x-y \right|$
% \item it holds that $\sup_{x \in \lb a,b \rb} \left| \lp \real_{\rect} \lp \F \rp \rp\lp x\rp -f(x)\right| \les \frac{L \lp b-a\rp}{N}$, and
% \item $\param\lp \F \rp = 3N+4$.
% \end{enumerate}
%\end{lemma}
%\begin{proof}
% Note that since it is the case that for all $i \in \left\{0,1,...,N \right\}: x_{\min \{i+1,N\}} - x_{\min \{i, N-1\}} = x_{\max\{i,1\}} - x_{\max \{i-1,0\}} = \frac{b-a}{N}$, we have that:
% \begin{align}
% \frac{f\lp x_{\min\{i+1,N\}}\rp- f \lp x_i \rp}{x_{\min \{ i+1,N\}}-x_{\min \{i,N-1\}}} - \frac{f(x_i)-f\lp x_{\max\{i-1,0\}}\rp}{x_{\max \{ i,1\}}-x_{\max \{i-1,0\}}} = \frac{N \lp f \lp x_{\min \{i+1,N\}}\rp -2f\lp x_i \rp +f\lp x_{\max \{ i-1,0\}}\rp\rp}{b-a}
% \end{align}
% Thus Items (i)-(iv) of Lemma \ref{9.3.4} prove Items (i)-(iii), and (vi) of this lemma. Item (iii) combined with the assumption that for all $x,y \in \lb a,b \rb: \left| f(x) - f(y) \right| \les \left| x-y \right|$ and Item (i) in Lemma \ref{lem:9.4.3} establish Item (iv). Furthermore, note that Item (iii), the assumption that for all $x,y \in \lb a,b \rb: \left| f(x) -f(y)\right| \les L\left| x-y\right|$, Item (ii) in Lemma \ref{lem:9.4.3} and the fact that for all $i \in \{1,2,..., N\}: x_i-x_{i-1} = \frac{b-a}{N}$ demonstrate for all $x \in \lb a,b \rb$ it holds that:
% \begin{align}
% \left| \lp \real_{\rect} \lp \F \rp\rp \lp x \rp -f\lp x \rp \right| \les L \lp \max_{i \in \{1,2,...,N\}} \left| x_i - x_{i-1}\right|\rp = \frac{L(b-a)}{N}
% \end{align}
%\end{proof}
%\begin{lemma}\label{lem:9.5.2}
% Let $L,a \in \R$, $b\in \lb a, \infty \rp$, $\xi \in \lb a,b \rb$, let $f: \lb a,b \rb \rightarrow \R$ satisfy for all $x,y \in \lb a,b \rb$ that $\left| f(x) - f(y) \right| \les L\left|x-y \right|$, and let $\F \in \neu$ satisfy $\F = \aff_{1,f(\xi)} \bullet \lp 0 \circledast \lp \mathsf{i}_1 \bullet \aff_{1,-\xi} \rp \rp $, it is then the case that:
% \begin{enumerate}[label = (\roman*)]
% \item $\lay \lp \F \rp = \lp 1,1,1 \rp$
% \item $\real_{\rect} \lp \F \rp \in C \lp \R, \R \rp$
% \item for all $x \in \R$, we have $\lp \real_{\rect}\lp \F \rp \rp \lp x \rp = f \lp \xi \rp$
% \item $\sup_{x \in \lb a,b\rb} \left| \lp \real_{\rect} \lp \F \rp \rp\lp x \rp -f(x)\right| \les L \max \{ \xi -a, b-\xi\}$
% \item $\param \lp \F \rp = 4$
% \end{enumerate}
%\end{lemma}
%
%\begin{proof}
% Note that Item (i) is a consequence of the fact that $\aff_{1,-\xi}$ is a neural network with a real number as weight and a real number as a bias and the fact that $\lay \lp \mathsf{i}_1 \rp = \lp 1,1,1 \rp$. Note also that Item (iii) of Lemma \ref{7.3.3} proves Item (iii).
%
% Note that from the construction of $\aff$ we have that:
% \begin{align}\label{(9.5.4)}
% \lp \real_{\rect} \lp \F \rp\rp \lp x \rp &= \lp \real_{\rect} \lp 0 \circledast \lp \mathsf{i}_1 \bullet \aff_{1,-\xi}\rp\rp \rp \lp x \rp + f \lp \xi \rp \nonumber \\
% &= 0 \lp \lp \real_{\rect} \lp \mathsf{i}_1 \bullet \aff_{1,-\xi} \rp\rp \lp x \rp \rp + f \lp \xi \rp = f \lp \xi \rp
% \end{align}
% Which establishes Item (iii). Note that (\ref{(9.5.4)}), the fact that $\xi \in \lb a,b\rb$ and the fact that for all $x,y \in \lb a,b \rb$ it is the case that $\left| f(x) - f(y) \right| \les \left| x-y \right|$ give us that for all $x \in \lb a,b \rb$ it holds that:
% \begin{align}
% \left| \lp \real_{\rect} \lp \F \rp\rp \lp x \rp - f\lp x \rp\right| = \left| f\lp \xi \rp - f \lp x \rp\right| \les L \left| x- \xi \right| \les L \max\left\{ \xi -a, b-\xi \right\}
% \end{align}
% This establishes Item (iv). Note a simple parameter count yields the following:
% \begin{align}
% \param \lp \F \rp = 1(1+1)+1(1+1) = 4
% \end{align}
% Establishing Item (v) and hence the lemma. This completes the proof.
%\end{proof}
%\begin{corollary}
% Let $\ve \in (0,\infty)$, $L,a \in \R$, $b \in \lp a,\infty \rp$, $N \in \N_0 \cap \lb \frac{L(b-a)}{\ve}, \frac{L(b-a)}{\ve}+1\rb$, $x_0, x_1,...,x_N \in \R$ satisfy for all $i \in \{ 0,1,...,N\}$ that $x_i = a + \frac{i(b-a)}{\max\{N,1\}}$, let $f: \lb a,b \rb \rightarrow \R$ satisfy for all $x,y \in \lb a,b \rb$ that $\left| f(x) - f(y) \rb \les L\left| x-y \right|$, and let $\F \in \neu$ satisfy:
% \begin{align}
% \F = \aff_{1,f(x_0)}\bullet \lb\bigoplus^N_{i=0} \lp \lp \frac{N \lp f \lp x_{\min \{i+1,N\}}\rp-2f\lp x_i\rp + f \lp x_{\max \{ i-1,0 \}}\rp\rp}{b-a}\rp \circledast \lp \mathsf{i}_1 \bullet \aff_{1,-x_k} \rp\rp \rb
% \end{align}
% It is then the case that:
% \begin{enumerate}[label = (\roman*)]
% \item $\lay\lp \F \rp = \lp 1,N+1,1 \rp$
% \item $\real_{\rect} \lp \F \rp \in C \lp \R, \R \rp$
% \item for all $x,y \in \R$ that $\left| \lp \real_{\rect} \lp \F \rp \rp \lp x \rp - \lp \real_{\rect} \lp \F \rp \rp \lp x \rp \right| \les L \left| x-y \right|$
% \item $\sup_{x \in \lb a,b \rb} \left| \lp \real_{\rect} \lp \F \rp \rp \lp x \rp -f(x) \right| \les \frac{L(b-a)}{\max \{N,1\}} \les \ve$, and
% \item $\param \lp \F \rp = 3N+4 \les 3L \lb \frac{b-a}{\ve} \rb +7$.
% \end{enumerate}
%\end{corollary}
%\begin{proof}
% The fact that $N \in \N_0 \cap \lb \frac{L(b-a)}{\ve}, \frac{L(b-a)}{\ve}+1 \rb$ ensures that $\frac{L(b-a)}{\max\{ K,1\}} \les \ve$. This and Items (i),(ii),(iv), and (v) in Lemma \ref{lem:9.5.1} and Items (i)-(iii), and (iv) of Lemma $\ref{lem:9.5.2}$ establishes Items (i)-(iv). Furthermore, note that since $N\les 1 + \frac{L(b-a)}{\ve}$, Item (vi) in Lemma \ref{lem:9.5.1} and Item (v) in Lemma \ref{lem:9.5.2} tells us that:
% \begin{align}
% \param \lp \F\rp = 3N+4 \les \frac{3L\lp b-a \rp}{\ve} + 7.
% \end{align}
% Which establishes Item (v) and proves the result.
%\end{proof}
\section{$\trp^h$, $\etr^{n,h}$ and Neural Network Approximations For the Trapezoidal Rule.}
\begin{definition}[The $\trp$ neural network]
Let $h \in \R_{\ges 0}$. We define the $\trp^h \in \neu$ neural network as:
\begin{align}
\trp^h \coloneqq \aff_{\lb \frac{h}{2} \: \frac{h}{2}\rb,0}
\end{align}
\end{definition}
\begin{lemma}
Let $h\in \lp -\infty, \infty\rp$. It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item it is the case that $\real_{\rect} \lp \trp^h \rp \in C \lp \R^2, \R \rp$
\item for all $x = \lp x_1,x_2 \rp \in \R^2$ it is the case that $\lp \real_{\rect} \lp \trp^h \rp \rp \lp x \rp = \frac{1}{2}h \lp x_1+x_2 \rp$
\item $\dep \lp \trp^h \rp = 1$
\item $\param\lp \trp^h \rp = 3$
\item $\lay \lp \trp^h \rp = \lp 2,1 \rp$
\end{enumerate}
\end{lemma}
\begin{proof}
This is a straightforward consequence of Lemma \ref{5.3.1}.
\end{proof}
\begin{definition}[The $\etr$ neural network]
Let $n\in \N$ and $h \in \R_{\ges 0}$. We define the neural network $\etr^{n,h} \in \neu$ as:
\begin{align}
\etr^{n,h} \coloneqq \aff_{\underbrace{\lb \frac{h}{2} \ h \ h\ ... \ h \ \frac{h}{2}\rb}_{n+1-many},0}
\end{align}
\end{definition}
\begin{lemma}\label{etr_prop}
Let $n\in \N$. Let $x_0 \in \lp -\infty, \infty \rp$, and $x_n \in \lb x_0, \infty \rp$. Let $ x = \lb x_0 \: x_1 \:...\: x_n\rb \in \R^{n+1}$ and $h\in \lp -\infty, \infty\rp$ such that for all $i \in \{0,1,...,n\}$ it is the case that $x_i = x_0+i\cdot h$. Then:
\begin{enumerate}[label = (\roman*)]
\item it is the case that $\real_{\rect} \lp \etr^{n,h} \rp \in C \lp \R^{n+1}, \R \rp$
\item it is the case that $\lp \real_{\rect} \lp \etr^{n,h} \rp \rp \lp x \rp = \frac{h}{2} \cdot x_0+h\cdot x_1 + \cdots + h\cdot x_{n-1} + \frac{h}{2}\cdot x_n$
\item it is the case that $\dep \lp \etr^{n,h} \rp = 1$
\item it is the case that $\param\lp \etr^{n,h} \rp = n+2$
\item it is the case that $\lay \lp \etr^{n,h} \rp = \lp n+1,1 \rp$
\end{enumerate}
\end{lemma}
\begin{proof}
This is a straightforward consequence of Lemma \ref{5.3.1}.
\end{proof}
\begin{remark}
Let $h \in \lp 0,\infty\rp$. Note then that $\trp^h$ is simply $\etr^{1,h}$.
\end{remark}
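To illustrate how $\etr^{n,h}$ realizes the composite trapezoidal rule, the following sketch (our own illustration, assuming NumPy) builds the weight row $\lb \frac{h}{2}\ h\ \cdots\ h\ \frac{h}{2}\rb$ and applies it to sampled function values; the choice $f(x)=x^2$ on $\lb 0,1\rb$ is merely an example and not part of the formal development.
\begin{verbatim}
import numpy as np

def etr(n, h):
    # Etr^{n,h}: affine network with weight row [h/2, h, ..., h, h/2] and bias 0.
    w = np.full(n + 1, h)
    w[0] = w[-1] = h / 2.0
    return w.reshape(1, -1), np.zeros(1)

# Composite trapezoidal rule for f(x) = x^2 on [0, 1] with n subintervals.
n, a, b = 100, 0.0, 1.0
h = (b - a) / n
grid = a + h * np.arange(n + 1)
W, bias = etr(n, h)
approx = (W @ grid ** 2 + bias)[0]
print(abs(approx - 1.0 / 3.0))   # O(h^2) quadrature error
\end{verbatim}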
%\begin{lemma}
% Let $f \in C \lp \R, \R \rp$, $a\in \R, b \in \lb a,\infty\rp$, $N\in \N$, and let $h = \frac{b-a}{N}$. Assume also that $f$ has first and second derivatives almost everywhere. Let $ x = \lb x_0 \: x_1 \:...\: x_n\rb \in \R^{n+1}$ such that for all $i \in \{0,1,...,n\}$ it is the case that $x_i = x_0+i\cdot h$, as such let it also be the case that $f\lp \lb x \rb_{*,*}\rp = \lb f(x_0)\: f(x_1) \: \cdots f(x_n) \rb$. Let $a = x_0$ and $b = x_n$. It is then the case that:
% \begin{align}\label{(9.6.3)}
% \left| \int^b_a f\lp x \rp dx - \lp \real_{\rect}\lp \etr^{n,h} \rp\rp \lp f\lp \lb x \rb_{*,*}\rp\rp \right| \les \frac{\lp b-a \rp^3}{12N^2} f''\lp \xi \rp
% \end{align}
% Where $\xi \in \lb a,b \rb$.
%\end{lemma}
%\begin{proof}
% Consider the fact that we may express the left-hand side of (\ref{(9.6.3)}) as:
% \begin{align}
% \left| \int^b_af dx - \lp \real_{\rect}\lp \etr^{n,h} \rp\rp \lp x \rp \right| = \left| \sum_{i=1}^n \lb \int^{x_i}_{x_{i-1}} f\lp x \rp dx-\frac{h}{2}\lp f\lp x_{i-1} \rp + f\lp x_i\rp\rp \rb \right|
% \end{align}
% We then denote by $L_i$ the error at sub-interval $\lb x_{i-1},x_i \rb$ as given by:
% \begin{align}
% L_i = \left| \int^{x_i}_{x_{i-1}}f\lp x \rp dx - \frac{h}{2}\lp f\lp x_{i-1}\rp -f\lp x_i \rp \rp \right|
% \end{align}
% Furthermore, we denote $c_i = \frac{x_{i-1}+x_i}{2}$ as the midpoint of the interval $\lb x_{i-1}, x_i\rb$, which yields the observation that:
% \begin{align}\label{(9.6.6)}
% c_i-x_{i-1} = x_i - c_i = \frac{b-a}{2N}
% \end{align}
% Integration by parts and (\ref{(9.6.6)}) then yields that:
% \begin{align}
% \int^{x_i}_{x_{i-1}}\lp t- c_i\rp f' \lp t \rp dt &= \int^{x_i}_{x_{i-1}} \lp t-c_i\rp df\lp t \rp \nonumber \\
% &= \lp x_i -c_i \rp f\lp x_i \rp - \lp x_{i-1} - c_i\rp f \lp x_{i-1}\rp - \int^{x_i}_{x_{i-1}}f \lp t \rp dt \nonumber \\
% &= \frac{b-a}{2N} \lp f\lp x_{i}\rp - f\lp x_{i-1}\rp\rp - \int^{x_i}_{x_{i-1}}f\lp t \rp dt = L_i
% \end{align}
% Whence we have:
% \begin{align}
% L_i = \int^{x_i}_{x_{i-1}}\lp t-c_i\rp f'\lp t\rp dt
% \end{align}
% Integration by parts, (\ref{(9.6.6)}), and the Fundamental Theorem of Calculus then gives us:
% \begin{align}
% L_i &= \int^{x_i}_{x_{i-1}} f' \lp t \rp d \frac{\lp t-c_i \rp^2}{2} \nonumber\\
% &= \frac{\lp x_i - c_i\rp^2}{2} f' \lp x_i \rp - \frac{\lp x_{i-1} - c_i\rp^2}{2} f' \lp x_{i-1} \rp - \frac{1}{2} \int^{x_i}_{x_{i-1}} \lp t-c_i \rp^2 f'' \lp t\rp \nonumber\\
% &= \frac{1}{2}\lb \frac{b-a}{2N}\rb^2 \lp f'\lp x_i \rp - f' \lp x_{i-1}\rp \rp - \frac{1}{2} \int^{x_i}_{x_{i-1}} \lp t-c_i\rp^2 f'' \lp t \rp dt \nonumber\\
% &= \frac{1}{2} \int^{x_i}_{x_{i-1}} f'' \lp t \rp dt - \frac{1}{2} \int^{x_i}_{x_{i-1}} \lp t-c_i\rp^2 f'' \lp t\rp dt \nonumber \\
% &= \frac{1}{2} \int^{x_i}_{x_{i-1}}\lp \lb \frac{b-a}{2N} \rb^2 - \lp t-c_i\rp^2 \rp f'' \lp t\rp dt
% \end{align}
% Assuming that $f''\lp x \rp \les M$ within $\lb a,b \rb$ we then have that:
% \begin{align}
% \left| \int^b_af dx - \lp \real_{\rect}\lp \etr^{n,h} \rp\rp \lp x \rp \right| &\les \sum_{i=1}^N \left| L_i\right| \nonumber\\
% &\les \frac{1}{2}\sum^N_{i=1} \int^{x_i}_{x_{i-1}} \left| \lp \lb \frac{b-a}{2N} \rb^2 - \lp t-c_i\rp^2 \rp \right| \left| f'' \lp t\rp dt\right| \nonumber \\
% &\les \frac{M}{2} \sum_{i=1}^N \int^{x_i}_{x_{i-1}} \lb \frac{b-a}{2N}\rb^2 - \lp t-c_i\rp^2 dt \nonumber \\
% &= \frac{M}{2} \lp \lb \frac{b-2}{2N}\rb^2\lp b-a \rp - \frac{2n}{3} \lb \frac{b-a}{2N}\rb^3 \rp \nonumber\\
% &= \frac{M \lp b-a \rp^3}{12N^2}
% \end{align}
% This completes the proof of the lemma.
%\end{proof}
\section{Maximum Convolution Approximations for Multi-Dimensional Functions}
\subsection{The $\nrm^d_1$ Networks}
\begin{definition}[The $\nrm_1^d$ neural network]
We denote by $\lp \nrm_1^d \rp _{d\in \N} \subseteq \neu$ the family of neural networks that satisfy:
\begin{enumerate}[label = (\roman*)]
\item for $d=1$:\begin{align}\label{(9.7.1)}
\nrm^1_1 = \lp \lp \begin{bmatrix}
1 \\ -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & 1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix}\rp \rp \in \lp \R^{2 \times 1} \times \R^2 \rp \times \lp \R^{1 \times 2} \times \R^1 \rp
\end{align}
\item for $d \in \{2,3,...\}$: \begin{align}
\nrm_1^d = \sm_{d,1} \bullet \lb \boxminus_{i=1}^d \nrm_1^1 \rb
\end{align}
\end{enumerate}
\end{definition}
\begin{lemma}\label{9.7.2}\label{lem:nrm_prop}
Let $d \in \N$. It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item $\lay \lp \nrm^d_1 \rp = \lp d,2d,1 \rp$
\item $\real_{\rect} \lp \nrm^d_1\rp \in C \lp \R^d,\R \rp$
\item for all $x \in \R^d$ it is the case that $\lp \real_{\rect}\lp \nrm^d_1 \rp \rp \lp x \rp = \left\| x \right\|_1$
\item it holds $\hid\lp \nrm^d_1\rp=1$
\item it holds that $\param \lp \nrm_1^d \rp \les 7d^2$
\item it holds that $\dep\lp \nrm^d_1\rp =2 $
\end{enumerate}
\end{lemma}
\begin{proof}
Note that by observation, it is the case that $\lay\lp \nrm^1_1 \rp = \lp 1,2,1\rp$. This and Remark \ref{5.3.2} tell us that for all $d \in \{2,3,...\}$ it is the case that $\lay \lp \boxminus_{i=1}^d \nrm^1_1 \rp = \lp d,2d,d\rp$. This, Lemma \ref{comp_prop}, and Lemma \ref{5.3.2} ensure that for all $d \in \{2,3,4,...\}$ it is the case that $\lay\lp \nrm^d_1 \rp = \lp d,2d,1 \rp$, which in turn establishes Item (i).
Notice now that (\ref{(9.7.1)}) ensures that for all $x \in \R$ it is the case that:
\begin{align}
\lp \real_{\rect} \lp \nrm^1_1 \rp \rp \lp x \rp = \rect \lp x \rp + \rect \lp -x \rp = \max \{x,0 \} + \max \{ -x,0\} = \left| x \right| = \| x \|_1
\end{align}
This along with \cite[Proposition~2.19]{grohs2019spacetime} tells us that for all $d \in \{2,3,4,...\}$ and $x = \lp x_1,x_2,...,x_d\rp \in \R^d$ it is the case that:
\begin{align}
\lp \real_{\rect} \lb \boxminus^d_{i=1} \nrm^1_1\rb\rp \lp x \rp = \lp \left| x_1 \right|, \left| x_2\right|,..., \left| x_d \right| \rp
\end{align}
This together with Lemma \ref{depthofcomposition} tells us that:
\begin{align}
\lp \real_{\rect} \lp \nrm^d_1 \rp \rp \lp x \rp &= \lp \real_{\rect} \lp \sm_{d,1} \bullet \lb \boxminus_{i=1}^d \nrm^1_1\rb\rp \rp \lp x \rp \nonumber\\
&= \lp \real_{\rect} \lp \sm_{d,1} \rp \rp \lp |x_1|,|x_2|,...,|x_d|\rp = \sum^d_{i=1} |x_i| =\|x\|_1
\end{align}
This establishes Items (ii) and (iii). Note next that by observation $\hid\lp \nrm^1_1 \rp = 1$. Remark \ref{5.3.2} then tells us that, since the number of layers remains unchanged under stacking, it is the case that $\hid \lp \nrm^1_1 \rp = \hid \lp \boxminus_{i=1}^d \nrm_1^1\rp = 1$. Note next that Lemma \ref{5.2.3} tells us that $\hid \lp \sm_{d,1} \rp = 0$, whence Lemma \ref{comp_prop} tells us that:
\begin{align}
\hid \lp \nrm^d_1 \rp &= \hid \lp \sm_{d,1}\bullet \lb \boxminus_{i=1}^d \nrm^1_1 \rb \rp \nonumber \\
&= \hid \lp \sm_{d,1} \rp + \hid \lp \lb \boxminus_{i=1}^d \nrm^1_1 \rb \rp = 0+1=1
\end{align}
This establishes Item (iv). Note next that:
\begin{align}
\nrm^1_1 = \lp \lp \begin{bmatrix}
1 \\ -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & 1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix}\rp \rp \in \lp \R^{2 \times 1} \times \R^2 \rp \times \lp \R^{1 \times 2} \times \R^1 \rp
\end{align}
and as such $\param\lp \nrm^1_1 \rp = 7$. This, combined with Corollary \ref{cor:sameparal} and the fact that we are stacking identical neural networks, then tells us that:
\begin{align}
\param \lp \lb \boxminus_{i=1}^d \nrm_1^1 \rb \rp &\les 7d^2
\end{align}
Corollary \ref{affcor}, Lemma \ref{lem:5.5.4}, and Lemma \ref{comp_prop} then tell us that:
\begin{align}
\param \lp \nrm^d_1 \rp &= \param \lp \sm_{d,1} \bullet \lb \boxminus_{i=1}^d \nrm_1^1 \rb\rp \nonumber \\
&\les \param \lp \lb \boxminus_{i=1}^d \nrm_1^1 \rb \rp \les 7d^2
\end{align}
This establishes Item (v).
Finally, note that by observation $\dep \lp \nrm^1_1\rp = 2$ and that $\nrm^d_1$ is obtained by stacking copies of this same network and composing with $\sm_{d,1}$. Stacking has no effect on depth by Definition \ref{def:stacking}, and, since $\dep\lp \sm_{d,1}\rp = 1$, Lemma \ref{comp_prop} gives $\dep \lp \sm_{d,1} \bullet \lb \boxminus^d_{i=1} \nrm_1^1\rb \rp = \dep \lp \boxminus^d_{i=1} \nrm^1_1\rp$. Thus we may conclude that $\dep \lp \nrm^d_1\rp = \dep \lp \nrm_1^1\rp =2$. This establishes Item (vi).
This concludes the proof of the lemma.
\end{proof}
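The proof above can be mirrored by a short numerical check of Items (ii) and (iii): the hidden layer produces the pairs $\lp \rect(x_i), \rect(-x_i)\rp$, and the composed output layer sums them to $\left\| x \right\|_1$. The sketch below uses our own helper names and assumes NumPy; the matrix \texttt{W2} is the output layer of $\sm_{d,1}\bullet\lb \boxminus^d_{i=1}\nrm^1_1\rb$ written out explicitly as a single row of ones.
\begin{verbatim}
import numpy as np

relu = lambda z: np.maximum(z, 0.0)

def nrm1(d):
    # Nrm_1^d = Sum_{d,1} . [d-fold stacking of Nrm_1^1]; hidden width 2d.
    W1 = np.kron(np.eye(d), [[1.0], [-1.0]])   # (2d, d): produces pairs (x_i, -x_i)
    W2 = np.ones((1, 2 * d))                   # composed summing output layer
    return (W1, np.zeros(2 * d)), (W2, np.zeros(1))

d = 4
x = np.random.randn(d)
(W1, b1), (W2, b2) = nrm1(d)
out = W2 @ relu(W1 @ x + b1) + b2
assert np.isclose(out[0], np.abs(x).sum())   # (R_relu(Nrm_1^d))(x) = ||x||_1
\end{verbatim}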
\subsection{The $\mxm^d$ Neural Networks}
Given $x\in \R$, finding the maximum is trivial: $x$ itself is the maximum. For $x \in \R^2$ we may find the maximum via the network in (\ref{9.7.6}), i.e. $\mxm^2$. The strategy for higher dimensions is to take maxima of pairs of entries and halve repeatedly until only one maximum remains, as sketched after the remark below. For $x \in \R^d$ where $d$ is even we may stack $\frac{d}{2}$ copies of $\mxm^2$ to halve, and for $x \in \R^d$ where $d$ is odd and at least $3$ we may introduce ``padding'' via the $\id_1$ network and thus require $\frac{d-1}{2}$ copies of $\mxm^2$ to halve.
\begin{definition}[Maxima ANN representations]
Let $\lp \mxm ^d\rp_{d \in \N} \subseteq \neu$ represent the neural networks that satisfy:
\begin{enumerate}[label = (\roman*)]
\item for all $d \in \N$ that $\inn \lp \mxm^d \rp = d$
\item for all $d \in \N$ that $\out\lp \mxm^d \rp = 1$
\item that $\mxm^1 = \aff_{1,0} \in \R^{1 \times 1} \times \R^1$
\item that:
\begin{align}\label{9.7.6}
\mxm^2 = \lp \lp \begin{bmatrix}
1 & -1 \\ 0 & 1 \\ 0 & -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1&1&-1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix}\rp\rp
\end{align}
\item it holds for all $d \in \{2,3,...\}$ that $\mxm^{2d} = \mxm^d \bullet \lb \boxminus_{i=1}^d \mxm^2\rb$, and
\item it holds for all $d \in \{ 2,3,...\}$ that $\mxm^{2d-1} = \mxm^d \bullet \lb \lp \boxminus^{d-1}_{i=1} \mxm^2 \rp \boxminus \id_1\rb$.
\end{enumerate}
\end{definition}
\begin{remark}
Diagrammatically, this can be represented as:
\begin{figure}[h]
\begin{center}
\tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt
\begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1]
%uncomment if require: \path (0,498); %set diagram left start at 0, and has height of 498
%Shape: Rectangle [id:dp977616844446347]
\draw (560,138) -- (630,138) -- (630,178) -- (560,178) -- cycle ;
%Shape: Rectangle [id:dp8371611327934396]
\draw (560,206) -- (630,206) -- (630,246) -- (560,246) -- cycle ;
%Shape: Rectangle [id:dp900733317366978]
\draw (562,274) -- (632,274) -- (632,314) -- (562,314) -- cycle ;
%Shape: Rectangle [id:dp2381571768613]
\draw (565,350) -- (635,350) -- (635,390) -- (565,390) -- cycle ;
%Shape: Rectangle [id:dp47165779567431265]
\draw (568,425) -- (638,425) -- (638,465) -- (568,465) -- cycle ;
%Shape: Rectangle [id:dp26730884303141045]
\draw (438,175) -- (508,175) -- (508,215) -- (438,215) -- cycle ;
%Shape: Rectangle [id:dp43479154744962956]
\draw (439,310) -- (509,310) -- (509,350) -- (439,350) -- cycle ;
%Shape: Rectangle [id:dp14664308815255211]
\draw (302,234) -- (372,234) -- (372,274) -- (302,274) -- cycle ;
%Straight Lines [id:da5196233580766983]
\draw (437,196.5) -- (374.51,251.18) ;
\draw [shift={(373,252.5)}, rotate = 318.81] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da6126209944613533]
\draw (559,155.5) -- (509.66,188.88) ;
\draw [shift={(508,190)}, rotate = 325.92] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da5768174542895418]
\draw (558,224.5) -- (512.81,203.35) ;
\draw [shift={(511,202.5)}, rotate = 25.08] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da12927244412488015]
\draw (560,290.5) -- (510.66,323.88) ;
\draw [shift={(509,325)}, rotate = 325.92] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da9640125892832212]
\draw (566,372.5) -- (511.72,340.03) ;
\draw [shift={(510,339)}, rotate = 30.89] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da6747547273640673]
\draw (437,331.5) -- (373.38,264.95) ;
\draw [shift={(372,263.5)}, rotate = 46.29] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Shape: Rectangle [id:dp14455818861310843]
\draw (445,425) -- (515,425) -- (515,465) -- (445,465) -- cycle ;
%Shape: Rectangle [id:dp03375582603009031]
\draw (301,367) -- (371,367) -- (371,407) -- (301,407) -- cycle ;
%Shape: Rectangle [id:dp0789527597033911]
\draw (163,296) -- (233,296) -- (233,336) -- (163,336) -- cycle ;
%Straight Lines [id:da6246849218035846]
\draw (302,252.5) -- (236.47,313.14) ;
\draw [shift={(235,314.5)}, rotate = 317.22] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da5611532984284957]
\draw (299,390.5) -- (235.38,323.95) ;
\draw [shift={(234,322.5)}, rotate = 46.29] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da6903134547643467]
\draw (162,315.5) -- (108,315.5) ;
\draw [shift={(106,315.5)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da647770723003481]
\draw (567,447.5) -- (518,447.5) ;
\draw [shift={(516,447.5)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da1985911653622896]
\draw (443,448.5) -- (373.61,397.68) ;
\draw [shift={(372,396.5)}, rotate = 36.22] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da06349763555732901]
\draw (437,342) -- (373.67,383.41) ;
\draw [shift={(372,384.5)}, rotate = 326.82] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
% Text Node
\draw (574,150.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (574,214.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (576,283.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (579,358.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (585,428.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (453,185.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (456,322.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (316,242.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (470,434.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Id}_{1}$};
% Text Node
\draw (317,377.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
% Text Node
\draw (177,305.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{2}$};
\end{tikzpicture}
\end{center}
\caption{Neural network diagram for $\mxm^5$.}
\end{figure}
\end{remark}
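The recursive halving scheme can likewise be checked numerically. The sketch below (our own illustration, assuming NumPy) implements the realization of $\mxm^2$ from (\ref{9.7.6}) and applies the pairing-with-padding recursion of the definition at the level of realizations; it does not construct the network tuples themselves.
\begin{verbatim}
import numpy as np

relu = lambda z: np.maximum(z, 0.0)

def mxm2(x1, x2):
    # Realization of Mxm^2 from (9.7.6): max{x1-x2,0} + max{x2,0} - max{-x2,0}.
    W1 = np.array([[1.0, -1.0], [0.0, 1.0], [0.0, -1.0]])
    W2 = np.array([[1.0, 1.0, -1.0]])
    return (W2 @ relu(W1 @ np.array([x1, x2])))[0]

def mxm(xs):
    # Recursive halving: pair entries through Mxm^2; an odd leftover passes through Id_1.
    xs = list(xs)
    if len(xs) == 1:
        return xs[0]
    out = [mxm2(xs[i], xs[i + 1]) for i in range(0, len(xs) - 1, 2)]
    if len(xs) % 2 == 1:
        out.append(xs[-1])   # Id_1 "padding"
    return mxm(out)

x = np.random.randn(5)
assert np.isclose(mxm(x), x.max())   # matches the Mxm^5 diagram above
\end{verbatim}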
\begin{lemma}\label{9.7.4}\label{lem:mxm_prop}
Let $d \in \N$. It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item $\hid \lp \mxm^d \rp = \lceil \log_2 \lp d \rp \rceil $
\item for all $i \in \N$ that $\wid_i \lp \mxm^d \rp \les 3 \left\lceil \frac{d}{2^i} \right\rceil$
\item $\real_{\rect} \lp \mxm^d\rp \in C \lp \R^d, \R \rp$ and
\item for all $x = \lp x_1,x_2,...,x_d \rp \in \R^d$ we have that $\lp \real_{\rect} \lp \mxm^d \rp \rp \lp x \rp = \max \{x_1,x_2,...,x_d \}$.
\item $\param \lp \mxm^d \rp \les \lp \frac{4}{3}d^2+3d\rp \lp 1+\lp \frac{1}{2}\rp^{\left\lceil \log_2\lp d\rp\right\rceil+1}\rp$
\item $\dep \lp \mxm^d\rp = \left\lceil \log_2 \lp d\rp \right\rceil + 1$
\end{enumerate}
\end{lemma}
\begin{proof}
Assume w.l.o.g. that $d > 1$. Note that (\ref{9.7.6}) ensures that $\hid \lp \mxm^2 \rp = 1$. This and (\ref{5.2.5}) then tell us that for all $d \in \{2,3,4,...\}$ it is the case that:
\begin{align}
\hid \lp \boxminus_{i=1}^d \mxm^2\rp = \hid \lp \lb \boxminus_{i=1}^d \mxm^2 \rb \boxminus \id_1 \rp = \hid \lp \mxm^2 \rp = 1 \nonumber
\end{align}
This and Lemma \ref{comp_prop} tells us that for all $d \in \{3,4,5,...\}$ it holds that:
\begin{align}\label{9.7.7}
\hid \lp \mxm^d \rp = \hid \lp \mxm^{\left\lceil \frac{d}{2} \right\rceil}\rp + 1
\end{align}
And for $d \in \{4,6,8,...\}$ with $\hid \lp \mxm^{\left\lceil \frac{d}{2} \right\rceil} \rp = \left\lceil \log_2 \lp \frac{d}{2} \rp\right\rceil$ it holds that:
\begin{align}\label{9.7.8}
\hid \lp \mxm^d \rp = \left\lceil \log_2 \lp \frac{d}{2} \rp\right\rceil + 1 = \left\lceil \log_2 \lp d \rp -1 \right\rceil +1 = \left\lceil \log_2 \lp d \rp \right\rceil
\end{align}
Moreover (\ref{9.7.7}) and the fact that for all $d \in \{3,5,7,...\}$ it holds that $\left\lceil \log_2 \lp d+1 \rp \right\rceil = \left\lceil \log_2 \lp d \rp \right\rceil$ ensures that for all $d \in \{3,5,7,...\}$ with $\hid \lp \mxm^{\left\lceil \frac{d}{2}\right\rceil}\rp = \left\lceil \log_2 \lp \left\lceil \frac{d}{2} \right\rceil\rp \right\rceil$ it holds that:
\begin{align}
\hid \lp \mxm^d\rp &= \left\lceil \log_2 \lp \left\lceil \frac{d}{2} \right\rceil\rp \right\rceil + 1 = \left\lceil \log_2 \lp \left\lceil \frac{d+1}{2} \right\rceil\rp \right\rceil + 1 \nonumber\\
&= \left\lceil \log_2 \lp d+1\rp-1 \right\rceil + 1 = \left\lceil \log_2 \lp d+1 \rp \right\rceil = \left\lceil \log_2 \lp d \rp \right\rceil
\end{align}
This and (\ref{9.7.8}) demonstrate that for all $d \in \{3,4,5,...\}$ with $\forall k \in \{2,3,...,d-1\}: \hid \lp \mxm^k\rp = \left\lceil \log_2 \lp k \rp \right\rceil$ it holds that $\hid \lp \mxm^d \rp = \left\lceil \log_2 \lp d \rp \right\rceil$. The fact that $\hid \lp \mxm^2 \rp =1$ and induction establish Item (i).
We next note that $\lay \lp \mxm^2 \rp = \lp 2,3,1 \rp$. This then indicates that for all $i\in \N$ it is the case that:
\begin{align}\label{9.7.10}
\wid_i \lp \mxm^2 \rp \les 3 = 3 \left\lceil \frac{2}{2^i} \right\rceil.
\end{align}
Note then that Lemma \ref{comp_prop} and Remark \ref{5.3.2} tell us that:
\begin{align}\label{9.7.11}
\wid_i \lp \mxm^{2d} \rp = \begin{cases}
3d &:i=1 \\
\wid_{i-1}\lp \mxm^d \rp &:i\ges 2
\end{cases}
\end{align}
And:
\begin{align}\label{9.7.12}
\wid_i \lp \mxm^{2d-1}\rp = \begin{cases}
3d-1 &:i=1 \\
\wid_{i-1}\lp \mxm^d \rp &:i \ges 2
\end{cases}
\end{align}
This in turn assures us that for all $d \in \{ 2,4,6,...\}$ it holds that:
\begin{align}\label{9.7.13}
\wid_1 \lp \mxm^d \rp = 3\lp \frac{d}{2} \rp \les 3 \left\lceil \frac{d}{2} \right\rceil
\end{align}
Moreover, note that (\ref{9.7.12}) tells us that for all $d \in \{3,5,7,...\}$ it holds that:
\begin{align}
\wid_1 \lp \mxm^d \rp = 3\left\lceil \frac{d}{2}\right\rceil -1 \les 3 \left\lceil \frac{d}{2} \right\rceil
\end{align}
This and (\ref{9.7.13}) show that for all $d \in \{2,3,...\}$ it holds that:
\begin{align}\label{9.7.15}
\wid_1 \lp \mxm^d\rp \les 3 \left\lceil \frac{d}{2}\right\rceil
\end{align}
Additionally note that (\ref{9.7.11}) demonstrates that for all $d \in \{ 4,6,8,...\}$, $i \in \{2,3,...\}$ with $\wid_{i-1} \lp \mxm^{\frac{d}{2}} \rp \les 3 \left\lceil \lp \frac{d}{2}\rp \frac{1}{2^{i-1}}\right\rceil$ it holds that:
\begin{align}\label{9.7.16}
\wid_i \lp \mxm^d \rp = \wid_{i-1}\lp \mxm^{\frac{d}{2}}\rp \les 3 \left\lceil \lp \frac{d}{2}\rp \frac{1}{2^{i-1}} \right\rceil = 3 \left\lceil \frac{d}{2^i} \right\rceil
\end{align}
Furthermore note also the fact that for all $d \in \{3,5,7,...\}$, $i \in \N$ it holds that $\left\lceil \frac{d+1}{2^i} \right\rceil = \left\lceil \frac{d}{2^i}\right\rceil$ and (\ref{9.7.12}) assure that for all $d \in \{3,5,7,...\}$, $i\in \{2,3,...\}$ with $\wid_{i-1} \lp \mxm^{\left\lceil \frac{d}{2}\right\rceil}\rp \les 3 \left\lceil \left\lceil \frac{d}{2}\right\rceil \frac{1}{2^{i-1}}\right\rceil$ it holds that:
\begin{align}
\wid_i \lp \mxm^d \rp = \wid_{i-1} \lp \mxm^{\left\lceil \frac{d}{2}\right\rceil}\rp \les 3 \left\lceil \left\lceil \frac{d}{2} \right\rceil \frac{1}{2^{i-1}} \right\rceil = 3 \left\lceil \frac{d+1}{2^i}\right\rceil = 3 \left\lceil \frac{d}{2^i} \right\rceil
\end{align}
This and (\ref{9.7.16}) tell us that for all $d \in \{3,4,...\}$, $i \in \{2,3,...\}$ with $\forall k \in \{2,3,...,d-1\}$, $j \in \{1,2,...,i-1\}: \wid_j \lp \mxm^k \rp \les 3 \left\lceil \frac{k}{2^j} \right\rceil$ it holds that:
\begin{align}
\wid_i \lp \mxm^d \rp \les 3 \left\lceil \frac{d}{2^i}\right\rceil
\end{align}
This, combined with (\ref{9.7.10}) and (\ref{9.7.15}) and with induction, establishes Item (ii).
Next observe that (\ref{9.7.6}) tells us that for $x = \begin{bmatrix}
	x_1 \\ x_2
	\end{bmatrix} \in \R^2$ it is the case that:
\begin{align}
\lp\real_{\rect} \lp \mxm^2 \rp \rp \lp x \rp &= \max \{x_1-x_2,0\} + \max\{x_2,0 \} - \max\{ -x_2,0\} \nonumber \\
&= \max \{x_1-x_2,0\} + x_2 = \max\{x_1,x_2\}
\end{align}
Note next that Lemma \ref{idprop}, Lemma \ref{comp_prop}, and \cite[Proposition~2.19]{grohs2019spacetime} then imply that for all $d \in \{2,3,4,...\}$ it holds that $\real_{\rect} \lp \mxm^d \rp \in C \lp \R^d,\R \rp$ and that for all $x = \lp x_1,x_2,...,x_d\rp \in \R^d$ it holds that $\lp \real_{\rect} \lp \mxm^d \rp \rp \lp x \rp = \max\{ x_1,x_2,...,x_d \}$. This establishes Items (iii)\textemdash(iv).
Consider now the fact that Item (ii) implies that the layer widths are bounded by a geometric sequence with ratio $\frac{1}{2}$, whence the number of bias parameters is bounded by:
\begin{align}
\frac{\frac{3d}{2} \lp 1 - \lp \frac{1}{2} \rp^{\left\lceil \log_2 \lp d\rp\right\rceil +1} \rp }{\frac{1}{2}} &= 3d \lp 1 - \frac{1}{2}^{\left\lceil \log_2 \lp d \rp \right\rceil +1}\rp \nonumber \\
&\les \left\lceil 3d \lp 1 - \frac{1}{2}^{\left\lceil \log_2 \lp d \rp \right\rceil +1}\rp \right\rceil
\end{align}
For the weight parameters, note that the layer widths are bounded by a geometric sequence with ratio $\frac{1}{2}$, that Item (i) bounds the number of hidden layers, and that $\wid_0 \lp \mxm^d\rp = d$. The number of weight parameters is thus bounded by:
\begin{align}
&\sum^{\left\lceil \log_2\lp d\rp\right \rceil}_{i=0} \lb \lp \frac{1}{2}\rp ^i \cdot \wid_0\lp \mxm^d\rp \cdot \lp \frac{1}{2}\rp^{i+1}\cdot \wid_0 \lp \mxm^d\rp\rb \nonumber \\
&= \sum^{\left\lceil \log_2\lp d\rp\right\rceil}_{i=0} \lb \lp \frac{1}{2}\rp^{2i+1}\lp \wid_0 \lp \mxm^d\rp\rp^2\rb \nonumber \\
&= \frac{1}{2} \sum^{\left\lceil \log_2 \lp d\rp \right\rceil}_{i=0} \lb \lp \lp \frac{1}{2}\rp^{i} \wid_0\lp \mxm^d\rp\rp^2\rb
= \frac{1}{2} \sum^{\left\lceil \log_2\lp d\rp\right\rceil}_{i=0} \lb \lp \frac{1}{4}\rp^id^2\rb
\end{align}
Summing this geometric series with ratio $\frac{1}{4}$ then reveals that:
\begin{align}
\frac{1}{2} \sum^{\left\lceil \log_2\lp d\rp\right\rceil}_{i=0} \lb \lp \frac{1}{4}\rp^id^2\rb \les \frac{2}{3} d^2\lp 1- \frac{1}{2}^{2\lp \left\lceil \log_2(d)\right\rceil + 1\rp}\rp
\end{align}
Thus, we get that:
\begin{align}
\param \lp \mxm^d\rp &\les \frac{2}{3} d^2\lp 1- \frac{1}{2}^{2\lp \left\lceil \log_2(d)\right\rceil + 1 \rp}\rp + \left\lceil 3d \lp 1 - \frac{1}{2}^{\left\lceil \log_2 \lp d \rp \right\rceil +1}\rp \right\rceil \nonumber\\
&\les \frac{2}{3} d^2\lp 1- \frac{1}{2}^{2\lp \left\lceil \log_2(d)\right\rceil + 1 \rp}\rp + \left\lceil 3d \lp 1 - \frac{1}{2}^{2\lp\left\lceil \log_2 \lp d \rp \right\rceil +1\rp}\rp\right\rceil\\
&\les \left\lceil \lp \frac{2}{3}d^2+3d\rp \lp 1+\frac{1}{2}^{2\lp \left\lceil \log_2\lp d\rp\right\rceil+1 \rp}\rp + 1 \right\rceil
\end{align}
Since for all $d \in \N$ it holds that $\left\lceil \lp \frac{2}{3}d^2+3d\rp \lp 1+\frac{1}{2}^{2\lp \left\lceil \log_2\lp d\rp\right\rceil+1 \rp}\rp + 1 \right\rceil \les \lp \frac{4}{3}d^2+3d\rp \lp 1+\frac{1}{2}^{\left\lceil \log_2\lp d\rp\right\rceil+1}\rp$, this proves Item (v).
Item (vi) is a straightforward consequence of Item (i). This completes the proof of the lemma.
\end{proof}
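Before moving on, the recursion underlying $\mxm^d$ can be sanity-checked numerically. The following Python sketch (illustrative only, and entirely outside the $\neu$ formalism of this chapter; the names \texttt{relu}, \texttt{max2}, and \texttt{mxm} are ad hoc) realizes the pairwise maximum $\max\{x_1,x_2\} = \max\{x_1-x_2,0\}+\max\{x_2,0\}-\max\{-x_2,0\}$ using only ReLU evaluations and applies it in the same halving pattern, so that the number of halving passes agrees with Item (i):
\begin{verbatim}
import math

def relu(z):
    return max(z, 0.0)

def max2(a, b):
    # relu(a - b) + relu(b) - relu(-b) = max{a - b, 0} + b = max{a, b}
    return relu(a - b) + relu(b) - relu(-b)

def mxm(xs):
    # Halving pattern of Mxm^d: pair up entries, take pairwise maxima,
    # and pass an unpaired last entry straight through (the Id_1 branch).
    passes = 0
    while len(xs) > 1:
        nxt = [max2(xs[i], xs[i + 1]) for i in range(0, len(xs) - 1, 2)]
        if len(xs) % 2 == 1:
            nxt.append(xs[-1])
        xs, passes = nxt, passes + 1
    return xs[0], passes

vals = [3.0, -1.5, 7.25, 0.0, 2.5]
m, hidden = mxm(vals)
assert m == max(vals)                                # Item (iv)
assert hidden == math.ceil(math.log2(len(vals)))     # Item (i)
\end{verbatim}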
\subsection{The $\mathsf{MC}$ Neural Network and Approximations via Maximum Convolutions }
Let $f: [a,b] \rightarrow \R$ be a continuous bounded function with Lipschitz constant $L$. Let $x_0 \les x_1 \les \cdots \les x_N$ be sample points in $[a,b]$; these may, for instance, be drawn as $x_i \sim \unif([a,b])$ for each $i \in \{0,1,\hdots, N\}$ and then relabeled in increasing order. Define the functions $f_0,f_1,\hdots, f_N: [a,b] \rightarrow \R$ for all $i \in \{0,1,\hdots, N\}$ by:
\begin{align}
	f_i \lp x \rp = f(x_i) - L \cdot \left| x-x_i\right|
\end{align}
We will call the approximant $\max_{i \in \{0,1,\hdots, N\}}\{ f_i\}$ the \textit{maximum convolution approximation}. It converges to $f$ as the sample points fill out $[a,b]$, as the lemmas below make precise.
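As a quick numerical illustration (a minimal sketch only; the choice of $f$, the interval, and the number of samples below are arbitrary and not part of the formal development), the maximum convolution approximation can be computed directly from the samples:
\begin{verbatim}
import numpy as np

rng = np.random.default_rng(0)
a, b, L, N = 0.0, 2.0 * np.pi, 1.0, 30
f = np.sin                                   # |sin'| <= 1, so L = 1 works
x_samp = np.sort(rng.uniform(a, b, N + 1))   # x_0 <= x_1 <= ... <= x_N

def max_conv(x):
    # max over i of f_i(x) = f(x_i) - L * |x - x_i|
    return np.max(f(x_samp) - L * np.abs(x - x_samp))

grid = np.linspace(a, b, 2001)
sup_err = max(abs(max_conv(t) - f(t)) for t in grid)
print(f"sup error of the maximum convolution approximant: {sup_err:.4f}")
\end{verbatim}
The printed sup-norm error shrinks as the samples become denser in $[a,b]$, in line with the error bounds established later in this section.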
\begin{lemma}\label{(9.7.5)}\label{lem:mc_prop}
Let $d,N\in \N$, $L\in \lb 0,\infty \rp$, $x_1,x_2,\hdots, x_N \in \R^d$, $y = \lp y_1,y_2,\hdots,y_N \rp \in \R^N$, and let $\mathsf{MC}^{N,d}_{x,y} \in \neu$ satisfy:
\begin{align}\label{9.7.20}
\mathsf{MC}^{N,d}_{x,y} = \mxm^N \bullet \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus_{i=1}^N \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x_i} \rb \rp \bullet \cpy_{N,d}
\end{align}
It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item $\inn \lp \mathsf{MC}^{N,d}_{x,y} \rp = d$
\item $\out\lp \mathsf{MC}^{N,d}_{x,y} \rp = 1$
\item $\hid \lp \mathsf{MC}^{N,d}_{x,y} \rp = \left\lceil \log_2 \lp N \rp \right\rceil +1$
\item $\wid_1 \lp \mathsf{MC}^{N,d}_{x,y} \rp = 2dN$
\item for all $i \in \{ 2,3,...\}$ we have $\wid_i \lp \mathsf{MC}^{N,d}_{x,y} \rp \les 3 \left\lceil \frac{N}{2^{i-1}} \right\rceil$
\item it holds for all $x \in \R^d$ that $\lp \real_{\rect} \lp \mathsf{MC}^{N,d}_{x,y} \rp \rp \lp x \rp = \max_{i \in \{1,2,...,N\}} \lp y_i - L \left\| x-x_i \right\|_1\rp$
\item it holds that $\param \lp \mathsf{MC}^{N,d}_{x,y} \rp \les \left\lceil \lp \frac{2}{3}d^2+3d\rp \lp 1+\frac{1}{2}^{2\lp \left\lceil \log_2\lp d\rp\right\rceil+1 \rp}\rp + 1 \right\rceil + 7N^2d^2 + 3\left\lceil \frac{N}{2}\right\rceil \cdot 2dN$
\end{enumerate}
\end{lemma}
\begin{proof}
Throughout this proof let $\mathsf{S}_i \in \neu$ satisfy for all $i \in \{1,2,...,N\}$ that $\mathsf{S}_i = \nrm_1^d \bullet \aff_{\mathbb{I}_d,-x_i}$ and let $\mathsf{X} \in \neu$ satisfy:
\begin{align}
\mathsf{X} = \aff_{-L\mathbb{I}_N,y} \bullet \lp \lb \boxminus_{i=1}^N \mathsf{S}_i \rb \rp \bullet \cpy_{N,d}
\end{align}
Note that (\ref{9.7.20}) and Lemma \ref{comp_prop} tell us that $\out \lp \mathsf{MC}^{N,d}_{x,y} \rp = \out \lp \mxm^N \rp = 1$ and $\inn \lp \mathsf{MC}^{N,d}_{x,y} \rp = \inn \lp \cpy_{N,d} \rp =d $. This proves Items (i)--(ii). Next observe that since it is the case that $\hid \lp \cpy_{N,d} \rp = \hid \lp \aff_{-L\mathbb{I}_N,y} \rp = 0$ and $\hid \lp \nrm^d_1 \rp = 1$, Lemma \ref{comp_prop} then tells us that:
\begin{align}
\hid \lp \mathsf{X} \rp = \hid \lp\aff_{-L\mathbb{I}_N,y} \rp + \hid \lp \boxminus_{i=1}^N \mathsf{S}_i\rp + \hid \lp \cpy_{N,d} \rp = 1
\end{align}
Thus Lemma \ref{comp_prop} and Lemma \ref{9.7.4} then tell us that:
\begin{align}
	\hid \lp \mathsf{MC}^{N,d}_{x,y} \rp = \hid \lp \mxm^N \bullet \mathsf{X}\rp = \hid \lp \mxm^N \rp + \hid \lp \mathsf{X}\rp = \left\lceil \log_2 \lp N \rp \right\rceil +1
\end{align}
This in turn establishes Item (iii).
Note next that Lemma \ref{comp_prop} and \cite[Proposition~2.20]{grohs2019spacetime} tells us that:
\begin{align}\label{8.3.33}
\wid_1 \lp \mathsf{MC}^{N,d}_{x,y} \rp = \wid_1 \lp \mathsf{X} \rp = \wid_1 \lp \boxminus^N_{i=1} \mathsf{S}_i\rp = \sum^N_{i=1} \wid_1 \lp \mathsf{S}_i \rp = \sum^N_{i=1} \wid_1 \lp \nrm^d_1 \rp = 2dN
\end{align}
This establishes Item (iv).
Next observe that the fact that $\hid \lp \mathsf{X} \rp=1$, Lemma \ref{comp_prop}, and Lemma \ref{9.7.4} tell us that for all $i \in \{2,3,...\}$ it is the case that:
\begin{align}
\wid_i \lp \mathsf{MC}^{N,d}_{x,y} \rp = \wid_{i-1} \lp \mxm^N \rp \les 3 \left\lceil \frac{N}{2^{i-1}} \right\rceil
\end{align}
This establishes Item (v).
Next observe that Lemma \ref{9.7.2} and Lemma \ref{5.3.3} tell us that for all $x \in \R^d$, $i \in \{1,2,...,N\}$ it holds that:
\begin{align}
	\lp \real_{\rect} \lp \mathsf{S}_i \rp \rp \lp x \rp = \lp \real_{\rect}\lp \nrm^d_1 \rp \circ \real_{\rect}\lp \aff_{\mathbb{I}_d,-x_i} \rp \rp \lp x \rp = \left\| x-x_i \right\|_1
\end{align}
This and \cite[Proposition~2.20]{grohs2019spacetime} combined establish that for all $x \in \R^d$ it holds that:
\begin{align}
\lp \real_{\rect} \lp \lb \boxminus_{i=1}^N \mathsf{S}_i \rb \bullet \cpy_{N,d} \rp \rp \lp x \rp = \lp \| x-x_1 \|_1, \|x-x_2\|_1,...,\|x-x_N\|_1\rp
\end{align}
This and Lemma \ref{5.3.3} establishes that for all $x \in \R^d$ it holds that:
\begin{align}
\lp \real_{\rect}\lp \mathsf{X}\rp \rp \lp x \rp &= \lp \real_{\rect}\lp \aff_{-L\mathbb{I}_N,y}\rp\rp \circ \lp\real_{\rect} \lp \lb \boxminus_{i=1}^N \mathsf{S}_i\rb \bullet \cpy_{N,d}\rp \rp \lp x \rp \nonumber\\
&= \lp y_1-L \|x-x_1 \|_1, y_2-L\|x-x_2\|_1,...,y_N-L \| x-x_N \|_1\rp
\end{align}
Then Lemma \ref{comp_prop} and Lemma \ref{9.7.4} tell us that for all $x\in \R^d$ it holds that:
\begin{align}
\lp \real_{\rect} \lp \mathsf{MC}^{N,d}_{x,y} \rp \rp \lp x \rp &= \lp \real_{\rect}\lp \mxm^N \rp \circ \lp \real_{\rect}\lp \mathsf{X} \rp \rp \rp \lp x \rp \nonumber \\
&= \lp \real_{\rect}\lp \mxm^N \rp \rp \lp y_1-L \|x-x_1\|_1,y_2-L\|x-x_2\|_1,...,y_N-L\|x-x_N\|_1\rp \nonumber\\
&=\max_{i\in \{1,2,...,N\} } \lp y_i - L \|x-x_i\|_1\rp
\end{align}
This establishes Item (vi).
For Item (vii) note that Lemma \ref{lem:nrm_prop}, Remark \ref{rem:stk_remark}, and Corollary \ref{affcor} tell us that for all $d\in \N$ and $x \in \R^d$ it is the case that:
\begin{align}
\param \lp \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x}\rp \les \param \lp \nrm_1^d\rp \les 7d^2
\end{align}
This, along with Corollary \ref{cor:sameparal} and the fact that we are stacking identical neural networks, then tells us that for all $N \in \N$ it is the case that:
\begin{align}
\param \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \les 7N^2d^2
\end{align}
Observe next that Corollary \ref{affcor} tells us that for all $d,N \in \N$ and $x \in \R^d$ it is the case that:
\begin{align}\label{8.3.38}
\param \lp \lp \boxminus^N_{i=1} \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x}\rb\rp \bullet \cpy_{N,d}\rp \les \param \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \les 7N^2d^2
\end{align}
Now, let $d,N \in \N$, $L \in [0,\infty)$, let $x_1,x_2,\hdots, x_N \in \R^d$, and let $y = \lp y_1,y_2,\hdots, y_N \rp \in \R^N$. Observe that Corollary \ref{affcor} and (\ref{8.3.38}) again tell us that:
\begin{align}
\param\lp \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus_{i=1}^N \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x_i} \rb \rp \bullet \cpy_{N,d}\rp \nonumber\\ \les \param \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \les 7N^2d^2 \nonumber
\end{align}
Finally, Lemma \ref{comp_prop}, (\ref{8.3.33}), and Lemma \ref{lem:mxm_prop} yield that:
\begin{align}
\param(\mathsf{MC}^{N,d}_{x,y}) &= \param \lp \mxm^N \bullet \aff_{-L\mathbb{I}_N,y} \bullet \lp \boxminus_{i=1}^N \lb \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x_i} \rb \rp \bullet \cpy_{N,d} \rp \nonumber\\
&\les \param \lp \mxm^N \bullet \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb \rp \rp \nonumber\\
&\les \param \lp \mxm^N \rp + \param \lp \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \rp + \nonumber\\ &\wid_1\lp \mxm^N\rp \cdot \wid_{\hid \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp} \lp \boxminus_{i=1}^N \lb \nrm^d_1\bullet \aff_{\mathbb{I}_d, -x} \rb\rp \nonumber \\
&\les \left\lceil \lp \frac{2}{3}d^2+3d\rp \lp 1+\frac{1}{2}^{2\lp \left\lceil \log_2\lp d\rp\right\rceil+1 \rp}\rp + 1 \right\rceil + 7N^2d^2 + 3\left\lceil \frac{N}{2}\right\rceil \cdot 2dN
\end{align}
\end{proof}
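For concreteness, the function that $\mathsf{MC}^{N,d}_{x,y}$ realizes according to Item (vi) can be evaluated directly without assembling the network. The sketch below (illustrative only; the two-dimensional data and the choice of $f$ are arbitrary assumptions made for demonstration) does so and also checks that the approximant interpolates the data whenever the $y_i$ are samples of an $L$-Lipschitz function with respect to the $1$-norm:
\begin{verbatim}
import math
import numpy as np

def mc_realization(x, xs, ys, L):
    # Item (vi): max_i ( y_i - L * || x - x_i ||_1 )
    x = np.asarray(x, dtype=float)
    return max(y_i - L * np.abs(x - x_i).sum() for x_i, y_i in zip(xs, ys))

L = 1.0
xs = [np.array([0.0, 0.0]), np.array([1.0, 0.5]), np.array([0.25, 1.0])]
f = lambda p: abs(p[0]) + abs(p[1])        # 1-Lipschitz w.r.t. the 1-norm
ys = [f(p) for p in xs]

for x_j, y_j in zip(xs, ys):               # the approximant reproduces the data
    assert abs(mc_realization(x_j, xs, ys, L) - y_j) < 1e-12

print(mc_realization([0.5, 0.5], xs, ys, L))
print("hidden layers (Item (iii)):", math.ceil(math.log2(len(xs))) + 1)
\end{verbatim}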
\begin{remark}
We may represent the neural network diagram for $\mathsf{MC}^{N,d}_{x,y}$ as follows:
\end{remark}
\begin{figure}[h]
\begin{center}
\tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt
\begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1]
%uncomment if require: \path (0,560); %set diagram left start at 0, and has height of 560
%Shape: Rectangle [id:dp1438938274656144]
\draw (574,235) -- (644,235) -- (644,275) -- (574,275) -- cycle ;
%Straight Lines [id:da7383135897500558]
\draw (574,241) -- (513.72,84.37) ;
\draw [shift={(513,82.5)}, rotate = 68.95] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da09141712653411305]
\draw (572,251) -- (514.14,168.14) ;
\draw [shift={(513,166.5)}, rotate = 55.08] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da19953508691566213]
\draw (573,259) -- (515.07,350.81) ;
\draw [shift={(514,352.5)}, rotate = 302.25] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da5900315817761441]
\draw (575,268) -- (515.66,436.61) ;
\draw [shift={(515,438.5)}, rotate = 289.39] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Shape: Rectangle [id:dp9847868081693099]
\draw (421,59) -- (512,59) -- (512,99) -- (421,99) -- cycle ;
%Shape: Rectangle [id:dp2868551357079474]
\draw (419,330) -- (510,330) -- (510,370) -- (419,370) -- cycle ;
%Shape: Rectangle [id:dp9383613429980815]
\draw (420,150) -- (511,150) -- (511,190) -- (420,190) -- cycle ;
%Shape: Rectangle [id:dp5827241951133133]
\draw (420,420) -- (511,420) -- (511,460) -- (420,460) -- cycle ;
%Shape: Rectangle [id:dp7299058955170046]
\draw (290,60) -- (381,60) -- (381,100) -- (290,100) -- cycle ;
%Shape: Rectangle [id:dp08440877870624452]
\draw (290,150) -- (381,150) -- (381,190) -- (290,190) -- cycle ;
%Shape: Rectangle [id:dp7098854649776141]
\draw (290,330) -- (381,330) -- (381,370) -- (290,370) -- cycle ;
%Shape: Rectangle [id:dp6165394921489369]
\draw (290,420) -- (381,420) -- (381,460) -- (290,460) -- cycle ;
%Straight Lines [id:da37215648665173995]
\draw (420,80) -- (401,80) -- (382,80) ;
\draw [shift={(380,80)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da2316129338082229]
\draw (420,170) -- (401,170) -- (382,170) ;
\draw [shift={(380,170)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da40267704179559083]
\draw (419,350) -- (400,350) -- (381,350) ;
\draw [shift={(379,350)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da5321116904741454]
\draw (420,440) -- (401,440) -- (382,440) ;
\draw [shift={(380,440)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Shape: Rectangle [id:dp9652335934440622]
\draw (170,60) -- (250,60) -- (250,460) -- (170,460) -- cycle ;
%Straight Lines [id:da2568661285688787]
\draw (170,240) -- (132,240) ;
\draw [shift={(130,240)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da9075024165320872]
\draw (290,80) -- (252,80) ;
\draw [shift={(250,80)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da36733568107592385]
\draw (290,170) -- (252,170) ;
\draw [shift={(250,170)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da027221622247677213]
\draw (290,350) -- (252,350) ;
\draw [shift={(250,350)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da456971589533403]
\draw (290,440) -- (252,440) ;
\draw [shift={(250,440)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Shape: Rectangle [id:dp5834320101871477]
\draw (60,220) -- (130,220) -- (130,260) -- (60,260) -- cycle ;
%Straight Lines [id:da39697402951042593]
\draw (60,240) -- (22,240) ;
\draw [shift={(20,240)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da09195032177210305]
\draw (690,250) -- (652,250) ;
\draw [shift={(650,250)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
% Text Node
\draw (583,245.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Cpy}_{N}{}_{,d}$};
% Text Node
\draw (441,66.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mathbb{I}}{}_{_{d}}{}_{-x}{}_{_{i}}$};
% Text Node
\draw (442,158.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mathbb{I}}{}_{_{d}}{}_{-x}{}_{_{i}}$};
% Text Node
\draw (442,338.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mathbb{I}}{}_{_{d}}{}_{-x}{}_{_{i}}$};
% Text Node
\draw (442,428.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mathbb{I}}{}_{_{d}}{}_{-x}{}_{_{i}}$};
% Text Node
\draw (318,72.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Nrm}_{1}^{d}$};
% Text Node
\draw (318,159.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Nrm}_{1}^{d}$};
% Text Node
\draw (318,339.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Nrm}_{1}^{d}$};
% Text Node
\draw (321,427.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Nrm}_{1}^{d}$};
% Text Node
\draw (322,237.4) node [anchor=north west][inner sep=0.75pt] [font=\LARGE] {$\vdots $};
% Text Node
\draw (462,232.4) node [anchor=north west][inner sep=0.75pt] [font=\LARGE] {$\vdots $};
% Text Node
\draw (181,238.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{-L}{}_{\mathbb{I}}{}_{_{N} ,y}$};
% Text Node
\draw (71,231.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Mxm}^{N}$};
\end{tikzpicture}
\end{center}
\caption{Neural network diagram for the $\mathsf{MC}^{N,d}_{x,y}$ network.}
\end{figure}
\subsection{Lipschitz Function Approximations}\label{(9.7.6)}
\begin{lemma}\label{lem:lip_ext}%TODO: Should we stipulate compact sets?
Let $\lp E,d \rp$ be a metric space. Let $L \in \lb 0,\infty \rp$, $D \subseteq E$, $\emptyset \neq C \subseteq D$. Let $f:D \rightarrow \R$ satisfy for all $x\in D$, $y \in C$ that $\left| f(x) -f(y)\right| \les L d \lp x,y \rp$, and let $F:E \rightarrow \R \cup \{\infty\}$ satisfy for all $x\in E$ that:
\begin{align}\label{9.7.30}
F\lp x \rp = \sup_{y\in C} \lb f\lp y \rp - Ld\lp x,y \rp \rb
\end{align}
It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item for all $x \in C$ that $F(x) = f(x)$
\item it holds for all $x \in D$, that $F(x) \les f(x)$
\item it holds for all $x\in E$ that $F\lp x \rp < \infty$
\item it holds for all $x,y \in E$ that $\left| F(x)-F(y)\right| \les Ld\lp x,y \rp$ and,
\item it holds for all $x \in D$ that:
\begin{align}\label{9.7.31}
\left| F\lp x \rp - f \lp x \rp \right| \les 2L \lb \inf_{y\in C} d \lp x,y \rp\rb
\end{align}
\end{enumerate}
\end{lemma}
\begin{proof}
The assumption that $\forall x \in D, y \in C: \left| f(x) - f(y)\right| \les Ld\lp x,y \rp$ ensures that for all $x \in D$, $y \in C$ it holds that:
\begin{align}\label{9.7.32}
f(y) - Ld\lp x,y\rp \les f\lp x \rp \les f(y) + Ld\lp x,y \rp
\end{align}
For $x\in D$, taking the supremum over $y \in C$ then yields:
\begin{align}\label{9.7.33}
f(x) \ges \sup_{y \in C} \lb f(y) - Ld\lp x,y \rp \rb
\end{align}
This establishes Item (ii). Note that (\ref{9.7.30}) then tells us that for all $x\in C$ it holds that:
\begin{align}
	F\lp x \rp \ges f(x) - Ld\lp x,x \rp = f\lp x \rp
\end{align}
This together with (\ref{9.7.33}) then yields Item (i).
Note next that (\ref{9.7.32}) (with $x \curvearrowleft y$ and $y \curvearrowleft z$) and the triangle inequality ensure that for all $x \in E$, $y,z \in C$ it holds that:
\begin{align}
f(y) - Ld\lp x,y\rp \les f(z)+Ld\lp y,z \rp - Ld\lp x,y \rp \les f(z) + Ld\lp x,z \rp
\end{align}
We then obtain that for all $x\in E$, $z\in C$ it holds that:
\begin{align}
	F\lp x \rp = \sup_{y\in C} \lb f(y) - Ld\lp x,y \rp \rb \les f\lp z \rp + Ld\lp x,z \rp < \infty
\end{align}
This proves Item (iii). Item (iii), (\ref{9.7.30}), and the triangle inequality then show that for all $x,y \in E$ it holds that:
\begin{align}
F(x) - F(y) &= \lb \sup_{v \in C} \lp f(v) - Ld\lp x,v \rp \rp \rb - \lb \sup_{w\in C} \lp f(w)-Ld\lp y,w \rp \rp\rb \nonumber \\
	&= \sup_{v \in C}\lb f(v) - Ld\lp x,v \rp -\sup_{w\in C} \lp f(w) - L d\lp y,w \rp \rp\rb \nonumber\\
	&\les \sup_{v \in C}\lb f(v) - Ld\lp x,v \rp - \lp f(v) - Ld\lp y,v \rp \rp\rb \nonumber\\
	&= \sup_{v\in C} \lp Ld\lp y,v \rp - Ld\lp x,v\rp \rp \les \sup_{v\in C} \lp Ld\lp x,y \rp + Ld\lp x,v \rp -Ld\lp x,v\rp \rp = Ld \lp x,y \rp
\end{align}
By symmetry, this establishes Item (iv). Finally, note that Items (i) and (iv), the triangle inequality, and the assumption that $\forall x \in D, y\in C: \left| f(x) - f(y) \right| \les Ld\lp x,y \rp$ ensure that for all $x\in D$ it holds that:
\begin{align}
\left| F(x) - f(x) \right| &= \inf_{y\in C} \left| F(x) - F(y) +f(y) - f(x)\right| \nonumber \\
&\les \inf_{y\in C} \lp \left| F(x) - F(y) \right| + \left| f(y) - f(x) \right|\rp \nonumber\\
&\les \inf_{y\in C} \lp 2Ld\lp x,y \rp\rp = 2L \lb \inf_{y\in C} d \lp x,y \rp \rb
\end{align}
This establishes Item (v) and hence establishes the Lemma.
\end{proof}
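The extension (\ref{9.7.30}) lends itself to a direct numerical check. The sketch below (illustrative only; the choice $E = D = \R$ with $d(x,y) = |x-y|$, the set $C$, and the function $f$ are arbitrary assumptions made for demonstration) verifies Items (i), (ii), and (v) on a grid:
\begin{verbatim}
import numpy as np

L = 2.0
C = np.array([-1.0, 0.25, 0.8])               # finite nonempty subset of E
f = lambda t: np.cos(2.0 * t)                 # |f'| <= 2, so f is L-Lipschitz

def F(x):
    # F(x) = sup_{y in C} [ f(y) - L * d(x, y) ],  here d(x, y) = |x - y|
    return np.max(f(C) - L * np.abs(x - C))

for y in C:                                   # Item (i): F agrees with f on C
    assert abs(F(y) - f(y)) < 1e-12

for x in np.linspace(-2.0, 2.0, 401):
    dist = np.min(np.abs(x - C))
    assert F(x) <= f(x) + 1e-12               # Item (ii)
    assert abs(F(x) - f(x)) <= 2.0 * L * dist + 1e-12   # Item (v)
\end{verbatim}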
\begin{corollary}\label{9.7.6.1}
Let $\lp E,d \rp$ be a metric space, let $L \in \lb 0,\infty \rp$, $\emptyset \neq C \subseteq E$, let $f: E \rightarrow \R$ satisfy for all $x\in E$, $y \in C$ that $\left| f(x) - f(y) \right| \les Ld \lp x,y \rp$, and let $F:E \rightarrow \R \cup \{\infty\}$ satisfy for all $x\in E$ that:
\begin{align}
F \lp x \rp = \sup_{y\in C} \lb f(y) - Ld \lp x,y \rp\rb
\end{align}
It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item for all $x\in C$ that $F(x) = f(x)$
\item for all $x\in E$ that $F(x) \les f(x)$
\item for all $x,y \in E$ that $\left| F(x) - F(y) \right| \les L d \lp x,y \rp$ and
\item for all $x\in E$ that: \begin{align}
\left| F\lp x \rp - f\lp x \rp \right| \les 2L \lb \inf_{y\in C} d \lp x,y \rp \rb
\end{align}
\end{enumerate}
\end{corollary}
\begin{proof}
Note that Lemma \ref{lem:lip_ext} applied with $D \curvearrowleft E$ establishes Items (i)\textemdash(iv).
\end{proof}
\subsection{Explicit ANN Approximations }
\begin{lemma}\label{lem:maxconv_accuracy}
Let $d,N \in \N$, $L \in \lb 0,\infty \rp$. Let $E \subseteq \R^d$. Let $x_1,x_2,...,x_N \in E$, let $f:E \rightarrow \R$ satisfy for all $v,w \in E$ that $\left| f(v) -f(w)\right| \les L \left\| v-w \right\|_1$, and let $\mathsf{MC}^{N,d}_{x,y} \in \neu$ and $y = \lp f\lp x_1 \rp, f \lp x_2 \rp,...,f\lp x_N \rp\rp$ satisfy:
\begin{align}
\mathsf{MC}^{N,d}_{x,y} = \mxm^N \bullet \aff_{-L\mathbb{I}_N,y} \bullet \lb \boxminus^N_{i=1} \nrm^d_1 \bullet \aff_{\mathbb{I}_d,-x_i} \rb \bullet \cpy_{N,d}
\end{align}
It is then the case that:
\begin{align}\label{(9.7.42)}
\sup_{x\in E} \left| \lp \real_{\rect}\lp \mathsf{MC}^{N,d}_{x,y} \rp \rp \lp x \rp -f\lp x \rp \right| \les 2L \lb \sup _{x\in E} \lp \min_{i\in \{1,2,...,N\}} \left\| x-x_i\right\|_1\rp\rb
\end{align}
\end{lemma}
\begin{proof}
Throughout this proof let $F: \R^d \rightarrow \R$ satisfy that:
\begin{align}\label{9.7.43}
F\lp x \rp = \max_{i \in \{1,2,...,N\}} \lp f\lp x_i \rp- L \left\| x-x_i \right\|_1 \rp
\end{align}
Note then that Corollary \ref{9.7.6.1}, (\ref{9.7.43}), and the assumption that for all $x,y \in E$ it holds that $\left| f(x) - f(y)\right| \les L \left\|x-y \right\|_1$ assure that:
\begin{align}\label{(9.7.44)}
\sup_{x\in E} \left| F(x) - f(x) \right| \les 2L \lb \sup_{x\in E} \lp \min_{i \in \{1,2,...,N\}} \left\| x-x_i\right\|_1\rp\rb
\end{align}
Then Lemma \ref{(9.7.5)} tells us that for all $x\in E$ it holds that $F(x) = \lp \real_{\rect} \lp \mathsf{MC}^{N,d}_{x,y} \rp \rp \lp x \rp$. This combined with (\ref{(9.7.44)}) establishes (\ref{(9.7.42)}).
\end{proof}
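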
\begin{lemma}
Let $N \in \N$, $L \in \lb 0,\infty \rp$, and $a,b \in \R$ with $a < b$. Let $x_1,x_2,...,x_N \in \lb a,b\rb$ satisfy $\sup_{x\in \lb a,b \rb} \lp \min_{i \in \{1,2,\hdots,N\}} \left| x-x_i \right| \rp \les \frac{b-a}{N}$, let $f:\lb a,b\rb \rightarrow \R$ satisfy for all $v,w \in \lb a,b\rb$ that $\left| f(v) -f(w)\right| \les L \left| v-w \right|$, and let $\mathsf{MC}^{N,1}_{x,y} \in \neu$ and $y = \lp f\lp x_1 \rp, f\lp x_2\rp, \hdots, f\lp x_N \rp\rp$ satisfy:
\begin{align}
\mathsf{MC}^{N,1}_{x,y} = \mxm^N \bullet \aff_{-L\mathbb{I}_N,y} \bullet \lb \boxminus^N_{i=1} \nrm^1_1 \bullet \aff_{1,-x_i} \rb \bullet \cpy_{N,1}
\end{align}
It is then the case for the approximant $\mathsf{MC}^{N,1}_{x,y}$ that:
\begin{enumerate}[label = (\roman*)]
\item $\inn \lp \mathsf{MC}^{N,1}_{x,y} \rp = 1$
\item $\out\lp \mathsf{MC}^{N,1}_{x,y} \rp = 1$
\item $\hid \lp \mathsf{MC}^{N,1}_{x,y} \rp = \left\lceil \log_2 \lp N \rp \right\rceil +1$
\item $\wid_1 \lp \mathsf{MC}^{N,1}_{x,y} \rp = 2N$
\item for all $i \in \{ 2,3,...\}$ we have $\wid_i \lp \mathsf{MC}^{N,1}_{x,y} \rp \les 3 \left\lceil \frac{N}{2^{i-1}} \right\rceil$
\item it holds for all $x \in \R$ that $\lp \real_{\rect} \lp \mathsf{MC}^{N,1}_{x,y} \rp \rp \lp x \rp = \max_{i \in \{1,2,...,N\}} \lp y_i - L \left| x-x_i \right|\rp$
\item it holds that $\param \lp \mathsf{MC}^{N,1}_{x,y} \rp \les 6 + 7N^2 + 3\left\lceil \frac{N}{2}\right\rceil \cdot 2N$
\item $\sup_{x\in \lb a,b\rb} \left| \lp \real_{\rect} \lp \mathsf{MC}^{N,1}_{x,y} \rp \rp \lp x \rp - f(x) \right| \les \frac{2L\lp b-a\rp}{N}$
\end{enumerate}
2024-02-19 17:04:37 +00:00
\end{lemma}
\begin{proof}
Items (i)\textemdash(vii) are assertions of Lemma \ref{lem:mc_prop} with $d \curvearrowleft 1$. Item (viii) follows from Lemma \ref{lem:maxconv_accuracy} with $d \curvearrowleft 1$, combined with the assumption that $\sup_{x\in \lb a,b \rb} \lp \min_{i \in \{1,2,\hdots,N\}} \left| x-x_i \right| \rp \les \frac{b-a}{N}$.
\end{proof}
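Item (viii) can be checked numerically. The sketch below (illustrative only; $f = \sin$ and the interval are arbitrary choices) uses equispaced points $x_i = a + \frac{(i-1)(b-a)}{N-1}$, which satisfy the covering assumption $\sup_{x\in\lb a,b\rb}\min_i \left|x - x_i\right| \les \frac{b-a}{N}$ whenever $N \ges 2$, and verifies the bound for several values of $N$:
\begin{verbatim}
import numpy as np

a, b, L = 0.0, 2.0 * np.pi, 1.0
f = np.sin                                    # 1-Lipschitz on [a, b]
grid = np.linspace(a, b, 4001)

for N in (2, 4, 8, 16, 32, 64):
    x_samp = np.linspace(a, b, N)             # x_1, ..., x_N, equally spaced
    y_samp = f(x_samp)

    def realization(t):
        # Item (vi) with d = 1: max_i ( y_i - L * |t - x_i| )
        return np.max(y_samp - L * np.abs(t - x_samp))

    sup_err = max(abs(realization(t) - f(t)) for t in grid)
    assert sup_err <= 2.0 * L * (b - a) / N + 1e-12   # Item (viii)
    print(N, sup_err, 2.0 * L * (b - a) / N)
\end{verbatim}
The observed error decays roughly like $\frac{1}{N}$, which is the rate the bound in Item (viii) predicts for a Lipschitz target.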