\chapter{ANN Product Approximations}
\section{Approximation for Products of Two Real Numbers}
We will build up the tools necessary to approximate $e^x$ via neural networks in the framework described in the previous sections. While much of the foundation comes from, e.g., \cite{grohs2019spacetime}, we will, along the way, encounter neural networks not found in the literature, such as the $\tay$, $\pwr$, and $\tun$ networks, and finally a neural network approximant for $e^x$. For each of these neural networks, we will be concerned with at least the following:
\begin{enumerate}[label = (\roman*)]
\item whether their instantiations under the ReLU activation function are continuous.
\item whether their depths are bounded, at most polynomially, in the prescribed accuracy $\ve$.
\item whether their parameter counts are bounded, at most polynomially, in the prescribed accuracy $\ve$.
\item the accuracy of our neural network approximations.
\end{enumerate}
\subsection{The squares of real numbers in $\lb 0,1 \rb$}
\begin{definition}[The $\mathfrak{i}_d$ Network]\label{def:mathfrak_i}
For all $d \in \N$ we define the ``activation neural network'' $\mathfrak{i}_d \in \neu$ by:
\begin{align}
\mathfrak{i}_d = \lp \lp \mathbb{I}_d, \mymathbb{0}_d\rp, \lp \mathbb{I}_d, \mymathbb{0}_d\rp \rp
\end{align}
\end{definition}
\begin{lemma}\label{lem:mathfrak_i}
Let $d \in \N$. It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \mathfrak{i}_d\rp \in C \lp \R^d, \R^d\rp$.
\item $\lay \lp \mathfrak{i}_d\rp = \lp d,d,d\rp$
\item $\param \lp \mathfrak{i}_d\rp = 2d^2+2d$
\end{enumerate}
\end{lemma}
\begin{proof}
Item (i) is straightforward from the fact that for all $d \in \N$ it is the case that $\real_{\rect} \lp \mathfrak{i}_d\rp = \mathbb{I}_d\lp \real_{\rect} \lp \lb \mathbb{I}_d\rb_*\rp + \mymathbb{0}_d\rp + \mymathbb{0}_d$. Item (ii) is straightforward from the fact that $\mathbb{I}_d \in \R^{d \times d}$. Item (iii) follows by counting parameters: each of the two layers contributes $d^2$ weights and $d$ biases, for a total of $2d^2+2d$.
\end{proof}
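To make the parameter count concrete, the following is a minimal sketch in \texttt{R} (independent of the listings in the appendix; the names \texttt{make\_i\_d} and \texttt{param\_count} are illustrative only) of $\mathfrak{i}_d$ as a list of weight--bias pairs:
\begin{lstlisting}[language=R]
# Minimal sketch: the activation network i_d as two identical
# affine layers (W, b) = (I_d, 0), and its parameter count.
make_i_d <- function(d) {
  layer <- list(W = diag(d), b = rep(0, d))
  list(layer, layer)
}
param_count <- function(nn) {
  sum(sapply(nn, function(l) length(l$W) + length(l$b)))
}
param_count(make_i_d(4))  # 2*4^2 + 2*4 = 40
\end{lstlisting}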
\begin{lemma}\label{lem:6.1.1}\label{lem:phi_k}
Let $\lp c_k \rp _{k \in \N} \subseteq \R$, $\lp A_k \rp _{k \in \N} \subseteq \R^{4 \times 4}$, $B\in \R^{4 \times 1}$, and $\lp C_k \rp _{k\in \N} \subseteq \R^{1 \times 4}$ satisfy for all $k \in \N$ that:
\begin{align}\label{(6.0.1)}
A_k = \begin{bmatrix}
2 & -4 &2 & 0 \\
2 & -4 & 2 & 0\\
2 & -4 & 2 & 0\\
-c_k & 2c_k & -c_k & 1
\end{bmatrix} \quad B=\begin{bmatrix}
0 \\ -\frac{1}{2} \\ -1 \\ 0
\end{bmatrix} \quad C_k = \begin{bmatrix}
-c_k & 2c_k &-c_k & 1
\end{bmatrix}
\end{align}
and that:
\begin{align}
c_k = 2^{1-2k}
\end{align}
Let $\Phi_k \in \neu$, $k\in \N$, satisfy that $\Phi_1 = \lp \aff_{C_1,0} \bullet \mathfrak{i}_4 \rp \bullet \aff_{\mymathbb{e}_4,B}$, where $\mathfrak{i}_4$ is as in Definition \ref{def:mathfrak_i}, and for all $k \in [2,\infty) \cap \N$ that:
\begin{align}
\Phi_k =\lp \aff_{C_k,0}\bullet \mathfrak{i}_4 \rp \bullet \lp \aff_{A_{k-1},B} \bullet \mathfrak{i}_4\rp \bullet \cdots \bullet \lp \aff_{A_1,B} \bullet \mathfrak{i}_4 \rp \bullet \aff_{\mymathbb{e}_4,B}
\end{align}
It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item for all $k \in \N$ we have $\real_{\rect}\lp \Phi_k\rp \in C \lp \R, \R \rp $
\item for all $k \in \N$ we have $\lay \lp \Phi_k \rp = \lp 1,4,4,...,4,1 \rp \in \N^{k+2}$
\item for all $k \in \N$, $x \in \R \setminus \lb 0,1 \rb $ that $\lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp = \rect \lp x \rp$
\item for all $k \in \N$, $x \in \lb 0,1 \rb$, we have $\left| x^2 - \lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp \right| \les 2^{-2k-2}$, and
\item for all $k \in \N$, we have that $\param \lp \Phi_k \rp = 20k-7$
\end{enumerate}
\end{lemma}
\begin{proof}
Let $g_k: \R \rightarrow \lb 0,1 \rb$, $k \in \N$, be the functions which satisfy for all $k \in \N$, $x \in \R$ that:
\begin{align}\label{(6.0.3)}
g_1 \lp x \rp &= \begin{cases}
2x & : x \in \lb 0,\frac{1}{2} \rp \\
2-2x &: x\in \lb \frac{1}{2},1\rb \\
0 &: x \in \R \setminus \lb 0,1 \rb
\end{cases} \\
g_{k+1} &= g_1(g_{k}) \nonumber
\end{align}
and let $f_k: \lb 0,1 \rb \rightarrow \lb 0,1 \rb$, $k \in \N_0$ be the functions satisfying for all $k \in \N_0$, $n \in \{0,1,...,2^k-1\}$, $x \in \lb \frac{n}{2^k}, \frac{n+1}{2^k} \rp$ that $f_k(1)=1$ and:
\begin{align}\label{(6.0.4.2)}
f_k(x) = \lb \frac{2n+1}{2^k} \rb x-\frac{n^2+n}{2^{2k}}
\end{align}
and let $r_k = \lp r_{1,k},r_{2,k},r_{3,k},r_{4,k} \rp: \R \rightarrow \R^4$, $k \in \N$, be the functions which satisfy for all $x \in \R$, $k \in \N$ that:
\begin{align}\label{(6.0.5)}
r_1\lp x \rp &= \begin{bmatrix}
r_{1,1}(x) \\ r_{2,1}(x) \\ r_{3,1}(x) \\ r_{4,1}(x)
\end{bmatrix}= \rect \lp \begin{bmatrix}
x \\ x-\frac{1}{2} \\ x-1 \\ x
\end{bmatrix} \rp \\
r_{k+1}\lp x \rp &= \rect \lp A_k r_k \lp x \rp + B \rp \nonumber
\end{align}
Note that since it is the case for all $x \in \R$ that $\rect(x) = \max\{x,0\}$, (\ref{(6.0.3)}) and (\ref{(6.0.5)}) show that it holds for all $x \in \R$ that:
\begin{align}\label{6.0.6}
2r_{1,1}(x) -4r_{2,1}(x) + 2r_{3,1}(x) &= 2 \rect(x) -4\rect \lp x-\frac{1}{2}\rp+2\rect\lp x-1\rp \nonumber \\
&= 2\max\{x,0\} -4\max\left\{x-\frac{1}{2} ,0\right\}+2\max\{x-1,0\} \nonumber \\
&=g_1(x)
\end{align}
Note also that (\ref{(6.0.4.2)}), combined with the fact that for all $x\in [0,1]$ it holds that $f_0(x) = x = \max\{x,0\}$, tells us that for all $x \in \R$:
\begin{align}\label{6.0.7}
r_{4,1}(x) = \max \{x,0\} = \begin{cases}
f_0(x) & :x\in [0,1] \\
\max\{x,0\}& :x \in \R \setminus \lb 0,1\rb
\end{cases}
\end{align}
We next claim that for all $k \in \N$, it is the case that:
\begin{align}\label{6.0.8}
\lp \forall x \in \R : 2r_{1,k}(x)-4r_{2,k}(x) + 2r_{3,k}(x) =g_k(x) \rp
\end{align}
and that:
\begin{align}\label{6.0.9}
\lp \forall x \in \R: r_{4,k} (x) = \begin{cases}
f_{k-1}(x) & :x \in \lb 0,1 \rb \\
\max\{x,0\} & : x \in \R \setminus \lb 0,1\rb
\end{cases} \rp
\end{align}
We prove (\ref{6.0.8}) and (\ref{6.0.9}) by induction. The base case of $k=1$ is proved by (\ref{6.0.6}) and (\ref{6.0.7}). For the induction step $\N \ni k \rightarrow k+1$, assume there exists a $k \in \N$ such that for all $x \in \R$ it is the case that:
\begin{align}
2r_{1,k}(x) - 4r_{2,k}(x) + 2r_{3,k}(x) = g_k(x)
\end{align}
and:
\begin{align}\label{6.0.11}
r_{4,k}(x) = \begin{cases}
f_{k-1}(x) & : x \in [0,1] \\
\max\{x,0\} &: x \in \R \setminus \lb 0,1 \rb
\end{cases}
\end{align}
Note that (\ref{(6.0.3)}), (\ref{(6.0.5)}), and (\ref{6.0.6}) then tell us that for all $x \in \R$ it is the case that:
\begin{align}\label{6.0.12}
g_{k+1}\lp x \rp &= g_1(g_k(x)) = g_1(2r_{1,k}(x)-4r_{2,k}(x) + 2r_{3,k}(x)) \nonumber \\
&= 2\rect \lp 2r_{1,k}(x) - 4r_{2,k}(x) +2r_{3,k}(x) \rp \nonumber \\
&-4\rect \lp 2r_{1,k}\lp x \rp -4r_{2,k}(x)+2r_{3,k}(x) - \frac{1}{2} \rp \nonumber \\
&+ 2\rect \lp 2r_{1,k} (x) - 4r_{2,k}(x) + 2r_{3,k}(x)-1 \rp \nonumber \\
&=2r_{1,k+1}(x) -4r_{2,k+1}(x) + 2r_{3,k+1}(x)
\end{align}
In addition, note that (\ref{(6.0.4.2)}), (\ref{(6.0.5)}), and (\ref{6.0.7}) tell us that for all $x \in \R$:
\begin{align}\label{6.0.13}
r_{4,k+1}(x) &= \rect \lp \lp -2 \rp ^{3-2 \lp k+1 \rp }r_{1,k} \lp x \rp + 2^{4-2 \lp k+1 \rp}r_{2,k} \lp x \rp + \lp -2 \rp^{3-2\lp k+1\rp }r_{3,k} \lp x \rp + r_{4,k} \lp x\rp \rp \nonumber \\
&= \rect \lp \lp -2 \rp ^{1-2k}r_{1,k} \lp x \rp + 2^{2-2k}r_{2,k}\lp x \rp + \lp -2 \rp ^{1-2k}r_{3,k} \lp x \rp + r_{4,k}\lp x \rp \rp \nonumber \\
&=\rect \lp 2^{-2k} \lb -2r_{1,k}\lp x \rp + 2^2r_{2,k} \lp x \rp -2r_{3,k} \lp x \rp \rb +r_{4,k}\lp x \rp \rp \nonumber \\
&= \rect \lp - \lb 2^{-2k} \rb \lb 2r_{1,k}\lp x \rp -4r_{2,k} \lp x \rp +2r_{3,k}\lp x \rp \rb +r_{4,k}\lp x \rp \rp \nonumber \\
&= \rect\lp -\lb 2^{-2k} \rb g_k \lp x \rp +r_{4,k}\lp x \rp \rp
\end{align}
This, the fact that for all $x\in \R$ it is the case that $\rect \lp x \rp = \max\{x,0\}$, the fact that for all $x\in \lb 0 ,1 \rb$ it is the case that $f_k \lp x \rp \ges 0$, and (\ref{6.0.11}) show that for all $x \in \lb 0,1 \rb$ it holds that:
\begin{align}\label{6.0.14}
r_{4,k+1}\lp x \rp &= \rect \lp -\lp 2^{-2k} g_k \lp x \rp \rp + f_{k-1}\lp x \rp \rp = \rect \lp -\lp 2^{-2k}g_k \lp x \rp \rp +x-\lb \sum^{k-1}_{j=1} \lp 2^{-2j}g_j \lp x \rp \rp \rb \rp \nonumber \\
&= \rect \lp x - \lb \sum^k_{j=1}2^{-2j}g_j \lp x \rp \rb \rp = \rect \lp f_k \lp x \rp \rp =f_k \lp x \rp
\end{align}
Note next that (\ref{6.0.11}) and (\ref{6.0.13}), together with the fact that $g_k$ vanishes on $\R \setminus \lb 0,1\rb$, tell us that for all $x\in \R \setminus \lb 0,1\rb$:
\begin{align}
r_{4,k+1}\lp x \rp = \max \left\{ -\lp 2^{-2k}g_k \lp x \rp \rp + r_{4,k}\lp x \rp, 0 \right\} = \max\{\max\{x,0\},0\} = \max\{x,0\}
\end{align}
Combining (\ref{6.0.12}) and (\ref{6.0.14}) proves (\ref{6.0.8}) and (\ref{6.0.9}). Note then that (\ref{(6.0.1)}) and (\ref{6.0.8}) assure that for all $k\in \N$ it holds that $\real_{\rect} \lp \Phi_k \rp \in C \lp \R,\R \rp$, which establishes Item (i), while Item (ii) follows directly from the definition of $\Phi_k$; moreover, for all $k \in \N$, $x \in \R$ it holds that:
\begin{align}\label{(6.0.17)}
&\lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp \nonumber \\
&= \lp \real_{\rect} \lp \lp \aff_{C_k,0} \bullet \mathfrak{i}_4 \rp \bullet \lp \aff_{A_{k-1},B} \bullet \mathfrak{i}_4 \rp \bullet \cdots \bullet\lp \aff_{A_1,B} \bullet \mathfrak{i}_4 \rp \bullet \aff_{\mymathbb{e}_4,B} \rp \rp \lp x \rp \nonumber \\
&= \lp -2\rp^{1-2k}r_{1,k}\lp x \rp + 2^{2-2k} r_{2,k} \lp x \rp + \lp -2 \rp ^{1-2k} r_{3,k} \lp x \rp + r_{4,k} \lp x \rp \nonumber \\
&=\lp -2 \rp ^{2-2k} \lp \lb \frac{r_{1,k}\lp x \rp +r_{3,k} \lp x \rp }{-2} \rb + r_{2,k}\lp x \rp \rp +r_{4,k}\lp x \rp \nonumber \\
&=2^{2-2k} \lp \lb \frac{r_{1,k}\lp x \rp+r_{3,k} \lp x \rp }{-2} \rb + r_{2,k} \lp x \rp \rp +r_{4,k} \lp x \rp \nonumber \\
&=2^{-2k}\lp 4r_{2,k} \lp x \rp -2r_{1,k}\lp x \rp -2r_{3,k} \lp x \rp \rp +r_{4,k} \lp x \rp \nonumber \\
&=-\lb 2^{-2k} \rb \lb 2r_{1,k} \lp x \rp -4r_{2,k} \lp x \rp +2r_{3,k} \lp x \rp \rb +r_{4,k} \lp x \rp = -\lb 2^{-2k} \rb g_k \lp x \rp + r_{4,k} \lp x \rp
\end{align}
This and (\ref{6.0.9}) tell us that for all $k \in \N$, $x \in \lb 0,1 \rb$:
\begin{align}
\lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp = - \lp 2^{-2k}g_k \lp x \rp \rp +f_{k-1}\lp x \rp &= -\lp 2^{-2k}g_k \lp x \rp \rp +x-\lb \sum^{k-1}_{j=1} 2^{-2j}g_j \lp x \rp \rb \nonumber \\
&=x-\lb \sum^k_{j=1}2^{-2j}g_j \lp x \rp \rb =f_k\lp x\rp \nonumber
\end{align}
Since $f_k$ is the piecewise linear interpolant of $x \mapsto x^2$ at the grid points $\{0,2^{-k},2\cdot 2^{-k},\dots,1\}$, this implies for all $k\in \N$, $x \in \lb 0,1\rb$ that it holds that:
\begin{align}
\left| x^2-\lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp \right| \les 2^{-2k-2}
\end{align}
This, in turn, establishes Item (iv).
Finally observe that (\ref{(6.0.17)}) then tells us that for all $k\in \N$, $x \in \R \setminus \lb 0,1\rb$ it holds that:
\begin{align}
\lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp = -2^{-2k}g_k \lp x \rp +r_{4,k} \lp x \rp =r_{4,k} \lp x \rp = \max\{x,0\} = \rect(x)
\end{align}
This establishes Item (iii). Note next that Item (ii) ensures for all $k\in \N$ that $\dep\lp \Phi_k \rp = k+1$, and:
\begin{align}
\param \lp \Phi_k \rp = 4(1+1) + \lb \sum^k_{j=2} 4 \lp 4+1\rp \rb + \lp 4+1 \rp =8+20\lp k-1\rp+5 = 20k-7
\end{align}
This, in turn, proves Item (v). The proof of the lemma is thus complete.
\end{proof}
\begin{remark}
For an \texttt{R} implementation see Listing \ref{Phi_k}
\end{remark}
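Independently of Listing \ref{Phi_k}, the functions $g_k$ and $f_k$ from the proof can be sketched directly in \texttt{R} to check the error bound of Item (iv) numerically (the names \texttt{g1}, \texttt{gk}, and \texttt{fk} below are illustrative):
\begin{lstlisting}[language=R]
# The hat function g_1 and its iterates g_k = g_1 o ... o g_1.
g1 <- function(x) pmax(0, pmin(2 * x, 2 - 2 * x))
gk <- function(x, k) { for (j in seq_len(k)) x <- g1(x); x }
# f_k(x) = x - sum_{j=1}^k 2^(-2j) g_j(x), the k-th interpolant of x^2.
fk <- function(x, k) {
  x - Reduce(`+`, lapply(1:k, function(j) 2^(-2 * j) * gk(x, j)))
}
x <- seq(0, 1, length.out = 1001)
max(abs(x^2 - fk(x, 4)))  # observed error; at most 2^(-2*4-2) ~ 0.000977
\end{lstlisting}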
\begin{figure}[h]
\includegraphics[width = \linewidth]{/Users/shakilrafi/R-simulations/Phi_k_properties/diff.png}
\caption{Plot of $\log_{10}$ of the $L^1$ difference between $\Phi_k$ and $x^2$ over $\lb 0,1\rb$ for different values of $k$}
\end{figure}
\begin{corollary}\label{6.1.1.1}\label{cor:phi_network}
Let $\ve \in \lp 0,\infty\rp$, $M= \min \lp \N \cap \lb \frac{1}{2}\log_2 \lp \ve^{-1} \rp -1,\infty\rp\rp$, $\lp c_k\rp_{k \in \N} \subseteq \R$, $\lp A_k\rp_{k\in\N} \subseteq \R^{4 \times 4}$, $B \in \R^{4\times 1}$, and $\lp C_k\rp_{k\in \N} \subseteq \R^{1\times 4}$ satisfy for all $k \in \N$ that:
\begin{align}
A_k = \begin{bmatrix}
2&-4&2&0 \\
2&-4&2&0\\
2&-4&2&0\\
-c_k&2c_k & -c_k&1
\end{bmatrix}, \quad B = \begin{bmatrix}
0\\ -\frac{1}{2} \\ -1 \\ 0
\end{bmatrix},\quad C_k = \begin{bmatrix}
-c_k &2c_k&-c_k&1
\end{bmatrix}
\end{align}
where:
\begin{align}
c_k = 2^{1-2k}
\end{align}
and let $\Phi \in \neu$ be defined as:
\begin{align}\label{def:Phi}
\Phi = \begin{cases}
\lb \aff_{C_1,0}\bullet \mathfrak{i}_4\rb \bullet \aff_{\mymathbb{e}_4,B} & M=1 \\
\lb \aff_{C_M,0} \bullet \mathfrak{i}_4\rb\bullet \lb \aff_{A_{M-1},B} \bullet \mathfrak{i}_4 \rb \bullet \cdots \bullet \lb \aff_{A_1,B}\bullet \mathfrak{i}_4\rb \bullet \aff_{\mymathbb{e}_4,B} & M \in \lb 2,\infty \rp \cap \N
\end{cases}
\end{align}
it is then the case that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \Phi\rp \in C \lp \R,\R\rp$
\item $\lay \lp \Phi\rp = \lp 1,4,4,...,4,1\rp \in \N^{M+2} $
\item it holds for all $x \in \R \setminus\lb 0,1 \rb$ that $\lp \real_{\rect} \lp \Phi\rp\rp \lp x \rp = \rect(x)$
\item it holds for all $x \in \lb 0,1 \rb$ that $\left| x^2 - \lp \real_{\rect} \lp \Phi \rp \rp\lp x \rp \right| \les 2^{-2M-2} \les \ve$
\item $\dep \lp \Phi \rp \les M+1 \les \max\{ \frac{1}{2}\log_2 \lp \ve^{-1}\rp+1,2\}$, and
\item $\param \lp \Phi\rp = 20M-7 \les \max\left\{ 10\log_2 \lp \ve^{-1}\rp-7,13\right\}$
\end{enumerate}
\end{corollary}
\begin{proof}
Items (i)--(iii) are direct consequences of Lemma \ref{lem:6.1.1}, Items (i)--(iii). Note next that the fact that $M = \min \lp \N \cap \lb \frac{1}{2} \log_2 \lp \ve^{-1}\rp-1,\infty\rp\rp$ ensures that:
\begin{align}
M \ges \frac{1}{2}\log_2 \lp \ve^{-1}\rp-1
\end{align}
This and Item (iv) of Lemma \ref{lem:6.1.1} demonstrate that for all $x\in \lb 0,1\rb$ it then holds that:
\begin{align}
\left| x^2 - \lp \real_{\rect}\lp \Phi\rp\rp \lp x\rp \right| \les 2^{-2M-2} = 2^{-2(M+1)} \les 2^{-\log_2\lp\ve^{-1} \rp} = \ve
\end{align}
Thus establishing Item (iv). The fact that $M = \min \lp \N \cap \lb \frac{1}{2}\log_2 \lp \ve^{-1}\rp -1,\infty\rp\rp$ and Item (ii) of Lemma \ref{lem:6.1.1} tell us that:
\begin{align}
\dep \lp \Phi \rp = M+1 \les \max \left\{ \frac{1}{2} \log_2 \lp \ve^{-1}\rp+1,2\right\}
\end{align}
Which establishes Item(v). This and Item (v) of Lemma \ref{lem:6.1.1} then tell us that:
\begin{align}
\param \lp \Phi\rp = 20M-7 \les 20 \max\left\{ \frac{1}{2}\log_2\lp\ve^{-1}\rp,1\right\}-7 = \max\left\{ 10\log_2 \lp\ve^{-1} \rp-7,13\right\}
\end{align}
This completes the proof of the corollary.
\end{proof}
\begin{remark}
For an implementation in \texttt{R}, see Listing \ref{Phi}
\end{remark}
\begin{figure}[h]
\centering
\includegraphics[width = \linewidth]{/Users/shakilrafi/R-simulations/Phi_properties/Phi_diff_contour.png}
\caption{Contour plot of the $L^1$ difference between $\Phi$ and $x^2$ over $\lb 0,1 \rb$ for different values of $\ve$.}
\end{figure}
\begin{remark}
Note that (\ref{def:Phi}) implies that $\dep \lp \Phi \rp \ges 4$.
\end{remark}
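The quantities appearing in Corollary \ref{cor:phi_network} are straightforward to tabulate; the following \texttt{R} sketch (the name \texttt{phi\_summary} is illustrative and not part of the packaged listings) computes $M$, the depth, the parameter count, and the error bound from a prescribed accuracy $\ve$:
\begin{lstlisting}[language=R]
# M is the smallest natural number >= 0.5*log2(1/eps) - 1.
phi_summary <- function(eps) {
  M <- max(1, ceiling(0.5 * log2(1 / eps) - 1))
  c(M = M, depth = M + 1, params = 20 * M - 7,
    error_bound = 2^(-2 * M - 2))
}
phi_summary(1e-4)  # M = 6, depth = 7, params = 113, error <= 2^-14
\end{lstlisting}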
Now that we have neural networks that perform the squaring operation on $\lb 0,1 \rb$, we may extend to all of $\R$. Note that this neural network representation differs somewhat from the ones in \cite{grohs2019spacetime}.
\subsection{The $\sqr$ network}
\begin{lemma}\label{6.0.3}\label{lem:sqr_network}
Let $\delta,\ve \in (0,\infty)$, $\alpha \in (0,\infty)$, $q\in (2,\infty)$, $ \Phi \in \neu$ satisfy that $\delta = 2^{\frac{-2}{q-2}}\ve ^{\frac{q}{q-2}}$, $\alpha = \lp \frac{\ve}{2}\rp^{\frac{1}{q-2}}$, $\real_{\rect}\lp\Phi\rp \in C\lp \R,\R\rp$, $\dep(\Phi) \les \max \left\{\frac{1}{2} \log_2(\delta^{-1})+1,2\right\}$, $\param(\Phi) \les \max\left\{10\log_2\lp \delta^{-1}\rp-7,13\right\}$, $\sup_{x \in \R \setminus \lb 0,1\rb} \left| \lp \real_{\rect} \lp \Phi \rp\rp \lp x \rp -\rect(x) \right| =0$, and $\sup_{x\in \lb 0,1\rb} |x^2-\lp \real_{\rect} \lp \Phi \rp \rp \lp x\rp | \les \delta$, and let $\Psi \in \neu$ be the neural network given by:
\begin{align}
\Psi = \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0} \rp \bigoplus\lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp
\end{align}
It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item it holds that $\real_{\rect} \lp \Psi \rp \in C \lp \R,\R \rp$.
\item it holds that $\lp \real_{\rect} \lp \Psi \rp \rp \lp 0\rp=0$
\item it holds for all $x\in \R$ that $0\les \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \les \ve + |x|^2$
\item it holds for all $x \in \R$ that $|x^2-\lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp |\les \ve \max\{1,|x|^q\}$
\item it holds that $\dep (\Psi)\les \max\left\{1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \ve^{-1} \rp,2\right\}$, and
\item it holds that $\param\lp \Psi \rp \les \max\left\{ \lb \frac{40q}{q-2} \rb \log_2 \lp \ve^{-1} \rp +\frac{80}{q-2}-28,52 \right\}$
\end{enumerate}
\end{lemma}
\begin{proof}
Note that for all $x\in \R$ it is the case that:
\begin{align}\label{6.0.21}
\lp \real_{\rect}\lp \Psi \rp \rp\lp x \rp &= \lp \real_{\rect} \lp \lp \aff_{\alpha^{-2},0}\bullet \Phi \bullet \aff_{\alpha,0}\rp \oplus\lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0} \rp \rp \rp \lp x \rp \nonumber\\
&= \lp \real_{\rect}\lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0} \rp \rp \lp x\rp + \lp \real_{\rect}\lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp \rp \lp x\rp \nonumber \\
&= \frac{1}{\alpha^2}\lp \real_{\rect}\lp \Phi \rp \rp \lp \alpha x\rp + \frac{1}{\alpha^2}\lp \real_{\rect} \lp \Phi \rp \rp \lp -\alpha x\rp \nonumber\\
&= \frac{1}{\lp \frac{\ve}{2}\rp^{\frac{2}{q-2}}}\lb \lp \real_{\rect}\lp \Phi \rp \rp \lp \lp \frac{\ve}{2}\rp ^{\frac{1}{q-2}}x \rp + \lp \real_{\rect}\lp \Phi \rp \rp \lp -\lp \frac{\ve}{2}\rp^{\frac{1}{q-2}}x\rp \rb
\end{align}
This, the assumption that $\real_{\rect}\lp\Phi\rp \in C\lp \R, \R \rp$, and the assumption that $\sup_{x\in \R \setminus \lb 0,1\rb } | \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp -\rect\lp x\rp | =0$ tell us that:
\begin{align}
\lp \real_{\rect}\lp \Psi \rp \rp \lp 0 \rp &= \lp \frac{\ve}{2}\rp^{\frac{-2}{q-2}}\lb \lp \real_{\rect}\lp \Phi \rp \rp \lp 0 \rp +\lp \real_{\rect} \lp \Phi\rp \rp \lp 0 \rp \rb \nonumber \\
&=\lp \frac{\ve}{2}\rp ^{\frac{-2}{q-2}} \lb \rect (0)+\rect(0) \rb \nonumber \\
&=0
\end{align}
This, in turn, establishes Items (i)--(ii). Observe next that the assumption that $\real_{\rect} \lp \Phi \rp \in C\lp \R,\R \rp$ and the assumption that $\sup_{x\in \R \setminus \lb 0,1\rb} | \lp \real_{\rect}\lp \Phi \rp \rp \lp x \rp -\rect(x) |=0$ ensure that for all $x\in \R \setminus \lb -1,1 \rb$ it holds that:
\begin{align}\label{6.0.23}
\lb \real_{\rect}\lp \Phi \rp \rb \lp x\rp + \lb \real_{\rect}\lp \Phi \rp \rb \lp -x \rp = \rect\lp x\rp +\rect(-x) &= \max\{x,0\}+\max\{-x,0\} \nonumber\\
&=|x|
\end{align}
The assumption that $\sup_{x\in \R \setminus \lb 0,1\rb }|\lp \real_{\rect} \lp \Phi \rp \rp \lp x\rp -\rect\lp x\rp |=0$ and the assumption that $\sup_{x\in\lb 0,1\rb} |x^2-\lp \real_{\rect} \lp \Phi \rp \rp \lp x\rp |\les \delta$ show that:
\begin{align}\label{6.0.24}
&\sup_{x \in \lb -1,1\rb} \left|x^2 - \lp \lb \real_{\rect}\lp \Phi \rp \rb \lp x\rp +\lb \real_{\rect}\lp \Phi \rp \rb \lp -x \rp \rp \right| \nonumber \\
&= \max\left\{ \sup_{x\in \lb -1,0 \rb} \left| x^2-\lp \rect(x)+ \lb \real_{\rect}\lp \Phi \rp \rb \lp -x \rp \rp \right|,\sup _{x\in \lb 0,1 \rb} \left| x^2-\lp \lb \real_{\rect} \lp \Phi \rp \rb \lp x \rp + \rect \lp -x \rp \rp \right| \right\} \nonumber\\
&= \max\left\{\sup_{x\in \lb -1,0 \rb}\left|\lp -x \rp^2 - \lp \real_{\rect}\lp \Phi \rp \rp \lp -x \rp \right|, \sup_{x\in \lb 0,1\rb} \left| x^2-\lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp \right| \right\} \nonumber \\
&=\sup_{x\in \lb 0,1 \rb}\left| x^2 - \lp \real_{\rect}\lp \Phi \rp \rp \lp x\rp \right| \les \delta
\end{align}
Next observe that (\ref{6.0.21}) and (\ref{6.0.23}) show that for all $x \in \R \setminus \lb -\lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}}, \lp \frac{\ve}{2}\rp ^{\frac{-1}{q-2}} \rb$ it holds that:
\begin{align}\label{6.0.25}
0 \les \lb \real_{\rect} \lp \Psi \rp \rb \lp x \rp &= \lp \frac{\ve}{2} \rp ^{\frac{-2}{q-2}}\lp \lb \real_{\rect} \lp \Phi \rp \rb \lp \lp \frac{\ve}{2}\rp ^{\frac{1}{q-2}}x \rp + \lb \real_{\rect} \lp \Phi \rp \rb \lp -\lp \frac{\ve}{2}\rp^{\frac{1}{q-2}} x\rp \rp \nonumber \\
&= \lp \frac{\ve}{2} \rp ^{\frac{-2}{q-2}} \left| \lp \frac{\ve}{2} \rp^{\frac{1}{q-2}}x \right| = \lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}}\left|x\right| \les |x|^2
\end{align}
The triangle inequality then tells us that for all $x\in \R \setminus \lb - \lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}}, \lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}} \rb$ it holds that:
\begin{align} \label{6.0.25b}
\left| x^2- \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| &= \left| x^2 - \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}}\left|x\right| \right| \les \lp \left|x \right|^2 + \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}} \left| x \right| \rp \nonumber\\
&= \lp \left| x \right|^q \left|x\right|^{-(q-2)} + \lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}} \left| x \right|^q\left| x \right|^{-(q-1)} \rp \nonumber \\
&\les \lp \left| x \right|^q \lp \frac{\ve}{2} \rp^{\frac{q-2}{q-2}} + \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}} \left| x \right|^q \lp \frac{\ve}{2} \rp ^{\frac{q-1}{q-2}} \rp \nonumber \\
&= \lp \frac{\ve}{2}+ \frac{\ve}{2} \rp \left| x \right|^q = \ve \left| x \right|^q \les \ve \max \left\{ 1, \left| x \right|^q \right\}
\end{align}
Note that (\ref{6.0.24}), (\ref{6.0.21}), and the fact that $\delta = 2^{\frac{-2}{q-2}}\ve^{\frac{q}{q-2}}$ then tell us that for all $x \in \lb -\lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}}, \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}} \rb$ it holds that:
\begin{equation}
\begin{aligned}\label{6.0.26}
&\left| x^2-\lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| \\
&= \lp \frac{\ve}{2} \rp^{\frac{-2}{q-2}} \left| \lp \lp \frac{\ve}{2} \rp ^{\frac{1}{q-2}}x \rp^2 - \lp \lb \real_{\rect} \lp \Phi \rp \rb \lp \lp \frac{\ve}{2} \rp ^{\frac{1}{q-2}}x \rp + \lb \real_{\rect} \lp \Phi \rp \rb \lp -\lp \frac{\ve}{2} \rp ^{\frac{1}{q-2}}x \rp \rp \right| \\
&\les \lp \frac{\ve}{2} \rp^{\frac{-2}{q-2}} \lb \sup_{y \in \lb -1,1\rb} \left| y^2 - \lp \lb \real_{\rect} \lp \Phi \rp \rb \lp y \rp + \lb \real_{\rect} \lp \Phi \rp \rb \lp -y \rp \rp \right| \rb \\
&\les \lp \frac{\ve}{2} \rp^{\frac{-2}{q-2}} \delta = \lp \frac{\ve}{2} \rp^{\frac{-2}{q-2}} 2^{\frac{-2}{q-2}} \ve^{\frac{q}{q-2}} = \ve \les \ve \max \{ 1, \left| x \right|^q \}
\end{aligned}
\end{equation}
Now note that this and (\ref{6.0.25b}) tell us that for all $x\in \R$ it is the case that:
\begin{align}
\left| x^2-\lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| \les \ve \max\{1,|x|^q \}
\end{align}
This establishes Item (iv). Note that (\ref{6.0.26}) tells us that for all $x \in \lb - \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}}, \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}} \rb $ it is the case that:
\begin{align}
\left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| \les \left| x^2 - \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| + \left| x \right|^2 \les \ve + \left| x \right| ^2
\end{align}
This and (\ref{6.0.25}) tell us that for all $x\in \R$:
\begin{align}
\left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| \les \ve + |x|^2
\end{align}
This establishes Item (iii).
Note next that Corollary \ref{affcor}, Remark \ref{5.3.2}, the hypothesis, and the fact that $\delta = 2^{\frac{-2}{q-2}}\ve ^{\frac{q}{q-2}}$ tell us that:
\begin{align}
\dep \lp \Psi \rp = \dep \lp \Phi \rp &\les \max \left\{\frac{1}{2} \log_2(\delta^{-1})+1,2\right\} \nonumber \\
&= \max \left\{ 1+\frac{1}{q-2} + \lb \frac{q}{2(q-2)}\rb\log_2 \lp \ve^{-1} \rp ,2\right\}
\end{align}
This establishes Item (v).
Notice next that the fact that $\delta = 2^{\frac{-2}{q-2}}\ve^{\frac{q}{q-2}}$ tells us that:
\begin{align}
\log_2 \lp \delta^{-1} \rp = \log_2 \lp 2^{\frac{2}{q-2}} \ve^{\frac{-q}{q-2}}\rp = \frac{2}{q-2} + \lb \lb \frac{q}{q-2}\rb \log_2 \lp \ve^{-1}\rp \rb
\end{align}
Note that by Corollary \ref{affcor} we have that:
\begin{align}
\param \lp \Phi \bullet \aff_{-\alpha,0} \rp &\les \lb \max\left\{ 1, \frac{\inn \lp \aff_{-\alpha,0}\rp+1}{\inn\lp \Phi\rp+1}\right\}\rb \param \lp \Phi\rp = \param \lp \Phi\rp
\end{align}
and further that:
\begin{align}
\param \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0} \rp &= \lb \max\left\{ 1, \frac{\out \lp \aff_{\alpha^{-2},0}\rp}{\out\lp \Phi \bullet \aff_{-\alpha,0}\rp}\right\}\rb \param \lp \Phi \bullet \aff_{-\alpha,0}\rp \nonumber\\
&\les \param \lp \Phi\rp
\end{align}
By symmetry note also that $ \param \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0}\rp = \param \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp $ and also that $ \lay \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0}\rp = \lay \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp $. Thus Lemma \ref{paramsum}, Corollary \ref{cor:sameparal}, and the hypothesis tells us that:
\begin{align}\label{(6.1.42)}
\param \lp \Psi \rp &= \param \lp \Phi \boxminus \Phi \rp \nonumber \\
&\les 4\param \lp \Phi\rp \nonumber \\
&\les 4\max\left\{10\log_2\lp \delta^{-1}\rp-7,13\right\}
\end{align}
This, and the fact that $\delta = 2^{\frac{-2}{q-2}}\ve ^{\frac{q}{q-2}}$ renders (\ref{(6.1.42)}) as:
\begin{align}
4\max\left\{10\log_2\lp \delta^{-1}\rp-7,13\right\} &= 4\max\left\{10\log_2\lp \delta^{-1}\rp-7,13\right\} \nonumber\\
&= 4\max \left\{ 10 \lp \frac{2}{q-2} +\frac{q}{q-2}\log_2 \lp \ve^{-1}\rp\rp-7,13\right\} \nonumber \\
&=\max \left\{ \lb \frac{40q}{q-2}\rb \log_2 \lp \ve^{-1}\rp + \frac{80}{q-2}-28,52\right\}
\end{align}
This establishes Item (vi), completing the proof of the lemma.
\end{proof}
\begin{remark}
We will often find it helpful to refer to this network for fixed $\ve \in \lp 0, \infty \rp$ and $q \in \lp 2,\infty\rp$ as the $\sqr^{q,\ve}$ network.
\end{remark}
\begin{remark}
For an \texttt{R} implementation see Listing \ref{Sqr}
\end{remark}
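The hyperparameters of $\sqr^{q,\ve}$ are determined entirely by $q$ and $\ve$; the following is a small \texttt{R} sketch (the name \texttt{sqr\_hyperparams} is illustrative) of the quantities appearing in Lemma \ref{lem:sqr_network}:
\begin{lstlisting}[language=R]
# delta, alpha, and the depth/parameter upper bounds of Sqr^{q,eps}.
sqr_hyperparams <- function(q, eps) {
  c(delta = 2^(-2 / (q - 2)) * eps^(q / (q - 2)),
    alpha = (eps / 2)^(1 / (q - 2)),
    depth_bound = max(1 + 1/(q - 2) + q/(2*(q - 2)) * log2(1/eps), 2),
    param_bound = max((40*q/(q - 2)) * log2(1/eps) + 80/(q - 2) - 28, 52))
}
sqr_hyperparams(q = 2.5, eps = 0.1)
\end{lstlisting}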
\begin{figure}[h]
\centering
\includegraphics[width = 0.45\linewidth]{/Users/shakilrafi/R-simulations/Sqr_properties/experimental_deps.png}
\includegraphics[width = 0.45\linewidth]{/Users/shakilrafi/R-simulations/Sqr_properties/dep_theoretical_upper_limits.png}
\caption{Left: $\log_{10}$ of depths for a simulation with $q \in \lb 2.1, 4 \rb $, $\ve \in \lp 0.1, 2 \rb$, and $x \in \lb -5,5 \rb$, all with $50$ mesh-points. Right: The theoretical upper limits over the same range of values}
\end{figure}
\begin{table}[h]
\begin{tabular}{@{}l|llllll@{}}
\toprule
& Min. & 1\textsuperscript{st} Qu. & Median & Mean & 3\textsuperscript{rd} Qu. & Max. \\ \midrule
Experimental $|x^2 - \real_{\rect}(\mathsf{Sqr}^{q,\ve})(x)|$ & 0.000003 & 0.089438 & 0.337870 & 3.148933 & 4.674652 & 20.00 \\ \midrule
Theoretical bound on $|x^2 - \real_{\rect}(\mathsf{Sqr}^{q,\ve})(x)|$ & 0.010 & 1.715 & 10.402 & 48.063 & 45.538 & 1250.00 \\ \midrule
Difference & 0.001 & 1.6012 & 9.8655 & 44.9141 & 40.7102 & 1230
\end{tabular}
\caption{Theoretical upper bounds for $L^1$ error, experimental $L^1$ error and their forward difference, with $q \in \lb 2.1, 4 \rb $, $\ve \in \lp 0.1, 2 \rb$, and $x \in \lb -5,5 \rb$, all with $50$ mesh-points.}
\end{table}
\subsection{The $\prd$ network}
We are finally ready to give neural network representations of arbitrary products of real numbers. This representation differs somewhat from those found in the literature, especially \cite{grohs2019spacetime}, in that neural network sums are used here instead of parallelization (stacking). This will help us calculate $\wid_1$ and the width of the second-to-last layer.
\begin{lemma}\label{prd_network}
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, $A_1,A_2,A_3 \in \R^{1\times 2}$, $\Psi \in \neu$ satisfy for all $x\in \R$ that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, $A_1 = \lb 1 \quad 1 \rb$, $A_2 = \lb 1 \quad 0 \rb$, $A_3 = \lb 0 \quad 1 \rb$, $\real_{\rect}\lp \Psi \rp \in C\lp \R, \R \rp$, $\lp \real_{\rect} \lp \Psi \rp \rp \lp 0\rp = 0$, $0\les \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \les \delta+|x|^2$, $|x^2-\lp \real_{\rect}\lp \Psi \rp \rp \lp x \rp |\les \delta \max \{1,|x|^q\}$, $\dep\lp \Psi \rp \les \max\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \delta^{-1} \rp ,2\}$, and $\param \lp \Psi \rp \les \max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\}$, then:
\begin{enumerate}[label=(\roman*)]
\item there exists a unique $\Gamma \in \neu$ satisfying:
\begin{align}
\Gamma = \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp \bigoplus \lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_2,0} \rp \rp \bigoplus\lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp
\end{align}
\item it holds that $\real_{\rect} \lp \Gamma \rp \in C \lp \R^2,\R \rp$
\item it holds for all $x,y\in \R$ that $\lp \real_{\rect}\lp \Gamma \rp \rp \lp x,0\rp = \lp \real_{\rect}\lp \Gamma \rp \rp \lp 0,y\rp =0$
\item it holds for any $x,y \in \R$ that $\left|xy - \lp \real_{\rect} \lp \Gamma \rp \rp \lp \begin{bmatrix}
x \\
y
\end{bmatrix} \rp \right| \les \ve \max \{1,|x|^q,|y|^q \}$
\item it holds that $\param(\Gamma) \les \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb -252$
\item it holds that $\dep\lp \Gamma \rp \les \frac{q}{q-2} \lb \log_2 \lp \ve^{-1}\rp +q \rb $
\item it holds that $\wid_1 \lp \Gamma \rp=24$
\item it holds that $\wid_{\hid \lp\Gamma\rp} \lp \Gamma \rp = 24$
\end{enumerate}
\end{lemma}
\begin{proof}
Note that:
\begin{align}
&\lp \real_{\rect} \lp \Gamma \rp \rp \lp \begin{bmatrix}
x\\y
\end{bmatrix} \rp = \real_{\rect} \lp \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp \bigoplus \lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_2,0} \rp \rp \bigoplus \right. \\
&\left. \lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp \rp \nonumber \lp \begin{bmatrix}
x \\ y
\end{bmatrix} \nonumber\rp\\
&= \real_{\rect} \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp \lp \begin{bmatrix}
x\\y
\end{bmatrix} \rp + \real_{\rect}\lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_2,0} \rp \rp \lp \begin{bmatrix}
x \\ y
\end{bmatrix} \rp \nonumber \\
&+\real_{\rect}\lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp \lp \begin{bmatrix}
x\\y
\end{bmatrix} \rp \nonumber \\
&= \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp \begin{bmatrix}
1 & 1
\end{bmatrix} \begin{bmatrix}
x \\ y
\end{bmatrix}\rp - \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp \begin{bmatrix}
1 & 0
\end{bmatrix} \begin{bmatrix}
x \\ y
\end{bmatrix} \rp \nonumber\\
&-\frac{1}{2} \lp \real_{\rect}\lp \Psi \rp \rp \lp \begin{bmatrix}
0 & 1
\end{bmatrix} \begin{bmatrix}
x \\y
\end{bmatrix} \rp \nonumber \\
&=\frac{1}{2} \lp \real_{\rect}\lp \Psi \rp \rp \lp x+y \rp -\frac{1}{2} \lp \real_{\rect}\lp \Psi \rp \rp \lp x \rp - \frac{1}{2} \lp \real_{\rect}\lp \Psi \rp \rp \lp y \rp \label{6.0.33}
\end{align}
Note that this, the assumption that $\real_{\rect} \lp \Psi \rp \in C \lp \R, \R \rp$, and the assumption that $\lp \real_{\rect}\lp \Psi \rp \rp \lp 0 \rp = 0$ ensure that for all $x,y \in \R$:
\begin{align}
\lp \real_{\rect} \lp \Gamma \rp \rp \lp \begin{bmatrix}
x \\0
\end{bmatrix} \rp &= \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp x+0 \rp -\frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp - \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp 0 \rp \nonumber \\
&= 0 \nonumber\\
&= \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp 0+y \rp -\frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp 0 \rp - \frac{1}{2}\lp \real_{\rect} \lp \Psi \rp \rp \lp y \rp \nonumber \\
&=\lp \real_{\rect} \lp \Gamma \rp \rp \lp \begin{bmatrix}
0 \\y
\end{bmatrix} \rp
\end{align}
Next, observe that since by assumption it is the case for all $x \in \R$ that $|x^2 - \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp | \les \delta \max\{1,|x|^q\}$, and since for all $x,y \in \R$ it holds that $xy = \frac{1}{2}|x+y|^2-\frac{1}{2}|x|^2-\frac{1}{2}|y|^2$, the triangle inequality and (\ref{6.0.33}) give us that:
\begin{align}
&\left| \lp \real_{\rect} \lp \Gamma\rp\lp x,y \rp \rp -xy\right| \nonumber\\
&=\left|\frac{1}{2}\lb \lp \real_{\rect} \lp \Psi \rp \rp \lp x + y \rp - \left|x+y\right|^2 \rb - \frac{1}{2} \lb \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp -\left| x \right|^2\rb - \frac{1}{2} \lb \lp \real_{\rect} \lp \Psi\rp \rp \lp y \rp -\left|y\right|^2\rb \right| \nonumber \\
&\les \frac{1}{2}\left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x + y \rp - \left|x+y\right|^2 \right| + \frac{1}{2} \left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp -\left| x \right|^2\right| + \frac{1}{2} \left| \lp \real_{\rect} \lp \Psi\rp \rp \lp y \rp -\left|y\right|^2\right| \nonumber \\
&\les \frac{\delta}{2} \lb \max \left\{ 1, |x+y|^q\right\} + \max\left\{ 1,|x|^q\right\} + \max \left\{1,|y|^q \right\}\rb\nonumber
\end{align}
Note also that, since for all $\alpha,\beta \in \R$ and $p \in \lb 1, \infty \rp$ we have $|\alpha + \beta|^p \les 2^{p-1}\lp |\alpha|^p + |\beta|^p \rp$, it is the case that:
\begin{align}
&\left| \lp \real_{\rect} \lp \Gamma \rp \rp \lp x,y \rp - xy \right| \nonumber \\
&\les \frac{\delta}{2} \lb \max \left\{1, 2^{q-1}|x|^q+ 2^{q-1}\left| y\right|^q\right\} + \max\left\{1,\left|x\right|^q \right\} + \max \left\{1,\left| y \right|^q \right\}\rb \nonumber \\
&\les \frac{\delta}{2} \lb \max \left\{1, 2^{q-1}|x|^q \right\}+ 2^{q-1}\left| y\right|^q + \max\left\{1,\left|x\right|^q \right\} + \max \left\{1,\left| y \right|^q \right\}\rb \nonumber \\
&\les \frac{\delta}{2} \lb 2^q + 2\rb \max \left\{1, \left|x\right|^q, \left| y \right|^q \right\} = \ve \max \left\{ 1,\left| x \right|^q, \left| y \right|^q\right\} \nonumber
\end{align}
This proves Item (iv).
By symmetry it holds that $\param \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp = \param \lp -\frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_2,0} \rp \rp = \param \lp -\frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp$ and further that $\lay \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp = \lay \lp -\frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_2,0} \rp \rp = \lay \lp -\frac{1}{2}\triangleright\lp \Psi \bullet \aff_{A_3,0} \rp \rp$.
Note also that Corollary \ref{affcor} tells us that for all $i \in \{1,2,3\}$ and $a \in \{ \frac{1}{2},-\frac{1}{2}\}$ it is the case that:
\begin{align}
\param \lp a \triangleright \lp \Psi \bullet \aff_{A_i,0}\rp \rp = \param \lp \Psi \rp
\end{align}
This, together with Corollary \ref{corsum} indicates that:
\begin{align}\label{(6.1.49)}
\param \lp \Gamma \rp &\les 9\param\lp \Psi \rp \nonumber \\
&\les 9\max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\}
\end{align}
Combined with the fact that $\delta =\ve \lp 2^{q-1} +1\rp^{-1}$, this is then rendered as:
\begin{align}\label{(6.1.50)}
&9\max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\} \nonumber \\
&= 9\max \left\{ \lb \frac{40q}{q-2}\rb \lp \log_2 \lp \ve^{-1}\rp +\log_2 \lp 2^{q-1}+1\rp\rp + \frac{80}{q-2}-28,52 \right\}
\end{align}
Note that:
\begin{align}
\log_2 \lp 2^{q-1}+1\rp &= \log_2\lp 2^{q-1}+1\rp - \log_2 \lp 2^q\rp + q \nonumber\\
&=\log_2 \lp \frac{2^{q-1}+1}{2^q}\rp + q = \log_2 \lp 2^{-1}+2^{-q}\rp +q\nonumber \\
&\les \log_2 \lp 2^{-1} + 2^{-2}\rp + q = \log_2 \lp \frac{3}{4}\rp + q = \log_2 \lp 3\rp-2+q
\end{align}
Combining this with the fact that for all $q\in \lp 2,\infty\rp$ it is the case that $\frac{q(q-1)}{q-2} \ges 2$ then gives us that:
\begin{align}
\lb \frac{40q}{q-2}\rb \log_2 \lp 2^{q-1}+1\rp -28\ges \lb \frac{40q}{q-2}\rb \log_2 \lp 2^{q-1}\rp -28= \frac{40q(q-1)}{q-2}-28 \ges 52
\end{align}
This then finally renders (\ref{(6.1.50)}) as:
\begin{align}
&9\max \left\{ \lb \frac{40q}{q-2}\rb \lp \log_2 \lp \ve^{-1}\rp +\log_2 \lp 2^{q-1}+1\rp\rp + \frac{80}{q-2}-28,52 \right\} \nonumber \\
&\les 9 \lb \lb \frac{40q}{q-2}\rb \lp \log_2\lp \ve^{-1}\rp + \log_2\lp 3\rp-2+q\rp +\frac{80}{q-2}-28\rb \nonumber\\
&= 9 \lb \lb \frac{40q}{q-2}\rb \lp \log_2\lp \ve^{-1}\rp + \log_2\lp 3\rp-2+q+\frac{2}{q}\rp-28\rb \nonumber\\
&\les 9 \lb \lb \frac{40q}{q-2}\rb \lp \log_2\lp \ve^{-1}\rp +q+ \log_2\lp 3\rp-1\rp -28\rb \nonumber\\
&= \frac{360q}{q-2}\lb \log_2 \lp \ve^{-1} \rp +q+\log_2\lp 3\rp-1\rb -252
\end{align}
Since $\log_2 \lp 3 \rp - 1 \les 1$, this establishes Item (v).
Note that Lemma \ref{depth_prop}, Lemma \ref{5.3.3}, the hypothesis, and the fact that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$ tell us that:
\begin{align}
\dep \lp \Gamma \rp = \dep\lp \Psi \rp &\les \max\left\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \delta^{-1} \rp ,2\right\} \nonumber\\
&= \max \left\{1+\frac{1}{q-2} +\frac{q}{2(q-2)}\lb \log_2\lp \ve^{-1}\rp + \log_2 \lp 2^{q-1}+1\rp\rb,2 \right\} \nonumber\\
&\les \max \left\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)} \lp \log_2\lp \ve^{-1}\rp +q\rp,2\right\}
\end{align}
where we have used that $2^{q-1}+1 \les 2^q$. Since it is the case that $\frac{q(q-1)}{2(q-2)} > 2$ for $q \in \lp 2, \infty \rp$, we have that:
\begin{align}
& \max \left\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)} \lp \log_2\lp \ve^{-1}\rp +q\rp,2\right\} \nonumber \\
&= 1+\frac{1}{q-2}+\frac{q}{2(q-2)} \lp \log_2\lp \ve^{-1}\rp +q\rp \nonumber \\
&= \frac{q-1}{q-2} +\frac{q}{2\lp q-2\rp} \lp \log_2 \lp \ve^{-1}\rp+q\rp \les \frac{q}{q-2} \lb \log_2 \lp \ve^{-1}\rp +q \rb
\end{align}
This establishes Item (vi).
Observe next that for $q\in \lp 2,\infty\rp$ and $\ve \in \lp 0,\infty \rp$, $\Gamma$ consists of, among other things, three stacked $\Psi \bullet \aff_{A_i,0}$ networks where $i \in \{1,2,3\}$. Corollary \ref{affcor} therefore tells us that $\wid_1\lp \Gamma\rp = 3\cdot \wid_1 \lp \Psi \rp$. On the other hand, note that each $\Psi$ network consists of, among other things, two stacked $\Phi$ networks, which by Corollary \ref{affcor} and Lemma \ref{lem:sqr_network} yields that $\wid_1 \lp \Gamma\rp = 6 \cdot \wid_1 \lp \Phi\rp$. Finally, from Corollary \ref{cor:phi_network} and Corollary \ref{affcor}, we see that the only thing contributing to $\wid_1\lp \Phi\rp$ is $\wid_1 \lp \mathfrak{i}_4\rp$, which was established in Lemma \ref{lem:mathfrak_i} as $4$. Whence we get that $\wid_1\lp \Gamma\rp = 6 \cdot 4 = 24$, and likewise that $\wid_{\hid\lp \Gamma\rp}\lp \Gamma\rp = 24$. This proves Items (vii)\textemdash(viii) and completes the proof of the lemma.
\end{proof}
\begin{corollary}\label{cor_prd}
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, $A_1,A_2,A_3 \in \R^{1\times 2}$, $\Psi \in \neu$ satisfy for all $x\in \R$ that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, $A_1 = \lb 1 \quad 1 \rb$, $A_2 = \lb 1 \quad 0 \rb$, $A_3 = \lb 0 \quad 1 \rb$, $\real_{\rect}\lp \Psi \rp \in C\lp \R, \R \rp$, $\lp \real_{\rect} \lp \Psi \rp \rp \lp 0\rp = 0$, $0\les \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \les \delta+|x|^2$, $|x^2-\lp \real_{\rect}\lp \Psi \rp \rp \lp x \rp |\les \delta \max \{1,|x|^q\}$, $\dep\lp \Psi \rp \les \max\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \delta^{-1} \rp ,2\}$, and $\param \lp \Psi \rp \les \max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\}$, and finally let $\Gamma$ be defined as in Lemma \ref{prd_network}, i.e.:
\begin{align}
\Gamma = \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp \bigoplus \lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_2,0} \rp \rp \bigoplus\lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp
\end{align}
It is then the case for all $x,y \in \R$ that:
\begin{align}
\left| \real_{\rect} \lp \Gamma \rp \lp x,y \rp \right| \les \frac{3}{2} \lp \frac{\ve}{3} +x^2+y^2\rp \les \ve + 2x^2+2y^2
\end{align}
\end{corollary}
\begin{proof}
Note that the triangle inequality, the fact that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, the fact that for all $x,y\in \R$ it is the case that $|x+y|^2 \les 2\lp |x|^2+|y|^2\rp $ and (\ref{6.0.33}) tell us that:
\begin{align}
\left| \real_{\rect} \lp \Gamma \rp\lp x,y\rp \right| &\les \frac{1}{2}\left| \real_{\rect} \lp \Psi \rp\lp x+y \rp \right| + \frac{1}{2}\left| \real_{\rect} \lp \Psi \rp\lp x \rp \right| + \frac{1}{2}\left| \real_{\rect} \lp \Psi \rp\lp y \rp \right| \nonumber \\
&\les \frac{1}{2} \lp \delta + |x+y|^2 \rp + \frac{1}{2}\lp \delta + |x|^2\rp + \frac{1}{2}\lp \delta + |y|^2\rp\nonumber \\
&\les \frac{3\delta}{2} +\frac{3}{2}\lp |x|^2+|y|^2\rp = \lp \frac{3\ve}{2}\rp \lp 2^{q-1}+1\rp^{-1} + \frac{3}{2}\lp |x|^2+|y|^2\rp \nonumber\\
&= \frac{3}{2}\lp \frac{\ve}{2^{q-1}+1} + |x|^2 + |y|^2 \rp \les \frac{3}{2} \lp \frac{\ve}{3}+|x|^2+|y|^2\rp \nonumber \\
&\les \ve + 2x^2+2y^2
\end{align}
\end{proof}
\begin{remark}
We shall refer to this neural network for a given $q \in \lp 2,\infty \rp$ and given $\ve \in \lp 0,\infty \rp$ from now on as $\prd^{q,\ve}$.
\end{remark}
\begin{remark}
For an \texttt{R} implementation see Listing \ref{Prd}
\end{remark}
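The realization computed in (\ref{6.0.33}) is just the polarization identity $xy = \frac{1}{2}\lp x+y\rp^2 - \frac{1}{2}x^2-\frac{1}{2}y^2$ with $\real_{\rect}\lp \Psi \rp$ standing in for the square. A minimal \texttt{R} sketch, with an idealized exact square in place of $\Psi$ (the names \texttt{psi} and \texttt{prd\_real} are illustrative):
\begin{lstlisting}[language=R]
# Idealized realization of Prd^{q,eps}: with psi(x) = x^2 exactly,
# the combination below recovers the product xy exactly.
psi <- function(x) x^2  # stand-in for R_relu(Psi)
prd_real <- function(x, y) 0.5 * psi(x + y) - 0.5 * psi(x) - 0.5 * psi(y)
prd_real(3, -2)  # -6
\end{lstlisting}
With the true $\Psi$, each of the three evaluations incurs an error of at most $\delta\max\{1,|\cdot|^q\}$, which is what drives the bound in Item (iv) of Lemma \ref{prd_network}.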
\begin{remark}
Diagrammatically, this can be represented as:
\end{remark}
\begin{figure}
\begin{center}
\tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt
\begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1]
%uncomment if require: \path (0,475); %set diagram left start at 0, and has height of 475
%Shape: Rectangle [id:dp5102621452939872]
\draw (242,110.33) -- (430.67,110.33) -- (430.67,162.33) -- (242,162.33) -- cycle ;
%Shape: Rectangle [id:dp5404063577476766]
\draw (238.67,204.33) -- (427.33,204.33) -- (427.33,256.33) -- (238.67,256.33) -- cycle ;
%Shape: Rectangle [id:dp36108799479514775]
\draw (240,308.33) -- (428.67,308.33) -- (428.67,360.33) -- (240,360.33) -- cycle ;
%Shape: Rectangle [id:dp8902718451088835]
\draw (515.33,202.67) -- (600.67,202.67) -- (600.67,252.33) -- (515.33,252.33) -- cycle ;
%Shape: Rectangle [id:dp787158651575801]
\draw (74,204.67) -- (159.33,204.67) -- (159.33,254.33) -- (74,254.33) -- cycle ;
%Straight Lines [id:da7097969194866411]
\draw (515.33,202.67) -- (433.55,136.26) ;
\draw [shift={(432,135)}, rotate = 39.08] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da06987054821586158]
\draw (514.67,226) -- (432,226.98) ;
\draw [shift={(430,227)}, rotate = 359.32] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da6649718583556108]
\draw (515.33,252.33) -- (430.79,331.63) ;
\draw [shift={(429.33,333)}, rotate = 316.83] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da522975332769982]
\draw (240.67,136) -- (160.86,203.38) ;
\draw [shift={(159.33,204.67)}, rotate = 319.83] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da23420272890635796]
\draw (238.67,230.67) -- (160.67,231.64) ;
\draw [shift={(158.67,231.67)}, rotate = 359.28] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da3786949398178764]
\draw (239.33,333.33) -- (160.76,255.74) ;
\draw [shift={(159.33,254.33)}, rotate = 44.64] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da6573206574101601]
\draw (640.67,228.33) -- (602.33,228.33) ;
\draw [shift={(600.33,228.33)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da2877353538717321]
\draw (74,227.67) -- (35.67,227.67) ;
\draw [shift={(33.67,227.67)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
% Text Node
\draw (286,124) node [anchor=north west][inner sep=0.75pt]   {$\frac{1}{2} \rhd \lp \Psi \bullet \aff_{A_1,0}\rp$};
% Text Node
\draw (286,220) node [anchor=north west][inner sep=0.75pt]   {$\lp -\frac{1}{2}\rp \rhd \lp \Psi \bullet \aff_{A_2,0}\rp$};
% Text Node
\draw (286,326) node [anchor=north west][inner sep=0.75pt]   {$\lp -\frac{1}{2}\rp \rhd \lp \Psi \bullet \aff_{A_3,0}\rp$};
% Text Node
\draw (543,220) node [anchor=north west][inner sep=0.75pt] {$\cpy$};
% Text Node
\draw (100,225) node [anchor=north west][inner sep=0.75pt] {$\sm$};
\end{tikzpicture}
\end{center}
\caption{A neural network diagram of the $\prd$ network.}
\end{figure}
\section{Higher Approximations}\label{sec_tun}
We take inspiration from the $\sm$ neural network to create the $\pwr$ neural network. However, we first need to define a special neural network, called the \textit{tunneling neural network}, to effectively stack two neural networks of unequal depth.
\subsection{The $\tun$ Neural Networks and Their Properties}
\begin{definition}[R\textemdash,2023, The Tunneling Neural Networks]\label{def:tun}
We define the tunneling neural network, denoted as $\tun_n$ for $n\in \N$ by:
\begin{align}
\tun_n = \begin{cases}
\aff_{1,0} &:n= 1 \\
\id_1 &: n=2 \\
\bullet^{n-2} \id_1 & :n \in \N \cap [3,\infty)
\end{cases}
\end{align}
where $\id_1$ is as in Definition \ref{7.2.1}.
\end{definition}
\begin{remark}
For an \texttt{R} implementation see Listing \ref{Tun}
\end{remark}
\begin{lemma}\label{6.2.2}\label{tun_1}
Let $n\in \N$ and $x \in \R$. It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \tun_n \rp \in C \lp \R, \R \rp$
\item $\dep \lp \tun_n \rp =n$
\item $\lp \real_{\rect} \lp \tun_n \rp \rp \lp x \rp = x$
\item $\param \lp \tun_n \rp = \begin{cases}
2 &:n=1 \\
7+6(n-2) &:n \in \N \cap [2,\infty)
\end{cases}$
\item $\lay \lp \tun_n \rp = \lp l_0, l_1,...,l_{L-1}, l_L \rp = \lp 1,2,...,2,1 \rp $
\end{enumerate}
\end{lemma}
\begin{proof}
Note that $\real_{\rect}\lp \aff_{1,0}\rp \in C \lp \R, \R\rp$ and, by Lemma \ref{idprop}, we have that $\real_{\rect}\lp \id_1 \rp \in C\lp \R, \R\rp$. Finally, the composition of continuous functions is continuous, whence $\real_{\rect}\lp \tun_n \rp \in C\lp \R, \R\rp$ for $n \in \N \cap \lb 2,\infty\rp$. This proves Item (i).
Note that by Lemma \ref{5.3.2} it is the case that $\dep\lp \aff_{1,0} \rp = 1$ and by Lemma \ref{7.2.1} it is the case that $\dep \lp \id_1 \rp = 2$.
Assume now that $\dep\lp \tun_n \rp = n$ holds for all $n \les N$. For the inductive step, Lemma \ref{comp_prop} gives us that:
\begin{align}
\dep \lp \tun_{n+1} \rp &= \dep \lp \bullet^{n-1} \id_1 \rp \nonumber \\
&= \dep \lp \lp \bullet^{n-2} \id_1 \rp \bullet \id_1 \rp \nonumber \\
&=n+2-1 = n+1
\end{align}
This completes the induction and proves Item (ii).
Note next that by (\ref{5.1.11}) we have that:
\begin{align}
\lp \real_{\rect} \lp \aff_{1,0} \rp \rp \lp x \rp = x
\end{align}
Lemma \ref{idprop}, Item (iii) also tells us that:
\begin{align}
\lp \real_{\rect} \lp \id_1 \rp \rp \lp x \rp = \rect(x) - \rect(-x) = x
\end{align}
Assume now that $\lp \real_{\rect} \lp \tun_n \rp \rp \lp x \rp = x$ holds for all $n\les N$. For the inductive step, by Lemma \ref{idprop}, Item (iii), we then have that:
\begin{align}
\lp \real_{\rect} \lp \tun_{n+1} \rp \rp \lp x \rp &= \lp \real_{\rect} \lp \bullet^{n-1} \id_1 \rp \rp \lp x \rp \nonumber\\
&= \lp \real_{\rect} \lp \lp \bullet^{n-2} \id_1 \rp \bullet \id_1 \rp \rp \lp x \rp \nonumber\\
&= \lp \lp \real_{\rect} \lp \bullet^{n-2} \id_1 \rp \rp \circ \lp \real_{\rect} \lp \id_1 \rp \rp \rp \lp x \rp \nonumber \\
&= \lp \lp \real_{\rect} \lp \tun_n \rp \rp \circ \lp \real_{\rect} \lp \id_1 \rp \rp \rp \lp x \rp \nonumber \\
&= x
\end{align}
This proves Item (iii). Next note that $\param\lp \tun_1\rp = \param\lp \aff_{1,0}\rp = 2$. Note also that:
\begin{align}
\param\lp \tun_2\rp = \param \lp \id_1 \rp &= \param \lb \lp \lp \begin{bmatrix}
1 \\ -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix}\rp \rp \rb \nonumber \\
&= 7 \nonumber
\end{align}
And that by definition of composition:
\begin{align}
\param \lp \tun_3 \rp &= \param \lb \lp \lp \begin{bmatrix}
1 \\ -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix}\rp \rp \bullet \lp \lp \begin{bmatrix}
1 \\ -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix}\rp \rp \rb \nonumber \\
&= \param \lb \lp \lp \begin{bmatrix}
1 \\ -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix} \rp, \lp \begin{bmatrix}
1 & -1 \\ -1 & 1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1&-1
\end{bmatrix},\begin{bmatrix}
0
\end{bmatrix}\rp \rp \rb \nonumber \\
&=13 \nonumber
\end{align}
Assume now that $\param\lp \tun_n \rp = 7+6(n-2)$ holds for all $n \in \N \cap \lb 2, N \rb$. For the inductive step, we then have:
\begin{align}
&\param \lp \tun_{n+1} \rp = \param \lp \tun_n \bullet \id_1 \rp \nonumber\\
&=\param \lb \lp \lp \begin{bmatrix}
1 \\ -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & -1 \\ -1 & 1
\end{bmatrix}, \begin{bmatrix}
0 \\0
\end{bmatrix}\rp, \cdots, \lp \begin{bmatrix}
1 & -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix}\rp \rp \bullet \id_1 \rb \nonumber \\
&= \param \lb \lp \lp \begin{bmatrix}
1 \\ -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & -1 \\ -1 & 1
\end{bmatrix}, \begin{bmatrix}
0 \\0
\end{bmatrix}\rp, \cdots, \lp \begin{bmatrix}
1 & -1 \\ -1 & 1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0
\end{bmatrix} \rp, \lp \begin{bmatrix}
1 & -1
\end{bmatrix}, \begin{bmatrix}
0
\end{bmatrix}\rp \rp \rb \nonumber \\
&=7+6(n-2)+6 = 7+6\lp \lp n+1 \rp -2 \rp
\end{align}
This proves Item (iv).
Note finally that Item (v) is a consequence of Lemma \ref{idprop}, Item (i), and Lemma \ref{comp_prop}.
\end{proof}
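A quick \texttt{R} sketch of Lemma \ref{tun_1} (the names \texttt{tun\_real} and \texttt{tun\_params} are illustrative): the realization is the identity, built from $n-1$ applications of $\rect(x)-\rect(-x)$, and the parameter count follows the formula of Item (iv):
\begin{lstlisting}[language=R]
relu <- function(x) pmax(x, 0)
# Realization of Tun_n: for n >= 2, n - 1 compositions of id_1.
tun_real <- function(x, n) {
  if (n == 1) return(x)
  for (j in seq_len(n - 1)) x <- relu(x) - relu(-x)
  x
}
tun_params <- function(n) if (n == 1) 2 else 7 + 6 * (n - 2)
tun_real(-3.5, 6)        # -3.5
sapply(1:5, tun_params)  # 2 7 13 19 25
\end{lstlisting}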
\begin{definition}[R\textemdash, 2023, The Multi-dimensional Tunneling Network]\label{def:tun_mult}
We define the multi-dimensional tunneling neural network, denoted as $\tun^d_n$ for $n\in \N$ and $d \in \N$ by:
\begin{align}
\tun_n^d = \begin{cases}
\aff_{\mathbb{I}_d,\mymathbb{0}_d} &:n= 1 \\
\id_d &: n=2 \\
\bullet^{n-2} \id_d & :n \in \N \cap [3,\infty)
\end{cases}
\end{align}
where $\id_d$ is as in Definition \ref{7.2.1}.
\end{definition}
\begin{remark}
We may drop the superscript $d$ and write $\tun_n$ whenever $d=1$ or whenever $d$ is evident from the context.
\end{remark}
\begin{lemma}\label{tun_mult}
Let $n,d\in \N$ and $x \in \R^d$. It is then the case that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \tun_n^d \rp \in C \lp \R^d, \R^d \rp$
\item $\dep \lp \tun_n^d \rp =n$
\item $\lp \real_{\rect} \lp \tun_n^d \rp \rp \lp x \rp = x$
\item $\param \lp \tun_n^d \rp = \begin{cases}
d^2+d &:n=1 \\
4d^2+3d+ (n-2)\lp 4d^2+2d\rp &:n \in \N \cap [2,\infty)
\end{cases}$
\item $\lay \lp \tun_n^d \rp = \lp l_0, l_1,...,l_{L-1}, l_L \rp = \lp d,2d,...,2d,d \rp$
\end{enumerate}
\end{lemma}
\begin{proof}
Note that Items (i)\textendash(iii) follow from Lemma \ref{idprop} and Lemma \ref{comp_prop}. Note now that by observation $\param \lp \tun^d_1\rp = \param\lp \aff_{\mathbb{I}_d,\mymathbb{0}_d}\rp = d^2+d$. Next, Lemma \ref{id_param} tells us that $\param\lp \tun^d_2\rp = \param\lp \id_d \rp = 4d^2+3d$.
Note also that by definition of neural network composition, we have the following:
\begin{align}
&\param\lp \tun_3^d\rp \\ &= \param \lb \lp \lp \begin{bmatrix}
1 \\ -1 \\ &\ddots \\& & 1 \\& & -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\ \vdots \\ 0 \\0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & -1 \\ & &\ddots \\ & & & 1 & -1
\end{bmatrix}, \begin{bmatrix}
0 \\ \vdots \\ 0
\end{bmatrix}\rp \rp \bullet \right.\\ &\left. \lp \lp \begin{bmatrix}
1 \\ -1 \\ & \ddots \\ & & 1 \\ & & -1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\ \vdots \\ 0 \\0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 & -1\\ & &\ddots \\ & & & 1 & -1
\end{bmatrix}, \begin{bmatrix}
0 \\ \vdots \\ 0
\end{bmatrix}\rp \rp \rb \nonumber \\
&= \param \lb \lp \lp \begin{bmatrix}
1 \\ -1 \\ & \ddots \\ & & 1 \\ & &-1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\\vdots \\ 0\\0
\end{bmatrix} \rp, \lp \begin{bmatrix}
1 & -1 \\ -1 & 1 \\ & & \ddots \\ & & & 1 & -1 \\ & & & -1 & 1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\ \vdots \\ 0 \\ 0
\end{bmatrix}\rp, \lp \begin{bmatrix}
1 &-1 \\ & &\ddots \\ & & & 1 & -1
\end{bmatrix},\begin{bmatrix}
0 \\ \vdots \\ 0
\end{bmatrix}\rp \rp \rb \nonumber \\
&=2d \times d + 2d + 2d\times 2d +2d+2d\times d + d \nonumber \\
&=2d^2+2d+4d^2+2d+2d^2 +d \nonumber \\
&= 8d^2+5d
\end{align}
Suppose now that $\param\lp \tun_m^d\rp = 4d^2+3d + \lp m-2 \rp \lp 4d^2+2d\rp$ holds for all $m \in \N \cap \lb 2,n \rb$. For the inductive step, we have the following:
\begin{align}
& \param\lp \tun^d_{n+1}\rp = \param \lp \tun_n^d \bullet \id_d\rp \nonumber \\
& = \param \lb \lp \begin{bmatrix}
1 \\ -1 \\ & \ddots \\ & & 1 \\ & &-1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\ \vdots \\ 0 \\ 0
\end{bmatrix} \rp, \lp \begin{bmatrix}
1 & -1 \\ -1 & 1 \\ & \ddots \\ & & 1 & -1 \\ & & -1 & 1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\ \vdots \\ 0 \\ 0
\end{bmatrix} \rp, \hdots, \lp \begin{bmatrix}
1 &-1 \\ & \ddots \\ & & 1 & -1
\end{bmatrix}, \begin{bmatrix}
0 \\ \vdots \\ 0
\end{bmatrix}\rp \right. \nonumber \\
& \left. \bullet \id_d \rb \nonumber\\
& = \param \lb \lp \begin{bmatrix}
1 \\ -1 \\ & \ddots \\ & & 1 \\ & &-1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\ \vdots \\ 0 \\ 0
\end{bmatrix} \rp, \lp \begin{bmatrix}
1 & -1 \\ -1 & 1 \\ & \ddots \\ & & 1 & -1 \\ & & -1 & 1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\ \vdots \\ 0 \\ 0
\end{bmatrix} \rp, \hdots, \lp \begin{bmatrix}
1 & -1 \\ -1 & 1 \\ & \ddots \\ & & 1 & -1 \\ & & -1 & 1
\end{bmatrix}, \begin{bmatrix}
0 \\ 0 \\ \vdots \\ 0 \\ 0
\end{bmatrix} \rp, \right. \nonumber\\ &\left. \lp \begin{bmatrix}
1 &-1 \\ & \ddots \\ & & 1 & -1
\end{bmatrix}, \begin{bmatrix}
0 \\ \vdots \nonumber\\ 0
\end{bmatrix}\rp \rb \nonumber\\
&= 4d^2+3d+ (n-2)\lp 4d^2+2d\rp + 4d^2+2d \nonumber \\
&=4d^2+3d+\lp n-1\rp\lp 4d^2+2d\rp \nonumber
\end{align}
This proves Item (iv). Finally, Item (v) is a consequence of Lemma \ref{5.3.2}.
\end{proof}
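The closed-form parameter count of Lemma \ref{tun_mult}, Item (iv), is easy to sanity-check in \texttt{R} (the name \texttt{tun\_d\_params} is illustrative):
\begin{lstlisting}[language=R]
# Parameter count of Tun_n^d per Item (iv).
tun_d_params <- function(n, d) {
  if (n == 1) d^2 + d
  else 4 * d^2 + 3 * d + (n - 2) * (4 * d^2 + 2 * d)
}
tun_d_params(3, 2)  # 8*2^2 + 5*2 = 42, matching the computation above
\end{lstlisting}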
\subsection{The $\pwr$ Neural Networks and Their Properties}
\begin{definition}[R\textemdash, 2023, The Power Neural Network]\label{def:pwr}
Let $\delta,\ve \in \lp 0,\infty \rp $ and $q\in \lp 2,\infty \rp$ satisfy $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. We define the power neural networks $\pwr_n^{q,\ve} \in \neu$ for $n\in \N_0$ as:
\begin{align}
\pwr_n^{q,\ve} = \begin{cases}
\aff_{0,1} & :n=0\\
\prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr_{n-1}^{q,\ve})} \boxminus \pwr_{n-1}^{q,\ve} \rb \bullet \cpy_{2,1} & :n \in \N
\end{cases} \nonumber
\end{align}
Diagrammatically, this can be represented as:
\begin{figure}
\begin{center}
\begin{tikzpicture}
% Define nodes
\node[draw, rectangle] (top) at (0, 2) {$\pwr_{n-1}^{q,\ve}$};
\node[draw, rectangle] (right) at (2, 0) {$\cpy_{2,1}$};
\node[draw, rectangle] (bottom) at (0, -2) {$\tun_{\dep(\pwr_{n-1}^{q,\ve})}$};
\node[draw, rectangle] (left) at (-2, 0) {$\prd^{q,\ve} $};
% Arrows with labels
\draw[->] (right) -- node[midway, above] {$x$} (top);
\draw[<-] (right) -- node[midway, above] {$x$} (4,0);
\draw[->] (right) -- node[midway, right] {$x$} (bottom);
\draw[->] (top) -- node[midway, left] {$\lp \real_{\rect}\lp \pwr^{q,\ve}_{n-1}\rp \rp \lp x \rp $} (left);
\draw[->] (bottom) -- node[midway, left] {$x$} (left);
\draw[->] (left) -- node[midway, above] {} (-5.5,0);
% \draw[->] (-3,0) -- node[midway, above] {Arrow 6} (left);
\end{tikzpicture}
\end{center}
\caption{A representation of a typical $\pwr^{q,\ve}_n$ network.}
\end{figure}
\begin{remark}
For an \texttt{R} implementation see Listing \ref{Pwr}
\end{remark}
\begin{remark}
Note that for all $i \in \N$, $q\in \lp 2,\infty\rp$, $\ve \in \lp 0, \infty \rp$, each $\pwr_{i+1}^{q,\ve}$ differs from $\pwr_{i}^{q,\ve}$ by at least one $\prd^{q,\ve}$ network.
\end{remark}
\end{definition}
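Before giving the elementary properties of these networks, it may help to see the recursion at the level of realizations. The following is a minimal R sketch, not the full implementation of Listing \ref{Pwr}: the function \texttt{prd} stands in for the instantiated $\prd^{q,\ve}$ network and is here taken, purely as an assumption for illustration, to be exact multiplication, so that only the recursive structure of the definition is exhibited.
\begin{lstlisting}[language = R]
# Toy sketch of the Pwr recursion at the level of realizations.
# `prd` stands in for the instantiated Prd^{q,eps} network; exact
# multiplication is assumed here purely to exhibit the structure.
pwr <- function(n, x, prd = function(a, b) a * b) {
  if (n == 0) return(1)        # Pwr_0 instantiates to the constant 1
  prd(pwr(n - 1, x, prd), x)   # one more Prd per increment of n
}
pwr(3, 2)  # 8, i.e. x^3 at x = 2, since the stand-in product is exact
\end{lstlisting}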
\begin{lemma}\label{6.2.4}
Let $\ve \in \lp 0,\infty \rp$ and $q \in \lp 2,\infty \rp$. It is then the case for all $x,y \in \R$ that:
\begin{align}
\ve \max \left\{ 1,|x|^q,|y|^q\right\} \les \ve + \ve |x|^q+\ve |y|^q.
\end{align}
\end{lemma}
\begin{proof}
We prove this in three cases.
For the case that $|x| \les 1$ and $|y| \les 1$ we have:
\begin{align}
\ve \max \left\{ 1,|x|^q,|y|^q \right\} = \ve \les \ve + \ve |x|^q+\ve |y|^q
\end{align}
For the case that exactly one of $|x|$, $|y|$ exceeds $1$, say without loss of generality $|x| \les 1 \les |y|$, we have:
\begin{align}
\ve \max \left\{1,|x|^q,|y|^q \right\} = \ve | y|^q \les \ve + \ve |x|^q+\ve |y|^q
\end{align}
For the case that $|x| \ges 1$ and $|y| \ges 1$, without loss of generality with $|x| \ges |y|$, we have that:
\begin{align}
\ve \max\{ 1, |x|^q,|y|^q \} = \ve |x|^q \les \ve + \ve |x|^q+\ve |y|^q
\end{align}
\end{proof}
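The inequality may also be spot-checked numerically; the following one-off R check (illustrative only, with arbitrarily chosen values) verifies a sample instance.
\begin{lstlisting}[language = R]
# Spot-check of eps * max(1, |x|^q, |y|^q) <= eps + eps|x|^q + eps|y|^q.
eps <- 0.3; q <- 2.5; x <- 1.7; y <- 0.4
eps * max(1, abs(x)^q, abs(y)^q) <= eps * (1 + abs(x)^q + abs(y)^q)  # TRUE
\end{lstlisting}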
\begin{lemma}
Let $\ve \in \lp 0,\infty\rp$ and $q \in \lp 2,\infty\rp$. For $x \in \R$, let $\lp \mathfrak{p}_i \rp_{i\in\N}$ be the sequence of functions defined as follows:
\begin{align}
\mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\
\mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \text{ for } i \ges 2
\end{align}
It then holds for all $n\in \N$ and $x\in \R$ that:
\begin{align}
\left| \real_{\rect} \lp \pwr^{q,\ve}_n \rp \lp x \rp\right| \les \mathfrak{p}_n
\end{align}
\end{lemma}
\begin{proof}
Note that by Corollary \ref{cor_prd} it is the case that:
\begin{align}\label{(6.2.31)}
\left|\real_{\rect} \lp \pwr^{q,\ve}_1 \rp \lp x \rp \right| =\left| \real_{\rect}\lp \prd^{q,\ve}\rp \lp1,x \rp \right| \les \mathfrak{p}_1
\end{align}
and, applying Corollary \ref{cor_prd} together with (\ref{(6.2.31)}), it is the case that:
\begin{align}
\left| \real_{\rect} \lp \pwr_2^{q,\ve}\rp \lp x \rp \right| &= \left| \real_{\rect} \lp \prd^{q,\ve} \rp \lp \real_{\rect} \lp \prd ^{q,\ve}\rp\lp 1,x \rp,x\rp \right| \nonumber \\
&\les \ve + 2\left| \real_{\rect} \lp \prd^{q,\ve}\rp\lp 1,x\rp \right|^2 + 2|x|^2 \nonumber \\
&\les \ve + 2\mathfrak{p}_1^2 +2|x|^2 = \mathfrak{p}_2
\end{align}
Assume now that the bound holds for all naturals up to and including $n$. For the inductive step, Corollary \ref{cor_prd} tells us that:
\begin{align}
\left| \real_{\rect} \lp \pwr_{n+1}^{q,\ve}\rp \lp x\rp \right| &= \left| \real_{\rect} \lp \prd^{q,\ve} \rp \lp \real_{\rect}\lp \pwr^{q,\ve}_n\rp \lp x\rp, x\rp \right| \nonumber \\
&\les \ve + 2\left| \real_{\rect}\lp \pwr^{q,\ve}_n\rp \lp x\rp\right|^2 + 2|x|^2 \nonumber \\
&\les \ve + 2\mathfrak{p}_n^2 + 2|x|^2 = \mathfrak{p}_{n+1}
\end{align}
This completes the proof of the lemma.
\end{proof}
\begin{remark}
Note that since each instance of $\mathfrak{p}_i$ contains an instance of $\mathfrak{p}_{i-1}$ for $i \in \N \cap \lb 2,\infty\rp$, we have that $\mathfrak{p}_n \in \mathcal{O}\lp \ve^{2(n-1)}\rp$.
\end{remark}
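Since the sequence $\mathfrak{p}_i$ recurs throughout the remainder of this section, a direct tabulation may be useful. The following R snippet (an illustrative evaluation only) computes $\mathfrak{p}_n$ straight from its defining recursion; the rapid growth in $i$ is plainly visible.
\begin{lstlisting}[language = R]
# Evaluate the bounding sequence p_i directly from the recursion
# p_1 = eps + 2 + 2 x^2,  p_i = eps + 2 p_{i-1}^2 + 2 x^2.
frak_p <- function(i, x, eps) {
  if (i == 1) return(eps + 2 + 2 * x^2)
  eps + 2 * frak_p(i - 1, x, eps)^2 + 2 * x^2
}
sapply(1:4, frak_p, x = 0.5, eps = 0.01)  # 2.51, 13.11, 344.26, ~2.4e5
\end{lstlisting}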
\begin{lemma}\label{param_pwr_geq_param_tun}
For all $n \in \N$, $q\in \lp 2,\infty\rp$, and $\ve \in \lp 0,\infty\rp$, it is the case that $\param \lp \tun_{\dep\lp\pwr^{q,\ve}_n\rp}\rp \les \param \lp \pwr^{q,\ve}_n\rp$.
\end{lemma}
\begin{proof}
Note that for all $n \in \N$ it is straightforwardly the case that $\param\lp \pwr_n^{q,\ve}\rp \ges \param \lp \tun_{\dep\lp \pwr^{q,\ve}_{n-1}\rp}\rp$, because for all $n\in \N$ a $\pwr^{q,\ve}_n$ network contains a $\tun_{\dep\lp \pwr^{q,\ve}_{n-1}\rp}$ network. Note now that for all $i \in \N$ we have from Lemma \ref{tun_1} that $5 \les \param\lp \tun_{i+1}\rp - \param\lp \tun_i\rp \les 6$. Recall from Corollary \ref{cor:phi_network} that every instance of the $\Phi$ network contains at least one $\mathfrak{i}_4$ network, which by Lemma \ref{lem:mathfrak_i} has $40$ parameters, whence the $\prd^{q,\ve}$ network has at least $40$ parameters for all $\ve \in \lp 0,\infty \rp$ and $q \in \lp 2,\infty\rp$. Note now that for all $i\in \N$, $\pwr^{q,\ve}_{i}$ and $\pwr^{q,\ve}_{i+1}$ differ by at least as many parameters as there are in $\prd^{q,\ve}$, since they differ by at least one more $\prd^{q,\ve}$. Thus, with every increment in $i$, $\param\lp \pwr_i^{q,\ve}\rp$ outgrows $\param\lp \tun_i\rp$ by at least $40-6 = 34$ parameters, whence it is the case that for all $i \in \N$ we have $\param\lp \tun_i\rp \les \param \lp \pwr^{q,\ve}_i\rp$.
\end{proof}
\begin{lemma}[R\textemdash,2023]\label{power_prop}
Let $\delta,\ve \in \lp 0,\infty \rp $ and $q\in \lp 2,\infty \rp$ satisfy $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, and let $\pwr_n^{q,\ve} \in \neu$ be as in Definition \ref{def:pwr}. It is then the case for all $n \in \N_0$ and $x \in \R$ that:
\begin{enumerate}[label = (\roman*)]
\item $\lp \real_{\rect} \lp \pwr_n^{q,\ve} \rp \rp \lp x \rp \in C \lp \R, \R \rp $
\item $\dep(\pwr_n^{q,\ve}) \les \begin{cases}
1 & :n=0\\
n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 & :n \in \N
\end{cases}$
\item $\wid_1 \lp \pwr^{q,\ve}_{n}\rp = \begin{cases}
1 & :n=0 \\
24+2\lp n-1 \rp & :n \in \N
\end{cases}$
\item $\param(\pwr_n^{q,\ve}) \les \begin{cases}
2 & :n=0 \\
4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp &: n\in \N
\end{cases}$\\~\\
\item $\left|x^n -\lp \real_{\rect} \lp \pwr^{q,\ve}_n \rp \rp \lp x \rp \right| \les \begin{cases}
0 & :n=0 \\
\left| x \lp x^{n-1} - \real_{\rect}\lp \pwr^{q,\ve}_{n-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{n-1}^q & :n\in \N
\end{cases}$ \\~\\
Where $\mathfrak{p}_i$ is the sequence of functions defined for $i \in \N$ as:
\begin{align}
\mathfrak{p}_1 &= \ve + 2 + 2|x|^2 \nonumber\\
\mathfrak{p}_i &= \ve + 2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \text{ for } i \ges 2
\end{align}
And whence we get that:
\begin{align}
\left| x^{n} - \real_{\rect} \lp \pwr^{q,\ve}_n\rp \lp x\rp\right| \in \mathcal{O} \lp \ve^{2q\lp n-1\rp} \rp &\text{ for } n \ges 2
\end{align}
\item $\wid_{\hid \lp \pwr_n^{q,\ve}\rp}\lp \pwr^{q,\ve}_n\rp = \begin{cases}
1 & n=0 \\
24 & n \in \N
\end{cases}$
\end{enumerate}
\end{lemma}
\begin{proof}
Note that Item (ii) of Lemma \ref{5.3.2} ensures that $\real_{\rect} \lp \pwr_0^{q,\ve} \rp = \real_{\rect}\lp \aff_{0,1} \rp \in C \lp \R, \R \rp$. Note next that by Item (v) of Lemma \ref{comp_prop}, with $\Phi_1 \curvearrowleft \nu_1, \Phi_2 \curvearrowleft \nu_2, a \curvearrowleft \rect$, we have that:
\begin{align}
\lp \real_{\rect} \lp \nu_1 \bullet \nu_2 \rp\rp \lp x \rp = \lp\lp \real_{\rect}\lp \nu_1 \rp \rp \circ \lp \real_{\rect}\lp \nu_2 \rp \rp \rp \lp x \rp
\end{align}
This, together with the facts that compositions of continuous functions are continuous and that stackings of continuously instantiated neural networks instantiate to continuous functions, tells us that $\real_{\rect} \lp \pwr_n^{q,\ve}\rp \in C \lp \R, \R \rp$ for all $n \in \N$. This establishes Item (i).
Note next that by observation $\dep \lp \pwr_0^{q,\ve} \rp=1$ and by Item (iv) of Lemma \ref{idprop}, it is the case that $\dep\lp \id_1 \rp = 2$. By Lemmas \ref{dep_cpy} and \ref{depthofcomposition} it is also the case that $\dep\lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr^{q,\ve}_{n-1})} \boxminus \pwr^{q,\ve}_{n-1} \rb \bullet \cpy_{2,1} \rp = \dep \lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr^{q,\ve}_{n-1})} \boxminus \pwr^{q,\ve}_{n-1} \rb\rp$. Note also that, since the depth of a stacked network is the maximum of the depths of its components, it is the case that $\dep \lp \tun_{\dep \lp \pwr^{q,\ve}_{n-1}\rp} \boxminus \pwr^{q,\ve}_{n-1}\rp = \dep \lp \pwr^{q,\ve}_{n-1} \rp$.
This with Lemma \ref{comp_prop} then yields for $n \in \N$ that:
\begin{align}
\dep \lp \pwr^{q,\ve}_n \rp &= \dep \lp \prd^{q,\ve} \bullet \lb \tun_{\dep \lp \pwr^{q,\ve}_{n-1} \rp } \boxminus \pwr^{q,\ve}_{n-1} \rb \bullet \cpy_{2,1} \rp \nonumber \\
&= \dep \lp \prd^{q,\ve} \bullet \lb \tun_{\dep \lp \pwr^{q,\ve}_{n-1} \rp } \boxminus \pwr^{q,\ve}_{n-1} \rb \rp \nonumber \\
&= \dep \lp \prd^{q,\ve} \rp + \dep \lp \tun_{\dep \lp \pwr^{q,\ve}_{n-1} \rp} \rp -1 \nonumber \\
&\les \frac{q}{q-2} \lb \log_2 \lp \ve^{-1}\rp +q \rb + \dep \lp \tun_{\dep\lp \pwr^{q,\ve}_{n-1} \rp} \rp - 1 \nonumber \\
&= \frac{q}{q-2}\lb \log_2 \lp\ve^{-1} \rp + q\rb + \dep \lp \pwr^{q,\ve}_{n-1}\rp - 1
\end{align}
And hence for all $n \in \N$ it is the case that:
\begin{align}
\dep\lp \pwr^{q,\ve}_n\rp - \dep \lp \pwr^{q,\ve}_{n-1}\rp \les \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1
\end{align}
This, in turn, indicates that:
\begin{align}
\dep \lp \pwr^{q,\ve}_n\rp &\les n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1
\end{align}
This proves Item (ii).
Note now that $\wid_1 \lp \pwr^{q,\ve}_0\rp = \wid_1 \lp \aff_{0,1}\rp = 1$. Further, Lemma \ref{comp_prop} and Remark \ref{5.3.2} tell us that for all $i,k \in \N$ it is the case that $\wid_i \lp \tun_k\rp \les 2$. Observe that since $\cpy_{2,1}$, $\pwr_0^{q,\ve}$, and $\tun_{\dep \lp \pwr_0^{q,\ve}\rp}$ are all affine neural networks, Lemma \ref{aff_effect_on_layer_architecture}, Corollary \ref{affcor}, and Lemma \ref{prd_network} tell us that:
\begin{align}
\wid_1 \lp \pwr_1^{q,\ve} \rp &= \wid_1 \lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr_{0}^{q,\ve})} \boxminus \pwr_{0}^{q,\ve} \rb \bullet \cpy_{2,1} \rp \nonumber \\
&= \wid_1 \lp \prd^{q,\ve}\rp = 24
\end{align}
And that:
\begin{align}
\wid_1 \lp \pwr_2^{q,\ve} \rp &= \wid_1 \lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr_{1}^{q,\ve})} \boxminus \pwr_{1}^{q,\ve} \rb \bullet \cpy_{2,1} \rp \nonumber \\
&= \wid_1 \lp \lb \tun_{\dep \lp \pwr^{q,\ve}_1 \rp} \boxminus \pwr_{1}^{q,\ve} \rb \rp \nonumber\\
&= 24+2 = 26 \nonumber
\end{align}
This completes the base case. For the inductive case, assume that for all $i$ up to and including $k\in \N$ it is the case that $\wid_1 \lp \pwr_i^{q,\ve}\rp \les \begin{cases}
1 & :i=0 \\
24+2(i-1) & :i \in \N
\end{cases}$. For the case of $k+1$, we get that:
\begin{align}
\wid_1 \lp \pwr_{k+1}^{q,\ve} \rp &= \wid_1 \lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr_{k}^{q,\ve})} \boxminus \pwr_{k}^{q,\ve} \rb \bullet \cpy_{2,1} \rp \nonumber \\
&=\wid_1 \lp \lb \tun_{\dep(\pwr_{k}^{q,\ve})} \boxminus \pwr_{k}^{q,\ve} \rb \rp \nonumber \\
&=\wid_1 \lp \tun_{\dep \lp \pwr^{q,\ve}_{k}\rp}\rp + \wid_1 \lp \pwr^{q,\ve}_k\rp \nonumber \\
&\les 2 + 24 + 2\lp k-1\rp = 24+2\lp \lp k+1\rp -1\rp
\end{align}
This establishes Item (iii).
For Item (iv), we will prove this in cases.
\textbf{Case 1: $\pwr_0^{q,\ve}:$}
Note that by Lemma \ref{5.3.2} we have that:
\begin{align}
\param\lp \pwr_0^{q,\ve} \rp = \param \lp \aff_{0,1} \rp =2
\end{align}
This completes Case 1.
\textbf{Case 2: $\pwr_n^{q,\ve}$ where $n\in \N$:}
Note that Lemma \ref{paramofparallel}, Lemma \ref{param_pwr_geq_param_tun}, Corollary \ref{cor:sameparal}, Lemma \ref{lem:paramparal_geq_param_sum}, and Corollary \ref{cor:bigger_is_better} tell us that:
\begin{align}
\param \lp \pwr_{n-1}^{q,\ve} \boxminus \tun_{\dep \lp \pwr_{n-1}^{q,\ve}\rp }\rp &\les \param \lp \pwr^{q,\ve}_{n-1} \boxminus \pwr^{q,\ve}_{n-1}\rp \nonumber\\
&\les 4\param\lp \pwr^{q,\ve}_{n-1}\rp
\end{align}
Then Lemma \ref{comp_prop} and Corollary \ref{affcor} tells us that:
\begin{align}\label{(6.2.34)}
&\param \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb \bullet \cpy_{2,1}\rp \nonumber\\&= \param \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb \rp \nonumber\\
&\les 4\param \lp \pwr^{q,\ve}_{n-1}\rp
\end{align}
Note next that by definition, for all $q\in \lp 2,\infty\rp$ and $\ve \in \lp 0,\infty\rp$, it is the case that $\wid_{\hid\lp \pwr_0^{q,\ve}\rp}\lp \pwr_0^{q,\ve}\rp = \wid_{\hid \lp \aff_{0,1}\rp}\lp \aff_{0,1}\rp = 1$. Now, by Lemma \ref{prd_network} and by construction of $\pwr_i^{q,\ve}$, we may say that for $i\in \N$ it is the case that:
\begin{align}
\wid_{\hid \lp \pwr^{q,\ve}_i\rp}\lp \pwr^{q,\ve}_i\rp = \wid _{\hid \lp \prd^{q,\ve}\rp}\lp \prd^{q,\ve}\rp = 24
\end{align}
Note also that by Lemma \ref{6.2.2} it is the case that:
\begin{align}
\wid_{\hid \lp \tun_{\dep \lp \pwr_{i-1}^{q,\ve}\rp}\rp} \lp \tun_{\dep \lp \pwr^{q,\ve}_{i-1}\rp} \rp = 2
\end{align}
Furthermore, note that for $n\in \lb 2, \infty \rp \cap \N$ Lemma \ref{prd_network} tells us that:
\begin{align}
\wid_{\hid \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp} \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp = 24+2=26
\end{align}
Finally Lemma \ref{comp_prop}, (\ref{(6.2.34)}), and Corollary \ref{cor:sameparal}, also tells us that:
\begin{align}
\param \lp \pwr_{n}^{q,\ve}\rp &= \param \lp \prd^{q,\ve} \bullet\lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb \bullet \cpy_{2,1}\rp \nonumber \\
&= \param \lp \prd^{q,\ve} \bullet \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp \nonumber \\
&\les \param \lp \prd^{q,\ve} \rp + 4\param \lp \pwr_{n-1}^{q,\ve}\rp \nonumber\\
&\quad+ \wid_1 \lp \prd^{q,\ve} \rp \cdot \wid_{\hid \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp} \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp \nonumber \\
&= \param\lp \prd^{q,\ve}\rp + 4\param\lp \pwr^{q,\ve}_{n-1}\rp + 624 \nonumber\\
&\les 4^{n+1}\param\lp \pwr^{q,\ve}_0\rp + \lp \frac{4^{n+1}-1}{3}\rp \lp \param\lp \prd^{q,\ve}\rp + 624\rp \nonumber\\
&\les 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp
\end{align}
Here the penultimate line follows from unrolling the recursion $\param\lp \pwr_n^{q,\ve}\rp \les 4\param\lp\pwr_{n-1}^{q,\ve}\rp + \param\lp\prd^{q,\ve}\rp+624$ with $\param\lp\pwr_0^{q,\ve}\rp = 2$. This proves Item (iv).
Next note that $\lp \real_{\rect} \lp \pwr_{0}^{q,\ve} \rp\rp \lp x \rp$ is exactly $1$, which implies that for all $x\in \R$ we have that $|x^0-\lp \real_{\rect} \lp \pwr_{0}^{q,\ve}\rp\lp x \rp\rp |=0$. Note also that the instantiations of $\tun_n$ and $\cpy_{2,1}$ are exact, whence the only sources of error for $\pwr^{q,\ve}_n$ are the $n$ compounding applications of $\prd^{q,\ve}$.
Note also that by definition, it is the case that:
\begin{align}
\real_{\rect}\lp \pwr_n^{q,\ve} \rp \lp x \rp = \underbrace{\inst_{\rect} \lp \prd^{q,\ve}\rp \lp \inst_{\rect} \lp \prd^{q,\ve}\rp\lp\cdots \inst_{\rect}\lp \prd^{q,\ve} \rp\lp 1,x\rp \cdots ,x\rp, x \rp}_{n \text{ applications of } \prd^{q,\ve}}
\end{align}
Lemma \ref{prd_network} tells us that:
\begin{align}
\left|x-\real_{\rect}\lp \prd^{q,\ve} \lp 1,x \rp \rp \right| \les \ve \max\{ 1,|x|^q\} \les \ve + \ve\left| x\right|^q
\end{align}
The triangle inequality, Lemma \ref{6.2.4}, Lemma \ref{prd_network}, and Corollary \ref{cor_prd} then tells us that:
\begin{align}
&\left| x^2 - \real_{\rect} \lp \pwr^{q,\ve}_2 \rp \lp x \rp \right| \nonumber\\
&=\left| x\cdot x-\real_{\rect}\lp \prd^{q,\ve}\lp \inst_{\rect}\lp \prd^{q,\ve} \lp 1,x \rp \rp,x\rp \rp\right| \nonumber\\
&\les \left| x\cdot x - x \cdot \inst_{\rect} \lp \prd^{q,\ve}\lp 1,x\rp \rp \right| + \left| x\cdot \inst_{\rect}\lp \prd^{q,\ve} \lp 1,x \rp\rp -\inst_{\rect}\lp \prd^{q,\ve} \lp \inst_{\rect}\lp \prd^{q,\ve}\lp 1,x\rp \rp,x \rp \rp \right| \nonumber\\
&\les \left| x\lp x-\inst_{\rect}\lp \prd^{q,\ve}\lp 1,x\rp\rp\rp\right|+ \ve + \ve\left| x\right|^q+\ve \left| \inst_{\rect}\lp \prd^{q,\ve}\lp 1,x\rp\rp\right|^q \nonumber\\
&\les \left|x\ve + x\ve\left|x\right|^q \right| + \ve + \ve\left|x\right|^q+\ve \left|\ve + 2+2|x|^2 \right|^q \nonumber\\
&= \left| x\ve + x\ve \left| x\right|^q\right| + \ve + \ve\left| x\right|^q + \ve \mathfrak{p}_{1}^q
\end{align}
Note that this takes care of our base case. Assume now that for all integers up to and including $n$, it is the case that:
\begin{align}\label{(6.2.39)}
\left| x^n - \real_{\rect}\lp \pwr_n^{q,\ve}\rp \lp x \rp \right| &\les \left| x\cdot x^{n-1}-x \cdot \real_{\rect}\lp \pwr_{n-1}^{q,\ve}\rp \lp x\rp\right| + \left| x \cdot \real_{\rect}\lp \pwr_{n-1}^{q,\ve}\rp \lp x\rp -\real_{\rect} \lp \pwr_n^{q,\ve} \rp \lp x \rp \right| \nonumber \\
&\les \left| x\lp x^{n-1}-\real_{\rect} \lp \pwr^{q,\ve}_{n-1}\rp \lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\left| \real_{\rect}\lp \pwr^{q,\ve}_{n-1}\rp \lp x \rp \right| ^q\nonumber \\
&\les \left| x \lp x^{n-1} - \real_{\rect}\lp \pwr^{q,\ve}_{n-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{n-1}^q
\end{align}
For the inductive case, we see that:
\begin{align}
\left|x^{n+1}-\real_{\rect}\lp \pwr_{n+1}^{q,\ve}\rp\lp x\rp \right| &\les \left| x^{n+1}-x\cdot \real_{\rect}\lp \pwr_{n}^{q,\ve}\rp \lp x \rp\right| + \left| x\cdot \real_{\rect}\lp \pwr^{q,\ve}_n\rp \lp x \rp - \real_{\rect} \lp \pwr^{q,\ve}_{n+1}\rp\lp x\rp\right| \nonumber \\
&\les \left|x\lp x^n-\real_{\rect} \lp \pwr^{q,\ve}_n\rp \lp x\rp\rp \right| + \ve + \ve|x|^q+\ve\left| \real_{\rect} \lp \pwr^{q,\ve}_{n}\rp \lp x \rp\right|^q \nonumber \\
&\les \left|x\lp x^n-\real_{\rect} \lp \pwr^{q,\ve}_n\rp \lp x\rp\rp \right| + \ve + \ve|x|^q + \ve\mathfrak{p}^q_n
\end{align}
Note that since $\mathfrak{p}_n \in \mathcal{O} \lp \ve^{2(n-1)}\rp$ for $n\in \N \cap \lb 2,\infty \rp$, it is then the case for all $x\in \R$ that $\left| x^{n} - \real_{\rect} \lp \pwr^{q,\ve}_n\rp \lp x\rp\right| \in \mathcal{O} \lp \ve^{2q(n-1)} \rp$ for $n \ges 2$. This proves Item (v).
Finally, note that $\wid_{\hid \lp \pwr^{q,\ve}_0\rp}\lp \pwr^{q,\ve}_0\rp = 1$ by observation. For $n\in \N$, note that the second to last layer of $\pwr_n^{q,\ve}$ is the second to last layer of the $\prd^{q,\ve}$ network. Thus Lemma \ref{prd_network} tells us that:
\begin{align}
\wid_{\hid\lp \pwr^{q,\ve}_n\rp} \lp \pwr^{q,\ve}_n\rp = \begin{cases}
1 & n=0 \\
24 & n\in \N
\end{cases}
\end{align}
This completes the proof of the lemma.
\end{proof}
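To make the preceding bounds concrete, the following R snippet tabulates the depth bound of Item (ii), the parameter bound of Item (iv), and the error recursion of Item (v) for sample values of $n$, $q$, and $\ve$. The formulas are transcribed directly from the statement of the lemma; the snippet is an illustrative sketch, not part of the formal development.
\begin{lstlisting}[language = R]
# Depth bound of Item (ii).
dep_bound <- function(n, q, eps) {
  if (n == 0) 1 else n * (q / (q - 2) * (log2(1 / eps) + q) - 1) + 1
}
# Parameter bound of Item (iv).
param_bound <- function(n, q, eps) {
  if (n == 0) return(2)
  4^(n + 3 / 2) +
    (4^(n + 1) - 1) / 3 * (360 * q / (q - 2) * (log2(1 / eps) + q + 1) + 372)
}
# Error recursion of Item (v):
# err_n <= |x| err_{n-1} + eps (1 + |x|^q + p_{n-1}^q).
err_bound <- function(n, x, q, eps) {
  p <- function(i) if (i == 1) eps + 2 + 2 * x^2 else eps + 2 * p(i - 1)^2 + 2 * x^2
  e <- if (n == 0) 0 else eps * (1 + abs(x)^q)
  if (n >= 2) for (i in 2:n) e <- abs(x) * e + eps * (1 + abs(x)^q + p(i - 1)^q)
  e
}
c(depth = dep_bound(3, 2.5, 1e-3),
  params = param_bound(3, 2.5, 1e-3),
  error = err_bound(3, 0.9, 2.5, 1e-3))
\end{lstlisting}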
\begin{remark}\label{rem:pwr_gets_deeper}
Note that each power network $\pwr_n^{q,\ve}$ is at least as large as the previous power network $\pwr_{n-1}^{q,\ve}$; indeed, each differs from its predecessor by at least one additional $\prd^{q,\ve}$ network.
\end{remark}
\subsection{$\pnm_{n,C}^{q,\ve}$ and Neural Network Polynomials.}
\begin{definition}[Neural Network Polynomials]
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. For fixed $q,\ve$, fixed $n \in \N_0$, and for $C = \lp c_0,c_1,\hdots, c_n \rp \in \R^{n+1}$ (the vector of coefficients), we will define the following objects as neural network polynomials:
\begin{align}
\pnm^{q,\ve}_{n,C} \coloneqq \bigoplus^n_{i=0} \lp c_i \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rp
\end{align}
\end{definition}
\begin{remark}
Diagrammatically, these networks can be represented as in the figure below.
\end{remark}
\begin{figure}[h]
\begin{center}
\tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt
\begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1]
%uncomment if require: \path (0,475); %set diagram left start at 0, and has height of 475
%Shape: Rectangle [id:dp8950407412127579]
\draw (390,52) -- (455.33,52) -- (455.33,85) -- (390,85) -- cycle ;
%Shape: Rectangle [id:dp6602004057057332]
\draw (359.33,108.67) -- (454,108.67) -- (454,141.67) -- (359.33,141.67) -- cycle ;
%Shape: Rectangle [id:dp6567335394697266]
\draw (300,168.67) -- (455.33,168.67) -- (455.33,201.67) -- (300,201.67) -- cycle ;
%Shape: Rectangle [id:dp40847692689766735]
\draw (200,255.33) -- (456,255.33) -- (456,288.33) -- (200,288.33) -- cycle ;
%Shape: Rectangle [id:dp9479406055744195]
\draw (200.67,51.33) -- (358.67,51.33) -- (358.67,84.33) -- (200.67,84.33) -- cycle ;
%Shape: Rectangle [id:dp8579663805783284]
\draw (199.33,108) -- (330,108) -- (330,141) -- (199.33,141) -- cycle ;
%Shape: Rectangle [id:dp41506308397634806]
\draw (200.67,168.67) -- (268.67,168.67) -- (268.67,201.67) -- (200.67,201.67) -- cycle ;
%Straight Lines [id:da4565055641527326]
\draw (390.67,68.33) -- (361.33,68.33) ;
\draw [shift={(359.33,68.33)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da26211042309965304]
\draw (358,123.67) -- (332.67,123.67) ;
\draw [shift={(330.67,123.67)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da19391185534075384]
\draw (298,185) -- (272,185) ;
\draw [shift={(270,185)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Shape: Rectangle [id:dp5947036121491401]
\draw (518.67,155.33) -- (584,155.33) -- (584,188.33) -- (518.67,188.33) -- cycle ;
%Straight Lines [id:da9888083048478233]
\draw (518.67,155.33) -- (457.85,71.95) ;
\draw [shift={(456.67,70.33)}, rotate = 53.89] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da8782251261566656]
\draw (517.33,166) -- (457.03,128.72) ;
\draw [shift={(455.33,127.67)}, rotate = 31.73] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da09841373540031018]
\draw (518.67,178) -- (459.33,178.32) ;
\draw [shift={(457.33,178.33)}, rotate = 359.69] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da1515899374288483]
\draw (518.67,188.33) -- (458.51,271.38) ;
\draw [shift={(457.33,273)}, rotate = 305.92] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Shape: Rectangle [id:dp031165707162986944]
\draw (78.67,154.67) -- (144,154.67) -- (144,187.67) -- (78.67,187.67) -- cycle ;
%Straight Lines [id:da9492662023556374]
\draw (200,68.33) -- (145.09,152.99) ;
\draw [shift={(144,154.67)}, rotate = 302.97] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da028520602639475978]
\draw (198.67,123) -- (146.92,162.45) ;
\draw [shift={(145.33,163.67)}, rotate = 322.67] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da6814861591796668]
\draw (200,185) -- (147.29,174.07) ;
\draw [shift={(145.33,173.67)}, rotate = 11.71] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da019305885926265143]
\draw (198.67,271) -- (145.1,189.34) ;
\draw [shift={(144,187.67)}, rotate = 56.74] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da8585029210721031]
\draw (616,172.33) -- (586.67,172.33) ;
\draw [shift={(584.67,172.33)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
%Straight Lines [id:da9805678030848519]
\draw (78.67,169.67) -- (49.33,169.67) ;
\draw [shift={(47.33,169.67)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ;
% Text Node
\draw (412,217.73) node [anchor=north west][inner sep=0.75pt] {$\vdots $};
% Text Node
\draw (406,61.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Pwr}^{q,\ve}_{0}$};
% Text Node
\draw (406,118.07) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Pwr}^{q,\ve}_{1}$};
% Text Node
\draw (403.33,177.07) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Pwr}^{q,\ve}_{2}$};
% Text Node
\draw (265.33,58.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Tun}$};
% Text Node
\draw (404,263.07) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Pwr}^{q,\ve}_{n}$};
% Text Node
\draw (249.33,115.73) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Tun}$};
% Text Node
\draw (222,176.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Tun}$};
% Text Node
\draw (525,162.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Cpy}_{n+1,1}$};
% Text Node
\draw (471.33,198.4) node [anchor=north west][inner sep=0.75pt] {$\vdots $};
% Text Node
\draw (83,163.73) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Sum}_{n+1,1}$};
% Text Node
\draw (230.67,214.4) node [anchor=north west][inner sep=0.75pt] {$\vdots $};
% Text Node
\draw (172,193.73) node [anchor=north west][inner sep=0.75pt] {$\vdots $};
\end{tikzpicture}
\end{center}
\caption{Neural network diagram for an elementary neural network polynomial.}
\end{figure}
\begin{lemma}[R\textemdash,2023]\label{6.2.9}\label{nn_poly}\label{mnm_prop}
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. It is then the case for all $n\in\N_0$ and $x\in \R$ that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \pnm_{n,C}^{q,\ve}\rp \in C \lp \R, \R \rp $
\item $\dep \lp \pnm_{n,C}^{q,\ve} \rp \les \begin{cases}
1 & :n=0\\
n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N
\end{cases}$
\item $\param \lp \pnm_{n,C}^{q,\ve} \rp \les \begin{cases}
2 & :n =0 \\
\lp n+1\rp\lb 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N
\end{cases}$ \\~\\
\item $\left|\sum^n_{i=0} c_ix^i - \real_{\rect} \lp \pnm_{n,C}^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \left|c_i\right|\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp $\\~\\
Where $\mathfrak{p}_i$ is the sequence of functions defined for $i \in \N$ as:
\begin{align}
\mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\
\mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \text{ for } i \ges 2
\end{align}
Whence it is the case that:
\begin{align}
\left|\sum^n_{i=0} c_ix^i - \real_{\rect} \lp \pnm_{n,C}^{q,\ve} \rp \lp x \rp \right| \in \mathcal{O} \lp \ve^{2q(n-1)}\rp
\end{align}
\item $\wid_1 \lp \pnm_{n,C}^{q,\ve} \rp \les 2+23n+n^2 $
\item $\wid_{\hid \lp \pnm_{n,C}^{q,\ve}\rp} \lp \pnm_{n,C}^{q,\ve}\rp \les\begin{cases}
1 &:n=0 \\
24 + 2n &:n\in \N \end{cases}$
\end{enumerate}
\end{lemma}
\begin{proof}
Note that by Lemma \ref{5.6.3}, Lemma \ref{power_prop}, and Lemma \ref{comp_prop} for all $n\in \N_0$ it is the case that:
\begin{align}
\real_{\rect}\lp \pnm_{n,C}^{q,\ve} \rp &= \real_{\rect} \lp \bigoplus^n_{i=0} \lb c_i \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rb \rp \nonumber\\
&= \sum^n_{i=0}c_i \real_{\rect}\lp \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve} \rp \nonumber\\
&= \sum^n_{i=0}c_i\real_{\rect}\lp \pwr^{q,\ve}_i \rp\nonumber
\end{align}
Since Lemma \ref{power_prop} tells us that $\lp \real_{\rect} \lp \pwr_n^{q,\ve} \rp \rp \lp x \rp \in C \lp \R, \R \rp$, for all $n\in \N_0$ and since the finite sum of continuous functions is continuous, this proves Item (i).
Note that $\pnm_n^{q,\ve}$ is only as deep as the deepest of the $\pwr^{q,\ve}_i$ networks, which from the definition is $\pwr_n^{q,\ve}$, which in turn also has the largest bound. Therefore, by Lemma \ref{comp_prop}, Lemma $\ref{5.3.3}$, Lemma $\ref{depth_prop}$, and Lemma \ref{power_prop}, we have that:
\begin{align}
\dep \lp \pnm_{n,C}^{q,\ve} \rp &\les \dep \lp \pwr_n^{q,\ve}\rp \nonumber\\
&\les \begin{cases}
1 & :n=0\\
n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N
\end{cases} \nonumber
\end{align}
This proves Item (ii).
Note next that for the case $n=0$, we have that:
\begin{align}
\pnm_{0,C}^{q,\ve} = c_0 \triangleright\pwr_0^{q,\ve}
\end{align}
This yields $2$ parameters.
Note that each neural network summand in $\pnm_n^{q,\ve}$ consists of a combination of $\tun_k$ and $\pwr_k$ for some $k\in \N$. Each $\pwr_k$ has at least as many parameters as a tunneling neural network of that depth, as Lemma \ref{param_pwr_geq_param_tun} tells us. This, finally, with Lemma \ref{aff_effect_on_layer_architecture}, Corollary \ref{affcor}, and Lemma \ref{power_prop} then implies that:
\begin{align}
\param\lp \pnm^{q,\ve}_{n,C} \rp &= \param \lp \bigoplus^n_{i=0} \lb c_i \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rb \rp\nonumber \\
&\les \lp n+1 \rp \cdot \param \lp c_n \triangleright \lb \tun_1 \bullet \pwr_n^{q,\ve} \rb\rp \nonumber\\
&\les \lp n+1 \rp \cdot \param \lp \pwr_n^{q,\ve} \rp \nonumber \\
&\les \begin{cases}
2 & :n =0 \\
\lp n+1\rp\lb 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N
\end{cases} \nonumber
\end{align}
This proves Item (iii).
Finally, note that Lemma \ref{power_prop} and the triangle inequality tell us that it is the case for all $i \in \N$ that:
\begin{align}
\left| x^i - \real_{\rect}\lp \pwr_i^{q,\ve}\rp \lp x \rp \right| &\les \left| x^i-x \cdot \real_{\rect}\lp \pwr_{i-1}^{q,\ve}\rp \lp x\rp\right| + \left| x \cdot \real_{\rect}\lp \pwr_{i-1}^{q,\ve}\rp \lp x\rp -\real_{\rect} \lp \pwr_i^{q,\ve} \rp \lp x \rp \right|
\end{align}
This, Lemma \ref{power_prop}, and the fact that instantiation of the tunneling neural network leads to the identity function (Lemma \ref{6.2.2} and Lemma \ref{comp_prop}), together with Lemma \ref{scalar_left_mult_distribution} and the absolute homogeneity of norms, then tell us that for all $x\in \R$ and $c_0,c_1,\hdots, c_n \in \R$ it is the case that:
\begin{align}
&\left|\sum^n_{i=0} c_ix^i - \real_{\rect} \lp \pnm^{q,\ve}_{n,C} \lp x\rp \rp \right| \nonumber\\
&= \left| \sum^n_{i=0} c_ix^i - \real_{\rect} \lb \bigoplus^n_{i=0} \lb c_i \triangleright \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve} \rb \rb\lp x \rp\right| \nonumber \\
&=\left| \sum^n_{i=0} c_ix^i-\sum_{i=0}^n c_i \lp \inst_{\rect}\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb\lp x\rp\rp\right| \nonumber\\
&\les \sum_{i=1}^n \left|c_i\right| \cdot\left| x^i - \inst_{\rect}\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb\lp x\rp\right| \nonumber\\
&\les \sum^n_{i=1} \left|c_i\right|\cdot\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp \nonumber
\end{align}
Note that the $i=0$ summand vanishes, since $\pwr_0^{q,\ve}$ instantiates exactly to $x \mapsto 1$. Note also that since for all $x\in \R$ and $i \in \N \cap \lb 2, \infty\rp$, Lemma \ref{power_prop} tells us that $\left| x^{i} - \real_{\rect} \lp \pwr^{q,\ve}_i\rp \lp x\rp\right| \in \mathcal{O} \lp \ve^{2q\lp i-1\rp} \rp$, this, and the fact that $f+g \in \mathcal{O}\lp x^a \rp$ whenever $f \in \mathcal{O}\lp x^a\rp$, $g \in \mathcal{O}\lp x^b\rp$, and $a \ges b$, then implies that:
\begin{align}
\sum^n_{i=1} \left| c_i\right|\cdot\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp \in \mathcal{O} \lp \ve^{2q(n-1)}\rp
\end{align}
This proves Item (iv).
Note next that in our construction the $\pwr_0^{q,\ve} = \aff_{0,1}$ summand must be tunneled to the common depth whenever $n \in \N$. Lemma \ref{aff_effect_on_layer_architecture} and Corollary \ref{affcor} then tell us that:
\begin{align}
\wid_1 \lp \pnm_{n,C}^{q,\ve} \rp &= \wid_1 \lp \bigoplus^n_{i=0} \lb c_i \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rb\rp \nonumber\\
&= \wid_1 \lp \bigoplus^n_{i=0}\pwr^{q,\ve}_i\rp \les \sum^n_{i=0}\wid_1 \lp \pwr^{q,\ve}_i\rp \les 2 + \sum^n_{i=1}\lp 24+2\lp i-1\rp\rp = 2+23n+n^2
\end{align}
This proves Item (v).
Finally, note that from the definition of $\pnm_{n,C}^{q,\ve}$ it is evident that $\wid_{\hid\lp \pnm_{0,C}^{q,\ve}\rp}\lp \pnm_{0,C}^{q,\ve}\rp = 1$, since $\pwr_{0}^{q,\ve} = \aff_{0,1}$. For all $i \in \N$, the network $\pwr_{i}^{q,\ve}$ ends in the $\prd^{q,\ve}$ network, and the deepest of the $\pwr_i^{q,\ve}$ networks inside $\pnm_{n,C}^{q,\ve}$ is $\pwr^{q,\ve}_n$; all other $\pwr_i^{q,\ve}$ must end in tunnels. Whence, in the second to last layer, Lemma \ref{prd_network} tells us that:
\begin{align}
\wid_{\hid\lp \pnm_{n,C}^{q,\ve}\rp}\lp \pnm_{n,C}^{q,\ve}\rp \les \begin{cases}
1 &: n =0 \\
24+2n &:n \in \N
\end{cases}
\end{align}
This completes the proof of the Lemma.
\end{proof}
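As with the power networks, a toy realization of $\pnm_{n,C}^{q,\ve}$ may clarify the construction. The R sketch below reuses the \texttt{pwr} stand-in from earlier (exact products assumed); tunnels instantiate to the identity and therefore do not appear at the level of realizations.
\begin{lstlisting}[language = R]
# Toy realization of Pnm_{n,C}: a coefficient-weighted sum of power maps.
pwr <- function(n, x, prd = function(a, b) a * b) {
  if (n == 0) 1 else prd(pwr(n - 1, x, prd), x)
}
pnm <- function(C, x, prd = function(a, b) a * b) {
  sum(vapply(seq_along(C) - 1,
             function(i) C[i + 1] * pwr(i, x, prd), numeric(1)))
}
pnm(c(1, 0, -3, 2), 0.5)  # 1 - 3 x^2 + 2 x^3 at x = 0.5, i.e. 0.5
\end{lstlisting}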
\subsection{$\xpn_n^{q,\ve}$, $\csn_n^{q,\ve}$, $\sne_n^{q,\ve}$, and Neural Network Approximations of $e^x$, $\cos(x)$, and $\sin(x)$.}
Once we have neural network polynomials, we may take the next leap to transcendental functions. Here, we will explore neural network approximations for three common transcendental functions: $e^x$, $\cos(x)$, and $\sin(x)$.
\begin{lemma}
Let $\nu_1,\nu_2 \in \neu$, $f,g \in C \lp \R, \R \rp$, and $\ve_1,\ve_2 \in \lp 0 ,\infty \rp$ be such that for all $x\in \R$ it holds that $\left| f(x) - \real_{\rect} \lp \nu_1 \rp \lp x \rp \right| \les \ve_1 $ and $\left| g(x) - \real_{\rect} \lp \nu_2 \rp \lp x \rp \right| \les \ve_2$. It is then the case for all $x \in \R$ that:
\begin{align}\label{6.2.14}
\left| \lb f+g \rb \lp x \rp - \real_{\rect} \lp \lb \nu_1 \oplus \nu_2 \rb \rp \lp x \rp\right| \les \ve_1 + \ve_2
\end{align}
\end{lemma}
\begin{proof}
Note that the triangle inequality tells us:
\begin{align}
\left| \lb f+g \rb \lp x \rp - \real_{\rect} \lb \nu_1 \oplus \nu_2 \rb \lp x \rp \right| &= \left| f\lp x \rp +g\lp x \rp -\real_{\rect} \lp \nu_1\rp \lp x \rp -\real_{\rect} \lp \nu_2 \rp\lp x \rp \right|\nonumber \\
&\les \left| f\lp x \rp -\real_{\rect}\lp \nu_1 \rp \lp x \rp \right| + \left| g\lp x \rp - \real_{\rect} \lp \nu_2 \rp \lp x \rp \right| \nonumber\\
&\les \ve_1 + \ve_2 \nonumber
\end{align}
\end{proof}
\begin{lemma}\label{6.2.8}
Let $n\in \N$. Let $\nu_1,\nu_2,...,\nu_n \in \neu$, $\ve_1,\ve_2,...,\ve_n \in \lp 0,\infty \rp$ and $f_1,f_2,...,f_n \in C\lp \R, \R \rp$ such that for all $i \in \{1,2,...,n\}$, and for all $x\in \R$, it is the case that, $\left| f_i\lp x \rp - \real_{\rect} \lp \nu_i \rp\lp x \rp \right| \les \ve_i$. It is then the case for all $x\in \R$, that:
\begin{align}
\left| \sum^n_{i=1} f_i \lp x \rp - \real_{\rect}\lp \bigoplus^n_{i=1} \nu_i \rp \lp x\rp\right| \les \sum_{i=1}^n \ve_i
\end{align}
\end{lemma}
\begin{proof}
This is a consequence of finitely many applications of (\ref{6.2.14}) and induction on $n$.
\end{proof}
\begin{definition}[R\textemdash 2023, $\xpn_n^{q,\ve}$ and the Neural Network Taylor Approximations for $e^x$ around $x=0$]
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, and let $\pwr_n^{q,\ve}$ be as in Lemma \ref{power_prop}. We define, for all $n\in \N_0$, the family of neural networks $\xpn_n^{q,\ve}$ as:
\begin{align}
\xpn_n^{q,\ve}\coloneqq \bigoplus^n_{i=0} \lb \frac{1}{i!} \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rb
\end{align}
\end{definition}
\begin{lemma}[R\textemdash,2023]\label{tay_for_exp}\label{xpn_properties}
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. It is then the case for all $n\in\N_0$ and $x\in \R$ that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \xpn_n^{q,\ve}\rp \lp x \rp\in C \lp \R, \R \rp $
\item $\dep \lp \xpn_n^{q,\ve} \rp \les \begin{cases}
1 & :n=0\\
n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N
\end{cases}$
\item $\param \lp \xpn_n^{q,\ve} \rp \les \begin{cases}
2 & :n =0 \\
\lp n+1\rp\lb 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N
\end{cases}$ \\~\\
\item \begin{align*}\left|\sum^n_{i=0} \lb \frac{x^i}{i!} \rb- \real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \frac{1}{i!}\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp \end{align*}\\~\\
Where $\mathfrak{p}_i$ is the sequence of functions defined for $i \in \N$ as:
\begin{align}
\mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\
\mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \text{ for } i \ges 2
\end{align}
Whence it is the case that:
\begin{align}
\left|\sum^n_{i=0} \lb \frac{x^i}{i!} \rb- \real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right|\in \mathcal{O} \lp \ve^{2q(n-1)}\rp
\end{align}
\item $\wid_1 \lp \xpn_n^{q,\ve} \rp \les 2+23n+n^2 $
\item $\wid_{\hid \lp \xpn_n^{q,\ve} \rp}\lp \xpn_n^{q,\ve}\rp \les 24 + 2n$
\end{enumerate}
\end{lemma}
\begin{proof}
This follows straightforwardly from Lemma \ref{nn_poly} with $c_i \curvearrowleft \frac{1}{i!}$ for all $n \in \N$ and $i \in \{0,1,\hdots, n\}$. In particular, Item (iv) simplifies because $\frac{1}{i!} > 0$ for all $i \in \N_0$, so the absolute values $\left|c_i\right|$ may be dropped.
\end{proof}
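In the same toy setting, $\xpn_n^{q,\ve}$ is simply the \texttt{pnm} sketch above with coefficients $\frac{1}{i!}$; with an exact stand-in product its realization is the degree-$n$ Taylor polynomial of $e^x$ at $0$.
\begin{lstlisting}[language = R]
# Xpn_n via the pnm sketch with coefficients 1/i! (pnm as defined above).
xpn <- function(n, x, prd = function(a, b) a * b) {
  pnm(1 / factorial(0:n), x, prd)
}
xpn(6, 1)  # 2.718056..., close to e
\end{lstlisting}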
\begin{lemma}[R\textemdash, 2023]
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}.$ It is then the case for all $n\in\N_0$ and $x\in \lb a,b \rb\subsetneq \R$ with $0 \in \lb a,b\rb$ that:
\begin{align}
\left| e^x - \real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \frac{1}{i!}\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp + \frac{e^{b}\cdot |x|^{n+1}}{(n+1)!}
\end{align}
\end{lemma}
\begin{proof}
Note that Taylor's theorem states that for $x \in \lb a,b\rb \subsetneq \R$ it is the case that:
\begin{align}
e^x = \sum^n_{i=0} \lb \frac{x^i}{i!} \rb + \frac{e^{\xi}\cdot x^{n+1}}{(n+1)!}
\end{align}
Where $\xi$ is between $0$ and $x$, in the Lagrange form of the remainder. Note then, for all $n\in \N_0$, $x\in \lb a,b \rb \subsetneq \R$, and $\xi$ between $0$ and $x$, it is the case, by monotonicity of $e^x$, that the magnitude of the remainder is bounded by:
\begin{align}
\left|\frac{e^\xi \cdot x^{n+1}}{(n+1)!}\right| \les \frac{e^b\cdot |x|^{n+1}}{(n+1)!}
\end{align}
This, and the triangle inequality, then indicates that for all $x \in \lb a,b \rb \subsetneq \R$, and $\xi$ between $0$ and $x$ that:
\begin{align}
\left| e^x -\real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| &=\left| \sum^n_{i=0} \lb \frac{x^i}{i!} \rb + \frac{e^{\xi}\cdot x^{n+1}}{(n+1)!}-\real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp\right| \nonumber\\
&\les \left| \sum^n_{i=0} \lb \frac{x^i}{i!} \rb - \real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| + \frac{e^{b}\cdot |x|^{n+1}}{(n+1)!} \nonumber \\
&\les \sum^n_{i=1} \frac{1}{i!}\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp + \frac{e^{b}\cdot |x|^{n+1}}{(n+1)!} \nonumber
\end{align}
For fixed $n\in \N_0$ and $b \in \lb 0, \infty\rp$, the last summand is constant in $\ve$, whence it is the case that:
\begin{align}
\left| e^x -\real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| \in \mathcal{O} \lp \ve^{2q(n-1)}\rp
\end{align}
\end{proof}
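The interplay between the two error sources, the network error and the Taylor remainder, can be seen numerically. The following R snippet (illustrative only, with the network error suppressed by taking exact partial sums) compares the actual gap $|e^x - \sum_{i=0}^n x^i/i!|$ against the remainder bound $e^b|x|^{n+1}/(n+1)!$.
\begin{lstlisting}[language = R]
# Taylor-remainder check for e^x on [a, b] with 0 in [a, b]:
# |e^x - sum_{i=0}^n x^i / i!| <= e^b |x|^{n+1} / (n+1)!.
taylor_exp_gap <- function(x, n, b) {
  partial <- sum(x^(0:n) / factorial(0:n))
  c(actual_gap = abs(exp(x) - partial),
    bound = exp(b) * abs(x)^(n + 1) / factorial(n + 1))
}
taylor_exp_gap(x = 0.8, n = 4, b = 1)  # gap ~ 0.0032 <= bound ~ 0.0074
\end{lstlisting}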
\begin{definition}[The $\mathsf{Csn}_n^{q,\ve}$ Networks, and Neural Network Cosines]
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $\pwr^{q,\ve}_n$ be the neural networks defined in Definition \ref{def:pwr}. We will define the neural networks $\mathsf{Csn}_{n}^{q,\ve}$ as:
\begin{align}
\mathsf{Csn}_n^{q,\ve} \coloneqq \bigoplus^n_{i=0} \lb \frac{(-1)^i}{\lp 2i\rp!}\triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve}\rb \rb
\end{align}
\end{definition}
\begin{lemma}[R\textemdash, 2023]\label{csn_properties}
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. It is then the case for all $n\in\N_0$ and $x\in \R$ that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \csn_n^{q,\ve}\rp \lp x\rp\in C \lp \R, \R \rp $
\item $\dep \lp \csn_n^{q,\ve}\rp \les \begin{cases}
1 & :n=0\\
2n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N
\end{cases}$
\item $\param \lp \csn_n^{q,\ve} \rp \les \begin{cases}
2 & :n =0 \\
\lp n+1\rp\lb 4^{2n+\frac{3}{2}} + \lp \frac{4^{2n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N
\end{cases}$ \\~\\
\item $\left|\sum^n_{i=0} \frac{(-1)^i}{\lp 2i\rp!}x^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \left| \frac{\lp -1\rp^i}{\lp 2i\rp!}\right|\lp \left| x \lp x^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{2i-1}^q \rp $\\~\\
Where $\mathfrak{p}_i$ is the sequence of functions defined for $i \in \N$ as:
\begin{align}
\mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\
\mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \text{ for } i \ges 2
\end{align}
Whence it is the case that:
\begin{align}
\left|\sum^n_{i=0} \frac{\lp -1\rp^i}{\lp 2i\rp!}x^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| \in \mathcal{O} \lp \ve^{2q(2n-1)}\rp
\end{align}
\end{enumerate}
\end{lemma}
\begin{proof}
Item (i) follows straightforwardly from Lemma \ref{nn_poly}.
Next, observe that since $\csn_n^{q,\ve}$ contains, as the deepest network among its summands, $\pwr_{2n}^{q,\ve}$, we may conclude that:
\begin{align}
\dep \lp \csn_n^{q,\ve} \rp &\les \dep \lp \pwr_{2n}^{q,\ve}\rp \nonumber\\
&\les \begin{cases}
1 & :n=0\\
2n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N
\end{cases} \nonumber
\end{align}
This proves Item (ii).
A similar argument to the above, Lemma \ref{aff_effect_on_layer_architecture}, and Corollary \ref{affcor} reveals that:
\begin{align}
\param\lp \csn_n^{q,\ve} \rp &= \param \lp \bigoplus^n_{i=0} \lb \frac{\lp -1\rp^i}{\lp 2i\rp!} \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve}\rb \rb \rp\nonumber \\
&\les \lp n+1 \rp \cdot \param \lp \frac{\lp -1\rp^n}{\lp 2n\rp!} \triangleright \lb \tun_1 \bullet \pwr_{2n}^{q,\ve} \rb\rp \nonumber\\
&\les \lp n+1 \rp \cdot \param \lp \pwr_{2n}^{q,\ve} \rp \nonumber \\
&\les \begin{cases}
2 & :n =0 \\
\lp n+1\rp\lb 4^{2n+\frac{3}{2}} + \lp \frac{4^{2n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N
\end{cases} \nonumber
\end{align}
This proves Item (iii).
In a similar vein, we may argue from Lemma \ref{nn_poly} and from the absolute homogeneity property of norms that:
\begin{align}
&\left|\sum^n_{i=0} \frac{\lp -1\rp^i}{\lp 2i\rp!}x^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x\rp \right| \nonumber\\
&= \left| \sum^n_{i=0} \frac{\lp -1\rp^i}{\lp 2i\rp!}x^{2i} - \real_{\rect} \lb \bigoplus^n_{i=0} \lb \frac{\lp -1\rp^i}{\lp 2i\rp!} \triangleright \tun_{\max_{i} \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve} \rb \rb\lp x \rp\right| \nonumber \\
&=\left| \sum^n_{i=0} \frac{\lp -1\rp^i}{\lp 2i\rp!}x^{2i}-\sum_{i=0}^n \frac{\lp -1 \rp^i}{\lp 2i\rp!} \lp \inst_{\rect}\lb \tun_{\max_{i} \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve}\rb\lp x\rp\rp\right| \nonumber\\
&\les \sum_{i=1}^n \left|\frac{\lp -1\rp^i}{\lp 2i\rp!} \right|\cdot\left| x^{2i} - \inst_{\rect}\lb \tun_{\max_{i} \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve}\rb\lp x\rp\right| \nonumber\\
&\les \sum^n_{i=1} \left|\frac{\lp -1\rp^i}{\lp 2i\rp!}\right|\cdot \lp \left| x \lp x^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber
\end{align}
Whence we have that:
\begin{align}
\left|\sum^n_{i=0} \lb \frac{\lp -1\rp^i x^{2i}}{\lp 2i\rp!} \rb- \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right|\in \mathcal{O} \lp \ve^{2q(2n-1)}\rp
\end{align}
This proves Item (iv).
\end{proof}
\begin{lemma}[R\textemdash, 2023]
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}.$ It is then the case for all $n\in\N_0$ and $x\in \lb a,b\rb \subsetneq \R$ with $0 \in \lb a,b\rb$ that:
\begin{align}
&\left| \cos\lp x\rp - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| \nonumber\\
&\les \sum^n_{i=1} \left|\frac{\lp -1\rp^i}{\lp 2i\rp!}\right|\lp \left| x \lp x^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{2i-1}^q \rp + \frac{|x|^{2n+2}}{(2n+2)!}\nonumber
\end{align}
\end{lemma}
\begin{proof}
Note that Taylor's theorem states that for all $x \in \lb a,b\rb \subsetneq \R$, where $0 \in \lb a,b\rb$, it is the case that:
\begin{align}
\cos\lp x \rp= \sum^n_{i=0} \frac{\lp -1\rp^i}{\lp 2i\rp!}x^{2i} + \frac{\cos^{\lp 2n+2\rp}\lp \xi \rp \cdot x^{2n+2}}{(2n+2)!}
\end{align}
Where $\xi$ is between $0$ and $x$; here we use the fact that the even sum above is the degree-$\lp 2n+1\rp$ Taylor polynomial of cosine, since the odd coefficients vanish. Note further that for all $n \in \N_0$ and $x \in \R$, it is the case that $\left|\cos^{\lp n \rp} \lp x\rp\right| \les 1$. Whence we may conclude that for all $n\in \N_0$, $x\in \lb a,b \rb \subsetneq \R$ with $0 \in \lb a,b\rb$, and $\xi$ between $0$ and $x$, we may bound the magnitude of the remainder by:
\begin{align}
\left|\frac{\cos^{\lp 2n+2\rp}\lp \xi \rp \cdot x^{2n+2}}{(2n+2)!}\right| \les \frac{|x|^{2n+2}}{\lp 2n+2\rp!}
\end{align}
This, and the triangle inequality, then indicates that for all $x \in \lb a,b \rb \subsetneq \R$ and $\xi$ between $0$ and $x$:
\begin{align}
\left| \cos \lp x \rp -\real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| &=\left| \sum^n_{i=0} \frac{\lp -1\rp^i}{\lp 2i\rp!}x^{2i} + \frac{\cos^{(2n+2)}\lp \xi \rp \cdot x^{2n+2}}{(2n+2)!}-\real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp\right| \nonumber\\
&\les \left| \sum^n_{i=0} \frac{\lp -1\rp^i}{\lp 2i\rp!}x^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| + \frac{|x|^{2n+2}}{(2n+2)!} \nonumber \\
&\les \sum^n_{i=1} \left|\frac{\lp -1\rp^i}{\lp 2i\rp!}\right|\cdot \lp \left| x \lp x^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber\\&\quad+ \frac{|x|^{2n+2}}{(2n+2)!} \nonumber
\end{align}
This completes the proof of the Lemma.
\end{proof}
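The analogous numeric check for the cosine remainder (again with the network error suppressed, so only the Taylor part is exercised) is as follows.
\begin{lstlisting}[language = R]
# Taylor-remainder check for cos with remainder |x|^{2n+2} / (2n+2)!.
taylor_cos_gap <- function(x, n) {
  partial <- sum((-1)^(0:n) * x^(2 * (0:n)) / factorial(2 * (0:n)))
  c(actual_gap = abs(cos(x) - partial),
    bound = abs(x)^(2 * n + 2) / factorial(2 * n + 2))
}
taylor_cos_gap(x = 1.2, n = 2)  # gap ~ 0.0040 <= bound ~ 0.0041
\end{lstlisting}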
\begin{definition}[R\textemdash, 2023, The $\mathsf{Sne}_n^{q,\ve}$ Networks and Neural Network Sines]
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $\csn_n^{q,\ve}$ be the neural networks defined above. We will define the neural networks $\mathsf{Sne}_{n}^{q,\ve}$ as:
\begin{align}
\mathsf{Sne}_n^{q,\ve} \coloneqq \csn_n^{q,\ve} \bullet \aff_{1, -\frac{\pi}{2}}
\end{align}
\end{definition}
\begin{lemma}[R\textemdash, 2023]\label{sne_properties}
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. It is then the case for all $n\in\N_0$ and $x\in \R$ that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \sne_n^{q,\ve}\rp \in C \lp \R, \R \rp $
\item $\dep \lp \sne_n^{q,\ve}\rp \les \begin{cases}
1 & :n=0\\
2n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N
\end{cases}$
\item $\param \lp \sne_n^{q,\ve} \rp \les \begin{cases}
2 & :n =0 \\
\lp n+1\rp\lb 4^{2n+\frac{3}{2}} + \lp \frac{4^{2n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N
\end{cases}$ \\~\\
\item \begin{align}&\left|\sum^n_{i=0} \frac{(-1)^i}{\lp 2i\rp!}{\lp x-\frac{\pi}{2}\rp}^{2i} - \real_{\rect} \lp \sne_n^{q,\ve} \rp \lp x \rp \right| \nonumber\\
&= \left|\sum^n_{i=0} \frac{(-1)^i}{\lp 2i\rp!}{\lp x-\frac{\pi}{2}\rp}^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \bullet \aff_{1,-\frac{\pi}{2}}\rp \lp x \rp \right|\nonumber\\
&\les \sum^n_{i=1} \left| \frac{\lp -1\rp^i}{\lp 2i\rp!}\right|\lp \left| \lp x -\frac{\pi}{2}\rp\lp \lp x -\frac{\pi}{2}\rp^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x-\frac{\pi}{2}\rp\rp\right| + \ve + \ve\left|x-\frac{\pi}{2}\right|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber \end{align}\\~\\
Where $\mathfrak{p}_i$ is the sequence of functions defined for $i \in \N$ (here evaluated at $x-\frac{\pi}{2}$ in place of $x$) as:
\begin{align}
\mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\
\mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \text{ for } i \ges 2
\end{align}
Whence it is the case that:
\begin{align}
\left|\sum^n_{i=0} \frac{\lp -1\rp^i}{\lp 2i\rp!}\lp x-\frac{\pi}{2}\rp^{2i} - \real_{\rect} \lp \sne_n^{q,\ve} \rp \lp x \rp \right| \in \mathcal{O} \lp \ve^{2q(2n-1)}\rp
\end{align}
\end{enumerate}
\end{lemma}
\begin{proof}
This follows straightforwardly from Lemma \ref{csn_properties} together with the following facts: by Corollary \ref{affcor}, composing with $\aff_{1,-\frac{\pi}{2}}$ does not change the parameter count; by Lemma \ref{comp_cont}, it does not change the depth; by Lemma \ref{aff_prop} and Lemma \ref{csn_properties}, continuity is preserved; and since $\aff_{1,-\frac{\pi}{2}}$ instantiates exactly to the map $x \mapsto x-\frac{\pi}{2}$, it contributes nothing to the error. Whence $\sne^{q,\ve}_n$ has the same error bounds as $\csn_n^{q,\ve}$, evaluated at $x - \frac{\pi}{2}$.
\end{proof}
\begin{lemma}[R\textemdash, 2023]
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}.$ It is then the case for all $n\in\N_0$ and $x\in \R$ that:
\begin{align}
&\left| \sin\lp x\rp - \real_{\rect} \lp \sne_n^{q,\ve} \rp \lp x \rp \right|\nonumber \\
&\les \sum^n_{i=1} \left| \frac{\lp -1\rp^i}{\lp 2i\rp!}\right|\lp \left| \lp x -\frac{\pi}{2}\rp\lp \lp x -\frac{\pi}{2}\rp^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x-\frac{\pi}{2}\rp\rp\right| + \ve + \ve\left|x-\frac{\pi}{2}\right|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber\\
&\quad+\frac{\left|x-\frac{\pi}{2}\right|^{2n+2}}{(2n+2)!}\label{sin_diff}
\end{align}
\end{lemma}
\begin{proof}
Note that the fact that $\sin\lp x\rp = \cos\lp x-\frac{\pi}{2}\rp$, Lemma \ref{comp_prop}, and Lemma \ref{aff_prop} together render the left-hand side of (\ref{sin_diff}) as:
\begin{align}
&\left| \sin\lp x\rp - \inst_{\rect}\lp \sne_n^{q,\ve}\rp\lp x\rp\right| \nonumber\\
&= \left| \cos \lp x - \frac{\pi}{2}\rp - \inst_{\rect}\lp \csn_n^{q,\ve}\bullet \aff_{1,-\frac{\pi}{2}}\rp\lp x\rp\right| \nonumber\\
&=\left| \cos \lp x-\frac{\pi}{2}\rp - \inst_{\rect}\lp\csn_n^{q,\ve}\rp\lp x-\frac{\pi}{2} \rp\right| \nonumber \\
&\les \sum^n_{i=1} \left| \frac{\lp -1\rp^i}{\lp 2i\rp!}\right|\lp \left| \lp x -\frac{\pi}{2}\rp\lp \lp x -\frac{\pi}{2}\rp^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x-\frac{\pi}{2}\rp\rp\right| + \ve + \ve\left|x-\frac{\pi}{2}\right|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber\\
&\quad+ \frac{\left|x-\frac{\pi}{2}\right|^{2n+2}}{(2n+2)!}\nonumber
\end{align}
\end{proof}
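Numerically, the shift is transparent: the remainder is controlled in $\left|x - \frac{\pi}{2}\right|$ rather than $|x|$, as the following illustrative R snippet shows.
\begin{lstlisting}[language = R]
# Sine via the shifted cosine: remainder controlled in |x - pi/2|.
taylor_sin_gap <- function(x, n) {
  z <- x - pi / 2
  partial <- sum((-1)^(0:n) * z^(2 * (0:n)) / factorial(2 * (0:n)))
  c(actual_gap = abs(sin(x) - partial),
    bound = abs(z)^(2 * n + 2) / factorial(2 * n + 2))
}
taylor_sin_gap(x = 0.6, n = 2)  # gap ~ 0.00114 <= bound ~ 0.00116
\end{lstlisting}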
\begin{remark}
Note that under these neural network architectures the famous Pythagorean identity $\sin^2\lp x\rp + \cos^2 \lp x\rp = 1$ may be rendered approximately, for fixed $n,q,\ve$, as $\lb \sqr^{q,\ve}\bullet \csn^{q,\ve}_n \rb \oplus\lb \sqr^{q,\ve}\bullet \sne^{q,\ve}_n\rb$. A full discussion of the associated parameter, depth, and accuracy bounds is beyond the scope of this dissertation and may be appropriate for future work.
\end{remark}
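A rough numeric rendering of the remark, using truncated-series stand-ins for $\csn_n^{q,\ve}$ and $\sne_n^{q,\ve}$ and an exact stand-in for $\sqr^{q,\ve}$, is given below; the identity is recovered approximately, as expected.
\begin{lstlisting}[language = R]
# Approximate Pythagorean identity with truncated-series stand-ins.
cos_n <- function(x, n) sum((-1)^(0:n) * x^(2 * (0:n)) / factorial(2 * (0:n)))
sin_n <- function(x, n) cos_n(x - pi / 2, n)
cos_n(0.7, 4)^2 + sin_n(0.7, 4)^2  # approximately 1
\end{lstlisting}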