\chapter{ANN Product Approximations} \section{Approximation for Products of Two Real Numbers} We will build up the tools necessary to approximate $e^x$ via neural networks in the framework described in the previous sections. While much of the foundation comes from, e.g., \cite{grohs2019spacetime}, we will, along the way, encounter neural networks not found in the literature, such as the $\tay$, $\pwr$, and $\tun$ networks, and finally a neural network approximant for $e^x$. For each of these neural networks, we will be concerned with at least the following: \begin{enumerate}[label = (\roman*)] \item whether their instantiations using the ReLU activation function are continuous, \item whether their depths are bounded, at most polynomially, in the desired accuracy $\ve$, \item whether their parameter counts are bounded, at most polynomially, in the desired accuracy $\ve$, and \item the accuracy of our approximants. \end{enumerate} \subsection{The squares of real numbers in $\lb 0,1 \rb$} \begin{definition}[The $\mathfrak{i}_d$ Network]\label{def:mathfrak_i} For all $d \in \N$ we will define the following set of neural networks, called ``activation neural networks'' and denoted $\mathfrak{i}_d$, as: \begin{align} \mathfrak{i}_d = \lp \lp \mathbb{I}_d, \mymathbb{0}_d\rp, \lp \mathbb{I}_d, \mymathbb{0}_d\rp \rp \end{align} \end{definition} \begin{lemma}\label{lem:mathfrak_i} Let $d \in \N$. It is then the case that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \mathfrak{i}_d\rp \in C \lp \R^d, \R^d\rp$, \item $\lay \lp \mathfrak{i}_d\rp = \lp d,d,d\rp$, and \item $\param \lp \mathfrak{i}_d\rp = 2d^2+2d$. \end{enumerate} \end{lemma} \begin{proof} Item (i) is straightforward from the fact that for all $d \in \N$, $x \in \R^d$ it is the case that $\lp \real_{\rect} \lp \mathfrak{i}_d\rp \rp \lp x \rp = \mathbb{I}_d \lp \rect \lp \mathbb{I}_d x + \mymathbb{0}_d\rp \rp + \mymathbb{0}_d = \rect \lp x \rp$, which is continuous. Item (ii) is straightforward from the fact that $\mathbb{I}_d \in \R^{d \times d}$. We realize Item (iii) by direct counting.
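Indeed, each of the two layers of $\mathfrak{i}_d$ consists of the weight matrix $\mathbb{I}_d \in \R^{d \times d}$ and the bias vector $\mymathbb{0}_d \in \R^d$, whence:
\begin{align}
\param \lp \mathfrak{i}_d \rp = 2 \lp d^2 + d \rp = 2d^2+2d
\end{align}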
\end{proof} \begin{lemma}\label{lem:6.1.1}\label{lem:phi_k} Let $\lp c_k \rp _{k \in \N} \subseteq \R$, $\lp A_k \rp _{k \in \N} \subseteq \R^{4 \times 4}$, $B\in \R^{4 \times 1}$, and $\lp C_k \rp _{k\in \N} \subseteq \R^{1 \times 4}$ satisfy for all $k \in \N$ that: \begin{align}\label{(6.0.1)} A_k = \begin{bmatrix} 2 & -4 &2 & 0 \\ 2 & -4 & 2 & 0\\ 2 & -4 & 2 & 0\\ -c_k & 2c_k & -c_k & 1 \end{bmatrix} \quad B=\begin{bmatrix} 0 \\ -\frac{1}{2} \\ -1 \\ 0 \end{bmatrix} \quad C_k = \begin{bmatrix} -c_k & 2c_k &-c_k & 1 \end{bmatrix} \end{align} and that: \begin{align} c_k = 2^{1-2k} \end{align} Let $\Phi_k \in \neu$, $k\in \N$, satisfy that $\Phi_1 = \lp \aff_{C_1,0} \bullet \mathfrak{i}_4 \rp \bullet \aff_{\mymathbb{e}_4,B}$, where $\mathfrak{i}_4$ is as in Definition \ref{def:mathfrak_i}, and that for all $k \in [2,\infty) \cap \N$: \begin{align} \Phi_k =\lp \aff_{C_k,0}\bullet \mathfrak{i}_4 \rp \bullet \lp \aff_{A_{k-1},B} \bullet \mathfrak{i}_4\rp \bullet \cdots \bullet \lp \aff_{A_1,B} \bullet \mathfrak{i}_4 \rp \bullet \aff_{\mymathbb{e}_4,B} \end{align} It is then the case that: \begin{enumerate}[label = (\roman*)] \item for all $k \in \N$ we have $\real_{\rect}\lp \Phi_k\rp \in C \lp \R, \R \rp$ \item for all $k \in \N$ we have $\lay \lp \Phi_k \rp = \lp 1,4,4,\ldots,4,1 \rp \in \N^{k+2}$ \item for all $k \in \N$, $x \in \R \setminus \lb 0,1 \rb $ that $\lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp = \rect \lp x \rp$ \item for all $k \in \N$, $x \in \lb 0,1 \rb$, we have $\left| x^2 - \lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp \right| \les 2^{-2k-2}$, and \item for all $k \in \N$, we have that $\param \lp \Phi_k \rp = 20k-7$ \end{enumerate} \end{lemma} \begin{proof} Let $g_k: \R \rightarrow \lb 0,1 \rb$, $k \in \N$, be the functions satisfying for all $k \in \N$, $x \in \R$ that: \begin{align}\label{(6.0.3)} g_1 \lp x \rp &= \begin{cases} 2x & : x \in \lb 0,\frac{1}{2} \rp \\ 2-2x &: x\in \lb \frac{1}{2},1\rb \\ 0 &: x \in \R \setminus \lb 0,1 \rb \end{cases} \\ g_{k+1} &= g_1(g_{k}) \nonumber \end{align} and let $f_k: \lb 0,1 \rb \rightarrow \lb 0,1 \rb$, $k \in \N_0$, be the functions satisfying for all $k \in \N_0$, $n \in \{0,1,\ldots,2^k-1\}$, $x \in \lb \frac{n}{2^k}, \frac{n+1}{2^k} \rp$ that $f_k(1)=1$ and: \begin{align}\label{(6.0.4.2)} f_k(x) = \lb \frac{2n+1}{2^k} \rb x-\frac{n^2+n}{2^{2k}} \end{align} and let $r_k = \lp r_{1,k},r_{2,k},r_{3,k},r_{4,k} \rp: \R \rightarrow \R^4$, $k \in \N$, be the functions which satisfy for all $x \in \R$, $k \in \N$ that: \begin{align}\label{(6.0.5)} r_1\lp x \rp &= \begin{bmatrix} r_{1,1}(x) \\ r_{2,1}(x) \\ r_{3,1}(x) \\ r_{4,1}(x) \end{bmatrix}= \rect \lp \begin{bmatrix} x \\ x-\frac{1}{2} \\ x-1 \\ x \end{bmatrix} \rp \\ r_{k+1}\lp x \rp &= \rect \lp A_k r_k \lp x \rp + B \rp \nonumber \end{align} Note that since it is the case for all $x \in \R$ that $\rect(x) = \max\{x,0\}$, (\ref{(6.0.3)}) and (\ref{(6.0.5)}) show that it holds for all $x \in \R$ that: \begin{align}\label{6.0.6} 2r_{1,1}(x) -4r_{2,1}(x) + 2r_{3,1}(x) &= 2 \rect(x) -4\rect \lp x-\frac{1}{2}\rp+2\rect\lp x-1\rp \nonumber \\ &= 2\max\{x,0\} -4\max\left\{x-\frac{1}{2} ,0\right\}+2\max\{x-1,0\} \nonumber \\ &=g_1(x) \end{align} Note also that, combined with (\ref{(6.0.4.2)}), the fact that for all $x\in [0,1]$ it holds that $f_0(x) = x = \max\{x,0\}$ tells us that for all $x \in \R$: \begin{align}\label{6.0.7} r_{4,1}(x) = \max \{x,0\} = \begin{cases} f_0(x) & :x\in [0,1] \\ \max\{x,0\}& :x \in \R \setminus \lb 0,1\rb
\end{cases} \end{align} We next claim that for all $k \in \N$ it is the case that: \begin{align}\label{6.0.8} \lp \forall x \in \R : 2r_{1,k}(x)-4r_{2,k}(x) + 2r_{3,k}(x) =g_k(x) \rp \end{align} and that: \begin{align}\label{6.0.9} \lp \forall x \in \R: r_{4,k} (x) = \begin{cases} f_{k-1}(x) & :x \in \lb 0,1 \rb \\ \max\{x,0\} & : x \in \R \setminus \lb 0,1\rb \end{cases} \rp \end{align} We prove (\ref{6.0.8}) and (\ref{6.0.9}) by induction. The base case of $k=1$ is proved by (\ref{6.0.6}) and (\ref{6.0.7}). For the induction step $\N \ni k \rightarrow k+1$, assume that for some $k \in \N$ and all $x \in \R$ it is the case that: \begin{align} 2r_{1,k}(x) - 4r_{2,k}(x) + 2r_{3,k}(x) = g_k(x) \end{align} and: \begin{align}\label{6.0.11} r_{4,k}(x) = \begin{cases} f_{k-1}(x) & : x \in [0,1] \\ \max\{x,0\} &: x \in \R \setminus \lb 0,1 \rb \end{cases} \end{align} Note that (\ref{(6.0.3)}), (\ref{(6.0.5)}), and (\ref{6.0.6}) then tell us that for all $x \in \R$ it is the case that: \begin{align}\label{6.0.12} g_{k+1}\lp x \rp &= g_1(g_k(x)) = g_1(2r_{1,k}(x)-4r_{2,k}(x) + 2r_{3,k}(x)) \nonumber \\ &= 2\rect \lp 2r_{1,k}(x) - 4r_{2,k}(x) +2r_{3,k}(x) \rp \nonumber \\ &-4\rect \lp 2r_{1,k}\lp x \rp -4r_{2,k}\lp x \rp+2r_{3,k}(x) - \frac{1}{2} \rp \nonumber \\ &+ 2\rect \lp 2r_{1,k} (x) - 4r_{2,k}(x) + 2r_{3,k}(x)-1 \rp \nonumber \\ &=2r_{1,k+1}(x) -4r_{2,k+1}(x) + 2r_{3,k+1}(x) \end{align} In addition, note that (\ref{(6.0.4.2)}), (\ref{(6.0.5)}), and (\ref{6.0.7}) tell us that for all $x \in \R$: \begin{align}\label{6.0.13} r_{4,k+1}(x) &= \rect \lp \lp -2 \rp ^{3-2 \lp k+1 \rp }r_{1,k} \lp x \rp + 2^{4-2 \lp k+1 \rp}r_{2,k} \lp x \rp + \lp -2 \rp^{3-2\lp k+1\rp }r_{3,k} \lp x \rp + r_{4,k} \lp x\rp \rp \nonumber \\ &= \rect \lp \lp -2 \rp ^{1-2k}r_{1,k} \lp x \rp + 2^{2-2k}r_{2,k}\lp x \rp + \lp -2 \rp ^{1-2k}r_{3,k} \lp x \rp + r_{4,k}\lp x \rp \rp \nonumber \\ &=\rect \lp 2^{-2k} \lb -2r_{1,k}\lp x \rp + 2^2r_{2,k} \lp x \rp -2r_{3,k} \lp x \rp \rb +r_{4,k}\lp x \rp \rp \nonumber \\ &= \rect \lp - \lb 2^{-2k} \rb \lb 2r_{1,k}\lp x \rp -4r_{2,k} \lp x \rp +2r_{3,k}\lp x \rp \rb +r_{4,k}\lp x \rp \rp \nonumber \\ &= \rect\lp -\lb 2^{-2k} \rb g_k \lp x \rp +r_{4,k}\lp x \rp \rp \end{align} This, the fact that for all $x\in \R$ it is the case that $\rect \lp x \rp = \max\{x,0\}$, the fact that for all $x\in \lb 0 ,1 \rb$ it is the case that $f_k \lp x \rp \ges 0$, the representation $f_{k-1}\lp x \rp = x-\sum^{k-1}_{j=1} 2^{-2j}g_j \lp x \rp$ of the interpolants, and (\ref{6.0.11}) show that for all $x \in \lb 0,1 \rb$ it holds that: \begin{align}\label{6.0.14} r_{4,k+1}\lp x \rp &= \rect \lp -2^{-2k} g_k \lp x \rp + f_{k-1}\lp x \rp \rp = \rect \lp -2^{-2k}g_k \lp x \rp +x-\lb \sum^{k-1}_{j=1} 2^{-2j}g_j \lp x \rp \rb \rp \nonumber \\ &= \rect \lp x - \lb \sum^k_{j=1}2^{-2j}g_j \lp x \rp \rb \rp = \rect \lp f_k \lp x \rp \rp =f_k \lp x \rp \end{align} Note next that (\ref{6.0.11}) and (\ref{6.0.13}), together with the fact that $g_k$ vanishes on $\R \setminus \lb 0,1 \rb$, then tell us that for all $x\in \R \setminus \lb 0,1\rb$: \begin{align} r_{4,k+1}\lp x \rp = \max \left\{ -2^{-2k}g_k \lp x \rp + r_{4,k}\lp x \rp, 0 \right\} = \max\{\max\{x,0\},0\} = \max\{x,0\} \end{align} Combining (\ref{6.0.12}) and (\ref{6.0.14}) proves (\ref{6.0.8}) and (\ref{6.0.9}).
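Before instantiating $\Phi_k$, we record how well the interpolants $f_k$ approximate the squaring function. By (\ref{(6.0.4.2)}), $f_k$ agrees with $x \mapsto x^2$ at the grid points $n2^{-k}$, $n \in \{0,1,\ldots,2^k\}$, and is affine in between; on each cell $\lb n2^{-k},(n+1)2^{-k} \rb$ the difference $x^2 - f_k(x)$ is therefore the monic quadratic $\lp x - n2^{-k}\rp \lp x - (n+1)2^{-k}\rp$, whose absolute value is maximized at the midpoint of the cell, whence:
\begin{align}
\sup_{x \in \lb 0,1 \rb} \left| x^2 - f_k \lp x \rp \right| = \frac{1}{4}\lp 2^{-k}\rp^2 = 2^{-2k-2}
\end{align}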
Note that then (\ref{(6.0.1)}) and (\ref{6.0.8}) assure that for all $k\in \N$, $x\in \R$ it holds that $\real_{\rect} \lp \Phi_k \rp \in C \lp \R,\R \rp$ and that: \begin{align}\label{(6.0.17)} &\lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp \nonumber \\ &= \lp \real_{\rect} \lp \lp \aff_{C_k,0} \bullet \mathfrak{i}_4 \rp \bullet \lp \aff_{A_{k-1},B} \bullet \mathfrak{i}_4 \rp \bullet \cdots \bullet\lp \aff_{A_1,B} \bullet \mathfrak{i}_4 \rp \bullet \aff_{\mymathbb{e}_4,B} \rp \rp \lp x \rp \nonumber \\ &= \lp -2\rp^{1-2k}r_{1,k}\lp x \rp + 2^{2-2k} r_{2,k} \lp x \rp + \lp -2 \rp ^{1-2k} r_{3,k} \lp x \rp + r_{4,k} \lp x \rp \nonumber \\ &=\lp -2 \rp ^{2-2k} \lp \lb \frac{r_{1,k}\lp x \rp +r_{3,k} \lp x \rp }{-2} \rb + r_{2,k}\lp x \rp \rp +r_{4,k}\lp x \rp \nonumber \\ &=2^{2-2k} \lp \lb \frac{r_{1,k}\lp x \rp+r_{3,k} \lp x \rp }{-2} \rb + r_{2,k} \lp x \rp \rp +r_{4,k} \lp x \rp \nonumber \\ &=2^{-2k}\lp 4r_{2,k} \lp x \rp -2r_{1,k}\lp x \rp -2r_{3,k} \lp x \rp \rp +r_{4,k} \lp x \rp \nonumber \\ &=-\lb 2^{-2k} \rb \lb 2r_{1,k} \lp x \rp -4r_{2,k} \lp x \rp +2r_{3,k} \lp x \rp \rb +r_{4,k} \lp x \rp = -\lb 2^{-2k} \rb g_k \lp x \rp + r_{4,k} \lp x \rp \end{align} This and (\ref{6.0.9}) tell us that for all $k \in \N$, $x \in \lb 0,1 \rb$: \begin{align} \lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp = - \lp 2^{-2k}g_k \lp x \rp \rp +f_{k-1}\lp x \rp &= -\lp 2^{-2k}g_k \lp x \rp \rp +x-\lb \sum^{k-1}_{j=1} 2^{-2j}g_j \lp x \rp \rb \nonumber \\ &=x-\lb \sum^k_{j=1}2^{-2j}g_j \lp x \rp \rb =f_k\lp x\rp \nonumber \end{align} Which, combined with the bound $\sup_{x\in \lb 0,1\rb}|x^2-f_k(x)| = 2^{-2k-2}$ recorded above, implies for all $k\in \N$, $x \in \lb 0,1\rb$ that it holds that: \begin{align} \left| x^2-\lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp \right| \les 2^{-2k-2} \end{align} This, in turn, establishes Items (i) and (iv). Note also that Item (ii) follows directly from the architecture in (\ref{(6.0.1)}): each composition with $\aff_{A_j,B} \bullet \mathfrak{i}_4$ contributes one hidden layer of width $4$, so that $\lay \lp \Phi_k \rp = \lp 1,4,4,\ldots,4,1\rp \in \N^{k+2}$. Finally observe that (\ref{(6.0.17)}) then tells us that for all $k\in \N$, $x \in \R \setminus \lb 0,1\rb$ it holds that: \begin{align} \lp \real_{\rect} \lp \Phi_k \rp \rp \lp x \rp = -2^{-2k}g_k \lp x \rp +r_{4,k} \lp x \rp =r_{4,k} \lp x \rp = \max\{x,0\} = \rect(x) \end{align} This establishes Item (iii). Note next that Item (ii) ensures for all $k\in \N$ that $\dep\lp \Phi_k \rp = k+1$, and: \begin{align} \param \lp \Phi_k \rp = 4(1+1) + \lb \sum^k_{j=2} 4 \lp 4+1\rp \rb + \lp 4+1 \rp =8+20\lp k-1\rp+5 = 20k-7 \end{align} This, in turn, proves Item (v). The proof of the lemma is thus complete.
\end{proof} \begin{remark} For an \texttt{R} implementation see Listing \ref{Phi_k}. \end{remark} \begin{figure}[h] \centering \includegraphics[width = \linewidth]{/Users/shakilrafi/R-simulations/Phi_k_properties/diff.png} \caption{Plot of $\log_{10}$ of the $L^1$ difference between $\Phi_k$ and $x^2$ over $\lb 0,1\rb$ for different values of $k$.} \end{figure} \begin{corollary}\label{6.1.1.1}\label{cor:phi_network} Let $\ve \in \lp 0,\infty\rp$, $M= \min \lp \N \cap \lb \frac{1}{2}\log_2 \lp \ve^{-1} \rp -1,\infty\rp\rp$, $\lp c_k\rp_{k \in \N} \subseteq \R$, $\lp A_k\rp_{k\in\N} \subseteq \R^{4 \times 4}$, $B \in \R^{4\times 1}$, and $\lp C_k\rp_{k\in \N} \subseteq \R^{1 \times 4}$ satisfy for all $k \in \N$ that: \begin{align} A_k = \begin{bmatrix} 2&-4&2&0 \\ 2&-4&2&0\\ 2&-4&2&0\\ -c_k&2c_k & -c_k&1 \end{bmatrix}, \quad B = \begin{bmatrix} 0\\ -\frac{1}{2} \\ -1 \\ 0 \end{bmatrix}, \quad C_k = \begin{bmatrix} -c_k &2c_k&-c_k&1 \end{bmatrix} \end{align} where: \begin{align} c_k = 2^{1-2k} \end{align} and let $\Phi \in \neu$ be defined as: \begin{align} \Phi = \begin{cases}\label{def:Phi} \lb \aff_{C_1,0}\bullet \mathfrak{i}_4\rb \bullet \aff_{\mymathbb{e}_4,B} & M=1 \\ \lb \aff_{C_M,0} \bullet \mathfrak{i}_4\rb\bullet \lb \aff_{A_{M-1},B} \bullet \mathfrak{i}_4 \rb \bullet \cdots \bullet \lb \aff_{A_1,B}\bullet \mathfrak{i}_4\rb \bullet \aff_{\mymathbb{e}_4,B} & M \in \lb 2,\infty \rp \cap \N \end{cases} \end{align} It is then the case that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \Phi\rp \in C \lp \R,\R\rp$ \item $\lay \lp \Phi\rp = \lp 1,4,4,\ldots,4,1\rp \in \N^{M+2} $ \item it holds for all $x \in \R \setminus\lb 0,1 \rb$ that $\lp \real_{\rect} \lp \Phi\rp\rp \lp x \rp = \rect(x)$ \item it holds for all $x \in \lb 0,1 \rb$ that $\left| x^2 - \lp \real_{\rect} \lp \Phi \rp \rp\lp x \rp \right| \les 2^{-2M-2} \les \ve$ \item $\dep \lp \Phi \rp = M+1 \les \max\{ \frac{1}{2}\log_2 \lp \ve^{-1}\rp+1,2\}$, and \item $\param \lp \Phi\rp = 20M-7 \les \max\left\{ 10\log_2 \lp \ve^{-1}\rp-7,13\right\}$ \end{enumerate} \end{corollary} \begin{proof} Items (i)--(iii) are direct consequences of Items (i)--(iii) of Lemma \ref{lem:6.1.1}. Note next that the fact that $M = \min \lp \N \cap \lb \frac{1}{2} \log_2 \lp \ve^{-1}\rp-1,\infty\rp\rp$ ensures that: \begin{align} M \ges \frac{1}{2}\log_2 \lp \ve^{-1}\rp-1 \end{align} This and Item (iv) of Lemma \ref{lem:6.1.1} demonstrate that for all $x\in \lb 0,1\rb$ it then holds that: \begin{align} \left| x^2 - \lp \real_{\rect}\lp \Phi\rp\rp \lp x\rp \right| \les 2^{-2M-2} = 2^{-2(M+1)} \les 2^{-\log_2\lp\ve^{-1} \rp} = \ve \end{align} Thus establishing Item (iv). The fact that $M \les \max\left\{\frac{1}{2}\log_2\lp \ve^{-1}\rp,1\right\}$ (as $M$ is the smallest natural number no less than $\frac{1}{2}\log_2\lp \ve^{-1}\rp-1$) and Item (ii) of Lemma \ref{lem:6.1.1} tell us that: \begin{align} \dep \lp \Phi \rp = M+1 \les \max \left\{ \frac{1}{2} \log_2 \lp \ve^{-1}\rp+1,2\right\} \end{align} Which establishes Item (v). This and Item (v) of Lemma \ref{lem:6.1.1} then tell us that: \begin{align} \param \lp \Phi\rp = 20M-7 \les 20 \max\left\{ \frac{1}{2}\log_2\lp\ve^{-1}\rp,1\right\}-7 = \max\left\{ 10\log_2 \lp\ve^{-1} \rp-7,13\right\} \end{align} This establishes Item (vi) and completes the proof of the corollary.
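\end{proof} \begin{remark} As a concrete instance of these bounds: for $\ve = 2^{-10}$ we get $M = \min \lp \N \cap \lb 4,\infty\rp\rp = 4$, whence $\dep \lp \Phi \rp = 5 \les \max\{6,2\}$ and $\param \lp \Phi \rp = 20\cdot 4 - 7 = 73 \les \max\{93,13\}$. \end{remark}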
\begin{remark} For an implementation in \texttt{R}, see Listing \ref{Phi}. \end{remark} \begin{figure}[h] \centering \includegraphics[width = \linewidth]{/Users/shakilrafi/R-simulations/Phi_properties/Phi_diff_contour.png} \caption{Contour plot of the $L^1$ difference between $\Phi$ and $x^2$ over $\lb 0,1 \rb$ for different values of $\ve$.} \end{figure} \begin{remark} Note that (\ref{def:Phi}) implies that $\dep \lp \Phi \rp \ges 4$. \end{remark} Now that we have neural networks that perform the squaring operation inside $\lb 0,1\rb$, we may extend to all of $\R$. Note that this neural network representation differs somewhat from the ones in \cite{grohs2019spacetime}. \subsection{The $\sqr$ network} \begin{lemma}\label{6.0.3}\label{lem:sqr_network} Let $\delta,\ve \in (0,\infty)$, $\alpha \in (0,\infty)$, $q\in (2,\infty)$, and $\Phi \in \neu$ satisfy that $\delta = 2^{\frac{-2}{q-2}}\ve ^{\frac{q}{q-2}}$, $\alpha = \lp \frac{\ve}{2}\rp^{\frac{1}{q-2}}$, $\real_{\rect}\lp\Phi\rp \in C\lp \R,\R\rp$, $\dep(\Phi) \les \max \left\{\frac{1}{2} \log_2(\delta^{-1})+1,2\right\}$, $\param(\Phi) \les \max\left\{10\log_2\lp \delta^{-1}\rp-7,13\right\}$, $\sup_{x \in \R \setminus [0,1]} \left| \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp -\rect(x) \right| =0$, and $\sup_{x\in \lb 0,1\rb} \left|x^2-\lp \real_{\rect} \lp \Phi \rp \rp \lp x\rp \right| \les \delta$, and let $\Psi \in \neu$ be the neural network given by: \begin{align} \Psi = \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0} \rp \bigoplus\lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp \end{align} It is then the case that: \begin{enumerate}[label = (\roman*)] \item it holds that $\real_{\rect} \lp \Psi \rp \in C \lp \R,\R \rp$, \item it holds that $\lp \real_{\rect} \lp \Psi \rp \rp \lp 0\rp=0$, \item it holds for all $x\in \R$ that $0\les \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \les \ve + |x|^2$, \item it holds for all $x \in \R$ that $|x^2-\lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp |\les \ve \max\{1,|x|^q\}$, \item it holds that $\dep (\Psi)\les \max\left\{1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \ve^{-1} \rp,2\right\}$, and \item it holds that $\param\lp \Psi \rp \les \max\left\{ \lb \frac{40q}{q-2} \rb \log_2 \lp \ve^{-1} \rp +\frac{80}{q-2}-28,52 \right\}$. \end{enumerate} \end{lemma} \begin{proof} Note that for all $x\in \R$ it is the case that: \begin{align}\label{6.0.21} \lp \real_{\rect}\lp \Psi \rp \rp\lp x \rp &= \lp \real_{\rect} \lp \lp \aff_{\alpha^{-2},0}\bullet \Phi \bullet \aff_{\alpha,0}\rp \oplus\lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0} \rp \rp \rp \lp x \rp \nonumber\\ &= \lp \real_{\rect}\lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0} \rp \rp \lp x\rp + \lp \real_{\rect}\lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp \rp \lp x\rp \nonumber \\ &= \frac{1}{\alpha^2}\lp \real_{\rect}\lp \Phi \rp \rp \lp \alpha x\rp + \frac{1}{\alpha^2}\lp \real_{\rect} \lp \Phi \rp \rp \lp -\alpha x\rp \nonumber\\ &= \lp \frac{\ve}{2}\rp^{\frac{-2}{q-2}}\lb \lp \real_{\rect}\lp \Phi \rp \rp \lp \lp \frac{\ve}{2}\rp ^{\frac{1}{q-2}}x \rp + \lp \real_{\rect}\lp \Phi \rp \rp \lp -\lp \frac{\ve}{2}\rp^{\frac{1}{q-2}}x\rp \rb \end{align} This, the assumption that $\real_{\rect}\lp \Phi \rp \in C\lp \R, \R \rp$, and the assumption that $\sup_{x\in \R \setminus \lb 0,1\rb } \left| \lp \real_{\rect} \lp \Phi \rp \rp \lp x \rp -\rect\lp x\rp \right| =0$ tell us that: \begin{align} \lp \real_{\rect}\lp \Psi \rp \rp \lp 0 \rp &= \lp \frac{\ve}{2}\rp^{\frac{-2}{q-2}}\lb \lp \real_{\rect}\lp
\Phi \rp \rp \lp 0 \rp +\lp \real_{\rect} \lp \Phi\rp \rp \lp 0 \rp \rb \nonumber \\ &=\lp \frac{\ve}{2}\rp ^{\frac{-2}{q-2}} \lb \rect (0)+\rect(0) \rb \nonumber \\ &=0 \end{align} This, in turn, establishes Items (i)--(ii). Observe next that the assumption that $\real_{\rect} \lp \Phi \rp \in C\lp \R,\R \rp$ and the assumption that $\sup_{x\in \R \setminus \lb 0,1\rb} \left| \lp \real_{\rect}\lp \Phi \rp \rp \lp x \rp -\rect(x) \right|=0$ ensure that for all $x\in \R \setminus \lb -1,1 \rb$ it holds that: \begin{align}\label{6.0.23} \lb \real_{\rect}\lp \Phi \rp \rb \lp x\rp + \lb \real_{\rect}\lp \Phi \rp \rb \lp -x \rp = \rect\lp x\rp +\rect(-x) &= \max\{x,0\}+\max\{-x,0\} \nonumber\\ &=|x| \end{align} The assumption that $\sup_{x\in \R \setminus \lb 0,1\rb }\left|\lp \real_{\rect} \lp \Phi \rp \rp \lp x\rp -\rect\lp x\rp \right|=0$ and the assumption that $\sup_{x\in\lb 0,1\rb} |x^2-\lp \real_{\rect} \lp \Phi \rp \rp \lp x\rp |\les \delta$ show that: \begin{align}\label{6.0.24} &\sup_{x \in \lb -1,1\rb} \left|x^2 - \lp \lb \real_{\rect}\lp \Phi \rp \rb \lp x\rp +\lb \real_{\rect}\lp \Phi \rp \rb \lp -x \rp \rp \right| \nonumber \\ &= \max\left\{ \sup_{x\in \lb -1,0 \rb} \left| x^2-\lp \rect(x)+ \lb \real_{\rect}\lp \Phi \rp \rb \lp -x \rp \rp \right|,\sup _{x\in \lb 0,1 \rb} \left| x^2-\lp \lb \real_{\rect} \lp \Phi \rp \rb \lp x \rp + \rect \lp -x \rp \rp \right| \right\} \nonumber\\ &= \max\left\{\sup_{x\in \lb -1,0 \rb}\left|\lp -x \rp^2 - \lb \real_{\rect}\lp \Phi \rp \rb \lp -x \rp \right|, \sup_{x\in \lb 0,1\rb} \left| x^2-\lb \real_{\rect} \lp \Phi \rp \rb \lp x \rp \right| \right\} \nonumber \\ &=\sup_{x\in \lb 0,1 \rb}\left| x^2 - \lp \real_{\rect}\lp \Phi \rp \rp \lp x\rp \right| \les \delta \end{align} Next observe that (\ref{6.0.21}) and (\ref{6.0.23}) show that for all $x \in \R \setminus \lb -\lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}}, \lp \frac{\ve}{2}\rp ^{\frac{-1}{q-2}} \rb$, so that in particular $|x| \ges \lp \frac{\ve}{2}\rp^{\frac{-1}{q-2}}$, it holds that: \begin{align}\label{6.0.25} 0 \les \lb \real_{\rect} \lp \Psi \rp \rb \lp x \rp &= \lp \frac{\ve}{2} \rp ^{\frac{-2}{q-2}}\lp \lb \real_{\rect} \lp \Phi \rp \rb \lp \lp \frac{\ve}{2}\rp ^{\frac{1}{q-2}}x \rp + \lb \real_{\rect} \lp \Phi \rp \rb \lp -\lp \frac{\ve}{2}\rp^{\frac{1}{q-2}} x\rp \rp \nonumber \\ &= \lp \frac{\ve}{2} \rp ^{\frac{-2}{q-2}} \left| \lp \frac{\ve}{2} \rp^{\frac{1}{q-2}}x \right| = \lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}} |x| \les |x|^2 \end{align} The triangle inequality then tells us that for all $x\in \R \setminus \lb - \lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}}, \lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}} \rb$ it holds that: \begin{align} \label{6.0.25b} \left| x^2- \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| &= \left| x^2 - \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}}\left|x\right| \right| \les \left|x \right|^2 + \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}} \left| x \right| \nonumber\\ &= \left| x \right|^q \left|x\right|^{-(q-2)} + \lp \frac{\ve}{2} \rp^{\frac{-1}{q-2}} \left| x \right|^q\left| x \right|^{-(q-1)} \nonumber \\ &\les \left| x \right|^q \lp \frac{\ve}{2} \rp^{\frac{q-2}{q-2}} + \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}} \left| x \right|^q \lp \frac{\ve}{2} \rp ^{\frac{q-1}{q-2}} \nonumber \\ &= \lp \frac{\ve}{2}+ \frac{\ve}{2} \rp \left| x \right|^q = \ve \left| x \right|^q \les \ve \max \left\{ 1, \left| x \right|^q \right\} \end{align} Note that (\ref{6.0.24}), (\ref{6.0.21}), and the fact that $\delta = 2^{\frac{-2}{q-2}}\ve^{\frac{q}{q-2}}$ then tell us that for all $x \in \lb -\lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}}, \lp \frac{\ve}{2} \rp
^{\frac{-1}{q-2}} \rb$ it holds that: \begin{align}\label{6.0.26} \left| x^2-\lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| &= \lp \frac{\ve}{2} \rp^{\frac{-2}{q-2}} \left| \lp \lp \frac{\ve}{2} \rp ^{\frac{1}{q-2}}x \rp^2 - \lp \lb \real_{\rect} \lp \Phi \rp \rb \lp \lp \frac{\ve}{2} \rp ^{\frac{1}{q-2}}x \rp + \lb \real_{\rect} \lp \Phi \rp \rb \lp -\lp \frac{\ve}{2} \rp ^{\frac{1}{q-2}}x \rp \rp \right| \nonumber \\ &\les \lp \frac{\ve}{2} \rp^{\frac{-2}{q-2}} \lb \sup_{y \in \lb -1,1\rb} \left| y^2 - \lp \lb \real_{\rect} \lp \Phi \rp \rb \lp y \rp + \lb \real_{\rect} \lp \Phi \rp \rb \lp -y \rp \rp \right| \rb \nonumber \\ &\les \lp \frac{\ve}{2} \rp^{\frac{-2}{q-2}} \delta = \lp \frac{\ve}{2} \rp^{\frac{-2}{q-2}} 2^{\frac{-2}{q-2}} \ve^{\frac{q}{q-2}} = \ve \les \ve \max \{ 1, \left| x \right|^q \} \end{align} Now note that this and (\ref{6.0.25b}) tell us that for all $x\in \R$ it is the case that: \begin{align} \left| x^2-\lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| \les \ve \max\{1,|x|^q \} \end{align} This establishes Item (iv). Note also that (\ref{6.0.26}) tells us that for all $x \in \lb - \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}}, \lp \frac{\ve}{2} \rp ^{\frac{-1}{q-2}} \rb $ it is the case that: \begin{align} \left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| \les \left| x^2 - \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| + \left| x \right|^2 \les \ve + \left| x \right| ^2 \end{align} This and (\ref{6.0.25}) tell us that for all $x\in \R$: \begin{align} \left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| \les \ve + |x|^2 \end{align} This establishes Item (iii). Note next that Corollary \ref{affcor}, Remark \ref{5.3.2}, the hypothesis, and the fact that $\delta = 2^{\frac{-2}{q-2}}\ve ^{\frac{q}{q-2}}$ tell us that: \begin{align} \dep \lp \Psi \rp = \dep \lp \Phi \rp &\les \max \left\{\frac{1}{2} \log_2(\delta^{-1})+1,2\right\} \nonumber \\ &= \max \left\{ 1+\frac{1}{q-2} + \frac{q}{2\lp q-2\rp}\log_2 \lp \ve^{-1} \rp,2\right\} \end{align} This establishes Item (v).
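As a brief aside, to make these quantities concrete, consider the instance $q=3$: the hypotheses then read $\delta = \frac{\ve^3}{4}$ and $\alpha = \frac{\ve}{2}$, so that (\ref{6.0.21}) becomes, for all $x \in \R$:
\begin{align}
\lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp = \frac{4}{\ve^2} \lb \lp \real_{\rect} \lp \Phi \rp \rp \lp \frac{\ve x}{2} \rp + \lp \real_{\rect} \lp \Phi \rp \rp \lp -\frac{\ve x}{2} \rp \rb
\end{align}
with an approximation error, by Item (iv), of at most $\ve \max\{1,|x|^3\}$.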
Notice next that the fact that $\delta = 2^{\frac{-2}{q-2}}\ve^{\frac{q}{q-2}}$ tells us that: \begin{align} \log_2 \lp \delta^{-1} \rp = \log_2 \lp 2^{\frac{2}{q-2}} \ve^{\frac{-q}{q-2}}\rp = \frac{2}{q-2} + \lb \frac{q}{q-2}\rb \log_2 \lp \ve^{-1}\rp \end{align} Note that by Corollary \ref{affcor} we have that: \begin{align} \param \lp \Phi \bullet \aff_{-\alpha,0} \rp &\les \lb \max\left\{ 1, \frac{\inn \lp \aff_{-\alpha,0}\rp+1}{\inn\lp \Phi\rp+1}\right\}\rb \param \lp \Phi\rp = \param \lp \Phi\rp \end{align} and further that: \begin{align} \param \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0} \rp &= \lb \max\left\{ 1, \frac{\out \lp \aff_{\alpha^{-2},0}\rp}{\out\lp \Phi \bullet \aff_{-\alpha,0}\rp}\right\}\rb \param \lp \Phi \bullet \aff_{-\alpha,0}\rp \nonumber\\ &\les \param \lp \Phi\rp \end{align} By symmetry note also that $ \param \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0}\rp = \param \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp $ and also that $ \lay \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0}\rp = \lay \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp $. Thus Lemma \ref{paramsum}, Corollary \ref{cor:sameparal}, and the hypothesis tell us that: \begin{align}\label{(6.1.42)} \param \lp \Psi \rp &\les \param \lp \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{\alpha,0}\rp \boxminus \lp \aff_{\alpha^{-2},0} \bullet \Phi \bullet \aff_{-\alpha,0}\rp \rp \nonumber \\ &\les 4\param \lp \Phi\rp \nonumber \\ &\les 4\max\left\{10\log_2\lp \delta^{-1}\rp-7,13\right\} \end{align} This and the fact that $\delta = 2^{\frac{-2}{q-2}}\ve ^{\frac{q}{q-2}}$ render (\ref{(6.1.42)}) as: \begin{align} 4\max\left\{10\log_2\lp \delta^{-1}\rp-7,13\right\} &= 4\max \left\{ 10 \lp \frac{2}{q-2} +\frac{q}{q-2}\log_2 \lp \ve^{-1}\rp\rp-7,13\right\} \nonumber \\ &=\max \left\{ \lb \frac{40q}{q-2}\rb \log_2 \lp \ve^{-1}\rp + \frac{80}{q-2}-28,52\right\} \end{align} This establishes Item (vi) and completes the proof of the lemma. \end{proof} \begin{remark} We will often find it helpful to refer to this network, for fixed $\ve \in \lp 0, \infty \rp$ and $q \in \lp 2,\infty\rp$, as the $\sqr^{q,\ve}$ network. \end{remark} \begin{remark} For an \texttt{R} implementation see Listing \ref{Sqr}. \end{remark} \begin{figure}[h] \centering \includegraphics[width = 0.45\linewidth]{/Users/shakilrafi/R-simulations/Sqr_properties/experimental_deps.png} \includegraphics[width = 0.45\linewidth]{/Users/shakilrafi/R-simulations/Sqr_properties/dep_theoretical_upper_limits.png} \caption{Left: $\log_{10}$ of depths for a simulation with $q \in \lb 2.1, 4 \rb $, $\ve \in \lp 0.1, 2 \rb$, and $x \in \lb -5,5 \rb$, all with $50$ mesh-points. Right: the theoretical upper limits over the same range of values.} \end{figure} % Please add the following required packages to your document preamble: % \usepackage{booktabs} \begin{table}[h] \begin{tabular}{@{}l|llllll@{}} \toprule & Min. & 1\textsuperscript{st} Qu. & Median & Mean & 3\textsuperscript{rd} Qu. & Max.
\\ \midrule Experimental $|x^2 - \real_{\rect}(\mathsf{Sqr}^{q,\ve})(x)|$ & 0.000003 & 0.089438 & 0.337870 & 3.148933 & 4.674652 & 20.00 \\ \midrule Theoretical upper bound on $|x^2 - \real_{\rect}(\mathsf{Sqr}^{q,\ve})(x)|$ & 0.010 & 1.715 & 10.402 & 48.063 & 45.538 & 1250.00 \\ \midrule Difference & 0.001 & 1.6012 & 9.8655 & 44.9141 & 40.7102 & 1230 \end{tabular} \caption{Theoretical upper bounds for the $L^1$ error, experimental $L^1$ error, and their forward difference, with $q \in \lb 2.1, 4 \rb $, $\ve \in \lp 0.1, 2 \rb$, and $x \in \lb -5,5 \rb$, all with $50$ mesh-points.} \end{table} \subsection{The $\prd$ network} We are finally ready to give neural network representations of arbitrary products of real numbers. However, this representation differs somewhat from those found in the literature, especially \cite{grohs2019spacetime}, where parallelization (stacking) is used instead of neural network sums. This will help us calculate $\wid_1$ and the width of the second-to-last layer. \begin{lemma}\label{prd_network} Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, $A_1,A_2,A_3 \in \R^{1\times 2}$, and $\Psi \in \neu$ satisfy for all $x\in \R$ that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, $A_1 = \lb 1 \quad 1 \rb$, $A_2 = \lb 1 \quad 0 \rb$, $A_3 = \lb 0 \quad 1 \rb$, $\real_{\rect}\lp \Psi \rp \in C\lp \R, \R \rp$, $\lp \real_{\rect} \lp \Psi \rp \rp \lp 0\rp = 0$, $0\les \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \les \delta+|x|^2$, $|x^2-\lp \real_{\rect}\lp \Psi \rp \rp \lp x \rp |\les \delta \max \{1,|x|^q\}$, $\dep\lp \Psi \rp \les \max\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \delta^{-1} \rp ,2\}$, and $\param \lp \Psi \rp \les \max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\}$, then: \begin{enumerate}[label=(\roman*)] \item there exists a unique $\Gamma \in \neu$ satisfying: \begin{align} \Gamma = \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp \bigoplus \lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_2,0} \rp \rp \bigoplus\lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp \end{align} \item it holds that $\real_{\rect} \lp \Gamma \rp \in C \lp \R^2,\R \rp$ \item it holds for all $x,y\in \R$ that $\lp \real_{\rect}\lp \Gamma \rp \rp \lp x,0\rp = \lp \real_{\rect}\lp \Gamma \rp \rp \lp 0,y\rp =0$ \item it holds for all $x,y \in \R$ that $\left|xy - \lp \real_{\rect} \lp \Gamma \rp \rp \lp \begin{bmatrix} x \\ y \end{bmatrix} \rp \right| \les \ve \max \{1,|x|^q,|y|^q \}$ \item it holds that $\param(\Gamma) \les \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb -252$ \item it holds that $\dep\lp \Gamma \rp \les \frac{q}{q-2} \lb \log_2 \lp \ve^{-1}\rp +q \rb $ \item it holds that $\wid_1 \lp \Gamma \rp=24$ \item it holds that $\wid_{\hid \lp\Gamma\rp}\lp \Gamma \rp = 24$ \end{enumerate} \end{lemma} \begin{proof} Note that: \begin{align} &\lp \real_{\rect} \lp \Gamma \rp \rp \lp \begin{bmatrix} x\\y \end{bmatrix} \rp = \real_{\rect} \lp \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp \bigoplus \lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_2,0} \rp \rp \bigoplus \right. \\ &\left.
\lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp \rp \lp \begin{bmatrix} x \\ y \end{bmatrix} \rp \nonumber\\ &= \real_{\rect} \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp \lp \begin{bmatrix} x\\y \end{bmatrix} \rp + \real_{\rect}\lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_2,0} \rp \rp \lp \begin{bmatrix} x \\ y \end{bmatrix} \rp \nonumber \\ &+\real_{\rect}\lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp \lp \begin{bmatrix} x\\y \end{bmatrix} \rp \nonumber \\ &= \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp \begin{bmatrix} 1 & 1 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix}\rp - \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp \begin{bmatrix} 1 & 0 \end{bmatrix} \begin{bmatrix} x \\ y \end{bmatrix} \rp \nonumber\\ &-\frac{1}{2} \lp \real_{\rect}\lp \Psi \rp \rp \lp \begin{bmatrix} 0 & 1 \end{bmatrix} \begin{bmatrix} x \\y \end{bmatrix} \rp \nonumber \\ &=\frac{1}{2} \lp \real_{\rect}\lp \Psi \rp \rp \lp x+y \rp -\frac{1}{2} \lp \real_{\rect}\lp \Psi \rp \rp \lp x \rp - \frac{1}{2} \lp \real_{\rect}\lp \Psi \rp \rp \lp y \rp \label{6.0.33} \end{align} Note that this, the assumption that $\real_{\rect} \lp \Psi \rp \in C \lp \R, \R \rp$, and the assumption that $\lp \real_{\rect}\lp \Psi \rp \rp \lp 0 \rp = 0$ ensure that: \begin{align} \lp \real_{\rect} \lp \Gamma \rp \rp \lp \begin{bmatrix} x \\0 \end{bmatrix} \rp &= \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp x+0 \rp -\frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp - \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp 0 \rp \nonumber \\ &= 0 \nonumber\\ &= \frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp 0+y \rp -\frac{1}{2} \lp \real_{\rect} \lp \Psi \rp \rp \lp 0 \rp - \frac{1}{2}\lp \real_{\rect} \lp \Psi \rp \rp \lp y \rp \nonumber \\ &=\lp \real_{\rect} \lp \Gamma \rp \rp \lp \begin{bmatrix} 0 \\y \end{bmatrix} \rp \end{align} This establishes Items (ii)--(iii). Next, observe that since by assumption it is the case for all $x,y\in \R$ that $|x^2 - \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp | \les \delta \max\{1,|x|^q\}$ and $xy = \frac{1}{2}|x+y|^2-\frac{1}{2}|x|^2-\frac{1}{2}|y|^2$, the triangle inequality and (\ref{6.0.33}) give us that: \begin{align} &\left| \lp \real_{\rect} \lp \Gamma\rp \rp \lp x,y \rp -xy\right| \nonumber\\ &=\left|\frac{1}{2}\lb \lp \real_{\rect} \lp \Psi \rp \rp \lp x + y \rp - \left|x+y\right|^2 \rb - \frac{1}{2} \lb \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp -\left| x \right|^2\rb - \frac{1}{2} \lb \lp \real_{\rect} \lp \Psi\rp \rp \lp y \rp -\left|y\right|^2\rb \right| \nonumber \\ &\les \frac{1}{2}\left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x + y \rp - \left|x+y\right|^2 \right| + \frac{1}{2} \left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp -\left| x \right|^2\right| + \frac{1}{2} \left| \lp \real_{\rect} \lp \Psi\rp \rp \lp y \rp -\left|y\right|^2\right| \nonumber \\ &\les \frac{\delta}{2} \lb \max \left\{ 1, |x+y|^q\right\} + \max\left\{ 1,|x|^q\right\} + \max \left\{1,|y|^q \right\}\rb\nonumber \end{align} Note also that since for all $\alpha,\beta \in \R$ and $p \in \lb 1, \infty \rp$ we have that $|\alpha + \beta|^p \les 2^{p-1}\lp |\alpha|^p + |\beta|^p \rp$, we have that: \begin{align} &\left| \lp \real_{\rect} \lp \Gamma \rp \rp \lp x,y \rp - xy \right| \nonumber \\ &\les \frac{\delta}{2} \lb \max \left\{1, 2^{q-1}|x|^q+ 2^{q-1}\left| y\right|^q\right\} + \max\left\{1,\left|x\right|^q \right\} + \max \left\{1,\left| y \right|^q \right\}\rb \nonumber \\ &\les
\frac{\delta}{2} \lb \max \left\{1, 2^{q-1}|x|^q \right\}+ 2^{q-1}\left| y\right|^q + \max\left\{1,\left|x\right|^q \right\} + \max \left\{1,\left| y \right|^q \right\}\rb \nonumber \\ &\les \frac{\delta}{2} \lb 2^q + 2\rb \max \left\{1, \left|x\right|^q, \left| y \right|^q \right\} = \ve \max \left\{ 1,\left| x \right|^q, \left| y \right|^q\right\} \nonumber \end{align} This proves Item (iv). By symmetry it holds that $\param \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp = \param \lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_2,0} \rp \rp = \param \lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp$ and further that $\lay \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp = \lay \lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_2,0} \rp \rp = \lay \lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_3,0} \rp \rp$. Note also that Corollary \ref{affcor} tells us that for all $i \in \{1,2,3\}$ and $a \in \{ \frac{1}{2},-\frac{1}{2}\}$ it is the case that: \begin{align} \param \lp a \triangleright \lp \Psi \bullet \aff_{A_i,0}\rp \rp = \param \lp \Psi \rp \end{align} This, together with Corollary \ref{corsum}, indicates that: \begin{align}\label{(6.1.49)} \param \lp \Gamma \rp &\les 9\param\lp \Psi \rp \nonumber \\ &\les 9\max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\} \end{align} Combined with the fact that $\delta =\ve \lp 2^{q-1} +1\rp^{-1}$, this is then rendered as: \begin{align}\label{(6.1.50)} &9\max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\} \nonumber \\ &= 9\max \left\{ \lb \frac{40q}{q-2}\rb \lp \log_2 \lp \ve^{-1}\rp +\log_2 \lp 2^{q-1}+1\rp\rp + \frac{80}{q-2}-28,52 \right\} \end{align} Note that: \begin{align} \log_2 \lp 2^{q-1}+1\rp &= \log_2\lp 2^{q-1}+1\rp - \log_2 \lp 2^q\rp + q \nonumber\\ &=\log_2 \lp \frac{2^{q-1}+1}{2^q}\rp + q = \log_2 \lp 2^{-1}+2^{-q}\rp +q\nonumber \\ &\les \log_2 \lp 2^{-1} + 2^{-2}\rp + q = \log_2 \lp \frac{3}{4}\rp + q = \log_2 \lp 3\rp-2+q \end{align} Combining this with the fact that for all $q\in \lp 2,\infty\rp$ it is the case that $\frac{q(q-1)}{q-2} \ges 2$ then gives us that: \begin{align} \lb \frac{40q}{q-2}\rb \log_2 \lp 2^{q-1}+1\rp -28\ges \lb \frac{40q}{q-2}\rb \log_2 \lp 2^{q-1}\rp -28= \frac{40q(q-1)}{q-2}-28 \ges 52 \end{align} This then finally renders (\ref{(6.1.50)}) as: \begin{align} &9\max \left\{ \lb \frac{40q}{q-2}\rb \lp \log_2 \lp \ve^{-1}\rp +\log_2 \lp 2^{q-1}+1\rp\rp + \frac{80}{q-2}-28,52 \right\} \nonumber \\ &\les 9 \lb \lb \frac{40q}{q-2}\rb \lp \log_2\lp \ve^{-1}\rp + \log_2\lp 3\rp-2+q\rp +\frac{80}{q-2}-28\rb \nonumber\\ &= 9 \lb \lb \frac{40q}{q-2}\rb \lp \log_2\lp \ve^{-1}\rp + \log_2\lp 3\rp-2+q+\frac{2}{q}\rp-28\rb \nonumber\\ &\les 9 \lb \lb \frac{40q}{q-2}\rb \lp \log_2\lp \ve^{-1}\rp + \log_2\lp 3\rp-1+q\rp -28\rb \nonumber\\ &= \frac{360q}{q-2}\lb \log_2 \lp \ve^{-1} \rp +q+\log_2\lp 3\rp-1\rb -252 \les \frac{360q}{q-2}\lb \log_2 \lp \ve^{-1} \rp +q+1\rb -252 \end{align} This establishes Item (v). Note that Lemma \ref{depth_prop}, Lemma \ref{5.3.3}, the hypothesis, the fact that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, and the fact that $\log_2 \lp 2^{q-1}+1 \rp \les q$ tell us that: \begin{align} \dep \lp \Gamma \rp = \dep\lp \Psi \rp &\les \max\left\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \delta^{-1} \rp ,2\right\} \nonumber\\ &= \max \left\{1+\frac{1}{q-2} +\frac{q}{2(q-2)}\lb \log_2\lp \ve^{-1}\rp + \log_2 \lp 2^{q-1}+1\rp\rb,2 \right\} \nonumber\\ &\les \max \left\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)} \lp \log_2\lp \ve^{-1}\rp +q\rp,2\right\} \end{align} Since it is the case that
$\frac{q(q-1)}{2(q-2)} > 2$ for $q \in \lp 2, \infty \rp$, the first argument of the maximum above dominates, and we have that: \begin{align} & \max \left\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)} \lp \log_2\lp \ve^{-1}\rp +q\rp,2\right\} \nonumber \\ &= 1+\frac{1}{q-2}+\frac{q}{2(q-2)} \lp \log_2\lp \ve^{-1}\rp +q\rp \nonumber \\ &= \frac{q-1}{q-2} +\frac{q}{2\lp q-2\rp} \lp \log_2 \lp \ve^{-1}\rp+q\rp \les \frac{q}{q-2} \lb \log_2 \lp \ve^{-1}\rp +q \rb \end{align} This establishes Item (vi). Observe next that for $q\in \lp 2,\infty\rp$, $\ve \in \lp 0,\infty \rp$, $\Gamma$ consists of, among other things, three stacked $\lp \Psi \bullet \aff_{A_i,0}\rp$ networks where $i \in \{1,2,3\}$. Corollary \ref{affcor} therefore tells us that $\wid_1\lp \Gamma\rp = 3\cdot \wid_1 \lp \Psi \rp$. On the other hand, note that each $\Psi$ network consists of, among other things, two stacked $\Phi$ networks, which by Corollary \ref{affcor} and Lemma \ref{lem:sqr_network} yields that $\wid_1 \lp \Gamma\rp = 6 \cdot \wid_1 \lp \Phi\rp$. Finally, from Corollary \ref{cor:phi_network} and Corollary \ref{affcor}, we see that the only thing contributing to $\wid_1\lp \Phi\rp$ is $\wid_1 \lp \mathfrak{i}_4\rp$, which was established in Lemma \ref{lem:mathfrak_i} as $4$. Whence we get that $\wid_1\lp \Gamma\rp = 6 \cdot 4 = 24$ and, by the same argument applied to the last hidden layer, that $\wid_{\hid\lp \Gamma\rp}\lp \Gamma\rp = 24$. This proves Items (vii)--(viii), and completes the proof of the lemma. \end{proof} \begin{corollary}\label{cor_prd} Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, $A_1,A_2,A_3 \in \R^{1\times 2}$, and $\Psi \in \neu$ satisfy for all $x\in \R$ that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, $A_1 = \lb 1 \quad 1 \rb$, $A_2 = \lb 1 \quad 0 \rb$, $A_3 = \lb 0 \quad 1 \rb$, $\real_{\rect}\lp \Psi \rp \in C\lp \R, \R \rp$, $\lp \real_{\rect} \lp \Psi \rp \rp \lp 0\rp = 0$, $0\les \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \les \delta+|x|^2$, $|x^2-\lp \real_{\rect}\lp \Psi \rp \rp \lp x \rp |\les \delta \max \{1,|x|^q\}$, $\dep\lp \Psi \rp \les \max\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \delta^{-1} \rp ,2\}$, and $\param \lp \Psi \rp \les \max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\}$, and finally let $\Gamma$ be defined as in Lemma \ref{prd_network}, i.e.: \begin{align} \Gamma = \lp \frac{1}{2}\triangleright \lp \Psi \bullet \aff_{A_1,0} \rp \rp \bigoplus \lp \lp -\frac{1}{2}\rp \triangleright\lp \Psi \bullet \aff_{A_2,0} \rp \rp \bigoplus\lp \lp -\frac{1}{2}\rp \triangleright \lp \Psi \bullet \aff_{A_3,0} \rp \rp \end{align} It is then the case for all $x,y \in \R$ that: \begin{align} \left| \lp \real_{\rect} \lp \Gamma \rp \rp \lp x,y \rp \right| \les \frac{3}{2} \lp \frac{\ve}{3} +x^2+y^2\rp \les \ve + 2x^2+2y^2 \end{align} \end{corollary} \begin{proof} Note that the triangle inequality, the fact that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$ with $2^{q-1}+1 \ges 3$ for $q \in \lp 2,\infty\rp$, the fact that for all $x,y\in \R$ it is the case that $|x+y|^2 \les 2\lp |x|^2+|y|^2\rp$, and (\ref{6.0.33}) tell us that: \begin{align} \left| \lp \real_{\rect} \lp \Gamma \rp \rp \lp x,y\rp \right| &\les \frac{1}{2}\left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x+y \rp \right| + \frac{1}{2}\left| \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \right| + \frac{1}{2}\left| \lp \real_{\rect} \lp \Psi \rp \rp \lp y \rp \right| \nonumber \\ &\les \frac{1}{2} \lp \delta + |x+y|^2 \rp + \frac{1}{2}\lp \delta + |x|^2\rp + \frac{1}{2}\lp \delta + |y|^2\rp\nonumber \\ &\les \frac{3\delta}{2} +\frac{3}{2}\lp |x|^2+|y|^2\rp = \lp \frac{3\ve}{2}\rp \lp 2^{q-1}+1\rp^{-1} + \frac{3}{2}\lp |x|^2+|y|^2\rp \nonumber\\ &= \frac{3}{2}\lp \frac{\ve}{2^{q-1}+1} + |x|^2 + |y|^2 \rp \les \frac{3}{2} \lp \frac{\ve}{3}+|x|^2+|y|^2\rp
\nonumber \\ &\les \ve + 2x^2+2y^2 \end{align} \end{proof} \begin{remark} We shall refer to this neural network for a given $q \in \lp 2,\infty \rp$ and given $\ve \in \lp 0,\infty \rp$ from now on as $\prd^{q,\ve}$. \end{remark} \begin{remark} For an \texttt{R} implementation see Listing \ref{Prd} \end{remark} \begin{remark} Diagrammatically, this can be represented as: \end{remark} \begin{figure} \begin{center} \tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt \begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1] %uncomment if require: \path (0,475); %set diagram left start at 0, and has height of 475 %Shape: Rectangle [id:dp5102621452939872] \draw (242,110.33) -- (430.67,110.33) -- (430.67,162.33) -- (242,162.33) -- cycle ; %Shape: Rectangle [id:dp5404063577476766] \draw (238.67,204.33) -- (427.33,204.33) -- (427.33,256.33) -- (238.67,256.33) -- cycle ; %Shape: Rectangle [id:dp36108799479514775] \draw (240,308.33) -- (428.67,308.33) -- (428.67,360.33) -- (240,360.33) -- cycle ; %Shape: Rectangle [id:dp8902718451088835] \draw (515.33,202.67) -- (600.67,202.67) -- (600.67,252.33) -- (515.33,252.33) -- cycle ; %Shape: Rectangle [id:dp787158651575801] \draw (74,204.67) -- (159.33,204.67) -- (159.33,254.33) -- (74,254.33) -- cycle ; %Straight Lines [id:da7097969194866411] \draw (515.33,202.67) -- (433.55,136.26) ; \draw [shift={(432,135)}, rotate = 39.08] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da06987054821586158] \draw (514.67,226) -- (432,226.98) ; \draw [shift={(430,227)}, rotate = 359.32] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da6649718583556108] \draw (515.33,252.33) -- (430.79,331.63) ; \draw [shift={(429.33,333)}, rotate = 316.83] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da522975332769982] \draw (240.67,136) -- (160.86,203.38) ; \draw [shift={(159.33,204.67)}, rotate = 319.83] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da23420272890635796] \draw (238.67,230.67) -- (160.67,231.64) ; \draw [shift={(158.67,231.67)}, rotate = 359.28] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da3786949398178764] \draw (239.33,333.33) -- (160.76,255.74) ; \draw [shift={(159.33,254.33)}, rotate = 44.64] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da6573206574101601] \draw (640.67,228.33) -- (602.33,228.33) ; \draw [shift={(600.33,228.33)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. 
(10.93,3.29) ; %Straight Lines [id:da2877353538717321] \draw (74,227.67) -- (35.67,227.67) ; \draw [shift={(33.67,227.67)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; % Text Node \draw (286,124) node [anchor=north west][inner sep=0.75pt] {$\frac{1}{2} \rhd \lp \Psi \bullet \aff_{A_1,0}\rp$}; % Text Node \draw (286,220) node [anchor=north west][inner sep=0.75pt] {$\lp -\frac{1}{2}\rp \rhd \lp \Psi \bullet \aff_{A_2,0}\rp$}; % Text Node \draw (286,326) node [anchor=north west][inner sep=0.75pt] {$\lp -\frac{1}{2}\rp \rhd \lp \Psi \bullet \aff_{A_3,0}\rp$}; % Text Node \draw (543,220) node [anchor=north west][inner sep=0.75pt] {$\cpy$}; % Text Node \draw (100,225) node [anchor=north west][inner sep=0.75pt] {$\sm$}; \end{tikzpicture} \end{center} \caption{A neural network diagram of the $\prd$ network.} \end{figure} \section{Higher Approximations}\label{sec_tun} We take inspiration from the $\prd$ neural network to create the $\pwr$ neural network. However, we first need to define a special neural network, called a \textit{tunneling neural network}, to effectively stack two neural networks of unequal depth. \subsection{The $\tun$ Neural Networks and Their Properties} \begin{definition}[R\textemdash,2023, The Tunneling Neural Networks]\label{def:tun} We define the tunneling neural network, denoted as $\tun_n$ for $n\in \N$, by: \begin{align} \tun_n = \begin{cases} \aff_{1,0} &:n= 1 \\ \id_1 &: n=2 \\ \bullet^{n-2} \id_1 & :n \in \N \cap [3,\infty) \end{cases} \end{align} Where $\id_1$ is as in Definition \ref{7.2.1}. \end{definition} \begin{remark} For an \texttt{R} implementation see Listing \ref{Tun}. \end{remark} \begin{lemma}\label{6.2.2}\label{tun_1} Let $n\in \N$. It is then the case for all $x\in \R$ that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \tun_n \rp \in C \lp \R, \R \rp$ \item $\dep \lp \tun_n \rp =n$ \item $\lp \real_{\rect} \lp \tun_n \rp \rp \lp x \rp = x$ \item $\param \lp \tun_n \rp = \begin{cases} 2 &:n=1 \\ 7+6(n-2) &:n \in \N \cap [2,\infty) \end{cases}$ \item $\lay \lp \tun_n \rp = \lp l_0, l_1,\ldots,l_{L-1}, l_L \rp = \lp 1,2,\ldots,2,1 \rp $ \end{enumerate} \end{lemma} \begin{proof} Note that $\real_{\rect}\lp \aff_{1,0}\rp \in C \lp \R, \R\rp$ and that by Lemma \ref{idprop} we have $\real_{\rect}\lp \id_1 \rp \in C\lp \R, \R\rp$. Finally, since the composition of continuous functions is continuous, it follows that $\real_{\rect}\lp \tun_n \rp \in C\lp \R, \R\rp$ for all $n \in \N$. This proves Item (i). Note that by Lemma \ref{5.3.2} it is the case that $\dep\lp \aff_{1,0} \rp = 1$ and by Lemma \ref{idprop} it is the case that $\dep \lp \id_1 \rp = 2$. Assume now that for all $n \les N$ it is the case that $\dep\lp \tun_n \rp = n$; then, for the inductive step, Lemma \ref{comp_prop} gives us that: \begin{align} \dep \lp \tun_{n+1} \rp &= \dep \lp \bullet^{n-1} \id_1 \rp \nonumber \\ &= \dep \lp \lp \bullet^{n-2} \id_1 \rp \bullet \id_1 \rp \nonumber \\ &=n+2-1 = n+1 \end{align} This completes the induction and proves Item (ii). Note next that by (\ref{5.1.11}) we have that: \begin{align} \lp \real_{\rect} \lp \aff_{1,0} \rp \rp \lp x \rp = x \end{align} Lemma \ref{idprop}, Item (iii), also tells us that: \begin{align} \lp \real_{\rect} \lp \id_1 \rp \rp \lp x \rp = \rect(x) - \rect(-x) = x \end{align} Assume now that for all $n\les N$ it is the case that $\lp \real_{\rect} \lp \tun_n \rp \rp \lp x \rp = x$.
For the inductive step, by Lemma \ref{idprop}, Item (iii), we then have that: \begin{align} \lp \real_{\rect} \lp \tun_{n+1} \rp \rp \lp x \rp &= \lp \real_{\rect} \lp \bullet^{n-1} \id_1 \rp \rp \lp x \rp \nonumber\\ &= \lp \real_{\rect} \lp \lp \bullet^{n-2} \id_1 \rp \bullet \id_1 \rp \rp \lp x \rp \nonumber\\ &= \lp \lp \real_{\rect} \lp \bullet^{n-2} \id_1 \rp \rp \circ \lp \real_{\rect} \lp \id_1 \rp \rp \rp \lp x \rp \nonumber \\ &= \lp \lp \real_{\rect} \lp \tun_n \rp \rp \circ \lp \real_{\rect} \lp \id_1 \rp \rp \rp \lp x \rp \nonumber \\ &= x \end{align} This proves Item (iii). Next note that $\param\lp \tun_1\rp = \param\lp \aff_{1,0}\rp = 2$. Note also that: \begin{align} \param\lp \tun_2\rp = \param \lp \id_1 \rp &= \param \lb \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix}\rp \rp \rb \nonumber \\ &= 7 \nonumber \end{align} And that by the definition of composition: \begin{align} \param \lp \tun_3 \rp &= \param \lb \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix}\rp \rp \bullet \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix}\rp \rp \rb \nonumber \\ &= \param \lb \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix} \rp, \lp \begin{bmatrix} 1 & -1 \\ -1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1&-1 \end{bmatrix},\begin{bmatrix} 0 \end{bmatrix}\rp \rp \rb \nonumber \\ &=13 \nonumber \end{align} Now assume that for all $n\les N\in \N$ it is the case that $\param\lp \tun_n \rp = 7+6(n-2)$. For the inductive step, we then have: \begin{align} &\param \lp \tun_{n+1} \rp = \param \lp \tun_n \bullet \id_1 \rp \nonumber\\ &=\param \lb \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & -1 \\ -1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \\0 \end{bmatrix}\rp, \cdots, \lp \begin{bmatrix} 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix}\rp \rp \bullet \id_1 \rb \nonumber \\ &= \param \lb \lp \lp \begin{bmatrix} 1 \\ -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & -1 \\ -1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \\0 \end{bmatrix}\rp, \cdots, \lp \begin{bmatrix} 1 & -1 \\ -1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \end{bmatrix} \rp, \lp \begin{bmatrix} 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \end{bmatrix}\rp \rp \rb \nonumber \\ &=7+6(n-2)+6 = 7+6\lp \lp n+1 \rp -2 \rp \end{align} This proves Item (iv). Note finally that Item (v) is a consequence of Lemma \ref{idprop}, Item (i), and Lemma \ref{comp_prop}. \end{proof} \begin{definition}[R\textemdash, 2023, The Multi-dimensional Tunneling Network]\label{def:tun_mult} We define the multi-dimensional tunneling neural network, denoted as $\tun^d_n$ for $n\in \N$ and $d \in \N$, by: \begin{align} \tun_n^d = \begin{cases} \aff_{\mathbb{I}_d,\mymathbb{0}_d} &:n= 1 \\ \id_d &: n=2 \\ \bullet^{n-2} \id_d & :n \in \N \cap [3,\infty) \end{cases} \end{align} Where $\id_d$ is as in Definition \ref{7.2.1}. \end{definition} \begin{remark} We may drop the superscript $d$ and simply write $\tun_n$ when $d=1$, or when $d$ is evident from the context.
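\end{remark} \begin{remark} As an illustration of Definition \ref{def:tun}, the following minimal \texttt{R} sketch builds $\tun_n$ and instantiates it with ReLU. It assumes the simplified representation of a network as a list of layers, each a list with a weight matrix \texttt{W} and a bias vector \texttt{b}; the representation used in Listing \ref{Tun} may differ.
\begin{lstlisting}[language=R]
# A network is a list of layers; each layer is list(W = <matrix>, b = <vector>).
Id1 <- list(
  list(W = matrix(c(1, -1), nrow = 2), b = c(0, 0)),  # x |-> (x, -x), then ReLU
  list(W = matrix(c(1, -1), nrow = 1), b = 0)         # (u, v) |-> u - v
)

# Composition nu1 . nu2: fuse the output layer of nu2 into the input layer of nu1.
comp <- function(nu1, nu2) {
  k <- length(nu2)
  fused <- list(W = nu1[[1]]$W %*% nu2[[k]]$W,
                b = as.vector(nu1[[1]]$W %*% nu2[[k]]$b + nu1[[1]]$b))
  c(nu2[-k], list(fused), nu1[-1])
}

# Tun_n for n >= 2: the (n-2)-fold composition of Id1 with itself.
Tun <- function(n) {
  stopifnot(n >= 2)
  nu <- Id1
  if (n >= 3) for (i in seq_len(n - 2)) nu <- comp(nu, Id1)
  nu
}

# ReLU instantiation: activation on every layer except the last.
realize <- function(nu, x) {
  for (i in seq_len(length(nu) - 1)) x <- pmax(nu[[i]]$W %*% x + nu[[i]]$b, 0)
  as.vector(nu[[length(nu)]]$W %*% x + nu[[length(nu)]]$b)
}
\end{lstlisting}
For instance, \texttt{realize(Tun(5), 3)} returns \texttt{3}, and \texttt{Tun(3)} has layer architecture $(1,2,2,1)$ with $4+6+3 = 13$ parameters, in agreement with Items (iii)--(v) of Lemma \ref{tun_1}.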
\end{remark} \begin{lemma}\label{tun_mult} Let $n,d\in \N$. It is then the case for all $x\in \R^d$ that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \tun_n^d \rp \in C \lp \R^d, \R^d \rp$ \item $\dep \lp \tun_n^d \rp =n$ \item $\lp \real_{\rect} \lp \tun_n^d \rp \rp \lp x \rp = x$ \item $\param \lp \tun_n^d \rp = \begin{cases} d^2+d &:n=1 \\ 4d^2+3d+ (n-2)\lp 4d^2+2d\rp &:n \in \N \cap [2,\infty) \end{cases}$ \item $\lay \lp \tun_n^d \rp = \lp l_0, l_1,\ldots,l_{L-1}, l_L \rp = \lp d,2d,\ldots,2d,d \rp$ \end{enumerate} \end{lemma} \begin{proof} Note that Items (i)--(iii) are consequences of Lemma \ref{idprop} and Lemma \ref{comp_prop}. Note now that by observation $\param \lp \tun^d_1\rp = d^2+d$. Next, Lemma \ref{id_param} tells us that $\param\lp \tun^d_2\rp = 4d^2+3d$. Note also that by the definition of neural network composition, we have the following: \begin{align} &\param\lp \tun_3^d\rp \\ &= \param \lb \lp \lp \begin{bmatrix} 1 \\ -1 \\ &\ddots \\& & 1 \\& & -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\ \vdots \\ 0 \\0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & -1 \\ & &\ddots \\ & & & 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \\ \vdots \\ 0 \end{bmatrix}\rp \rp \bullet \right.\\ &\left. \lp \lp \begin{bmatrix} 1 \\ -1 \\ & \ddots \\ & & 1 \\ & & -1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\ \vdots \\ 0 \\0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 & -1\\ & &\ddots \\ & & & 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \\ \vdots \\ 0 \end{bmatrix}\rp \rp \rb \nonumber \\ &= \param \lb \lp \lp \begin{bmatrix} 1 \\ -1 \\ & \ddots \\ & & 1 \\ & &-1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\\vdots \\ 0\\0 \end{bmatrix} \rp, \lp \begin{bmatrix} 1 & -1 \\ -1 & 1 \\ & & \ddots \\ & & & 1 & -1 \\ & & & -1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\ \vdots \\ 0 \\ 0 \end{bmatrix}\rp, \lp \begin{bmatrix} 1 &-1 \\ & &\ddots \\ & & & 1 & -1 \end{bmatrix},\begin{bmatrix} 0 \\ \vdots \\ 0 \end{bmatrix}\rp \rp \rb \nonumber \\ &=2d \times d + 2d + 2d\times 2d +2d+2d\times d + d \nonumber \\ &=2d^2+2d+4d^2+2d+2d^2 +d \nonumber \\ &= 8d^2+5d \end{align} Suppose now that for all naturals up to and including $n$ it is the case that $\param\lp \tun_n^d\rp = 4d^2+3d + \lp n-2 \rp \lp 4d^2+2d\rp$. For the inductive step, we have the following: \begin{align} & \param\lp \tun^d_{n+1}\rp = \param \lp \tun_n^d \bullet \id_d\rp \nonumber \\ & = \param \lb \lp \begin{bmatrix} 1 \\ -1 \\ & \ddots \\ & & 1 \\ & &-1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\ \vdots \\ 0 \\ 0 \end{bmatrix} \rp, \lp \begin{bmatrix} 1 & -1 \\ -1 & 1 \\ & \ddots \\ & & 1 & -1 \\ & & -1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\ \vdots \\ 0 \\ 0 \end{bmatrix} \rp, \hdots, \lp \begin{bmatrix} 1 &-1 \\ & \ddots \\ & & 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \\ \vdots \\ 0 \end{bmatrix}\rp \right. \nonumber \\ & \left. \bullet \id_d \rb \nonumber\\ & = \param \lb \lp \begin{bmatrix} 1 \\ -1 \\ & \ddots \\ & & 1 \\ & &-1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\ \vdots \\ 0 \\ 0 \end{bmatrix} \rp, \lp \begin{bmatrix} 1 & -1 \\ -1 & 1 \\ & \ddots \\ & & 1 & -1 \\ & & -1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\ \vdots \\ 0 \\ 0 \end{bmatrix} \rp, \hdots, \lp \begin{bmatrix} 1 & -1 \\ -1 & 1 \\ & \ddots \\ & & 1 & -1 \\ & & -1 & 1 \end{bmatrix}, \begin{bmatrix} 0 \\ 0 \\ \vdots \\ 0 \\ 0 \end{bmatrix} \rp, \right. \nonumber\\ &\left.
\lp \begin{bmatrix} 1 &-1 \\ & \ddots \\ & & 1 & -1 \end{bmatrix}, \begin{bmatrix} 0 \\ \vdots \\ 0 \end{bmatrix}\rp \rb \nonumber\\ &= 4d^2+3d+ (n-2)\lp 4d^2+2d\rp + 4d^2+2d \nonumber \\ &=4d^2+3d+\lp n-1\rp\lp 4d^2+2d\rp \nonumber \end{align} This proves Item (iv). Finally, Item (v) is a consequence of Lemma \ref{5.3.2}. \end{proof} \subsection{The $\pwr$ Neural Networks and Their Properties} \begin{definition}[R\textemdash, 2023, The Power Neural Network]\label{def:pwr} Let $\delta,\ve \in \lp 0,\infty \rp $ and $q\in \lp 2,\infty \rp$ satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. We define the power neural networks $\pwr_n^{q,\ve} \in \neu$ for $n\in \N_0$ as: \begin{align} \pwr_n^{q,\ve} = \begin{cases} \aff_{0,1} & :n=0\\ \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr_{n-1}^{q,\ve})} \boxminus \pwr_{n-1}^{q,\ve} \rb \bullet \cpy_{2,1} & :n \in \N \end{cases} \nonumber \end{align} Diagrammatically, this can be represented as: \begin{figure} \begin{center} \begin{tikzpicture} % Define nodes \node[draw, rectangle] (top) at (0, 2) {$\pwr_{n-1}^{q,\ve}$}; \node[draw, rectangle] (right) at (2, 0) {$\cpy_{2,1}$}; \node[draw, rectangle] (bottom) at (0, -2) {$\tun_{\dep(\pwr_{n-1}^{q,\ve})}$}; \node[draw, rectangle] (left) at (-2, 0) {$\prd^{q,\ve} $}; % Arrows with labels \draw[->] (right) -- node[midway, above] {$x$} (top); \draw[<-] (right) -- node[midway, above] {$x$} (4,0)(right); \draw[->] (right) -- node[midway, right] {$x$} (bottom); \draw[->] (top) -- node[midway, left] {$\lp \real_{\rect}\lp \pwr^{q,\ve}_{n-1}\rp \rp \lp x \rp $} (left); \draw[->] (bottom) -- node[midway, left] {$x$} (left); \draw[->] (left) -- node[midway, above] {} (-5.5,0); % \draw[->] (-3,0) -- node[midway, above] {Arrow 6} (left); \end{tikzpicture} \end{center} \caption{A representation of a typical $\pwr^{q,\ve}_n$ network.} \end{figure} \begin{remark} For an \texttt{R} implementation see Listing \ref{Pwr}. \end{remark} \begin{remark} Note that for all $i \in \N$, $q\in \lp 2,\infty\rp$, $\ve \in \lp 0, \infty \rp$, each $\pwr_i^{q,\ve}$ differs from $\pwr_{i+1}^{q,\ve}$ by at least one $\prd^{q,\ve}$ network. \end{remark} \end{definition} \begin{lemma}\label{6.2.4} Let $\ve \in \lp 0,\infty \rp$ and $q \in \lp 2,\infty \rp$. It is then the case for all $x,y \in \R$ that: \begin{align} \ve \max \left\{ 1,|x|^q,|y|^q\right\} \les \ve + \ve |x|^q+\ve |y|^q.
\end{align} \end{lemma} \begin{proof} We proceed by cases. For the case that $|x| \les 1$ and $|y| \les 1$, we have: \begin{align} \ve \max \left\{ 1,|x|^q,|y|^q \right\} = \ve \les \ve + \ve |x|^q+\ve |y|^q \end{align} For the case that exactly one of $|x|$, $|y|$ exceeds $1$, say without loss of generality $|y| \ges 1$, we have: \begin{align} \ve \max \left\{1,|x|^q,|y|^q \right\} \les \ve | y|^q \les \ve + \ve |x|^q+\ve |y|^q \end{align} For the case that $|x| \ges 1$ and $|y| \ges 1$, assume without loss of generality that $|x| \ges |y|$; we then have: \begin{align} \ve \max\{ 1, |x|^q,|y|^q \} = \ve |x|^q \les \ve + \ve |x|^q+\ve |y|^q \end{align} \end{proof} \begin{lemma} Let $\mathfrak{p}_i$, $i \in \N$, be the functions defined for $\ve \in \lp 0,\infty\rp$ and $x \in \R$ as follows: \begin{align} \mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\ \mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \text{ for } i \ges 2 \end{align} For all $n\in \N$, $\ve \in (0,\infty)$, and $q\in (2,\infty)$ it then holds for all $x\in \R$ that: \begin{align} \left| \real_{\rect} \lp \pwr^{q,\ve}_n \rp \lp x \rp\right| \les \mathfrak{p}_n \end{align} \end{lemma} \begin{proof} Note that by Corollary \ref{cor_prd} it is the case that: \begin{align}\label{(6.2.31)} \left|\real_{\rect} \lp \pwr^{q,\ve}_1 \rp \lp x \rp \right| =\left| \lp \real_{\rect}\lp \prd^{q,\ve}\rp\rp \lp 1,x \rp \right| \les \mathfrak{p}_1 \end{align} Applying Corollary \ref{cor_prd} together with (\ref{(6.2.31)}), it is also the case that: \begin{align} \left| \real_{\rect} \lp \pwr_2^{q,\ve}\rp \lp x \rp \right| &= \left| \lp \real_{\rect} \lp \prd^{q,\ve} \rp \rp \lp \lp \real_{\rect} \lp \prd ^{q,\ve}\rp\rp\lp 1,x \rp,x\rp \right| \nonumber \\ &\les \ve + 2\left| \lp \real_{\rect} \lp \prd^{q,\ve}\rp\rp\lp 1,x\rp \right|^2 + 2|x|^2 \nonumber \\ &\les \ve + 2\mathfrak{p}_1^2 +2|x|^2 = \mathfrak{p}_2 \end{align} Assume now that the claim holds for all integers up to and including $n$. For the inductive step, Corollary \ref{cor_prd} tells us that: \begin{align} \left| \real_{\rect} \lp \pwr_{n+1}^{q,\ve}\rp \lp x\rp \right| &= \left| \lp \real_{\rect} \lp \prd^{q,\ve} \rp\rp \lp \lp \real_{\rect}\lp \pwr^{q,\ve}_{n}\rp\rp \lp x\rp ,x\rp \right| \nonumber \\ &\les \ve + 2\left| \lp \real_{\rect}\lp \pwr^{q,\ve}_n\rp\rp \lp x\rp \right|^2 + 2|x|^2 \nonumber \\ &\les \ve + 2\mathfrak{p}_n^2 + 2|x|^2 = \mathfrak{p}_{n+1} \end{align} This completes the proof of the lemma. \end{proof} \begin{remark} Note that since any instance of $\mathfrak{p}_i$ contains an instance of $\mathfrak{p}_{i-1}$ for $i \in \N \cap \lb 2,\infty\rp$, we have that $\mathfrak{p}_n \in \mathcal{O}\lp \ve^{2(n-1)}\rp$. \end{remark} \begin{lemma}\label{param_pwr_geq_param_tun} For all $n \in \N$, $q\in \lp 2,\infty\rp$, and $\ve \in \lp 0,\infty\rp$, it is the case that $\param \lp \tun_{\dep\lp\pwr^{q,\ve}_n\rp}\rp \les \param \lp \pwr^{q,\ve}_n\rp$. \end{lemma} \begin{proof} Note that for all $n \in \N$ it is straightforwardly the case that $\param\lp \pwr_n^{q,\ve}\rp \ges \param \lp \tun_{\dep\lp \pwr^{q,\ve}_{n-1}\rp}\rp$, because for all $n\in \N$ a $\pwr^{q,\ve}_n$ network contains a $\tun_{\dep\lp \pwr^{q,\ve}_{n-1}\rp}$ network. Note now that for all $i \in \N$ we have from Lemma \ref{tun_1} that $5 \les \param\lp \tun_{i+1}\rp - \param\lp \tun_i\rp \les 6$.
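For instance, in the case $d=1$, Lemma \ref{tun_mult} gives $\param\lp \tun_1\rp = 2$, $\param\lp \tun_2\rp = 7$, and $\param\lp \tun_3\rp = 13$, so consecutive tunneling networks indeed differ first by $5$ and thereafter by $6$ parameters.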
Recall from Corollary \ref{cor:phi_network} that every instance of the $\Phi$ network contains at least one $\mathfrak{i}_4$ network, which by Lemma \ref{lem:mathfrak_i} has $40$ parameters, whence the $\prd^{q,\ve}$ network has at least $40$ parameters for all $\ve \in \lp 0,\infty \rp$ and $q \in \lp 2,\infty\rp$. Note now that for all $i\in \N$, $\pwr^{q,\ve}_{i}$ and $\pwr^{q,\ve}_{i+1}$ differ by at least as many parameters as there are in $\prd^{q,\ve}$, since, indeed, they differ by at least one more $\prd^{q,\ve}$. Thus, with every increment in $i$, $\pwr_i^{q,\ve}$ gains at least $40-6 = 34$ more parameters than $\tun_i$ does. Whence it is the case for all $i \in \N$ that $\param\lp \tun_i\rp \les \param \lp \pwr^{q,\ve}_i\rp$. \end{proof} \begin{lemma}[R\textemdash,2023]\label{power_prop} Let $\delta,\ve \in \lp 0,\infty \rp$ and $q\in \lp 2,\infty \rp$ satisfy $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, and let $\pwr_n^{q,\ve} \in \neu$ be as in Definition \ref{def:pwr}. It is then the case for all $n \in \N_0$ and $x \in \R$ that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \pwr_n^{q,\ve} \rp \in C \lp \R, \R \rp $ \item $\dep(\pwr_n^{q,\ve}) \les \begin{cases} 1 & :n=0\\ n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 & :n \in \N \end{cases}$ \item $\wid_1 \lp \pwr^{q,\ve}_{n}\rp = \begin{cases} 1 & :n=0 \\ 24+2\lp n-1 \rp & :n \in \N \end{cases}$ \item $\param(\pwr_n^{q,\ve}) \les \begin{cases} 2 & :n=0 \\ 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp &: n\in \N \end{cases}$\\~\\ \item $\left|x^n -\lp \real_{\rect} \lp \pwr^{q,\ve}_n \rp \rp \lp x \rp \right| \les \begin{cases} 0 & :n=0 \\ \left| x \lp x^{n-1} - \real_{\rect}\lp \pwr^{q,\ve}_{n-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{n-1}^q & :n\in \N \end{cases}$ \\~\\ where we let $\mathfrak{p}_i$ for $i \in \{1,2,...\}$ be the functions defined as follows: \begin{align} \mathfrak{p}_1 &= \ve + 2 + 2|x|^2 \nonumber\\ \mathfrak{p}_i &= \ve + 2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \end{align} And whence we get that: \begin{align} \left| x^{n} - \real_{\rect} \lp \pwr^{q,\ve}_n\rp \lp x\rp\right| \in \mathcal{O} \lp \ve^{2q\lp n-1\rp} \rp &\text{ for } n \ges 2 \end{align} \item $\wid_{\hid \lp \pwr_n^{q,\ve}\rp}\lp \pwr^{q,\ve}_n\rp = \begin{cases} 1 & n=0 \\ 24 & n \in \N \end{cases}$ \end{enumerate} \end{lemma} \begin{proof} Note that Item (ii) of Lemma \ref{5.3.2} ensures that $\real_{\rect} \lp \pwr_0^{q,\ve} \rp = \real_{\rect}\lp \aff_{0,1} \rp \in C \lp \R, \R \rp$. Note next that by Item (v) of Lemma \ref{comp_prop}, with $\Phi_1 \curvearrowleft \nu_1$, $\Phi_2 \curvearrowleft \nu_2$, $a \curvearrowleft \rect$, we have that: \begin{align} \lp \real_{\rect} \lp \nu_1 \bullet \nu_2 \rp\rp \lp x \rp = \lp\lp \real_{\rect}\lp \nu_1 \rp \rp \circ \lp \real_{\rect}\lp \nu_2 \rp \rp \rp \lp x \rp \end{align} This, together with the facts that the composition of continuous functions is continuous and that the stacking of instantiated continuous neural networks is again continuous, tells us that $\real_{\rect} \lp \pwr_n^{q,\ve} \rp \in C \lp \R, \R \rp$ for all $n \in \N$. This establishes Item (i). Note next that by observation $\dep \lp \pwr_0^{q,\ve} \rp=1$ and, by Item (iv) of Lemma \ref{idprop}, it is the case that $\dep\lp \id_1 \rp = 2$.
By Lemmas \ref{dep_cpy} and \ref{depthofcomposition} it is also the case that $\dep\lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr^{q,\ve}_{n-1})} \boxminus \pwr^{q,\ve}_{n-1} \rb \bullet \cpy_{2,1} \rp = \dep \lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr^{q,\ve}_{n-1})} \boxminus \pwr^{q,\ve}_{n-1} \rb\rp $. Note also that, since the tunneling network is chosen to match the depth of $\pwr^{q,\ve}_{n-1}$, stacking does not increase the depth, i.e. $\dep \lp \tun_{\dep \lp \pwr^{q,\ve}_{n-1}\rp} \boxminus \pwr^{q,\ve}_{n-1}\rp = \dep \lp \pwr^{q,\ve}_{n-1} \rp$. This with Lemma \ref{comp_prop} then yields for $n \in \N$ that: \begin{align} \dep \lp \pwr^{q,\ve}_n \rp &= \dep \lp \prd^{q,\ve} \bullet \lb \tun_{\dep \lp \pwr^{q,\ve}_{n-1} \rp } \boxminus \pwr^{q,\ve}_{n-1} \rb \bullet \cpy_{2,1} \rp \nonumber \\ &= \dep \lp \prd^{q,\ve} \bullet \lb \tun_{\dep \lp \pwr^{q,\ve}_{n-1} \rp } \boxminus \pwr^{q,\ve}_{n-1} \rb \rp \nonumber \\ &= \dep \lp \prd^{q,\ve} \rp + \dep \lp \tun_{\dep \lp \pwr^{q,\ve}_{n-1} \rp} \rp -1 \nonumber \\ &\les \frac{q}{q-2} \lb \log_2 \lp \ve^{-1}\rp +q \rb + \dep \lp \tun_{\dep\lp \pwr^{q,\ve}_{n-1} \rp} \rp - 1 \nonumber \\ &= \frac{q}{q-2}\lb \log_2 \lp\ve^{-1} \rp + q\rb + \dep \lp \pwr^{q,\ve}_{n-1}\rp - 1 \end{align} And hence for all $n \in \N$ it is the case that: \begin{align} \dep\lp \pwr^{q,\ve}_n\rp - \dep \lp \pwr^{q,\ve}_{n-1}\rp \les \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \end{align} This, together with the fact that $\dep\lp \pwr_0^{q,\ve}\rp = 1$, in turn indicates that: \begin{align} \dep \lp \pwr^{q,\ve}_n\rp \les n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 \end{align} This proves Item (ii). Note now that $\wid_1 \lp \pwr^{q,\ve}_0\rp = \wid_1 \lp \aff_{0,1}\rp = 1$. Further, Lemma \ref{comp_prop} and Remark \ref{5.3.2} tell us that for all $i,k \in \N$ it is the case that $\wid_i \lp \tun_k\rp \les 2$. Observe that since $\cpy_{2,1}$, $\pwr_0^{q,\ve}$, and $\tun_{\dep \lp \pwr_0^{q,\ve}\rp}$ are all affine neural networks, Lemma \ref{aff_effect_on_layer_architecture}, Corollary \ref{affcor}, and Lemma \ref{prd_network} tell us that: \begin{align} \wid_1 \lp \pwr_1^{q,\ve} \rp &= \wid_1 \lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr_{0}^{q,\ve})} \boxminus \pwr_{0}^{q,\ve} \rb \bullet \cpy_{2,1} \rp \nonumber \\ &= \wid_1 \lp \prd^{q,\ve}\rp = 24 \end{align} And that: \begin{align} \wid_1 \lp \pwr_2^{q,\ve} \rp &= \wid_1 \lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr_{1}^{q,\ve})} \boxminus \pwr_{1}^{q,\ve} \rb \bullet \cpy_{2,1} \rp \nonumber \\ &= \wid_1 \lp \lb \tun_{\dep \lp \pwr^{q,\ve}_1 \rp} \boxminus \pwr_{1}^{q,\ve} \rb \rp \nonumber\\ &= 2+24 = 26 \nonumber \end{align} This completes the base case. For the inductive case, assume that for all $i$ up to and including $k\in \N$ it is the case that $\wid_1 \lp \pwr_i^{q,\ve}\rp = \begin{cases} 1 & :i=0 \\ 24+2(i-1) & :i \in \N \end{cases}$. For the case of $k+1$, we get that: \begin{align} \wid_1 \lp \pwr_{k+1}^{q,\ve} \rp &= \wid_1 \lp \prd^{q,\ve} \bullet \lb \tun_{\dep(\pwr_{k}^{q,\ve})} \boxminus \pwr_{k}^{q,\ve} \rb \bullet \cpy_{2,1} \rp \nonumber \\ &=\wid_1 \lp \lb \tun_{\dep(\pwr_{k}^{q,\ve})} \boxminus \pwr_{k}^{q,\ve} \rb \rp \nonumber \\ &=\wid_1 \lp \tun_{\dep \lp \pwr^{q,\ve}_{k}\rp}\rp + \wid_1 \lp \pwr^{q,\ve}_k\rp \nonumber \\ &= 2 + 24 + 2\lp k-1\rp = 24 + 2\lp \lp k+1\rp -1\rp \end{align} This establishes Item (iii). For Item (iv), we will prove this in cases.
\textbf{Case 1: $\pwr_0^{q,\ve}:$} Note that by Lemma \ref{5.3.2} we have that: \begin{align} \param\lp \pwr_0^{q,\ve} \rp = \param \lp \aff_{0,1} \rp =2 \end{align} This completes Case 1. \textbf{Case 2: $\pwr_n^{q,\ve}$ where $n\in \N$:} Note that Lemma \ref{paramofparallel}, Lemma \ref{param_pwr_geq_param_tun}, Corollary \ref{cor:sameparal}, Lemma \ref{lem:paramparal_geq_param_sum}, and Corollary \ref{cor:bigger_is_better} tell us that it is the case that: \begin{align} \param \lp \pwr_{n-1}^{q,\ve} \boxminus \tun_{\dep \lp \pwr_{n-1}^{q,\ve}\rp }\rp &\les \param \lp \pwr^{q,\ve}_{n-1} \boxminus \pwr^{q,\ve}_{n-1}\rp \nonumber\\ &\les 4\param\lp \pwr^{q,\ve}_{n-1}\rp \end{align} Then Lemma \ref{comp_prop} and Corollary \ref{affcor} tell us that: \begin{align}\label{(6.2.34)} &\param \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb \bullet \cpy_{2,1}\rp \nonumber\\&= \param \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb \rp \nonumber\\ &\les 4\param \lp \pwr^{q,\ve}_{n-1}\rp \end{align} Note next that by definition, for all $q\in \lp 2,\infty\rp$ and $\ve \in \lp 0,\infty\rp$, it is the case that $\wid_{\hid\lp \pwr_0^{q,\ve}\rp}\lp \pwr_0^{q,\ve}\rp = \wid_{\hid \lp \aff_{0,1}\rp}\lp \aff_{0,1}\rp = 1$.
Now, by Lemma \ref{prd_network} and by the construction of $\pwr_i^{q,\ve}$, we may say for $i\in \N$ that: \begin{align} \wid_{\hid \lp \pwr^{q,\ve}_i\rp} \lp \pwr^{q,\ve}_i\rp = \wid _{\hid \lp \prd^{q,\ve}\rp} \lp \prd^{q,\ve}\rp = 24 \end{align} Note also that by Lemma \ref{6.2.2} it is the case that: \begin{align} \wid_{\hid \lp \tun_{\dep \lp \pwr_{i-1}^{q,\ve}\rp}\rp} \lp \tun_{\dep \lp \pwr^{q,\ve}_{i-1}\rp} \rp = 2 \end{align} Furthermore, note that for $n\in \lb 2, \infty \rp \cap \N$ Lemma \ref{prd_network} tells us that: \begin{align} \wid_{\hid \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp} \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp = 24+2=26 \end{align} Finally, Lemma \ref{comp_prop}, (\ref{(6.2.34)}), a geometric series argument, and Corollary \ref{cor:sameparal} also tell us that: \begin{align} \param \lp \pwr_{n}^{q,\ve}\rp &= \param \lp \prd^{q,\ve} \bullet\lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb \bullet \cpy_{2,1}\rp \nonumber \\ &= \param \lp \prd^{q,\ve} \bullet \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp \nonumber \\ &\les \param \lp \prd^{q,\ve} \rp + 4\param \lp \pwr_{n-1}^{q,\ve}\rp \nonumber\\ &\quad + \wid_1 \lp \prd^{q,\ve} \rp \cdot \wid_{\hid \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp} \lp \lb \pwr^{q,\ve}_{n-1} \boxminus\tun_{\dep \lp \pwr_{n-1}^{q,\ve} \rp}\rb\rp \nonumber \\ &= \param\lp \prd^{q,\ve}\rp + 4\param\lp \pwr^{q,\ve}_{n-1}\rp + 624 \nonumber\\ &\les 4^{n+1}\param\lp \pwr^{q,\ve}_0\rp + \lp \frac{4^{n+1}-1}{3}\rp \lp \param\lp \prd^{q,\ve}\rp + 624\rp \nonumber\\ &\les 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp \end{align} This establishes Item (iv). Next, note that $\lp \real_{\rect} \lp \pwr_{0}^{q,\ve} \rp\rp \lp x \rp$ is exactly $1$, which implies that for all $x\in \R$ we have that $\left|x^0-\lp \real_{\rect} \lp \pwr_{0}^{q,\ve}\rp\rp\lp x \rp \right|=0$. Note also that the instantiations of $\tun_n$ and $\cpy_{2,1}$ are exact, whence the only sources of error for $\pwr^{q,\ve}_n$ are the $n$ compounding applications of $\prd^{q,\ve}$.
Note also that by definition it is the case that: \begin{align} \real_{\rect}\lp \pwr_n^{q,\ve} \rp = \real_{\rect} \lb \underbrace{\prd^{q,\ve} \lp \inst_{\rect} \lb \prd^{q,\ve}\lp\cdots \inst_{\rect}\lb \prd^{q,\ve} \lp 1,x\rp \rb, \cdots x\rp \rb, x \rp}_{n\text{ copies of } \prd^{q,\ve}} \rb \end{align} Lemma \ref{prd_network} tells us that: \begin{align} \left|x-\lp\real_{\rect}\lp \prd^{q,\ve} \rp\rp \lp 1,x \rp \right| \les \ve \max\{ 1,|x|^q\} \les \ve + \ve\left| x\right|^q \end{align} The triangle inequality, Lemma \ref{6.2.4}, Lemma \ref{prd_network}, and Corollary \ref{cor_prd} then tell us that: \begin{align} &\left| x^2 - \real_{\rect} \lp \pwr^{q,\ve}_2 \rp \lp x \rp \right| \nonumber\\ &=\left| x\cdot x-\lp\real_{\rect}\lp \prd^{q,\ve}\rp\rp\lp \lp \inst_{\rect}\lp \prd^{q,\ve} \rp\rp \lp 1,x\rp,x\rp \right| \nonumber\\ &\les \left| x\cdot x - x \cdot \lp \inst_{\rect} \lp \prd^{q,\ve}\rp\rp\lp 1,x\rp \right| + \left| x\cdot \lp \inst_{\rect}\lp \prd^{q,\ve} \rp\rp\lp 1,x \rp -\lp \inst_{\rect}\lp \prd^{q,\ve} \rp\rp \lp \lp \inst_{\rect}\lp \prd^{q,\ve}\rp\rp\lp 1,x\rp,x \rp \right| \nonumber\\ &\les\left| x\lp x-\lp \inst_{\rect}\lp \prd^{q,\ve}\rp\rp\lp 1,x\rp\rp\right|+ \ve + \ve\left| x\right|^q+\ve \left| \lp \inst_{\rect}\lp \prd^{q,\ve}\rp\rp\lp 1,x\rp\right|^q \nonumber\\ &\les \left|x\ve + x\ve\left|x\right|^q \right| + \ve + \ve\left|x\right|^q+\ve \left|\ve + 2+2|x|^2 \right|^q \nonumber\\ &= \left| x\ve + x\ve \left| x\right|^q\right| + \ve + \ve\left| x\right|^q + \ve \mathfrak{p}_{1}^q \end{align} Note that this takes care of our base case. Assume now that for all integers up to and including $n$ it is the case that: \begin{align}\label{(6.2.39)} \left| x^n - \real_{\rect}\lp \pwr_n^{q,\ve}\rp \lp x \rp \right| &\les \left| x\cdot x^{n-1}-x \cdot \real_{\rect}\lp \pwr_{n-1}^{q,\ve}\rp \lp x\rp\right| + \left| x \cdot \real_{\rect}\lp \pwr_{n-1}^{q,\ve}\rp \lp x\rp -\real_{\rect} \lp \pwr_n^{q,\ve} \rp \lp x \rp \right| \nonumber \\ &\les \left| x\lp x^{n-1}-\real_{\rect} \lp \pwr^{q,\ve}_{n-1}\rp \lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\left| \real_{\rect}\lp \pwr^{q,\ve}_{n-1}\rp \lp x \rp \right| ^q\nonumber \\ &\les \left| x \lp x^{n-1} - \real_{\rect}\lp \pwr^{q,\ve}_{n-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{n-1}^q \end{align} For the inductive step, we see that: \begin{align} \left|x^{n+1}-\real_{\rect}\lp \pwr_{n+1}^{q,\ve}\rp\lp x\rp \right| &\les \left| x^{n+1}-x\cdot \real_{\rect}\lp \pwr_{n}^{q,\ve}\rp \lp x \rp\right| + \left| x\cdot \real_{\rect}\lp \pwr^{q,\ve}_n\rp \lp x \rp - \real_{\rect} \lp \pwr^{q,\ve}_{n+1}\rp \lp x \rp \right| \nonumber \\ &\les \left|x\lp x^n-\real_{\rect} \lp \pwr^{q,\ve}_n\rp \lp x\rp\rp \right| + \ve + \ve|x|^q+\ve\left| \real_{\rect} \lp \pwr^{q,\ve}_{n}\rp \lp x \rp\right|^q \nonumber \\ &\les \left|x\lp x^n-\real_{\rect} \lp \pwr^{q,\ve}_n\rp \lp x\rp\rp \right| + \ve + \ve|x|^q + \ve\mathfrak{p}^q_n \end{align} This proves Item (v). Note that since $\mathfrak{p}_n \in \mathcal{O} \lp \ve^{2(n-1)}\rp$ for $n\in \N \cap \lb 2,\infty \rp$, it is the case for all $x\in \R$ that $\left| x^{n} - \real_{\rect} \lp \pwr^{q,\ve}_n\rp \lp x\rp\right| \in \mathcal{O} \lp \ve^{2q(n-1)} \rp$ for $n \ges 2$. Finally, note that $\wid_{\hid \lp \pwr^{q,\ve}_0\rp}\lp \pwr^{q,\ve}_0\rp = 1$ by observation. For $n\in \N$, note that the second-to-last layer of $\pwr_n^{q,\ve}$ is the second-to-last layer of the $\prd^{q,\ve}$ network.
Thus Lemma \ref{prd_network} tells us that: \begin{align} \wid_{\hid\lp \pwr^{q,\ve}_n\rp} \lp \pwr^{q,\ve}_n\rp = \begin{cases} 1 & n=0 \\ 24 & n\in \N \end{cases} \end{align} This completes the proof of the lemma. \end{proof} \begin{remark}\label{rem:pwr_gets_deeper} Note that each power network $\pwr_n^{q,\ve}$ is at least as large as its predecessor $\pwr_{n-1}^{q,\ve}$; indeed, the two differ by exactly one additional $\prd^{q,\ve}$ network. \end{remark} \subsection{$\pnm_{n,C}^{q,\ve}$ and Neural Network Polynomials.} \begin{definition}[Neural Network Polynomials] Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. For fixed $q,\ve$, fixed $n \in \N_0$, and for $C = \lp c_0,c_1,\hdots, c_n \rp \in \R^{n+1}$ (the coefficients), we will define the following objects as neural network polynomials: \begin{align} \pnm^{q,\ve}_{n,C} \coloneqq \bigoplus^n_{i=0} \lp c_i \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rp \end{align} \end{definition} \begin{remark} Diagrammatically, these can be represented as follows. \end{remark} \begin{figure}[h] \begin{center} \tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt \begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1] %uncomment if require: \path (0,475); %set diagram left start at 0, and has height of 475 %Shape: Rectangle [id:dp8950407412127579] \draw (390,52) -- (455.33,52) -- (455.33,85) -- (390,85) -- cycle ; %Shape: Rectangle [id:dp6602004057057332] \draw (359.33,108.67) -- (454,108.67) -- (454,141.67) -- (359.33,141.67) -- cycle ; %Shape: Rectangle [id:dp6567335394697266] \draw (300,168.67) -- (455.33,168.67) -- (455.33,201.67) -- (300,201.67) -- cycle ; %Shape: Rectangle [id:dp40847692689766735] \draw (200,255.33) -- (456,255.33) -- (456,288.33) -- (200,288.33) -- cycle ; %Shape: Rectangle [id:dp9479406055744195] \draw (200.67,51.33) -- (358.67,51.33) -- (358.67,84.33) -- (200.67,84.33) -- cycle ; %Shape: Rectangle [id:dp8579663805783284] \draw (199.33,108) -- (330,108) -- (330,141) -- (199.33,141) -- cycle ; %Shape: Rectangle [id:dp41506308397634806] \draw (200.67,168.67) -- (268.67,168.67) -- (268.67,201.67) -- (200.67,201.67) -- cycle ; %Straight Lines [id:da4565055641527326] \draw (390.67,68.33) -- (361.33,68.33) ; \draw [shift={(359.33,68.33)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da26211042309965304] \draw (358,123.67) -- (332.67,123.67) ; \draw [shift={(330.67,123.67)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da19391185534075384] \draw (298,185) -- (272,185) ; \draw [shift={(270,185)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Shape: Rectangle [id:dp5947036121491401] \draw (518.67,155.33) -- (584,155.33) -- (584,188.33) -- (518.67,188.33) -- cycle ; %Straight Lines [id:da9888083048478233] \draw (518.67,155.33) -- (457.85,71.95) ; \draw [shift={(456.67,70.33)}, rotate = 53.89] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) ..
controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da8782251261566656] \draw (517.33,166) -- (457.03,128.72) ; \draw [shift={(455.33,127.67)}, rotate = 31.73] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da09841373540031018] \draw (518.67,178) -- (459.33,178.32) ; \draw [shift={(457.33,178.33)}, rotate = 359.69] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da1515899374288483] \draw (518.67,188.33) -- (458.51,271.38) ; \draw [shift={(457.33,273)}, rotate = 305.92] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Shape: Rectangle [id:dp031165707162986944] \draw (78.67,154.67) -- (144,154.67) -- (144,187.67) -- (78.67,187.67) -- cycle ; %Straight Lines [id:da9492662023556374] \draw (200,68.33) -- (145.09,152.99) ; \draw [shift={(144,154.67)}, rotate = 302.97] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da028520602639475978] \draw (198.67,123) -- (146.92,162.45) ; \draw [shift={(145.33,163.67)}, rotate = 322.67] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da6814861591796668] \draw (200,185) -- (147.29,174.07) ; \draw [shift={(145.33,173.67)}, rotate = 11.71] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da019305885926265143] \draw (198.67,271) -- (145.1,189.34) ; \draw [shift={(144,187.67)}, rotate = 56.74] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da8585029210721031] \draw (616,172.33) -- (586.67,172.33) ; \draw [shift={(584.67,172.33)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. (10.93,3.29) ; %Straight Lines [id:da9805678030848519] \draw (78.67,169.67) -- (49.33,169.67) ; \draw [shift={(47.33,169.67)}, rotate = 360] [color={rgb, 255:red, 0; green, 0; blue, 0 } ][line width=0.75] (10.93,-3.29) .. controls (6.95,-1.4) and (3.31,-0.3) .. (0,0) .. controls (3.31,0.3) and (6.95,1.4) .. 
(10.93,3.29) ; % Text Node \draw (412,217.73) node [anchor=north west][inner sep=0.75pt] {$\vdots $}; % Text Node \draw (406,61.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Pwr}^{q,\ve}_{0}$}; % Text Node \draw (406,118.07) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Pwr}^{q,\ve}_{1}$}; % Text Node \draw (403.33,177.07) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Pwr}^{q,\ve}_{2}$}; % Text Node \draw (265.33,58.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Tun}$}; % Text Node \draw (404,263.07) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Pwr}^{q,\ve}_{n}$}; % Text Node \draw (249.33,115.73) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Tun}$}; % Text Node \draw (222,176.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Tun}$}; % Text Node \draw (525,162.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Cpy}_{n+1,1}$}; % Text Node \draw (471.33,198.4) node [anchor=north west][inner sep=0.75pt] {$\vdots $}; % Text Node \draw (83,163.73) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Sum}_{n+1,1}$}; % Text Node \draw (230.67,214.4) node [anchor=north west][inner sep=0.75pt] {$\vdots $}; % Text Node \draw (172,193.73) node [anchor=north west][inner sep=0.75pt] {$\vdots $}; \end{tikzpicture} \end{center} \caption{Neural network diagram for an elementary neural network polynomial.} \end{figure} \begin{lemma}[R\textemdash,2023]\label{6.2.9}\label{nn_poly}\label{mnm_prop} Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. It is then the case for all $n\in\N_0$, $C = \lp c_0,c_1,\hdots,c_n\rp \in \R^{n+1}$, and $x\in \R$ that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \pnm_{n,C}^{q,\ve}\rp \in C \lp \R, \R \rp $ \item $\dep \lp \pnm_{n,C}^{q,\ve} \rp \les \begin{cases} 1 & :n=0\\ n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N \end{cases}$ \item $\param \lp \pnm_{n,C}^{q,\ve} \rp \les \begin{cases} 2 & :n =0 \\ \lp n+1\rp\lb 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N \end{cases}$ \\~\\ \item $\left|\sum^n_{i=0} c_ix^i - \real_{\rect} \lp \pnm_{n,C}^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \left|c_i\right|\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp $\\~\\ where $\mathfrak{p}_i$ are the functions defined for $i \in \N$ as such: \begin{align} \mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\ \mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \end{align} Whence it is the case that: \begin{align} \left|\sum^n_{i=0} c_ix^i - \real_{\rect} \lp \pnm_{n,C}^{q,\ve} \rp \lp x \rp \right| \in \mathcal{O} \lp \ve^{2q(n-1)}\rp \end{align} \item $\wid_1 \lp \pnm_{n,C}^{q,\ve} \rp \les 2+23n+n^2 $ \item $\wid_{\hid \lp \pnm_{n,C}^{q,\ve}\rp} \lp \pnm_{n,C}^{q,\ve}\rp \les\begin{cases} 1 &:n=0 \\ 24 + 2n &:n\in \N \end{cases}$ \end{enumerate} \end{lemma} \begin{proof} Note that by Lemma \ref{5.6.3}, Lemma \ref{power_prop}, and Lemma \ref{comp_prop}, for all $n\in \N_0$ it is the case that: \begin{align} \real_{\rect}\lp \pnm_{n,C}^{q,\ve} \rp &= \real_{\rect} \lp \bigoplus^n_{i=0} \lb c_i \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rb \rp \nonumber\\ &= \sum^n_{i=0}c_i \real_{\rect}\lp \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve} \rp \nonumber\\ &= \sum^n_{i=0}c_i\real_{\rect}\lp
\pwr^{q,\ve}_i \rp\nonumber \end{align} Since Lemma \ref{power_prop} tells us that $\real_{\rect} \lp \pwr_n^{q,\ve} \rp \in C \lp \R, \R \rp$ for all $n\in \N_0$, and since a finite sum of continuous functions is continuous, this proves Item (i). Note that $\pnm_{n,C}^{q,\ve}$ is only as deep as the deepest of the $\pwr^{q,\ve}_i$ networks, namely $\pwr_n^{q,\ve}$, which in turn also carries the largest depth bound. Therefore, by Lemma \ref{comp_prop}, Lemma \ref{5.3.3}, Lemma \ref{depth_prop}, and Lemma \ref{power_prop}, we have that: \begin{align} \dep \lp \pnm_{n,C}^{q,\ve} \rp &\les \dep \lp \pwr_n^{q,\ve}\rp \nonumber\\ &\les \begin{cases} 1 & :n=0\\ n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N \end{cases} \nonumber \end{align} This proves Item (ii). Note next that for the case $n=0$ we have: \begin{align} \pnm_{0,C}^{q,\ve} = c_0 \triangleright\pwr_0^{q,\ve} \end{align} which yields $2$ parameters. Note that each neural network summand in $\pnm_{n,C}^{q,\ve}$ consists of a combination of $\tun_k$ and $\pwr_k^{q,\ve}$ for some $k\in \N$. Each $\pwr_k^{q,\ve}$ has at least as many parameters as a tunneling neural network of the same depth, as Lemma \ref{param_pwr_geq_param_tun} tells us. This, finally, with Lemma \ref{aff_effect_on_layer_architecture}, Corollary \ref{affcor}, and Lemma \ref{power_prop}, then implies that: \begin{align} \param\lp \pnm^{q,\ve}_{n,C} \rp &= \param \lp \bigoplus^n_{i=0} \lb c_i \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rb \rp\nonumber \\ &\les \lp n+1 \rp \cdot \param \lp c_i \triangleright \lb \tun_1 \bullet \pwr_n^{q,\ve} \rb\rp \nonumber\\ &\les \lp n+1 \rp \cdot \param \lp \pwr_n^{q,\ve} \rp \nonumber \\ &\les \begin{cases} 2 & :n =0 \\ \lp n+1\rp\lb 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N \end{cases} \nonumber \end{align} This proves Item (iii).
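As an illustration of the construction, for $n = 2$ and $C = \lp c_0, c_1, c_2\rp$ the instantiation reads $\lp \real_{\rect}\lp \pnm_{2,C}^{q,\ve}\rp\rp \lp x \rp = c_0 + c_1\lp \real_{\rect}\lp \pwr_1^{q,\ve}\rp\rp\lp x \rp + c_2\lp \real_{\rect}\lp \pwr_2^{q,\ve}\rp\rp\lp x \rp$, which approximates $c_0 + c_1x + c_2x^2$ with error governed by Item (iv) below.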
Next, note that for all $i\in \N$, Lemma \ref{power_prop} and the triangle inequality tell us that: \begin{align} \left| x^i - \real_{\rect}\lp \pwr_i^{q,\ve}\rp \lp x \rp \right| &\les \left| x^i-x \cdot \real_{\rect}\lp \pwr_{i-1}^{q,\ve}\rp \lp x\rp\right| + \left| x \cdot \real_{\rect}\lp \pwr_{i-1}^{q,\ve}\rp \lp x\rp -\real_{\rect} \lp \pwr_i^{q,\ve} \rp \lp x \rp \right| \end{align} This, together with Item (v) of Lemma \ref{power_prop} and the fact that the instantiation of the tunneling neural network is the identity function (Lemma \ref{6.2.2} and Lemma \ref{comp_prop}), combined with Lemma \ref{scalar_left_mult_distribution} and the absolute homogeneity of norms, then tells us that for all $x\in \R$ and $c_0,c_1,\hdots, c_n \in \R$ it is the case that: \begin{align} &\left|\sum^n_{i=0} c_ix^i - \real_{\rect} \lp \pnm^{q,\ve}_{n,C} \rp \lp x\rp \right| \nonumber\\ &= \left| \sum^n_{i=0} c_ix^i - \real_{\rect} \lb \bigoplus^n_{i=0} \lb c_i \triangleright \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve} \rb \rb\lp x \rp\right| \nonumber \\ &=\left| \sum^n_{i=0} c_ix^i-\sum_{i=0}^n c_i \lp \inst_{\rect}\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb\lp x\rp\rp\right| \nonumber\\ &\les \sum_{i=1}^n \left|c_i\right| \cdot\left| x^i - \inst_{\rect}\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb\lp x\rp\right| \nonumber\\ &\les \sum^n_{i=1} \left|c_i\right|\cdot\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp \nonumber \end{align} where in the penultimate step the $i=0$ terms cancel, both being equal to $c_0$. Note however that for all $x\in \R$ and $i \in \N \cap \lb 2, \infty\rp$, Lemma \ref{power_prop} tells us that $\left| x^{i} - \real_{\rect} \lp \pwr^{q,\ve}_i\rp \lp x\rp\right| \in \mathcal{O} \lp \ve^{2q\lp i-1\rp} \rp$; this, and the fact that $f+g \in \mathcal{O}\lp x^a \rp$ whenever $f \in \mathcal{O}\lp x^a\rp$, $g \in \mathcal{O}\lp x^b\rp$, and $a \ges b$, then implies that: \begin{align} \sum^n_{i=1} \left| c_i\right|\cdot\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp \in \mathcal{O} \lp \ve^{2q(n-1)}\rp \end{align} This proves Item (iv). Note next that in our construction the $\pwr_0^{q,\ve} = \aff_{0,1}$ summand requires tunneling whenever $n \in \N$, which contributes a first-layer width of $2$. Lemma \ref{aff_effect_on_layer_architecture} and Corollary \ref{affcor} then tell us that: \begin{align} \wid_1 \lp \pnm_{n,C}^{q,\ve} \rp &= \wid_1 \lp \bigoplus^n_{i=0} \lb c_i \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rb\rp \nonumber\\ &= \wid_1 \lp \bigoplus^n_{i=0}\pwr^{q,\ve}_i\rp \nonumber \\ &\les \sum^n_{i=0}\wid_1 \lp \pwr^{q,\ve}_i\rp =2 + \frac{n}{2}\lp 24+24+2\lp n-1\rp\rp = 2+23n+n^2 \nonumber \end{align} This proves Item (v). Finally, note from the definition of $\pnm_{n,C}^{q,\ve}$ that $\wid_{\hid\lp \pnm_{0,C}^{q,\ve}\rp}\lp \pnm_{0,C}^{q,\ve}\rp = 1$, since $\pnm_{0,C}^{q,\ve} = c_0 \triangleright \aff_{0,1}$. For $n \in \N$, each $\pwr_{i}^{q,\ve}$ with $i \in \N$ ends in the $\prd^{q,\ve}$ network, the deepest of the $\pwr_i^{q,\ve}$ networks inside $\pnm_{n,C}^{q,\ve}$ is $\pwr^{q,\ve}_n$, and all other $\pwr_i^{q,\ve}$ must end in tunnels.
Whence, in the second-to-last layer, Lemma \ref{prd_network} tells us that: \begin{align} \wid_{\hid\lp \pnm_{n,C}^{q,\ve}\rp} \lp \pnm_{n,C}^{q,\ve}\rp \les \begin{cases} 1 &: n =0 \\ 24+2n &:n \in \N \end{cases} \end{align} This completes the proof of the Lemma. \end{proof} \subsection{$\xpn_n^{q,\ve}$, $\csn_n^{q,\ve}$, $\sne_n^{q,\ve}$, and Neural Network Approximations of $e^x$, $\cos(x)$, and $\sin(x)$.} Once we have neural network polynomials, we may take the next leap to transcendental functions. Here, we will explore neural network approximations for three common transcendental functions: $e^x$, $\cos(x)$, and $\sin(x)$. \begin{lemma} Let $\nu_1,\nu_2 \in \neu$, $f,g \in C \lp \R, \R \rp$, and $\ve_1,\ve_2 \in \lp 0 ,\infty \rp$ be such that for all $x\in \R$ it holds that $\left| f(x) - \lp \real_{\rect} \lp \nu_1 \rp \rp \lp x \rp \right| \les \ve_1 $ and $\left| g(x) - \lp \real_{\rect} \lp \nu_2 \rp \rp \lp x \rp \right| \les \ve_2$. It is then the case for all $x \in \R$ that: \begin{align}\label{6.2.14} \left| \lb f+g \rb \lp x \rp - \lp \real_{\rect} \lp \nu_1 \oplus \nu_2 \rp \rp \lp x \rp\right| \les \ve_1 + \ve_2 \end{align} \end{lemma} \begin{proof} Note that the triangle inequality tells us: \begin{align} \left| \lb f+g \rb \lp x \rp - \lp \real_{\rect} \lp \nu_1 \oplus \nu_2 \rp\rp \lp x \rp \right| &= \left| f\lp x \rp +g\lp x \rp -\lp\real_{\rect} \lp \nu_1\rp\rp \lp x \rp -\lp\real_{\rect} \lp \nu_2 \rp\rp\lp x \rp \right|\nonumber \\ &\les \left| f\lp x \rp -\lp\real_{\rect}\lp \nu_1 \rp\rp \lp x \rp \right| + \left| g\lp x \rp - \lp\real_{\rect} \lp \nu_2 \rp\rp \lp x \rp \right| \nonumber\\ &\les \ve_1 + \ve_2 \nonumber \end{align} \end{proof} \begin{lemma}\label{6.2.8} Let $n\in \N$. Let $\nu_1,\nu_2,...,\nu_n \in \neu$, $\ve_1,\ve_2,...,\ve_n \in \lp 0,\infty \rp$, and $f_1,f_2,...,f_n \in C\lp \R, \R \rp$ be such that for all $i \in \{1,2,...,n\}$ and all $x\in \R$ it is the case that $\left| f_i\lp x \rp - \lp\real_{\rect} \lp \nu_i \rp\rp\lp x \rp \right| \les \ve_i$. It is then the case for all $x\in \R$ that: \begin{align} \left| \sum^n_{i=1} f_i \lp x \rp -\lp \real_{\rect}\lp \bigoplus^n_{i=1} \nu_i \rp \rp \lp x\rp\right| \les \sum_{i=1}^n \ve_i \end{align} \end{lemma} \begin{proof} This is a consequence of a finite number of applications of (\ref{6.2.14}). \end{proof} \begin{definition}[R\textemdash 2023, $\xpn_n^{q,\ve}$ and the Neural Network Taylor Approximations for $e^x$ around $x=0$] Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, and let $\pwr_n^{q,\ve}$ be as in Definition \ref{def:pwr}. We define, for all $n\in \N_0$, the family of neural networks $\xpn_n^{q,\ve}$ as: \begin{align} \xpn_n^{q,\ve}\coloneqq \bigoplus^n_{i=0} \lb \frac{1}{i!} \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_i^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_i\rp} \bullet \pwr_i^{q,\ve}\rb \rb \end{align} \end{definition} \begin{lemma}[R\textemdash,2023]\label{tay_for_exp}\label{xpn_properties} Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$.
It is then the case for all $n\in\N_0$ and $x\in \R$ that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \xpn_n^{q,\ve}\rp \in C \lp \R, \R \rp $ \item $\dep \lp \xpn_n^{q,\ve} \rp \les \begin{cases} 1 & :n=0\\ n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N \end{cases}$ \item $\param \lp \xpn_n^{q,\ve} \rp \les \begin{cases} 2 & :n =0 \\ \lp n+1\rp\lb 4^{n+\frac{3}{2}} + \lp \frac{4^{n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N \end{cases}$ \\~\\ \item \begin{align*}\left|\sum^n_{i=0} \lb \frac{x^i}{i!} \rb- \real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \frac{1}{i!}\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp \end{align*}\\~\\ where $\mathfrak{p}_i$ are the functions defined for $i \in \N$ as such: \begin{align} \mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\ \mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \end{align} Whence it is the case that: \begin{align} \left|\sum^n_{i=0} \lb \frac{x^i}{i!} \rb- \real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right|\in \mathcal{O} \lp \ve^{2q(n-1)}\rp \end{align} \item $\wid_1 \lp \xpn_n^{q,\ve} \rp \les 2+23n+n^2 $ \item $\wid_{\hid \lp \xpn_n^{q,\ve} \rp}\lp \xpn_n^{q,\ve}\rp \les \begin{cases} 1 &:n=0 \\ 24 + 2n &:n\in \N \end{cases}$ \end{enumerate} \end{lemma} \begin{proof} This follows straightforwardly from Lemma \ref{nn_poly} with $c_i \curvearrowleft \frac{1}{i!}$ for all $n \in \N_0$ and $i \in \{0,1,\hdots, n\}$. In particular, Item (iv) benefits from the fact that for all $i \in \N_0$ it is the case that $\frac{1}{i!} > 0$, whence $\left|\frac{1}{i!}\right| = \frac{1}{i!}$. \end{proof} \begin{lemma}[R\textemdash, 2023] Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}.$ It is then the case for all $n\in\N_0$ and $x\in \lb a,b \rb\subsetneq \R$ with $0 \in \lb a,b\rb$ that: \begin{align} \left| e^x - \real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \frac{1}{i!}\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp + \frac{e^{b}\cdot |x|^{n+1}}{(n+1)!} \end{align} \end{lemma} \begin{proof} Note that Taylor's theorem states that for $x \in \lb a,b\rb \subsetneq \R$ it is the case that: \begin{align} e^x = \sum^n_{i=0} \lb \frac{x^i}{i!} \rb + \frac{e^{\xi}\cdot x^{n+1}}{(n+1)!} \end{align} where $\xi$ is between $0$ and $x$ in the Lagrange form of the remainder.
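To make the size of this remainder concrete: taking, for instance, $\lb a,b \rb = \lb -1,1\rb$ and $n=5$, we have $\left| \frac{e^{\xi}\cdot x^{6}}{6!}\right| \les \frac{e}{720} \approx 0.0038$ uniformly over $x \in \lb -1,1\rb$.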
Note then, for all $n\in \N_0$, $x\in \lb a,b \rb \subsetneq \R$, and $\xi$ between $0$ and $x$, it is the case, by the monotonicity of $e^x$, that the remainder term is bounded by: \begin{align} \frac{e^\xi \cdot x^{n+1}}{(n+1)!} \les \frac{e^b\cdot |x|^{n+1}}{(n+1)!} \end{align} This, and the triangle inequality, then indicates that for all $x \in \lb a,b \rb \subsetneq \R$ and $\xi$ between $0$ and $x$: \begin{align} \left| e^x -\real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| &=\left| \sum^n_{i=0} \lb \frac{x^i}{i!} \rb + \frac{e^{\xi}\cdot x^{n+1}}{(n+1)!}-\real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp\right| \nonumber\\ &\les \left| \sum^n_{i=0} \lb \frac{x^i}{i!} \rb - \real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| + \frac{e^{b}\cdot |x|^{n+1}}{(n+1)!} \nonumber \\ &\les \sum^n_{i=1} \frac{1}{i!}\lp \left| x \lp x^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{i-1}^q \rp + \frac{e^{b}\cdot |x|^{n+1}}{(n+1)!} \nonumber \end{align} Since, for fixed $n\in \N_0$ and fixed $\lb a,b\rb$, the remainder term is bounded by a constant independent of $\ve$, it is the case that: \begin{align} \left| e^x -\real_{\rect} \lp \xpn_n^{q,\ve} \rp \lp x \rp \right| \in \mathcal{O} \lp \ve^{2q(n-1)}\rp \end{align} \end{proof} \begin{definition}[The $\mathsf{Csn}_n^{q,\ve}$ Networks, and Neural Network Cosines] Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $\pwr^{q,\ve}_n$ be neural networks as defined in Definition \ref{def:pwr}. We will define the neural networks $\mathsf{Csn}_{n}^{q,\ve}$ as: \begin{align} \mathsf{Csn}_n^{q,\ve} \coloneqq \bigoplus^n_{i=0} \lb \frac{(-1)^i}{(2i)!}\triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve}\rb \rb \end{align} \end{definition} \begin{lemma}[R\textemdash, 2023]\label{csn_properties} Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. It is then the case for all $n\in\N_0$ and $x\in \R$ that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \csn_n^{q,\ve}\rp \in C \lp \R, \R \rp $ \item $\dep \lp \csn_n^{q,\ve}\rp \les \begin{cases} 1 & :n=0\\ 2n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N \end{cases}$ \item $\param \lp \csn_n^{q,\ve} \rp \les \begin{cases} 2 & :n =0 \\ \lp n+1\rp\lb 4^{2n+\frac{3}{2}} + \lp \frac{4^{2n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N \end{cases}$ \\~\\ \item $\left|\sum^n_{i=0} \frac{(-1)^i}{(2i)!}x^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \left| \frac{\lp -1\rp^i}{(2i)!}\right|\lp \left| x \lp x^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{2i-1}^q \rp $\\~\\ where $\mathfrak{p}_i$ are the functions defined for $i \in \N$ as such: \begin{align} \mathfrak{p}_1 &= \ve+2+2|x|^2 \nonumber\\ \mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2|x|^2 \end{align} Whence it is the case that: \begin{align} \left|\sum^n_{i=0} \frac{\lp -1\rp^i}{(2i)!}x^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| \in \mathcal{O} \lp \ve^{2q(2n-1)}\rp \end{align} \end{enumerate} \end{lemma} \begin{proof} Item (i) follows straightforwardly from Lemma \ref{nn_poly}.
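For orientation, note that $\real_{\rect}\lp \csn_1^{q,\ve}\rp$ approximates $1 - \frac{x^2}{2}$, the degree-two Taylor truncation of $\cos\lp x \rp$, with the only error stemming from the $\prd^{q,\ve}$ networks inside $\pwr_2^{q,\ve}$.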
Next, observe that since $\csn_n^{q,\ve}$ contains, as the deepest network in the summand, $\pwr_{2n}^{q,\ve}$, we may then conclude that: \begin{align} \dep \lp \csn_n^{q,\ve} \rp &\les \dep \lp \pwr_{2n}^{q,\ve}\rp \nonumber\\ &\les \begin{cases} 1 & :n=0\\ 2n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N \end{cases} \nonumber \end{align} This proves Item (ii). A similar argument to the above, Lemma \ref{aff_effect_on_layer_architecture}, and Corollary \ref{affcor} reveal that: \begin{align} \param\lp \csn_n^{q,\ve} \rp &= \param \lp \bigoplus^n_{i=0} \lb \frac{\lp -1\rp^i}{(2i)!} \triangleright\lb \tun_{\max_i \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve}\rb \rb \rp\nonumber \\ &\les \lp n+1 \rp \cdot \param \lp \frac{\lp -1\rp^n}{(2n)!} \triangleright \lb \tun_1 \bullet \pwr_{2n}^{q,\ve} \rb\rp \nonumber\\ &\les \lp n+1 \rp \cdot \param \lp \pwr_{2n}^{q,\ve} \rp \nonumber \\ &\les \begin{cases} 2 & :n =0 \\ \lp n+1\rp\lb 4^{2n+\frac{3}{2}} + \lp \frac{4^{2n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N \end{cases} \nonumber \end{align} This proves Item (iii). In a similar vein, we may argue from Lemma \ref{nn_poly} and from the absolute homogeneity property of norms that: \begin{align} &\left|\sum^n_{i=0} \frac{\lp -1\rp^i}{(2i)!}x^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x\rp \right| \nonumber\\ &= \left| \sum^n_{i=0} \frac{\lp -1\rp^i}{(2i)!}x^{2i} - \real_{\rect} \lb \bigoplus^n_{i=0} \lb \frac{\lp -1\rp^i}{(2i)!} \triangleright \tun_{\max_i \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve} \rb \rb\lp x \rp\right| \nonumber \\ &=\left| \sum^n_{i=0} \frac{\lp -1\rp^i}{(2i)!}x^{2i}-\sum_{i=0}^n \frac{\lp -1 \rp^i}{(2i)!} \lp \inst_{\rect}\lb \tun_{\max_i \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve}\rb\lp x\rp\rp\right| \nonumber\\ &\les \sum_{i=1}^n \left|\frac{\lp -1\rp^i}{(2i)!} \right|\cdot\left| x^{2i} - \inst_{\rect}\lb \tun_{\max_i \left\{\dep \lp \pwr_{2i}^{q,\ve} \rp\right\} +1 - \dep \lp \pwr^{q,\ve}_{2i}\rp} \bullet \pwr_{2i}^{q,\ve}\rb\lp x\rp\right| \nonumber\\ &\les \sum^n_{i=1} \left|\frac{\lp -1\rp^i}{(2i)!}\right|\cdot \lp \left| x \lp x^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber \end{align} where in the penultimate step the $i=0$ terms cancel, both being equal to $1$. Whence we have that: \begin{align} \left|\sum^n_{i=0} \lb \frac{\lp -1\rp^i x^{2i}}{(2i)!} \rb- \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right|\in \mathcal{O} \lp \ve^{2q(2n-1)}\rp \end{align} This proves Item (iv).
\end{proof} \begin{lemma}[R\textemdash, 2023] Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}.$ It is then the case for all $n\in\N_0$ and $x\in \lb a,b\rb \subsetneq \R$ with $0 \in \lb a,b\rb$ that: \begin{align} \left| \cos\lp x\rp - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| \les \sum^n_{i=1} \left|\frac{\lp -1\rp^i}{(2i)!}\right|\lp \left| x \lp x^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{2i-1}^q \rp + \frac{|x|^{2n+1}}{(2n+1)!}\nonumber \end{align} \end{lemma} \begin{proof} Note that Taylor's theorem states that for all $x \in \lb a,b\rb \subsetneq \R$, where $0 \in \lb a,b\rb$, and for some $\xi$ between $0$ and $x$, it is the case that: \begin{align} \cos\lp x \rp= \sum^n_{i=0} \frac{\lp -1\rp^i}{(2i)!}x^{2i} + \frac{\cos^{\lp 2n+1\rp}\lp \xi \rp \cdot x^{2n+1}}{(2n+1)!} \end{align} Note further that for all $n \in \N_0$ and $x \in \R$ it is the case that $\left|\cos^{\lp n \rp} \lp x\rp\right| \les 1$. Whence we may conclude that for all $n\in \N_0$, $x\in \lb a,b \rb$, and $\xi$ between $0$ and $x$, we may bound the remainder by: \begin{align} \left|\frac{\cos^{\lp 2n+1\rp}\lp \xi \rp \cdot x^{2n+1}}{(2n+1)!}\right| \les \frac{|x|^{2n+1}}{\lp 2n+1\rp!} \end{align} This, and the triangle inequality, then indicates that for all $x \in \lb a,b \rb$ and $\xi$ between $0$ and $x$: \begin{align} \left| \cos \lp x \rp -\real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| &=\left| \sum^n_{i=0} \frac{\lp -1\rp^i}{(2i)!}x^{2i} + \frac{\cos^{(2n+1)}\lp \xi \rp \cdot x^{2n+1}}{(2n+1)!}-\real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp\right| \nonumber\\ &\les \left| \sum^n_{i=0} \frac{\lp -1\rp^i}{(2i)!}x^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \rp \lp x \rp \right| + \frac{|x|^{2n+1}}{(2n+1)!} \nonumber \\ &\les \sum^n_{i=1} \left|\frac{\lp -1\rp^i}{(2i)!}\right|\cdot \lp \left| x \lp x^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x\rp\rp\right| + \ve + \ve|x|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber\\&\quad+ \frac{|x|^{2n+1}}{(2n+1)!} \nonumber \end{align} This completes the proof of the Lemma. \end{proof} \begin{definition}[R\textemdash, 2023, The $\mathsf{Sne}_n^{q,\ve}$ Networks and Neural Network Sines] Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $\csn_n^{q,\ve}$ be as defined above. We will define the neural networks $\mathsf{Sne}_{n}^{q,\ve}$ as: \begin{align} \mathsf{Sne}_n^{q,\ve} \coloneqq \csn_n^{q,\ve} \bullet \aff_{1, -\frac{\pi}{2}} \end{align} \end{definition} \begin{lemma}[R\textemdash, 2023]\label{sne_properties} Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$.
It is then the case for all $n\in\N_0$ and $x\in \R$ that: \begin{enumerate}[label = (\roman*)] \item $\real_{\rect} \lp \sne_n^{q,\ve}\rp \in C \lp \R, \R \rp $ \item $\dep \lp \sne_n^{q,\ve}\rp \les \begin{cases} 1 & :n=0\\ 2n\lb \frac{q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q\rb -1 \rb +1 &:n\in \N \end{cases}$ \item $\param \lp \sne_n^{q,\ve} \rp \les \begin{cases} 2 & :n =0 \\ \lp n+1\rp\lb 4^{2n+\frac{3}{2}} + \lp \frac{4^{2n+1}-1}{3}\rp \lp \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +372\rp\rb &:n\in \N \end{cases}$ \\~\\ \item \begin{align}&\left|\sum^n_{i=0} \frac{(-1)^i}{(2i)!}{\lp x-\frac{\pi}{2}\rp}^{2i} - \real_{\rect} \lp \sne_n^{q,\ve} \rp \lp x \rp \right| \nonumber\\ &= \left|\sum^n_{i=0} \frac{(-1)^i}{(2i)!}{\lp x-\frac{\pi}{2}\rp}^{2i} - \real_{\rect} \lp \csn_n^{q,\ve} \bullet \aff_{1,-\frac{\pi}{2}}\rp \lp x \rp \right|\nonumber\\ &\les \sum^n_{i=1} \left| \frac{\lp -1\rp^i}{(2i)!}\right|\lp \left| \lp x -\frac{\pi}{2}\rp\lp \lp x -\frac{\pi}{2}\rp^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x-\frac{\pi}{2}\rp\rp\right| + \ve + \ve\left|x-\frac{\pi}{2}\right|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber \end{align}\\~\\ where $\mathfrak{p}_i$ are the functions defined for $i \in \N$ as such: \begin{align} \mathfrak{p}_1 &= \ve+2+2\left|x-\tfrac{\pi}{2}\right|^2 \nonumber\\ \mathfrak{p}_i &= \ve +2\lp \mathfrak{p}_{i-1} \rp^2+2\left|x-\tfrac{\pi}{2}\right|^2 \end{align} that is, the functions of Lemma \ref{csn_properties} with $x$ replaced by $x-\frac{\pi}{2}$. Whence it is the case that: \begin{align} \left|\sum^n_{i=0} \frac{\lp -1\rp^i}{(2i)!}\lp x-\frac{\pi}{2}\rp^{2i} - \real_{\rect} \lp \sne_n^{q,\ve} \rp \lp x \rp \right| \in \mathcal{O} \lp \ve^{2q(2n-1)}\rp \end{align} \end{enumerate} \end{lemma} \begin{proof} This follows straightforwardly from Lemma \ref{csn_properties} together with the following observations: by Corollary \ref{affcor}, composition with $\aff_{1,-\frac{\pi}{2}}$ does not change the parameter count; by Lemma \ref{comp_cont}, it does not change the depth; by Lemma \ref{aff_prop} and Lemma \ref{csn_properties}, continuity is preserved; and since $\aff_{1,-\frac{\pi}{2}}$ instantiates exactly to the map $x \mapsto x-\frac{\pi}{2}$, it contributes nothing to the error. Whence $\sne^{q,\ve}_n$ satisfies the same error bounds as $\csn_n^{q,\ve}$, with $x$ replaced by $x-\frac{\pi}{2}$.
\end{proof} \begin{lemma}[R\textemdash, 2023] Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$ and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}.$ It is then the case for all $n\in\N_0$ and $x\in \lb a,b\rb \subsetneq \R$ with $\frac{\pi}{2} \in \lb a,b\rb$ that: \begin{align} &\left| \sin\lp x\rp - \real_{\rect} \lp \sne_n^{q,\ve} \rp \lp x \rp \right|\nonumber \\ &\les \sum^n_{i=1} \left| \frac{\lp -1\rp^i}{(2i)!}\right|\lp \left| \lp x -\frac{\pi}{2}\rp\lp \lp x -\frac{\pi}{2}\rp^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x-\frac{\pi}{2}\rp\rp\right| + \ve + \ve\left|x-\frac{\pi}{2}\right|^q + \ve\mathfrak{p}_{2i-1}^q \rp \nonumber\\ &\quad+\frac{\left|x-\frac{\pi}{2}\right|^{2n+1}}{(2n+1)!}\label{sin_diff} \end{align} \end{lemma} \begin{proof} Note that the fact that $\sin\lp x\rp = \cos\lp x-\frac{\pi}{2}\rp$, Lemma \ref{comp_prop}, and Lemma \ref{aff_prop} then yield (\ref{sin_diff}): \begin{align} &\left| \sin\lp x\rp - \lp \inst_{\rect}\lp \sne_n^{q,\ve}\rp\rp\lp x \rp\right| \nonumber\\ &= \left| \cos \lp x - \frac{\pi}{2}\rp - \lp \inst_{\rect}\lp \csn_n^{q,\ve}\bullet \aff_{1,-\frac{\pi}{2}}\rp\rp\lp x\rp\right| \nonumber\\ &=\left| \cos \lp x-\frac{\pi}{2}\rp - \lp \inst_{\rect}\lp \csn_n^{q,\ve}\rp\rp \lp x-\frac{\pi}{2} \rp\right| \nonumber \\ &\les \sum^n_{i=1} \left| \frac{\lp -1\rp^i}{(2i)!}\right|\lp \left| \lp x -\frac{\pi}{2}\rp\lp \lp x -\frac{\pi}{2}\rp^{2i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{2i-1}\rp\lp x-\frac{\pi}{2}\rp\rp\right| + \ve + \ve\left|x-\frac{\pi}{2}\right|^q + \ve\mathfrak{p}_{2i-1}^q \rp\nonumber\\ &\quad+ \frac{\left|x-\frac{\pi}{2}\right|^{2n+1}}{(2n+1)!}\nonumber \end{align} \end{proof} \begin{remark} Note that under these neural network architectures the famous Pythagorean identity $\sin^2\lp x\rp + \cos^2 \lp x\rp = 1$ may be rendered approximately, for fixed $n,q,\ve$, as: $\lb \sqr^{q,\ve}\bullet \csn^{q,\ve}_n \rb \oplus\lb \sqr^{q,\ve}\bullet \sne^{q,\ve}_n\rb$. A full discussion of the associated parameter, depth, and accuracy bounds is beyond the scope of this dissertation and may be appropriate for future work. \end{remark}
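\begin{remark} As an illustrative sanity check of the preceding remark (and not as part of the formal development), the following plain \texttt{R} sketch evaluates the truncated Taylor series that $\csn_n^{q,\ve}$ and $\sne_n^{q,\ve}$ instantiate up to the compounding $\prd^{q,\ve}$ error, and measures the resulting defect in the Pythagorean identity. The helper functions \texttt{csn\_taylor} and \texttt{sne\_taylor} are hypothetical stand-ins, independent of the implementation in Listing \ref{Pwr}.
\begin{lstlisting}[language=R]
# Idealized instantiations: the truncated Taylor series that Csn_n and
# Sne_n realize when every Prd sub-network is replaced by exact
# multiplication. These helpers are illustrative stand-ins only.
csn_taylor <- function(x, n) {
  # sum_{i=0}^{n} (-1)^i x^(2i) / (2i)!
  sum(sapply(0:n, function(i) (-1)^i * x^(2 * i) / factorial(2 * i)))
}
sne_taylor <- function(x, n) csn_taylor(x - pi / 2, n)

# Pythagorean defect |csn^2 + sne^2 - 1| on a grid in [0, pi/2].
xs <- seq(0, pi / 2, length.out = 101)
n <- 4
defect <- sapply(xs, function(x) {
  csn_taylor(x, n)^2 + sne_taylor(x, n)^2 - 1
})
max(abs(defect)) # small for moderate n, and shrinks as n grows
\end{lstlisting}
\end{remark}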