Removed all references to learning

Shakil Rafi 2024-04-09 09:47:55 -05:00
parent c0593180fc
commit 8836f29605
18 changed files with 197 additions and 138 deletions

.DS_Store (vendored): binary file not shown.


@ -103,7 +103,7 @@ and let $(\Omega, \mathcal{F},\mathbb{P})$ be a probability space. Let $\mathcal
\end{align}
and let $U^\theta:[0,T] \times \mathbb{R}^d \times \Omega \rightarrow \mathbb{R}$, $\theta \in \Theta$, satisfy for all $\theta \in \Theta$, $t \in [0,T]$, $x\in \mathbb{R}^d$ that:
\begin{align}\label{(2.1.4)}
U^\theta_m(t,x) = \frac{1}{m}\left[\sum^{m}_{k=1}g\left(x+\mathcal{W}^{(\theta,0,-k)}_{T-t}\right)\right]
\end{align}
\end{definition}
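\begin{remark}
As a quick illustration of (\ref{(2.1.4)}), the following is a minimal \texttt{R} sketch of the Monte Carlo average defining $U^\theta_m$, with the Brownian increment $\mathcal{W}^{(\theta,0,-k)}_{T-t}$ simulated as a mean-zero Gaussian vector with variance $T-t$ per coordinate. It is purely illustrative; the function and argument names are our own and not part of the setting.
\end{remark}
\begin{verbatim}
# Monte Carlo realization of U^theta_m(t, x): average g over m independent
# copies of x + W_{T-t}, each increment drawn as N(0, (T - t) I_d).
U_theta_m <- function(g, t, x, T_end, m) {
  d <- length(x)
  draws <- replicate(m, g(x + rnorm(d, mean = 0, sd = sqrt(T_end - t))))
  mean(draws)
}

# Example with g(x) = ||x||^2, whose exact mean is ||x||^2 + d * (T_end - t).
g <- function(x) sum(x^2)
U_theta_m(g, t = 0.5, x = c(1, 2), T_end = 1, m = 10^4)
\end{verbatim}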
\begin{lemma} \label{lemma1.1}
@ -118,7 +118,7 @@ Assume Setting \ref{primarysetting} then:
\end{enumerate}
\end{lemma}
\begin{proof} For (i), note that the $\mathcal{W}^{(\theta,0,-k)}_{T-t}$ are continuous random fields and that $g\in C(\mathbb{R}^d,\mathbb{R})$, so $U^\theta(t,x)$ is a composition of continuous functions (with $m > 0$ by hypothesis, ensuring no singularities). Thus $U^\theta: [0,T] \times \mathbb{R}^d\times \Omega \rightarrow \mathbb{R}$ is a continuous random field.
\medskip
@ -154,7 +154,7 @@ We next claim that for all $s\in [0,T]$, $t\in[s,T]$, $\theta \in \Theta$ it hol
To prove this claim, observe that the triangle inequality and (\ref{(2.1.4)}) demonstrate that for all $s\in[0,T]$, $t\in[s,T]$, $\theta \in \Theta$, it holds that:
\begin{align}\label{(1.18)}
\mathbb{E}\lb \lv U^\theta \lp t,x+\mathcal{W}^\theta_{t-s}\rp \rv \rb \leqslant \frac{1}{m}\left[ \sum^{m}_{i=1}\mathbb{E}\lb \lv g \lp x+\mathcal{W}^\theta_{t-s}+\mathcal{W}^{(\theta,0,-i)}_{T-t} \rp \rv \rb \rb
\end{align}
Now observe that (\ref{(2.1.6)}) and the fact that $(\mathcal{W}^\theta)_{\theta \in \Theta}$ are independent imply that for all $s \in [0,T]$, $t\in [s,T]$, $\theta \in \Theta$, $i\in \mathbb{Z}$ it holds that:
@ -179,11 +179,11 @@ Combining (\ref{(1.16)}), (\ref{(1.20)}), and (\ref{(1.21)}) completes the proof
\begin{enumerate}[label = (\roman*)]
\item it holds for all $t \in [0,T]$, $x\in \mathbb{R}^d$ that:
\begin{align}
\mathbb{E}\lb \lv U^0 \lp t,x \rp \rv \rb + \mathbb{E}\lb \lv g \lp x+\mathcal{W}^{(0,0,-1)}_{T-t} \rp \rv \rb < \infty
\end{align}
\item it holds for all $t\in [0,T]$, $x\in \mathbb{R}^d$ that:
\begin{align}
\mathbb{E}\lb U^0\lp t,x \rp \rb = \mathbb{E} \lb g \lp x+\mathcal{W}^{(0,0,-1)}_{T-t}\rp\rb
\end{align}
\end{enumerate}
\end{corollary}
@ -191,12 +191,12 @@ Combining (\ref{(1.16)}), (\ref{(1.20)}), and (\ref{(1.21)}) completes the proof
\begin{proof}
Item (i) is a restatement of Lemma \ref{lem:1.20} in that for all $t\in [0,T]$:
\begin{align}
&\mathbb{E}\left[ \left| U^0\left( t,x \right) \right| \right] + \mathbb{E} \left[ \left|g \left(x+\mathcal{W}^{(0,0,-1)}_{T-t}\right)\right|\right] \nonumber\\
&<\mathbb{E} \left[ \left|U^\theta \lp t,x+\mathcal{W}^\theta_{t-s} \rp \right| \right] +\mathbb{E}\left[ \left|g \left(x+\mathcal{W}^\theta_{t-s}\right) \right| \right]+ \int^T_s \mathbb{E}\lb \lv U^\theta \lp r,x+\mathcal{W}^\theta_{r-s} \rp \rv \rb dr \nonumber\\
&< \infty
\end{align}
Furthermore, (ii) is a restatement of Lemma \ref{lem:1.20} with $\theta = 0$, $m=1$, and $k=1$. This completes the proof of Corollary \ref{cor:1.20.1}.
\end{proof}
\section{Monte Carlo Approximations}
@ -252,7 +252,7 @@ This completes the proof of the lemma.
\end{proof}
\section{Bounds and Convergence}
\begin{lemma}\label{lem:1.21} Assume Setting \ref{primarysetting}. Then it holds for all $t\in [0,T]$, $x\in \mathbb{R}^d$ that
\begin{align}
&\left(\E\left[\left|U^0(t,x+\mathcal{W}^0_t)-\E \left[U^0 \left(t,x+\mathcal{W}^0_t \right)\right]\right|^\mathfrak{p}\right]\right)^{\frac{1}{\mathfrak{p}}} \nonumber\\
&\leqslant \frac{\mathfrak{m}}{m^{\frac{1}{2}}} \left[\left(\E\left[ \lv g \lp x+\mathcal{W}^0_T \rp \rv^\mathfrak{p}\right]\right)^{\frac{1}{\mathfrak{p}}}\right]
@ -264,19 +264,19 @@ This completes the proof of the lemma.
G_k(t,x) = g\left(x+\mathcal{W}^{(0,0,-k)}_{T-t}\right)
\end{align}
\medskip
Observe that the hypothesis that $(\mathcal{W}^\theta)_{\theta \in \Theta}$ are independent Brownian motions and the hypothesis that $g \in C(\mathbb{R}^d,\mathbb{R})$ assure that for all $t \in [0,T]$, $x\in \mathbb{R}^d$ it holds that $(G_k(t,x))_{k\in \mathbb{Z}}$ are i.i.d. random variables. This and Corollary \ref{cor:1.22.2} (applied for every $t\in [0,T]$, $x\in \mathbb{R}^d$ with $p \curvearrowleft \mathfrak{p}$, $n \curvearrowleft m$, $(X_k)_{k\in \{1,2,...,m\}} \curvearrowleft (G_k(t,x))_{k\in \{1,2,...,m\}}$), with the notation of Corollary \ref{cor:1.22.2}, ensure that for all $t\in [0,T]$, $x \in \mathbb{R}^d$ it holds that:
\begin{align}
\left( \E \left[ \left| \frac{1}{m} \left[ \sum^{m}_{k=1} G_k(t,x) \right] - \E \left[ G_1(t,x) \right] \right| ^\mathfrak{p} \right] \right)^{\frac{1}{\mathfrak{p}}} \leqslant \frac{\mathfrak{m}}{m^{\frac{1}{2}}}\left(\E \left[|G_1(t,x)|^\mathfrak{p} \right] \right)^{\frac{1}{\mathfrak{p}}}
\end{align}
\medskip
Combining this with (\ref{(1.12)}), (\ref{(2.1.4)}), and Item (ii) of Corollary \ref{cor:1.20.1} yields that:
\begin{align}
&\left(\E\left[\left|U^0(t,x) - \E \left[U^0(t,x)\right]\right|^\mathfrak{p}\right]\right)^\frac{1}{\mathfrak{p}} \nonumber\\
&= \left(\E \left[\left|\frac{1}{m}\left[\sum^{m}_{k=1}G_k(t,x)\right]- \E \left[G_1(t,x)\right]\right|^\mathfrak{p} \right]\right)^{\frac{1}{\mathfrak{p}}} \\
&\leqslant \frac{\mathfrak{m}}{m^{\frac{1}{2}}}\left(\E \left[\left| G_1(t,x)\right| ^\mathfrak{p}\right]\right)^{\frac{1}{\mathfrak{p}}} \\
&= \frac{\mathfrak{m}}{m^{\frac{1}{2}}} \left[\left(\E \left[\left|g\left(x+\mathcal{W}^1_{T-t}\right)\right|^\mathfrak{p}\right]\right)^\frac{1}{\mathfrak{p}}\right]
\end{align}
This and the fact that $\mathcal{W}^0$ has independent increments ensure that for all $m\in \N$, $t\in [0,T]$, $x\in \mathbb{R}^d$ it holds that:
\begin{align}
\left(\E \left[\left| U^0 \left(t,x+\mathcal{W}^0_t\right) - \E \left[U^0 \left(t,x+\mathcal{W}^0_t\right)\right]\right|^\mathfrak{p}\right]\right)^{\frac{1}{\mathfrak{p}}} \leqslant \frac{\mathfrak{m}}{m^{\frac{1}{2}}} \left[\left(\E \left[\left| g \left(x+\mathcal{W}^0_T\right)\right|^\p\right]\right)^{\frac{1}{\mathfrak{p}}} \right]
\end{align}
@ -341,7 +341,7 @@ Which in turn yields that:
\begin{align}\label{(1.48)}
\mathfrak{L}\left( \frac{ \mathfrak{m}}{m^{\frac{1}{2}}}\right)\left(\E \left[ \left( 1+ \left\| x+\mathcal{W}^0_T \right\|_E^p \right)^\p\right]\right)^\frac{1}{\p} \leqslant \mathfrak{L}\left( \frac{ \mathfrak{m}}{m^{\frac{1}{2}}}\right)\left(\sup_{s \in [0,T]}\E \left[ \left( 1+ \left\| x+\mathcal{W}^0_s \right\|_E^p \right)^\p\right]\right)^\frac{1}{\p}
\end{align}
Combining (\ref{(1.46)}), (\ref{(1.47)}), and (\ref{(1.48)}) yields that:
\begin{align}
\left( \E \left[ \left| U^0 \left(t,x+\mathcal{W}^0_t \right) - u \left( t, x+\mathcal{W}^0_t \right) \right|^\p \right] \right)^{\frac{1}{\p}} &\leqslant \left( \frac{ \mathfrak{m}}{m^{\frac{1}{2}}}\right)\left(\E \left[\left| g \left(x+\mathcal{W}^0_T\right)\right|^\p\right]\right)^\frac{1}{\p} \nonumber\\
&\les\mathfrak{L}\left( \frac{ \mathfrak{m}}{m^{\frac{1}{2}}}\right)\left(\sup_{s\in[0,T]}\E \left[ \left( 1+ \left\| x+\mathcal{W}^0_s \right\|_E^p \right)^\p\right]\right)^\frac{1}{\p}
@ -379,19 +379,19 @@ Thus we get for all $\mft \in [0,T]$, $x\in \R^d$, $n \in $:
This completes the proof of Corollary \ref{cor:1.25.1}.
\end{proof}
\begin{theorem}\label{tentpole_1} Let $T,L,p,q, \mathfrak{d} \in [0,\infty)$, $m \in \mathbb{N}$, $\Theta = \bigcup_{n\in \mathbb{N}} \Z^n$, let $g_d\in C(\R^d,\R)$, and assume for all $d\in \N$, $t \in [0,T]$, $x = (x_1,x_2,...,x_d)\in \R^d$, $v,w \in \R$ that $\max \{ |g_d(x)|\} \leqslant Ld^p \left(1+\Sigma^d_{k=1}\left|x_k \right|^q\right)$, let $\left(\Omega, \mathcal{F}, \mathbb{P}\right)$ be a probability space, let $\mathcal{W}^{d,\theta}: [0,T] \times \Omega \rightarrow \R^d$, $d\in \N$, $\theta \in \Theta$, be independent standard Brownian motions, assume for every $d\in \N$ that $\left(\mathcal{W}^{d,\theta}\right)_{\theta \in \Theta}$ are independent, let $u_d \in C([0,T] \times \R^d,\R)$, $d \in \N$, satisfy for all $d\in \N$, $t\in [0,T]$, $x \in \R^d$ that $\E \left[\left|g_d \left(x+\mathcal{W}^{d,0}_{T-t} \right)\right|\right] < \infty$ and:
\begin{align}
u_d\left(t,x\right) = \E \left[g_d \left(x + \mathcal{W}^{d,0}_{T-t}\right)\right]
\end{align}
Let $U^{d,\theta}_m: [0,T] \times \R^d \times \Omega \rightarrow \R$, $d \in \N$, $m\in \N$, $\theta \in \Theta$, satisfy for all $d\in \N$, $m \in \N$, $\theta \in \Theta$, $t\in [0,T]$, $x\in \R^d$ that:
\begin{align}
U^{d,\theta}_m(t,x) = \frac{1}{m} \left[\sum^{m}_{k=1} g_d \left(x + \mathcal{W}^{d,(\theta, 0,-k)}_{T-t}\right)\right]
\end{align}
and for every $d,n,m \in \N$ let $\mathfrak{C}_{d,n,m} \in \Z$ be the number of function evaluations of $u_d(0,\cdot)$ and the number of realizations of scalar random variables which are used to compute one realization of $U^{d,0}_m(T,0): \Omega \rightarrow \R$.
There then exist $c \in \R$ and $\mathfrak{N}:\N \times (0,1] \rightarrow \N$ such that for all $d \in \N$, $\varepsilon \in (0,1]$ it holds that:
\begin{align}\label{(2.48)}
\sup_{t\in[0,T]} \sup_{x \in [-L,L]^d} \left(\E \left[\left| u_d(t,x) - U^{d,0}_{\mathfrak{N}(d,\ve)}(t,x)\right|^\p\right]\right)^\frac{1}{\p} \leqslant \ve
\end{align}
and:
@ -399,7 +399,7 @@ and:
\mathfrak{C}_{d,\mathfrak{N}(d,\varepsilon), \mathfrak{N}(d,\varepsilon)} \leqslant cd^c\varepsilon^{-(2+\delta)}
\end{align}
\end{theorem}
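\begin{remark}
Before the proof, it may help to see numerically how the sample count behind (\ref{(2.48)}) scales with the accuracy $\ve$. The sketch below is ours and only illustrative: it assumes an $L^\mathfrak{p}$ error bound of the form $C m^{-\frac{1}{2}}$, as furnished by Lemma \ref{lem:1.21}, with \texttt{C} a placeholder for the moment-dependent constant; it is not part of the theorem or its proof.
\end{remark}
\begin{verbatim}
# With an error bound of the form C / sqrt(m), accuracy eps is reached once
# m >= (C / eps)^2, so the number of Monte Carlo samples (and hence the cost)
# grows only polynomially in 1/eps.
samples_needed <- function(eps, C = 1) ceiling((C / eps)^2)

sapply(c(0.1, 0.01, 0.001), samples_needed)  # sample counts for each eps
\end{verbatim}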
\begin{proof} Throughout the proof let $\mathfrak{m}_\mathfrak{p} = \fk_p\sqrt{\mathfrak{p} -1}$, $\mathfrak{p} \in [2,\infty)$, let $\mathbb{F}^d_t \subseteq \mathcal{F}$, $d\in \N$, $t\in [0,T]$ satisfy for all $d \in \N$, $t\in [0,T]$ that:
\begin{align}\label{2.3.29}
\mathbb{F}^d_t = \begin{cases}
\bigcap_{s\in[t,T]} \sigma \left(\sigma \left(W^{d,0}_r: r \in [0,s]\right) \cup \{A\in \mathcal{F}: \mathbb{P}(A)=0\}\right) & :t<T \\
@ -456,7 +456,7 @@ Thus (\ref{(2.3.33)}) and (\ref{2.3.34}) together proves (\ref{(2.48)}).
Note that $\mathfrak{C}_{d,\mathfrak{N}_{d,\ve},\mathfrak{N}_{d,\ve}}$ is the number of function evaluations of $u_d(0,\cdot)$ and the number of realizations of scalar random variables which are used to compute one realization of $U^{d,0}_{\mathfrak{N}_{d,\ve}}(T,0):\Omega \rightarrow \R$. Let $\widetilde{\mathfrak{N}_{d,\ve}}$ be the value of $\mathfrak{N}_{d,\ve}$ that achieves equality in (\ref{2.3.34}). In that situation the number of evaluations of $u_d(0,\cdot)$ does not exceed $\widetilde{\mathfrak{N}_{d,\ve}}$, and each evaluation of $u_d(0,\cdot)$ requires at most one realization of a scalar random variable, so $\mathfrak{C}_{d,\mathfrak{N}_{d,\ve},\mathfrak{N}_{d,\ve}}$ does not exceed $2\widetilde{\mathfrak{N}_{d,\ve}}$. Thus note that:
\begin{align}\label{(2.3.35)}
\mathfrak{C}_{d,\mathfrak{N}_{d,\ve},\mathfrak{N}_{d,\ve}} \leqslant \left\lceil 2\lb L\mathfrak{m}_\p\left(\left(1+L^2d\right)^{\frac{q\p}{2}} + (q\p+1)d^{\frac{q\p}{2}}\right) \exp \left(\frac{\left[q(q\p+3)+1\right]T}{2}\right) \rb \ve^{-1}\right\rceil
\end{align}
Note that, other than $d$ and $\ve$, everything on the right-hand side is constant or fixed. Hence (\ref{(2.3.35)}) can be rendered as:
\begin{align}


@ -1,5 +1,5 @@
\chapter{Introduction}
\section{Motivation}
Artificial neural networks represent a sea change in computing. They have successfully been used in a wide range of applications, from protein folding in \cite{tsaban_harnessing_2022} and knot theory in \cite{davies_signature_2022} to extracting data from gravitational waves in \cite{zhao_space-based_2023}.
@ -10,9 +10,9 @@ Our goal in this dissertation is threefold:
\begin{enumerate}[label = (\roman*)]
\item Firstly, we will take something called Multi-Level Picard, first introduced in \cite{e_multilevel_2019} and \cite{e_multilevel_2021}, and in particular the version of Multi-Level Picard that appears in \cite{hutzenthaler_strong_2021}. We show that dropping the drift term and substantially simplifying the process still results in convergence of the method, polynomial bounds for the number of computations required, and rather nice properties for the approximations, such as integrability and measurability.
\item We will then go on to see that a modified version of the heat equation has a solution which can be represented, via Feynman-Kac, in terms of a stochastic differential equation, and further that a version of this can be realized by the modified Multi-Level Picard technique mentioned in Item (i), with certain simplifying assumptions since we dropped the drift term. A substantial amount of this is inspired by \cite{bhj20} and much earlier work in \cite{karatzas1991brownian} and \cite{da_prato_zabczyk_2002}.
\item By far the most significant part of this dissertation is dedicated to expanding and building upon a framework of neural networks as appears in \cite{grohs2019spacetime}. We modify this definition substantially and introduce several new neural network architectures to this framework ($\pwr_n^{q,\ve}$, $\pnm_C^{q,\ve}$, $\tun^d_n$, $\etr^{N,h}$, $\xpn_n^{q,\ve}$, $\csn_n^{q,\ve}$, $\sne_n^{q,\ve}$, $\mathsf{E}^{N,h,q,\ve}_n$, $\mathsf{UE}^{N,h,q,\ve}_{n,\mathsf{G}_d}$, $\mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega}$, and $\mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega}$, among others) and show, for all these neural networks, that the parameter count grows only polynomially as the accuracy of our model increases, thus beating the curse of dimensionality. This finally paves the way for giving neural network approximations to the techniques realized in Item (ii). We show that it is not too wasteful (measured by the polynomiality of parameter counts) to use neural networks to approximate MLP approximations of the stochastic differential equations equivalent, via Feynman-Kac, to certain parabolic PDEs.
\\~\\
We end this dissertation by proposing two avenues of further research: analytical and algebraic. This framework of understanding neural networks as ordered tuples of ordered pairs may be extended to give neural network approximations of classical PDE approximation techniques such as Runge-Kutta, Adams-Moulton, and Adams-Bashforth. We also propose three conjectures about neural networks as defined in \cite{grohs2019spacetime}, among them that they form a bimodule and that instantiation is a functor.
\end{enumerate}
This dissertation is broken down into three parts. At the end of each part, we will encounter tent-pole theorems, which will eventually lead to the final neural network approximation outcome. These tent-pole theorems are Theorem \ref{tentpole_1}, Theorem \ref{thm:3.21}, and Theorem \ref{ues}. Finally, the culmination of these three theorems is Corollary \ref{cor_ues}, the end product of the dissertation. We hope you, the reader, will enjoy this.
@ -139,6 +139,11 @@ is adapted to the filtration $\mathbb{F}:= (\mathcal{F}_i )_{i \in [0,T]}$
Let it be the case that whenever we have two strong solutions $\mathcal{X}$ and $\widetilde{\mathcal{X}}$, w.r.t. the process $\mathcal{W}$ and initial condition $\mathcal{X}_0 = 0$, as defined in Definition \ref{1.9}, it is also the case that $\mathbb{P}(\mathcal{X}_t = \widetilde{\mathcal{X}}_t) =1$ for all $t\in [0, T]$. We then say that the pair $(\mu, \sigma)$ exhibits a strong uniqueness property.
\end{definition}
\subsection{Lipschitz and Related Notions}
\begin{definition}
Let $f:\R \rightarrow \R$. We will say that $f$ is continuous almost everywhere if the Lebesgue measure of the subset of the domain where it is not continuous is $0$. In that case we will write $f\in C_{ae}\left( \R, \R\right)$.
\end{definition}
\begin{definition}[Globally Lipschitz Function]\label{def:1.13}
For every $d\in \N_0$, we say a function $f: \R^d \rightarrow \R^d$ is (globally) Lipschitz if there exists an $L \in (0,\infty)$ such that for all $x,y \in \R^d$ it is the case that:
\begin{align}


@ -59,7 +59,7 @@ We will finally go on to show the $\mathsf{MC}^{N,d}_{x,y}$ neural network which
This proves Items (ii)\textemdash(iii). Item (iv) follows straightforwardly from Item (i). This establishes the lemma.
\end{proof}
\begin{remark}
Note here the difference between Definition \ref{def:mathfrak_i} and Definition \ref{7.2.1}.
\end{remark}
\begin{lemma}[R\textemdash, 2023]\label{id_param}
Let $d \in \N$. It is then the case that $\param\lp \id_d\rp = 4d^2+3d$.
@ -668,6 +668,10 @@ This completes the proof.
\begin{remark}
Let $h \in \lp 0,\infty\rp$. Note then that $\trp^h$ is simply $\etr^{2,h}$.
\end{remark}
\begin{remark}
For an R implementation, see Listing \ref{Etr}
\end{remark}
%\begin{lemma}
% Let $f \in C \lp \R, \R \rp$, $a\in \R, b \in \lb a,\infty\rp$, $N\in \N$, and let $h = \frac{b-a}{N}$. Assume also that $f$ has first and second derivatives almost everywhere. Let $ x = \lb x_0 \: x_1 \:...\: x_n\rb \in \R^{n+1}$ such that for all $i \in \{0,1,...,n\}$ it is the case that $x_i = x_0+i\cdot h$, as such let it also be the case that $f\lp \lb x \rb_{*,*}\rp = \lb f(x_0)\: f(x_1) \: \cdots f(x_n) \rb$. Let $a = x_0$ and $b = x_n$. It is then the case that:
% \begin{align}\label{(9.6.3)}
@ -800,6 +804,10 @@ We will present here an approximation scheme for continuous functions called max
This concludes the proof of the lemma.
\end{proof}
\begin{remark}
For an R implementation, see Listing \ref{Nrm}
\end{remark}
\subsection{The $\mxm^d$ Neural Networks}
Given $x\in \R$, it is straightforward to find the maximum; $x$ itself is the maximum. For $x \in \R^2$ we may find the maximum via network (\ref{9.7.6.1}), i.e. $\mxm^2$. The strategy for general $x \in \R^d$ is to take pairwise maxima, halving the number of entries, and to halve repeatedly until one maximum remains, as sketched below. For $x \in \R^d$ where $d$ is even we may stack $\frac{d}{2}$ copies of $\mxm^2$ to halve, and for $x \in \R^d$ where $d$ is odd and greater than $3$ we may introduce ``padding'' via the $\id_1$ network and thus require $\frac{d-1}{2}$ copies of $\mxm^2$ to halve.
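\begin{remark}
The halving strategy just described can be sketched in plain \texttt{R} as follows. This is only an illustration on ordinary vectors (the helper name is ours); it is not the $\mxm^d$ network itself, but it mirrors the pairwise-maximum halving and the padding role played by $\id_1$ for odd lengths.
\end{remark}
\begin{verbatim}
# Repeatedly take pairwise maxima; when the current length is odd, pad by
# repeating the last entry, which passes it through unchanged (the id_1 role).
max_by_halving <- function(x) {
  while (length(x) > 1) {
    if (length(x) %% 2 == 1) x <- c(x, x[length(x)])
    x <- pmax(x[seq(1, length(x), by = 2)], x[seq(2, length(x), by = 2)])
  }
  x
}

max_by_halving(c(3, -1, 7, 2, 5))  # 7
\end{verbatim}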
@ -1037,6 +1045,10 @@ Given $x\in \R$, it is straightforward to find the maximum; $ x$ is the maximum.
Item (vi) is a straightforward consequence of Item (i). This completes the proof of the lemma.
\end{proof}
\begin{remark}
For an R implementation, see Listing \ref{Mxm}
\end{remark}
\subsection{The $\mathsf{MC}^{N,d}_{x,y}$ Neural Networks}
Let $f: [a,b] \rightarrow \R$ be a continuous bounded function with Lipschitz constant $L$. Let $x_0 \les x_1 \les \cdots \les x_N$ be a set of sample points within $[a,b]$, possibly with $x_i \sim \unif([a,b])$ for all $i \in \{0,1,\hdots, N\}$. For all $i \in \{0,1,\hdots, N\}$, define a series of functions $f_0,f_1,\hdots, f_N: [a,b] \rightarrow \R$ as such:
@ -1245,8 +1257,11 @@ We will call the approximant $\max_{i \in \{0,1,\hdots, N\}}\{ f_i\}$, the \text
\end{center}
\caption{Neural network diagram for the $\mathsf{MC}^{N,d}_{x,y}$ network}
\end{figure}
\begin{remark}
For an R implementation, see Listing \ref{MC}.
\end{remark}
\subsection{Lipschitz Function Approximations}\label{(9.7.6)}
\begin{lemma}%TODO: Should we stipulate compact sets?


@ -404,14 +404,14 @@ This, and the fact that $\delta = 2^{\frac{-2}{q-2}}\ve ^{\frac{q}{q-2}}$ render
\centering
\includegraphics[width = 0.45\linewidth]{/Users/shakilrafi/R-simulations/Sqr_properties/experimental_deps.png}
\includegraphics[width = 0.45\linewidth]{/Users/shakilrafi/R-simulations/Sqr_properties/dep_theoretical_upper_limits.png}
\caption{Left: $\log_{10}$ of depths for a simulation with $q \in \lb 2.1, 4 \rb $, $\ve \in \lp 0.1, 2 \rb$, and $x \in \lb -5,5 \rb$, all with $50$ mesh-points for $\sqr^{q,\ve}$. Right: The theoretical upper limits over the same range of values}
\end{figure}
\begin{figure}[h]
\centering
\includegraphics[width = 0.45\linewidth]{/Users/shakilrafi/R-simulations/Sqr_properties/experimental_params.png}
\includegraphics[width = 0.45\linewidth]{/Users/shakilrafi/R-simulations/Sqr_properties/param_theoretical_upper_limits.png}
\caption{Left: $\log_{10}$ of params for a simulation with $q \in \lb 2.1, 4 \rb $, $\ve \in \lp 0.1, 2 \rb$, and $x \in \lb -5,5 \rb$, all with $50$ mesh-points for $\sqr^{q,\ve}$. Right: The theoretical upper limits over the same range of values}
\end{figure}
% Please add the following required packages to your document preamble:
@ -422,15 +422,31 @@ This, and the fact that $\delta = 2^{\frac{-2}{q-2}}\ve ^{\frac{q}{q-2}}$ render
\toprule
& Min. & 1\textsuperscript{st} Qu. & Median & Mean & 3\textsuperscript{rd} Qu. & Max. \\ \midrule
Experimental $|x^2 - \real_{\rect}(\mathsf{Sqr}^{q,\ve})(x)|$ & 0.00000 & 0.08943 & 0.33787 & 3.14893 & 4.67465 & 20.00 \\ \midrule
Theoretical $|x^2 - \real_{\rect}(\mathsf{Sqr}^{q,\ve})(x)|$ & 0.010 & 1.715 & 10.402 & 48.063 & 45.538 & 1250.00 \\ \midrule
\textbf{Forward Difference} & 0.01 & 1.6012 & 9.8655 & 44.9141 & 40.7102 & 1230
\end{tabular}
\caption{Theoretical upper bounds for $L^1$ error, experimental $L^1$ error and their forward difference, with $q \in \lb 2.1, 4 \rb $, $\ve \in \lp 0.1, 2 \rb$, and $x \in \lb -5,5 \rb$, all with $50$ mesh-points for $\sqr^{q,\ve}$.}
\end{table}
\begin{table}[h]
\begin{tabular}{l|llllll}
\hline
& Min & 1st. Qu & Median & Mean & 3rd Qu & Max. \\ \hline
Experimental \\ $|x^2 - \inst_{\rect}\lp \sqr^{q,\ve}\rp(x)|$ & 0.0000 & 0.0894 & 0.3378 & 3.1489 & 4.6746 & 20.0000 \\ \hline
Theoretical upper limits for\\ $|x^2 - \inst_{\rect}\lp \sqr^{q,\ve}\rp(x)|$ & 0.010 & 1.715 & 10.402 & 48.063 & 45.538 & 1250.000 \\ \hline
\textbf{Forward Difference} & 0.001 & 1.6012 & 9.8655 & 44.9141 & 40.7102 & 1230 \\ \hline
Experimental depths & 2 & 2 & 2 & 2.307 & 2 & 80 \\ \hline
Theoretical upper bound on\\ depths & 2 & 2 & 2 & 2.73 & 2 & 91 \\ \hline
\textbf{Forward Difference} & 0 & 0 & 0 & 0.423 & 0 & 11 \\ \hline
Experimental params & 25 & 25 & 25 & 47.07 & 25 & 5641 \\ \hline
Theoretical upper limit on \\ params & 52 & 52 & 52 & 82.22 & 52 & 6353 \\ \hline
\textbf{Forward Difference} & 0.001 & 1.6012 & 9.8655 & 44.9141 & 40.7102 & 1230 \\ \hline
\end{tabular}
\caption{Table showing the experimental and theoretical $1$-norm difference, depths, and parameter counts respectively for $\sqr^{q,\ve}$ with $q\in [2.1,4]$, $\ve \in [0.01,2]$, and $x \in [-5,5]$ all with $50$ mesh-points, and their forward differences.}
\end{table}
\subsection{The $\prd^{q,\ve}$ Neural Networks and Products of Two Real Numbers}
We are finally ready to give neural network representations of arbitrary products of real numbers. However, this representation differs somewhat from those found in the literature, especially \cite{grohs2019spacetime}, where parallelization (stacking) is used instead of neural network sums. This will help us calculate $\wid_1$ and the width of the second-to-last layer for later neural network calculations.
\begin{lemma}\label{prd_network}
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, $A_1,A_2,A_3 \in \R^{1\times 2}$, $\Psi \in \neu$ satisfy for all $x\in \R$ that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, $A_1 = \lb 1 \quad 1 \rb$, $A_2 = \lb 1 \quad 0 \rb$, $A_3 = \lb 0 \quad 1 \rb$, $\real_{\rect}\lp \Psi \rp \in C\lp \R, \R \rp$, $\lp \real_{\rect} \lp \Psi \rp \rp \lp 0\rp = 0$, $0\les \lp \real_{\rect} \lp \Psi \rp \rp \lp x \rp \les \delta+|x|^2$, $|x^2-\lp \real_{\rect}\lp \Psi \rp \rp \lp x \rp |\les \delta \max \{1,|x|^q\}$, $\dep\lp \Psi \rp \les \max\{ 1+\frac{1}{q-2}+\frac{q}{2(q-2)}\log_2 \lp \delta^{-1} \rp ,2\}$, and $\param \lp \Psi \rp \les \max\left\{\lb \frac{40q}{q-2} \rb \log_2\lp \delta^{-1} \rp +\frac{80}{q-2}-28,52\right\}$, then:
\begin{enumerate}[label=(\roman*)]
@ -588,7 +604,7 @@ Observe next that for $q\in \lp 0,\infty\rp$, $\ve \in \lp 0,\infty \rp$, $\Gamm
We shall refer to this neural network for a given $q \in \lp 2,\infty \rp$ and given $\ve \in \lp 0,\infty \rp$ from now on as $\prd^{q,\ve}$.
\end{remark} \end{remark}
\begin{remark}
For an \texttt{R} implementation see Listing \ref{Pwr}
\end{remark}
\begin{remark}
Diagrammatically, this can be represented as:
@ -672,25 +688,6 @@ Observe next that for $q\in \lp 0,\infty\rp$, $\ve \in \lp 0,\infty \rp$, $\Gamm
\caption{Isosurface plot showing $|x^2 - \real_{\rect}\lp\sqr^{q,\ve}\rp(x)|$ for $q \in [2.1,4]$, $\ve \in [0.01,2]$, and $x \in [-5,5]$ with 50 mesh-points in each.}
\end{figure}
\section{Higher Approximations}\label{sec_tun}
We take inspiration from the $\sm$ neural network to create the $\prd$ neural network. However, we first need to define a special neural network, called the \textit{tunneling neural network}, to effectively stack two neural networks not of the same length.
\subsection{The $\tun^d_n$ Neural Networks and Their Properties}
@ -1266,7 +1263,7 @@ Let $\mathfrak{p}_i$ for $i \in \{1,2,...\}$ be the set of functions defined for
\begin{figure}[h]
\centering
\includegraphics[width = \linewidth]{/Users/shakilrafi/R-simulations/Pwr_3_properties/isosurface.png}
\caption{Isosurface plot showing $|x^3 - \real_{\rect}(\pwr^{q,\ve}_3)(x)|$ for $q \in [2.1,4]$, $\ve \in [0.01,2]$, and $x \in [-5,5]$ with 50 mesh-points in each.}
\end{figure}
@ -1381,8 +1378,23 @@ Let $\mathfrak{p}_i$ for $i \in \{1,2,...\}$ be the set of functions defined for
\caption{Neural network diagram for an elementary neural network polynomial, with all coefficients being uniformly $1$.}
\end{figure}
\begin{table}[h]
\begin{tabular}{l|llllll}
\hline & Min & 1st. Qu & Median & Mean & 3rd Qu & Max. \\ \hline
Experimental \\ $|x^3 - \inst_{\rect}\lp \pwr^{q,\ve}_3\rp(x)|$ & 0.0000 & 0.2053 & 7.2873 & 26.7903 & 45.4275 & 125.00 \\ \hline
Experimental depths & 4 & 4 & 4 & 4.92 & 4 & 238 \\ \hline
Theoretical upper bound on\\ depths & 4.30 & 17.82 & 23.91 & 25.80 & 29.63 & 548.86 \\ \hline
\textbf{Forward Difference} & 0.30 & 13.82 & 19.91 & 20.88 & 25.63 & 310.86 \\ \hline
Experimental params & 1483 & 1483 & 1483 & 1546 & 1483 & 5711 \\ \hline
Theoretical upper limit on \\ params & 9993 & 9993 & 9993 & 11589 & 9993 & 126843 \\ \hline
\textbf{Forward Difference} & 8510 & 8510 & 8510 & 10043 & 8510 & 121132 \\ \hline
\end{tabular}
\caption{Table showing the experimental and theoretical $1$-norm difference, depths, and parameter counts respectively for $\pwr_3^{q,\ve}$ with $q\in [2.1,4]$, $\ve \in [0.01,2]$, and $x \in [-5,5]$ all with $50$ mesh-points, and their forward differences.}
\end{table}
\begin{lemma}[R\textemdash,2023]\label{6.2.9}\label{nn_poly}\label{mnm_prop}
Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, and $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$, and let $C = \{ c_1,c_2,\hdots, c_n\} \in \R^n$ be a set of real numbers, i.e. the set of coefficients. It is then the case for all $n\in\N_0$ and $x\in \R$ that:
\begin{enumerate}[label = (\roman*)]
\item $\real_{\rect} \lp \pnm_{n,C}^{q,\ve}\rp \in C \lp \R, \R \rp $
\item $\dep \lp \pnm_{n,C}^{q,\ve} \rp \les \begin{cases}
@ -1481,6 +1493,9 @@ Let $\mathfrak{p}_i$ for $i \in \{1,2,...\}$ be the set of functions defined for
\end{align}
This completes the proof of the lemma.
\end{proof}
\begin{remark}
Note that we will implement this in \texttt{R} as the so-called \texttt{Tay} function. Our implementations of neural network exponentials, cosines, and sines will be instantiations of this \texttt{Tay} function with the appropriate coefficients and exponents substituted to give the corresponding Taylor expansions.
\end{remark}
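\begin{remark}
As a plain \texttt{R} illustration of the idea behind \texttt{Tay} (ours, and not the neural network construction or the package's implementation): evaluate a truncated Taylor polynomial about $0$ with a supplied coefficient rule, and recover approximations of $e^x$, $\cos(x)$, and $\sin(x)$ by swapping in the corresponding coefficients.
\end{remark}
\begin{verbatim}
# Evaluate sum_{k=0}^{n} coef(k) * x^k for a coefficient rule coef.
tay_eval <- function(x, n, coef) sum(sapply(0:n, function(k) coef(k) * x^k))

exp_coef <- function(k) 1 / factorial(k)
cos_coef <- function(k) if (k %% 2 == 0) (-1)^(k / 2) / factorial(k) else 0
sin_coef <- function(k) if (k %% 2 == 1) (-1)^((k - 1) / 2) / factorial(k) else 0

tay_eval(1, 10, exp_coef)       # approximately exp(1)
tay_eval(pi / 3, 10, cos_coef)  # approximately 0.5
\end{verbatim}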
\subsection{$\xpn_n^{q,\ve}$, $\csn_n^{q,\ve}$, $\sne_n^{q,\ve}$, and ANN Approximations of $e^x$, $\cos(x)$, and $\sin(x)$}
Once we have neural network polynomials, we may take the next leap to transcendental functions. To approximate them we will use Taylor expansions, which will swiftly give us approximations of our desired functions. Here, we will explore neural network approximations for three common transcendental functions: $e^x$, $\cos(x)$, and $\sin(x)$.


@ -16,12 +16,12 @@ We will now take the modified and simplified version of Multi-level Picard intro
\item for all $\theta \in \Theta$, $t \in [0,T]$ that $\dep \lp \mathsf{U}^\theta_t \rp \les \dep (\mathsf{G}_d)$
\item for all $\theta \in \Theta$, $t \in [0,T]$ that:
\begin{align}
\left\| \lay\lp \mathsf{U}^\theta_t \rp \right\|_{\infty} \les \|\lay \lp \mathsf{G}_d \rp \|_{\infty} \lp 1+ \sqrt{2} \rp M
\end{align}
\item for all $\theta \in \Theta$, $t \in [0,T]$, $x \in \R^d$ that $U^\theta (t,x) = \lp \real_{\act} \lp \mathsf{U}^\theta_t \rp \rp \lp x \rp $ and
\item for all $\theta \in \Theta$, $t \in [0,T]$ that:
\begin{align}
\param \lp \mathsf{U}^\theta_t \rp \les 2 \dep \lp \mathsf{G}_d \rp \lb \lp 1+\sqrt{2} \rp M \left\| \lay \lp \mathsf{G}_d \rp \right\|_{\infty}\rb^2
\end{align}
\end{enumerate}
\end{lemma}
@ -29,7 +29,7 @@ We will now take the modified and simplified version of Multi-level Picard intro
\begin{proof}
Throughout the proof let $\mathsf{P}^\theta_t \in \neu$, $\theta \in \Theta$, $t \in [0,T]$ satisfy for all $\theta \in \Theta$, $t \in [0,T]$ that:
\begin{align}
\mathsf{P}^\theta_t = \bigoplus^M_{k=1} \lb \frac{1}{M} \triangleright \lp \mathsf{G}_d \bullet \aff_{\mathbb{I}_d, \mathcal{W}^{\theta,0,-k}_{T-t}} \rp \rb
\end{align}
Note the hypothesis that for all $\theta \in \Theta$, $t \in [0,T]$ it holds that $\mathcal{W}^\theta_t \in \R^d$ and Lemma \ref{5.6.5} applied for every $\theta \in \Theta$, $t \in [0,T]$ with $v \curvearrowleft M$, $ c_{i \in \{u,u+1,...,v\}} \curvearrowleft \lp \frac{1}{M} \rp_{i \in \{u,u+1,...,v\}}$, $\lp B_i \rp _{i \in \{u,u+1,...,v\}} \curvearrowleft \lp \mathcal{W}^{\lp \theta, 0 , -k \rp }_{T-t} \rp_{k \in \{1,2,...,M\}}$, $\lp \nu_i \rp_{i \in \{u,u+1,...,v\}} \curvearrowleft \lp \mathsf{G}_d \rp _{i \in \{u,u+1,...,v\}}$, $\mu \curvearrowleft \Phi^\theta_t$ and with the notation of Lemma \ref{5.6.5} tells us that for all $\theta \in \Theta$, $t \in [0,T]$, and $x \in \R^d$ it holds that:
\begin{align}\label{8.0.6}
@ -100,9 +100,10 @@ While we realize that the modified Multi-Level {Picard may approximate solutions
\item for all $x = \{x_0,x_1,\hdots, x_N \}\in \R^{N+1}$, where $a=x_0 \les x_1\les \cdots \les x_{N-1} \les x_N=b$, we have that:
\begin{align}
&\left| \exp \lb \int^b_afdx\rb - \real_{\rect} \lp \mathsf{E}^{N,h,q,\ve}_{n}\rp\lp f \lp \lb x \rb _{*,*}\rp\rp\right| \nonumber\\
&\les \frac{\lp b-a\rp^3}{12N^2}f''\lp \xi \rp \cdot n^2 \cdot \lb \Xi + \frac{\lp b-a\rp^3}{12N^2} f''\lp \xi\rp\rb^{n-1} \nonumber \\
&+\sum^n_{i=1} \frac{1}{i!}\lp \left| \Xi \lp \Xi^{i-1} - \real_{\rect}\lp \pwr^{q,\ve}_{i-1}\rp\lp \Xi\rp\rp\right| + \ve + |\Xi|^q + \mathfrak{p}_{i-1}^q \rp + \frac{e^{\xi}\cdot \left| \int^b_a f dx\right|^{n+1}}{(n+1)!}
\end{align}
where $\Xi = \real_{\rect} \lp \etr^{N,h}\rp \lp f\lp \lb x\rb_{*,*}\rp\rp$.
\item it is the case that $\wid_{\hid \lp \mathsf{E}^{N,h,q,\ve}_{n}\rp} \lp \mathsf{E}^{N,h,q,\ve}_{n}\rp \les 24+2n $
\end{enumerate}
\end{lemma}
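\begin{remark}
The quantity targeted by $\mathsf{E}^{N,h,q,\ve}_n$, namely $\exp\lb \int^b_a f \, dx \rb$ with the integral replaced by a trapezoidal sum, is easy to sanity-check numerically. The following \texttt{R} sketch is ours and purely illustrative: it mirrors the composition $\xpn_n^{q,\ve} \bullet \etr^{N,h}$ with exact arithmetic on ordinary numbers rather than with the networks themselves.
\end{remark}
\begin{verbatim}
# Composite trapezoidal estimate of the integral of f on [a, b] with N panels,
# followed by the exponential.
trap_exp <- function(f, a, b, N) {
  h <- (b - a) / N
  fx <- f(seq(a, b, length.out = N + 1))
  exp(h * (sum(fx) - (fx[1] + fx[N + 1]) / 2))
}

trap_exp(cos, 0, 1, 100)  # close to exp(sin(1)), since the integral of cos
exp(sin(1))               # on [0, 1] equals sin(1)
\end{verbatim}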
@ -344,7 +345,7 @@ While we realize that the modified Multi-Level {Picard may approximate solutions
\section{The $\mathsf{UE}^{N,h,q,\ve}_{n,\mathsf{G}_d}$ Neural Networks}
\begin{lemma}[R\textemdash,2023]\label{UE-prop}
Let $n, N\in \N$, $h \in \lp 0,\infty\rp$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $a\in \lp -\infty,\infty \rp$, $b \in \lb a, \infty \rp$. Let $f:[a,b] \rightarrow \R$ be continuous and have second derivatives almost everywhere in $\lb a,b \rb$. Let $a=x_0 \les x_1\les \cdots \les x_{N-1} \les x_N=b$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{b-a}{N}$ and $x_i = x_0+i\cdot h$. Let $x = \lb x_0 \: x_1\: \cdots x_N \rb$ and as such let $f\lp\lb x \rb_{*,*} \rp = \lb f(x_0) \: f(x_1)\: \cdots \: f(x_N) \rb$. Let $\mathsf{E}^{N,h,q,\ve}_n \in \neu$ be the neural network given by:
\begin{align}
\mathsf{E}^{N,h,q,\ve}_n = \xpn_n^{q,\ve} \bullet \etr^{N,h}
\end{align}
@ -382,7 +383,7 @@ Let $n, N,h\in \N$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \
\begin{center} \begin{center}
\begin{figure}
\tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt \tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt
\begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1] \begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=1]
@ -437,6 +438,8 @@ Let $n, N,h\in \N$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \
\end{tikzpicture} \end{tikzpicture}
\caption{Neural network diagram for $\mathsf{UE}^{N,h,q,\ve}_{n,\mathsf{G}_d}$}
\end{figure}
\end{center} \end{center}
\begin{proof} \begin{proof}
@ -507,7 +510,7 @@ Let $n, N,h\in \N$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \
\section{The $\mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}$ Neural Networks} \section{The $\mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}$ Neural Networks}
\begin{lemma}[R\textemdash,2023]\label{UEX} \begin{lemma}[R\textemdash,2023]\label{UEX}
Let $n, N,h\in \N$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $a\in \lp -\infty,\infty \rp$, $b \in \lb a, \infty \rp$. Let $f:[a,b] \rightarrow \R$ be continuous and have second derivatives almost everywhere in $\lb a,b \rb$. Let $a=x_0 \les x_1\les \cdots \les x_{N-1} \les x_N=b$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{b-a}{N}$, and $x_i = x_0+i\cdot h$ . Let $x = \lb x_0 \: x_1\: \cdots \: x_N \rb$ and as such let $f\lp\lb x \rb_{*,*} \rp = \lb f(x_0) \: f(x_1)\: \cdots \: f(x_N) \rb$. Let $\mathsf{E}^{\exp}_{n,h,q,\ve} \in \neu$ be the neural network given by: Let $n, N\in \N$, $h \in \lp 0,\infty\rp$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $a\in \lp -\infty,\infty \rp$, $b \in \lb a, \infty \rp$. Let $f:[a,b] \rightarrow \R$ be continuous and have second derivatives almost everywhere in $\lb a,b \rb$. Let $a=x_0 \les x_1\les \cdots \les x_{N-1} \les x_N=b$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{b-a}{N}$, and $x_i = x_0+i\cdot h$. Let $x = \lb x_0 \: x_1\: \cdots \: x_N \rb$ and as such let $f\lp\lb x \rb_{*,*} \rp = \lb f(x_0) \: f(x_1)\: \cdots \: f(x_N) \rb$. Let $\mathsf{E}^{N,h,q,\ve}_{n} \in \neu$ be the neural network given by:
\begin{align} \begin{align}
\mathsf{E}^{N,h,q,\ve}_n = \xpn_n^{q,\ve} \bullet \etr^{N,h} \mathsf{E}^{N,h,q,\ve}_n = \xpn_n^{q,\ve} \bullet \etr^{N,h}
\end{align} \end{align}
@ -615,7 +618,7 @@ Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathc
\begin{proof} \begin{proof}
Note that for a fixed $T \in \lp 0,\infty \rp$ it is the case that $u_d\lp t,x \rp \in C^{1,2}\lp \lb 0,T\rb \times \R^d, \R \rp$ projects down to a function $\mathfrak{u}_d^T\lp x\rp \in C^2\lp \R^d, \R\rp$. Furthermore given a probability space $\lp \Omega, \mathcal{F}, \mathbb{P}\rp$ and a stochastic process $\mathcal{X}^{d,t,x}: \lb t,T\rb \times \Omega \rightarrow \R^d$, for a fixed outcome space $\omega_i \in \Omega$ it is the case that $\mathcal{X}^{d,t,x}$ projects down to $\mathcal{X}^{d,t,x}_{\omega_i}: \lb t,T\rb \rightarrow \R^d$. Thus given $\alpha_d: \R^d \rightarrow \R$ that is infinitely often differentiable, we get that $\alpha_d\circ \mathcal{X}_{\omega_i}^{d,t,x}: \lb t,T\rb \rightarrow\R$. Note that for a fixed $T \in \lp 0,\infty \rp$ it is the case that $u_d\lp t,x \rp \in C^{1,2}\lp \lb 0,T\rb \times \R^d, \R \rp$ projects down to a function $\mathfrak{u}_d^T\lp x\rp \in C^2\lp \R^d, \R\rp$. Furthermore given a probability space $\lp \Omega, \mathcal{F}, \mathbb{P}\rp$ and a stochastic process $\mathcal{X}^{d,t,x}: \lb t,T\rb \times \Omega \rightarrow \R^d$, for a fixed outcome space $\omega_i \in \Omega$ it is the case that $\mathcal{X}^{d,t,x}$ projects down to $\mathcal{X}^{d,t,x}_{\omega_i}: \lb t,T\rb \rightarrow \R^d$. Thus given $\alpha_d: \R^d \rightarrow \R$ that is infinitely often differentiable, we get that $\alpha_d\circ \mathcal{X}_{\omega_i}^{d,t,x}: \lb t,T\rb \rightarrow\R$.
Taken together with Lemma \ref{UE-prop} with $x \curvearrowleft \mathcal{X}^{d,t,x}_{r,\omega}, f \curvearrowleft \alpha_d\circ \mathcal{X}_{\omega_i}^{d,t,x}$, $b \curvearrowleft T$, $a \curvearrowleft t$, and $\mathfrak{u}_d^T\lp x\rp \curvearrowleft u_d \lp T,\mathcal{X}^{d,t,x}_{\omega_i}\rp$, our error term is rendered as is rendered as: Taken together with Lemma \ref{UE-prop} with $x \curvearrowleft \mathcal{X}^{d,t,x}_{r,\omega}, f \curvearrowleft \alpha_d\circ \mathcal{X}_{\omega_i}^{d,t,x}$, $b \curvearrowleft T$, $a \curvearrowleft t$, and $\mathfrak{u}_d^T\lp x\rp \curvearrowleft u_d \lp T,\mathcal{X}^{d,t,x}_{\omega_i}\rp$, our error term is rendered as:
\begin{align} \begin{align}
&\left| \exp \lp \int^T_t \lp \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds\rp \rp u_d\lp T,\mathcal{X}^{d,t,x}_{\omega_i}\rp - \real_{\rect}\lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \right|\nonumber\\ &\left| \exp \lp \int^T_t \lp \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds\rp \rp u_d\lp T,\mathcal{X}^{d,t,x}_{\omega_i}\rp - \real_{\rect}\lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \right|\nonumber\\
&\les 3\ve +2\ve \left| u_d\lp T,\mathcal{X}_{r,\omega_i}^{d,t,x}\rp\right|^q+2\ve \left| \exp \lp \int^b_afdx\rp\right|^q + \ve \left| \exp \lp \int^b_afdx\rp - \mathfrak{e}\right|^q -\mathfrak{e}u_d\lp T,\mathcal{X}^{d,t,x}_{r,\omega_i} \rp\nonumber &\les 3\ve +2\ve \left| u_d\lp T,\mathcal{X}_{r,\omega_i}^{d,t,x}\rp\right|^q+2\ve \left| \exp \lp \int^b_afdx\rp\right|^q + \ve \left| \exp \lp \int^b_afdx\rp - \mathfrak{e}\right|^q -\mathfrak{e}u_d\lp T,\mathcal{X}^{d,t,x}_{r,\omega_i} \rp\nonumber
@ -626,6 +629,7 @@ Note that for a fixed $T \in \lp 0,\infty \rp$ it is the case that $u_d\lp t,x \
Diagrammatically, this can be represented as: Diagrammatically, this can be represented as:
\begin{center} \begin{center}
\begin{figure}[h]
\tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt \tikzset{every picture/.style={line width=0.75pt}} %set default line width to 0.75pt
\begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=0.9] \begin{tikzpicture}[x=0.75pt,y=0.75pt,yscale=-1,xscale=0.9]
@ -692,10 +696,12 @@ Note that for a fixed $T \in \lp 0,\infty \rp$ it is the case that $u_d\lp t,x \
% Text Node % Text Node
\draw (490,225.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mymathbb{0}_{d,d} ,\mathcal{X}^{d,t,x}_{r,\omega_i}}$}; \draw (490,225.4) node [anchor=north west][inner sep=0.75pt] {$\mathsf{Aff}_{\mymathbb{0}_{d,d} ,\mathcal{X}^{d,t,x}_{r,\omega_i}}$};
\end{tikzpicture} \end{tikzpicture}
\caption{Neural network diagram for $\mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d, \omega_i}$}
\end{figure}
\end{center} \end{center}
\end{remark} \end{remark}
\section{The $\mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega,\fn}$ Neural Networks} \section{The $\mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega,\fn}$ Neural Networks}
\begin{lemma}\label{lem:sm_sum} \begin{lemma}\label{lem:sm_sum}
@ -757,7 +763,7 @@ Note that for a fixed $T \in \lp 0,\infty \rp$ it is the case that $u_d\lp t,x \
\end{proof} \end{proof}
\begin{lemma}[R\textemdash, 2024, Approximants for Brownian Motion]\label{ues} \begin{lemma}[R\textemdash, 2024, Approximants for Brownian Motion]\label{ues}
Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathcal{F}, \mathbb{P}\rp$ be a probability space. Let $n,N\in \N$, and $h \in \lp 0, \infty \rp$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $f:[t, T] \rightarrow \R$ be continuous almost everywhere in $\lb t, T \rb$. Let it also be the case that $f = g \circ \fh$, where $\fh: \lb t,T\rb \rightarrow \R^d$, and $g: \R^d \rightarrow \R$. Let $t=t_0 \les t_1\les \cdots \les t_{N-1} \les t_N=T$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{T-t}{N}$, and $t_i = t_0+i\cdot h$ . Let $\mathbf{t} = \lb t_0 \: t_1\: \cdots t_N \rb$ and as such let $f\lp\lb \mathbf{t} \rb_{*,*} \rp = \lb f(t_0) \: f(t_1)\: \cdots \: f(t_N) \rb$. Let $u_d \in C \lp \R^d,\R\rp$ satisfy for all $d \in \N$, $t \in \lb 0,T\rb$, $x \in \R^d$ that: Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathcal{F}, \mathbb{P}\rp$ be a probability space. Let $n,N\in \N$, and $h \in \lp 0, \infty \rp$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $f:[t, T] \rightarrow \R$ be continuous almost everywhere in $\lb t, T \rb$. Let it also be the case that $f = g \circ \fh$, where $\fh: \lb t,T\rb \rightarrow \R^d$, and $g: \R^d \rightarrow \R$. Let $t=t_0 \les t_1\les \cdots \les t_{N-1} \les t_N=T$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{T-t}{N}$, and $t_i = t_0+i\cdot h$ . Let $\mathbf{t} = \lb t_0 \: t_1\: \cdots t_N \rb$ and as such let $f\lp\lb \mathbf{t} \rb_{*,*} \rp = \lb f(t_0) \: f(t_1)\: \cdots \: f(t_N) \rb$. Let $u_d \in C \lp \R^d,\R\rp$, bounded by $\mathfrak{M}_{u,d}$ satisfy for all $d \in \N$, $t \in \lb 0,T\rb$, $x \in \R^d$ that:
\begin{align} \begin{align}
\lp \frac{\partial}{\partial t} u_d\rp \lp t,x\rp + \lp \nabla_x^2 u_d\rp \lp t,x \rp + \alpha_d\lp x\rp u_d \lp t,x\rp = 0 \lp \frac{\partial}{\partial t} u_d\rp \lp t,x\rp + \lp \nabla_x^2 u_d\rp \lp t,x \rp + \alpha_d\lp x\rp u_d \lp t,x\rp = 0
\end{align} \end{align}
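By Feynman-Kac, solutions of this terminal-value problem admit the stochastic representation $u_d\lp t,x\rp = \E \lb \exp \lp \int^T_t \alpha_d\lp \mathcal{X}_s\rp ds\rp u_d\lp T, \mathcal{X}_T\rp\rb$ with $\mathcal{X}$ a Brownian-driven process started at $x$ at time $t$, and it is this expectation that the Monte Carlo averages below, and ultimately the $\mathsf{UES}$ networks, emulate. The following is a hypothetical sketch in base \texttt{R}: the choices of $\alpha_d$, of the terminal condition, of all sizes, and of the $\sqrt{2}$ scaling (matching the un-normalized Laplacian in the display above) are our own illustrative assumptions, and this is not the network construction itself.
\begin{lstlisting}[language = R, style = rstyle, caption = {A hypothetical Monte Carlo sketch of the Feynman-Kac representation}]
# Hypothetical illustration (alpha_d, g, and all sizes are our own choices):
# Monte Carlo estimate of E[ exp( int_t^T alpha_d(X_s) ds ) * g(X_T) ],
# with dX_s = sqrt(2) dW_s matching the un-normalized Laplacian above.
set.seed(1)
d <- 5; t0 <- 0; bigT <- 1; x <- rep(0, d)
alpha_d <- function(y) -sum(y^2) / (1 + sum(y^2))   # bounded potential
g       <- function(y) exp(-sum(y^2))               # terminal condition u_d(T, .)

n_paths <- 2000; n_steps <- 100; dt <- (bigT - t0) / n_steps

one_path <- function() {
  X <- x; I <- 0
  for (k in 1:n_steps) {
    I <- I + alpha_d(X) * dt              # left-point quadrature of the integral
    X <- X + sqrt(2 * dt) * rnorm(d)      # Euler step of dX = sqrt(2) dW
  }
  exp(I) * g(X)
}

u_hat <- mean(replicate(n_paths, one_path()))
u_hat                                     # Monte Carlo estimate of u_d(t0, x)
\end{lstlisting}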
@ -867,7 +873,7 @@ Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathc
Let $N,n,\fn \in \N$, $h,\ve \in \lp 0,\infty\rp$, $q\in\lp 2,\infty\rp$, given $\mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d, \Omega, \fn} \subsetneq \neu $, it is then the case that: Let $N,n,\fn \in \N$, $h,\ve \in \lp 0,\infty\rp$, $q\in\lp 2,\infty\rp$, given $\mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d, \Omega, \fn} \subsetneq \neu $, it is then the case that:
\begin{align} \begin{align}
&\lp \E\lb \left| \E \lb \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb \right.\right.\right.\nonumber\\ &\left. \left.\left.-\frac{1}{\mathfrak{n}}\lp \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds \rp \cdot \fu_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rp \right|^2\rb\rp^{\frac{1}{2}} \nonumber \\ &\lp \E\lb \left| \E \lb \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb \right.\right.\right.\nonumber\\ &\left. \left.\left.-\frac{1}{\mathfrak{n}}\lp \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds \rp \cdot \fu_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rp \right|^2\rb\rp^{\frac{1}{2}} \nonumber \\
&\les \frac{\fk_2 }{\fn^{\frac{1}{2}}} \cdot \fL \lp T+1\rp \exp \lp LT\rp \lb \sup_{s\in \lb 0,T\rb} \lp \E \lb \lp 1+\left\| x + \cW_s\right\|^p\rp^2\rb\rp^{\frac{1}{2}}\rb &\les \frac{\fk_2}{\fn^{\frac{1}{2}}} \lp \E \lb \left| \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp\cX^{d,t,x}_{r,\Omega}\rp \right|^2\rb \rp^{\frac{1}{2}}
\end{align} \end{align}
\end{corollary} \end{corollary}
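The essential content of the corollary is the familiar $\fn^{-\frac{1}{2}}$ Monte Carlo rate. The following base \texttt{R} snippet checks that rate empirically on a toy bounded random variable of our own choosing; it is a sanity check of the rate only, not of the $\mathsf{UES}$ construction.
\begin{lstlisting}[language = R, style = rstyle, caption = {Empirical check of the Monte Carlo rate on a toy functional}]
# Sanity check of the n^{-1/2} Monte Carlo rate on a toy bounded random
# variable (our own choice; this is not the UES network construction).
set.seed(2)
payoff <- function(m) exp(runif(m, -1, 0)) * cos(rnorm(m))
truth  <- mean(payoff(10^6))                       # high-accuracy reference value

ns   <- 10^(2:5)
rmse <- sapply(ns, function(n) {
  errs <- replicate(200, mean(payoff(n)) - truth)  # 200 independent estimators
  sqrt(mean(errs^2))
})
round(cbind(n = ns, rmse = rmse, rmse_sqrt_n = rmse * sqrt(ns)), 4)
# the last column is roughly constant, i.e. rmse is approximately k / sqrt(n)
\end{lstlisting}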
@ -876,35 +882,48 @@ Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathc
\begin{proof} \begin{proof}
Note that $\E \lb \cX^{d,tx}_{r,\Omega}\rb < \infty$, and $\fu^T$ being bounded yields that $\E \lb \fu^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb < \infty$, and also that $\E \lb \alpha_d \circ \cX^{d,t,x}_{r,\Omega}\rb < \infty$. Thus we also see that $\E \lb \int^T_t\alpha_d\circ \cX^{d,t,x}_{r,\Omega} ds\rb < \infty$, and thus $\E \lb \exp \lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega}ds\rp\rb < \infty$. Thus together these two facts, along with the fact that the two factors are independent, then assert that $\E \lb \exp \lp \int^T_t \alpha_d\circ \cX^{d,t,x}_{r,\Omega}\rp \cdot \fu^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb < \infty$. Note that $\E \lb \cX^{d,tx}_{r,\Omega}\rb < \infty$, and $\fu^T$ being bounded yields that $\E \lb \fu^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb < \infty$, and also that $\E \lb \alpha_d \circ \cX^{d,t,x}_{r,\Omega}\rb < \infty$. Thus we also see that $\E \lb \int^T_t\alpha_d\circ \cX^{d,t,x}_{r,\Omega} ds\rb < \infty$, and thus $\E \lb \exp \lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega}ds\rp\rb < \infty$. Thus together these two facts, along with the fact that the two factors are independent by \cite[Corollary~2.5]{hutzenthaler_overcoming_2020}, then assert that $\E \lb \exp \lp \int^T_t \alpha_d\circ \cX^{d,t,x}_{r,\Omega}\rp \cdot \fu^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb < \infty$.
Note that \cite[Corollary~3.8]{hutzenthaler_strong_2021} tells us that: Note that \cite[Corollary~3.8]{hutzenthaler_strong_2021} tells us that:
\begin{align}\label{kk_application} \begin{align}\label{kk_application}
&\lp \E\lb \left| \E \lb \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb \right.\right.\right.\nonumber\\ &\left. \left.\left.-\frac{1}{\mathfrak{n}}\lp \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds \rp \cdot \fu_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rp \right|^2\rb\rp^{\frac{1}{2}} \nonumber \\ &\lp \E\lb \left| \E \lb \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb \right.\right.\right.\nonumber\\ &\left. \left.\left.-\frac{1}{\mathfrak{n}}\lp \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds \rp \cdot \fu_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rp \right|^2\rb\rp^{\frac{1}{2}} \nonumber \\
&\les \frac{\fk_2}{\fn^{\frac{1}{2}}} \lp \E \lb \left| \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp\cX^{d,t,x}_{r,\Omega}\rp \right|^2\rb \rp^{\frac{1}{2}} &\les \frac{\fk_2}{\fn^{\frac{1}{2}}} \lp \E \lb \left| \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp\cX^{d,t,x}_{r,\Omega}\rp \right|^2\rb \rp^{\frac{1}{2}} \nonumber \\
&\les \frac{\fk_2}{\fn^{\frac{1}{2}}}\lp \exp \lp \lp T-t\rp \mathfrak{M}_{\alpha,d}\rp \mathfrak{M}_{u,d}\rp
\end{align} \end{align}
For the purposes of this proof let it be the case that $\ff: [0,T] \rightarrow \R$ is the function represented for all $t \in \lb 0,T \rb$ as: This, combined with Lyapunov's Inequality for Expectation and the Triangle Inequality yields that:
\begin{align} \begin{align}
\ff\lp t\rp = \int^T_{T-t} \alpha_d\circ \cX^{d,t,x}_{r,\Omega} ds &\E\lb \left| \E \lb \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb \right.\right.\nonumber\\ &\left.\left.-\frac{1}{\mathfrak{n}}\lp \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds \rp \cdot \fu_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rp \right|\rb \nonumber \\
&\les \frac{\fk_2}{\fn^{\frac{1}{2}}}\lp \exp \lp \lp T-t\rp \mathfrak{M}_{\alpha,d}\rp \mathfrak{M}_{u,d}\rp
\end{align} \end{align}
In which case we haved that $\ff\lp 0\rp = 0$, and thus, stipulating $g\lp x\rp = \fu^T\lp \cX^{d,t,x}_{r,\Omega}\rp$ we may define $u\lp t,x\rp$ as the function given by: Finally, combining this with the linearity of expectation, the fact that the expectation of a deterministic constant or of a deterministic function is that constant or function itself, and the triangle inequality yields that:
\begin{align} \begin{align}
u\lp t,x\rp &= \exp \lp \ff\lp t\rp\rp \cdot g\lp x\rp \nonumber\\ &\E\lb \left| \E \lb \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb -\real_{\rect}\lp \mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega ,\fn}\rp\right|\rb \nonumber \\
&= \lb \exp\lp \ff\lp 0\rp\rp + \int_0^s \ff'\lp s\rp\cdot \exp \lp \ff\lp s\rp\rp ds\rb \cdot g\lp x\rp\nonumber \\ &\les \frac{\fk_2}{\fn^{\frac{1}{2}}}\lp \exp \lp \lp T-t\rp \mathfrak{M}_{\alpha,d}\rp \mathfrak{M}_{u,d}\rp \nonumber \\
&= g\lp x\rp + \int_0^s \ff'\lp s\rp \cdot \exp\lp \ff\lp s\rp\rp \cdot g\lp x\rp ds \nonumber\\ &+ 3\ve +2\ve \left| \mathfrak{u}_d^T\lp x\rp\right|^q+2\ve \left| \exp \lp \int^b_afdx\rp\right|^q + \ve \left| \exp \lp \int^b_afdx\rp - \mathfrak{e}\right|^q -\mathfrak{e}\mathfrak{u}_d^T\lp x \rp \nonumber
&= g\lp x\rp + \int^s_0 \ff'\lp s\rp\cdot u\lp s,x \rp ds \nonumber \\
&= g\lp x\rp+ \int^s_0 \fF \lp s,x, u\lp s,x \rp\rp ds
\end{align}
Then \cite[Corollary~2.5]{hutzenthaler_strong_2021} with $f \curvearrowleft \fF$, $u \curvearrowleft u$, $x+ \cW_{s-t} \curvearrowleft \cX^{d,t,s}_{r,\Omega}$, and tells us that with $q \curvearrowleft 2$ that:
\begin{align}
& \lp \E \lb \left| \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp\cX^{d,t,x}_{r,\Omega}\rp \right|^2\rb \rp^{\frac{1}{2}} \nonumber\\
&\les \fL \lp T+1\rp \exp \lp LT\rp \lb \sup_{s\in \lb 0,T\rb} \lp \E \lb \lp 1+\left\| x + \cW_s\right\|^p\rp^2\rb\rp^{\frac{1}{2}}\rb
\end{align}
Together with (\ref{kk_application}) we then get that:
\begin{align}
&\lp \E\lb \left| \E \lb \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb \right.\right.\right.\nonumber\\ &\left. \left.\left.-\frac{1}{\mathfrak{n}}\lp \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds \rp \cdot \fu_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rp \right|^2\rb\rp^{\frac{1}{2}} \nonumber \\
&\les \frac{\fk_p }{n^{\frac{1}{2}}} \cdot \fL \lp T+1\rp \exp \lp LT\rp \lb \sup_{s\in \lb 0,T\rb} \lp \E \lb \lp 1+\left\| x + \cW_s\right\|^p\rp^2\rb\rp^{\frac{1}{2}}\rb
\end{align} \end{align}
This completes the proof of the corollary.
% For the purposes of this proof let it be the case that $\ff: [0,T] \rightarrow \R$ is the function represented for all $t \in \lb 0,T \rb$ as:
% \begin{align}
% \ff\lp t\rp = \int^T_{T-t} \alpha_d\circ \cX^{d,t,x}_{r,\Omega} ds
% \end{align}
% In which case we haved that $\ff\lp 0\rp = 0$, and thus, stipulating $g\lp x\rp = \fu^T\lp \cX^{d,t,x}_{r,\Omega}\rp$ we may define $u\lp t,x\rp$ as the function given by:
% \begin{align}
% u\lp t,x\rp &= \exp \lp \ff\lp t\rp\rp \cdot g\lp x\rp \nonumber\\
% &= \lb \exp\lp \ff\lp 0\rp\rp + \int_0^s \ff'\lp s\rp\cdot \exp \lp \ff\lp s\rp\rp ds\rb \cdot g\lp x\rp\nonumber \\
% &= g\lp x\rp + \int_0^s \ff'\lp s\rp \cdot \exp\lp \ff\lp s\rp\rp \cdot g\lp x\rp ds \nonumber\\
% &= g\lp x\rp + \int^s_0 \ff'\lp s\rp\cdot u\lp s,x \rp ds \nonumber \\
% &= g\lp x\rp+ \int^s_0 \fF \lp s,x, u\lp s,x \rp\rp ds
% \end{align}
% Then \cite[Corollary~2.5]{hutzenthaler_strong_2021} with $f \curvearrowleft \fF$, $u \curvearrowleft u$, $x+ \cW_{s-t} \curvearrowleft \cX^{d,t,s}_{r,\Omega}$, and tells us that with $q \curvearrowleft 2$ that:
% \begin{align}
% & \lp \E \lb \left| \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp\cX^{d,t,x}_{r,\Omega}\rp \right|^2\rb \rp^{\frac{1}{2}} \nonumber\\
% &\les \fL \lp T+1\rp \exp \lp LT\rp \lb \sup_{s\in \lb 0,T\rb} \lp \E \lb \lp 1+\left\| x + \cW_s\right\|^p\rp^2\rb\rp^{\frac{1}{2}}\rb
% \end{align}
% Together with (\ref{kk_application}) we then get that:
% \begin{align}
% &\lp \E\lb \left| \E \lb \exp\lp \int^T_t \alpha_d \circ \cX^{d,t,x}_{r,\Omega } ds\rp \cdot \fu_d^T\lp \cX^{d,t,x}_{r,\Omega}\rp\rb \right.\right.\right.\nonumber\\ &\left. \left.\left.-\frac{1}{\mathfrak{n}}\lp \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds \rp \cdot \fu_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rp \right|^2\rb\rp^{\frac{1}{2}} \nonumber \\
% &\les \frac{\fk_p }{n^{\frac{1}{2}}} \cdot \fL \lp T+1\rp \exp \lp LT\rp \lb \sup_{s\in \lb 0,T\rb} \lp \E \lb \lp 1+\left\| x + \cW_s\right\|^p\rp^2\rb\rp^{\frac{1}{2}}\rb
% \end{align}
\end{proof} \end{proof}
% Note that Taylor's theorem states that: % Note that Taylor's theorem states that:
% \begin{align} % \begin{align}

View File

@ -41,9 +41,6 @@ Parts of this code have been released on \texttt{CRAN} under the package name \t
\lstinputlisting[language = R, style = rstyle, label = Phi, caption = {R code for $\Phi$}]{"/Users/shakilrafi/R-simulations/Phi.R"} \lstinputlisting[language = R, style = rstyle, label = Phi, caption = {R code for $\Phi$}]{"/Users/shakilrafi/R-simulations/Phi.R"}
\includegraphics{"/Users/shakilrafi/R-simulations/Phi_properties/Phi_diff_contour.png"}
\lstinputlisting[language = R, style = rstyle, label = Phi_properties, caption = {R code for simulations involving $\Phi$}]{"/Users/shakilrafi/R-simulations/Phi_properties.R"} \lstinputlisting[language = R, style = rstyle, label = Phi_properties, caption = {R code for simulations involving $\Phi$}]{"/Users/shakilrafi/R-simulations/Phi_properties.R"}
@ -51,18 +48,25 @@ Parts of this code have been released on \texttt{CRAN} under the package name \t
\lstinputlisting[language = R, style = rstyle, label = Sqr_properties, caption = {R code simulations involving $\sqr$}]{"/Users/shakilrafi/R-simulations/Sqr_properties.R"} \lstinputlisting[language = R, style = rstyle, label = Sqr_properties, caption = {R code simulations involving $\sqr$}]{"/Users/shakilrafi/R-simulations/Sqr_properties.R"}
\lstinputlisting[language = R, style = rstyle, label = Prd, caption = {R code simulations involving $\sqr$}]{"/Users/shakilrafi/R-simulations/Sqr_properties.R"}
\lstinputlisting[language = R, style = rstyle, label = Pwr, caption = {R code for $\pwr^{q,\ve}$ networks}]{"/Users/shakilrafi/R-simulations/Pwr.R"} \lstinputlisting[language = R, style = rstyle, label = Pwr, caption = {R code for $\pwr^{q,\ve}$ networks}]{"/Users/shakilrafi/R-simulations/Pwr.R"}
\lstinputlisting[language = R, style = rstyle, label = Pwr_3_properties, caption = {R code simulations involving $\pwr_3^{q,\ve}$}]{"/Users/shakilrafi/R-simulations/Pwr_3_properties.R"} \lstinputlisting[language = R, style = rstyle, label = Pwr_3_properties, caption = {R code simulations involving $\pwr_3^{q,\ve}$}]{"/Users/shakilrafi/R-simulations/Pwr_3_properties.R"}
\lstinputlisting[language = R, style = rstyle, label = Pwr_3_properties, caption = {R code simulations involving $\nrm^d_1$}]{"/Users/shakilrafi/R-simulations/Nrm.R"} \lstinputlisting[language = R, style = rstyle, label = Nrm, caption = {R code simulations involving $\nrm^d_1$}]{"/Users/shakilrafi/R-simulations/Nrm.R"}
\lstinputlisting[language = R, style = rstyle, label = Pwr_3_properties, caption = {R code simulations involving $\mxm_d$}]{"/Users/shakilrafi/R-simulations/Mxm.R"} \lstinputlisting[language = R, style = rstyle, label = Mxm, caption = {R code simulations involving $\mxm_d$}]{"/Users/shakilrafi/R-simulations/Mxm.R"}
\lstinputlisting[language = R, style = rstyle, label = Pwr_3_properties, caption = {R code simulations involving $\tay$}]{"/Users/shakilrafi/R-simulations/Tay.R"} \lstinputlisting[language = R, style = rstyle, label = Tay, caption = {R code simulations involving $\tay$, note that this implementation is different from how it is presented in the exposition. We chose to explicitly define the $\tay$ network, and let neural network exponentials, cosines, and sines be instantiations of this network with various different coefficients.}]{"/Users/shakilrafi/R-simulations/Tay.R"}
\lstinputlisting[language = R, style = rstyle, label = Pwr_3_properties, caption = {R code simulations involving $\etr$}]{"/Users/shakilrafi/R-simulations/Etr.R"} \lstinputlisting[language = R, style = rstyle, label = Csn, caption = {R code simulations for $\csn_n^{q,\ve}$}]{"/Users/shakilrafi/R-simulations/Csn.R"}
\lstinputlisting[language = R, style = rstyle, label = Sne, caption = {R code simulations for $\sne_n^{q,\ve}$}]{"/Users/shakilrafi/R-simulations/Sne.R"}
\lstinputlisting[language = R, style = rstyle, label = Etr, caption = {R code simulations involving $\etr$}]{"/Users/shakilrafi/R-simulations/Etr.R"}
\lstinputlisting[language = R, style = rstyle, label = MC, caption = {R code for Monte Carlo simulations}]{"/Users/shakilrafi/R-simulations/MC.R"}
\newpage \newpage

View File

@ -17,7 +17,7 @@ Overfitting presents an important challenge for all machine learning models, inc
We will also define the dropout operator introduced in \cite{srivastava_dropout_2014}, and explained further in \cite{Goodfellow-et-al-2016}. We will also define the dropout operator introduced in \cite{srivastava_dropout_2014}, and explained further in \cite{Goodfellow-et-al-2016}.
\begin{definition}[Realization with dropout] \begin{definition}[Instantiation with dropout]
Let $\nu \in \neu$, $L,n \in \N$, $p \in \lp 0,1\rp$, $\lay \lp \nu\rp = \lp l_0,l_1,\hdots, l_L\rp$, and that $\nu = \lp \lp W_1,b_1\rp, \lp W_2,b_2\rp, \hdots , \lp W_L,b_L\rp \rp$. Let it be the case that for each $n\in \N$, $\rho_n = \{ x_1,x_2,\hdots,x_n\} \in \R^n$ where for each $i \in \{1,2,\hdots,n\}$ it is the case that $x_i \sim \bern(p)$. We will then denote $\real_{\act}^{D,p} \lp \nu \rp \in C\lp \R^{\inn\lp \nu\rp},\R^{\out\lp \nu \rp}\rp$, the continuous function given by: Let $\nu \in \neu$, $L,n \in \N$, $p \in \lp 0,1\rp$, $\lay \lp \nu\rp = \lp l_0,l_1,\hdots, l_L\rp$, and that $\nu = \lp \lp W_1,b_1\rp, \lp W_2,b_2\rp, \hdots , \lp W_L,b_L\rp \rp$. Let it be the case that for each $n\in \N$, $\rho_n = \{ x_1,x_2,\hdots,x_n\} \in \R^n$ where for each $i \in \{1,2,\hdots,n\}$ it is the case that $x_i \sim \bern(p)$. We will then denote $\real_{\act}^{D,p} \lp \nu \rp \in C\lp \R^{\inn\lp \nu\rp},\R^{\out\lp \nu \rp}\rp$, the continuous function given by:
\begin{align} \begin{align}
\real_{\act}^{D,p}\lp \nu \rp = \rho_{l_L}\odot \act \lp W_L\lp \rho_{l_{L-1}} \odot \act \lp W_{L-1}\lp \hdots\rp + b_{L-1}\rp\rp + b_L\rp \real_{\act}^{D,p}\lp \nu \rp = \rho_{l_L}\odot \act \lp W_L\lp \rho_{l_{L-1}} \odot \act \lp W_{L-1}\lp \hdots\rp + b_{L-1}\rp\rp + b_L\rp
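A minimal forward-pass sketch may make this concrete. In the snippet below a network is stored simply as a list of weight-bias pairs (our own representation, not the interface of the \texttt{nnR} package), an i.i.d. Bernoulli$(p)$ mask is drawn for each layer's output and applied componentwise as in the display above, and, following the usual convention, the activation itself is applied on the hidden layers only.
\begin{lstlisting}[language = R, style = rstyle, caption = {A sketch of instantiation with dropout}]
# Minimal sketch of a ReLU forward pass with dropout masks; a network nu is
# a list of (W, b) pairs (our own representation, not the nnR interface).
relu <- function(x) pmax(0, x)

inst_dropout <- function(nu, x, p, act = relu) {
  L <- length(nu)
  for (k in seq_len(L)) {
    z <- nu[[k]]$W %*% x + nu[[k]]$b
    x <- if (k < L) act(z) else z                  # activation on hidden layers only
    mask <- rbinom(length(x), size = 1, prob = p)  # rho: i.i.d. Bernoulli(p) entries
    x <- mask * x                                  # componentwise (Hadamard) product
  }
  x
}

set.seed(3)
nu <- list(list(W = matrix(rnorm(8), 4, 2), b = rnorm(4)),   # lay(nu) = (2, 4, 1)
           list(W = matrix(rnorm(4), 1, 4), b = rnorm(1)))
inst_dropout(nu, c(1, -2), p = 0.8)
\end{lstlisting}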
@ -40,7 +40,7 @@ In such a case, note that the instantiation operation preserves the axiom of fun
Note for example that a neural network analog for derivatives, one that respects the chain rule under instantiation already exist in the literature, e.g. \cite{nn_diff}. Thus there is a growing and rather rich and growing set of algebraic operations that are and have been proposed for neural networks. Note for example that a neural network analog for derivatives, one that respects the chain rule under instantiation already exist in the literature, e.g. \cite{nn_diff}. Thus there is a growing and rather rich and growing set of algebraic operations that are and have been proposed for neural networks.
A further exploration of the algebraic properties of this artificial neural network framework could present a fruitful avenue of future study. Taken together, these facts seem to imply that a further exploration of the algebraic properties of this artificial neural network framework could present a fruitful avenue of future study. Much remains to be studied.
This completes this Dissertation. This completes this Dissertation.

View File

@ -1,6 +1,6 @@
\begin{singlespace} \begin{singlespace}
\begin{center} \begin{center}
Artificial Neural Networks Applied to Stochastic Monte Carlo as a Way to Approximate Modified Heat Equations, and Their Associated Parameters, Depths, and Accuracies. Analysis and Construction of Artificial Neural Networks for the Heat Equations, and Their Associated Parameters, Depths, and Accuracies.
\end{center} \end{center}
\vspace{0.5cm} \vspace{0.5cm}
\begin{center} \begin{center}
@ -62,6 +62,7 @@ Committee Member
\end{center} \end{center}
\vspace{1cm} \vspace{1cm}
\end{singlespace} \end{singlespace}
\newpage \newpage
\begin{center} \begin{center}
\textbf{Abstract} \textbf{Abstract}
@ -79,14 +80,16 @@ We will, in the last chapter, look at how the technology of neural networks deve
As an added bonus we will also look at the simplified MLP technique from the previous chapters of this dissertation and show that, yes, it can indeed be approximated with artificial neural networks, and that, yes, this can be done with neural networks whose parameter and depth counts grow only polynomially in $\frac{1}{\ve}$. As an added bonus we will also look at the simplified MLP technique from the previous chapters of this dissertation and show that, yes, it can indeed be approximated with artificial neural networks, and that, yes, this can be done with neural networks whose parameter and depth counts grow only polynomially in $\frac{1}{\ve}$.
\\~\\ \\~\\
Our appendix will contain code listings of these neural network operations, some of the architectures, and some small scale simulation results. Our appendix will contain code listings of these neural network operations, some of the architectures, and some small scale simulation results.
\newpage \newpage
\begin{center} \begin{center}
\vspace*{\fill} \vspace*{\fill}
\copyright 2024 by Shakil Ahmed Rafi \\ \copyright\: 2024 by Shakil Ahmed Rafi \\
All Rights Reserved. All Rights Reserved.
\vspace*{\fill} \vspace*{\fill}
\end{center} \end{center}
\newpage \newpage
\begin{center} \begin{center}
\textbf{Acknowledgements} \textbf{Acknowledgements}
@ -99,9 +102,10 @@ I would like to acknowledge Marufa Mumu for believing in me when I didn't. You r
\\~\\ \\~\\
I would like to acknowledge my cat, a beautiful Turkish Angora, Tommy. He was pretty useless, but stroking his fur made me stress a little less. I would like to acknowledge my cat, a beautiful Turkish Angora, Tommy. He was pretty useless, but stroking his fur made me stress a little less.
\\~\\ \\~\\
I would like to acknowledge my office-mate Eric Walker, without whom I would never haver realized that rage and spite are equally as valid motivators as encouragement and praise. I would like to acknowledge my office-mate Eric Walker, without whom I would never have realized that rage and spite are equally as valid motivators as encouragement and praise.
\\~\\ \\~\\
Finally, I would like to thank Valetta Ventures, Inc. and their product Texifier. It is a marvel of software engineering and made the process of creating this dissertation much less painful than it otherwise would have been. Finally, I would like to thank Valetta Ventures, Inc. and their product Texifier. It is a marvel of software engineering and made the process of creating this dissertation much less painful than it otherwise would have been.
\newpage \newpage
\begin{center} \begin{center}
@ -113,7 +117,7 @@ Finally, I would like to thank Valetta Ventures, Inc. and their product Texifier
Kamal Uddin Ahmed, M.A. \& Shahnaz Parveen, M.A.,\\ Kamal Uddin Ahmed, M.A. \& Shahnaz Parveen, M.A.,\\
who kept faith in me, always; \\ who kept faith in me, always; \\
and finally to my brothers, \\ and finally to my brothers, \\
Wakil Ahmed Shabi, BBA \& Nabeel Ahmed Sami, B.Eng., \\ Wakil Ahmed Shabi, BBA \& Nabbil Ahmed Sami, B.Eng., \\
for whom I have been somewhat imperfect a role model.\\ for whom I have been somewhat imperfect a role model.\\
@ -127,7 +131,7 @@ Finally, I would like to thank Valetta Ventures, Inc. and their product Texifier
\begin{center} \begin{center}
\textbf{Epigraph}\\~\\ \textbf{Epigraph}\\~\\
\textit{Read, in the name of your Lord}\\ \textit{Read, in the name of your Lord}\\
\textemdash Surah Al-Alaq:1\\~\\ \textemdash Surah Al-Alaq:\:1\\~\\
\textit{The conquest of nature must be achieved with number and measure.} \\ \textit{The conquest of nature must be achieved with number and measure.} \\
\textemdash Ren\'e Descartes \\ \textemdash Ren\'e Descartes \\
\vspace*{\fill} \vspace*{\fill}
@ -136,12 +140,16 @@ Finally, I would like to thank Valetta Ventures, Inc. and their product Texifier
\newpage \newpage
\tableofcontents \tableofcontents
\listoffigures \listoffigures
\listoftables
\newpage \newpage
\textbf{List of Published Papers} \\~\\ \textbf{List of Published Papers} \\~\\
Parts of Chapter \ref{chp:ann_prod} have been published as \textit{An Algebraic Framework for Understanding Fully Connected Feedforward Artificial Neural Networks, and Their Associated Parameter, Depth, and Accuracy Properties} by Rafi S., Padgett, J.L., and Nakarmi, U. and is currently undergoing review for publication for ICML 2024 at Vienna, Austria. Parts of Chapter \ref{chp:ann_prod} have been made into a paper, \textit{An Algebraic Framework for Understanding Fully Connected Feedforward Artificial Neural Networks, and Their Associated Parameter, Depth, and Accuracy Properties}, by Rafi S., Padgett, J.L., and Nakarmi, U., which is currently undergoing review for publication at ICML 2024 in Vienna, Austria.
\\~\\ \\~\\
Parts of the simulation codebase have been submitted for review as \textit{nnR: Neural Networks Made Algebraic} at the Journal of Open Source Software. Parts of the simulation codebase have been submitted for review as \textit{nnR: Neural Networks Made Algebraic} at \textit{The R Journal}. They have further been published as a package \texttt{nnR} currently available on \texttt{CRAN}.

View File

@ -585,7 +585,7 @@ archivePrefix = {arXiv},
year={2019}, year={2019},
volume={}, volume={},
number={}, number={},
pages={1-5}, pages={1--5},
keywords={Neural networks;Standards;Approximation methods;Machine learning;Partial differential equations;Level set}, keywords={Neural networks;Standards;Approximation methods;Machine learning;Partial differential equations;Level set},
doi={10.1109/SampTA45681.2019.9031005}} doi={10.1109/SampTA45681.2019.9031005}}

Binary file not shown.

View File

@ -2,10 +2,10 @@
\include{commands} \include{commands}
\begin{document} \begin{document}
\pagenumbering{gobble}
\include{front_matter} \include{front_matter}
\pagenumbering{arabic}
\part{On Convergence of Brownian Motion Monte Carlo} \part{On Convergence of Brownian Motion Monte Carlo}
\include{Introduction} \include{Introduction}
@ -26,7 +26,7 @@
\include{ann_first_approximations} \include{ann_first_approximations}
\part{A deep-learning solution for $u$ and Brownian motions} \part{Artificial Neural Networks for $u$ and Brownian Motions}
\include{ann_rep_brownian_motion_monte_carlo} \include{ann_rep_brownian_motion_monte_carlo}

View File

@ -1,5 +1,7 @@
\chapter{Introduction and Basic Notions About Neural Networks} \chapter{Introduction and Basic Notions About Neural Networks}
We seek here to introduce a unified framework for artificial neural networks. This framework borrows from the work presented in \cite{grohsetal} and work done by Joshua Padgett, Benno Kuckuk, and Arnulf Jentzen (unpublished). With this framework in place, we wish to study ANNs from the perspective of trying to see the number of parameters required to define a neural network to solve certain PDEs. The \textit{curse of dimensionality} here refers to the number of parameters necessary to model PDEs and their growth (exponential or otherwise) as dimensions $d$ increase. We seek here to introduce a unified framework for artificial neural networks. This framework borrows from the work presented in \cite{grohsetal}, which was in turn inspired by work done in \cite{petersen_optimal_2018}. The most recent exposition of this framework can be found in \cite{bigbook}, and it is this exposition upon which our work is based and which we extend.
With this framework in place, we wish to study ANNs from the perspective of the number of parameters required to define a neural network that solves certain PDEs. The \textit{curse of dimensionality} here refers to the growth of the number of parameters and the depth of a neural network needed to approximate a function to a given accuracy. Specifically, a scheme is said to beat the curse of dimensionality if the number of parameters and the depth needed to approximate the underlying function to accuracy $\ve$ (specifically, an upper bound of $\ve$ on the $1$-norm difference between the approximant and the function over the entire domain) grow only polynomially, or at least sub-exponentially, in $\frac{1}{\ve}$.
\section{The Basic Definition of ANNs and instantiations of ANNs} \section{The Basic Definition of ANNs and instantiations of ANNs}
\begin{definition}[Rectifier Function] \begin{definition}[Rectifier Function]
@ -8,13 +10,19 @@ We seek here to introduce a unified framework for artificial neural networks. Th
\rect(x) = \max \left\{ 0,x\right\} \rect(x) = \max \left\{ 0,x\right\}
\end{align} \end{align}
\end{definition} \end{definition}
\begin{remark}
By analogy, the multidimensional rectifier function, defined for $x = \lb x_1 \: x_2 \: \cdots \: x_n\rb^\intercal \in \R^n$, is:
\begin{align}
\rect ([x]_*) = \left[ \max\{ 0,x_1\} \: \max \{ 0,x_2\}\: \cdots \max\{ 0,x_n\}\right]^\intercal
\end{align}
\end{remark}
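In \texttt{R} both the scalar rectifier and its componentwise version are a single line; the snippet below is only an illustration and is not tied to any particular listing in the appendix.
\begin{lstlisting}[language = R, style = rstyle, caption = {The rectifier in base R}]
relu <- function(x) pmax(0, x)   # works componentwise on scalars and vectors alike
relu(c(-1.5, 0, 2.3))            # 0.0 0.0 2.3
\end{lstlisting}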
\begin{definition}[Artificial Neural Networks]\label{5.1.2}\label{def:nn_def} \begin{definition}[Artificial Neural Networks]\label{5.1.2}\label{def:nn_def}
Denote by $\neu$ the set given by: Denote by $\neu$ the set given by:
\begin{align} \begin{align}
\neu = \bigcup_{L\in \N} \bigcup_{l_0,l_1,...,l_L \in \N} \lp \bigtimes^L_{k=1} \lb \R^{l_k \times l_{k-1}} \times \R^{l_k}\rb \rp \neu = \bigcup_{L\in \N} \bigcup_{l_0,l_1,...,l_L \in \N} \lp \bigtimes^L_{k=1} \lb \R^{l_k \times l_{k-1}} \times \R^{l_k}\rb \rp
\end{align} \end{align}
An artificial neural network is a tuple $\lp \nu, \param, \dep, \inn, \out, \hid, \lay, \wid \rp $ where $\nu \in \neu$ and is equipped with the following functions (referred to as auxiliary functions) satisfying for all \\$\nu \in \lp \bigtimes^L_{k=1} \lb \R^{l_k \times l_{k-1}} \times \R^{l_k}\rb \rp$: An artificial neural network is a tuple $\lp \nu, \param, \dep, \inn, \out, \hid, \lay, \wid \rp $ where $\nu \in \neu$ and is equipped with the following functions (referred to as auxiliary functions) satisfying for all \\$\nu \in \lp \bigtimes^L_{k=1} \lb \R^{l_k \times l_{k-1}} \times \R^{l_k}\rb \rp$ that:
\begin{enumerate}[label = (\roman*)] \begin{enumerate}[label = (\roman*)]
\item $\param: \neu \rightarrow \N$ denoting the number of parameters of $\nu$, given by: \item $\param: \neu \rightarrow \N$ denoting the number of parameters of $\nu$, given by:
\begin{align}\label{paramdef} \begin{align}\label{paramdef}
@ -70,18 +78,9 @@ We seek here to introduce a unified framework for artificial neural networks. Th
\end{definition} \end{definition}
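To make the bookkeeping concrete, the sketch below stores a network $\nu$ as a list of weight-bias pairs and reads off $\lay$, $\dep$, $\hid$, $\inn$, $\out$, and $\param$ from that representation. This is a stand-alone illustration in base \texttt{R} rather than the \texttt{nnR} interface, and the parameter count used is assumed to be the usual $\sum_{k=1}^L l_k\lp l_{k-1}+1\rp$.
\begin{lstlisting}[language = R, style = rstyle, caption = {A sketch of the auxiliary functions}]
# Stand-alone sketch (not the nnR interface): a network nu is a list of
# (W, b) pairs, from which the auxiliary functions can be read off.
lay_fn   <- function(nu) c(ncol(nu[[1]]$W), sapply(nu, function(l) nrow(l$W)))
dep_fn   <- function(nu) length(nu)
hid_fn   <- function(nu) dep_fn(nu) - 1
inn_fn   <- function(nu) lay_fn(nu)[1]
out_fn   <- function(nu) lay_fn(nu)[dep_fn(nu) + 1]
param_fn <- function(nu)              # assumed count: sum_k l_k * (l_{k-1} + 1)
  sum(sapply(nu, function(l) length(l$W) + length(l$b)))

nu <- list(list(W = matrix(0, 8, 6), b = numeric(8)),
           list(W = matrix(0, 6, 8), b = numeric(6)),
           list(W = matrix(0, 3, 6), b = numeric(3)))   # lay(nu) = (6, 8, 6, 3)
lay_fn(nu); dep_fn(nu); param_fn(nu)                    # (6 8 6 3), 3, and 131
\end{lstlisting}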
\begin{figure} \begin{figure}
\begin{center} \begin{center}
\begin{neuralnetwork} \includegraphics[scale=0.5]{nn-example.png}
\newcommand{\x}[2]{$x_0$}
\newcommand{\y}[2]{$x_3$}
\newcommand{\hfirst}[2]{\small $x_1$}
\newcommand{\hsecond}[2]{\small $x_2$}
\inputlayer[count=3, title=, text=\x]
\hiddenlayer[count=4, title=, text=\hfirst] \linklayers
\hiddenlayer[count=3, title=, text=\hsecond] \linklayers
\outputlayer[count=2, title=, text=\y] \linklayers
\end{neuralnetwork}
\end{center} \end{center}
\caption{A neural network $\nu$ with $\lay \lp \nu \rp = \lp 4,4,3,2\rp$} \caption{A neural network $\nu$ with $\lay \lp \nu \rp = \lp 6,8,6,3\rp$}
\end{figure} \end{figure}
\begin{remark} \begin{remark}
For an R implementation see Listings \ref{nn_creator}, \ref{aux_fun}, \ref{activations}, and \ref{instantiation}. For an R implementation see Listings \ref{nn_creator}, \ref{aux_fun}, \ref{activations}, and \ref{instantiation}.
@ -101,7 +100,7 @@ We seek here to introduce a unified framework for artificial neural networks. Th
\end{align} \end{align}
This ensures that there exist $l_0,l_1,...,l_L,L \in \N$ such that: This ensures that there exist $l_0,l_1,...,l_L,L \in \N$ such that:
\begin{align} \begin{align}
\nu \in \lp \bigtimes^L_{j=1} \lb \R^{l_j \times l_{j-1}} \times \R^{\l_j} \rb \rp \nu \in \lp \bigtimes^L_{j=1} \lb \R^{l_j \times l_{j-1}} \times \R^{l_j} \rb \rp
\end{align} \end{align}
This also ensures that $\lay(\nu) = \lp l_0,l_1,...,l_L \rp \in \N^{L+1} = \N^{\dep(\nu)+1}$ and further that $\inn(\nu) = l_0$, $\out(\nu) = l_L$, and that $\dep(\nu) = L$. Together with ($\ref{5.1.11}$), this proves the lemma. This also ensures that $\lay(\nu) = \lp l_0,l_1,...,l_L \rp \in \N^{L+1} = \N^{\dep(\nu)+1}$ and further that $\inn(\nu) = l_0$, $\out(\nu) = l_L$, and that $\dep(\nu) = L$. Together with ($\ref{5.1.11}$), this proves the lemma.
\end{proof} \end{proof}
@ -142,7 +141,7 @@ The first operation we want to be able to do is to compose neural networks. Note
\end{enumerate} \end{enumerate}
\end{lemma} \end{lemma}
\begin{proof} \begin{proof}
This is a consequence of (\ref{5.2.1}), which implies both (i) and (ii). This is a consequence of (\ref{5.2.1}), which implies (i)\textemdash (ii).
\end{proof} \end{proof}
\begin{lemma} \label{5.2.3} \begin{lemma} \label{5.2.3}
Let $\nu_1,\nu_2,\nu_3 \in \neu$ satisfy that $\inn(\nu_1) = \out(\nu_2)$ and $\inn(\nu_2) = \out(\nu_3)$, it is then the case\\ that: Let $\nu_1,\nu_2,\nu_3 \in \neu$ satisfy that $\inn(\nu_1) = \out(\nu_2)$ and $\inn(\nu_2) = \out(\nu_3)$, it is then the case\\ that:
@ -168,7 +167,7 @@ The following Lemma will be important later on, referenced numerous times, and f
\end{enumerate} \end{enumerate}
\end{lemma} \end{lemma}
\begin{proof} \begin{proof}
Note that Items (i)---(iii) are a simple consequence of Definition \ref{5.2.1}. Specifically, given neural networks $\nu_1,\nu_2 \in \neu$, and $\dep\lp \nu_1\rp = n$ and $\dep \lp \nu_2\rp = m$, note that for all four cases, we have that the depth of the composed neural network $\nu_1 \bullet \nu_2$ is given by $n-1+m-1=n+m-1$ proving Item (i). Note that the outer neural network loses its last layer, yielding Item (ii) in all four cases. Finally since, for all $\nu \in \neu$ it is the case that $\hid \lp \nu\rp =\dep \lp \nu\rp-1$, Item (i) yields Item (iii). Note that Items (i)---(iii) are a simple consequence of Definition \ref{5.2.1}. Specifically, given neural networks $\nu_1,\nu_2 \in \neu$, and $\dep\lp \nu_1\rp = n$ and $\dep \lp \nu_2\rp = m$, note that for all four cases, we have that the depth of the composed neural network $\nu_1 \bullet \nu_2$ is given by $n-1+m-1+1=n+m-1$ proving Item (i). Note that the outer neural network loses its last layer, yielding Item (ii) in all four cases. Finally since, for all $\nu \in \neu$ it is the case that $\hid \lp \nu\rp =\dep \lp \nu\rp-1$, Item (i) yields Item (iii).
Now, suppose it is the case that $\nu_3 = \nu_1\bullet \nu_2$ and that: Now, suppose it is the case that $\nu_3 = \nu_1\bullet \nu_2$ and that:
\begin{align} \begin{align}
@ -254,7 +253,7 @@ The following Lemma will be important later on, referenced numerous times, and f
This and (\ref{comp_cont}) then prove Item (v), hence proving the lemma. This and (\ref{comp_cont}) then prove Item (v), hence proving the lemma.
\end{proof} \end{proof}
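The depth count in Item (i) can be checked directly on the list-of-pairs representation used in the sketches above. The snippet below implements the usual merge convention for $\bullet$ (the last affine map of the inner network is combined with the first affine map of the outer network, as in \cite{grohsetal}-style frameworks); since the full definition of $\bullet$ is not reproduced in this excerpt, that convention is an assumption here, and the final line checks the corresponding instantiation identity numerically.
\begin{lstlisting}[language = R, style = rstyle, caption = {A sketch of composition under the usual merge convention}]
# Sketch of nu1 bullet nu2 under the usual merge convention (assumed here):
# nu2 acts first, and its last affine map is merged with nu1's first one.
compose_nn <- function(nu1, nu2) {
  L2 <- length(nu2)
  merged <- list(W = nu1[[1]]$W %*% nu2[[L2]]$W,
                 b = nu1[[1]]$W %*% nu2[[L2]]$b + nu1[[1]]$b)
  c(nu2[-L2], list(merged), nu1[-1])
}

inst <- function(nu, x, act = function(z) pmax(0, z)) {   # plain ReLU instantiation
  L <- length(nu)
  for (k in seq_len(L)) {
    z <- nu[[k]]$W %*% x + nu[[k]]$b
    x <- if (k < L) act(z) else z
  }
  x
}

set.seed(4)
mk  <- function(l) lapply(2:length(l), function(k)
  list(W = matrix(rnorm(l[k] * l[k - 1]), l[k], l[k - 1]), b = rnorm(l[k])))
nu1 <- mk(c(3, 5, 2)); nu2 <- mk(c(4, 6, 3))        # out(nu2) = inn(nu1) = 3
length(compose_nn(nu1, nu2)) == length(nu1) + length(nu2) - 1        # depth n + m - 1
x <- rnorm(4)
all.equal(inst(compose_nn(nu1, nu2), x), inst(nu1, inst(nu2, x)))    # TRUE
\end{lstlisting}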
\section{Stacking of ANNs} \section{Stacking of ANNs}
We will introduce here the important concept of stacking of ANNs. Given an input vector $x\in \R^d$, it is sometimes very helpful to imagine two neural networks working on them simultaneously, whence we have stacking. Because vectors are ordered tuples, stacking $\nu_1$ and $\nu_2$ is not necessarily the same as stacking $\nu_2$ and $\nu_1$. We will introduce here the important concept of stacking of ANNs. Given an input vector $x\in \R^d$, it is sometimes very helpful to imagine two neural networks working on it simultaneously, whence we have stacking. Because vectors are ordered tuples, stacking $\nu_1$ and $\nu_2$ is not necessarily the same as stacking $\nu_2$ and $\nu_1$. We will thus forgo the phrase ``parallelization'' used in e.g. \cite{grohs2019spacetime} and \cite{bigbook}, and opt to use the term ``stacking''. This is because parallelization suggests commutativity, whereas it is clearly not the case that $\nu_1 \boxminus \nu_2$ is the same as $\nu_2 \boxminus \nu_1$.
\subsection{Stacking of ANNs of Equal Depth} \subsection{Stacking of ANNs of Equal Depth}
\begin{definition}[Stacking of ANNs of same depth]\label{5.2.5}\label{def:stacking} \begin{definition}[Stacking of ANNs of same depth]\label{5.2.5}\label{def:stacking}
Let $L,n\in \N$, and let $\nu_1,\nu_2,\hdots, \nu_n \in \neu$, such that $\dep\lp \nu_1\rp= \dep \lp \nu_2\rp= \cdots = \dep\lp \nu_n\rp = L$. As such, for all $i \in \{1,\hdots,n\}$, let it also be the case that $\lay\lp \nu_i\rp = \lp \lp W_1^i,b^i_1\rp, \lp W^i_2,b^i_2\rp,\hdots, \lp W_L^i,b_L^i\rp \rp$. We then denote by $\boxminus^n_{i=1}\nu_i$, the neural network whose layer architecture is given by: Let $L,n\in \N$, and let $\nu_1,\nu_2,\hdots, \nu_n \in \neu$, such that $\dep\lp \nu_1\rp= \dep \lp \nu_2\rp= \cdots = \dep\lp \nu_n\rp = L$. As such, for all $i \in \{1,\hdots,n\}$, let it also be the case that $\lay\lp \nu_i\rp = \lp \lp W_1^i,b^i_1\rp, \lp W^i_2,b^i_2\rp,\hdots, \lp W_L^i,b_L^i\rp \rp$. We then denote by $\boxminus^n_{i=1}\nu_i$, the neural network whose layer architecture is given by:
@ -265,7 +264,7 @@ We will introduce here the important concept of stacking of ANNs. Given an input
\end{definition} \end{definition}
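Under the block-diagonal reading of $\boxminus$ (an assumption here, since the displayed layer formula is cut off in this excerpt), the content of Lemma \ref{inst_of_stk} below can be checked numerically: instantiating the stacked network on a concatenated input returns the concatenation of the individual instantiations.
\begin{lstlisting}[language = R, style = rstyle, caption = {A numerical check of stacking under the block-diagonal reading}]
# Stacking of two equal-depth networks via block-diagonal weights and
# concatenated biases (an assumed reading of boxminus; representation as above).
block_diag <- function(A, B) {
  M <- matrix(0, nrow(A) + nrow(B), ncol(A) + ncol(B))
  M[1:nrow(A), 1:ncol(A)] <- A
  M[nrow(A) + 1:nrow(B), ncol(A) + 1:ncol(B)] <- B
  M
}
stack_nn <- function(nu1, nu2)                  # dep(nu1) == dep(nu2) assumed
  lapply(seq_along(nu1), function(k)
    list(W = block_diag(nu1[[k]]$W, nu2[[k]]$W),
         b = c(nu1[[k]]$b, nu2[[k]]$b)))

inst <- function(nu, x, act = function(z) pmax(0, z)) {
  L <- length(nu)
  for (k in seq_len(L)) {
    z <- nu[[k]]$W %*% x + nu[[k]]$b
    x <- if (k < L) act(z) else z
  }
  x
}

set.seed(5)
mk  <- function(l) lapply(2:length(l), function(k)
  list(W = matrix(rnorm(l[k] * l[k - 1]), l[k], l[k - 1]), b = rnorm(l[k])))
nu1 <- mk(c(2, 4, 1)); nu2 <- mk(c(3, 5, 2))
x1 <- rnorm(2); x2 <- rnorm(3)
all.equal(as.numeric(inst(stack_nn(nu1, nu2), c(x1, x2))),
          c(as.numeric(inst(nu1, x1)), as.numeric(inst(nu2, x2))))   # TRUE
\end{lstlisting}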
\begin{remark} \begin{remark}
For an \texttt{R} implementation see Listing \ref{par} For an \texttt{R} implementation see Listing \ref{stk}
\end{remark} \end{remark}
\begin{lemma}\label{inst_of_stk} \begin{lemma}\label{inst_of_stk}
Let $\nu_1,\nu_2\in \neu$, with $\dep\lp \nu_1\rp = \dep\lp \nu_2\rp$, $x_1 \in \R^{m_1}$, $x_2 \in \R^{m_2}$, and $\mathfrak{x} \in \R^{m_1+m_2}$. Let $\inst_{\rect}\lp \nu_1\rp: \R^{m_1} \rightarrow \R^{n_1}$, and $\inst_{\rect}:\R^{m_2} \rightarrow \R^{n_2}$. It is then the case that $\real_{\rect}\lp \nu_1\boxminus\nu_2\rp\lp \mathfrak{x}\rp = \inst_{\rect}\lp \nu_1\rp\lp x_1\rp \frown \inst_{\rect}\lp \nu_2\rp\lp x_2\rp$. Let $\nu_1,\nu_2\in \neu$, with $\dep\lp \nu_1\rp = \dep\lp \nu_2\rp$, $x_1 \in \R^{m_1}$, $x_2 \in \R^{m_2}$, and $\mathfrak{x} \in \R^{m_1+m_2}$. Let $\inst_{\rect}\lp \nu_1\rp: \R^{m_1} \rightarrow \R^{n_1}$, and $\inst_{\rect}:\R^{m_2} \rightarrow \R^{n_2}$. It is then the case that $\real_{\rect}\lp \nu_1\boxminus\nu_2\rp\lp \mathfrak{x}\rp = \inst_{\rect}\lp \nu_1\rp\lp x_1\rp \frown \inst_{\rect}\lp \nu_2\rp\lp x_2\rp$.
@ -353,6 +352,7 @@ We will introduce here the important concept of stacking of ANNs. Given an input
0 \les \param\lp \nu_3\rp - \param\lp \nu_2\rp &\les \param\lp \nu_1 \boxminus\nu_3\rp - \param\lp \nu_1\boxminus\nu_2\rp \nonumber\\ 0 \les \param\lp \nu_3\rp - \param\lp \nu_2\rp &\les \param\lp \nu_1 \boxminus\nu_3\rp - \param\lp \nu_1\boxminus\nu_2\rp \nonumber\\
\param\lp \nu_1 \boxminus\nu_2\rp &\les \param\lp \nu_1 \boxminus \nu_3\rp \nonumber \param\lp \nu_1 \boxminus\nu_2\rp &\les \param\lp \nu_1 \boxminus \nu_3\rp \nonumber
\end{align} \end{align}
This completes the proof of the Corollary.
\end{proof} \end{proof}
@ -410,7 +410,7 @@ We will often encounter neural networks that we want to stack but have unequal d
\item \begin{align}\label{7.2.2} \item \begin{align}\label{7.2.2}
\id_d = \boxminus^d_{i=1} \id_1 \id_d = \boxminus^d_{i=1} \id_1
\end{align} \end{align}
For $d>1$. For $d \in \N \cap \lb 2,\infty\rp$; a short numerical check of the underlying identity construction is given just after this definition.
\end{enumerate} \end{enumerate}
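The construction behind $\id_1$ is commonly the ReLU identity trick $x = \rect\lp x\rp - \rect\lp -x\rp$; since the definition of $\id_1$ itself is not reproduced in this excerpt, that construction is assumed in the short check below.
\begin{lstlisting}[language = R, style = rstyle, caption = {A check of the assumed ReLU identity construction}]
# Assumed construction behind id_1: x = relu(x) - relu(-x).
relu <- function(x) pmax(0, x)
id_1 <- function(x) c(1, -1) %*% relu(c(1, -1) * x)   # weights ((1,-1)^T, 0) then ((1 -1), 0)
sapply(c(-3.2, 0, 4.7), id_1)                         # -3.2 0.0 4.7
\end{lstlisting}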
\begin{remark} \begin{remark}
We will discuss some properties of $\id_d$ in Section \ref{sec_tun}. We will discuss some properties of $\id_d$ in Section \ref{sec_tun}.
@ -428,16 +428,16 @@ We will often encounter neural networks that we want to stack but have unequal d
We will drop the requirement for $d$ and $\tun_n$ by itself will be used to denote $\tun_n^1$. We will drop the requirement for $d$ and $\tun_n$ by itself will be used to denote $\tun_n^1$.
\end{definition} \end{definition}
\begin{remark} \begin{remark}
We will discuss some properties of the $\tun^d_n$ network in Section \ref{sec_tun}. We will discuss some properties of the $\tun^d_n$ network in Section \ref{sec_tun}. We will also discuss properties of wider tunneling neural networks in Lemma \ref{tun_mult}.
\end{remark} \end{remark}
\begin{definition} \begin{definition}
Let $n \in \N$, and $\nu_1,\nu_2,...,\nu_n \in \neu$. We will define the stacking of unequal length neural networks, denoted $\DDiamond^n_{i=1}\nu_i$ as the neural network given by: Let $n \in \N$, and $\nu_1,\nu_2,...,\nu_n \in \neu$. We will define the stacking of unequal length neural networks, denoted $\DDiamond^n_{i=1}\nu_i$ as the neural network given by:
\begin{align} \begin{align}
\DDiamond^n_{i=1}\nu_i = \DDiamond^n_{i=1}\nu_i =
\boxminus^n_{i=1} \lb \tun_{\max_i \left\{\dep \lp \nu_i \rp\right\} +1 - \dep \lp \nu_i\rp} \bullet \nu_i \rb \boxminus^n_{i=1} \lb \tun_{\max_i \left\{\dep \lp \nu_i \rp\right\} +1 - \dep \lp \nu_i\rp}^{\out \lp \nu_i\rp} \bullet \nu_i \rb
\end{align} \end{align}
\end{definition} \end{definition}
Diagrammatically, this can be thought of as: Diagrammatically, this can be thought of as shown below.
\begin{figure} \begin{figure}
\begin{center} \begin{center}
@ -719,7 +719,7 @@ Affine neural networks present an important class of neural networks. By virtue
\begin{corollary}\label{corsum} \begin{corollary}\label{corsum}
Let $n\in \N$. Let $\nu_1,\nu_2,...,\nu_n \in \neu$ satisfy that $\lay \lp \nu_1\rp = \lay \lp \nu_2\rp= \cdots =\lay \lp \nu_n\rp$. It is then the case that: Let $n\in \N$. Let $\nu_1,\nu_2,...,\nu_n \in \neu$ satisfy that $\lay \lp \nu_1\rp = \lay \lp \nu_2\rp= \cdots =\lay \lp \nu_n\rp$. It is then the case that:
\begin{align} \begin{align}
\param \lp \bigoplus_{i=1}^n \nu_i\rp \les n^2\param \lp \nu_1\rp \param \lp \bigoplus_{i=1}^n \nu_i\rp \les n^2 \cdot \param \lp \nu_1\rp
\end{align} \end{align}
\end{corollary} \end{corollary}
\begin{proof} \begin{proof}

BIN
Dissertation/nn-example.png Normal file

Binary file not shown.


View File

@ -10,14 +10,8 @@
\usepackage{mathtools} \usepackage{mathtools}
\numberwithin{equation}{section} \numberwithin{equation}{section}
\usepackage[]{amssymb} \usepackage[]{amssymb}
\usepackage{geometry} \usepackage[margin=1in]{geometry}
\geometry{ \usepackage{url}
left=1in,
right=1in,
top=1in,
bottom=1in
}
\usepackage[T1]{fontenc} \usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc} \usepackage[utf8]{inputenc}

View File

@ -1,6 +1,6 @@
\chapter{That $u$ is a Viscosity Solution} \chapter{That $u$ is a Viscosity Solution}
We can extend the work for the heat equation to generic parabolic partial differential equations. We do this by first introducing viscosity solutions to Kolmogorov PDEs as given in Crandall \& Lions \cite{crandall_lions} and further extended, esp. in \cite{Beck_2021}. Our goal in this chapter is to use the Feynman-Kac formula to see that the solutions to certain versions of the heat equation can be expressed as the expectation of a stochastic integral. Parts of this work are heavily inspired by \cite{crandall_lions} and esp. \cite{Beck_2021}.
%\subsection{The case without $f$} %\subsection{The case without $f$}
%\subsection{Linear Algebra Preliminaries} %\subsection{Linear Algebra Preliminaries}
%\begin{lemma} %\begin{lemma}
@ -44,7 +44,7 @@ We can extend the work for the heat equation to generic parabolic partial differ
% Leading to a contradiction. Thus there are no generalized eigenvectors of order 2 or higher, and so $A$ must be diagonalizable. % Leading to a contradiction. Thus there are no generalized eigenvectors of order 2 or higher, and so $A$ must be diagonalizable.
%\end{proof} %\end{proof}
\section{Some Preliminaries} \section{Some Preliminaries}
We take work previously pioneered by \cite{Ito1942a} and \cite{Ito1946}, and then seek to re-apply concepts first applied in \cite{Beck_2021} and \cite{BHJ21}. We take work previously pioneered by \cite{Ito1942a} and \cite{Ito1946}, and then seek to re-apply concepts applied in \cite{Beck_2021} and \cite{BHJ21}.
\begin{lemma}\label{lemma:2.7} \begin{lemma}\label{lemma:2.7}
Let $d,m \in \N$, $T \in (0,\infty)$. Let $\mu \in C^{1,2}([0,T] \times \R^d, \R^d)$ and $\sigma \in C^{1,2}([0,T] \times \R^d, \R^{d\times m})$ satisfying that they have non-empty compact supports and let $\mathfrak{S}= \supp(\mu)\cup \supp(\sigma) \subseteq [0,T] \times \R^d$. Let $( \Omega, \mathcal{F}, \mathbb{P}, ( \mathbb{F}_t )_{t \in [0,T]})$ be a filtered probability space satisfying usual conditions. Let $W:[0,T ]\times \Omega \rightarrow \R^m$ be a standard $(\mathbb{F}_t)_{t\in [0,T]}$ -Brownian motion, and let $\mathcal{X}:[0,T] \times \Omega \rightarrow \R^d$ be an $(\mathbb{F}_t)_{t\in [0,T]}$-adapted stochastic process with continuous sample paths satisfying for all $t \in [0,T]$ with $\mathbb{P}$-a.s. that: Let $d,m \in \N$, $T \in (0,\infty)$. Let $\mu \in C^{1,2}([0,T] \times \R^d, \R^d)$ and $\sigma \in C^{1,2}([0,T] \times \R^d, \R^{d\times m})$ satisfying that they have non-empty compact supports and let $\mathfrak{S}= \supp(\mu)\cup \supp(\sigma) \subseteq [0,T] \times \R^d$. Let $( \Omega, \mathcal{F}, \mathbb{P}, ( \mathbb{F}_t )_{t \in [0,T]})$ be a filtered probability space satisfying usual conditions. Let $W:[0,T ]\times \Omega \rightarrow \R^m$ be a standard $(\mathbb{F}_t)_{t\in [0,T]}$ -Brownian motion, and let $\mathcal{X}:[0,T] \times \Omega \rightarrow \R^d$ be an $(\mathbb{F}_t)_{t\in [0,T]}$-adapted stochastic process with continuous sample paths satisfying for all $t \in [0,T]$ with $\mathbb{P}$-a.s. that:
\begin{align} \begin{align}
@ -891,7 +891,6 @@ Let $T \in (0,\infty)$. Let $\lp \Omega, \mathcal{F}, \mathbb{P} \rp$ be a proba
\begin{proof} \begin{proof}
This is a consequence of Lemmas \ref{lem:3.4} and \ref{2.19}. This is a consequence of Lemmas \ref{lem:3.4} and \ref{2.19}.
\end{proof} \end{proof}
\newpage
\begin{corollary}\label{lem:3.19} Let $T \in (0,\infty)$,\\ let $\left( \Omega, \mathcal{F}, \mathbb{P} \right)$ be a probability space, let $u_d \in C^{1,2} \left( \left[ 0,T \right] \times \R^d, \R \right)$, $d \in \N$ satisfy for all $d \in \N$, $t \in [0,T]$, $x \in \R^d$ that: \begin{corollary}\label{lem:3.19} Let $T \in (0,\infty)$,\\ let $\left( \Omega, \mathcal{F}, \mathbb{P} \right)$ be a probability space, let $u_d \in C^{1,2} \left( \left[ 0,T \right] \times \R^d, \R \right)$, $d \in \N$ satisfy for all $d \in \N$, $t \in [0,T]$, $x \in \R^d$ that:
\begin{align} \begin{align}
\left( \frac{\partial}{\partial t} u_d \right) \left(t,x\right) + \frac{1}{2}\left(\nabla^2_x u_d\right) \left(t,x\right) = 0 \left( \frac{\partial}{\partial t} u_d \right) \left(t,x\right) + \frac{1}{2}\left(\nabla^2_x u_d\right) \left(t,x\right) = 0

Binary file not shown.

Binary file not shown.