diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..46de23a Binary files /dev/null and b/.DS_Store differ diff --git a/Dissertation/Brownian_motion_monte_carlo.tex b/Dissertation/Brownian_motion_monte_carlo.tex index 9d2e79b..f535812 100644 --- a/Dissertation/Brownian_motion_monte_carlo.tex +++ b/Dissertation/Brownian_motion_monte_carlo.tex @@ -202,7 +202,7 @@ Furthermore (ii) is a restatement of (\ref{(1.14)}) with $\theta = 0$, $m=1$, an \section{Monte Carlo Approximations} -\begin{lemma}\label{lem:1.21}Let $p \in (2,\infty)$,$n\in \mathbb{N}$, let $(\Omega, \mathcal{F}, \mathbb{P})$, be a probability space and let $\mathcal{X}_i: \Omega \rightarrow \mathbb{R}$, $i \in \{1,2,...,n\}$ be i.i.d. random variables with $\mathbb{E}[|\mathcal{X}_1|]<\infty$. Then it holds that: +\begin{lemma}\label{lem:1.21}Let $p \in (2,\infty)$, $n\in \mathbb{N}$, let $(\Omega, \mathcal{F}, \mathbb{P})$ be a probability space and let $\mathcal{X}_i: \Omega \rightarrow \mathbb{R}$, $i \in \{1,2,...,n\}$ be i.i.d. random variables with $\mathbb{E}[|\mathcal{X}_1|]<\infty$. Then it holds that: \begin{align} \lp\E \lb \lv \E \lb \mathcal{X}_1 \rb-\frac{1}{n} \lp \sum^n_{i=1} \mathcal{X}_i \rp \rv^p \rb \rp^{\frac{1}{p}} \leqslant \lb \frac{p-1}{n}\rb ^{\frac{1}{2}}\lp\E\lb \lv \mathcal{X}_1-\E \lb \mathcal{X}_1 \rb \rv^p \rb \rp^{\frac{1}{p}} \end{align} @@ -237,7 +237,7 @@ This completes the proof of the lemma. \end{corollary} \begin{proof} - Observe that e.g. \cite[Lemma~2.3]{grohsetal} and Lemma \ref{lem:1.21} establish (\ref{(1.26)}). + Observe that e.g. \cite[Proposition~2.3]{grohsetal} and Lemma \ref{lem:1.21} establish (\ref{(1.26)}). \end{proof} \begin{corollary}\label{cor:1.22.2} diff --git a/Dissertation/ann_first_approximations.tex b/Dissertation/ann_first_approximations.tex index 52f76a1..2ce2a5b 100644 --- a/Dissertation/ann_first_approximations.tex +++ b/Dissertation/ann_first_approximations.tex @@ -1242,7 +1242,7 @@ We will call the approximant $\max_{i \in \{0,1,\hdots, N\}}\{ f_i\}$, the \text \subsection{Lipschitz Function Approximations}\label{(9.7.6)} \begin{lemma}%TODO: Should we stipulate compact sets? - Let $\lp E,d \rp$ be a metric space. Let $L \in \lb 0,\infty \rp$, $D \subseteq E$, $\emptyset \neq C \subseteq D$. Let $f:D \rightarrow \R$ satisfy for all $x\in D$, $y \in C$ that $\left| f(x) -f(y)\right| \les L d \lp x,y \rp$, and let $F:E \rightarrow \R \cup \{\infty\}$ satisfy for all $x\in E$ that: + Let $\lp E,d \rp$ be a metric space. Let $L \in \lb 0,\infty \rp$, $D \subseteq E$, $\varnothing \neq C \subseteq D$. Let $f:D \rightarrow \R$ satisfy for all $x\in D$, $y \in C$ that $\left| f(x) -f(y)\right| \les L d \lp x,y \rp$, and let $F:E \rightarrow \R \cup \{\infty\}$ satisfy for all $x\in E$ that: \begin{align}\label{9.7.30} F\lp x \rp = \sup_{y\in C} \lb f\lp y \rp - Ld\lp x,y \rp \rb \end{align} @@ -1297,7 +1297,7 @@ We will call the approximant $\max_{i \in \{0,1,\hdots, N\}}\{ f_i\}$, the \text This establishes Item (v) and hence establishes the Lemma. 
\end{proof} \begin{corollary}\label{9.7.6.1} - Let $\lp E,d \rp$ be a metric space, let $L \in \lb 0,\infty \rp$, $\emptyset \neq C \subseteq E$, let $f: E \rightarrow \R$ satisfy for all $x\in E$, $y \in C$ that $\left\| f(x) - f(y) \right| \les Ld \lp x,y \rp$, and let $F:E \rightarrow \R \cup \{\infty\}$ satisfy for all $x\in E$ that: + Let $\lp E,d \rp$ be a metric space, let $L \in \lb 0,\infty \rp$, $\varnothing \neq C \subseteq E$, let $f: E \rightarrow \R$ satisfy for all $x\in E$, $y \in C$ that $\left| f(x) - f(y) \right| \les Ld \lp x,y \rp$, and let $F:E \rightarrow \R \cup \{\infty\}$ satisfy for all $x\in E$ that: \begin{align} F \lp x \rp = \sup_{y\in C} \lb f(y) - Ld \lp x,y \rp\rb \end{align} diff --git a/Dissertation/ann_product.tex b/Dissertation/ann_product.tex index 5271fe2..736bb06 100644 --- a/Dissertation/ann_product.tex +++ b/Dissertation/ann_product.tex @@ -1128,7 +1128,7 @@ Let $\mathfrak{p}_i$ for $i \in \{1,2,...\}$ be the set of functions defined for \end{align} - - Next note that $\lp \real_{\rect} \lp \pwr_{0,1} \rp\rp \lp x \rp$ is exactly $1$, which implies that for all $x\in \R$ we have that $|x^0-\lp \real_{\rect} \lp \pwr_{0.1}\rp\lp x \rp\rp |=0$. Note also that the instantiations of $\tun_n$ and $\cpy_{2,1}$ are exact. Note next that since $\tun_n$ and $\cpy_{2,1}$ are exact, the only sources of error for $\pwr^{q,\ve}_n$ a are $n$ compounding applications of $\prd^{q,\ve}$. + Next note that $\lp \real_{\rect} \lp \pwr_{0,1} \rp\rp \lp x \rp$ is exactly $1$, which implies that for all $x\in \R$ we have that $|x^0-\lp \real_{\rect} \lp \pwr_{0,1}\rp\rp\lp x \rp |=0$. Note also that the instantiations of $\tun_n$ and $\cpy_{2,1}$ are exact. Note next that since $\tun_n$ and $\cpy_{2,1}$ are exact, the only sources of error for $\pwr^{q,\ve}_n$ are $n$ compounding applications of $\prd^{q,\ve}$. Note also that by definition, it is the case that: \begin{align} diff --git a/Dissertation/ann_rep_brownian_motion_monte_carlo.tex b/Dissertation/ann_rep_brownian_motion_monte_carlo.tex index a6abe2c..247eab8 100644 --- a/Dissertation/ann_rep_brownian_motion_monte_carlo.tex +++ b/Dissertation/ann_rep_brownian_motion_monte_carlo.tex @@ -81,7 +81,7 @@ This proves Item (v) and hence the whole lemma. \end{proof} \section{The $\mathsf{E}^{N,h,q,\ve}_n$ Neural Network} \begin{lemma}[R\textemdash, 2023]\label{mathsfE} - Let $n, N\in \N$ and $h \in \lp 0,\infty\rp$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $a\in \lp -\infty,\infty \rp$, $b \in \lb a, \infty \rp$. Let $f:[a,b] \rightarrow \R$ be continuous and have second derivatives almost everywhere in $\lb a,b \rb$. Let $a=x_0 \les x_1\les \cdots \les x_{N-1} \les x_N=b$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{b-a}{N}$, and $x_i = x_0+i\cdot h$ . Let $x = \lb x_0 \: x_1\: \cdots x_N \rb$ and as such let $f\lp\lb x \rb_{*,*} \rp = \lb f(x_0) \: f(x_1)\: \cdots \: f(x_N) \rb$. Let $\mathsf{E}^{N,h,q,\ve}_{n} \in \neu$ be the neural network given by: + Let $n, N\in \N$ and $h \in \lp 0,\infty\rp$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $a\in \lp -\infty,\infty \rp$, $b \in \lb a, \infty \rp$. Let $f:[a,b] \rightarrow \R$ be continuous and have second derivatives almost everywhere in $\lb a,b \rb$. 
Let $a=x_0 \les x_1\les \cdots \les x_{N-1} \les x_N=b$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{b-a}{N}$, and $x_i = x_0+i\cdot h$. Let $x = \lb x_0 \: x_1\: \cdots \: x_N \rb$ and as such let $f\lp\lb x \rb_{*,*} \rp = \lb f(x_0) \: f(x_1)\: \cdots \: f(x_N) \rb$. Let $\mathsf{E}^{N,h,q,\ve}_{n} \in \neu$ be the neural network given by: \begin{align} \mathsf{E}^{N,h,q,\ve}_n = \xpn_n^{q,\ve} \bullet \etr^{N,h} \end{align} @@ -506,7 +506,7 @@ Let $n, N,h\in \N$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \ \section{The $\mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}$ network}\label{UEX} \begin{lemma}[R\textemdash,2023]\label{UE-prop} -Let $n, N,h\in \N$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $a\in \lp -\infty,\infty \rp$, $b \in \lb a, \infty \rp$. Let $f:[a,b] \rightarrow \R$ be continuous and have second derivatives almost everywhere in $\lb a,b \rb$. Let $a=x_0 \les x_1\les \cdots \les x_{N-1} \les x_N=b$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{b-a}{N}$, and $x_i = x_0+i\cdot h$ . Let $x = \lb x_0 \: x_1\: \cdots x_N \rb$ and as such let $f\lp\lb x \rb_{*,*} \rp = \lb f(x_0) \: f(x_1)\: \cdots \: f(x_N) \rb$. Let $\mathsf{E}^{\exp}_{n,h,q,\ve} \in \neu$ be the neural network given by: +Let $n, N,h\in \N$. Let $\delta,\ve \in \lp 0,\infty \rp $, $q\in \lp 2,\infty \rp$, satisfy that $\delta = \ve \lp 2^{q-1} +1\rp^{-1}$. Let $a\in \lp -\infty,\infty \rp$, $b \in \lb a, \infty \rp$. Let $f:[a,b] \rightarrow \R$ be continuous and have second derivatives almost everywhere in $\lb a,b \rb$. Let $a=x_0 \les x_1\les \cdots \les x_{N-1} \les x_N=b$ such that for all $i \in \{0,1,...,N\}$ it is the case that $h = \frac{b-a}{N}$, and $x_i = x_0+i\cdot h$. Let $x = \lb x_0 \: x_1\: \cdots \: x_N \rb$ and as such let $f\lp\lb x \rb_{*,*} \rp = \lb f(x_0) \: f(x_1)\: \cdots \: f(x_N) \rb$. Let $\mathsf{E}^{N,h,q,\ve}_{n} \in \neu$ be the neural network given by: \begin{align} \mathsf{E}^{N,h,q,\ve}_n = \xpn_n^{q,\ve} \bullet \etr^{N,h} \end{align} @@ -619,7 +619,7 @@ Note that for a fixed $T \in \lp 0,\infty \rp$ it is the case that $u_d\lp t,x \ &\left| \exp \lp \int^T_t \lp \alpha_d \circ \mathcal{X}^{d,t,x}_{r,\omega_i} ds\rp \rp u_d\lp T,\mathcal{X}^{d,t,x}_{\omega_i}\rp - \real_{\rect}\lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \right|\nonumber\\ &\les 3\ve +2\ve \left| u_d\lp T,\mathcal{X}_{r,\omega_i}^{d,t,x}\rp\right|^q+2\ve \left| \exp \lp \int^b_afdx\rp\right|^q + \ve \left| \exp \lp \int^b_afdx\rp - \mathfrak{e}\right|^q -\mathfrak{e}u_d\lp T,\mathcal{X}^{d,t,x}_{r,\omega_i} \rp\nonumber \end{align} - This completes the proof of the lemma. + This completes the proof of the Lemma. \end{proof} \begin{remark} Diagrammatically, this can be represented as: @@ -695,9 +695,15 @@ Note that for a fixed $T \in \lp 0,\infty \rp$ it is the case that $u_d\lp t,x \ \end{tikzpicture} \end{center} \end{remark} -\section{The $\mathsf{UES}$ network} +\section{The $\mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega,\fn}$ network} +\begin{definition}[The Kahane-Khintchine Constant] + Let $p,q \in \lp 0,\infty\rp$. 
We will then denote by $\fK_{p,q}\in \lb 0,\infty\rb$ the extended real number given by: + \begin{align} + \fK_{p,q} = \sup \left\{ c \in \lb 0,\infty \rp : \lb \begin{array}{c} \text{there exist an } \R\text{-Banach space } \lp E, \left\| \cdot \right\|_E \rp \text{, a probability space } \lp \Omega, \mathcal{F}, \mathbb{P} \rp \text{, } k \in \N\text{,} \\ x_1,x_2,\hdots,x_k \in E\text{, and i.i.d. random variables } r_1,r_2,\hdots,r_k: \Omega \rightarrow \{-1,1\} \\ \text{with } \mathbb{P}\lp r_1 = 1\rp = \mathbb{P}\lp r_1 = -1\rp = \frac{1}{2} \text{ such that} \\ \lp \E \lb \left\| \textstyle\sum^k_{j=1} r_j x_j \right\|^p_E \rb \rp^{\frac{1}{p}} = c \lp \E \lb \left\| \textstyle\sum^k_{j=1} r_j x_j \right\|^q_E \rb \rp^{\frac{1}{q}} \end{array} \rb \right\} + \end{align} +\end{definition} \begin{lemma}\label{lem:sm_sum} - Let $\nu_1,\nu_2,\hdots, \nu_n \in \neu$ such that for all $i \in \{1,2,\hdots, n\}$ it is the cast that $\out\lp \nu_i\rp = 1$, and it is also the case that $\dep \lp \nu_1 \rp = \dep \lp \nu_2 \rp = \cdots =\dep \lp \nu_n\rp$. Let $x_1 \in \R^{\inn\lp \nu_1\rp},x_2 \in \R^{\inn\lp \nu_2\rp},\hdots x_n \in \R^{\inn\lp \nu_n\rp}$ and $\fx \in \R^{\sum_{i=1}^n \inn \lp \nu_i\rp}$. It is then the case that we have that: + Let $\nu_1,\nu_2,\hdots, \nu_n \in \neu$ such that for all $i \in \{1,2,\hdots, n\}$ it is the case that $\out\lp \nu_i\rp = 1$, and it is also the case that $\dep \lp \nu_1 \rp = \dep \lp \nu_2 \rp = \cdots =\dep \lp \nu_n\rp$. Let $x_1 \in \R^{\inn\lp \nu_1\rp},x_2 \in \R^{\inn\lp \nu_2\rp},\hdots, x_n \in \R^{\inn\lp \nu_n\rp}$ and $\fx \in \R^{\sum_{i=1}^n \inn \lp \nu_i\rp}$. It is then the case that: \begin{align} \real_{\rect}\lp \sm_{n,1} \bullet \lb \boxminus_{i=1}^n \nu_i \rb \rp \lp \fx\rp = \sum^n_{i=1} \real_{\rect} \lp \nu_i\rp \lp x_i\rp \end{align} @@ -738,6 +744,9 @@ Note that for a fixed $T \in \lp 0,\infty \rp$ it is the case that $u_d\lp t,x \ \end{align} This proves the inductive case and hence the Lemma. \end{proof} +\begin{lemma} + Let $\lp \Omega, \mathcal{F}, \mathbb{P}\rp$ be a probability space and let $\mathcal{X}: \Omega \rightarrow \R^d$ be a random variable with finite mean $\E\lb\mathcal{X}\rb = \mu \in \R^d$ and probability density function $\ff_{\cX}$. Let $g: \R^d \rightarrow \R$ be a continuous, and hence measurable, function with $\E\lb \lv g\lp \cX\rp \rv\rb < \infty$. It is then the case that: + \begin{align} + \E \lb g\lp \cX\rp \rb = \int_{\R^d} g\lp x\rp \ff_{\cX}\lp x\rp dx + \end{align} +\end{lemma} \begin{lemma}[R\textemdash, 2024, Approximants for Brownian Motion] @@ -763,7 +772,7 @@ Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathc Furthermore, let $\mathsf{UE}^{N,h,q,\ve}_{n, \mathsf{G}_d}\subsetneq \neu$ be neural networks given by: \begin{align} - \mathsf{UE}^{N,h,q,\ve}_{n,\mathsf{G}_d} = \prd^{q,\ve} \bullet \lb \mathsf{E}^{N,h,q,\ve}_{n,h,q,\ve} \DDiamond \mathsf{G}_d \rb + \mathsf{UE}^{N,h,q,\ve}_{n,\mathsf{G}_d} = \prd^{q,\ve} \bullet \lb \mathsf{E}^{N,h,q,\ve}_{n} \DDiamond \mathsf{G}_d \rb \end{align} Furthermore, let $\mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i} \subsetneq \neu$ be neural networks given by: \begin{align} @@ -781,9 +790,9 @@ Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathc \frac{q}{q-2} \lb \log_2 \lp \ve^{-1}\rp +q \rb +\dep \lp \mathsf{G}_d\rp-1 &:n = 0\\ \frac{q}{q-2} \lb \log_2 \lp \ve^{-1}\rp +q \rb +\max\left\{\dep \lp \mathsf{E}^{N,h,q,\ve}_{n}\rp,\dep \lp \mathsf{G}_d\rp\right\}-1 &:n \in \N\\ \end{cases}$ - \item It is also the case that:\begin{align} - \param \lp \mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega, \fn}\rp &\les \param \lp \prd^{q,\ve}\rp + 2\lp\max \left\{\param \lp \mathsf{E}^{N,h,q,\ve}_{n}\rp, \param \lp \mathsf{G}_d\rp \right\}\rp^2 \nonumber\\ - &+ 8 \max\left\{\lp 1+4n\rp, \wid_{\hid \lp \mathsf{G}_d\rp} \lp \mathsf{G}_d\rp \right\}\nonumber + \item It is also the case that: + \begin{align} + \param \lp \mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega, \fn}\rp &\les \fn^2 \cdot \lb \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +324+ 48n\right. \nonumber\\ &\left. 
+24 \wid_{\hid\lp \mathsf{G}_d\rp}\lp \mathsf{G}_d\rp + 4\max \left\{\param \lp \mathsf{E}^{N,h,q,\ve}_{n}\rp, \param \lp \mathsf{G}_d\rp \right\} \rb \end{align} \item It is also the case that: \begin{align} @@ -805,7 +814,7 @@ Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathc Whence by Lemma \ref{comp_prop} it is the case that $\dep \lp \mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega,\fn} \rp = \dep \lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp$. This then proves Item (ii). - Next, observe that each of the $\mathsf{UEX}$ networks has the same architecture by construction. Corollary \ref{cor:sameparal} then yields that: + Next, observe that each of the $\mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}$ networks has the same architecture for all $\omega_i \in \Omega$ by construction. Corollary \ref{cor:sameparal} then yields that: \begin{align} \param \lp \boxminus_{i=1}^{\mathfrak{n}} \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i} \rp \les \mathfrak{n}^2\cdot \param \lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \end{align} @@ -823,29 +832,80 @@ Let $t \in \lp 0,\infty\rp$ and $T \in \lp t,\infty\rp$. Let $\lp \Omega, \mathc &\les \mathfrak{n}^2\cdot \param \lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \nonumber \\ &\les \fn^2 \cdot \lb \frac{360q}{q-2} \lb \log_2 \lp \ve^{-1} \rp +q+1 \rb +324+ 48n\right. \nonumber\\ &\left. +24 \wid_{\hid\lp \mathsf{G}_d\rp}\lp \mathsf{G}_d\rp + 4\max \left\{\param \lp \mathsf{E}^{N,h,q,\ve}_{n}\rp, \param \lp \mathsf{G}_d\rp \right\} \rb \end{align} - Now observe that by the triangle inequality, we have that: + Now observe that by the triangle inequality, we have that: \begin{align} - &\left| \E \lb \exp \lp \int^T_t f\lp \mathcal{X}^{d,t,x}_{r,\Omega}\rp ds\rp u_d^T\lp \mathcal{X}^{d,t,x}_{r,\Omega}\rp\rb - \real_{\rect}\lp \mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega, \fn}\rp \right| \nonumber \\ - &=\left| \E \lb \exp \lp \int^T_t f\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds\rp u_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp \rb - \inst_{\rect}\lb \frac{1}{\mathfrak{n}} \triangleright\lp \sm_{\mathfrak{n},1}\bullet\lb \boxminus_{i=1}^{\mathfrak{n}} \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rb\rp \rb\right| \nonumber\\ - &\les \left| \E \lb \exp \lp \int^T_t f\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds\rp u_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp \rb - \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \cdot u_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rp\rb \rb \right|\nonumber \\ - &+\left| \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \cdot u_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rp\rb \rb - \real_{\rect}\lb \frac{1}{\mathfrak{n}} \triangleright\lp \sm_{\mathfrak{n},1}\bullet\lb \boxminus_{i=1}^{\mathfrak{n}} \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rb\rp \rb\right| \nonumber \\ + &\left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp\rb - \real_{\rect}\lp \mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega, \fn}\rp \right| \label{big_eqn_lhs} \\ + &=\left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp \rb - \inst_{\rect}\lb \frac{1}{\mathfrak{n}} \triangleright\lp \sm_{\mathfrak{n},1}\bullet\lb \boxminus_{i=1}^{\mathfrak{n}} \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rb\rp \rb\right| \nonumber\\ + &\les \left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp \rb - \frac{1}{\mathfrak{n}}\lb 
\sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f ds \rp \cdot \fu_d^T\lp x\rp\rb \rb \right|\label{big_eqn_rhs_summand_1} \\ + &+\left| \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \rp \cdot \fu_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rb - \real_{\rect}\lb \frac{1}{\mathfrak{n}} \triangleright\lp \sm_{\mathfrak{n},1}\bullet\lb \boxminus_{i=1}^{\mathfrak{n}} \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rb\rp \rb\right| \label{big_eqn_lhs_summand_2} \end{align} - Observe that by the triangle inequality, the absolute homogeneity condition for norms, the fact that the Brownian motions are independent of each other, Lemma \ref{lem:sm_sum}, the fact that $\mathfrak{n}\in \N$, the fact that the upper limit of error remains bounded by the same bound for all $\omega_i \in \Omega$, and Lemma \ref{sum_of_errors_of_stacking}, then renders the second summand as: + Observe that by the triangle inequality, the absolute homogeneity condition for norms, the fact that the Brownian motions are independent of each other, Lemma \ref{lem:sm_sum}, the fact that $\mathfrak{n}\in \N$, the fact that the upper limit of error remains bounded by the same bound for all $\omega_i \in \Omega$, and Lemma \ref{sum_of_errors_of_stacking}, then renders the second summand, (\ref{big_eqn_lhs_summand_2}), as: \begin{align} &\left| \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \rp \cdot u_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rb \rb - \real_{\rect}\lb \frac{1}{\mathfrak{n}} \triangleright\lp \sm_{\mathfrak{n},1}\bullet\lb \boxminus_{i=1}^{\mathfrak{n}} \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rb\rp\rb\right| \nonumber \\ &\les \left|\frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1} \exp \lp \int_t^T f\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \rp \cdot u_d^T\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp \rb - \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lp \real_{\rect}\lb \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rb\rp \rb \right| \nonumber \\ - &\les \cancel{\frac{1}{\mathfrak{n}} \sum^{\mathfrak{n}}_{i=1}}\left| \exp \lp \int^T_tf\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \cdot u^T_d\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rp - \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\right| \nonumber\\ - &\les \left| \exp \lp \int^T_tf\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \cdot u^T_d\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp\rp - \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\right| \nonumber\\ + &\les \cancel{\frac{1}{\mathfrak{n}} \sum^{\mathfrak{n}}_{i=1}}\left| \exp \lp \int^T_tf\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \rp \cdot u^T_d\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp - \real_{\rect}\lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i} \rp \right| \nonumber\\ + &\les \left| \exp \lp \int^T_tf\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \rp \cdot u^T_d\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp - \real_{\rect}\lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \right| \nonumber \end{align} -% Note that by Lemma \ref{iobm} each of the $\mathcal{X}^{d,t,x}_{r,\omega_i}$ are pairwise independent of each other for all $i \in \{1,2,\hdots,\mathfrak{n}\}$. Note also that by Definition \ref{def:brown_motion} it is the case, for all $\omega_i \in \Omega$ that $\mathcal{X}^{d,t,x}_{T,\omega_i} \sim \norm \lp \mymathbb{0}_d, \diag_d(T) \rp$ - -Note for the first summand that it is in $\mathcal{O}\lp \frac{1}{\sqrt{\mathfrak{n}}}\rp$. 
Notice that both $f$ and $\fu^T_d$ are continuous functions for $d\in \N$. Note also that $F:[t,T] \rightarrow \R$ defined as: -\begin{align} - F(\fx) \coloneqq \int_t^\ft f\lp\fx\rp dx -\end{align} -is continuous on $\lb t,T\rb$. Thus , notice that \cite[Theorem~2.1]{rio_moment_2009} with $k$ - + This renders (\ref{big_eqn_lhs}) as: + \begin{align} + &\left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp\rb - \real_{\rect}\lp \mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega, \fn}\rp \right| \nonumber \\ + &\les \left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp \rb - \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f ds \rp \cdot \fu_d^T\lp x\rp\rb \rb \right| \nonumber \\ + &+\left| \exp \lp \int^T_tf\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \rp \cdot u^T_d\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp - \real_{\rect}\lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \right| + \end{align} + Taking the expectation on both sides of this inequality and applying the linearity and monotonicity of expectation yields: + \begin{align} + &\E \lb \left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp\rb - \real_{\rect}\lp \mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega, \fn}\rp \right|\rb \label{big_eqn_stage_2_lhs}\\ + &\les \E \lb \left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp \rb - \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f ds \rp \cdot \fu_d^T\lp x\rp\rb \rb \right|\rb \label{big_eqn_stage_2_rhs_1} \\ + &+\E\lb \left| \exp \lp \int^T_tf\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \rp \cdot u^T_d\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp - \real_{\rect}\lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \right|\rb \label{big_eqn_stage_2_rhs_2} + \end{align} + Consider now the Lyapunov inequality applied to (\ref{big_eqn_stage_2_rhs_1}), which renders it as: + \begin{align} + &\E \lb \left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp \rb - \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f ds \rp \cdot \fu_d^T\lp x\rp\rb \rb \right|\rb \nonumber\\ + &\les \lp \E \lb \left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp \rb - \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f ds \rp \cdot \fu_d^T\lp x\rp\rb \rb \right|^2\rb \rp^{\frac{1}{2}} \label{where_grohs_will_be_applied} + \end{align} + Then \cite[Corollary~2.6]{grohsetal} applied to (\ref{where_grohs_will_be_applied}) yields that: + \begin{align} + &\lp \E \lb \left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp \rb - \frac{1}{\mathfrak{n}}\lb \sum^{\mathfrak{n}}_{i=1}\lb \exp \lp \int_t^T f ds \rp \cdot \fu_d^T\lp x\rp\rb \rb \right|^2\rb \rp^{\frac{1}{2}} \nonumber\\ + &\les 2\sqrt{\frac{1}{\fn}} \lp \E \lb \left| \E \lb \exp \lp \int^T_t f ds\rp \fu_d^T\lp x\rp \rb \right|^2\rb \rp^{\frac{1}{2}} + \end{align} + Looking back at (\ref{big_eqn_stage_2_rhs_2}), we see that the monotonicity and linearity of expectation tell us that: + \begin{align} + &\E\lb \left| \exp \lp \int^T_tf\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp ds \rp \cdot u^T_d\lp \mathcal{X}^{d,t,x}_{r,\omega_i}\rp - \real_{\rect}\lp \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rp \right|\rb \\ + &\les \E \lb 3\ve +2\ve \left| \fu^T_d\lp x\rp\right|^q+2\ve \left| \exp \lp \int^b_afdx\rp\right|^q + \ve \left| \exp \lp \int^b_afdx\rp - \mathfrak{e}\right|^q -\mathfrak{e}\fu^T_d\lp x \rp\rb \\ + &\les 3\ve +2\ve \cdot\E\lb \left| \fu^T_d\lp x\rp\right|^q\rb + 2\ve \cdot \E \lb \left| \exp \lp \int^b_afdx\rp\right|^q\rb + \ve \cdot\E \lb 
\left| \exp \lp \int^b_a f dx\rp - \mathfrak{e}\right|^q\rb -\fe\cdot \E \lb \fu_d^T \lp x\rp\rb \nonumber + \end{align} + Note that: + \begin{align} + \E\lb \mathcal{X}^{d,t,x}_s\rb &= \E\lb x + \int^t_s \sqrt{2} d\mathcal{W}^d_r\rb \nonumber\\ + &= x + \sqrt{2}\cdot\E \lb \int^t_s d\mathcal{W}^d_r \rb \\ + &= x + \sqrt{2}\cdot \E \lb \mathcal{W}^d_{t-s}\rb \\ + &= x + \end{align} + Consider now: + \begin{align} + \va \lb \cX^{d,t,x}_s\rb &= \va \lb x + \int^t_s \sqrt{2}d\cW^d_r\rb \nonumber \\ + &= \E \lb\lp x+\int^t_s\sqrt{2}d\cW^d_r - \E \lb x+\int^t_s\sqrt{2}d\cW^d_r\rb\rp^2\rb \nonumber\\ + &=\E \lb\lp x+\int^t_s\sqrt{2}d\cW^d_r -x\rp^2\rb \nonumber \\ + &=2\cdot \E\lb \lp \int^t_s d\cW_r^d\rp^2\rb \nonumber\\ + &=2\cdot \E \lb \lp \cW^d_{t-s}\rp^2\rb + \end{align} \end{proof} +Note now that, since $\E \lb \cW^d_{t-s}\rb = \mymathbb{0}_d$: +\begin{align} + \va \lb \cW^d_{t-s}\rb &= \E \lb \lp \cW_{t-s}^d\rp^2\rb - \lp \E \lb \cW^d_{t-s}\rb\rp^2 = \E\lb \lp \cW^d_{t-s}\rp^2\rb = \lp t-s \rp\mathbb{I}_d \nonumber \\ + 2\cdot \E\lb \lp \cW^d_{t-s}\rp^2\rb &= 2\lp t-s\rp\mathbb{I}_d +\end{align} +Now note that since $\cW^d_r$ is a standard Brownian motion, its expectation and variance at time $r$ are $\mymathbb{0}_d$ and $r\mathbb{I}_d$ respectively. Whence it is the case that the probability density function of $\cW_{t-s}^d$ is: +\begin{align} + \lp 2\pi\lp t-s\rp\rp^{-\frac{d}{2}}\exp \lp \frac{-\left\| x \right\|^2}{2\lp t-s\rp}\rp +\end{align} +However $\cX^{d,t,x}_s = x + \sqrt{2}\int^t_s d\cW^d_r$ is a shifted normal distribution; indeed the computations above show that $\cX^{d,t,x}_s \sim \norm \lp x, 2\lp t-s\rp \mathbb{I}_d\rp$, whence its probability density function is: +\begin{align} + \lp 4\pi\lp t-s\rp\rp^{-\frac{d}{2}}\exp \lp \frac{-\left\| y-x \right\|^2}{4\lp t-s\rp}\rp +\end{align} + + \begin{remark} Note that diagrammatically, this can be represented as in figure below. \begin{figure}[h] @@ -1017,6 +1077,8 @@ is continuous on $\lb t,T\rb$. Thus , notice that \cite[Theorem~2.1]{rio_moment_ \caption{Neural network diagram for the $\mathsf{UES}$ network.} \end{figure} \end{remark} + + \begin{remark} It may be helpful to think of this as a very crude form of ensembling. \end{remark} diff --git a/Dissertation/commands.tex b/Dissertation/commands.tex index 7cfb090..933aaa5 100644 --- a/Dissertation/commands.tex +++ b/Dissertation/commands.tex @@ -26,7 +26,7 @@ \newcommand{\cZ}{\mathcal{Z}} \newcommand{\fA}{\mathfrak{A}} -\newcommand{\fB}{\mathfrak{B}} +\newcommand{\fB}{\mathfrak{B}} \newcommand{\fC}{\mathfrak{C}} \newcommand{\fD}{\mathfrak{D}} \newcommand{\fE}{\mathfrak{E}} diff --git a/Dissertation/main.bib b/Dissertation/main.bib index f635ff9..d719af5 100644 --- a/Dissertation/main.bib +++ b/Dissertation/main.bib @@ -573,40 +573,6 @@ archivePrefix = {arXiv}, primaryClass={cs.LG} } -@inproceedings{vaswani2, - author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, \L ukasz and Polosukhin, Illia}, - booktitle = {Advances in Neural Information Processing Systems}, - editor = {I. Guyon and U. Von Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. 
Garnett}, - pages = {}, - publisher = {Curran Associates, Inc.}, - title = {Attention is All you Need}, - url = {https://proceedings.neurips.cc/paper_files/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf}, - volume = {30}, - year = {2017} -} - -@article{arik2, - title = {{TabNet}: {Attentive} {Interpretable} {Tabular} {Learning}}, - volume = {35}, - copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence}, - issn = {2374-3468}, - shorttitle = {{TabNet}}, - url = {https://ojs.aaai.org/index.php/AAAI/article/view/16826}, - doi = {10.1609/aaai.v35i8.16826}, - abstract = {We propose a novel high-performance and interpretable canonical deep tabular data learning architecture, TabNet. TabNet uses sequential attention to choose which features to reason from at each decision step, enabling interpretability and more efficient learning as the learning capacity is used for the most salient features. We demonstrate that TabNet outperforms other variants on a wide range of non-performance-saturated tabular datasets and yields interpretable feature attributions plus insights into its global behavior. Finally, we demonstrate self-supervised learning for tabular data, significantly improving performance when unlabeled data is abundant.}, - language = {en}, - number = {8}, - urldate = {2024-02-01}, - journal = {Proceedings of the AAAI Conference on Artificial Intelligence}, - author = {Arik, Sercan \"O and Pfister, Tomas}, - month = may, - year = {2021}, - note = {Number: 8}, - keywords = {Unsupervised \& Self-Supervised Learning}, - pages = {6679--6687}, - file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/7MTMXR4G/Arik and Pfister - 2021 - TabNet Attentive Interpretable Tabular Learning.pdf:application/pdf}, -} - @Manual{dplyr, title = {dplyr: A Grammar of Data Manipulation}, author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller and Davis Vaughan}, diff --git a/Dissertation/main.pdf b/Dissertation/main.pdf index 95fc7f9..573886d 100644 Binary files a/Dissertation/main.pdf and b/Dissertation/main.pdf differ diff --git a/Dissertation/main.tex b/Dissertation/main.tex index aa1bfb5..347cd69 100644 --- a/Dissertation/main.tex +++ b/Dissertation/main.tex @@ -1,7 +1,7 @@ \include{preamble} \include{commands} -\title{Artificial Neural Networks Applied to Stochastic Monte Carlo as a Way to Approximate Modified Heat Equations, and Their Associated Parameters.} +\title{Artificial Neural Networks Applied to Stochastic Monte Carlo as a Way to Approximate Modified Heat Equations, and Their Associated Parameters, Depths, and Accuracies.} \author{Shakil Rafi} \begin{document} \maketitle diff --git a/Dissertation/neural_network_introduction.tex b/Dissertation/neural_network_introduction.tex index af88ea0..b4c9e71 100644 --- a/Dissertation/neural_network_introduction.tex +++ b/Dissertation/neural_network_introduction.tex @@ -58,7 +58,7 @@ We seek here to introduce a unified framework for artificial neural networks. Th Note that we develop this definition to closely align to popular deep-learning frameworks such as \texttt{PyTorch}, \texttt{TensorFlow}, and \texttt{Flux}, where, in principle, it is always possible to know the parameter count, depth, number of layers, and other auxiliary information. - We will often say let $\nu\in \neu$, and it is implied that the tuple $\nu$ with the auxiliary functions is what is being referred to. 
+ We will often say let $\nu\in \neu$, and it is implied that the tuple $\nu$ with the auxiliary functions is what is being referred to. This is analogous to when we say that $X$ is a topological space but we mean the pair $\lp X,\tau\rp$, i.e. $X$ endowed with the topology $\tau$, or when we say that $Y$ is a measure space when we mean the triple $\lp Y,\Omega, \mu\rp$, i.e. $Y$ endowed with the $\sigma$-algebra $\Omega$ and the measure $\mu$. \end{remark} @@ -68,21 +68,21 @@ We seek here to introduce a unified framework for artificial neural networks. Th \real_{\act}\lp \nu \rp \in C \lp \R^{l_0}, \R^{l_L} \rp & \text{ and } & \lp \real_{\act}\lp \nu\rp \rp \lp x_0 \rp = W_Lx_{L-1}+b_L \end{align} \end{definition} -We will often denote the instantiated neural network $\nu^{l_0,l_L}$ taking $\R^{l_0}$ to $\R^{l_L}$ as $\nu^{l_0,l_L}: \R^{l_0} \rightarrowtail \R^{l_L}$ or simply as $\R^{l_0} \overset{\nu}{\rightarrowtail} \R^{l_L}$ where $l_0$ and $l_L$ are obvious. - +\begin{figure} \begin{center} - \begin{neuralnetwork}[height=4, title = {A neural network $\nu$ with $\lay(\nu) = \lp 4,5,4,2\rp$}, nodesize = 10pt, maintitleheight=1em] - \newcommand{\x}[2]{$x$} - \newcommand{\y}[2]{$x$} - \newcommand{\hfirst}[2]{\small $h$} - \newcommand{\hsecond}[2]{\small $h$} - \inputlayer[count=3, bias=true, title=, text=\x] - \hiddenlayer[count=4, bias=true, title=, text=\hfirst] \linklayers - \hiddenlayer[count=3, bias=true, title=, text=\hsecond] \linklayers + \begin{neuralnetwork} + \newcommand{\x}[2]{$x_0$} + \newcommand{\y}[2]{$x_3$} + \newcommand{\hfirst}[2]{\small $x_1$} + \newcommand{\hsecond}[2]{\small $x_2$} + \inputlayer[count=3, title=, text=\x] + \hiddenlayer[count=4, title=, text=\hfirst] \linklayers + \hiddenlayer[count=3, title=, text=\hsecond] \linklayers \outputlayer[count=2, title=, text=\y] \linklayers \end{neuralnetwork} \end{center} +\caption{A neural network $\nu$ with $\lay \lp \nu \rp = \lp 3,4,3,2\rp$} +\end{figure} \begin{remark} For an R implementation see Listings \ref{nn_creator}, \ref{aux_fun}, \ref{activations}, and \ref{instantiation}. \end{remark} diff --git a/Dissertation/preamble.tex b/Dissertation/preamble.tex index 4421e22..f5a60d2 100644 --- a/Dissertation/preamble.tex +++ b/Dissertation/preamble.tex @@ -146,6 +146,7 @@ \DeclareMathOperator{\inst}{\mathfrak{I}} \DeclareMathOperator{\rows}{rows} \DeclareMathOperator{\columns}{columns} +\DeclareMathOperator{\va}{Var} \DeclareMathOperator{\obj}{obj} \DeclareMathOperator{\dom}{dom} diff --git a/MLP and DNN Material/.DS_Store b/MLP and DNN Material/.DS_Store index 2666aff..85d6c9d 100644 Binary files a/MLP and DNN Material/.DS_Store and b/MLP and DNN Material/.DS_Store differ
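As a crude numerical companion to the $\mathsf{UES}^{N,h,q,\ve}_{n,\mathsf{G}_d,\Omega,\fn}$ construction changed above, and in the spirit of the dissertation's R listings, the sketch below approximates $\E \lb \exp \lp \int^T_t f\lp \mathcal{X}^{d,t,x}_s\rp ds\rp \fu^T_d\lp \mathcal{X}^{d,t,x}_T\rp\rb$ for $d=1$ by averaging over $\fn$ simulated Brownian paths, mirroring the $\frac{1}{\fn} \triangleright \lp \sm_{\fn,1}\bullet\lb \boxminus^{\fn}_{i=1} \mathsf{UEX}^{N,h,q,\ve}_{n,\mathsf{G}_d,\omega_i}\rb\rp$ average; the function \texttt{ues\_monte\_carlo} and its parameters are illustrative assumptions, not the instantiated networks themselves.

\begin{lstlisting}[language=R]
# A sketch, not one of the dissertation's listings: ues_monte_carlo and
# its parameters (n_paths, n_steps, f, u_T) are illustrative assumptions.
ues_monte_carlo <- function(x, t, T, n_paths, n_steps, f, u_T) {
  dt <- (T - t) / n_steps
  estimates <- replicate(n_paths, {
    # Euler-discretized path of X_s = x + sqrt(2) * W_s on [t, T]
    path <- x + cumsum(rnorm(n_steps, mean = 0, sd = sqrt(2 * dt)))
    # Riemann-sum approximation of the exponent int_t^T f(X_s) ds
    exp(sum(f(path)) * dt) * u_T(path[n_steps])
  })
  mean(estimates)  # the (1/n)-scaled sum realized by sm_{n,1}
}

# Sanity check with f = 0 and u_T(y) = y^2: here X_T ~ N(x, 2(T - t)), so
# the target expectation is 2(T - t) = 2, recovered at the O(1/sqrt(n))
# Monte Carlo rate of Lemma lem:1.21.
set.seed(1)
ues_monte_carlo(x = 0, t = 0, T = 1, n_paths = 10000, n_steps = 100,
                f = function(y) 0 * y, u_T = function(y) y^2)
\end{lstlisting}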