% dissertation_work/Dissertation/main.bib
@book{karatzas1991brownian,
title={Brownian Motion and Stochastic Calculus},
author={Karatzas, I. and Shreve, S.E.},
isbn={9780387976556},
lccn={96167783},
series={Graduate Texts in Mathematics (Book 113)},
url={https://books.google.com/books?id=ATNy\_Zg3PSsC},
year={1991},
publisher={Springer New York}
}
@article{grohs2019spacetime,
abstract = {Over the last few years deep artificial neural networks (ANNs) have very successfully been used in numerical simulations for a wide variety of computational problems including computer vision, image classification, speech recognition, natural language processing, as well as computational advertisement. In addition, it has recently been proposed to approximate solutions of high-dimensional partial differential equations (PDEs) by means of stochastic learning problems involving deep ANNs. There are now also a few rigorous mathematical results in the scientific literature which provide error estimates for such deep learning based approximation methods for PDEs. All of these articles provide spatial error estimates for ANN approximations for PDEs but do not provide error estimates for the entire space-time error for the considered ANN approximations. It is the subject of the main result of this article to provide space-time error estimates for deep ANN approximations of Euler approximations of certain perturbed differential equations. Our proof of this result is based (i) on a certain ANN calculus and (ii) on ANN approximation results for products of the form $[0,T]\times\mathbb{R}^{d}\ni(t,x)\mapsto tx\in\mathbb{R}^{d}$, where $T\in(0,\infty)$, $d\in\mathbb{N}$, which we both develop within this article.},
author = {Grohs, Philipp and Hornung, Fabian and Jentzen, Arnulf and Zimmermann, Philipp},
date = {2023/01/11},
date-added = {2023-09-08 14:49:03 -0500},
date-modified = {2023-09-08 14:49:03 -0500},
doi = {10.1007/s10444-022-09970-2},
id = {Grohs2023},
isbn = {1572-9044},
journal = {Advances in Computational Mathematics},
number = {1},
pages = {4},
title = {Space-time error estimates for deep neural network approximations for differential equations},
url = {https://doi.org/10.1007/s10444-022-09970-2},
volume = {49},
year = {2023},
bdsk-url-1 = {https://doi.org/10.1007/s10444-022-09970-2}}
@article{Grohs_2022,
doi = {10.1007/s42985-021-00100-z},
url = {https://doi.org/10.1007%2Fs42985-021-00100-z},
year = 2022,
month = {jun},
publisher = {Springer Science and Business Media {LLC}},
volume = {3},
number = {4},
author = {Philipp Grohs and Arnulf Jentzen and Diyora Salimova},
title = {Deep neural network approximations for solutions of {PDEs} based on Monte Carlo algorithms},
journal = {Partial Differential Equations and Applications}
}
@article{Ito1942a,
author={It\^o, K.},
title={Differential Equations Determining {Markov} Processes (Original in {Japanese})},
journal={Zenkoku Shijo Sugaku Danwakai},
year={1942},
volume={244},
number={1077},
pages={1352{\textendash}1400}
}
@article{Ito1946,
author={It\^o, K.},
title={On a stochastic integral equation},
journal={Proc. Imperial Acad. Tokyo},
year={1946},
volume={22},
pages={32{\textendash}35}
}
@inbook{bass_2011, place={Cambridge}, series={Cambridge Series in Statistical and Probabilistic Mathematics}, title={Brownian Motion}, DOI={10.1017/CBO9780511997044.004}, booktitle={Stochastic Processes}, publisher={Cambridge University Press}, author={Bass, Richard F.}, year={2011}, pages={612}, collection={Cambridge Series in Statistical and Probabilistic Mathematics}}
@article{hutzenthaler_overcoming_2020,
author = {Hutzenthaler, Martin and Jentzen, Arnulf and Kruse, Thomas and Anh Nguyen, Tuan and von Wurstemberger, Philippe },
title = {Overcoming the curse of dimensionality in the numerical approximation of semilinear parabolic partial differential equations},
journal = {Proceedings of the Royal Society A: Mathematical, Physical and Engineering Sciences},
volume = {476},
number = {2244},
pages = {20190630},
year = {2020},
doi = {10.1098/rspa.2019.0630},
URL = {https://royalsocietypublishing.org/doi/abs/10.1098/rspa.2019.0630},
eprint = {https://royalsocietypublishing.org/doi/pdf/10.1098/rspa.2019.0630}
,
abstract = { For a long time it has been well-known that high-dimensional linear parabolic partial differential equations (PDEs) can be approximated by Monte Carlo methods with a computational effort which grows polynomially both in the dimension and in the reciprocal of the prescribed accuracy. In other words, linear PDEs do not suffer from the curse of dimensionality. For general semilinear PDEs with Lipschitz coefficients, however, it remained an open question whether these suffer from the curse of dimensionality. In this paper we partially solve this open problem. More precisely, we prove in the case of semilinear heat equations with gradient-independent and globally Lipschitz continuous nonlinearities that the computational effort of a variant of the recently introduced multilevel Picard approximations grows at most polynomially both in the dimension and in the reciprocal of the required accuracy. }
}
@article{Beck_2021,
doi = {10.3934/dcdsb.2020320},
url = {https://doi.org/10.3934%2Fdcdsb.2020320},
year = {2021},
publisher = {American Institute of Mathematical Sciences (AIMS)},
volume = {26},
number = {9},
pages = {4927},
author = {Christian Beck and Lukas Gonon and Martin Hutzenthaler and Arnulf Jentzen},
title = {On existence and uniqueness properties for solutions of stochastic fixed point equations},
journal = {Discrete \& Continuous Dynamical Systems - B}
}
@article{BHJ21,
doi = {10.1142/s0219493721500489},
url = {https://doi.org/10.1142%2Fs0219493721500489},
year = 2021,
month = {jul},
publisher = {World Scientific Pub Co Pte Ltd},
volume = {21},
number = {08},
author = {Christian Beck and Martin Hutzenthaler and Arnulf Jentzen},
title = {On nonlinear {Feynman}{\textendash}{Kac} formulas for viscosity solutions of semilinear parabolic partial differential equations},
journal = {Stochastics and Dynamics}
}
@article{Gyngy1996ExistenceOS,
title={Existence of strong solutions for {It\^o}'s stochastic equations via approximations},
author={Istv{\'a}n Gy{\"o}ngy and Nicolai V. Krylov},
journal={Probability Theory and Related Fields},
year={1996},
volume={105},
pages={143-158}
}
@book{durrett2019probability,
title={Probability: Theory and Examples},
author={Durrett, R.},
isbn={9781108473682},
lccn={2018047195},
series={Cambridge Series in Statistical and Probabilistic Mathematics},
url={https://books.google.com/books?id=b22MDwAAQBAJ},
year={2019},
publisher={Cambridge University Press}
}
@techreport{hutzenthaler_strong_2021,
title = {Strong {$L^p$}-error analysis of nonlinear {Monte} {Carlo} approximations for high-dimensional semilinear partial differential equations},
url = {http://arxiv.org/abs/2110.08297},
number = {arXiv:2110.08297},
urldate = {2022-10-29},
institution = {arXiv},
author = {Hutzenthaler, Martin and Jentzen, Arnulf and Kuckuck, Benno and Padgett, Joshua Lee},
month = oct,
year = {2021},
doi = {10.48550/arXiv.2110.08297},
note = {arXiv:2110.08297 [cs, math]},
keywords = {Mathematics - Numerical Analysis, Mathematics - Probability},
annote = {Comment: 42 pages.},
file = {arXiv Fulltext PDF:files/6/Hutzenthaler et al. - 2021 - Strong \$L^p\$-error analysis of nonlinear Monte Car.pdf:application/pdf;arXiv.org Snapshot:files/7/2110.html:text/html},
}
@TechReport{grohsetal,
author={Philipp Grohs and Fabian Hornung and Arnulf Jentzen and Philippe von Wurstemberger},
title={{A proof that artificial neural networks overcome the curse of dimensionality in the numerical approximation of Black-Scholes partial differential equations}},
year=2018,
month=Sep,
institution={arXiv.org},
type={Papers},
url={https://ideas.repec.org/p/arx/papers/1809.02362.html},
number={1809.02362},
abstract={Artificial neural networks (ANNs) have very successfully been used in numerical simulations for a series of computational problems ranging from image classification/image recognition, speech recognition, time series analysis, game intelligence, and computational advertising to numerical approximations of partial differential equations (PDEs). Such numerical simulations suggest that ANNs have the capacity to very efficiently approximate high-dimensional functions and, especially, indicate that ANNs seem to admit the fundamental power to overcome the curse of dimensionality when approximating the high-dimensional functions appearing in the above named computational problems. There are a series of rigorous mathematical approximation results for ANNs in the scientific literature. Some of them prove convergence without convergence rates and some even rigorously establish convergence rates but there are only a few special cases where mathematical results can rigorously explain the empirical success of ANNs when approximating high-dimensional functions. The key contribution of this article is to disclose that ANNs can efficiently approximate high-dimensional functions in the case of numerical approximations of Black-Scholes PDEs. More precisely, this work reveals that the number of required parameters of an ANN to approximate the solution of the Black-Scholes PDE grows at most polynomially in both the reciprocal of the prescribed approximation accuracy $\varepsilon > 0$ and the PDE dimension $d \in \mathbb{N}$. We thereby prove, for the first time, that ANNs do indeed overcome the curse of dimensionality in the numerical approximation of Black-Scholes PDEs.},
}
@article{crandall_lions,
title = {User's guide to viscosity solutions of second order partial differential equations},
volume = {27},
issn = {0273-0979, 1088-9485},
url = {https://www.ams.org/bull/1992-27-01/S0273-0979-1992-00266-5/},
doi = {10.1090/S0273-0979-1992-00266-5},
language = {en},
number = {1},
urldate = {2023-03-07},
journal = {Bull. Amer. Math. Soc.},
author = {Crandall, Michael G. and Ishii, Hitoshi and Lions, Pierre-Louis},
year = {1992},
keywords = {comparison theorems, dynamic programming, elliptic equations, fully nonlinear equations, generalized solutions, Hamilton-Jacobi equations, maximum principles, nonlinear boundary value problems, parabolic equations, partial differential equations, Perron's method, Viscosity solutions},
pages = {1{\textendash}67},
file = {Full Text PDF:files/129/Crandall et al. - 1992 - Users guide to viscosity solutions of second orde.pdf:application/pdf},
}
@book{da_prato_zabczyk_2002,
place={Cambridge}, series={London Mathematical Society Lecture Note Series}, title={Second Order Partial Differential Equations in Hilbert Spaces}, DOI={10.1017/CBO9780511543210}, publisher={Cambridge University Press}, author={Da Prato, Giuseppe and Zabczyk, Jerzy}, year={2002}, collection={London Mathematical Society Lecture Note Series}}
@article{rio_moment_2009,
title = {Moment {Inequalities} for {Sums} of {Dependent} {Random} {Variables} under {Projective} {Conditions}},
volume = {22},
issn = {1572-9230},
url = {https://doi.org/10.1007/s10959-008-0155-9},
doi = {10.1007/s10959-008-0155-9},
abstract = {We obtain precise constants in the Marcinkiewicz-Zygmund inequality for martingales in \${\textbackslash}mathbb\{L\}{\textasciicircum}\{p\}\$for p{\textgreater}2 and a new Rosenthal type inequality for stationary martingale differences for p in ]2,3]. The Rosenthal inequality is then extended to stationary and adapted sequences. As in Peligrad et al. (Proc. Am. Math. Soc. 135:541550, [2007]), the bounds are expressed in terms of \${\textbackslash}mathbb\{L\}{\textasciicircum}\{p\}\$-norms of conditional expectations with respect to an increasing field of sigma algebras. Some applications to a particular Markov chain are given.},
language = {en},
number = {1},
urldate = {2023-01-06},
journal = {J Theor Probab},
author = {Rio, Emmanuel},
month = mar,
year = {2009},
keywords = {60 F 05, 60 F 17, Martingale, Moment inequality, Projective criteria, Rosenthal inequality, Stationary sequences},
pages = {146{\textendash}163},
}
@book{golub2013matrix,
title={Matrix Computations},
author={Golub, G.H. and Van Loan, C.F.},
isbn={9781421407944},
lccn={2012943449},
series={Johns Hopkins Studies in the Mathematical Sciences},
url={https://books.google.com/books?id=X5YfsuCWpxMC},
year={2013},
publisher={Johns Hopkins University Press}
}
@article{hjw2020,
author = {Martin Hutzenthaler and Arnulf Jentzen and Philippe von Wurstemberger},
title = {{Overcoming the curse of dimensionality in the approximative pricing of financial derivatives with default risks}},
volume = {25},
journal = {Electronic Journal of Probability},
number = {none},
publisher = {Institute of Mathematical Statistics and Bernoulli Society},
pages = {1{\textendash}73},
keywords = {curse of dimensionality, high-dimensional PDEs, multilevel Picard method, semilinear Kolmogorov PDEs, Semilinear PDEs},
year = {2020},
doi = {10.1214/20-EJP423},
URL = {https://doi.org/10.1214/20-EJP423}
}
@article{bhj20,
author = {Beck, Christian and Hutzenthaler, Martin and Jentzen, Arnulf},
title = {On nonlinear {Feynman}{\textendash}{Kac} formulas for viscosity solutions of semilinear parabolic partial differential equations},
journal = {Stochastics and Dynamics},
volume = {21},
number = {08},
pages = {2150048},
year = {2021},
doi = {10.1142/S0219493721500489},
URL = {https://doi.org/10.1142/S0219493721500489},
eprint = {https://doi.org/10.1142/S0219493721500489},
abstract = {The classical Feynman--Kac identity builds a bridge between stochastic analysis and partial differential equations (PDEs) by providing stochastic representations for classical solutions of linear Kolmogorov PDEs. This opens the door for the derivation of sampling based Monte Carlo approximation methods, which can be meshfree and thereby stand a chance to approximate solutions of PDEs without suffering from the curse of dimensionality. In this paper, we extend the classical Feynman--Kac formula to certain semilinear Kolmogorov PDEs. More specifically, we identify suitable solutions of stochastic fixed point equations (SFPEs), which arise when the classical Feynman--Kac identity is formally applied to semilinear Kolmogorov PDEs, as viscosity solutions of the corresponding PDEs. This justifies, in particular, employing full-history recursive multilevel Picard (MLP) approximation algorithms, which have recently been shown to overcome the curse of dimensionality in the numerical approximation of solutions of SFPEs, in the numerical approximation of semilinear Kolmogorov PDEs.}
}
@article{tsaban_harnessing_2022,
title = {Harnessing protein folding neural networks for peptide--protein docking},
volume = {13},
copyright = {2022 The Author(s)},
issn = {2041-1723},
url = {https://www.nature.com/articles/s41467-021-27838-9},
doi = {10.1038/s41467-021-27838-9},
abstract = {Highly accurate protein structure predictions by deep neural networks such as AlphaFold2 and RoseTTAFold have tremendous impact on structural biology and beyond. Here, we show that, although these deep learning approaches have originally been developed for the in silico folding of protein monomers, AlphaFold2 also enables quick and accurate modeling of peptideprotein interactions. Our simple implementation of AlphaFold2 generates peptideprotein complex models without requiring multiple sequence alignment information for the peptide partner, and can handle binding-induced conformational changes of the receptor. We explore what AlphaFold2 has memorized and learned, and describe specific examples that highlight differences compared to state-of-the-art peptide docking protocol PIPER-FlexPepDock. These results show that AlphaFold2 holds great promise for providing structural insight into a wide range of peptideprotein complexes, serving as a starting point for the detailed characterization and manipulation of these interactions.},
language = {en},
number = {1},
urldate = {2023-11-15},
journal = {Nat Commun},
author = {Tsaban, Tomer and Varga, Julia K. and Avraham, Orly and Ben-Aharon, Ziv and Khramushin, Alisa and Schueler-Furman, Ora},
month = jan,
year = {2022},
note = {Number: 1
Publisher: Nature Publishing Group},
keywords = {Machine learning, Molecular modelling, Peptides, Protein structure predictions},
pages = {176},
file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/EKLDKE65/Tsaban et al. - 2022 - Harnessing protein folding neural networks for pep.pdf:application/pdf},
}
@article{davies_signature_2022,
title = {The signature and cusp geometry of hyperbolic knots},
journal = {Geometry and Topology},
author = {Davies, A and Juhasz, A and Lackenby, M and Tomasev, N},
year = {2022},
note = {Publisher: Mathematical Sciences Publishers},
}
@article{zhao_space-based_2023,
title = {Space-based gravitational wave signal detection and extraction with deep neural network},
volume = {6},
copyright = {2023 Springer Nature Limited},
issn = {2399-3650},
url = {https://www.nature.com/articles/s42005-023-01334-6},
doi = {10.1038/s42005-023-01334-6},
abstract = {Space-based gravitational wave (GW) detectors will be able to observe signals from sources that are otherwise nearly impossible from current ground-based detection. Consequently, the well established signal detection method, matched filtering, will require a complex template bank, leading to a computational cost that is too expensive in practice. Here, we develop a high-accuracy GW signal detection and extraction method for all space-based GW sources. As a proof of concept, we show that a science-driven and uniform multi-stage self-attention-based deep neural network can identify synthetic signals that are submerged in Gaussian noise. Our method exhibits a detection rate exceeding 99\% in identifying signals from various sources, with the signal-to-noise ratio at 50, at a false alarm rate of 1\%. while obtaining at least 95\% similarity compared with target signals. We further demonstrate the interpretability and strong generalization behavior for several extended scenarios.},
language = {en},
number = {1},
urldate = {2023-11-15},
journal = {Commun Phys},
author = {Zhao, Tianyu and Lyu, Ruoxi and Wang, He and Cao, Zhoujian and Ren, Zhixiang},
month = aug,
year = {2023},
note = {Number: 1
Publisher: Nature Publishing Group},
keywords = {Astronomy and planetary science, Computational science},
pages = {1{\textendash}12},
file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/JCCM78TZ/Zhao et al. - 2023 - Space-based gravitational wave signal detection an.pdf:application/pdf},
}
@misc{wu2022sustainable,
title={Sustainable AI: Environmental Implications, Challenges and Opportunities},
author={Carole-Jean Wu and Ramya Raghavendra and Udit Gupta and Bilge Acun and Newsha Ardalani and Kiwan Maeng and Gloria Chang and Fiona Aga Behram and James Huang and Charles Bai and Michael Gschwind and Anurag Gupta and Myle Ott and Anastasia Melnikov and Salvatore Candido and David Brooks and Geeta Chauhan and Benjamin Lee and Hsien-Hsin S. Lee and Bugra Akyildiz and Maximilian Balandat and Joe Spisak and Ravi Jain and Mike Rabbat and Kim Hazelwood},
year={2022},
eprint={2111.00364},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@misc{strubell2019energy,
title={Energy and Policy Considerations for Deep Learning in NLP},
author={Emma Strubell and Ananya Ganesh and Andrew McCallum},
year={2019},
eprint={1906.02243},
archivePrefix={arXiv},
primaryClass={cs.CL}
}
@article{e_multilevel_2021,
title = {Multilevel {Picard} iterations for solving smooth semilinear parabolic heat equations},
volume = {2},
issn = {2662-2971},
url = {https://doi.org/10.1007/s42985-021-00089-5},
doi = {10.1007/s42985-021-00089-5},
abstract = {We introduce a new family of numerical algorithms for approximating solutions of general high-dimensional semilinear parabolic partial differential equations at single space-time points. The algorithm is obtained through a delicate combination of the FeynmanKac and the BismutElworthyLi formulas, and an approximate decomposition of the Picard fixed-point iteration with multilevel accuracy. The algorithm has been tested on a variety of semilinear partial differential equations that arise in physics and finance, with satisfactory results. Analytical tools needed for the analysis of such algorithms, including a semilinear FeynmanKac formula, a new class of seminorms and their recursive inequalities, are also introduced. They allow us to prove for semilinear heat equations with gradient-independent nonlinearities that the computational complexity of the proposed algorithm is bounded by \$\$O(d{\textbackslash},\{{\textbackslash}varepsilon \}{\textasciicircum}\{-(4+{\textbackslash}delta )\})\$\$for any \$\${\textbackslash}delta {\textbackslash}in (0,{\textbackslash}infty )\$\$under suitable assumptions, where \$\$d{\textbackslash}in \{\{{\textbackslash}mathbb \{N\}\}\}\$\$is the dimensionality of the problem and \$\$\{{\textbackslash}varepsilon \}{\textbackslash}in (0,{\textbackslash}infty )\$\$is the prescribed accuracy. Moreover, the introduced class of numerical algorithms is also powerful for proving high-dimensional approximation capacities for deep neural networks.},
language = {en},
number = {6},
urldate = {2023-11-27},
journal = {Partial Differ. Equ. Appl.},
author = {E, Weinan and Hutzenthaler, Martin and Jentzen, Arnulf and Kruse, Thomas},
month = nov,
year = {2021},
keywords = {65M75, Curse of dimensionality, High-dimensional PDEs, High-dimensional semilinear BSDEs, Multilevel Monte Carlo method, Multilevel Picard iteration},
pages = {80},
file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/5ADX78DS/E et al. - 2021 - Multilevel Picard iterations for solving smooth se.pdf:application/pdf},
}
@article{e_multilevel_2019,
title = {On {Multilevel} {Picard} {Numerical} {Approximations} for {High}-{Dimensional} {Nonlinear} {Parabolic} {Partial} {Differential} {Equations} and {High}-{Dimensional} {Nonlinear} {Backward} {Stochastic} {Differential} {Equations}},
volume = {79},
issn = {1573-7691},
url = {https://doi.org/10.1007/s10915-018-00903-0},
doi = {10.1007/s10915-018-00903-0},
abstract = {Parabolic partial differential equations (PDEs) and backward stochastic differential equations (BSDEs) are key ingredients in a number of models in physics and financial engineering. In particular, parabolic PDEs and BSDEs are fundamental tools in pricing and hedging models for financial derivatives. The PDEs and BSDEs appearing in such applications are often high-dimensional and nonlinear. Since explicit solutions of such PDEs and BSDEs are typically not available, it is a very active topic of research to solve such PDEs and BSDEs approximately. In the recent article (E et al., Multilevel Picard iterations for solving smooth semilinear parabolic heat equations, arXiv:1607.03295) we proposed a family of approximation methods based on Picard approximations and multilevel Monte Carlo methods and showed under suitable regularity assumptions on the exact solution of a semilinear heat equation that the computational complexity is bounded by \$\$O( d {\textbackslash}, \{{\textbackslash}varepsilon \}{\textasciicircum}\{-(4+{\textbackslash}delta )\})\$\$for any \$\${\textbackslash}delta {\textbackslash}in (0,{\textbackslash}infty )\$\$where d is the dimensionality of the problem and \$\$\{{\textbackslash}varepsilon \}{\textbackslash}in (0,{\textbackslash}infty )\$\$is the prescribed accuracy. In this paper, we test the applicability of this algorithm on a variety of 100-dimensional nonlinear PDEs that arise in physics and finance by means of numerical simulations presenting approximation accuracy against runtime. The simulation results for many of these 100-dimensional example PDEs are very satisfactory in terms of both accuracy and speed. Moreover, we also provide a review of other approximation methods for nonlinear PDEs and BSDEs from the scientific literature.},
language = {en},
number = {3},
urldate = {2023-11-27},
journal = {J Sci Comput},
author = {E, Weinan and Hutzenthaler, Martin and Jentzen, Arnulf and Kruse, Thomas},
month = jun,
year = {2019},
keywords = {65M75, Curse of dimensionality, High-dimensional nonlinear BSDEs, High-dimensional PDEs, Multilevel Monte Carlo method, Multilevel Picard approximations},
pages = {1534--1571},
file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/7KHG4238/E et al. - 2019 - On Multilevel Picard Numerical Approximations for .pdf:application/pdf},
}
@inproceedings{vaswani_attention_2017,
title = {Attention is {All} you {Need}},
volume = {30},
url = {https://proceedings.neurips.cc/paper_files/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html},
abstract = {The dominant sequence transduction models are based on complex recurrent or convolutional neural networks in an encoder and decoder configuration. The best performing such models also connect the encoder and decoder through an attention mechanism. We propose a novel, simple network architecture based solely on an attention mechanism, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. Our single model with 165 million parameters, achieves 27.5 BLEU on English-to-German translation, improving over the existing best ensemble result by over 1 BLEU. On English-to-French translation, we outperform the previous single state-of-the-art with model by 0.7 BLEU, achieving a BLEU score of 41.1.},
urldate = {2023-12-01},
booktitle = {Advances in {Neural} {Information} {Processing} {Systems}},
publisher = {Curran Associates, Inc.},
author = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, Łukasz and Polosukhin, Illia},
year = {2017},
file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/T7R9QP6K/Vaswani et al. - 2017 - Attention is All you Need.pdf:application/pdf},
}
@article{arik_tabnet_2021,
title = {{TabNet}: {Attentive} {Interpretable} {Tabular} {Learning}},
volume = {35},
copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
issn = {2374-3468},
shorttitle = {{TabNet}},
url = {https://ojs.aaai.org/index.php/AAAI/article/view/16826},
doi = {10.1609/aaai.v35i8.16826},
abstract = {We propose a novel high-performance and interpretable canonical deep tabular data learning architecture, TabNet. TabNet uses sequential attention to choose which features to reason from at each decision step, enabling interpretability and more efficient learning as the learning capacity is used for the most salient features. We demonstrate that TabNet outperforms other variants on a wide range of non-performance-saturated tabular datasets and yields interpretable feature attributions plus insights into its global behavior. Finally, we demonstrate self-supervised learning for tabular data, significantly improving performance when unlabeled data is abundant.},
language = {en},
number = {8},
urldate = {2023-12-01},
journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
author = {Arik, Sercan Ö and Pfister, Tomas},
month = may,
year = {2021},
note = {Number: 8},
keywords = {Unsupervised \& Self-Supervised Learning},
pages = {6679--6687},
file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/SHV66I4Y/Arik and Pfister - 2021 - TabNet Attentive Interpretable Tabular Learning.pdf:application/pdf},
}
@INPROCEEDINGS {8099678,
author = {F. Chollet},
booktitle = {2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
title = {Xception: Deep Learning with Depthwise Separable Convolutions},
year = {2017},
volume = {},
issn = {1063-6919},
pages = {1800{\textendash}1807},
abstract = {We present an interpretation of Inception modules in convolutional neural networks as being an intermediate step in-between regular convolution and the depthwise separable convolution operation (a depthwise convolution followed by a pointwise convolution). In this light, a depthwise separable convolution can be understood as an Inception module with a maximally large number of towers. This observation leads us to propose a novel deep convolutional neural network architecture inspired by Inception, where Inception modules have been replaced with depthwise separable convolutions. We show that this architecture, dubbed Xception, slightly outperforms Inception V3 on the ImageNet dataset (which Inception V3 was designed for), and significantly outperforms Inception V3 on a larger image classification dataset comprising 350 million images and 17,000 classes. Since the Xception architecture has the same number of parameters as Inception V3, the performance gains are not due to increased capacity but rather to a more efficient use of model parameters.},
keywords = {computer architecture;correlation;convolutional codes;google;biological neural networks},
doi = {10.1109/CVPR.2017.195},
url = {https://doi.ieeecomputersociety.org/10.1109/CVPR.2017.195},
publisher = {IEEE Computer Society},
address = {Los Alamitos, CA, USA},
month = {jul}
}
@article{srivastava_dropout_2014,
title = {Dropout: a simple way to prevent neural networks from overfitting},
volume = {15},
issn = {1532-4435},
shorttitle = {Dropout},
abstract = {Deep neural nets with a large number of parameters are very powerful machine learning systems. However, overfitting is a serious problem in such networks. Large networks are also slow to use, making it difficult to deal with overfitting by combining the predictions of many different large neural nets at test time. Dropout is a technique for addressing this problem. The key idea is to randomly drop units (along with their connections) from the neural network during training. This prevents units from co-adapting too much. During training, dropout samples from an exponential number of different "thinned" networks. At test time, it is easy to approximate the effect of averaging the predictions of all these thinned networks by simply using a single unthinned network that has smaller weights. This significantly reduces overfitting and gives major improvements over other regularization methods. We show that dropout improves the performance of neural networks on supervised learning tasks in vision, speech recognition, document classification and computational biology, obtaining state-of-the-art results on many benchmark data sets.},
number = {1},
journal = {J. Mach. Learn. Res.},
author = {Srivastava, Nitish and Hinton, Geoffrey and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
month = jan,
year = {2014},
keywords = {deep learning, model combination, neural networks, regularization},
pages = {1929--1958},
file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/JK87IU3H/Srivastava et al. - 2014 - Dropout a simple way to prevent neural networks f.pdf:application/pdf},
}
@article{petersen_optimal_2018,
title = {Optimal approximation of piecewise smooth functions using deep {ReLU} neural networks},
volume = {108},
issn = {1879-2782},
doi = {10.1016/j.neunet.2018.08.019},
abstract = {We study the necessary and sufficient complexity of ReLU neural networks - in terms of depth and number of weights - which is required for approximating classifier functions in an Lp-sense. As a model class, we consider the set Eβ(Rd) of possibly discontinuous piecewise Cβ functions f:[-12,12]d→R, where the different "smooth regions" of f are separated by Cβ hypersurfaces. For given dimension d≥2, regularity β{\textgreater}0, and accuracy ε{\textgreater}0, we construct artificial neural networks with ReLU activation function that approximate functions from Eβ(Rd) up to an L2 error of ε. The constructed networks have a fixed number of layers, depending only on d and β, and they have O(ε-2(d-1)∕β) many nonzero weights, which we prove to be optimal. For the proof of optimality, we establish a lower bound on the description complexity of the class Eβ(Rd). By showing that a family of approximating neural networks gives rise to an encoder for Eβ(Rd), we then prove that one cannot approximate a general function f∈Eβ(Rd) using neural networks that are less complex than those produced by our construction. In addition to the optimality in terms of the number of weights, we show that in order to achieve this optimal approximation rate, one needs ReLU networks of a certain minimal depth. Precisely, for piecewise Cβ(Rd) functions, this minimal depth is given - up to a multiplicative constant - by βd. Up to a log factor, our constructed networks match this bound. This partly explains the benefits of depth for ReLU networks by showing that deep networks are necessary to achieve efficient approximation of (piecewise) smooth functions. Finally, we analyze approximation in high-dimensional spaces where the function f to be approximated can be factorized into a smooth dimension reducing feature map τ and classifier function g - defined on a low-dimensional feature space - as f=g∘τ. We show that in this case the approximation rate depends only on the dimension of the feature space and not the input dimension.},
language = {eng},
journal = {Neural Netw},
author = {Petersen, Philipp and Voigtlaender, Felix},
month = dec,
year = {2018},
pmid = {30245431},
keywords = {Curse of dimension, Deep neural networks, Function approximation, Metric entropy, Neural Networks, Computer, Piecewise smooth functions, Sparse connectivity},
pages = {296{\textendash}330},
file = {Submitted Version:/Users/shakilrafi/Zotero/storage/UL4GLF59/Petersen and Voigtlaender - 2018 - Optimal approximation of piecewise smooth function.pdf:application/pdf},
}
@misc{bigbook,
title={Mathematical Introduction to Deep Learning: Methods, Implementations, and Theory},
author={Arnulf Jentzen and Benno Kuckuck and Philippe von Wurstemberger},
year={2023},
eprint={2310.20360},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@article{mcculloch_logical_1943,
title = {A logical calculus of the ideas immanent in nervous activity},
volume = {5},
issn = {1522-9602},
url = {https://doi.org/10.1007/BF02478259},
doi = {10.1007/BF02478259},
abstract = {Because of the “all-or-none” character of nervous activity, neural events and the relations among them can be treated by means of propositional logic. It is found that the behavior of every net can be described in these terms, with the addition of more complicated logical means for nets containing circles; and that for any logical expression satisfying certain conditions, one can find a net behaving in the fashion it describes. It is shown that many particular choices among possible neurophysiological assumptions are equivalent, in the sense that for every net behaving under one assumption, there exists another net which behaves under the other and gives the same results, although perhaps not in the same time. Various applications of the calculus are discussed.},
number = {4},
journal = {The bulletin of mathematical biophysics},
author = {McCulloch, Warren S. and Pitts, Walter},
month = dec,
year = {1943},
pages = {115--133},
}
@article{Hornik1991ApproximationCO,
title={Approximation capabilities of multilayer feedforward networks},
author={Kurt Hornik},
journal={Neural Networks},
year={1991},
volume={4},
pages={251-257},
url={https://api.semanticscholar.org/CorpusID:7343126}
}
@article{cybenko_approximation_1989,
title = {Approximation by superpositions of a sigmoidal function},
volume = {2},
issn = {1435-568X},
url = {https://doi.org/10.1007/BF02551274},
doi = {10.1007/BF02551274},
abstract = {In this paper we demonstrate that finite linear combinations of compositions of a fixed, univariate function and a set of affine functionals can uniformly approximate any continuous function ofn real variables with support in the unit hypercube; only mild conditions are imposed on the univariate function. Our results settle an open question about representability in the class of single hidden layer neural networks. In particular, we show that arbitrary decision regions can be arbitrarily well approximated by continuous feedforward neural networks with only a single internal, hidden layer and any continuous sigmoidal nonlinearity. The paper discusses approximation properties of other possible types of nonlinearities that might be implemented by artificial neural networks.},
number = {4},
journal = {Mathematics of Control, Signals and Systems},
author = {Cybenko, G.},
month = dec,
year = {1989},
pages = {303--314},
}
@article{KNOKE2021100035,
title = {Solving differential equations via artificial neural networks: Findings and failures in a model problem},
journal = {Examples and Counterexamples},
volume = {1},
pages = {100035},
year = {2021},
issn = {2666-657X},
doi = {https://doi.org/10.1016/j.exco.2021.100035},
url = {https://www.sciencedirect.com/science/article/pii/S2666657X21000197},
author = {Tobias Knoke and Thomas Wick},
keywords = {Ordinary differential equation, Logistic equation, Feedforward neural network, numerical optimization, PyTorch},
abstract = {In this work, we discuss some pitfalls when solving differential equations with neural networks. Due to the highly nonlinear cost functional, local minima might be approximated by which functions may be obtained, that do not solve the problem. The main reason for these failures is a sensitivity on initial guesses for the nonlinear iteration. We apply known algorithms and corresponding implementations, including code snippets, and present an example and counter example for the logistic differential equations. These findings are further substantiated with variations in collocation points and learning rates.}
}
@article{Lagaris_1998,
title={Artificial neural networks for solving ordinary and partial differential equations},
volume={9},
ISSN={1045-9227},
url={http://dx.doi.org/10.1109/72.712178},
DOI={10.1109/72.712178},
number={5},
journal={IEEE Transactions on Neural Networks},
publisher={Institute of Electrical and Electronics Engineers (IEEE)},
author={Lagaris, I.E. and Likas, A. and Fotiadis, D.I.},
year={1998},
pages={987--1000}
}
@ARTICLE{gunnar_carlsson,
author = {{Carlsson}, Gunnar and {Br{\"u}el Gabrielsson}, Rickard},
title = "{Topological Approaches to Deep Learning}",
journal = {arXiv e-prints},
keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Mathematics - Algebraic Topology, Statistics - Machine Learning, 68T05, 55N35, 62-07},
year = 2018,
month = nov,
eid = {arXiv:1811.01122},
pages = {arXiv:1811.01122},
doi = {10.48550/arXiv.1811.01122},
archivePrefix = {arXiv},
eprint = {1811.01122},
primaryClass = {cs.LG},
adsurl = {https://ui.adsabs.harvard.edu/abs/2018arXiv181101122C},
adsnote = {Provided by the SAO/NASA Astrophysics Data System}
}
@misc{shiebler2021category,
title={Category Theory in Machine Learning},
author={Dan Shiebler and Bruno Gavranović and Paul Wilson},
year={2021},
eprint={2106.07032},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
@Manual{dplyr,
title = {dplyr: A Grammar of Data Manipulation},
author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller and Davis Vaughan},
year = {2023},
note = {R package version 1.1.4, https://github.com/tidyverse/dplyr},
url = {https://dplyr.tidyverse.org},
}
@INPROCEEDINGS{nn_diff,
author={Berner, Julius and Elbrächter, Dennis and Grohs, Philipp and Jentzen, Arnulf},
booktitle={2019 13th International conference on Sampling Theory and Applications (SampTA)},
title={Towards a regularity theory for ReLU networks -- chain rule and global error estimates},
year={2019},
volume={},
number={},
pages={1-5},
keywords={Neural networks;Standards;Approximation methods;Machine learning;Partial differential equations;Level set},
doi={10.1109/SampTA45681.2019.9031005}}
@Book{ggplot2,
author = {Hadley Wickham},
title = {ggplot2: Elegant Graphics for Data Analysis},
publisher = {Springer-Verlag New York},
year = {2016},
isbn = {978-3-319-24277-4},
url = {https://ggplot2.tidyverse.org},
}
@online{plotly,
author = {{Plotly Technologies Inc}},
title = {Collaborative data science},
publisher = {Plotly Technologies Inc.},
address = {Montreal, QC},
year = {2015},
url = {https://plot.ly}
}
@misc{rafi_towards_2024,
title = {Towards an {Algebraic} {Framework} {For} {Approximating} {Functions} {Using} {Neural} {Network} {Polynomials}},
url = {https://arxiv.org/abs/2402.01058v1},
abstract = {We make the case for neural network objects and extend an already existing neural network calculus explained in detail in Chapter 2 on {\textbackslash}cite\{bigbook\}. Our aim will be to show that, yes, indeed, it makes sense to talk about neural network polynomials, neural network exponentials, sine, and cosines in the sense that they do indeed approximate their real number counterparts subject to limitations on certain of their parameters, \$q\$, and \${\textbackslash}varepsilon\$. While doing this, we show that the parameter and depth growth are only polynomial on their desired accuracy (defined as a 1-norm difference over \${\textbackslash}mathbb\{R\}\$), thereby showing that this approach to approximating, where a neural network in some sense has the structural properties of the function it is approximating is not entire intractable.},
language = {en},
urldate = {2024-02-11},
journal = {arXiv.org},
author = {Rafi, Shakil and Padgett, Joshua Lee and Nakarmi, Ukash},
month = feb,
year = {2024},
file = {Full Text PDF:/Users/shakilrafi/Zotero/storage/A8LPKNZK/Rafi et al. - 2024 - Towards an Algebraic Framework For Approximating F.pdf:application/pdf},
}
@Manual{nnR-package, title = {nnR: Neural Networks Made Algebraic}, author = {Shakil Rafi and Joshua Lee Padgett}, year = {2024}, note = {R package version 0.1.0}, url = {https://github.com/2shakilrafi/nnR/}, }
@misc{ackermann2023deep,
title={Deep neural networks with ReLU, leaky ReLU, and softplus activation provably overcome the curse of dimensionality for Kolmogorov partial differential equations with Lipschitz nonlinearities in the $L^p$-sense},
author={Julia Ackermann and Arnulf Jentzen and Thomas Kruse and Benno Kuckuck and Joshua Lee Padgett},
year={2023},
eprint={2309.13722},
archivePrefix={arXiv},
primaryClass={math.NA}
}
@book{graham_concrete_1994,
address = {Upper Saddle River, NJ},
edition = {2nd edition},
title = {Concrete {Mathematics}: {A} {Foundation} for {Computer} {Science}},
isbn = {978-0-201-55802-9},
shorttitle = {Concrete {Mathematics}},
abstract = {This book introduces the mathematics that supports advanced computer programming and the analysis of algorithms. The primary aim of its well-known authors is to provide a solid and relevant base of mathematical skills - the skills needed to solve complex problems, to evaluate horrendous sums, and to discover subtle patterns in data. It is an indispensable text and reference not only for computer scientists - the authors themselves rely heavily on it! - but for serious users of mathematics in virtually every discipline.Concrete Mathematics is a blending of CONtinuous and disCRETE mathematics. "More concretely," the authors explain, "it is the controlled manipulation of mathematical formulas, using a collection of techniques for solving problems." The subject matter is primarily an expansion of the Mathematical Preliminaries section in Knuth's classic Art of Computer Programming, but the style of presentation is more leisurely, and individual topics are covered more deeply. Several new topics have been added, and the most significant ideas have been traced to their historical roots. The book includes more than 500 exercises, divided into six categories. Complete answers are provided for all exercises, except research problems, making the book particularly valuable for self-study.Major topics include:SumsRecurrencesInteger functionsElementary number theoryBinomial coefficientsGenerating functionsDiscrete probabilityAsymptotic methodsThis second edition includes important new material about mechanical summation. In response to the widespread use of the first edition as a reference book, the bibliography and index have also been expanded, and additional nontrivial improvements can be found on almost every page. Readers will appreciate the informal style of Concrete Mathematics. Particularly enjoyable are the marginal graffiti contributed by students who have taken courses based on this material. The authors want to convey not only the importance of the techniques presented, but some of the fun in learning and using them.},
language = {English},
publisher = {Addison-Wesley Professional},
author = {Graham, Ronald and Knuth, Donald and Patashnik, Oren},
month = feb,
year = {1994},
}
@software{Rafi_nnR_2024,
author = {Rafi, Shakil},
license = {GPL-3.0},
month = feb,
title = {{nnR}},
url = {https://github.com/2shakilrafi/nnR},
version = {0.10},
year = {2024}
}
@article{https://doi.org/10.1002/cnm.3535,
author = {Rego, Bruno V. and Weiss, Dar and Bersi, Matthew R. and Humphrey, Jay D.},
title = {Uncertainty quantification in subject-specific estimation of local vessel mechanical properties},
journal = {International Journal for Numerical Methods in Biomedical Engineering},
volume = {37},
number = {12},
pages = {e3535},
keywords = {digital image correlation, image-based modeling, subject-specific model, uncertainty quantification},
doi = {https://doi.org/10.1002/cnm.3535},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/cnm.3535},
eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/cnm.3535},
abstract = {Abstract Quantitative estimation of local mechanical properties remains critically important in the ongoing effort to elucidate how blood vessels establish, maintain, or lose mechanical homeostasis. Recent advances based on panoramic digital image correlation (pDIC) have made high-fidelity 3D reconstructions of small-animal (e.g., murine) vessels possible when imaged in a variety of quasi-statically loaded configurations. While we have previously developed and validated inverse modeling approaches to translate pDIC-measured surface deformations into biomechanical metrics of interest, our workflow did not heretofore include a methodology to quantify uncertainties associated with local point estimates of mechanical properties. This limitation has compromised our ability to infer biomechanical properties on a subject-specific basis, such as whether stiffness differs significantly between multiple material locations on the same vessel or whether stiffness differs significantly between multiple vessels at a corresponding material location. In the present study, we have integrated a novel uncertainty quantification and propagation pipeline within our inverse modeling approach, relying on empirical and analytic Bayesian techniques. To demonstrate the approach, we present illustrative results for the ascending thoracic aorta from three mouse models, quantifying uncertainties in constitutive model parameters as well as circumferential and axial tangent stiffness. Our extended workflow not only allows parameter uncertainties to be systematically reported, but also facilitates both subject-specific and group-level statistical analyses of the mechanics of the vessel wall.},
year = {2021}
}
@article{schapire_strength_1990,
title = {The strength of weak learnability},
volume = {5},
issn = {1573-0565},
url = {https://doi.org/10.1007/BF00116037},
doi = {10.1007/BF00116037},
abstract = {This paper addresses the problem of improving the accuracy of an hypothesis output by a learning algorithm in the distribution-free (PAC) learning model. A concept class islearnable (orstrongly learnable) if, given access to a source of examples of the unknown concept, the learner with high probability is able to output an hypothesis that is correct on all but an arbitrarily small fraction of the instances. The concept class isweakly learnable if the learner can produce an hypothesis that performs only slightly better than random guessing. In this paper, it is shown that these two notions of learnability are equivalent.},
language = {en},
number = {2},
urldate = {2024-03-06},
journal = {Mach Learn},
author = {Schapire, Robert E.},
month = jun,
year = {1990},
keywords = {learnability theory, learning from examples, Machine learning, PAC learning, polynomial-time identification},
pages = {197{\textendash}227}
}
@article{schwab_deep_2019,
title = {Deep learning in high dimension: {Neural} network expression rates for generalized polynomial chaos expansions in {UQ}},
volume = {17},
issn = {0219-5305},
shorttitle = {Deep learning in high dimension},
url = {https://www.worldscientific.com/doi/abs/10.1142/S0219530518500203},
doi = {10.1142/S0219530518500203},
abstract = {We estimate the expressive power of certain deep neural networks (DNNs for short) on a class of countably-parametric, holomorphic maps $u : U \to \mathbb{R}$ on the parameter domain $U = [-1,1]^{\infty}$. Dimension-independent rates of best $n$-term truncations of generalized polynomial chaos (gpc for short) approximations depend only on the summability exponent of the sequence of their gpc expansion coefficients. So-called $(b,\varepsilon)$-holomorphic maps $u$, with $b \in \ell^{p}$ for some $p \in (0,1)$, are known to allow gpc expansions with coefficient sequences in $\ell^{p}$. Such maps arise for example as response surfaces of parametric PDEs, with applications in PDE uncertainty quantification (UQ) for many mathematical models in engineering and the sciences. Up to logarithmic terms, we establish the dimension-independent approximation rate $s = 1/p - 1$ for these functions in terms of the total number $N$ of units and weights in the DNN. It follows that certain DNN architectures can overcome the curse of dimensionality when expressing possibly countably-parametric, real-valued maps with a certain degree of sparsity in the sequences of their gpc expansion coefficients. We also obtain rates of expressive power of DNNs for countably-parametric maps $u : U \to V$, where $V$ is the Hilbert space $H^{1}_{0}([0,1])$.},
number = {01},
urldate = {2024-03-07},
journal = {Anal. Appl.},
author = {Schwab, Christoph and Zech, Jakob},
month = jan,
year = {2019},
note = {Publisher: World Scientific Publishing Co.},
keywords = {deep networks, Generalized polynomial chaos, sparse grids, uncertainty quantification},
pages = {19--55},
}
@book{Goodfellow-et-al-2016,
title={Deep Learning},
author={Ian Goodfellow and Yoshua Bengio and Aaron Courville},
publisher={MIT Press},
note={\url{http://www.deeplearningbook.org}},
year={2016}
}
@article{yarotsky_error_2017,
title = {Error bounds for approximations with deep {ReLU} networks},
volume = {94},
issn = {0893-6080},
url = {https://www.sciencedirect.com/science/article/pii/S0893608017301545},
doi = {10.1016/j.neunet.2017.07.002},
abstract = {We study expressive power of shallow and deep neural networks with piece-wise linear activation functions. We establish new rigorous upper and lower bounds for the network complexity in the setting of approximations in Sobolev spaces. In particular, we prove that deep ReLU networks more efficiently approximate smooth functions than shallow networks. In the case of approximations of 1D Lipschitz functions we describe adaptive depth-6 network architectures more efficient than the standard shallow architecture.},
urldate = {2024-03-22},
journal = {Neural Networks},
author = {Yarotsky, Dmitry},
month = oct,
year = {2017},
keywords = {Approximation complexity, Deep ReLU networks},
pages = {103--114},
file = {ScienceDirect Snapshot:/Users/shakilrafi/Zotero/storage/4HS3Z6ZE/S0893608017301545.html:text/html;Submitted Version:/Users/shakilrafi/Zotero/storage/C6KQ6BFJ/Yarotsky - 2017 - Error bounds for approximations with deep ReLU net.pdf:application/pdf},
}