diff --git a/docs/Project.toml b/docs/Project.toml
index 61d676d..595c9a9 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 Boltz = "4544d5e4-abc5-4dea-817f-29e4c205d9c8"
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
+DocumenterCitations = "daee34ce-89f3-4625-b898-19384cb65244"
 DocumenterVitepress = "4710194d-e776-4893-9690-8d956a29c365"
 Lux = "b2108857-7c20-44ae-9111-449ecde12c47"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/docs/make.jl b/docs/make.jl
index 86415e6..9cc8848 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -1,4 +1,4 @@
-using Documenter, DocumenterVitepress, Boltz
+using Documenter, DocumenterVitepress, DocumenterCitations, Boltz
 
 #! format: off
 pages = [
@@ -16,6 +16,11 @@ pages = [
 ]
 #! format: on
 
+bib = CitationBibliography(
+    joinpath(@__DIR__, "ref.bib");
+    style=:authoryear
+)
+
 deploy_config = Documenter.auto_detect_deploy_system()
 deploy_decision = Documenter.deploy_folder(deploy_config; repo="github.com/LuxDL/Boltz.jl",
     devbranch="main", devurl="dev", push_preview=true)
@@ -30,6 +35,7 @@ makedocs(;
     sitename="Boltz.jl Docs",
     format=DocumenterVitepress.MarkdownVitepress(; repo="github.com/LuxDL/Boltz.jl", devbranch="main", devurl="dev", deploy_decision),
     draft=false,
+    plugins=[bib],
     pages)
 
 deploydocs(; repo="github.com/LuxDL/Boltz.jl.git",
diff --git a/docs/ref.bib b/docs/ref.bib
new file mode 100644
index 0000000..da6e339
--- /dev/null
+++ b/docs/ref.bib
@@ -0,0 +1,91 @@
+@article{dosovitskiy2020image,
+    title = {An image is worth 16x16 words: Transformers for image recognition at scale},
+    author = {Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and others},
+    journal = {arXiv preprint arXiv:2010.11929},
+    year = {2020}
+}
+
+@article{simonyan2014very,
+    title = {Very deep convolutional networks for large-scale image recognition},
+    author = {Simonyan, Karen and Zisserman, Andrew},
+    journal = {arXiv preprint arXiv:1409.1556},
+    year = {2014}
+}
+
+@article{greydanus2019hamiltonian,
+    title = {Hamiltonian neural networks},
+    author = {Greydanus, Samuel and Dzamba, Misko and Yosinski, Jason},
+    journal = {Advances in neural information processing systems},
+    volume = {32},
+    year = {2019}
+}
+
+@article{krizhevsky2012imagenet,
+    title = {Imagenet classification with deep convolutional neural networks},
+    author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
+    journal = {Advances in neural information processing systems},
+    volume = {25},
+    year = {2012}
+}
+
+@inproceedings{he2016deep,
+    title = {Deep residual learning for image recognition},
+    author = {He, Kaiming and Zhang, Xiangyu and Ren, Shaoqing and Sun, Jian},
+    booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
+    pages = {770--778},
+    year = {2016}
+}
+
+@inproceedings{xie2017aggregated,
+    title = {Aggregated residual transformations for deep neural networks},
+    author = {Xie, Saining and Girshick, Ross and Doll{\'a}r, Piotr and Tu, Zhuowen and He, Kaiming},
+    booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
+    pages = {1492--1500},
+    year = {2017}
+}
+
+@inproceedings{szegedy2015going,
+    title = {Going deeper with convolutions},
+    author = {Szegedy, Christian and Liu, Wei and Jia, Yangqing and Sermanet, Pierre and Reed, Scott and Anguelov, Dragomir and Erhan, Dumitru and Vanhoucke, Vincent and Rabinovich, Andrew},
+    booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
+    pages = {1--9},
+    year = {2015}
+}
+
+@inproceedings{huang2017densely,
+    title = {Densely connected convolutional networks},
+    author = {Huang, Gao and Liu, Zhuang and Van Der Maaten, Laurens and Weinberger, Kilian Q},
+    booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
+    pages = {4700--4708},
+    year = {2017}
+}
+
+@article{trockman2022patches,
+    title = {Patches are all you need?},
+    author = {Trockman, Asher and Kolter, J Zico},
+    journal = {arXiv preprint arXiv:2201.09792},
+    year = {2022}
+}
+
+@article{howard2017mobilenets,
+    title = {MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications},
+    author = {Howard, Andrew G and others},
+    journal = {arXiv preprint arXiv:1704.04861},
+    year = {2017}
+}
+
+@inproceedings{sandler2018mobilenetv2,
+    title = {Mobilenetv2: Inverted residuals and linear bottlenecks},
+    author = {Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh},
+    booktitle = {Proceedings of the IEEE conference on computer vision and pattern recognition},
+    pages = {4510--4520},
+    year = {2018}
+}
+
+@inproceedings{howard2019searching,
+    title = {Searching for mobilenetv3},
+    author = {Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and others},
+    booktitle = {Proceedings of the IEEE/CVF international conference on computer vision},
+    pages = {1314--1324},
+    year = {2019}
+}
diff --git a/docs/src/api/layers.md b/docs/src/api/layers.md
index 0dd6038..66a1692 100644
--- a/docs/src/api/layers.md
+++ b/docs/src/api/layers.md
@@ -1,5 +1,12 @@
 # `Boltz.Layers` API Reference
 
+---
+
 ```@autodocs
 Modules = [Boltz.Layers]
 ```
+
+```@bibliography
+Pages = [@__FILE__]
+Style = :authoryear
+```
diff --git a/docs/src/api/vision.md b/docs/src/api/vision.md
index dcd7672..f64789a 100644
--- a/docs/src/api/vision.md
+++ b/docs/src/api/vision.md
@@ -46,3 +46,8 @@ Vision.ResNeXt
 
 All the pretrained models require that the images be normalized with the parameters
 `mean = [0.485f0, 0.456f0, 0.406f0]` and `std = [0.229f0, 0.224f0, 0.225f0]`.
+
+```@bibliography
+Pages = [@__FILE__]
+Style = :authoryear
+```
diff --git a/src/layers/encoder.jl b/src/layers/encoder.jl
index 7a5742e..086a626 100644
--- a/src/layers/encoder.jl
+++ b/src/layers/encoder.jl
@@ -2,7 +2,7 @@
     VisionTransformerEncoder(in_planes, depth, number_heads; mlp_ratio = 4.0f0,
                              dropout = 0.0f0)
 
-Transformer as used in the base ViT architecture.
+Transformer as used in the base ViT architecture [dosovitskiy2020image](@citep).
 
 ## Arguments
 
@@ -14,11 +14,6 @@ Transformer as used in the base ViT architecture.
 
   - `mlp_ratio`: ratio of MLP layers to the number of input channels
   - `dropout_rate`: dropout rate
-
-## References
-
-[1] Dosovitskiy, Alexey, et al. "An image is worth 16x16 words: Transformers for image
-recognition at scale." arXiv preprint arXiv:2010.11929 (2020).
 """
 function VisionTransformerEncoder(
     in_planes, depth, number_heads; mlp_ratio=4.0f0, dropout_rate=0.0f0)
diff --git a/src/layers/hamiltonian.jl b/src/layers/hamiltonian.jl
index f3209cd..5cad99d 100644
--- a/src/layers/hamiltonian.jl
+++ b/src/layers/hamiltonian.jl
@@ -1,11 +1,11 @@
 """
     HamiltonianNN{FST}(model; autodiff=nothing) where {FST}
 
-Constructs a Hamiltonian Neural Network [1]. This neural network is useful for learning
-symmetries and conservation laws by supervision on the gradients of the trajectories. It
-takes as input a concatenated vector of length `2n` containing the position (of size `n`)
-and momentum (of size `n`) of the particles. It then returns the time derivatives for
-position and momentum.
+Constructs a Hamiltonian Neural Network [greydanus2019hamiltonian](@citep). This neural
+network is useful for learning symmetries and conservation laws by supervision on the
+gradients of the trajectories. It takes as input a concatenated vector of length `2n`
+containing the position (of size `n`) and momentum (of size `n`) of the particles. It then
+returns the time derivatives for position and momentum.
 
 ## Arguments
 
@@ -35,11 +35,6 @@ position and momentum.
     This layer uses nested autodiff. Please refer to the manual entry on
     [Nested Autodiff](https://lux.csail.mit.edu/stable/manual/nested_autodiff) for more
    information and known limitations.
-
-## References
-
-[1] Greydanus, Samuel, Misko Dzamba, and Jason Yosinski. "Hamiltonian Neural Networks."
-Advances in Neural Information Processing Systems 32 (2019): 15379-15389.
 """
 @concrete struct HamiltonianNN{FST} <: AbstractExplicitContainerLayer{(:model,)}
     model
diff --git a/src/vision/extensions.jl b/src/vision/extensions.jl
index ae93950..62b541e 100644
--- a/src/vision/extensions.jl
+++ b/src/vision/extensions.jl
@@ -1,24 +1,18 @@
 """
     AlexNet(; kwargs...)
 
-Create an AlexNet model [1]
+Create an AlexNet model [krizhevsky2012imagenet](@citep).
 
 ## Keyword Arguments
 
 $(INITIALIZE_KWARGS)
-
-## References
-
-[1] Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet classification with
-deep convolutional neural networks." Advances in neural information processing systems 25
-(2012): 1097-1105.
 """
 function AlexNet end
 
 """
     ResNet(depth::Int; kwargs...)
 
-Create a ResNet model [1].
+Create a ResNet model [he2016deep](@citep).
 
 ## Arguments
 
@@ -27,18 +21,13 @@ Create a ResNet model [1].
 ## Keyword Arguments
 
 $(INITIALIZE_KWARGS)
-
-## References
-
-[1] He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the
-    IEEE conference on computer vision and pattern recognition. 2016.
 """
 function ResNet end
 
 """
     ResNeXt(depth::Int; kwargs...)
 
-Create a ResNeXt model [1].
+Create a ResNeXt model [xie2017aggregated](@citep).
 
 ## Arguments
 
@@ -47,37 +36,24 @@ Create a ResNeXt model [1].
 ## Keyword Arguments
 
 $(INITIALIZE_KWARGS)
-
-## References
-
-[1] Saining Xie, Ross Girshick, Piotr Dollár, Zhuowen Tu, Kaiming He, Ross Gorshick, and
-    Piotr Dollár. "Aggregated residual transformations for deep neural networks."
-    Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.
 """
 function ResNeXt end
 
 """
     GoogLeNet(; kwargs...)
 
-Create a GoogLeNet model [1].
+Create a GoogLeNet model [szegedy2015going](@citep).
 
 ## Keyword Arguments
 
 $(INITIALIZE_KWARGS)
-
-## References
-
-[1] Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, Dragomir Anguelov,
-    Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich. "Going deeper with
-    convolutions." Proceedings of the IEEE conference on computer vision and pattern
-    recognition. 2015.
 """
 function GoogLeNet end
 
 """
     DenseNet(depth::Int; kwargs...)
 
-Create a DenseNet model [1].
+Create a DenseNet model [huang2017densely](@citep).
 
 ## Arguments
 
@@ -86,19 +62,14 @@
 ## Keyword Arguments
 
 $(INITIALIZE_KWARGS)
-
-## References
-
-[1] Gao Huang, Zhuang Liu, Laurens van der Maaten, Kilian Q. Weinberger. "Densely connected
-    convolutional networks." Proceedings of the IEEE conference on computer vision and
-    pattern recognition. 2016.
 """
 function DenseNet end
 
 """
     MobileNet(name::Symbol; kwargs...)
 
-Create a MobileNet model [1, 2, 3].
+Create a MobileNet model
+[howard2017mobilenets, sandler2018mobilenetv2, howard2019searching](@citep).
 
 ## Arguments
 
@@ -108,23 +79,13 @@ Create a MobileNet model [1, 2, 3].
 ## Keyword Arguments
 
 $(INITIALIZE_KWARGS)
-
-## References
-
-[1] Howard, Andrew G., et al. "Mobilenets: Efficient convolutional neural networks for
-    mobile vision applications." arXiv preprint arXiv:1704.04861 (2017).
-[2] Sandler, Mark, et al. "Mobilenetv2: Inverted residuals and linear bottlenecks."
-    Proceedings of the IEEE conference on computer vision and pattern recognition. 2018.
-[3] Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias
-    Weyand, Marco Andreetto, Hartwig Adam. "Searching for MobileNetV3." arXiv preprint
-    arXiv:1905.02244. 2019.
 """
 function MobileNet end
 
 """
     ConvMixer(name::Symbol; kwargs...)
 
-Create a ConvMixer model [1].
+Create a ConvMixer model [trockman2022patches](@citep).
 
 ## Arguments
 
@@ -134,11 +95,6 @@ Create a ConvMixer model [1].
 ## Keyword Arguments
 
 $(INITIALIZE_KWARGS)
-
-## References
-
-[1] Zhu, Zhuoyuan, et al. "ConvMixer: A Convolutional Neural Network with Faster
-    Depth-wise Convolutions for Computer Vision." arXiv preprint arXiv:1911.11907 (2019).
 """
 function ConvMixer end
 
diff --git a/src/vision/vgg.jl b/src/vision/vgg.jl
index cd3380e..6b962bd 100644
--- a/src/vision/vgg.jl
+++ b/src/vision/vgg.jl
@@ -20,7 +20,7 @@ end
 """
     VGG(imsize; config, inchannels, batchnorm = false, nclasses, fcsize, dropout)
 
-Create a VGG model [1].
+Create a VGG model [simonyan2014very](@citep).
 
 ## Arguments
 
@@ -31,11 +31,6 @@ Create a VGG model [1].
   - `nclasses`: number of output classes
   - `fcsize`: intermediate fully connected layer size
   - `dropout`: dropout level between fully connected layers
-
-## References
-
-[1] Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for large-scale
-image recognition." arXiv preprint arXiv:1409.1556 (2014).
 """
 function VGG(imsize; config, inchannels, batchnorm=false, nclasses, fcsize, dropout)
     feature_extractor = vgg_convolutional_layers(config, batchnorm, inchannels)
@@ -63,7 +58,7 @@ const VGG_CONFIG = Dict(
 """
     VGG(depth::Int; batchnorm=false, kwargs...)
 
-Create a VGG model [1] with ImageNet Configuration.
+Create a VGG model [simonyan2014very](@citep) with ImageNet Configuration.
 
 ## Arguments
 
@@ -73,11 +68,6 @@ Create a VGG model [1] with ImageNet Configuration.
   * `batchnorm = false`: set to `true` to use batch normalization after each convolution.
 
 $(INITIALIZE_KWARGS)
-
-## References
-
-[1] Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for large-scale
-    image recognition." arXiv preprint arXiv:1409.1556 (2014).
 """
 function VGG(depth::Int; batchnorm::Bool=false, kwargs...)
     name = Symbol(:vgg, depth, ifelse(batchnorm, "_bn", ""))
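
The `docs/src/api/vision.md` hunk above keeps the note that pretrained weights expect inputs normalized with `mean = [0.485f0, 0.456f0, 0.406f0]` and `std = [0.229f0, 0.224f0, 0.225f0]`. A minimal sketch of that preprocessing step, not part of the diff; the WHCN layout, the `[0, 1]` input range, and the helper name `normalize_for_pretrained` are illustrative assumptions:

```julia
# Hypothetical helper: apply the normalization statistics quoted in docs/src/api/vision.md
# to a WHCN Float32 batch before feeding it to a pretrained Boltz.Vision model.
function normalize_for_pretrained(x::AbstractArray{Float32,4})
    mean = reshape(Float32[0.485, 0.456, 0.406], 1, 1, 3, 1)  # per-channel mean
    std = reshape(Float32[0.229, 0.224, 0.225], 1, 1, 3, 1)   # per-channel std
    return (x .- mean) ./ std
end

x = rand(Float32, 224, 224, 3, 4)      # assumed: 4 RGB images with values in [0, 1]
x_norm = normalize_for_pretrained(x)
```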
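Similarly, the reworded `HamiltonianNN` docstring still documents the `2n`-vector input convention (positions followed by momenta) and the `autodiff` keyword from the `HamiltonianNN{FST}(model; autodiff=nothing)` signature. A hedged usage sketch under those documented conventions; the `AutoZygote()` backend choice, the toy sizes, and the MLP architecture are assumptions rather than part of the diff:

```julia
using Boltz, Lux, Random
using ADTypes: AutoZygote
using Zygote  # assumed to be needed for the AutoZygote backend

n = 2                                                 # degrees of freedom per state half
mlp = Chain(Dense(2n => 16, tanh), Dense(16 => 1))    # scalar Hamiltonian H([q; p])
hnn = Boltz.Layers.HamiltonianNN{true}(mlp; autodiff=AutoZygote())

ps, st = Lux.setup(Random.default_rng(), hnn)
x = randn(Float32, 2n, 8)     # batch of 8 concatenated (position, momentum) vectors
ẋ, _ = hnn(x, ps, st)         # time derivatives for position and momentum, same shape as x
```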