Merge pull request #6 from plant-ai-biophysics-lab/dev
Release 0.2
amogh7joshi authored Nov 24, 2021
2 parents 8fff704 + ec5209c commit 60b7e09
Showing 53 changed files with 6,414 additions and 4,473 deletions.
13 changes: 8 additions & 5 deletions .gitignore
@@ -138,13 +138,16 @@ __MACOSX
# Extra directories/files
/**data*/

agml/_helios/Helios/
agml/_helios/output/
agml/_helios/xmloutput_for_helios/

**/**test*.*
!tests/*/*
-agml/_helios
-agml/_internal
-agml/helios_config.sh
-agml/train

# Controls for the `_internal` directory.
agml/_internal/*
!agml/_internal/preprocess*.py
!agml/_internal/s3internal.py
!agml/_internal/__init__.py

*.pptx
4 changes: 3 additions & 1 deletion MANIFEST.in
@@ -1,2 +1,4 @@
include README.md
include agml/_assets/*.json
include agml/_assets/shape_info.pickle
prune agml/_internal
60 changes: 22 additions & 38 deletions README.md
@@ -1,8 +1,16 @@
# AgML
<p align="center">
<img src="/figures/agml-logo.png" alt="agml framework" width="400" height="400">
</p>

## Overview
AgML is a comprehensive library for agricultural machine learning. Currently, AgML provides
-access to a wealth of public agricultural datasets for common agricultural deep learning tasks.
+access to a wealth of public agricultural datasets for common agricultural deep learning tasks. In the future, AgML will provide ag-specific ML functionality related to data, training, and evaluation. Here's a conceptual diagram of the overall framework.

<p align="center">
<img src="/figures/agml-framework.png" alt="agml framework" width="350" height="291">
</p>

AgML supports both the [TensorFlow](https://www.tensorflow.org/) and [PyTorch](https://pytorch.org/) machine learning frameworks.

## Installation

@@ -14,27 +22,23 @@ pip install agml

## Getting Started

-AgML aims to provide seamless access to resources for users of all levels. The core of AgML's public data pipeline is
+### Using Public Agricultural Data
+
+AgML aims to provide easy access to a range of existing public agricultural datasets. The core of AgML's public data pipeline is
[`AgMLDataLoader`](/agml/data/loader.py). Simply running the following line of code:

```python
loader = AgMLDataLoader('<dataset_name_here>')
```

-will download the dataset locally from which point it will be automatically loaded from the disk on future runs. For high-level
-users who just want the dataset information, accessing the raw metadata is as easy as
-
-```python
-dataset = loader.export_contents()
-```
-
-On the other hand, users who want to integrate the loader into their existing pipelines can use a number
-of methods to process and export their data, including applying transforms, batching
-and splitting the data, and even exporting to PyTorch DataLoaders or TensorFlow Dataset pipelines.
+will download the dataset locally, from which point it will be automatically loaded from the disk on future runs.
+From this point, the data within the loader can be split into train/val/test sets, batched, have augmentations and transforms
+applied, and be converted into a training-ready dataset (including tensor conversion and image formatting).
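
As a concrete illustration, a full loader workflow might look like the sketch below. This is a hedged example, not the definitive API: the dataset name comes from `public_datasources.json` in this release, and `export_contents()` appears in the previous README, but the other method names and signatures (`split`, `batch`, `transform`) are assumptions based on the description above — consult [`AgMLDataLoader`](/agml/data/loader.py) or the example notebook for the actual interface.

```python
import agml

# Load a dataset listed in agml/_assets/public_datasources.json.
loader = agml.data.AgMLDataLoader('plant_doc_classification')

# Split into train/val/test sets and batch the training data.
# (Method names are assumptions for illustration -- see the loader source.)
loader.split(train=0.8, val=0.1, test=0.1)
loader.batch(batch_size=8)

# Apply a simple per-image transform (here, scaling pixel values to [0, 1]).
loader.transform(lambda image: image / 255.)

# Alternatively, export the raw metadata for manual processing.
contents = loader.export_contents()
```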

-For more detailed information about the API, see [insert documentation link here]().
+To see the various ways in which you can use AgML datasets in your training pipelines, check out
+the [example notebook](/examples/AgML-Data.ipynb).

-### Annotation Formats
+## Annotation Formats

A core aim of AgML is to provide datasets in a standardized format, enabling multiple datasets to be combined
into a single training pipeline. To this end, we provide annotations in the following formats:
@@ -43,27 +47,7 @@ into a single training pipeline. To this end, we provide annotations in the foll
- **Object Detection**: [COCO JSON](https://cocodataset.org/#format-data)
- **Semantic Segmentation**: Dense Pixel-Wise
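
For reference, a COCO JSON file stores images, annotations, and categories as parallel lists, with each bounding box given as `[x, y, width, height]` in pixels. The sketch below uses hypothetical file names, IDs, and category names purely for illustration:

```json
{
  "images": [
    {"id": 1, "file_name": "example_0001.jpg", "width": 640, "height": 480}
  ],
  "annotations": [
    {"id": 1, "image_id": 1, "category_id": 2,
     "bbox": [120.0, 85.0, 64.0, 42.0], "area": 2688.0, "iscrowd": 0}
  ],
  "categories": [
    {"id": 2, "name": "apple", "supercategory": "fruit"}
  ]
}
```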

-## Optional
-
-We aim to provide additional datasets for different deep learning tasks in the future.
-
-## Vision
-
-AgML aims to be an end-to-end resource encompassing all facets of agricultural machine learning.
-
-```text
-Include a nicely-formatted graphic of the slide that Mason
-showed in the first lab meeting showing the vision for AgML?
-```
-
-<!--
-INTERNAL NOTE:
-As new releases of AgML are published, this README is going to change significantly.
-E.g., right now the 'installation' section just discusses `pip install agml`, but that
-will evolve to discussing CUDA/dev versions or other features as we continue to add
-features to the library. So, this is just the first template as we introduce the first releases.
--->
+## Contributions
+
+We welcome contributions! If you would like to contribute a new feature, fix an issue that you've noticed, or even just mention
+a bug or feature that you would like to see implemented, please don't hesitate to use the *Issues* tab to bring it to our attention.
18 changes: 18 additions & 0 deletions agml/__init__.py
@@ -12,5 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

__version__ = '0.2'
__all__ = ['data', 'backend', 'viz']

# If AgML is being imported for the first time, then we need to set up
# the module, namely prepping the config file.
def _setup():
    import os as _os
    import json as _json
    if not _os.path.exists(_os.path.expanduser('~/.agml')):
        _os.makedirs(_os.path.expanduser('~/.agml'))
        with open(_os.path.join(
                _os.path.expanduser('~/.agml/config.json')), 'w') as f:
            _json.dump({'dataset_path': _os.path.expanduser('~/.agml/datasets')}, f)
_setup(); del _setup # noqa

# There are no top-level imported functions or classes, only the modules.
from . import data, backend, viz
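
For reference, the config file written by `_setup()` above contains a single key pointing at the default dataset directory. On a typical system it would look like the sketch below (the home directory shown is hypothetical; the actual path depends on the user):

```json
{"dataset_path": "/home/user/.agml/datasets"}
```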



56 changes: 51 additions & 5 deletions agml/_assets/public_datasources.json
@@ -16,7 +16,7 @@
"platform": "handheld",
"input_data_format": "jpg",
"annotation_format": "directory_names",
"n_images": "1296",
"n_images": "1295",
"docs_url": "https://github.com/AI-Lab-Makerere/ibean/"
},
"carrot_weeds_germany": {
@@ -107,7 +107,7 @@
"platform": "ground_fixed",
"input_data_format": "bmp",
"annotation_format": "directory_names",
"n_images": "154",
"n_images": "153",
"docs_url": "https://github.com/The77Lab/SugarcaneBilletsDataset"
},
"crop_weeds_greece": {
@@ -323,7 +323,7 @@
"continent": "asia",
"country": "china"
},
"sensor_modality": "RGB",
"sensor_modality": "rgb",
"real_synthetic": "real",
"platform": "handheld",
"input_data_format": "jpg",
@@ -379,12 +379,58 @@
"continent": "north_america",
"country": "usa"
},
"sensor_modality": "RGB",
"sensor_modality": "rgb",
"real_synthetic": "real",
"platform": "ground",
"input_data_format": "jpg",
"annotation_format": "directory_names",
"n_images": "55448",
"docs_url": "https://github.com/spMohanty/PlantVillage-Dataset"
},
"plant_doc_classification": {
"crop_types": {
"1": "Apple_Scab_Leaf",
"2": "Apple_leaf",
"3": "Apple_rust_leaf",
"4": "Bell_pepper_leaf_spot",
"5": "Bell_pepper_leaf",
"6": "Blueberry_leaf",
"7": "Cherry_leaf",
"8": "Corn_Gray_leaf_spot",
"9": "Corn_leaf_blight",
"10": "Corn_rust_leaf",
"11": "Peach_leaf",
"12": "Potato_leaf_early_blight",
"13": "Potato_leaf_late_blight",
"14": "Raspberry_leaf",
"15": "Soybean_leaf",
"16": "Squash_Powdery_mildew_leaf",
"17": "Strawberry_leaf",
"18": "Tomato_Early_blight_leaf",
"19": "Tomato_Septoria_leaf_spot",
"20": "Tomato_leaf_bacterial_spot",
"21": "Tomato_leaf_late_blight",
"22": "Tomato_leaf_mosaic_virus",
"23": "Tomato_leaf_yellow_virus",
"24": "Tomato_leaf",
"25": "Tomato_mold_leaf",
"26": "Tomato_two_spotted_spider_mites_leaf",
"27": "grape_leaf_black_rot",
"28": "grape_leaf"
},
"ml_task": "image_classification",
"ag_task": "disease_classification",
"location": {
"continent": "worldwide",
"country": "worldwide"
},
"sensor_modality": "rgb",
"real_synthetic": "real",
"platform": "ground",
"input_data_format": "jpg",
"annotation_format": "directory_names",
"n_images": "2598",
"docs_url": "https://github.com/pratikkayal/PlantDoc-Dataset"
}
}
}

Binary file added agml/_assets/shape_info.pickle
24 changes: 6 additions & 18 deletions agml/_assets/source_citations.json
@@ -12,7 +12,7 @@
"citation": "@article{Giselsson2017,\n author = {Giselsson, Thomas Mosgaard and Dyrmann, Mads and J{\\o}rgensen, Rasmus Nyholm and Jensen, Peter Kryger and Midtiby, Henrik Skov},\n journal = {arXiv preprint},\n keywords = {benchmark,database,plant seedlings,segmentation,site-specific weed control},\n title = {{A Public Image Database for Benchmark of Plant Seedling Classification Algorithms}},\n year = {2017}\n}"
},
"soybean_weed_uav_brazil": {
"license": "CC BY NC 3.0",
"license": "CC BY-NC 3.0",
"citation": "dos Santos Ferreira, Alessandro; Pistori, Hemerson; Matte Freitas, Daniel; Gon\u00e7alves da Silva, Gercina (2017), \u201cData for: Weed Detection in Soybean Crops Using ConvNets\u201d, Mendeley Data, V2, doi: 10.17632/3fmjm7ncc6.2"
},
"sugarcane_damage_usa": {
@@ -28,17 +28,13 @@
"citation": "@ARTICLE{8115245,\n author={I. Sa and Z. Chen and M. Popovi\u0107 and R. Khanna and F. Liebisch and J. Nieto and R. Siegwart},\n journal={IEEE Robotics and Automation Letters},\n title={weedNet: Dense Semantic Weed Classification Using Multispectral Images and MAV for Smart Farming},\n year={2018},\n volume={3},\n number={1},\n pages={588-595},\n keywords={agriculture;agrochemicals;autonomous aerial vehicles;control engineering computing;convolution;crops;feature extraction;image classification;learning (artificial intelligence);neural nets;vegetation;MAV;SegNet;convolutional neural network;crop health;crop management;curve classification metrics;dense semantic classes;dense semantic weed classification;encoder-decoder;input image channels;multispectral images;selective weed treatment;vegetation index;weed detection;Agriculture;Cameras;Image segmentation;Robots;Semantics;Training;Vegetation mapping;Aerial systems;agricultural automation;applications;robotics in agriculture and forestry},\n doi={10.1109/LRA.2017.2774979},\n ISSN={},\n month={Jan}\n}"
},
"rangeland_weeds_australia": {
"license": "CC BY 4.0",
"license": "CC BY-SA 4.0",
"citation": "@Article{Olsen2019,\n author={Olsen, Alex and Konovalov, Dmitry A. and Philippa, Bronson and Ridd, Peter and Wood, Jake C. and Johns, Jamie and Banks, Wesley and Girgenti, Benjamin and Kenny, Owen and Whinney, James and Calvert, Brendan and Azghadi, Mostafa Rahimi and White, Ronald D.},\n title={DeepWeeds: A Multiclass Weed Species Image Dataset for Deep Learning},\n journal={Scientific Reports},\n year={2019},\n month={Feb},\n day={14},\n volume={9},\n number={1},\n pages={2058},\n abstract={Robotic weed control has seen increased research of late with its potential for boosting productivity in agriculture. Majority of works focus on developing robotics for croplands, ignoring the weed management problems facing rangeland stock farmers. Perhaps the greatest obstacle to widespread uptake of robotic weed control is the robust classification of weed species in their natural environment. The unparalleled successes of deep learning make it an ideal candidate for recognising various weed species in the complex rangeland environment. This work contributes the first large, public, multiclass image dataset of weed species from the Australian rangelands; allowing for the development of robust classification methods to make robotic weed control viable. The DeepWeeds dataset consists of 17,509 labelled images of eight nationally significant weed species native to eight locations across northern Australia. This paper presents a baseline for classification performance on the dataset using the benchmark deep learning models, Inception-v3 and ResNet-50. These models achieved an average classification accuracy of 95.1{\\%} and 95.7{\\%}, respectively. We also demonstrate real time performance of the ResNet-50 architecture, with an average inference time of 53.4 ms per image. These strong results bode well for future field implementation of robotic weed control methods in the Australian rangelands.},\n issn={2045-2322},\n doi={10.1038/s41598-018-38343-3},\n url={https://doi.org/10.1038/s41598-018-38343-3}\n}"
},
"fruit_detection_worldwide": {
"license": "",
"citation": "@Article{s16081222,\n AUTHOR = {Sa, Inkyu and Ge, Zongyuan and Dayoub, Feras and Upcroft, Ben and Perez, Tristan and McCool, Chris},\n TITLE = {DeepFruits: A Fruit Detection System Using Deep Neural Networks},\n JOURNAL = {Sensors},\n VOLUME = {16},\n YEAR = {2016},\n NUMBER = {8},\n ARTICLE-NUMBER = {1222},\n URL = {https://www.mdpi.com/1424-8220/16/8/1222},\n ISSN = {1424-8220},\n ABSTRACT = {This paper presents a novel approach to fruit detection using deep convolutional neural networks. The aim is to build an accurate, fast and reliable fruit detection system, which is a vital element of an autonomous agricultural robotic platform; it is a key element for fruit yield estimation and automated harvesting. Recent work in deep neural networks has led to the development of a state-of-the-art object detector termed Faster Region-based CNN (Faster R-CNN). We adapt this model, through transfer learning, for the task of fruit detection using imagery obtained from two modalities: colour (RGB) and Near-Infrared (NIR). Early and late fusion methods are explored for combining the multi-modal (RGB and NIR) information. This leads to a novel multi-modal Faster R-CNN model, which achieves state-of-the-art results compared to prior work with the F1 score, which takes into account both precision and recall performances improving from 0 . 807 to 0 . 838 for the detection of sweet pepper. In addition to improved accuracy, this approach is also much quicker to deploy for new fruits, as it requires bounding box annotation rather than pixel-level annotation (annotating bounding boxes is approximately an order of magnitude quicker to perform). The model is retrained to perform the detection of seven fruits, with the entire process taking four hours to annotate and train the new model per fruit.},\n DOI = {10.3390/s16081222}\n}"
},
"plant_weeds_denmark": {
"license": "",
"citation": ""
},
"leaf_counting_denmark": {
"license": "CC BY-SA 4.0",
"citation": "@Article{s18051580,\n author = {Teimouri, Nima and Dyrmann, Mads and Nielsen, Per Rydahl and Mathiassen, Solvejg Kopp and Somerville, Gayle J. and J\u00f8rgensen, Rasmus Nyholm},\n title = {Weed Growth Stage Estimator Using Deep Convolutional Neural Networks},\n journal = {Sensors},\n volume = {18},\n year = {2018},\n number = {5},\n url = {http://www.mdpi.com/1424-8220/18/5/1580},\n issn = {1424-8220}\n}"
@@ -51,18 +47,6 @@
"license": "",
"citation": "@article{2019,\n doi = {10.1186/s13007-019-0528-3},\n url = {https://doi.org/10.1186/s13007-019-0528-3},\n year = {2019},\n month = nov,\n publisher = {Springer Science and Business Media {LLC}},\n volume = {15},\n number = {1},\n author = {Yu Jiang and Changying Li and Andrew H. Paterson and Jon S. Robertson},\n title = {{DeepSeedling}: deep convolutional network and Kalman filter for plant seedling detection and counting in the field}\n}"
},
"apple_detection_spain": {
"license": "",
"citation": ""
},
"fruit_detection_australia": {
"license": "",
"citation": "@article{bargoti2016deep,\n title={Deep Fruit Detection in Orchards},\n author={Bargoti, Suchet and Underwood, James},\n journal={arXiv preprint arXiv:1610.03677},\n year={2016}\n}"
},
"datepalm_classification_saudi": {
"license": "",
"citation": ""
},
"mango_detection_australia": {
"license": "",
"citation": "@Misc{Koirala2019,\n author={Koirala, Anand and Walsh, Kerry and Wang, Z. and McCarthy, C.},\n title={MangoYOLO data set},\n year={2019},\n month={2021},\n day={10-19},\n publisher={Central Queensland University},\n keywords={Mango images; Fruit detection; Yield estimation; Mango; Agricultural Land Management; Horticultural Crop Growth and Development},\n abstract={Datasets and directories are structured similar to the PASCAL VOC dataset, avoiding the need to change scripts already available, with the detection frameworks ready to parse PASCAL VOC annotations into their format. The sub-directory JPEGImages consist of 1730 images (612x512 pixels) used for train, test and validation. Each image has at least one annotated fruit. The sub-directory Annotations consists of all the annotation files (record of bounding box coordinates for each image) in xml format and have the same name as the image name. The sub-directory Main consists of the text file that contains image names (without extension) used for train, test and validation. Training set (train.txt) lists 1300 train images Validation set (val.txt) lists 130 validation images Test set (test.txt) lists 300 test images Each image has an XML annotation file (filename = image name) and each image set (training validation and test set) has associated text files (train.txt, val.txt and test.txt) containing the list of image names to be used for training and testing. The XML annotation file contains the image attributes (name, width, height), the object attributes (class name, object bounding box co-ordinates (xmin, ymin, xmax, ymax)). (xmin, ymin) and (xmax, ymax) are the pixel co-ordinates of the bounding box's top-left corner and bottom-right corner respectively.},\n note={CC-BY-4.0},\n url={https://figshare.com/articles/dataset/MangoYOLO_data_set/13450661, https://researchdata.edu.au/mangoyolo-set},\n language={English}\n}"
@@ -82,5 +66,9 @@
"plant_village_classification": {
"license": "",
"citation": "@article{DBLP:journals/corr/HughesS15,\n author = {David P. Hughes and\n Marcel Salath{'{e} } },\n title = {An open access repository of images on plant health to enable the\n development of mobile disease diagnostics through machine\n learning and crowdsourcing},\n journal = {CoRR},\n volume = {abs/1511.08060},\n year = {2015},\n url = {http://arxiv.org/abs/1511.08060},\n archivePrefix = {arXiv},\n eprint = {1511.08060},\n timestamp = {Mon, 13 Aug 2018 16:48:21 +0200},\n biburl = {https://dblp.org/rec/bib/journals/corr/HughesS15},\n bibsource = {dblp computer science bibliography, https://dblp.org}\n}"
},
"plant_doc_classification": {
"license": "CC BY-SA 4.0",
"citation": "@inproceedings{10.1145/3371158.3371196,\n author = {Singh, Davinder and Jain, Naman and Jain, Pranjali and Kayal, Pratik and Kumawat, Sudhakar and Batra, Nipun},\n title = {PlantDoc: A Dataset for Visual Plant Disease Detection},\n year = {2020},\n isbn = {9781450377386},\n publisher = {Association for Computing Machinery},\n address = {New York, NY, USA},\n url = {https://doi.org/10.1145/3371158.3371196},\n doi = {10.1145/3371158.3371196},\n booktitle = {Proceedings of the 7th ACM IKDD CoDS and 25th COMAD},\n pages = {249–253},\n numpages = {5},\n keywords = {Deep Learning, Object Detection, Image Classification},\n location = {Hyderabad, India},\n series = {CoDS COMAD 2020}\n }"
}
}
Empty file added agml/_internal/__init__.py
