diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5509140 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.DS_Store diff --git a/Classification/Audio/MusicGenre/music_classification_auto.nml b/Classification/Audio/MusicGenre/music_classification_auto.nml deleted file mode 100644 index ac8e313..0000000 --- a/Classification/Audio/MusicGenre/music_classification_auto.nml +++ /dev/null @@ -1,30 +0,0 @@ -oracle("mode")="classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Audio/training_data.csv" ; - input: - x ~ from "Audio File" - -> audio: [maxlen = 1366, nbands = 96] - -> AudioDataGenerator: []; - output: - y ~ from "Genre" - -> flat: [10] - -> FlatDataGenerator: [] ; - params: - batch_size = 5, - validation_split = 0.2 ; - -architecture: - input: x ~ audio: [maxlen = 1366, nbands = 96]; - output: y ~ flat: [10] ; - - x -> auto -> y ; - -train: - compile: - optimizer = auto, - loss = auto, - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/CIFAR10/cifar10_call_auto.nml b/Classification/Image/CIFAR10/cifar10_call_auto.nml deleted file mode 100644 index 4ec42ae..0000000 --- a/Classification/Image/CIFAR10/cifar10_call_auto.nml +++ /dev/null @@ -1,47 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/CIFAR10/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[32, 32], channels=3] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Class"-> flat: [10] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[32, 32], channels=3] ; - output: y ~ flat: [10] ; - - x -> Conv2D: [32, [3,3]] - -> Activation: ['relu'] - -> Conv2D: [32, [3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: auto - -> Conv2D: [64, [3,3]] - -> Activation: ['relu'] - -> Conv2D: [64, [3,3]] - -> Activation: 
['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: auto - -> Flatten: [] - -> Dense: [512] - -> Activation: ['relu'] - -> Dropout: auto - -> Dense: [10] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/CIFAR10/cifar10_choice_auto.nml b/Classification/Image/CIFAR10/cifar10_choice_auto.nml deleted file mode 100644 index 60cd1bc..0000000 --- a/Classification/Image/CIFAR10/cifar10_choice_auto.nml +++ /dev/null @@ -1,47 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/CIFAR10/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[32, 32], channels=3] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Class"-> flat: [10] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[32, 32], channels=3] ; - output: y ~ flat: [10] ; - - x -> Conv2D: [32,[3,3]] - -> Activation: ['relu'] - -> Conv2D: [32,[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [auto(0.25 ? 0.75 | name = "Drop1")] - -> Conv2D: [64,[3,3]] - -> Activation: ['relu'] - -> Conv2D: [64,[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [auto(0.25 ? 0.75 | name = "Drop2")] - -> Flatten: [] - -> Dense: [512] - -> Activation: ['relu'] - -> Dropout: [auto(0.25 ? 
0.75 | name = "Drop3")] - -> Dense: [10] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/CIFAR10/cifar10_dist_auto.nml b/Classification/Image/CIFAR10/cifar10_dist_auto.nml deleted file mode 100644 index 5102e71..0000000 --- a/Classification/Image/CIFAR10/cifar10_dist_auto.nml +++ /dev/null @@ -1,47 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/CIFAR10/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[32, 32], channels=3] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Class"-> flat: [10] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[32, 32], channels=3] ; - output: y ~ flat: [10] ; - - x -> Convolution2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_1"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_2"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [0.25] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_3"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_4"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [0.25] - -> Flatten: [] - -> Dense: [512] - -> Activation: ['relu'] - -> Dropout: [0.5] - -> Dense: [10] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - 
epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/CIFAR10/cifar10_full_auto.nml b/Classification/Image/CIFAR10/cifar10_full_auto.nml deleted file mode 100644 index 3dfbfd2..0000000 --- a/Classification/Image/CIFAR10/cifar10_full_auto.nml +++ /dev/null @@ -1,29 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/CIFAR10/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[32, 32], channels=3] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Class"-> flat: [10] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[32, 32], channels=3] ; - output: y ~ flat: [10] ; - - x -> auto -> y ; - -train: - compile: - optimizer = auto, - loss = auto, - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/CIFAR100/cifar100_call_auto.nml b/Classification/Image/CIFAR100/cifar100_call_auto.nml deleted file mode 100644 index 4e5d234..0000000 --- a/Classification/Image/CIFAR100/cifar100_call_auto.nml +++ /dev/null @@ -1,48 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/CIFAR100/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[32, 32], channels=3] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Class" - -> flat: [100] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[32, 32], channels=3] ; - output: y ~ flat: [100] ; - - x -> Conv2D: [32, [3,3]] - -> Activation: ['relu'] - -> Conv2D: [32, [3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: auto - -> Conv2D: [64, [3,3]] - -> Activation: ['relu'] - -> Conv2D: [64, [3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: auto - -> Flatten: [] - -> Dense: [512] - 
-> Activation: ['relu'] - -> Dropout: auto - -> Dense: [100] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 8 ; - dashboard: ; diff --git a/Classification/Image/CIFAR100/cifar100_choice_auto.nml b/Classification/Image/CIFAR100/cifar100_choice_auto.nml deleted file mode 100644 index 80cb05d..0000000 --- a/Classification/Image/CIFAR100/cifar100_choice_auto.nml +++ /dev/null @@ -1,48 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/CIFAR100/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[32, 32], channels=3] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Class" - -> flat: [100] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[32, 32], channels=3] ; - output: y ~ flat: [100] ; - - x -> Conv2D: [32,[3,3]] - -> Activation: ['relu'] - -> Conv2D: [32,[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [auto(0.25 ? 0.75 | name = "Drop1")] - -> Conv2D: [64,[3,3]] - -> Activation: ['relu'] - -> Conv2D: [64,[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [auto(0.25 ? 0.75 | name = "Drop2")] - -> Flatten: [] - -> Dense: [512] - -> Activation: ['relu'] - -> Dropout: [auto(0.25 ? 
0.75 | name = "Drop3")] - -> Dense: [100] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/CIFAR100/cifar100_dist_auto.nml b/Classification/Image/CIFAR100/cifar100_dist_auto.nml deleted file mode 100644 index c2e0ca4..0000000 --- a/Classification/Image/CIFAR100/cifar100_dist_auto.nml +++ /dev/null @@ -1,48 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/CIFAR100/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[32, 32], channels=3] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Class" - -> flat: [100] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[32, 32], channels=3] ; - output: y ~ flat: [100] ; - - x -> Convolution2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_1"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_2"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [0.25] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_3"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_4"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [0.25] - -> Flatten: [] - -> Dense: [512] - -> Activation: ['relu'] - -> Dropout: [0.5] - -> Dense: [100] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; 
- run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/CIFAR100/cifar100_full_auto.nml b/Classification/Image/CIFAR100/cifar100_full_auto.nml deleted file mode 100644 index 2052092..0000000 --- a/Classification/Image/CIFAR100/cifar100_full_auto.nml +++ /dev/null @@ -1,32 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/CIFAR100/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[32, 32], channels=3] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Class" - -> flat: [100] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - - -architecture: - input: x ~ image: [shape=[32, 32], channels= 3] ; - output: y ~ flat: [100] ; - - x -> auto -> y ; - - -train: - compile: - optimizer = auto, - loss = auto, - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/MNIST/mnist_call_auto.nml b/Classification/Image/MNIST/mnist_call_auto.nml deleted file mode 100644 index fe12a3c..0000000 --- a/Classification/Image/MNIST/mnist_call_auto.nml +++ /dev/null @@ -1,47 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/MNIST/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[28, 28], channels=1] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Label"-> flat: [10] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[28, 28], channels=1] ; - output: y ~ flat: [10] ; - - x -> Conv2D: [32, [3,3]] - -> Activation: ['relu'] - -> Conv2D: [32, [3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=2] - -> Dropout: auto - -> Conv2D: [64, [3,3]] - -> Activation: ['relu'] - -> Conv2D: [64, [3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=2] - -> Dropout: auto - -> Flatten: [] - -> Dense: [512] - 
-> Activation: ['relu'] - -> Dropout: auto - -> Dense: [10] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/MNIST/mnist_choice_auto.nml b/Classification/Image/MNIST/mnist_choice_auto.nml deleted file mode 100644 index d046f28..0000000 --- a/Classification/Image/MNIST/mnist_choice_auto.nml +++ /dev/null @@ -1,47 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/MNIST/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[28, 28], channels=1] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Label"-> flat: [10] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[28, 28], channels=1] ; - output: y ~ flat: [10] ; - - x -> Conv2D: [32,[3,3]] - -> Activation: ['relu'] - -> Conv2D: [32,[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=2] - -> Dropout: [auto(0.25 ? 0.75 | name = "Drop")] - -> Conv2D: [64,[3,3]] - -> Activation: ['relu'] - -> Conv2D: [64,[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=2] - -> Dropout: [auto(0.25 ? 0.75 | name = "Drop")] - -> Flatten: [] - -> Dense: [512] - -> Activation: ['relu'] - -> Dropout: [auto(0.25 ? 
0.75 | name = "Drop")] - -> Dense: [10] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Image/MNIST/mnist_dist_auto.nml b/Classification/Image/MNIST/mnist_dist_auto.nml deleted file mode 100644 index 96d11c1..0000000 --- a/Classification/Image/MNIST/mnist_dist_auto.nml +++ /dev/null @@ -1,47 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/MNIST/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[28, 28], channels=1] - -> ImageDataGenerator: [rescale= 0.003921568627451]; - output: - y ~ from "Label"-> flat: [10] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[28, 28], channels=1] ; - output: y ~ flat: [10] ; - - x -> Convolution2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_1"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_2"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [0.25] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_3"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_4"), kernel_size=[3,3]] - -> Activation: ['relu'] - -> MaxPooling2D: [pool_size=3] - -> Dropout: [0.25] - -> Flatten: [] - -> Dense: [512] - -> Activation: ['relu'] - -> Dropout: [0.5] - -> Dense: [10] - -> Activation: ['softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - 
dashboard: ; diff --git a/Classification/Image/MNIST/mnist_full_auto.nml b/Classification/Image/MNIST/mnist_full_auto.nml deleted file mode 100644 index b4dd9c6..0000000 --- a/Classification/Image/MNIST/mnist_full_auto.nml +++ /dev/null @@ -1,30 +0,0 @@ -oracle("mode") = "classification" - -source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/MNIST/training_data.csv" ; - input: - x ~ from "Image" - -> image: [shape=[28, 28], channels=1] - -> ImageDataGenerator: [rescale= 0.003921568627451] ; - output: - y ~ from "Label" - -> flat: [10] - -> FlatDataGenerator: [] ; - params: - number_validation = 10000, - batch_size = 32 ; - -architecture: - input: x ~ image: [shape=[28, 28], channels=1] ; - output: y ~ flat: [10] ; - - x -> auto -> y ; - -train: - compile: - optimizer = auto, - loss = auto, - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Text/Sentiment/sentiment_choice_auto.nml b/Classification/Text/Sentiment/sentiment_choice_auto.nml deleted file mode 100644 index 50ff86f..0000000 --- a/Classification/Text/Sentiment/sentiment_choice_auto.nml +++ /dev/null @@ -1,38 +0,0 @@ -oracle("generated") = 2 -oracle("complexity") = 0.1 -oracle("regularization") = 0.99 - -source: - bind = "/DM-Dash/examples/sentiment/data.csv" ; - input: - x ~ from "Review" - -> text: [200] - -> TextDataGenerator: [nb_words=20000] ; - output: - y ~ from "Label" - -> flat: [2] - -> FlatDataGenerator: [] ; - params: - validation_split = 0.5, - batch_size = 64, - shuffle_init = False; - -architecture: - input: x ~ text: [200] ; - output: y ~ flat: [2] ; - - x -> Embedding: [20000, 128] - -> Dropout: [auto(0.25 ? 
0.50 | name="Drop")] - -> Convolution1D: [64, 4] - -> MaxPooling1D: [pool_size=4] - -> LSTM: [128] - -> Dense: [2, activation='softmax'] -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: ; diff --git a/Classification/Video/HumanAction/video_class.nml b/Classification/Video/HumanAction/video_class.nml deleted file mode 100644 index 185d2e0..0000000 --- a/Classification/Video/HumanAction/video_class.nml +++ /dev/null @@ -1,48 +0,0 @@ -source: -bind = "/DM-Dash/NeoPulseExamples/Classification/Video/training_data.csv" ; -input: - x ~ from "Video" - -> video: [shape=[80, 80], channels=3, seqlength=32] - -> ImageDataGenerator: []; -output: - y ~ from "Class" - -> flat: [6] - -> FlatDataGenerator: [] ; -params: - number_validation = 119, - batch_size = 2; - - -architecture: - input: x ~ video: [shape=[80, 80], channels=3, seqlength=32] ; - output: y ~ flat: [6] ; - - x -> TimeDistributed: [Conv2D: [32, [3,3], kernel_initializer="he_normal", activation='relu'], input_shape=[32, 80, 80, 3]] - -> TimeDistributed: [Conv2D: [32, [3,3], kernel_initializer="he_normal", activation='relu']] - -> TimeDistributed: [MaxPooling2D: []] - -> TimeDistributed: [Conv2D: [48, [3,3], kernel_initializer="he_normal", activation='relu']] - -> TimeDistributed: [Conv2D: [48, [3,3], kernel_initializer="he_normal", activation='relu']] - -> TimeDistributed: [MaxPooling2D: []] - -> TimeDistributed: [Conv2D: [64, [3,3], kernel_initializer="he_normal", activation='relu']] - -> TimeDistributed: [Conv2D: [64, [3,3], kernel_initializer="he_normal", activation='relu']] - -> TimeDistributed: [MaxPooling2D: []] - -> TimeDistributed: [Conv2D: [128, [3,3], kernel_initializer="he_normal", activation='relu']] - -> TimeDistributed: [Conv2D: [128, [3,3], kernel_initializer="he_normal", activation='relu']] - -> TimeDistributed: [Flatten: []] - -> LSTM: [256, return_sequences=True] - -> Flatten:[] - -> Dense: [512, 
activation='relu'] - -> Dropout: [0.5] - -> Dense: [1, activation='softmax'] - -> y; - -train: - compile: - optimizer = SGD: [lr=0.0001, momentum=0.9], - loss = 'binary_crossentropy', - metrics = ['accuracy'] ; - - run: - epochs = 4; - - dashboard: ; diff --git a/Classification/Video/HumanAction/video_class_auto.nml b/Classification/Video/HumanAction/video_class_auto.nml deleted file mode 100644 index c6f686c..0000000 --- a/Classification/Video/HumanAction/video_class_auto.nml +++ /dev/null @@ -1,32 +0,0 @@ -oracle("mode")= "classification" - -source: - bind = "/DM-Dash/NeoPulseExamples/Classification/Video/training_data.csv" ; - input: - x ~ from "Video" - -> video: [shape=[80, 80], channels=3, seqlength=32] - -> ImageDataGenerator: []; - output: - y ~ from "Class" - -> flat: [6] - -> FlatDataGenerator: [] ; - params: - number_validation = 119, - batch_size = 2; - -architecture: - input: x ~ video: [shape=[80, 80], channels=3, seqlength=32] ; - output: y ~ flat: [6] ; - - x -> auto- > y; - -train: - compile: - optimizer = auto, - loss = auto, - metrics = ['accuracy'] ; - - run: - epochs = 4; - - dashboard: ; diff --git a/Classification/Audio/MusicGenre/.gitignore b/DataSets/Audio/MusicGenre/.gitignore similarity index 100% rename from Classification/Audio/MusicGenre/.gitignore rename to DataSets/Audio/MusicGenre/.gitignore diff --git a/DataSets/Audio/MusicGenre/README.md b/DataSets/Audio/MusicGenre/README.md new file mode 100644 index 0000000..be542d9 --- /dev/null +++ b/DataSets/Audio/MusicGenre/README.md @@ -0,0 +1,34 @@ +# Introduction +These sample .nml files are for training a classification model using audio data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Data +Data for this example is from the [Music Genres Dataset](http://opihi.cs.uvic.ca/sound/genres.tar.gz). The dataset features 100 audio samples from 10 music genres. 
+To run this example, first you will need to download and pre-process the raw data for the music classification task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed all the package dependencies of this script which are listed at the top of the script: +`tarfile, shutil, pathlib, requests, natsort, and random`. Missing packages can be installed using pip: + +```bash +$ pip install requests natsort +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /full/path/to/music_classification_auto.nml +``` + +NOTE: Audio files are big! Be careful with your batch size, or you may get out of memory (OOM) errors. If that happens, reduce the batch size. + +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal). +For more information on using the AudioDataGenerator, visit the [Data section](https://docs.neopulse.ai/NML-source/#data) of the NeoPulse™ AI Studio Documentation. + + +# License +Tutorial materials are published under the MIT license. See LICENSE for commercial, academic, and personal use. + +You are welcome to modify these tutorial files. If citing, please link to this repository. 
diff --git a/Classification/Audio/MusicGenre/build_csv.py b/DataSets/Audio/MusicGenre/build_csv.py similarity index 92% rename from Classification/Audio/MusicGenre/build_csv.py rename to DataSets/Audio/MusicGenre/build_csv.py index 89f752a..f1d1773 100644 --- a/Classification/Audio/MusicGenre/build_csv.py +++ b/DataSets/Audio/MusicGenre/build_csv.py @@ -4,6 +4,7 @@ from random import shuffle import requests + from natsort import humansorted @@ -50,7 +51,7 @@ def write_file(validation_split): # Construct lines for the csv file in the form: # /path/to/audio/file.au,class_number # where class_number is the index of each genre class. - csv_lines = humansorted(['/DM-Dash/NeoPulse_Examples/Classification/Audio/MusicGenre' + str(p) + ',' + str(index) + '\n' for p in Path(d).iterdir()]) + csv_lines = humansorted([str(p) + ',' + str(index) + '\n' for p in Path(d).iterdir()]) # shuffle the list: shuffle(csv_lines) # calculate the index on which to split the list into training/validation @@ -67,7 +68,7 @@ def write_file(validation_split): # Write the CSV file. 
with open('training_data.csv', 'w') as of: - of.write('Audio File,Genre\n') + of.write('Audio,Label\n') for l in train: of.write(l) for l in valid: diff --git a/DataSets/Audio/MusicGenre/music_classification_auto.nml b/DataSets/Audio/MusicGenre/music_classification_auto.nml new file mode 100644 index 0000000..aac58d5 --- /dev/null +++ b/DataSets/Audio/MusicGenre/music_classification_auto.nml @@ -0,0 +1,30 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Audio" + -> audio: [maxlen = 1366, nbands = 96] + -> AudioDataGenerator: [] ; + output: + y ~ from "Label" + -> flat: [10] + -> FlatDataGenerator: [] ; + params: + batch_size = 5, + validation_split = 0.2 ; + +architecture: + input: x ~ audio: [maxlen = 1366, nbands = 96]; + output: y ~ flat: [10] ; + + x -> auto -> y ; + +train: + compile: + optimizer = auto, + loss = auto, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/Classification/Audio/MusicGenre/music_spectrogram.nml b/DataSets/Audio/MusicGenre/music_spectrogram.nml similarity index 77% rename from Classification/Audio/MusicGenre/music_spectrogram.nml rename to DataSets/Audio/MusicGenre/music_spectrogram.nml index 80ce904..5474e82 100644 --- a/Classification/Audio/MusicGenre/music_spectrogram.nml +++ b/DataSets/Audio/MusicGenre/music_spectrogram.nml @@ -1,19 +1,19 @@ source: - bind = "/DM-Dash/NeoPulse_Examples/Classification/Audio/training_data.csv" ; + bind = "training_data.csv" ; input: - x ~ from "Audio File" - -> audio: [maxlen = 1366, nbands = 96] - -> AudioDataGenerator: [feature = 'spectrogram']; + x ~ from "Audio" + -> audio: [maxlen = 1366, nbands = 96] + -> AudioDataGenerator: [feature = 'spectrogram'] ; output: - y ~ from "Genre" - -> flat: [10] - -> FlatDataGenerator: [] ; + y ~ from "Label" + -> flat: [10] + -> FlatDataGenerator: [] ; params: batch_size = 5, - validation_split = 0.2; + validation_split = 0.2 ; architecture: - input: x ~ audio: [maxlen = 1366, 
nbands = 96]; + input: x ~ audio: [maxlen = 1366, nbands = 96] ; output: y ~ flat: [10] ; x -> Reshape: [[1366,96,1]] @@ -61,7 +61,6 @@ train: optimizer = Adam:[lr = 0.0001, beta_1 = 0.9, beta_2 = 0.999, epsilon = 0.00000001], loss = 'categorical_crossentropy', metrics = ['accuracy'] ; - run: - epochs = 4 ; + epochs = 2 ; dashboard: ; diff --git a/DataSets/Audio/MusicGenre/music_vector_capsule.nml b/DataSets/Audio/MusicGenre/music_vector_capsule.nml new file mode 100644 index 0000000..b8979a7 --- /dev/null +++ b/DataSets/Audio/MusicGenre/music_vector_capsule.nml @@ -0,0 +1,34 @@ +source: + bind = "training_data.csv" ; + input: + audio ~ from "Audio" + -> audio: [maxlen = 1536, nbands = 24] + -> AudioDataGenerator: [] ; + output: + label ~ from "Label" + -> flat: [10] + -> FlatDataGenerator:[] ; + params: + batch_size = 32, + shuffle = True, + shuffle_init = True ; + +architecture: + input: audio ~ audio: [maxlen = 1536, nbands = 24] ; + output: label ~ flat: [10] ; + + audio -> Reshape: [[1536, 24, 1]] + -> Conv2D:[filters = 128, kernel_size = 9, strides = 1, padding = 'valid', activation = 'relu', name = 'conv1'] + -> PrimaryCaps_Vector:[capsule_dim = 8, channels = 32, kernel_size = [9,9],strides = [2,2], padding = 'valid', name = 'primarycap_conv2D'] + -> DigitCaps: [num_capsule = 10, capsule_dim = 16, routings = 3, name = 'digitcaps'] + -> ClassCaps:[num_capsule = 10] + -> label ; + +train: + compile: + optimizer = Adam:[lr = 0.0001], + loss = margin_loss, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Audio/MusicGenre/music_vector_capsule_auto.nml b/DataSets/Audio/MusicGenre/music_vector_capsule_auto.nml new file mode 100644 index 0000000..62a5b37 --- /dev/null +++ b/DataSets/Audio/MusicGenre/music_vector_capsule_auto.nml @@ -0,0 +1,31 @@ +oracle("mode") = "vector_capsule" + +source: + bind = "training_data.csv" ; + input: + audio ~ from "Audio" + -> audio: [maxlen = 1536, nbands = 24] + -> AudioDataGenerator: [] ; + 
output: + label ~ from "Label" + -> flat: [10] + -> FlatDataGenerator:[] ; + params: + batch_size = 32, + shuffle = True, + shuffle_init = True ; + +architecture: + input: audio ~ audio: [maxlen = 1536, nbands = 24] ; + output: label ~ flat: [10]; + + audio -> auto -> label ; + +train: + compile: + optimizer = Adam:[lr = 0.0001], + loss = margin_loss, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Dicom/IXIT1_BrainSex/.gitignore b/DataSets/Dicom/IXIT1_BrainSex/.gitignore new file mode 100644 index 0000000..d32d485 --- /dev/null +++ b/DataSets/Dicom/IXIT1_BrainSex/.gitignore @@ -0,0 +1,4 @@ +raw_data/ +images/ +training_data.csv +querying_data.csv diff --git a/Classification/Audio/MusicGenre/README.md b/DataSets/Dicom/IXIT1_BrainSex/README.md similarity index 64% rename from Classification/Audio/MusicGenre/README.md rename to DataSets/Dicom/IXIT1_BrainSex/README.md index e2b6d19..c2243fc 100644 --- a/Classification/Audio/MusicGenre/README.md +++ b/DataSets/Dicom/IXIT1_BrainSex/README.md @@ -1,16 +1,16 @@ # Introduction -These sample .nml files are for training a classification model using audio data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). +These sample .nml files are for training a classification model using DICOM (Digital Imaging and Communications in Medicine) data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). # Data -Data for this example is from the [Music Genres Dataset](http://opihi.cs.uvic.ca/sound/genres.tar.gz). The dataset features 100 audio samples from 10 music genres. -To run this example, first you will need to download and pre-process the raw data for the music classification task using the included ```build_genres.py``` script: +Data for this example is from the [IXI-T1 Dataset](http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI-T1.tar). 
The dataset contains 591 T1-weighted 3D MR images of human brains, each with a corresponding sex label (male or female). +To run this example, first you will need to download and pre-process the raw data for the dicom classification task using the included ```build_csv.py``` script: ```bash -$ python build_genres.py +$ python build_csv.py ``` If the script failes, make sure that you have installed all the package dependencies of this script which are listed at the top of the script: -`tarfile, shutil, pathlib, requests, natsort, and random`. Missing packages can be installed using pip: +`tarfile, shutil, pathlib, requests, natsort, pandas, and random`. Missing packages can be installed using pip: ```bash $ pip install @@ -18,14 +18,14 @@ $ pip install Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: ```bash -$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/Classification/Audio/music_classification_auto.nml +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/Classification/Dicom/IXIT1_BrainSex/dicom_sex.nml ``` The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: ```bash -bind = "/DM-Dash/NeoPulse_Examples/Classification/Audio/training_data.csv" ; +bind = "/DM-Dash/NeoPulse_Examples/Classification/Dicom/IXIT1_BrainSex/training_data.csv" ; ``` -NOTE: Audio files are big! Be careful with your batch size, or you may get out of memory (OOM) errors. If that happens, reduce the batch size. +NOTE: Dicom files are big! Be careful with your batch size, or you may get out of memory (OOM) errors. If that happens, reduce the batch size. 
# Tutorial Videos and Guides Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) diff --git a/DataSets/Dicom/IXIT1_BrainSex/build_csv.py b/DataSets/Dicom/IXIT1_BrainSex/build_csv.py new file mode 100644 index 0000000..5eb7a5d --- /dev/null +++ b/DataSets/Dicom/IXIT1_BrainSex/build_csv.py @@ -0,0 +1,109 @@ +import os +import shutil +import tarfile +from pathlib import Path +from random import shuffle + +import requests + +import pandas as pd +from natsort import humansorted + + +def download_data(): + ''' + Check if raw IXI dicom data is present. If not, download data from the + official site. + + Raises requests.HTTPError if the server answers with an error status. + ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + nii_URL = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI-T1.tar' + xls_URL = 'http://biomedic.doc.ic.ac.uk/brain-development/downloads/IXI/IXI.xls' + + nii_f = 'IXI_T1.tar.gz' + xls_f = 'IXI.xls' + + if not Path('raw_data/' + nii_f).is_file(): + r = requests.get(nii_URL, stream=True) + # Raise on an HTTP error before anything is written to disk. + r.raise_for_status() + with open('raw_data/' + nii_f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + + Path('images').mkdir(parents=True, exist_ok=True) + # Close the archive handle once extraction is finished. + with tarfile.open('raw_data/' + nii_f) as tar: + tar.extractall('images/') + + if not Path('raw_data/' + xls_f).is_file(): + r = requests.get(xls_URL, stream=True) + r.raise_for_status() + with open('raw_data/' + xls_f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + + +def write_file(validation_split): + ''' + Write training_data.csv and querying_data.csv from the files in images/ + and the labels in raw_data/IXI.xls. + + validation_split is the fraction of the shuffled rows that is written last + in training_data.csv and listed, without labels, in querying_data.csv. + ''' + + xls = pd.ExcelFile("raw_data/IXI.xls") + df = xls.parse('Table') + img_plist = os.listdir("images") + + pdict = {} + + csv_lines = [] + + # Index "IXI..." file names by the integer parsed from img_p[3:6]. + for img_p in img_plist: + if img_p[:3] == "IXI": + pdict[int(img_p[3:6])] = img_p + + for index, row in df.iterrows(): + IXI_id = int(row['IXI_ID']) + sex_id = row['SEX_ID (1=m, 2=f)'] + # Shift the 1/2 coding named in the column header to 0/1. + sex_id -= 1 + if IXI_id in pdict: + csv_lines.append("{0},{1}\n".format("images/" + pdict[IXI_id], sex_id)) + + shuffle(csv_lines) + + split_index = 
int(validation_split * len(csv_lines)) + + # Slice from the front: with split_index == 0 the negative forms give + # [:-0] == [] and [-0:] == everything, which swaps the two sets. + split_point = len(csv_lines) - split_index + train = csv_lines[:split_point] + valid = csv_lines[split_point:] + + # Write the training CSV file. + with open('training_data.csv', 'w') as of: + of.write('data,label\n') + for l in train: + of.write(l) + for l in valid: + of.write(l) + + # Write the querying CSV file. + with open('querying_data.csv', 'w') as of: + of.write('data\n') + for l in valid: + of.write(l.split(',')[0] + '\n') + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # Write files with 20% validation split + write_file(0.2) diff --git a/DataSets/Dicom/IXIT1_BrainSex/dicom_sex.nml b/DataSets/Dicom/IXIT1_BrainSex/dicom_sex.nml new file mode 100644 index 0000000..d2f630d --- /dev/null +++ b/DataSets/Dicom/IXIT1_BrainSex/dicom_sex.nml @@ -0,0 +1,39 @@ +source: + bind = "training_data.csv" ; + input: + x ~ from "data" + -> dicom: [shape = [64, 64, 64, 1]] + -> DicomDataGenerator:[spacing=[2.0,2.0,2.0], normalise_zero_to_one = True, flip=True] ; + output: + y ~ from "label" + -> flat: [2] + -> FlatDataGenerator:[] ; + params: + batch_size = 8, + shuffle = True, + shuffle_init = True, + repeat_per_load = 10 ; + +architecture: + input: x ~ dicom: [shape = [64,64,64,1]] ; + output: y ~ flat: [2] ; + + x -> Conv3D:[16, kernel_size=[3, 3, 3],strides = [2,2,2], padding = 'same',activation='relu'] + -> Conv3D:[16, kernel_size=[3, 3, 3],strides = [2,2,2], padding = 'same',activation='relu'] + -> MaxPooling3D:[pool_size=[2, 2, 2], padding = 'same'] + -> Conv3D:[32, kernel_size=[3, 3, 3], padding = 'same',activation='relu'] + -> Conv3D:[64, kernel_size=[3, 3, 3], padding = 'same', activation='relu'] + -> MaxPooling3D:[pool_size=[2, 2, 2], padding = 'same'] + -> Flatten:[] + -> Dense:[256, activation='relu'] + -> Dense:[2, activation='softmax'] + -> y ; + +train: + compile: + optimizer = Adam:[lr = 0.0001], + loss = categorical_crossentropy, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git
a/DataSets/Dicom/IXIT1_BrainSex/dicom_sex_auto.nml b/DataSets/Dicom/IXIT1_BrainSex/dicom_sex_auto.nml new file mode 100644 index 0000000..acf4b0e --- /dev/null +++ b/DataSets/Dicom/IXIT1_BrainSex/dicom_sex_auto.nml @@ -0,0 +1,32 @@ +oracle("mode")="classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "data" + -> dicom: [shape = [64, 64, 64, 1]] + -> DicomDataGenerator:[spacing=[2.0,2.0,2.0],normalise_zero_to_one = True,flip=True] ; + output: + y ~ from "label" + -> flat: [2] + -> FlatDataGenerator:[] ; + params: + batch_size = 5, + shuffle = True, + shuffle_init = True, + repeat_per_load = 10 ; + +architecture: + input: x ~ dicom: [shape = [64,64,64,1]] ; + output: y ~ flat: [2] ; + + x -> auto -> y ; + +train: + compile: + optimizer = Adam:[lr = 0.0001], + loss = categorical_crossentropy, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/Classification/Image/CIFAR10/.gitignore b/DataSets/Image/CIFAR10/.gitignore similarity index 100% rename from Classification/Image/CIFAR10/.gitignore rename to DataSets/Image/CIFAR10/.gitignore diff --git a/Classification/Image/CIFAR10/README.md b/DataSets/Image/CIFAR10/README.md similarity index 100% rename from Classification/Image/CIFAR10/README.md rename to DataSets/Image/CIFAR10/README.md diff --git a/Classification/Image/CIFAR10/build_csv.py b/DataSets/Image/CIFAR10/build_csv.py similarity index 97% rename from Classification/Image/CIFAR10/build_csv.py rename to DataSets/Image/CIFAR10/build_csv.py index 75df8a2..284dfc6 100644 --- a/Classification/Image/CIFAR10/build_csv.py +++ b/DataSets/Image/CIFAR10/build_csv.py @@ -3,8 +3,9 @@ import tarfile from pathlib import Path -import numpy as np import requests + +import numpy as np from imageio import imwrite from natsort import humansorted @@ -98,7 +99,7 @@ def write_data(): for ind, image in enumerate(image_list): file_path = image_path + str(count) + '.png' imwrite(file_path, image) - of.write(str(Path(file_path).resolve()) + 
',' + str(labels[ind]) + '\n') + of.write(str(Path(file_path)) + ',' + str(labels[ind]) + '\n') count += 1 diff --git a/DataSets/Image/CIFAR10/cifar10_call_auto.nml b/DataSets/Image/CIFAR10/cifar10_call_auto.nml new file mode 100644 index 0000000..812b2af --- /dev/null +++ b/DataSets/Image/CIFAR10/cifar10_call_auto.nml @@ -0,0 +1,48 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[32, 32], channels=3] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Class" + -> flat: [10] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[32, 32], channels=3] ; + output: y ~ flat: [10] ; + + x -> Conv2D: [32, [3,3]] + -> Activation: ['relu'] + -> Conv2D: [32, [3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: auto + -> Conv2D: [64, [3,3]] + -> Activation: ['relu'] + -> Conv2D: [64, [3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: auto + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: auto + -> Dense: [10] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/CIFAR10/cifar10_choice_auto.nml b/DataSets/Image/CIFAR10/cifar10_choice_auto.nml new file mode 100644 index 0000000..ac268c4 --- /dev/null +++ b/DataSets/Image/CIFAR10/cifar10_choice_auto.nml @@ -0,0 +1,48 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[32, 32], channels=3] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Class" + -> flat: [10] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[32, 32], channels=3] 
; + output: y ~ flat: [10] ; + + x -> Conv2D: [32,[3,3]] + -> Activation: ['relu'] + -> Conv2D: [32,[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [auto(0.25 ? 0.75 | name = "Drop1")] + -> Conv2D: [64,[3,3]] + -> Activation: ['relu'] + -> Conv2D: [64,[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [auto(0.25 ? 0.75 | name = "Drop2")] + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: [auto(0.25 ? 0.75 | name = "Drop3")] + -> Dense: [10] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/CIFAR10/cifar10_dist_auto.nml b/DataSets/Image/CIFAR10/cifar10_dist_auto.nml new file mode 100644 index 0000000..5bb8cdc --- /dev/null +++ b/DataSets/Image/CIFAR10/cifar10_dist_auto.nml @@ -0,0 +1,48 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[32, 32], channels=3] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Class" + -> flat: [10] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[32, 32], channels=3] ; + output: y ~ flat: [10] ; + + x -> Convolution2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_1"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_2"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [0.25] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_3"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 
100, cast="int" | count = 10, name="Hyperparameter_4"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [0.25] + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: [0.5] + -> Dense: [10] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/CIFAR10/cifar10_full_auto.nml b/DataSets/Image/CIFAR10/cifar10_full_auto.nml new file mode 100644 index 0000000..5e83063 --- /dev/null +++ b/DataSets/Image/CIFAR10/cifar10_full_auto.nml @@ -0,0 +1,30 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[32, 32], channels=3] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Class" + -> flat: [10] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[32, 32], channels=3] ; + output: y ~ flat: [10] ; + + x -> auto -> y ; + +train: + compile: + optimizer = auto, + loss = auto, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/Classification/Image/CIFAR100/.gitignore b/DataSets/Image/CIFAR100/.gitignore similarity index 100% rename from Classification/Image/CIFAR100/.gitignore rename to DataSets/Image/CIFAR100/.gitignore diff --git a/Classification/Image/CIFAR100/README.md b/DataSets/Image/CIFAR100/README.md similarity index 100% rename from Classification/Image/CIFAR100/README.md rename to DataSets/Image/CIFAR100/README.md diff --git a/Classification/Image/CIFAR100/build_csv.py b/DataSets/Image/CIFAR100/build_csv.py similarity index 97% rename from Classification/Image/CIFAR100/build_csv.py rename to DataSets/Image/CIFAR100/build_csv.py index 11f096d..7ee43ba 100644 --- a/Classification/Image/CIFAR100/build_csv.py +++ b/DataSets/Image/CIFAR100/build_csv.py 
@@ -3,8 +3,9 @@ import tarfile from pathlib import Path -import numpy as np import requests + +import numpy as np from imageio import imwrite from natsort import humansorted @@ -90,7 +91,7 @@ def write_data(): for ind, image in enumerate(image_list): file_path = image_path + str(count) + '.png' imwrite(file_path, image) - of.write(str(Path(file_path).resolve()) + ',' + str(labels[ind]) + '\n') + of.write(str(Path(file_path)) + ',' + str(labels[ind]) + '\n') count += 1 diff --git a/DataSets/Image/CIFAR100/cifar100_call_auto.nml b/DataSets/Image/CIFAR100/cifar100_call_auto.nml new file mode 100644 index 0000000..a9c65f5 --- /dev/null +++ b/DataSets/Image/CIFAR100/cifar100_call_auto.nml @@ -0,0 +1,48 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[32, 32], channels=3] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Class" + -> flat: [100] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[32, 32], channels=3] ; + output: y ~ flat: [100] ; + + x -> Conv2D: [32, [3,3]] + -> Activation: ['relu'] + -> Conv2D: [32, [3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: auto + -> Conv2D: [64, [3,3]] + -> Activation: ['relu'] + -> Conv2D: [64, [3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: auto + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: auto + -> Dense: [100] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/CIFAR100/cifar100_choice_auto.nml b/DataSets/Image/CIFAR100/cifar100_choice_auto.nml new file mode 100644 index 0000000..dfaedec --- /dev/null +++ b/DataSets/Image/CIFAR100/cifar100_choice_auto.nml @@ -0,0 +1,48 @@ +oracle("mode") = 
"classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[32, 32], channels=3] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Class" + -> flat: [100] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[32, 32], channels=3] ; + output: y ~ flat: [100] ; + + x -> Conv2D: [32,[3,3]] + -> Activation: ['relu'] + -> Conv2D: [32,[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [auto(0.25 ? 0.75 | name = "Drop1")] + -> Conv2D: [64,[3,3]] + -> Activation: ['relu'] + -> Conv2D: [64,[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [auto(0.25 ? 0.75 | name = "Drop2")] + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: [auto(0.25 ? 0.75 | name = "Drop3")] + -> Dense: [100] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/CIFAR100/cifar100_dist_auto.nml b/DataSets/Image/CIFAR100/cifar100_dist_auto.nml new file mode 100644 index 0000000..d2bcf54 --- /dev/null +++ b/DataSets/Image/CIFAR100/cifar100_dist_auto.nml @@ -0,0 +1,48 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[32, 32], channels=3] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Class" + -> flat: [100] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[32, 32], channels=3] ; + output: y ~ flat: [100] ; + + x -> Convolution2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_1"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 
10, high = 100, cast="int" | count = 10, name="Hyperparameter_2"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [0.25] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_3"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_4"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [0.25] + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: [0.5] + -> Dense: [100] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/CIFAR100/cifar100_full_auto.nml b/DataSets/Image/CIFAR100/cifar100_full_auto.nml new file mode 100644 index 0000000..58debfd --- /dev/null +++ b/DataSets/Image/CIFAR100/cifar100_full_auto.nml @@ -0,0 +1,30 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[32, 32], channels=3] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Class" + -> flat: [100] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[32, 32], channels= 3] ; + output: y ~ flat: [100] ; + + x -> auto -> y ; + +train: + compile: + optimizer = auto, + loss = auto, + metrics = ['accuracy'] ; + run: + epochs = 4 ; + dashboard: ; diff --git a/Classification/Image/MNIST/.gitignore b/DataSets/Image/MNIST/.gitignore similarity index 100% rename from Classification/Image/MNIST/.gitignore rename to DataSets/Image/MNIST/.gitignore diff --git a/Classification/Image/MNIST/README.md b/DataSets/Image/MNIST/README.md similarity index 74% rename from Classification/Image/MNIST/README.md 
rename to DataSets/Image/MNIST/README.md index 549df7b..033db69 100644 --- a/Classification/Image/MNIST/README.md +++ b/DataSets/Image/MNIST/README.md @@ -28,14 +28,34 @@ bind = "/DM-Dash/NeoPulse_Examples/Classification/Image/MNIST/training_data.csv" # Tutorial Files *build_csv.py:** Script creates list of training files and writes training full image paths and corresponding labels to a training CSV file. -**mnist_full_auto.nml:** Features full use of the auto keyword to automatically generate the entire architecture. - **mnist_call_auto.nml:** Features the use of auto to automatically select an architecture later. **mnist_choice_auto.nml:** Features use of auto keyword to automatically select from range of values for a given parameter. **mnist_dist_auto.nml:** Features use of the auto keyword to automatically select a value from a specified distribution of values (e.g. gaussian). +**mnist_full_auto.nml:** Features full use of the auto keyword to automatically generate the entire architecture. + +**mnist_kmeans.nml:** Demonstrates the k-means unsupervised clustering algorithm. + +**mnist_kmeans_batch.nml:** Demonstrates using batch processing for k-means clustering. + +**mnist_KmeansPCA.nml:** Demonstrates using k-means + PCA for clustering. + +**mnist_KmeansPCA_batch.nml:** Demonstrates batch processing for k-means + PCA. + +**mnist_matrix_capsule_auto.nml:** Demonstrates matrix capsule networks using the oracle. + +**mnist_matrix_capsule.nml:** Demonstrates matrix capsule networks without using the oracle. + +**mnist_pca.nml:** Demonstrates using Principal Component analysis (PCA). + +**mnist_pca_batch.nml:** Demonstrates batch processing for PCA. + +**mnist_vector_capsule_auto.nml:** Demonstrates vector capsule networks using the oracle. + +**mnist_vector_capsule.nml:** Demonstrates vector capsule networks without using the oracle. 
+ # Tutorial Videos and Guides Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) diff --git a/Classification/Image/MNIST/build_csv.py b/DataSets/Image/MNIST/build_csv.py similarity index 93% rename from Classification/Image/MNIST/build_csv.py rename to DataSets/Image/MNIST/build_csv.py index 2f82682..68250c9 100644 --- a/Classification/Image/MNIST/build_csv.py +++ b/DataSets/Image/MNIST/build_csv.py @@ -2,8 +2,9 @@ import shutil from pathlib import Path -import numpy as np import requests + +import numpy as np from imageio import imwrite from mnist import MNIST @@ -60,12 +61,12 @@ def write_csv_file(): for index, image in enumerate(train_images): img_file = 'images/mnist_train_' + str(index) + '.png' imwrite(img_file, image) - of.write(str(Path(img_file).resolve()) + ',' + str(train_labels[index]) + '\n') + of.write(str(Path(img_file)) + ',' + str(train_labels[index]) + '\n') for index, image in enumerate(test_images): img_file = 'images/mnist_test_' + str(index) + '.png' imwrite(img_file, image) - of.write(str(Path(img_file).resolve()) + ',' + str(test_labels[index]) + '\n') + of.write(str(Path(img_file)) + ',' + str(test_labels[index]) + '\n') if __name__ == '__main__': diff --git a/DataSets/Image/MNIST/mnist_KmeansPCA.nml b/DataSets/Image/MNIST/mnist_KmeansPCA.nml new file mode 100644 index 0000000..4a34b6e --- /dev/null +++ b/DataSets/Image/MNIST/mnist_KmeansPCA.nml @@ -0,0 +1,24 @@ +oracle("mode") = "unsupervised" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator:[] ; + params: ; + + +architecture: + input: x ~ image: [shape = [28,28], channels = 1] ; + output: y ; + + x -> UnsupervisedFlatten:[] + -> Pca:[n_components=8] + -> Kmeans:[n_clusters=2] + -> y ; + +train: + compile: ; + run: ; + dashboard: ; diff --git 
a/DataSets/Image/MNIST/mnist_KmeansPCA_batch.nml b/DataSets/Image/MNIST/mnist_KmeansPCA_batch.nml new file mode 100644 index 0000000..08b3001 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_KmeansPCA_batch.nml @@ -0,0 +1,25 @@ +oracle("mode") = "unsupervised" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator:[] ; + params: + batch_size = 16 ; + +architecture: + input: x ~ image: [shape = [28,28], channels = 1] ; + output: y ; + + x -> UnsupervisedFlatten:[] + -> Pca:[n_components=8, batch=True] + -> Kmeans:[n_clusters=2, batch=True] + -> y ; + +train: + compile: + batch = True ; + run: ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_call_auto.nml b/DataSets/Image/MNIST/mnist_call_auto.nml new file mode 100644 index 0000000..e52b2d0 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_call_auto.nml @@ -0,0 +1,47 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[28, 28], channels=1] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Label"-> flat: [10] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[28, 28], channels=1] ; + output: y ~ flat: [10] ; + + x -> Conv2D: [32, [3,3]] + -> Activation: ['relu'] + -> Conv2D: [32, [3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=2] + -> Dropout: auto + -> Conv2D: [64, [3,3]] + -> Activation: ['relu'] + -> Conv2D: [64, [3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=2] + -> Dropout: auto + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: auto + -> Dense: [10] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_choice_auto.nml 
b/DataSets/Image/MNIST/mnist_choice_auto.nml new file mode 100644 index 0000000..57fb2f8 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_choice_auto.nml @@ -0,0 +1,47 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[28, 28], channels=1] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Label"-> flat: [10] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[28, 28], channels=1] ; + output: y ~ flat: [10] ; + + x -> Conv2D: [32,[3,3]] + -> Activation: ['relu'] + -> Conv2D: [32,[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=2] + -> Dropout: [auto(0.25 ? 0.75 | name = "Drop")] + -> Conv2D: [64,[3,3]] + -> Activation: ['relu'] + -> Conv2D: [64,[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=2] + -> Dropout: [auto(0.25 ? 0.75 | name = "Drop")] + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: [auto(0.25 ? 
0.75 | name = "Drop")] + -> Dense: [10] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_dist_auto.nml b/DataSets/Image/MNIST/mnist_dist_auto.nml new file mode 100644 index 0000000..4b8aea0 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_dist_auto.nml @@ -0,0 +1,47 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[28, 28], channels=1] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Label"-> flat: [10] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[28, 28], channels=1] ; + output: y ~ flat: [10] ; + + x -> Convolution2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_1"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_2"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [0.25] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_3"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> Conv2D: [nb_filter=auto(dist = "uniform", low = 10, high = 100, cast="int" | count = 10, name="Hyperparameter_4"), kernel_size=[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=3] + -> Dropout: [0.25] + -> Flatten: [] + -> Dense: [512] + -> Activation: ['relu'] + -> Dropout: [0.5] + -> Dense: [10] + -> Activation: ['softmax'] + -> y ; + +train: + compile: + optimizer = 'rmsprop', + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_full_auto.nml 
b/DataSets/Image/MNIST/mnist_full_auto.nml new file mode 100644 index 0000000..612cb4b --- /dev/null +++ b/DataSets/Image/MNIST/mnist_full_auto.nml @@ -0,0 +1,30 @@ +oracle("mode") = "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape=[28, 28], channels=1] + -> ImageDataGenerator: [rescale= 0.003921568627451] ; + output: + y ~ from "Label" + -> flat: [10] + -> FlatDataGenerator: [] ; + params: + number_validation = 10000, + batch_size = 32 ; + +architecture: + input: x ~ image: [shape=[28, 28], channels=1] ; + output: y ~ flat: [10] ; + + x -> auto -> y ; + +train: + compile: + optimizer = auto, + loss = auto, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_kmeans.nml b/DataSets/Image/MNIST/mnist_kmeans.nml new file mode 100644 index 0000000..e4e873e --- /dev/null +++ b/DataSets/Image/MNIST/mnist_kmeans.nml @@ -0,0 +1,23 @@ +oracle("mode") = "unsupervised" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator:[]; + params:; + + +architecture: + input: x ~ image: [shape = [28,28], channels = 1]; + output: y; + + x -> UnsupervisedFlatten:[] + -> Kmeans:[n_clusters=10] + -> y ; + +train: + compile:; + run:; + dashboard:; diff --git a/DataSets/Image/MNIST/mnist_kmeans_batch.nml b/DataSets/Image/MNIST/mnist_kmeans_batch.nml new file mode 100644 index 0000000..5d1dcb6 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_kmeans_batch.nml @@ -0,0 +1,25 @@ +oracle("mode") = "unsupervised" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator:[]; + params: + batch_size = 16; + + +architecture: + input: x ~ image: [shape = [28,28], channels = 1]; + output: y; + + x -> UnsupervisedFlatten:[] + -> Kmeans:[n_clusters=10, batch=True] + -> y ; + +train: + compile: + batch=True; + run:; + dashboard:; diff --git 
a/DataSets/Image/MNIST/mnist_matrix_capsule.nml b/DataSets/Image/MNIST/mnist_matrix_capsule.nml new file mode 100644 index 0000000..5f99457 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_matrix_capsule.nml @@ -0,0 +1,34 @@ +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28,28], channels = 1] + -> ImageDataGenerator:[rescale = 0.00392156862745098] ; + output: + y ~ from "Label" + -> flat: [10] + -> FlatDataGenerator:[] ; + params: + batch_size = 64, + shuffle = True, + shuffle_init = True ; + +architecture: + input: x ~ image: [shape = [28,28], channels = 1] ; + output: y ~ flat: [10] ; + + x -> Conv2D: [filters = 32, kernel_size = 5, strides = 2, padding = 'valid', activation = 'relu', name = 'conv1'] + -> PrimaryCaps_Matrix: [] + -> ConvCaps:[channels = 32, kernel_size = 3, strides = 2, routings = 3] + -> ConvCaps:[channels = 32, kernel_size = 3, strides = 1, routings = 3] + -> ClassCaps:[num_capsule = 10, routings = 3] + -> y ; + +train: + compile: + optimizer = Adam:[lr = 0.001], + loss = margin_loss, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_matrix_capsule_auto.nml b/DataSets/Image/MNIST/mnist_matrix_capsule_auto.nml new file mode 100644 index 0000000..82d23c3 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_matrix_capsule_auto.nml @@ -0,0 +1,31 @@ +oracle("mode") = "matrix_capsule" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28,28], channels = 1] + -> ImageDataGenerator:[rescale = 0.00392156862745098] ; + output: + y ~ from "Label" + -> flat: [10] + -> FlatDataGenerator:[] ; + params: + batch_size = 64, + shuffle = True, + shuffle_init = True ; + +architecture: + input: x ~ image: [shape = [28,28], channels = 1] ; + output: y ~ flat: [10] ; + + x -> auto -> y ; + +train: + compile: + optimizer = Adam:[lr = 0.001], + loss = margin_loss, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git 
a/DataSets/Image/MNIST/mnist_pca.nml b/DataSets/Image/MNIST/mnist_pca.nml new file mode 100644 index 0000000..44ffbb1 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_pca.nml @@ -0,0 +1,23 @@ +oracle("mode") = "unsupervised" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator:[] ; + params: ; + + +architecture: + input: x ~ image: [shape = [28,28], channels = 1] ; + output: y ; + + x -> UnsupervisedFlatten:[] + -> Pca:[n_components=8] + -> y ; + +train: + compile: ; + run: ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_pca_batch.nml b/DataSets/Image/MNIST/mnist_pca_batch.nml new file mode 100644 index 0000000..49a6f58 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_pca_batch.nml @@ -0,0 +1,25 @@ +oracle("mode") = "unsupervised" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator:[] ; + params: + batch_size = 16 ; + + +architecture: + input: x ~ image: [shape = [28,28], channels = 1] ; + output: y ; + + x -> UnsupervisedFlatten:[] + -> Pca:[n_components=8, batch=True] + -> y ; + +train: + compile: + batch = True ; + run: ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_spectral.nml b/DataSets/Image/MNIST/mnist_spectral.nml new file mode 100644 index 0000000..9e64743 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_spectral.nml @@ -0,0 +1,50 @@ +oracle("mode") = "spectral_opt" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28,28], channels = 1] + -> ImageDataGenerator: [rescale=0.00392156862745098] ; + + output: + y ~ from "Label" -> flat: [10] -> FlatDataGenerator: [] ; + + params: + shuffle = True, + shuffle_init = True ; + +architecture: + input: + x1 ~ image: [shape = [28,28], channels = 1] ; + output: + y1 ~ flat: [10] ; + + x1 -> Conv2D: [32,[3,3]] + -> Activation: ['relu'] + -> Conv2D: [32,[3,3]] + -> Activation: ['relu'] + -> 
MaxPooling2D: [pool_size=2] + -> Conv2D: [64,[3,3]] + -> Activation: ['relu'] + -> Conv2D: [64,[3,3]] + -> Activation: ['relu'] + -> MaxPooling2D: [pool_size=2] + -> Flatten:[] + -> Dense: [10] + -> Activation: ['softmax'] + -> y1 ; + + +train: + compile: + opt_options = ['sgd', 'adam', 'adamax'], + lr_options = [0.03, 0.01, 0.003, 0.001, 0.0003], + momentum_options = [0.99, 0.9, 0.0], + decay_options = [0.0001, 0.0], + batch_options = [32,64,128], + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_vector_capsule.nml b/DataSets/Image/MNIST/mnist_vector_capsule.nml new file mode 100644 index 0000000..ed13305 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_vector_capsule.nml @@ -0,0 +1,33 @@ +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28,28], channels = 1] + -> ImageDataGenerator:[rescale = 0.00392156862745098] ; + output: + y ~ from "Label" + -> flat: [10] + -> FlatDataGenerator:[] ; + params: + batch_size = 64, + shuffle = True, + shuffle_init = True ; + +architecture: + input: x ~ image: [shape = [28,28], channels = 1] ; + output: y ~ flat: [10] ; + + x -> Conv2D:[filters = 256, kernel_size = 9, strides = 1, padding = 'valid', activation = 'relu', name = 'conv1'] + -> PrimaryCaps_Vector:[capsule_dim = 8, channels = 32, kernel_size = [9,9],strides = [2,2], padding = 'valid', name = 'primarycap_conv2D'] + -> DigitCaps: [num_capsule = 10, capsule_dim = 16, routings = 3, name = 'digitcaps'] + -> ClassCaps:[num_capsule = 10] + -> y ; + +train: + compile: + optimizer = Adam:[lr = 0.001], + loss = margin_loss, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Image/MNIST/mnist_vector_capsule_auto.nml b/DataSets/Image/MNIST/mnist_vector_capsule_auto.nml new file mode 100644 index 0000000..6e413e7 --- /dev/null +++ b/DataSets/Image/MNIST/mnist_vector_capsule_auto.nml @@ -0,0 +1,31 @@ +oracle("mode") = 
"vector_capsule" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Image" + -> image: [shape = [28,28], channels = 1] + -> ImageDataGenerator:[rescale = 0.00392156862745098] ; + output: + y ~ from "Label" + -> flat: [10] + -> FlatDataGenerator:[] ; + params: + batch_size = 64, + shuffle = True, + shuffle_init = True ; + +architecture: + input: x ~ image: [shape = [28,28], channels = 1] ; + output: y ~ flat: [10] ; + + x -> auto -> y ; + +train: + compile: + optimizer = Adam:[lr = 0.001], + loss = margin_loss, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/Classification/Image/README.md b/DataSets/Image/README.md similarity index 89% rename from Classification/Image/README.md rename to DataSets/Image/README.md index b94ee32..b446540 100644 --- a/Classification/Image/README.md +++ b/DataSets/Image/README.md @@ -8,6 +8,8 @@ The CIFAR-10 dataset features 60,000 32x32 color images among 10 classes (6,000 The MNIST dataset features 60,000 handwritten digits with 10,000 reserved for test. More information on the datasets and data formats can be found at the links above. +The VOC2012 dataset is an example dataset for training a Single Shot MultiBox Detector model for drawing bounding boxes around objects and classifying them. 
+ # Tutorial Videos and Guides Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) diff --git a/DataSets/Image/VOC2012/.gitignore b/DataSets/Image/VOC2012/.gitignore new file mode 100644 index 0000000..91e6ae4 --- /dev/null +++ b/DataSets/Image/VOC2012/.gitignore @@ -0,0 +1,3 @@ +VOCdevkit/ +raw_data/ +pre_trained_model/ diff --git a/DataSets/Image/VOC2012/README.md b/DataSets/Image/VOC2012/README.md new file mode 100644 index 0000000..dde79f2 --- /dev/null +++ b/DataSets/Image/VOC2012/README.md @@ -0,0 +1,137 @@ +# Introduction +These sample .nml files are for training a Single Shot MultiBox Detector model using image data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Model Structure +The SSD model structure uses VGG-16 as the base model. As shown in figure 1, it takes conv4_3, fc7, conv6_2, conv7_2, conv8_2, and conv9_2 feature layers to predict both location and class confidences. + + +![Fig. 1](../../../assets/Picture1.png "Fig. 1: SSD model with VGG-16 as base. Feature Layer conv4_3, fc7, conv6_2, conv7_2, conv8_2 and conv9_2 are used to predict location and confidence.") +**Fig.1 SSD model with VGG-16 as base. Feature Layer conv4_3, fc7, conv6_2, conv7_2, conv8_2 and conv9_2 are used to predict location and confidence.** + +In the prediction process, each feature layer is fed into an RPN (Region Proposal Network) to predict class confidence and location coordinates. + +![Fig. 2](../../../assets/Picture2.png "Fig. 2: RPN structure") +**Fig. 2: RPN structure** + + A set of anchor boxes is predefined for each feature layer. Each point in the feature map is defined as an anchor point. 4 or 6 different default boxes (different ratios and scales) are defined around each anchor point. Classes and location coordinates are predicted based on each anchor box. + +![Fig. 3](../../../assets/Picture3.png "Fig.
3: 4 different default boxes are defined around one anchor point") + +**Fig. 3: 4 different default boxes are defined around one anchor point** + +For example, the conv6_2 RPN is defined as below in .nml: + +``` + conv6_2 + -> Conv2D:[126, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv6_2_mbox_conf"] + -> Reshape: [[-1, 21]] + -> classes_conv6_2 + + + conv6_2 + -> Conv2D:[24, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv6_2_mbox_loc"] + -> boxes_conv6_2 + + + boxes_conv6_2 + -> AnchorBoxes: [300, 300, this_scale = 0.37, next_scale = 0.54, aspect_ratios = [1.0, 2.0, 0.5, 3.0, 0.33], this_steps = 32, this_offsets = 0.5, name = "conv6_2_mbox_priorbox"] + -> anchors_conv6_2 + + + boxes_conv6_2 + -> Reshape: [[-1,4]] + -> boxes_conv6_2 + +``` + +In detail, the confidence RPN is implemented by a 3 * 3 convolution layer. 6 default boxes are defined around each anchor point, and each box is classified into 21 classes (20 object classes + background), so the convolutional layer has 6 * 21 filters. After the convolutional layer, prediction results are reshaped as (21,). + +``` + conv6_2 + -> Conv2D:[126, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv6_2_mbox_conf"] + -> Reshape: [[-1, 21]] + -> classes_conv6_2 + +``` + +The box RPN is also implemented by a 3 * 3 convolution layer, which has 24 filters. (24 = 4 * 6, 4 location coordinates and 6 default boxes per anchor point). Then, box predictions are reshaped as (4,). + +``` + conv6_2 + -> Conv2D:[24, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv6_2_mbox_loc"] + -> boxes_conv6_2 + + boxes_conv6_2 + -> Reshape: [[-1,4]] + -> boxes_conv6_2 + +``` + +Anchor box labels for training are created by the "AnchorBoxes" layer, which uses the intermediate result between the box convolution and reshape layers as input.
+ +``` + boxes_conv6_2 + -> AnchorBoxes: [300, 300, this_scale = 0.37, next_scale = 0.54, aspect_ratios = [1.0, 2.0, 0.5, 3.0, 0.33], this_steps = 32, this_offsets = 0.5, name = "conv6_2_mbox_priorbox"] + -> anchors_conv6_2 + +``` + +In the end, all class and box prediction results across all feature layers need to be concatenated into the final output. + +``` + + [classes_conv4_3, classes_fc7, classes_conv6_2, classes_conv7_2, classes_conv8_2, classes_conv9_2] + -> Concatenate:[axis = 1] + -> classes_concat + + [boxes_conv4_3, boxes_fc7, boxes_conv6_2, boxes_conv7_2, boxes_conv8_2, boxes_conv9_2] + -> Concatenate: [axis = 1] + -> boxes_concat + + [boxes_concat, classes_concat] + -> Concatenate: [axis = 2] + -> prediction + +``` + + +## Loss +NeoPulse supports 3 types of loss for image detection: SSD loss, Yolo loss, and Focal loss. Users do not need to declare the loss explicitly. The corresponding loss is set automatically from the oracle mode declared at the beginning of the .nml file. + + +# Data +The data for this task can be found at: http://host.robots.ox.ac.uk/pascal/VOC/voc2012/ +To run this example, first you will need to download the raw data and pretrained vgg16 model for the VOC2012 task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed all the package dependencies of this script which are: `gzip, os, shutil, pathlib, tarfile, numpy, requests, and imageio`. + +Missing packages can be installed using pip: +```bash +$ pip install PACKAGE_NAME +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/ImageDetection/ssd/VOC2012/ssd300.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine.
If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/ImageDetection/ssd/VOC2012/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script downloads the raw data and the pre-trained model, then writes the full image paths and corresponding bounding-box labels to a training CSV file. + +**ssd300.nml:** VGG-16-based SSD detector that runs on 300 * 300 images. + +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the ImageDataGenerator visit the [Data section of the NeoPulse™ AI Studio Documentation](https://docs.neopulse.ai/NML-source/#data) + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/DataSets/Image/VOC2012/build_csv.py b/DataSets/Image/VOC2012/build_csv.py new file mode 100644 index 0000000..0352477 --- /dev/null +++ b/DataSets/Image/VOC2012/build_csv.py @@ -0,0 +1,99 @@ +import gzip +import shutil +from pathlib import Path +import tarfile +import os + +import numpy as np +import requests +from imageio import imwrite +from xml.etree import ElementTree +import json + + +def download_data(): + ''' + Check if raw VOC2012 data and VGG pre_trained model are present. If not, download VOC2012 data from the official site.
+ ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + URL_voc = 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/' + f = 'VOCtrainval_11-May-2012.tar' + if not Path('raw_data/' + f).is_file(): + r = requests.get(URL_voc + f, stream=True) + with open('raw_data/' + f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + tarfile.open('raw_data/' + f).extractall() + + Path('pre_trained_model').mkdir(parents=True, exist_ok=True) + URL_vgg = 'https://drive.google.com/uc?authuser=0&id=1sBmajn6vOE7qJ8GnxUJt4fGPuffVUZox&export=download' + f_vgg = 'vgg_16.h5' + if not Path('pre_trained_model/' + f_vgg).is_file():  # check for the VGG weights file, not the VOC tarball name + r_vgg = requests.get(URL_vgg, stream=True)  # URL_vgg is already the complete download link; do not append the file name + with open('pre_trained_model/' + f_vgg, 'wb') as f_k: + shutil.copyfileobj(r_vgg.raw, f_k) + +def write_csv_file(): + ''' + Write absolute path to image files and bounding box labels to training_data.csv. + ''' + + xml_path = Path("VOCdevkit/VOC2012/Annotations/") + image_folder = Path("VOCdevkit/VOC2012/JPEGImages/") + + label2id = {"aeroplane" : 0, + "bicycle" : 1, + "bird" : 2, + "boat" : 3, + "bottle" : 4, + "bus" : 5, + "car" : 6, + "cat" : 7, + "chair" : 8, + "cow" : 9, + "diningtable" :10, + "dog" : 11, + "horse" : 12, + "motorbike" : 13, + "person" : 14, + "pottedplant" : 15, + "sheep" : 16, + "sofa" : 17, + "train" : 18, + "tvmonitor" : 19} + + with open('training_data.csv', 'w') as of: + of.write('image,label\n') + filenames = os.listdir(str(xml_path)) + for index, filename in enumerate(filenames): + tree = ElementTree.parse(str(xml_path / filename)) + root = tree.getroot() + bounding_boxes = [] + size_tree = root.find('size') + width = float(size_tree.find('width').text) + height = float(size_tree.find('height').text) + for object_tree in root.findall('object'): + for bounding_box in object_tree.iter('bndbox'): + xmin = float(bounding_box.find('xmin').text)/width + ymin = float(bounding_box.find('ymin').text)/height + xmax = float(bounding_box.find('xmax').text)/width + ymax =
float(bounding_box.find('ymax').text)/height + + class_name = object_tree.find('name').text + class_id = label2id[class_name] + bounding_box = [xmin, ymin, xmax, ymax, class_id] + bounding_boxes.append(bounding_box) + image_name = root.find('filename').text + image_path = str((image_folder / image_name).resolve()) + jstring = json.dumps(bounding_boxes) + of.write(image_path + ",\"" + jstring + "\"\n") + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # create a csv file for NeoPulse AI Studio + write_csv_file() diff --git a/DataSets/Image/VOC2012/ssd300.nml b/DataSets/Image/VOC2012/ssd300.nml new file mode 100644 index 0000000..6ee8e80 --- /dev/null +++ b/DataSets/Image/VOC2012/ssd300.nml @@ -0,0 +1,244 @@ +oracle("mode") = "SSD" + +num_classes = 20 +class_names = ["aeroplane", + "bicycle", + "bird", + "boat", + "bottle", + "bus", + "car", + "cat", + "chair", + "cow", + "diningtable", + "dog", + "horse", + "motorbike", + "person", + "pottedplant", + "sheep", + "sofa", + "train", + "tvmonitor"] + + +architecture from "/DM-Dash/Neopulse_Examples/ImageDetection/ssd/VOC2012/pre_trained_model/vgg_16.h5": + input: + x ~ image: [shape = [300, 300], channels = 3]; + output: + prediction ~ bbox: [num_classes = 20]; + + + x -> Conv2D:[64, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv1_1'] + -> Conv2D:[64, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv1_2'] + -> MaxPooling2D:[pool_size = [2,2], strides = [2,2], padding = "same", name = 'pool1'] + + + -> Conv2D:[128, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv2_1'] + -> Conv2D:[128, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = 
l2:[0.0005], name = 'conv2_2'] + -> MaxPooling2D:[pool_size = [2,2], strides = [2,2], padding = "same", name = 'pool2'] + + + -> Conv2D:[256, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv3_1'] + -> Conv2D:[256, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv3_2'] + -> Conv2D:[256, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv3_3'] + -> MaxPooling2D:[pool_size = [2,2], strides = [2,2], padding = "same", name = 'pool3'] + + + -> Conv2D:[512, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv4_1'] + -> Conv2D:[512, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv4_2'] + -> Conv2D:[512, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv4_3'] + -> conv4_3 + + + conv4_3 + -> MaxPooling2D:[pool_size = [2,2], strides = [2,2], padding = "same", name = 'pool4'] + + + -> Conv2D:[512, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv5_1'] + -> Conv2D:[512, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv5_2'] + -> Conv2D:[512, [3,3], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv5_3'] + -> MaxPooling2D:[pool_size = [3,3], strides = [1,1], padding = "same", name = 'pool5'] + + + -> Conv2D:[1024, [3,3], dilation_rate= [6,6], strides = [1,1], 
activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'fc6'] + + + -> Conv2D:[1024, [1,1], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'fc7'] + -> fc7 + + + fc7 + -> Conv2D:[256, [1,1], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv6_1'] + -> ZeroPadding2D:[padding = [[1,1],[1,1]], name = 'conv6_padding'] + -> Conv2D:[512, [3,3], strides = [2,2], activation = 'relu', padding = "valid", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv6_2'] + -> conv6_2 + + + conv6_2 + -> Conv2D:[128, [1,1], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv7_1'] + -> ZeroPadding2D:[padding = [[1,1],[1,1]], name = 'conv7_padding'] + -> Conv2D:[256, [3,3], strides = [2,2], activation = 'relu', padding = "valid", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv7_2'] + -> conv7_2 + + + conv7_2 + -> Conv2D:[128, [1,1], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv8_1'] + -> Conv2D:[256, [3,3], strides = [1,1], activation = 'relu', padding = "valid", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv8_2'] + -> conv8_2 + + + conv8_2 + -> Conv2D:[128, [1,1], strides = [1,1], activation = 'relu', padding = "same", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv9_1'] + -> Conv2D:[256, [3,3], strides = [1,1], activation = 'relu', padding = "valid", kernel_initializer = 'he_normal', kernel_regularizer = l2:[0.0005], name = 'conv9_2'] + -> conv9_2 + + + conv4_3 + -> L2Normalization: [gamma_init = 20, name = 'conv4_3_norm'] + -> conv4_3_norm + + + conv4_3_norm + -> 
Conv2D:[84, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name ="conv4_3_norm_mbox_conf"] + -> Reshape: [[-1, 21]] + -> classes_conv4_3 + + fc7 + -> Conv2D:[126, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "fc7_mbox_conf"] + -> Reshape: [[-1, 21]] + -> classes_fc7 + + conv6_2 + -> Conv2D:[126, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv6_2_mbox_conf"] + -> Reshape: [[-1, 21]] + -> classes_conv6_2 + + conv7_2 + -> Conv2D:[126, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv7_2_mbox_conf"] + -> Reshape: [[-1, 21]] + -> classes_conv7_2 + + conv8_2 + -> Conv2D:[84, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv8_2_mbox_conf"] + -> Reshape: [[-1, 21]] + -> classes_conv8_2 + + conv9_2 + -> Conv2D:[84, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv9_2_mbox_conf"] + -> Reshape: [[-1, 21]] + -> classes_conv9_2 + + [classes_conv4_3, classes_fc7, classes_conv6_2, classes_conv7_2, classes_conv8_2, classes_conv9_2] + -> Concatenate:[axis = 1] + -> classes_concat + + + + conv4_3_norm + -> Conv2D:[16, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name ="conv4_3_norm_mbox_loc"] + -> boxes_conv4_3 + + fc7 + -> Conv2D:[24, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "fc7_mbox_loc"] + -> boxes_fc7 + + conv6_2 + -> Conv2D:[24, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv6_2_mbox_loc"] + -> boxes_conv6_2 + + conv7_2 + -> Conv2D:[24, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv7_2_mbox_loc"] + -> boxes_conv7_2 + + conv8_2 + -> 
Conv2D:[16, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv8_2_mbox_loc"] + -> boxes_conv8_2 + + conv9_2 + -> Conv2D:[16, [3,3], padding = "same", kernel_initializer = "he_normal", kernel_regularizer = l2:[0.0005], name = "conv9_2_mbox_loc"] + -> boxes_conv9_2 + + + boxes_conv4_3 + -> AnchorBoxes: [300, 300, this_scale = 0.1, next_scale = 0.2, aspect_ratios = [1.0, 2.0, 0.5], this_steps = 8, this_offsets = 0.5, name = "conv4_3_mbox_priorbox"] + -> Reshape: [[-1,8]] + -> anchors_conv4_3 + + boxes_fc7 + -> AnchorBoxes: [300, 300, this_scale = 0.2, next_scale = 0.37, aspect_ratios = [1.0, 2.0, 0.5, 3.0, 0.33], this_steps = 16, this_offsets = 0.5, name = "fc7_mbox_priorbox"] + -> anchors_fc7 + + boxes_conv6_2 + -> AnchorBoxes: [300, 300, this_scale = 0.37, next_scale = 0.54, aspect_ratios = [1.0, 2.0, 0.5, 3.0, 0.33], this_steps = 32, this_offsets = 0.5, name = "conv6_2_mbox_priorbox"] + -> anchors_conv6_2 + + boxes_conv7_2 + -> AnchorBoxes: [300, 300, this_scale = 0.54, next_scale = 0.71, aspect_ratios = [1.0, 2.0, 0.5, 3.0, 0.33], this_steps = 64, this_offsets = 0.5, name = "conv7_2_mbox_priorbox"] + -> anchors_conv7_2 + + boxes_conv8_2 + -> AnchorBoxes: [300, 300, this_scale = 0.71, next_scale = 0.88, aspect_ratios = [1.0, 2.0, 0.5], this_steps = 100, this_offsets = 0.5, name = "conv8_2_mbox_priorbox"] + -> anchors_conv8_2 + + boxes_conv9_2 + -> AnchorBoxes: [300, 300, this_scale = 0.88, next_scale = 1.05, aspect_ratios = [1.0, 2.0, 0.5], this_steps = 300, this_offsets = 0.5, name = "conv9_2_mbox_priorbox"] + -> anchors_conv9_2 + + + boxes_conv4_3 + -> Reshape: [[-1,4]] + -> boxes_conv4_3 + + boxes_fc7 + -> Reshape: [[-1,4]] + -> boxes_fc7 + + boxes_conv6_2 + -> Reshape: [[-1,4]] + -> boxes_conv6_2 + + boxes_conv7_2 + -> Reshape: [[-1,4]] + -> boxes_conv7_2 + + boxes_conv8_2 + -> Reshape: [[-1,4]] + -> boxes_conv8_2 + + boxes_conv9_2 + -> Reshape: [[-1,4]] + -> boxes_conv9_2 + + [boxes_conv4_3, boxes_fc7, 
boxes_conv6_2, boxes_conv7_2, boxes_conv8_2, boxes_conv9_2] + -> Concatenate: [axis = 1] + -> boxes_concat + + + [boxes_concat, classes_concat] + -> Concatenate: [axis = 2] + -> prediction; + +source: + bind = "/DM-Dash/Neopulse_Examples/ImageDetection/ssd/VOC2012/training_data.csv"; + input: + img ~ from "image" + -> image: [shape = [300, 300], channels = 3] + -> ImageDataGenerator:[]; + output: + label ~ from "label" + -> bbox: [] + -> BBoxDataGenerator:[num_classes = 20]; + params: + batch_size = 16, + shuffle = True, + shuffle_init = True; + +train: + compile: + optimizer = Adam:[lr = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = 0.0001, decay = 0.0]; + run: + epochs = 2; + dashboard: ; + diff --git a/DataSets/Text/Chinese_news/README.md b/DataSets/Text/Chinese_news/README.md new file mode 100644 index 0000000..61e1674 --- /dev/null +++ b/DataSets/Text/Chinese_news/README.md @@ -0,0 +1,37 @@ +# Introduction +These sample .nml files are for training a classification model using Text data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). NeoPulse text models support multiple languages, such as Chinese, Japanese, and Korean. This example classifies Chinese news into 10 classes: "sports": 0, "science and technology": 1, "stocks": 2, "entertainment": 3, "politics": 4, "society": 5, "education": 6, "finance": 7, "house and home": 8, "games": 9. + +# Data +The data for this task can be found at: http://thuctc.thunlp.org/ +To run this example, first you will need to download and pre-process the raw data for the task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed all the package dependencies of this script which are: `shutil, tarfile, pathlib, pandas, requests, natsort, and sklearn`.
+ +Missing packages can be installed using pip: +```bash +$ pip install PACKAGE_NAME +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/Classification/Text/Chinese_news/cnews.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/Classification/Text/Chinese_news/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script downloads the pre-built training CSV file (training_data.csv) if it is not already present. + +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the TextDataGenerator visit the [Data section of the NeoPulse™ AI Studio Documentation](https://docs.neopulse.ai/NML-source/#data) + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/DataSets/Text/Chinese_news/build_csv.py b/DataSets/Text/Chinese_news/build_csv.py new file mode 100644 index 0000000..59375c3 --- /dev/null +++ b/DataSets/Text/Chinese_news/build_csv.py @@ -0,0 +1,27 @@ +import shutil +import tarfile +from pathlib import Path + +import pandas as pd +import requests +from natsort import humansorted +from sklearn.datasets import load_files + + +def download_data(): + ''' + Check if training_data.csv is present. If not, download it from the hosted copy.
+ ''' + + URL = 'https://drive.google.com/uc?authuser=0&id=1Jg3EcJEB48-B_dGeOGMoLzJg_HjSnY67&export=download' + if not Path('training_data.csv').is_file(): + r = requests.get(URL, stream=True) + with open('training_data.csv', 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + + + +if __name__ == "__main__": + + download_data() + diff --git a/DataSets/Text/Chinese_news/cnews.nml b/DataSets/Text/Chinese_news/cnews.nml new file mode 100644 index 0000000..36bc2f0 --- /dev/null +++ b/DataSets/Text/Chinese_news/cnews.nml @@ -0,0 +1,41 @@ +source: + bind = "/DM-Dash/Neopulse_Examples/Classification/Text/Chinese_news/training_data.csv" ; + input: + x ~ from "news" + -> text: [300] + -> TextDataGenerator: [nb_words=5001,char_level=True] ; + output: + y ~ from "label" + -> flat: [10] + -> FlatDataGenerator: [] ; + params: + validation_split = 0.2, + batch_size = 32; + +architecture: + input: x ~ text: [300] ; + output: y ~ flat: [10] ; + + x -> Embedding: [5001, 64, input_length=300] + -> Conv1D: [256,3, padding='same', strides = 1, activation='relu'] + -> MaxPooling1D: [pool_size=3] + -> Conv1D: [128,3, padding='same', strides = 1, activation='relu'] + -> MaxPooling1D: [pool_size=3] + -> Conv1D: [64,3, padding='same', strides = 1, activation='relu'] + -> Flatten: [] + -> Dropout: [0.1] + -> BatchNormalization: [] + -> Dense: [256, activation='relu'] + -> Dropout: [0.1] + -> Dense: [10, activation='softmax'] + -> y ; + +train: + compile: + optimizer = Adam:[0.001], + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: + save_on = 'val_acc' ; \ No newline at end of file diff --git a/SageMaker/Sentiment/.gitignore b/DataSets/Text/Sentiment/.gitignore similarity index 100% rename from SageMaker/Sentiment/.gitignore rename to DataSets/Text/Sentiment/.gitignore index 2879cbd..cb2b32c 100644 --- a/SageMaker/Sentiment/.gitignore +++ b/DataSets/Text/Sentiment/.gitignore @@ -1,3 +1,3 @@ -raw_data/ aclImdb/ +raw_data/ training_data.csv diff 
--git a/Classification/Text/Sentiment/README.md b/DataSets/Text/Sentiment/README.md similarity index 100% rename from Classification/Text/Sentiment/README.md rename to DataSets/Text/Sentiment/README.md diff --git a/Classification/Text/Sentiment/build_csv.py b/DataSets/Text/Sentiment/build_csv.py similarity index 100% rename from Classification/Text/Sentiment/build_csv.py rename to DataSets/Text/Sentiment/build_csv.py diff --git a/Classification/Text/Sentiment/sentiment_call_auto.nml b/DataSets/Text/Sentiment/sentiment_call_auto.nml similarity index 91% rename from Classification/Text/Sentiment/sentiment_call_auto.nml rename to DataSets/Text/Sentiment/sentiment_call_auto.nml index 0a1e39e..520a207 100644 --- a/Classification/Text/Sentiment/sentiment_call_auto.nml +++ b/DataSets/Text/Sentiment/sentiment_call_auto.nml @@ -3,7 +3,7 @@ oracle("complexity") = 0.1 oracle("regularization") = 0.99 source: - bind = "/DM-Dash/examples/sentiment/data.csv" ; + bind = "training_data.csv" ; input: x ~ from "Review" -> text: [200] @@ -34,5 +34,5 @@ train: loss = 'categorical_crossentropy', metrics = ['accuracy'] ; run: - epochs = 4 ; + epochs = 2 ; dashboard: ; diff --git a/SageMaker/Sentiment/sentiment_choice_auto.nml b/DataSets/Text/Sentiment/sentiment_choice_auto.nml similarity index 87% rename from SageMaker/Sentiment/sentiment_choice_auto.nml rename to DataSets/Text/Sentiment/sentiment_choice_auto.nml index 00aee12..05d952d 100644 --- a/SageMaker/Sentiment/sentiment_choice_auto.nml +++ b/DataSets/Text/Sentiment/sentiment_choice_auto.nml @@ -1,4 +1,4 @@ -oracle("generated") = 1 +oracle("generated") = 2 oracle("complexity") = 0.1 oracle("regularization") = 0.99 @@ -14,7 +14,8 @@ source: -> FlatDataGenerator: [] ; params: validation_split = 0.5, - batch_size = 1250 ; + batch_size = 64, + shuffle_init = False; architecture: input: x ~ text: [200] ; @@ -34,6 +35,5 @@ train: loss = 'categorical_crossentropy', metrics = ['accuracy'] ; run: - epochs = 4 ; - dashboard: - save_on = 
'val_acc' ; + epochs = 2 ; + dashboard: ; diff --git a/Classification/Text/Sentiment/sentiment_dist_auto.nml b/DataSets/Text/Sentiment/sentiment_dist_auto.nml similarity index 87% rename from Classification/Text/Sentiment/sentiment_dist_auto.nml rename to DataSets/Text/Sentiment/sentiment_dist_auto.nml index 272ba3b..557c39b 100644 --- a/Classification/Text/Sentiment/sentiment_dist_auto.nml +++ b/DataSets/Text/Sentiment/sentiment_dist_auto.nml @@ -3,7 +3,7 @@ oracle("complexity") = 0.1 oracle("regularization") = 0.99 source: - bind = "/DM-Dash/examples/sentiment/data.csv" ; + bind = "training_data.csv" ; input: x ~ from "Review" -> text: [200] @@ -25,7 +25,8 @@ architecture: -> Convolution1D: [64, 4] -> MaxPooling1D: [pool_size=4] -> LSTM: [128] - -> Dense: [2, activation = 'softmax'] -> y ; + -> Dense: [2, activation = 'softmax'] + -> y ; train: compile: @@ -33,5 +34,5 @@ train: loss = 'categorical_crossentropy', metrics = ['accuracy'] ; run: - epochs = 4 ; + epochs = 2 ; dashboard: ; diff --git a/Classification/Text/Sentiment/sentiment_full_auto.nml b/DataSets/Text/Sentiment/sentiment_full_auto.nml similarity index 88% rename from Classification/Text/Sentiment/sentiment_full_auto.nml rename to DataSets/Text/Sentiment/sentiment_full_auto.nml index 5829e4b..2d1cd5c 100644 --- a/Classification/Text/Sentiment/sentiment_full_auto.nml +++ b/DataSets/Text/Sentiment/sentiment_full_auto.nml @@ -2,7 +2,7 @@ oracle("mode") = "classification" oracle("complexity") = 0.1 source: - bind = "/DM-Dash/examples/sentiment/data.csv" ; + bind = "training_data.csv" ; input: x ~ from "Review" -> text: [200] @@ -26,5 +26,5 @@ train: loss = auto, metrics = ['accuracy'] ; run: - epochs = 4 ; + epochs = 2 ; dashboard: ; diff --git a/Classification/Text/Sentiment/sentiment_multi-GPU.nml b/DataSets/Text/Sentiment/sentiment_multi-GPU.nml similarity index 92% rename from Classification/Text/Sentiment/sentiment_multi-GPU.nml rename to DataSets/Text/Sentiment/sentiment_multi-GPU.nml index 
26487ac..2b6d5f2 100644 --- a/Classification/Text/Sentiment/sentiment_multi-GPU.nml +++ b/DataSets/Text/Sentiment/sentiment_multi-GPU.nml @@ -3,7 +3,7 @@ oracle("complexity") = 0.1 oracle("regularization") = 0.99 source: - bind = "/DM-Dash/dsl/sentiment/data.csv" ; + bind = "training_data.csv" ; input: x ~ from "Review" -> text: [200] @@ -34,5 +34,5 @@ train Ngpu 2: loss = 'categorical_crossentropy', metrics = ['accuracy'] ; run: - epochs = 4 ; + epochs = 2 ; dashboard: ; diff --git a/Classification/Video/HumanAction/.gitignore b/DataSets/Video/HumanAction/.gitignore similarity index 100% rename from Classification/Video/HumanAction/.gitignore rename to DataSets/Video/HumanAction/.gitignore diff --git a/Classification/Video/HumanAction/README.md b/DataSets/Video/HumanAction/README.md similarity index 100% rename from Classification/Video/HumanAction/README.md rename to DataSets/Video/HumanAction/README.md diff --git a/Classification/Video/HumanAction/build_csv.py b/DataSets/Video/HumanAction/build_csv.py similarity index 97% rename from Classification/Video/HumanAction/build_csv.py rename to DataSets/Video/HumanAction/build_csv.py index 5e6cf9b..cc9f04b 100644 --- a/Classification/Video/HumanAction/build_csv.py +++ b/DataSets/Video/HumanAction/build_csv.py @@ -4,6 +4,7 @@ from zipfile import ZipFile import requests + from natsort import humansorted @@ -47,7 +48,7 @@ def build_list(data_path, validation_split): for c, p in enumerate(class_paths): line_list = [] for f in Path(p).iterdir(): - line_list.append(str(f.absolute()) + ',' + str(c) + '\n') + line_list.append(str(f) + ',' + str(c) + '\n') shuffle(line_list) split_index = int(validation_split * len(line_list)) diff --git a/DataSets/Video/HumanAction/video_class.nml b/DataSets/Video/HumanAction/video_class.nml new file mode 100644 index 0000000..4bff29d --- /dev/null +++ b/DataSets/Video/HumanAction/video_class.nml @@ -0,0 +1,45 @@ +source: + bind = "training_data.csv" ; + input: + x ~ from "Video" + -> video: 
[shape=[80, 80], channels=3, seqlength=32] + -> ImageDataGenerator: [] ; + output: + y ~ from "Class" + -> flat: [6] + -> FlatDataGenerator: [] ; + params: + number_validation = 119, + batch_size = 2 ; + +architecture: + input: x ~ video: [shape=[80, 80], channels=3, seqlength=32] ; + output: y ~ flat: [6] ; + + x -> TimeDistributed: [Conv2D: [32, [3,3], kernel_initializer="he_normal", activation='relu'], input_shape=[32, 80, 80, 3]] + -> TimeDistributed: [Conv2D: [32, [3,3], kernel_initializer="he_normal", activation='relu']] + -> TimeDistributed: [MaxPooling2D: []] + -> TimeDistributed: [Conv2D: [48, [3,3], kernel_initializer="he_normal", activation='relu']] + -> TimeDistributed: [Conv2D: [48, [3,3], kernel_initializer="he_normal", activation='relu']] + -> TimeDistributed: [MaxPooling2D: []] + -> TimeDistributed: [Conv2D: [64, [3,3], kernel_initializer="he_normal", activation='relu']] + -> TimeDistributed: [Conv2D: [64, [3,3], kernel_initializer="he_normal", activation='relu']] + -> TimeDistributed: [MaxPooling2D: []] + -> TimeDistributed: [Conv2D: [128, [3,3], kernel_initializer="he_normal", activation='relu']] + -> TimeDistributed: [Conv2D: [128, [3,3], kernel_initializer="he_normal", activation='relu']] + -> TimeDistributed: [Flatten: []] + -> LSTM: [256, return_sequences=True] + -> Flatten:[] + -> Dense: [512, activation='relu'] + -> Dropout: [0.5] + -> Dense: [6, activation='softmax'] + -> y; + +train: + compile: + optimizer = SGD: [lr=0.0001, momentum=0.9], + loss = 'categorical_crossentropy', + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/DataSets/Video/HumanAction/video_class_auto.nml b/DataSets/Video/HumanAction/video_class_auto.nml new file mode 100644 index 0000000..5b2ae98 --- /dev/null +++ b/DataSets/Video/HumanAction/video_class_auto.nml @@ -0,0 +1,30 @@ +oracle("mode")= "classification" + +source: + bind = "training_data.csv" ; + input: + x ~ from "Video" + -> video: [shape=[80, 80], channels=3, seqlength=32] + -> 
ImageDataGenerator: [] ; + output: + y ~ from "Class" + -> flat: [6] + -> FlatDataGenerator: [] ; + params: + number_validation = 119, + batch_size = 2 ; + +architecture: + input: x ~ video: [shape=[80, 80], channels=3, seqlength=32] ; + output: y ~ flat: [6] ; + + x -> auto -> y; + +train: + compile: + optimizer = auto, + loss = auto, + metrics = ['accuracy'] ; + run: + epochs = 2 ; + dashboard: ; diff --git a/GANs/began/MNIST/README.md b/GANs/began/MNIST/README.md new file mode 100644 index 0000000..5667ae3 --- /dev/null +++ b/GANs/began/MNIST/README.md @@ -0,0 +1,41 @@ +# Introduction +These sample .nml files are for training a Boundary Equilibrium gan model using image data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Data +The data for this task can be found at: http://yann.lecun.com/exdb/mnist/ +To run this example, first you will need to download and pre-process the raw data for the MNIST task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed all the package dependencies of this script which are: `gzip, os, shutil, pathlib, numpy, requests, imageio, and python-mnist`. + +Missing packages can be installed using pip: +```bash +$ pip install +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/GANs/began/MNIST/mnist_began.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. 
If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/GANs/began/MNIST/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script creates a list of training files and writes the full image paths and a noise vector to a training CSV file. + +**mnist_began.nml:** Full self-defined architecture without any automation. + +**mnist_began_auto.nml:** Features full use of the auto keyword to automatically generate the entire architecture. + +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the ImageDataGenerator visit the [Data section](https://docs.neopulse.ai/NML-source/#data) of the NeoPulse™ AI Studio Documentation + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/GANs/began/MNIST/build_csv.py b/GANs/began/MNIST/build_csv.py new file mode 100644 index 0000000..285b90c --- /dev/null +++ b/GANs/began/MNIST/build_csv.py @@ -0,0 +1,87 @@ +import gzip +import shutil +from pathlib import Path + +import numpy as np +import requests +from imageio import imwrite +from mnist import MNIST + + +def download_data(): + ''' + Check if raw MNIST data is present. If not, download MNIST data from the official site. 
+ ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + URL = 'http://yann.lecun.com/exdb/mnist/' + file_list = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'] + for f in file_list: + if not Path('raw_data/' + f.replace('.gz', '')).is_file(): + r = requests.get(URL + f, stream=True) + with open('raw_data/' + f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + with gzip.open('raw_data/' + f, 'rb') as f_z: + with open('raw_data/' + f.replace('.gz', ''), 'wb') as f_u: + shutil.copyfileobj(f_z, f_u) + + +def convert_images(raw): + ''' + Convert images from the MNIST format and return a 4-dim array with + shape: [number_of_images_per_batch, height, width, channel] + The pixel values are integers between 0 and 255. + There are 10000, 28x28 1 channel images per batch, in row major order. + ''' + + return np.reshape(np.array(raw), (-1, 28, 28, 1)).astype('uint8') + + +def write_csv_file(): + ''' + Save images as PNG files (lossless). + Write absolute path to image files and class label to training_data.csv + training_data.csv should be of length 70001, with the first line containing the header. + The test images are written at the end, i.e. the last 10000 lines correspond to the test set. 
+ ''' + + mndata = MNIST('raw_data') + train_img, train_labels = mndata.load_training() + train_images = convert_images(train_img) + test_img, test_labels = mndata.load_testing() + test_images = convert_images(test_img) + + Path('images').mkdir(parents=True, exist_ok=True) + + with open('training_data.csv', 'w') as of: + of.write('image,noise\n') + + for index, image in enumerate(train_images): + img_file = 'images/mnist_train_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (64)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + for index, image in enumerate(test_images): + img_file = 'images/mnist_test_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (64)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # Write the data to PNG files, and create a csv file for NeoPulse AI Studio + write_csv_file() diff --git a/GANs/began/MNIST/mnist_began.nml b/GANs/began/MNIST/mnist_began.nml new file mode 100644 index 0000000..d3dbab4 --- /dev/null +++ b/GANs/began/MNIST/mnist_began.nml @@ -0,0 +1,96 @@ +oracle("mode") = "BEGAN" + +gamma = 0.5 + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/began/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [64] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + +architecture name:generator: + input: + z ~ flat: [64]; + output: + g ~ image: [shape = [28, 28], channels = 1]; + + z -> Dense: [6272] + -> Activation:['elu'] + -> Reshape: [[7,7,128]] + -> Conv2D: [128, kernel_size = 3, padding = "same"] 
+ -> Activation: ['elu'] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> UpSampling2D: [[2,2]] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> UpSampling2D: [[2,2]] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [1, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> g; + +architecture name: discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ image: [shape = [28, 28], channels = 1]; + + img -> Conv2D: [1, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [128, kernel_size = 3, strides = 2, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [256, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [256, kernel_size = 3, strides = 2, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [384, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [384, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Flatten: [] + -> Dense: [128] + -> Dense: [6272] + -> Activation: ['elu'] + -> Reshape: [[7,7,128]] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> UpSampling2D: [[2,2]] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> UpSampling2D: [[2,2]] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [128, kernel_size = 3, padding = "same"] + -> Activation: ['elu'] + -> Conv2D: [1, kernel_size = 3, padding = "same"] + -> Activation: 
['elu'] + -> d; + +train : + compile: + optimizer = Adam: [0.00005], + loss = l1loss; + run: + epochs = 2; + dashboard: ; + diff --git a/GANs/began/MNIST/mnist_began_auto.nml b/GANs/began/MNIST/mnist_began_auto.nml new file mode 100644 index 0000000..5d63ad4 --- /dev/null +++ b/GANs/began/MNIST/mnist_began_auto.nml @@ -0,0 +1,42 @@ +oracle("mode") = "BEGAN" + +gamma = 0.5 + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/began/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [64] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + +architecture name:generator: + input: + z ~ flat: [64]; + output: + g ~ image: [shape = [28, 28], channels = 1]; + + z -> auto -> g; + +architecture name: discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ image: [shape = [28, 28], channels = 1]; + + img -> auto -> d; + +train : + compile: + optimizer = auto, + loss = auto; + run: + epochs = 2; + dashboard: ; + diff --git a/GANs/cgan/MNIST/README.md b/GANs/cgan/MNIST/README.md new file mode 100644 index 0000000..a8c2905 --- /dev/null +++ b/GANs/cgan/MNIST/README.md @@ -0,0 +1,39 @@ +# Introduction +These sample .nml files are for training a Conditional gan model using image data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Data +The data for this task can be found at: http://yann.lecun.com/exdb/mnist/ +To run this example, first you will need to download and pre-process the raw data for the MNIST task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed all the package dependencies of this script which are: `gzip, os, shutil, pathlib, numpy, requests, imageio, and python-mnist`. 
+ +Missing packages can be installed using pip: +```bash +$ pip install <package-name> +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/GANs/cgan/MNIST/mnist_cgan.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/GANs/cgan/MNIST/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script creates a list of training files and writes the full image paths, a noise vector, and the corresponding labels to a training CSV file. + +**mnist_cgan.nml:** Full self-defined architecture without any automation. + +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the ImageDataGenerator visit the [Data section](https://docs.neopulse.ai/NML-source/#data) of the NeoPulse™ AI Studio Documentation + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/GANs/cgan/MNIST/build_csv.py b/GANs/cgan/MNIST/build_csv.py new file mode 100644 index 0000000..3fb7064 --- /dev/null +++ b/GANs/cgan/MNIST/build_csv.py @@ -0,0 +1,87 @@ +import gzip +import shutil +from pathlib import Path + +import numpy as np +import requests +from imageio import imwrite +from mnist import MNIST + + +def download_data(): + ''' + Check if raw MNIST data is present. If not, download MNIST data from the official site. 
+ ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + URL = 'http://yann.lecun.com/exdb/mnist/' + file_list = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'] + for f in file_list: + if not Path('raw_data/' + f.replace('.gz', '')).is_file(): + r = requests.get(URL + f, stream=True) + with open('raw_data/' + f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + with gzip.open('raw_data/' + f, 'rb') as f_z: + with open('raw_data/' + f.replace('.gz', ''), 'wb') as f_u: + shutil.copyfileobj(f_z, f_u) + + +def convert_images(raw): + ''' + Convert images from the MNIST format and return a 4-dim array with + shape: [number_of_images_per_batch, height, width, channel] + The pixel values are integers between 0 and 255. + There are 10000, 28x28 1 channel images per batch, in row major order. + ''' + + return np.reshape(np.array(raw), (-1, 28, 28, 1)).astype('uint8') + + +def write_csv_file(): + ''' + Save images as PNG files (lossless). + Write absolute path to image files and class label to training_data.csv + training_data.csv should be of length 70001, with the first line containing the header. + The test images are written at the end, i.e. the last 10000 lines correspond to the test set. 
+ ''' + + mndata = MNIST('raw_data') + train_img, train_labels = mndata.load_training() + train_images = convert_images(train_img) + test_img, test_labels = mndata.load_testing() + test_images = convert_images(test_img) + + Path('images').mkdir(parents=True, exist_ok=True) + + with open('training_data.csv', 'w') as of: + of.write('image,noise,label\n') + + for index, image in enumerate(train_images): + img_file = 'images/mnist_train_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (64)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + ',' + str(train_labels[index]) + '\n') + + for index, image in enumerate(test_images): + img_file = 'images/mnist_test_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (64)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + ',' + str(test_labels[index]) + '\n') + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # Write the data to PNG files, and create a csv file for NeoPulse AI Studio + write_csv_file() diff --git a/GANs/cgan/MNIST/mnist_cgan.nml b/GANs/cgan/MNIST/mnist_cgan.nml new file mode 100644 index 0000000..12557ee --- /dev/null +++ b/GANs/cgan/MNIST/mnist_cgan.nml @@ -0,0 +1,75 @@ +oracle("mode") = "CGAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/cgan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [64] + -> FlatDataGenerator: [], + label ~ from "label" + -> flat: [10] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + + +architecture name:generator : + input: + z ~ flat: [64], + label ~ flat: [10]; + output: + g ~ image:[shape = [28,28], 
channels = 1]; + + label -> Dense:[64] + -> imd + + [z, imd] -> Multiply:[] + -> Dense: [256] + -> LeakyReLU: [alpha = 0.2] + -> BatchNormalization: [momentum = 0.8] + -> Dense: [512] + -> LeakyReLU: [alpha = 0.2] + -> BatchNormalization: [momentum = 0.8] + -> Dense: [1024] + -> LeakyReLU: [alpha = 0.2] + -> BatchNormalization: [momentum = 0.8] + -> Dense: [784, activation = 'tanh'] + -> Reshape: [[28,28,1]] + -> g; + +architecture name:discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1], + label ~ flat: [10]; + output: + d; + + img -> Flatten: [] + -> img_f + + label -> Dense:[784] + -> label_f + + [img_f, label_f] -> Multiply:[] + -> Dense: [512] + -> LeakyReLU: [alpha = 0.2] + -> Dense: [512] + -> LeakyReLU: [alpha = 0.2] + -> Dropout: [0.4] + -> Dense: [512] + -> LeakyReLU: [alpha = 0.2] + -> Dropout: [0.4] + -> Dense: [1, activation = 'sigmoid'] + -> d; + +train : + compile: + optimizer = Adam: [lr = 0.0002, beta_1 = 0.5], + loss = 'binary_crossentropy'; + run: + epochs = 2; \ No newline at end of file diff --git a/GANs/dcgan/MNIST/README.md b/GANs/dcgan/MNIST/README.md new file mode 100644 index 0000000..8ae4b00 --- /dev/null +++ b/GANs/dcgan/MNIST/README.md @@ -0,0 +1,41 @@ +# Introduction +These sample .nml files are for training a Deep Convolutional gan model using image data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Data +The data for this task can be found at: http://yann.lecun.com/exdb/mnist/ +To run this example, first you will need to download and pre-process the raw data for the MNIST task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed all the package dependencies of this script which are: `gzip, os, shutil, pathlib, numpy, requests, imageio, and python-mnist`. 
+ +Missing packages can be installed using pip: +```bash +$ pip install <package-name> +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/GANs/dcgan/MNIST/mnist_dcgan.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/GANs/dcgan/MNIST/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script creates a list of training files and writes the full image paths and a noise vector to a training CSV file. + +**mnist_dcgan.nml:** Full self-defined architecture without any automation. + +**mnist_dcgan_auto.nml:** Features full use of the auto keyword to automatically generate the entire architecture. + +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the ImageDataGenerator visit the [Data section](https://docs.neopulse.ai/NML-source/#data) of the NeoPulse™ AI Studio Documentation + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/GANs/dcgan/MNIST/build_csv.py b/GANs/dcgan/MNIST/build_csv.py new file mode 100644 index 0000000..8ac5f29 --- /dev/null +++ b/GANs/dcgan/MNIST/build_csv.py @@ -0,0 +1,87 @@ +import gzip +import shutil +from pathlib import Path + +import numpy as np +import requests +from imageio import imwrite +from mnist import MNIST + + +def download_data(): + ''' + Check if raw MNIST data is present. If not, download MNIST data from the official site. 
+ ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + URL = 'http://yann.lecun.com/exdb/mnist/' + file_list = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'] + for f in file_list: + if not Path('raw_data/' + f.replace('.gz', '')).is_file(): + r = requests.get(URL + f, stream=True) + with open('raw_data/' + f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + with gzip.open('raw_data/' + f, 'rb') as f_z: + with open('raw_data/' + f.replace('.gz', ''), 'wb') as f_u: + shutil.copyfileobj(f_z, f_u) + + +def convert_images(raw): + ''' + Convert images from the MNIST format and return a 4-dim array with + shape: [number_of_images_per_batch, height, width, channel] + The pixel values are integers between 0 and 255. + There are 10000, 28x28 1 channel images per batch, in row major order. + ''' + + return np.reshape(np.array(raw), (-1, 28, 28, 1)).astype('uint8') + + +def write_csv_file(): + ''' + Save images as PNG files (lossless). + Write absolute path to image files and class label to training_data.csv + training_data.csv should be of length 70001, with the first line containing the header. + The test images are written at the end, i.e. the last 10000 lines correspond to the test set. 
+ ''' + + mndata = MNIST('raw_data') + train_img, train_labels = mndata.load_training() + train_images = convert_images(train_img) + test_img, test_labels = mndata.load_testing() + test_images = convert_images(test_img) + + Path('images').mkdir(parents=True, exist_ok=True) + + with open('training_data.csv', 'w') as of: + of.write('image,noise\n') + + for index, image in enumerate(train_images): + img_file = 'images/mnist_train_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + for index, image in enumerate(test_images): + img_file = 'images/mnist_test_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # Write the data to PNG files, and create a csv file for NeoPulse AI Studio + write_csv_file() diff --git a/GANs/dcgan/MNIST/mnist_dcgan.nml b/GANs/dcgan/MNIST/mnist_dcgan.nml new file mode 100644 index 0000000..3583ee1 --- /dev/null +++ b/GANs/dcgan/MNIST/mnist_dcgan.nml @@ -0,0 +1,74 @@ +oracle("mode") = "DCGAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/dcgan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + + + + +architecture name:generator: + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28, 28], channels = 1]; + + z -> Dense: [6272] + -> Activation: ['relu'] + -> Reshape: [[7,7,128]] + -> BatchNormalization: [momentum = 0.8] + -> 
UpSampling2D: [[2,2]] + -> Conv2D:[128, kernel_size = 3, padding = 'same'] + -> Activation: ['relu'] + -> BatchNormalization: [momentum = 0.8] + -> UpSampling2D:[[2,2]] + -> Conv2D: [64, kernel_size = 3, padding = 'same'] + -> Activation: ['relu'] + -> BatchNormalization: [momentum = 0.8] + -> Conv2D: [1, kernel_size = 3, padding = 'same'] + -> Activation: ['tanh'] + -> g; + +architecture name: discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat: [1]; + + img -> Conv2D:[32, kernel_size = 3, strides = 2, input_shape = [28, 28, 1], padding = 'same'] + -> LeakyReLU:[alpha = 0.2] + -> Dropout:[0.25] + -> Conv2D: [64, kernel_size = 3, strides = 2, padding = 'same'] + -> ZeroPadding2D: [] + -> LeakyReLU:[alpha = 0.2] + -> Dropout: [0.25] + -> BatchNormalization: [momentum = 0.8] + -> Conv2D: [128, kernel_size = 3, strides = 2, padding = 'same'] + -> LeakyReLU: [alpha = 0.2] + -> Dropout: [0.25] + -> BatchNormalization : [momentum = 0.8] + -> Conv2D: [256, kernel_size= 3, strides = 1, padding = 'same'] + -> LeakyReLU: [alpha = 0.2] + -> Dropout: [0.25] + -> Flatten:[] + -> Dense:[1, activation = 'sigmoid'] + -> d; + + +train: + compile: + optimizer = Adam: [0.0002, 0.5], + loss = 'binary_crossentropy'; + run: + epochs = 2; + diff --git a/GANs/dcgan/MNIST/mnist_dcgan_auto.nml b/GANs/dcgan/MNIST/mnist_dcgan_auto.nml new file mode 100644 index 0000000..e76a17b --- /dev/null +++ b/GANs/dcgan/MNIST/mnist_dcgan_auto.nml @@ -0,0 +1,43 @@ +oracle("mode") = "DCGAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/dcgan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + + + + +architecture name:generator: + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28, 28], channels = 1]; + + z -> auto -> g; + +architecture 
name: discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat: [1]; + + img -> auto -> d; + + +train: + compile: + optimizer = auto, + loss = auto; + run: + epochs = 2; + diff --git a/GANs/gan/MNIST/README.md b/GANs/gan/MNIST/README.md new file mode 100644 index 0000000..2d27d70 --- /dev/null +++ b/GANs/gan/MNIST/README.md @@ -0,0 +1,41 @@ +# Introduction +These sample .nml files are for training a gan model using image data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Data +The data for this task can be found at: http://yann.lecun.com/exdb/mnist/ +To run this example, first you will need to download and pre-process the raw data for the MNIST task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed all the package dependencies of this script which are: `gzip, os, shutil, pathlib, numpy, requests, imageio, and python-mnist`. + +Missing packages can be installed using pip: +```bash +$ pip install +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/GANs/gan/MNIST/mnist_gan.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/GANs/gan/MNIST/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script creates list of training files and writes training full image paths and a vector of noise to a training CSV file. + +**mnist_gan.nml:** Full self-defined architecture without any automation. 
+ +**mnist_gan_auto.nml:** Features full use of the auto keyword to automatically generate the entire architecture and set hyperparameters to their default values. + +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the ImageDataGenerator visit the [Data section](https://docs.neopulse.ai/NML-source/#data) of the NeoPulse™ AI Studio Documentation + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/GANs/gan/MNIST/build_csv.py b/GANs/gan/MNIST/build_csv.py new file mode 100644 index 0000000..8ac5f29 --- /dev/null +++ b/GANs/gan/MNIST/build_csv.py @@ -0,0 +1,87 @@ +import gzip +import shutil +from pathlib import Path + +import numpy as np +import requests +from imageio import imwrite +from mnist import MNIST + + +def download_data(): + ''' + Check if raw MNIST data is present. If not, download MNIST data from the official site. + ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + URL = 'http://yann.lecun.com/exdb/mnist/' + file_list = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'] + for f in file_list: + if not Path('raw_data/' + f.replace('.gz', '')).is_file(): + r = requests.get(URL + f, stream=True) + with open('raw_data/' + f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + with gzip.open('raw_data/' + f, 'rb') as f_z: + with open('raw_data/' + f.replace('.gz', ''), 'wb') as f_u: + shutil.copyfileobj(f_z, f_u) + + +def convert_images(raw): + ''' + Convert images from the MNIST format and return a 4-dim array with + shape: [number_of_images_per_batch, height, width, channel] + The pixel values are integers between 0 and 255. + There are 10000, 28x28 1 channel images per batch, in row major order. 
+ ''' + + return np.reshape(np.array(raw), (-1, 28, 28, 1)).astype('uint8') + + +def write_csv_file(): + ''' + Save images as PNG files (lossless). + Write the absolute path of each image file and a '|'-separated vector of 100 random noise values (not the class label) to training_data.csv + training_data.csv should be of length 70001, with the first line containing the header. + The test images are written at the end, i.e. the last 10000 lines correspond to the test set. + ''' + + mndata = MNIST('raw_data') + train_img, train_labels = mndata.load_training() + train_images = convert_images(train_img) + test_img, test_labels = mndata.load_testing() + test_images = convert_images(test_img) + + Path('images').mkdir(parents=True, exist_ok=True) + + with open('training_data.csv', 'w') as of: + of.write('image,noise\n') + + for index, image in enumerate(train_images): + img_file = 'images/mnist_train_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + for index, image in enumerate(test_images): + img_file = 'images/mnist_test_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # Write the data to PNG files, and create a csv file for NeoPulse AI Studio + write_csv_file() diff --git a/GANs/gan/MNIST/mnist_gan.nml b/GANs/gan/MNIST/mnist_gan.nml new file mode 100644 index 0000000..9964446 --- /dev/null +++ b/GANs/gan/MNIST/mnist_gan.nml @@ -0,0 +1,57 @@ +oracle("mode") = "GAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/gan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: 
[], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 256, + shuffle = True, + shuffle_init = True; + + +architecture name:generator : + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28,28], channels = 1]; + + z -> Dense:[128] + -> LeakyReLU: [alpha = 0.2] + -> Dense:[128] + -> LeakyReLU: [alpha = 0.2] + -> Dense: [784, activation = 'tanh'] + -> Reshape: [[28,28,1]] + -> g; + + + +architecture name:discriminator : + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat:[1]; + + img -> Flatten:[] + -> Dense:[128] + -> LeakyReLU: [alpha = 0.2] + -> Dense: [128] + -> LeakyReLU: [alpha = 0.2] + -> Dense: [1] + -> Activation: ['sigmoid'] + -> d; + + + +train : + + compile: + optimizer = Adam: [lr = 0.0005], + loss = 'binary_crossentropy'; + run: + epochs = 10; \ No newline at end of file diff --git a/GANs/gan/MNIST/mnist_gan_auto.nml b/GANs/gan/MNIST/mnist_gan_auto.nml new file mode 100644 index 0000000..1e3d0a1 --- /dev/null +++ b/GANs/gan/MNIST/mnist_gan_auto.nml @@ -0,0 +1,44 @@ +oracle("mode") = "GAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/gan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 256, + shuffle = True, + shuffle_init = True; + + +architecture name:generator : + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28,28], channels = 1]; + + z -> auto -> g; + + + +architecture name:discriminator : + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat:[1]; + + img -> auto -> d; + + + +train : + + compile: + optimizer = auto, + loss = auto; + run: + epochs = 10; \ No newline at end of file diff --git a/GANs/lsgan/MNIST/README.md b/GANs/lsgan/MNIST/README.md new file mode 100644 index 0000000..1564f98 --- /dev/null +++ b/GANs/lsgan/MNIST/README.md @@ -0,0 +1,41 @@ +# 
Introduction +These sample .nml files are for training a Least Squares GAN (LSGAN) model using image data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Data +The data for this task can be found at: http://yann.lecun.com/exdb/mnist/ +To run this example, first you will need to download and pre-process the raw data for the MNIST task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed the third-party packages it depends on: `numpy`, `requests`, `imageio`, and `python-mnist`. The other modules it imports (`gzip`, `shutil`, `pathlib`) are part of the Python standard library and need no installation. + +Missing packages can be installed using pip: +```bash +$ pip install numpy requests imageio python-mnist +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/GANs/lsgan/MNIST/mnist_lsgan.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/GANs/lsgan/MNIST/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script that downloads MNIST, saves every image as a PNG file, and writes each image's absolute path together with a 100-value noise vector to `training_data.csv`. + +**mnist_lsgan.nml:** Fully self-defined architecture without any automation. + +**mnist_lsgan_auto.nml:** Features full use of the auto keyword to automatically generate the entire architecture and hyperparameter values. 
+ +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the ImageDataGenerator visit the [Data section] of the NeoPulse™ AI Studio Documentation(https://docs.neopulse.ai/NML-source/#data) + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/GANs/lsgan/MNIST/build_csv.py b/GANs/lsgan/MNIST/build_csv.py new file mode 100644 index 0000000..8ac5f29 --- /dev/null +++ b/GANs/lsgan/MNIST/build_csv.py @@ -0,0 +1,87 @@ +import gzip +import shutil +from pathlib import Path + +import numpy as np +import requests +from imageio import imwrite +from mnist import MNIST + + +def download_data(): + ''' + Check if raw MNIST data is present. If not, download MNIST data from the official site. + ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + URL = 'http://yann.lecun.com/exdb/mnist/' + file_list = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'] + for f in file_list: + if not Path('raw_data/' + f.replace('.gz', '')).is_file(): + r = requests.get(URL + f, stream=True) + with open('raw_data/' + f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + with gzip.open('raw_data/' + f, 'rb') as f_z: + with open('raw_data/' + f.replace('.gz', ''), 'wb') as f_u: + shutil.copyfileobj(f_z, f_u) + + +def convert_images(raw): + ''' + Convert images from the MNIST format and return a 4-dim array with + shape: [number_of_images_per_batch, height, width, channel] + The pixel values are integers between 0 and 255. + There are 10000, 28x28 1 channel images per batch, in row major order. + ''' + + return np.reshape(np.array(raw), (-1, 28, 28, 1)).astype('uint8') + + +def write_csv_file(): + ''' + Save images as PNG files (lossless). 
+ Write the absolute path of each image file and a '|'-separated vector of 100 random noise values (not the class label) to training_data.csv + training_data.csv should be of length 70001, with the first line containing the header. + The test images are written at the end, i.e. the last 10000 lines correspond to the test set. + ''' + + mndata = MNIST('raw_data') + train_img, train_labels = mndata.load_training() + train_images = convert_images(train_img) + test_img, test_labels = mndata.load_testing() + test_images = convert_images(test_img) + + Path('images').mkdir(parents=True, exist_ok=True) + + with open('training_data.csv', 'w') as of: + of.write('image,noise\n') + + for index, image in enumerate(train_images): + img_file = 'images/mnist_train_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + for index, image in enumerate(test_images): + img_file = 'images/mnist_test_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # Write the data to PNG files, and create a csv file for NeoPulse AI Studio + write_csv_file() diff --git a/GANs/lsgan/MNIST/mnist_lsgan.nml b/GANs/lsgan/MNIST/mnist_lsgan.nml new file mode 100644 index 0000000..517f74c --- /dev/null +++ b/GANs/lsgan/MNIST/mnist_lsgan.nml @@ -0,0 +1,58 @@ +oracle("mode") = "LSGAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/lsgan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = 
True; + +architecture name:generator: + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28,28], channels = 1]; + + z -> Dense: [256] + -> LeakyReLU: [alpha = 0.2] + -> BatchNormalization: [momentum = 0.8] + -> Dense: [512] + -> LeakyReLU: [alpha = 0.2] + -> BatchNormalization: [momentum = 0.8] + -> Dense: [1024] + -> LeakyReLU: [alpha=0.2] + -> BatchNormalization: [momentum = 0.8] + -> Dense: [784, activation = 'tanh'] + -> Reshape: [[28, 28, 1]] + -> g; + +architecture name: discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat: [1]; + + img -> Flatten:[] + -> Dense:[512] + -> LeakyReLU: [alpha = 0.2] + -> Dense: [256] + -> LeakyReLU: [alpha = 0.2] + -> Dense: [1] + -> d; + +train: + compile: + optimizer = Adam: [0.0002, beta_1 = 0.5], + loss_generator = 'binary_crossentropy', + loss_discriminator = 'mse', + loss = 'mse'; + run: + epochs = 2; + diff --git a/GANs/lsgan/MNIST/mnist_lsgan_auto.nml b/GANs/lsgan/MNIST/mnist_lsgan_auto.nml new file mode 100644 index 0000000..9315e03 --- /dev/null +++ b/GANs/lsgan/MNIST/mnist_lsgan_auto.nml @@ -0,0 +1,39 @@ +oracle("mode") = "LSGAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/lsgan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + +architecture name:generator: + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28,28], channels = 1]; + + z -> auto -> g; + +architecture name: discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat: [1]; + + img -> auto -> d; + +train: + compile: + optimizer = auto, + loss = auto; + run: + epochs = 2; + diff --git a/GANs/wgan/MNIST/README.md b/GANs/wgan/MNIST/README.md new file mode 100644 index 0000000..558b971 --- /dev/null +++ b/GANs/wgan/MNIST/README.md @@ -0,0 +1,41
@@ +# Introduction +These sample .nml files are for training a Wasserstein GAN (WGAN) model using image data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Data +The data for this task can be found at: http://yann.lecun.com/exdb/mnist/ +To run this example, first you will need to download and pre-process the raw data for the MNIST task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed the third-party packages it depends on: `numpy`, `requests`, `imageio`, and `python-mnist`. The other modules it imports (`gzip`, `shutil`, `pathlib`) are part of the Python standard library and need no installation. + +Missing packages can be installed using pip: +```bash +$ pip install numpy requests imageio python-mnist +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/GANs/wgan/MNIST/mnist_wgan.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/GANs/wgan/MNIST/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script that downloads MNIST, saves every image as a PNG file, and writes each image's absolute path together with a 100-value noise vector to `training_data.csv`. + +**mnist_wgan.nml:** Fully self-defined architecture without any automation. + +**mnist_wgan_auto.nml:** Features full use of the auto keyword to automatically generate the entire architecture. 
+ +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the ImageDataGenerator visit the [Data section] of the NeoPulse™ AI Studio Documentation(https://docs.neopulse.ai/NML-source/#data) + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/GANs/wgan/MNIST/build_csv.py b/GANs/wgan/MNIST/build_csv.py new file mode 100644 index 0000000..8ac5f29 --- /dev/null +++ b/GANs/wgan/MNIST/build_csv.py @@ -0,0 +1,87 @@ +import gzip +import shutil +from pathlib import Path + +import numpy as np +import requests +from imageio import imwrite +from mnist import MNIST + + +def download_data(): + ''' + Check if raw MNIST data is present. If not, download MNIST data from the official site. + ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + URL = 'http://yann.lecun.com/exdb/mnist/' + file_list = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'] + for f in file_list: + if not Path('raw_data/' + f.replace('.gz', '')).is_file(): + r = requests.get(URL + f, stream=True) + with open('raw_data/' + f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + with gzip.open('raw_data/' + f, 'rb') as f_z: + with open('raw_data/' + f.replace('.gz', ''), 'wb') as f_u: + shutil.copyfileobj(f_z, f_u) + + +def convert_images(raw): + ''' + Convert images from the MNIST format and return a 4-dim array with + shape: [number_of_images_per_batch, height, width, channel] + The pixel values are integers between 0 and 255. + There are 10000, 28x28 1 channel images per batch, in row major order. + ''' + + return np.reshape(np.array(raw), (-1, 28, 28, 1)).astype('uint8') + + +def write_csv_file(): + ''' + Save images as PNG files (lossless). 
+ Write the absolute path of each image file and a '|'-separated vector of 100 random noise values (not the class label) to training_data.csv + training_data.csv should be of length 70001, with the first line containing the header. + The test images are written at the end, i.e. the last 10000 lines correspond to the test set. + ''' + + mndata = MNIST('raw_data') + train_img, train_labels = mndata.load_training() + train_images = convert_images(train_img) + test_img, test_labels = mndata.load_testing() + test_images = convert_images(test_img) + + Path('images').mkdir(parents=True, exist_ok=True) + + with open('training_data.csv', 'w') as of: + of.write('image,noise\n') + + for index, image in enumerate(train_images): + img_file = 'images/mnist_train_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + for index, image in enumerate(test_images): + img_file = 'images/mnist_test_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # Write the data to PNG files, and create a csv file for NeoPulse AI Studio + write_csv_file() diff --git a/GANs/wgan/MNIST/mnist_wgan.nml b/GANs/wgan/MNIST/mnist_wgan.nml new file mode 100644 index 0000000..19c9e11 --- /dev/null +++ b/GANs/wgan/MNIST/mnist_wgan.nml @@ -0,0 +1,67 @@ +oracle("mode") = "WGAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/wgan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + 
+architecture name:generator : + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28,28], channels = 1]; + + + z -> Dense: [6272, activation='relu'] + -> Reshape: [[7, 7, 128]] + -> BatchNormalization: [momentum = 0.8] + -> UpSampling2D: [] + -> Conv2D: [128, kernel_size = 4, padding = "same"] + -> Activation: ['relu'] + -> BatchNormalization: [momentum = 0.8] + -> UpSampling2D: [] + -> Conv2D: [64, kernel_size = 4, padding = 'same'] + -> Activation: ['relu'] + -> BatchNormalization: [momentum = 0.8] + -> Conv2D: [1, kernel_size = 4, padding = 'same'] + -> Activation: ['tanh'] + -> g; + + +architecture name:discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat: [1]; + + img -> Conv2D: [16, kernel_size = 3, strides = 2, padding = "same"] + -> LeakyReLU: [alpha = 0.2] + -> Dropout: [0.25] + -> Conv2D: [32, kernel_size = 3, strides = 2, padding = "same"] + -> ZeroPadding2D:[] + -> LeakyReLU: [alpha = 0.2] + -> Dropout: [0.25] + -> BatchNormalization: [momentum = 0.8] + -> Conv2D: [64, kernel_size = 3, strides = 1, padding = "same"] + -> LeakyReLU: [alpha = 0.2] + -> Dropout: [0.25] + -> Flatten:[] + -> Dense: [1, activation = "linear"] + -> d; + + +train : + compile: + optimizer = RMSprop:[lr = 0.00005], + loss = wasserstein_loss; + run: + epochs = 2; \ No newline at end of file diff --git a/GANs/wgan/MNIST/mnist_wgan_auto.nml b/GANs/wgan/MNIST/mnist_wgan_auto.nml new file mode 100644 index 0000000..28a6025 --- /dev/null +++ b/GANs/wgan/MNIST/mnist_wgan_auto.nml @@ -0,0 +1,41 @@ +oracle("mode") = "WGAN" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/wgan/MNIST/training_data.csv"; + input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + +architecture name:generator : + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28,28], 
channels = 1]; + + + z -> auto -> g; + + +architecture name:discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat: [1]; + + img -> auto -> d; + + +train : + compile: + optimizer = auto, + loss = auto; + run: + epochs = 2; \ No newline at end of file diff --git a/GANs/wganGP/MNIST/README.md b/GANs/wganGP/MNIST/README.md new file mode 100644 index 0000000..1a189d7 --- /dev/null +++ b/GANs/wganGP/MNIST/README.md @@ -0,0 +1,41 @@ +# Introduction +These sample .nml files are for training a Wasserstein GAN with gradient penalty (WGAN-GP) model using image data in [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf). + +# Data +The data for this task can be found at: http://yann.lecun.com/exdb/mnist/ +To run this example, first you will need to download and pre-process the raw data for the MNIST task using the included ```build_csv.py``` script: + +```bash +$ python build_csv.py +``` + +If the script fails, make sure that you have installed the third-party packages it depends on: `numpy`, `requests`, `imageio`, and `python-mnist`. The other modules it imports (`gzip`, `shutil`, `pathlib`) are part of the Python standard library and need no installation. + +Missing packages can be installed using pip: +```bash +$ pip install numpy requests imageio python-mnist +``` + +Once you've downloaded and pre-processed the data, you can start training using any of the NML scripts provided. To begin training: +```bash +$ neopulse train -p -f /DM-Dash/NeoPulse_Examples/GANs/wganGP/MNIST/mnist_wganGP.nml +``` +The paths in the NML scripts in this directory assume that you have cloned this repository into the /DM-Dash directory of your machine. If you have put it somewhere else, you'll need to move the NML files into a location under the /DM-Dash directory, and change the path in the line: +```bash +bind = "/DM-Dash/NeoPulse_Examples/GANs/wganGP/MNIST/training_data.csv" ; +``` + +# Tutorial Files +**build_csv.py:** Script that downloads MNIST, saves every image as a PNG file, and writes each image's absolute path together with a 100-value noise vector to `training_data.csv`. 
+ +**mnist_wganGP.nml:** Full self-defined architecture without any automation. + +**mnist_wganGP_auto.nml:** Features full use of the auto keyword to automatically generate the entire architecture. + +# Tutorial Videos and Guides +Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-developer-portal) + +For more information on using the ImageDataGenerator visit the [Data section] of the NeoPulse™ AI Studio Documentation(https://docs.neopulse.ai/NML-source/#data) + +# License +Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. diff --git a/GANs/wganGP/MNIST/build_csv.py b/GANs/wganGP/MNIST/build_csv.py new file mode 100644 index 0000000..8ac5f29 --- /dev/null +++ b/GANs/wganGP/MNIST/build_csv.py @@ -0,0 +1,87 @@ +import gzip +import shutil +from pathlib import Path + +import numpy as np +import requests +from imageio import imwrite +from mnist import MNIST + + +def download_data(): + ''' + Check if raw MNIST data is present. If not, download MNIST data from the official site. + ''' + + Path('raw_data').mkdir(parents=True, exist_ok=True) + + URL = 'http://yann.lecun.com/exdb/mnist/' + file_list = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'] + for f in file_list: + if not Path('raw_data/' + f.replace('.gz', '')).is_file(): + r = requests.get(URL + f, stream=True) + with open('raw_data/' + f, 'wb') as f_z: + shutil.copyfileobj(r.raw, f_z) + with gzip.open('raw_data/' + f, 'rb') as f_z: + with open('raw_data/' + f.replace('.gz', ''), 'wb') as f_u: + shutil.copyfileobj(f_z, f_u) + + +def convert_images(raw): + ''' + Convert images from the MNIST format and return a 4-dim array with + shape: [number_of_images_per_batch, height, width, channel] + The pixel values are integers between 0 and 255. 
+ There are 10000, 28x28 1 channel images per batch, in row major order. + ''' + + return np.reshape(np.array(raw), (-1, 28, 28, 1)).astype('uint8') + + +def write_csv_file(): + ''' + Save images as PNG files (lossless). + Write the absolute path of each image file and a '|'-separated vector of 100 random noise values (not the class label) to training_data.csv + training_data.csv should be of length 70001, with the first line containing the header. + The test images are written at the end, i.e. the last 10000 lines correspond to the test set. + ''' + + mndata = MNIST('raw_data') + train_img, train_labels = mndata.load_training() + train_images = convert_images(train_img) + test_img, test_labels = mndata.load_testing() + test_images = convert_images(test_img) + + Path('images').mkdir(parents=True, exist_ok=True) + + with open('training_data.csv', 'w') as of: + of.write('image,noise\n') + + for index, image in enumerate(train_images): + img_file = 'images/mnist_train_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + for index, image in enumerate(test_images): + img_file = 'images/mnist_test_' + str(index) + '.png' + imwrite(img_file, image) + noise_num = np.random.normal(0, 1, (100)) + noise = "" + for noise_ele in noise_num: + noise += str(noise_ele) + "|" + noise = noise[:-1] + of.write(str(Path(img_file).resolve()) + ',' + noise + '\n') + + +if __name__ == '__main__': + + # Download data if necessary + download_data() + + # Write the data to PNG files, and create a csv file for NeoPulse AI Studio + write_csv_file() diff --git a/GANs/wganGP/MNIST/mnist_wganGP.nml b/GANs/wganGP/MNIST/mnist_wganGP.nml new file mode 100644 index 0000000..9564c62 --- /dev/null +++ b/GANs/wganGP/MNIST/mnist_wganGP.nml @@ -0,0 +1,69 @@ +oracle("mode") = "WGAN_GP" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/wganGP/MNIST/training_data.csv"; + 
input: + x ~ from "image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + +architecture name:generator : + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28,28], channels = 1]; + + + z -> Dense: [1024, activation='relu'] + -> LeakyReLU: [] + -> Dense: [6272] + -> BatchNormalization: [] + -> LeakyReLU: [] + -> Reshape: [[7, 7, 128]] + -> Conv2DTranspose: [128, [5,5], strides= 2, padding = 'same'] + -> BatchNormalization: [] + -> LeakyReLU: [] + -> Conv2DTranspose: [64, [5,5], padding = 'same'] + -> BatchNormalization: [] + -> LeakyReLU: [] + -> Conv2D: [64, [5,5], padding = "same"] + -> BatchNormalization: [] + -> LeakyReLU: [] + -> Conv2DTranspose: [64, [5,5], strides = 2, padding = 'same'] + -> BatchNormalization: [] + -> LeakyReLU: [] + -> Conv2D: [1, [5,5], padding = 'same'] + -> Activation: ['tanh'] + -> g; + + +architecture name:discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat: [1]; + + img -> Conv2D: [64, [5,5], padding = "same"] + -> LeakyReLU: [] + -> Conv2D: [128, [5,5], kernel_initializer = 'he_normal', strides = 2] + -> LeakyReLU: [] + -> Conv2D: [128, [5,5], kernel_initializer = 'he_normal', strides = 2] + -> LeakyReLU: [] + -> Flatten:[] + -> Dense: [1024, kernel_initializer = 'he_normal'] + -> LeakyReLU: [] + -> Dense: [1, kernel_initializer = 'he_normal'] + -> d; + + +train: + compile: optimizer = Adam: [0.0001, beta_1 = 0.5, beta_2 = 0.9]; + run: + epochs = 2; \ No newline at end of file diff --git a/GANs/wganGP/MNIST/mnist_wganGP_auto.nml b/GANs/wganGP/MNIST/mnist_wganGP_auto.nml new file mode 100644 index 0000000..478ab60 --- /dev/null +++ b/GANs/wganGP/MNIST/mnist_wganGP_auto.nml @@ -0,0 +1,39 @@ +oracle("mode") = "WGAN_GP" + +source: + bind = "/DM-Dash/NeoPulse_Examples/GANs/wganGP/MNIST/training_data.csv"; + input: + x ~ from 
"image" + -> image: [shape = [28, 28], channels = 1] + -> ImageDataGenerator: [], + z ~ from "noise" + -> flat: [100] + -> FlatDataGenerator: []; + params: + batch_size = 128, + shuffle = True, + shuffle_init = True; + +architecture name:generator : + input: + z ~ flat: [100]; + output: + g ~ image: [shape = [28,28], channels = 1]; + + + z -> auto -> g; + + +architecture name:discriminator: + input: + img ~ image: [shape = [28, 28], channels = 1]; + output: + d ~ flat: [1]; + + img -> auto -> d; + + +train: + compile: optimizer = auto; + run: + epochs = 2; \ No newline at end of file diff --git a/README.md b/README.md index dbc58be..7c309e4 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,19 @@ -# Introduction +# NeoPulse 3.0 Examples + +## Introduction This is a repository for [NeoPulse™ AI Studio](https://aws.amazon.com/marketplace/pp/B074NDG36S/ref=vdr_rf) sample training files. -There are training examples for the following data and task types: Classification (Image, Audio, Video) and Regression (Text, Vector, Image, Audio, Video). Examples are added periodically. You will find a classification example using text data in the /DM-Dash/examples/sentiment/ folder on your NeoPulse AI Studio instance. A brief tutorial is also available in the Tutorials & Guides section of the Developer Portal (see link below). +There are training examples for the following data and task types: Classification (Image, Audio, Video) and Regression (Text, Vector, Image, Audio, Video). Examples are added periodically. A brief tutorial is also available in the Tutorials & Guides section of the Developer Portal (see link below). Under each data type folder you will find sample NeoPulse™ Modeling Language (NML) code as well as information on the sample dataset used. 
-# Tutorial Videos and Guides +## Tutorial Videos and Guides Tutorial videos are available in the *Tutorials & Guides* section of the [DimensionalMechanics™ Developer Portal](https://dimensionalmechanics.com/ai-neopulse-tutorials). -# Tutorial Files +## Tutorial Files The NeoPulse™ Modeling Language (NML) scripts can be used to train text, vector, image, audio, and video classification and regression data in NeoPulse™ AI Studio. Each file demonstrates a different level of direct hints in building a model architecture (more details on [direct hints](https://docs.neopulse.ai/NML-Oracle-direct/)): -# License +## License Tutorial materials are published under the MIT license. See license for commercial, academic, and personal use. You are welcome to modify these tutorial files. If citing please link to this repository. diff --git a/Regression/Vector/.DS_Store b/Regression/Vector/.DS_Store new file mode 100644 index 0000000..a246a96 Binary files /dev/null and b/Regression/Vector/.DS_Store differ diff --git a/SageMaker/Sentiment/README.md b/SageMaker/SAGEMAKER_README.md similarity index 100% rename from SageMaker/Sentiment/README.md rename to SageMaker/SAGEMAKER_README.md diff --git a/SageMaker/Sentiment/build_csv.py b/SageMaker/Sentiment/build_csv.py deleted file mode 100644 index f9574c5..0000000 --- a/SageMaker/Sentiment/build_csv.py +++ /dev/null @@ -1,52 +0,0 @@ -import shutil -import tarfile -from pathlib import Path - -import pandas as pd -import requests -from natsort import humansorted -from sklearn.datasets import load_files - - -def download_data(): - ''' - Check if raw IMDB data is present. If not, download data from the official site. 
- ''' - Path('raw_data').mkdir(parents=True, exist_ok=True) - - URL = 'http://ai.stanford.edu/~amaas/data/sentiment/' - file_list = ['aclImdb_v1.tar.gz'] - for f in file_list: - if not Path('raw_data/' + f).is_file(): - r = requests.get(URL + f, stream=True) - with open('raw_data/' + f, 'wb') as f_z: - shutil.copyfileobj(r.raw, f_z) - - tarfile.open('raw_data/' + f).extractall() - - -def write_data(dir, save_as): - ''' - Write a csv file containing the text and labels. - ''' - df = pd.DataFrame() - shutil.move('aclImdb/train/unsup', '.') - for d in humansorted([str(p) for p in Path(dir).iterdir() if p.is_dir()], reverse=True): - print(d) - data = load_files(d) - pd_form = {"Review": data.data, "Label": data.target} - df = df.append(pd.DataFrame(pd_form)) - shutil.move('unsup', 'aclImdb/train') - df.to_csv(save_as, index=False) - - -def load_query(direc, save_as): - data = load_files(direc) - pd_form = {"Review": data.data} - pd.DataFrame(pd_form).loc[1:5, :].to_csv(save_as, index=False) - -if __name__ == "__main__": - - download_data() - - write_data('aclImdb', 'training_data.csv') diff --git a/SageMaker/Sentiment/sentiment_call_auto.nml b/SageMaker/Sentiment/sentiment_call_auto.nml deleted file mode 100644 index cf9e50f..0000000 --- a/SageMaker/Sentiment/sentiment_call_auto.nml +++ /dev/null @@ -1,39 +0,0 @@ -oracle("generated") = 1 -oracle("complexity") = 0.1 -oracle("regularization") = 0.99 - -source: - bind = "training_data.csv" ; - input: - x ~ from "Review" - -> text: [200] - -> TextDataGenerator: [nb_words=20000] ; - output: - y ~ from "Label" - -> flat: [2] - -> FlatDataGenerator: [] ; - params: - validation_split = 0.5, - batch_size = 1250 ; - -architecture: - input: x ~ text: [200] ; - output: y ~ flat: [2] ; - - x -> Embedding: [20000, 128] - -> Dropout: auto - -> Conv1D: auto - -> MaxPooling1D: [pool_size=4] - -> LSTM: [128] - -> Dense: [2, activation='softmax'] - -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', 
- metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: - save_on = 'val_acc' ; diff --git a/SageMaker/Sentiment/sentiment_dist_auto.nml b/SageMaker/Sentiment/sentiment_dist_auto.nml deleted file mode 100644 index 409da53..0000000 --- a/SageMaker/Sentiment/sentiment_dist_auto.nml +++ /dev/null @@ -1,38 +0,0 @@ -oracle("generated") = 4 -oracle("complexity") = 0.1 -oracle("regularization") = 0.99 - -source: - bind = "training_data.csv" ; - input: - x ~ from "Review" - -> text: [200] - -> TextDataGenerator: [nb_words=20000] ; - output: - y ~ from "Label" - -> flat: [2] - -> FlatDataGenerator: [] ; - params: - validation_split = 0.5, - batch_size = 1250 ; - -architecture: - input: x ~ text: [200] ; - output: y ~ flat: [2] ; - - x -> Embedding: [20000, 128] - -> Dropout: [auto(dist="uniform", low=0.25, high=0.75, cast="float" | count=4, name="Drop")] - -> Convolution1D: [64, 4] - -> MaxPooling1D: [pool_size=4] - -> LSTM: [128] - -> Dense: [2, activation = 'softmax'] -> y ; - -train: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: - save_on = 'val_acc' ; diff --git a/SageMaker/Sentiment/sentiment_full_auto.nml b/SageMaker/Sentiment/sentiment_full_auto.nml deleted file mode 100644 index cc6ba29..0000000 --- a/SageMaker/Sentiment/sentiment_full_auto.nml +++ /dev/null @@ -1,32 +0,0 @@ -oracle("mode") = "classification" -oracle("complexity") = 0.1 - -source: - bind = "training_data.csv" ; - input: - x ~ from "Review" - -> text: [200] - -> TextDataGenerator: [nb_words=20000] ; - output: - y ~ from "Label" - -> flat: [2] - -> FlatDataGenerator: [] ; - params: - validation_split = 0.5 - batch_size = 1250 ; - -architecture: - input: x ~ text: [200] ; - output: y ~ flat: [2] ; - - x -> auto -> y ; - -train: - compile: - optimizer = auto, - loss = auto, - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: - save_on = 'val_acc' ; diff --git 
a/SageMaker/Sentiment/sentiment_multi-GPU.nml b/SageMaker/Sentiment/sentiment_multi-GPU.nml deleted file mode 100644 index 21c0ea9..0000000 --- a/SageMaker/Sentiment/sentiment_multi-GPU.nml +++ /dev/null @@ -1,35 +0,0 @@ -source: - bind = "training_data.csv" ; - input: - x ~ from "Review" - -> text: [200] - -> TextDataGenerator: [nb_words=20000] ; - output: - y ~ from "Label" - -> flat: [2] - -> FlatDataGenerator: [] ; - params: - validation_split = 0.5, - batch_size = 1250; - -architecture: - input: x ~ text: [100] ; - output: y ~ flat: [2] ; - - x -> Embedding: [20000, 128] - -> Dropout: [0.5] - -> Convolution1D: [64, 4] - -> MaxPooling1D: [pool_size=4] - -> LSTM: [128] - -> Dense: [2, activation='softmax'] - -> y ; - -train Ngpu 2: - compile: - optimizer = 'rmsprop', - loss = 'categorical_crossentropy', - metrics = ['accuracy'] ; - run: - epochs = 4 ; - dashboard: - save_on = 'val_acc' ;