Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Projects/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ Now is the time to do a project to validate your learning. Follow the following
Build a web app to recognise objects from images using tensorflow and flask API.

## Submissions
1. Dr.Strange (embed github url)
1. Jeffin G Benny (https://github.com/JeffinGBenny/AIBootcampFinalProject)

328 changes: 328 additions & 0 deletions Tasks/Day3Preprocessing.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Preprocessing.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JgbNwCHQJSTX",
"outputId": "fc27f332-f877-4b8c-8eb5-b248df4b37c7"
},
"source": [
"pip install tensorflow_text"
],
"execution_count": 12,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting tensorflow_text\n",
" Using cached tensorflow_text-2.6.0-cp37-cp37m-manylinux1_x86_64.whl (4.4 MB)\n",
"Requirement already satisfied: tensorflow-hub>=0.8.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow_text) (0.12.0)\n",
"Requirement already satisfied: tensorflow<2.7,>=2.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow_text) (2.6.0)\n",
"Requirement already satisfied: flatbuffers~=1.12.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (1.12)\n",
"Requirement already satisfied: h5py~=3.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (3.1.0)\n",
"Requirement already satisfied: termcolor~=1.1.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (1.1.0)\n",
"Requirement already satisfied: absl-py~=0.10 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (0.12.0)\n",
"Requirement already satisfied: opt-einsum~=3.3.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (3.3.0)\n",
"Requirement already satisfied: wheel~=0.35 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (0.37.0)\n",
"Requirement already satisfied: keras~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (2.6.0)\n",
"Requirement already satisfied: clang~=5.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (5.0)\n",
"Requirement already satisfied: numpy~=1.19.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (1.19.5)\n",
"Requirement already satisfied: keras-preprocessing~=1.1.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (1.1.2)\n",
"Requirement already satisfied: tensorboard~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (2.6.0)\n",
"Requirement already satisfied: google-pasta~=0.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (0.2.0)\n",
"Requirement already satisfied: gast==0.4.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (0.4.0)\n",
"Requirement already satisfied: astunparse~=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (1.6.3)\n",
"Requirement already satisfied: grpcio<2.0,>=1.37.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (1.39.0)\n",
"Requirement already satisfied: typing-extensions~=3.7.4 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (3.7.4.3)\n",
"Requirement already satisfied: protobuf>=3.9.2 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (3.17.3)\n",
"Requirement already satisfied: wrapt~=1.12.1 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (1.12.1)\n",
"Requirement already satisfied: six~=1.15.0 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (1.15.0)\n",
"Requirement already satisfied: tensorflow-estimator~=2.6 in /usr/local/lib/python3.7/dist-packages (from tensorflow<2.7,>=2.6.0->tensorflow_text) (2.6.0)\n",
"Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py~=3.1.0->tensorflow<2.7,>=2.6.0->tensorflow_text) (1.5.2)\n",
"Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (2.23.0)\n",
"Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (1.0.1)\n",
"Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (3.3.4)\n",
"Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (57.4.0)\n",
"Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (0.6.1)\n",
"Requirement already satisfied: google-auth<2,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (1.34.0)\n",
"Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (0.4.5)\n",
"Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (1.8.0)\n",
"Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (0.2.8)\n",
"Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (4.2.2)\n",
"Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (4.7.2)\n",
"Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (1.3.0)\n",
"Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (4.6.4)\n",
"Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<2,>=1.6.3->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (0.4.8)\n",
"Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (3.0.4)\n",
"Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (2.10)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (2021.5.30)\n",
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (1.24.3)\n",
"Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (3.1.1)\n",
"Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->markdown>=2.6.8->tensorboard~=2.6->tensorflow<2.7,>=2.6.0->tensorflow_text) (3.5.0)\n",
"Installing collected packages: tensorflow-text\n",
"Successfully installed tensorflow-text-2.6.0\n"
]
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "dfcqqXF_FI1t"
},
"source": [
"import collections\n",
"import pathlib\n",
"import re\n",
"import string\n",
"\n",
"import tensorflow as tf\n",
"\n",
"from tensorflow.keras import layers\n",
"from tensorflow.keras import losses\n",
"from tensorflow.keras import preprocessing\n",
"from tensorflow.keras import utils\n",
"from tensorflow.keras.layers.experimental.preprocessing import TextVectorization\n",
"\n",
"import tensorflow_datasets as tfds\n",
"#import tensorflow_text as tf_text"
],
"execution_count": 3,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "DtZbjz6JJ5DP"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gfW6xuKaArwC",
"outputId": "cbd4ca66-404b-4284-c420-b0dc9ea9d0bd"
},
"source": [
"#import tensorflow as tf\n",
"#import pathlib \n",
"from tensorflow.keras import utils\n",
"DIRECTORY_URL = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/'\n",
"FILE_NAMES = ['cowper.txt', 'derby.txt', 'butler.txt']\n",
"\n",
"for name in FILE_NAMES:\n",
" text_dir = utils.get_file(name, origin=DIRECTORY_URL + name)\n",
"\n",
"parent_dir = pathlib.Path(text_dir).parent\n",
"list(parent_dir.iterdir())"
],
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/cowper.txt\n",
"819200/815980 [==============================] - 0s 0us/step\n",
"827392/815980 [==============================] - 0s 0us/step\n",
"Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/derby.txt\n",
"811008/809730 [==============================] - 0s 0us/step\n",
"819200/809730 [==============================] - 0s 0us/step\n",
"Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/butler.txt\n",
"811008/807992 [==============================] - 0s 0us/step\n",
"819200/807992 [==============================] - 0s 0us/step\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"[PosixPath('/root/.keras/datasets/butler.txt'),\n",
" PosixPath('/root/.keras/datasets/cowper.txt'),\n",
" PosixPath('/root/.keras/datasets/derby.txt')]"
]
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "LNp2pbRsKAm_"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "j_2ObQ94C1dQ"
},
"source": [
"def labeler(example, index):\n",
" return example, tf.cast(index, tf.int64)"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "T19zCadtKHMf"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "UmqqekPtC8_a"
},
"source": [
"labeled_data_sets = []\n",
"\n",
"for i, file_name in enumerate(FILE_NAMES):\n",
" lines_dataset = tf.data.TextLineDataset(str(parent_dir/file_name))\n",
" labeled_dataset = lines_dataset.map(lambda ex: labeler(ex, i))\n",
" labeled_data_sets.append(labeled_dataset)"
],
"execution_count": 6,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "5Mdsm9luKMfd"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "iZhPKY_IDlH2"
},
"source": [
"BUFFER_SIZE = 50000\n",
"BATCH_SIZE = 64\n",
"VALIDATION_SIZE = 5000"
],
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "--rRGLrIKRqS"
},
"source": [
""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "GgP8zzwFDpPV"
},
"source": [
"all_labeled_data = labeled_data_sets[0]\n",
"for labeled_dataset in labeled_data_sets[1:]:\n",
" all_labeled_data = all_labeled_data.concatenate(labeled_dataset)\n",
"\n",
"all_labeled_data = all_labeled_data.shuffle(\n",
" BUFFER_SIZE, reshuffle_each_iteration=False)"
],
"execution_count": 8,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "MJ9zYYRwKgg-"
},
"source": [
"import tensorflow_text as tf_text\n",
"tokenizer = tf_text.UnicodeScriptTokenizer()"
],
"execution_count": 14,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "jGtnnoMdLF2k"
},
"source": [
"def tokenize(text, unused_label):\n",
" lower_case = tf_text.case_fold_utf8(text)\n",
" return tokenizer.tokenize(lower_case)"
],
"execution_count": 15,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1EBEb-8cLRFw",
"outputId": "c3b5819d-6f95-4dc6-d8e7-35dc0f261c57"
},
"source": [
"tokenized_ds = all_labeled_data.map(tokenize)"
],
"execution_count": 16,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/util/dispatch.py:206: batch_gather (from tensorflow.python.ops.array_ops) is deprecated and will be removed after 2017-10-25.\n",
"Instructions for updating:\n",
"`tf.batch_gather` is deprecated, please use `tf.gather` with `batch_dims=-1` instead.\n"
]
}
]
}
]
}
Loading