diff --git a/.streamlit/config.toml b/.streamlit/config.toml
new file mode 100644
index 00000000..f4d8567e
--- /dev/null
+++ b/.streamlit/config.toml
@@ -0,0 +1,28 @@
+[theme]
+
+# Primary accent for interactive elements
+primaryColor="#d33682"
+
+# Background color for the main content area
+backgroundColor="#002b36"
+
+# Background color for sidebar and most interactive widgets
+secondaryBackgroundColor="#586e75"
+
+# Color used for almost all text
+textColor="#fafafa"
+
+# Font family for all text in the app, except code blocks
+# Accepted values (serif | sans serif | monospace)
+# Default: "sans serif"
+font="sans serif"
+
+
+[deprecation]
+showPyplotGlobalUse = false
+
+
+
+
+
+
diff --git a/base2_app.py b/base2_app.py
new file mode 100644
index 00000000..b83991ec
--- /dev/null
+++ b/base2_app.py
@@ -0,0 +1,169 @@
+"""
+
+ Simple Streamlit webserver application for serving developed classification
+ models.
+
+ Author: Explore Data Science Academy.
+
+ Note:
+ ---------------------------------------------------------------------
+ Please follow the instructions provided within the README.md file
+ located within this directory for guidance on how to use this script
+ correctly.
+ ---------------------------------------------------------------------
+
+ Description: This file is used to launch a minimal streamlit web
+ application. You are expected to extend the functionality of this script
+ as part of your predict project.
+
+ For further help with the Streamlit framework, see:
+
+ https://docs.streamlit.io/en/latest/
+
+"""
+# Streamlit dependencies
+import streamlit as st
+import joblib,os
+import base64
+
+# Data dependencies
+import pandas as pd
+import numpy as np
+import pydeck as pdk
+import matplotlib.pyplot as plt
+import plotly.express as px
+
+page_bg_img = """
+
+"""
+st.markdown(page_bg_img, unsafe_allow_html=True)
+
+
+
+
+# Vectorizer
+news_vectorizer = open("resources/tfidfvect.pkl","rb")
+tweet_cv = joblib.load(news_vectorizer) # loading your vectorizer from the pkl file
+
+# Load your raw data
+raw = pd.read_csv("resources/train.csv")
+
+# The main function where we will build the actual app
+def main():
+ """Tweet Classifier App with Streamlit """
+
+ # Creates a main title and subheader on your page -
+ # these are static across all pages
+    st.title("Tweet Classifier")
+ st.subheader("Climate change tweet classification")
+
+    st.info("This application is all about tweet sentiment analysis of climate change. It is able to classify whether "
+    "or not a person believes in climate change, based on their novel tweet data.")
+ #st.()
+ # You can read a markdown file from supporting resources folder
+ #st.markdown("")
+ if st.checkbox('Show raw data'): # data is hidden if box is unchecked
+ st.write(raw[['sentiment', 'message']]) # will write the df to the page
+
+
+ # Creating sidebar with selection box -
+ # you can create multiple pages this way
+ options = ["Home", "About us", "App tour", "Tweet classifier", "Tweet analysis"]
+ selection = st.sidebar.selectbox("Choose Option", options)
+
+    # Building out the "Tweet Sentiment classification" page
+ if selection == "Tweet classifier":
+ st.info("Prediction with ML Models")
+ st.markdown("You can enter text or upload file")
+ # Creating a text box for user input
+ tweet_text = st.text_area("Enter Text","Type Here")
+
+ # upload a file
+ upload_file = st.file_uploader("Upload file")
+
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ vect_text = tweet_cv.transform([tweet_text]).toarray()
+ # Load your .pkl file with the model of your choice + make predictions
+ # Try loading in multiple models to give the user a choice
+ predictor = joblib.load(open(os.path.join("resources/Logistic_regression.pkl"),"rb"))
+ prediction = predictor.predict(vect_text)
+
+
+    # Building out the "Tweet Sentiment analysis" page
+ if selection == "Tweet analysis":
+ st.info("This app analyses sentiments on climate change based on tweet data")
+ #top level filters
+ #message_filter = st.selectbox("Select the message", pd.unique(raw['sentiment']))
+ # dataframe filter
+ #df = raw[raw['sentiment']== message_filter]
+ st.markdown("### Tweet distribution")
+ sentiment = raw['sentiment'].value_counts()
+ sentiment = pd.DataFrame({'Sentiment':sentiment.index, 'Tweets':sentiment.values})
+
+ # create two columns for charts
+ fig_col1, fig_col2 = st.columns(2)
+
+ with fig_col1:
+ fig = fig = px.bar(sentiment, x='Sentiment', y='Tweets', color = 'Tweets', height= 500)
+ st.plotly_chart(fig)
+
+ #
+ with fig_col2:
+ fig = px.pie(sentiment, values= 'Tweets', names= 'Sentiment')
+ st.plotly_chart(fig)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ #with fig_col1:
+ # st.markdown("### First Chart")
+ # fig = px.density_heatmap(
+ # data_frame=df, y="message", x="sentiment"
+ # )
+ # st.write(fig)
+
+
+ # fig2 = px.histogram(data_frame=df, x="sentiment")
+ # st.write(fig2)
+
+
+
+
+
+
+# Required to let Streamlit instantiate our web app.
+if __name__ == '__main__':
+ main()
diff --git a/base_app.py b/base_app.py
index ad0f724a..95b71039 100644
--- a/base_app.py
+++ b/base_app.py
@@ -1,83 +1,574 @@
-"""
-
- Simple Streamlit webserver application for serving developed classification
- models.
-
- Author: Explore Data Science Academy.
-
- Note:
- ---------------------------------------------------------------------
- Please follow the instructions provided within the README.md file
- located within this directory for guidance on how to use this script
- correctly.
- ---------------------------------------------------------------------
-
- Description: This file is used to launch a minimal streamlit web
- application. You are expected to extend the functionality of this script
- as part of your predict project.
-
- For further help with the Streamlit framework, see:
-
- https://docs.streamlit.io/en/latest/
-
-"""
-# Streamlit dependencies
-import streamlit as st
-import joblib,os
-
-# Data dependencies
-import pandas as pd
-
-# Vectorizer
-news_vectorizer = open("resources/tfidfvect.pkl","rb")
-tweet_cv = joblib.load(news_vectorizer) # loading your vectorizer from the pkl file
-
-# Load your raw data
-raw = pd.read_csv("resources/train.csv")
-
-# The main function where we will build the actual app
-def main():
- """Tweet Classifier App with Streamlit """
-
- # Creates a main title and subheader on your page -
- # these are static across all pages
- st.title("Tweet Classifer")
- st.subheader("Climate change tweet classification")
-
- # Creating sidebar with selection box -
- # you can create multiple pages this way
- options = ["Prediction", "Information"]
- selection = st.sidebar.selectbox("Choose Option", options)
-
- # Building out the "Information" page
- if selection == "Information":
- st.info("General Information")
- # You can read a markdown file from supporting resources folder
- st.markdown("Some information here")
-
- st.subheader("Raw Twitter data and label")
- if st.checkbox('Show raw data'): # data is hidden if box is unchecked
- st.write(raw[['sentiment', 'message']]) # will write the df to the page
-
- # Building out the predication page
- if selection == "Prediction":
- st.info("Prediction with ML Models")
- # Creating a text box for user input
- tweet_text = st.text_area("Enter Text","Type Here")
-
- if st.button("Classify"):
- # Transforming user input with vectorizer
- vect_text = tweet_cv.transform([tweet_text]).toarray()
- # Load your .pkl file with the model of your choice + make predictions
- # Try loading in multiple models to give the user a choice
- predictor = joblib.load(open(os.path.join("resources/Logistic_regression.pkl"),"rb"))
- prediction = predictor.predict(vect_text)
-
- # When model has successfully run, will print prediction
- # You can use a dictionary or similar structure to make this output
- # more human interpretable.
- st.success("Text Categorized as: {}".format(prediction))
-
-# Required to let Streamlit instantiate our web app.
-if __name__ == '__main__':
- main()
+
+
+import numpy as np
+import pydeck as pdk
+import matplotlib.pyplot as plt
+import plotly.express as px
+
+page_bg_img = """
+
+"""
+st.markdown(page_bg_img, unsafe_allow_html=True)
+
+
+    st.info("This application is all about tweet sentiment analysis of climate change. It is able to classify whether "
+    "or not a person believes in climate change, based on their novel tweet data.")
+
+
+    # Building out the "Tweet Sentiment analysis" page
+ if selection == "Tweet analysis":
+ st.info("This app analyses sentiments on climate change based on tweet data")
+ #top level filters
+ #message_filter = st.selectbox("Select the message", pd.unique(raw['sentiment']))
+ # dataframe filter
+ #df = raw[raw['sentiment']== message_filter]
+ st.markdown("### Tweet distribution")
+ sentiment = raw['sentiment'].value_counts()
+ sentiment = pd.DataFrame({'Sentiment':sentiment.index, 'Tweets':sentiment.values})
+
+ # create two columns for charts
+ fig_col1, fig_col2 = st.columns(2)
+
+ with fig_col1:
+ fig = fig = px.bar(sentiment, x='Sentiment', y='Tweets', color = 'Tweets', height= 500)
+ st.plotly_chart(fig)
+
+ #
+ with fig_col2:
+ fig = px.pie(sentiment, values= 'Tweets', names= 'Sentiment')
+ st.plotly_chart(fig)
+
+
+
+ Simple Streamlit webserver application for serving developed classification
+ models.
+
+ Author: Explore Data Science Academy.
+
+ Note:
+ ---------------------------------------------------------------------
+ Please follow the instructions provided within the README.md file
+ located within this directory for guidance on how to use this script
+ correctly.
+ ---------------------------------------------------------------------
+
+ Description: This file is used to launch a minimal streamlit web
+ application. You are expected to extend the functionality of this script
+ as part of your predict project.
+
+ For further help with the Streamlit framework, see:
+
+ https://docs.streamlit.io/en/latest/
+
+"""
+# Streamlit dependencies
+import streamlit as st
+import joblib,os
+from streamlit_option_menu import option_menu
+from PIL import Image
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+
+from sklearn.metrics import f1_score
+import model_app
+import base64
+import numpy as np
+
+
+
+
+
+# Data dependencies
+import pandas as pd
+import numpy as np
+import pydeck as pdk
+import matplotlib.pyplot as plt
+import plotly.express as px
+
+page_bg_img = """
+
+"""
+st.markdown(page_bg_img, unsafe_allow_html=True)
+
+# Vectorizer
+
+#news_vectorizer = open("resources/tfidfvect.pkl","rb")
+#tweet_cv = joblib.load(news_vectorizer) # loading your vectorizer from the pkl file
+#rfc_vectorizer = open("resources/rfc_TfidfVectorizer.pkl","rb")
+#tweet_rfc = joblib.load(rfc_vectorizer)
+
+news_vectorizer = open("resources/Count_Vectorizer.pkl","rb")
+tweet_vect = joblib.load(news_vectorizer) # loading your vectorizer from the pkl file
+#upload_file
+
+
+# Load your raw data
+#raw = pd.read_csv("resources/train.csv")
+
+# The main function where we will build the actual app
+
+news_vectorizer = open("resources/tfidfvect.pkl","rb")
+tweet_cv = joblib.load(news_vectorizer) # loading your vectorizer from the pkl file
+rfc_vectorizer = open("resources/rfc_TfidfVectorizer.pkl","rb")
+tweet_rfc = joblib.load(rfc_vectorizer)
+
+
+
+
+# Load your raw data
+raw = pd.read_csv("resources/train.csv")
+
+# The main function where we will build the actual app
+
+def main():
+ """Tweet Classifier App with Streamlit """
+
+ # Creates a main title and subheader on your page -
+ # these are static across all pages
+
+ #st.subheader("Climate change tweet classification")
+
+ #st.info("This application is all about tweet sentiment analysis of climate change. It is able to classify whether"
+ # "or not a person believes in climate change, based on their novel tweet data.")
+
+ #st.subheader("Climate change tweet classification")
+
+ # Creating sidebar with selection box -
+ # you can create multiple pages this way
+ #with open("resources/imgs/testing_bck.jpg","rb") as background_img:
+ # encoded_string = base64.b64encode(background_img.read())
+
+ # st.markdown(
+ #f"""
+ #
+ #""",
+ # unsafe_allow_html=True
+ #)
+
+ options = ["Background","About us","Know your file","Text tweet prediction","File tweet classification","Conclusion"]
+ selection = st.sidebar.selectbox("Lets interact", options)
+
+ if selection == "Conclusion":
+ st.success("Some very good news, we successfully deployed the AI-Platform Twitter classification to production today! Big thanks to the team for helping us get this over the line. It was a pleasure to work off of a well written code base, and the requested changes were delivered on time and to spec.")
+ st.info("Thynk Data allows us to improve on all aspects of risk management by providing a single toolkit for data analysis and preparation, modelling, deployment and monitoring. It allows us to use the latest tools and techniques, without sacrificing the transparency, robustness, customisation and efficiency we expect.")
+
+ if selection == "About us":
+ st.subheader("Our Story")
+ st.info("Our CEO, Dr Craig Nyatondo, an internationally published data scientist and at the time computer science lecturer at the University of Stellenbosch, founded ml4africa.com (Machine Learning for Africa) in 2013 after he saw the potential of his field of study to impact communities in South Africa and beyond. One year later Thynk Data with its agile and innovative business model was born as the result of a keen understanding of the predictive analysis needs of governmental stakeholders as well as corporate clients. The integration of theory and praxis lies at the heart of who Thynk Data is. Our data crafters are accomplished engineers, mathematicians and scientists, who are much respected in their respective fields of specialisation.")
+
+        st.subheader("We believe in Purpose before profit")
+ st.write("• Our purpose is to create value by collaboratively crafting elegant, data-driven solutions for significant problems. ")
+        st.write("• To achieve this, we subscribe to the values of Trustworthy Leadership, Collaborative Learning and Creative Craftsmanship.")
+ st.write("• To our clients, Thynk Data promises to be Trendsetters, Academically Excellent, Agile and Adaptable and Deeply Immersed")
+
+
+ st.subheader("Meet the Team")
+ fig_col1, fig_col2,fig_col3,fig_col4,fig_col5 = st.columns(5)
+
+ with fig_col1:
+ image_climate = Image.open(os.path.join("resources/imgs/Craig.jpg"))
+ image_climate = image_climate.resize((300,300))
+            st.image(image_climate, caption='Chief Executive Officer: Dr Craig Nyatondo')
+
+ with fig_col2:
+ image_climate = Image.open(os.path.join("resources/imgs/Caitlin.jpg"))
+ image_climate = image_climate.resize((300,300))
+            st.image(image_climate, caption='Chief Information Officer: Caitlin Mclaren')
+
+ with fig_col3:
+ image_climate = Image.open(os.path.join("resources/imgs/Karabo.jpg"))
+ image_climate = image_climate.resize((300,300))
+ st.image(image_climate, caption= 'Senior Data Engineer: Karabo Ratona')
+
+ with fig_col4:
+ image_climate = Image.open(os.path.join("resources/imgs/Nomonde.jpg"))
+ image_climate = image_climate.resize((300,300))
+ st.image(image_climate, caption='Senior Interface Developer: Nomonde Mraqisa')
+
+ with fig_col5:
+ image_climate = Image.open(os.path.join("resources/imgs/Mamtie.jpg"))
+ image_climate = image_climate.resize((300,300))
+ st.image(image_climate, caption='Lead full stack Engineer: Mamutele Phosa')
+
+ st.subheader("Recognition for Thynk Data")
+        st.info("Thynk Data is an Amazon Gold Partner and Amazon Independent Software Vendor. In 2019 Thynk Data was named a finalist in the Amazon AI Partner of the Year Awards. In 2016 Thynk Data was named the Most Innovative Business in Africa.")
+
+
+
+ if selection == "Background":
+ st.subheader("Background")
+        st.info("Bio Straw has tasked Thynk Data to create a machine learning model that is able to classify whether or not a person believes in climate change based on their novel tweet data")
+
+        st.info ("Bio straw like many other companies strives to offer products and services that are environmentally friendly and sustainable in line with their values and ideals. With this said Bio straw would like to know how people perceive climate change and whether or not they believe it is a real threat. This information would add to their market research efforts in gauging how their products and services may be received.")
+ image_climate = Image.open(os.path.join("resources/imgs/Clmate change.jpg"))
+ image_climate = image_climate.resize((300,300))
+ st.image(image_climate)
+        st.info("Bio Straw has tasked Thynk Data to create a machine learning model that is able to classify whether or not a person believes in climate change based on their novel tweet data")
+        st.info("Twitter is a social media platform where people express their opinions using tweets (a message) about anything happening around the world. Sit tight as the team take you through on how you can collect data, process it and extract meaningful information that can be used to make future predictions about current products and services.")
+ # You can read a markdown file from supporting resources folder
+
+ image_twitter = Image.open(os.path.join("resources/imgs/Twitter_tweet.jpg"))
+ image_twitter = image_twitter.resize((300,450))
+ st.image(image_twitter)
+ # pulling to main
+
+ #st.image(image, caption='Sunrise by the mountains')
+ #st.info("Twitter is a Social media platform where people express their opions using tweets (a message) about anything hapennig around the world. sit tight as the team take you through on how you can collect data, process it and extract meaningful information that can be used to make future predictions about curent products and services.")
+
+ if selection == "Know your file":
+ st.subheader("Let us explore our data")
+ upload_file = model_app.upload_file()
+ if upload_file is not None:
+ raw = pd.read_csv(upload_file )
+
+ if st.checkbox('Display the data in your file'): # data is hidden if box is unchecked
+ st.write(raw) # will write the df to the page
+ if st.checkbox('Display the wordmap of the uploaded file'):
+ testing_wordMap = model_app.word_map(raw)
+
+ # Building out the predication page
+ if selection == "Text tweet prediction":
+ st.subheader("Classifying tweets using models")
+ st.info("In this section we will be classifying tweets using the listed models. The models are listed from best performing to least performing. Keeping in mind that you can select one model at a time")
+ model = st.radio(
+            "Select a model to classify your tweet",
+ ('Logistic_regression','Naive_Bayes','Linear_Support_Vector','Random Forest','K Neighbors' ))
+ # Creating a text box for user input
+
+
+ if model == 'Random Forest' :
+            st.success("The random forest is a classification algorithm consisting of many decision trees. It uses bagging and feature randomness when building each individual tree to try to create an uncorrelated forest of trees whose prediction by committee is more accurate than that of any individual tree")
+ tweet_text = st.text_area("Type a tweet")
+ tweet_text = model_app.cleaning_text(tweet_text)
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ st.info(tweet_text)
+ rfc_text = tweet_vect.transform([tweet_text]).toarray()
+ # Load your randomfc_model.pkl file
+ predictor = joblib.load(open(os.path.join("resources/Random_Forest.pkl"),"rb"))
+ prediction = predictor.predict(rfc_text)
+ results = model_app.classify_desc(format(prediction))
+ # When model has successfully run, will print prediction
+ st.success("Your tweet is classified as: {} ".format(results) )
+
+ if model == 'Logistic_regression':
+            st.success("The logistic regression model estimates the probability of an event occurring, such as voted or didn’t vote, based on a given dataset of independent variables. One of the main advantages of logistic regression is that it is one of the most efficient algorithms ")
+ tweet_text = st.text_area("Type a tweet")
+ tweet_text = model_app.cleaning_text(tweet_text)
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ vect_text = tweet_vect.transform([tweet_text]).toarray()
+ # Load your Logistic_regression.pkl file
+ predictor = joblib.load(open(os.path.join("resources/Logistic_regression.pkl"),"rb"))
+ prediction = predictor.predict(vect_text)
+ results = model_app.classify_desc(format(prediction))
+ # When model has successfully run, will print prediction
+ st.success("Your tweet is classified as: {} ".format(results))
+
+ if model == 'K Neighbors' :
+            st.success("The k-nearest neighbors algorithm classifies a data point based on the majority class among its k closest neighbors in the feature space, using a distance metric to measure how similar two observations are")
+ tweet_text = st.text_area("Type a tweet")
+ tweet_text = model_app.cleaning_text(tweet_text)
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ vect_text = tweet_vect.transform([tweet_text]).toarray()
+ # Load your Logistic_regression.pkl file
+ predictor = joblib.load(open(os.path.join("resources/K_Neighbors.pkl"),"rb"))
+ prediction = predictor.predict(vect_text)
+ results = model_app.classify_desc(format(prediction))
+ # When model has successfully run, will print prediction
+ st.success("Your tweet is classified as: {} ".format(results))
+
+ if model == 'Naive_Bayes' :
+ st.success("Naive Bayes classifiers are a collection of classification algorithms based on Bayes’ Theorem. The Bayes’ Theorem finds the probability of an event occurring given the probability of another event that has already occurred.")
+ tweet_text = st.text_area("Type a tweet")
+ tweet_text = model_app.cleaning_text(tweet_text)
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ vect_text = tweet_vect.transform([tweet_text]).toarray()
+ # Load your Logistic_regression.pkl file
+ predictor = joblib.load(open(os.path.join("resources/Naive_Bayes.pkl"),"rb"))
+ prediction = predictor.predict(vect_text)
+ results = model_app.classify_desc(format(prediction))
+ # When model has successfully run, will print prediction
+ st.success("Your tweet is classified as: {} ".format(results))
+
+ if model == 'Linear_Support_Vector' :
+ st.success("The objective of Support Vector Machine algorithm is to find a hyperplane in an N-dimensional space that distinctly classifies the data points. The dimension of the hyperplane depends upon the number of features. The supervised machine learning algorithm can be used for both classification and regression.")
+ tweet_text = st.text_area("Type a tweet")
+ tweet_text = model_app.cleaning_text(tweet_text)
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ vect_text = tweet_vect.transform([tweet_text]).toarray()
+ # Load your Logistic_regression.pkl file
+ predictor = joblib.load(open(os.path.join("resources/Linear_Support_Vector.pkl"),"rb"))
+ prediction = predictor.predict(vect_text)
+ results = model_app.classify_desc(format(prediction))
+ # When model has successfully run, will print prediction
+ st.success("Your tweet is classified as: {} ".format(results))
+
+
+
+ if selection == "File tweet classification":
+ #code to call the file upload function
+ st.info("Classifying tweets using files")
+ upload_file = model_app.upload_file()
+
+ if upload_file is not None:
+ raw = pd.read_csv(upload_file)
+            model = st.radio("Select a model to classify your tweet",
+ ('Random Forest', 'Logistic_regression','K Neighbors', 'Naive_Bayes', 'Linear_Support_Vector'))
+ upload_file['message'] = upload_file['message'].apply(lambda text:model_app.cleaning_text(text))
+ f1_score(y_val,rfc_pred, average ='macro')
+ from sklearn.metrics import f1_score
+
+
+ #if model == 'Random Forest' :
+ # if st.button("Classify"):
+ # process of cleaning the data, then
+
+
+
+
+
+ #st.info("Classifying tweets using models")
+ #if upload_file is None:
+ # upload_file = st.file_uploader("Upload a .csv file that contains tweets",'csv')
+ # if upload_file is not None:
+ # raw = pd.read_csv(upload_file )
+ #elif upload_file is not None:
+ # model = st.radio("Select a model to classifiy your tweet",
+ # ('Random Forest', 'Logistic regression','K Neighbors', 'Naive_Bayes', 'Linear_Support_Vector'))
+ # if model == "Random Forest":
+ # data = st.radio(
+ # "How do you want to load data",
+ # ('Upload tweets samples', 'Type your tweet'))
+#
+# if data == 'Upload tweets samples' :
+# upload_file = st.file_uploader("Upload file")
+
+
+
+ st.subheader("Climate change tweet classification")
+
+
+ # Creating sidebar with selection box -
+ # you can create multiple pages this way
+
+    options = ["About us","Background", "App tour", "Tweet analysis", "Prediction", "Conclusion"]
+ selection = st.sidebar.selectbox("Page Menu", options)
+
+ # Building out the About us page
+ if selection == "About us":
+ st.info("General Information")
+ # You can read a markdown file from supporting resources folder
+ st.markdown("Some information here")
+ image = Image.open(os.path.join("resources/imgs/twitter_logo.jpg"))
+ st.image(image, caption='Sunrise by the mountains')
+
+ # Building out the Background page
+ if selection == "Background":
+ st.title("ThynkData partners with you to build an industry-leading roadmap for change and innovation.")
+ # You can read a markdown file from supporting resources folder
+ st.info("With our proven process we identify business cases in an engaging and collaborative way." " "
+ "We also assist in quantifying the business value of such potential projects, preventing wasted expenditure.")
+ st.write("This is how we build with you a Data-Driven Enterprise:")
+ image = Image.open(os.path.join("resources/imgs/strategic_planning.jpg"))
+ st.image(image, caption='')
+ st.title("Machine Learning is our profession")
+ st.info("We are experts in the latest Machine Learning and modelling techniques" " "
+ "and are able to apply the best fit to your business problem.")
+ st.write("• ThynkData designs, implements and maintains infrastructure " " "
+ "to run Machine Learning models on enterprise scale.")
+ st.write("• Our production environment ensures auditable data governance, " " "
+ "robust quality testing and the monitoring of model performance.")
+ st.write("• ThynkData assists in the end-to-end implementation " " "
+ "(in the cloud, on premise or in hybrid configurations) of the most " " "
+ "optimal infrastructure for specific industry and business needs.")
+
+ st.info("This is how we develop solutions to your challenges:")
+ st.write("ThynkData Development Process")
+ image = Image.open(os.path.join("resources/imgs/ThynkData_Dev_Process.jpg"))
+ st.image(image, caption='')
+
+
+
+ # Building out the App tour page
+ if selection == "App tour":
+ selected = option_menu(
+ menu_title="Main Menu",
+ options=["About us", "Background", "Twitter analysis", "Prediction", "Conclusion/ Credit"],
+ icons=["people-fill", "book-half", "bar-chart-line-fill", "graph-up"],
+ menu_icon="cast",
+ default_index=0,
+ orientation="horizontal",
+ )
+
+ st.title("Hi There :wave:")
+ st.title("Welcome to our App :smile:")
+ st.write("" * 34)
+
+
+
+
+
+
+
+
+
+
+
+
+###########################################################################################################
+###########################################################################################################
+
+    # Building out the Tweet Sentiment analysis page
+ if selection == "Tweet analysis":
+ st.info("This app analyses sentiments on climate change based on tweet data")
+ if st.checkbox('Show raw data'): # data is hidden if box is unchecked
+ st.write(raw[['sentiment', 'message']]) # will write the df to the page
+ #top level filters
+ #message_filter = st.selectbox("Select the message", pd.unique(raw['sentiment']))
+ # dataframe filter
+ #df = raw[raw['sentiment']== message_filter]
+ st.markdown("### Tweet distribution")
+ sentiment = raw['sentiment'].value_counts()
+ sentiment = pd.DataFrame({'Sentiment':sentiment.index, 'Tweets':sentiment.values})
+
+ # create two columns for charts
+ fig_col1, fig_col2 = st.columns(2)
+
+ with fig_col1:
+ fig = fig = px.bar(sentiment, x='Sentiment', y='Tweets', color = 'Tweets', height= 500)
+ #plt.bar(x_pos, height, color=['black', 'red', 'green', 'blue', 'cyan'])
+ #x_pos = np.arange(len(bars))
+ st.plotly_chart(fig)
+
+ #
+ with fig_col2:
+ fig = px.pie(sentiment, values= 'Tweets', names= 'Sentiment')
+ #fig.plt(np.arrange(0,11), color = 'yellow')
+ st.plotly_chart(fig)
+
+
+
+ # Building out the predication page
+ if selection == "Prediction":
+ st.info("Prediction with ML Models")
+ model = st.radio(
+            "Select a model to classify your tweet",
+ ('Random Forest Classifier', 'Logistic_regression'))
+ # Creating a text box for user input
+ # upload a file
+ data = st.radio(
+ "How do you want to load data",
+ ('Upload tweets samples', 'Type your tweet'))
+
+ if data == 'Upload tweets samples' :
+ upload_file = st.file_uploader("Upload file")
+ else:
+ tweet_text = st.text_area("Type a tweet")
+
+ if model == 'Random Forest Classifier':
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ if data == 'Upload tweets samples' :
+ rfc_file = tweet_rfc.transform([upload_file]).toarray()
+ else:
+ rfc_text = tweet_rfc.transform([tweet_text]).toarray()
+
+ # Load your randomfc_model.pkl file
+ predictor = joblib.load(open(os.path.join("resources/randomfc_model.pkl"),"rb"))
+ if data == 'Upload tweets samples' :
+ prediction_file = predictor.predict(rfc_file)
+ else:
+ prediction = predictor.predict(rfc_text)
+ # When model has successfully run, will print prediction
+ st.success("Text Categorized as: {}".format(prediction))
+
+ if model == 'Logistic_regression' :
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ if data == 'Upload tweets samples' :
+ vect_file = tweet_cv.transform([upload_file]).toarray()
+ else:
+ vect_text = tweet_cv.transform([tweet_text]).toarray()
+ # Load your Logistic_regression.pkl file
+ predictor = joblib.load(open(os.path.join("resources/Logistic_regression.pkl"),"rb"))
+ prediction = predictor.predict(vect_text)
+ # When model has successfully run, will print prediction
+ st.success("Text Categorized as: {}".format(prediction))
+
+ #Building out the predication page
+ if selection == "Random Forest Classifier":
+        st.info("Just a little bit about the random forest classifier model")
+
+ image = Image.open(os.path.join("resources/imgs/twitter_logo.jpg"))
+ st.image(image, caption='Sunrise by the mountains')
+ # Creating a text box for user input
+ tweet_text = st.text_area("Enter Text","Type Here")
+
+ if st.button("Classify"):
+ # Transforming user input with vectorizer
+ vect_text = tweet_cv.transform([tweet_text]).toarray()
+ # Load your .pkl file with the model of your choice + make predictions
+ # Try loading in multiple models to give the user a choice
+ predictor = joblib.load(open(os.path.join("resources/Logistic_regression.pkl"),"rb"))
+ predictor = joblib.load(open(os.path.join("resources/randomfc_model.pkl"),"rb"))
+ prediction = predictor.predict(vect_text)
+
+ # When model has successfully run, will print prediction
+ # You can use a dictionary or similar structure to make this output
+ # more human interpretable.
+ st.success("Text Categorized as: {}".format(prediction))
+
+
+
+
+# Required to let Streamlit instantiate our web app.
+if __name__ == '__main__':
+ main()
+
+
diff --git a/model_app.py b/model_app.py
new file mode 100644
index 00000000..b95a377b
--- /dev/null
+++ b/model_app.py
@@ -0,0 +1,133 @@
+"""
+ Simple Streamlit webserver application for serving developed classification
+ models.
+
+ Author: Explore Data Science Academy.
+
+ Note:
+ ---------------------------------------------------------------------
+ Please follow the instructions provided within the README.md file
+ located within this directory for guidance on how to use this script
+ correctly.
+ ---------------------------------------------------------------------
+
+ Description: This file is used to launch a minimal streamlit web
+ application. You are expected to extend the functionality of this script
+ as part of your predict project.
+
+ For further help with the Streamlit framework, see:
+
+ https://docs.streamlit.io/en/latest/
+
+"""
+
+import warnings
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import streamlit as st
+import csv
+import warnings
+warnings.filterwarnings("ignore")
+
+# Libraries for data preparation and model building
+import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D
+from pandas_profiling import ProfileReport
+import spellchecker
+import autocorrect
+
+import nltk
+from nltk import TreebankWordTokenizer, SnowballStemmer
+from nltk.stem import WordNetLemmatizer
+from nltk.corpus import stopwords
+import re
+import string
+import urllib
+from wordcloud import WordCloud, STOPWORDS
+
+STOPWORDS = set(stopwords.words('english'))
+
# Maps a model's predicted label string to a human-readable description.
def classify_desc(description):
    """Translate a predicted sentiment label into a human-readable sentence.

    Args:
        description: the stringified prediction, e.g. ``'[-1]'``, ``'[0]'``,
            ``'[1]'`` or ``'[2]'``.

    Returns:
        The matching description, or ``None`` for any other value
        (preserving the original fall-through behaviour).
    """
    descriptions = {
        '[-1]': "The tweet does not believe in man-made climate change (Anti)",
        '[0]': "The tweet neither supports nor refutes the belief of man-made climate change (Neutral)",
        '[1]': "The tweet supports the belief of man-made climate change (Pro)",
        '[2]': "The tweet links to factual news about climate change (News)",
    }
    return descriptions.get(description)
+
def upload_file():
    """Prompt the user for a .csv upload via Streamlit.

    Returns:
        The uploaded file object, or ``None`` when nothing has been chosen yet.
    """
    uploaded = st.file_uploader("Upload a .csv file that contains tweets", 'csv')
    if uploaded is not None:
        return uploaded
+
def to_lower(text):
    """Lower-case *text*.

    The original implementation called ``text.str.lower()``, which only works
    on a pandas Series (via the ``.str`` accessor) and crashes on a plain
    ``str`` — even though every other cleaning helper in this module operates
    on plain strings. This version keeps the Series behaviour and additionally
    accepts a plain string, for consistency with its siblings.

    Args:
        text: a pandas Series of strings, or a plain string.

    Returns:
        The lower-cased Series or string, matching the input type.
    """
    # A pandas Series exposes the vectorised string methods through `.str`.
    if hasattr(text, 'str'):
        return text.str.lower()
    return text.lower()
+
def remove_url(text):
    """Strip web links (any whitespace-delimited token starting with 'http') from *text*."""
    return re.sub(r"http\S+", "", text)
+
def remove_punctuation(text):
    r"""Remove every character that is not alphanumeric or whitespace.

    Fixes the original character class ``[^a-zA-z0-9\s]``: the lower-case
    ``z`` in ``A-z`` made the range span the ASCII characters between ``Z``
    and ``a`` as well (``[``, ``\``, ``]``, ``^``, ``_`` and the backtick),
    so those punctuation marks were silently left in the text.

    Args:
        text: the string to clean.

    Returns:
        *text* with all non-alphanumeric, non-whitespace characters removed.
    """
    return re.sub(r'[^a-zA-Z0-9\s]', '', text)
+
def remove_special_char(text):
    """Drop every character listed in ``string.punctuation`` from *text*.

    Uses a translation table for a single C-level pass instead of a
    character-by-character comprehension.
    """
    return text.translate(str.maketrans('', '', string.punctuation))
+
def remove_digits(text):
    """Remove every digit character from *text*, keeping everything else."""
    return ''.join(ch for ch in text if not ch.isdigit())
+
def remove_stop_words(text):
    """Drop English stop words from *text*.

    Splits on whitespace, filters against the module-level ``STOPWORDS``
    set, and re-joins the surviving words with single spaces.
    """
    kept = [word for word in str(text).split() if word not in STOPWORDS]
    return " ".join(kept)
+
def cleaning_text(tweet):
    r"""Run the full cleaning pipeline on a single raw tweet string.

    Steps: lower-case, strip URLs, drop non-alphanumeric characters,
    remove any remaining punctuation, remove digits, and finally remove
    English stop words (using the module-level ``STOPWORDS`` set).

    Fixes two defects in the original:
    * the character class used ``A-z``, which also matched ``[``, ``\``,
      ``]``, ``^``, ``_`` and the backtick, leaving that punctuation in;
    * the final step duplicated the digit-removal line instead of removing
      stop words, despite its own comment saying "remove all stop words".

    Args:
        tweet: the raw tweet text.

    Returns:
        The cleaned tweet string.
    """
    tweet = tweet.lower()                                          # normalise case
    tweet = re.sub(r"http\S+", "", tweet)                          # strip URLs
    tweet = re.sub(r'[^a-zA-Z0-9\s]', '', tweet)                   # keep alphanumerics/whitespace
    tweet = ''.join(ch for ch in tweet if ch not in string.punctuation)
    tweet = ''.join(ch for ch in tweet if not ch.isdigit())        # drop digits
    tweet = " ".join(w for w in tweet.split() if w not in STOPWORDS)
    return tweet
+
+
+
+
+
def word_map(file):
    """Render a word cloud of the uploaded tweets in the Streamlit app.

    Builds one large text from the ``message`` column of *file* (assumes
    *file* is a dataframe-like object with a ``message`` column — TODO
    confirm against the caller), generates a WordCloud from it, and draws
    the result with ``st.pyplot``.

    Fixes the original, which successively reassigned ``fig`` with the
    return values of ``plt.imshow``, ``plt.axis`` and ``plt.tight_layout``,
    so ``st.pyplot(fig)`` ended up receiving ``None`` instead of the figure.

    Args:
        file: uploaded data with a ``message`` column, or ``None`` (no-op).
    """
    if file is None:
        return

    # Concatenate every tokenised, lower-cased message into one string.
    comment_words = ''
    stop_words = set(STOPWORDS)
    for val in file.message:
        tokens = str(val).split()
        comment_words += " ".join(token.lower() for token in tokens) + " "

    wordcloud = WordCloud(width=800, height=800,
                          background_color='white',
                          stopwords=stop_words,
                          min_font_size=10).generate(comment_words)

    # Draw on an explicit figure and hand that same figure to Streamlit.
    fig = plt.figure(figsize=(8, 8), facecolor=None)
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.tight_layout(pad=0)
    st.pyplot(fig)
+
+
diff --git a/resources/Count_Vectorizer.pkl b/resources/Count_Vectorizer.pkl
new file mode 100644
index 00000000..59f272fb
Binary files /dev/null and b/resources/Count_Vectorizer.pkl differ
diff --git a/resources/K_Neighbors.pkl b/resources/K_Neighbors.pkl
new file mode 100644
index 00000000..c8577725
Binary files /dev/null and b/resources/K_Neighbors.pkl differ
diff --git a/resources/Linear_Support_Vector.pkl b/resources/Linear_Support_Vector.pkl
new file mode 100644
index 00000000..4cfaadd3
Binary files /dev/null and b/resources/Linear_Support_Vector.pkl differ
diff --git a/resources/Logistic_regression.pkl b/resources/Logistic_regression.pkl
index f36167d1..c0d64172 100644
Binary files a/resources/Logistic_regression.pkl and b/resources/Logistic_regression.pkl differ
diff --git a/resources/Naive_Bayes.pkl b/resources/Naive_Bayes.pkl
new file mode 100644
index 00000000..4e39c5be
Binary files /dev/null and b/resources/Naive_Bayes.pkl differ
diff --git a/resources/Random_Forest.pkl b/resources/Random_Forest.pkl
new file mode 100644
index 00000000..b50d879c
Binary files /dev/null and b/resources/Random_Forest.pkl differ
diff --git a/resources/imgs/618afd9e80dab90018db1160.webp b/resources/imgs/618afd9e80dab90018db1160.webp
new file mode 100644
index 00000000..a58aceb3
Binary files /dev/null and b/resources/imgs/618afd9e80dab90018db1160.webp differ
diff --git a/resources/imgs/Blue Modern Technology Company Logo (1).png b/resources/imgs/Blue Modern Technology Company Logo (1).png
new file mode 100644
index 00000000..739959f6
Binary files /dev/null and b/resources/imgs/Blue Modern Technology Company Logo (1).png differ
diff --git a/resources/imgs/Blue_Modern_Technology_Company_Logo__1_-removebg-preview.png b/resources/imgs/Blue_Modern_Technology_Company_Logo__1_-removebg-preview.png
new file mode 100644
index 00000000..41ff7fd7
Binary files /dev/null and b/resources/imgs/Blue_Modern_Technology_Company_Logo__1_-removebg-preview.png differ
diff --git a/resources/imgs/Caitlin.jpg b/resources/imgs/Caitlin.jpg
new file mode 100644
index 00000000..ec8e3073
Binary files /dev/null and b/resources/imgs/Caitlin.jpg differ
diff --git a/resources/imgs/Clmate change.JPG b/resources/imgs/Clmate change.JPG
new file mode 100644
index 00000000..e915b162
Binary files /dev/null and b/resources/imgs/Clmate change.JPG differ
diff --git a/resources/imgs/Craig.jpg b/resources/imgs/Craig.jpg
new file mode 100644
index 00000000..896a896f
Binary files /dev/null and b/resources/imgs/Craig.jpg differ
diff --git a/resources/imgs/Karabo.jpg b/resources/imgs/Karabo.jpg
new file mode 100644
index 00000000..dc57cf64
Binary files /dev/null and b/resources/imgs/Karabo.jpg differ
diff --git a/resources/imgs/Mamtie.jpg b/resources/imgs/Mamtie.jpg
new file mode 100644
index 00000000..df3519b4
Binary files /dev/null and b/resources/imgs/Mamtie.jpg differ
diff --git a/resources/imgs/Nomonde.jpg b/resources/imgs/Nomonde.jpg
new file mode 100644
index 00000000..02734343
Binary files /dev/null and b/resources/imgs/Nomonde.jpg differ
diff --git a/resources/imgs/Strategic_planning.jpg b/resources/imgs/Strategic_planning.jpg
new file mode 100644
index 00000000..21d7a48d
Binary files /dev/null and b/resources/imgs/Strategic_planning.jpg differ
diff --git a/resources/imgs/ThynkData_Dev_Process.jpg b/resources/imgs/ThynkData_Dev_Process.jpg
new file mode 100644
index 00000000..fa46668e
Binary files /dev/null and b/resources/imgs/ThynkData_Dev_Process.jpg differ
diff --git a/resources/imgs/Twitter_tweet.JPG b/resources/imgs/Twitter_tweet.JPG
new file mode 100644
index 00000000..eb46d0ae
Binary files /dev/null and b/resources/imgs/Twitter_tweet.JPG differ
diff --git a/resources/imgs/Twitter_two.jpg b/resources/imgs/Twitter_two.jpg
new file mode 100644
index 00000000..a58aceb3
Binary files /dev/null and b/resources/imgs/Twitter_two.jpg differ
diff --git a/resources/imgs/testing_bck.jpg b/resources/imgs/testing_bck.jpg
new file mode 100644
index 00000000..826c4f60
Binary files /dev/null and b/resources/imgs/testing_bck.jpg differ
diff --git a/resources/imgs/twitter_img.png b/resources/imgs/twitter_img.png
new file mode 100644
index 00000000..ec84b19a
Binary files /dev/null and b/resources/imgs/twitter_img.png differ
diff --git a/resources/randomfc_model.pkl b/resources/randomfc_model.pkl
new file mode 100644
index 00000000..2d966436
Binary files /dev/null and b/resources/randomfc_model.pkl differ
diff --git a/resources/rfc_TfidfVectorizer.pkl b/resources/rfc_TfidfVectorizer.pkl
new file mode 100644
index 00000000..1d679c2c
Binary files /dev/null and b/resources/rfc_TfidfVectorizer.pkl differ
diff --git a/resources/rfc_model.pkl b/resources/rfc_model.pkl
new file mode 100644
index 00000000..7f9220ad
Binary files /dev/null and b/resources/rfc_model.pkl differ
diff --git a/resources/tfidfvect.pkl b/resources/tfidfvect.pkl
index 72c33bcd..d080a21b 100644
Binary files a/resources/tfidfvect.pkl and b/resources/tfidfvect.pkl differ