diff --git a/build.py b/build.py index 35cdd2a..63a0c8a 100644 --- a/build.py +++ b/build.py @@ -1,26 +1,56 @@ -def get_categorical_variables(df): - return [] +import numpy as np +import pandas as pd +from scipy.stats import norm +import seaborn as sns +import matplotlib.pyplot as plt +df = pd.read_csv("data/conversion_data.csv") -def get_numerical_variables(df): - return [] +def get_categorical_variables(df): + return df[['country','new_user','source','converted']] +def get_numerical_variables(df): + numerical = df._get_numeric_data().columns + return list(numerical) def get_numerical_variables_percentile(df): - pass - + final = df[get_numerical_variables(df)] + return final.describe() def get_categorical_variables_modes(df): - pass - + modes = get_categorical_variables(df).mode() + return modes def get_missing_values_count(df): - pass + df1 = df.apply(lambda x: sum(x.isnull()), axis=0) + return pd.DataFrame(df1) def plot_histogram_with_numerical_values(df): - pass - + num_cols = get_numerical_variables(df) + plt.figure(figsize=(15,6)) + plt.subplot(221) + plt.title(num_cols[0]) + sns.distplot(df[num_cols[0]], color='yellow', fit=norm, kde=False) + plt.subplot(222) + plt.title(num_cols[1]) + sns.distplot(df[num_cols[1]], color='yellow', fit=norm, kde=False) + plt.subplot(223) + plt.title(num_cols[2]) + sns.distplot(df[num_cols[2]], color='yellow', fit=norm, kde=False) + plt.subplot(224) + plt.title(num_cols[3]) + sns.distplot(df[num_cols[3]], color='yellow', fit=norm, kde=False) + plt.tight_layout() + plt.show() def plot_facet_box(df): - pass + plt.figure(figsize=(10,10)) + plt.subplot(221) + plt.title('Age') + sns.boxplot('converted','age',data=df) + plt.subplot(222) + plt.title('Total Pages Visited') + sns.boxplot('converted','total_pages_visited',data=df) + plt.tight_layout() + plt.show() diff --git a/build.pyc b/build.pyc new file mode 100644 index 0000000..4834f7f Binary files /dev/null and b/build.pyc differ diff --git a/tests/__init__.pyc b/tests/__init__.pyc new file mode 100644 index 0000000..de27e46 Binary files /dev/null and b/tests/__init__.pyc differ diff --git a/tests/test_get_categorical_variables.pyc b/tests/test_get_categorical_variables.pyc new file mode 100644 index 0000000..47c1104 Binary files /dev/null and b/tests/test_get_categorical_variables.pyc differ