import pandas as pd

# Column groups reported on by the helpers below.
_CATEGORICAL_COLUMNS = ('country', 'new_user', 'source', 'converted')
_NUMERICAL_COLUMNS = ('age', 'total_pages_visited')


def get_categorical_variables(df):
    """Return the names of the categorical columns of *df*.

    The columns are 'country', 'new_user', 'source' and 'converted'.

    Args:
        df: DataFrame that contains those columns.

    Returns:
        A list of the four column names, in the order given above.

    Raises:
        KeyError: if any of the columns is missing from *df*.
    """
    # Selecting the columns first (rather than returning the constant) keeps
    # the KeyError for a frame that lacks one of them; list() of a DataFrame
    # yields its column labels.
    return list(df[list(_CATEGORICAL_COLUMNS)])


def get_numerical_variables(df):
    """Return the names of the numerical columns of *df*.

    The columns are 'age' and 'total_pages_visited'.

    Args:
        df: DataFrame that contains those columns.

    Returns:
        A list of the two column names, in the order given above.

    Raises:
        KeyError: if any of the columns is missing from *df*.
    """
    return list(df[list(_NUMERICAL_COLUMNS)])


def get_numerical_variables_percentile(df):
    """Return the summary statistics produced by ``df.describe()``.

    Note that ``describe()`` is applied to the whole frame, not only to the
    columns listed by ``get_numerical_variables``.
    """
    return df.describe()


def get_categorical_variables_modes(df):
    """Return the mode(s) of every categorical column of *df*.

    Returns:
        The DataFrame produced by ``DataFrame.mode()`` on the columns named
        by ``get_categorical_variables``.
    """
    return df[get_categorical_variables(df)].mode()


def get_missing_values_count(df):
    """Count the null values in each column of *df*.

    Returns:
        A single-column DataFrame indexed by the column names of *df*, holding
        the number of null entries of each column.
    """
    return pd.DataFrame(df.isnull().sum())


def plot_histogram_with_numerical_values(df):
    """Draw 50-bin histograms of 'age' (red) and 'total_pages_visited' (blue).

    Both histograms are drawn on the current axes and the figure is shown
    with ``plt.show()``.
    """
    # Imported here because the module never imported pyplot, so the original
    # body raised NameError on `plt` as soon as the function was called.
    import matplotlib.pyplot as plt

    plt.hist(df['age'], color='r', bins=50)
    plt.hist(df['total_pages_visited'], color='b', bins=50)
    plt.xlabel('age')
    plt.ylabel('total_pages_visited')
    plt.show()


def plot_facet_box(df):
    # NOTE(review): only the signature is visible in the reviewed hunk; the
    # body lies outside it. `pass` mirrors the module's other unimplemented
    # stubs -- confirm against the real file before applying.
    pass