diff --git a/FeatureExtraction.py b/FeatureExtraction.py deleted file mode 100644 index c27d629..0000000 --- a/FeatureExtraction.py +++ /dev/null @@ -1,79 +0,0 @@ -import pandas as pd -import numpy as np -from dataCleaning import read_run, column_clean, preprocessing -import pdb - -def overall_cleaning(): - df_p3_exo = read_run("P3_Exo_1_0.csv") # second run, male - df_p3_noexo = read_run("P3_NoExo_1_0.csv") # first run, male - df_p4_exo = read_run("P4_Exo_1_0.csv") # 1st run female - df_p4_noexo = read_run("P4_NoExo_1_0.csv") # 2nd female - - df_p3_exo = column_clean(df_p3_exo, run_num = 2, gender = 'male') - df_p3_noexo = column_clean(df_p3_noexo, run_num = 1, gender = 'male') - df_p4_exo = column_clean(df_p4_exo, run_num = 2, gender = 'female') - df_p4_noexo = column_clean(df_p4_noexo, run_num = 1, gender = 'female') - combined_df = pd.concat([df_p3_exo, df_p3_noexo, df_p4_exo, df_p4_noexo], ignore_index=True) - dfs = [df_p3_exo, df_p3_noexo, df_p4_exo, df_p4_noexo] #jack's list for the data cleaning he does later. - # # Show the head of the data - # df_p3_exo.describe() - df_p3_noexo.head() - # df_p4_exo.head() - # df_p4_noexo.head() - # # Choose inputs - # features = df_p3_exo[['EMG 1 (mV)', 'ACC X (G)', 'ACC Y (G)', 'ACC Z (G)', 'GYRO X (deg/s)', 'GYRO Y (deg/s)', 'GYRO Z (deg/s)']].dropna() - # features.head() - feature_sets = [] - # Run functions to extract features for each dataframe - #CP: does this make sure to remove the redundant time series columns? - #can keep ACC X Time Series (s) in each sensor group, and remove any other column with 'Time Series (s)' in its name - for df in dfs: - emg_features = compute_emg_features(df['EMG 1 (mV)']) - accel_features = compute_accel_features(df['ACC X (G)'], df['ACC Y (G)'], df['ACC Z (G)']) - gyro_features = compute_gyro_features(df['GYRO X (deg/s)'], df['GYRO Y (deg/s)'], df['GYRO Z (deg/s)']) - features = { - 'emg': emg_features, - 'accel': accel_features, - 'gyro': gyro_features - } - feature_sets.append(features) - - # feature_sets now contains extracted features for each df - p3exo_feats, p3noexo_feats, p4exo_feats, p4noexo_feats = feature_sets - return p3exo_feats, p3noexo_feats, p4exo_feats, p4noexo_feats - -# Calculations for Feature Extraction from Project_Guide -def compute_emg_features(signal): - return { - 'mean': np.mean(signal), - 'max': np.max(signal), - 'min': np.min(signal), - 'std': np.std(signal), - 'rms': np.sqrt(np.mean(signal**2)) - } - -def compute_accel_features(a_x, a_y, a_z): - a_mag = np.sqrt(a_x**2 + a_y**2 + a_z**2) - - features = { - 'peak_accel': np.max(a_mag), - 'mean_accel': np.mean(a_mag), - 'total_accel': np.sqrt(np.mean(a_x**2) + np.mean(a_y**2) + np.mean(a_z**2)), - 'accel_range': np.max(a_mag) - np.min(a_mag) - } - return features - -def compute_gyro_features(w_x, w_y, w_z): - w_mag = np.sqrt(w_x**2 + w_y**2 + w_z**2) - - features = { - 'peak_angular_vel': np.max(w_mag), - 'mean_angular_vel': np.mean(w_mag), - 'total_angular_vel': np.sqrt(np.mean(w_x**2) + np.mean(w_y**2) + np.mean(w_z**2)), - 'angular_vel_range': np.max(w_mag) - np.min(w_mag) - } - return features - - -if __name__ == '__main__': - p3exo_feats, p3noexo_feats, p4exo_feats, p4noexo_feats = overall_cleaning() \ No newline at end of file diff --git a/dataCleaning.py b/dataCleaning.py index 087d7e8..4ae484f 100644 --- a/dataCleaning.py +++ b/dataCleaning.py @@ -1,8 +1,17 @@ import pandas as pd import numpy as np -import pdb +import pdb +from sklearn.model_selection import train_test_split +from modif_cols import tidy_emg_imu_as_measured +from resampling import upsample, downsample + +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import MinMaxScaler, StandardScaler +from sklearn.impute import SimpleImputer +from sklearn.preprocessing import OneHotEncoder +from sklearn.compose import ColumnTransformer # Data Labels: # Label for EMG Data shared: # Open CSV files to check what they look like. Use skiprows=5 and low_memory=False to load it properly (the top 5 rows are metadata) @@ -27,13 +36,13 @@ def read_run(filename, skiprows=7): #skip the first 7 rows (freq/cycle time fiel usecols = usecols, on_bad_lines='skip') df.columns = ['RDelt_EMG_TimeSeries', 'RDelt_EMG_MilliVolts', 'RDelt_IMU_Acc X Time Series(s)', 'RDelt_ACC X (G)', 'RDelt_Acc Y Time Series(s)', 'RDelt_ACC Y (G)', 'RDelt_Acc Z Time Series(s)', 'RDelt_ACC Z (G)','RDelt_GyroXTime Series(s)', 'RDelt_GYRO X (deg/s)','RDelt_GyroYTime Series(s)', 'RDelt_GYRO Y (deg/s)', 'RDelt_GyroZTime Series(s)', 'RDelt_GYRO Z (deg/s)', - 'LDelt_TimeSeries', 'LDelt_MilliVolts', 'LDelt_Acc X Time Series(s)', 'LDelt_ACC X (G)', 'LDelt_Acc Y Time Series(s)', 'LDelt_ACC Y (G)', 'LDelt_Acc Z Time Series(s)', 'LDelt_ACC Z (G)','LDelt_GyroXTime Series(s)', 'LDelt_GYRO X (deg/s)','LDelt_GyroYTime Series(s)', 'LDelt_GYRO Y (deg/s)', 'LDelt_GyroZTime Series(s)', 'LDelt_GYRO Z (deg/s)', - 'RBicep_TimeSeries', 'RBicep_MilliVolts', 'RBicep_Acc X Time Series(s)', 'RBicep_ACC X (G)', 'RBicep_Acc Y Time Series(s)', 'RBicep_ACC Y (G)', 'RBicep_Acc Z Time Series(s)', 'RBicep_ACC Z (G)','RBicep_GyroXTime Series(s)', 'RBicep_GYRO X (deg/s)','RBicep_GyroYTime Series(s)', 'RBicep_GYRO Y (deg/s)', 'RBicep_GyroZTime Series(s)', 'RBicep_GYRO Z (deg/s)', - 'LBicep_TimeSeries', 'LBicep_MilliVolts', 'LBicep_Acc X Time Series(s)', 'LBicep_ACC X (G)', 'LBicep_Acc Y Time Series(s)', 'LBicep_ACC Y (G)', 'LBicep_Acc Z Time Series(s)', 'LBicep_ACC Z (G)','LBicep_GyroXTime Series(s)', 'LBicep_GYRO X (deg/s)','LBicep_GyroYTime Series(s)', 'LBicep_GYRO Y (deg/s)', 'LBicep_GyroZTime Series(s)', 'LBicep_GYRO Z (deg/s)' + 'LDelt_TimeSeries', 'LDelt_EMG_MilliVolts', 'LDelt_Acc X Time Series(s)', 'LDelt_ACC X (G)', 'LDelt_Acc Y Time Series(s)', 'LDelt_ACC Y (G)', 'LDelt_Acc Z Time Series(s)', 'LDelt_ACC Z (G)','LDelt_GyroXTime Series(s)', 'LDelt_GYRO X (deg/s)','LDelt_GyroYTime Series(s)', 'LDelt_GYRO Y (deg/s)', 'LDelt_GyroZTime Series(s)', 'LDelt_GYRO Z (deg/s)', + 'RBicep_TimeSeries', 'RBicep_EMG_MilliVolts', 'RBicep_Acc X Time Series(s)', 'RBicep_ACC X (G)', 'RBicep_Acc Y Time Series(s)', 'RBicep_ACC Y (G)', 'RBicep_Acc Z Time Series(s)', 'RBicep_ACC Z (G)','RBicep_GyroXTime Series(s)', 'RBicep_GYRO X (deg/s)','RBicep_GyroYTime Series(s)', 'RBicep_GYRO Y (deg/s)', 'RBicep_GyroZTime Series(s)', 'RBicep_GYRO Z (deg/s)', + 'LBicep_TimeSeries', 'LBicep_EMG_MilliVolts', 'LBicep_Acc X Time Series(s)', 'LBicep_ACC X (G)', 'LBicep_Acc Y Time Series(s)', 'LBicep_ACC Y (G)', 'LBicep_Acc Z Time Series(s)', 'LBicep_ACC Z (G)','LBicep_GyroXTime Series(s)', 'LBicep_GYRO X (deg/s)','LBicep_GyroYTime Series(s)', 'LBicep_GYRO Y (deg/s)', 'LBicep_GyroZTime Series(s)', 'LBicep_GYRO Z (deg/s)' ] - return df + return df #raw data -def column_clean(df, run_num, gender): +def column_clean(df): #remove all time series columns except RDelt_EMG_TimeSeries' and 'RDelt_IMU_Acc X Time Series(s)', so keep time scale for both EMG and IMU extr_time_series = [ 'RDelt_Acc Y Time Series(s)', 'RDelt_Acc Z Time Series(s)', 'RDelt_GyroXTime Series(s)', 'RDelt_GyroYTime Series(s)', 'RDelt_GyroZTime Series(s)', 'LDelt_TimeSeries', 'LDelt_Acc X Time Series(s)', @@ -45,14 +54,65 @@ def column_clean(df, run_num, gender): 'LBicep_GyroYTime Series(s)', 'LBicep_GyroZTime Series(s)'] df = df.drop(extr_time_series, axis = 1) + df = df.rename(columns={'RDelt_EMG_TimeSeries': 'EMG_TimeSeries', 'RDelt_IMU_Acc X Time Series(s)': 'IMU_TimeSeries'}) # measurement_cols = [col for col in df.columns if (('ACC' in col or 'GYRO' in col) and 'Time Series' not in col)] #exclude mV and time cols # df.columns = df.columns.str.strip() # Remove leading/trailing spaces (Yuxuan) # df = df.apply(pd.to_numeric, errors='coerce') # Conver t everything to numeric (Yuxuan) df = df.replace(['', ' ', 'NA', None], np.nan) #stdize missing data - df['gender'] = gender - df['run_num'] = run_num - df.to_csv("test.csv") return df -def preprocessing(full_df): - pass + +#melting and stuff +def create_sensor_col(df, run_num, gender, exo): + df_pivoted = tidy_emg_imu_as_measured(df) + df_pivoted.columns = df_pivoted.columns.str.strip() + df_pivoted = df_pivoted.reset_index() + df_pivoted['gender'] = gender + df_pivoted['run_num'] = run_num + df_pivoted['exo'] = exo + df_pivoted.to_csv("pivoted_df.csv") + return df_pivoted + +def preprocessing_actions(full_df, neural_net=False): + num_attribs = [ + 'EMG_MilliVolts_filtered', + 'ACC X (G)_filtered', + 'ACC Y (G)_filtered', + 'ACC Z (G)_filtered', + 'GYRO X (deg/s)_filtered', + 'GYRO Y (deg/s)_filtered', + 'GYRO Z (deg/s)_filtered', + # Add any other numerical features here + ] + cat_attribs = [ + 'BodyPart', + 'gender' + #exo is the target variable + ] + if neural_net: + num_pipeline = Pipeline([ + ("impute", SimpleImputer(strategy="median")), + ("standardize", MinMaxScaler()), + ]) + else: + num_pipeline = Pipeline([ + ("impute", SimpleImputer(strategy="median")), + ("standardize", StandardScaler()), + ]) + cat_pipeline = Pipeline([ + ("impute", SimpleImputer(strategy="most_frequent")), + ("oneHot", OneHotEncoder()), + ]) + + preprocessing = ColumnTransformer([ + ("num", num_pipeline, num_attribs), + ("cat", cat_pipeline, cat_attribs), + ]) + # Prepare data for modeling + X = full_df[num_attribs + cat_attribs] + y = full_df["exo"] + + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42) + X_train_prepared = preprocessing.fit_transform(X_train) + X_test_prepared = preprocessing.transform(X_test) + return X_train_prepared, X_test_prepared, y_train, y_test, preprocessing \ No newline at end of file diff --git a/debug.ipynb b/debug.ipynb new file mode 100644 index 0000000..c6ad330 --- /dev/null +++ b/debug.ipynb @@ -0,0 +1,952 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "e0bebc80-fe7f-4d6c-8387-5c512308e48d", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np \n", + "from dataCleaning import read_run, column_clean, preprocessing, create_sensor_col\n", + "import pdb" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "3bd13a4f-893e-43c3-bc4a-afabc40bbcde", + "metadata": {}, + "outputs": [], + "source": [ + "def read_run(filename, skiprows=7): #skip the first 7 rows (freq/cycle time fields as well as metadata)\n", + " usecols = list(range(0, 56)) \n", + " df = pd.read_csv(filename, low_memory = False, \n", + " header = 0, \n", + " skiprows=skiprows,\n", + " # names=header,\n", + " usecols = usecols,\n", + " on_bad_lines='skip') \n", + " df.columns = ['RDelt_EMG_TimeSeries', 'RDelt_EMG_MilliVolts', 'RDelt_IMU_Acc X Time Series(s)', 'RDelt_ACC X (G)', 'RDelt_Acc Y Time Series(s)', 'RDelt_ACC Y (G)', 'RDelt_Acc Z Time Series(s)', 'RDelt_ACC Z (G)','RDelt_GyroXTime Series(s)', 'RDelt_GYRO X (deg/s)','RDelt_GyroYTime Series(s)', 'RDelt_GYRO Y (deg/s)', 'RDelt_GyroZTime Series(s)', 'RDelt_GYRO Z (deg/s)',\n", + " 'LDelt_TimeSeries', 'LDelt_MilliVolts', 'LDelt_Acc X Time Series(s)', 'LDelt_ACC X (G)', 'LDelt_Acc Y Time Series(s)', 'LDelt_ACC Y (G)', 'LDelt_Acc Z Time Series(s)', 'LDelt_ACC Z (G)','LDelt_GyroXTime Series(s)', 'LDelt_GYRO X (deg/s)','LDelt_GyroYTime Series(s)', 'LDelt_GYRO Y (deg/s)', 'LDelt_GyroZTime Series(s)', 'LDelt_GYRO Z (deg/s)',\n", + " 'RBicep_TimeSeries', 'RBicep_MilliVolts', 'RBicep_Acc X Time Series(s)', 'RBicep_ACC X (G)', 'RBicep_Acc Y Time Series(s)', 'RBicep_ACC Y (G)', 'RBicep_Acc Z Time Series(s)', 'RBicep_ACC Z (G)','RBicep_GyroXTime Series(s)', 'RBicep_GYRO X (deg/s)','RBicep_GyroYTime Series(s)', 'RBicep_GYRO Y (deg/s)', 'RBicep_GyroZTime Series(s)', 'RBicep_GYRO Z (deg/s)',\n", + " 'LBicep_TimeSeries', 'LBicep_MilliVolts', 'LBicep_Acc X Time Series(s)', 'LBicep_ACC X (G)', 'LBicep_Acc Y Time Series(s)', 'LBicep_ACC Y (G)', 'LBicep_Acc Z Time Series(s)', 'LBicep_ACC Z (G)','LBicep_GyroXTime Series(s)', 'LBicep_GYRO X (deg/s)','LBicep_GyroYTime Series(s)', 'LBicep_GYRO Y (deg/s)', 'LBicep_GyroZTime Series(s)', 'LBicep_GYRO Z (deg/s)'\n", + " ]\n", + " return df #raw data " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "07d4e282-5741-449e-a2a8-71f3b3a6c66d", + "metadata": {}, + "outputs": [], + "source": [ + "def column_clean(df, run_num, gender):\n", + " #remove all time series columns except RDelt_EMG_TimeSeries' and 'RDelt_IMU_Acc X Time Series(s)', so keep time scale for both EMG and IMU \n", + " extr_time_series = [ 'RDelt_Acc Y Time Series(s)', 'RDelt_Acc Z Time Series(s)', 'RDelt_GyroXTime Series(s)',\n", + " 'RDelt_GyroYTime Series(s)', 'RDelt_GyroZTime Series(s)', 'LDelt_TimeSeries', 'LDelt_Acc X Time Series(s)', \n", + " 'LDelt_Acc Y Time Series(s)', 'LDelt_Acc Z Time Series(s)', 'LDelt_GyroXTime Series(s)',\n", + " 'LDelt_GyroYTime Series(s)', 'LDelt_GyroZTime Series(s)', 'RBicep_TimeSeries', 'RBicep_Acc X Time Series(s)',\n", + " 'RBicep_Acc Y Time Series(s)', 'RBicep_Acc Z Time Series(s)', 'RBicep_GyroXTime Series(s)',\n", + " 'RBicep_GyroYTime Series(s)', 'RBicep_GyroZTime Series(s)', 'LBicep_TimeSeries', 'LBicep_Acc X Time Series(s)',\n", + " 'LBicep_Acc Y Time Series(s)', 'LBicep_Acc Z Time Series(s)', 'LBicep_GyroXTime Series(s)', \n", + " 'LBicep_GyroYTime Series(s)', 'LBicep_GyroZTime Series(s)']\n", + " \n", + " df = df.drop(extr_time_series, axis = 1)\n", + " # measurement_cols = [col for col in df.columns if (('ACC' in col or 'GYRO' in col) and 'Time Series' not in col)] #exclude mV and time cols\n", + " # df.columns = df.columns.str.strip() # Remove leading/trailing spaces (Yuxuan)\n", + " # df = df.apply(pd.to_numeric, errors='coerce') # Conver t everything to numeric (Yuxuan)\n", + " df = df.replace(['', ' ', 'NA', None], np.nan) #stdize missing data\n", + " df['gender'] = gender\n", + " df['run_num'] = run_num\n", + " return df " + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "c74acdb7-01e3-443f-92b0-6d2a6e45696c", + "metadata": {}, + "outputs": [], + "source": [ + "def create_sensor_col(df): \n", + " columns_to_keep = ['RDelt_EMG_TimeSeries', 'RDelt_IMU_Acc X Time Series(s)', 'gender', 'run_num']\n", + " pdb.set_trace()\n", + " # Identify all measurement columns, including EMG millivolts\n", + " measurement_columns = [col for col in df.columns if any(prefix in col for prefix in ['RDelt', 'LDelt', 'RBicep', 'LBicep']) and col not in columns_to_keep]\n", + " df_melted = df.melt(id_vars=columns_to_keep, value_vars=measurement_columns, var_name=\"sensor_measurement\", value_name=\"value\")\n", + " \n", + " # Extract the Sensor Body Position\n", + " df_melted[\"Sensor_Body_Position\"] = df_melted[\"sensor_measurement\"].str.extract(r'^(RDelt|LDelt|RBicep|LBicep)')\n", + " # Extract measurement type, including EMG millivolts\n", + " df_melted[\"measurement_type\"] = df_melted[\"sensor_measurement\"].str.extract(r'_(EMG_MilliVolts|MilliVolts|ACC X|ACC Y|ACC Z|GYRO X|GYRO Y|GYRO Z)') # Drop the original sensor_measurement column\n", + " df_melted = df_melted.drop(columns=[\"sensor_measurement\"]) # Pivot the DataFrame so each measurement type becomes a separate column\n", + " df_melted[\"value\"] = df_melted[\"value\"].astype(str).str.strip() #make sure that values are clean if they're strings (no extra space)\n", + " df_melted[\"value\"] = pd.to_numeric(df_melted[\"value\"], errors=\"coerce\") #make sure all values are cast to numeric\n", + " df_melted.fillna(np.nan, inplace=True)\n", + " # Pivot the DataFrame so each measurement type becomes a separate column\n", + " df_pivoted = df_melted.pivot_table(index=['Sensor_Body_Position', 'RDelt_EMG_TimeSeries', 'RDelt_IMU_Acc X Time Series(s)', 'gender', 'run_num'], \n", + " columns='measurement_type', values='value')\n", + " df_pivoted.columns = df_pivoted.columns.get_level_values(0)\n", + " df_pivoted.columns = df_pivoted.columns.str.strip()\n", + " df_pivoted = df_pivoted.reset_index()\n", + " pdb.set_trace()\n", + " df_pivoted.to_csv(\"pivoted_df.csv\")\n", + " return df_pivoted" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "2a446022-09eb-4f86-a77a-980490d55b0e", + "metadata": {}, + "outputs": [], + "source": [ + "def standardize_time_series():\n", + " # interpolate()\n", + " pass\n", + "\n", + "def preprocessing(full_df):\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fc81654-185e-4738-919a-57afa144341e", + "metadata": {}, + "outputs": [], + "source": [ + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "dab168eb-f1dc-4d7e-99fe-3ec1042db065", + "metadata": {}, + "outputs": [], + "source": [ + "# Calculations for Feature Extraction from Project_Guide\n", + "def compute_emg_features(signal):\n", + " return {\n", + " 'mean': np.mean(signal),\n", + " 'max': np.max(signal),\n", + " 'min': np.min(signal),\n", + " 'std': np.std(signal),\n", + " 'rms': np.sqrt(np.mean(signal**2))\n", + " }\n", + "\n", + "def compute_accel_features(a_x, a_y, a_z):\n", + " a_mag = np.sqrt(a_x**2 + a_y**2 + a_z**2)\n", + " \n", + " features = {\n", + " 'peak_accel': np.max(a_mag),\n", + " 'mean_accel': np.mean(a_mag),\n", + " 'total_accel': np.sqrt(np.mean(a_x**2) + np.mean(a_y**2) + np.mean(a_z**2)),\n", + " 'accel_range': np.max(a_mag) - np.min(a_mag)\n", + " }\n", + " return features\n", + "\n", + "def compute_gyro_features(w_x, w_y, w_z):\n", + " w_mag = np.sqrt(w_x**2 + w_y**2 + w_z**2)\n", + " \n", + " features = {\n", + " 'peak_angular_vel': np.max(w_mag),\n", + " 'mean_angular_vel': np.mean(w_mag),\n", + " 'total_angular_vel': np.sqrt(np.mean(w_x**2) + np.mean(w_y**2) + np.mean(w_z**2)),\n", + " 'angular_vel_range': np.max(w_mag) - np.min(w_mag)\n", + " }\n", + " return features " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72ed77bf-dbc1-406c-a5b1-57dcce504502", + "metadata": {}, + "outputs": [], + "source": [ + "P3exo_feats, p3noexo_feats, p4exo_feats, p4noexo_feats = overall_cleaning()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d8305190-17ac-43fd-b7d9-d9a90e329533", + "metadata": {}, + "outputs": [], + "source": [ + "df_p3_exo = read_run(\"P3_Exo_1_0.csv\") # 2nd run, male" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ab22fd91-5089-4c1e-91e5-f2fedb609f69", + "metadata": {}, + "outputs": [], + "source": [ + "df_p3_noexo = read_run(\"P3_NoExo_1_0.csv\") # first run, male" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2b503e14-fc38-46c2-9a10-3d7ea8ade855", + "metadata": {}, + "outputs": [], + "source": [ + "df_p4_exo = read_run(\"P4_Exo_1_0.csv\") # 1st run female\n", + "df_p4_noexo = read_run(\"P4_NoExo_1_0.csv\") # 2nd female" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "ad941e26-6be4-4ae5-8d57-f148509e1675", + "metadata": {}, + "outputs": [], + "source": [ + "df_p3_exo = column_clean(df_p3_exo, run_num = 2, gender = 'male')\n", + "df_p3_noexo = column_clean(df_p3_noexo, run_num = 1, gender = 'male')\n", + "df_p4_exo = column_clean(df_p4_exo, run_num = 2, gender = 'female')\n", + "df_p4_noexo = column_clean(df_p4_noexo, run_num = 1, gender = 'female')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "0372fab5-cc33-4ec4-a878-a20207b8b542", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | RDelt_EMG_TimeSeries | \n", + "RDelt_IMU_Acc X Time Series(s) | \n", + "gender | \n", + "run_num | \n", + "sensor_measurement | \n", + "value | \n", + "Sensor_Body_Position | \n", + "measurement_type | \n", + "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0.000000 | \n", + "0 | \n", + "male | \n", + "2 | \n", + "RDelt_EMG_MilliVolts | \n", + "0.004868 | \n", + "RDelt | \n", + "EMG_MilliVolts | \n", + "
| 1 | \n", + "0.000794 | \n", + "0.00675 | \n", + "male | \n", + "2 | \n", + "RDelt_EMG_MilliVolts | \n", + "0.005875 | \n", + "RDelt | \n", + "EMG_MilliVolts | \n", + "
| 2 | \n", + "0.001588 | \n", + "0.0135 | \n", + "male | \n", + "2 | \n", + "RDelt_EMG_MilliVolts | \n", + "0.005203 | \n", + "RDelt | \n", + "EMG_MilliVolts | \n", + "
| 3 | \n", + "0.002382 | \n", + "0.02025 | \n", + "male | \n", + "2 | \n", + "RDelt_EMG_MilliVolts | \n", + "0.005539 | \n", + "RDelt | \n", + "EMG_MilliVolts | \n", + "
| 4 | \n", + "0.003177 | \n", + "0.027 | \n", + "male | \n", + "2 | \n", + "RDelt_EMG_MilliVolts | \n", + "0.007721 | \n", + "RDelt | \n", + "EMG_MilliVolts | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 3852739 | \n", + "109.265029 | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "LBicep_GYRO Z (deg/s) | \n", + "NaN | \n", + "LBicep | \n", + "GYRO Z | \n", + "
| 3852740 | \n", + "109.265823 | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "LBicep_GYRO Z (deg/s) | \n", + "NaN | \n", + "LBicep | \n", + "GYRO Z | \n", + "
| 3852741 | \n", + "109.266618 | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "LBicep_GYRO Z (deg/s) | \n", + "NaN | \n", + "LBicep | \n", + "GYRO Z | \n", + "
| 3852742 | \n", + "109.267412 | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "LBicep_GYRO Z (deg/s) | \n", + "NaN | \n", + "LBicep | \n", + "GYRO Z | \n", + "
| 3852743 | \n", + "109.268206 | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "LBicep_GYRO Z (deg/s) | \n", + "NaN | \n", + "LBicep | \n", + "GYRO Z | \n", + "
3852744 rows × 8 columns
\n", + "| \n", + " | RDelt_EMG_TimeSeries | \n", + "RDelt_EMG_MilliVolts | \n", + "RDelt_IMU_Acc X Time Series(s) | \n", + "RDelt_ACC X (G) | \n", + "RDelt_ACC Y (G) | \n", + "RDelt_ACC Z (G) | \n", + "RDelt_GYRO X (deg/s) | \n", + "RDelt_GYRO Y (deg/s) | \n", + "RDelt_GYRO Z (deg/s) | \n", + "LDelt_MilliVolts | \n", + "... | \n", + "RBicep_GYRO Z (deg/s) | \n", + "LBicep_MilliVolts | \n", + "LBicep_ACC X (G) | \n", + "LBicep_ACC Y (G) | \n", + "LBicep_ACC Z (G) | \n", + "LBicep_GYRO X (deg/s) | \n", + "LBicep_GYRO Y (deg/s) | \n", + "LBicep_GYRO Z (deg/s) | \n", + "gender | \n", + "run_num | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0.000000 | \n", + "0.004868 | \n", + "0 | \n", + "0.0747681 | \n", + "0.9061279 | \n", + "0.2548828 | \n", + "-30.7404575 | \n", + "-4.2519083 | \n", + "9.358779 | \n", + "-0.006546 | \n", + "... | \n", + "-11.9541988 | \n", + "0.041962 | \n", + "0.2507324 | \n", + "0.8808594 | \n", + "0.1972656 | \n", + "-30.801527 | \n", + "8.5572519 | \n", + "12.6870232 | \n", + "male | \n", + "2 | \n", + "
| 1 | \n", + "0.000794 | \n", + "0.005875 | \n", + "0.00675 | \n", + "0.0795288 | \n", + "0.913208 | \n", + "0.2689209 | \n", + "-30.7786255 | \n", + "-5.961832 | \n", + "8.5419846 | \n", + "-0.006546 | \n", + "... | \n", + "-12.5343513 | \n", + "0.041962 | \n", + "0.2453003 | \n", + "0.8790283 | \n", + "0.2055054 | \n", + "-29.038168 | \n", + "9.9007635 | \n", + "13.0305347 | \n", + "male | \n", + "2 | \n", + "
| 2 | \n", + "0.001588 | \n", + "0.005203 | \n", + "0.0135 | \n", + "0.0804443 | \n", + "0.9194336 | \n", + "0.2719116 | \n", + "-29.9312973 | \n", + "-6.8015265 | \n", + "8.4503813 | \n", + "-0.007217 | \n", + "... | \n", + "-12.801527 | \n", + "0.041459 | \n", + "0.2486572 | \n", + "0.880188 | \n", + "0.2092896 | \n", + "-27.6641216 | \n", + "8.9694653 | \n", + "12.358779 | \n", + "male | \n", + "2 | \n", + "
| 3 | \n", + "0.002382 | \n", + "0.005539 | \n", + "0.02025 | \n", + "0.0809326 | \n", + "0.9316406 | \n", + "0.2680054 | \n", + "-29.1068707 | \n", + "-6.8854961 | \n", + "7.6793895 | \n", + "-0.004196 | \n", + "... | \n", + "-12.7480917 | \n", + "0.039780 | \n", + "0.2533569 | \n", + "0.880127 | \n", + "0.2134399 | \n", + "-25.442749 | \n", + "7.6106873 | \n", + "11.1145039 | \n", + "male | \n", + "2 | \n", + "
| 4 | \n", + "0.003177 | \n", + "0.007721 | \n", + "0.027 | \n", + "0.0866699 | \n", + "0.9319458 | \n", + "0.2663574 | \n", + "-29.3129768 | \n", + "-7.4045801 | \n", + "6.7557254 | \n", + "-0.005203 | \n", + "... | \n", + "-11.1832066 | \n", + "0.041459 | \n", + "0.2590942 | \n", + "0.8770752 | \n", + "0.2124634 | \n", + "-23.557251 | \n", + "6.0916033 | \n", + "9.7862597 | \n", + "male | \n", + "2 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 137593 | \n", + "109.265029 | \n", + "0.017960 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-0.002182 | \n", + "... | \n", + "NaN | \n", + "0.030716 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "
| 137594 | \n", + "109.265823 | \n", + "0.019974 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-0.001846 | \n", + "... | \n", + "NaN | \n", + "0.035416 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "
| 137595 | \n", + "109.266618 | \n", + "0.020981 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-0.004196 | \n", + "... | \n", + "NaN | \n", + "0.034745 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "
| 137596 | \n", + "109.267412 | \n", + "0.018631 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-0.005707 | \n", + "... | \n", + "NaN | \n", + "0.035248 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "
| 137597 | \n", + "109.268206 | \n", + "0.019974 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "-0.004196 | \n", + "... | \n", + "NaN | \n", + "0.036591 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "male | \n", + "2 | \n", + "
137598 rows × 32 columns
\n", + "