diff --git a/.gitignore b/.gitignore index 5694eb9e..e2fc5e93 100644 --- a/.gitignore +++ b/.gitignore @@ -70,3 +70,6 @@ MLPR.py docs/Makefile sifibridge-* *.pyc +TEST.ipynb +/DATASET_85 +/DATASET_85PROCESSED diff --git a/docs/source/documentation/data/data_doc.md b/docs/source/documentation/data/data_doc.md index e95b69bf..4858a3ae 100644 --- a/docs/source/documentation/data/data_doc.md +++ b/docs/source/documentation/data/data_doc.md @@ -73,6 +73,57 @@ https://github.com/LibEMG/OneSubjectEMaGerDataset
+ + +
+EMGEPN100 + +
+ +**Dataset Description:** +Multi-hardware EMG dataset for 12 different hand gesture categories using the Myo armband and the G-force armband. + +| Attribute | Description | +|-------------------|------------------------------------------------------------------------------------------------------| +| **Num Subjects:** | 85 | +| **Num Reps:** | 30 Reps x 12 Gestures x 43 Users (Train group), 15 Reps x 12 Gestures x 42 Users (Test group) --> Cross User Split | +| **Classes:** | | +| **Device:** | Myo, gForce | +| **Sampling Rates:** | Myo: 200Hz, gForce: 500Hz | +| **Auto Download:** | False | + +**Using the Dataset:** +```Python +import libemg +from libemg.datasets import get_dataset_list +emg_epn100 = libemg.datasets.EMGEPN100() +# or get_dataset_list(cross_user=True)['EMGEPN100']() +odh = emg_epn100.prepare_data(split=True, segment=True, relabel_seg=None, + channel_last=True, subjects=None)['All'] +``` + +**Dataset Location** +https://laboratorio-ia.epn.edu.ec/es/recursos/dataset/emg-imu-epn-100 + +**References:** +``` +@article{vasconez-2022, + author = {Vásconez, Juan Pablo and López, Lorena Isabel Barona and Caraguay, Ángel Leonardo Valdivieso and Benalcázar, Marco E.}, + journal = {Sensors}, + month = {12}, + number = {24}, + pages = {9613}, + title = {{Hand Gesture Recognition Using EMG-IMU Signals and Deep Q-Networks}}, + volume = {22}, + year = {2022}, + doi = {10.3390/s22249613}, + url = {https://doi.org/10.3390/s22249613}, +} +``` +
+
+ +
@@ -748,7 +799,6 @@ https://github.com/LibEMG/WS_CIIL

- ## Regression diff --git a/libemg/_datasets/__init__.py b/libemg/_datasets/__init__.py index 975fd02b..ef752a37 100644 --- a/libemg/_datasets/__init__.py +++ b/libemg/_datasets/__init__.py @@ -3,6 +3,7 @@ from libemg._datasets import continous_transitions from libemg._datasets import dataset from libemg._datasets import emg_epn612 +from libemg._datasets import emg_epn100 from libemg._datasets import fors_emg from libemg._datasets import fougner_lp from libemg._datasets import grab_myo diff --git a/libemg/_datasets/emg_epn100.py b/libemg/_datasets/emg_epn100.py new file mode 100644 index 00000000..8ed80431 --- /dev/null +++ b/libemg/_datasets/emg_epn100.py @@ -0,0 +1,335 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler +import numpy as np +from libemg.utils import * +import os +import warnings +from typing import Any, Dict, Iterable +import h5py +import numpy as np +from scipy.io import loadmat + + +# FIXED GLOBAL GESTURE MAP +GESTURE_MAP = { # Matches EPN-612 Class IDs + "relax": 0, + "fist": 1, + "wave in": 2, + "wave out": 3, + "open": 4, + "pinch": 5, + "up": 6, + "down": 7, + "left": 8, + "right": 9, + "forward": 10, + "backward": 11, +} + +# MATLAB-derived fixed gesture order (rep 1..180) +GESTURE_ORDER_180 = ( + ["relax"] * 15 + + ["wave in"] * 15 + + ["wave out"] * 15 + + ["fist"] * 15 + + ["open"] * 15 + + ["pinch"] * 15 + + ["up"] * 15 + + ["down"] * 15 + + ["left"] * 15 + + ["right"] * 15 + + ["forward"] * 15 + + ["backward"] * 15 +) +assert len(GESTURE_ORDER_180) == 180 + +# Assigning integer labels to devices +DEVICE_MAP = { + "myo": 0, + "gForce": 1, +} + + +# ======== UTILS ======== +def to_scalar(x): + if isinstance(x, np.ndarray) and x.size == 1: + return x.item() + return x + +def write_h5_scalar(group: h5py.Group, name, value): + name = str(name) + value = to_scalar(value) + + if isinstance(value, (int, float, np.integer, np.floating)): + group.create_dataset(name, data=value) + else: + dt = 
h5py.string_dtype(encoding="utf-8") + group.create_dataset(name, data=np.array(str(value), dtype=dt)) + + +# ======== METADATA EXTRACTION ======== +def extract_metadata(userData) -> Dict[str, Any]: + meta = {} + + def extract_struct(section): + out = {} + for field in section._fieldnames: + out[field] = to_scalar(getattr(section, field)) + return out + + meta["userInfo"] = extract_struct(userData.userInfo) + meta["extraInfo"] = extract_struct(userData.extraInfo) + meta["deviceInfo"] = extract_struct(userData.deviceInfo) + meta["userGroup"] = to_scalar(userData.userGroup) + meta["gestureNameMap"] = {str(v): k for k, v in GESTURE_MAP.items()} + + return meta + + +# ======== CORE PROCESSING ======== +def process_user( + mat_path: str, + out_path: str, + subject_id: int, + is_training_group: bool): + userData = loadmat(mat_path, squeeze_me=True, + struct_as_record=False)["userData"] + + reps_written = 0 + + with h5py.File(out_path, "w") as h5: + # ---- META ---- + meta_grp = h5.create_group("meta") + meta = extract_metadata(userData) + + for section, values in meta.items(): + if isinstance(values, dict): + sec_grp = meta_grp.create_group(section) + for k, v in values.items(): + write_h5_scalar(sec_grp, k, v) + else: + write_h5_scalar(meta_grp, section, values) + + # ---- REPS ---- + reps_grp = h5.create_group("reps") + + def process_block(block, rep_offset: int, max_reps: int): + nonlocal reps_written + for i in range(max_reps): + rep_id = rep_offset + i + entry = block[i] + + if not hasattr(entry, "emg"): + warnings.warn( + f"Missing EMG (subject={subject_id}, rep={rep_id})" + ) + continue + + gesture = GESTURE_ORDER_180[i] + classe = GESTURE_MAP[gesture] + + emg = np.asarray(entry.emg, dtype=np.float32) + point_begins = np.asarray(entry.pointGestureBegins, dtype=np.int64) + + rep_grp = reps_grp.create_group(f"rep_{rep_id:03d}") + rep_grp.create_dataset("emg", data=emg) + rep_grp.create_dataset("gesture", data=classe) + rep_grp.create_dataset("subject", 
data=subject_id) + rep_grp.create_dataset("rep", data=rep_id) + rep_grp.create_dataset("point_begins", data=point_begins) + + reps_written += 1 + + # training block: reps 0..179 + process_block(userData.training, rep_offset=0, max_reps=180) + + # testing block only for training users: reps 180..359 + if is_training_group and hasattr(userData, "testing"): + process_block(userData.testing, rep_offset=180, max_reps=180) + + print(f"Finished user subject={subject_id} | " + f"reps extracted={reps_written} | " + f"output={out_path}") + + +# ======== DATASET WALKER ======== +def process_dataset(root_in: str, root_out: str): + for split in ["training", "testing"]: + in_split = os.path.join(root_in, split) + out_split = os.path.join(root_out, split) + os.makedirs(out_split, exist_ok=True) + + user_dirs = sorted(d for d in os.listdir(in_split) if d.startswith("user_")) + + print(f"\n=== Processing split: {split} ===") + + for idx, user_dir in enumerate(user_dirs): + subject_id = idx + mat_path = os.path.join(in_split, user_dir, "userData.mat") + out_path = os.path.join(out_split, f"{user_dir}.h5") + + print(f"Starting {user_dir} -> subject={subject_id}") + + process_user(mat_path=mat_path, out_path=out_path, + subject_id=subject_id, + is_training_group=(split == "training")) + +# ======== MAIN DATASET CLASS ======== +class EMGEPN100(Dataset): + def __init__(self, dataset_folder: str='DATASET_85'): + Dataset.__init__(self, + sampling={'myo': 200, 'gForce': 500}, + num_channels={'myo': 8, 'gForce': 8}, + recording_device=['myo', 'gForce'], + num_subjects=85, + gestures= GESTURE_MAP, # Matches EPN-612 static classes IDs + num_reps="30 Reps x 12 Gestures x 43 Users (Train group), 15 Reps x 12 Gestures x 42 Users (Test group) --> Cross User Split", + description="Multi-hardware EMG dataset for 12 different hand gesture categories using the myo armband and the G-force armband.", + citation="https://doi.org/10.3390/s22249613") + self.resolution_bit = {'myo': 8, 'gForce': 12} + 
self.dataset_folder = dataset_folder + self.url = "https://laboratorio-ia.epn.edu.ec/es/recursos/dataset/emg-imu-epn-100" + + def _get_odh(self, processed_root, subjects, + segment, relabel_seg, channel_last): + + splits = {"training", "testing"} + odhs = [] + + for split in splits: + split_dir = os.path.join(processed_root, split) + user_files = sorted(f for f in os.listdir(split_dir) if f.endswith(".h5")) + + odh = OfflineDataHandler() + odh.subjects = [] + odh.classes = [] + odh.reps = [] + odh.devices = [] + odh.sampling_rates = [] + odh.extra_attributes = ['subjects', 'classes', 'reps', + 'devices', 'sampling_rates'] + + for user_file in user_files: + path = os.path.join(split_dir, user_file) + + with h5py.File(path, "r") as f: + subject = int(f["reps"]["rep_000"]["subject"][()]) + subject += 43 if split == "testing" else 0 # 43 training group subjects and 42 testing + if subjects is not None: + if subject not in subjects: + continue + + reps = f["reps"] + device_str = f["meta/deviceInfo/DeviceType"][()].decode("utf-8") + device = DEVICE_MAP[device_str] + fs = float(f["meta/deviceInfo/emgSamplingRate"][()]) + + for rep_name in reps: + rep_grp = reps[rep_name] + + gst = int(rep_grp["gesture"][()]) + rep_id = int(rep_grp["rep"][()]) + + _emg = rep_grp["emg"][:].astype(np.float32, copy=False) # [T, CH] + if not channel_last: + _emg = np.transpose(_emg, (1, 0)) # [CH, T] + + if segment and gst != 0: + point_begins = rep_grp["point_begins"][()] + emg = _emg[point_begins:] + else: + emg = _emg + + # ---- Preparing ODH ---- + odh.data.append(emg) + odh.classes.append(np.ones((len(emg), 1)) * gst) + odh.subjects.append(np.ones((len(emg), 1)) * subject) + odh.reps.append(np.ones((len(emg), 1)) * rep_id) + odh.devices.append(np.ones((len(emg), 1)) * device) + odh.sampling_rates.append(np.ones((len(emg), 1)) * fs) + + if segment and gst != 0 and relabel_seg is not None: + assert type(relabel_seg) is int + gst = relabel_seg + + emg = _emg[:point_begins] + + 
odh.data.append(emg) +                            odh.classes.append(np.ones((len(emg), 1)) * gst) +                            odh.subjects.append(np.ones((len(emg), 1)) * subject) +                            odh.reps.append(np.ones((len(emg), 1)) * rep_id) +                            odh.devices.append(np.ones((len(emg), 1)) * device) +                            odh.sampling_rates.append(np.ones((len(emg), 1)) * fs) + +            odhs.append(odh) + +        return odhs + + +    def prepare_data(self, +                     split: bool = False, +                     segment: bool = True, +                     relabel_seg: int | None = None, +                     channel_last: bool = True, +                     subjects: Iterable[int] | None = None) -> OfflineDataHandler: +        """Return processed EPN100 dataset as LibEMG ODH. + +        Parameters +        ---------- +        split: bool or None (optional), default=False +            Whether to return separate training and testing ODHs. +        segment: bool, default=True +            Whether to crop the segment before the 'pointGestureBegins' index in the dataset. +        relabel_seg: int or None (optional), default=None +            If not None, this arg will be used as the relabeling value. +        channel_last: bool, default=True, +            Shape will be (, T, CH) if True otherwise (, CH, T) +        subjects: Iterable[int] or None (optional), default=None +            Subjects to be included in the processed dataset. + +        Returns +        ---------- +        Dict or OfflineDataHandler +            A dictionary of 'All', 'Train' and 'Test' ODHs of processed data or a single OfflineDataHandler if split is False. 
+ """ + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)) and \ + (not self.check_exists( self.dataset_folder + "PROCESSED")): + raise FileNotFoundError("Please download the EPN100+ dataset from: {} " + "and place 'testing' and 'training' folders inside: " + "'{}' folder.".format(self.url, self.dataset_folder)) + + if (not self.check_exists( self.dataset_folder + "PROCESSED")): + process_dataset(self.dataset_folder, self.dataset_folder + "PROCESSED") + + odh_tr, odh_te = self._get_odh(self.dataset_folder + "PROCESSED", + subjects, segment, relabel_seg, channel_last) + + return {'All': odh_tr + odh_te, 'Train': odh_tr, 'Test': odh_te} \ + if split else odh_tr + odh_te + + def get_device_ID(self, device_name: str): + """ + Get device label ID by name + + Parameters + ---------- + device_name: str + Name of the requested device. + + Returns + ---------- + int + Device's ID + """ + + return DEVICE_MAP[device_name] + + + + + \ No newline at end of file diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 5e7a35e8..9c259ea9 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -102,6 +102,8 @@ def prepare_data(self, split = True, subjects = None): if split: data = {'All': odh, 'Train': odh_tr, 'Test': odh_te} + else: + data = odh_tr + odh_te return data class EMGEPN_UserIndependent(EMGEPN612): @@ -112,8 +114,11 @@ def prepare_data(self, split = True, subjects=None, feature_list = None, window_ odh = self.get_odh(subjects, feature_list, window_size, window_inc, feature_dic) odh_tr = odh.isolate_data('subjects', values=list(range(0,306))) odh_te = odh.isolate_data('subjects', values=list(range(306,612))) + if split: data = {'All': odh_tr + odh_te, 'Train': odh_tr, 'Test': odh_te} + else: + data = odh_tr + odh_te return data \ No newline at end of file diff --git a/libemg/_streamers/_OTB_MuoviPlus.py b/libemg/_streamers/_OTB_MuoviPlus.py index ec7391e9..50d697a5 100644 
--- a/libemg/_streamers/_OTB_MuoviPlus.py +++ b/libemg/_streamers/_OTB_MuoviPlus.py @@ -6,7 +6,7 @@ from multiprocessing import Event, Process from libemg.shared_memory_manager import SharedMemoryManager -from crc.crc import Crc8, CrcCalculator +from crc import Crc8, Calculator """ OT Bioelettronica @@ -343,7 +343,7 @@ def _send_packet(self, sig_bits): """Send a packet to the OTB MuoviPlus device.""" if self.client: packet = bytearray(sig_bits) - crc_calc = CrcCalculator(Crc8.MAXIM_DOW) + crc_calc = Calculator(Crc8.MAXIM_DOW) packet.append(crc_calc.calculate_checksum(packet)) self.client.send(packet) @@ -404,4 +404,4 @@ def cleanup(self): def _handle_exit_signal(self, signum, frame): print(f"[OTBStreamer] Received exit signal {signum}, cleaning up.") - self.cleanup() + self.cleanup() \ No newline at end of file diff --git a/libemg/_streamers/_emager_streamer.py b/libemg/_streamers/_emager_streamer.py index bf19c911..0a14426c 100644 --- a/libemg/_streamers/_emager_streamer.py +++ b/libemg/_streamers/_emager_streamer.py @@ -2,14 +2,23 @@ import numpy as np import platform from multiprocessing import Event, Process +from queue import Queue, Empty +import threading from libemg.shared_memory_manager import SharedMemoryManager -def _get_channel_map(): - channel_map = [10, 22, 12, 24, 13, 26, 7, 28, 1, 30, 59, 32, 53, 34, 48, 36] + \ - [62, 16, 14, 21, 11, 27, 5, 33, 63, 39, 57, 45, 51, 44, 50, 40] + \ - [8, 18, 15, 19, 9, 25, 3, 31, 61, 37, 55, 43, 49, 46, 52, 38] + \ - [6, 20, 4, 17, 2, 23, 0, 29, 60, 35, 58, 41, 56, 47, 54, 42] +def _get_channel_map(version: str = "1.0"): + if version == "1.1": + channel_map = [44, 49, 43, 55, 39, 59, 33, 2, 32, 3, 26, 6, 22, 13, 16, 10] + \ + [42, 48, 45, 54, 38, 58, 35, 0, 34, 1, 27, 7, 23, 11, 17, 12] + \ + [46, 52, 40, 51, 36, 56, 31, 60, 30, 63, 25, 4, 21, 8, 18, 15] + \ + [47, 50, 41, 53, 37, 57, 29, 62, 28, 61, 24, 5, 19, 9, 20, 14] + else: + channel_map = [10, 22, 12, 24, 13, 26, 7, 28, 1, 30, 59, 32, 53, 34, 48, 36] + \ + [62, 
16, 14, 21, 11, 27, 5, 33, 63, 39, 57, 45, 51, 44, 50, 40] + \ + [8, 18, 15, 19, 9, 25, 3, 31, 61, 37, 55, 43, 49, 46, 52, 38] + \ + [6, 20, 4, 17, 2, 23, 0, 29, 60, 35, 58, 41, 56, 47, 54, 42] + return channel_map @@ -35,7 +44,7 @@ def reorder(data, mask, match_result): class Emager: - def __init__(self, baud_rate): + def __init__(self, baud_rate, version: str = "1.0"): com_name = 'KitProg3' ports = list(serial.tools.list_ports.comports()) for p in ports: @@ -52,7 +61,7 @@ def __init__(self, baud_rate): ### ^ Number of bytes in message (i.e. channel bytes + header/tail bytes) self.mask = np.array([0, 2] + [0, 1] * 63) ### ^ Template mask for template matching on input data - self.channel_map = _get_channel_map() + self.channel_map = _get_channel_map(version) self.emg_handlers = [] def connect(self): @@ -88,26 +97,265 @@ def close(self): self.ser.close() return +class Emager3: + """Reader for the new Emager3 device which sends framed payloads: + HDR 0xAA55, APP_PAYLOAD=8192 bytes (64x64 samples x 2 bytes), TLR 0x55AA. + The payload is interpreted as 4096 16-bit samples arranged as (time x channel) + when reshaped to (64,64) and transposed to (channel x time). We emit one + 64-channel sample vector to handlers per timepoint (64 vectors per frame). 
+ """ + HDR = b"\xAA\x55" + TLR = b"\x55\xAA" + + def __init__(self, baud_rate, endianness='le', signed=False, com_name=None, vid_pid=(12259, 256), + channels: int = 64, samples_per_frame: int = 64, version: str = "3.0"): + self.com_name = com_name + self.vid_pid = vid_pid + ports = list(serial.tools.list_ports.comports()) + com_port = None + for p in ports: + if self.com_name is None: + if (p.vid, p.pid) == self.vid_pid: + if platform.system() == 'Windows': + com_port = p.name + else: + com_port = p.device.replace('cu', 'tty') + break + else: + if self.com_name in p.description: + if platform.system() == 'Windows': + com_port = p.name + else: + com_port = p.device.replace('cu', 'tty') + break + + if com_port is None: + print(f"Could not find serial port for {self.com_name}") + # include a helpful error that lists all detected serial ports + ports_info = [] + for p in ports: + dev = getattr(p, "device", None) or getattr(p, "name", None) or "" + desc = getattr(p, "description", "") or "" + vid = getattr(p, "vid", None) + pid = getattr(p, "pid", None) + ports_info.append(f"{dev} - {desc} (VID: {vid}, PID: {pid})") + + if ports_info: + avail = "\n".join(f" - {pi}" for pi in ports_info) + else: + avail = " (no serial ports found)" + + raise RuntimeError(f"Could not find serial port for {self.com_name}. 
Available ports:\n{avail}") + + self.ser = serial.Serial(com_port, baud_rate, timeout=1) + self.ser.close() + self._buf = bytearray() + self.emg_handlers = [] + + # dtype selection + if endianness == 'le': + self.sample_dtype = np.int16 if signed else np.uint16 + else: + self.sample_dtype = np.dtype('>i2') if signed else np.dtype('>u2') + + # framing params + self.channels = int(channels) + self.samples_per_frame = int(samples_per_frame) + self.expected_samples = self.channels * self.samples_per_frame + self.APP_PAYLOAD = self.expected_samples * 2 + self.FRAME_SIZE = 2 + self.APP_PAYLOAD + 2 + # frame and counter stats + self.frames_ok = 0 + self.bad_tlr = 0 + self.resyncs = 0 + self.last_ctr = None + self.ctr_miss = 0 + # parser position for incremental search + self.pos = 0 + + def connect(self): + self.ser.open() + + def add_emg_handler(self, closure): + self.emg_handlers.append(closure) + + def clear_buffer(self): + try: + self.ser.reset_input_buffer() + except Exception: + pass + + def close(self): + try: + self.ser.close() + except Exception: + pass + + def _process_frame_payload(self, payload_bytes): + # Decode payload into a full block (samples_per_frame x channels) and emit once per frame. 
+ try: + arr = np.frombuffer(payload_bytes, dtype=self.sample_dtype) + if arr.size != self.expected_samples: + return + # payload is time-major: samples_per_frame x channels + block_time_ch = arr.reshape(self.samples_per_frame, self.channels) + + # Emit the whole block once to each handler (shape: samples x channels) + for h in self.emg_handlers: + try: + h(block_time_ch) + except Exception: + pass + except Exception: + return + + def get_data(self): + # Read what's available and append to buffer, then parse frames + try: + n_av = self.ser.in_waiting + except Exception: + return + + if n_av <= 0: + return + + data = self.ser.read(n_av) + if not data: + return + + self._buf += data + + # incremental frame parsing (mirrors FrameParser.feed logic) + while True: + h = self._buf.find(self.HDR, self.pos) + if h < 0: + keep = min(len(self._buf), self.FRAME_SIZE - 1) + if keep: + self._buf[:] = self._buf[-keep:] + else: + self._buf.clear() + self.pos = 0 + return + + if len(self._buf) - h < self.FRAME_SIZE: + if h > 0: + self._buf[:] = self._buf[h:] + self.pos = 0 + else: + self.pos = h + return + + t0 = h + 2 + self.APP_PAYLOAD + if self._buf[t0:t0+2] == self.TLR: + # good frame + self.frames_ok += 1 + p = h + 2 + # counter (first 4 bytes of payload, big-endian) + try: + ctr = ((self._buf[p] << 24) | + (self._buf[p+1] << 16) | + (self._buf[p+2] << 8) | + self._buf[p+3]) + if self.last_ctr is not None: + expected = (self.last_ctr + 1) & 0xFFFFFFFF + if ctr != expected: + self.ctr_miss += 1 + self.last_ctr = ctr + except Exception: + pass + + try: + payload_bytes = bytes(self._buf[p : p + self.APP_PAYLOAD]) + self._process_frame_payload(payload_bytes) + except Exception: + pass + + self.pos = h + self.FRAME_SIZE + if self.pos > (self.FRAME_SIZE * 2): + self._buf[:] = self._buf[self.pos:] + self.pos = 0 + else: + self.bad_tlr += 1 + self.resyncs += 1 + self.pos = h + 1 + if self.pos > (self.FRAME_SIZE * 2): + self._buf[:] = self._buf[self.pos:] + self.pos = 0 + class 
EmagerStreamer(Process): - def __init__(self, shared_memory_items): + def __init__(self, shared_memory_items, version: str = "v1", emager_kwargs: dict | None = None): + """ + :param shared_memory_items: list[(name, shape, dtype, lock)] + :param version: str Emager version: 'v1.0', 'v1.1', 'v3.0' + :param emager_kwargs: dict passed to Emager/Emager3. Supported keys: + baud_rate (int, default 1500000 or 5000000 for v3), endianness ('le'), signed (bool), + com_name, vid_pid (tuple), channels (int), samples_per_frame (int) + """ super().__init__(daemon=True) self.smm = SharedMemoryManager() self.shared_memory_items = shared_memory_items self._stop_event = Event() self.e = None + + version = version.strip().lower().lstrip('v').replace('_', '.') + if '.' not in version: + version += '.0' + if version not in ['1.0', '1.1', '3.0']: + raise ValueError(f"Unsupported Emager version: {version}") + self.version = version + self.emager_kwargs = emager_kwargs or {} def run(self): for item in self.shared_memory_items: self.smm.create_variable(*item) - self.e = Emager(1500000) + # Instantiate the appropriate Emager reader based on version and kwargs + if self.version == "3.0": + bw = self.emager_kwargs + baud = bw.get('baud_rate', 5000000) + endianness = bw.get('endianness', 'le') + signed = bw.get('signed', False) + com_name = bw.get('com_name', None) + vid_pid = bw.get('vid_pid', (12259, 256)) + channels = bw.get('channels', 64) + samples_per_frame = bw.get('samples_per_frame', 64) + self.e = Emager3(baud, endianness=endianness, signed=signed, com_name=com_name, vid_pid=vid_pid, + channels=channels, samples_per_frame=samples_per_frame, version=self.version) + else: + baud = self.emager_kwargs.get('baud_rate', 1500000) + self.e = Emager(baud, version=self.version) self.e.connect() + # Create a queue and writer thread to offload shared-memory writes + q: Queue = Queue(maxsize=100) + + def writer_thread_fn(): + while not self._stop_event.is_set(): + try: + block = q.get(timeout=0.1) 
+ except Empty: + continue + try: + # block is samples x channels; stack new rows on top and keep window + self.smm.modify_variable('emg', lambda x, b=block: np.vstack((b, x))[:x.shape[0], :]) + # increment count by number of rows written + rows = block.shape[0] if hasattr(block, 'shape') else 1 + self.smm.modify_variable('emg_count', lambda x, r=rows: x + r) + except Exception: + pass + finally: + q.task_done() + + writer = threading.Thread(target=writer_thread_fn, daemon=True) + writer.start() + + def write_emg(emg_block): + # emg_block expected shape: samples x channels (numpy array) + try: + q.put_nowait(np.array(emg_block)) + except Exception: + # if queue full, drop + pass - def write_emg(emg): - emg = np.array(emg) - self.smm.modify_variable('emg', lambda x: np.vstack((emg, x))[:x.shape[0], :]) - self.smm.modify_variable('emg_count', lambda x: x + 1) - self.e.add_emg_handler(write_emg) try: diff --git a/libemg/data_handler.py b/libemg/data_handler.py index a1cbdf6a..61f56056 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -423,7 +423,8 @@ def active_threshold(self, nm_windows, active_windows, active_labels, num_std=3, print(f"{num_relabeled} of {len(active_labels)} active class windows were relabelled to no motion.") return active_labels - def parse_windows(self, window_size, window_increment, metadata_operations=None): + def parse_windows(self, window_size, window_increment, metadata_operations=None, + multi_rate=False, sampling_rate_key='sampling_rates'): """Parses windows based on the acquired data from the get_data function. Parameters @@ -439,15 +440,25 @@ def parse_windows(self, window_size, window_increment, metadata_operations=None) a function handle that takes in an ndarray of size (window_size, ) and returns a single value to represent the metadata for that window. Passing in a string will map from that string to the specified operation. 
The windowing of only the attributes specified in this dictionary will be modified - all other attributes will default to the mode. If None, all attributes default to the mode. Defaults to None. +        multi_rate: bool, default=False +            Should be True if the dataset contains sensors with different sampling rates; then window_size and window_increment must be in ms. +        sampling_rate_key: str, default='sampling_rates' +            the key in metadata where the sampling frequency is stored. Used if multi_rate is True.          Returns         ----------          list -            A np.ndarray of size windows x channels x samples. +            A list of size windows x channels x samples. If multi_rate is True, window sizes will vary +            since the window size will be based on time for multiple sampling rates.         list             A dictionary containing np.ndarrays for each metadata tag of the dataset. Each window will  have an associated value for each metadata. Therefore, the dimensions of the metadata should be Wx1 for each field. +            If multi_rate is True, each key will contain a list of the same size as windows. 
""" + + if multi_rate: + return self._multi_rate_parse_windows_helper(window_size, window_increment, metadata_operations, sampling_rate_key) + return self._parse_windows_helper(window_size, window_increment, metadata_operations) def _parse_windows_helper(self, window_size, window_increment, metadata_operations): @@ -486,15 +497,63 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio return np.vstack(window_data), {k: np.concatenate(metadata[k], axis=0) for k in metadata.keys()} + def _multi_rate_parse_windows_helper(self, window_ms, stride_ms, metadata_operations, + sampling_rate_key='sampling_rates'): + common_metadata_operations = { + 'mean': np.mean, + 'median': np.median, + 'last_sample': lambda x: x[-1] + } + window_data = [] + metadata = {k: [] for k in self.extra_attributes} + for i, file in enumerate(self.data): + + # Calculating window size and increment based on given time in ms and sensor sampling rate + fs = getattr(self,sampling_rate_key)[i][0].item() + window_size = int(np.ceil(window_ms * fs / 1000.0)) + window_increment = int(np.ceil(stride_ms * fs / 1000.0)) + + if window_size <= 0 or window_increment <= 0: + raise ValueError("Window or stride length <= 0 samples for the given time in ms") + + # emg data windowing + window_data.append(get_windows(file,window_size,window_increment)) + + for k in self.extra_attributes: + if type(getattr(self,k)[i]) != np.ndarray: + file_metadata = np.ones((window_data[-1].shape[0])) * getattr(self, k)[i] + else: + if metadata_operations is not None: + if k in metadata_operations.keys(): + # do the specified operation + operation = metadata_operations[k] + + if isinstance(operation, str): + try: + operation = common_metadata_operations[operation] + except KeyError as e: + raise KeyError(f"Unexpected metadata operation string. Please pass in a function or an accepted string {tuple(common_metadata_operations.keys())}. 
Got: {operation}.") +                        file_metadata = _get_fn_windows(getattr(self,k)[i], window_size, window_increment, operation) +                    else: +                        file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) +                else: +                    file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) + +                metadata[k].append(file_metadata) + +        return window_data, metadata  -    def isolate_channels(self, channels): +    def isolate_channels(self, channels, channel_last=True):         """Entry point for isolating a certain range of channels.           Parameters         ----------         channels: list             A list of values (i.e., channels) that you want to isolate. (e.g., [0,1,2]). Indexing starts at 0. -         + +        channel_last: bool, default=True +            the order of the Time and Channel axes in data. +         Returns         ----------         OfflineDataHandler @@ -508,7 +567,10 @@ def isolate_channels(self, channels):         new_odh = copy.deepcopy(self)         # TODO: Optimize this         for i in range(0, len(new_odh.data)): -            new_odh.data[i] = new_odh.data[i][:,channels] +            if channel_last: +                new_odh.data[i] = new_odh.data[i][:,channels] +            else: +                new_odh.data[i] = new_odh.data[i][channels,:]         return new_odh      def isolate_data(self, key, values, fast=True): diff --git a/libemg/datasets.py b/libemg/datasets.py index fe6f95ce..da58e1ab 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -19,6 +19,7 @@ from libemg.offline_metrics import OfflineMetrics from libemg.filtering import Filter from libemg._datasets.emg2pose import EMG2POSEUD, EMG2POSECU +from libemg._datasets.emg_epn100 import EMGEPN100 from sklearn.preprocessing import StandardScaler import pickle import numpy as np @@ -45,6 +46,7 @@ def get_dataset_list(type='CLASSIFICATION', cross_user=False):      cross_user_classification = {         'EMGEPN612': EMGEPN_UserIndependent, +        'EMGEPN100': EMGEPN100,     }      cross_user_regression = { @@ -69,6 +71,7 @@ def get_dataset_list(type='CLASSIFICATION', cross_user=False):         'TMR_Post' : TMR_Post,         'TMR_Pre': TMR_Pre,         'HyserPR': HyserPR, +        'EMGEPN100': EMGEPN100,     } 
regression = { diff --git a/libemg/streamers.py b/libemg/streamers.py index cdd69731..c2cc49e8 100644 --- a/libemg/streamers.py +++ b/libemg/streamers.py @@ -474,7 +474,7 @@ def oymotion_streamer(shared_memory_items : list | None = None, -def emager_streamer(shared_memory_items = None): +def emager_streamer(shared_memory_items = None, version:str = "v1.0", **kwargs): """The streamer for the emager armband. This function connects to the emager cuff and streams its data over a serial port and access it via shared memory. @@ -484,6 +484,11 @@ def emager_streamer(shared_memory_items = None): shared_memory_items : list (optional) Shared memory configuration parameters for the streamer in format: ["tag", (size), datatype]. + version: str Emager version: 'v1.0', 'v1.1', 'v3.0'. Default is 'v1.0'. + emager_kwargs: dict passed to Emager/Emager3. Supported keys: + baud_rate (int, default 1500000), endianness ('le'), signed (bool), + com_name, vid_pid (tuple), channels (int), samples_per_frame (int) + Returns ---------- Object: streamer @@ -501,8 +506,11 @@ def emager_streamer(shared_memory_items = None): shared_memory_items.append(['emg_count', (1, 1), np.int32]) for item in shared_memory_items: - item.append(Lock()) - ema = EmagerStreamer(shared_memory_items) + if len(item) == 3: + item.append(Lock()) + + # Use unified EmagerStreamer and pass emager version + kwargs + ema = EmagerStreamer(shared_memory_items, version=version, emager_kwargs=kwargs) ema.start() return ema, shared_memory_items diff --git a/libemg/utils.py b/libemg/utils.py index 8422dd5a..96cea6d4 100644 --- a/libemg/utils.py +++ b/libemg/utils.py @@ -7,7 +7,7 @@ from matplotlib.patches import Circle -def get_windows(data, window_size, window_increment): +def get_windows(data, window_size, window_increment, channel_last=False): """Extracts windows from a given set of data. Parameters @@ -18,30 +18,33 @@ def get_windows(data, window_size, window_increment): The number of samples in a window. 
window_increment: int The number of samples that advances before next window. + channel_last: bool, default=False + Output will be NxLxC if True. By default the LibEMG feature extractor assumes default is False. Returns ---------- list The set of windows extracted from the data as a NxCxL where N is the number of windows, C is the number of channels - and L is the length of each window. + and L is the length of each window. Output will be NxLxC if channel_last is True. Examples --------- >>> data = np.loadtxt('data.csv', delimiter=',') >>> windows = get_windows(data, 100, 50) """ - num_windows = int((data.shape[0]-window_size)/window_increment) + 1 - windows = [] - st_id=0 - ed_id=st_id+window_size - for _ in range(num_windows): - if data.ndim == 1: - windows.append([data[st_id:ed_id].transpose()]) # One Channel EMG - else: - windows.append(data[st_id:ed_id,:].transpose()) - st_id += window_increment - ed_id += window_increment - return np.array(windows) + data = np.array(data) + if data.ndim == 1: + data = np.expand_dims(data, axis=-1) + + T = data.shape[0] + starts = np.arange(0, T - window_size + 1, window_increment) + idx = starts[:, None] + np.arange(window_size)[None, :] + + windows = data[idx] + if not channel_last: + windows = np.transpose(windows, (0, 2, 1)) + + return windows def _get_mode_windows(data, window_size, window_increment): windows = get_windows(data, window_size, window_increment) diff --git a/requirements.txt b/requirements.txt index 12b0c640..d799b59b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,3 +32,4 @@ h5py onedrivedownloader sifi-bridge-py mindrove +crc \ No newline at end of file diff --git a/setup.py b/setup.py index f5a6e728..4dd3a297 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ # python -m twine upload --repository testpypi dist/* --verbose <------ testpypi # -VERSION = "2.0.1" +VERSION = "2.0.3" DESCRIPTION = "LibEMG - Myoelectric Control Library" LONG_DESCRIPTION = "A library for designing and exploring 
real-time and offline myoelectric control systems." @@ -44,7 +44,8 @@ "onedrivedownloader", "sifi-bridge-py", "pygame", - "mindrove" + "mindrove", + "crc" ], keywords=[ "emg", diff --git a/tests/multi_rate_dataset.ipynb b/tests/multi_rate_dataset.ipynb new file mode 100644 index 00000000..20ed01bf --- /dev/null +++ b/tests/multi_rate_dataset.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ea407210", + "metadata": {}, + "source": [ + "# EMGEPN100\n", + "# An example for datasets with multiple hardware (sampling rates)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6e4d00eb", + "metadata": {}, + "outputs": [], + "source": [ + "import libemg\n", + "from libemg.datasets import get_dataset_list\n", + "import numpy as np " + ] + }, + { + "cell_type": "markdown", + "id": "58dba541", + "metadata": {}, + "source": [ + "# The dataset in 'get_dataset_list'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a11473c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'EMGEPN100' in get_dataset_list(cross_user=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b69cec65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'EMGEPN100' in get_dataset_list(cross_user=False)" + ] + }, + { + "cell_type": "markdown", + "id": "5e00b600", + "metadata": {}, + "source": [ + "# Prepare data" + ] + }, + { + "cell_type": "markdown", + "id": "3e6e576d", + "metadata": {}, + "source": [ + "##### Here we process the .mat files into .h5 format (done once and saved), and then prepare the data, with data being segmented based on the 'pointGestureBegins' index provided by the dataset (EMGEPN100)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "010f9ab2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Please cite: https://doi.org/10.3390/s22249613\n", + "\n", + "\n", + "=== Processing split: training ===\n", + "Starting user_001 -> subject=0\n", + "Finished user subject=0 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_001.h5\n", + "Starting user_002 -> subject=1\n", + "Finished user subject=1 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_002.h5\n", + "Starting user_003 -> subject=2\n", + "Finished user subject=2 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_003.h5\n", + "Starting user_004 -> subject=3\n", + "Finished user subject=3 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_004.h5\n", + "Starting user_005 -> subject=4\n", + "Finished user subject=4 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_005.h5\n", + "Starting user_006 -> subject=5\n", + "Finished user subject=5 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_006.h5\n", + "Starting user_007 -> subject=6\n", + "Finished user subject=6 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_007.h5\n", + "Starting user_008 -> subject=7\n", + "Finished user subject=7 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_008.h5\n", + "Starting user_009 -> subject=8\n", + "Finished user subject=8 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_009.h5\n", + "Starting user_010 -> subject=9\n", + "Finished user subject=9 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_010.h5\n", + "Starting user_011 -> subject=10\n", + "Finished user subject=10 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_011.h5\n", + "Starting user_012 -> subject=11\n", + "Finished user subject=11 | reps extracted=360 | 
output=DATASET_85PROCESSED\\training\\user_012.h5\n", + "Starting user_013 -> subject=12\n", + "Finished user subject=12 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_013.h5\n", + "Starting user_014 -> subject=13\n", + "Finished user subject=13 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_014.h5\n", + "Starting user_015 -> subject=14\n", + "Finished user subject=14 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_015.h5\n", + "Starting user_016 -> subject=15\n", + "Finished user subject=15 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_016.h5\n", + "Starting user_017 -> subject=16\n", + "Finished user subject=16 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_017.h5\n", + "Starting user_018 -> subject=17\n", + "Finished user subject=17 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_018.h5\n", + "Starting user_019 -> subject=18\n", + "Finished user subject=18 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_019.h5\n", + "Starting user_020 -> subject=19\n", + "Finished user subject=19 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_020.h5\n", + "Starting user_021 -> subject=20\n", + "Finished user subject=20 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_021.h5\n", + "Starting user_022 -> subject=21\n", + "Finished user subject=21 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_022.h5\n", + "Starting user_023 -> subject=22\n", + "Finished user subject=22 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_023.h5\n", + "Starting user_024 -> subject=23\n", + "Finished user subject=23 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_024.h5\n", + "Starting user_025 -> subject=24\n", + "Finished user subject=24 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_025.h5\n", + "Starting user_026 -> subject=25\n", + "Finished user subject=25 | reps 
extracted=360 | output=DATASET_85PROCESSED\\training\\user_026.h5\n", + "Starting user_027 -> subject=26\n", + "Finished user subject=26 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_027.h5\n", + "Starting user_028 -> subject=27\n", + "Finished user subject=27 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_028.h5\n", + "Starting user_029 -> subject=28\n", + "Finished user subject=28 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_029.h5\n", + "Starting user_030 -> subject=29\n", + "Finished user subject=29 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_030.h5\n", + "Starting user_031 -> subject=30\n", + "Finished user subject=30 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_031.h5\n", + "Starting user_032 -> subject=31\n", + "Finished user subject=31 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_032.h5\n", + "Starting user_033 -> subject=32\n", + "Finished user subject=32 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_033.h5\n", + "Starting user_034 -> subject=33\n", + "Finished user subject=33 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_034.h5\n", + "Starting user_035 -> subject=34\n", + "Finished user subject=34 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_035.h5\n", + "Starting user_036 -> subject=35\n", + "Finished user subject=35 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_036.h5\n", + "Starting user_037 -> subject=36\n", + "Finished user subject=36 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_037.h5\n", + "Starting user_038 -> subject=37\n", + "Finished user subject=37 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_038.h5\n", + "Starting user_039 -> subject=38\n", + "Finished user subject=38 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_039.h5\n", + "Starting user_040 -> subject=39\n", + "Finished user 
subject=39 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_040.h5\n", + "Starting user_041 -> subject=40\n", + "Finished user subject=40 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_041.h5\n", + "Starting user_042 -> subject=41\n", + "Finished user subject=41 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_042.h5\n", + "Starting user_043 -> subject=42\n", + "Finished user subject=42 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_043.h5\n", + "\n", + "=== Processing split: testing ===\n", + "Starting user_001 -> subject=0\n", + "Finished user subject=0 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_001.h5\n", + "Starting user_002 -> subject=1\n", + "Finished user subject=1 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_002.h5\n", + "Starting user_003 -> subject=2\n", + "Finished user subject=2 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_003.h5\n", + "Starting user_004 -> subject=3\n", + "Finished user subject=3 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_004.h5\n", + "Starting user_005 -> subject=4\n", + "Finished user subject=4 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_005.h5\n", + "Starting user_006 -> subject=5\n", + "Finished user subject=5 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_006.h5\n", + "Starting user_007 -> subject=6\n", + "Finished user subject=6 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_007.h5\n", + "Starting user_008 -> subject=7\n", + "Finished user subject=7 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_008.h5\n", + "Starting user_009 -> subject=8\n", + "Finished user subject=8 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_009.h5\n", + "Starting user_010 -> subject=9\n", + "Finished user subject=9 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_010.h5\n", + "Starting user_011 
-> subject=10\n", + "Finished user subject=10 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_011.h5\n", + "Starting user_012 -> subject=11\n", + "Finished user subject=11 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_012.h5\n", + "Starting user_013 -> subject=12\n", + "Finished user subject=12 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_013.h5\n", + "Starting user_014 -> subject=13\n", + "Finished user subject=13 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_014.h5\n", + "Starting user_015 -> subject=14\n", + "Finished user subject=14 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_015.h5\n", + "Starting user_016 -> subject=15\n", + "Finished user subject=15 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_016.h5\n", + "Starting user_017 -> subject=16\n", + "Finished user subject=16 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_017.h5\n", + "Starting user_018 -> subject=17\n", + "Finished user subject=17 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_018.h5\n", + "Starting user_019 -> subject=18\n", + "Finished user subject=18 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_019.h5\n", + "Starting user_020 -> subject=19\n", + "Finished user subject=19 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_020.h5\n", + "Starting user_021 -> subject=20\n", + "Finished user subject=20 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_021.h5\n", + "Starting user_022 -> subject=21\n", + "Finished user subject=21 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_022.h5\n", + "Starting user_023 -> subject=22\n", + "Finished user subject=22 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_023.h5\n", + "Starting user_024 -> subject=23\n", + "Finished user subject=23 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_024.h5\n", + "Starting 
user_025 -> subject=24\n", + "Finished user subject=24 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_025.h5\n", + "Starting user_026 -> subject=25\n", + "Finished user subject=25 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_026.h5\n", + "Starting user_027 -> subject=26\n", + "Finished user subject=26 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_027.h5\n", + "Starting user_028 -> subject=27\n", + "Finished user subject=27 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_028.h5\n", + "Starting user_029 -> subject=28\n", + "Finished user subject=28 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_029.h5\n", + "Starting user_030 -> subject=29\n", + "Finished user subject=29 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_030.h5\n", + "Starting user_031 -> subject=30\n", + "Finished user subject=30 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_031.h5\n", + "Starting user_032 -> subject=31\n", + "Finished user subject=31 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_032.h5\n", + "Starting user_033 -> subject=32\n", + "Finished user subject=32 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_033.h5\n", + "Starting user_034 -> subject=33\n", + "Finished user subject=33 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_034.h5\n", + "Starting user_035 -> subject=34\n", + "Finished user subject=34 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_035.h5\n", + "Starting user_036 -> subject=35\n", + "Finished user subject=35 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_036.h5\n", + "Starting user_037 -> subject=36\n", + "Finished user subject=36 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_037.h5\n", + "Starting user_038 -> subject=37\n", + "Finished user subject=37 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_038.h5\n", + 
"Starting user_039 -> subject=38\n", + "Finished user subject=38 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_039.h5\n", + "Starting user_040 -> subject=39\n", + "Finished user subject=39 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_040.h5\n", + "Starting user_041 -> subject=40\n", + "Finished user subject=40 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_041.h5\n", + "Starting user_042 -> subject=41\n", + "Finished user subject=41 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_042.h5\n" + ] + } + ], + "source": [ + "emg_epn100 = libemg.datasets.EMGEPN100() # or get_dataset_list(cross_user=True)['EMGEPN100']()\n", + "dataset = emg_epn100.prepare_data(split=True, segment=True, relabel_seg=None, \n", + " channel_last=True, subjects=None)['All']" + ] + }, + { + "cell_type": "markdown", + "id": "01ad978e", + "metadata": {}, + "source": [ + "# Windowing" + ] + }, + { + "cell_type": "markdown", + "id": "d6d828b4", + "metadata": {}, + "source": [ + "##### Simple windowing while ignoring the sampling rate differences. All windows will have similar shapes but logically not recommended for datasets with multiple sampling rates." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "952c5b79", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(304244, 8, 20)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = dataset.isolate_data(\"subjects\", list(range(10)), fast=True)\n", + "windows, meta = dataset.parse_windows(20, 20)\n", + "windows.shape" + ] + }, + { + "cell_type": "markdown", + "id": "bfe3698f", + "metadata": {}, + "source": [ + "##### Here we set multi_rate=True so the given window size and window increment will be time based (ms). 
The actual window size will be calculated based on frequency in the sampling_rate_key.\n", + "##### The output will be a list of reps for the windows, and is non rectangular due to different sampling rates and fixed time-based window size. The metadata file, will have a list of the same size as windows, for each of the keys." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "862839d5", + "metadata": {}, + "outputs": [], + "source": [ + "# Time based windowing\n", + "windows, meta = dataset.parse_windows(250, # ms\n", + " 20, # ms\n", + " multi_rate=True, \n", + " sampling_rate_key='sampling_rates')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ceb3670c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3600, 3600, 3600)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(windows), len(meta['sampling_rates']), len(meta['classes'])" + ] + }, + { + "cell_type": "markdown", + "id": "19b69a65", + "metadata": {}, + "source": [ + "##### Here we see that there are two different Time axis shapes: 125 and 50, both 250ms for 500 Hz and 200 Hz sensors respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2f667152", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 50, 125])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.unique([w.shape[-1] for w in windows])" + ] + }, + { + "cell_type": "markdown", + "id": "a99b7c53", + "metadata": {}, + "source": [ + "##### We can also easily isolate different sensors with different sampling rates so we can use the normal window parser for rectangular batch of windows with fixed time-based window size" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e1529d1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(288025, 8, 40)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Window size of 40 @ 500 Hz sensor = 80 ms windows\n", + "\n", + "dataset_g = dataset.isolate_data(\"devices\", [emg_epn100.get_device_ID('gForce')], fast=True)\n", + "windows, meta = dataset_g.parse_windows(40, 20)\n", + "windows.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "65ec0717", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(12619, 8, 40)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Window size of 40 @ 200 Hz sensor = 200 ms windows\n", + "\n", + "dataset_m = dataset.isolate_data(\"devices\", [emg_epn100.get_device_ID('myo')], fast=True)\n", + "windows, meta = dataset_m.parse_windows(40, 20)\n", + "windows.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}