diff --git a/.gitignore b/.gitignore index a59753e..851c5ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.gpx *.csv *.fit +__pycache__/ +*.lock diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..24ee5b1 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.13 diff --git a/ReadMe.md b/ReadMe.md index f64ad63..9871a8f 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -1,52 +1,67 @@ # PythonHeatmap -This is a simple way to visualize GPS data from Strava/Garmin/Polar in either the csv, fit, and/or gpx formats on an attractive and interactive interface. -This was a side-project while I helped organize the McGill Physics Hackathon. +PythonHeatmap is a tool for visualizing GPS data from Strava, Garmin, Polar, and other fitness platforms in FIT and/or GPX formats as interactive heatmaps. -If you are interested in physics or programming, hackathons are a great idea. If you're curious about more of my work, the linked pages at mrhheffernan.github.io provide links to more information. +## Features -An additional feature is present to color the lines by heart rate, which can be found in the hr_color branch by tjrademaker. The code in that branch also incoporates tcx file support. All original code there is written by tjrademaker and is offered under the MIT License. +- Supports GPX and FIT file formats +- Interactive HTML heatmaps using Folium +- CLI arguments for customization +- Configurable timezone handling ## Getting Started -Download your data as a gpx, csv, or fit file from your provider of choice. For advanced users, `selenium_downloader.py` is provided to automate this process. These users will have to specify some paths and have selenium/chromium configured before running the script. Additionally, they will have to supply a file called `login_info.secret`. This file should contain `username,password,athlete_id` and will be read in by `selenium_downloader.py`. 
Currently, `selenium_downloader.py` may not export all data, but is intended for use for the past 12 months of activities. It sometimes downloads more. +### Prerequisites -Most users can simply request their data as a download from Strava. +Python 3.13 or higher is required. Dependencies are installed automatically by uv when a script is launched with `uv run`, so no manual `pip install` step is needed; uv itself must be installed first. -Note that extra python packages may be required if you have fit files, as the binary files are not easily readable on all systems. Just download the python files here and run them! This is also written in to be compatible with Python 3.7, certain rewrites will be necessary if using Python2. +### Obtaining Data -I'm Montreal based, so the map is currently designed to center on Montreal. To correct for this, change "Montreal Quebec" to your location! +Download GPS data from your fitness platform. For Strava, bulk exports are available under account settings. For Garmin Connect, compressed `.fit.gz` files may need to be extracted: +```bash +gunzip *.fit.gz +``` -### Prerequisites +For automated downloading, `selenium_downloader.py` is provided. This requires: +- Selenium and Chromium configured +- A `login_info.secret` file containing `username,password,athlete_id` -Certain Python modules are required. They are: numpy, pandas, geopy, folium, gpxpy, fitparse, and pytz. To download any and all of these in one fell swoop, the below code is provided. +Note: The selenium downloader may not export all historical data and is best suited for the most recent 12 months of activities. This script is not maintained or tested with updates to Strava's UI and may require adjustments to work with current Strava versions. -``` -pip install numpy pandas geopy folium gpxpy fitparse pytz -``` +### Running -Also required prerequisites are GPS tracks. On Strava, these are available for bulk download under settings. If files have been uploaded via Garmin Connect, there may be compressed .fit files in .fit.gz format. 
To unzip these (at least in linux/unix-based systems): -``` -gunzip *.fit.gz -``` +Run from the directory containing your GPS files: -## Running the tests +```bash +uv run personal_heatmap.py +``` -The heatmap will be output in a html file, which is viewable in a web browser. Currently, there is no native folium support for image exports, so screenshots of relevant areas is the recommended strategy. +#### CLI Options -The Python is designed to run in the same directory as the GPS files, so make sure this is the case. +- `--dir`: Directory containing .fit and .gpx files (default: current directory) +- `--timezone`: Timezone for timestamps, e.g., 'US/Pacific' (default: 'US/Pacific') +- `--output_path`: Path for the output heatmap HTML file (default: 'heatmap.html') -To run: +For FIT to CSV conversion: +```bash +uv run fit_to_csv.py --dir /path/to/files --timezone US/Pacific ``` -python personal_heatmap.py + +- `--overwrite`: Overwrite existing CSV files + +For the simple matplotlib-based heatmap: + +```bash +uv run simple_heatmap.py --dir /path/to/files --output_path output.png ``` -## License +## Output -Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL. +The heatmap is generated as an HTML file viewable in any web browser. Use the interactive map controls to navigate and zoom to desired areas. 
+## License Original Python Copyright 2018 Matthew Heffernan Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: @@ -55,21 +70,8 @@ The above copyright notice and this permission notice shall be included in all c THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -This code is by Matthew Heffernan. As long as you retain this notice you -can do whatever you want with this stuff, subject to the conditions above. -If we meet some day, and you think this stuff is worth it, you can buy me a beer -in return. - Matthew Heffernan - ## Acknowledgements -This code is built with a combination of original and unlicensed code. Special thanks are due to the developers working to make the FIT file format more accessible, especially Max Candocia whose fit_to_csv code is instrumental and included here. Source: https://maxcandocia.com/article/2017/Sep/22/converting-garmin-fit-to-csv/ - -Additional thanks are due to the McGill Physics Hackathon 2018, during which I wrote this code while assisting many capable hackers visualize physics concepts. Their dedication and the unlimited coffee were inspirational to the development of this project. 
- -## simple_heatmap.py -This is a simple heatmap which does not superimpose the tracks on a map, but does provide a simple playground for plotting tracks. This reproduces much of the functionality of some prominent Strava apps, but full resolution is gained for free and is more customizable with matplotlib. Enjoy! This script will additionally required the matplotlib module. -This doesn't automatically center, but the native zooming interface will allow you to better crop the heatmap for use on social media. The GUI save feature is recommended. +This code builds upon original work and tools for making the FIT file format more accessible. Special thanks to Max Candocia, whose fit_to_csv code is instrumental to this project. Source: https://maxcandocia.com/article/2017/Sep/22/converting-garmin-fit-to-csv/ -## Upcoming work: -..*Add option to plot heatmap in style of: http://qingkaikong.blogspot.com/2016/06/using-folium-3-heatmap.html -..*Broadening the scope of `selenium_downloader.py` +Additional thanks to the McGill Physics Hackathon 2018, during which this project was developed while assisting participants with visualizing physics concepts. 
\ No newline at end of file diff --git a/fit_to_csv.py b/fit_to_csv.py index f78af48..59df5be 100644 --- a/fit_to_csv.py +++ b/fit_to_csv.py @@ -1,12 +1,14 @@ +import argparse import csv +import glob import os +from datetime import timezone +from typing import Any +from zoneinfo import ZoneInfo -# to install fitparse, run -# sudo pip3 install -e git+https://github.com/dtcooper/python-fitparse#egg=python-fitparse import fitparse -import pytz -allowed_fields = [ +FIELDS_ALLOWED = [ "timestamp", "position_lat", "position_long", @@ -15,61 +17,120 @@ "altitude", "enhanced_speed", "speed", + "avg_heart_rate", "heart_rate", "cadence", "fractional_cadence", ] -required_fields = ["timestamp", "position_lat", "position_long", "altitude"] +FIELDS_REQUIRED = ["timestamp", "position_lat", "position_long"] -UTC = pytz.UTC -CST = pytz.timezone("US/Central") +UTC = timezone.utc +TZ = ZoneInfo("US/Pacific") -def main(): - files = os.listdir() - fit_files = [file for file in files if file[-4:].lower() == ".fit"] - for file in fit_files: - new_filename = file[:-4] + ".csv" - if os.path.exists(new_filename): - # print('%s already exists. skipping.' 
% new_filename) - continue - fitfile = fitparse.FitFile( - file, data_processor=fitparse.StandardUnitsDataProcessor() - ) +def write_to_csv(data: list[dict[str, Any]], output_path: str) -> None: + """Write extracted data fields from the .fit messages to file - print("converting %s" % file) - write_fitfile_to_csv(fitfile, new_filename) - print("finished conversions") + Args: + data (list[dict[str, Any]]): Data from messages + output_path (str): Output path + """ + # write to csv + with open(output_path, "w") as f: + writer = csv.writer(f) + writer.writerow(FIELDS_ALLOWED) + for entry in data: + writer.writerow([str(entry.get(k, "")) for k in FIELDS_ALLOWED]) + print("wrote %s" % output_path) -def write_fitfile_to_csv(fitfile, output_file="test_output.csv"): - messages = fitfile.messages +def collect_data(filepath: str, tz: ZoneInfo = TZ) -> list[dict[str, Any]]: + """Collects data from the .fit file at filepath + + Args: + filepath (str): Path to .fit file + tz (ZoneInfo, optional): Timezone identifier. Defaults to TZ. 
+ + Returns: + list[dict[str, Any]]: List of dicts containing relevant data from each message in the .fit + """ + # Parse the .fit file + fitfile = fitparse.FitFile( + filepath, data_processor=fitparse.StandardUnitsDataProcessor() + ) + data = [] + messages = fitfile.messages + for m in messages: skip = False if not hasattr(m, "fields"): continue fields = m.fields - # check for important data types + + # check for desired data and collect it mdata = {} for field in fields: - if field.name in allowed_fields: + if field.name in FIELDS_ALLOWED: if field.name == "timestamp": - mdata[field.name] = UTC.localize(field.value).astimezone(CST) + timestamp_value = field.value + if timestamp_value.tzinfo is None: + timestamp_value = timestamp_value.replace(tzinfo=UTC) + mdata[field.name] = timestamp_value.astimezone(tz) else: mdata[field.name] = field.value - for rf in required_fields: - if rf not in mdata: + + for required_field in FIELDS_REQUIRED: + if required_field not in mdata: skip = True + if not skip: data.append(mdata) - # write to csv - with open(output_file, "w") as f: - writer = csv.writer(f) - writer.writerow(allowed_fields) - for entry in data: - writer.writerow([str(entry.get(k, "")) for k in allowed_fields]) - print("wrote %s" % output_file) + + return data + + +def parse_args() -> argparse.Namespace: + args = argparse.ArgumentParser(description="Convert .fit to .csv") + + args.add_argument( + "--dir", + help="Path to directory containing .fit files", + type=str, + default=os.getcwd(), + ) + args.add_argument( + "--timezone", + help="Timezone for timestamps, e.g. 
'US/Pacific'", + default="US/Pacific", + ) + args.add_argument( + "--overwrite", + help="Overwrite any .csv files already converted from .fit", + action="store_true", + ) + + return args.parse_args() + + +def main(): + args = parse_args() + + # Identify .fit files + fit_files = glob.glob(args.dir + "/*.fit") + + for file in fit_files: + # Use the same filename, just change extension to .csv + base_filename = file.removesuffix(".fit") + new_filename = base_filename + ".csv" + if not args.overwrite and os.path.exists(new_filename): + continue + + print("converting %s" % file) + data = collect_data(file, tz=ZoneInfo(args.timezone)) + write_to_csv(data, new_filename) + + print("finished conversions") if __name__ == "__main__": diff --git a/personal_heatmap.py b/personal_heatmap.py index 7cfd1a0..7853cf9 100644 --- a/personal_heatmap.py +++ b/personal_heatmap.py @@ -1,147 +1,98 @@ +import argparse import glob import os +from zoneinfo import ZoneInfo import folium import gpxpy import numpy as np import pandas as pd -from geopy.geocoders import Nominatim -geolocator = Nominatim() -location = geolocator.geocode( - "Montreal Quebec" -) # Change this to change location centering -lat_check = float(location.raw["lat"]) -lon_check = float(location.raw["lon"]) +from fit_to_csv import collect_data -data = glob.glob("*.gpx") -fitdata = glob.glob("*.fit") -if not len(fitdata) == 0: - print("Converting Garmin FIT files") - os.system("python fit_to_csv.py") - os.system("mkdir fit_files") - os.system("mv *.fit ./fit_files") +def parse_args() -> argparse.Namespace: + args = argparse.ArgumentParser() + args.add_argument( + "--dir", + help="Path to direcotry with .fit, .gpx files to process for the heatmap", + default=os.getcwd(), + ) + args.add_argument( + "--timezone", + help="Timezone for timestamps, e.g. 
'US/Pacific'", + default="US/Pacific", + ) + args.add_argument( + "--output_path", + help="Path to write the heatmap .html to", + default="heatmap.html", + ) -csvdata = glob.glob("*.csv") + return args.parse_args() -lat = [] -lon = [] -all_lat = [] -all_long = [] +def main(): + args = parse_args() -print("Loading data") + gpx_files = glob.glob(args.dir + "/*.gpx") + fit_files = glob.glob(args.dir + "/*.fit") -for activity in data: - gpx_filename = activity - gpx_file = open(gpx_filename, "r") - gpx = gpxpy.parse(gpx_file) + fit_data = [] + if len(fit_files): + print("Converting Garmin FIT files") + for file in fit_files: + activity_data = collect_data(file, tz=ZoneInfo(args.timezone)) + df_activity_data = pd.DataFrame(activity_data) + fit_data.append(df_activity_data) - for track in gpx.tracks: - for segment in track.segments: - for point in segment.points: - lat.append(point.latitude) - lon.append(point.longitude) + all_lat = [] + all_long = [] - check1 = np.any( - np.isclose(lat, lat_check, atol=0.5) - ) # Change the tolerance 'atol' to include a larger or smaller area around the centering point - check2 = np.any( - np.isclose(lon, lon_check, atol=0.5) - ) # Change the tolerance 'atol' to include a larger or smaller area around the centering point + print("Loading data") - if check1 and check2: - all_lat.append(lat) - all_long.append(lon) + for activity in gpx_files: + lon = [] + lat = [] - lon = [] - lat = [] + with open(activity, "r") as gpx_file: + gpx = gpxpy.parse(gpx_file) -for activity in csvdata: - csv_filename = activity - csv_file = pd.read_csv(csv_filename) + for track in gpx.tracks: + for segment in track.segments: + for point in segment.points: + lat.append(point.latitude) + lon.append(point.longitude) - for i in range(len(csv_file)): - lat.append(csv_file["position_lat"][i]) - lon.append(csv_file["position_long"][i]) + all_lat.append(lat) + all_long.append(lon) - check1 = np.any( - np.isclose(lat, lat_check, atol=0.5) - ) # Change the tolerance 
'atol' to include a larger or smaller area around the centering point - check2 = np.any( - np.isclose(lon, lon_check, atol=0.5) - ) # Change the tolerance 'atol' to include a larger or smaller area around the centering point + for activity in fit_data: + lat = activity["position_lat"].values.tolist() + lon = activity["position_long"].values.tolist() - if check1 and check2: all_lat.append(lat) all_long.append(lon) - lon = [] - lat = [] - -all_lat = all_lat[0] -all_long = all_long[0] - -central_long = sum(all_long) / float(len(all_long)) -central_lat = sum(all_lat) / float(len(all_lat)) - -print("Initializing map") -m = folium.Map( - location=[central_lat, central_long], tiles="Stamen Toner", zoom_start=14.2 -) # Recommended map styles are "Stamen Terrain", "Stamen Toner" - -print("Plotting gpx data") - -for activity in data: - gpx_filename = activity - gpx_file = open(gpx_filename, "r") - gpx = gpxpy.parse(gpx_file) - - for track in gpx.tracks: - for segment in track.segments: - for point in segment.points: - lat.append(point.latitude) - lon.append(point.longitude) - - points = zip(lat, lon) - points = [item for item in zip(lat, lon)] - - folium.PolyLine(points, color="red", weight=2.5, opacity=0.5).add_to(m) - lat = [] - lon = [] - -print("Plotting csv data") -color = "red" -hr = [] -for activity in csvdata: - csv_filename = activity - csv_file = pd.read_csv(csv_filename) - for i in range(len(csv_file)): - lat.append(csv_file["position_lat"][i]) - lon.append(csv_file["position_long"][i]) - hr.append(csv_file["heart_rate"][i]) - points = zip(lat, lon) - points = [item for item in zip(lat, lon)] - - # color = [] - # print('heart_rate',csv_file['heart_rate']) - # hr = hr / max(hr) - # for value in hr: - # if value < 0.2: - # color.append("darkred") - # elif value >= 0.2 and value < 0.4: - # color.append("red") - # elif value >= 0.4 and value < 0.6: - # color.append("lightred") - # elif value >= 0.6 and value < 0.8: - # color.append("lightyellow") - # elif value >= 
0.6: - # color.append("yellow") - - folium.PolyLine(points, color=color, weight=2.5, opacity=0.5).add_to(m) - lat = [] - lon = [] - hr = [] - -m.save("heatmap.html") + central_long = np.mean(np.array(all_long).flatten()) + central_lat = np.mean(np.array(all_lat).flatten()) + + print("Initializing map") + m = folium.Map( + location=[central_lat, central_long], tiles="Cartodb Positron", zoom_start=14.2 + ) + + print("Plotting activities") + + for i in range(len(all_lat)): + lat = all_lat[i] + lon = all_long[i] + points = list(zip(lat, lon)) + + folium.PolyLine(points, color="red", weight=2.5, opacity=0.5).add_to(m) + + m.save(args.output_path) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c3d6176 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[project] +name = "pythonheatmap" +version = "0.1.0" +description = "Make a simple heatmap from .fit and .gpx files" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "fitparse", + "folium>=0.20.0", + "geopy>=2.4.1", + "gpxpy>=1.6.2", + "matplotlib>=3.10.8", + "numpy>=2.4.2", + "pandas>=3.0.1", + "python-fitparse>=2.1.3", + "pytz>=2026.1.post1", + "selenium>=4.41.0", +] \ No newline at end of file diff --git a/selenium_downloader.py b/selenium_downloader.py index acd61b7..99f74ee 100644 --- a/selenium_downloader.py +++ b/selenium_downloader.py @@ -1,12 +1,14 @@ import time from selenium import webdriver +from selenium.webdriver.chrome.options import Options +from selenium.webdriver.common.by import By -driver = webdriver.Chrome(executable_path="") +driver = webdriver.Chrome() download_dir = "" -options = webdriver.ChromeOptions() +options = Options() options.add_argument("--ignore-certificate-errors") options.add_argument("--test-type") options.binary_location = "/usr/bin/chromium-browser" @@ -18,36 +20,37 @@ "download": {"prompt_for_download": False, "default_directory": download_dir}, }, ) -driver = 
webdriver.Chrome(chrome_options=options) +driver = webdriver.Chrome(options=options) -usern, passw, id = open("login_info.secret").read().strip().split(",") +with open("login_info.secret") as f: + usern, passw, id = f.read().strip().split(",") driver.get("http://strava.com/login") -usern_box = driver.find_element_by_xpath("//input[@name='email' and @type='email']") +usern_box = driver.find_element(By.XPATH, "//input[@name='email' and @type='email']") usern_box.send_keys(usern) -passw_box = driver.find_element_by_xpath( - "//input[@name='password' and @type='password']" +passw_box = driver.find_element( + By.XPATH, "//input[@name='password' and @type='password']" ) passw_box.send_keys(passw) -submit_button = driver.find_element_by_xpath('//button[@id="login-button"]') +submit_button = driver.find_element(By.XPATH, '//button[@id="login-button"]') submit_button.click() time.sleep(2) driver.get("https://www.strava.com/athletes/" + str(id)) -monthly_button = driver.find_element_by_xpath( - '//a[contains(@class,"button btn-xs") and contains(@href,"month")]' +monthly_button = driver.find_element( + By.XPATH, '//a[contains(@class,"button btn-xs") and contains(@href,"month")]' ) monthly_button.click() time.sleep(2) -bar_list = driver.find_elements_by_xpath( - '//a[@class="bar" and contains(@href,"interval")]' +bar_list = driver.find_elements( + By.XPATH, '//a[@class="bar" and contains(@href,"interval")]' ) activity_list = [] @@ -56,8 +59,9 @@ bar.click() time.sleep(3) - for a in driver.find_elements_by_xpath( - './/a[contains(@href, "activities") and not(contains(@href, "twitter")) and not(contains(@href, "#")) and not(contains(@href, "photos")) and not(contains(@href, "segments"))]' + for a in driver.find_elements( + By.XPATH, + './/a[contains(@href, "activities") and not(contains(@href, "twitter")) and not(contains(@href, "#")) and not(contains(@href, "photos")) and not(contains(@href, "segments"))]', ): activity_list.append(a.get_attribute("href")) diff --git 
a/simple_heatmap.py b/simple_heatmap.py index 11595d4..f37abc9 100644 --- a/simple_heatmap.py +++ b/simple_heatmap.py @@ -1,30 +1,56 @@ +import argparse import glob +import os import gpxpy import matplotlib.pyplot as plt -gpx_list = glob.glob("*.gpx") - - -fig = plt.figure(facecolor="0.05") -ax = plt.Axes( - fig, - [0.0, 0.0, 1.0, 1.0], -) -ax.set_aspect("equal") -ax.set_axis_off() -fig.add_axes(ax) - -for gpx_data in gpx_list: - lat = [] - lon = [] - gpx_file = open(gpx_data, "r") - gpx = gpxpy.parse(gpx_file) - for track in gpx.tracks: - for segment in track.segments: - for point in segment.points: - lat.append(point.latitude) - lon.append(point.longitude) - plt.plot(lon, lat, color="deepskyblue", lw=0.8, alpha=0.8) -# plt.savefig('simple_heatmap.png') -plt.show() + +def parse_args() -> argparse.Namespace: + args = argparse.ArgumentParser(description="Create a simple heatmap") + + args.add_argument( + "--dir", + help="Path to directory containing files to parse to generate the heatmap", + default=os.getcwd(), + ) + args.add_argument( + "--output_path", + help="Path to output the simple heatmap", + default="simple_heatmap.png", + ) + + return args.parse_args() + + +def main(): + args = parse_args() + + gpx_list = glob.glob(args.dir + "/*.gpx") + + fig = plt.figure(facecolor="0.05") + ax = plt.Axes( + fig, + [0.0, 0.0, 1.0, 1.0], + ) + ax.set_aspect("equal") + ax.set_axis_off() + fig.add_axes(ax) + + for gpx_data in gpx_list: + lat = [] + lon = [] + with open(gpx_data, "r") as gpx_file: + gpx = gpxpy.parse(gpx_file) + for track in gpx.tracks: + for segment in track.segments: + for point in segment.points: + lat.append(point.latitude) + lon.append(point.longitude) + plt.plot(lon, lat, color="deepskyblue", lw=0.8, alpha=0.8) + plt.savefig(args.output_path) + plt.show() + + +if __name__ == "__main__": + main()