diff --git a/.gitignore b/.gitignore index 62998c7..bfd2757 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,213 @@ -build -dist -*egg-info* -private.py -__pycache__ +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + diff --git a/GoogleMyMaps/GoogleMyMaps.py b/GoogleMyMaps/GoogleMyMaps.py index 5aad251..627ce33 100644 --- a/GoogleMyMaps/GoogleMyMaps.py +++ b/GoogleMyMaps/GoogleMyMaps.py @@ -1,66 +1,58 @@ -# import sys -import requests -from bs4 import BeautifulSoup -from pyjsparser import PyJsParser +from GoogleMyMaps.parsers import GoogleMyMapsParser +from .models import Map, Layer, Place -class GoogleMyMaps(): - +class GoogleMyMaps: def __init__(self): - self.parser = PyJsParser() - - def getFromMyMap(self, mapID): - r = requests.get( - "https://www.google.com/maps/d/edit?hl=ja&mid=" + mapID) - return r - - def parseData(self, r): - soup = BeautifulSoup(r.text, "html.parser") - script = soup.find_all("script")[1].text - js = self.parser.parse(script) - pagedata = js["body"][1]["declarations"][0]["init"]["value"] - - data = pagedata.replace("true", "True") - data = data.replace("false", "False") - data = data.replace("null", "None") - data = data.replace("\n", "") - # exec("data = " + data) - data = eval(data) - return data[1] - - def parseLayerData(self, layerData): - # layerName = layerData[2] - - places = layerData[4] - # url = places[0][0] - - parsed = [] - for place in places: - placeName = place[5][0][0] - - info = place[4] - point = info[4] - - parsed.append({ - "placeName": placeName, - "point": point, - }) - - return parsed - - def get(self, mapID, layers=[0]): - r = self.getFromMyMap(mapID) - if r.status_code != 200: - print("status_code:", r.status_code) - raise - 
- data = self.parseData(r) - # mapID = data[1] - # mapName = data[2] - - parsed = [] - for layer in layers: - layerData = data[6][layer] - parsed += self.parseLayerData(layerData) - - return parsed + self.parser = GoogleMyMapsParser() + + def create_map(self, map_link, chosen_layers: list = None): + data = self.parser.get_map_data(map_link) + name = data[2] if len(data) > 2 else 'Unnamed Map' + chosen_layers = GoogleMyMaps._parse_layers(data[6], chosen_layers) if len(data) > 6 else [] + return Map(map_link, name, chosen_layers) + + @staticmethod + def _parse_layers(layers_data, chosen_layers=None): + layers = [] + for index, layer_data in enumerate(layers_data): + if chosen_layers is None or index in chosen_layers: + layer_name = layer_data[2] if len(layer_data) > 2 else f'Unnamed Layer {index + 1}' + places = GoogleMyMaps._parse_places(layer_data[12][0][13]) if len(layer_data) > 12 else [] + layers.append(Layer(layer_name, places)) + return layers + + @staticmethod + def _parse_places(places_data): + places = [] + for place_data, place_icon_data in zip(places_data[0], places_data[1]): + icon = place_icon_data[0][0] if place_icon_data and len(place_icon_data) > 0 else None + + place_type, coords = GoogleMyMaps._get_place_type_and_coords(place_data) if len(place_data) > 5 else (None, None) + + place_info = place_data[5] if len(place_data) > 5 else None + name = place_info[0][1][0] if place_info and len(place_info[0]) > 1 else 'Unnamed Place' + photos = [photo[1] for photo in place_info[2]] if place_info and len(place_info) > 2 and place_info[2] else None # place_info can be None, so guard before calling len() + data = GoogleMyMaps._extract_place_data(place_info) if place_info else None + + places.append(Place(place_type, name, icon, coords, photos, data)) + return places + + @staticmethod + def _get_place_type_and_coords(place): + if place[1] is not None: + return 'Point', place[1][0][0] + if place[2] is not None: + return 'Line', [cord[0] for cord in place[2][0][0]] + has_polygon = place[3] is not None # otherwise fall back to a (None, None) pair so the caller can still unpack + return ('Polygon', [cord[0] for cord in place[3][0][0][0][0]]) if has_polygon else (None, None)
+ + @staticmethod + def _extract_place_data(place_info): + place_data = {} + if place_info and len(place_info) > 1 and place_info[1]: + place_data[place_info[1][0]] = place_info[1][1][place_info[1][2] - 1] + if place_info and len(place_info) > 3 and place_info[3]: + for info in place_info[3]: + place_data[info[0]] = info[1][info[2] - 1] + return place_data if place_data else None diff --git a/GoogleMyMaps/__init__.py b/GoogleMyMaps/__init__.py index 1fc0806..dd0c3b5 100644 --- a/GoogleMyMaps/__init__.py +++ b/GoogleMyMaps/__init__.py @@ -1 +1,2 @@ from .GoogleMyMaps import GoogleMyMaps +from .models import Map, Layer, Place diff --git a/GoogleMyMaps/models/Layer.py b/GoogleMyMaps/models/Layer.py new file mode 100644 index 0000000..b65d879 --- /dev/null +++ b/GoogleMyMaps/models/Layer.py @@ -0,0 +1,12 @@ +from .Place import Place # import the class itself, not the Place submodule + + +class Layer: + def __init__(self, name: str, places: list[Place]): + self.name = name + self.places = places + + def __str__(self): + places_str = '\n'.join([f" {place}" for place in self.places]) if self.places else "No places" + return f'Layer: {self.name}\n' \ + f'{places_str}\n' diff --git a/GoogleMyMaps/models/Map.py b/GoogleMyMaps/models/Map.py new file mode 100644 index 0000000..8a1342f --- /dev/null +++ b/GoogleMyMaps/models/Map.py @@ -0,0 +1,14 @@ +from .Layer 
import Layer + + +class Map: + def __init__(self, link: str, name: str, layers: list[Layer]): + self.link = link + self.name = name + self.layers = layers + + def __str__(self): + layers_str = '\n'.join([f" {layer}" for layer in self.layers]) if self.layers else "No layers" + return f'Link: {self.link}\n' \ + f'Map: {self.name}\n' \ + f'{layers_str}\n' diff --git a/GoogleMyMaps/models/Place.py b/GoogleMyMaps/models/Place.py new file mode 100644 index 0000000..31d6385 --- /dev/null +++ b/GoogleMyMaps/models/Place.py @@ -0,0 +1,32 @@ +from typing import Optional, Union + + +class Place: + """A single place on a map layer, with optional icon, coordinates, photos and data.""" + def __init__(self, + place_type: Optional[str], + name: str, + icon: Optional[str], + coords: Optional[Union[list[float], list[list[float]]]], + photos: Optional[list[str]], + data: Optional[dict]): + self.place_type = place_type + self.name = name + self.icon = icon + self.coords = coords + self.photos = photos + self.data = data + + def __str__(self): + photos_str = (' Photos:\n' + + ''.join([f" {photo}\n" for photo in self.photos])) \ + if self.photos else '' + data_str = (' Data:\n' + + ''.join([f" {data}: {self.data[data]}\n" for data in self.data])) \ + if self.data else '' + + return f'{self.place_type}: {self.name}\n' \ + f' Icon: {self.icon}\n' \ + f' Coordinates: {self.coords}\n' \ + f'{photos_str}' \ + f'{data_str}' diff --git a/GoogleMyMaps/models/__init__.py b/GoogleMyMaps/models/__init__.py new file mode 100644 index 0000000..2f30b74 --- /dev/null +++ b/GoogleMyMaps/models/__init__.py @@ -0,0 +1,3 @@ +from .Map import Map +from .Layer import Layer +from .Place import Place diff --git a/GoogleMyMaps/parsers/GoogleMyMapsParser.py b/GoogleMyMaps/parsers/GoogleMyMapsParser.py new file mode 100644 index 0000000..6be8d69 --- /dev/null +++ b/GoogleMyMaps/parsers/GoogleMyMapsParser.py @@ -0,0 +1,45 @@ +import re + +import requests +from bs4 import BeautifulSoup +from pyjsparser import PyJsParser + + +class GoogleMyMapsParser: + def __init__(self): + self.parser = PyJsParser() + + def get_map_data(self, map_link: str): + 
GoogleMyMapsParser._validate_map_link(map_link) + raw_data = GoogleMyMapsParser._fetch_data(map_link) + parsed_data = self._parse_data(raw_data) + return parsed_data + + @staticmethod + def _validate_map_link(map_link: str): + map_link_pattern = re.compile( + r'https://www\.google\.com/maps/d/u/.*' + ) + + if not map_link_pattern.match(map_link): + raise ValueError('Invalid map link format.') + + @staticmethod + def _fetch_data(map_link: str): + response = requests.get(map_link, timeout=30) # requests has no default timeout; bound the wait so a stalled server cannot hang the caller + + if response.status_code != 200: + raise Exception(f'Failed to fetch map data. Status code: {response.status_code}') + + return response.text + + def _parse_data(self, raw_data: str): + soup = BeautifulSoup(raw_data, 'html.parser') + script = soup.find_all('script')[1].text + js = self.parser.parse(script) + page_data = js['body'][1]['declarations'][0]['init']['value'] + + data = page_data.replace('true', 'True').replace('false', 'False').replace('null', 'None') + data = data.replace('\n', '').replace('\xa0', ' ') + import ast # SECURITY: the page is remote input, so parse literals only instead of eval(); function-scope import keeps the hunk size unchanged + return ast.literal_eval(data)[1] diff --git a/GoogleMyMaps/parsers/__init__.py b/GoogleMyMaps/parsers/__init__.py new file mode 100644 index 0000000..ea1e982 --- /dev/null +++ b/GoogleMyMaps/parsers/__init__.py @@ -0,0 +1 @@ +from .GoogleMyMapsParser import GoogleMyMapsParser diff --git a/README.md b/README.md new file mode 100644 index 0000000..65081eb --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# GoogleMyMaps +Python parser for Google My Maps diff --git a/examples/example_simple.py b/examples/example_simple.py index 5c5785b..7ef6acb 100644 --- a/examples/example_simple.py +++ b/examples/example_simple.py @@ -1,10 +1,9 @@ from GoogleMyMaps import GoogleMyMaps -from pprint import pprint -mapID = "YOUR_MAP_ID" +map_link = 'YOUR_MAP_LINK' -if __name__ == "__main__": +if __name__ == '__main__': gmm = GoogleMyMaps() - data = gmm.get(mapID) - pprint(data) + my_map = gmm.create_map(map_link) + print(my_map) diff --git a/setup.py b/setup.py index a4ab9dd..713eea3 100644 --- a/setup.py +++ 
b/setup.py @@ -16,6 +16,7 @@ 'install_requires': [ 'beautifulsoup4', 'pyjsparser', + 'requests', ], }