FelixDz · FelixDz · Feb 5, 2018 · Jan 14, 2020 · Oct 22, 2020 · Oct 22, 2020
diff --git a/.travis.yml b/.travis.yml
@@ -1,12 +1,11 @@
 language: python
 python:
-  - "2.7"
-  - "3.3"
   - "3.4"
   - "3.5"
+  - "3.8"
+  - "3.9"
 # command to install dependencies
 install:
-# - "pip install -r requirements.txt"
   - "pip install setuptools --upgrade; python setup.py install"
 # command to run tests
 script: nosetests
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 ![SoundScrape!](http://i.imgur.com/nHAt2ow.png)
 
-SoundScrape [![Build Status](https://travis-ci.org/Miserlou/SoundScrape.svg)](https://travis-ci.org/Miserlou/SoundScrape) [![Python 2](https://img.shields.io/badge/Python-2-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape)
+SoundScrape [![Build Status](https://travis-ci.org/Miserlou/SoundScrape.svg)](https://travis-ci.org/Miserlou/SoundScrape) [![Python 3](https://img.shields.io/badge/Python-3-brightgreen.svg)](https://pypi.python.org/pypi/soundscrape/) [![PyPI](https://img.shields.io/pypi/v/soundscrape.svg)](https://pypi.python.org/pypi/SoundScrape)
 ==============
 
 **SoundScrape** makes it super easy to download artists from SoundCloud (and Bandcamp and MixCloud) - even those which don't have download links! It automatically creates ID3 tags as well (including album art), which is handy.

diff --git a/setup.py b/setup.py
@@ -48,10 +48,11 @@
         'License :: OSI Approved :: Apache Software License',
         'Operating System :: OS Independent',
         'Programming Language :: Python',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
         'Topic :: Internet :: WWW/HTTP',
         'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
     ],

diff --git a/soundscrape/__init__.py b/soundscrape/__init__.py
@@ -1 +1 @@
-__version__ = '0.30.1'
+__version__ = '0.31.0'
diff --git a/soundscrape/soundscrape.py b/soundscrape/soundscrape.py
@@ -1,8 +1,7 @@
 #! /usr/bin/env python
-from __future__ import unicode_literals
-
 import argparse
 import demjson
+import html
 import os
 import re
 import requests
@@ -19,11 +18,16 @@
 from os.path import dirname, exists, join
 from os import access, mkdir, W_OK
 
+if sys.version_info >= (3, 4):  # compare (major, minor), not the minor alone
+    html_unescape = html.unescape
+else:  # older 3.x: a bare `import html` does not load the html.parser submodule
+    from html.parser import HTMLParser
+    html_unescape = HTMLParser().unescape
 ####################################################################
 
 # Please be nice with this!
-CLIENT_ID = '175c043157ffae2c6d5fed16c3d95a4c'
-CLIENT_SECRET = '99a51990bd81b6a82c901d4cc6828e46'
+CLIENT_ID = 'a3dd183a357fcff9a6943c0d65664087'
+CLIENT_SECRET = '7e10d33e967ad42574124977cf7fa4b7'
 MAGIC_CLIENT_ID = 'b45b1aa10f1ac2941910a7f0d10f8e28'
 
 AGGRESSIVE_CLIENT_ID = 'OmTFHKYSMLFqnu2HHucmclAptedxWXkq'
@@ -219,7 +223,7 @@ def process_soundcloud(vargs):
         tagged = tag_file(filename,
                  artist=track_data['artist'],
                  title=track_data['title'],
-                 year='2016',
+                 year='2018',
                  genre='',
                  album='',
                  artwork_url='')
@@ -232,6 +236,7 @@ def process_soundcloud(vargs):
         filenames.append(filename)
 
     else:
+
         aggressive = False
 
         # This is is likely a 'likes' page.
@@ -267,6 +272,7 @@ def process_soundcloud(vargs):
                     aggressive = True
                     filenames = []
 
+                    # this might be buggy
                     data = get_soundcloud_api2_data(artist_id)
 
                     for track in data['collection']:
@@ -431,6 +437,8 @@ def download_tracks(client, tracks, num_tracks=sys.maxsize, downloadable=False,
                     continue
 
                 puts_safe(colored.green("Downloading") + colored.white(": " + track['title']))
+
+
                 if track.get('direct', False):
                     location = track['stream_url']
                 else:
@@ -457,7 +465,7 @@ def download_tracks(client, tracks, num_tracks=sys.maxsize, downloadable=False,
                 filenames.append(filename)
         except Exception as e:
             puts_safe(colored.red("Problem downloading ") + colored.white(track['title']))
-            puts_safe(e)
+            puts_safe(str(e))
 
     return filenames
 
@@ -541,7 +549,12 @@ def process_bandcamp(vargs):
     else:
         bc_url = 'https://' + artist_url + '.bandcamp.com/music'
 
-    filenames = scrape_bandcamp_url(bc_url, num_tracks=vargs['num_tracks'], folders=vargs['folders'], custom_path=vargs['path'])
+    filenames = scrape_bandcamp_url(
+        bc_url,
+        num_tracks=vargs['num_tracks'],
+        folders=vargs['folders'],
+        custom_path=vargs['path'],
+    )
 
     # check if we have lists inside a list, which indicates the
     # scraping has gone recursive, so we must format the output
@@ -576,11 +589,15 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path=
     # so we call the scrape_bandcamp_url() method for each one
     if type(album_data) is list:
         for album_url in album_data:
-            filenames.append(scrape_bandcamp_url(album_url, num_tracks, folders, custom_path))
+            filenames.append(
+                scrape_bandcamp_url(
+                    album_url, num_tracks, folders, custom_path
+                )
+            )
         return filenames
 
-    artist = album_data["artist"]
-    album_name = album_data["album_name"]
+    artist = album_data.get("artist")
+    album_name = album_data.get("album_title")
 
     if folders:
         if album_name:
@@ -647,21 +664,58 @@ def scrape_bandcamp_url(url, num_tracks=sys.maxsize, folders=False, custom_path=
     return filenames
 
 
+def extract_embedded_json_from_attribute(request, attribute, debug=False):
+    """
+    Extract the JSON object embedded in an HTML element's attribute value.
+
+    Only the first ``attribute="..."`` occurrence in the page text is used.
+    The JSON is "sloppy", so the more tolerant demjson parser decodes it.
+
+    Args:
+        request (obj:`requests.Response`): HTTP GET response from which to extract
+        attribute (str): name of the attribute holding the desired JSON object
+        debug (bool, optional): print the decoded JSON, or the error on failure
+
+    Returns:
+        The decoded JSON (expected to be a dict), or None if extraction failed
+    """
+    try:
+        embed = request.text.split('{}="'.format(attribute))[1]  # IndexError if absent
+        embed = html_unescape(  # value ends at the first raw '"'; entities decoded after
+            embed.split('"')[0]
+        )
+        output = demjson.decode(embed)
+        if debug:
+            print(
+                'extracted JSON: '
+                + demjson.encode(
+                    output,
+                    compactly=False,
+                    indent_amount=2,
+                )
+            )
+    except Exception as e:  # deliberate best-effort: any failure is reported as None
+        output = None
+        if debug:
+            print(e)
+    return output
+
+
 def get_bandcamp_metadata(url):
     """
-    Read information from the Bandcamp JavaScript object.
+    Read information from Bandcamp embedded JavaScript object notation.
     The method may return a list of URLs (indicating this is probably a "main" page which links to one or more albums),
     or a JSON if we can already parse album/track info from the given url.
-    The JSON is "sloppy". The native python JSON parser often can't deal, so we use the more tolerant demjson instead.
     """
     request = requests.get(url)
+    output = {}
     try:
-        sloppy_json = request.text.split("var TralbumData = ")
-        sloppy_json = sloppy_json[1].replace('" + "', "")
-        sloppy_json = sloppy_json.replace("'", "\'")
-        sloppy_json = sloppy_json.split("};")[0] + "};"
-        sloppy_json = sloppy_json.replace("};", "}")
-        output = demjson.decode(sloppy_json)
+        for attr in ['data-tralbum', 'data-embed']:
+            output.update(
+                extract_embedded_json_from_attribute(
+                    request, attr
+                )
+            )
     # if the JSON parser failed, we should consider it's a "/music" page,
     # so we generate a list of albums/tracks and return it immediately
     except Exception as e:
@@ -680,14 +734,6 @@ def get_bandcamp_metadata(url):
     # according to http://stackoverflow.com/a/7323861
     # (very unlikely, but better safe than sorry!)
     output['genre'] = ' '.join(s for s in tags)
-    # make sure we always get the correct album name, even if this is a
-    # track URL (unless this track does not belong to any album, in which
-    # case the album name remains set as None.
-    output['album_name'] = None
-    regex_album_name = r'album_title\s*:\s*"([^"]+)"\s*,'
-    match = re.search(regex_album_name, request.text, re.MULTILINE)
-    if match:
-        output['album_name'] = match.group(1)
 
     try:
         artUrl = request.text.split("\"tralbumArt\">")[1].split("\">")[0].split("href=\"")[1]

diff --git a/tests/test.py b/tests/test.py
@@ -1,21 +1,20 @@
 import glob
 import os
-import re
-import string
 import sys
 import unittest
 
-import nose
-from nose import case
-from nose.pyversion import unbound_method
-from nose import util
-
+from mutagen.mp3 import EasyMP3
 from soundscrape.soundscrape import get_client
 from soundscrape.soundscrape import process_soundcloud
 from soundscrape.soundscrape import process_bandcamp
-from soundscrape.soundscrape import process_mixcloud
-from soundscrape.soundscrape import process_audiomack
-from soundscrape.soundscrape import process_musicbed
+
+
+def rm_mp3():
+    """Delete every ``*.mp3`` file in the current working directory
+    (non-recursive); the tests call it before and after each download."""
+    for f in glob.glob('*.mp3'):
+        os.unlink(f)
+
 
 class TestSoundscrape(unittest.TestCase):
 
@@ -31,45 +30,33 @@ def test_get_client(self):
         self.assertTrue(bool(client))
 
     def test_soundcloud(self):
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
-
+        rm_mp3()
         mp3_count = len(glob.glob1('', "*.mp3"))
         vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://soundcloud.com/fzpz/revised', 'keep': True}
         process_soundcloud(vargs)
         new_mp3_count = len(glob.glob1('', "*.mp3"))
         self.assertTrue(new_mp3_count > mp3_count)
-
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
+        rm_mp3()
 
     def test_soundcloud_hard(self):
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
-
+        rm_mp3()
         mp3_count = len(glob.glob1('', "*.mp3"))
         vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 1, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'puptheband', 'keep': False}
         process_soundcloud(vargs)
         new_mp3_count = len(glob.glob1('', "*.mp3"))
         self.assertTrue(new_mp3_count > mp3_count)
         self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.'
-
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
+        rm_mp3()
 
     def test_soundcloud_hard_2(self):
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
-
+        rm_mp3()
         mp3_count = len(glob.glob1('', "*.mp3"))
         vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 1, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://soundcloud.com/lostdogz/snuggles-chapstick', 'keep': False}
         process_soundcloud(vargs)
         new_mp3_count = len(glob.glob1('', "*.mp3"))
         self.assertTrue(new_mp3_count > mp3_count)
         self.assertTrue(new_mp3_count == 1) # This used to be 3, but is now 'Not available in United States.'
-
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
+        rm_mp3()
 
     # The test URL for this is no longer a WAV. Need a new testcase.
     #
@@ -88,30 +75,35 @@ def test_soundcloud_hard_2(self):
     #        os.unlink(f)
 
     def test_bandcamp(self):
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
-
+        rm_mp3()
         mp3_count = len(glob.glob1('', "*.mp3"))
         vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://atenrays.bandcamp.com/track/who-u-think'}
         process_bandcamp(vargs)
         new_mp3_count = len(glob.glob1('', "*.mp3"))
         self.assertTrue(new_mp3_count > mp3_count)
-
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
+        rm_mp3()
 
     def test_bandcamp_slashes(self):
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
-
+        rm_mp3()
         mp3_count = len(glob.glob1('', "*.mp3"))
         vargs = {'path':'', 'folders': False, 'group': False, 'track': '', 'num_tracks': 9223372036854775807, 'bandcamp': False, 'downloadable': False, 'likes': False, 'open': False, 'artist_url': 'https://defill.bandcamp.com/track/amnesia-chamber-harvest-skit'}
         process_bandcamp(vargs)
         new_mp3_count = len(glob.glob1('', "*.mp3"))
         self.assertTrue(new_mp3_count > mp3_count)
+        rm_mp3()
+
+    def test_bandcamp_html_entities(self):
+        """A title containing '&' yields 'CandA' in the file name and 'C&A' in the tag."""
+        rm_mp3()
+        vargs = {'path': '', 'folders': False, 'num_tracks': sys.maxsize, 'open': False, 'artist_url': 'https://anaalnathrakh.bandcamp.com/track/man-at-c-a-bonus-track'}
+        process_bandcamp(vargs)
+        mp3s = glob.glob('*.mp3')
+        self.assertEqual(1, len(mp3s))  # assertEquals alias was removed in Python 3.12
+        self.assertIn('CandA', mp3s[0])  # checks the downloaded file's name
+        title = EasyMP3(mp3s[0])['title']
+        self.assertIn('C&A', title[0])  # checks the first value of the 'title' tag
+        rm_mp3()
 
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
 
     # def test_musicbed(self):
     #     for f in glob.glob('*.mp3'):
@@ -131,11 +123,9 @@ def test_mixcloud(self):
         MixCloud is being blocked from Travis, interestingly.
         """
 
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
-
-        for f in glob.glob('*.m4a'):
-           os.unlink(f)
+        # rm_mp3()
+        # for f in glob.glob('*.m4a'):
+        #    os.unlink(f)
 
         # shortest mix I could find that was still semi tolerable
         #mp3_count = len(glob.glob1('', "*.mp3"))
@@ -146,11 +136,9 @@ def test_mixcloud(self):
         #new_m4a_count = len(glob.glob1('', "*.m4a"))
         #self.assertTrue((new_mp3_count > mp3_count) or (new_m4a_count > m4a_count))
 
-        for f in glob.glob('*.mp3'):
-           os.unlink(f)
-
-        for f in glob.glob('*.m4a'):
-           os.unlink(f)
+        # rm_mp3()
+        # for f in glob.glob('*.m4a'):
+        #    os.unlink(f)
 
     # def test_audiomack(self):
     #     for f in glob.glob('*.mp3'):