-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdatacollection.py
More file actions
111 lines (91 loc) · 3.88 KB
/
datacollection.py
File metadata and controls
111 lines (91 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import os

import numpy as np
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
# Spotify API credentials for the client-credentials flow.
# The SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET environment variables take
# precedence, so the script can run with other credentials without editing
# the source.
# NOTE(security): the fallback literals below are committed to the source and
# should be treated as leaked -- rotate them and drop the fallbacks once every
# environment that runs this script sets the variables.
client_id = os.environ.get("SPOTIPY_CLIENT_ID", "1c152cf60bb94f8695836484148e1d4b")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET", "5ce0cdc1c9594c23b857919f8a7e4bd9")
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
# Shared API client used by every helper in this file.
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
#Utility functions
def show_tracks(tracks):
    """Print one "artist title" line for each track of a playlist page.

    Args:
        tracks: Dict with an ``'items'`` list. Each item has a ``'track'``
            entry that is either ``None`` or a dict providing ``'name'`` and
            a non-empty ``'artists'`` list of dicts with a ``'name'`` key.

    Items whose ``'track'`` is ``None`` are skipped rather than raising
    ``TypeError``. The first artist's name is right-aligned and truncated to
    32 characters.
    """
    for item in tracks['items']:
        track = item['track']
        if track is None:
            # Nothing to print for an entry without track data.
            continue
        print(" %32.32s %s" % (track['artists'][0]['name'], track['name']))
def show_playlist(username, playlist_id):
    """Print every track of a user's playlist, page by page.

    Args:
        username: Owner of the playlist, passed to ``sp.user_playlist``.
        playlist_id: Identifier of the playlist to print.
    """
    playlist = sp.user_playlist(username, playlist_id, fields="tracks,next")
    page = playlist['tracks']
    # Keep printing until sp.next() hands back a falsy value.
    while page:
        show_tracks(page)
        page = sp.next(page)
def get_playlist_tracks(username, playlist_id):
    """Collect the ``'track'`` entry of every item in a user's playlist.

    Args:
        username: Owner of the playlist, passed to ``sp.user_playlist``.
        playlist_id: Identifier of the playlist to read.

    Returns:
        A list with the ``'track'`` value of each playlist item, gathered
        across all pages in the order they are returned.
    """
    playlist = sp.user_playlist(username, playlist_id, fields="tracks,next")
    collected = []
    page = playlist['tracks']
    # Walk the pages until sp.next() hands back a falsy value.
    while page:
        for entry in page['items']:
            collected.append(entry['track'])
        page = sp.next(page)
    return collected
def get_playlist_URIs(username, playlist_id):
    """Return the ``"uri"`` value of each track in a user's playlist.

    Args:
        username: Owner of the playlist.
        playlist_id: Identifier of the playlist to read.

    Returns:
        A list of URI values in the order ``get_playlist_tracks`` yields
        the tracks.
    """
    uris = []
    for track in get_playlist_tracks(username, playlist_id):
        uris.append(track["uri"])
    return uris
def splitlist(l, n):
    """Split a sequence into consecutive chunks of at most ``n`` items.

    Args:
        l: Sliceable sequence that supports ``len()`` (e.g. a list). It is
            not modified.
        n: Maximum chunk size; must be at least 1 when ``l`` is non-empty.

    Returns:
        A list of slices of ``l`` in their original order. Every chunk has
        ``n`` items except possibly the last one. An empty input yields
        ``[]``.

    Raises:
        ValueError: If ``l`` is non-empty and ``n`` is less than 1. Such a
            chunk size can never consume the input, so it is rejected up
            front instead of looping forever.
    """
    total = len(l)
    if total == 0:
        return []
    if n < 1:
        raise ValueError("chunk size n must be at least 1, got %r" % (n,))
    # One pass over the start offsets; no repeated copying of the remainder.
    return [l[start:start + n] for start in range(0, total, n)]
def get_audio_features(track_URIs):
    """Fetch audio features for the given tracks and return them as a DataFrame.

    The identifiers are sent to ``sp.audio_features`` in batches of 50.

    Args:
        track_URIs: List of track identifiers accepted by
            ``sp.audio_features``.

    Returns:
        A ``pandas.DataFrame`` with one row per entry of ``track_URIs``, in
        the same order. Its ``"uri"`` column is set to the values of
        ``track_URIs``. An identifier for which the API returned ``None``
        yields a row holding only that ``"uri"`` value (other columns NaN).
    """
    features = []
    for pack in splitlist(track_URIs, 50):
        batch = sp.audio_features(pack)
        # A None entry would break DataFrame construction; substitute an
        # empty record so rows stay aligned with track_URIs.
        features.extend(entry if entry is not None else {} for entry in batch)
    df = pd.DataFrame(features)
    # Overwrite "uri" with the identifiers exactly as the caller passed them.
    df["uri"] = track_URIs
    return df
# Collecting playlist identifiers.
# Lists every playlist of the user so the entries for "Piano Classical" and
# "Rock" (both by user yguezennec) can be picked out of the output.
username = "yguezennec"
playlists = sp.user_playlists(username)
while playlists:
    for i, playlist in enumerate(playlists['items']):
        # Running number across pages, then URI, name and track count.
        row = (
            i + 1 + playlists['offset'],
            playlist['uri'],
            playlist['name'],
            playlist['tracks']['total'],
        )
        print("%4d %s %s = %s tracks" % row)
    # Fetch the following page while one is advertised; otherwise stop.
    playlists = sp.next(playlists) if playlists['next'] else None
# Playlist identifiers used below (track counts as noted by the author):
#   "Piano Classical" -> 183mAeiSAAUZm40gvvB1he (344 tracks)
#   "Rock"            -> 0NY0QaRzStMKbHHuGWpG1K (120 tracks)
classic_URIs = get_playlist_URIs(username, "183mAeiSAAUZm40gvvB1he")
rock_URIs = get_playlist_URIs(username, "0NY0QaRzStMKbHHuGWpG1K")

# Audio features per playlist; the author recorded 344 x 18 and 120 x 18.
class_feat = get_audio_features(classic_URIs)
rock_feat = get_audio_features(rock_URIs)

# Tag every row with the playlist it came from.
for frame, label in ((class_feat, "piano_classical"), (rock_feat, "rock")):
    frame["playlists"] = label

# Concatenate both frames with sorted columns and a fresh 0..n-1 index
# (recorded by the author as 464 rows, 19 columns).
frames = [class_feat, rock_feat]
df_all = pd.concat(frames, sort=True, ignore_index=True)
# Columns of df_all as recorded by the author:
# ['acousticness', 'analysis_url', 'danceability', 'duration_ms', 'energy',
#  'id', 'instrumentalness', 'key', 'liveness', 'loudness', 'mode',
#  'playlists', 'speechiness', 'tempo', 'time_signature', 'track_href',
#  'type', 'uri', 'valence']
# Features to analyse: acousticness, danceability, energy, instrumentalness,
# liveness, loudness, speechiness, tempo -- kept together with the 'uri' and
# 'playlists' columns that identify each row.
analysis_columns = [
    'acousticness', 'danceability', 'energy', 'instrumentalness',
    'liveness', 'loudness', 'speechiness', 'tempo', 'uri', 'playlists',
]
# Creating new data frame
df = df_all[analysis_columns]
# Exporting data into a csv
# df.to_csv(r'export_dataframe.csv', header=True)
# Audio features for two individual songs.
# Let's try on Immigrant Song by Led Zeppelin; its identifier was looked up
# with this (disabled) snippet:
# led_URI = "spotify:artist:36QJpDe2go2KgaRleHCDTp"
#
# results = sp.artist_top_tracks(led_URI)
# for track in results['tracks'][:10]:
#     print('track : ' + track['name'] + track['uri'])
# Identifier of Immigrant Song: 78lgmZwycJ3nzsdgmPPGNx
# URI of Bohemian Rhapsody: spotify:track:1AhDOtG9vPSOmsWgNW0BEY
imsong, borap = (
    get_audio_features([track_id])
    for track_id in ("78lgmZwycJ3nzsdgmPPGNx", "1AhDOtG9vPSOmsWgNW0BEY")
)
# ignore_index=False keeps each frame's own row labels in the result.
song_frames = [imsong, borap]
new_songs = pd.concat(song_frames, sort=True, ignore_index=False)
new_songs.to_csv(r'new_songs.csv', header=True)