import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
init_notebook_mode(connected=True)
cf.go_offline()
import plotly.graph_objects as go
import plotly.express as px
# Load the three CSV inputs used by the rest of the analysis.
# Forward slashes keep the relative paths portable across operating systems
# and avoid backslash pairs such as "\s" being parsed as (invalid) string
# escape sequences.
songs_by_time = pd.read_csv('MyData/songs_by_time.csv')
songs_raw = pd.read_csv('MyData/songs_raw.csv')
songs_by_artists = pd.read_csv('MyData/songs_by_artists.csv')
# Share of the tracked period that was spent listening.

# Listening time: summed minutes converted to days (24 * 60 minutes per day)
total_time_minutes = songs_raw['Minutes Played'].sum()
total_time_days = total_time_minutes / (24 * 60)

# Length of the tracked period: whole days between the dates found in the
# first and the last row of songs_raw
first_date, last_date = (
    pd.to_datetime(songs_raw['Date'].iloc[pos]) for pos in (0, -1)
)
number_of_days = (last_date - first_date).days

# Percentage calculation
percent = 100 * (total_time_days / number_of_days)

# Result
print(f"I've spent {np.round(percent, 2)}% of my time on Spotify!")
I've spent 11.01% of my time on Spotify!
It turns out I have spent 11.01% of my time listening to Spotify. That is a daily average of roughly 2.6 hours!
# Every row of songs_raw is one play, so the row count is the number of plays
total_played = len(songs_raw)
# Dates taken from the first and the last row of the log
start_date, final_date = songs_raw['Date'].iloc[[0, -1]]
print(f"I've played {total_played} songs from {start_date} to {final_date}!")
I've played 13550 songs from 06 Jan 2021 to 21 Sep 2021!
# Number of distinct song titles in the log.
# nunique() counts the distinct values directly; grouping and summing every
# column just to count the groups is wasted work (and on recent pandas the
# sum would also concatenate the text columns).
indi_songs = songs_raw['Song'].nunique()
print(f"I've played {indi_songs} different songs from {start_date} to {final_date}!")
I've played 599 different songs from 06 Jan 2021 to 21 Sep 2021!
A. Total Songs Played = 13550
B. Individual Songs Played = 599
This means that, on average, each song has been played about 22.6 times!
#Day Wise Usage
# Total minutes per date, busiest day first. 'Minutes Played' is selected
# explicitly so that the sum is never applied to the text columns (pandas >= 2.0
# no longer drops non-numeric columns silently and would concatenate them).
day_wise = (
    songs_raw.groupby('Date')[['Minutes Played']]
    .sum()
    .sort_values(by='Minutes Played', ascending=False)
)
# Hours are derived from the unrounded minutes; both columns are then rounded
# to two decimal places
day_wise['Hours Played'] = (day_wise['Minutes Played'] / 60).round(2)
day_wise['Minutes Played'] = day_wise['Minutes Played'].round(2)
#Top 10 Days with maximum number of hours
day_wise.head(10)
Minutes Played | Hours Played | |
---|---|---|
Date | ||
20 Feb 2021 | 496.43 | 8.27 |
23 Jan 2021 | 462.33 | 7.71 |
06 Feb 2021 | 457.45 | 7.62 |
23 Mar 2021 | 448.84 | 7.48 |
25 Jan 2021 | 446.96 | 7.45 |
25 Feb 2021 | 444.57 | 7.41 |
18 Jan 2021 | 439.94 | 7.33 |
08 Jan 2021 | 412.15 | 6.87 |
21 Feb 2021 | 394.75 | 6.58 |
17 Mar 2021 | 376.28 | 6.27 |
# Last 10 rows of day_wise, i.e. the 10 days with the fewest hours while the
# frame is ordered by minutes played, descending
day_wise.iloc[-10:]
Minutes Played | Hours Played | |
---|---|---|
Date | ||
27 May 2021 | 31.78 | 0.53 |
12 Jul 2021 | 27.92 | 0.47 |
26 Aug 2021 | 26.79 | 0.45 |
20 Jun 2021 | 26.25 | 0.44 |
28 Apr 2021 | 24.98 | 0.42 |
23 Aug 2021 | 24.77 | 0.41 |
28 May 2021 | 24.47 | 0.41 |
02 May 2021 | 23.22 | 0.39 |
30 Apr 2021 | 18.33 | 0.31 |
23 Apr 2021 | 17.57 | 0.29 |
# Line plot: minutes streamed per day, in day_wise's current row order
day_wise['Minutes Played'].iplot(
    color='red',
    title='Day Wise Streaming Time',
    xTitle='Days',
    yTitle='Number of Minutes',
    width=3,
)
# Line plot: the same series expressed in hours
day_wise['Hours Played'].iplot(
    color='green',
    title='Day Wise Streaming Time',
    xTitle='Days',
    yTitle='Number of Hours',
    width=3,
)
# Round-trip through CSV: after reading the file back, 'Date' is an ordinary
# column instead of the index. Forward slashes keep the path portable and
# avoid "\d" being parsed as an (invalid) string escape sequence.
day_wise.to_csv('MyData/day_wise.csv')
day_wise = pd.read_csv('MyData/day_wise.csv')
# Row labels in chronological order: parse the date strings, sort them, and
# keep the resulting index
index = pd.to_datetime(day_wise['Date']).sort_values().index
# Same rows as day_wise, rearranged from earliest to latest date
day_wise_sorted = day_wise.reindex(index)
# Bar plot: hours streamed on each date
day_wise_sorted.iplot(
    kind='bar',
    x='Date',
    y='Hours Played',
    color='green',
    title='Day Wise Streaming Time',
    xTitle='Days',
    yTitle='Number of Hours',
)
# Line plot: hours streamed on each date
day_wise_sorted.iplot(
    kind='line',
    x='Date',
    y='Hours Played',
    color='green',
    title='Day Wise Streaming Time',
    xTitle='Days',
    yTitle='Number of Hours',
    width=3,
)
# Bar plot: minutes streamed on each date
day_wise_sorted.iplot(
    kind='bar',
    x='Date',
    y='Minutes Played',
    color='blue',
    title='Day Wise Streaming Time',
    xTitle='Days',
    yTitle='Number of Minutes',
)
# Line plot: minutes streamed on each date
day_wise_sorted.iplot(
    kind='line',
    x='Date',
    y='Minutes Played',
    color='blue',
    title='Day Wise Streaming Time',
    xTitle='Days',
    yTitle='Number of Minutes',
    width=2,
)
# Derived columns on the per-artist totals.

# Hours are computed from the still-unrounded minutes
songs_by_artists['Hours Played'] = songs_by_artists['Minutes Played'] / 60

# Each artist's share of the summed minutes, as a percentage with two decimals
total_time = songs_by_artists['Minutes Played'].sum()
songs_by_artists['Percent of Overall Time (in %)'] = (
    songs_by_artists['Minutes Played'] * 100 / total_time
).round(2)

# Round the two duration columns to two decimal places
songs_by_artists['Minutes Played'] = songs_by_artists['Minutes Played'].round(2)
songs_by_artists['Hours Played'] = songs_by_artists['Hours Played'].round(2)

# First 10 rows of the artist table
songs_by_artists.head(10)
Artist | Minutes Played | Hours Played | Percent of Overall Time (in %) | |
---|---|---|---|---|
0 | Taylor Swift | 19419.25 | 323.65 | 47.61 |
1 | Céline Dion | 5278.13 | 87.97 | 12.94 |
2 | Ed Sheeran | 3249.73 | 54.16 | 7.97 |
3 | Shawn Mendes | 2141.92 | 35.70 | 5.25 |
4 | Alan Walker | 2025.89 | 33.76 | 4.97 |
5 | Lady Gaga | 1183.59 | 19.73 | 2.90 |
6 | Maren Morris | 1129.92 | 18.83 | 2.77 |
7 | Camila Cabello | 1067.44 | 17.79 | 2.62 |
8 | Dua Lipa | 819.79 | 13.66 | 2.01 |
9 | Christina Perri | 666.50 | 11.11 | 1.63 |
# Line plot: minutes streamed per artist
songs_by_artists.iplot(
    x='Artist',
    y='Minutes Played',
    color='green',
    title='Minutes Played and Artists',
    xTitle='Artists',
    yTitle='Number of Minutes',
    width=2,
)
# Line plot: hours streamed per artist
songs_by_artists.iplot(
    x='Artist',
    y='Hours Played',
    color='red',
    title='Hours Played and Artists',
    xTitle='Artists',
    yTitle='Number of Hours',
    width=2,
)
# Line plot: each artist's percentage of the overall listening time
songs_by_artists.iplot(
    x='Artist',
    y='Percent of Overall Time (in %)',
    color='red',
    title='Percentage Played and Artists',
    xTitle='Artists',
    yTitle='Percent of Overall Time (in %)',
    width=2,
)
# Pie chart of hours per artist, restricted to artists whose share of the
# overall time is greater than 0.1%
import plotly.express as px
df = songs_by_artists[songs_by_artists['Percent of Overall Time (in %)'] > 0.1]
# The slices are named after the 'Artist' column, so the title says "Artists"
fig = px.pie(df, values='Hours Played', names='Artist', title='Pie Chart of Favourite Artists')
fig.show()
A. Taylor Swift is my favourite artist. (Why am I not surprised!😂)
B. What is surprising is that I've listened to Taylor 47.6% of the time!
C. My top 10 artists are:
# Round the minutes to two digits after the decimal point
songs_by_time['Minutes Played'] = songs_by_time['Minutes Played'].round(2)
# Clean the song name: keep only the text before the first '(' and, of that,
# only the text before the first '-'
songs_by_time['Song'] = songs_by_time['Song'].apply(
    lambda title: title.split('(')[0].split('-')[0]
)
def plot_n_songs(n):
    """Draw a line plot of 'Minutes Played' for the first n rows of songs_by_time."""
    first_rows = songs_by_time.iloc[:n]
    first_rows.iplot(
        x='Song',
        y='Minutes Played',
        color='green',
        title=f'Top {n} Songs ',
        xTitle='Song',
        yTitle='Number of Minutes',
        width=2,
    )
# Line plots for the top 10, top 20 and top 50 songs, in that order
for top_n in (10, 20, 50):
    plot_n_songs(top_n)
import plotly.express as px
def pie_for_n(n):
    """Show a pie chart of 'Minutes Played' for the first n rows of songs_by_time."""
    chart = px.pie(
        songs_by_time.iloc[:n],
        values='Minutes Played',
        names='Song',
        title=f'Pie Chart of Favourite {n} Songs',
    )
    chart.show()
# Pie charts for the favourite 10, 20 and 50 songs, in that order
for favourite_n in (10, 20, 50):
    pie_for_n(favourite_n)
#Top 50 Songs
# Take an explicit copy: adding a column to a bare slice of songs_by_time
# triggers pandas' SettingWithCopyWarning and leaves it ambiguous whether the
# parent frame is affected.
top_fifty = songs_by_time[0:50].copy()
#Adding Hour Column (minutes / 60, two decimal places)
top_fifty['Hours Played'] = np.round(top_fifty['Minutes Played'] / 60, 2)
# First 20 rows of the top-50 table
top_fifty[0:20]
Song | Minutes Played | Hours Played | |
---|---|---|---|
0 | willow | 1257.92 | 20.97 |
1 | evermore | 1190.79 | 19.85 |
2 | exile | 1124.17 | 18.74 |
3 | august | 1068.40 | 17.81 |
4 | Enchanted | 1019.12 | 16.99 |
5 | You All Over Me | 995.28 | 16.59 |
6 | Let's Talk About Love | 901.29 | 15.02 |
7 | Untouchable | 856.45 | 14.27 |
8 | Afterglow | 832.94 | 13.88 |
9 | Delicate | 831.24 | 13.85 |
10 | champagne problems | 827.60 | 13.79 |
11 | Perfect | 790.90 | 13.18 |
12 | Shape of You | 785.98 | 13.10 |
13 | The Bones | 745.70 | 12.43 |
14 | Where Were You In The Morning? | 694.03 | 11.57 |
15 | Darkside | 683.20 | 11.39 |
16 | A Thousand Years | 666.50 | 11.11 |
17 | I'm Alive | 631.53 | 10.53 |
18 | All Too Well | 630.92 | 10.52 |
19 | My Heart Will Go On | 619.63 | 10.33 |
A. willow is my most streamed song. Not surprising!
B. Now comes the interesting fact: willow has been streamed for 1258 minutes. That is 21 hours!
C. Top 10 songs are:
#Creating a DataFrame containing the number of times a song has been played
# Non-null counts per song for every remaining column, ordered by the 'Date'
# count (most played first)
df = songs_raw.groupby('Song').count().sort_values(by='Date', ascending=False)
# Drop the 'Artist' and 'Date' counts and turn 'Song' back into a column
songs_played = df.drop(['Artist', 'Date'], axis=1).reset_index()
# NOTE(review): assumes exactly one count column is left after the drop -- confirm
songs_played.columns = ['Song', 'Number of Times']
# Keep songs played more than twice. The explicit copy makes the column
# assignment below operate on an independent frame instead of a filtered
# slice (which raises SettingWithCopyWarning).
songs_played = songs_played[songs_played['Number of Times'] > 2].copy()
#Cleaning the Song Name: keep the text before the first '(' and, of that,
# the text before the first '-'
songs_played['Song'] = songs_played['Song'].apply(
    lambda title: title.split('(')[0].split('-')[0]
)
# Songs played more than five times
songs_played[songs_played['Number of Times'] > 5]
Song | Number of Times | |
---|---|---|
0 | willow | 408 |
1 | You All Over Me | 319 |
2 | Afterglow | 316 |
3 | august | 287 |
4 | exile | 287 |
... | ... | ... |
131 | Stitches | 6 |
132 | Breathe | 6 |
133 | I'll Never Love | 6 |
134 | The A Team | 6 |
135 | Dancing With Our Hands Tied | 6 |
136 rows × 2 columns
# Bar plot of play counts for songs played more than five times.
# The axis titles follow the plotted columns: songs on x, play counts on y.
songs_played[songs_played['Number of Times'] > 5].iplot(
    kind='bar',
    x='Song',
    y='Number of Times',
    color='violet',
    title='Song and Times Played',
    xTitle='Songs',
    yTitle='Number of Time Played',
)
# Dictionary mapping a threshold to the number of songs in songs_played that
# were played more than that many times. Thresholds are 5, then 10, 20, ..., 340.
num_songs = {}
for n in [5, *range(10, 350, 10)]:
    played_more_often = songs_played['Number of Times'] > n
    num_songs[n] = int(played_more_often.sum())
# for n in range(50,400,50):
# number = len(songs_played[songs_played['Number of Times']>n])
# num_songs[n] = number
#Creating a DataFrame for the same
# One row per entry of num_songs: the first column holds the dictionary key
# (the play-count threshold), the second the stored number of songs. Building
# the two-column frame directly avoids creating a square frame with one column
# per key only to keep its first row.
songs_number = pd.DataFrame(
    list(num_songs.items()),
    columns=['Maximum Number of Time Played', 'Numbers of Songs'],
)
# songs_number
# Bar plot: number of songs for each play-count threshold
songs_number.iplot(
    kind='bar',
    x='Maximum Number of Time Played',
    y='Numbers of Songs',
    color='blue',
    title='Maximum Number of Times a Song is Played',
    xTitle='Maximum Number of Time Played',
    yTitle='Numbers of Songs',
)
# Pie chart of the same data, keeping only thresholds with more than one song
df = songs_number.loc[songs_number['Numbers of Songs'] > 1]
fig = px.pie(
    df,
    names='Maximum Number of Time Played',
    values='Numbers of Songs',
    title='Number of Songs Played a Maximum Given Times',
)
fig.show()
# First 10 rows of the play-count table
songs_played.iloc[:10]
Song | Number of Times | |
---|---|---|
0 | willow | 408 |
1 | You All Over Me | 319 |
2 | Afterglow | 316 |
3 | august | 287 |
4 | exile | 287 |
5 | evermore | 279 |
6 | Shape of You | 260 |
7 | The Bones | 246 |
8 | champagne problems | 244 |
9 | Darkside | 241 |
A. willow is the most streamed song which is streamed 400+ times!
B. There are 76 songs streamed over 50 times.
C. 55 songs streamed over 100 times.
D. 36 songs streamed over 150 times.
E. 16 songs streamed over 200 times.
F. 7 songs streamed over 250 times.
G. 3 songs streamed over 300 times.
# Mean number of plays per date, with no plays filtered out
songs_raw.groupby('Date')['Minutes Played'].count().mean()
52.316602316602314
# Keep only plays longer than 1.5 minutes (plays of 1.5 minutes or less are ignored)
songs_g1 = songs_raw.loc[songs_raw['Minutes Played'] > 1.5]
# Number of plays removed by the filter
len(songs_raw) - len(songs_g1)
2811
# Mean number of plays per date among the plays kept in songs_g1
average = songs_g1.groupby('Date').count()['Minutes Played'].mean()
# The builtin int() truncates the mean to a whole number; the np.int alias was
# removed from NumPy (1.24) and raises AttributeError there.
print(f"Average number of songs is {int(average)}!")
Average number of songs is 41!
Average number of songs is 41!