In [18]:
%%capture
%load_ext sql
%sql sqlite:///chinook.db
In [19]:
%sql SELECT * FROM sqlite_master WHERE type='table';
 * sqlite:///chinook.db
Done.
Out[19]:
type name tbl_name rootpage sql
table album album 2 CREATE TABLE [album]
(
[album_id] INTEGER PRIMARY KEY NOT NULL,
[title] NVARCHAR(160) NOT NULL,
[artist_id] INTEGER NOT NULL,
FOREIGN KEY ([artist_id]) REFERENCES [artist] ([artist_id])
ON DELETE NO ACTION ON UPDATE NO ACTION
)
table artist artist 3 CREATE TABLE [artist]
(
[artist_id] INTEGER PRIMARY KEY NOT NULL,
[name] NVARCHAR(120)
)
table customer customer 4 CREATE TABLE [customer]
(
[customer_id] INTEGER PRIMARY KEY NOT NULL,
[first_name] NVARCHAR(40) NOT NULL,
[last_name] NVARCHAR(20) NOT NULL,
[company] NVARCHAR(80),
[address] NVARCHAR(70),
[city] NVARCHAR(40),
[state] NVARCHAR(40),
[country] NVARCHAR(40),
[postal_code] NVARCHAR(10),
[phone] NVARCHAR(24),
[fax] NVARCHAR(24),
[email] NVARCHAR(60) NOT NULL,
[support_rep_id] INTEGER,
FOREIGN KEY ([support_rep_id]) REFERENCES [employee] ([employee_id])
ON DELETE NO ACTION ON UPDATE NO ACTION
)
table employee employee 5 CREATE TABLE [employee]
(
[employee_id] INTEGER PRIMARY KEY NOT NULL,
[last_name] NVARCHAR(20) NOT NULL,
[first_name] NVARCHAR(20) NOT NULL,
[title] NVARCHAR(30),
[reports_to] INTEGER,
[birthdate] DATETIME,
[hire_date] DATETIME,
[address] NVARCHAR(70),
[city] NVARCHAR(40),
[state] NVARCHAR(40),
[country] NVARCHAR(40),
[postal_code] NVARCHAR(10),
[phone] NVARCHAR(24),
[fax] NVARCHAR(24),
[email] NVARCHAR(60),
FOREIGN KEY ([reports_to]) REFERENCES [employee] ([employee_id])
ON DELETE NO ACTION ON UPDATE NO ACTION
)
table genre genre 6 CREATE TABLE [genre]
(
[genre_id] INTEGER PRIMARY KEY NOT NULL,
[name] NVARCHAR(120)
)
table invoice invoice 7 CREATE TABLE [invoice]
(
[invoice_id] INTEGER PRIMARY KEY NOT NULL,
[customer_id] INTEGER NOT NULL,
[invoice_date] DATETIME NOT NULL,
[billing_address] NVARCHAR(70),
[billing_city] NVARCHAR(40),
[billing_state] NVARCHAR(40),
[billing_country] NVARCHAR(40),
[billing_postal_code] NVARCHAR(10),
[total] NUMERIC(10,2) NOT NULL,
FOREIGN KEY ([customer_id]) REFERENCES [customer] ([customer_id])
ON DELETE NO ACTION ON UPDATE NO ACTION
)
table invoice_line invoice_line 8 CREATE TABLE [invoice_line]
(
[invoice_line_id] INTEGER PRIMARY KEY NOT NULL,
[invoice_id] INTEGER NOT NULL,
[track_id] INTEGER NOT NULL,
[unit_price] NUMERIC(10,2) NOT NULL,
[quantity] INTEGER NOT NULL,
FOREIGN KEY ([invoice_id]) REFERENCES [invoice] ([invoice_id])
ON DELETE NO ACTION ON UPDATE NO ACTION,
FOREIGN KEY ([track_id]) REFERENCES [track] ([track_id])
ON DELETE NO ACTION ON UPDATE NO ACTION
)
table media_type media_type 9 CREATE TABLE [media_type]
(
[media_type_id] INTEGER PRIMARY KEY NOT NULL,
[name] NVARCHAR(120)
)
table playlist playlist 10 CREATE TABLE [playlist]
(
[playlist_id] INTEGER PRIMARY KEY NOT NULL,
[name] NVARCHAR(120)
)
table playlist_track playlist_track 11 CREATE TABLE [playlist_track]
(
[playlist_id] INTEGER NOT NULL,
[track_id] INTEGER NOT NULL,
CONSTRAINT [pk_playlist_track] PRIMARY KEY ([playlist_id], [track_id]),
FOREIGN KEY ([playlist_id]) REFERENCES [playlist] ([playlist_id])
ON DELETE NO ACTION ON UPDATE NO ACTION,
FOREIGN KEY ([track_id]) REFERENCES [track] ([track_id])
ON DELETE NO ACTION ON UPDATE NO ACTION
)
table track track 13 CREATE TABLE [track]
(
[track_id] INTEGER PRIMARY KEY NOT NULL,
[name] NVARCHAR(200) NOT NULL,
[album_id] INTEGER,
[media_type_id] INTEGER NOT NULL,
[genre_id] INTEGER,
[composer] NVARCHAR(220),
[milliseconds] INTEGER NOT NULL,
[bytes] INTEGER,
[unit_price] NUMERIC(10,2) NOT NULL,
FOREIGN KEY ([album_id]) REFERENCES [album] ([album_id])
ON DELETE NO ACTION ON UPDATE NO ACTION,
FOREIGN KEY ([genre_id]) REFERENCES [genre] ([genre_id])
ON DELETE NO ACTION ON UPDATE NO ACTION,
FOREIGN KEY ([media_type_id]) REFERENCES [media_type] ([media_type_id])
ON DELETE NO ACTION ON UPDATE NO ACTION
)

Selecting Albums to Purchase

The four most popular genres of tracks in terms of sales in the US are Rock, Alternative & Punk, Metal and R&B/Soul, but each of these are only 3-4% of the total number of sales in the respective genre. The Pop, Blues and Hip-hop genres are the fifth, seventh and ninth most popular genres in terms of US sales. I would therefore recommend that the store purchase the following artists:

  • Red Tone
  • Slim Jim Bites
  • Meteor and the Girls
In [50]:
%%sql 
WITH
country_sales AS
(
    SELECT invoice_id, billing_country
    FROM invoice
),
tracks_sold AS
(
    SELECT track_id, invoice_id, quantity
    FROM invoice_line
),
tracks_genre AS
(
    SELECT t.track_id, t.genre_id, g.name name
    FROM track t
    INNER JOIN genre g ON g.genre_id = t.genre_id 
),
sold_in_USA AS
(
    SELECT tg1.name genre, SUM(ts1.quantity) usa_sales  
    FROM tracks_sold ts1
    INNER JOIN country_sales cs1 ON cs1.invoice_id = ts1.invoice_id
    INNER JOIN tracks_genre tg1 ON tg1.track_id = ts1.track_id   
    WHERE cs1.billing_country = "USA"
    GROUP BY tg1.name
)

SELECT tg.name genre,
       SUM(ts.quantity) "tracks sold",
       s.usa_sales,
       SUM(ts.quantity) / s.usa_sales usa_sales_pct 
FROM tracks_sold ts
INNER JOIN tracks_genre tg ON tg.track_id = ts.track_id
INNER JOIN country_sales cs ON cs.invoice_id = ts.invoice_id
INNER JOIN sold_in_usa s ON s.genre = tg.name
GROUP BY tg.name
ORDER BY s.usa_sales DESC
 * sqlite:///chinook.db
Done.
Out[50]:
genre tracks sold usa_sales usa_sales_pct
Rock 2635 561 4
Alternative & Punk 492 130 3
Metal 619 124 4
R&B/Soul 159 53 3
Blues 124 36 3
Alternative 117 35 3
Pop 63 22 2
Latin 167 22 7
Hip Hop/Rap 33 20 1
Jazz 121 14 8
Easy Listening 74 13 5
Reggae 35 6 5
Electronica/Dance 55 5 11
Classical 47 4 11
Heavy Metal 8 3 2
Soundtrack 5 2 2
TV Shows 2 1 2

Analysing Employee Sales Performance

In terms of dollar sales, Jane Peacock has the highest performance with $1,731.51, followed by Margaret Park with $1,584.00 and then Steve Johnson with $1,393.92. The order of performance reflects exactly the order in which each sales support agent joined the company - Jane joined first, so she has more time to accrue sales, compared to Margaret and Steve.

In [75]:
%%sql
SELECT (e.first_name || " " || e.last_name) employee,
       e.hire_date,
       e.birthdate,
       ROUND(SUM(i.total),2) sales
FROM employee e
LEFT JOIN customer c ON c.support_rep_id = e.employee_id
LEFT JOIN invoice i ON i.customer_id = c.customer_id
WHERE e.title = "Sales Support Agent"
GROUP BY employee 
 
 * sqlite:///chinook.db
Done.
Out[75]:
employee hire_date birthdate sales
Jane Peacock 2017-04-01 00:00:00 1973-08-29 00:00:00 1731.51
Margaret Park 2017-05-03 00:00:00 1947-09-19 00:00:00 1584.0
Steve Johnson 2017-10-17 00:00:00 1965-03-03 00:00:00 1393.92

Analysing Sales by Country

In [98]:
%%sql

WITH 
avg_order_value_country AS
(
    SELECT c.country, AVG(i.total) avg_total
    FROM invoice i
    INNER JOIN customer c ON c.customer_id = i.customer_id
    GROUP BY country
)

SELECT c.country,
       COUNT(c.customer_id) "number of customers",
       ROUND(SUM(i.total),2) "total sales",
       CAST(SUM(i.total) / COUNT(c.customer_id) AS INTEGER) "average sales per customer",
       ROUND(aovc.avg_total,2) "Average order value"
FROM customer c
INNER JOIN invoice i ON i.customer_id = c.customer_id
INNER JOIN avg_order_value_country aovc ON aovc.country = c.country
GROUP BY c.country
ORDER BY "total sales" DESC
 * sqlite:///chinook.db
Done.
Out[98]:
country number of customers total sales average sales per customer Average order value
USA 131 1040.49 7 7.94
Canada 76 535.59 7 7.05
Brazil 61 427.68 7 7.01
France 50 389.07 7 7.78
Germany 41 334.62 8 8.16
Czech Republic 30 273.24 9 9.11
United Kingdom 28 245.52 8 8.77
Portugal 29 185.13 6 6.38
India 21 183.15 8 8.72
Ireland 13 114.84 8 8.83
Spain 11 98.01 8 8.91
Chile 13 97.02 7 7.46
Australia 10 81.18 8 8.12
Finland 11 79.2 7 7.2
Hungary 10 78.21 7 7.82
Poland 10 76.23 7 7.62
Sweden 10 75.24 7 7.52
Norway 9 72.27 8 8.03
Austria 9 69.3 7 7.7
Netherlands 10 65.34 6 6.53
Belgium 7 60.39 8 8.63
Italy 9 50.49 5 5.61
Argentina 5 39.6 7 7.92
Denmark 10 37.62 3 3.76

Albums v Individual Tracks

Out of a total of 689 invoices, the vast majority (70%) have been for compilations of individual tracks, whilst just 30% have been for album purchases. So, on the face of it, it would appear to make more sense to mainly cater for only individual tracks being added manually to a purchase. Since the store charges the same for an album as it does for each individual track being added manually, there would be no real financial benefit to purchasing whole albums, instead of the individual tracks of an album individually. But, 30% is still a sizeable minority in terms of customer demand (208 out of 689), so only purchasing the most popular tracks from an album could alienate this group. I would therefore recommend that the store continue to buy full albums, and not just the popular tracks from albums.

In [15]:
%%sql

WITH
invoice_track_category AS
(
    SELECT 
        il.invoice_id,
        t.album_id,
        COUNT(il.track_id) number_of_tracks,
        CASE
            WHEN COUNT(il.track_id) = 1 THEN "No" 
            ELSE "Yes"
        END AS is_album
    FROM invoice_line il
    INNER JOIN track t ON t.track_id = il.track_id
    GROUP BY il.invoice_id, t.album_id
),
album_purchase AS
(
    SELECT invoice_id, is_album 
    FROM invoice_track_category
    GROUP BY invoice_id, number_of_tracks
)

SELECT COUNT(invoice_id) total_invoices,
       (SELECT COUNT(is_album) FROM album_purchase WHERE is_album = "Yes") yes,
       (SELECT COUNT(is_album) FROM album_purchase WHERE is_album = "No") no,
       ROUND((CAST(COUNT(CASE WHEN is_album = "Yes" THEN 1 ELSE NULL END) AS FLOAT) / COUNT(invoice_id)),2) * 100 pct_yes,
       ROUND((CAST(COUNT(CASE WHEN is_album = "No" THEN 1 ELSE NULL END) AS FLOAT) / COUNT(invoice_id)),2) * 100 pct_no 
FROM album_purchase
 * sqlite:///chinook.db
Done.
Out[15]:
total_invoices yes no pct_yes pct_no
689 208 481 30.0 70.0

Which artist is used in the largest number of playlists?

Iron Maiden is the artist that appears in the most number of playlists

In [67]:
%%sql

WITH 
artist_album_track AS
(
    SELECT artist.artist_id, artist.name artist, track.track_id track
    FROM artist
    INNER JOIN album ON album.artist_id = artist.artist_id
    INNER JOIN track ON track.album_id = album.album_id
),
track_in_playlist AS
(
    SELECT p.playlist_id, p.name, pt.track_id
    FROM playlist p
    INNER JOIN playlist_track pt ON pt.playlist_id = p.playlist_id
)

SELECT aat.artist, COUNT(tip.name) number_of_playlists
FROM artist_album_track aat
INNER JOIN track_in_playlist tip ON tip.track_id = aat.track
GROUP BY aat.artist
ORDER BY number_of_playlists DESC
 * sqlite:///chinook.db
Done.
Out[67]:
artist number_of_playlists
Iron Maiden 516
U2 333
Metallica 296
Led Zeppelin 252
Deep Purple 226
Lost 184
Pearl Jam 177
Faith No More 145
Eric Clapton 145
Lenny Kravitz 143
Red Hot Chili Peppers 128
Van Halen 122
Os Paralamas Do Sucesso 119
Titãs 114
Guns N' Roses 114
Various Artists 112
The Rolling Stones 111
Chico Science & Nação Zumbi 108
The Office 106
Kiss 104
Foo Fighters 101
R.E.M. 96
Jamiroquai 96
Queen 94
Nirvana 93
Legião Urbana 93
Chico Buarque 87
Smashing Pumpkins 85
Green Day 83
Djavan 83
Gilberto Gil 82
Audioslave 81
Creedence Clearwater Revival 80
Ozzy Osbourne 79
The Tea Party 76
Cidade Negra 76
Santana 75
Miles Davis 75
Tim Maia 67
The Cult 64
Antônio Carlos Jobim 64
Caetano Veloso 63
Cássia Eller 62
James Brown 60
Zeca Pagodinho 57
Skank 57
House Of Pain 57
Lulu Santos 56
Milton Nascimento 55
The Clash 54
Spyro Gyra 54
Mötley Crüe 52
Body Count 51
Soundgarden 49
Planet Hemp 48
Funk Como Le Gusta 48
Frank Sinatra 48
Def Leppard 48
Battlestar Galactica (Classic) 48
Heroes 46
Amy Winehouse 46
Aerosmith 45
Gonzaguinha 44
Gene Krupa 44
UB40 42
The Police 42
Rush 42
Passengers 42
Ed Motta 42
The Who 40
Battlestar Galactica 40
Incognito 39
Alanis Morissette 39
The Black Crowes 38
Stone Temple Pilots 37
Alice In Chains 37
AC/DC 37
Page & Plant 36
Marvin Gaye 36
Marisa Monte 36
Jota Quest 36
Black Sabbath 36
Black Label Society 36
BackBeat 36
O Rappa 34
Marcos Valle 34
Jimi Hendrix 34
R.E.M. Feat. Kate Pearson 33
Judas Priest 33
Falamansa 33
Bruce Dickinson 33
Motörhead 32
Temple of the Dog 31
Jorge Ben 31
Elis Regina 31
Vinícius De Moraes 30
Toquinho & Vinícius 30
Raimundos 30
O Terço 30
Joe Satriani 30
Raul Seixas 28
Os Mutantes 28
Olodum 28
João Suplicy 28
Chris Cornell 28
Velvet Revolver 26
JET 26
Scorpions 25
Mônica Marianno 24
Godsmack 24
David Coverdale 24
Apocalyptica 24
The Doors 22
System Of A Down 22
Buddy Guy 22
Terry Bozzio, Tony Levin & Steve Stevens 21
Stevie Ray Vaughan & Double Trouble 20
Paul D'Ianno 20
Men At Work 20
Marillion 20
Cláudio Zoli 20
Pink Floyd 18
Frank Zappa & Captain Beefheart 18
Dennis Chambers 18
Billy Cobham 16
Eugene Ormandy 15
Accept 15
Berliner Philharmoniker & Herbert Von Karajan 13
The King's Singers 10
Michael Tilson Thomas & San Francisco Symphony 10
Academy of St. Martin in the Fields & Sir Neville Marriner 10
English Concert & Trevor Pinnock 9
Habib Koité and Bamada 6
Yo-Yo Ma 5
Wilhelm Kempff 5
Ton Koopman 5
Sir Georg Solti, Sumi Jo & Wiener Philharmoniker 5
Sir Georg Solti & Wiener Philharmoniker 5
Scholars Baroque Ensemble 5
Richard Marlow & The Choir of Trinity College, Cambridge 5
Philip Glass Ensemble 5
Philharmonia Orchestra & Sir Neville Marriner 5
Orchestre Révolutionnaire et Romantique & John Eliot Gardiner 5
Mela Tenenbaum, Pro Musica Prague & Richard Kapp 5
Maurizio Pollini 5
Martin Roscoe 5
Luciano Pavarotti 5
Kent Nagano and Orchestre de l'Opéra de Lyon 5
Julian Bream 5
James Levine 5
Gustav Mahler 5
Emanuel Ax, Eugene Ormandy & Philadelphia Orchestra 5
Chor der Wiener Staatsoper, Herbert Von Karajan & Wiener Philharmoniker 5
Chicago Symphony Orchestra & Fritz Reiner 5
Chicago Symphony Chorus, Chicago Symphony Orchestra & Sir Georg Solti 5
Britten Sinfonia, Ivor Bolton & Lesley Garrett 5
Boston Symphony Orchestra & Seiji Ozawa 5
Berliner Philharmoniker & Hans Rosbaud 5
Barry Wordsworth & BBC Concert Orchestra 5
Antal Doráti & London Symphony Orchestra 5
Alberto Turco & Nova Schola Gregoriana 5
Adrian Leaper & Doreen de Feis 5
Academy of St. Martin in the Fields, Sir Neville Marriner & Thurston Dart 5
Yehudi Menuhin 4
The Posies 4
The 12 Cellists of The Berlin Philharmonic 4
Sergei Prokofiev & Yuri Temirkanov 4
Royal Philharmonic Orchestra & Sir Thomas Beecham 4
Roger Norrington, London Classical Players 4
Otto Klemperer & Philharmonia Orchestra 4
Orchestra of The Age of Enlightenment 4
Nash Ensemble 4
Michele Campanella 4
London Symphony Orchestra & Sir Charles Mackerras 4
Les Arts Florissants & William Christie 4
Leonard Bernstein & New York Philharmonic 4
Karsh Kale 4
Itzhak Perlman 4
Hilary Hahn, Jeffrey Kahane, Los Angeles Chamber Orchestra & Margaret Batjer 4
Herbert Von Karajan, Mirella Freni & Wiener Philharmoniker 4
Göteborgs Symfoniker & Neeme Järvi 4
Gerald Moore 4
Fretwork 4
Felix Schmidt, London Symphony Orchestra & Rafael Frühbeck de Burgos 4
Equale Brass Ensemble, John Eliot Gardiner & Munich Monteverdi Orchestra and Choir 4
Emerson String Quartet 4
Edo de Waart & San Francisco Symphony 4
Christopher O'Riley 4
Choir Of Westminster Abbey & Simon Preston 4
Charles Dutoit & L'Orchestre Symphonique de Montréal 4
C. Monteverdi, Nigel Rogers - Chiaroscuro; London Baroque; London Cornett & Sackbu 4
Berliner Philharmoniker, Claudio Abbado & Sabine Meyer 4
Anne-Sophie Mutter, Herbert Von Karajan & Wiener Philharmoniker 4
Aisha Duo 4
Academy of St. Martin in the Fields, John Birch, Sir Neville Marriner & Sylvia McNair 4
Academy of St. Martin in the Fields Chamber Ensemble & Sir Neville Marriner 4
Aaron Copland & London Symphony Orchestra 4
Nicolaus Esterhazy Sinfonia 3
Dread Zeppelin 3
Luciana Souza/Romero Lubambo 2
Calexico 2
Cake 2
Aquaman 2
Aaron Goldberg 2

How many tracks have been purchased versus not purchased?

Of the 6,454 tracks available, 4,757 have been purchased and 1,697 have not been purchased.

In [82]:
%%sql

WITH
purchase_status AS
(
    SELECT t.name, il.quantity
    FROM track t
    LEFT JOIN invoice_line il ON il.track_id = t.track_id
)

SELECT COUNT(name) total_tracks,
       COUNT(CASE WHEN quantity > 0 THEN 1 ELSE NULL END) purchased,
       COUNT(name) - COUNT(CASE WHEN quantity > 0 THEN 1 ELSE NULL END) not_purchased 
FROM purchase_status ps
 * sqlite:///chinook.db
Done.
Out[82]:
total_tracks purchased not_purchased
6454 4757 1697

Is the range of tracks in the store reflective of their sales popularity?

I interpreted the range of tracks to mean the number of tracks by individual artists. For example, if for each artist, there was equal number of tracks, the range was pretty evenly spread out. On the other hand, there were a lot of artists with a handful of tracks and few of artists with a lot of tracks, then the range would be skewed towards the artists with the most tracks. So, for me, the question was, is the number of tracks by a particular artist reflected in the sales popularity of those tracks? (i.e. Are the most sold tracks by artists with the most number of tracks).

The first stage was to create of table showing the number of tracks in the shop's catalogue per artist.The second stage was to create another table that showed the number of sold tracks per artist. From these two tables, it was possible to identify the artists where the quantity of tracks reflected their sales popularity and the artists where the quantity of the tracks did not reflect their sales popularity.

What I found was that, for the vast majority of the artists, the number of tracks available for sale was not reflected in the number of sales of those tracks.

In [120]:
%%sql

WITH
range_of_tracks AS
(
    SELECT art.name, COUNT(t.track_id) number_tracks
    FROM track t
    INNER JOIN album alb ON alb.album_id = t.album_id
    INNER JOIN artist art ON art.artist_id = alb.artist_id
    GROUP BY art.name
),
track_popularity AS
(
    SELECT art.name, COUNT(il.track_id) number_tracks
    FROM invoice_line il
    INNER JOIN track t ON t.track_id = il.track_id
    INNER JOIN album alb ON alb.album_id = t.album_id
    INNER JOIN artist art ON art.artist_id = alb.artist_id
    GROUP BY art.name
),
range_and_popularity AS
(
    SELECT * FROM range_of_tracks INTERSECT SELECT * FROM track_popularity
),
range_not_popularity AS
(
    SELECT * FROM track_popularity EXCEPT SELECT * FROM range_of_tracks
)


SELECT COUNT(*) FROM range_and_popularity
UNION
SELECT COUNT(*) FROM range_not_popularity
 * sqlite:///chinook.db
Done.
Out[120]:
COUNT(*)
7
123