# Source: Kaggle, https://www.kaggle.com/rounakbanik/ted-talks
# Read the TED talks table from its CSV file and list the column names.
tedtalks <- read.csv(file = "../TedTalks/ted_main.csv/ted_main.csv")
colnames(tedtalks)
## [1] "comments" "description" "duration"
## [4] "event" "film_date" "languages"
## [7] "main_speaker" "name" "num_speaker"
## [10] "published_date" "ratings" "related_talks"
## [13] "speaker_occupation" "tags" "title"
## [16] "url" "views"
## What are the most common occupations of speakers?
# The 20 most frequent speaker occupations, in ascending order of frequency
# (the most common occupation is printed last).
tail(sort(table(tedtalks[["speaker_occupation"]])), n = 20L)
##
## Marine biologist Musician Physicist Biologist
## 11 11 14 15
## Philosopher Roboticist Author Economist
## 16 16 20 20
## Educator Neuroscientist Filmmaker Photographer
## 20 20 21 25
## Psychologist Inventor Architect Entrepreneur
## 26 27 30 31
## Journalist Artist Designer Writer
## 33 34 34 45
# Bar chart restricted to the 10 most frequent occupations.
barplot(height = tail(sort(table(tedtalks[["speaker_occupation"]])), n = 10L))
# The 20 events with the most talks, in ascending order of talk count.
tail(sort(table(tedtalks[["event"]])), n = 20L)
##
## TEDIndia 2009 TED2005 TED2006 TEDGlobal 2014 TEDGlobal 2010
## 35 37 45 51 55
## TED2008 TED2012 TEDGlobal 2009 TEDGlobal 2013 TED2017
## 57 65 65 66 67
## TED2007 TED2010 TEDGlobal 2011 TED2011 TEDGlobal 2012
## 68 68 68 70 70
## TED2015 TED2013 TED2016 TED2009 TED2014
## 75 77 77 83 84
# Combined length of all talks, divided by 24 * 60 * 60
# (presumably converting seconds to days -- confirm the unit of `duration`).
with(tedtalks, sum(duration)) / (24 * 60 * 60)
## [1] 24.39353
# Mean talk length divided by 60 (presumably minutes).
with(tedtalks, mean(duration)) / 60
## [1] 13.77517
# Median talk length divided by 60.
with(tedtalks, median(duration)) / 60
## [1] 14.13333
# Most frequent talk length divided by 60. The tabulation's names are the
# distinct duration values, and which.max() picks the first one with the
# highest count.
as.numeric(names(which.max(table(tedtalks[["duration"]])))) / 60
## [1] 14.13333
# Speaker, title and URL of every talk whose duration equals the maximum.
with(tedtalks, main_speaker[duration == max(duration)])
## [1] Douglas Adams
## 2156 Levels: OK Go Rodrigo y Gabriela Silk Road Ensemble ... Zubaida Bai
with(tedtalks, title[duration == max(duration)])
## [1] Parrots, the universe and everything
## 2550 Levels: Hidden miracles of the natural world ...
with(tedtalks, url[duration == max(duration)])
## [1] https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything\n
## 2550 Levels: https://www.ted.com/talks/9_11_healing_the_mothers_who_found_forgiveness_friendship\n ...
# Durations (divided by 60) of all talks by that speaker, next to the overall
# maximum for comparison.
with(tedtalks, duration[main_speaker == "Douglas Adams"]) / 60
## [1] 87.6
with(tedtalks, max(duration)) / 60
## [1] 87.6
# Speaker, title and URL of every talk whose duration equals the minimum.
with(tedtalks, main_speaker[duration == min(duration)])
## [1] Murray Gell-Mann
## 2156 Levels: OK Go Rodrigo y Gabriela Silk Road Ensemble ... Zubaida Bai
with(tedtalks, title[duration == min(duration)])
## [1] The ancestor of language
## 2550 Levels: Hidden miracles of the natural world ...
with(tedtalks, url[duration == min(duration)])
## [1] https://www.ted.com/talks/murray_gell_mann_on_the_ancestor_of_language\n
## 2550 Levels: https://www.ted.com/talks/9_11_healing_the_mothers_who_found_forgiveness_friendship\n ...
# Durations (divided by 60) of all talks by that speaker -- this returns one
# value per talk -- next to the overall minimum for comparison.
with(tedtalks, duration[main_speaker == "Murray Gell-Mann"]) / 60
## [1] 16.03333 2.25000
with(tedtalks, min(duration)) / 60
## [1] 2.25
# Correlation between talk duration and number of views.
cor(x = tedtalks[["duration"]], y = tedtalks[["views"]])
## [1] 0.04874043
# Correlation between talk duration and number of comments, with a scatter plot.
cor(x = tedtalks[["duration"]], y = tedtalks[["comments"]])
## [1] 0.1406936
plot(
  x = tedtalks[["duration"]],
  y = tedtalks[["comments"]],
  xlab = "Duration",
  ylab = "Number of comments"
)
# Correlation between number of views and number of comments, with a scatter
# plot.
cor(x = tedtalks[["views"]], y = tedtalks[["comments"]])
## [1] 0.5309387
plot(
  x = tedtalks[["views"]],
  y = tedtalks[["comments"]],
  xlab = "Number of views",
  ylab = "Number of comments"
)
# Number of talks given by each main speaker. Computed once and reused by
# every summary below instead of re-tabulating the column each time.
talks_per_speaker <- table(tedtalks$main_speaker)
# The 20 speakers with the most talks, in ascending order of talk count.
tail(sort(talks_per_speaker), 20)
##
## Jonathan Drori Jonathan Haidt Ken Robinson
## 4 4 4
## Kevin Kelly Lawrence Lessig Robert Full
## 4 4 4
## Stefan Sagmeister Steven Johnson Stewart Brand
## 4 4 4
## Tom Wujec Bill Gates Clay Shirky
## 4 5 5
## Dan Ariely Jacqueline Novogratz Julian Treasure
## 5 5 5
## Nicholas Negroponte Marco Tempest Rives
## 5 6 6
## Juan Enriquez Hans Rosling
## 7 9
# Mean, median and standard deviation of talks per speaker.
mean(talks_per_speaker)
## [1] 1.182746
median(talks_per_speaker)
## [1] 1
sd(talks_per_speaker)
## [1] 0.5747987
# Number of speakers with exactly one talk. The bucket is selected by its
# label "1" rather than by position, so the result stays correct even when
# the smallest tally is not 1 (e.g. unused factor levels producing a 0 bucket).
# as.vector() keeps the nested table's dimension unnamed, so the printed
# output has no extra header line.
table(as.vector(talks_per_speaker))["1"]
## 1
## 1880
# Distribution of speakers by how many talks they gave.
barplot(
  table(as.vector(talks_per_speaker)),
  xlab = "Number of talks",
  ylab = "Number of main speakers"
)