5 Lyric Sentiments
require(sentimentr)
require(taylor)
require(ggrepel)
## taylor_album_songs from taylor package
<- taylor_album_songs %>%
lyrics_df unnest(lyrics) %>%
select(album_name, track_name, track_number, line, lyric, element)
$row_id <- 1:nrow(lyrics_df)
lyrics_df
<- get_sentences(lyrics_df$lyric, lyrics_df$row_id) # this processes
sentences_with_id ## each line as its own sentence as it's using the row_id.
<- sentiment(sentences_with_id)
sentiment_with_id <- sentiment_with_id %>%
sentiment_summary group_by(element_id) %>%
summarise(
avg_sentiment = mean(sentiment, na.rm = TRUE),
word_count = sum(word_count)
)
## Join
<- lyrics_df %>%
lyrics_sentiment left_join(sentiment_summary, by = c("row_id" = "element_id"))
## Aggregate by song to get net sentiment score
<- lyrics_sentiment %>%
song_sentiment_scores_sentimentr group_by(track_number, track_name, album_name) %>%
summarize(
sum_sentiment = sum(avg_sentiment),
total_sentiment_words = n(),
avg_sentiment = sum(avg_sentiment) / n(),
.groups = "drop"
)
<- c("Taylor Swift", "Fearless (Taylor's Version)",
albumOrder "Speak Now (Taylor's Version)", "Red (Taylor's Version)",
"1989 (Taylor's Version)", "Reputation", "Lover",
"folklore", "evermore", "Midnights",
"THE TORTURED POETS DEPARTMENT")
<- lyrics_sentiment %>%
lyrics_sentiment mutate(album_name = factor(album_name, levels = albumOrder))
<- lyrics_sentiment %>%
lyrics_sentiment filter(!is.na(album_name))
## Plot the net sentiment scores
ggplot(lyrics_sentiment, aes(x = track_number, y = avg_sentiment, fill = album_name)) +
geom_col(show.legend = FALSE) +
facet_wrap(~album_name, ncol = 3, scales = "free") +
scale_fill_manual(values = colorPaletteAlbums) +
theme_minimal() +
labs(
title = "The emotional spectrum across Taylor Swift's discography",
x = "Track Number",
y = "Net Sentiment (Positive - Negative Words)"
+
) theme(plot.title = element_text(hjust=0.5))
<- lyrics_sentiment %>%
song_sentiment_scores_sentimentr group_by(track_number, track_name, album_name) %>%
summarize(
sum_sentiment = sum(avg_sentiment),
total_sentiment_words = n(),
avg_sentiment = sum(avg_sentiment) / n(),
.groups = "drop"
)
<- surpriseSongsDressColours %>%
surpriseSongsDressColours mutate(DressColourGroup = case_when(
%in% c("Pink", "Flamingo pink") ~ "Reds",
DressName %in% c("Blue", "Ocean blue") ~ "Blues",
DressName %in% c("Yellow", "Sunset orange") ~ "Yellows",
DressName %in% c("Cotton candy", "Grapefruit", "Popsicle") ~ "Colourful",
DressName == "Blurple" ~ "Purples",
DressName == "Green" ~ "Greens"
DressName
))
<- surpriseSongsDressColours %>%
dress_song_sentiment_scores_sentimentr left_join(song_sentiment_scores_sentimentr, by = c("Song title" = "track_name")) %>%
filter(!is.na(avg_sentiment))
<- dress_song_sentiment_scores_sentimentr %>%
dress_group_sentiments group_by(DressColourGroup) %>%
summarise(
performance_weighted_sentiment = mean(avg_sentiment, na.rm = TRUE),
n_performances = n(),
.groups = 'drop'
)
## recall the CA from before
$colourGroup <- colorGroups[colorSentimentScores$colour]
colorSentimentScores<- as.data.frame.matrix(table(colorSentimentScores$colourGroup, colorSentimentScores$meaning))
cols.df <- FactoMineR::CA(cols.df) coa
## here we flip so that dim 1 is "-ve --> +ve"
<- coa$row$coord %*% diag(c(-1,1)) |>
colour_coa_table as.data.frame() |>
rownames_to_column("colour_category") |>
select(colour_category, colour_sentiment = V1) |>
as_tibble()
<- colour_coa_table %>%
colour_sentiments mutate(
DressColourGroup = case_when(
== "blues" ~ "Blues",
colour_category == "colorful" ~ "Colourful",
colour_category == "greens" ~ "Greens",
colour_category == "purples" ~ "Purples",
colour_category == "reds" ~ "Reds",
colour_category == "yellows" ~ "Yellows",
colour_category TRUE ~ NA_character_
)%>%
) filter(!is.na(DressColourGroup))
<- dress_song_sentiment_scores_sentimentr %>%
dress_group_sentiments group_by(DressColourGroup) %>%
summarise(
performance_weighted_sentiment = mean(avg_sentiment, na.rm = TRUE),
n_performances = n(),
.groups = 'drop'
)
<- dress_group_sentiments %>%
combined_analysis left_join(colour_sentiments, by = "DressColourGroup") %>%
select(DressColourGroup, performance_weighted_sentiment, colour_sentiment, n_performances)
Let’s plot.
<- dress_song_sentiment_scores_sentimentr %>%
song_performance_counts group_by(`Song title`, DressColourGroup) %>%
summarise(
avg_sentiment = first(avg_sentiment),
performance_count = n(),
.groups = 'drop'
)
<- song_performance_counts %>%
plot_data left_join(colour_sentiments, by = "DressColourGroup") %>%
filter(!is.na(colour_sentiment))
## Top songs (highest sentiment) for each color group
<- plot_data %>%
top_songs group_by(DressColourGroup) %>%
slice_max(avg_sentiment, n = 1, with_ties = FALSE) %>%
ungroup()
## Bottom songs (lowest sentiment) for each color group
<- plot_data %>%
bottom_songs group_by(DressColourGroup) %>%
slice_min(avg_sentiment, n = 1, with_ties = FALSE) %>%
ungroup()
## Medium songs (closest to median for each color group)
<- plot_data %>%
medium_songs group_by(DressColourGroup) %>%
mutate(
median_sentiment = median(avg_sentiment, na.rm = TRUE),
distance_from_median = abs(avg_sentiment - median_sentiment)
%>%
) slice_min(distance_from_median, n = 1, with_ties = FALSE) %>% # Closest to median per color (medium)
ungroup()
<- bind_rows(
highlight_songs
top_songs,
bottom_songs,%>%
medium_songs) distinct(`Song title`, DressColourGroup, .keep_all = TRUE)
ggplot(plot_data, aes(x = colour_sentiment, y = avg_sentiment)) +
geom_point(aes(size = performance_count, colour = tolower(DressColourGroup)),
alpha = 0.4, stroke = 0.8) +
geom_point(data = highlight_songs,
aes(size = performance_count, colour = tolower(DressColourGroup)),
alpha = 0.9, stroke = 1.5) +
geom_text_repel(data = highlight_songs,
aes(label = `Song title`, colour = tolower(DressColourGroup)),
size = 6,
max.overlaps = Inf,
box.padding = 0.5,
point.padding = 0.3,
min.segment.length = 0.1,
show.legend = FALSE) +
scale_size_continuous(name = "Performance\nCount",
range = c(2, 12),
breaks = c(1, 2, 3, 4, 5),
guide = guide_legend(override.aes = list(alpha = 1))) +
scale_colour_manual(values = colorPaletteGroups,
name = "Dress colour\nGroup") +
labs(title = "",
subtitle = "",
x = "Dress Colour Sentiment",
y = "Lyric Sentiment",
caption = "") +
theme_minimal() +
theme(
plot.title = element_blank(),
axis.title.x = element_text(size = 16,
margin = margin(t = 10)),
axis.title.y = element_text(size = 16),
axis.text = element_text(size = 14),
legend.title = element_text(size = 12),
legend.text = element_text(size = 11),
legend.position = "bottom",
legend.box = "horizontal"
)
What about group-wise correlation?
## We only have 6 observations, not great, but let's go ahead anyway!
## Pearson's correlation
cor.test(combined_analysis$performance_weighted_sentiment,
$colour_sentiment) combined_analysis
Pearson's product-moment correlation
data: combined_analysis$performance_weighted_sentiment and combined_analysis$colour_sentiment
t = -5.4996, df = 4, p-value = 0.005329
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.9935629 -0.5403340
sample estimates:
cor
-0.9397859
## Spearman's correlation
cor.test(combined_analysis$performance_weighted_sentiment,
$colour_sentiment,
combined_analysismethod = "spearman")
Spearman's rank correlation rho
data: combined_analysis$performance_weighted_sentiment and combined_analysis$colour_sentiment
S = 54, p-value = 0.2972
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.5428571
## Spearman's correlation with 95% CIs
::cor_test(data = combined_analysis,
correlationx = "performance_weighted_sentiment",
y = "colour_sentiment",
method = "spearman")
Parameter1 | Parameter2 | rho | 95% CI
-------------------------------------------------------------------------
performance_weighted_sentiment | colour_sentiment | -0.54 | [-0.94, 0.51]
Parameter1 | S | p
----------------------------------------------
performance_weighted_sentiment | 54.00 | 0.266
Observations: 6