import polars as pl
from plotnine import *
import datetime
A Century of Screams
area
contest
We analyze the trends and box-office success of horror movies over the years. We will explore the data to understand the rise of horror films and identify the top-grossing English-language movies per decade.
Read the movie data
# Load the horror-movie dataset, parsing `release_date` as a date so that
# the release year can be extracted from it.
movies = pl.read_csv(
    "data/english_horror_movies.csv",
    schema_overrides={"release_date": pl.Date},
).with_columns(
    # Calendar year of release, used for the per-year and per-decade tallies.
    year=pl.col("release_date").dt.year(),
    # Revenue divided by one million and rounded to a whole number.
    revenue_millions=(pl.col("revenue") / 1_000_000).round(0).cast(int),
)

movies.head()
shape: (5, 13)
title | release_date | revenue | popularity | vote_count | vote_average | budget | runtime | adult | genre_names | collection_name | year | revenue_millions |
---|---|---|---|---|---|---|---|---|---|---|---|---|
str | date | i64 | f64 | i64 | f64 | i64 | i64 | bool | str | str | i32 | i64 |
"Orphan: First Kill" | 2022-07-27 | 9572765 | 5088.584 | 902 | 6.9 | 0 | 99 | false | "Horror, Thriller" | "Orphan Collection" | 2022 | 10 |
"Beast" | 2022-08-11 | 56000000 | 2172.338 | 584 | 7.1 | 0 | 93 | false | "Adventure, Drama, Horror" | "NA" | 2022 | 56 |
"Smile" | 2022-09-23 | 45000000 | 1863.628 | 114 | 6.8 | 17000000 | 115 | false | "Horror, Mystery, Thriller" | "NA" | 2022 | 45 |
"The Black Phone" | 2022-06-22 | 161000000 | 1071.398 | 2736 | 7.9 | 18800000 | 103 | false | "Horror, Thriller" | "NA" | 2022 | 161 |
"Jeepers Creepers: Reborn" | 2022-09-15 | 2892594 | 821.605 | 125 | 5.8 | 20000000 | 88 | false | "Horror, Mystery, Thriller" | "Jeepers Creepers Collection" | 2022 | 3 |
Get a tally of the movies per year
# Count the movies released in each year, ordered chronologically.
movies_per_year = movies.group_by("year").len("count").sort("year")

movies_per_year.head()
shape: (5, 2)
year | count |
---|---|
i32 | u32 |
1950 | 2 |
1951 | 9 |
1952 | 5 |
1953 | 16 |
1954 | 15 |
Calculate the top movies per decade
# For every decade before 2020, pick the movie with the highest revenue and
# build the text label and vertical label position used by the plot.
top_movies_per_decade = (
    movies.with_columns(
        # Floor the year to the start of its decade (e.g. 1975 -> 1970).
        decade=(pl.col("year") // 10) * 10,
    )
    .filter(
        pl.col("year") < 2020,
    )
    # Sort by revenue first so that `first()` below takes the top earner
    # of each decade.
    .sort(
        "revenue_millions",
        descending=True,
    )
    .group_by(
        "decade",
    )
    .first()
    .select("title", "year", "decade", "revenue_millions")
    .sort("decade")
    .with_columns(
        label=pl.col("title") + "\n$" + pl.col("revenue_millions").cast(str) + "M",
        # Keep the labels in order of revenue, and adjust them
        # vertically so that they do not overlap
        label_position=pl.when(
            pl.col("title") == "Van Helsing"
        )
        .then(
            pl.col("revenue_millions") + 300
        )
        .otherwise(
            pl.col("revenue_millions") + 200
        ),
    )
)

top_movies_per_decade
shape: (7, 6)
title | year | decade | revenue_millions | label | label_position |
---|---|---|---|---|---|
str | i32 | i32 | i64 | str | i64 |
"House of Wax" | 1953 | 1950 | 24 | "House of Wax $24M" | 224 |
"Psycho" | 1960 | 1960 | 50 | "Psycho $50M" | 250 |
"Jaws" | 1975 | 1970 | 471 | "Jaws $471M" | 671 |
"Gremlins" | 1984 | 1980 | 153 | "Gremlins $153M" | 353 |
"The Blair Witch Project" | 1999 | 1990 | 249 | "The Blair Witch Project $249M" | 449 |
"Van Helsing" | 2004 | 2000 | 300 | "Van Helsing $300M" | 600 |
"It" | 2017 | 2010 | 702 | "It $702M" | 902 |
For the contest submission, Deepali used Corbel, a proprietary font that matches well with the theme of the plot. Since Corbel isn’t freely available, we instead use Lato, which is an open-source font that is likely available on most systems.
# Font family applied to all text in the plot theme.
body_font = "Lato"
Plot
# Area chart of horror movies released per year, annotated with the
# top-grossing movie of each decade.
(
    ggplot(movies_per_year, aes("year", "count"))
    + geom_area(fill="#911010")
    + geom_line(color="#F40D0D", size=1.3)
    # Dashed vertical guide from the x-axis up to each label.
    + geom_segment(
        aes(x="year", xend="year", y=0, yend="label_position"),
        top_movies_per_decade,
        color="#ADB5BD",
        linetype="dashed",
        size=0.4,
    )
    # Title-and-revenue label for the top movie of each decade.
    + geom_label(
        aes(
            "year",
            "label_position",
            label="label",
        ),
        top_movies_per_decade,
        color="#DEE2E6",
        fill="black",
        size=8,
        lineheight=1.5,
    )
    + labs(
        y="Number of Horror Movies",
        title="The Rise of Horror: A Century of Screams and Box Office Success",
        subtitle="Charting the exponential growth of horror films and their top-grossing entries per decade",
        caption="Graphic: Deepali Kank",
    )
    + scale_x_continuous(breaks=range(1950, 2022, 10), expand=(0, 1, 0, 0))
    + scale_y_continuous(expand=(0, 0))
    + theme(
        figure_size=(8, 6),
        plot_margin=0.015,
        panel_background=element_rect(fill="black"),
        plot_background=element_rect(fill="black"),
        plot_title=element_text(size=18),
        text=element_text(color="#D7DADD", family=body_font),
        panel_grid=element_blank(),
        panel_grid_major_x=element_blank(),
        axis_ticks=element_blank(),
        plot_title_position="plot",
        axis_title_x=element_blank(),
        axis_text_x=element_text(margin={"t": 5, "units": "pt"}),
        plot_caption=element_text(margin={"t": 10, "units": "pt"}),
    )
)
Where to find Deepali Kank: