import pandas as pd
from plotnine import (
ggplot,
aes,
geom_boxplot,
geom_jitter,
scale_x_discrete,
coord_flip, )
A box and whiskers plot
In [1]:
The boxplot compactly displays the distribution of a continuous variable.
Read more: + wikipedia + ggplot2 docs
In [2]:
= pd.read_csv("data/flights.csv")
flights flights.head()
year | month | passengers | |
---|---|---|---|
0 | 1949 | January | 112 |
1 | 1949 | February | 118 |
2 | 1949 | March | 132 |
3 | 1949 | April | 129 |
4 | 1949 | May | 121 |
Basic boxplot
In [3]:
= [month[:3] for month in flights.month[:12]]
months print(months)
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
A Basic Boxplot
In [4]:
# Gallery, distributions
(
ggplot(flights)+ geom_boxplot(aes(x="factor(month)", y="passengers"))
+ scale_x_discrete(labels=months, name="month") # change ticks labels on OX
)
Horizontal boxplot
In [5]:
(
ggplot(flights)+ geom_boxplot(aes(x="factor(month)", y="passengers"))
+ coord_flip()
+ scale_x_discrete(
=months[::-1],
labels=flights.month[11::-1],
limits="month",
name
) )
Boxplot with jittered points:
In [6]:
(="factor(month)", y="passengers"))
ggplot(flights, aes(x+ geom_boxplot()
+ geom_jitter()
+ scale_x_discrete(labels=months, name="month") # change ticks labels on OX
)