%load_ext autoreload
%autoreload 2
%aimport plotnine
import pandas as pd
import numpy as np
from plotnine import (
ggplot,
aes,
after_stat,
stage,
geom_bar,
geom_text,
geom_bin_2d,
stat_bin_2d, )
stage
# Small categorical demo frame: "var1" holds the bar categories (a..e, with
# 1..5 occurrences respectively) and "cat" is a two-level grouping column
# used later as the fill aesthetic.
df = pd.DataFrame({
    "var1": list("abbcccddddeeeee"),
    "cat": list("RSRSRSRRRSRSSRS"),
})
# Plain bar chart with "var1" mapped to the x aesthetic.
(
    ggplot(df, aes("var1"))
    + geom_bar()
)
Add the corresponding count on top of each bar.
# Bar chart plus a text layer; the label is mapped to the "count" value
# computed by the "count" statistic.
(
    ggplot(df, aes("var1"))
    + geom_bar()
    + geom_text(aes(label=after_stat("count")), stat="count")
)
Adjust the `y` position so that the counts do not overlap the bars.
# Same labelled bar chart, with the label's y mapped in two stages: first to
# the computed "count", then shifted by +0.1 after scaling so the text does
# not overlap the bars.
(
    ggplot(df, aes("var1"))
    + geom_bar()
    + geom_text(
        aes(
            label=after_stat("count"),
            y=stage(after_stat="count", after_scale="y+.1"),
        ),
        stat="count",
    )
)
Note that this also works nicely for stacked bars, where adjusting the position with `nudge_y=0.1` would not.
# Stacked variant: bars are filled by "cat", and the text layer uses
# position="stack" together with the +0.1 after-scale shift on y.
(
    ggplot(df, aes("var1", fill="cat"))
    + geom_bar()
    + geom_text(
        aes(
            label=after_stat("count"),
            y=stage(after_stat="count", after_scale="y+.1"),
        ),
        stat="count",
        position="stack",
    )
)
Create a binned 2d plot with counts
# Seed NumPy's global RNG so the random sample (and the resulting plot) is
# reproducible, then draw 1000 values from np.random.rand for each column.
np.random.seed(123)
df = pd.DataFrame({
    "col_1": np.random.rand(1000),
    "col_2": np.random.rand(1000),
})
# 2D binned plot of col_1 against col_2 using a bin width of 0.1.
(
    ggplot(df, aes(x="col_1", y="col_2"))
    + geom_bin_2d(position="identity", binwidth=0.1)
)
Add counts to the bins. `stat_bin_2d` bins are specified using rectangular minimum and maximum end-points for each dimension; we use these values to compute the mid-points at which to place the counts.
First, the `x` and `y` aesthetics are mapped to the `col_1` and `col_2` variables; the statistic then consumes them and creates `xmin`, `xmax`, `ymin` & `ymax` values for each bin, along with the associated `count`. After the statistic computation the `x` and `y` aesthetics do not exist, but we create meaningful values using the minimum and maximum end-points.
Note that the binning parameters for the `geom` and `stat` combination must be the same. In this case it is the `binwidth`.
# 2D binned plot with the per-bin count drawn as text.
# x and y start out mapped to the data columns; after the statistic runs they
# are re-mapped to the mid-point of each bin's min/max end-points, which is
# where the label is placed.
# binwidth is 0.1 in both the geom_bin_2d and stat_bin_2d layers so that the
# two layers use the same binning.
(
    ggplot(df, aes(x="col_1", y="col_2"))
    + geom_bin_2d(position="identity", binwidth=0.1)
    + stat_bin_2d(
        aes(
            x=stage(start="col_1", after_stat="(xmin+xmax)/2"),
            y=stage(start="col_2", after_stat="(ymin+ymax)/2"),
            label=after_stat("count"),
        ),
        binwidth=0.1,
        geom="text",
        format_string="{:.0f}",
        size=10,
    )
)