Back to Article
Scatter Plot Shapes
Download Notebook

Scatter Plot Shapes

In [1]:
from plotnine import (
    aes,
    annotate,
    coord_equal,
    element_rect,
    element_text,
    facet_wrap,
    geom_point,
    geom_segment,
    geom_text,
    geom_tile,
    ggplot,
    lims,
    scale_shape_identity,
    theme,
    theme_void,
)

import polars as pl
import numpy as np

List the shapes and prepare dataframe with what is required to display them.

In [2]:
filled_shapes = (
    "o",  # circle
    "^",  # triangle up
    "s",  # square
    "D",  # Diamond
    "v",  # triangle down
    "*",  # star
    "p",  # pentagon
    "8",  # octagon
    "<",  # triangle left
    "h",  # hexagon1
    ">",  # triangle right
    "H",  # hexagon1
    "d",  # thin diamond
)

unfilled_shapes = (
    "+",  # plus
    "x",  # x
    ".",  # point
    "1",  # tri_down
    "2",  # tri_up
    "3",  # tri_left
    "4",  # tri_right
    ",",  # pixel
    "_",  # hline
    "|",  # vline
    0,  # tickleft
    1,  # tickright
    2,  # tickup
    3,  # tickdown
    4,  # caretleft
    5,  # caretright
    6,  # caretup
    7,  # caretdown
)

n1 = len(filled_shapes)
n2 = len(unfilled_shapes)
ncols = 10
shapes = filled_shapes + unfilled_shapes

df = pl.DataFrame({
    "x": np.hstack([np.arange(n1) % ncols, np.arange(n2) % ncols]),
    "y": 4 - 1.5 * np.hstack([np.arange(n1) // ncols, np.arange(n2) // ncols]),
    "shape": pl.Series(shapes, dtype=object),
    "shape_kind": np.repeat(["Filled Shapes", "Unfilled Shapes"], (n1, n2))
})

df
shape: (31, 4)
x y shape shape_kind
i64 f64 object str
0 4.0 o "Filled Shapes"
1 4.0 ^ "Filled Shapes"
2 4.0 s "Filled Shapes"
3 4.0 D "Filled Shapes"
4 4.0 v "Filled Shapes"
3 2.5 3 "Unfilled Shapes"
4 2.5 4 "Unfilled Shapes"
5 2.5 5 "Unfilled Shapes"
6 2.5 6 "Unfilled Shapes"
7 2.5 7 "Unfilled Shapes"
In [3]:
# Gallery, points

def double_quote_strings(series):
    return [
        '"{}"'.format(s) if isinstance(s, str) else "{}".format(s)
        for s in series
    ]

(
    ggplot(df, aes("x", "y"))
    
    # Background
    + geom_tile(aes(width=0.9, height=0.9), fill="#F8F8F8")
    
    # Cross-grid to help see the centering of the shapes
    + geom_segment(aes(x="x-0.45", xend="x+0.45", yend="y"), color="Crimson", alpha=0.05)
    + geom_segment(aes(xend="x", y="y-0.45", yend="y+0.45"), color="Crimson", alpha=0.05)
    
    # The shape
    + geom_point(aes(shape="shape"), fill="#E5C8D6", size=8)

    # Specification value for the shape
    + geom_text(
        aes(y="y+.5", label="double_quote_strings(shape)"),
        color="darkblue",
        va="bottom",
    )

    # Separate the shapes by their kind
    + facet_wrap("shape_kind", ncol=1)

    # Make things pretty
    + lims(y=(2, 5))
    + scale_shape_identity()
    + coord_equal()
    + theme_void()
    + theme(
        plot_background=element_rect(fill="white"),
        strip_text=element_text(size=12)
    )
)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File ~/.uvenv/plotnine/lib/python3.13/site-packages/IPython/core/formatters.py:984, in IPythonDisplayFormatter.__call__(self, obj)
    982 method = get_real_method(obj, self.print_method)
    983 if method is not None:
--> 984     method()
    985     return True

File ~/scm/python/plotnine/plotnine/ggplot.py:149, in ggplot._ipython_display_(self)
    142 def _ipython_display_(self):
    143     """
    144     Display plot in the output of the cell
    145 
    146     This method will always be called when a ggplot object is the
    147     last in the cell.
    148     """
--> 149     self._display()

File ~/scm/python/plotnine/plotnine/ggplot.py:190, in ggplot._display(self)
    187     self.theme = self.theme.to_retina()
    189 buf = BytesIO()
--> 190 self.save(buf, "png" if format == "retina" else format, verbose=False)
    191 figure_size_px = self.theme._figure_size_px
    192 display_func = get_display_function(format, figure_size_px)

File ~/scm/python/plotnine/plotnine/ggplot.py:702, in ggplot.save(self, filename, format, path, width, height, units, dpi, limitsize, verbose, **kwargs)
    653 def save(
    654     self,
    655     filename: Optional[str | Path | BytesIO] = None,
   (...)    664     **kwargs: Any,
    665 ):
    666     """
    667     Save a ggplot object as an image file
    668 
   (...)    700         Additional arguments to pass to matplotlib `savefig()`.
    701     """
--> 702     sv = self.save_helper(
    703         filename=filename,
    704         format=format,
    705         path=path,
    706         width=width,
    707         height=height,
    708         units=units,
    709         dpi=dpi,
    710         limitsize=limitsize,
    711         verbose=verbose,
    712         **kwargs,
    713     )
    715     with plot_context(self).rc_context:
    716         sv.figure.savefig(**sv.kwargs)

File ~/scm/python/plotnine/plotnine/ggplot.py:650, in ggplot.save_helper(self, filename, format, path, width, height, units, dpi, limitsize, verbose, **kwargs)
    647 if dpi is not None:
    648     self.theme = self.theme + theme(dpi=dpi)
--> 650 figure = self.draw(show=False)
    651 return mpl_save_view(figure, fig_kwargs)

File ~/scm/python/plotnine/plotnine/ggplot.py:322, in ggplot.draw(self, show)
    319     self._create_figure()
    320 figure = self.figure
--> 322 self._build()
    324 # setup
    325 self.axs = self.facet.setup(self)

File ~/scm/python/plotnine/plotnine/ggplot.py:419, in ggplot._build(self)
    417 npscales = scales.non_position_scales()
    418 if len(npscales):
--> 419     layers.train(npscales)
    420     layers.map(npscales)
    422 # Train coordinate system

File ~/scm/python/plotnine/plotnine/layer.py:492, in Layers.train(self, scales)
    490 def train(self, scales: Scales):
    491     for l in self:
--> 492         scales.train_df(l.data)

File ~/scm/python/plotnine/plotnine/scales/scales.py:203, in Scales.train_df(self, data, drop)
    201 # Each scale trains the columns it understands
    202 for sc in self:
--> 203     sc.train_df(data)

File ~/scm/python/plotnine/plotnine/scales/scale.py:284, in scale.train_df(self, df)
    282 aesthetics = sorted(set(self.aesthetics) & set(df.columns))
    283 for ae in aesthetics:
--> 284     self.train(df[ae])

File ~/scm/python/plotnine/plotnine/scales/scale_identity.py:36, in MapTrainMixin.train(self, x, drop)
     33 if self.guide is None:  # pyright: ignore
     34     return
---> 36 return super().train(x)

File ~/scm/python/plotnine/plotnine/scales/scale_discrete.py:107, in scale_discrete.train(self, x, drop)
    104     return
    106 na_rm = not self.na_translate
--> 107 self._range.train(x, drop, na_rm=na_rm)

File ~/scm/python/plotnine/plotnine/scales/range.py:67, in RangeDiscrete.train(self, x, drop, na_rm)
     63 """
     64 Train discrete range
     65 """
     66 rng = None if self.is_empty() else self.range
---> 67 self.range = scale_discrete.train(x, rng, drop, na_rm=na_rm)

File ~/.uvenv/plotnine/lib/python3.13/site-packages/mizani/scale.py:269, in scale_discrete.train(cls, new_data, old, drop, na_rm)
    267     limits = [c for c in ordered_cats if c in all_set]
    268 else:
--> 269     new = np.unique(new_data)
    270     new.sort()
    272     limits = old + [i for i in new if (i not in old_set)]

File ~/.uvenv/plotnine/lib/python3.13/site-packages/numpy/lib/_arraysetops_impl.py:286, in unique(ar, return_index, return_inverse, return_counts, axis, equal_nan)
    284 ar = np.asanyarray(ar)
    285 if axis is None:
--> 286     ret = _unique1d(ar, return_index, return_inverse, return_counts,
    287                     equal_nan=equal_nan, inverse_shape=ar.shape, axis=None)
    288     return _unpack_tuple(ret)
    290 # axis was specified and not None

File ~/.uvenv/plotnine/lib/python3.13/site-packages/numpy/lib/_arraysetops_impl.py:353, in _unique1d(ar, return_index, return_inverse, return_counts, equal_nan, inverse_shape, axis)
    351     aux = ar[perm]
    352 else:
--> 353     ar.sort()
    354     aux = ar
    355 mask = np.empty(aux.shape, dtype=np.bool)

TypeError: '<' not supported between instances of 'int' and 'str'
<plotnine.ggplot.ggplot at 0x10acd1220>