`plt` and (when needed) matplotlib as `mpl`.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.__version__  # evaluates to the installed matplotlib version string (shown as cell output in a notebook)
`y` and `z` having a specified correlation with `x`.
# Simulate the example data: x is standard normal, and y and z are built so
# that their population correlations with x are rho[0] and rho[1].
rng = np.random.default_rng(9 * 26 * 2021)  # fixed seed -> reproducible draws
n = 100  # number of observations
rho = (0.3, 0.7)  # target correlations of y and z with x
x = rng.normal(size=n)
# rho * x + sqrt(1 - rho**2) * noise has unit variance and correlation rho
# with x; the additive constants (2 and pi) only shift the means.
y = 2 + rho[0] * x + np.sqrt(1 - rho[0] ** 2) * rng.normal(size=n)
z = np.pi + rho[1] * x + np.sqrt(1 - rho[1] ** 2) * rng.normal(size=n)
df = pd.DataFrame({'x': x, 'y': y, 'z': z})
df  # bare expression: displays the frame when run as a notebook cell
TIP One nuance of using Jupyter notebooks is that plots are reset after each cell is evaluated, so for more complex plots you must put all of the plotting commands in a single notebook cell.
--Wes McKinney
`Figure` object, `plt.figure()`.
`.add_subplot()`.
# fig0: create an empty figure, then add a 3-row, 1-column grid of subplots
# one position at a time.
fig0 = plt.figure()
ax0 = fig0.add_subplot(3, 1, 1)
ax1 = fig0.add_subplot(3, 1, 2)
ax2 = fig0.add_subplot(3, 1, 3)
fig0.tight_layout()
# pyplot-level calls draw on the current axes; the most recently added
# subplot (ax2) should be current here, so both scatters go to the bottom panel.
_ = plt.scatter(x, y, color='darkgreen')
_ = plt.scatter(x, z, color='red')
# Axes methods target a specific subplot regardless of which one is current.
_ = ax0.hist(x)
# alpha=0.5 keeps both histograms visible where they overlap.
_ = ax1.hist(y, color='darkgreen', alpha=0.5)
_ = ax1.hist(z, color='red', alpha=0.5)
`fig.subplots_adjust()`
`fig.tight_layout()` tends to produce nice spacing.
`plt.subplots()` simplifies creating a figure with a given subplot layout.
`sharex` and `sharey`
# ... to reduce clutter when plotting on a common scale.
# fig1: a histogram of x stacked above scatter plots of y and z against x;
# sharex=True gives the three rows a common x-axis.
fig1, axes = plt.subplots(nrows=3, ncols=1, sharex=True)
fig1.tight_layout()
_ = axes[0].hist(x)
_ = axes[1].scatter(x, y, color='darkgreen')
_ = axes[2].scatter(x, z, color='red')
# Same y-range on both scatter panels so they can be compared directly.
_, _ = axes[1].set_ylim(-2, 6), axes[2].set_ylim(-2, 6)
# Only the bottom panel gets an x label because the x-axis is shared.
_ = axes[2].set_xlabel('x')
for panel, lab in zip(axes, ['count', 'y', 'z']):
    _ = panel.set_ylabel(lab)
Scatter plots (`plt.scatter()`) are useful for showing the relationship
between two variables.
`.set_*label()` method of an axes.
`label`
# ... parameter in a plotting artist for a simple legend.
# fig2: overlay y and z against x on a single axes.
fig2, ax2 = plt.subplots(nrows=1, ncols=1)
# plt.scatter() draws on the current axes, i.e. the one just created above.
# The label= text on each artist is what ax2.legend() displays.
_ = plt.scatter(x, y, color='darkgreen', alpha=0.5, label='y')
_ = plt.scatter(x, z, color='red', alpha=0.5, label='z')
_, _ = ax2.set_xlabel('x (IV)'), ax2.set_ylabel('y or z (DVs)')
_ = ax2.legend(loc='upper left')
`plt.errorbar()` for this.
# compute mean, se, and CI half-width
# Per-variable means as a tidy frame with columns 'variable' and 'mean'.
df_bar = df.mean().reset_index()
df_bar = df_bar.rename(columns={'index': 'variable', 0: 'mean'})
# Standard error of each mean: sample sd / sqrt(number of observations).
# df.count() is the per-column count of non-missing rows. df.size must not be
# used here: it is rows * columns, which would shrink every standard error
# (and the confidence intervals built from it) by sqrt(number of columns).
df_se = (df.std() / np.sqrt(df.count())).reset_index()
df_se = df_se.rename(columns={'index': 'variable', 0: 'se'})
## figure 3
# Means plotted as points ('o') with horizontal error bars of 1.96 * se
# on either side; the variable names form the y-axis.
fig3, ax3 = plt.subplots(nrows=1, ncols=1)
ci_half_width = df_se['se'] * 1.96
_ = plt.errorbar(
    x=df_bar['mean'], y=df_bar['variable'], xerr=ci_half_width, fmt='o'
)
_ = ax3.set_xlabel('Mean and 95% CI (xx units)')
`plt.errorbar()` for both point and interval estimates as on
the last slide, or set `fmt='None'` (the string, not Python's `None`)
to draw error bars only.
`xerr` or `yerr` for asymmetric intervals.
## figure 4
fig4, ax4 = plt.subplots(nrows=1, ncols=1)
# Point estimates as black squares; x and y are column names looked up in
# the frame passed as data=.
_ = plt.scatter(
    data=df_bar, x='mean', y='variable', marker='s', color='black'
)
# Error bars only (fmt='None' draws no markers). xerr is given as a pair of
# (lower, upper) half-widths, here both 1.96 * se.
half_width = df_se['se'] * 1.96
_ = plt.errorbar(
    x=df_bar['mean'],
    y=df_bar['variable'],
    fmt='None',
    xerr=(half_width, half_width),
    ecolor='gray',
    capsize=4,
)
_ = ax4.set_xlabel('Mean and 95% CI (xx units)')
`marker` or shape of plotting point,
`color` for points, lines, and more, distinguish fill and edge colors,
`alpha` transparency level (especially useful when plot elements overlap),
`linestyle` the type or style of line to draw: solid, dashed, dotted, etc.
`markersize` the size of points to draw
`Line2D` properties at `plt.plot()` or refer to a matplotlib
cheat sheet.
`.plot()` method, which itself has a family of methods for
specific types of plots.
`kind` parameter, but I prefer the methods for
better interactive help.
`ax` you can use to specify the axes to plot on.
## figure 5
# Same three-panel layout as before, drawn through the pandas .plot accessor;
# ax= tells each pandas call which existing axes to draw on.
fig5, ax5 = plt.subplots(nrows=3, ncols=1, sharex=True)
fig5.tight_layout()
_ = df['x'].plot.hist(ax=ax5[0])
_ = df.plot.scatter('x', 'y', ax=ax5[1], color='darkgreen')
_ = df.plot.scatter('x', 'z', ax=ax5[2], color='red')
# Set the y labels explicitly, one per panel from top to bottom.
for panel, lab in zip(ax5, ['count', 'y', 'z']):
    _ = panel.set_ylabel(lab)
# Histogram of all columns of df in one call, one colour per column, with
# alpha=0.5 so overlapping bars stay visible. Note this rebinds ax0.
hist_colors = ['darkred', 'darkgreen', 'blue']
ax0 = df.plot.hist(color=hist_colors, alpha=0.5)
_ = ax0.set_xlabel('x')
type(ax0)  # bare expression: shows what kind of object the pandas call returned
plt
# ... plotting function.
# The first pandas scatter returns the axes it drew on; passing that back
# through ax= puts the second scatter on the same axes. Note this rebinds ax2.
ax2 = df.plot.scatter(x='x', y='y', color='darkgreen', alpha=0.5, label='y')
_ = df.plot.scatter(ax=ax2, x='x', y='z', color='red', alpha=0.5, label='z')
_ = ax2.set_ylabel('y/z (DVs)')
_ = ax2.legend(loc='upper left')
`.plot()` API when plot data is already in a DataFrame.
## figure 6 (like figure 4)
# Attach the standard errors to df_bar. The membership test (on column
# labels) skips the merge when an 'se' column is already present, so the
# merge is not repeated if this code runs again.
if 'se' not in df_bar:
    df_bar = pd.merge(df_bar, df_se, on='variable')
# Margin of error: half-width of the 95% interval.
df_bar['moe'] = 1.96 * df_bar['se']
# Point estimates via the pandas accessor, which returns the axes drawn on.
ax6 = df_bar.plot.scatter(x='mean', y='variable', marker='s', color='black')
# Error bars only (fmt='None'); x, y and xerr are column names resolved
# against the frame passed as data=.
_ = plt.errorbar(
    data=df_bar,
    x='mean',
    y='variable',
    fmt='None',
    xerr='moe',
    ecolor='gray',
    capsize=4,
)
_ = ax6.set_xlabel('Mean and 95% CI (xx units)')