【2.4.5】箱线图(matplotlib-boxplot)

一、简单的盒状图

代码:

import matplotlib.pyplot as plt
import numpy as np
 
all_data = [np.random.normal(0, std, 100) for std in range(1, 4)]
 
fig = plt.figure(figsize=(8,6))
 
plt.boxplot(all_data,
            notch=False, # box instead of notch shape
            sym='rs',    # red squares for outliers
            vert=True)   # vertical box aligmnent
 
plt.xticks([y+1 for y in range(len(all_data))], ['x1', 'x2', 'x3'])
plt.xlabel('measurement x')
t = plt.title('Box plot')
plt.show()

二、黑白盒状图

代码:

import matplotlib.pyplot as plt
import numpy as np
 
all_data = [np.random.normal(0, std, 100) for std in range(1, 4)]
 
fig = plt.figure(figsize=(8,6))
 
bplot = plt.boxplot(all_data,
            notch=False, # box instead of notch shape
            sym='rs',    # red squares for outliers
            vert=True)   # vertical box aligmnent
 
plt.xticks([y+1 for y in range(len(all_data))], ['x1', 'x2', 'x3'])
plt.xlabel('measurement x')
 
for components in bplot.keys():
    for line in bplot[components]:
        line.set_color('black')     # black lines
 
t = plt.title('Black and white box plot')
 
plt.show()

三、水平盒状图

代码:

import matplotlib.pyplot as plt
import numpy as np
 
all_data = [np.random.normal(0, std, 100) for std in range(1, 4)]
 
fig = plt.figure(figsize=(8,6))
 
plt.boxplot(all_data,
            notch=False,  # box instead of notch shape
            sym='rs',     # red squares for outliers
            vert=False)   # horizontal box aligmnent
 
plt.yticks([y+1 for y in range(len(all_data))], ['x1', 'x2', 'x3'])
plt.ylabel('measurement x')
t = plt.title('Horizontal Box plot')
plt.show()

四、圆柱盒状图

代码:

import matplotlib.pyplot as plt
import numpy as np
 
all_data = [np.random.normal(0, std, 100) for std in range(1, 4)]
 
fig = plt.figure(figsize=(8,6))
 
plt.boxplot(all_data,
            notch=True,  # notch shape
            sym='bs',     # blue squares for outliers
            vert=True,   # vertical box aligmnent
            patch_artist=True)   # fill with color
 
plt.xticks([y+1 for y in range(len(all_data))], ['x1', 'x2', 'x3'])
plt.xlabel('measurement x')
t = plt.title('Box plot')
plt.show()

五、自定义颜色填充盒状图

代码:

import matplotlib.pyplot as plt
import numpy as np
 
all_data = [np.random.normal(0, std, 100) for std in range(1, 4)]
 
fig = plt.figure(figsize=(8,6))
 
bplot = plt.boxplot(all_data,
            notch=False,  # notch shape
            vert=True,   # vertical box aligmnent
            patch_artist=True)   # fill with color
 
colors = ['pink', 'lightblue', 'lightgreen']
for patch, color in zip(bplot['boxes'], colors):
    patch.set_facecolor(color)
 
plt.xticks([y+1 for y in range(len(all_data))], ['x1', 'x2', 'x3'])
plt.xlabel('measurement x')
t = plt.title('Box plot')
plt.show()

六、带中位值

# Import Data
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")

# Draw Plot
plt.figure(figsize=(13,10), dpi= 80)
sns.boxplot(x='class', y='hwy', data=df, notch=False)

# Add N Obs inside boxplot (optional)
def add_n_obs(df,group_col,y):
    medians_dict = {grp[0]:grp[1][y].median() for grp in df.groupby(group_col)}
    xticklabels = [x.get_text() for x in plt.gca().get_xticklabels()]
    n_obs = df.groupby(group_col)[y].size().values
    for (x, xticklabel), n_ob in zip(enumerate(xticklabels), n_obs):
        plt.text(x, medians_dict[xticklabel]*1.01, "#obs : "+str(n_ob), horizontalalignment='center', fontdict={'size':14}, color='white')

add_n_obs(df,group_col='class',y='hwy')    

# Decoration
plt.title('Box Plot of Highway Mileage by Vehicle Class', fontsize=22)
plt.ylim(10, 40)
plt.show()

六、Dot + Box Plot

# Import Data
df = pd.read_csv("https://github.com/selva86/datasets/raw/master/mpg_ggplot2.csv")

# Draw Plot
plt.figure(figsize=(13,10), dpi= 80)
sns.boxplot(x='class', y='hwy', data=df, hue='cyl')
sns.stripplot(x='class', y='hwy', data=df, color='black', size=3, jitter=1)

for i in range(len(df['class'].unique())-1):
    plt.vlines(i+.5, 10, 45, linestyles='solid', colors='gray', alpha=0.2)

# Decoration
plt.title('Box Plot of Highway Mileage by Vehicle Class', fontsize=22)
plt.legend(title='Cylinders')
plt.show()

例一

代码:

from pylab import plot, show, savefig, xlim, figure, \
                hold, ylim, legend, boxplot, setp, axes

# function for setting the colors of the box plots pairs
def setBoxColors(bp):
    setp(bp['boxes'][0], color='blue')
    setp(bp['caps'][0], color='blue')
    setp(bp['caps'][1], color='blue')
    setp(bp['whiskers'][0], color='blue')
    setp(bp['whiskers'][1], color='blue')
    setp(bp['fliers'][0], color='blue')
    setp(bp['fliers'][1], color='blue')
    setp(bp['medians'][0], color='blue')

    setp(bp['boxes'][1], color='red')
    setp(bp['caps'][2], color='red')
    setp(bp['caps'][3], color='red')
    setp(bp['whiskers'][2], color='red')
    setp(bp['whiskers'][3], color='red')
    setp(bp['fliers'][2], color='red')
    setp(bp['fliers'][3], color='red')
    setp(bp['medians'][1], color='red')

# Some fake data to plot
A= [[1, 2, 5,],  [7, 2]]
B = [[5, 7, 2, 2, 5], [7, 2, 5]]
C = [[3,2,5,7], [6, 7, 3]]

fig = figure()
ax = axes()
hold(True)

# first boxplot pair
bp = boxplot(A, positions = [1, 2], widths = 0.6)
setBoxColors(bp)

# second boxplot pair
bp = boxplot(B, positions = [4, 5], widths = 0.6)
setBoxColors(bp)

# thrid boxplot pair
bp = boxplot(C, positions = [7, 8], widths = 0.6)
setBoxColors(bp)

# set axes limits and labels
xlim(0,9)
ylim(0,9)
ax.set_xticklabels(['A', 'B', 'C'])
ax.set_xticks([1.5, 4.5, 7.5])

# draw temporary red and blue lines and use them to create a legend
hB, = plot([1,1],'b-')
hR, = plot([1,1],'r-')
legend((hB, hR),('Apples', 'Oranges'))
hB.set_visible(False)
hR.set_visible(False)

savefig('boxcompare.png')
show()

例二

代码 In [1]: import pandas as pd, numpy as np

In [2]: df = pd.DataFrame(np.random.rand(12,2), columns=['Apples', 'Oranges'] )

In [3]: df['Categories'] = pd.Series(list('AAAABBBBCCCC'))

In [4]: pd.options.display.mpl_style = 'default'

In [5]: df.boxplot(by='Categories')
Out[5]: 
array([<matplotlib.axes.AxesSubplot object at 0x51a5190>,
       <matplotlib.axes.AxesSubplot object at 0x53fddd0>], dtype=object)

例三

代码

import matplotlib.pyplot as plt
import numpy as np

data_a = [[1,2,5], [5,7,2,2,5], [7,2,5]]
data_b = [[6,4,2], [1,2,5,3,2], [2,3,5,1]]

ticks = ['A', 'B', 'C']

def set_box_color(bp, color):
    plt.setp(bp['boxes'], color=color)
    plt.setp(bp['whiskers'], color=color)
    plt.setp(bp['caps'], color=color)
    plt.setp(bp['medians'], color=color)

plt.figure()

bpl = plt.boxplot(data_a, positions=np.array(xrange(len(data_a)))*2.0-0.4, sym='', widths=0.6)
bpr = plt.boxplot(data_b, positions=np.array(xrange(len(data_b)))*2.0+0.4, sym='', widths=0.6)
set_box_color(bpl, '#D7191C') # colors are from http://colorbrewer2.org/
set_box_color(bpr, '#2C7BB6')

# draw temporary red and blue lines and use them to create a legend
plt.plot([], c='#D7191C', label='Apples')
plt.plot([], c='#2C7BB6', label='Oranges')
plt.legend()

plt.xticks(xrange(0, len(ticks) * 2, 2), ticks)
plt.xlim(-2, len(ticks)*2)
plt.ylim(0, 8)
plt.tight_layout()
plt.savefig('boxcompare.png')

例四

代码

df = pd.DataFrame({'Group':['A','A','A','B','C','B','B','C','A','C'],\
                  'Apple':np.random.rand(10),'Orange':np.random.rand(10)})
df = df[['Group','Apple','Orange']]

        Group    Apple     Orange
    0      A  0.465636  0.537723
    1      A  0.560537  0.727238
    2      A  0.268154  0.648927
    3      B  0.722644  0.115550
    4      C  0.586346  0.042896
    5      B  0.562881  0.369686
    6      B  0.395236  0.672477
    7      C  0.577949  0.358801
    8      A  0.764069  0.642724
    9      C  0.731076  0.302369

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
dd=pd.melt(df,id_vars=['Group'],value_vars=['Apple','Orange'],var_name='fruits')
sns.boxplot(x='Group',y='value',data=dd,hue='fruits')

参考资料

药企,独角兽,苏州。团队长期招人,感兴趣的都可以发邮件聊聊:tiehan@sina.cn
个人公众号,比较懒,很少更新,可以在上面提问题,如果回复不及时,可发邮件给我: tiehan@sina.cn