The packages used in this tutorial: Matplotlib, Pandas, Seaborn, ggpy, and Altair.

For more details about data visualization in Python, there is a cookbook that is easy to learn and use: Python Data Visualization Cookbook.

We are going to draw the same type of graph using these packages at the same time, making it easier to understand and distinguish the difference.

TOPIC 1: Time Series Lines

We’ll be dealing with a tidy data set named “ts”. It consists of three columns: a “dt” column (for dates), a “value” column (for values), and a “kind” column, which has four unique levels: A, B, C, and D. Here’s a preview…
Matplotlib

# Matplotlib: draw one line per level of "kind" on a single shared axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.5, 5))

for kind in ts.kind.unique():
    # Rows belonging to this level only.
    subset = ts[ts.kind == kind]
    ax.plot(subset.dt, subset.value, label=kind)

ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.set_title('Random Timeseries')

# loc=2 is matplotlib's numeric code for the upper-left corner.
ax.legend(loc=2)
# Rotate the date tick labels so they do not overlap.
fig.autofmt_xdate()

Pandas

# Pandas: reshape the tidy frame to wide form -- dates as the index,
# one column per level of "kind" -- so DataFrame.plot draws one line each.
wide = ts.pivot(index='dt', columns='kind', values='value')
wide.head()  # preview of the reshaped frame; only displayed in an interactive session

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.5, 5))
wide.plot(ax=ax)

ax.set_xlabel('Date')
ax.set_ylabel('Value')
ax.set_title('Random Timeseries')

# loc=2 is matplotlib's numeric code for the upper-left corner.
ax.legend(loc=2)
fig.autofmt_xdate()

SEABORN

# Seaborn: a single-facet grid whose hue groups the rows by "kind";
# plt.plot is then mapped over each group.
# NOTE(review): seaborn 0.9 renamed FacetGrid's `size` to `height` --
# confirm which seaborn version this tutorial targets before changing it.
g = sns.FacetGrid(ts, hue='kind', size=5, aspect=1.5)
g.map(plt.plot, 'dt', 'value')
g.add_legend()

g.ax.set_xlabel('Date')
g.ax.set_ylabel('Value')
g.ax.set_title('Random Timeseries')

g.fig.autofmt_xdate()

GGPY

# NOTE(review): fig/ax are not passed to ggplot below, so this figure looks
# unused by the chart -- confirm whether it is needed.
fig, ax = plt.subplots(1, 1, figsize=(7.5, 5))

# ggpy: map columns to aesthetics once, then add layers and labels with "+".
(
    ggplot(ts, aes(x='dt', y='value', color='kind'))
    + geom_line(size=2.0)
    + xlab('Date')
    + ylab('Value')
    + ggtitle('Random Timeseries')
)

ALTAIR

# Altair: declare a line mark and which column drives each channel.
Chart(ts).mark_line().encode(x='dt', y='value', color='kind')

TOPIC 2: Scatterplot

In Topics 2-4, we’ll be dealing with the famous “iris” data set [though we refer to it as “df” in our code]. It consists of four numeric columns corresponding to various measurements, and a categorical column corresponding to one of three species of iris. Here’s a preview…

Matplotlib

# Matplotlib: one scatter call per species so each gets its own colour
# and legend entry.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7.5, 7.5))

for idx, species in enumerate(df.species.unique()):
    subset = df[df.species == species]
    # cp is defined outside this snippet (presumably a colour palette);
    # entry idx colours this species.
    ax.scatter(subset.petalLength, subset.petalWidth,
               label=species, color=cp[idx])

ax.set_xlabel('Petal Length')
ax.set_ylabel('Petal Width')
ax.set_title('Petal Width v. Length -- by Species')

# loc=2 is matplotlib's numeric code for the upper-left corner.
ax.legend(loc=2)

No pandas this time
SEABORN

# Seaborn: single-facet grid coloured by species, with plt.scatter mapped
# over each hue group.
# NOTE(review): seaborn 0.9 renamed FacetGrid's `size` to `height` --
# confirm the targeted seaborn version before changing it.
g = sns.FacetGrid(df, hue='species', size=7.5)
g.map(plt.scatter, 'petalLength', 'petalWidth')
g.add_legend()
g.ax.set_title('Petal Width v. Length -- by Species')

GGPY

# ggpy: colour is mapped to species in aes(), so one geom_point layer is
# enough.
(
    ggplot(df, aes(x='petalLength', y='petalWidth', color='species'))
    + geom_point(size=40.0)
    + ggtitle('Petal Width v. Length -- by Species')
)

ALTAIR

# Altair: filled point marks, coloured by species.
Chart(df).mark_point(filled=True).encode(x='petalLength', y='petalWidth', color='species')

TOPIC 3: Faceted Scatterplot

Matplotlib

# Matplotlib: three side-by-side panels that share both axes, one per species.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 5),
                       sharex=True, sharey=True)

for idx, species in enumerate(df.species.unique()):
    subset = df[df.species == species]
    panel = ax[idx]

    # cp is defined outside this snippet (presumably a colour palette).
    panel.scatter(subset.petalLength, subset.petalWidth, c=cp[idx])

    panel.set_xlabel('Petal Length')
    panel.set_ylabel('Petal Width')
    panel.set_title(species)

fig.tight_layout()

Pandas: skipped for this topic.
SEABORN

# Seaborn: one column of the grid per species, also coloured by species.
# NOTE(review): seaborn 0.9 renamed FacetGrid's `size` to `height` --
# confirm the targeted seaborn version before changing it.
g = sns.FacetGrid(data=df, col='species', hue='species', size=5)
g.map(plt.scatter, 'petalLength', 'petalWidth')

GGPY

# ggpy: facet_grid splits the plot into one panel per species.
(
    ggplot(df, aes(x='petalLength', y='petalWidth', color='species'))
    + facet_grid(y='species')
    + geom_point(size=40.0)
)

ALTAIR

# Altair: the column channel facets the chart by species; its title is
# attached to that channel.
c = Chart(df).mark_point().encode(
    x='petalLength',
    y='petalWidth',
    color='species',
    column=Column('species', title='Petal Width v. Length by Species'),
)
# Size of each facet cell.
c.configure_cell(height=300, width=300)

TOPIC 4: Distributions and Bars

Matplotlib

# Matplotlib: one box per species, built from the raw petalWidth arrays.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))

widths_by_species = [
    df[df.species == s]['petalWidth'].values
    for s in df.species.unique()
]
ax.boxplot(widths_by_species)

# Tick labels use the same species order as the boxes above.
ax.set(xticklabels=df.species.unique(),
       xlabel='Species',
       ylabel='Petal Width',
       title='Distribution of Petal Width by Species')

# Matplotlib: overlay one semi-transparent histogram per species.
fig, ax = plt.subplots(1, 1, figsize=(10, 10))

# The positional index from enumerate() was never used, so iterate the
# species directly.
for s in df.species.unique():
    tmp = df[df.species == s]
    # alpha < 1 keeps overlapping bars from hiding each other.
    ax.hist(tmp.petalWidth, label=s, alpha=.8)

ax.set(xlabel='Petal Width',
       ylabel='Frequency',
       title='Distribution of Petal Width by Species')

# loc=1 is matplotlib's numeric code for the upper-right corner.
ax.legend(loc=1)

Pandas

# Pandas: DataFrame.boxplot groups petalWidth by species on the supplied axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
df.boxplot(column='petalWidth', by='species', ax=ax)

# Pandas: DataFrame.hist with by= draws the histograms split by species.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
df.hist(column='petalWidth', by='species', grid=None, ax=ax)

SEABORN

fig, ax = plt.subplots(1, 1, figsize=(10, 10))

# Seaborn: pass x and y by keyword. Older seaborn accepts them positionally,
# but newer releases only allow `data` as a positional argument, so the
# keyword form works across versions.
g = sns.boxplot(x='species', y='petalWidth', data=df, ax=ax)
g.set(title='Distribution of Petal Width by Species')

# Seaborn: histogram (no KDE) plus rug marks of petalWidth per species.
# NOTE(review): seaborn 0.9 renamed FacetGrid's `size` to `height`, and
# later releases deprecate distplot -- confirm the targeted version.
g = sns.FacetGrid(df, hue='species', size=7.5)

g.map(sns.distplot, 'petalWidth', bins=10, kde=False, rug=True)
g.add_legend()

g.set(
    xlabel='Petal Width',
    ylabel='Frequency',
    title='Distribution of Petal Width by Species',
)

GGPY

# ggpy: box plot of petalWidth per species.
(
    ggplot(df, aes(x='species', y='petalWidth', fill='species'))
    + geom_boxplot()
    + ggtitle('Distribution of Petal Width by Species')
)

# ggpy: histogram of petalWidth, filled by species.
(
    ggplot(df, aes(x='petalWidth', fill='species'))
    + geom_histogram()
    + ylab('Frequency')
    + ggtitle('Distribution of Petal Width by Species')
)

ALTAIR

# Altair: bin petalWidth on x (at most 30 bins) and count rows per bin on y.
binned_width = X('petalWidth', bin=Bin(maxbins=30))
# Use the hex codes from cp (defined outside this snippet) for the colour scale.
species_color = Color('species', scale=Scale(range=cp.as_hex()))

Chart(df).mark_bar(opacity=.75).encode(
    x=binned_width,
    y='count(*)',
    color=species_color,
)

TOPIC 5: Bar Chart

In this topic we’ll be dealing with “titanic,” another famous tidy dataset [although again, we refer to it as “df” in our code]. Here’s a preview…
Matplotlib

# Mean fare for every (survived, pclass) combination.
dfg = df.groupby(['survived', 'pclass']).agg({'fare': 'mean'})
# Select each value of the first index level: survived == 0 and survived == 1.
died = dfg.loc[0, :]
survived = dfg.loc[1, :]
 
# more or less copied from matplotlib's own
# api example
fig, ax = plt.subplots(1, 1, figsize=(12.5, 7))
 
N = 3  # number of bar groups; presumably one per passenger class -- confirm against the data
 
ind = np.arange(N) # the x locations for the groups
width = 0.35 # the width of the bars
 
# Two series side by side: the second one is shifted right by one bar width.
rects1 = ax.bar(ind, died.fare, width, color='r')
rects2 = ax.bar(ind + width, survived.fare, width, color='y')
 
# add some text for labels, title and axes ticks
ax.set_ylabel('Fare')
ax.set_title('Fare by survival and class')
# Tick positions are set before the tick labels that name them.
ax.set_xticks(ind + width)
ax.set_xticklabels(('First', 'Second', 'Third'))
 
# Use the first bar of each series as the legend handle.
ax.legend((rects1[0], rects2[0]), ('Died', 'Survived'))
 
def autolabel(rects):
    """Write each bar's height, truncated to an integer, just above the bar.

    Draws on the module-level ``ax``.

    Args:
        rects: Iterable of bar rectangles exposing ``get_height()``,
            ``get_x()`` and ``get_width()``.
    """
    # attach some text labels
    for rect in rects:
        height = rect.get_height()
        # Centre the label horizontally on the bar and place it 5% above
        # the bar's top.
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                '%d' % int(height),
                ha='center', va='bottom')
 
# Fix the y-range before the value labels are drawn above the bars.
ax.set_ylim(0, 110)

# Label both bar series, in the order they were drawn.
for rects in (rects1, rects2):
    autolabel(rects)

plt.show()

Pandas

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12.5, 7))

# note: dfg refers to grouped by
# version of df, presented above
# Pivot to one row per class and one column per survival value, so that
# plot.bar draws a group of bars per class.
fare_by_class = dfg.reset_index().pivot(index='pclass',
                                        columns='survived',
                                        values='fare')
fare_by_class.plot.bar(ax=ax)

ax.set_xlabel('Class')
ax.set_ylabel('Fare')
ax.set_title('Fare by survival and class')

SEABORN

# Seaborn: bar-type factorplot of fare per class, split by survival.
# NOTE(review): seaborn 0.9 renamed factorplot to catplot and `size` to
# `height` -- confirm the targeted seaborn version before changing either.
g = sns.factorplot(
    x='class',
    y='fare',
    hue='survived',
    data=df,
    kind='bar',
    order=['First', 'Second', 'Third'],
    size=7.5,
    aspect=1.5,
)
g.ax.set_title('Fare by survival and class')

GGPY

# ggpy: aggregate to the mean fare per (class, survived) pair first, then
# plot those means as bars.
mean_fare = (
    df.groupby(['class', 'survived'])
    .agg({'fare': 'mean'})
    .reset_index()
)

(
    ggplot(mean_fare, aes(x='class',
                          fill='factor(survived)',
                          weight='fare',
                          y='fare'))
    + geom_bar()
    + ylab('Avg. Fare')
    + xlab('Class')
    + ggtitle('Fare by survival and class')
)

ALTAIR

# Altair: mean fare per survival value (":N" marks the field as nominal),
# faceted into one column per class.
c = Chart(df).mark_bar().encode(
    x='survived:N',
    y='mean(fare)',
    color='survived:N',
    column='class',
)
c.configure_facet_cell(strokeWidth=0, height=250)

Two basic python packages are required for visualization:
1. Matplotlib – a Python-based plotting library with complete 2D support and limited 3D graphics support. It is useful for producing publication-quality figures in interactive environments across platforms.
2. Seaborn – Based on Matplotlib, Seaborn provides features such as built-in themes, color palettes, and functions and tools to visualize univariate and bivariate data, linear regressions, data matrices, time series, etc., in order to build more complex visualizations.
The sample dataset used in this tutorial
Import dataset:

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns 
# Read the worksheet "sample" from the Excel file into a DataFrame.
# "pathtodataset" is presumably a placeholder for the real file path.
df=pd.read_excel("pathtodataset", "sample")

Create histogram:

# Plots in matplotlib reside within a figure object; plt.figure() creates a new one.
fig = plt.figure()
# Create one or more subplots using add_subplot, because you can't draw on a blank figure.
ax = fig.add_subplot(1, 1, 1)
# Variable
ax.hist(df['Age'], bins=7)  # Here you can play with number of bins
# Labels and title (this heading was bare prose in the original, which is a syntax error)
plt.title('Age distribution')
plt.xlabel('Age')
plt.ylabel('#Employee')
plt.show()

Create boxplot:

# Box plot of the Age column on a single subplot.
fig = plt.figure()
ax = fig.add_subplot(111)  # shorthand for a 1x1 grid, first cell
# Variable
ages = df['Age']
ax.boxplot(ages)
plt.show()

Create violin plot

# Variable plot: Age on x, Gender on y.
# x and y are passed by keyword: older seaborn accepts them positionally,
# but newer releases only allow `data` as a positional argument.
sns.violinplot(x=df['Age'], y=df['Gender'])
# Remove the top and right spines from the plot.
sns.despine()

Create bar chart

# Grouped sum of sales at Gender level.
var = df.groupby('Gender').Sales.sum()

fig = plt.figure()
ax1 = fig.add_subplot(111)  # shorthand for a 1x1 grid, first cell
ax1.set(xlabel='Gender',
        ylabel='Sum of Sales',
        title="Gender wise Sum of Sales")

# No ax is passed, so pandas picks the axes to draw on.
var.plot(kind='bar')

Create line chart

# Grouped sum of sales at BMI level.
var = df.groupby('BMI').Sales.sum()

fig = plt.figure()
ax1 = fig.add_subplot(111)  # shorthand for a 1x1 grid, first cell
ax1.set(xlabel='BMI',
        ylabel='Sum of Sales',
        title="BMI wise Sum of Sales")

# No ax is passed, so pandas picks the axes to draw on.
var.plot(kind='line')

Create Stacked Column Chart

# Sum of sales for every (BMI, Gender) pair.
var = df.groupby(['BMI', 'Gender']).Sales.sum()
# Move Gender from the index into columns so each gender becomes one
# stacked segment per BMI bar.
sales_by_gender = var.unstack()
sales_by_gender.plot(kind='bar', stacked=True,
                     color=['red', 'blue'], grid=False)

Create Scatter Plot

# Scatter of Sales against Age on a single subplot.
fig = plt.figure()
ax = fig.add_subplot(111)  # shorthand for a 1x1 grid, first cell
# You can also add more variables here to represent color and size.
ax.scatter(x=df['Age'], y=df['Sales'])
plt.show()

Create Bubble Plot

# Bubble plot: a scatter of Sales against Age whose marker size comes from
# a third column.
fig = plt.figure()
ax = fig.add_subplot(111)  # shorthand for a 1x1 grid, first cell
# Added third variable income as size of the bubble
ax.scatter(x=df['Age'], y=df['Sales'], s=df['Income'])
plt.show()

Create Pie chart

# Sum the columns per gender; stacking and then unstacking returns a frame
# with one row per gender again.
var = df.groupby(['Gender']).sum().stack()
temp = var.unstack()
x_list = temp['Sales']
label_list = temp.index
# The pie chart is oval by default. To make it a circle use pyplot.axis("equal")
plt.axis("equal")
# To show the percentage of each pie slice, pass an output format to the
# autopct parameter. These two calls had been swallowed into a comment in
# the original, so no pie was ever drawn.
plt.pie(x_list, labels=label_list, autopct="%1.1f%%")
plt.title("Pastafarianism expenses")
plt.show()

Create Heat Map

# Generate random values for a 4x2 grid; you can use your own data values instead.
data = np.random.rand(4, 2)
rows = ['1', '2', '3', '4']  # row categories
columns = ['M', 'F']  # column categories

fig, ax = plt.subplots()

# Advanced color controls: red colormap with black cell edges.
ax.pcolor(data, cmap=plt.cm.Reds, edgecolors='k')

# Offset every tick by 0.5 so it sits in the middle of its cell.
ax.set_xticks(np.arange(len(columns)) + 0.5)
ax.set_yticks(np.arange(len(rows)) + 0.5)

# Here we position the tick labels for x and y axis
ax.xaxis.tick_bottom()
ax.yaxis.tick_left()

# Values against each label
ax.set_xticklabels(columns, minor=False, fontsize=20)
ax.set_yticklabels(rows, minor=False, fontsize=20)
plt.show()

The packages used in this tutorial: Matplotlib, Pandas, Seaborn, ggpy, and Altair.

For more details about data visualization in Python, there is a cookbook that is easy to learn and use: Python Data Visualization Cookbook.

We are going to draw the same type of graph using these packages at the same time, making it easier to understand and distinguish the difference.

TOPIC 1: Time Series Lines

Matplotlib

Pandas

SEABORN

GGPY

ALTAIR

TOPIC 2: Scatterplot

Matplotlib

No pandas this time

SEABORN

GGPY

ALTAIR

TOPIC 3: Faceted Scatterplot

Matplotlib

Pandas pass

SEABORN

GGPY

ALTAIR

TOPIC 4: Distributions and Bars

Matplotlib

Pandas

SEABORN

GGPY

ALTAIR

TOPIC 5: Bar Chart

Matplotlib

Pandas

SEABORN

GGPY

ALTAIR

Two basic python packages are required for visualization:

The sample dataset used in this tutorial

Import dataset:

Create histogram:

Create boxplot:

Create violin plot

Create bar chart

Create line chart

Create Stacked Column Chart

Create Scatter Plot

Create Bubble Plot

Create Pie chart

Create Heat Map