# Install libraries as necessary
#pip install openpyxl
#pip install pandas
#pip install matplotlib
#pip install seaborn
#pip install numpy
#import importlib.metadata
1: ANI and SNP distance matrices
Introduction
This tutorial takes n genome sequences and runs algorithms to determine average nucleotide identities (ANI) and core-genome single nucleotide polymorphism (SNP) distances, visualising the distances as heatmaps in Python.
This workflow uses JSpeciesWS for ANI, SNP-dists for SNP distances, and seaborn and matplotlib in python to visualise the distances as heatmaps.
For a more in-depth tutorial on using ANI and SNP distances to determine isolate relatedness, see my ANI and SNP distances GitHub repo.
Part 1 - ANI matrix with python
1.1: Install libraries
Next load the libraries
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
import numpy as np
1.2: Create a function which makes an ANI heatmap
def create_ani_heatmap_rotate(title, input_file, rotation=0, lower_legend=90, upper_legend=100):
    """
    Read ANI values from an Excel table and show them as a triangular heatmap.

    The table is loaded with its first column as row labels, every cell is
    coerced to a number (non-numeric cells become NaN), and any NaN is then
    replaced by 0 with a printed warning. The diagonal and upper triangle of
    the table *as stored in the file* are hidden; the table and the mask are
    rotated together so the same cells stay hidden at every rotation.

    Parameters:
    title (str): Title drawn above the heatmap.
    input_file (str): Path to the Excel file containing the ANI table.
    rotation (int): Rotation of the table before plotting. 0 leaves it as is,
        90 transposes it, 180 reverses both axes, 270 transposes it and then
        reverses both axes.
    lower_legend (float): Lower bound of the colour scale (passed as vmin).
    upper_legend (float): Upper bound of the colour scale (passed as vmax).

    Returns:
    None. The figure is displayed with plt.show().

    Raises:
    ValueError: If rotation is not one of 0, 90, 180 or 270.
    """
    # Validate before touching the file so a bad argument fails fast.
    if rotation not in (0, 90, 180, 270):
        raise ValueError("Rotation must be 0, 90, 180, or 270 degrees.")

    # Load the table, ensuring the first column is used as row labels
    ani_table = pd.read_excel(input_file, index_col=0)

    # Ensure all values are numeric (anything unparseable becomes NaN)
    ani_table = ani_table.apply(pd.to_numeric, errors="coerce")

    # Check for and handle any missing (NaN) values
    if ani_table.isnull().values.any():
        print("Warning: Missing or invalid values found. Filling with 0.")
        ani_table = ani_table.fillna(0)

    # Build the mask on the un-rotated table (True = hidden cell; np.triu
    # includes the diagonal), then apply exactly the same transformation to
    # the mask as to the data so they can never drift apart.
    mask = np.triu(np.ones(ani_table.shape, dtype=bool))

    if rotation in (90, 270):
        ani_table = ani_table.transpose()
        mask = mask.T
    if rotation in (180, 270):
        ani_table = ani_table.iloc[::-1, ::-1]
        mask = mask[::-1, ::-1]

    # Plot the heatmap
    plt.figure(figsize=(12, 10))  # Adjust figure size
    sns.heatmap(
        ani_table,
        annot=True,
        fmt=".1f",
        mask=mask,
        cmap="coolwarm",
        vmin=lower_legend,  # Set fixed color scale minimum
        vmax=upper_legend,  # Set fixed color scale maximum
        annot_kws={"size": 8},  # Adjust annotation text size
        xticklabels=ani_table.columns,
        yticklabels=ani_table.index,
        cbar_kws={"label": "ANI (%)"},
    )
    plt.xticks(fontsize=10, rotation=45, ha="right")  # Adjust x-axis ticks
    plt.yticks(fontsize=10)  # Adjust y-axis ticks
    plt.title(f"{title}", fontsize=14)  # Add title
    plt.tight_layout()  # Adjust padding
    plt.show()
1.3: Create the ANI heatmaps
First we will produce an ANI matrix for Klebsiella pneumoniae species types @KpSc-ANI
# Call the function to create the heatmap
fig_title = "ANI KpSc"
input_file = "../data/JSpeciesWS_all_hybracter_1448_LR_FC1654_FC2240_UNI_ANIb_half_triangle_FC_rearranged_Klebsiella_spp.xlsx"
create_ani_heatmap_rotate(
    title=fig_title,
    input_file=input_file,
    rotation=90,
    lower_legend=93,
    upper_legend=100,
)
Warning: Missing or invalid values found. Filling with 0.
Next we will produce an ANI matrix for E. coli species types @Ec-ANI
# Call the function to create the heatmap
fig_title = "ANI E.coli"
input_file = "../data/JSpeciesWS_all_hybracter_1448_LR_FC1654_FC2240_UNI_ANIb_half_triangle_FC_rearranged_E.coli.xlsx"
create_ani_heatmap_rotate(
    title=fig_title,
    input_file=input_file,
    rotation=90,
    lower_legend=93,
    upper_legend=100,
)
Warning: Missing or invalid values found. Filling with 0.
Part 2 - SNP distance matrix with python
Make sure you have all the required libraries installed; if you need to install them, see Part 1.1: Install libraries.
2.1: Create a function which makes a SNP distance heatmap
def create_snp_heatmap(title, input_file, rotation=0):
    """
    Read SNP distances from an Excel table and show them as a triangular heatmap.

    The table is loaded with its first column as row labels, every cell is
    coerced to a number (non-numeric cells become NaN), and any NaN is then
    replaced by 0 with a printed warning. The diagonal and upper triangle of
    the table *as stored in the file* are hidden; the table and the mask are
    rotated together so the same cells stay hidden at every rotation. The
    colour scale is left for seaborn to choose from the data.

    Parameters:
    title (str): Title drawn above the heatmap.
    input_file (str): Path to the Excel file containing the SNP distance table.
    rotation (int): Rotation of the table before plotting. 0 leaves it as is,
        90 transposes it, 180 reverses both axes, 270 transposes it and then
        reverses both axes.

    Returns:
    None. The figure is displayed with plt.show().

    Raises:
    ValueError: If rotation is not one of 0, 90, 180 or 270.
    """
    # Validate before touching the file so a bad argument fails fast.
    if rotation not in (0, 90, 180, 270):
        raise ValueError("Rotation must be 0, 90, 180, or 270 degrees.")

    # Load the table, ensuring the first column is used as row labels
    snp_table = pd.read_excel(input_file, index_col=0)

    # Ensure all values are numeric (anything unparseable becomes NaN)
    snp_table = snp_table.apply(pd.to_numeric, errors="coerce")

    # Check for and handle any missing (NaN) values
    if snp_table.isnull().values.any():
        print("Warning: Missing or invalid values found. Filling with 0.")
        snp_table = snp_table.fillna(0)

    # Build the mask on the un-rotated table (True = hidden cell; np.triu
    # includes the diagonal), then apply exactly the same transformation to
    # the mask as to the data so they can never drift apart.
    mask = np.triu(np.ones(snp_table.shape, dtype=bool))

    if rotation in (90, 270):
        snp_table = snp_table.transpose()
        mask = mask.T
    if rotation in (180, 270):
        snp_table = snp_table.iloc[::-1, ::-1]
        mask = mask[::-1, ::-1]

    # Plot the heatmap
    plt.figure(figsize=(12, 10))  # Adjust figure size
    sns.heatmap(
        snp_table,
        annot=True,
        fmt=".1f",
        mask=mask,
        cmap="coolwarm",  # Colours for legend: options: coolwarm, RdBu
        annot_kws={"size": 8},  # Adjust annotation text size
        xticklabels=snp_table.columns,
        yticklabels=snp_table.index,
        cbar_kws={"label": "SNP count"},
    )
    plt.xticks(fontsize=10, rotation=45, ha="right")  # Adjust x-axis ticks
    plt.yticks(fontsize=10)  # Adjust y-axis ticks
    plt.title(f"{title}", fontsize=14)  # Add title
    plt.tight_layout()  # Adjust padding
    plt.show()
2.2: Create the SNP distance heatmaps
First we will produce an SNP distance matrix for Klebsiella pneumoniae species types @KpSc-SNP
# Call the function to create the heatmap
fig_title = "SNP distances KpSc"
input_file = "../data/snp_matrix_all_R1386_half_triangle_FC_Klebsiella_spp.xlsx"
create_snp_heatmap(title=fig_title, input_file=input_file, rotation=90)
Warning: Missing or invalid values found. Filling with 0.
Next we will produce an SNP distance matrix for E. coli species types @Ec-SNP
# Call the function to create the heatmap
fig_title = "SNP distances E. coli"
input_file = "../data/snp_matrix_all_R1386_half_triangle_FC_E.coli.xlsx"
create_snp_heatmap(title=fig_title, input_file=input_file, rotation=90)
Warning: Missing or invalid values found. Filling with 0.