Source code for vireoSNP.plot.base_plot

# base functions for plotting

import numpy as np

vireo_colors = np.array(['#4796d7', '#f79e54', '#79a702', '#df5858', '#556cab', 
                         '#de7a1f', '#ffda5c', '#4b595c', '#6ab186', '#bddbcf', 
                         '#daad58', '#488a99', '#f79b78', '#ffba00'])

[docs]def heat_matrix(X, yticks=None, xticks=None, rotation=45, cmap='BuGn', alpha=0.6, display_value=True, row_sort=False, aspect='auto', interpolation='none', **kwargs): """ Plot heatmap of distance matrix Parameters ---------- X: numpy.array or matrix The matrix to plot in heatmap yticks: list The ticks ids for y axis xticks: list The ticks ids for x axis ratation: scalar The ratation angel for xticks cmap: str The colormap for the heatmap, more options: https://matplotlib.org/stable/tutorials/colors/colormaps.html alpha: scalar The transparency, value between 0 and 1 display_value: bool If True, dispaly the values in the heatmap raw_sort: bool If True, sort the rows with row index as row_idx = np.argsort(np.dot(X, 2**np.arange(X.shape[1]))) aspect: str `aspect` in `plt.imshow` interpolation: str `interpolation` in `plt.imshow` **kwargs: keywords & values `**kwargs` for `plt.imshow` Returns ------- The return from `plt.imshow` Examples -------- .. plot:: >>> from vireoSNP.plot import heat_matrix >>> import numpy as np >>> np.random.seed(1) >>> X = np.random.rand(5, 7) >>> heat_matrix(X) """ import matplotlib.pyplot as plt if row_sort: row_idx = np.argsort(np.dot(X, 2**np.arange(X.shape[1]))) X = X[row_idx, :] im = plt.imshow(X, cmap=cmap, alpha=alpha, aspect=aspect, interpolation=interpolation, **kwargs) if xticks is not None: plt.xticks(range(len(xticks)), xticks, rotation=rotation) plt.xlim(-0.5, len(xticks) - 0.5) if yticks is not None: plt.yticks(range(len(yticks)), yticks) plt.ylim(-0.5, len(yticks) - 0.5) # Loop over data dimensions and create text annotations. if display_value: for i in range(X.shape[0]): for j in range(X.shape[1]): plt.text(j, i, "%.2f" %X[i, j], ha="center", va="center", color="k") return im
def plot_GT(out_dir, cell_GPb, donor_names, donor_GPb=None, donor_names_in=None): """ Plot the genotype distance between samples """ import matplotlib.pyplot as plt ## compare the GT probability of estimated samples diff_mat = np.zeros((cell_GPb.shape[1], cell_GPb.shape[1])) for i in range(cell_GPb.shape[1]): for j in range(cell_GPb.shape[1]): diff_mat[i,j] = np.mean(np.abs(cell_GPb[:, i, :] - cell_GPb[:, j, :])) fig = plt.figure() heat_matrix(diff_mat, donor_names, donor_names) plt.title("Geno Prob Delta: %d SNPs" %(cell_GPb.shape[0])) plt.tight_layout() fig.savefig(out_dir + "/fig_GT_distance_estimated.pdf", dpi=300) ## compare in the estimated sample with input samples if donor_GPb is not None: diff_mat = np.zeros((cell_GPb.shape[1], donor_GPb.shape[1])) for i in range(cell_GPb.shape[1]): for j in range(donor_GPb.shape[1]): diff_mat[i,j] = np.mean(np.abs( cell_GPb[:, i, :] - donor_GPb[:, j, :])) fig = plt.figure() heat_matrix(diff_mat, donor_names, donor_names_in) plt.title("Geno Prob Delta: %d SNPs" %(cell_GPb.shape[0])) plt.tight_layout() fig.savefig(out_dir + "/fig_GT_distance_input.pdf", dpi=300) def minicode_plot(barcode_set, var_ids=None, sample_ids=None, cmap="Set3", interpolation='none', **kwargs): import matplotlib.pyplot as plt mat = np.zeros((len(barcode_set[0][1:]), len(barcode_set))) for i in range(mat.shape[0]): for j in range(mat.shape[1]): mat[i, j] = float(barcode_set[j][i + 1]) im = plt.imshow(mat, cmap=cmap, interpolation=interpolation, **kwargs) for i in range(mat.shape[0]): for j in range(mat.shape[1]): plt.text(j, i, int(mat[i, j]), ha="center", va="center", color="k") if var_ids is None: var_ids = range(mat.shape[0]) plt.yticks(range(len(var_ids)), var_ids) plt.ylim(-0.5, len(var_ids) - 0.5) if sample_ids is None: sample_ids = ["%s\nS%d" %(barcode_set[x], x) for x in range(mat.shape[1])] else: sample_ids = ["%s\n%s" %(barcode_set[x], sample_ids[x]) for x in range(mat.shape[1])] plt.xticks(range(len(sample_ids)), sample_ids) plt.xlim(-0.5, len(sample_ids) - 0.5) return im
[docs]def anno_heat(X, row_anno=None, col_anno=None, row_order_ids=None, col_order_ids=None, xticklabels=False, yticklabels=False, row_cluster=False, col_cluster=False, **kwargs): """ Heatmap with column or row annotations. Based on seaborn.clustermap() Row or column will be ordered by the annotation group. Note, haven't tested if input both row_anno and col_anno. """ import seaborn as sns # prepare row annotation if row_anno is not None: if row_order_ids is None: row_order_ids = list(np.unique(row_anno)) else: row_order_ids = [x for x in row_order_ids] row_num = np.array([row_order_ids.index(x) for x in row_anno]) dot_row = np.array(np.nansum(X, axis=1)).reshape(-1) idx_row = np.argsort(row_num * 2**X.shape[1])# + dot_row / dot_row.max()) row_colors = vireo_colors[row_num][idx_row] else: row_colors = None row_order_ids = [] idx_row = range(X.shape[0]) # prepare col annotation if col_anno is not None: if col_order_ids is None: col_order_ids = list(np.unique(col_order_ids)) else: col_order_ids = [x for x in col_order_ids] col_num = np.array([col_order_ids.index(x) for x in col_anno]) dot_col = np.array(np.nansum(X, axis=0)).reshape(-1) idx_col = np.argsort(col_num * 2**X.shape[0])# + dot_row / dot_row.max()) col_colors = vireo_colors[col_num][idx_col] else: col_colors = None col_order_ids = [] idx_col = range(X.shape[1]) ## plot with seaborn clustermap g = sns.clustermap(X[idx_row, :][:, idx_col], row_colors=row_colors, col_colors=col_colors, col_cluster=col_cluster, row_cluster=row_cluster, xticklabels=xticklabels, yticklabels=yticklabels, **kwargs) if row_anno is not None: for i in range(len(row_order_ids)): g.ax_row_dendrogram.bar(0, 0, color=vireo_colors[i], label=row_order_ids[i], linewidth=0) g.ax_row_dendrogram.legend(loc="center", ncol=1, title="") if col_anno is not None: for i in range(len(col_order_ids)): g.ax_col_dendrogram.bar(0, 0, color=vireo_colors[i], label=col_order_ids[i], linewidth=0) g.ax_col_dendrogram.legend(loc="center", ncol=6, title="") g.cax.set_position([1.01, .2, .03, .45]) return g
# def ppca_plot(AD, DP): # """ # PPCA plot for each cell genotypes. This function is still underdevelopment # """ # Z = DP.copy().astype(float) # idx = DP > 0 # Z[idx] = AD[idx] / Z[idx] # Z[idx] = Z[idx] - 0.5 # from sklearn.decomposition import TruncatedSVD # svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42) # svd.fit(Z) # print("variance explained:", svd.explained_variance_ratio_) # import matplotlib.pyplot as plt # plt.scatter(svd.components_[0, :], svd.components_[1, :]) # return svd.components_