# Source code for vireoSNP.plot.base_plot

# base functions for plotting

import numpy as np

# Palette of 14 hex colors; anno_heat indexes into it to color the
# row / column annotation groups and their legend entries.
vireo_colors = np.array([
    '#4796d7', '#f79e54', '#79a702', '#df5858',
    '#556cab', '#de7a1f', '#ffda5c', '#4b595c',
    '#6ab186', '#bddbcf', '#daad58', '#488a99',
    '#f79b78', '#ffba00',
])

def heat_matrix(X, yticks=None, xticks=None, rotation=45, cmap='BuGn', 
                alpha=0.6, display_value=True, row_sort=False, 
                aspect='auto', interpolation='none', **kwargs):
    """
    Plot heatmap of distance matrix

    Parameters
    ----------
    X: numpy.array or matrix
        The matrix to plot in heatmap. A `numpy.matrix` or a plain nested
        sequence is converted to a regular 2-D array first.
    yticks: list
        The ticks ids for y axis. They are applied in the order given and
        are NOT re-ordered when `row_sort` is True.
    xticks: list
        The ticks ids for x axis
    rotation: scalar
        The rotation angle for xticks
    cmap: str
        The colormap for the heatmap, more options: 
        https://matplotlib.org/stable/tutorials/colors/colormaps.html
    alpha: scalar
        The transparency, value between 0 and 1
    display_value: bool
        If True, display the values (two decimals) in the heatmap
    row_sort: bool
        If True, sort the rows with row index as
        row_idx = np.argsort(np.dot(X, 2**np.arange(X.shape[1])))
    aspect: str
        `aspect` in `plt.imshow`
    interpolation: str
        `interpolation` in `plt.imshow`
    **kwargs: keywords & values
        `**kwargs` for `plt.imshow`
    
    Returns
    -------
    The return from `plt.imshow`

    Examples
    --------

    .. plot::

        >>> from vireoSNP.plot import heat_matrix
        >>> import numpy as np
        >>> np.random.seed(1)
        >>> X = np.random.rand(5, 7)
        >>> heat_matrix(X)
    """
    import matplotlib.pyplot as plt

    # np.matrix keeps every product 2-D, which breaks the row sorting
    # below, and plain sequences have no `.shape`; use a regular ndarray
    # for both. Other ndarray subclasses are passed through untouched.
    if isinstance(X, np.matrix) or not isinstance(X, np.ndarray):
        X = np.asarray(X)

    if row_sort:
        # Weight column k by 2**k so that later columns dominate the order.
        # NOTE(review): these are integer powers of two and can overflow
        # the integer dtype for wide matrices - confirm expected widths.
        row_idx = np.argsort(np.dot(X, 2**np.arange(X.shape[1])))
        X = X[row_idx, :]

    im = plt.imshow(X, cmap=cmap, alpha=alpha, aspect=aspect, 
                    interpolation=interpolation, **kwargs)

    # Label the axes and pin the limits to the labelled range
    if xticks is not None:
        plt.xticks(range(len(xticks)), xticks, rotation=rotation)
        plt.xlim(-0.5, len(xticks) - 0.5)
    if yticks is not None:
        plt.yticks(range(len(yticks)), yticks)
        plt.ylim(-0.5, len(yticks) - 0.5)
    
    # Loop over data dimensions and create text annotations.
    if display_value:
        for i in range(X.shape[0]):
            for j in range(X.shape[1]):
                plt.text(j, i, "%.2f" %X[i, j],
                         ha="center", va="center", color="k")
    
    return im


def plot_GT(out_dir, cell_GPb, donor_names, 
            donor_GPb=None, donor_names_in=None):
    """
    Plot the genotype distance between samples

    Two heatmaps of the mean absolute difference in genotype probability
    are drawn with `heat_matrix` and saved as PDF files in `out_dir`:

    * ``fig_GT_distance_estimated.pdf``: every estimated sample against
      every other estimated sample (always written).
    * ``fig_GT_distance_input.pdf``: estimated samples (rows) against the
      input samples (columns); only written when `donor_GPb` is given.

    Parameters
    ----------
    out_dir: str
        Directory prefix for the output files
    cell_GPb: numpy.array
        3-D array indexed as ``[:, sample, :]``; the size of the first
        axis is reported as the number of SNPs in the figure title
        (last axis presumably the genotype categories - TODO confirm)
    donor_names: list
        Tick labels for the estimated samples
    donor_GPb: numpy.array
        Optional 3-D array for the input samples, indexed like `cell_GPb`
    donor_names_in: list
        Tick labels for the input samples

    Returns
    -------
    None
    """
    import matplotlib.pyplot as plt

    n_snp = cell_GPb.shape[0]
    n_est = cell_GPb.shape[1]

    ## pairwise distance among the estimated samples
    delta = np.zeros((n_est, n_est))
    for a, b in np.ndindex(n_est, n_est):
        delta[a, b] = np.mean(np.abs(cell_GPb[:, a, :] - cell_GPb[:, b, :]))

    fig = plt.figure()
    heat_matrix(delta, donor_names, donor_names)
    plt.title("Geno Prob Delta: %d SNPs" %(n_snp))
    plt.tight_layout()
    fig.savefig(out_dir + "/fig_GT_distance_estimated.pdf", dpi=300)

    ## nothing to compare against without input samples
    if donor_GPb is None:
        return

    ## distance between each estimated sample and each input sample
    n_in = donor_GPb.shape[1]
    delta = np.zeros((n_est, n_in))
    for a, b in np.ndindex(n_est, n_in):
        delta[a, b] = np.mean(np.abs(cell_GPb[:, a, :] - donor_GPb[:, b, :]))

    fig = plt.figure()
    heat_matrix(delta, donor_names, donor_names_in)
    plt.title("Geno Prob Delta: %d SNPs" %(n_snp))
    plt.tight_layout()
    fig.savefig(out_dir + "/fig_GT_distance_input.pdf", dpi=300)


def minicode_plot(barcode_set, var_ids=None, sample_ids=None, 
                  cmap="Set3", interpolation='none', **kwargs):
    """
    Plot a set of barcodes as an annotated variants-by-samples image

    Parameters
    ----------
    barcode_set: list
        One indexable barcode per sample. The first element of each
        barcode is skipped; every remaining element is converted with
        `float` and becomes one row (variant) of that sample's column.
    var_ids: list
        Tick labels for the y axis; defaults to the row indices
    sample_ids: list
        Names for the samples. Each x tick label is the full barcode
        followed, on a new line, by the sample name (or ``S<index>`` when
        `sample_ids` is None).
    cmap: str
        `cmap` in `plt.imshow`
    interpolation: str
        `interpolation` in `plt.imshow`
    **kwargs: keywords & values
        `**kwargs` for `plt.imshow`

    Returns
    -------
    The return from `plt.imshow`
    """
    import matplotlib.pyplot as plt

    n_var = len(barcode_set[0][1:])
    n_sample = len(barcode_set)

    # rows are variants, columns are samples; element 0 of a barcode is skipped
    mat = np.zeros((n_var, n_sample))
    for v, s in np.ndindex(n_var, n_sample):
        mat[v, s] = float(barcode_set[s][v + 1])

    im = plt.imshow(mat, cmap=cmap, interpolation=interpolation, **kwargs)

    # write the integer value into every cell
    for v, s in np.ndindex(n_var, n_sample):
        plt.text(s, v, int(mat[v, s]), 
                 ha="center", va="center", color="k")

    if var_ids is None:
        var_ids = range(n_var)
    plt.yticks(range(len(var_ids)), var_ids)
    plt.ylim(-0.5, len(var_ids) - 0.5)

    # x tick label: barcode on the first line, sample name on the second
    columns = range(n_sample)
    if sample_ids is not None:
        sample_ids = ["%s\n%s" %(barcode_set[x], sample_ids[x])
                      for x in columns]
    else:
        sample_ids = ["%s\nS%d" %(barcode_set[x], x)
                      for x in columns]
    plt.xticks(range(len(sample_ids)), sample_ids)
    plt.xlim(-0.5, len(sample_ids) - 0.5)

    return im


def anno_heat(X, row_anno=None, col_anno=None,
              row_order_ids=None, col_order_ids=None, 
              xticklabels=False, yticklabels=False,
              row_cluster=False, col_cluster=False,
              **kwargs):
    """
    Heatmap with column or row annotations. Based on seaborn.clustermap()
    Row or column will be ordered by the annotation group.
    
    Note, haven't tested if input both row_anno and col_anno.

    Parameters
    ----------
    X: numpy.array
        2-D matrix to plot; it is indexed as ``X[rows, :][:, cols]``
    row_anno: list or numpy.array
        Group label of each row. If given, rows are ordered by group and a
        color strip plus a legend are added.
    col_anno: list or numpy.array
        Group label of each column, handled like `row_anno`
    row_order_ids: list
        Order of the row groups; defaults to `np.unique(row_anno)`
    col_order_ids: list
        Order of the column groups; defaults to `np.unique(col_anno)`
    xticklabels, yticklabels: bool or list
        Passed to `seaborn.clustermap`
    row_cluster, col_cluster: bool
        Passed to `seaborn.clustermap`
    **kwargs: keywords & values
        `**kwargs` for `seaborn.clustermap`

    Returns
    -------
    The return from `seaborn.clustermap`

    Raises
    ------
    ValueError
        If a label in `row_anno` (`col_anno`) is missing from
        `row_order_ids` (`col_order_ids`).
    """
    
    import seaborn as sns

    # cycle through the palette when there are more groups than colors
    n_colors = len(vireo_colors)
    
    # prepare row annotation
    if row_anno is not None:
        if row_order_ids is None:
            row_order_ids = list(np.unique(row_anno))
        else:
            row_order_ids = list(row_order_ids)
        row_num = np.array([row_order_ids.index(x) for x in row_anno],
                           dtype=int)

        # stable sort: group rows, keep the input order within each group
        idx_row = np.argsort(row_num, kind="stable")

        row_colors = vireo_colors[row_num % n_colors][idx_row]
    else:
        row_colors = None
        row_order_ids = []
        idx_row = range(X.shape[0])
        
    # prepare col annotation
    if col_anno is not None:
        if col_order_ids is None:
            # default group order comes from the annotation itself
            col_order_ids = list(np.unique(col_anno))
        else:
            col_order_ids = list(col_order_ids)
        col_num = np.array([col_order_ids.index(x) for x in col_anno],
                           dtype=int)

        # stable sort: group columns, keep the input order within each group
        idx_col = np.argsort(col_num, kind="stable")
        
        col_colors = vireo_colors[col_num % n_colors][idx_col]
    else:
        col_colors = None
        col_order_ids = []
        idx_col = range(X.shape[1])
        
    ## plot with seaborn clustermap
    g = sns.clustermap(X[idx_row, :][:, idx_col], 
                       row_colors=row_colors, col_colors=col_colors,
                       col_cluster=col_cluster, row_cluster=row_cluster,
                       xticklabels=xticklabels, yticklabels=yticklabels,
                       **kwargs)
    
    # zero-sized bars only serve as legend handles for the group colors
    if row_anno is not None:
        for i, group_id in enumerate(row_order_ids):
            g.ax_row_dendrogram.bar(0, 0, color=vireo_colors[i % n_colors],
                                    label=group_id, linewidth=0)
        g.ax_row_dendrogram.legend(loc="center", ncol=1, title="")
        
    if col_anno is not None:
        for i, group_id in enumerate(col_order_ids):
            g.ax_col_dendrogram.bar(0, 0, color=vireo_colors[i % n_colors],
                                    label=group_id, linewidth=0)
        g.ax_col_dendrogram.legend(loc="center", ncol=6, title="")
    
    # move the color bar to the right-hand side of the figure
    g.cax.set_position([1.01, .2, .03, .45])
    
    return g

# def ppca_plot(AD, DP):
#     """
#     PPCA plot for each cell's genotypes. This function is still under development
#     """
#     Z = DP.copy().astype(float)
#     idx = DP > 0
#     Z[idx] = AD[idx] / Z[idx]
#     Z[idx] = Z[idx] - 0.5

#     from sklearn.decomposition import TruncatedSVD
#     svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
#     svd.fit(Z)

#     print("variance explained:", svd.explained_variance_ratio_)

#     import matplotlib.pyplot as plt
#     plt.scatter(svd.components_[0, :], svd.components_[1, :])
#     return svd.components_