# Source code for reports.htmltable

# coding=utf-8
# -*- python -*-
#
#  This file is part of GDSCTools software
#
#  Copyright (c) 2015 - Wellcome Trust Sanger Institute
#  All rights reserved
#
#  File author(s): Thomas Cokelaer <cokelaer@gmail.com>
#
#  Distributed under the BSD 3-Clause License.
#  See accompanying file LICENSE.txt distributed with this software
#
#  website: http://github.com/CancerRxGene/gdsctools
#
##############################################################################
"""Base classes to create HTML reports easily"""
import os
import shutil

import easydev
import pandas as pd

from colormap import rgb2hex, cmap_builder
# Note that the sorttable javascript comes from
# http://www.kryogenix.org/code/browser/sorttable/
# and is distributed under an X11 license.


__all__ = ['HTMLTable']


class HTMLTable(object):
    """Handler to export a dataframe into an HTML table.

    Pandas dataframes already have a ``to_html`` method to export the
    dataframe into an HTML formatted table. However, we provide here a few
    handy features:

        * Takes each cell in a given column and creates an HTML
          reference in each cell. See :meth:`add_href` method.
        * Adds an HTML background colour to the cells (numeric content) of
          a given column using different methods (e.g., normalise).
          See :meth:`add_bgcolor`

    ::

        import pandas as pd
        df = pd.DataFrame({'A':[1,2,10], 'B':[1,10,2]})
        from gdsctools import HTMLTable
        html = HTMLTable(df)

    .. note:: similar projects exist, such as prettytable, but could not do
        exactly what we wanted at the time gdsctools was developed.

    .. note:: Could be moved to biokit or easydev package.

    """
    def __init__(self, df, name=None, **kargs):
        """.. rubric:: Constructor


        :param dataframe df: a pandas dataframe to transform into a table
        :param str name: not used yet
        :param kargs: accepted for backward compatibility; currently ignored

        There is a :attr:`pd_options` attribute to reduce the max column
        width or the precision of the numerical values. Keys are pandas
        option names; short names (without a namespace) are looked up in
        the ``display`` namespace first. A negative ``max_colwidth`` means
        "do not truncate cells".

        """
        # Work on a copy: add_href/add_bgcolor/sort modify the content.
        self.df = df.copy()
        self.name = name
        self.pd_options = {
                'max_colwidth': -1,
                'precision': 2}

    @staticmethod
    def _qualify_option(name):
        """Return an unambiguous pandas option key for *name*.

        Short names such as ``'precision'`` may match several pandas
        options (which makes pandas raise an OptionError), so the
        ``display`` namespace is tried first. If no such display option
        exists, the name is returned untouched and pandas resolves it.
        """
        if '.' in name:
            return name
        candidate = 'display.' + name
        try:
            pd.get_option(candidate)
        except KeyError:
            # pandas' OptionError derives from KeyError
            return name
        return candidate

    @staticmethod
    def _set_option(key, value):
        """Set a pandas option, translating the legacy "-1" max_colwidth.

        Recent pandas versions express "unlimited width" with None and
        reject negative integers, whereas old versions only accept the
        negative integer. Try None first and fall back on the raw value.
        """
        unlimited = (key.endswith('max_colwidth')
                     and isinstance(value, int) and value < 0)
        if not unlimited:
            pd.set_option(key, value)
            return
        try:
            pd.set_option(key, None)
        except ValueError:
            pd.set_option(key, value)

    def to_html(self, index=False, escape=False, header=True,
            collapse_table=True, class_outer="table_outer", **kargs):
        """Return HTML version of the table

        This is a wrapper of the to_html method of the pandas dataframe.
        The options stored in :attr:`pd_options` are applied only for the
        duration of the call; previous values are always restored.

        :param bool index: do not include the index
        :param bool escape: do not escape special characters
        :param bool header: include header
        :param bool collapse_table: currently unused by this method
        :param str class_outer: CSS class of the ``div`` wrapping the table
        :param kargs: any parameter accepted by
            :meth:`pandas.DataFrame.to_html`
        :return: the HTML table, wrapped in a ``div``, as a string

        """
        saved = {}
        try:
            for name, value in self.pd_options.items():
                key = self._qualify_option(name)
                # remember the current value before overriding it
                saved[key] = pd.get_option(key)
                self._set_option(key, value)

            # class sortable is to use the sorttable javascript
            # note that the class has one t and the javascript library has 2
            # as in the original version of sorttable.js
            table = self.df.to_html(escape=escape, header=header,
                    index=index, classes='sortable', **kargs)
        finally:
            # get back to the previous options, even if to_html failed
            for key, value in saved.items():
                pd.set_option(key, value)

        # We wrap the table in a dedicated div whose class (class_outer)
        # must be defined by users in their CSS.
        return '<div class="%s">' % class_outer + table + "</div>"

    def add_bgcolor(self, colname, cmap='copper', mode='absmax',
            threshold=2):
        """Change column content into HTML paragraph with background color

        :param colname: name of the (numeric) column to colour
        :param cmap: a colormap (matplotlib) or created using
            colormap package (from pypi).
        :param mode: type of normalisation in 'absmax', 'max', 'clip'
            (see details below). Any other value leaves the data as is.
        :param threshold: used if mode is set to 'clip'

        Colormaps have values between 0 and 1 so we need to normalise the
        data between 0 and 1. There are 3 modes to normalise the data so far.

        If mode is set to 'absmax', negative and positive values are
        expected to be found in a range from -inf to inf. Values are
        scaled in between [0,1] X' = (X / M + 1) / 2, where M is the
        absolute maximum. Ideally a colormap should be made of 3 colors, the
        first color used for negative values, the second for zeros and third
        color for positive values.

        If mode is set to 'clip', values are clipped to a max value (parameter
        *threshold*) and values are normalised by that same threshold.

        If mode is set to 'max', values are normalised by the max.

        Nothing is done if the column is empty.

        """
        try:
            # if a cmap is provided, it may be just a known cmap name
            cmap = cmap_builder(cmap)
        except Exception:
            # otherwise, assume cmap is already a callable colormap
            pass

        data = self.df[colname].values

        if len(data) == 0:
            return

        if mode == 'clip':
            data = [min(x, threshold)/float(threshold) for x in data]
        elif mode == 'absmax':
            M = max(abs(data.min()), abs(data.max()))
            # all-zero column: keep the data untouched (no division by 0)
            if M != 0:
                data = (data / M + 1)/2.
        elif mode == 'max':
            if data.max() != 0:
                data = data / float(data.max())

        # the expected RGB values for a given data point
        rgbcolors = [cmap(x)[0:3] for x in data]
        hexcolors = [rgb2hex(*x, normalised=True) for x in rgbcolors]

        # need to read original data again (data now holds normalised values)
        data = self.df[colname].values

        # need to set precision since this is going to be a text not a number
        # so pandas will not use the precision for those cases:
        def prec(x):
            try:
                # this may fail if for instance x is nan or inf
                return easydev.precision(x, self.pd_options['precision'])
            except Exception:
                return x

        html_formatter = '<p style="background-color:{0}">{1}</p>'
        self.df[colname] = [html_formatter.format(color, prec(value))
                for color, value in zip(hexcolors, data)]

    def add_href(self, colname, url=None, newtab=False, suffix=None):
        """Replace the content of a column by HTML links

        default behaviour: takes column content and put into::

            <a href={content}.html>content</a>

        This is used to link to local files. If url is provided, you typically
        want to link to an external url where the content is an identifier::

            <a href={url}{content}>content</a>

        :param colname: name of the column to transform
        :param str url: optional prefix of the link
        :param bool newtab: unless set to False, the link is opened in a
            new tab (``target="_blank"``)
        :param str suffix: appended to the link. Defaults to *.html* if
            *url* is not provided and to an empty string otherwise.

        .. note:: this api may change in the future.

        """
        if url is not None:
            if suffix is None:
                suffix = ''
            if newtab is False:
                formatter = '<a  alt="{1}" href="{0}{1}{2}">{1}</a>'
            else:
                # alt must be quoted, otherwise content with spaces
                # produces a malformed attribute
                formatter = ('<a target="_blank" alt="{1}" '
                             'href="{0}{1}{2}">{1}</a>')
            self.df[colname] = self.df[colname].apply(lambda x:
                    formatter.format(url, x, suffix))
        else:
            if suffix is None:
                suffix = '.html'

            if newtab is False:
                formatter = '<a alt="{1}" href="{0}{2}">{1}</a>'
            else:
                formatter = '<a target="_blank" alt="{1}" href="{0}{2}">{1}</a>'
            self.df[colname] = self.df[colname].apply(lambda x:
                formatter.format(x, x, suffix))

    def sort(self, name, ascending=True):
        """Sort the table in place according to a column

        :param name: name of the column(s) used to sort the rows
        :param bool ascending: sort in ascending order (default)
        """
        # for different pandas implementations: select the method
        # explicitly so that genuine errors (e.g. unknown column) are not
        # hidden by a failing fallback
        if hasattr(self.df, 'sort_values'):
            self.df.sort_values(by=name, inplace=True, ascending=ascending)
        else:
            self.df.sort(columns=name, inplace=True, ascending=ascending)