# Source code for reports.htmltable

# coding=utf-8
# -*- python -*-
#
#  This file is part of GDSCTools software
#
#  Copyright (c) 2015 - Wellcome Trust Sanger Institute
#  All rights reserved
#
#  File author(s): Thomas Cokelaer <cokelaer@gmail.com>
#
#  Distributed under the BSD 3-Clause License.
#  See accompanying file LICENSE.txt distributed with this software
#
#  website: http://github.com/CancerRxGene/gdsctools
#
##############################################################################
"""Base classes to create HTML reports easily"""
import os
import shutil

import easydev
import pandas as pd

from colormap import rgb2hex, cmap_builder
# Note that the sorttable javascript comes from
# http://www.kryogenix.org/code/browser/sorttable/
# and is distributed under an X11 license.


__all__ = ['HTMLTable']


class HTMLTable(object):
    """Handler to export dataframe into HTML table.

    Dataframe in Pandas already have a to_html method to export the dataframe
    into a HTML formatted table. However, we provide here a few handy
    features:

        * Takes each cell in a given column and creates an HTML
          reference in each cell. See :meth:`add_href` method.
        * add an HTML background into cells (numeric content) of
          a given column using different methods (e.g., normalise).
          See :meth:`add_bgcolor`

    ::

        import pandas as pd
        df = pd.DataFrame({'A':[1,2,10], 'B':[1,10,2]})
        from gdsctools import HTMLTable
        html = HTMLTable(df)

    .. note:: similar project exists such as prettytable but could not
        do exactly what we wanted at the time gdsctools was developed.

    .. note:: Could be moved to biokit or easydev package.

    """

    def __init__(self, df, name=None, **kargs):
        """.. rubric:: Constructor

        :param dataframe df: a pandas dataframe to transform into a table
        :param str name: not used yet
        :param kargs: accepted and ignored

        There is an :attr:`pd_options` attribute to reduce the max column
        width or the precision of the numerical values. Keys are pandas
        option names; names without a dot are looked up in the ``display``
        namespace. A negative ``max_colwidth`` means "no limit".
        """
        self.df = df.copy()  # because we will change its contents possibly
        self.name = name
        self.pd_options = {
            'max_colwidth': -1,
            'precision': 2}

    def _qualified_options(self):
        """Return :attr:`pd_options` with fully qualified pandas names.

        Short names such as ``precision`` are ambiguous in recent pandas
        versions (``display.precision`` vs ``styler.format.precision``), so
        names without a dot are prefixed with ``display.``.
        """
        qualified = {}
        for key, value in self.pd_options.items():
            full_key = key if "." in key else "display." + key
            qualified[full_key] = value
        return qualified

    @staticmethod
    def _set_option(key, value):
        """Set one pandas option, translating the legacy ``-1`` colwidth."""
        unlimited_width = (key == "display.max_colwidth"
                           and isinstance(value, int) and value < 0)
        if unlimited_width:
            # Recent pandas spells "no limit" as None and rejects negative
            # integers; very old versions only understand -1.
            try:
                pd.set_option(key, None)
                return
            except (ValueError, TypeError):
                pass
        pd.set_option(key, value)

    def to_html(self, index=False, escape=False, header=True,
                collapse_table=True, class_outer="table_outer", **kargs):
        """Return HTML version of the table

        This is a wrapper of the to_html method of the pandas dataframe.
        The options found in :attr:`pd_options` are applied while the table
        is rendered and the previous pandas settings are restored
        afterwards, even if the rendering fails.

        :param bool index: do not include the index
        :param bool escape: do not escape special characters
        :param bool header: include header
        :param bool collapse_table: currently not used by this method
        :param str class_outer: CSS class of the ``div`` wrapping the table
        :param kargs: any parameter accepted by
            :meth:`pandas.DataFrame.to_html`
        :return: the table wrapped in ``<div class="...">`` as a string
        """
        previous = {}
        try:
            for key, value in self._qualified_options().items():
                # save the current option, then set the user value
                current = pd.get_option(key)
                self._set_option(key, value)
                previous[key] = current

            # class sortable is to use the sorttable javascript
            # note that the class has one t and the javascript library has 2
            # as in the original version of sorttable.js
            table = self.df.to_html(escape=escape, header=header,
                                    index=index, classes='sortable', **kargs)
        finally:
            # get back to the options that were active before the call
            for key, value in previous.items():
                pd.set_option(key, value)

        # We wrap the table in a dedicated class/div (see *class_outer*)
        # that users must define in their CSS.
        return '<div class="%s">' % class_outer + table + "</div>"

    def add_bgcolor(self, colname, cmap='copper', mode='absmax',
                    threshold=2):
        """Change column content into HTML paragraph with background color

        :param colname: name of the (numeric) column to colour
        :param cmap: a colormap (matplotlib) or created using
            colormap package (from pypi).
        :param mode: type of normalisation in 'absmax', 'max', 'clip'
            (see details below)
        :param threshold: used if mode is set to 'clip'

        Colormap have values between 0 and 1 so we need to normalised the
        data between 0 and 1. There are 3 mode to normalise the data so far.

        If mode is set to 'absmax', negatives and positives values are
        expected to be found in a range from -inf to inf. Values are
        scaled in between [0,1] X' = (X / M +1) /2. where M is the absolute
        maximum. Ideally a colormap should be made of 3 colors, the first
        color used for negative values, the second for zeros and third color
        for positive values. A column made of zeros only is mapped to 0.5.

        If mode is set to 'clip', values are clipped to a max value
        (parameter *threshold*) and values are normalised by that same
        threshold.

        If mode is set to 'max', values are normalised by the max.

        Any other mode leaves the values untouched. Nothing is done if the
        column is empty.
        """
        data = self.df[colname].values
        if len(data) == 0:
            return

        try:
            # if a cmap is provided, it may be just a known cmap name
            cmap = cmap_builder(cmap)
        except Exception:
            # best effort: assume cmap is already a usable colormap
            pass

        if mode == 'clip':
            data = [min(x, threshold) / float(threshold) for x in data]
        elif mode == 'absmax':
            absmax = max([abs(data.min()), abs(data.max())])
            if absmax != 0:
                data = (data / absmax + 1) / 2.
            else:
                # all values are zero: zero sits in the middle of the scale
                data = [0.5] * len(data)
        elif mode == 'max':
            if data.max() != 0:
                data = data / float(data.max())

        # the expected RGB values for a given data point
        rgbcolors = [cmap(x)[0:3] for x in data]
        hexcolors = [rgb2hex(*x, normalised=True) for x in rgbcolors]

        # need to read original data again
        data = self.df[colname].values

        # need to set precision since this is going to be a text not a number
        # so pandas will not use the precision for those cases:
        def prec(x):
            try:
                # this may fail if for instance x is nan or inf
                return easydev.precision(x, self.pd_options['precision'])
            except Exception:
                return x

        data = [prec(x) for x in data]
        html_formatter = '<p style="background-color:{0}">{1}</p>'
        self.df[colname] = [html_formatter.format(x, y)
                            for x, y in zip(hexcolors, data)]

    def add_href(self, colname, url=None, newtab=False, suffix=None):
        """Replace the content of a column by HTML links

        default behaviour: takes column content and put into::

            <a href={content}.html>content</a>

        This is used to link to local files. If url is provided, you
        typically want to link to an external url where the content is an
        identifier::

            <a href={url}{content}>content</a>

        Note that in the first case, *.html* is appended but not in the
        second case, which means cell's content should already have the
        .html (or *suffix* should be provided).

        :param colname: name of the column to transform
        :param str url: prefix inserted before the cell content
        :param bool newtab: unless it is exactly False, the link is opened
            in a new tab (``target="_blank"``)
        :param str suffix: text appended to the link target; defaults to
            an empty string if *url* is provided and to '.html' otherwise

        .. note:: this api may change in the future.
        """
        if url is not None:
            prefix = url
            default_suffix = ''
        else:
            prefix = ''
            default_suffix = '.html'
        if suffix is None:
            suffix = default_suffix

        # historical behaviour: only the False singleton disables the new tab
        target = '' if newtab is False else ' target="_blank"'
        formatter = '<a' + target + ' alt="{1}" href="{0}{1}{2}">{1}</a>'

        self.df[colname] = self.df[colname].apply(
            lambda x: formatter.format(prefix, x, suffix))

    def sort(self, name, ascending=True):
        """Sort the dataframe in place

        :param name: column name (or list of names) to sort by
        :param bool ascending: sort in ascending order if True
        """
        # for different pandas implementations
        if hasattr(self.df, "sort_values"):
            self.df.sort_values(by=name, inplace=True, ascending=ascending)
        else:
            self.df.sort(columns=name, inplace=True, ascending=ascending)