#!/usr/bin/env python
"""
histogram_gnuplot.py <data_file> <xtic_column> <column_list> <title> <ylabel> <yrange_min> <yrange_max> <graph_file> <img_size>

a generic histogram builder based on gnuplot backend

   data_file    - tab delimited file with data
   xtic_column  - column containing labels for x ticks [integer, 0 means no ticks]
   column_list  - comma separated list of columns to plot
   title        - title for the entire histogram
   ylabel       - y axis label
   yrange_min   - minimal value at the y axis (integer)
   yrange_max   - maximal value at the y axis (integer)
                  to set yrange to autoscaling assign 0 to yrange_min and yrange_max
   graph_file   - file to write histogram image to
   img_size     - as X,Y pair in pixels (e.g., 800,600 or 600,800 etc.)

   This tool requires gnuplot and gnuplot.py

anton nekrutenko | anton@bx.psu.edu
"""
import sys
import tempfile

import Gnuplot
import Gnuplot.funcutils

assert sys.version_info[:2] >= (2, 4)


def stop_err(msg):
    """Write ``msg`` to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    # A bare sys.exit() would report success (status 0) for a fatal error.
    sys.exit(1)


def _parse_row(line, col_list, xtic, line_index):
    """Convert one input line into the fields written to the gnuplot data file.

    line       - input line with the trailing newline already removed
    col_list   - list of 1-based column numbers (as strings) to plot
    xtic       - 1-based column holding the x tick label; 0 means "use the
                 line index as a placeholder label"; a negative value adds
                 no label column at all
    line_index - 0-based index of the line in the input file

    Returns a list of strings, or None when the line is blank, is a comment
    (starts with '#'), lacks one of the requested columns, or holds a
    non-numeric value in a plotted column.
    """
    if not line or line.startswith("#"):
        return None
    fields = line.split("\t")
    try:
        row = [str(float(fields[int(col) - 1])) for col in col_list]
        # The label lookup is guarded too: a short line must be skipped,
        # not crash the whole run with an IndexError.
        if xtic > 0:
            row.append(fields[xtic - 1])
        elif xtic == 0:
            row.append(str(line_index))
    except (ValueError, IndexError):
        return None
    return row


def _write_plot_data(in_file, out_file, col_list, xtic):
    """Copy the plottable rows of ``in_file`` to ``out_file``.

    Returns a ``(valid_count, skipped_index)`` tuple: the number of rows
    written, and the 0-based indices of every line that was skipped.
    """
    valid_count = 0
    skipped_index = []
    for i, line in enumerate(in_file):
        row = _parse_row(line.rstrip("\r\n"), col_list, xtic, i)
        if row is None:
            skipped_index.append(i)
            continue
        out_file.write("\t".join(row))
        out_file.write("\n")
        valid_count += 1
    return valid_count, skipped_index


def _build_plot_command(data_file_name, col_list, xtic):
    """Build the argument of gnuplot's ``plot`` statement.

    Column N of the data file holds the values of ``col_list[N - 1]``; when
    ``xtic`` is positive the tick labels are in the column right after them.
    """
    if xtic > 0:
        # Derived from col_list rather than from whichever row happened to be
        # parsed last, which may have been a partially parsed invalid line.
        label_column = len(col_list) + 1
        parts = ["'%s' using 1:xticlabels(%s) ti 'Column %s'" % (data_file_name, str(label_column), col_list[0])]
    else:
        parts = ["'%s' using 1 ti 'Column %s'" % (data_file_name, col_list[0])]
    for position in range(1, len(col_list)):
        parts.append("'%s' using %s t 'Column %s'" % (data_file_name, str(position + 1), col_list[position]))
    return ", ".join(parts)


def main(tmpFileName):
    """Read the command line, filter the input data and draw the histogram.

    tmpFileName - path of a scratch file that receives the cleaned-up data
                  handed to gnuplot (created by the caller).

    Exits through stop_err() when the arguments are unusable or gnuplot
    fails. Lines that cannot be plotted are skipped and reported on stdout.
    """
    try:
        xtic = int(sys.argv[2])
        col_list = sys.argv[3].split(",")
        title = 'set title "' + sys.argv[4] + '"'
        ylabel = 'set ylabel "' + sys.argv[5] + '"'
        ymin = sys.argv[6]
        ymax = sys.argv[7]
        img_file = sys.argv[8]
        img_size = sys.argv[9]
    except (IndexError, ValueError):
        stop_err("Check arguments\n")

    try:
        int(col_list[0])
    except ValueError:
        stop_err("You forgot to set columns for plotting\n")

    try:
        in_file = open(sys.argv[1])
    except IOError:
        stop_err("Check arguments\n")

    try:
        gf = open(tmpFileName, "w")
        try:
            valid_count, skipped_lines_index = _write_plot_data(in_file, gf, col_list, xtic)
        finally:
            # Close (and thereby flush) the data file before gnuplot reads it.
            gf.close()
    finally:
        in_file.close()

    # Plot whenever at least one row survived; comparing the skip count with
    # the last line index wrongly rejected single-row data and raised
    # NameError on an empty input file.
    if valid_count > 0:
        g_plot_command = _build_plot_command(tmpFileName, col_list, xtic)
        yrange = "set yrange [" + ymin + ":" + ymax + "]"

        try:
            g = Gnuplot.Gnuplot()
            g("reset")
            g("set boxwidth 0.9 absolute")
            g("set style fill solid 1.00 border -1")
            g("set style histogram clustered gap 5 title offset character 0, 0, 0")
            g("set xtics border in scale 1,0.5 nomirror rotate by 90 offset character 0, 0, 0")
            g("set key invert reverse Left outside")
            if xtic == 0:
                g("unset xtics")
            g(title)
            g(ylabel)
            g("set terminal png tiny size " + img_size)
            # Identical min and max (e.g. 0 and 0) leave the y axis autoscaled.
            if ymin != ymax:
                g(yrange)
            g('set output "' + img_file + '"')
            g("set style data histograms")
            g.plot(g_plot_command)
        except Exception:
            stop_err("Gnuplot error: Data cannot be plotted")
    else:
        sys.stderr.write("Column(s) %s of your dataset do not contain valid numeric data" % sys.argv[3])

    if skipped_lines_index:
        sys.stdout.write(
            "\nWARNING. You dataset contain(s) %d invalid lines starting with line #%d.  These lines were skipped while building the graph.\n"
            % (len(skipped_lines_index), skipped_lines_index[0] + 1)
        )


if __name__ == "__main__":
    # The tempfile initialization is here because while inside the main() it seems to create a condition
    # when the file is removed before gnuplot has a chance of accessing it
    gp_data_file = tempfile.NamedTemporaryFile("w")
    Gnuplot.gp.GnuplotOpts.default_term = "png"
    main(gp_data_file.name)