12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237 |
- # -*- coding: utf-8 -*-
- """Pretty-print tabular data."""
- from __future__ import print_function
- from __future__ import unicode_literals
- from collections import namedtuple, Iterable
- from platform import python_version_tuple
- import re
- if python_version_tuple()[0] < "3":
- from itertools import izip_longest
- from functools import partial
- _none_type = type(None)
- _int_type = int
- _long_type = long
- _float_type = float
- _text_type = unicode
- _binary_type = str
- def _is_file(f):
- return isinstance(f, file)
- else:
- from itertools import zip_longest as izip_longest
- from functools import reduce, partial
- _none_type = type(None)
- _int_type = int
- _long_type = int
- _float_type = float
- _text_type = str
- _binary_type = bytes
- import io
- def _is_file(f):
- return isinstance(f, io.IOBase)
- try:
- import wcwidth # optional wide-character (CJK) support
- except ImportError:
- wcwidth = None
# public names exported by a star-import of this module
__all__ = ["tabulate", "tabulate_formats", "simple_separated_format"]
__version__ = "0.7.6-dev"


# minimum extra space in headers
MIN_PADDING = 2


# if True, enable wide-character (CJK) support
# (initially True only when the optional wcwidth module could be imported)
WIDE_CHARS_MODE = wcwidth is not None


# a horizontal rule of a table, e.g. Line("+", "-", "+", "+") in the
# "grid" format
Line = namedtuple("Line", ["begin", "hline", "sep", "end"])


# the decoration of a row of cells, e.g. DataRow("|", "|", "|") in the
# "grid" format
DataRow = namedtuple("DataRow", ["begin", "sep", "end"])
- # A table structure is supposed to be:
- #
- # --- lineabove ---------
- # headerrow
- # --- linebelowheader ---
- # datarow
- # --- linebetweenrows ---
- # ... (more datarows) ...
- # --- linebetweenrows ---
- # last datarow
- # --- linebelow ---------
- #
- # TableFormat's line* elements can be
- #
- # - either None, if the element is not used,
- # - or a Line tuple,
- # - or a function: [col_widths], [col_alignments] -> string.
- #
- # TableFormat's *row elements can be
- #
- # - either None, if the element is not used,
- # - or a DataRow tuple,
- # - or a function: [cell_values], [col_widths], [col_alignments] -> string.
- #
- # padding (an integer) is the amount of white space around data values.
- #
- # with_header_hide:
- #
- # - either None, to display all table elements unconditionally,
- # - or a list of elements not to be displayed if the table has column headers.
- #
# Rendering recipe of one table style; the meaning of every field is
# described in the comment block above.
TableFormat = namedtuple("TableFormat", ["lineabove", "linebelowheader",
                                         "linebetweenrows", "linebelow",
                                         "headerrow", "datarow",
                                         "padding", "with_header_hide"])
- def _pipe_segment_with_colons(align, colwidth):
- """Return a segment of a horizontal line with optional colons which
- indicate column's alignment (as in `pipe` output format)."""
- w = colwidth
- if align in ["right", "decimal"]:
- return ('-' * (w - 1)) + ":"
- elif align == "center":
- return ":" + ('-' * (w - 2)) + ":"
- elif align == "left":
- return ":" + ('-' * (w - 1))
- else:
- return '-' * w
def _pipe_line_with_colons(colwidths, colaligns):
    """Build the whole separator line of the `pipe` format.

    Every column contributes one segment of dashes (with colons marking
    its alignment); segments are delimited and enclosed by "|".
    """
    pieces = [""]
    for align, width in zip(colaligns, colwidths):
        pieces.append(_pipe_segment_with_colons(align, width))
    pieces.append("")
    # the empty first and last items produce the enclosing bars
    return "|".join(pieces)
- def _mediawiki_row_with_attrs(separator, cell_values, colwidths, colaligns):
- alignment = { "left": '',
- "right": 'align="right"| ',
- "center": 'align="center"| ',
- "decimal": 'align="right"| ' }
- # hard-coded padding _around_ align attribute and value together
- # rather than padding parameter which affects only the value
- values_with_attrs = [' ' + alignment.get(a, '') + c + ' '
- for c, a in zip(cell_values, colaligns)]
- colsep = separator*2
- return (separator + colsep.join(values_with_attrs)).rstrip()
- def _textile_row_with_attrs(cell_values, colwidths, colaligns):
- cell_values[0] += ' '
- alignment = { "left": "<.", "right": ">.", "center": "=.", "decimal": ">." }
- values = (alignment.get(a, '') + v for a, v in zip(colaligns, cell_values))
- return '|' + '|'.join(values) + '|'
- def _html_begin_table_without_header(colwidths_ignore, colaligns_ignore):
- # this table header will be suppressed if there is a header row
- return "\n".join(["<table>", "<tbody>"])
- def _html_row_with_attrs(celltag, cell_values, colwidths, colaligns):
- alignment = { "left": '',
- "right": ' style="text-align: right;"',
- "center": ' style="text-align: center;"',
- "decimal": ' style="text-align: right;"' }
- values_with_attrs = ["<{0}{1}>{2}</{0}>".format(celltag, alignment.get(a, ''), c)
- for c, a in zip(cell_values, colaligns)]
- rowhtml = "<tr>" + "".join(values_with_attrs).rstrip() + "</tr>"
- if celltag == "th": # it's a header row, create a new table header
- rowhtml = "\n".join(["<table>",
- "<thead>",
- rowhtml,
- "</thead>",
- "<tbody>"])
- return rowhtml
- def _moin_row_with_attrs(celltag, cell_values, colwidths, colaligns, header=''):
- alignment = { "left": '',
- "right": '<style="text-align: right;">',
- "center": '<style="text-align: center;">',
- "decimal": '<style="text-align: right;">' }
- values_with_attrs = ["{0}{1} {2} ".format(celltag,
- alignment.get(a, ''),
- header+c+header)
- for c, a in zip(cell_values, colaligns)]
- return "".join(values_with_attrs)+"||"
- def _latex_line_begin_tabular(colwidths, colaligns, booktabs=False):
- alignment = { "left": "l", "right": "r", "center": "c", "decimal": "r" }
- tabular_columns_fmt = "".join([alignment.get(a, "l") for a in colaligns])
- return "\n".join(["\\begin{tabular}{" + tabular_columns_fmt + "}",
- "\\toprule" if booktabs else "\hline"])
# Maps a single character to the LaTeX markup that is emitted in its place;
# characters that are not listed are left as they are.
LATEX_ESCAPE_RULES = {
    r"&": r"\&",
    r"%": r"\%",
    r"$": r"\$",
    r"#": r"\#",
    r"_": r"\_",
    r"^": r"\^{}",
    r"{": r"\{",
    r"}": r"\}",
    r"~": r"\textasciitilde{}",
    "\\": r"\textbackslash{}",
    r"<": r"\ensuremath{<}",
    r">": r"\ensuremath{>}",
}
def _latex_row(cell_values, colwidths, colaligns):
    """Render one row of a LaTeX tabular.

    Every character of every cell is replaced according to
    LATEX_ESCAPE_RULES; the cells are then joined with "&" and the row
    is terminated with a double backslash.  `colwidths` and `colaligns`
    are not used.
    """
    escaped_cells = []
    for cell in cell_values:
        escaped_cells.append(
            "".join(LATEX_ESCAPE_RULES.get(char, char) for char in cell))
    return _build_simple_row(escaped_cells, DataRow("", "&", "\\\\"))
# Built-in table formats, keyed by the name accepted as `tablefmt`.
#
# NOTE(review): the tabulate() docstring says that "plain" separates
# columns with a double space, while the separators of "simple", "plain"
# and "rst" below are single spaces -- runs of spaces may have been
# collapsed in this copy of the file; verify against the upstream source.
_table_formats = {
    "simple": TableFormat(
        lineabove=Line("", "-", " ", ""),
        linebelowheader=Line("", "-", " ", ""),
        linebetweenrows=None,
        linebelow=Line("", "-", " ", ""),
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=["lineabove", "linebelow"]),
    "plain": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=None),
    "grid": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("+", "=", "+", "+"),
        linebetweenrows=Line("+", "-", "+", "+"),
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None),
    "fancy_grid": TableFormat(
        lineabove=Line("╒", "═", "╤", "╕"),
        linebelowheader=Line("╞", "═", "╪", "╡"),
        linebetweenrows=Line("├", "─", "┼", "┤"),
        linebelow=Line("╘", "═", "╧", "╛"),
        headerrow=DataRow("│", "│", "│"),
        datarow=DataRow("│", "│", "│"),
        padding=1,
        with_header_hide=None),
    "pipe": TableFormat(
        lineabove=_pipe_line_with_colons,
        linebelowheader=_pipe_line_with_colons,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=["lineabove"]),
    "orgtbl": TableFormat(
        lineabove=None,
        linebelowheader=Line("|", "-", "+", "|"),
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None),
    "jira": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("||", "||", "||"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None),
    "psql": TableFormat(
        lineabove=Line("+", "-", "+", "+"),
        linebelowheader=Line("|", "-", "+", "|"),
        linebetweenrows=None,
        linebelow=Line("+", "-", "+", "+"),
        headerrow=DataRow("|", "|", "|"),
        datarow=DataRow("|", "|", "|"),
        padding=1,
        with_header_hide=None),
    "rst": TableFormat(
        lineabove=Line("", "=", " ", ""),
        linebelowheader=Line("", "=", " ", ""),
        linebetweenrows=None,
        linebelow=Line("", "=", " ", ""),
        headerrow=DataRow("", " ", ""),
        datarow=DataRow("", " ", ""),
        padding=0,
        with_header_hide=None),
    "mediawiki": TableFormat(
        lineabove=Line('{| class="wikitable" style="text-align: left;"',
                       "", "", "\n|+ <!-- caption -->\n|-"),
        linebelowheader=Line("|-", "", "", ""),
        linebetweenrows=Line("|-", "", "", ""),
        linebelow=Line("|}", "", "", ""),
        headerrow=partial(_mediawiki_row_with_attrs, "!"),
        datarow=partial(_mediawiki_row_with_attrs, "|"),
        padding=0,
        with_header_hide=None),
    "moinmoin": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=partial(_moin_row_with_attrs, "||", header="'''"),
        datarow=partial(_moin_row_with_attrs, "||"),
        padding=1,
        with_header_hide=None),
    "html": TableFormat(
        lineabove=_html_begin_table_without_header,
        linebelowheader="",
        linebetweenrows=None,
        linebelow=Line("</tbody>\n</table>", "", "", ""),
        headerrow=partial(_html_row_with_attrs, "th"),
        datarow=partial(_html_row_with_attrs, "td"),
        padding=0,
        with_header_hide=["lineabove"]),
    "latex": TableFormat(
        lineabove=_latex_line_begin_tabular,
        linebelowheader=Line("\\hline", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\hline\n\\end{tabular}", "", "", ""),
        headerrow=_latex_row,
        datarow=_latex_row,
        padding=1,
        with_header_hide=None),
    "latex_booktabs": TableFormat(
        lineabove=partial(_latex_line_begin_tabular, booktabs=True),
        linebelowheader=Line("\\midrule", "", "", ""),
        linebetweenrows=None,
        linebelow=Line("\\bottomrule\n\\end{tabular}", "", "", ""),
        headerrow=_latex_row,
        datarow=_latex_row,
        padding=1,
        with_header_hide=None),
    "tsv": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("", "\t", ""),
        datarow=DataRow("", "\t", ""),
        padding=0,
        with_header_hide=None),
    "textile": TableFormat(
        lineabove=None,
        linebelowheader=None,
        linebetweenrows=None,
        linebelow=None,
        headerrow=DataRow("|_. ", "|_.", "|"),
        datarow=_textile_row_with_attrs,
        padding=1,
        with_header_hide=None),
}


# alphabetically sorted names of the built-in formats
tabulate_formats = sorted(_table_formats)
- _invisible_codes = re.compile(r"\x1b\[\d*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes
- _invisible_codes_bytes = re.compile(b"\x1b\[\d*m|\x1b\[\d*\;\d*\;\d*m") # ANSI color codes
def simple_separated_format(separator):
    """Create a TableFormat whose columns are delimited by `separator`.

    The format draws no lines and applies no padding; header and data
    rows are both rendered with the separator between the cells.

    >>> tsv = simple_separated_format("\\t") ; \
        tabulate([["foo", 1], ["spam", 23]], tablefmt=tsv) == 'foo \\t 1\\nspam\\t23'
    True

    """
    row_format = DataRow('', separator, '')
    return TableFormat(lineabove=None,
                       linebelowheader=None,
                       linebetweenrows=None,
                       linebelow=None,
                       headerrow=row_format,
                       datarow=row_format,
                       padding=0,
                       with_header_hide=None)
- def _isconvertible(conv, string):
- try:
- n = conv(string)
- return True
- except (ValueError, TypeError):
- return False
- def _isnumber(string):
- """
- >>> _isnumber("123.45")
- True
- >>> _isnumber("123")
- True
- >>> _isnumber("spam")
- False
- """
- return _isconvertible(float, string)
def _isint(string, inttype=int):
    """Tell whether `string` is an integer or the text of an integer.

    True if the value is exactly of type `inttype`, or if it is a text
    or byte string that `inttype` can convert.

    >>> _isint("123")
    True
    >>> _isint("123.45")
    False
    """
    if type(string) is inttype:
        return True
    if not isinstance(string, (_binary_type, _text_type)):
        return False
    return _isconvertible(inttype, string)
def _type(string, has_invisible=True):
    """Return the least generic type of a value: type(None), int, float,
    the byte string type or the text type.

    If `has_invisible` is true, ANSI color codes are removed from text
    and byte strings before the type is detected.  Values having an
    `isoformat` attribute (datetime.datetime, date, and time) are
    reported as text.

    >>> _type(None) is type(None)
    True
    >>> _type("foo") is type("")
    True
    >>> _type("1") is type(1)
    True
    >>> _type('\x1b[31m42\x1b[0m') is type(42)
    True

    """
    is_string = isinstance(string, (_text_type, _binary_type))
    if has_invisible and is_string:
        string = _strip_invisible(string)

    if string is None:
        return _none_type
    if hasattr(string, "isoformat"):  # datetime.datetime, date, and time
        return _text_type
    if _isint(string) or _isint(string, _long_type):
        return int
    if _isnumber(string):
        return float
    if isinstance(string, _binary_type):
        return _binary_type
    return _text_type
def _afterpoint(string):
    """Count the symbols after the decimal point of a number.

    Returns -1 if `string` is not a number, is an integer, or has
    neither a decimal point nor an exponent.  If there is no decimal
    point, the symbols after the exponent letter are counted instead.

    >>> _afterpoint("123.45")
    2
    >>> _afterpoint("1001")
    -1
    >>> _afterpoint("eggs")
    -1
    >>> _afterpoint("123e45")
    2

    """
    if not _isnumber(string) or _isint(string):
        return -1
    marker = string.rfind(".")
    if marker < 0:
        # no decimal point: fall back to the exponent letter
        marker = string.lower().rfind("e")
    if marker < 0:
        return -1  # no point
    return len(string) - marker - 1
- def _padleft(width, s):
- """Flush right.
- >>> _padleft(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430'
- True
- """
- fmt = "{0:>%ds}" % width
- return fmt.format(s)
- def _padright(width, s):
- """Flush left.
- >>> _padright(6, '\u044f\u0439\u0446\u0430') == '\u044f\u0439\u0446\u0430 '
- True
- """
- fmt = "{0:<%ds}" % width
- return fmt.format(s)
- def _padboth(width, s):
- """Center string.
- >>> _padboth(6, '\u044f\u0439\u0446\u0430') == ' \u044f\u0439\u0446\u0430 '
- True
- """
- fmt = "{0:^%ds}" % width
- return fmt.format(s)
def _strip_invisible(s):
    """Remove invisible ANSI color codes.

    Text strings are matched against the text pattern, anything else is
    treated as a byte string and matched against the bytes pattern.
    """
    if isinstance(s, _text_type):
        pattern = _invisible_codes
    else:  # a bytestring
        pattern = _invisible_codes_bytes
    return pattern.sub("", s)
def _visible_width(s):
    """Return the width of a value as it appears when printed.

    ANSI color codes are removed from text and byte strings; other
    values are converted to text first.  When wcwidth is available and
    WIDE_CHARS_MODE is on, the width is computed by wcwidth.wcswidth,
    otherwise it is the length.

    >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world")
    (5, 5)

    """
    # optional wide-character support
    use_wcwidth = wcwidth is not None and WIDE_CHARS_MODE
    measure = wcwidth.wcswidth if use_wcwidth else len
    if isinstance(s, (_text_type, _binary_type)):
        printable = _strip_invisible(s)
    else:
        printable = _text_type(s)
    return measure(printable)
def _align_column(strings, alignment, minwidth=0, has_invisible=True):
    """[string] -> [padded_string]

    Pad all strings of a column to a common width, which is the widest
    string or `minwidth`, whichever is larger.  `alignment` is "right",
    "center", "decimal" (decimal points are lined up) or a false value
    (the strings are returned unchanged); anything else aligns to the
    left.

    >>> list(map(str,_align_column(["12.345", "-1234.5", "1.23", "1234.5", "1e+234", "1.0e234"], "decimal")))
    ['   12.345  ', '-1234.5    ', '    1.23   ', ' 1234.5    ', '    1e+234 ', '    1.0e234']

    >>> list(map(str,_align_column(['123.4', '56.7890'], None)))
    ['123.4', '56.7890']

    """
    if alignment == "right":
        strings = [s.strip() for s in strings]
        padfn = _padleft
    elif alignment == "center":
        strings = [s.strip() for s in strings]
        padfn = _padboth
    elif alignment == "decimal":
        if has_invisible:
            fraction_lens = [_afterpoint(_strip_invisible(s)) for s in strings]
        else:
            fraction_lens = [_afterpoint(s) for s in strings]
        longest_fraction = max(fraction_lens)
        # pad on the right so that all decimal points end up in one column
        strings = [s + (longest_fraction - frac) * " "
                   for s, frac in zip(strings, fraction_lens)]
        padfn = _padleft
    elif not alignment:
        return strings
    else:
        strings = [s.strip() for s in strings]
        padfn = _padright

    wide_chars = wcwidth is not None and WIDE_CHARS_MODE
    if has_invisible:
        measure = _visible_width
    elif wide_chars:  # optional wide-character support if available
        measure = wcwidth.wcswidth
    else:
        measure = len

    raw_lengths = [len(s) for s in strings]
    shown_widths = [measure(s) for s in strings]
    target_width = max(max(shown_widths), minwidth)

    if has_invisible or wide_chars:
        # padfn counts every character, including the invisible ones and
        # the wide ones, so the width it is given is corrected by the
        # difference between the displayed width and the raw length
        return [padfn(target_width - (shown - raw), s)
                for s, shown, raw in zip(strings, shown_widths, raw_lengths)]
    return [padfn(target_width, s) for s in strings]
def _more_generic(type1, type2):
    """Return the more generic of two column types.

    The types are ordered from the least to the most generic:
    type(None), int, float, byte string, text string.  A type that is not
    in this list counts as the text string type.
    """
    ladder = (_none_type, int, float, _binary_type, _text_type)

    def rank(candidate):
        try:
            return ladder.index(candidate)
        except ValueError:
            return len(ladder) - 1

    return ladder[max(rank(type1), rank(type2))]
def _column_type(strings, has_invisible=True):
    """The least generic type all column values are convertible to.

    The type of every value is detected (ANSI color codes are ignored if
    `has_invisible` is true) and the most generic of them is returned;
    an empty column is an int column.

    >>> _column_type(["1", "2"]) is _int_type
    True
    >>> _column_type(["1", "2.3"]) is _float_type
    True
    >>> _column_type(["1", "2.3", "four"]) is _text_type
    True
    >>> _column_type(["four", '\u043f\u044f\u0442\u044c']) is _text_type
    True
    >>> _column_type([None, "brux"]) is _text_type
    True
    >>> _column_type([1, 2, None]) is _int_type
    True
    >>> import datetime as dt
    >>> _column_type([dt.datetime(1991,2,19), dt.time(17,35)]) is _text_type
    True

    """
    value_types = [_type(value, has_invisible) for value in strings]
    column_type = int
    for value_type in value_types:
        column_type = _more_generic(column_type, value_type)
    return column_type
def _format(val, valtype, floatfmt, missingval="", has_invisible=True):
    """Format a value according to the type of its column.

    None becomes `missingval`.  In a byte string column the value is
    decoded as ASCII.  In a float column the value is converted to float
    and formatted with `floatfmt`; if it is a string and `has_invisible`
    is true, only the number inside the ANSI color codes is reformatted
    and the codes are kept.  In all other columns the value is converted
    to text as is.
    """
    if val is None:
        return missingval

    if valtype is _binary_type:
        try:
            return _text_type(val, "ascii")
        except TypeError:
            return _text_type(val)

    if valtype is float:
        is_a_colored_number = (has_invisible and
                               isinstance(val, (_text_type, _binary_type)))
        if not is_a_colored_number:
            return format(float(val), floatfmt)
        bare_number = _strip_invisible(val)
        formatted_number = format(float(bare_number), floatfmt)
        # put the formatted number back between the color codes
        return val.replace(bare_number, formatted_number)

    # int, text and any other column type
    return "{0}".format(val)
def _align_header(header, alignment, width, visible_width):
    """Pad string header to width chars given known visible_width of the header.

    `alignment` is "left", "center" or a false value (the header is
    returned unpadded); anything else aligns to the right.
    """
    # characters that take no space on screen must not count as padding
    padded_width = width + (len(header) - visible_width)
    if alignment == "left":
        padfn = _padright
    elif alignment == "center":
        padfn = _padboth
    elif not alignment:
        return "{0}".format(header)
    else:
        padfn = _padleft
    return padfn(padded_width, header)
- def _prepend_row_index(rows, index):
- """Add a left-most index column."""
- if index is None or index is False:
- return rows
- if len(index) != len(rows):
- print('index=', index)
- print('rows=', rows)
- raise ValueError('index must be as long as the number of data rows')
- rows = [[v]+list(row) for v,row in zip(index, rows)]
- return rows
- def _bool(val):
- "A wrapper around standard bool() which doesn't throw on NumPy arrays"
- try:
- return bool(val)
- except ValueError: # val is likely to be a numpy array with many elements
- return False
def _normalize_tabular_data(tabular_data, headers, showindex="default"):
    """Transform a supported data type to a list of lists, and a list of headers.

    Supported tabular data types:

    * list-of-lists or another iterable of iterables

    * list of named tuples (usually used with headers="keys")

    * list of dicts (usually used with headers="keys")

    * list of OrderedDicts (usually used with headers="keys")

    * 2D NumPy arrays

    * NumPy record arrays (usually used with headers="keys")

    * dict of iterables (usually used with headers="keys")

    * pandas.DataFrame (usually used with headers="keys")

    The first row can be used as headers if headers="firstrow",
    column indices can be used as headers if headers="keys".
    If there are no rows to take such headers from, the list of headers
    is empty.

    If showindex="default", show row indices of the pandas.DataFrame.
    If showindex="always", show row indices for all types of data.
    If showindex="never", don't show row indices for all types of data.
    If showindex is an iterable, show its values as row indices.

    Returns a tuple (rows, headers): a list of lists and a list of
    strings.  Raises ValueError if the data has `keys` and `values` but
    looks neither like a dict nor like a DataFrame, if the headers given
    for a list of dicts are neither a dict nor a keyword, or if the row
    index and the rows differ in length.
    """
    try:
        bool(headers)
    except ValueError:  # numpy.ndarray, pandas.core.index.Index, ...
        # bool() is ambiguous for such objects; a list is safe to test
        # and to compare with the keywords below
        headers = list(headers)

    index = None
    if hasattr(tabular_data, "keys") and hasattr(tabular_data, "values"):
        # dict-like and pandas.DataFrame?
        if hasattr(tabular_data.values, "__call__"):
            # likely a conventional dict
            keys = tabular_data.keys()
            # columns have to be transposed
            rows = list(izip_longest(*tabular_data.values()))
        elif hasattr(tabular_data, "index"):
            # values is a property, has .index => it's likely a
            # pandas.DataFrame (pandas 0.11.0)
            keys = tabular_data.keys()
            vals = tabular_data.values  # values matrix doesn't need to be transposed
            # for DataFrames add an index per default
            index = list(tabular_data.index)
            rows = [list(row) for row in vals]
        else:
            raise ValueError("tabular data doesn't appear to be a dict or a DataFrame")

        if headers == "keys":
            headers = list(map(_text_type, keys))  # headers should be strings

    else:  # it's a usual an iterable of iterables, or a NumPy array
        rows = list(tabular_data)

        if (headers == "keys" and
                hasattr(tabular_data, "dtype") and
                getattr(tabular_data.dtype, "names")):
            # numpy record array
            headers = tabular_data.dtype.names
        elif (headers == "keys"
              and len(rows) > 0
              and isinstance(rows[0], tuple)
              and hasattr(rows[0], "_fields")):
            # namedtuple
            headers = list(map(_text_type, rows[0]._fields))
        elif (len(rows) > 0
              and isinstance(rows[0], dict)):
            # dict or OrderedDict
            uniq_keys = set()  # implements hashed lookup
            keys = []  # storage for set
            if headers == "firstrow":
                firstdict = rows[0] if len(rows) > 0 else {}
                keys.extend(firstdict.keys())
                uniq_keys.update(keys)
                rows = rows[1:]
            for row in rows:
                for k in row.keys():
                    # Save unique items in input order
                    if k not in uniq_keys:
                        keys.append(k)
                        uniq_keys.add(k)
            if headers == 'keys':
                headers = keys
            elif isinstance(headers, dict):
                # a dict of headers for a list of dicts
                headers = [headers.get(k, k) for k in keys]
                headers = list(map(_text_type, headers))
            elif headers == "firstrow":
                if len(rows) > 0:
                    headers = [firstdict.get(k, k) for k in keys]
                    headers = list(map(_text_type, headers))
                else:
                    headers = []
            elif headers:
                raise ValueError('headers for a list of dicts is not a dict or a keyword')
            rows = [[row.get(k) for k in keys] for row in rows]

        elif headers == "keys" and len(rows) > 0:
            # keys are column indices
            headers = list(map(_text_type, range(len(rows[0]))))

    # take headers from the first row if necessary
    if headers == "firstrow" and len(rows) > 0:
        if index is not None:
            headers = [index[0]] + list(rows[0])
            index = index[1:]
        else:
            headers = rows[0]
        headers = list(map(_text_type, headers))  # headers should be strings
        rows = rows[1:]

    # a keyword is still unresolved only if there were no rows to take the
    # headers from; without this check the keyword itself would be split
    # into one-character headers by the conversion below
    if headers == "keys" or headers == "firstrow":
        headers = []

    headers = list(map(_text_type, headers))
    rows = list(map(list, rows))

    # add or remove an index column
    showindex_is_a_str = type(showindex) in [_text_type, _binary_type]
    if showindex == "default" and index is not None:
        rows = _prepend_row_index(rows, index)
    elif isinstance(showindex, Iterable) and not showindex_is_a_str:
        rows = _prepend_row_index(rows, list(showindex))
    elif showindex == "always" or (_bool(showindex) and not showindex_is_a_str):
        if index is None:
            index = list(range(len(rows)))
        rows = _prepend_row_index(rows, index)
    elif showindex == "never" or (not _bool(showindex) and not showindex_is_a_str):
        pass

    # pad with empty headers for initial columns if necessary
    if headers and len(rows) > 0:
        nhs = len(headers)
        ncols = len(rows[0])
        if nhs < ncols:
            headers = [""] * (ncols - nhs) + headers

    return rows, headers
- def tabulate(tabular_data, headers=(), tablefmt="simple",
- floatfmt="g", numalign="decimal", stralign="left",
- missingval="", showindex="default"):
- """Format a fixed width table for pretty printing.
- >>> print(tabulate([[1, 2.34], [-56, "8.999"], ["2", "10001"]]))
- --- ---------
- 1 2.34
- -56 8.999
- 2 10001
- --- ---------
- The first required argument (`tabular_data`) can be a
- list-of-lists (or another iterable of iterables), a list of named
- tuples, a dictionary of iterables, an iterable of dictionaries,
- a two-dimensional NumPy array, NumPy record array, or a Pandas'
- dataframe.
- Table headers
- -------------
- To print nice column headers, supply the second argument (`headers`):
- - `headers` can be an explicit list of column headers
- - if `headers="firstrow"`, then the first row of data is used
- - if `headers="keys"`, then dictionary keys or column indices are used
- Otherwise a headerless table is produced.
- If the number of headers is less than the number of columns, they
- are supposed to be names of the last columns. This is consistent
- with the plain-text format of R and Pandas' dataframes.
- >>> print(tabulate([["sex","age"],["Alice","F",24],["Bob","M",19]],
- ... headers="firstrow"))
- sex age
- ----- ----- -----
- Alice F 24
- Bob M 19
- By default, pandas.DataFrame data have an additional column called
- row index. To add a similar column to all other types of data,
- use `showindex="always"` or `showindex=True`. To suppress row indices
- for all types of data, pass `showindex="never" or `showindex=False`.
- To add a custom row index column, pass `showindex=some_iterable`.
- >>> print(tabulate([["F",24],["M",19]], showindex="always"))
- - - --
- 0 F 24
- 1 M 19
- - - --
- Column alignment
- ----------------
- `tabulate` tries to detect column types automatically, and aligns
- the values properly. By default it aligns decimal points of the
- numbers (or flushes integer numbers to the right), and flushes
- everything else to the left. Possible column alignments
- (`numalign`, `stralign`) are: "right", "center", "left", "decimal"
- (only for `numalign`), and None (to disable alignment).
- Table formats
- -------------
- `floatfmt` is a format specification used for columns which
- contain numeric data with a decimal point.
- `None` values are replaced with a `missingval` string:
- >>> print(tabulate([["spam", 1, None],
- ... ["eggs", 42, 3.14],
- ... ["other", None, 2.7]], missingval="?"))
- ----- -- ----
- spam 1 ?
- eggs 42 3.14
- other ? 2.7
- ----- -- ----
- Various plain-text table formats (`tablefmt`) are supported:
- 'plain', 'simple', 'grid', 'pipe', 'orgtbl', 'rst', 'mediawiki',
- 'latex', and 'latex_booktabs'. Variable `tabulate_formats` contains the list of
- currently supported formats.
- "plain" format doesn't use any pseudographics to draw tables,
- it separates columns with a double space:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "plain"))
- strings numbers
- spam 41.9999
- eggs 451
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="plain"))
- spam 41.9999
- eggs 451
- "simple" format is like Pandoc simple_tables:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "simple"))
- strings numbers
- --------- ---------
- spam 41.9999
- eggs 451
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="simple"))
- ---- --------
- spam 41.9999
- eggs 451
- ---- --------
- "grid" is similar to tables produced by Emacs table.el package or
- Pandoc grid_tables:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "grid"))
- +-----------+-----------+
- | strings | numbers |
- +===========+===========+
- | spam | 41.9999 |
- +-----------+-----------+
- | eggs | 451 |
- +-----------+-----------+
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="grid"))
- +------+----------+
- | spam | 41.9999 |
- +------+----------+
- | eggs | 451 |
- +------+----------+
- "fancy_grid" draws a grid using box-drawing characters:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "fancy_grid"))
- ╒═══════════╤═══════════╕
- │ strings │ numbers │
- ╞═══════════╪═══════════╡
- │ spam │ 41.9999 │
- ├───────────┼───────────┤
- │ eggs │ 451 │
- ╘═══════════╧═══════════╛
- "pipe" is like tables in PHP Markdown Extra extension or Pandoc
- pipe_tables:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "pipe"))
- | strings | numbers |
- |:----------|----------:|
- | spam | 41.9999 |
- | eggs | 451 |
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="pipe"))
- |:-----|---------:|
- | spam | 41.9999 |
- | eggs | 451 |
- "orgtbl" is like tables in Emacs org-mode and orgtbl-mode. They
- are slightly different from "pipe" format by not using colons to
- define column alignment, and using a "+" sign to indicate line
- intersections:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "orgtbl"))
- | strings | numbers |
- |-----------+-----------|
- | spam | 41.9999 |
- | eggs | 451 |
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="orgtbl"))
- | spam | 41.9999 |
- | eggs | 451 |
- "rst" is like a simple table format from reStructuredText; please
- note that reStructuredText accepts also "grid" tables:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]],
- ... ["strings", "numbers"], "rst"))
- ========= =========
- strings numbers
- ========= =========
- spam 41.9999
- eggs 451
- ========= =========
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="rst"))
- ==== ========
- spam 41.9999
- eggs 451
- ==== ========
- "mediawiki" produces a table markup used in Wikipedia and on other
- MediaWiki-based sites:
- >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
- ... headers="firstrow", tablefmt="mediawiki"))
- {| class="wikitable" style="text-align: left;"
- |+ <!-- caption -->
- |-
- ! strings !! align="right"| numbers
- |-
- | spam || align="right"| 41.9999
- |-
- | eggs || align="right"| 451
- |}
- "html" produces HTML markup:
- >>> print(tabulate([["strings", "numbers"], ["spam", 41.9999], ["eggs", "451.0"]],
- ... headers="firstrow", tablefmt="html"))
- <table>
- <thead>
- <tr><th>strings </th><th style="text-align: right;"> numbers</th></tr>
- </thead>
- <tbody>
- <tr><td>spam </td><td style="text-align: right;"> 41.9999</td></tr>
- <tr><td>eggs </td><td style="text-align: right;"> 451 </td></tr>
- </tbody>
- </table>
- "latex" produces a tabular environment of LaTeX document markup:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex"))
- \\begin{tabular}{lr}
- \\hline
- spam & 41.9999 \\\\
- eggs & 451 \\\\
- \\hline
- \\end{tabular}
- "latex_booktabs" produces a tabular environment of LaTeX document markup
- using the booktabs.sty package:
- >>> print(tabulate([["spam", 41.9999], ["eggs", "451.0"]], tablefmt="latex_booktabs"))
- \\begin{tabular}{lr}
- \\toprule
- spam & 41.9999 \\\\
- eggs & 451 \\\\
- \\bottomrule
- \\end{tabular}
- """
- if tabular_data is None:
- tabular_data = []
- list_of_lists, headers = _normalize_tabular_data(
- tabular_data, headers, showindex=showindex)
- # optimization: look for ANSI control codes once,
- # enable smart width functions only if a control code is found
- plain_text = '\n'.join(['\t'.join(map(_text_type, headers))] + \
- ['\t'.join(map(_text_type, row)) for row in list_of_lists])
- has_invisible = re.search(_invisible_codes, plain_text)
- enable_widechars = wcwidth is not None and WIDE_CHARS_MODE
- if has_invisible:
- width_fn = _visible_width
- elif enable_widechars: # optional wide-character support if available
- width_fn = wcwidth.wcswidth
- else:
- width_fn = len
- # format rows and columns, convert numeric values to strings
- cols = list(zip(*list_of_lists))
- coltypes = list(map(_column_type, cols))
- cols = [[_format(v, ct, floatfmt, missingval, has_invisible) for v in c]
- for c,ct in zip(cols, coltypes)]
- # align columns
- aligns = [numalign if ct in [int,float] else stralign for ct in coltypes]
- minwidths = [width_fn(h) + MIN_PADDING for h in headers] if headers else [0]*len(cols)
- cols = [_align_column(c, a, minw, has_invisible)
- for c, a, minw in zip(cols, aligns, minwidths)]
- if headers:
- # align headers and add headers
- t_cols = cols or [['']] * len(headers)
- t_aligns = aligns or [stralign] * len(headers)
- minwidths = [max(minw, width_fn(c[0])) for minw, c in zip(minwidths, t_cols)]
- headers = [_align_header(h, a, minw, width_fn(h))
- for h, a, minw in zip(headers, t_aligns, minwidths)]
- rows = list(zip(*cols))
- else:
- minwidths = [width_fn(c[0]) for c in cols]
- rows = list(zip(*cols))
- if not isinstance(tablefmt, TableFormat):
- tablefmt = _table_formats.get(tablefmt, _table_formats["simple"])
- return _format_table(tablefmt, headers, rows, minwidths, aligns)
def _build_simple_row(padded_cells, rowfmt):
    """Join already-padded cells using a ``(begin, sep, end)`` row format.

    The cells are concatenated with ``sep`` between them, wrapped in
    ``begin`` and ``end``, and trailing whitespace is stripped from the
    resulting line.
    """
    prefix, delimiter, suffix = rowfmt
    line = prefix + delimiter.join(padded_cells) + suffix
    return line.rstrip()
def _build_row(padded_cells, colwidths, colaligns, rowfmt):
    """Render one row of cells, or return None when no row format is given.

    ``rowfmt`` is either a callable, invoked as
    ``rowfmt(padded_cells, colwidths, colaligns)``, or a value that is
    handed to ``_build_simple_row`` together with the cells.
    """
    # A falsy format means "this kind of row is not drawn".
    if not rowfmt:
        return None
    # Non-callable formats are plain (begin, sep, end) descriptions.
    if not hasattr(rowfmt, "__call__"):
        return _build_simple_row(padded_cells, rowfmt)
    return rowfmt(padded_cells, colwidths, colaligns)
def _build_line(colwidths, colaligns, linefmt):
    """Render a horizontal rule, or return None when no line format is given.

    ``linefmt`` is either a callable, invoked as
    ``linefmt(colwidths, colaligns)``, or a ``(begin, fill, sep, end)``
    sequence: each column becomes ``fill`` repeated to the column width and
    the segments are joined by ``_build_simple_row``.
    """
    # A falsy format means "this line is not drawn".
    if not linefmt:
        return None
    if hasattr(linefmt, "__call__"):
        return linefmt(colwidths, colaligns)
    begin, fill, sep, end = linefmt
    segments = [fill * width for width in colwidths]
    return _build_simple_row(segments, (begin, sep, end))
def _pad_row(cells, padding):
    """Surround every cell with ``padding`` spaces on both sides.

    Returns a new list of padded cells.  An empty (falsy) ``cells`` value
    is returned unchanged.
    """
    if not cells:
        return cells
    margin = " " * padding
    return [margin + cell + margin for cell in cells]
def _format_table(fmt, headers, rows, colwidths, colaligns):
    """Render headers and rows as text according to the table format ``fmt``.

    ``fmt`` supplies the cell padding, the row formats (``headerrow`` and
    ``datarow``) and the optional horizontal lines (``lineabove``,
    ``linebelowheader``, ``linebetweenrows``, ``linebelow``).  When headers
    are present, the line names listed in ``fmt.with_header_hide`` are
    suppressed.  The rendered lines are joined with newlines and returned
    as a single string.
    """
    # Lines are only hidden when there is a header to take their place.
    suppressed = fmt.with_header_hide if (headers and fmt.with_header_hide) else []
    padding = fmt.padding
    header_format = fmt.headerrow
    outer_widths = [width + 2 * padding for width in colwidths]
    header_cells = _pad_row(headers, padding)
    body = [_pad_row(row, padding) for row in rows]

    output = []
    emit = output.append

    if fmt.lineabove and "lineabove" not in suppressed:
        emit(_build_line(outer_widths, colaligns, fmt.lineabove))

    if header_cells:
        emit(_build_row(header_cells, outer_widths, colaligns, header_format))
        if fmt.linebelowheader and "linebelowheader" not in suppressed:
            emit(_build_line(outer_widths, colaligns, fmt.linebelowheader))

    # A separator goes before every data row except the first, which is the
    # same as putting one after every row except the last.
    separate = body and fmt.linebetweenrows and "linebetweenrows" not in suppressed
    for position, row in enumerate(body):
        if separate and position > 0:
            emit(_build_line(outer_widths, colaligns, fmt.linebetweenrows))
        emit(_build_row(row, outer_widths, colaligns, fmt.datarow))

    if fmt.linebelow and "linebelow" not in suppressed:
        emit(_build_line(outer_widths, colaligns, fmt.linebelow))

    return "\n".join(output)
def _main():
    """\
    Usage: tabulate [options] [FILE ...]
    Pretty-print tabular data.
    See also https://bitbucket.org/astanin/python-tabulate
    FILE a filename of the file with tabular data;
    if "-" or missing, read data from stdin.
    Options:
    -h, --help show this message
    -1, --header use the first row of data as a table header
    -o FILE, --output FILE print table to FILE (default: stdout)
    -s REGEXP, --sep REGEXP use a custom column separator (default: whitespace)
    -F FPFMT, --float FPFMT floating point number format (default: g)
    -f FMT, --format FMT set output table format; supported formats:
    plain, simple, grid, fancy_grid, pipe, orgtbl,
    rst, mediawiki, html, latex, latex_booktabs, tsv
    (default: simple)
    """
    # NOTE: the docstring above doubles as the usage message printed for
    # --help and for option errors, so implementation notes live in comments.
    #
    # Command-line entry point: parse sys.argv, then pretty-print every input
    # file (or stdin) to the chosen output.  Exits with status 2 on an
    # option-parsing error, 3 on an unsupported table format and 0 after
    # printing the usage for --help.
    import getopt
    import sys
    import textwrap

    usage = textwrap.dedent(_main.__doc__)
    try:
        # "output=" (with the trailing "=") makes --output consume its FILE
        # argument, in line with the short form "-o FILE" and the usage text.
        opts, args = getopt.getopt(
            sys.argv[1:],
            "h1o:s:F:f:",
            ["help", "header", "output=", "sep=", "float=", "format="])
    except getopt.GetoptError as e:
        print(e)
        print(usage)
        sys.exit(2)

    # Defaults, overridden by the options below.
    headers = []
    floatfmt = "g"
    tablefmt = "simple"
    sep = r"\s+"
    outfile = "-"
    for opt, value in opts:
        if opt in ["-1", "--header"]:
            headers = "firstrow"
        elif opt in ["-o", "--output"]:
            outfile = value
        elif opt in ["-F", "--float"]:
            floatfmt = value
        elif opt in ["-f", "--format"]:
            if value not in tabulate_formats:
                print("%s is not a supported table format" % value)
                print(usage)
                sys.exit(3)
            tablefmt = value
        elif opt in ["-s", "--sep"]:
            sep = value
        elif opt in ["-h", "--help"]:
            print(usage)
            sys.exit(0)

    files = [sys.stdin] if not args else args
    out = sys.stdout if outfile == "-" else open(outfile, "w")
    try:
        for f in files:
            if f == "-":
                f = sys.stdin
            if _is_file(f):
                _pprint_file(f, headers=headers, tablefmt=tablefmt,
                             sep=sep, floatfmt=floatfmt, file=out)
            else:
                with open(f) as fobj:
                    _pprint_file(fobj, headers=headers, tablefmt=tablefmt,
                                 sep=sep, floatfmt=floatfmt, file=out)
    finally:
        # Close only a file opened here; sys.stdout must stay usable.
        if out is not sys.stdout:
            out.close()
def _pprint_file(fobject, headers, tablefmt, sep, floatfmt, file):
    """Read tabular text from ``fobject`` and print it as a formatted table.

    Every non-blank line is stripped of trailing whitespace and split into
    cells with the regular expression ``sep``.  The resulting table is
    rendered by ``tabulate`` using ``headers``, ``tablefmt`` and
    ``floatfmt``, and printed to ``file``.
    """
    table = []
    for line in fobject.readlines():
        # Blank (whitespace-only) lines carry no row.
        if line.strip():
            table.append(re.split(sep, line.rstrip()))
    rendered = tabulate(table, headers, tablefmt, floatfmt=floatfmt)
    print(rendered, file=file)
# Run the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    _main()
|