#!/usr/bin/env python

import datetime
import random
import sys 

from csvkit import init_common_parser, extract_csv_reader_kwargs
from csvkit import table

def median(l):
    """
    Compute the median of a list.

    ``l`` must already be sorted in ascending order: the values are looked
    up by position and are not sorted here.

    Returns the middle element unchanged when the list has an odd number of
    items, and the mean of the two middle elements (as a float) when it has
    an even number of items.

    Raises IndexError if ``l`` is empty.
    """
    length = len(l)

    if length == 0:
        raise IndexError('median() of an empty list')

    # Floor division keeps the index an integer even when "/" means true
    # division (Python 3 or "from __future__ import division").
    middle = length // 2

    if length % 2 == 1:
        return l[middle]

    return float(l[middle - 1] + l[middle]) / 2

def main(args):
    """
    Print descriptive statistics for all columns in a CSV.

    The table is loaded from ``args.file`` using the reader options returned
    by ``extract_csv_reader_kwargs(args)``, and one report per column is
    written to stdout.

    Each report starts with the column's order and name. A column whose type
    is None is reported as "Empty column". Otherwise the type and whether the
    column is nullable are printed, followed by:

    * the distinct values themselves, when there are five or fewer; or
    * min/max (skipped for string and boolean columns), mean/median (int and
      float columns only), the number of distinct values and, for string
      columns, the maximum length plus five randomly chosen sample values.
    """
    tab = table.Table.from_csv(args.file, **extract_csv_reader_kwargs(args))

    write = sys.stdout.write

    for c in tab:
        write('%3i. %s\n' % (c.order, c.name))

        if c.type is None:
            write('\tEmpty column\n')
            continue

        # Nulls are left out of the ordered statistics, but a null still
        # counts as one of the column's distinct values.
        values = sorted(v for v in c if v is not None)
        uniques = set(c)

        write('\t%s\n' % c.type)
        write('\tNulls: %s\n' % ('Yes' if c.nullable else 'No'))

        if len(uniques) <= 5:
            write('\tValues: %s\n' % ', '.join(['"%s"' % unicode(u) for u in uniques]))
            continue

        # Skip min/max for strings and bools
        if c.type not in [unicode, bool]:
            minval = min(values)
            maxval = max(values)

            if c.type in [datetime.datetime, datetime.date, datetime.time]:
                minval = minval.isoformat()
                maxval = maxval.isoformat()

            # Print the (possibly ISO-formatted) values computed above rather
            # than recomputing the raw min/max.
            write('\tMin: %s\n' % minval)
            write('\tMax: %s\n' % maxval)

            if c.type in [int, float]:
                # float() prevents the mean of an int column from being
                # truncated by integer division.
                write('\tMean: %s\n' % (float(sum(values)) / len(values)))
                write('\tMedian: %s\n' % median(values))

        write('\tUnique values: %i\n' % len(uniques))

        if c.type == unicode:
            write('\tMax length: %i\n' % c.max_length)

            # More than five distinct values were found, so at least five
            # remain once the null is removed. random.sample needs a
            # sequence, hence the list() copy of the set.
            uniques.discard(None)
            samples = random.sample(list(uniques), 5)
            write('\tSamples: %s\n' % (', '.join(['"%s"' % unicode(u) for u in samples])))

if __name__ == '__main__':
    # Build the argument parser with csvkit's init_common_parser, parse the
    # command line and hand the resulting options to main().
    arg_parser = init_common_parser(
        description='Print descriptive statistics for all columns in a CSV file.')
    options = arg_parser.parse_args()

    main(options)

