import sys
import subprocess
import numpy
import cPickle
import tables
from time import time

# Shape of the test array: N rows by M columns
N = 10
M = 125000

# Path of the scratch file every benchmark writes to and reads from
filename = "/tmp/dumpfile.data"

# Function to get the size of a file
def get_filesize():
    """Return the size of the dump file as reported by ``du -h``.

    ``sync`` is run before ``du`` (presumably so that the data just
    written has reached the disk before it is measured).

    Returns the first whitespace-separated field of the command output,
    i.e. the human-readable size printed by ``du -h``.

    Raises IndexError if the command produced no output at all.
    """
    proc = subprocess.Popen("sync;du -h %s" % filename, shell=True,
                            stdout=subprocess.PIPE)
    # communicate() drains the pipe, closes it and waits for the child,
    # so neither an open descriptor nor a zombie process is left behind.
    output = proc.communicate()[0]
    return output.split()[0]

# Print out some statistics
def print_stats(explain, tref, tpick):
    ttime = time()-tref
    print "%s %ss.  " %(explain, round(ttime, 3)),
    print "Speed-up over cPickle: %sx" % round(tpick/ttime, 2)
    print "File size:", get_filesize()


# Print some preliminary information
print 'Python version:    %s' % sys.version
print "NumPy version:     %s" % numpy.__version__
print "PyTables version:  %s" % tables.__version__

print "Checking with a %sx%s matrix of float64 elements (%s MB)" % \
      (N, M, round(N*M*8/(1024.*1024),3))

# Start the actual benchmarks


print "***** cPickle (protocol 2) *****"

na = numpy.random.rand(N, M)
tref = time()
f = file(filename, 'w')
cPickle.dump(na, f, 2)
tpickw = time()-tref
f.close()
print "Time for writing: %ss" % round(tpickw, 3)
print "File size:", get_filesize()

tref = time()
f = file(filename, 'r')
nar = cPickle.load(f)
tpickr = time()-tref
print "Time for reading: %ss" % round(tpickr, 3)
f.close()

print "***** PyTables EArray (dump row to row) *****"

na = numpy.random.rand(1, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createEArray(f.root, 'array', tables.Float64Atom(), (0, M))
for i in xrange(N):
    a.append(na)
f.close()
print_stats("Time for writing:", tref, tpickw)

tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)

print "***** PyTables EArray (dump row to row, compressed with zlib) ******"

na = numpy.random.rand(1, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createEArray(f.root, 'array', tables.Float64Atom(), (0, M),
                   filters=tables.Filters(complevel=3, complib='zlib'))
for i in xrange(N):
    a.append(na)
f.close()
print_stats("Time for writing:", tref, tpickw)

tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)

print "***** PyTables EArray (dump row to row, compressed with lzo) *****"

na = numpy.random.rand(1, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createEArray(f.root, 'array', tables.Float64Atom(), (0, M),
                   filters=tables.Filters(complevel=3, complib='lzo'))
for i in xrange(N):
    a.append(na)
f.close()
print_stats("Time for writing:", tref, tpickw)

tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)

print "***** PyTables EArray (complete dump) *****"

na = numpy.random.rand(N, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createEArray(f.root, 'array', tables.Float64Atom(), (0, M))
a.append(na)
f.close()
print_stats("Time for writing:", tref, tpickw)

tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)

print "***** PyTables Array *****"

na = numpy.random.rand(N, M)
tref = time()
f = tables.openFile(filename, 'w')
a = f.createArray(f.root, 'array', na)
f.close()
print_stats("Time for writing:", tref, tpickw)

tref = time()
f = tables.openFile(filename, 'r')
a = f.root.array
nar = f.root.array[:]
f.close()
print_stats("Time for reading:", tref, tpickr)

