#!/usr/bin/python
# -*- coding: utf-8 -*-

import sys
import stats
import time
import os
import subprocess
#import Gnuplot, Gnuplot.funcutils
#from numpy import *
import matplotlib.pyplot as plt

# debug() prints a message only when its level is <= this value.
verboselevel = 10

# Block sizes swept by every test, in dd's "bs=" notation.  Each test gets its
# own copy so one test's list can be edited without affecting the others; for a
# quick run, give an individual test a shorter list such as
# "1K 2K 4K 8K".split().
_BLOCKSIZES = "1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M".split()

# Benchmark definitions consumed by main() and doit().  Keys of each entry:
#
#   enabled         main() skips the entry when this is False.
#   desc            Heading printed for the test; also used (with spaces turned
#                   into dashes) as the name of the saved .png plot.
#   pre_start_cmd   Shell command run before the clock starts, or None.
#   post_start_cmd  Shell command run right after the clock starts, or None.
#   pre_stop_cmd    Shell command run after all workers have exited but before
#                   the clock stops, so its run time is included in the
#                   measurement (e.g. "sync"), or None.
#   post_stop_cmd   Shell command run after the clock stops, or None.
#   count           Value substituted for %COUNT% (dd's block count).
#   threads         Number of concurrent copies of "cmd" launched per run.
#   blocksizes      Block sizes to sweep; each is substituted for %BLOCKSIZE%.
#   cmd             Worker command.  doit() substitutes %THREAD% (0-based worker
#                   index), %COUNT% and %BLOCKSIZE%.
#
# The four hook commands get %THREADS%, %COUNT% and %BLOCKSIZE% substituted.
tests = [
    {
        "enabled": False,
        "desc": "macaroni CPU bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": None,
        "post_stop_cmd": None,
        "count": 10000,
        "threads": 10,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh macaroni dd if=/dev/zero bs=%BLOCKSIZE% count=%COUNT% of=/dev/null 2>/dev/null",
    },
    {
        "enabled": False,
        "desc": "torchio CPU bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": None,
        "post_stop_cmd": None,
        "count": 10000,
        "threads": 10,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh torchio dd if=/dev/zero bs=%BLOCKSIZE% count=%COUNT% of=/dev/null 2>/dev/null",
    },
    {
        "enabled": False,
        "desc": "network bandwidth",
        # Starts one nc listener per worker on ports 8000, 8001, ...
        # NOTE(review): this uses the bash-style "for ((...))" loop, while
        # doit() runs hooks through subprocess with shell=True (/bin/sh) --
        # confirm /bin/sh on the driving host accepts this syntax.
        "pre_start_cmd": """for ((THREAD=0; $THREAD<%THREADS%; THREAD++)); do
                            : echo \"launching listener $THREAD ...\"
                            ssh -fn macaroni \"nc -lp $((8000+$THREAD)) > /dev/null 2>&1\" &
                        done
                        #  allow listeners to settle
                        sleep 2""",
        "post_start_cmd": None,
        "pre_stop_cmd": None,
        "post_stop_cmd": "ssh -n macaroni killall nc 2>/dev/null",
        "count": 1000,
        "threads": 4,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "dd if=/dev/zero bs=%BLOCKSIZE% count=%COUNT% 2>/dev/null | nc -q 0 macaroni $((8000+%THREAD%))",
    },
    {
        "enabled": False,
        "desc": "torchio disk write bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": "sync",
        "post_stop_cmd": None,
        "count": 1600,
        "threads": 2,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh torchio dd if=/dev/zero bs=%BLOCKSIZE% count=%COUNT% of=/dev/vg0/perf-tests 2>/dev/null",
    },
    {
        "enabled": False,
        "desc": "torchio disk read bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": None,
        "post_stop_cmd": None,
        "count": 1600,
        "threads": 1,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh torchio dd of=/dev/null bs=%BLOCKSIZE% count=%COUNT% if=/dev/vg0/perf-tests 2>/dev/null",
    },
    {
        "enabled": False,
        "desc": "macaroni disk write bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": "sync",
        "post_stop_cmd": None,
        "count": 1600,
        "threads": 2,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh macaroni dd if=/dev/zero bs=%BLOCKSIZE% count=%COUNT% of=/dev/vg0/perf-tests 2>/dev/null",
    },
    {
        "enabled": False,
        "desc": "macaroni disk read bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": None,
        "post_stop_cmd": None,
        "count": 1600,
        "threads": 1,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh macaroni dd of=/dev/null bs=%BLOCKSIZE% count=%COUNT% if=/dev/vg0/perf-tests 2>/dev/null",
    },
    {
        "enabled": True,
        "desc": "torchio filesystem write bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": "sync",
        "post_stop_cmd": None,
        "count": 1600,
        "threads": 2,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh torchio dd if=/dev/zero bs=%BLOCKSIZE% count=%COUNT% of=/perf-tests/perf-tests 2>/dev/null",
    },
    {
        "enabled": True,
        "desc": "torchio filesystem read bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": None,
        "post_stop_cmd": None,
        "count": 1600,
        "threads": 1,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh torchio dd of=/dev/null bs=%BLOCKSIZE% count=%COUNT% if=/perf-tests/perf-tests 2>/dev/null",
    },
    {
        "enabled": True,
        "desc": "macaroni filesystem write bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": "sync",
        "post_stop_cmd": None,
        "count": 1600,
        "threads": 2,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh macaroni dd if=/dev/zero bs=%BLOCKSIZE% count=%COUNT% of=/perf-tests/perf-tests 2>/dev/null",
    },
    {
        "enabled": True,
        "desc": "macaroni filesystem read bandwidth",
        "pre_start_cmd": None,
        "post_start_cmd": None,
        "pre_stop_cmd": None,
        "post_stop_cmd": None,
        "count": 1600,
        "threads": 1,
        "blocksizes": list(_BLOCKSIZES),
        "cmd": "ssh macaroni dd of=/dev/null bs=%BLOCKSIZE% count=%COUNT% if=/perf-tests/perf-tests 2>/dev/null",
    },
]

def _expand_placeholders(template, substitutions):
    """Return template with each (placeholder, value) pair substituted in order."""
    for placeholder, value in substitutions:
        template = template.replace(placeholder, str(value))
    return template


def _run_hook(name, template, substitutions):
    """Expand and run one optional hook command via the shell, waiting for it.

    Does nothing when template is None.  The command's exit status is ignored.
    """
    if template is None:
        return
    debug(20, "doit: before substitutions %s is [%s]" % (name, template))
    command = _expand_placeholders(template, substitutions)
    debug(20, "doit: after substitutions %s is [%s]" % (name, command))
    subprocess.call(command, shell=True)


def _blocksize_to_bytes(blocksize):
    """Convert a dd-style size string such as "4K", "1M" or "512" to bytes.

    The K, M and G suffixes are powers of 1024; a string without one of those
    suffixes is parsed as a plain byte count.  Raises ValueError when the
    numeric part is not an integer.
    """
    multipliers = {"K": 1024, "M": 1024 * 1024, "G": 1024 * 1024 * 1024}
    suffix = blocksize[-1:]
    if suffix in multipliers:
        return int(blocksize[:-1]) * multipliers[suffix]
    return int(blocksize)


def doit():
    """Run the currently configured benchmark once and return its rate.

    The benchmark is described by module globals that main() sets before each
    call: count, threads, blocksize, cmd and the four optional hook commands
    (pre_start_cmd, post_start_cmd, pre_stop_cmd, post_stop_cmd).

    Sequence: run pre_start_cmd, start the clock, run post_start_cmd, launch
    `threads` concurrent copies of cmd (each a forked /bin/sh process), wait
    for all of them, run pre_stop_cmd, stop the clock, run post_stop_cmd.

    Exit statuses of the hooks and of the workers are not checked, so a
    failing command still produces a (meaningless) rate.

    Returns:
        count * blocksize-in-bytes * threads divided by the elapsed wall-clock
        seconds, i.e. bytes per second.
    """
    # The loop variable below is deliberately kept as a module global, as in
    # the rest of this script; all other globals are only read here.
    global thread

    hook_substitutions = [
        ("%THREADS%", threads),
        ("%COUNT%", count),
        ("%BLOCKSIZE%", blocksize),
    ]

    #  Run pre-start command, start clock, run post-start command
    _run_hook("pre_start_cmd", pre_start_cmd, hook_substitutions)
    start_time = time.time()
    _run_hook("post_start_cmd", post_start_cmd, hook_substitutions)

    #  Launch threads (really: one child process per worker)
    for thread in range(threads):
        debug(20, "doit/parent: launching thread %d ..." % thread)
        local_cmd = _expand_placeholders(cmd, [
            ("%THREAD%", thread),
            ("%COUNT%", count),
            ("%BLOCKSIZE%", blocksize),
        ])
        pid = os.fork()
        if pid == 0:
            try:
                os.execl("/bin/sh", "/bin/sh", "-c", local_cmd)
            finally:
                # Only reached when exec failed.  Exit immediately so the
                # child never falls back into this loop and forks workers of
                # its own.
                os._exit(127)

    #  Wait for threads to exit; os.wait() raises OSError once no child
    #  processes remain.
    debug(20, "doit: waiting for children to exit ...")
    while True:
        try:
            os.wait()
        except OSError:
            break

    #  Run pre-stop command, stop clock, run post-stop command
    _run_hook("pre_stop_cmd", pre_stop_cmd, hook_substitutions)
    end_time = time.time()
    _run_hook("post_stop_cmd", post_stop_cmd, hook_substitutions)

    #  Convert blocksize string to byte count.
    debug(20, "doit: converting blocksize to blocksize_bytes ...")
    blocksize_bytes = _blocksize_to_bytes(blocksize)

    #  Calculate rate
    elapsed = end_time - start_time
    rate = count * blocksize_bytes * threads / elapsed
    debug(20, "doit: rate=%f, count=%d, blocksize_bytes=%d, threads=%d, end_time-start_time=%f" % (rate, count, blocksize_bytes, threads, elapsed))
    return rate

def main():
    """Run every enabled test in `tests`, print statistics and save a plot.

    For each enabled test and each of its block sizes, doit() is called
    `repetitions` times.  The mean, standard deviation, minimum and maximum
    rate are printed, and the mean rate per block size is plotted to
    "<desc with spaces replaced by dashes>.png".

    The display unit for a whole test is chosen once, from the mean rate of
    its first block size, so that all rows and the plot share one scale.

    The test parameters are handed to doit() through module globals.
    """
    global count, threads, blocksize, cmd, pre_start_cmd, post_start_cmd, pre_stop_cmd, post_stop_cmd
    repetitions = 20

    for test in tests:
        if not test["enabled"]:
            continue

        # Publish this test's parameters for doit().
        desc = test["desc"]
        pre_start_cmd = test["pre_start_cmd"]
        post_start_cmd = test["post_start_cmd"]
        pre_stop_cmd = test["pre_stop_cmd"]
        post_stop_cmd = test["post_stop_cmd"]
        count = test["count"]
        threads = test["threads"]
        cmd = test["cmd"]
        blocksizes = test["blocksizes"]
        debug(20, "main: desc=\"%s\", blocksizes=%s" % (desc, blocksizes))
        print("%s\n%s\n" % (desc, "=" * len(desc)))

        # Reset per test so a unit chosen for a previous test is never reused.
        divider = None
        units = None
        scaled_means = []
        for blocksize in blocksizes:
            debug(20, "main: blocksize=%s" % blocksize)
            rates = []
            for k in range(repetitions):
                debug(20, "main: repetition %d" % k)
                rates.append(doit())

            mean = stats.mean(rates)
            if divider is None:
                divider, units = get_divider_and_units(mean)
                debug(20, "main: divider=%f, units=%s" % (divider, units))
            std = stats.stdev(rates)
            maximum = max(rates)
            minimum = min(rates)

            print("blocksize: %4s, mean rate: %6.3f %3s, std dev: %6.3f %3s, min: %6.3f %3s, max: %6.3f %3s" % (
                blocksize,
                mean / divider, units,
                std / divider, units,
                minimum / divider, units,
                maximum / divider, units,
            ))

            #  Store for later plotting
            scaled_means.append(mean / divider)
        print("")

        # Nothing was measured (empty blocksize list): there is nothing to
        # plot and no unit to label the axis with.
        if not scaled_means:
            continue

        #  Plot
        plt.clf()
        plt.plot(scaled_means)
        plt.xlabel("blocksize")
        plt.ylabel("rate (%s)" % units)
        plt.xticks(list(range(len(blocksizes))), blocksizes)
        plt.savefig(desc.replace(' ', '-') + ".png")

    return

def get_divider_and_units(rate):
    """Pick the largest 1024-based unit that does not exceed rate.

    Args:
        rate: A rate in bytes per second.

    Returns:
        A (divider, units) tuple: divide a rate by `divider` to express it in
        `units`, which is one of "GB/s", "MB/s", "KB/s" or "B/s".  Thresholds
        are inclusive, so a rate of exactly 1024 is reported as 1 KB/s rather
        than 1024 B/s.  Rates below 1024 (including zero and negative values)
        yield (1, "B/s").
    """
    # Ordered from largest to smallest so the first match is the best fit.
    scales = (
        (1024 * 1024 * 1024, "GB/s"),
        (1024 * 1024, "MB/s"),
        (1024, "KB/s"),
    )
    for divider, units in scales:
        if rate >= divider:
            return divider, units
    return 1, "B/s"

def debug(level, str):
    """Print a debug message if `level` does not exceed verboselevel.

    Args:
        level: Verbosity level of this message; it is printed only when the
            module-level verboselevel is greater than or equal to it.
        str: The message text.

    The line is prefixed with the script's file name (sys.argv[0] with any
    leading directory part stripped) and the level.
    """
    if level > verboselevel:
        return
    script_name = sys.argv[0].rpartition("/")[2]
    print("%s: DEBUG[%d]: %s" % (script_name, level, str))

# Run the benchmarks only when executed as a script, so that importing this
# module does not start them as a side effect.
if __name__ == "__main__":
    main()