#!/bin/sh

# Values collected from the command line; all start out empty.
ZEEK_BUILD=''
DATA_FILE=''
INTERFACE=''

# Selects which of the two run modes below is executed.
MODE='benchmark'

# Directory holding the Flamegraph scripts, and the file the graph is written to
FLAMEGRAPH_PATH=''
FLAMEGRAPH_OUTPUT=''

# Print the command-line help text to stdout and terminate the script.
#
# Always exits with status 1, so it never returns to its caller. It is used
# to report invalid invocations after a more specific error message.
usage() {
    # Unquoted delimiter so that $0 expands to the name the script was run as.
    cat <<EOF
Usage: $0 -b [zeek binary path] -d [data file path] -i [interface]

  Options:
    -b, --build PATH        The path to a Zeek binary to benchmark
    -d, --data-file PATH    The path to a data file to read from for replay
    -i, --interface INTF    The network interface to use for capturing data.
                            This interface should be completely idle, since
                            tcpreplay will be using it to replay the data.
    -f, --flamegraph PATH   (optional) The path to the directory where
                            Flamegraph is installed.
    -o, --output FILE       The file to use as output for Flamegraph. This
                            will default to 'benchmark.svg', and is ignored if
                            the '-f' argument is not passed.

    By default the output will include CPU, memory, etc statistics from Zeek
    processing all of the data in the data file. With the -f argument, the
    script will instead output a flamegraph for the process runtime, showing
    the time spent in functions, etc.

    This script assumes that it is being run on a system with a large number
    of CPU cores. If being used on a smaller system, modify this script and
    set the ZEEK_CPU and TCPREPLAY_CPU variables to smaller values.

EOF
    exit 1
}

# Parse the command-line options given as arguments and store their values in
# the ZEEK_BUILD, DATA_FILE, INTERFACE, MODE, FLAMEGRAPH_PATH and
# FLAMEGRAPH_OUTPUT globals.
#
# Every supported option takes exactly one value. An unknown argument, or an
# option with its value missing, prints an error followed by the usage text
# (usage exits the script) instead of looping forever on an argument that is
# never shifted away.
parse_args() {
    # POSIX test rather than the bash-only '(( ))', since this runs as /bin/sh.
    while [ "$#" -gt 0 ]; do
        # First pass: reject anything that cannot be consumed as "option value".
        case "$1" in
            -d|--data-file|-b|--build|-i|--interface|-f|--flamegraph|-o|--output)
                if [ "$#" -lt 2 ]; then
                    echo "Error: option '$1' requires an argument"
                    echo
                    usage
                fi
                ;;
            -h|--help)
                usage
                ;;
            *)
                echo "Error: unrecognized argument '$1'"
                echo
                usage
                ;;
        esac

        # Second pass: record the value for the (now known to be valid) option.
        case "$1" in
            -d|--data-file)
                DATA_FILE=$2
                ;;
            -b|--build)
                ZEEK_BUILD=$2
                ;;
            -i|--interface)
                INTERFACE=$2
                ;;
            -f|--flamegraph)
                MODE="flamegraph"
                FLAMEGRAPH_PATH=$2
                # Only apply the default when -o has not already chosen a file.
                if [ -z "${FLAMEGRAPH_OUTPUT}" ]; then
                    FLAMEGRAPH_OUTPUT="benchmark.svg"
                fi
                ;;
            -o|--output)
                FLAMEGRAPH_OUTPUT=$2
                ;;
        esac
        shift 2
    done
}

parse_args "$@"

# Abort when a mandatory option was not supplied.
#
#   $1 - the value collected for the option (empty when it was not given)
#   $2 - the error message to print when the value is empty
#
# On an empty value the message and a blank line are printed, then usage is
# called, which prints the help text and exits the script. The value is
# quoted in the test so that values containing whitespace or test operators
# are checked as a single string.
require_option() {
    if [ -z "$1" ]; then
        echo "$2"
        echo
        usage
    fi
}

require_option "${ZEEK_BUILD}" "Error: -b argument is required and should point at a Zeek binary"
require_option "${DATA_FILE}" "Error: -d argument is required and should point at a pcap file to replay"
require_option "${INTERFACE}" "Error: -i argument is required and should point to an idle network interface"

# Various run-time options.
# ZEEK_ARGS holds several space-separated arguments and is therefore expanded
# unquoted on purpose wherever Zeek is launched.
ZEEK_ARGS="-i af_packet::${INTERFACE}"
# CPU lists handed to 'taskset --cpu-list' for Zeek and tcpreplay respectively.
ZEEK_CPU=10
TCPREPLAY_CPU=11

# Print the PID of the first child process of the process whose PID is $1.
# Prints nothing when that process has no children (e.g. it already exited).
child_pid_of() {
    pgrep -P "$1" | head -n 1
}

# Turn fatal signals into a regular exit so that the EXIT traps installed
# below also get a chance to remove the temporary files.
trap 'exit 1' HUP INT TERM

echo "Running '${ZEEK_BUILD} ${ZEEK_ARGS}' against ${DATA_FILE}"

if [ "${MODE}" = "benchmark" ]; then

    TIME_FILE=$(mktemp) || { echo "Error: unable to create a temporary file" >&2; exit 1; }
    trap 'rm -f -- "${TIME_FILE}"' EXIT

    # Start zeek under 'time', find its PID, then wait 10s to let it reach a
    # steady state. 'time' writes the '%M' statistic to TIME_FILE once zeek
    # has exited.
    # shellcheck disable=SC2086 # ZEEK_ARGS is split into words intentionally
    taskset --cpu-list "${ZEEK_CPU}" time -f "%M" -o "${TIME_FILE}" "${ZEEK_BUILD}" ${ZEEK_ARGS} &
    TIME_PID=$!

    sleep 10

    # zeek runs as the child of the backgrounded time process.
    ZEEK_PID=$(child_pid_of "${TIME_PID}")
    if [ -z "${ZEEK_PID}" ]; then
        echo "Error: unable to find the Zeek process, it may have failed to start" >&2
        exit 1
    fi
    echo "Zeek running on PID ${ZEEK_PID}"

    # Start perf stat on the zeek process
    perf stat -p "${ZEEK_PID}" &
    PERF_PID=$!

    # Start replaying the data
    echo "Starting replay"
    taskset --cpu-list "${TCPREPLAY_CPU}" tcpreplay -i "${INTERFACE}" -q "${DATA_FILE}"

    # TODO: does it make sense to sleep here to let zeek finish processing all of the packets
    # out of the kernel buffer?

    # Capture the average CPU usage of the process
    CPU_USAGE=$(ps -p "${ZEEK_PID}" -o %cpu=)

    # Interrupt zeek (SIGINT), then wait for the time and perf wrappers, which
    # print their reports once zeek is gone.
    echo
    kill -INT "${ZEEK_PID}"
    wait "${TIME_PID}"
    wait "${PERF_PID}"

    # GNU time reports '%M' in kilobytes. The value is always the last line of
    # the file; a "Command exited with non-zero status" notice may precede it.
    echo "Maximum memory usage (max_rss): $(tail -n 1 "${TIME_FILE}") kilobytes"
    echo "Average CPU usage: ${CPU_USAGE}%"

elif [ "${MODE}" = "flamegraph" ]; then

    PERF_RECORD_FILE=$(mktemp) || { echo "Error: unable to create a temporary file" >&2; exit 1; }
    trap 'rm -f -- "${PERF_RECORD_FILE}"' EXIT

    # Start zeek under perf record, then sleep for a few seconds to let it actually start up. For runs with
    # shorter amounts of data or with slower traffic, you can add '-c 499' here to get finer-grained results.
    # With big data sets, it just results in the graph getting blown out by waits in the IO loop.
    # shellcheck disable=SC2086 # ZEEK_ARGS is split into words intentionally
    perf record -g -o "${PERF_RECORD_FILE}" -- "${ZEEK_BUILD}" ${ZEEK_ARGS} &
    PERF_PID=$!

    sleep 5

    # zeek runs as the child of the backgrounded perf process.
    ZEEK_PID=$(child_pid_of "${PERF_PID}")
    if [ -z "${ZEEK_PID}" ]; then
        echo "Error: unable to find the Zeek process, it may have failed to start" >&2
        exit 1
    fi
    echo "Zeek running on PID ${ZEEK_PID}"

    # Start replaying the data
    echo "Starting replay"
    taskset --cpu-list "${TCPREPLAY_CPU}" tcpreplay -i "${INTERFACE}" -q "${DATA_FILE}"

    # Interrupt zeek (SIGINT) and wait for perf to finish writing its data
    echo
    kill -INT "${ZEEK_PID}"
    wait "${PERF_PID}"

    echo
    echo "Building flamegraph, writing to ${FLAMEGRAPH_OUTPUT}"
    perf script -i "${PERF_RECORD_FILE}" \
        | "${FLAMEGRAPH_PATH}/stackcollapse-perf.pl" \
        | "${FLAMEGRAPH_PATH}/flamegraph.pl" > "${FLAMEGRAPH_OUTPUT}"

fi
