##################################################################
#
#  SCRIPT FOR PROCESSING LOG FILES CONTAINING ELAPSED TIME
#  and EXPLAIN PLAN COST.
#
##################################################################
# This script processes log files created with the make_files.ksh
# script. The log files contain either explain statements or 
# duration of a query. The output file created by this script
# contains the timestamp of when this script was run, plus
# a user supplied trial name.
#
#  The output file will have the following format:
#  Trial_Time_Stamp, Trial_Name, Query_Name, Cost, Duration
#  06/03/2008 10:54:28,Trial two,bank_admin_4,   778 ,00:00:09.42
#  06/03/2008 10:54:28,Trial two,empl_lookup_1,   214 ,00:00:00.01
#
#  This comma separated data can be loaded into other tools 
#  such as a spreadsheet for further analysis.
# 
#  A practical use would be to track the effect of changes to
#  database instance configuration or to assess the effect
#  of an upgrade. The same sql statements can be run repeatedly
#  and the duration and cost of each query during each
#  trial can be measured.
#
# Arguments
#   1. Name of data file to create.
#   2. Name of trial. This value will go in output log
#      and is used when analyzing data to distinguish
#      among different executions of the same file. 

# String values used in grep statements
COST_STRING='| Cost'

# Get list of log files to process.
files=`ls *.log`

# Initialize counter used for detecting when a pair of files
# has been processed.
counter=1

# Get time stamp value
trial_time_stamp=`date '+%m/%d/%Y %H:%M:%S'`

# Start header in output file.
echo "Trial_Time_Stamp", "Trial_Name", Query_Name, Cost, Duration, Rows > zzz_temp

for i in $files
do
	echo "Processing file "$i
	# Test if file has word explain in it.
	is_explain=`expr "$i" : ".*explain"`
	if [ $is_explain != 0 ]
	then
		
		# Parse file to find location of Cost value. The row where
		# the word Cost appears varies, and so does the column. 
		# grep and awk are needed to parse out the location of "Cost".

		# Get the row where Cost appears.
		temp_rownum=`grep -n  "${COST_STRING}" $i | awk -F: '{print $1}' `
		rownum=`expr $temp_rownum + 2`

		# Get the column where Cost appears in the row named above.
		# This method of finding the column does not work since it assumes
		# the cost is always the last column.
		#costcol=`grep -n  "${COST_STRING}" $i | awk 'BEGIN {FS = "|"} {print NF - 1}'`
		# Found this monstrous line on www.unix.com on the following forum thread
		# http://www.unix.com/shell-programming-scripting/57079-column-number-awk-help.html
		costcol=`grep -n "${COST_STRING}" $i | nawk '{printf("%s","");a=gsub(/\|/,"",$1);if (a==3){print "(Not Found)";next}print a+1}' FS='Cost'`
		echo "Cost column is "$costcol		

		# This line parses out the numeric cost value using the row and 
		# col variables parsed above.
		temp_cost=`head -$rownum $i | tail -1 | nawk -v col=$costcol -F"|" '{print $col}'`

	  	# Parse out possible CPU% data which will be a value in parentheses, e.g. 20 (2). 	
	 	cost=`echo $temp_cost | awk -F\( '{print $1}'`	
		echo "Cost is "$cost
	
	else
		# This must be a regular query file with timing statement, so 
		# grep for "Elapsed."
		elapsed=`grep Elapsed: $i | awk '{print $2}'`
		echo "Elapsed is "$elapsed
		# Get short name for use in labeling data point in output file.
		shortname=`echo $i | awk -F. '{print $1}'`
		# Also get rows selected.
		rows=`grep "row.*selected" $i | awk '{print $1}'`
		if [ $rows = no ]
		then
			rows=0
		fi
	fi
	
	# Test to see if a pair of files has been processed. This assumes
	# ls is returning files in alphabetic order with a pair like this
	#  file_one.sql.log
	#  file_one_explain.sql.log
	# A counter variable is maintained for purposes of detecting this.
	file_pair_check=`echo $counter/2 | bc -l`
	echo "File pair check variable set to "$file_pair_check
	if [ $counter != 1 ]
	then
		test_val=`echo $file_pair_check | awk -F. '{print substr($2,0,1)}'`
	else
		test_val=5
	fi
	echo "test val: "$test_val
	if [ $test_val = 0 ]
	then
		echo "$trial_time_stamp,$2,$shortname,$cost,$elapsed,$rows" >> zzz_temp
	fi	
	
	# Increment counter
	counter=`expr $counter + 1`
	
done

mv zzz_temp $1
