################################################################## # # SCRIPT FOR PROCESSING LOG FILES CONTAINING ELAPSED TIME # and EXPLAIN PLAN COST. # ################################################################## # This script processes log files created with the make_files.ksh # script. The log files contain either explain statements or # duration of a query. The output file created by this script # contains the timestamp of when this script was run, plus # a user supplied trial name. # # The output file will have the following format: # Trial_Time_Stamp, Trial_Name, Query_Name, Cost, Duration # 06/03/2008 10:54:28,Trial two,bank_admin_4, 778 ,00:00:09.42 # 06/03/2008 10:54:28,Trial two,empl_lookup_1, 214 ,00:00:00.01 # # This comma separated data can be loaded into other tools # such as a spreadsheet for further analysis. # # A practical use would be to track the effect of changes to # database instance configuration or to assess the effect # of an upgrade. The same sql statements can be run repeatedly # and the duration and cost of each query during each # trial can be measured. # # Arguments # 1. Name of data file to create. # 2. Name of trial. This value will go in output log # and is used when analyzing data to distinguish # among different executions of the same file. # String values used in grep statements COST_STRING='| Cost' # Get list of log files to process. files=`ls *.log` # Initialize counter used for detecting when a pair of files # has been processed. counter=1 # Get time stamp value trial_time_stamp=`date '+%m/%d/%Y %H:%M:%S'` # Start header in output file. echo "Trial_Time_Stamp", "Trial_Name", Query_Name, Cost, Duration, Rows > zzz_temp for i in $files do echo "Processing file "$i # Test if file has word explain in it. is_explain=`expr "$i" : ".*explain"` if [ $is_explain != 0 ] then # Parse file to find location of Cost value. The row where # the word Cost appears varies, and so does the column. # grep and awk are needed to parse out the location of "Cost". # Get the row where Cost appears. temp_rownum=`grep -n "${COST_STRING}" $i | awk -F: '{print $1}' ` rownum=`expr $temp_rownum + 2` # Get the column where Cost appears in the row named above. # This method of finding the column does not work since it assumes # the cost is always the last column. #costcol=`grep -n "${COST_STRING}" $i | awk 'BEGIN {FS = "|"} {print NF - 1}'` # Found this monstrous line on www.unix.com on the following forum thread # http://www.unix.com/shell-programming-scripting/57079-column-number-awk-help.html costcol=`grep -n "${COST_STRING}" $i | nawk '{printf("%s","");a=gsub(/\|/,"",$1);if (a==3){print "(Not Found)";next}print a+1}' FS='Cost'` echo "Cost column is "$costcol # This line parses out the numeric cost value using the row and # col variables parsed above. temp_cost=`head -$rownum $i | tail -1 | nawk -v col=$costcol -F"|" '{print $col}'` # Parse out possible CPU% data which will be a value in parentheses, e.g. 20 (2). cost=`echo $temp_cost | awk -F\( '{print $1}'` echo "Cost is "$cost else # This must be a regular query file with timing statement, so # grep for "Elapsed." elapsed=`grep Elapsed: $i | awk '{print $2}'` echo "Elapsed is "$elapsed # Get short name for use in labeling data point in output file. shortname=`echo $i | awk -F. '{print $1}'` # Also get rows selected. rows=`grep "row.*selected" $i | awk '{print $1}'` if [ $rows = no ] then rows=0 fi fi # Test to see if a pair of files has been processed. This assumes # ls is returning files in alphabetic order with a pair like this # file_one.sql.log # file_one_explain.sql.log # A counter variable is maintained for purposes of detecting this. file_pair_check=`echo $counter/2 | bc -l` echo "File pair check variable set to "$file_pair_check if [ $counter != 1 ] then test_val=`echo $file_pair_check | awk -F. '{print substr($2,0,1)}'` else test_val=5 fi echo "test val: "$test_val if [ $test_val = 0 ] then echo "$trial_time_stamp,$2,$shortname,$cost,$elapsed,$rows" >> zzz_temp fi # Increment counter counter=`expr $counter + 1` done mv zzz_temp $1