/*  analysis.c
 * 
 *  R3lib
 * 
 *  Memory utilisation analysis and statistics
 *
 *  Copyright (C) 2006-2007  Michal Linhard <michal@linhard.sk>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2.1
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *  
 */
#include <sys/time.h>
#include <stdio.h>
#include "../conf.h" 
#include "../suffixArray.h" 
#include "../r3.h" 
#include "analysis.h" 
#include "test.h" 

/* fill time with the current wall-clock time
 *
 * time - receives the result of gettimeofday()
 *
 * returns the gettimeofday() result: 0 on success, -1 on failure
 * */
int time_get(struct timeval* time)
{
  /* POSIX leaves the behaviour unspecified when the timezone argument
   * is not NULL, and the value it produced was never used here */
  return gettimeofday(time, NULL);
}

/* express a timeval as a whole number of milliseconds
 *
 * the microsecond part is divided by 1000 with integer division, so
 * anything below one millisecond is dropped; the sum is cast to UINT
 * */
UINT time_to_ms(struct timeval* time)
{
	UINT ms;

	ms = (UINT) ((time->tv_usec/1000) + time->tv_sec*1000);
	return ms;
}
 
/* print a timeval to log as "<seconds>.<milliseconds>"
 *
 * the millisecond part (tv_usec/1000) is always written with three
 * digits, zero padded on the left, e.g. 12 s 5000 us -> "12.005"
 * no newline is appended
 * */
void time_fprint(FILE* log, struct timeval* time)
{
    /* a single conversion does the zero padding; printing through long
     * keeps second counts that do not fit into an int intact */
    fprintf(log, "%ld.%03ld", (long) time->tv_sec, (long) (time->tv_usec/1000));
}

/* compute statistics about r3 trees created from files specified in filenames
 * and store the result in CSV format to result_filename
 *
 * log             - stream receiving progress messages and per-file results
 * filenames       - array of cnt file names, none of which may be NULL
 * cnt             - number of entries in filenames (must not be negative)
 * result_filename - CSV file to write, opened with mode "w"
 *
 * All files are measured first (see statistics_from_file); the CSV header
 * and one row per file are written only after every measurement succeeded.
 *
 * returns 0 on success, ERROR_INTEGRITY on invalid arguments or a NULL
 * file name, ERROR_FILEOPEN if result_filename cannot be opened, ERROR_MEM
 * if the measurement array cannot be allocated, otherwise the error code
 * returned by statistics_from_file
 * */
int statistics_from_files(FILE* log, char** filenames, int cnt, char* result_filename)
{
	int i, result;
	FILE* fout;
	t_measurement* msr;
	
	if(!log || !filenames || !result_filename || cnt<0)
		return ERROR_INTEGRITY;
	
	fout = fopen(result_filename, "w");
	if(!fout) {
		fprintf(log, "couldn't open file '%s'.\n", result_filename);
		return ERROR_FILEOPEN;
	}
	
	/* always request at least one element: malloc(0) is allowed to return
	 * NULL, which must not be reported as out of memory when cnt is 0 */
	msr = (t_measurement*) malloc((size_t) (cnt ? cnt : 1) * sizeof(t_measurement));
	if(!msr) {
		result = ERROR_MEM;
		goto out_close;
	}
	
	for(i=0; i<cnt; i++) {
		if(!filenames[i]) {
			result = ERROR_INTEGRITY;
			goto out_free;
		}
		fprintf(log, "computing statistics for file '%s'...\n", filenames[i]);
		fflush(log);
		result = statistics_from_file(filenames[i], &(msr[i]));
		if(result)
			goto out_free;
		fprintf(log, "size: %i time: ", msr[i].n);
		time_fprint(log, &(msr[i].time));
		fprintf(log, " peak: %u mem: %u rpeak: %4.2lf rmem: %4.2lf\n", msr[i].peakmem, msr[i].totalmem, ((double) msr[i].peakmem / (double) msr[i].n), ((double) msr[i].totalmem / (double) msr[i].n) );
		fflush(log);
	}
	
	fprintf(fout, "file;\tsize;\ttime;\tpeak;\tmem;\trpeak;\trmem\n");
	for(i=0; i<cnt; i++) {
		/* the file name is data, never a format string: a '%' in it
		 * must be written literally */
		fprintf(fout, "%s;\t%i;\t", filenames[i], msr[i].n);
		time_fprint(fout, &(msr[i].time));
		fprintf(fout, ";\t%u;\t%u;\t%4.2lf;\t%4.2lf\n",
			msr[i].peakmem,
			msr[i].totalmem,
			((double) msr[i].peakmem / (double) msr[i].n),
			((double) msr[i].totalmem / (double) msr[i].n) );
	}
	result = 0;

	/* single exit: release in reverse order of acquisition */
out_free:
	free(msr);
out_close:
	fclose(fout);
	return result;
}

/* compute statistics about the r3 tree created for filename
 * and fill the measurement data structure
 *
 * filename - file whose contents are loaded with load_data()
 * msr      - receives the text length (n), the time spent in r3t_create()
 *            (time) and the peakmem/totalmem values of the created tree
 *
 * returns 0 on success, ERROR_INTEGRITY on NULL arguments or when
 * load_data() delivers no text, otherwise the error code reported by
 * load_data() or r3t_create()
 * note: msr->n is already written when r3t_create() fails
 * */
int statistics_from_file(char* filename, t_measurement* msr)
{
	t_r3t r3t;
	int result;
	BYTE* text;
	int textLen;
	struct timeval before;
	struct timeval after;
	
	if(!filename || !msr)
		return ERROR_INTEGRITY;
		
	result = load_data(&text, &textLen, filename);
	if(result)
		return result;

	if(!text)
		return ERROR_INTEGRITY;
	
	msr->n = textLen;
	
	/* only the tree construction itself is timed */
	time_get(&before);
	result = r3t_create(&r3t, text, textLen);
	time_get(&after);
	if(result) {
		/* the text buffer is released by this function on success, so it
		 * has to be released here as well instead of being leaked
		 * NOTE(review): assumes a failed r3t_create() needs no
		 * r3t_destroy() - confirm against r3.h */
		free(text);
		return result;
	}
	
	/* msr->time = after - before */
	timersub(&after, &before, &(msr->time));
	msr->peakmem = r3t.peakmem;
	msr->totalmem = r3t.totalmem;

	r3t_destroy(&r3t);
	free(text);
	return 0;
}

/* compute findPairs query statistics about the r3 tree created for filename
 * and write a plain-text report to result_filename
 *
 * filename        - file whose contents are loaded with load_data()
 * result_filename - report file, opened with mode "w"
 * minlength       - minimal length of repeat (passed to every
 *                   r3t_findPairs query)
 *
 * The report starts with the construction time and memory figures of the
 * tree. It then holds one line for every text position whose query found
 * at least one pair (position, pair count, query time in milliseconds,
 * time per pair) and ends with the average/minimum/maximum of the
 * per-pair time and the minimum/maximum query time.
 *
 * returns 0 on success, ERROR_INTEGRITY on NULL arguments or when
 * load_data() delivers no text, ERROR_FILEOPEN if the report cannot be
 * opened, ERROR_MEM if a result buffer cannot be allocated, otherwise the
 * error code reported by load_data(), r3t_create() or r3t_findPairs()
 *
 * every exit path closes the report and releases what was acquired so far
 * */
int statistics_complete_from_file(char* filename, char* result_filename, int minlength)
{
	t_r3t r3t;
	int result,i;
	BYTE* text;
	int textLen, pair_cnt;
	struct timeval before;
	struct timeval after;
	struct timeval diff;
	int* p2 = NULL;
	int* l = NULL;
	double total, rel, minr, maxr;
	FILE* fout;
	UINT temp, maxt, mint;
	int valid_pos, minr_defined;
	
	if(!filename || !result_filename)
		return ERROR_INTEGRITY;

	fout = fopen(result_filename, "w");
	if(!fout) {
		fprintf(stderr, "couldn't open file '%s'.\n", result_filename);
		return ERROR_FILEOPEN;
	}
			
	result = load_data(&text, &textLen, filename);
	if(result)
		goto out_close;	/* text is not touched after a failed load */

	if(!text) {
		result = ERROR_INTEGRITY;
		goto out_close;
	}
	
	/* only the tree construction itself is timed */
	time_get(&before);
	result = r3t_create(&r3t, text, textLen);
	time_get(&after);
	if(result)
		goto out_text;	/* NOTE(review): assumes a failed r3t_create() needs no r3t_destroy() - confirm */
	
	timersub(&after, &before, &diff);
	fprintf(fout, "ANALYSIS RESULTS FOR FILE '%s'\n", filename);
	fprintf(fout, "file size: %i\n", textLen);
	fprintf(fout, "time to construct: ");
	time_fprint(fout, &diff);
	fprintf(fout, "\n");
	fprintf(fout, "memory usage: %u\n",r3t.totalmem);
	fprintf(fout, "peak memory usage: %u\n",r3t.peakmem);
	fprintf(fout, "relative memory usage: %lf\n", ((double) r3t.totalmem / (double) r3t.textLen));
	fprintf(fout, "relative peak memory usage: %lf\n\n", ((double) r3t.peakmem / (double) r3t.textLen));
	fprintf(fout, "findPairs queries (k=%i):\n\n", minlength);
	fprintf(fout, "     pos       cnt      time       rel\n");
	
	/* result buffers handed to r3t_findPairs, one slot per text position */
	p2 = (int*) malloc(r3t.textLen*sizeof(int));
	if(!p2) {
		result = ERROR_MEM;
		goto out_tree;
	}

	l = (int*) malloc(r3t.textLen*sizeof(int));
	if(!l) {
		result = ERROR_MEM;
		goto out_tree;
	}
	
	total = 0.0;
	valid_pos = 0;
	maxr = 0.0;
	minr = 0.0;
	minr_defined = 0;
	mint = (UINT) -1;	/* largest UINT value, lowered by the first measurement */
	maxt = 0;
	
	for(i=0; i<textLen; i++) {
		time_get(&before);
		result = r3t_findPairs(&r3t, i, minlength, &pair_cnt, r3t.textLen, p2, l);
		time_get(&after);
		if(result)
			goto out_tree;
		timersub(&after, &before, &diff);
		if(pair_cnt) { // generate data only if at least one pair is found
			temp = time_to_ms(&diff);
			if(temp > maxt)
				maxt = temp;
			if(temp < mint)
				mint = temp;
			/* time spent per reported pair */
			rel = ((double) temp) / (double) (pair_cnt);
			if(rel > maxr)
				maxr = rel;
			if(!minr_defined || rel < minr) {
				minr = rel;
				minr_defined = 1;
			}
			total += rel;
			/* pair_cnt is an int, so it is printed with a signed conversion */
			fprintf(fout, "%8i%10i%10u%10.2lf\n", i, pair_cnt, temp, rel);
			valid_pos++;
		}
	}
	
	if(!valid_pos) {
		/* no query produced a pair: avoid dividing by zero and report
		 * 0 as minimum time instead of the initial sentinel value,
		 * in line with min rel which stays 0.0 in that case */
		valid_pos = 1;
		mint = 0;
	}
	total = total / (double) valid_pos;

	fprintf(fout, "\n\n\navg rel  = %10.2lf\n", total);
	fprintf(fout, "min rel  = %10.2lf\n", minr);
	fprintf(fout, "max rel  = %10.2lf\n", maxr);
	fprintf(fout, "min time = %10u\n", mint);
	fprintf(fout, "max time = %10u\n", maxt);
	result = 0;

	/* single exit: release in reverse order of acquisition
	 * (free(NULL) is a no-op, so unallocated buffers are harmless) */
out_tree:
	free(l);
	free(p2);
	r3t_destroy(&r3t);
out_text:
	free(text);
out_close:
	fclose(fout);
	return result;
}






