/*  test_r3.c
 * 
 *  R3lib
 * 
 *  Testing routines for R3 structure 
 *
 *  Copyright (C) 2006-2007  Michal Linhard <michal@linhard.sk>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2.1
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *  
 */
#include <string.h>
#include "../conf.h"
#include "../suffixArray.h"
#include "../r3.h" 
#include "debug.h" 
#include "test.h"
#include "test_r3.h" 
 
/* test1
 * tests information contained in bp-tables
 * tests integrity of each union tree of root node
 *
 * Steps, in order:
 *  1. sanity-check the pointers and the size fields of r3t
 *  2. for the last node (index size-1, the root), run testr3t_test_ut on
 *     every union tree referenced from its bp-table entries
 *  3. for every node v:
 *     - lcp[v].value has to be smaller than textLen
 *     - if lcp[v].flag1 is set (zero position present), position 0 is
 *       combined with every union tree of the node (testr3t_combine)
 *     - for every bp-table entry with flag1 set, each value item of the
 *       referenced union tree is combined with the union trees of the
 *       entries without flag1 and of the flag1 entries that follow it
 *
 * A bp-table run of a node starts at bptab[bp[v]] and ends with the first
 * entry that has flag2 set; every index visited is range-checked against
 * bptables_size.
 *
 * log - destination of diagnostic messages
 * r3t - tree to be tested
 *
 * returns 0 if all checks passed, ERROR_INTEGRITY if the structure is
 * missing data or holds out-of-range indexes, ERROR_TEST if a navigator or
 * value item was expected and not found, otherwise the non-zero result of
 * the failing helper
 * */
int testr3t_test1(FILE* log, t_r3t* r3t)
{
	int result, v, i, j, k, i2;
	FLAGUINT* bptab;
	FLAGUINT* uf;
	
	if(!log || !r3t)
		return ERROR_INTEGRITY;
	if(!r3t->text || !r3t->bp || !r3t->lcp)
		return ERROR_INTEGRITY;
		
	bptab = r3t->bptables;
	uf = r3t->uf;
	
	if(r3t->size <= 0 || 
		r3t->size > r3t->textLen || 
		r3t->textLen <= 0 ||
		r3t->textLen > FLAGUINT_MAX || 
		r3t->bptables_size < 0 ||
		r3t->bptables_size > r3t->textLen*3 ||
		r3t->uf_size < 0 ||
		r3t->uf_size > (r3t->textLen-1)*3 )
		return ERROR_INTEGRITY;
		
	// test union trees referenced from root
	v = r3t->size-1;
	if(r3t->bp[v] != UNDEF) {
		if(!bptab || !uf)
			return ERROR_INTEGRITY;
			
		i = r3t->bp[v]-1;
		do {
			i++;
			if(i < 0 || i >= r3t->bptables_size)
				return ERROR_INTEGRITY;
			// the entry is used as an index into uf, so it has to lie inside uf
			j = bptab[i].value;
			if(j < 0 || j >= r3t->uf_size)
				return ERROR_INTEGRITY;
			result = testr3t_test_ut(log, uf, j, r3t->text, r3t->textLen);
			if(result)
				return result;
		} while(bptab[i].flag2 != 1);
	}
	
	for(v = 0; v<r3t->size; v++)
	{
		if(r3t->lcp[v].value >= r3t->textLen)
			return ERROR_INTEGRITY;
			
		// nodes without a bp-table have nothing more to check
		if(r3t->bp[v] == UNDEF)
			continue;
			
		// every node with a bp-table needs both tables, not only the root
		if(!bptab || !uf)
			return ERROR_INTEGRITY;
			
		if(r3t->lcp[v].flag1) { // zero position present
			i2 = r3t->bp[v]-1;
			do {
				i2++;
				if(i2 < 0 || i2 >= r3t->bptables_size)
					return ERROR_INTEGRITY;
				result = testr3t_combine(log, r3t, bptab[i2].value, 0, r3t->lcp[v].value);
				if(result)	
					return result;
			} while(bptab[i2].flag2 != 1);
		}

		i = r3t->bp[v]-1;
		do {
			i++;
			if(i < 0 || i >= r3t->bptables_size)
				return ERROR_INTEGRITY;
				
			if(bptab[i].flag1) { 
				j = bptab[i].value;
				if(j < 1 || j >= r3t->uf_size)
					return ERROR_INTEGRITY;
				if(!uf[j].flag2) {
					fprintf(log, "error: navigator item expected: node %i, bp %i, bptab item %i, uf item %i\n", v, r3t->bp[v], i, j);
					return ERROR_TEST;
				}
				if(uf[j-1].flag2) // j is u-navigator item, bptab[i].flag1==1 therefore j-1 is b-navigator node
					j--;
				for(k=j-1; k>=uf[j].value; k--) {
					if(uf[k].flag2) {
						fprintf(log, "error: value node expected: node %i, bp %i, bptab item %i, uf item %i, ut root %i\n", v, r3t->bp[v], i, k, j);
						return ERROR_TEST;
					}
					if(r3t->lcp[v].flag2 && !(r3t->lcp[v].flag1)) {
						result = test_maximal_repeat(log, r3t->text, r3t->textLen, uf[k].value, 0, r3t->lcp[v].value);
						if(result)	
							return result;
					}
					
					// The run is walked up to its terminator, which may lie
					// beyond the entries already validated through i, so each
					// index is range-checked here as well.
					i2 = r3t->bp[v]-1;
					do {
						i2++;
						if(i2 < 0 || i2 >= r3t->bptables_size)
							return ERROR_INTEGRITY;
						if((bptab[i2].flag1 && i2>i) || !(bptab[i2].flag1)) {
							result = testr3t_combine(log, r3t, bptab[i2].value, uf[k].value, r3t->lcp[v].value);
							if(result)	
								return result;
						}
					} while(bptab[i2].flag2 != 1);
					
				}
			}
		} while(bptab[i].flag2 != 1);
	}	
	
	return 0;
} 
 
/* test up-tables
 *
 * For every node v with a first parent link (up1[v].value != UNDEF) the
 * links up1[v] and up2[v] are checked against the bp-table of v and the
 * bp-tables / lcp flags of the nodes they point to.
 *
 * log - destination of diagnostic messages
 * r3t - tree to be tested
 *
 * returns 0 if all checks passed, ERROR_INTEGRITY if the structure is
 * missing data or holds out-of-range indexes, ERROR_TEST if a parent link
 * violates one of the checks ("parent error" is written to log in all but
 * one case, marked below), otherwise the non-zero result of a failing
 * r3th_* helper
 * */
int testr3t_test2(FILE* log, t_r3t* r3t)   
{
	int j, k, l, result, v;
	FLAGUINT bptab[SIGMA_SIZE]; // bp-table of the current node, filled by r3th_loadBpTable
	FLAGUINT* uf;
	FLAGUINT* parent1;
	FLAGUINT* lcp;
	int* parent2; // assigned below but never read; r3t->up2 is used directly
	UINT lc1;
	int unode1, unode2;
	
	int par1, par2;
	
	if(!log || !r3t)
		return ERROR_INTEGRITY;
	if(!r3t->text || !r3t->bp || !r3t->lcp || !r3t->up1 || !r3t->up2 || !r3t->lc1)
		return ERROR_INTEGRITY;
		
	uf = r3t->uf;
	parent1 = r3t->up1;
	parent2 = r3t->up2;
	lcp = r3t->lcp;
	
	// same size sanity checks as in testr3t_test1
	if(r3t->size <= 0 || 
		r3t->size > r3t->textLen || 
		r3t->textLen <= 0 ||
		r3t->textLen > FLAGUINT_MAX || 
		r3t->bptables_size < 0 ||
		r3t->bptables_size > r3t->textLen*3 ||
		r3t->uf_size < 0 ||
		r3t->uf_size > (r3t->textLen-1)*3 )
		return ERROR_INTEGRITY;
				
	// if the last node has a bp-table, union forest and bp-tables must exist
	if(r3t->bp[r3t->size-1] != UNDEF && (!r3t->uf || !r3t->bptables))
		return ERROR_INTEGRITY;
		
	for(v=0; v<r3t->size; v++)
	{
		par1 = r3t->up1[v].value; 
		if(par1 != UNDEF) {  
			// a parent index has to be greater than v and inside the node array
			if(par1 <= v || par1 >= r3t->size) {
				fprintf(log, "parent error\n");
				return ERROR_TEST;
			}

			// helper defined elsewhere; presumably fills bptab with the
			// bp-table of node v indexed by left character - TODO confirm
			result = r3th_loadBpTable(r3t, bptab, v);
			if(result)
				return result;
				
			// node par1 has to contain at least one more position with lc1 than node i
			if(parent1[v].flag1) {// lc1 == LC_CENT
				if(lcp[v].flag2) { // bp(i, LC_CENT) != emptyset
					fprintf(log, "parent error\n");
					return ERROR_TEST;
				} else { // bp(i, LC_CENT) == emptyset
					if(!lcp[par1].flag2) {
						fprintf(log, "parent error\n");
						return ERROR_TEST;
					}
				}

				par2 = r3t->up2[v]; 
				if(par2 != UNDEF) {  
					if(par2 <= v || par2 >= r3t->size) {
						fprintf(log, "parent error\n");
						return ERROR_TEST;
					}
	
					// the second parent needs a bp-table of its own
					if(r3t->bp[par2] == UNDEF) {
						fprintf(log, "parent error\n");
						return ERROR_TEST;
					}
					
					// helper defined elsewhere; its result is delivered in unode1
					result = r3th_findBpEntry(r3t, bptab, par2, &unode1);
					if(result)
						return result;
						
					if(unode1 == UNDEF) {
						fprintf(log, "parent error\n");
						return ERROR_TEST;
					} else {
						// NOTE(review): this is the only ERROR_TEST exit
						// that writes nothing to log
						if(unode1 < 1 || unode1 >= r3t->uf_size)
							return ERROR_TEST;
					}
				}
			}
			else {// lc1 == r3t->lc1[v];
				lc1 = r3t->lc1[v];
				// NOTE(review): lc1 is not range-checked against SIGMA_SIZE
				// here; presumably lc1 values are always valid - TODO confirm
				unode1 = bptab[lc1].value;
				
				if(r3t->bp[par1] == UNDEF) {
					fprintf(log, "parent error\n");
					return ERROR_TEST;
				}
				
				// walk the bp-table run of par1 (it ends with the first entry
				// that has flag2 set) looking for a union tree whose first
				// item has lc1 as the character left of its position
				j = r3t->bp[par1] -1;
				unode2 = UNDEF;
				do {
					j++;
					if(j < 0 || j >= r3t->bptables_size)
						return ERROR_INTEGRITY;
					k = r3t->bptables[j].value; // index of an item in union forest
					if(k < 0 || k >= r3t->uf_size || !r3t->uf[k].flag2)
						return ERROR_INTEGRITY;
					l = r3t->uf[k].value;
					if(l < 0 || l >= k || r3t->uf[l].flag2)
						return ERROR_INTEGRITY;
					// NOTE(review): uf[l].value is not range-checked before
					// text is indexed; a value of 0 would read text[-1]
					if(lc1 == r3t->text[r3t->uf[l].value-1])
						unode2 =k;
				} while(!r3t->bptables[j].flag2);
				
				if(unode2 == UNDEF) {
					fprintf(log, "parent error\n");
					return ERROR_TEST;
				}
	
				if(unode1 != UNDEF) { // unode2 set has to be superset of unode1 set
					// unode2 has to come after unode1 and must not start after it
					if(unode2 <= unode1 || uf[unode2].value > uf[unode1].value) {	
						fprintf(log, "parent error\n");
						return ERROR_TEST;
					}
				} 
	
				par2 = r3t->up2[v]; 
				if(par2 != UNDEF) {  
					if(par2 <= v || par2 >= r3t->size) {
						fprintf(log, "parent error\n");
						return ERROR_TEST;
					}

//					if((r3t->lcp[par2].flag2 && !r3t->lcp[v].flag2) ||
//					   (r3t->lcp[par2].flag1) ) // this condition holds if bp(par2, cent) is superset of bp(v, cent)
					
					// negation of the condition commented out above
					if( (!r3t->lcp[par2].flag2 || r3t->lcp[v].flag2) && !r3t->lcp[par2].flag1 ) { 
						// helper defined elsewhere; its result is delivered in unode1
						result = r3th_findBpEntryDifferentFromLc(r3t, bptab, par2, lc1, &unode1);
						if(result)
							return result;
							
						if(unode1 == UNDEF ) {
							fprintf(log, "parent error\n");
							return ERROR_TEST;
						}
					}
				}
			}
		}
	}	
	return 0;
}
 
/* tests if r3 tree contains given repeats
 *
 * Every repeat in repeats[0..cnt-1] is looked up with
 * testr3t_contains_repeat. All repeats are tried even after a failure, so
 * that each missing repeat gets reported to log.
 *
 * log     - destination of diagnostic messages
 * r3t     - tree to be queried
 * repeats - array of cnt repeats (p1, p2, l)
 * cnt     - number of items in repeats
 *
 * returns 0 if every repeat was found, ERROR_TEST if at least one lookup
 * failed, ERROR_INTEGRITY on NULL argument, ERROR_MEM if the scratch
 * buffers could not be allocated
 * */
int testr3t_contains_repeats(FILE* log, t_r3t* r3t, t_repeat* repeats, int cnt)
{
	int i, result;
	int* store_p2;
	int* store_l;
	if(!log || !repeats || !r3t)
		return ERROR_INTEGRITY;

	// location for findPairs query results, textLen items each
	store_l = (int*) malloc( r3t->textLen*sizeof(int));
	if(!store_l)
		return ERROR_MEM;	
	store_p2 = (int*) malloc( r3t->textLen*sizeof(int));
	if(!store_p2) {
		free(store_l); // do not leak the first buffer
		return ERROR_MEM;
	}

	result = 0;
	for(i=0; i<cnt; i++)
		if( testr3t_contains_repeat(log, r3t, repeats[i].p1, repeats[i].p2, repeats[i].l, store_p2, store_l) )
			result = ERROR_TEST;
		
	// the scratch buffers are private to this function
	free(store_p2);
	free(store_l);
	return result;
}


/* Checks that the repeat (p1, p2, l) is reported by the r3 tree.
 *
 * The triple is first verified against the text with test_maximal_repeat,
 * then r3t_findPairs is queried for p1 and l and its answers are searched
 * for the pair (p2, l).
 *
 * store_p2, store_l - caller-provided buffers that receive the findPairs
 *                     results; r3t->textLen is passed to r3t_findPairs
 *                     along with them (presumably as their capacity)
 *
 * returns 0 if the repeat was found, ERROR_INTEGRITY on NULL argument,
 * the non-zero result of test_maximal_repeat / r3t_findPairs if one of
 * them fails, ERROR_TEST if the pair is not among the answers
 * */
int testr3t_contains_repeat(FILE* log, t_r3t* r3t, int p1, int p2, int l, int* store_p2, int* store_l)
{
	int status;
	int found;	/* number of pairs delivered by r3t_findPairs */
	int idx;

	if(log == NULL || r3t == NULL || store_p2 == NULL || store_l == NULL)
		return ERROR_INTEGRITY;

	/* the triple itself has to be a maximal repeat of the text */
	status = test_maximal_repeat(log, r3t->text, r3t->textLen, p1, p2, l);
	if(status != 0) {
		fprintf(log, "test_maximal_repeat returned %i\n", status);
		return status;
	}

	status = r3t_findPairs(r3t, p1, l, &found, r3t->textLen, store_p2, store_l);
	if(status != 0) {
		fprintf(log, "findPairs returned %i\n", status);
		return status;
	}

	/* linear search of the answers for the expected pair */
	idx = 0;
	while(idx < found) {
		if(store_l[idx] == l && store_p2[idx] == p2)
			return 0;
		idx++;
	}

	fprintf(log, "findPairs didn't find repeat (%i, %i, %i) in r3 tree\n", p1, p2, l);
	return ERROR_TEST;
}

/* 
 * Loads the content of a file and builds an R3 tree over it.
 *
 * filename - file to be loaded with load_data
 * r3t      - tree structure handed to r3t_create
 *
 * returns 0 on success, ERROR_INTEGRITY on NULL argument, otherwise the
 * non-zero result of load_data or r3t_create
 *
 * On success the loaded buffer is not freed here; presumably it stays
 * referenced by the tree and the caller releases it - TODO confirm.
 * NOTE(review): r3t_destroy is called here after a failed r3t_create,
 * while testr3t_file does not do that - confirm which one is intended.
 * */
int testr3t_create_from_file(char* filename, t_r3t* r3t)
{
	BYTE* data;
	int dataLen;
	int status;

	if(filename == NULL || r3t == NULL)
		return ERROR_INTEGRITY;

	status = load_data(&data, &dataLen, filename);
	if(status != 0)
		return status;

	status = r3t_create(r3t, data, dataLen);
	if(status == 0)
		return 0;

	/* construction failed: release the buffer and the partial tree */
	free(data);
	r3t_destroy(r3t);
	return status;
}
	

/* testr3t_files 
 * Runs testr3t_file on each of the cnt names in filenames and flushes the
 * log after every file. A failing file does not stop the run.
 *
 * returns 0 if every file passed, ERROR_TEST if at least one file failed,
 * ERROR_INTEGRITY on NULL log / filenames, negative cnt or as soon as a
 * NULL name is met (the remaining files are then skipped)
 * */
int testr3t_files(FILE* log, char** filenames, int cnt)
{
	int idx;
	int status;
	int overall = 0;

	if(log == NULL || filenames == NULL || cnt < 0)
		return ERROR_INTEGRITY;

	idx = 0;
	while(idx < cnt) {
		if(filenames[idx] == NULL)
			return ERROR_INTEGRITY;

		status = testr3t_file(log, filenames[idx]);
		fflush(log);
		if(status != 0)
			overall = ERROR_TEST;	/* remember the failure, keep going */

		idx++;
	}

	return overall;
}

/* testr3t_file 
 * test r3t_create on a file
 * output results to log file
 *
 * Steps: load the file, build the tree with r3t_create, run testr3t_test1
 * and testr3t_test2, then - if a file named "<filename>.repeats" can be
 * loaded - check with testr3t_contains_repeats that the tree contains the
 * repeats listed there. A missing .repeats file is reported but does not
 * count as a failure.
 *
 * log      - destination of the report
 * filename - input file
 *
 * returns 0 if all tests passed, ERROR_INTEGRITY on NULL argument,
 * ERROR_MEM if the .repeats file name could not be allocated, otherwise
 * the non-zero result of the failing step
 *
 * Everything acquired after r3t_create succeeded is released at the single
 * cleanup label, on error paths as well.
 * */
int testr3t_file(FILE* log, char* filename)
{
	BYTE* text;
	int textLen;
	t_r3t r3t;
	int result;
	char* repfilename;
	t_repeat* repeats;
	int repcnt;
	
	// filename is printed with %s and passed to strlen, so it must not be NULL
	if(!log || !filename)
		return ERROR_INTEGRITY;

	fprintf(log, "testing input: '%s' ... ", filename);
	result = load_data(&text, &textLen, filename);
	if(result) {
		fprintf(log, "\nerror loading file.\n");
		return result;
	}
	fprintf(log, "size: %i\n", textLen); // textLen is an int

	result = r3t_create(&r3t, text, textLen); 
	if(result) { 	
		fprintf(log, "r3t_create returned error: %i\n", result);
		free(text);
		return result; 
	}

#ifdef DEBUG_PRINTPHASE
	printf("testing ...\n");
	fflush(stdout);
#endif

	repfilename = NULL; // lets the cleanup code free it unconditionally

	result = testr3t_test1(log, &r3t); 
	if(result) {
		fprintf(log, "testr3t_test1 returned error: %i\n", result);
		goto cleanup;
	}

	result = testr3t_test2(log, &r3t); 
	if(result) {
		fprintf(log, "testr3t_test2 returned error: %i\n", result);
		goto cleanup;
	}

	repfilename = (char*) malloc( strlen(filename) + strlen(".repeats") + 1);
	if(!repfilename) {
		result = ERROR_MEM;
		goto cleanup; // tree and text still have to be released
	}
	strcpy(repfilename, filename);
	strcat(repfilename, ".repeats");

	if(load_repeats(&repeats, &repcnt, repfilename)) {
		// couldn't open .repeat file
		// do not test r3 tree for repeats
		fprintf(log, "couldn't open file '%s' ...\n", repfilename);
		fprintf(log, "O.K.\n");	
		result = 0;
	} else {
		result = testr3t_contains_repeats(log, &r3t, repeats, repcnt);
		if(result)
			fprintf(log, "testr3t_contains_repeats returned error: %i\n", result);
		else
			fprintf(log, "O.K.\n");	
		// NOTE(review): the array delivered by load_repeats is not released
		// here; presumably it is heap-allocated and needs free() - confirm
		// against load_repeats.
	}

cleanup:
	free(repfilename);
	r3t_destroy(&r3t);
	free(text);
	return result;
}

/* test union tree integrity
 * tests union tree in uf with root u
 *
 * The root uf[u] has to be a navigator item (flag2 set) and uf[u].value
 * has to be an index smaller than u; the items uf[uf[u].value .. u-1] are
 * then scanned from the highest index down:
 *  - navigator item (flag2 == 1): its value must not be smaller than the
 *    value of the navigator on top of the stack and has to be smaller than
 *    its own index
 *  - value item: its value is a text position, it has to lie inside
 *    [0, textLen) and the character left of it has to equal the character
 *    left of the first position met. Position 0 has no left character and
 *    is left out of this comparison.
 *
 * log     - destination of diagnostic messages
 * uf      - union forest; has to hold at least u+1 items (the size is not
 *           passed in, the caller is responsible for the range of u)
 * u       - index of the root item of the tested tree
 * text    - text the positions refer to
 * textLen - length of text
 *
 * returns 0 if the tree is consistent, ERROR_TEST if a check failed (all
 * problems found during the scan are logged), ERROR_INTEGRITY on NULL
 * argument or negative index, ERROR_MEM if the stack could not be
 * allocated
 * */
int testr3t_test_ut(FILE* log, FLAGUINT* uf, int u, BYTE* text, int textLen)
{
	int i, fd, stack_size, top, pos1, error, pos;
	
	int* stack;
	if(!uf || !text || !log)
		return ERROR_INTEGRITY;
	if(u < 0)
		return ERROR_INTEGRITY;
	if(!(uf[u].flag2)) {
		fprintf(log, "uf error: root has to be navigator node!\n");
		return ERROR_TEST;
	}
	fd = uf[u].value; 
	if(fd >= u) {
		fprintf(log, "uf error: uf[u].value >= u\n");
		return ERROR_TEST;
	}
	if(fd < 0)
		return ERROR_INTEGRITY;

	// One slot for the root plus one for every scanned item. This can never
	// overflow, even for a malformed tree that consists of navigator items
	// only.
	stack_size = (u - fd) + 1;
	stack = (int*) malloc(stack_size * sizeof(int));
	if(!stack)
		return ERROR_MEM;
	stack[0] = u;
	top = 0;
	pos1 = -1;
	error = 0;
	for(i=u-1; i >= fd; i--) {
		if(uf[i].flag2 == 1) { // navigator node
			if(uf[i].value < uf[stack[top]].value) {
				fprintf(log, "uf error: uf[%i].value=%i < uf[%i].value=%i\n", i, uf[i].value, stack[top], uf[stack[top]].value);
				error = 1;
			}
			if(uf[i].value >= i) {
				fprintf(log, "uf error: uf[i=%i].value=%i >= i\n", i, uf[i].value);
				error = 1;
			}
			top++;
			stack[top]=i;	
		} else { // value node
			pos = uf[i].value;
			if(pos < 0 || pos >= textLen) {
				fprintf(log, "uf error: position out of range: %i (textLen=%i)\n", pos, textLen);
				error = 1;
			} else if(pos > 0) {
				// text[pos-1] is read only for positions known to be valid
				if(pos1 == -1)
					pos1 = pos;
				else
					if(text[pos1-1] != text[pos-1]) {
						fprintf(log, "uf error: union tree contains positions with different lc: LC(%i) == %i != LC(%i) == %i\n", pos1, text[pos1-1], pos, text[pos-1] );
						error = 1;
					}
			}
		}
		// NOTE(review): a navigator pushed above satisfies i == stack[top]
		// and is popped again right here, so the value comparison above is
		// always made against the root. Presumably the intended condition
		// is i == uf[stack[top]].value (subtree of the top navigator
		// finished) - confirm before changing.
		while(top && (i == stack[top]))
			top--;
	}
	
	free(stack);
	
	if(error)
		return ERROR_TEST;
	else
		return 0;
}

/* Helper of testr3t_test1; log and r3t are used without NULL checks, so
 * the r3t data has to be sane.
 *
 * Pairs position pos with every value item of the union tree rooted at
 * uf[u] and verifies with test_maximal_repeat that (pos, item, len) is a
 * maximal repeat. Navigator items (flag2 set) inside the tree are skipped.
 *
 * returns 0 if every pair passed, ERROR_INTEGRITY if u is outside uf, if
 * uf[u] is not a navigator item or if its value is not smaller than u,
 * otherwise the first non-zero result of test_maximal_repeat
 * */
int testr3t_combine(FILE* log, t_r3t *r3t, int u, int pos, int len)
{
	int idx;
	int status;

	/* the root index has to lie inside the union forest ... */
	if(u < 0 || u >= r3t->uf_size)
		return ERROR_INTEGRITY;

	/* ... and has to be a navigator item pointing below itself */
	if(r3t->uf[u].value < 0 || r3t->uf[u].value >= u || !r3t->uf[u].flag2)
		return ERROR_INTEGRITY;

	idx = r3t->uf[u].value;
	while(idx < u) {
		if(!r3t->uf[idx].flag2) {
			status = test_maximal_repeat(log, r3t->text, r3t->textLen, pos, r3t->uf[idx].value, len);
			if(status != 0)
				return status;
		}
		idx++;
	}

	return 0;
}




/* test_maximal_repeat
 *
 * test if (p1, p2, l) is maximal repeat in text 
 *
 * The triple passes if
 *  - p1, p2 and l are non-negative and both occurrences fit into the text
 *  - the repeat is left maximal: p1 or p2 is 0, or the characters left of
 *    the two occurrences differ
 *  - text[p1 .. p1+l-1] equals text[p2 .. p2+l-1]
 *  - the repeat is right maximal: the characters following the two
 *    occurrences differ, or one of the occurrences ends at textLen-1 or
 *    later
 *
 * log     - destination of diagnostic messages
 * text    - tested text
 * textLen - length of text
 * p1, p2  - start positions of the two occurrences
 * l       - length of the repeat
 *
 * returns 0 if the triple is a maximal repeat, ERROR_TEST (with a message
 * in log) if it is not, ERROR_INTEGRITY on NULL log / text or on textLen
 * outside of [0, FLAGUINT_MAX]
 * */
int test_maximal_repeat(FILE* log, BYTE* text, int textLen, int p1, int p2, int l)
{
	int p1last;
	int p1j;
	int p2j;
	
	if(textLen < 0 || textLen > FLAGUINT_MAX)
		return ERROR_INTEGRITY;
	
	if(!log)
	{
		fprintf(stderr, "text_maximal_repeat: log == NULL \n");
		return ERROR_INTEGRITY;
	}
	if(!text)
	{
		fprintf(log, "text_maximal_repeat: text == NULL \n");
		return ERROR_INTEGRITY;
	}
	// negative values would slip through the range checks below and index
	// text in front of its start
	if(p1 < 0 || p2 < 0 || l < 0)
	{
		fprintf(log, "text_maximal_repeat: incorrect repeat (%4i, %4i, %4i): negative position or length \n", p1, p2, l);
		return ERROR_TEST;
	}
	// written as subtraction so that p1+l / p2+l cannot overflow
	if(p1 > textLen || l > textLen - p1)
	{
		fprintf(log, "text_maximal_repeat: incorrect repeat (%4i, %4i, %4i): (p1+l) > textLen \n", p1, p2, l);
		return ERROR_TEST;
	}
	if(p2 > textLen || l > textLen - p2)
	{
		fprintf(log, "text_maximal_repeat: incorrect repeat (%4i, %4i, %4i): (p2+l) > textLen \n", p1, p2, l);
		return ERROR_TEST;
	}
	
	// position 0 has no left neighbour, such a repeat is always left maximal
	if(p1 && p2 && (text[p1-1] == text[p2-1]))
	{
		fprintf(log, "text_maximal_repeat: repeat (%4i, %4i, %4i) is not left maximal: \n", p1, p2, l);
		return ERROR_TEST;
	}

	p1last = p1+l;
	p1j = p1;
	p2j = p2;
	while(p1j < p1last)
	{
		if(text[p1j] != text[p2j])
		{
			fprintf(log, "text_maximal_repeat: repeat (%4i, %4i, %4i) doesn't match at position %i \n", p1, p2, l, p1j-p1 );
			return ERROR_TEST;
		}
		
		p1j++;
		p2j++;	
	}
	
	// here p1j == p1+l and p2j == p2+l
	// NOTE(review): the last byte of text is left out of the extension test;
	// presumably it is a terminator symbol - confirm
	textLen--;
	
	if((p1j < textLen) && (p2j < textLen) && (text[p1j] == text[p2j]))
	{
		fprintf(log, "text_maximal_repeat: repeat (%4i, %4i, %4i) is not right maximal: \n", p1, p2, l);
		return ERROR_TEST;
	}	
	
	return 0;
}

