#include "hinter.h"

// Constructs a Hinter with the minimum-interval filter disabled (-1 is the
// "no restriction" value tested in Modify()).
// NOTE(review): no other member is initialised in this constructor.
Hinter::Hinter()
	: minIntervals(-1)
{
}

// Opens `file` for reading as the GTF input and resets the current
// chromosome name to the "__empty__" placeholder.
// Returns 0 on success, -1 when the file cannot be opened.
int Hinter::SetGTF(char* file)
{
	gtf = fopen(file, "r");
	const bool opened = (gtf != NULL);
	if (opened) {
		chromosome = "__empty__";
	}
	return opened ? 0 : -1;
}

// Opens (and truncates) `file` for writing as the output stream.
// Returns 0 on success, -1 when the file cannot be opened.
int Hinter::SetOutput(char* file)
{
	output = fopen(file, "w");
	return (output != NULL) ? 0 : -1;
}

// Opens the reference-annotation file and consumes its leading '>' marker.
//
// file: path of the annotation file, or NULL to run without one (resp is then
//       set to NULL and 0 is returned).
// Returns 0 on success, -1 when the file cannot be opened or does not start
// with '>'. On failure the file is closed and resp is left NULL, so later
// calls never read from a half-validated stream.
int Hinter::SetResp(char* file)
{
	if (file == NULL) {
		resp = NULL;
		return 0;
	}

	resp = fopen(file, "r");
	if (resp == NULL) {
		return -1;
	}

	// fgetc yields EOF for an empty file, so the comparison below is always
	// made on a defined value (an unchecked fscanf left the char unset).
	const int first = fgetc(resp);
	if (first != '>') {
		printf ("bad :-(\n");
		fclose(resp);
		resp = NULL;
		return -1;
	}
	return 0;
}

bool compareHints (const SHint &i, const SHint &j) 
{ 
	if (i.start == j.start) {
		if (i.label == 'b') return true;
	}
	return (i.start<j.start); 
}

int Hinter::LoadGtf()
{
	if (gtf == NULL) return -1;

	char chrom[100];
	char type[100];
	char empty[100];
	char strand;
	char frame;
	char transcript[100];

	int v;
	
	SHint hint;
	fpos_t position;

	int count = 0;

	if (feof(gtf)) return -1;
	
	while (true) {
		count++;
		if (count % 1000 == 0) {
			printf("line %d done\n", count);
		}
		char *point;
		if (count != 1 || !read) point = fgets(buffer, 1000, gtf);
		read = true;
		int ret = sscanf(buffer, "%s %s %s %d %d %d %c %c %s %s %s %s \n", 
			chrom, empty, type, &hint.start, &hint.stop, &v, 
			&strand, &frame, empty, hint.gene_ID, empty, transcript);
		if (ret != 12 || point == NULL) {
			//printf("end of file %d\n", ret); 
			read = false;
			break;
		}

		// correct numbering from 0
		hint.start -= 1;
		hint.stop -= 1;
		
		// chromosome end
		if (chromosome == "__empty__" || count == 1) {
			chromosome = chrom;
			printf("Starting chromosome: %s\n", chrom);
		} else if (strcmp(chrom, chromosome.c_str()) != 0) {
			break;
		}
		
		// transcript
		string trans = transcript;

		// frame
		if (frame == '.') hint.frame = -1;
		else {
			if (frame >= '0' && frame <= '2') hint.frame = frame - '0';
			else printf("WTF frame = %c\n", frame);
		}

		// strand
		if (strand == '+') hint.plus_strand = true;
		else {
			if (strand == '-') hint.plus_strand = false;
			else  printf("WTF strand = %c\n", strand);
		}

		// type
		if (strcmp(type, "CDS") == 0)				{ hint.label = 'c'; }
		else if (strcmp(type, "intron") == 0)		{ hint.label = 'i'; }
		else if (strcmp(type, "start_codon") == 0)	{ hint.label = 'b'; }
		else if (strcmp(type, "stop_codon") == 0)	{ hint.label = 'e'; }
		else if (strcmp(type, "intergenic") == 0)	{ hint.label = 'x'; }
		else printf("WTF type = %s\n", type);

		// dont_use -> use always
		hint.dont_use = false;

		// add to map
		if (hints.count(trans)==0) {
			hints.insert(make_pair(trans, vector<SHint>()));
		} 
		hints[trans].push_back(hint);
	}

	//sort according to beginnings
	map<string, vector<SHint> >::iterator it;
	for (it=hints.begin(); it!=hints.end(); it++) {
		sort(it->second.begin(), it->second.end(), compareHints);
	}

	return 0;
}

// Loads the next record of the reference annotation into `annotation`.
//
// The stream is expected to be positioned just after a '>' marker: the record
// name is read first, then every following character (newlines skipped) is
// translated through stateToHint until the next '>' or end of file.
// Characters without a mapping are reported and skipped.
//
// set_chr: when true, the record name also becomes the current chromosome.
// Returns 0 on success, -1 when no annotation file is open, the stream is
// already at end of file, or no record name could be read.
int Hinter::LoadGood(bool set_chr)
{
	if (resp == NULL) return -1;
	if (feof(resp)) return -1;

	char chrom[100];

	annotation.clear();

	// Bounded read; bail out instead of using an unset name when the stream
	// ends right after the '>' marker.
	if (fscanf(resp, "%99s\n", chrom) != 1) return -1;

	while (true) {
		int ch;
		ch = fgetc(resp);
		if (ch == '\n') continue;
		if (ch == '>' || ch == EOF) break;
		if (stateToHint.count(ch) > 0) {
			annotation.push_back(stateToHint[ch]);
		} else {
			printf ("unknown: %c\n", ch);
		}
	}

	printf ("Good annot. loaded:  %s\n", chrom);
	if (set_chr) chromosome = chrom;
	return 0;
}

int Hinter::ConvertToComplex(char* src, char* dst, bool add_int, char* resp_hints)
{
	// set files
	int ret = SetGTF(src);
	if (ret != 0) {
		return -1;
	}
	ret = SetOutput(dst);
	if (ret != 0) {
		return -2;
	}
	ret = SetResp(resp_hints);
	if (ret != 0) {
		return -3;
	}

	int count = 0;
	int count_bad = 0;
	int count_good = 0;
	read = false;

	// statistics:
	vector<int> arr(50, 0);
	int max_arr = 0;

	// do all!
	while (LoadGtf() == 0) {
		LoadGood();

		count ++;
		
		// Modify + count bad hints
		count_bad += Modify(resp_hints); 

		// convert
		map<string, vector<SHint> >::iterator it;
		int hint_no = 0;
		fprintf(output, ">%s\n", chromosome.c_str());
		for (it=hints.begin(); it!=hints.end(); it++) {
			// single hint
				// count intervals
			int j = 0;
			for (unsigned int i=0; i<it->second.size(); i++) {
				if (it->second[i].dont_use) continue;
				j++;
			}

			if (j == 0) continue;

				// print it
			fprintf(output, "%3d bonus1 %2d - %7d", hint_no++, j, it->second[0].start);
			int stop = -1; 
			for (unsigned int i=0; i<it->second.size(); i++) {
				// ignore bad ones
				if (it->second[i].dont_use) continue;
				// print it
				char label = it->second[i].label;
				if (label != 'x' && !it->second[i].plus_strand) label = toupper(label);
				if (stop != -1 && stop != it->second[i].start) {
					printf("BAD!! start %d  stop %d\n", it->second[i].start, stop);
				}
				stop = it->second[i].stop+1;
				fprintf(output, " %c %7d", label, stop);
			}

			fprintf(output, "\n");
			count_good++;

			arr[j]++;
			if (j>13) printf("%d %s %d\n", j, chromosome.c_str(), hint_no);
			if (j > max_arr) max_arr = j;

			// add intervals maybe
			if (!add_int || j==1) continue;
			for (unsigned int i=0; i<it->second.size(); i++) {
				// ignore bad ones
				if (it->second[i].dont_use) continue;
				// print it
				char label = it->second[i].label;
				if (label != 'x' && !it->second[i].plus_strand) label = toupper(label);

				// every interval as one hint
				fprintf(output, "%3d bonus2 1 - %7d %c %7d\n", hint_no++, it->second[i].start, label, it->second[i].stop+1);
			}
		}

		// delete space
		hints.clear();
		printf("Good hints: %d\nBad  hints: %d\n\n", count_good, count_bad);
	}

	// statistics
	for (int i=1; i<=max_arr; i++) {
		printf("%d %d\n", i, arr[i]);
	}

	// close files
	if (gtf != NULL) fclose(gtf);
	if (output != NULL) fclose(output);

	return 0;
}

// Checks a hint against the loaded reference annotation.
//
// Hints labelled 'b' or 'e' are always accepted. Any other hint is accepted
// only when every annotation position in [start, stop] carries the hint's
// label (upper-cased for minus-strand hints other than 'x'). Hints reaching
// outside the annotation, on either side, are rejected.
bool Hinter::RespHint(SHint hint)
{
	if (hint.label == 'b' || hint.label == 'e') return true;

	// A negative start would index before the beginning of the annotation.
	if (hint.start < 0 || hint.stop < 0) return false;
	if (annotation.size() <= static_cast<unsigned int>(hint.stop)) return false;

	char label = hint.label;
	if (label != 'x' && !hint.plus_strand) label = toupper(label);

	for(int i=hint.start; i<=hint.stop; i++) {
		if (annotation[i] != label) return false;
	}
	return true;
}

int Hinter::ConvertToIntervals(char* src, char* dst)
{
	// set files
	int ret = SetGTF(src);
	if (ret != 0) {
		return -1;
	}
	ret = SetOutput(dst);
	if (ret != 0) {
		return -2;
	}

	int count = 0;
	read = false;

	// do all!
	while (LoadGtf() == 0) {
		count ++;

		// modify
		Modify();

		// convert
		map<string, vector<SHint> >::iterator it;
		int hint_no = 0;
		fprintf(output, ">%s\n", chromosome.c_str());
		for (it=hints.begin(); it!=hints.end(); it++) {
			// single hint
			for (unsigned int i=0; i<it->second.size(); i++) {
				// ignore bad ones
				if (it->second[i].dont_use) continue;
				// print it
				char label = it->second[i].label;
				if (label != 'x' && !it->second[i].plus_strand) label = toupper(label);

				// every interval as one hint
				fprintf(output, "%3d bonus2 1 - %7d %c %7d\n", hint_no++, it->second[i].start, label, it->second[i].stop+1);
			}
		}

		// delete space
		hints.clear();

	}

	// close files
	if (gtf != NULL) fclose(gtf);
	if (output != NULL) fclose(output);

	return 0;
}

int Hinter::ConvertToPoints(char* src, char* dst) 
{
	// set files
	int ret = SetGTF(src);
	if (ret != 0) {
		return -1;
	}
	ret = SetOutput(dst);
	if (ret != 0) {
		return -2;
	}

	int count = 0;
	read = false;

	// do all!
	while (LoadGtf() == 0) {
		count ++;

		// modify
		Modify();

		// convert
		map<string, vector<SHint> >::iterator it;
		int hint_no = 0;
		fprintf(output, ">%s\n", chromosome.c_str());
		for (it=hints.begin(); it!=hints.end(); it++) {
			// single hint
			for (unsigned int i=0; i<it->second.size(); i++) {
				// ignore bad ones
				if (it->second[i].dont_use) continue;
				// print it
				char label = it->second[i].label;
				if (label != 'x' && !it->second[i].plus_strand) label = toupper(label);

				// every point as one hint
				for (int j=it->second[i].start; j<=it->second[i].stop; j++) {
					fprintf(output, "%3d bonus2 0 - %7d %c\n", hint_no++, j, label);
				}
			}
		}

		// delete space
		hints.clear();

	}

	// close files
	if (gtf != NULL) fclose(gtf);
	if (output != NULL) fclose(output);

	return 0;
}

// Counter for the deterministic debugging variant of Rand() (see below).
static int k = -1000;

// Returns a non-negative pseudo-random number built from two rand() draws so
// that the range is wider than a single draw on platforms with a small
// RAND_MAX.
//
// The combination is computed in 64-bit unsigned arithmetic and reduced to
// 31 bits: where RAND_MAX equals INT_MAX the plain int expression overflows,
// which is undefined behaviour and yields negative values in practice. Where
// the int expression does not overflow, the result is unchanged.
// Assumes int has at least 32 bits.
int Rand()
{
	// deterministic variant for debugging:
	//k+=1000;
	//return k;

	// Draw in a fixed order; the order of two calls inside one expression is
	// unspecified.
	const unsigned long long high = (unsigned long long)rand();
	const unsigned long long low = (unsigned long long)rand();
	const unsigned long long wide = high * (unsigned long long)RAND_MAX + low;
	return (int)(wide & 0x7fffffffULL);
}

// Generates artificial hints by sampling random windows of `length` positions
// from each record of the reference annotation in `src`.
//
// dst_com:  when non-NULL, each window is written as one multi-interval hint.
// dst_int:  when non-NULL, each run of equal labels inside a window is written
//           as its own single-interval hint.
// length:   window size in positions; must be positive.
// coverage: coverage*size/length windows are drawn per record.
//
// Records that are not longer than `length` get only their ">name" header,
// because no window start can be drawn for them.
// Returns 0 on success, -1 when `length` is not positive or a file cannot be
// opened.
// NOTE(review): the annotation stream opened through SetResp() is not closed
// by this function.
int Hinter::CreateFake(char* src, char* dst_com, char* dst_int, int length, double coverage)
{
	// the window count below divides by length
	if (length <= 0) {
		return -1;
	}

	// random
	srand ( time(NULL) );

	// set files
	int ret = SetResp(src);
	if (ret != 0) {
		return -1;
	}
	FILE *out_com = NULL;
	FILE *out_int = NULL;
	const bool emit_complex = (dst_com != NULL);
	const bool emit_interval = (dst_int != NULL);
	if (emit_complex) {
		out_com = fopen(dst_com, "w");
		if (out_com == NULL) {
			return -1;
		}
	}

	if (emit_interval) {
		out_int = fopen(dst_int, "w");
		if (out_int == NULL) {
			// do not leak the first output when the second cannot be opened
			if (out_com != NULL) fclose(out_com);
			return -1;
		}
	}

	// statistics: arr[n] = number of complex hints with n intervals
	vector<int> arr(500, 0);
	int max_arr = 0;

	while (LoadGood(true) == 0) {
		k = -1000;
		if (emit_complex) fprintf(out_com, ">%s\n", chromosome.c_str());
		if (emit_interval) fprintf(out_int, ">%s\n", chromosome.c_str());

		// The window start is drawn modulo (size - length); that needs
		// size > length, otherwise the modulus is zero or wraps around.
		if (annotation.size() <= (unsigned int)length) continue;

		const double wanted = coverage*annotation.size()/length;

		int hint_com = 0;
		int hint_int = 0;
		for (int i=0; i<wanted; i++) {
			int r = Rand() % (annotation.size()-length);

			char last_char = '?';
			if (emit_complex) {
				int count = 0;

				// count num of intervals (runs of equal labels)
				for (int j=r; j<r+length; j++) {
					char label = annotation[j];
					if (label != last_char) count++;
					last_char = label;
				}

				// count can reach `length`, so grow the histogram on demand
				if (arr.size() <= (unsigned int)count) arr.resize(count + 1, 0);
				arr[count]++;
				if (count > max_arr) max_arr = count;

				last_char = '?';

				fprintf(out_com, "%4d bonus1 %2d - ", hint_com++, count);

				// add inform. about hint: start and label of every run
				for (int j=r; j<r+length; j++) {
					char label = annotation[j];
					if (label != last_char) {
						fprintf(out_com, "%7d %c ", j, label);
						last_char = label;
					}
				}

				fprintf(out_com, "%7d\n", r+length);

			}
			last_char = '?';
			if (emit_interval) {
				// add inform. about hint: one line per run; the end of a run is
				// written when the next one starts
				for (int j=r; j<r+length; j++) {
					char label = annotation[j];
					if (label != last_char) {
						if (last_char != '?') fprintf(out_int, "%7d\n", j);
						fprintf(out_int, "%4d bonus1 1 - %7d %c ", hint_int++, j, label);
						last_char = label;
					}
				}
				fprintf(out_int, "%7d\n", r+length);
			}//if
		}//for
	}//while

	// statistics
	for (int i=1; i<=max_arr; i++) {
		printf("%d %d\n", i, arr[i]);
	}

	if (emit_complex) fclose(out_com);
	if (emit_interval) fclose(out_int);
	return 0;
}


// Loads the hint-label <-> state-label tables from a text file.
//
// Each line has the form "<hint char> - <state chars>": every non-space
// character after the dash is appended to hintToStateLabel[hint] and mapped
// back to the hint in stateToHint (the first mapping of a state wins, since
// insert() does not overwrite).
//
// Returns 0 when the file was read to its end, -1 when it cannot be opened or
// a read error occurred. Reaching end of file after a trailing newline counts
// as success.
int Hinter::LoadLabels(char* file)
{
	FILE* input = fopen(file, "r");
	if (input == NULL) {
		return -1;
	}

	while (true) {
		char hint;
		// %c is the only conversion, so anything but 1 means end of input
		if (fscanf(input, "%c - ", &hint) != 1) break;

		if (hintToStateLabel.find(hint) == hintToStateLabel.end()) {
			hintToStateLabel.insert(make_pair(hint, vector<char>()));
		}
		while (true) {
			// keep the int result: EOF is not representable in a char
			const int state = fgetc(input);
			if (state == '\n' || state == EOF) break;
			if (state == ' ') continue;
			hintToStateLabel[hint].push_back((char)state);
			stateToHint.insert(make_pair((char)state, hint));
		}
	}

	const int status = ferror(input) ? -1 : 0;
	fclose(input);
	return status;
}

void Hinter::SetIntervalRest(int k)
{
	minIntervals = k;
}

int Hinter::Modify(char* resp_hints)
{
	int count_bad = 0;
	// modify
	map<string, vector<SHint> >::iterator it;
	for (it=hints.begin(); it!=hints.end(); it++) {
		// single hint
		for (unsigned int i=0; i<it->second.size(); i++) {
			// fix stop_codon
			if (it->second[i].label == 'e') {
				switch (it->second[i].plus_strand) {
					case true: // small letters
						if (i-1 >= 0) it->second[i-1].stop += 3;
						break;
					case false: // big letters
						if (i+1 < it->second.size()) it->second[i+1].start -= 3;
				}
			}
			// invalid start/stop codons
			if (it->second[i].label == 'e' || it->second[i].label == 'b') {
				it->second[i].dont_use = true;
			}

			// if they disagree with resp, get them out
			if (resp_hints != NULL) {
				if (!RespHint(it->second[i])) {
					count_bad++;
					// throw out the hint...
					for (unsigned int j=0; j<it->second.size(); j++) {
						it->second[j].dont_use = true; 
					}
					break;
				}
			}
		}
		// shorten exons - OMIT_EXON
		if (it->second[0].label == 'c') {
			it->second[0].start += OMIT_EXON;
			if (it->second[0].start > it->second[0].stop) {
				it->second[0].dont_use = true;
			}
		}

		if (it->second[it->second.size() -1].label == 'c') {
			it->second[it->second.size() -1].stop -= OMIT_EXON;
			if (it->second[it->second.size() -1].start > it->second[it->second.size() -1].stop) {
				it->second[it->second.size() -1].dont_use = true;
			}
		}

		// count good intervals
		int num_int = 0;
		for (unsigned int i=0; i<it->second.size(); i++) {
			if (it->second[i].dont_use) continue;
			num_int++;
		}
			// delete if less than minimum intervals
		if ((minIntervals != -1) && (num_int < minIntervals) && (num_int > 0)) {
			// throw out the hint...
			for (unsigned int j=0; j<it->second.size(); j++) {
				it->second[j].dont_use = true; 
			}
		} 
	}

	return count_bad;
}