#include <algorithm>
#include <climits>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
using namespace std; 
// Counting-loop shorthand: declares an int `i` and iterates it over [q, w).
// The arguments are pasted in unparenthesised, so pass simple expressions only.
#define For(i,q,w) for(int i = q; i < w; i++)  
typedef long long int lli;

// Hash table with occurrence statistics of patterns found in the long reads.
// Tuple fields, as maintained by add_pattern():
//   <0> total number of occurrences,
//   <1> occurrences flagged is_beg (window starts at the first node of a read),
//   <2> occurrences flagged is_end (window ends at the last node of a read),
//   <3> occurrences that are neither at the beginning nor at the end,
//   <4> occurrences that are at the beginning and at the end at once.
unordered_map<string, tuple<int, int, int, int, int> > lods;
// Maximum pattern length, expressed as the number of nodes in the pattern.
int pat_length = 5;

// Builds one pattern string out of `length` consecutive signed nodes, starting
// at index `from`. The result begins with a space and every node is followed by
// a space, e.g. {"+1", "-2"} -> " +1 -2 ". A zero length yields " ".
// The caller must keep [from, from + length) inside splited_pattern; no bounds
// check is done here.
std::string make_pattern(std::vector<std::string> &splited_pattern, int from, int length){
    std::string joined(1, ' ');
    const int stop = from + length;
    int idx = from;
    while(idx < stop){
        joined += splited_pattern[idx];
        joined += ' ';
        ++idx;
    }
    return joined;
}

// Flips an orientation sign: '+' becomes '-'. Any other character
// (including '-') is mapped to '+'.
char rev_sign(char sign){
    return (sign == '+') ? '-' : '+';
}

// Builds the opposite-orientation pattern for the window of `length` nodes that
// starts at index `from`: the nodes are emitted in reverse order and the first
// character of every node (its sign) is flipped with rev_sign().
// The output uses the same layout as make_pattern(): a leading space and one
// space after every node. The input vector itself is not modified.
// The caller must keep [from, from + length) inside splited_pattern, and every
// node is expected to be non-empty because its first character is rewritten.
std::string make_rev_pattern(std::vector<std::string> &splited_pattern, int from, int length){
    std::string reversed(1, ' ');
    for(int offset = length - 1; offset >= 0; --offset){
        // Work on a copy so the caller's node keeps its original sign.
        std::string flipped = splited_pattern[from + offset];
        flipped[0] = rev_sign(flipped[0]);
        reversed += flipped;
        reversed += ' ';
    }
    return reversed;
}

// Records one occurrence of `pattern` in the global `lods` table.
//   pattern - pattern string used as the table key
//   is_beg  - 1 when the occurrence starts at the first node of the read, else 0
//   is_end  - 1 when the occurrence ends at the last node of the read, else 0
//   pat_len - number of nodes in the pattern (currently unused)
// Tuple fields updated: <0> total count, <1> += is_beg, <2> += is_end,
// <3> += 1 when neither flag is set, <4> += 1 when both flags are set.
void add_pattern(std::string &pattern, int is_beg, int is_end, int pat_len){
    (void)pat_len; // kept only for interface compatibility

    const int is_mid = (is_beg == 0 && is_end == 0) ? 1 : 0;
    const int is_amb = (is_beg == 1 && is_end == 1) ? 1 : 0;

    // operator[] value-initialises a missing entry to all zeros, so a single
    // lookup serves both the "first occurrence" and the "seen before" case.
    auto &stats = lods[pattern];
    std::get<0>(stats) += 1;
    std::get<1>(stats) += is_beg;
    std::get<2>(stats) += is_end;
    std::get<3>(stats) += is_mid;
    std::get<4>(stats) += is_amb;
}

//Vracia info, ci sa vzor v dlhych citaniach nasiel
pair<lli, tuple<int, lli, string> > in_lods(string &pattern, int line_num, int pat_place, int pat_len){
    if (lods.find(pattern) == lods.end()) {
        return make_pair(0, make_tuple(line_num, pat_place, pattern));
    }else{
        return make_pair(get<0>(lods[pattern]), make_tuple(line_num, pat_place, pattern));
    }
}

// Fetches the positional statistics of a (shortened) pattern from the global
// `lods` table. Returns fields <1>..<4> of the stored entry, i.e. the counters
// add_pattern() accumulates from is_beg, is_end, "neither" and "both", in that
// order. Returns (0, 0, 0, 0) when the pattern is not in the table.
// The table is never modified by this function.
std::tuple<int, int, int, int> early_lods(std::string &pattern){
    // Single lookup; the iterator is reused for all four fields.
    const auto hit = lods.find(pattern);
    if(hit == lods.end()){
        return std::make_tuple(0, 0, 0, 0);
    }
    const auto &stats = hit->second;
    return std::make_tuple(std::get<1>(stats), std::get<2>(stats), std::get<3>(stats), std::get<4>(stats));
}

// Examines the two patterns obtained by dropping one node from the window
// [i, i + pat_len) of splited_line: one without the last node and one without
// the first node, each in forward and in reversed orientation.
// Returns (ear_end, mid_end):
//   ear_end - summed counts of shortened-pattern occurrences that sit at the
//             read boundary on the side where the node was dropped
//             (presumably reads that stopped just short of the full pattern);
//   mid_end - summed counts of the remaining occurrences of the shortened
//             patterns, reduced by lods_count.
// Windows of a single node cannot be shortened, so they yield (0, 0).
std::pair<int, int> inspect_short_pattern(std::vector<std::string> &splited_line, int i, int pat_len, int lods_count){
    if(pat_len == 1){
        return std::make_pair(0, 0);
    }

    const int short_len = pat_len - 1;
    int boundary_hits = 0;
    int interior_hits = 0;

    // Adds the statistics of one shortened pattern. `dropped_at_end` tells
    // whether, in the orientation of `shortened`, the missing node would have
    // followed the pattern (true) or preceded it (false).
    const auto tally = [&](std::string shortened, bool dropped_at_end){
        const std::tuple<int, int, int, int> stats = early_lods(shortened);
        const int at_start = std::get<0>(stats);
        const int at_end = std::get<1>(stats);
        const int inside = std::get<2>(stats);
        const int at_both = std::get<3>(stats);
        if(dropped_at_end){
            boundary_hits += at_end;
            interior_hits += at_start + inside - at_both;
        }else{
            boundary_hits += at_start;
            interior_hits += at_end + inside - at_both;
        }
    };

    // Window without its last node; reversing the pattern swaps the side.
    tally(make_pattern(splited_line, i, short_len), true);
    tally(make_rev_pattern(splited_line, i, short_len), false);
    // Window without its first node.
    tally(make_pattern(splited_line, i + 1, short_len), false);
    tally(make_rev_pattern(splited_line, i + 1, short_len), true);

    interior_hits -= lods_count;
    return std::make_pair(boundary_hits, interior_hits);
}

// Decides whether the error pattern `pat` is minimal, i.e. whether no other
// sub-pattern of the window [i, i + pat_len) of splited_line is already stored
// in problem_pattern (in forward or reversed form).
// Returns false, leaving problem_pattern untouched, as soon as such a
// sub-pattern is found. Otherwise stores (lods_count, ear_end, mid_end) under
// `pat` (an existing entry for `pat` is left as it is) and returns true.
bool is_new_pattern(std::vector<std::string> &splited_line, int i, int pat_len, std::string pat, int lods_count, int ear_end, int mid_end, std::unordered_map<std::string, std::tuple<int, int, int> > &problem_pattern){
    const int window_end = i + pat_len;
    for(int start = i; start < window_end; ++start){
        // Sub-patterns must not run past the end of the window.
        const int longest = window_end - start;
        for(int len = 1; len <= longest; ++len){
            const std::string sub = make_pattern(splited_line, start, len);
            const std::string sub_rev = make_rev_pattern(splited_line, start, len);
            // The pattern itself does not count as a smaller known error.
            if(pat == sub || pat == sub_rev){
                continue;
            }
            if(problem_pattern.count(sub) != 0 || problem_pattern.count(sub_rev) != 0){
                return false;
            }
        }
    }
    problem_pattern.emplace(pat, std::make_tuple(lods_count, ear_end, mid_end));
    return true;
}

// Converts a pattern to the form used in the BED output: every space strictly
// between the first and the last character becomes a comma, so
// " +1 -2 +3 " turns into " +1,-2,+3 ".
// The argument is rewritten in place and a copy of the result is returned.
// Strings shorter than three characters have no interior and stay unchanged;
// this includes the empty string, for which `length() - 1` would otherwise
// wrap around and send the loop out of bounds.
std::string pattern_to_bed(std::string &pattern){
    if(pattern.size() > 2){
        std::replace(pattern.begin() + 1, pattern.end() - 1, ' ', ',');
    }
    return pattern;
}

//Vytvara z jedneho zarovnania vzory a testuje na chyby
void line_to_patterns(ofstream &problem_places_f, string &line, int line_num, unordered_map<string, tuple<int, int, int> > &problem_pattern, int limit){
    vector<string> splited_line;
    string node = "$";
    stringstream ss;
    ss << line;
    ss >> node;
    while(ss){
        splited_line.push_back(node);
        ss >> node;
    }
    string tmp_pat;
    string tmp_pat_rev;
    string min_pat;
    lli lods_count;
    int is_beg;
    int is_end;
    pair<lli,tuple<int, lli, string> > result;
    pair<lli,tuple<int, lli, string> > result_rev;
    int splt_len = splited_line.size();
    pair<int, int> short_stat;
    //Vytvara vsetky mozne vzory
    For(pat_len, 1, pat_length + 1){
        For(i, 0, splt_len - pat_len + 1){
	        is_beg = 0;
    	    is_end = 0;
            tmp_pat = make_pattern(splited_line, i, pat_len);
    	    tmp_pat_rev = make_rev_pattern(splited_line, i, pat_len);

    	    result = in_lods(tmp_pat, line_num, i, pat_len);
	        result_rev = in_lods(tmp_pat_rev, line_num, i, pat_len);
	        min_pat = min(tmp_pat, tmp_pat_rev);
		
    	    lods_count = result.first + result_rev.first;
            //Ziskame info o skratenych vzoroch
    	    short_stat = inspect_short_pattern(splited_line, i, pat_len, lods_count);
    	    //V pripade ze je vzor chybovy, otestuj ci je minimalny a ak ano, pridaj do mnoziny minimalnych vzorov
            if(lods_count <= limit){
	    	    if(!is_new_pattern(splited_line, i, pat_len, min(tmp_pat,tmp_pat_rev), lods_count, short_stat.first, short_stat.second, problem_pattern))
	        	    continue;
		        problem_places_f<<get<0>(result.second)<<"\t"<<lods_count<<"\t"<<get<1>(result.second)<<"\t"<<pat_len<<"\t"<<pattern_to_bed(min_pat)<<"\n";
	        }
	    }
    }
}

//Funkcia spracuje jedno citanie a zavola funkciu add_pattern, ktora prislusne upravy hasovaciu tabulku
void parse_long_reads(string &line){
    vector<string> splited_line;
    string node = "$";
    stringstream ss;
    ss << line;
    ss >> node;
    while(ss){
        splited_line.push_back(node);
        ss >> node;
    }
    
    string tmp_pat;
    string tmp_pat_rev;
    string min_pat;
    lli lods_count;
    int is_beg;
    int is_end;
    pair<lli,tuple<int, lli, string> > result;
    pair<lli,tuple<int, lli, string> > result_rev;
    int splt_len = splited_line.size();
    pair<int, int> short_stat;
    //Vytvorenie vsetkych moznych vzorov dlheho citania
    For(pat_len, 1, pat_length + 1){
        For(i, 0, splt_len - pat_len + 1){
	        is_beg = 0;
    	    is_end = 0;
            tmp_pat = make_pattern(splited_line, i, pat_len);
	        tmp_pat_rev = make_rev_pattern(splited_line, i, pat_len); 
	        if(i == 0) is_beg = 1;
	        if((i + pat_len) == splt_len) is_end = 1;
            //pridanie vyskytu vzoru do hasovacej tabulky
	        add_pattern(tmp_pat, is_beg, is_end, pat_len);
	    }
    }
}

// Searches the assembly for error patterns and writes the two result files.
//   assembly_path - input file; non-empty lines are consumed in pairs, the
//                   first of each pair being a name line that is skipped and
//                   the second the alignment that is analysed
//   output_path   - prefix of the output files (concatenated as is, so it has
//                   to end with a path separator when it names a directory)
//   assembly_name - name placed into the output file names
//   limit         - coverage threshold handed to line_to_patterns()
// Output files:
//   <output_path><assembly_name>_cpp_l<limit>       one record per reported window
//   <output_path><assembly_name>_cpp_prob_l<limit>  one line per minimal error
//       pattern: pattern, occurrence count, ear_end, mid_end (tab-separated)
// If any of the files cannot be opened, a message is printed to stderr and the
// function returns without analysing anything.
void parse_assembly(std::string assembly_path, std::string output_path, std::string assembly_name, int limit){
    // Table of minimal error patterns found so far.
    std::unordered_map<std::string, std::tuple<int, int, int> > problem_pattern;

    const std::string out_prefix = output_path + assembly_name;
    const std::string limit_suffix = std::to_string(limit);
    const std::string places_path = out_prefix + "_cpp_l" + limit_suffix;
    const std::string patterns_path = out_prefix + "_cpp_prob_l" + limit_suffix;

    std::ofstream problem_places_f(places_path);
    std::ofstream problem_file(patterns_path);
    if(!problem_places_f){
        std::cerr << "Cannot open output file: " << places_path << "\n";
        return;
    }
    if(!problem_file){
        std::cerr << "Cannot open output file: " << patterns_path << "\n";
        return;
    }

    std::ifstream assembly_f(assembly_path);
    if(!assembly_f){
        std::cerr << "Cannot open assembly file: " << assembly_path << "\n";
        return;
    }

    // Process the assembly one alignment at a time.
    std::string line;
    bool is_name = true;
    int counter = 0;
    while(std::getline(assembly_f, line)){
        if(line.empty()){
            continue;
        }
        if(is_name){
            is_name = false; // name line: nothing to analyse
            continue;
        }
        line_to_patterns(problem_places_f, line, counter, problem_pattern, limit);
        is_name = true;
        ++counter;
    }

    // '\n' instead of endl: no need to flush after every pattern.
    for(const auto &entry : problem_pattern){
        problem_file << entry.first << "\t" << std::get<0>(entry.second) << "\t" << std::get<1>(entry.second) << "\t" << std::get<2>(entry.second) << "\n";
    }
    // Both output streams are flushed and closed when they go out of scope.
}


// Program arguments, in order:
//   1. file with long reads
//   2. file with the examined sequence (assembly)
//   3. output path prefix
//   4. name of the examined sequence, used in the output file names
//   5. error limit (integer)
// Both input files are read as pairs of non-empty lines: a name line followed
// by a line of nodes.
// Returns 0 on success and 1 on a usage, argument or file error; diagnostics
// are written to stderr.
int main(int argc, char** argv){
    if(argc != 6){
        std::cerr << "CHYBNY POCET ARGUMENTOV\n";
        return 1;
    }
    // The long-read file may be replaced by any file that should be used to
    // check the coverage of the assembly patterns (e.g. a reference, when
    // looking for reference errors).
    const std::string long_path = argv[1];
    const std::string assembly_path = argv[2];
    const std::string output_path = argv[3];
    const std::string assembly_name = argv[4];

    // Validate the limit before the expensive pass over the long reads.
    char *limit_end = nullptr;
    const long parsed_limit = std::strtol(argv[5], &limit_end, 10);
    if(limit_end == argv[5] || *limit_end != '\0' || parsed_limit < INT_MIN || parsed_limit > INT_MAX){
        std::cerr << "Invalid limit (expected an integer): " << argv[5] << "\n";
        return 1;
    }
    const int limit = static_cast<int>(parsed_limit);

    std::ifstream nanopore_f(long_path);
    if(!nanopore_f){
        std::cerr << "Cannot open long-read file: " << long_path << "\n";
        return 1;
    }

    // Process the long reads / reference one read (contig) at a time.
    std::string line;
    bool is_name = true;
    while(std::getline(nanopore_f, line)){
        if(line.empty()){
            continue;
        }
        if(is_name){
            is_name = false; // name line: nothing to count
            continue;
        }
        parse_long_reads(line);
        is_name = true;
    }

    // Search the assembly for errors.
    parse_assembly(assembly_path, output_path, assembly_name, limit);
    return 0;
}
