import json
import re
import math
import os
import sys

# Shared lookup table: oriented node id ('+<id>' / '-<id>') -> node length
# string, filled in by parse_graph().
# NOTE: a `global` statement at module level has no effect; the name is
# already module-global here.
global graph
graph = {}

# Command-line layout as used below:
#   argv[1..4] input file paths, argv[5] + argv[6] output path prefix,
#   argv[7] integer limit (only used here to label the output file names).
arguments = sys.argv
limit = int(arguments[7])
# INPUT FILES
# graph
in_graph = open(arguments[1])
in_cpp_final = open(arguments[2])
in_cpp_problem = open(arguments[3])
# rwp_bp alignments of the examined sequence
in_rwp_bp = open(arguments[4])

# OUTPUT FILES
# problematic patterns with statistics
output_problem = open((arguments[5] + arguments[6] + '_problem_l' + str(limit) + '.txt'), "w+")
# erroneous regions in txt format, for easy opening (e.g. via the gmail viewer)
final_output = open(arguments[5] + arguments[6] + "_final_l" + str(limit) + '.txt', "+w")
# erroneous regions in bed format
final_output_bed = open(arguments[5] + arguments[6] + "_final_l" + str(limit) + '.bed', "+w")
# the columns of the final_output files are:
# contig_name - name of the contig, starting_bp_pos(including) - start of the region, ending_bp_pos(not including) - end of the region, node_pattern - the pattern, lods_count - number of long reads that cover it

# Process the information about the lengths of the graph nodes
def parse_graph():
    """Fill the module-level ``graph`` dict from the lines of ``in_graph``.

    Only lines whose first character is ``'S'`` are used. Such a line is
    split on tabs; the second column is the node id and the fourth column,
    minus its first five characters, is stored as the node length
    (presumably this drops an ``LN:i:`` tag prefix -- confirm against the
    graph file format). The length is stored as a string, unchanged
    otherwise, under both ``'+<id>'`` and ``'-<id>'``.
    """
    for record in in_graph:
        if record[0] != 'S':
            continue
        columns = record.split("\t")
        segment_id = columns[1]
        segment_len = columns[3][5:]
        # Both orientations of a node share the same length.
        for strand in ('+', '-'):
            graph[strand + segment_id] = segment_len

# Compute the length of a pattern in bases
def count_pattern_bplenght(pattern):
    """Return the summed length of the interior nodes of ``pattern``.

    ``pattern`` is split on single spaces; the first and the last token are
    ignored. When at most two tokens remain the result is 0. Otherwise every
    remaining token is looked up in the module-level ``graph`` dict and the
    values are converted with ``int`` and added up.

    Raises ``KeyError`` when an interior token is missing from ``graph``.
    """
    interior = pattern.split(" ")[1:-1]
    if len(interior) <= 2:
        return 0
    return sum(int(graph[str(name)]) for name in interior)

# Post-process the cpp output for the erroneous regions
def _read_rwp_record():
    """Read the next line of ``in_rwp_bp`` as a list of tab-separated fields.

    The trailing newline is removed before splitting so that the last field
    can be written into an output row safely. Returns ``None`` at end of file.
    """
    raw = in_rwp_bp.readline()
    if not raw:
        return None
    return raw.rstrip('\n').split('\t')


def finish_bed():
    """Translate node-based regions from ``in_cpp_final`` into coordinate rows.

    Every line of ``in_cpp_final`` is read as tab-separated fields:
    ``[0]`` zero-based index of the matching line in ``in_rwp_bp``,
    ``[1]`` a value copied to the last output column, ``[2]`` index of the
    first node of the region, ``[3]`` number of nodes in the region and
    ``[4]`` the node pattern.

    The matching ``in_rwp_bp`` line is indexed as a name followed by three
    fields per node; the second field of the first node and the third field
    of the last node become the start and end coordinates. The same row
    ``name, start, end, pattern, field[1]`` is written to both
    ``final_output`` and ``final_output_bed``.

    Raises:
        ValueError: if the record indices in ``in_cpp_final`` decrease
            (the alignment file is read forward only).
        IndexError: if a record index or node index lies beyond the data
            available in ``in_rwp_bp``.
    """
    record_index = 0
    record = _read_rwp_record()
    for line in in_cpp_final:
        # Strip the newline once so the pattern field is never truncated,
        # even when the last line of the file has no trailing newline.
        fields = line.rstrip('\n').split('\t')
        wanted_index = int(fields[0])
        if wanted_index < record_index:
            # in_rwp_bp cannot be rewound; looping here would never end.
            raise ValueError(
                'record index %d follows %d: indices must not decrease'
                % (wanted_index, record_index))
        while record_index < wanted_index:
            record = _read_rwp_record()
            record_index += 1
        if record is None:
            raise IndexError(
                'record index %d is past the end of the alignment file'
                % wanted_index)

        first_node = int(fields[2])
        last_node = first_node + int(fields[3]) - 1
        bp_beg = record[1 + 3 * first_node + 1]
        bp_end = record[1 + 3 * last_node + 2]

        # Build the row first so that only complete rows reach the outputs.
        row = '\t'.join((record[0], bp_beg, bp_end, fields[4], fields[1])) + '\n'
        final_output.write(row)
        final_output_bed.write(row)

# Post-process the cpp output for the erroneous patterns
def finish_problem_pattern():
    """Append two length columns to every line of ``in_cpp_problem``.

    For each input line the first tab-separated field is taken as the
    pattern. The line is written to ``output_problem`` followed by the
    number of space-separated pattern tokens minus two, and by the value
    returned by ``count_pattern_bplenght`` for that pattern.

    Only the trailing newline is stripped from the input line, so a final
    line without a newline keeps its last character.
    """
    for line in in_cpp_problem:
        record = line.rstrip('\n')
        pattern = record.split('\t')[0]
        pat_len = len(pattern.split(' ')) - 2
        pat_bp_len = count_pattern_bplenght(pattern)
        # Written in one piece so a failing lookup leaves no partial row.
        output_problem.write(
            '\t'.join((record, str(pat_len), str(pat_bp_len))) + '\n')

# Run the three processing steps in order. The files opened at the top of
# the script are closed even when a step fails, so buffered output is
# flushed to disk and no handle is left open.
try:
    parse_graph()
    finish_bed()
    finish_problem_pattern()
finally:
    for handle in (in_graph, in_cpp_final, in_cpp_problem, in_rwp_bp,
                   output_problem, final_output, final_output_bed):
        handle.close()
