/*  suffixArray.c
 * 
 *  R3lib
 * 
 *  Suffix array tools based on Ko-Aluru algorithm
 *  contains modification of original implementation from Pang Ko
 * 
 *  Copyright (C) 2006-2007  Michal Linhard <michal@linhard.sk>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2.1
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 * 
 */ 
#include <string.h>
#include "conf.h"
#include "suffixArray.h"  
#include "bitstr.h"
#include "test/debug.h" 

#ifdef DEBUG1
	/* debug-only global, exists in DEBUG1 builds only;
	 * createSuffixArray assigns 1 to it right before the LinearSuffixSort
	 * call of its type L branch
	 * NOTE(review): presumably tracks recursion depth for debug output -
	 * confirm against the code that reads it */
	int depth; 
#endif

/* createBWTTable
 *
 * fills bwt with the Burrows-Wheeler Transform of text, derived from the
 * suffix array sa: bwt[i] is the character preceding suffix sa[i]
 *
 * the suffix starting at position 0 has no preceding character, its
 * entry is set to 0
 *
 * BYTE* bwt has to be allocated by the caller for textLen bytes
 *
 * */
void createBWTTable(BYTE* text, int textLen, int* sa, BYTE* bwt)
{
	int pos = 0;

	while(pos < textLen)
	{
		const int suffixStart = sa[pos];

		bwt[pos] = (suffixStart != 0) ? text[suffixStart-1] : 0;
		pos++;
	}
}


/*
 * createLCPTable_2
 * 
 * creates lcp table for given suffix array, uses 4n extra memory
 * (a temporary rank array that is released before returning)
 * 
 * input:  text of size textLen and valid suffix array sa over text;
 *         text[textLen-1] is the position of the special character $
 *         and is never compared
 *         lcp must not be NULL
 * output: *lcp - newly allocated table of textLen ints, to be released by
 *         the caller with free(); entry of rank 0 is -1, entry k > 0 is the
 *         length of the longest common prefix of suffixes sa[k-1] and sa[k]
 * return:  0         - ok
 *          ERROR_MEM - out of memory, *lcp == NULL
 * */

int createLCPTable_2(BYTE* text, int textLen, int* sa, int** lcp)
{
	int i,j,k,h;
	int textLen1 = textLen-1; // position of $, comparisons stop before it
	int* lcp1;
	int* rank;

	// nothing is handed out unless the whole computation succeeds
	*lcp = NULL;

	rank = (int*) malloc(textLen*sizeof(int));
	if(rank == NULL)
		return ERROR_MEM;
	lcp1 = (int*) malloc(textLen*sizeof(int));
	if(!lcp1)
	{
		free(rank); rank = NULL;
		return ERROR_MEM;
	}

	// rank is the inverse permutation of sa
	for(i=0;i<textLen;i++)
		rank[sa[i]] = i;

	// suffixes are visited in text order; h is carried over from the
	// previous suffix, decreased by one, and only extended from there
	h=0;	
	for(i=0;i<textLen;i++)
	{
		k = rank[i];
		if(k==0)
			lcp1[k] = -1; // smallest suffix has no predecessor in sa
		else
		{
			j = sa[k-1];
			while((i+h<textLen1) && (j+h<textLen1) && (text[i+h]==text[j+h]))
				h++;
			lcp1[k] = h;
		}
		if(h>0)
			h--;
	}	

	free(rank); rank = NULL;
	*lcp = lcp1;	
	return 0;
}

/*
 * createLCPTable_3
 * 
 * compute lcp table using only 4n of output memory. The table is indexed
 * by text position, not by rank: LCP(i) == lcp[sa[i]]
 * 
 * input:  text of size textLen and valid suffix array sa over text;
 *         text[textLen-1] is the position of the special character $
 *         and is never compared
 *         lcp must not be NULL
 * output: *lcp - newly allocated table of textLen ints, to be released by
 *         the caller with free(); the entry of the suffix with rank 0 is -1
 * return:  0         - ok
 *          ERROR_MEM - out of memory, *lcp == NULL
 * */

int createLCPTable_3(BYTE* text, int textLen, int* sa, int** lcp)
{
	int i,j,k,h;
	int textLen1 = textLen-1; // position of $, comparisons stop before it
	int* lcp1;

	// nothing is handed out unless the whole computation succeeds
	*lcp = NULL;

	lcp1 = (int*) malloc(textLen*sizeof(int));
	if(!lcp1)
		return ERROR_MEM;

	for(i=0;i<textLen;i++) // fill lcp with rank info:
		lcp1[sa[i]] = i;
		
	// entry i holds the rank of suffix i until it is visited, then it is
	// overwritten by the lcp value of that suffix
	h=0;	
	for(i=0;i<textLen;i++)
	{
		k = lcp1[i]; // get rank of i-th suffix
		if(k==0)
			lcp1[i] = -1; // smallest suffix has no predecessor in sa
		else
		{
			j = sa[k-1];
			while((i+h<textLen1) && (j+h<textLen1) && (text[i+h]==text[j+h]))
				h++;
			lcp1[i] = h;
		}
		if(h>0)
			h--;
	}	
	*lcp = lcp1;
	return 0;
}

/*
 * createLCPTable
 * 
 * compute lcp table using only 4n of output memory. 
 * uses implementation trick by Giovanni Manzini: the output array first
 * holds, for every rank, the rank of the next suffix in text order, and
 * each entry is replaced by its lcp value when it is visited
 * 
 * input:  text of size textLen and valid suffix array sa over text;
 *         text[textLen-1] is the position of the special character $
 *         and is never read
 *         lcp must not be NULL
 * output: *lcp - newly allocated table of textLen ints, to be released by
 *         the caller with free(); entry of rank 0 is -1, entry k > 0 is the
 *         length of the longest common prefix of suffixes sa[k-1] and sa[k]
 * return:  0         - ok
 *          ERROR_MEM - mem allocation error, *lcp == NULL
 * */

int createLCPTable(BYTE* text, int textLen, int* sa, int** lcp)
{
	
	int i,j,h, nextk;
	int k = 0; // rank of suffix 0; a valid sa always overwrites this below
	int textLen1 = textLen-1;
	int count[SIGMA_SIZE];
	BYTE c;
	int* lcp1; // same element type as the table handed out through lcp
	
	// nothing is handed out unless the whole computation succeeds
	*lcp = NULL;

	lcp1 = (int*) malloc(textLen*sizeof(int));
	if(!lcp1)
		return ERROR_MEM;

	memset((void*) count, 0, sizeof(int)*SIGMA_SIZE);
	
	// count character occurrences ($ at textLen-1 is not counted)
	for(i=0;i<textLen1;i++)
		count[text[i]]++;
	// turn counts into first ranks of the per-character buckets;
	// rank 0 is reserved for the suffix consisting of $ only
	h = count[0];
	count[0] = 1;
	for(i=1;i<SIGMA_SIZE;i++)
	{
		j = count[i];
		count[i] = count[i-1] + h;
		h = j;
	}
	
  	// sa2ranknext: lcp1[rank of suffix p] = rank of suffix p+1
	if(textLen > 0)
		lcp1[0]=0; // the $ suffix has no successor in text order
	for(i=0;i<textLen;i++) 
	{
		if(sa[i]==0)
			k = i; // suffix 0 has no predecessor, remember where the walk starts
		else
		{
			// suffix sa[i]-1 starts with c; inside bucket c suffixes are
			// ordered by the rank of their successor, i.e. by i
			c = text[sa[i]-1];
			j = count[c]++;
			lcp1[j]=i;
		}
	}
	
	// walk the suffixes in text order, k is the rank of suffix i
	h=0;	
	for(i=0;i<textLen;i++)
	{
		nextk = lcp1[k]; // read the link before it is overwritten
		if(k==0)
			lcp1[k] = -1; 
		else
		{
			j = sa[k-1];
			while((i+h<textLen1) && (j+h<textLen1) && (text[i+h]==text[j+h]))
				h++;
			lcp1[k] = h;
		}
		if(h>0)
			h--;
		k = nextk;
	}	
	*lcp = lcp1;
	return 0;
}

/* createSuffixArrayMemReqs
 * returns total memory requirements of createSuffixArray function in bytes
 * including output location, should be around 8.28*textLen
 *
 * the segment sizes are derived the same way createSuffixArray derives
 * them: the "2n" segments are sized as 2 * r2maxlen * sizeof(int), which
 * can exceed rmaxlen * sizeof(int) (e.g. textLen == 5)
 * the bit vector segment is counted rounded up to a multiple of
 * sizeof(int), so the result also covers int-aligned placement of the
 * segment that follows it (at most sizeof(int)-1 bytes of slack)
 * */
UINT createSuffixArrayMemReqs(int textLen)
{
	UINT mem_n16_l;
	UINT mem_n32_l;
	UINT mem_n8_l;
	UINT mem_2n_l;

	UINT rmaxlen =  (UINT) (textLen+1)/2; // maximum n of recursive call, max value of numS or numL
	UINT r2maxlen = (UINT) (rmaxlen+1)/2; // maximum n of recursive call, depth 2
	
	// one bit per suffix of the input
	mem_n8_l = textLen / 8; 
	if (textLen % 8 != 0)   
		mem_n8_l++;

	// one bit per suffix of the recursive call
	mem_n16_l = rmaxlen / 8; 
	if ( rmaxlen % 8 != 0)   
		mem_n16_l++;
		
	// one bit per suffix of the recursive call, depth 2
	mem_n32_l = r2maxlen / 8;
	if ( r2maxlen % 8 != 0)   
		mem_n32_l++;

	mem_2n_l = 2*r2maxlen*sizeof(int);

	// none of the locations can have length 0
	if(!mem_n8_l) mem_n8_l = 1;
	if(!mem_n16_l) mem_n16_l = 1;
	if(!mem_n32_l) mem_n32_l = 1;
	if(!mem_2n_l) mem_2n_l = 1;

	// round the bit vector up to whole ints
	mem_n8_l = ((mem_n8_l + sizeof(int) - 1) / sizeof(int)) * sizeof(int);

	return 4*mem_2n_l + mem_n8_l + 2*mem_n16_l + mem_n32_l;
}

/* createSuffixArray
 * 
 * creates suffix array for given text, text may contain arbitrary binary data
 * 
 * input:     text - pointer to data
 *            textLen - length of input, textLen >= 1
 *            textLen-1 bytes of data are to be processed as text, last byte
 *            text[textLen-1] may have arbitrary value and is placeholder for special character $            
 * output:    *suffixArray - newly allocated suffix array of textLen ints, assigned at the
 *            end of computation; the caller releases it with free()
 * 			  suffixArray[0] = textLen-1 because $ is smaller than all values 0..255 of byte
 *            return value - 0               - ok
 *                           ERROR_BADARG    - NULL argument, textLen < 1 or input too large
 *                           ERROR_MEM       - out of memory
 *                           ERROR_INTEGRITY - internal consistency check failed
 *                           other nonzero   - error reported by LinearSuffixSort
 *            on every error *suffixArray == NULL (if suffixArray itself is not NULL)
 *            and all working memory has been released
 * mem usage: uses max 9.28n Bytes of memory including input and output location
 *            output space is reused during computation
 * */

int createSuffixArray(BYTE* text, int textLen, int** suffixArray)
{
	int i, j, numS, numL, maxDistance;
	int* arrayA = NULL;
	int* arrayB;
	int* tPrime;
	int* resized;
	t_bitstr suffixType;
	t_bitstr BuckB;
	
	BYTE* mem = NULL;  // suffixType bits + arrayB
	BYTE* mem2 = NULL; // remaining working memory, later shrunk into the output array

	// memory locations used by algorithm
	BYTE* mem_n8_1;    // suffixType
	BYTE* mem_2n_1;    // ArrayB
	BYTE* mem_2n_2;    // tPrime 
	BYTE* mem_2n_3;    // mem_skipVal
	BYTE* mem_2n_4;    // mem_tempBucket
	BYTE* mem_n16_1;   // BuckB + recursive data(n16+n16+n32)

	UINT mem_n16_l;
	UINT mem_n32_l;
	UINT mem_n8_l;
	UINT mem_n8_pad;   // mem_n8_l rounded up to a multiple of sizeof(int)
	UINT mem_2n_l;
	UINT mem_4n_l;
	UINT mem_r2n_l; 
	
	UINT rmaxlen, r2maxlen;
	
	int result = 0;

	if(!suffixArray) // argument check
		return ERROR_BADARG;
	*suffixArray = NULL; // stays NULL on every error path below

	if((!text) || (textLen <= 0)) // textLen >= 1
		return ERROR_BADARG;
		
	if(textLen > FLAGUINT_MAX) // do not try to process too large input
		return ERROR_BADARG;

//  ----- Memory allocation --------------------------------------	

	// compute lengths of allocation segments
	rmaxlen = (textLen+1)/2; // maximum n of recursive call, max value of numS or numL
	r2maxlen = (rmaxlen+1)/2; // maximum n of recursive call, depth 2
	
	mem_r2n_l = r2maxlen * sizeof(int); // max mem_2n_l in recursive call
	
	mem_n8_l = textLen / 8; 
	if (textLen % 8 != 0)   
		mem_n8_l++;
	
	mem_n16_l = rmaxlen / 8; 
	if ( rmaxlen % 8 != 0)   
		mem_n16_l++;
		
	mem_n32_l = r2maxlen / 8;
	if ( r2maxlen % 8 != 0)   
		mem_n32_l++;

	// rmaxlen*sizeof(int) is not enough here,
	// it may be that 2*mem_r2n_l > rmaxlen*sizeof(int)
	mem_2n_l = 2*mem_r2n_l; 
	mem_4n_l = textLen*sizeof(int);
	
	// none of the locations can have length 0
	if(!mem_n8_l) mem_n8_l = 1;
	if(!mem_n16_l) mem_n16_l = 1;
	if(!mem_n32_l) mem_n32_l = 1;
#ifdef DEBUG2
	if((mem_2n_l < 4) || (mem_4n_l < 4))
	{
		fprintf(stderr, "integrity error: (mem_2n_l < 4) || (mem_4n_l < 4)\n");
		return ERROR_INTEGRITY;
	}
#endif	

	// arrayB is accessed as int[] and follows the bit vector inside mem,
	// so the bit vector is padded to keep arrayB int-aligned
	mem_n8_pad = ((mem_n8_l + sizeof(int) - 1) / sizeof(int)) * sizeof(int);

	mem = (BYTE*) malloc( mem_2n_l + mem_n8_pad );
	mem2 = (BYTE*) malloc( 3*mem_2n_l + 2*mem_n16_l + mem_n32_l ); 
	if((mem == NULL) || (mem2 == NULL))
	{
		result = ERROR_MEM;
		goto finish;
	}
    
	mem_n8_1  = mem;
	mem_2n_1  = mem_n8_1 + mem_n8_pad;
    
	mem_2n_2  = mem2;
	mem_2n_3  = mem_2n_2 + mem_2n_l;
	mem_2n_4  = mem_2n_3 + mem_2n_l;
	mem_n16_1 = mem_2n_4 + mem_2n_l;
	// mem_n16_1 has size 2*mem_n16_l+mem_n32_l
        
//  ----- Beginning ----------------------------------------------	

	bs_init(&suffixType, mem_n8_1, textLen);
	arrayB = (int*) mem_2n_1;
	tPrime = (int*) mem_2n_2;
	
	suffix_type(text, textLen, &numS, &numL, &suffixType); 
	
	// suffix_type gives $ the type of the less frequent class,
	// that class is the one being sorted
	if (bs_getVal(&suffixType,textLen - 1) == 1)
	{
		//Less type S suffixes
		bs_init(&BuckB, mem_n16_1, numS);
			           
		s_distanceR(&suffixType, textLen, &maxDistance);
		
		// collect positions of all type S suffixes into arrayB in text
		// order: every i is written to the current slot and the slot is
		// advanced only when suffix i is of type S
		j = 0;
		for (i = 0; i < textLen; i++)
		{
			if(j >= numS) // arrayB holds numS entries only
			{
				fprintf(stderr, "error: (j >= numS)\n");
				result = ERROR_INTEGRITY;
				goto finish;
			}
			arrayB[j] = i;
			j = j + bs_getVal(&suffixType,i);
		}
		      
		sort_substrings(text, textLen, arrayB, &BuckB, numS, &suffixType, maxDistance, 1,
		                mem_2n_3, mem_2n_4);
		
		// recursion is needed only if some bucket still has more than one member
		if (!bs_isAllTrue(&BuckB))
		{
			construct_TPrime_typeS(arrayB, numS, &BuckB, tPrime, textLen, &suffixType, mem_2n_3); 

#ifdef DEBUG1
			output_intarray(tPrime, numS, "output/tPrime.txt");
#endif
		  
			result = LinearSuffixSort(tPrime, numS, arrayB, mem_2n_3);
			if(result)
				goto finish;
		    
			reconstruct_B_typeS(arrayB, numS, &suffixType, textLen, mem_2n_3);
		}

		// working memory in mem2 is not needed any more, reuse it as output;
		// on failure realloc leaves mem2 valid and finish releases it
		resized = (int*) realloc(mem2, mem_4n_l);
		if(!resized)
		{
			result = ERROR_MEM;
			goto finish;
		}
		mem2 = (BYTE*) resized;
		arrayA = resized;

		construct_SA_typeS_B(arrayB, numS, text, textLen, &suffixType, arrayA);
	}
	else
	{
		//Less type L suffixes
		bs_init(&BuckB, mem_n16_1, numL);
		   
		l_distanceR(&suffixType, textLen, &maxDistance);
		
		// collect positions of all type L suffixes into arrayB in text
		// order: every i is written to the current slot and the slot is
		// advanced only when suffix i is of type L
		j = 0;
		for (i = 0; i < textLen; i++)
		{
			if(j >= numL) // arrayB holds numL entries only
			{
				fprintf(stderr, "error: (j >= numL)\n");
				result = ERROR_INTEGRITY;
				goto finish;
			}
			arrayB[j] = i;
			j = j - (bs_getVal(&suffixType,i) - 1);
		}
		
#ifdef DEBUG1
		output_sa_to_file2(text, textLen, numL, arrayB, "output/arrayB0.txt");
#endif

		sort_substrings(text, textLen, arrayB, &BuckB, numL, &suffixType, maxDistance, 0,
						mem_2n_3, mem_2n_4);

#ifdef DEBUG1
		output_sa_to_file2(text, textLen, numL, arrayB, "output/arrayB1.txt");
#endif
		
		// recursion is needed only if some bucket still has more than one member
		if (!bs_isAllTrue(&BuckB))
		{
			construct_TPrime_typeL(arrayB, numL, &BuckB, tPrime, textLen, &suffixType, mem_2n_3); 
		 
#ifdef DEBUG1
			output_intarray(tPrime, numL, "output/tPrime.txt");
#endif
		 
#ifdef DEBUG1
			depth = 1;
#endif
			result = LinearSuffixSort(tPrime, numL, arrayB, mem_2n_3);
			if(result)
				goto finish;

#ifdef DEBUG1
			output_sa_to_file3(tPrime, numL, arrayB, "output/tPrime_sa.txt");
#endif
		    
			reconstruct_B_typeL(arrayB, numL, &suffixType, textLen, mem_2n_3);
		}

		// working memory in mem2 is not needed any more, reuse it as output;
		// on failure realloc leaves mem2 valid and finish releases it
		resized = (int*) realloc(mem2, mem_4n_l);
		if(!resized)
		{
			result = ERROR_MEM;
			goto finish;
		}
		mem2 = (BYTE*) resized;
		arrayA = resized;

		construct_SA_typeL_B(arrayB, numL, text, textLen, &suffixType, arrayA);
	}

finish:
	// single exit: mem is always released, mem2 only when it does not
	// become the result
	free((void*) mem); mem = NULL;
	if(result)
	{
		free((void*) mem2); mem2 = NULL;
		return result;
	}
	*suffixArray = arrayA;
	return 0;
}

/* sort_substrings
 *
 * sorts type S or type L substrings whose start positions are in A,
 * by repeated bucket sort on the character at increasing offsets
 * 0..maxDist; A is reordered in place and BuckA gets a 1 at the last
 * position of every bucket
 *
 * bucket key of a character is (character << 1) + its suffixType bit,
 * a position equal to textLen-1 ($) is placed in front of all keys
 *
 * skipVal[s] for a bucket starting at s:
 *   positive value e - bucket occupies s..e-1 and is refined further
 *   negative value -e - bucket (or run of buckets) s..e-1 is finished
 *                       and is skipped on the following passes
 *
 * type: 1 - sorting type S substrings, 0 - sorting type L substrings
 *
 * additional mem req: 4n
 * mem_skipVal: ALength * sizeof(int)
 * mem_tempBucket: ALength * sizeof(int)
 *   
 * */
 
void sort_substrings( BYTE* 				text, 
                      int 					textLen,
                      int* 					A, 
                      t_bitstr*		 		BuckA,
                      const int				ALength, 
		       		  t_bitstr*			 	suffixType, 
		       		  const int 			maxDist,
		       		  int		 			type,
		       		  BYTE* 				mem_skipVal,
		       		  BYTE*					mem_tempBucket)
{
	int i, b, prevCount, temp, tmpIndex, offset, special_offset;
	int start, end, prevPos;
	int bucketStart, bucketEnd;
	int buffer[bufferLen];
	int* skipVal;
	int* tempBucket;
	int specialPos = textLen-1;
	
	type=~(-(type&1));
	// type 0 -> 0xffffffff
	// type 1 -> 0x00000000

	bs_setAll(BuckA, 0);
	
	skipVal = (int*) mem_skipVal;
	tempBucket = (int*) mem_tempBucket;

	memset ((void*)skipVal, 0, sizeof(int) * ALength);
 
	// initially everything is one bucket 0..ALength-1
	skipVal[0] = ALength;

	for (offset = 0; offset <= maxDist; offset++)
	{
		start = 0;
	
		while (start < ALength)
		{
			// skip to another nonnegative bucket; the bound is tested
			// first so that skipVal[ALength] is never read
			prevPos = start;
			while (start < ALength && skipVal[start] < 0)
			{
				start = -skipVal[start];
			}

			if (start >= ALength)
			{
				// only finished buckets up to the end, merge them into one skip
				skipVal[prevPos] = -start;
				break;
			}

			// end has to be read before the write below, prevPos may equal start
			end = skipVal[start] - 1;
			skipVal[prevPos] = -start;
			memset((void*)buffer, 0, sizeof(int) * bufferLen);
	
			// start of bucket space may be shifted depending on
			// whether we need space for special character $
			special_offset = 0;  
				
			for (i = start; i <= end; i++)
			{
				//copy the element into temporary bucket.
				tempBucket[i] = A[i];
		
				//count the occurrence.
				tmpIndex = A[i] + offset;
				if(tmpIndex == specialPos) // text[specialPos] == $
				{
					special_offset++; // we need space before normal bucket space for $
				}
				else
				{	
					temp = ((int) text[tmpIndex]) << 1;
					temp = temp + bs_getVal(suffixType,tmpIndex);
					buffer[temp]++;
				}
			}
		
			//calculate the new starting value of each bucket.
			prevCount = buffer[0];
			buffer[0] = start+special_offset;
			
			for (i = 1; i < bufferLen; i++)
			{
				temp = buffer[i];
				buffer[i] = buffer[i - 1] + prevCount;
				prevCount = temp;
			}
		
			//put the elements in their correct positions.
			//must read from the temporary bucket, because array
			//A is being overwritten.
		
			for (i = start; i <= end; i++)
			{
				tmpIndex = tempBucket[i] + offset;
				if(tmpIndex == specialPos) // text[specialPos] == $
				{
					A[start] = tempBucket[i]; // insert special character to the front
					skipVal[start] = -(start+1); // and set skipVal and
					bs_setVal(BuckA,start,1);    // BuckA boundary flag for its bucket
				}
				else
				{	
					temp = ((int) text[tmpIndex]) << 1;
					temp = temp + bs_getVal(suffixType,tmpIndex);
					A[buffer[temp]] = tempBucket[i];
					buffer[temp]++;
				}
			}
		       		
			// invariant:
			// buffer[b] for 0 <= b < bufferLen-1 points to first item of bucket b+1
			// start+special_offset points to first item of bucket 0	
			// buffer[bufferLen-1] == end+1

			// set skipVal and BuckA for every nonempty bucket b
			for (b = 0; b < bufferLen; b++)
			{
				bucketStart = (b == 0) ? (start+special_offset) : buffer[b-1];
				bucketEnd = buffer[b];

				if (bucketEnd == bucketStart + 1)
				{
					//bucket with a single element is finished, skip it next time
					bs_setVal(BuckA, bucketEnd - 1, 1);
					skipVal[bucketStart] = - bucketEnd;
				}
				else if (bucketEnd > bucketStart + 1)
				{
					bs_setVal(BuckA, bucketEnd - 1, 1);
					if (offset > 0)
					{
						// skip depending on type of the bucket: the low
						// bit of the bucket index b is the suffixType bit
						// of its key, so the bucket is finished (negative
						// value) when that bit names the class being
						// sorted. The bucket index is used for bucket 0
						// as well, not a leftover loop counter.
						temp = (-(b & 1)) ^ type;
						skipVal[bucketStart] = (bucketEnd ^ temp) - temp;
					}
					else
					{
						// first pass, never skip a bucket with more elements
						skipVal[bucketStart] = bucketEnd;
					}
				}
			}
				
			//set the start to the point to the next bucket.
			start = end + 1;
		}
	}
}

/* suffix_type
 * 
 * computes suffixType mask for given text
 * 
 * input:     text - pointer to data
 *            textLen - length of input; text[textLen-1] stands for the
 *                      special character $ and its value is never read
 * output:    suffixType - boolean mask of suffix types 
 *                         0 - type L, 1 - type S
 *            numStype, numLtype - number of respective suffix types;
 *                         numStype + numLtype == textLen for textLen >= 1
 *
 * the suffix $ gets the type of the class that has fewer members
 * (type L on a tie), so textLen == 1 yields numStype 0, numLtype 1
 * for textLen < 1 both counts are 0 and suffixType is left untouched
 * */
 
void suffix_type(	BYTE* 		text, 
					int 		textLen, 
		 			int* 		numStype, 
		 			int* 		numLtype, 
		 			t_bitstr* 	suffixType)
{
	int i, k;
	int undecided = 0;          // length of the current run of equal characters
	int lastChar = textLen - 2; // position of the last real character
	
	(*numStype) = 0;
	(*numLtype) = 0;

	if (textLen < 1)
		return;
	
	bs_setAll(suffixType, 1);
	
	for (i = 0; i < lastChar; i++)
	{  
		if (text[i] < text[i+1])
		{
			// suffix i and the run of equal characters before it are type S
			for (k = i - undecided; k <= i; k++)
			{
				(*numStype)++;
				bs_setVal(suffixType, k, 1);
			}
			undecided = 0;
		}
		else if (text[i] > text[i+1])
		{
			// suffix i and the run of equal characters before it are type L
			for (k = i - undecided; k <= i; k++)
			{
				(*numLtype)++;
				bs_setVal(suffixType, k, 0);
			}
			undecided = 0;
		}
		else // text[i] == text[i+1]
		{
			undecided++; // number of undecided suffixes
		}
	}

	// the last real character is followed by $ and $ < c holds for all
	// values of BYTE c, so it is type L together with the pending run;
	// there is no such character when the text consists of $ only
	if (lastChar >= 0)
	{
		for (k = lastChar - undecided; k <= lastChar; k++)
		{
			(*numLtype)++;
			bs_setVal(suffixType, k, 0);
		}
	}

	if ((*numStype) < (*numLtype))
	{
		bs_setVal(suffixType, textLen - 1, 1);
		(*numStype)++;
	}
	else
	{
		bs_setVal(suffixType, textLen - 1, 0);
		(*numLtype)++;
	}
}

/* construct_SA_typeS_B
 * 
 * builds the complete suffix array from the sorted type S suffixes in
 * ArrayB with one left-to-right scan: free slots are filled from ArrayB
 * and every visited suffix pushes its type L predecessor to the head of
 * the bucket of the predecessor's first character
 *
 * on a shortage of ArrayB entries a message is written to stderr and the
 * function returns with suffixArray only partially built
 * 
 * additional mem req: none
 * 
 * */
 
void construct_SA_typeS_B(int* ArrayB, const int ArrayBLength, const BYTE* text, const int textLen,
			t_bitstr* suffixType, int* suffixArray)
{
	int bucketHead[SIGMA_SIZE];
	int pos, sym, running, size, pred;
	int nextB = 0;                   // next unused entry of ArrayB
	const int dollarPos = textLen-1; // $ is not part of any bucket
	
	// bucket sizes
	memset((void*)bucketHead, 0, SIGMA_SIZE * sizeof(int));
	for (pos = 0; pos < dollarPos; pos++)
		bucketHead[(int) text[pos]]++;
	  
	// sizes -> first position of each bucket; bucket 0 starts at
	// position 1, right after the special suffix
	running = 1;
	for (sym = 0; sym < SIGMA_SIZE; sym++)
	{
		size = bucketHead[sym];
		bucketHead[sym] = running;
		running += size;
	}
	
	// -1 marks a slot that is still free
	memset((void*)suffixArray, -1, textLen * sizeof(int));
	
	for (pos = 0; pos < textLen; pos++)
	{
		// a free slot takes the next element of ArrayB
		if (suffixArray[pos] == -1)
		{
			if(nextB >= ArrayBLength)
			{
				fprintf(stderr, "error: (j >= ArrayBLength)");
				return;
				/* TODO: should return int ERROR_INTEGRITY */
			}
			suffixArray[pos] = ArrayB[nextB];
			nextB++;
		}

		// in both cases the predecessor of the suffix in this slot is
		// moved to its place if it is of type L
		pred = suffixArray[pos] - 1;
		if (pred < 0)
			continue;
		if (bs_getVal(suffixType,pred) != 0)
			continue;

		sym = (int) text[pred];
		if (bucketHead[sym] > pos)
		{	
			suffixArray[bucketHead[sym]] = pred;
			bucketHead[sym]++;
		}
	}  
}

/* construct_SA_typeL_B
 * 
 * computes complete suffix array given sorted L type suffixes in ArrayB
 * with one right-to-left scan: free slots are filled from the end of
 * ArrayB and every visited suffix pushes its type S predecessor to the
 * tail of the bucket of the predecessor's first character
 *
 * if ArrayB runs out of entries a message is written to stderr and the
 * function returns with suffixArray only partially built
 *
 * additional mem req: none
 * 
 * */

void construct_SA_typeL_B(int* ArrayB, const int ArrayBLength, const BYTE* text, const int textLen,
			t_bitstr* suffixType, int* suffixArray)
{
	int count[SIGMA_SIZE];
	int prevChar, charBuck;
	int i, j, temp;
	int specialPos = textLen-1; // position of special character $
	
	// for each character in the alphabet calculate
	// the ending position of their bucket.
	
	memset((void*)count, 0, SIGMA_SIZE * sizeof(int));
	
	// count the occurrence. 
	for (i = 0; i < specialPos; i++)
	{
		temp = (int) text[i];
		count[temp]++;
	}
	
	// position 0 belongs to the special suffix, so the inclusive prefix
	// sums are already the last positions of the buckets
	for (i = 1; i < SIGMA_SIZE; i++)
	{
		count[i] = count[i-1] + count[i];
	}
	// count[i] points to last element of bucket i
	
	// initialize the suffix array, -1 marks a slot that is still free
	
	memset((void*)suffixArray, -1, textLen * sizeof(int));
	
	// ArrayB is consumed from its end
	
	j = ArrayBLength - 1;
	
	for (i = textLen - 1; i >= 0; i--)
	{
		// a free slot takes the next element of ArrayB
		if (suffixArray[i] == -1)
		{
			// j counts down, running out of elements shows as j < 0
			if(j < 0)
			{
				fprintf(stderr, "error: (j < 0)");
				return;
				/* TODO: should return int ERROR_INTEGRITY */
			}
			suffixArray[i] = ArrayB[j];
			j--;
		}

		// in both cases the predecessor of the suffix in this slot is
		// moved to its place if it is of type S
		prevChar = suffixArray[i] - 1;
		if (prevChar >= 0)
		{ 
			if (bs_getVal(suffixType,prevChar) == 1)
			{
				charBuck = (int) text[prevChar];
				if (count[charBuck] < i)
				{
					suffixArray[count[charBuck]] = prevChar;
					count[charBuck]--;
				}
			}
		}
	} 
}

/* testSuffixArray
 * 
 * test given suffix array
 * uses 4n additional memory (temporary rank array, always released)
 *
 * checks, in this order, stopping after the first group that fails:
 *   - suffixArray[0] == textLen-1 (the special suffix $ comes first)
 *   - every entry is inside 0..textLen-1
 *   - suffixArray is a permutation of all suffixes
 *   - neighbouring suffixes are ordered: first characters do not decrease
 *     and for equal first characters the ranks of the suffixes shortened
 *     by one character do not decrease
 *
 * every violation is reported to log
 * return: 0               - suffix array is valid
 *         ERROR_INTEGRITY - log is NULL, other argument unusable,
 *                           rank array could not be allocated (reported
 *                           to log), or a check failed
 * */
int testSuffixArray(BYTE* text, int textLen, int* suffixArray, FILE *log)
{
	int result = 0;
	int* rank = NULL;
	int i;

	if(!log)
		return ERROR_INTEGRITY;

	if((!text) || (!suffixArray) || (textLen < 1))
	{
		fprintf(log, "testSuffixArray: bad arguments\n");
		return ERROR_INTEGRITY;
	}

	if(suffixArray[0] != textLen-1)// first element should be $ (last suffix)
	{
		fprintf(log, "testSuffixArray: suffixArray[0] != textLen-1\n");
		return ERROR_INTEGRITY;
	}

	rank = (int*) malloc(textLen*sizeof(int));
	if(!rank)
	{
		fprintf(log, "testSuffixArray: out of memory\n");
		return ERROR_INTEGRITY;
	}
    	
	memset((void*)rank, -1, textLen * sizeof(int));
	
	// entries are used as indices below, so they are range checked first
	for(i=0;i<textLen;i++)
	{
		if((suffixArray[i] < 0) || (suffixArray[i] >= textLen))
		{
			fprintf(log, "testSuffixArray: suffixArray[%4i] out of range\n", i);
			result = -1;
		}
		else
			rank[suffixArray[i]] = i;
	}
	if(result)
		goto done;

	for(i=0;i<textLen;i++) //check if suffixArray is permutation of all suffixes
		if(rank[i]==-1)
		{
			fprintf(log, "testSuffixArray: suffix %4i not present in suffix array\n", i);
			result = -1;
		}
	if(result)
		goto done;
	
	for(i=1;i<textLen-1;i++) //we start from 1 so that suffixArray[i] is never textLen-1
	{
		if( text[suffixArray[i]] == text[suffixArray[i+1]] )
		{
			if( rank[suffixArray[i]+1] > rank[suffixArray[i+1]+1] )
			{
				fprintf(log,"testSuffixArray: bad order of suffixes: T[sa[%4i]]==T[sa[%4i]], rank[%4i +1] > rank[%4i +1]\n", 
				               i, i+1, suffixArray[i], suffixArray[i+1]);
				result = -1; // bad order of suffixes
			}
		}
		else if ( text[suffixArray[i]] > text[suffixArray[i+1]] )
		{
			fprintf(log,"testSuffixArray: bad order of suffixes: T[sa[%4i]] > T[sa[%4i]]\n", i, i+1);
			result = -1; // bad order of suffixes
		}
	}

done:
	free(rank); rank = NULL;
	if(result)
		return ERROR_INTEGRITY;
	
	return 0;
}

//----------\------------------------------/--------------------------------------------
//-----------\ original Pang Ko functions /---------------------------------------------
//------------\--------------------------/----------------------------------------------

// some are slightly edited, because of memory management modification

/* suffix_typei
 *
 * integer alphabet variant of suffix type classification
 *
 * output: suffixType - 0 for type L, 1 for type S
 *         numStype, numLtype - number of suffixes classified as S and L
 *
 * a suffix is classified when its character differs from the next one;
 * a run of equal characters inherits the type of the suffix that ends it
 * the last suffix gets the type of the class with fewer members
 * (type L on a tie)
 * */
void suffix_typei(	int* 		inputString, 
					int 		inputLength, 
		 			int*		numStype,
		 			int*		numLtype, 
		 			t_bitstr*	suffixType)
{
	const int lastPos = inputLength - 1;
	int pos, mark;
	int pending = 0; // suffixes waiting for a decision (run of equal characters)
	
	(*numStype) = 0;
	(*numLtype) = 0;
	
	bs_setAll(suffixType, 1);

	for (pos = 0; pos < lastPos; pos++)
	{  
		const int cur = inputString[pos];
		const int next = inputString[pos+1];

		if (cur == next)
		{
			// same first character, decision is postponed
			pending++;
			continue;
		}

		// the decision for pos applies to the whole pending run before it
		if (cur < next)
		{
			for (mark = pos - pending; mark <= pos; mark++)
			{
				bs_setVal(suffixType, mark, 1);
			}
			(*numStype) += pending + 1;
		}
		else
		{
			for (mark = pos - pending; mark <= pos; mark++)
			{
				bs_setVal(suffixType, mark, 0);
			}
			(*numLtype) += pending + 1;
		}
		pending = 0;
	}
	
	// the last suffix has to be selected whichever class gets sorted,
	// so it joins the class that is smaller in number
	if ((*numStype) < (*numLtype))
	{
		bs_setVal(suffixType, lastPos, 1);
		(*numStype)++;
	}
	else
	{
		bs_setVal(suffixType, lastPos, 0);
		(*numLtype)++;
	}
}

/* s_distanceR
 *
 * scans positions inputLength-2 .. 0 and keeps a running distance that
 * is reset to 1 at every type S suffix and grows by one at every type L
 * suffix; the value folded into *maxDist at each position is the
 * distance as it was before that position was examined
 *
 * both updates are branch free
 * NOTE(review): the fold acts as max(longest, run) provided
 * MAX_INT_INDEX is the index of the sign bit and >> on a negative int
 * is arithmetic - confirm against the definition of MAX_INT_INDEX
 * */
void s_distanceR(t_bitstr* suffixType, 
		 const int inputLength, int* maxDist)
{
	int pos = inputLength - 2;
	int run = 1;     // distance carried from the positions to the right
	int longest = 0; // the last suffix itself contributes distance 0
	int mask;
	
	while (pos >= 0)
	{
		// mask is all ones when longest < run, then run is taken over
		mask = (longest - run) >> MAX_INT_INDEX;
		longest = longest + ((run - longest) & mask);
		
		// type L (bit 0): mask all ones, run grows by one
		// type S (bit 1): mask zero, run restarts at 1
		mask = bs_getVal(suffixType,pos) - 1;
		run = (run & mask) + 1;

		pos--;
	}
	*maxDist = longest;
}  


/* l_distanceR
 *
 * computes the reverse l_distance: scans positions inputLength-2 .. 0
 * and keeps a running distance that is reset to 1 at every type L suffix
 * and grows by one at every type S suffix; the value folded into
 * *maxDist at each position is the distance as it was before that
 * position was examined
 *
 * both updates are branch free
 * NOTE(review): the fold acts as max(longest, run) provided
 * MAX_INT_INDEX is the index of the sign bit and >> on a negative int
 * is arithmetic - confirm against the definition of MAX_INT_INDEX
 * */
void l_distanceR(t_bitstr* suffixType, 
		 const int inputLength, int* maxDist)
{
	int pos = inputLength - 2;
	int run = 1;     // distance carried from the positions to the right
	int longest = 0; // the last suffix itself contributes distance 0
	int mask;
	  
	while (pos >= 0)
	{
		// mask is all ones when longest < run, then run is taken over
		mask = (longest - run) >> MAX_INT_INDEX;
		longest = longest + ((run - longest) & mask);
		
		// type S (bit 1): mask all ones, run grows by one
		// type L (bit 0): mask zero, run restarts at 1
		mask = 0 - bs_getVal(suffixType,pos);
		run = (run & mask) + 1;

		pos--;
	}
	*maxDist = longest;
}  

/* construct_TPrime_typeS
 *
 * builds the string of the recursive call: tPrime lists, in text order,
 * the bucket number of every type S suffix
 *
 * ArrayB holds the type S suffixes ordered by bucket and BuckB has a 1
 * at the last member of every bucket, so bucket numbers are assigned by
 * counting the boundaries passed so far
 *
 * only slots that belong to a type S suffix are read from Buckets and
 * written to tPrime; the other slots were never initialised
 *
 * additional mem req: 4n
 * mem_Buckets - inputLength * sizeof(int)
 * */
 
void construct_TPrime_typeS(int* 					ArrayB, 
							const int 				ArrayBLength,
			    			t_bitstr*			 	BuckB,
			    			int* 					tPrime,
			    			const int				inputLength,
			    			t_bitstr*			 	suffixType,
			    			BYTE* 					mem_Buckets)
{
	int* Buckets;
	int i, j, currBuck;
	
	// for each type S suffix calculate its bucket number,
	// Buckets is indexed by text position
	  
	Buckets = (int*) mem_Buckets;
	 
	currBuck = 0;
	for (i = 0; i < ArrayBLength; i++)
	{
		Buckets[ArrayB[i]] = currBuck;
		currBuck = currBuck + bs_getVal(BuckB,i);
	}
	
	// construct tPrime
	j = 0;
	for (i = 0; i < inputLength; i++)
	{
		if (bs_getVal(suffixType,i) == 1)
		{
			tPrime[j] = Buckets[i];
			j++;
		}
	}
}

/* construct_TPrime_typeL
 *
 * builds the string of the recursive call: tPrime lists, in text order,
 * the bucket number of every type L suffix
 *
 * ArrayB holds the type L suffixes ordered by bucket and BuckB has a 1
 * at the last member of every bucket, so bucket numbers are assigned by
 * counting the boundaries passed so far
 *
 * only slots that belong to a type L suffix are read from Buckets and
 * written to tPrime; the other slots were never initialised
 *
 * additional mem req: 4n
 * mem_Buckets - inputLength * sizeof(int)
 * */
 
void construct_TPrime_typeL(int* 					ArrayB, 
							const int 				ArrayBLength,
			    			t_bitstr*			 	BuckB,
			    			int* 					tPrime,
			    			const int				inputLength,
			    			t_bitstr*			 	suffixType,
			    			BYTE* 					mem_Buckets)
{
	int* Buckets;
	int i, j, currBuck;
	
	// for each type L suffix calculate its bucket number,
	// Buckets is indexed by text position
	
	Buckets = (int*) mem_Buckets;
	 
	currBuck = 0;
	for (i = 0; i < ArrayBLength; i++)
	{
		Buckets[ArrayB[i]] = currBuck;
		currBuck = currBuck + bs_getVal(BuckB,i);
	}
	
	// construct tPrime
	j = 0;
	for (i = 0; i < inputLength; i++)
	{
		if (bs_getVal(suffixType,i) == 0)
		{
			tPrime[j] = Buckets[i];
			j++;
		}
	}
}



/* construct_ArrayB_typeS
 *
 * copies the type S suffixes of ArrayA to ArrayB, keeping their order,
 * and carries the bucket boundaries over: whenever position src of
 * ArrayA closes a bucket in BuckA, the most recently copied entry of
 * ArrayB is flagged in BuckB (nothing is flagged while ArrayB is empty)
 *
 * additional mem req: none
 * */

void construct_ArrayB_typeS(const int* ArrayA, const int inputLength, 
			    t_bitstr* BuckA, 
			    int* ArrayB, t_bitstr* BuckB, 
			    t_bitstr* suffixType)
{
	int src = 0;  // position in ArrayA
	int kept = 0; // number of entries copied to ArrayB so far
	
	bs_setAll(BuckB,0);

	while (src < inputLength)
	{
		const int suffix = ArrayA[src];

		if (bs_getVal(suffixType,suffix) == 1)
		{
			ArrayB[kept] = suffix;
			kept++;
		}
		if (bs_getVal(BuckA,src) == 1 && kept >= 1)
		{
			bs_setVal(BuckB, kept - 1, 1);
		}
		src++;
	}
}


/* construct_ArrayB_typeL
 *
 * copies the type L suffixes of ArrayA to ArrayB, keeping their order,
 * and carries the bucket boundaries over: whenever position src of
 * ArrayA closes a bucket in BuckA, the most recently copied entry of
 * ArrayB is flagged in BuckB (nothing is flagged while ArrayB is empty)
 *
 * additional mem req: none
 * */
void construct_ArrayB_typeL(const int* ArrayA, const int inputLength, 
			    t_bitstr* BuckA, 
			    int* ArrayB, t_bitstr* BuckB, 
			    t_bitstr* suffixType)
{
	int src = 0;  // position in ArrayA
	int kept = 0; // number of entries copied to ArrayB so far
	
	bs_setAll(BuckB,0);
	
	while (src < inputLength)
	{
		const int suffix = ArrayA[src];

		if (bs_getVal(suffixType,suffix) == 0)
		{
			ArrayB[kept] = suffix;
			kept++;
		}
		if (bs_getVal(BuckA,src) == 1 && kept >= 1)
		{
			bs_setVal(BuckB, kept - 1, 1);
		}
		src++;
	}
}

/*
 * construct_SA_typeS
 *
 * Builds the complete suffix array of stringT from the sorted list of
 * its type S suffixes (ArrayB).
 *
 * The array is scanned left to right. A slot that is still -1 takes
 * the next entry of ArrayB. For every entry placed at slot i, the
 * suffix starting one position earlier is inserted at the current
 * front of its first-character bucket, provided it is of type L
 * (suffixType bit 0) and that bucket front still lies to the right
 * of i.
 *
 * ArrayB/ArrayBLength - sorted type S suffixes
 * stringT/inputLength - the text over an integer alphabet
 * suffixType          - bit per suffix, 1 = type S, 0 = type L
 * suffixArray         - output, inputLength entries
 *
 * If ArrayB runs out before all empty slots are filled, an error is
 * printed to stderr and the function returns with suffixArray only
 * partially built.
 *
 * additional mem req: 4n
 * mem_count - sigma * sizeof(int) <= inputLength * sizeof(int)
 * */

void construct_SA_typeS(int* 					ArrayB, 
						const int				ArrayBLength,
						const int* 				stringT,
						const int				inputLength,
						t_bitstr*				suffixType,
						int*					suffixArray,
						BYTE*					mem_count)
{
	int* count;
	int sigma, maxChar, minChar;
	int i, j, temp, prevCount, prevChar, charBuck;
	
	// determine the range of characters used in stringT
	
	findMaxMin(stringT, inputLength, &maxChar, &minChar);
	sigma = maxChar - minChar + 1;
	
	// count[c - minChar] = number of occurrences of character c
	
	count = (int*) mem_count;
	memset((void*)count, 0, sigma * sizeof(int));
	
	for (i = 0; i < inputLength; i++)
	{
		count[stringT[i] - minChar]++;
	}
	
	// exclusive prefix sum: count[c - minChar] becomes the first
	// position of the bucket of character c
	
	prevCount = 0;
	for (i = 0; i < sigma; i++)
	{
		temp = count[i];
		count[i] = prevCount;
		prevCount += temp;
	}
	
	// mark every slot of the suffix array as empty
	// (all-ones bytes give the int value -1)
	
	memset((void*)suffixArray, -1, inputLength * sizeof(int));
	
	// j is the next unread entry of ArrayB
	
	j = 0;
	
	for (i = 0; i < inputLength; i++)
	{
		// an empty slot receives the next type S suffix
		
		if (suffixArray[i] == -1)
		{
			if(j >= ArrayBLength)
			{
				fprintf(stderr, "error: (j >= ArrayBLength)");
				return;
				/* TODO: should return int ERROR_INTEGRITY */
			}
			suffixArray[i] = ArrayB[j];
			j++;
		}
		
		// move the preceding suffix into its bucket if it is type L
		
		prevChar = suffixArray[i] - 1;
		if (prevChar >= 0 && bs_getVal(suffixType,prevChar) == 0)
		{
			// count[] is indexed relative to minChar, exactly as
			// it was filled above
			charBuck = stringT[prevChar] - minChar;
			if (count[charBuck] > i)
			{
				suffixArray[count[charBuck]] = prevChar;
				count[charBuck]++;
			}
		}
	}  
}



/*
 * construct_SA_typeL
 *
 * Builds the complete suffix array of stringT from the sorted list of
 * its type L suffixes (ArrayB).
 *
 * The array is scanned right to left. A slot that is still -1 takes
 * the next entry of ArrayB, read from its end. For every entry placed
 * at slot i, the suffix starting one position earlier is inserted at
 * the current back of its first-character bucket, provided it is of
 * type S (suffixType bit 1) and that bucket back still lies to the
 * left of i.
 *
 * ArrayB/ArrayBLength - sorted type L suffixes
 * stringT/inputLength - the text over an integer alphabet
 * suffixType          - bit per suffix, 1 = type S, 0 = type L
 * suffixArray         - output, inputLength entries
 *
 * If ArrayB runs out before all empty slots are filled, an error is
 * printed to stderr and the function returns with suffixArray only
 * partially built.
 *
 * additional mem req: 4n
 * mem_count - sigma * sizeof(int) <= inputLength * sizeof(int)
 * */

void construct_SA_typeL(int* 					ArrayB, 
						const int				ArrayBLength,
						const int* 				stringT,
						const int				inputLength,
						t_bitstr*				suffixType,
						int*					suffixArray,
						BYTE*					mem_count)
{
	int* count;
	int sigma, maxChar, minChar, prevChar, charBuck;
	int i, j;
	
	// determine the range of characters used in stringT
	
	findMaxMin(stringT, inputLength, &maxChar, &minChar);
	sigma = maxChar - minChar + 1;
	
	// count[c - minChar] = number of occurrences of character c
	
	count = (int*) mem_count;
	memset((void*)count, 0, sigma * sizeof(int));
	
	for (i = 0; i < inputLength; i++)
	{
		count[stringT[i] - minChar]++;
	}
	
	// inclusive prefix sum minus one: count[c - minChar] becomes
	// the last position of the bucket of character c
	
	count[0] = count[0] - 1;
	
	for (i = 1; i < sigma; i++)
	{
		count[i] = count[i-1] + count[i];
	}
	
	// mark every slot of the suffix array as empty
	// (all-ones bytes give the int value -1)
	
	memset((void*)suffixArray, -1, inputLength * sizeof(int));
	
	// j is the next unread entry of ArrayB, consumed back to front
	
	j = ArrayBLength - 1;
	
	for (i = inputLength - 1; i >= 0; i--)
	{
		// an empty slot receives the next type L suffix
		
		if (suffixArray[i] == -1)
		{
			// j only ever decreases, so exhaustion of ArrayB
			// shows up as j dropping below zero
			if(j < 0)
			{
				fprintf(stderr, "error: (j < 0)");
				return;
				/* TODO: should return int ERROR_INTEGRITY */
			}
			suffixArray[i] = ArrayB[j];
			j--;
		}
		
		// move the preceding suffix into its bucket if it is type S
		
		prevChar = suffixArray[i] - 1;
		if (prevChar >= 0 && bs_getVal(suffixType,prevChar) == 1)
		{
			// count[] is indexed relative to minChar, exactly as
			// it was filled above
			charBuck = stringT[prevChar] - minChar;
			if (count[charBuck] < i)
			{
				suffixArray[count[charBuck]] = prevChar;
				count[charBuck]--;
			}
		}
	} 
}



/* LinearSuffixSort
 * 
 * modification of original LinearSuffixSort function by Pang Ko
 * computes suffix array over string over integer alphabet
 * last character of string should be 0 (lowest character)
 * 
 * inputString  - text of stringLength integers
 * stringLength - number of characters; 0 or a value above FLAGUINT_MAX
 *                is rejected with ERROR_BADARG
 * suffixArray  - output; also used as working space (ArrayA) while the
 *                function runs
 * mem          - caller supplied work memory. It is cut into six
 *                consecutive segments (see "segmentation of mem
 *                location" below) and its tail, starting at the
 *                BuckList segment, is handed to the recursive call.
 *                NOTE(review): the original comment was truncated
 *                ("mem should be allocated for 4."); the exact size the
 *                caller must provide is not stated here - confirm
 *                against the caller.
 * 
 * return: 0 on success, ERROR_BADARG / ERROR_INTEGRITY on invalid
 *         length, or the non-zero result of the recursive call.
 * 
 * Outline: classify the suffixes, bucket them by first character
 * (counting_sort), then - depending on the type bit of the last
 * suffix - sort either the type S or the type L suffixes by the
 * distance lists, recurse on the reduced string tPrime if their
 * buckets are not yet all of size one, and finally derive the full
 * suffix array with construct_SA_typeS / construct_SA_typeL.
 * 
 * */
int LinearSuffixSort(int* inputString, int stringLength, int* suffixArray, BYTE* mem)
{
	t_bitstr suffixType;
	t_bitstr BuckA;
	t_bitstr BuckB, BuckList;
	int numS, numL, maxDist;
	int* ArrayA; 
	int* ArrayB;
	int* Dist; 
	int* DistCount;
	int* theList;
	int* tPrime;
	int listLength;
	int result;

#ifdef DEBUG1
	// scratch used only for the debug dumps written to output/
	char filename[100];
	char tmp[100];
	int boolBuckList[100];
	int boolBuck[100];
	int i;
#endif

	//                       size  peak_struct  other_use
	BYTE* mem_n8_1;       // n/8   suffixType
	BYTE* mem_2n_1;       // 2n    ArrayB
	BYTE* mem_2n_2;       // 2n    mem_Left,    mem_Right, BuckA, tPrime
	BYTE* mem_n8_2;       // n/8   BuckList
	BYTE* mem_n16_1;      // n/16  BuckB
	BYTE* mem_4n_1;       // 4n    mem_Rev,     Dist, mem_intBuffer, mem_count
	
//	UINT last_writable_byte;
	
	// lengths for allocation
	int mem_4n_l;
	int mem_2n_l;
	int mem_n8_l;
	int mem_n16_l;

// argument validation
	if(stringLength ==0)
		return ERROR_BADARG;
	if(stringLength > FLAGUINT_MAX)
		return ERROR_BADARG;

//  ----- Memory allocation --------------------------------------

	// compute lengths of allocated segments 
	// mem_n8_l: bytes needed for a bit string of stringLength bits
    mem_n8_l = stringLength / 8; 
    if (stringLength % 8 != 0)   
		mem_n8_l++; 
        
	// mem_n16_l: bytes needed for a bit string of (stringLength+1)/2 bits
    mem_n16_l = ((stringLength+1)/2) / 8; 
    if ( ((stringLength+1)/2) % 8 != 0)   
    	mem_n16_l++; 
    
	mem_2n_l = (stringLength+1)/2; // maximum value of numS or numL
	mem_2n_l *= sizeof(int);
	
	mem_4n_l = stringLength*sizeof(int);

	// none of the locations can have length 0
	if(!mem_n8_l) mem_n8_l = 1;
	if(!mem_n16_l) mem_n16_l = 1;
#ifdef DEBUG2
	if((mem_2n_l < 4) || (mem_4n_l < 4))
	{
		fprintf(stderr, "error: (mem_2n_l < 4) || (mem_4n_l < 4)");
		return ERROR_INTEGRITY;
	}
#endif	


	// segmentation of mem location
	// layout: [n8 suffixType][2n ArrayB][2n BuckA/tPrime][n8 BuckList][n16 BuckB][4n Dist/Rev/count]
	mem_n8_1  = mem;
	mem_2n_1  = mem_n8_1  + mem_n8_l;
	mem_2n_2  = mem_2n_1  + mem_2n_l;
	mem_n8_2  = mem_2n_2  + mem_2n_l;  
	mem_n16_1 = mem_n8_2  + mem_n8_l;
	mem_4n_1  = mem_n16_1 + mem_n16_l; 
	
#ifdef DEBUG2
//	test_writability(mem_n8_1, mem_n8_l);
//	test_writability(mem_2n_1, mem_2n_l);
//	test_writability(mem_2n_2, mem_2n_l);
//	test_writability(mem_n8_2, mem_n8_l);
//	test_writability(mem_n16_1, mem_n16_l);
//	test_writability(mem_4n_1, mem_4n_l);
	// DEBUG check array boundaries by writing
//	mem_n8_1[mem_n8_l-1] = 1;
//	mem_2n_1[mem_2n_l-1] = 1;
//	mem_2n_2[mem_2n_l-1] = 1;
//	mem_n8_2[mem_n8_l-1] = 1;
//	mem_n16_1[mem_n16_l-1] = 1;
//	mem_4n_1[mem_4n_l-1] = 1;
#endif	


//  ----- Beginning ----------------------------------------------	
	bs_init(&suffixType, mem_n8_1, stringLength);
	bs_init(&BuckA, mem_2n_2, stringLength);
	ArrayA = suffixArray; // reuse output space
	ArrayB = (int*) mem_2n_1;
	Dist   = (int*) mem_4n_1;
	tPrime = (int*) mem_2n_2; // shares its segment with BuckA
	
	// only negative lengths can reach this check; zero was rejected above
	if(stringLength <= 0)
	{
#ifdef DEBUG1
		depth--;
#endif	
		return ERROR_INTEGRITY;
	}  

	// presumably fills suffixType and counts the suffixes of each type
	// in numS / numL (helper defined elsewhere)
	suffix_typei(inputString, stringLength, &numS, &numL, &suffixType);

	// shortcut: the last suffix is the only type S suffix, so the
	// sorted list of type S suffixes is known without any sorting
	if (bs_getVal(&suffixType,stringLength - 1) == 1 && numS == 1)
	{
		ArrayB[0] = stringLength - 1;

		// ArrayB read
		// suffixType read
		// ArrayA initiated
		// mem_intBuffer used
        // MEM: mem_count - sigma * sizeof(int) <= inputLength * sizeof(int)
		construct_SA_typeS(ArrayB, numS, inputString, stringLength, &suffixType, suffixArray, mem_4n_1);  

#ifdef DEBUG1
		depth--;
#endif	
		return 0;
	}

	// ArrayA initiated, 
	// BuckA initiated, 
	// mem_intBuffer used
    // MEM: mem_intBuffer - sizeof(int)*sigmaSize <= sizeof(int)*inputLength
	counting_sort(inputString, stringLength, ArrayA, &BuckA, mem_4n_1);
	
#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_ArrayA_BuckA_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "(S).txt");
		for(i=0;i<stringLength;i++)
			boolBuck[i] = bs_getVal(&BuckA, i);
        output_array_buck(ArrayA, stringLength, boolBuck, filename);
#endif	

	// the type bit of the last suffix selects which type is sorted:
	// 1 -> work on the type S suffixes, 0 -> work on the type L suffixes
	if (bs_getVal(&suffixType,stringLength - 1) == 1)
	{
#ifdef DEBUG2
//		fprintf(stderr, "(iS)");
#endif			
		bs_init(&BuckB, mem_n16_1, numS);
		
		// ArrayA read, 
		// BuckA read, 
		// ArrayB initiated, 
		// BuckB initiated, 
		// suffixType read
		// MEM: none
		construct_ArrayB_typeS(ArrayA, stringLength, &BuckA, ArrayB, &BuckB, &suffixType); 

#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_ArrayB_BuckB");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "(S).txt");
		for(i=0;i<numS;i++)
			boolBuck[i] = bs_getVal(&BuckB, i);
        output_array_buck(ArrayB, numS, boolBuck, filename);
#endif	

		// Dist initiated 
		// DistCount initiated (allocated by s_distance, freed below)
		// suffixType read
		s_distance(&suffixType, stringLength, Dist, &DistCount, &maxDist);

#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_Dist_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "(S).txt");
		output_dist(Dist, stringLength, DistCount, maxDist, filename);
#endif


		// number of suffixes with a non-zero S-distance
		listLength = DistCount[maxDist];
		bs_init(&BuckList, mem_n8_2, listLength);

		// ArrayA modified
		// Dist initiated
		// DistCount modified
		// BuckA read
		// BuckList initiated
		// MEM: none
		theList = construct_list_typeS(ArrayA, stringLength, Dist, 
				   DistCount, maxDist, &BuckA, &BuckList, listLength);
		// theList == ArrayA

#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_Lists_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "(S).txt");
		for(i=0;i<listLength;i++)
			boolBuckList[i] = bs_getVal(&BuckList, i);
        output_lists(theList, boolBuckList, listLength, filename);
#endif
        		
		free(DistCount); DistCount = NULL;
		
		// ArrayB initiated
		// BuckB initiated
		// theList read
		// BuckList read
        // MEM: mem_Rev - inputLength * sizeof(int)
        //      mem_Left - ArrayBLength * sizeof(int)
		sort_by_list_typeS(ArrayB, &BuckB, theList, &BuckList, stringLength, listLength, numS, mem_4n_1, mem_2n_2);
#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_ArrayB_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "_Sorted(S).txt");
		output_intarray(ArrayB, numS, filename);
#endif
		// every bucket of ArrayB has size one: the type S suffixes
		// are fully sorted and no recursion is needed
		if (bs_isAllTrue(&BuckB))
		{
			// ArrayB read
			// suffixType read
			// ArrayA initiated
			// MEM: mem_count - sigma * sizeof(int) <= inputLength * sizeof(int)
			construct_SA_typeS(ArrayB, numS, inputString, stringLength, &suffixType, suffixArray, mem_4n_1);
#ifdef DEBUG1
			depth--;
#endif			
			return 0;
		}
 
		// ArrayB read
		// BuckB read
		// tPrime initiated
		// suffixType read
        // MEM: mem_Buckets - inputLength * sizeof(int)
		construct_TPrime_typeS(ArrayB, numS, &BuckB, tPrime, stringLength, &suffixType, mem_4n_1);

		// tPrime read
		// ArrayB initiated
		
#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_tPrime_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "(S).txt");

		output_intarray(tPrime, numS, filename);
		depth++;
#endif		
		// recurse on the reduced string; the work memory for the
		// subproblem starts at the BuckList segment
		result = LinearSuffixSort(tPrime, numS, ArrayB, mem_n8_2);

#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_tPrime_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "SA(S).txt");

		output_sa_to_file3(tPrime, numS, ArrayB, filename);
#endif		


		// propagate a failure of the recursive call unchanged
		if(result)
		{
//			suffixArray = NULL;
#ifdef DEBUG1
			depth--;
#endif			
			return result;
		}

		// ArrayB initiated
		// suffixType read
		// mem_tPrime used
        // MEM: mem_convertion - ArrayBLength * sizeof(int)
		reconstruct_B_typeS(ArrayB, numS, &suffixType, stringLength, mem_2n_2);
#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_ArrayB_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "_Reconstructed(S).txt");

		output_intarray(ArrayB, numS, filename);
#endif	
		// ArrayB read
		// suffixType read
		// ArrayA initiated
		// mem_intBuffer used
        // MEM: mem_count - sigma * sizeof(int) <= inputLength * sizeof(int)		
		construct_SA_typeS(ArrayB, numS, inputString, stringLength, &suffixType, suffixArray, mem_4n_1);

#ifdef DEBUG1
		depth--;
#endif			
		return 0;
	}
	else 
	{
#ifdef DEBUG2
//		fprintf(stderr, "(iL)");
#endif			
		bs_init(&BuckB, mem_n16_1, numL);

		// ArrayA read
		// BuckA read
		// ArrayB initiated
		// BuckB initiated
		// suffixType read
		// MEM: none
		construct_ArrayB_typeL(ArrayA, stringLength, &BuckA, ArrayB, &BuckB, &suffixType);

#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_ArrayB_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "(L).txt");
		output_intarray(ArrayB, numL, filename);
#endif

		// suffixType read
		// Dist initiated
		// DistCount initiated (freed below)
		l_distance(&suffixType, stringLength, Dist, &DistCount, &maxDist);

		// number of suffixes with a non-zero L-distance
		listLength = DistCount[maxDist];
		bs_init(&BuckList, mem_n8_2, listLength);
		
		// ArrayA modified
		// Dist initiated
		// DistCount modified
		// BuckA read
		// BuckList initiated
		// MEM: none
		theList = construct_list_typeL(ArrayA, stringLength, Dist, 
				   DistCount, maxDist, &BuckA, &BuckList, listLength);
		// theList == ArrayA
				   
		free(DistCount); DistCount = NULL;

		// ArrayB initiated
		// BuckB initiated
		// theList read
		// BuckList read
        // MEM: mem_Rev - inputLength * sizeof(int)
        //      mem_Right - ArrayBLength * sizeof(int)
		sort_by_list_typeL(ArrayB, &BuckB, theList, &BuckList, stringLength, listLength, numL, mem_4n_1, mem_2n_2);
   
#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_ArrayB_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "_Sorted(L).txt");
		output_intarray(ArrayB, numL, filename);
#endif
		// every bucket of ArrayB has size one: the type L suffixes
		// are fully sorted and no recursion is needed
		if (bs_isAllTrue(&BuckB))
		{
			// ArrayB read
			// suffixType read
			// ArrayA initiated
			// mem_intBuffer used
			// MEM: mem_count - sigma * sizeof(int) <= inputLength * sizeof(int)
			construct_SA_typeL(ArrayB, numL, inputString, stringLength, &suffixType, suffixArray, mem_4n_1);
#ifdef DEBUG1
			depth--;
#endif		
			return 0;
		}

		// ArrayB read
		// BuckB read
		// tPrime initiated
		// suffixType read
        // MEM: mem_Buckets - inputLength * sizeof(int)
		construct_TPrime_typeL(ArrayB, numL, &BuckB, tPrime, stringLength, &suffixType, mem_4n_1);

#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_tPrime_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "(L).txt");

		output_intarray(tPrime, numL, filename);
		depth++;
#endif	
		// recurse on the reduced string; the work memory for the
		// subproblem starts at the BuckList segment
		result = LinearSuffixSort(tPrime, numL, ArrayB, mem_n8_2);

#ifdef DEBUG1
		strcpy(filename, "output/LinearSuffixSort_tPrime_");
		sprintf(tmp, "%i", depth);
		strcat(filename, tmp);
		strcat(filename, "SA(L).txt");

		output_sa_to_file3(tPrime, numL, ArrayB, filename);
#endif		

		// propagate a failure of the recursive call unchanged
		if(result)
		{
#ifdef DEBUG1
			depth--;
#endif		
			return result;
		}
		
		// ArrayB initiated
		// suffixType read
		// mem_tPrime used
        // MEM: mem_convertion - ArrayBLength * sizeof(int)
		reconstruct_B_typeL(ArrayB, numL, &suffixType, stringLength, mem_2n_2);
		
		// ArrayB read
		// suffixType read
		// ArrayA initiated
		// mem_intBuffer used
        // MEM: mem_count - sigma * sizeof(int) <= inputLength * sizeof(int)		
		construct_SA_typeL(ArrayB, numL, inputString, stringLength, &suffixType, suffixArray, mem_4n_1);

#ifdef DEBUG1
		depth--;
#endif		
		return 0;
	}
}


/*
 * reconstruct_B_typeS
 *
 * Translates the entries of ArrayB from indices into the reduced
 * string T' back to positions in the original text T. The k-th
 * suffix of T' corresponds to the k-th suffix of T (in text order)
 * whose suffixType bit is 1.
 *
 * ArrayB/ArrayBLength - in: indices into T', out: positions in T
 * suffixType          - bit per suffix of T, 1 = type S
 * inputLength         - number of suffixes of T
 *
 * additional mem req: 2n
 * mem_convertion - ArrayBLength * sizeof(int)
 * */
 
void reconstruct_B_typeS(	int* 					ArrayB, 
							const int 				ArrayBLength,
			 				t_bitstr*			 	suffixType,
			 				const int 				inputLength,
			 				BYTE*					mem_convertion)
{
	int* convertion;
	int i, j;
	  
	// build a conversion array, such that conversion[i] = j
	// where i is the index of a suffix of T', and j is
	// the index of the corresponding type S suffix in T
	
	convertion = (int*) mem_convertion;
	
	j = 0;
	for (i = 0; i < inputLength; i++)
	{
		// a plain test replaces the former sign-extension mask
		// (bit << MAX_INT_INDEX >> MAX_INT_INDEX), which shifted
		// into the sign bit of a signed int
		if (bs_getVal(suffixType,i) == 1)
		{
			convertion[j] = i;
			j++;
		}
	}
	
	// use the conversion array to calculate the actual 
	// index of the type S suffix of ArrayB[i]
	
	for (i = 0; i < ArrayBLength; i++)
	{
		ArrayB[i] = convertion[ArrayB[i]];
	}
}


/*
 * reconstruct_B_typeL
 *
 * Translates the entries of ArrayB from indices into the reduced
 * string T' back to positions in the original text T. The k-th
 * suffix of T' corresponds to the k-th suffix of T (in text order)
 * whose suffixType bit is 0.
 *
 * ArrayB/ArrayBLength - in: indices into T', out: positions in T
 * suffixType          - bit per suffix of T, 0 = type L
 * inputLength         - number of suffixes of T
 *
 * additional mem req: 2n
 * mem_convertion - ArrayBLength * sizeof(int)
 * */
 
void reconstruct_B_typeL(	int* 					ArrayB, 
							const int 				ArrayBLength,
			 				t_bitstr*			 	suffixType,
			 				const int 				inputLength,
			 				BYTE*					mem_convertion)
{
	int* convertion;
	int i, j;
	
	// build a conversion array, such that conversion[i] = j
	// where i is the index of a suffix of T', and j is
	// the index of the corresponding type L suffix in T
	
	convertion = (int*) mem_convertion;
	
	j = 0;
	for (i = 0; i < inputLength; i++)
	{
		// a plain test replaces the former sign-extension mask
		// (bit << MAX_INT_INDEX >> MAX_INT_INDEX), which shifted
		// into the sign bit of a signed int
		if (bs_getVal(suffixType,i) == 0)
		{
			convertion[j] = i;
			j++;
		}
	}
	
	// use the conversion array to calculate the actual 
	// index of the type L suffix of ArrayB[i]
	
	for (i = 0; i < ArrayBLength; i++)
	{
		ArrayB[i] = convertion[ArrayB[i]];
	}
}


/*
 * construct_list_typeS
 *
 * Rearranges ArrayA (suffixes bucketed by first character, boundaries
 * in BuckA) into the S-distance lists: all suffixes with S-distance 1
 * first, then distance 2, and so on, keeping the bucket order inside
 * each distance group. BuckList receives the bucket boundaries of the
 * resulting list. Finally every list entry is replaced by the start
 * of its type S substring (position minus S-distance).
 *
 * ArrayA/inputLength - in: bucketed suffixes, out: the list (first
 *                      listLength entries)
 * Dist               - in: S-distance per suffix; overwritten with the
 *                      list position of each suffix (-1 = not listed)
 * DistCount/maxDist  - in: start offset of each distance group;
 *                      out: end offset of each distance group
 * BuckA              - bit i set => bucket boundary after ArrayA[i]
 * BuckList           - out: bucket boundaries of the list
 * listLength         - number of list entries
 *
 * return: ArrayA
 *
 * additional mem req: none
 * */

int* construct_list_typeS(int* ArrayA, const int inputLength, int* Dist, 
		          int* DistCount, const int maxDist, 
		          t_bitstr* BuckA, 
		          t_bitstr* BuckList, const int listLength)
{
	int i, j;
	int posList;
	int temp;
	int startB, endB;
	
	bs_setAll(BuckList, 0);
	i = 0;
	
	while (i < inputLength)
	{
		startB = i;
		
		// Place every member of the current bucket. The bucket ends
		// at the first set BuckA bit; the end of the array also
		// closes it, so i never leaves [0, inputLength).
		for (;;)
		{
			//use Dist to be the temporary reverse mapping array of
			//the List.
			temp = Dist[ArrayA[i]];
			if (temp > 0)
			{
				posList = DistCount[temp - 1];
				Dist[ArrayA[i]] = posList;
				bs_setVal(BuckList, posList, 1);
				DistCount[temp - 1]++;
			}
			else
			{
				Dist[ArrayA[i]] = -1;
			}
			
			if (i == inputLength - 1 || bs_getVal(BuckA,i) == 1)
			{
				break;
			}
			i++;
		}
		endB = i;
	
		//for all the member in the middle of the bucket in list
		//reset its BuckList to false. i.e. show that there is
		//no boundary between i and i+1 in list. It is possible
		//that the boundary of the last bucket of a S-distance
		//is not drawn properly. 
	  
		for (j = startB; j < endB; j++)
		{
			posList = Dist[ArrayA[j]];  
			if (posList >= 0 && posList != listLength - 1)
			{
				if (bs_getVal(BuckList, posList+1) == 1)
				{
					bs_setVal(BuckList, posList, 0);
				}
			}
		}
		i++;
	}
	
	//reconstruct the list from Dist
			
	for (i = 0; i < inputLength; i++)
	{
		if (Dist[i] >= 0)
		{
			ArrayA[Dist[i]] = i;
		}
	}
	
	//At this point DistCount[i] give the total number
	//of suffixes having S-distance of at most i+1, i.e. the end
	//of distance group i+1. Close every group with a boundary.
	
	for (i = 0; i < maxDist; i++)
	{
		bs_setVal(BuckList, DistCount[i]-1, 1);
	}
	
	//Calculate the index of the beginning of the type S
	//substring.
	
	for (i = 0; i < maxDist; i++)
	{
		j = (i == 0) ? 0 : DistCount[i-1];
		while (j < DistCount[i])
		{
			ArrayA[j] = ArrayA[j] - i - 1;
			j++;
		}
	}
	return (ArrayA);
}


/*
 * construct_list_typeL
 *
 * Counterpart of construct_list_typeS for L-distances. ArrayA is
 * walked from its last element towards the first, bucket by bucket,
 * and rearranged into the L-distance lists; BuckList receives the
 * bucket boundaries of the resulting list. Finally every list entry
 * is reduced by its L-distance.
 *
 * Dist is overwritten with the list position of each suffix (-1 for
 * suffixes that are not listed), DistCount is advanced from group
 * start offsets to group end offsets.
 *
 * return: ArrayA
 *
 * additional mem req: none
 * */

int* construct_list_typeL(int* ArrayA, const int inputLength, int* Dist, 
		          int* DistCount, const int maxDist, 
		          t_bitstr* BuckA, 
		          t_bitstr* BuckList, const int listLength)
{
  int cur, k, group;
  int slot;
  int dist;
  int bucketLo, bucketHi;

  bs_setAll(BuckList, 0);

  for (cur = inputLength - 1; cur >= 0; cur--)
  {
    bucketHi = cur;

    // cur is the first element of its bucket when it is index 0 or
    // when a boundary bit is set right before it; everything else
    // is handled inside this loop

    while (cur > 0 && bs_getVal(BuckA, cur - 1) != 1)
    {
      // Dist doubles as the reverse mapping suffix -> list position
      dist = Dist[ArrayA[cur]];
      if (dist > 0)
      {
        slot = DistCount[dist - 1];
        Dist[ArrayA[cur]] = slot;
        bs_setVal(BuckList, slot, 1);
        DistCount[dist - 1]++;
      }
      else
      {
        Dist[ArrayA[cur]] = -1;
      }
      cur--;
    }

    // now the first member of the bucket

    dist = Dist[ArrayA[cur]];
    if (dist != 0)
    {
      slot = DistCount[dist - 1];
      Dist[ArrayA[cur]] = slot;
      bs_setVal(BuckList, slot, 1);
      DistCount[dist - 1]++;
    }
    else
    {
      Dist[ArrayA[cur]] = -1;
    }

    bucketLo = cur;

    // drop the boundary after a list entry whenever its right
    // neighbour in the list is occupied as well; the boundary of
    // the last bucket of an L-distance group may be lost here and
    // is restored further down

    for (k = bucketHi; k >= bucketLo; k--)
    {
      slot = Dist[ArrayA[k]];
      if (slot >= 0 && slot != listLength - 1
          && bs_getVal(BuckList, slot + 1) == 1)
      {
        bs_setVal(BuckList, slot, 0);
      }
    }
  }

  // rebuild the list from the reverse mapping

  for (k = 0; k < inputLength; k++)
  {
    if (Dist[k] >= 0)
    { 
      ArrayA[Dist[k]] = k;
    }
  }

  // DistCount[group] is now the end offset of the group of
  // L-distance group+1; close every group with a boundary

  for (group = 0; group < maxDist; group++)
  {
    bs_setVal(BuckList, DistCount[group]-1, 1);
  }

  // turn each entry into the start of its type L substring

  for (group = 0; group < maxDist; group++)
  {
    for (k = (group == 0) ? 0 : DistCount[group-1]; k < DistCount[group]; k++)
    {
      ArrayA[k] = ArrayA[k] - group - 1;
    }
  }
  
  return (ArrayA);
}


/*
 * sort_by_list_typeS
 *
 * Refines the bucket order of ArrayB (type S substrings) by walking
 * the list bucket by bucket. Within a bucket of ArrayB, the elements
 * named by one list bucket are moved to the front of that bucket and
 * become a bucket of their own; BuckB gets the new boundaries.
 *
 * Working arrays:
 *   Rev[s]  - for a suffix s in ArrayB: the index of the right end of
 *             its current bucket (-1 for suffixes not in ArrayB)
 *   Left[b] - for a bucket identified by its right end b: the index
 *             of its left end (-1 for indices that are not a right end)
 *
 * additional mem req: 6n
 * mem_Rev - inputLength * sizeof(int)
 * mem_Left - ArrayBLength * sizeof(int)
 * */
 
void sort_by_list_typeS(int*					ArrayB, 
						t_bitstr*				BuckB,
						int*					List,
						t_bitstr*				BuckList,
						const int				inputLength,
						const int				listLength, 
						const int				ArrayBLength,
						BYTE*					mem_Rev,
						BYTE*					mem_Left)
{
  int* rightEndOf = (int*) mem_Rev;
  int* leftEnd = (int*) mem_Left;
  int first, k, target, bucket;
  int curRight;
  int atBoundary;

#ifdef DEBUG1
  char filename[100];//debug
  char tmp[100];//debug
#endif  

  memset((void*)rightEndOf, -1, inputLength*sizeof(int));
  memset((void*)leftEnd, -1, ArrayBLength*sizeof(int));

  // fill both arrays from the existing buckets of ArrayB,
  // scanning from the back; element 0 is handled separately

  curRight = ArrayBLength - 1;
  for (k = ArrayBLength - 1; k > 0; k--)
  {
    rightEndOf[ArrayB[k]] = curRight;
    if (bs_getVal(BuckB, k - 1) == 1)
    {
      leftEnd[curRight] = k;
      curRight = k - 1;
    }
  }

#ifdef DEBUG1
	strcpy(filename, "output/LinearSuffixSort_sort_Rev_");
	sprintf(tmp, "%i", depth);
	strcat(filename, tmp);
	strcat(filename, "_(S).txt");
	output_intarray(rightEndOf, inputLength, filename);
	strcpy(filename, "output/LinearSuffixSort_sort_Left_");
	sprintf(tmp, "%i", depth);
	strcat(filename, tmp);
	strcat(filename, "_(S).txt");
	output_intarray(leftEnd, ArrayBLength, filename);	
#endif
  
  // the first element of ArrayB closes the leftmost bucket

  rightEndOf[ArrayB[0]] = curRight;  
  leftEnd[curRight] = 0;

  // process the list one bucket at a time; a list bucket runs from
  // index first up to and including the first set BuckList bit

  for (first = 0; first < listLength; first = k + 1)
  {
    // pass 1: for every affected bucket count how many of its
    // elements are moved, by advancing its left end

    for (k = first; ; k++)
    {
      atBoundary = (bs_getVal(BuckList, k) != 0);
      leftEnd[rightEndOf[List[k]]]++;
      if (atBoundary)
        break;
    }

    // pass 2: the moved elements form a new bucket whose right end
    // lies just before the advanced left end

    for (k = first; ; k++)
    {
      atBoundary = (bs_getVal(BuckList, k) != 0);
      target = leftEnd[rightEndOf[List[k]]] - 1;
      rightEndOf[List[k]] = target;
      if (atBoundary)
        break;
    }

    // pass 3: establish the left end of every new bucket and mark
    // its right end as a boundary in BuckB

    for (k = first; ; k++)
    {
      atBoundary = (bs_getVal(BuckList, k) != 0);
      target = rightEndOf[List[k]];
      if (leftEnd[target] == -1)
      {
        leftEnd[target] = target;
      }
      else
      {
        leftEnd[target]--;
      }
      bs_setVal(BuckB, target, 1);
      if (atBoundary)
        break;
    }
  }
  
  // rebuild ArrayB: every suffix goes to the next free slot of
  // its bucket, in increasing order of text position

  for (k = 0; k < inputLength; k++)
  {
    bucket = rightEndOf[k];
    if (bucket >= 0)
    {
      ArrayB[leftEnd[bucket]] = k;
      leftEnd[bucket]++;
    }
  }
}


/*
 * sort_by_list_typeL
 *
 * Refines the bucket order of ArrayB (type L substrings) by walking
 * the list bucket by bucket. Within a bucket of ArrayB, the elements
 * named by one list bucket are moved to the back of that bucket and
 * become a bucket of their own; BuckB gets the new boundaries.
 *
 * Working arrays:
 *   Rev[s]   - for a suffix s in ArrayB: the index of the left end of
 *              its current bucket (-1 for suffixes not in ArrayB)
 *   Right[b] - for a bucket identified by its left end b: the index
 *              of its right end (-1 for indices that are not a left end)
 *
 * additional mem req: 6n
 * mem_Rev - inputLength * sizeof(int)
 * mem_Right - ArrayBLength * sizeof(int)
 * */
 
void sort_by_list_typeL(int*					ArrayB, 
						t_bitstr*				BuckB,
						int*					List,
						t_bitstr*				BuckList,
						const int				inputLength,
						const int				listLength, 
						const int				ArrayBLength,
						BYTE*					mem_Rev,
						BYTE*					mem_Right)
{
  int* leftEndOf = (int*) mem_Rev;
  int* rightEnd = (int*) mem_Right;
  int first, k, target, bucket;
  int curLeft;
  int atBoundary;

  memset((void*)leftEndOf, -1, inputLength*sizeof(int));
  memset((void*)rightEnd, -1, ArrayBLength*sizeof(int));

  // fill both arrays from the existing buckets of ArrayB

  curLeft = 0;
  for (k = 0; k < ArrayBLength; k++)
  {
    leftEndOf[ArrayB[k]] = curLeft;
    if (bs_getVal(BuckB, k) == 1)
    {
      rightEnd[curLeft] = k;
      curLeft = k + 1;
    }
  }
	   
  // process the list one bucket at a time; a list bucket runs from
  // index first up to and including the first set BuckList bit

  for (first = 0; first < listLength; first = k + 1)
  {
    // pass 1: for every affected bucket count how many of its
    // elements are moved, by pulling back its right end

    for (k = first; ; k++)
    {
      atBoundary = (bs_getVal(BuckList, k) != 0);
      rightEnd[leftEndOf[List[k]]]--;
      if (atBoundary)
        break;
    }

    // pass 2: the moved elements form a new bucket whose left end
    // lies just after the pulled-back right end

    for (k = first; ; k++)
    {
      atBoundary = (bs_getVal(BuckList, k) != 0);
      target = rightEnd[leftEndOf[List[k]]] + 1;
      leftEndOf[List[k]] = target;
      if (atBoundary)
        break;
    }

    // pass 3: establish the right end of every new bucket and mark
    // the boundary in front of its left end in BuckB

    for (k = first; ; k++)
    {
      atBoundary = (bs_getVal(BuckList, k) != 0);
      target = leftEndOf[List[k]];
      if (rightEnd[target] == -1)
      {
        rightEnd[target] = target;
      }
      else
      {
        rightEnd[target]++;
      }
      if (target > 0)
      {
        bs_setVal(BuckB, target - 1, 1);
      }
      if (atBoundary)
        break;
    }
  }

  // rebuild ArrayB: every suffix goes to the last free slot of
  // its bucket, in increasing order of text position

  for (k = 0; k < inputLength; k++)
  {
    bucket = leftEndOf[k];
    if (bucket >= 0)
    {
      ArrayB[rightEnd[bucket]] = k;
      rightEnd[bucket]--;
    }
  }
}

/*
 * counting_sort
 *
 * Distributes the suffix start positions 0..inputLength-1 into A,
 * ordered by the first character of each suffix (stable with respect
 * to position). BuckA gets a set bit at the last position of every
 * character bucket.
 *
 * Prints an error to stderr and returns without touching A or BuckA
 * when inputLength <= 0.
 *
 * additional mem req: 4n
 * mem_intBuffer - sizeof(int)*sigmaSize <= sizeof(int)*inputLength
 * */
 
void counting_sort(const int* inputString, const int inputLength, 
		   int* A, t_bitstr* BuckA, BYTE* mem_intBuffer)
{
  // one counter / write cursor per character of the alphabet
  int* cursor = (int *) mem_intBuffer;
  int hi, lo, alphabet;
  int c, pos, running, occurrences;

  if(inputLength <= 0)
  {
  	fprintf(stderr, "error: inputLength <= 0\n");
  	return;
  	/* TODO: should return int */
  }

  findMaxMin(inputString, inputLength, &hi, &lo);
  alphabet = hi - lo + 1;

  // histogram of the characters

  memset((void*) cursor, 0, sizeof(int)*alphabet);
  for (pos = 0; pos < inputLength; pos++)
  {
    cursor[inputString[pos] - lo]++;
  }

  // exclusive prefix sum: each cursor now points at the left
  // boundary of its character's bucket in A

  running = 0;
  for (c = 0; c < alphabet; c++)
  {
    occurrences = cursor[c];
    cursor[c] = running;
    running += occurrences;
  }
 
  // drop every position into its bucket and advance the cursor

  for (pos = 0; pos < inputLength; pos++)
  {
    c = inputString[pos] - lo;
    A[cursor[c]] = pos;
    cursor[c]++;
  }

  // after the distribution each cursor sits one past the end of its
  // bucket; BuckA[i] set means a boundary between A[i] and A[i+1]

  bs_setAll(BuckA, 0);
  for (c = 0; c < alphabet; c++)
  {
    bs_setVal(BuckA, cursor[c] - 1, 1);
  }
}

/*
 * findMaxMin
 *
 * Stores the largest element of inputString in *Max and the smallest
 * in *Min.
 *
 * When inputLength <= 0 an error is printed to stderr and *Max / *Min
 * are left untouched.
 *
 * Plain comparisons are used instead of the former subtract-and-shift
 * mask: the subtraction could overflow for values far apart, and the
 * mask depended on how a negative int is shifted right.
 * */
void findMaxMin(const int* inputString, const int inputLength, 
		       int* Max, int* Min)
{
  int i, value, hi, lo;

  if(inputLength <= 0)
  {
    fprintf(stderr, "In function findMaxMin(int*, int, int&, int&):\n");
    fprintf(stderr, "Length of input string cannot be less than 0.\n");
    fflush(stderr);
    return; 
  }  

  hi = inputString[0];
  lo = inputString[0];

  for(i = 1; i < inputLength; i++)
  {
    value = inputString[i];
    if (value > hi)
    {
      hi = value;
    }
    if (value < lo)
    {
      lo = value;
    }
  }

  *Max = hi;
  *Min = lo;
}

/*
 * s_distance
 *
 * Computes, for every position i, the distance from i back to the
 * closest position strictly to its left whose suffixType bit is set
 * (type S). Positions up to and including the first type S position
 * get Dist[i] = 0.
 *
 * input:  suffixType  - bit string, 1 = type S, 0 = type L
 *                       (bs_getVal is expected to yield exactly 0 or 1)
 *         inputLength - number of positions, Dist has to hold that many
 * output: Dist        - the distances described above
 *         *maxDist    - largest value stored in Dist
 *         *DistCount  - newly malloc'ed array of (*maxDist + 1) ints;
 *                       entry d is the number of positions whose
 *                       distance lies in 1..d. It is not freed here.
 *
 * If the allocation fails, Dist is still filled in, *DistCount is set
 * to NULL and *maxDist to 0.
 * */
void s_distance(t_bitstr* suffixType, 
		const int inputLength, int* Dist, int** DistCount, 
		int* maxDist)
{
  int i, j;
  int prevCount, temp, prevDist;
  int* DistCount1;
  int maxDist1;

  Dist[0] = 0;
  maxDist1 = 0;

  /* Find the first S type, everything before that has Dist[i]=0.
   * The scan is capped at the last position so that it can never
   * run past the end of Dist or of the bit string. */

  i = 0;
  while (i < inputLength - 1 && bs_getVal(suffixType,i) == 0)
  {
    Dist[i] = 0;
    i++;
  }
  Dist[i] = 0;

  if (i < inputLength - 1)
  {
    maxDist1 = 1;
  }

  /* Find Dist for the rest of the string. */

  prevDist = 1;

  for (i = i + 1; i < inputLength; i++)
  {
    Dist[i] = prevDist;

    /* Keep track of the largest distance handed out so far. */

    if (prevDist > maxDist1)
    {
      maxDist1 = prevDist;
    }

    /* If suffix i is type S the next position is 1 away from it,
     * otherwise the distance keeps growing. */

    if (bs_getVal(suffixType,i) != 0)
    {
      prevDist = 1;
    }
    else
    {
      prevDist++;
    }
  }
  
  /* Initialize and count the number of suffixes of each s-distance. */

  DistCount1 = (int*) malloc((maxDist1 + 1) * sizeof(int));
  if (DistCount1 == NULL)
  {
    *DistCount = NULL;
    *maxDist = 0;
    return;
  }

  for (i = 0; i <= maxDist1; i++)
  {
    DistCount1[i] = 0;
  }

  /* Skip the zero values. The bound matters when every entry of Dist
   * is zero (first S type is the last position). */

  j = 0;
  while (j < inputLength && Dist[j] == 0)
  {
    j++;
  }

  /* From j on every distance is >= 1; slot d-1 collects distance d. */

  for (i = j; i < inputLength; i++)
  {
    DistCount1[Dist[i] - 1]++;
  }

  /* Turn the counts into the number of suffixes that have a lesser
   * s-distance (exclusive prefix sum). */

  prevCount = DistCount1[0];
  DistCount1[0] = 0;

  for (i = 1; i <= maxDist1; i++)
  {
    temp = DistCount1[i];
    DistCount1[i] = prevCount + DistCount1[i-1];
    prevCount = temp;
  }  
  
  *DistCount = DistCount1;
  *maxDist = maxDist1;
}  


/*
 * l_distance
 *
 * Computes, for every position i, the distance from i back to the
 * closest position strictly to its left whose suffixType bit is clear
 * (type L). Positions up to and including the first type L position
 * get Dist[i] = 0.
 *
 * input:  suffixType  - bit string, 1 = type S, 0 = type L
 *                       (bs_getVal is expected to yield exactly 0 or 1)
 *         inputLength - number of positions, Dist has to hold that many
 * output: Dist        - the distances described above
 *         *maxDist    - largest value stored in Dist
 *         *DistCount  - newly malloc'ed array of (*maxDist + 1) ints;
 *                       entry d is the number of positions whose
 *                       distance lies in 1..d. It is not freed here.
 *
 * If the allocation fails, Dist is still filled in, *DistCount is set
 * to NULL and *maxDist to 0.
 * */
void l_distance(t_bitstr* suffixType, 
		const int inputLength, int* Dist, int** DistCount, 
		int* maxDist)
{
  int i, j;
  int prevCount, temp, prevDist;
  int* DistCount1;
  int maxDist1;
  
  Dist[0] = 0;
  maxDist1 = 0;

  /* Find the first L type, everything before that has Dist[i]=0.
   * The scan is capped at the last position so that it can never
   * run past the end of Dist or of the bit string. */

  i = 0;
  while (i < inputLength - 1 && bs_getVal(suffixType,i) == 1)
  {
    Dist[i] = 0;
    i++;
  }
  Dist[i] = 0;

  if (i < inputLength - 1)
  {
    maxDist1 = 1;
  }

  /* Find Dist for the rest of the string. */
 
  prevDist = 1;

  for (i = i + 1; i < inputLength; i++)
  {
    Dist[i] = prevDist;

    /* Keep track of the largest distance handed out so far. */

    if (prevDist > maxDist1)
    {
      maxDist1 = prevDist;
    }

    /* If suffix i is type L the next position is 1 away from it,
     * otherwise the distance keeps growing. */

    if (bs_getVal(suffixType,i) == 0)
    {
      prevDist = 1;
    }
    else
    {
      prevDist++;
    }
  }

  /* Initialize and count the number of suffixes of each l-distance. */

  DistCount1 = (int*) malloc((maxDist1 + 1) * sizeof(int));
  if (DistCount1 == NULL)
  {
    *DistCount = NULL;
    *maxDist = 0;
    return;
  }

  for (i = 0; i <= maxDist1; i++)
  {
    DistCount1[i] = 0;
  }

  /* Skip the zero values. The bound matters when every entry of Dist
   * is zero (first L type is the last position). */

  j = 0;
  while (j < inputLength && Dist[j] == 0)
  {
    j++;
  }

  /* From j on every distance is >= 1; slot d-1 collects distance d. */

  for (i = j; i < inputLength; i++)
  {
    DistCount1[Dist[i] - 1]++;
  }

  /* Turn the counts into the number of suffixes that have a lesser
   * l-distance (exclusive prefix sum). */

  prevCount = DistCount1[0];
  DistCount1[0] = 0;
  
  for (i = 1; i <= maxDist1; i++)
  {
    temp = DistCount1[i];
    DistCount1[i] = prevCount + DistCount1[i-1];
    prevCount = temp;
  } 
  
  *DistCount = DistCount1;
  *maxDist = maxDist1;
}  
 




