/// Unit containing classes for reading characters and strings from a file
unit filereader;

interface

uses constants,simple;

type
  /// Class holding the positions of line ends found in the input.
  /// Positions are appended with add and looked up by position or by line index.
  TLineSearchArray = class
    a: array of Integer;   // stored line-end positions; entries 0..count-1 are in use
    count: Integer;        // number of positions stored so far
    allocated: Integer;    // length given to a via SetLength (see create/add)
  public
    constructor create;
    destructor Destroy; override;
    // Appends pos to the array; the boolean result reports whether it was stored.
    function add(pos: Integer): boolean;
    // Returns the line index that contains position pos (found by binary search).
    function getLineIndexByPos(pos: Integer): Integer;
    // Returns the array entry at the index computed by getLineIndexByPos(pos).
    function getLineEndByPos(pos: Integer): Integer;
    // Returns the stored position for the given index, or 0 when index is outside 0..count-1.
    function getLineEndByIndex(index: Integer): Integer;
  end;

  /// Abstract class defining the operations of a simple reader,
  /// whether it reads from a file or from a string.
  TSimpleReader = class
  public
    constructor create; virtual; abstract;
    // Opens the input; path names the source and S optionally carries the text itself
    // (the string reader reads from S, the file reader ignores it).
    function open(path: xString; const S: xString = ''): boolean; virtual; abstract;
    // Reads the next character into c; returns false when nothing could be read.
    function readChar(var c: char): boolean; virtual; abstract;
    // Steps the read position back by one character.
    function unreadChar2: boolean; virtual; abstract;
    // Returns the path passed to open.
    function getFilepath: xString; virtual; abstract;
    function getPosition: Integer; virtual; abstract;  //current character position
    function getChangedFullPosition(var l,c: Integer; delta: Integer): Integer; virtual; abstract; //character position + delta, plus its line (l) and column (c)
    // True when the reader is at the end of its input.
    function atEOF: boolean; virtual; abstract;
    // Sets the character that readChar treats as a line end.
    function setLineEndChar(c: char): boolean; virtual; abstract;
    // Returns the characters between (current position + sdelta) and (current position + edelta).
    function getBlock(sdelta,edelta: Integer): xString; virtual; abstract;
  end;

  /// Class implementing the reader operations for input read from a file.
  /// Characters are served from Buffer, which fillBuffer refills from F.
  TSimpleFileReader = class(TSimpleReader)
    F: file;                 // untyped file, opened by open with record size 1
    Buffer: array[0..FILE_READER_BUFFER_SIZE-1] of char;
    loadedcharsize: Integer; // size in bytes of one character as stored in the file (starts as CHAR_SIZE)
    bsizedivlsize: Integer;  //set to FILE_READER_BUFFER_SIZE div loadedcharsize
    lines: TLineSearchArray; // positions of the line ends seen so far
    filepath: xString;       // path of the successfully opened file
    lineendchar: char;       // character that readChar records as a line end
(*    lines: array of Integer;
    linescount: Integer;
    linesallocated: Integer;*)
    // forw: Buffer index of the next character readChar returns
    // reached: Buffer index up to which characters have already been read once
    // newest: Buffer index of the last character loaded by fillBuffer (-1 before the first fill)
    // lastvalid: Buffer index at which unreadChar refuses to step back any further
    forw,reached,newest,lastvalid: Integer;
    conversion: Integer;     // 0 = none; 1 and 2 are set by fillBuffer when the file's character size differs from CHAR_SIZE
    EOF,LastBlock: boolean;  // EOF: nothing left to read; LastBlock: the last BlockRead returned fewer bytes than requested
    FPos: Integer;           // character position: +1 per readChar, -1 per unreadChar/unreadChar2
  public
    constructor create; override;
    destructor Destroy; override;
    function open(path: xString; const S: xString = ''): boolean; override;
//    function fillBuffer(Lo,Hi: Integer): boolean;
    // Reads up to NumOfBytes bytes from the file into Buffer starting at index Lo.
    function fillBuffer(Lo,NumOfBytes: Integer): boolean;
    function readChar(var c: char): boolean; override;
    // Steps back one character unless forw = lastvalid.
    function unreadChar: boolean;
    // Steps back one character without the lastvalid check.
    function unreadChar2: boolean; override;
    function atEOF: boolean; override;
    function getFilepath: xString; override;
    function getPosition: Integer; override;     //current character position
    function getFullPositionFromPos(var l,c: Integer; p: Integer): Integer;  //line & column for character position p
    function getChangedFullPosition(var l,c: Integer; delta: Integer): Integer; override; //character position + delta, plus its line & column
    function getFullPosition(var l,c: Integer): Integer;  //current character position plus its line & column
    function getAbsPosition: Integer;   //byte position in the file
    function setLineEndChar(c: char): boolean; override;
    // Returns FPos (same value as getPosition).
    function getPos: Integer;
    function getBlock(sdelta,edelta: Integer): xString; override;
  end;

  /// Class implementing the reader operations for input held in a string.
  TSimpleStringReader = class(TSimpleReader)
    inputxString: xString;   // the text being read (assigned by open)
    loadedcharsize: Integer; // set to CHAR_SIZE in create
    lines: TLineSearchArray; // positions of the line ends seen so far
    filepath: xString;       // path value passed to open
    lineendchar: char;       // character that readChar records as a line end
    // forw: 1-based index in inputxString of the next character readChar returns
    // reached: index of the character most recently returned by readChar
    forw,reached: Integer;
//    ,reached,newest,lastvalid: Integer;
//    conversion: Integer;
    EOF,LastBlock: boolean;  // EOF: no more characters; LastBlock is not assigned anywhere in this class
  public
    constructor create; override;
    destructor Destroy; override;
    // Stores path and takes S as the text to read.
    function open(path: xString; const S: xString = ''): boolean; override;
    function readChar(var c: char): boolean; override;
    // Steps back one character; fails at the start of the string.
    function unreadChar2: boolean; override;
    function getPosition: Integer; override;     //current character position (forw-1)
    function atEOF: boolean; override;
    function getFilepath: xString; override;
    function getFullPositionFromPos(var l,c: Integer; p: Integer): Integer;  //line & column for character position p
    function getChangedFullPosition(var l,c: Integer; delta: Integer): Integer; override; //character position + delta, plus its line & column
    function getFullPosition(var l,c: Integer): Integer; //current character position plus its line & column
    function setLineEndChar(c: char): boolean; override;
    function getBlock(sdelta,edelta: Integer): xString; override;
  end;
implementation

uses
  SysUtils;

{ TSimpleFileReader }

/// Constructor: initialises a TSimpleFileReader to its "no file opened yet" state.
constructor TSimpleFileReader.create;
begin
  // The inherited constructor is not called here; TSimpleReader.create is declared abstract.

  // Buffer cursors: nothing loaded, nothing read.
  newest := -1;
  forw := 0;
  reached := 0;
  lastvalid := 0;
  FPos := 0;

  // Until open succeeds there is no data to deliver.
  EOF := true;
  LastBlock := true;

  // Character handling defaults.
  conversion := 0;
  loadedcharsize := CHAR_SIZE;
  lineendchar := LINE_END_CHAR;

  filepath := '';
  lines := TLineSearchArray.create;

  // Number of characters of the loaded size that fit into the buffer.
  case loadedcharsize of
    1: bsizedivlsize := FILE_READER_BUFFER_SIZE;
    2: bsizedivlsize := FILE_READER_BUFFER_SIZE shr 1;
  else
    bsizedivlsize := FILE_READER_BUFFER_SIZE div loadedcharsize;
  end;
end;


/// Opens the file at path and loads the first buffer block.
/// Returns true when the file was opened and the first fillBuffer call succeeded;
/// returns false when fillBuffer failed or an EInOutError was raised.
/// The S parameter is not used by the file reader.
function TSimpleFileReader.open(path: xString; const S: xString = ''): boolean;
begin
  result := false;
  AssignFile(F,path);
  try
    Reset(F,1);   // record size 1: BlockRead counts in bytes
    FPos := 0;
    if fillBuffer(0,FILE_READER_BUFFER_SIZE) then begin
      // Remember the path only once the file is known to be readable.
      filepath := path;
      result := true;
    end;
  except
    on EInOutError do
      result := false;
  end;
end;


/// Destructor: closes the file (if it is open) and releases the line-end array.
destructor TSimpleFileReader.destroy;
begin
  try
    CloseFile(F);
  except
    // An I/O error here is expected when no file was open; ignore it.
    on EInOutError do ;
  end;
  lines.Free;
  inherited destroy;
end;


/// Reads up to NumOfBytes bytes from the file into Buffer starting at index Lo and
/// updates LastBlock, EOF, newest and lastvalid accordingly.
/// On the very first fill (newest = -1) it also inspects the start of the data to decide
/// whether the file's character size matches CHAR_SIZE, and sets conversion/loadedcharsize.
/// Returns false (and closes the file) when an EInOutError is raised, true otherwise.
function TSimpleFileReader.fillBuffer(Lo,NumOfBytes: Integer): boolean;
var NumRead: Integer;
//    NumRequested: Integer;
    i: Integer;
    c: char;
begin
  result := true;
  try
    // With conversion 2, a fill requested at index 0 is redirected to the index
    // FILE_READER_BUFFER_SIZE div loadedcharsize; the data is moved back down further below.
    if (conversion = 2) and (Lo = 0) then begin
      Lo := FILE_READER_BUFFER_SIZE div loadedcharsize;
    end;
    BlockRead(F, (@Buffer[Lo])^, NumOfBytes, NumRead);
    // A short read marks the last block; a read of nothing at all marks end of input.
    LastBlock := (NumRead < NumOfBytes);
    EOF := LastBlock and (NumRead = 0);
    if EOF then
      Exit;

    // First fill only: detect how characters are stored in the file.
    if (newest = -1) and not EOF then begin
      if (CHAR_SIZE > 1) then begin
        if Buffer[0] <> char(254*256+255) then begin
          //non-Unicode file in Unicode mode
          //MessageBox(0,PChar('Unicode file expected'),PChar('Error'),MB_OK);
          conversion := 1;
          loadedcharsize := 1;
        end
        else begin
          readChar(c);    //Unicode file - first char FF FE ignore
        end;
      end
      else if (NumRead > 1) then begin
        if (Buffer[0] = char(255)) and (Buffer[1] = char(254)) then begin
          //non-ASCII file in ASCII mode
          //MessageBox(0,PChar('ASCII file expected'),PChar('Error'),MB_OK);
          conversion := 2;
          loadedcharsize := 2;
          readChar(c);    //Unicode file - first char FF FE ignore
        end
        else begin
          // no FF FE marker: nothing to change
        end;
      end;
      // loadedcharsize may have changed above, so recompute the buffer capacity in characters.
      if loadedcharsize = 1 then
        bsizedivlsize := FILE_READER_BUFFER_SIZE
      else if loadedcharsize = 2 then
        bsizedivlsize := FILE_READER_BUFFER_SIZE shr 1
      else
        bsizedivlsize := FILE_READER_BUFFER_SIZE div loadedcharsize;
    end;

    // Convert NumRead from a byte count to a count of characters of the loaded size.
    if loadedcharsize = 1 then
      NumRead := NumRead
    else if loadedcharsize = 2 then
      NumRead := NumRead shr 1
    else
      NumRead := NumRead div loadedcharsize;

    if (conversion > 0) then begin
      if (conversion = 1) then begin
        // Rewrites the block in place, walking from the last index down to Lo:
        // each Buffer[i] is taken from Buffer[(i - Lo) div CHAR_SIZE + Lo], using its low
        // byte when i is a multiple of CHAR_SIZE and the bits above the low byte otherwise.
        for i := Lo + Pred(NumRead) downto Lo do begin
          if ((i mod CHAR_SIZE) = 0) then
            Buffer[i] := Char(Ord(Buffer[(i - Lo) div CHAR_SIZE + Lo]) and $FF)
          else
            Buffer[i] := Char(Ord(Buffer[(i - Lo) div CHAR_SIZE + Lo]) shr 8);
        end;
(*        newest := Lo + NumRead - 1;
        lastvalid := Lo + NumRead;*)
      end
      else begin
        // conversion 2: keep every loadedcharsize-th element, packing the block towards Lo.
        for i := Lo to Lo + Pred(NumRead) do begin
          Buffer[i] := Buffer[(i - Lo)*loadedcharsize + Lo];
        end;
        // On later fills that were loaded at index bsizedivlsize, move the packed
        // characters down to the start of the buffer and continue with Lo = 0.
        if (newest <> -1) and (Lo = bsizedivlsize) then begin
          for i := Lo to Lo + Pred(NumRead) do begin
            Buffer[i-Lo] := Buffer[i];
          end;
          Lo := 0;
        end;
(*        newest := Lo + NumRead - 1;
        lastvalid := Lo + NumRead;

        newest := newest div (CHAR_SIZE + 1);
        lastvalid := lastvalid div (CHAR_SIZE + 1);*)
      end;
    end
    else begin
(*      newest := Lo + NumRead - 1;
      lastvalid := Lo + NumRead;

      if CHAR_SIZE > 1 then begin
        newest := newest div CHAR_SIZE;
        lastvalid := lastvalid div CHAR_SIZE;
      end;*)
    end;

    // newest: index of the last character just loaded; lastvalid: the index after it,
    // passed through overflowwrap with the buffer capacity.
    newest := Lo + NumRead - 1;
    lastvalid := overflowwrap(Lo + NumRead,bsizedivlsize);
  except
    on EInOutError do begin
      result := false;
      CloseFile(F);
    end;
  end;
end;


/// Reads the character at buffer index forw into c and advances the reader by one character.
/// Returns false, leaving c unchanged, when the reader is already at EOF; true otherwise.
function TSimpleFileReader.readChar(var c: char): boolean;
//var pos: Integer;
begin
  result := false;
  if EOF then exit;
  c := Buffer[forw];
//  writeln(getPosition,' - ',c);
(*  pos := getPosition;
  if c = char(0) then begin
    MessageBox(0,PChar(IntToStr(pos)),PChar('whata'),MB_OK);
  end;*)
  // forw = reached: the read position is at the front of what has been read so far
  // (after unreadChar the position lies behind reached and this branch is skipped).
  if (forw = reached) then begin
    if (c = lineendchar) then begin
      // Record the position just after the line-end character.
      lines.add(getPosition+1);
    end;
    reached := overflowwrap(reached + 1,bsizedivlsize);
    // The last loaded character was just consumed: load more data, or flag EOF
    // when the previous read already was the last block.
    if (forw = newest) then begin
      if not LastBlock then
//        fillbuffer(overflowwrap(newest + 1,FILE_READER_BUFFER_SIZE),overflowwrap(newest + FILE_READER_BUFFER_SIZE shr 1,FILE_READER_BUFFER_SIZE))
        fillbuffer(overflowwrap(newest + 1,bsizedivlsize),FILE_READER_BUFFER_SIZE shr 1)
      else
        EOF := true;
    end;
  end
  // Re-reading the final character of the last block also ends the input.
  else if LastBlock and (forw = newest) then begin
    EOF := true;
  end;
  forw := overflowwrap(forw + 1,bsizedivlsize);
  result := true;
  FPos := FPos + 1;
end;

/// Moves the read index forw back by one character when that is possible.
/// Returns false without changing anything when forw equals lastvalid; otherwise
/// steps back, clears EOF, decrements the character position and returns true.
function TSimpleFileReader.unreadChar: boolean;
begin
  result := (forw <> lastvalid);
  if not result then
    Exit;
  forw := zerowrap(forw - 1,bsizedivlsize);
  EOF := false;
  Dec(FPos);
end;


/// Moves the read index forw back by one character and always returns true.
/// Unlike unreadChar it does not compare forw with lastvalid; the original note on this
/// routine says a "check size / 2" is needed in the caller instead.
function TSimpleFileReader.unreadChar2: boolean;
begin
  Dec(FPos);
  EOF := false;
  forw := zerowrap(forw - 1,bsizedivlsize);
  result := true;
end;


/// Reports whether the reader is at the end of its input (the value of the EOF flag).
function TSimpleFileReader.atEOF: boolean;
begin
  Result := EOF;
end;


/// Returns the current character position in the input.
/// The value is the FPos counter maintained by readChar/unreadChar/unreadChar2;
/// an earlier variant derived it from FilePos(F) and the buffer indices.
function TSimpleFileReader.getPosition: Integer;
begin
  Result := FPos;
end;

/// Returns the current character position in the input (the FPos counter).
function TSimpleFileReader.getPos: Integer;
begin
  Result := FPos;
end;

/// Computes the line (l) and column (c) that belong to character position p.
/// Returns p itself.
function TSimpleFileReader.getFullPositionFromPos(var l,c: Integer; p: Integer): Integer;
var
  prevLineEnd: Integer;
begin
  l := lines.getLineIndexByPos(p);
  // End position recorded for the preceding line (0 when there is none).
  prevLineEnd := lines.getLineEndByIndex(l-1);
  c := p - prevLineEnd;
  //UNICODE: ignore the 2 byte header on the first line
  if (loadedcharsize = 2) and (l = 0) then
    c := c - 1;
  result := p;
end;


/// Computes line (l) and column (c) for the position that lies delta characters away
/// from the current position, and returns that position.
function TSimpleFileReader.getChangedFullPosition(var l,c: Integer; delta: Integer): Integer;
var
  target: Integer;
begin
  target := getPosition + delta;
  result := getFullPositionFromPos(l,c,target);
end;


/// Computes line (l) and column (c) of the current position and returns that position.
function TSimpleFileReader.getFullPosition(var l,c: Integer): Integer;
const
  NO_OFFSET = 0;
begin
  Result := getChangedFullPosition(l,c,NO_OFFSET);
end;


/// Returns the absolute position in the input file: the file position reported by
/// FilePos, adjusted by the distance between forw and the end of the loaded data.
function TSimpleFileReader.getAbsPosition: Integer;
var
  filePosition,charOffset: Integer;
begin
  filePosition := FilePos(F);
  // Distance in characters from the slot after newest to forw.
  charOffset := forw - newest - 1;
  result := filePosition + charOffset * loadedcharsize;
end;

/// Returns the path of the currently opened file (empty until open has succeeded).
function TSimpleFileReader.getFilepath: xString;
begin
  Result := filepath;
end;

/// Sets the character that readChar treats as the end of a line. Always returns true.
function TSimpleFileReader.setLineEndChar(c: char): boolean;
begin
  lineendchar := c;
  Result := true;
end;

/// Returns the characters from (current position + sdelta) up to, but not including,
/// (current position + edelta) as a string, taken from the ring buffer.
/// sdelta is limited to -(bsizedivlsize shl 1) and edelta to 0 (nothing ahead of the
/// current position is returned). An empty string is returned when the range is empty
/// after those limits have been applied.
function TSimpleFileReader.getBlock(sdelta,edelta: Integer): xString;
var i,stop: Integer;
begin
  result := '';

  // NOTE(review): the lower limit is twice the buffer capacity in characters;
  // confirm that zerowrap copes with offsets that far below zero.
  if sdelta < - (bsizedivlsize shl 1) then
    sdelta := - (bsizedivlsize shl 1);
  if edelta > 0 then
    edelta := 0;

  // The emptiness check must follow the clamping above. Checked beforehand, a call
  // such as getBlock(0,5) passed the test, had edelta clamped to 0 and then ran the
  // loop below with start = end, walking the entire ring buffer.
  if sdelta >= edelta then
    Exit;

  clearGCA;
  stop := zerowrap(forw + edelta,bsizedivlsize);
  i := zerowrap(forw + sdelta,bsizedivlsize);
  repeat
    addtoGCA(Buffer[i]);
    i := overflowwrap(i + 1,bsizedivlsize);
  until i = stop;
  toStringGCA(result);
end;

{ TSimpleStringReader }

/// Constructor: initialises a TSimpleStringReader positioned before the first character.
constructor TSimpleStringReader.create;
begin
  // The inherited constructor is not called here; TSimpleReader.create is declared abstract.

  // Strings are indexed from 1; nothing has been read yet.
  forw := 1;
  reached := 0;
  EOF := false;

  loadedcharsize := CHAR_SIZE;
  lineendchar := LINE_END_CHAR;
  filepath := '';
  lines := TLineSearchArray.create;
end;


/// "Opens" the reader on the text S; path is only stored so getFilepath can report it.
/// An empty S puts the reader at EOF immediately. Always returns true.
function TSimpleStringReader.open(path: xString; const S: xString = ''): boolean;
begin
  filepath := path;
  inputxString := S;
  // Nothing to read from an empty string.
  if S = '' then
    EOF := true;
  result := true;
end;


/// Destructor: releases the line-end array and then the object itself.
destructor TSimpleStringReader.destroy;
begin
  lines.Free;   // Free tolerates a nil reference
  inherited destroy;
end;

/// Reads the character at index forw of the input string into c and advances by one.
/// Returns false, leaving c unchanged, when the reader is at EOF or there is no
/// character at forw; returns true otherwise. EOF is set once the last character
/// has been delivered.
function TSimpleStringReader.readChar(var c: char): boolean;
begin
  result := false;
  // Never index past the end of the string (e.g. when open was not called).
  if EOF or (forw > Length(inputxString)) then begin
    EOF := true;
    exit;
  end;
  c := inputxString[forw];
  // Only characters read for the first time are inspected for line ends, and reached
  // only ever moves forward. Previously reached was overwritten on every read, so after
  // unreadChar2 it fell back and an already recorded line end was added a second time.
  if forw > reached then begin
    if c = lineendchar then
      lines.add(getPosition+1);
    reached := forw;
  end;
  forw := forw + 1;
  if forw > Length(inputxString) then
    EOF := true;
  result := true;
end;

/// Moves the read index forw back by one character when that is possible.
/// Returns false when the reader is already at the first character; otherwise steps
/// back, clears EOF and returns true.
function TSimpleStringReader.unreadChar2: boolean;
begin
  result := (forw > 1);
  if not result then
    Exit;
  Dec(forw);
  EOF := false;
end;

/// Reports whether the reader is at the end of its input (the value of the EOF flag).
function TSimpleStringReader.atEOF: boolean;
begin
  Result := EOF;
end;

/// Returns the current character position in the input: the number of characters
/// in front of the 1-based read index forw.
function TSimpleStringReader.getPosition: Integer;
begin
  Result := Pred(forw);
end;

/// Computes the line (l) and column (c) that belong to character position p.
/// Returns p itself.
function TSimpleStringReader.getFullPositionFromPos(var l,c: Integer; p: Integer): Integer;
var
  prevLineEnd: Integer;
begin
  l := lines.getLineIndexByPos(p);
  // End position recorded for the preceding line (0 when there is none).
  prevLineEnd := lines.getLineEndByIndex(l-1);
  c := p - prevLineEnd;
  //UNICODE: ignore the 2 byte header on the first line
  if (loadedcharsize = 2) and (l = 0) then
    c := c - 1;
  result := p;
end;


/// Computes line (l) and column (c) for the position that lies delta characters away
/// from the current position, and returns that position.
function TSimpleStringReader.getChangedFullPosition(var l,c: Integer; delta: Integer): Integer;
var
  target: Integer;
begin
  target := getPosition + delta;
  result := getFullPositionFromPos(l,c,target);
end;


/// Computes line (l) and column (c) of the current position and returns that position.
function TSimpleStringReader.getFullPosition(var l,c: Integer): Integer;
const
  NO_OFFSET = 0;
begin
  Result := getChangedFullPosition(l,c,NO_OFFSET);
end;

/// Returns the path value that was passed to open (empty before open is called).
function TSimpleStringReader.getFilepath: xString;
begin
  Result := filepath;
end;

/// Sets the character that readChar treats as the end of a line. Always returns true.
function TSimpleStringReader.setLineEndChar(c: char): boolean;
begin
  lineendchar := c;
  Result := true;
end;

/// Returns the part of the input string from (current position + sdelta) up to
/// (current position + edelta). The start is limited to the first character of the
/// string and the end to the current position; an empty range gives an empty string.
function TSimpleStringReader.getBlock(sdelta,edelta: Integer): xString;
var
  first,len: Integer;
begin
  result := '';
  if edelta <= sdelta then
    Exit;

  // Do not reach in front of the first character of the string.
  if sdelta <= -forw then
    sdelta := 1 - forw;
  // Do not reach beyond the current read position.
  if edelta > 0 then
    edelta := 0;

  first := forw + sdelta;
  len := edelta - sdelta;
  result := copy(inputxString,first,len);
end;

{ TLineSearchArray }

/// Constructor: creates an empty TLineSearchArray with room for 8 entries.
constructor TLineSearchArray.create;
const
  INITIAL_CAPACITY = 8;
begin
  inherited create;
  allocated := INITIAL_CAPACITY;
  SetLength(a,allocated);
  count := 0;
end;


/// Destructor: releases the dynamic array and then the object itself.
destructor TLineSearchArray.Destroy;
begin
  a := nil;   // assigning nil releases the dynamic array
  inherited Destroy;
end;

/// Appends position pos to the sorted array of line ends when it is greater than the
/// last stored position (or when the array is still empty) and returns true.
/// Otherwise the array is left unchanged and false is returned.
function TLineSearchArray.add(pos: Integer): boolean;
begin
  // Compare with the last stored entry, a[count-1]. The previous code compared with
  // a[count], the not-yet-used slot, so duplicates and out-of-order positions were
  // never rejected.
  result := (count = 0) or (pos > a[Pred(count)]);
  if not result then
    Exit;

  a[count] := pos;
  Inc(count);

  // Keep at least one free slot: double the storage as soon as it is full.
  if count = allocated then begin
    allocated := allocated * 2;
    SetLength(a,allocated);
  end;
end;

/// Uses binary search to find the index of the line that contains position pos.
/// The result is the number of stored line ends that are less than or equal to pos:
/// 0 for the first line, count for a position at or after the last stored line end.
function TLineSearchArray.getLineIndexByPos(pos: Integer): Integer;
var first,last,mid: Integer;
begin
  // With no line ends recorded everything is on line 0. This guard also keeps the
  // a[Pred(count)] access below from reading a[-1] when count = 0.
  if (count = 0) or (pos <= 0) then
    result := 0
  else if a[Pred(count)] <= pos then
    result := count
  else begin
    // Find the first entry that is greater than pos.
    first := 0;
    last := Pred(count);
    while first < last do begin
      mid := (first+last) shr 1;
      if a[mid] > pos then
        last := mid
      else
        first := mid + 1;
    end;
    result := first;
  end;
end;


/// Returns the stored line-end position for the line that contains position pos,
/// i.e. the array entry at the index reported by getLineIndexByPos.
function TLineSearchArray.getLineEndByPos(pos: Integer): Integer;
var
  lineIndex: Integer;
begin
  lineIndex := getLineIndexByPos(pos);
  result := a[lineIndex];
end;


/// Returns the stored line-end position for the given line index,
/// or 0 when index lies outside 0..count-1.
function TLineSearchArray.getLineEndByIndex(index: Integer): Integer;
begin
  if (index < 0) or (index >= count) then
    result := 0
  else
    result := a[index];
end;

end.
