(* (c) Microsoft Corporation 2005-2006.  *)

open Lexing
open Fsyaccast
(*F# module Unilex = Microsoft.FSharp.Compiler.UnicodeLexing F#*)

let input = ref None
let modname= ref None
let opens= ref []
let out = ref None
let tokenize = ref false
let compat = ref false
let log = ref false
let inputCodePage = ref None

let usage =
  [ "-o", Arg.String (fun s -> out := Some s), "Name the output file.";
    "-v", Arg.Unit (fun () -> log := true), "Produce a listing file."; 
    "--module", Arg.String (fun s -> modname := Some s), "Define the F# module name to host the generated parser."; 
    "--open", Arg.String (fun s -> opens := !opens @ [s]), "Add the given module to the list of those to open in both the generated signature and implementation."; 
    "--ml-compatibility", Arg.Set compat, "Support the use of the global state from the 'Parsing' module in MLLib."; 
    "--tokens", Arg.Set tokenize, "Simply tokenize the specification file itself."; 
    "--codepage", Arg.Int (fun i -> inputCodePage := Some i), "Assume input lexer specification file is encoded with the given codepage.";  ]

let _ = Arg.parse usage (fun x -> match !input with Some _ -> failwith "more than one input given" | None -> input := Some x) "fsyacc <filename>"

let output_int os n = output_string os (string_of_int n)

(* nb. using printf is a little slow here *)
let output_trigraph os n = 
  output_string os "\\";
  output_int os (n/100);
  output_int os ((n mod 100)/10);
  output_int os (n mod 10)

let output_coded_u16 os n = 
  output_trigraph os (n/256);
  output_trigraph os (n mod 256)

let shiftFlag = 0x0000
let reduceFlag = 0x4000
let errorFlag = 0x8000
let acceptFlag = 0xc000
let actionMask = 0xc000

let anyMarker = 0xffff

let actionCoding action  =
  match action with 
  | Accept -> acceptFlag
  | Shift n -> shiftFlag lor n
  | Reduce n -> reduceFlag lor n
  | Error -> errorFlag 

let (|>) x f = f x
let insert x l = if List.mem x l then l else x::l 

let main() = 
  let filename = (match !input with Some x -> x | None -> failwith "no input given") in 
  let spec = 
    Unilex.usingUnicodeFileAsUTF8Lexbuf filename !inputCodePage (fun lexbuf -> 
      try 
        (*IF-FSHARP Lexing.lexbuf_set_curr_p lexbuf ENDIF-FSHARP*)
        (*IF-OCAML*) lexbuf.lex_curr_p <- (*ENDIF-OCAML*) 
          {(Lexing.lexeme_end_p lexbuf)  with pos_fname = filename; 
                                               pos_cnum=0;
                                               pos_lnum=1 }; 
        if !tokenize then begin 
          while true do 
            Printf.eprintf "tokenize - getting one token\n";
            let t = Fsyacclex.token lexbuf in 
            (*F# Printf.eprintf "tokenize - got %s\n" (Fsyaccpars.token_to_string t); F#*)
            if t = Fsyaccpars.EOF then exit 0;
          done;
        end;
    
        Fsyaccpars.spec Fsyacclex.token lexbuf 
      with e -> 
         let p = Lexing.lexeme_start_p lexbuf in 
         Printf.eprintf "%s(%d,%d): error: %s" filename p.pos_lnum (p.pos_cnum -  p.pos_bol) (Printexc.to_string e);
         exit 1 
    ) in

  let output = match !out with Some x -> x | _ -> Filename.chop_extension filename ^  (if Filename.check_suffix filename ".mly" then ".ml" else ".fs") in
  let outputi = match !out with Some x -> Filename.chop_extension x ^ (if Filename.check_suffix x ".ml" then ".mli" else ".fsi") | _ -> Filename.chop_extension filename ^ (if Filename.check_suffix filename ".mly" then ".mli" else ".fsi") in
  let outputo = 
    if !log then Some (match !out with Some x -> Filename.chop_extension x ^ ".fsyacc.output" | _ -> Filename.chop_extension filename ^ ".fsyacc.output") 
    else None in
  let os = open_out output in
  let osi = open_out outputi in
  let logf = 
    match outputo with 
    | None -> (fun f -> ())
    | Some filename -> let oso = open_out filename in (fun f -> f oso) in

  logf (fun oso -> Printf.fprintf oso "Output file describing compiled parser placed in %s and %s\n" output outputi);

  Printf.printf "building tables\n"; flush stdout;
  let spec1 = mkParserSpec spec in 
  let (prods,states, startStates,actionTable,immediateActionTable,gotoTable,endOfInputTerminalIdx,errorTerminalIdx) = lalrParserSpecToTables logf spec1 in 

  let (code,pos) = spec.header in 
  Printf.printf "%d states\n" (Array.length states); flush stdout;
  Printf.printf "%d nonterminals\n" (Array.length gotoTable.(0)); flush stdout;
  Printf.printf "%d terminals\n" (Array.length actionTable.(0)); flush stdout;
  Printf.printf "%d productions\n" (Array.length prods); flush stdout;
  Printf.printf "#rows in action table: %d\n" (Array.length actionTable); flush stdout;
  Printf.printf "#unique rows in action table: %d\n" (List.length (Array.fold_right (fun row acc -> insert (Array.to_list row) acc) actionTable [])); flush stdout;
  Printf.printf "maximum #different actions per state: %d\n" (Array.fold_right (fun row acc ->max (List.length (List.fold_right insert (Array.to_list row) [])) acc) actionTable 0); flush stdout;
  Printf.printf "average #different actions per state: %d\n" ((Array.fold_right (fun row acc -> (List.length (List.fold_right insert (Array.to_list row) [])) + acc) actionTable 0) / (Array.length states)); flush stdout;

  Printf.fprintf os "// Implementation file for parser generated by fsyacc \n";
  Printf.fprintf osi "// Signature file for parser generated by fsyacc \n";
  begin match !modname with 
  | None -> ()
  | Some s -> 
      Printf.fprintf os "module %s\n" s;
      Printf.fprintf osi "module %s\n" s;
  end;
  Printf.fprintf os "#nowarn \"64\";; // turn off warnings that type variables used in production annotations are instantiated to concrete type\n";
  !opens |> List.iter (fun s -> 
      Printf.fprintf os "open %s\n" s;
      Printf.fprintf osi "open %s\n" s);
  Printf.fprintf os "open Microsoft.FSharp.Tools.FsYacc.ParseHelpers\n";
  if !compat then Printf.fprintf os "open Microsoft.FSharp.Compatibility.OCaml.Parsing\n";

  Printf.fprintf os "# %d \"%s\"\n" pos.pos_lnum pos.pos_fname;
  Printf.fprintf os "%s\n" code;

  Printf.fprintf os "# %d \"%s\"\n" 10000 output;
  let ptype out = 
    Printf.fprintf out "type token = \n";
    List.iter
      (fun (id,typ) -> 
        match typ with 
          None -> Printf.fprintf out "  | %s\n" id
        | Some ty -> Printf.fprintf out "  | %s of (%s)\n" id ty)
      spec.tokens; in 
  ptype os;
  ptype osi;
  Printf.fprintf os "let _fspars_tagof (t:token) = \n";
  Printf.fprintf os "  match t with \n";
  Array.iteri
    (fun i (id,typ) -> Printf.fprintf os "  | %s %s -> %d \n" id (match typ with Some _ -> "_" | None -> "") i)
    (Array.of_list spec.tokens);
  Printf.fprintf os "let _fspars_end_of_input_tag = %d \n" endOfInputTerminalIdx;
  Printf.fprintf os "let _fspars_tagof_error_term = %d\n" errorTerminalIdx;
  Printf.fprintf os "let _fspars_token_to_string (t:token) = \n";
  Printf.fprintf os "  match t with \n";
  Array.iteri
    (fun i (id,typ) -> Printf.fprintf os "  | %s %s -> \"%s\" \n" id (match typ with Some _ -> "_" | None -> "") id)
    (Array.of_list spec.tokens);
  Printf.fprintf os "let _fspars_dataof (t:token) = \n";
  Printf.fprintf os "  match t with \n";
  List.iter
    (fun (id,typ) -> 
      Printf.fprintf os "  | %s %s -> %s \n" 
        id
        (match typ with Some _ -> "_fspars_x" | None -> "")
        (match typ with Some _ -> "Microsoft.FSharp.Core.Operators.box _fspars_x" | None -> "(null : System.Object)"))
    spec.tokens;

      let tyv = if !compat then "Lexing.position" else "'a" in 
      let tychar = "'cty" in 

  List.iter 
    (fun id -> 
      if not (List.mem_assoc id spec.types) then 
        failwith ("a %type declaration is required for for start token "^id);
      let ty = List.assoc id spec.types in 
      Printf.fprintf osi "val %s : (Microsoft.FSharp.Tools.FsLex.LexBuffer<%s,%s> -> token) -> Microsoft.FSharp.Tools.FsLex.LexBuffer<%s,%s> -> (%s) \n" id tyv tychar tyv tychar ty)
    spec.starts;
  let nStates = Array.length states in 
  begin 
    Printf.fprintf os "let _fspars_gotos = \"" ;
    let numGotoNonTerminals = (Array.length gotoTable.(0)) in 
    let gotoIndexes = Array.create numGotoNonTerminals 0 in 
    let gotoTableCurrIndex = ref 0 in 
    for j = 0 to numGotoNonTerminals-1 do  
        gotoIndexes.(j) <- !gotoTableCurrIndex;

        (* Count the number of entries in the association table. *)
        let count = ref 0 in 
        for i = 0 to nStates - 1 do 
          let goto = gotoTable.(i).(j) in 
          match goto with 
          | None -> ()
          | Some _ -> incr count
        done;
 
        (* Write the head of the table (i.e. the number of entries and the default value) *)
        gotoTableCurrIndex := !gotoTableCurrIndex + 1;
        output_coded_u16 os !count;
        output_coded_u16 os anyMarker;
        
        (* Write the pairs of entries in incremental order by key *)
        (* This lets us implement the lookup by a binary chop. *)
        for i = 0 to nStates - 1 do 
          let goto = gotoTable.(i).(j) in 
          match goto with 
          | None -> ()
          | Some n -> 
              gotoTableCurrIndex := !gotoTableCurrIndex + 1;
              output_coded_u16 os i;
              output_coded_u16 os n;
        done;
    done;
    Printf.fprintf os "\"B\n" ;
    (* Output offsets into gotos table where the gotos for a particular nonterminal begin *)
    Printf.fprintf os "let _fspars_gotos_row_offsets = \"" ;
    for j = 0 to numGotoNonTerminals-1 do  
      Printf.fprintf os "%a" output_coded_u16 gotoIndexes.(j);
    done;
    Printf.fprintf os "\"B\n" ;
  end;
  begin 
    let numActionRows = (Array.length actionTable) in 
    let maxActionColumns = (Array.length actionTable.(0)) in 
    Printf.fprintf os "let _fspars_action_rows = %d\n" numActionRows;
    Printf.fprintf os "let _fspars_action_table_elements = \"" ;
    let actionIndexes = Array.create numActionRows 0 in 
    
    let actionTableCurrIndex = ref 0 in 
    for i = 0 to nStates -1 do 
      actionIndexes.(i) <- !actionTableCurrIndex;
      let actions = actionTable.(i) in 
      let terminalsByAction = Hashtbl.create 10 in 
      let countPerAction = Hashtbl.create 10 in 
      for terminal = 0 to Array.length actions-1 do  
        let action = snd actions.(terminal) in 
        Hashtbl.add terminalsByAction action terminal;
        if Hashtbl.mem countPerAction action then begin
          let old = Hashtbl.find countPerAction action in 
          Hashtbl.remove countPerAction action;
          Hashtbl.add countPerAction action (old+1)
        end else begin
          Hashtbl.add countPerAction action 1
        end
      done;
      let mostCommonAction = 
        let mostCommon = ref Error in 
        let max = ref 0 in 
        Hashtbl.iter (fun x y -> if y > !max then (mostCommon := x; max := y)) countPerAction;
        !mostCommon in 

      (* Count the number of entries in the association table. *)
      let count = ref 0 in 
      terminalsByAction |> Hashtbl.iter (fun action terminal -> if action <> mostCommonAction then  incr count);

      (* Write the head of the table (i.e. the number of entries and the default value) *)
      actionTableCurrIndex := !actionTableCurrIndex + 1;
      output_coded_u16 os !count;
      output_coded_u16 os (actionCoding mostCommonAction);
      
      (* Write the pairs of entries in incremental order by key *)
      (* This lets us implement the lookup by a binary chop. *)
      for terminal = 0 to Array.length actions-1 do  
        let action = snd actions.(terminal) in 
        if action <> mostCommonAction then begin
            actionTableCurrIndex := !actionTableCurrIndex + 1;
            output_coded_u16 os terminal;
            output_coded_u16 os (actionCoding action);
          end
      done;
    done;
    Printf.fprintf os "\"B\n" ;
    (* Output offsets into actions table where the actions for a particular nonterminal begin *)
    Printf.fprintf os "let _fspars_action_table_row_offsets = \"" ;
    for j = 0 to numActionRows-1 do  
      Printf.fprintf os "%a" output_coded_u16 actionIndexes.(j);
    done;
    Printf.fprintf os "\"B\n" ;

  end;
  begin 
    Printf.fprintf os "let _fspars_reduction_nsyms = \"" ;
    for i = 0 to Array.length prods -1 do 
      let nt,ntIdx,syms,code = prods.(i) in 
      Printf.fprintf os "%a" output_coded_u16 (List.length syms);
    done;
    Printf.fprintf os "\"B\n" ;
  end;
  begin 
    Printf.fprintf os "let _fspars_nonterms = \"" ;
    for i = 0 to Array.length prods -1 do 
      let nt,ntIdx,syms,code = prods.(i) in 
      Printf.fprintf os "%a" output_coded_u16 ntIdx;
    done;
    Printf.fprintf os "\"B\n" ;
  end;
  begin 
    Printf.fprintf os "let _fspars_immediate_actions = \"" ;
    for i = 0 to Array.length immediateActionTable -1 do 
      let prodIdx = immediateActionTable.(i) in
      match prodIdx with
          None     -> Printf.fprintf os "%a" output_coded_u16 anyMarker (* NONE REP *)
        | Some act -> Printf.fprintf os "%a" output_coded_u16 (actionCoding act)
    done;
    Printf.fprintf os "\"B\n" ;
  end;
  
  let getType nt = if List.mem_assoc nt spec.types then  List.assoc nt spec.types else "'"^nt in 
  begin 
    Printf.fprintf os "let _fspars_reductions ()  = [| " ;
    for i = 0 to Array.length prods -1 do 
      let nt,ntIdx,syms,code = prods.(i) in 
      Printf.fprintf os "(fun (parseState : _ Microsoft.FSharp.Tools.FsYacc.IParseState) -> \n%s\n %aMicrosoft.FSharp.Core.Operators.box(\n%a((%a)\n : %s) )); \n" 
        (if !compat then "Parsing.set_parse_state parseState;" else "")
        (fun os syms -> 
          Array.iteri
            (fun i sym -> 
              let tyopt = 
                match sym with
                | T t -> 
                    if List.mem_assoc t spec.tokens then 
                      List.assoc t spec.tokens 
                    else None
                | NT nt -> Some (getType nt) in 
              match tyopt with 
              | Some ty -> Printf.fprintf os "let _%d = let data = parseState.GetData(%d) in (Microsoft.FSharp.Core.Operators.unbox data : %s)in\n" (i+1) (i+1) ty
              | None -> ())
            (Array.of_list syms))
        syms
        (fun os code -> 
          match code with 
          | Some (_,pos) -> 
              Printf.fprintf os "# %d \"%s\"\n" pos.pos_lnum pos.pos_fname
          | None -> ())
        code
        (fun os code ->
          match code with 
          | Some (c,_) -> 
              let dollar = ref false in 
              String.iter
                (fun c -> 
                  if not !dollar & c = '$' then (dollar := true)
                  else if !dollar & c >= '0' & c <= '9' then (dollar := false; Printf.fprintf os "_%c" c)
                  else if !dollar then (dollar := false; output_char os '$'; output_char os c)
                  else output_char os c)
                c;
              if !dollar then output_char os '$'
          | None -> 
              Printf.fprintf os "raise (Microsoft.FSharp.Tools.FsYacc.Accept(Microsoft.FSharp.Core.Operators.box _1))")
        code
        (if List.mem_assoc nt spec.types then  List.assoc nt spec.types else "'"^nt)
    done;
    Printf.fprintf os "|]\n" ;
  end;
  Printf.fprintf os "# %d \"%s\"\n" 20000 output;
  output_string os ("                                           \n\
    let tables () = \n\
      { new Microsoft.FSharp.Tools.FsYacc.Tables<_,_> \n\
        with reductions= _fspars_reductions ();\n\
        and  end_of_input_tag = _fspars_end_of_input_tag;\n\
        and  tagof = _fspars_tagof;\n\
        and  dataof = _fspars_dataof; \n\
        and  action_table_elements = _fspars_action_table_elements;\n\
        and  action_table_row_offsets = _fspars_action_table_row_offsets;\n\
        and  reduction_nsyms = _fspars_reduction_nsyms;\n\
        and  immediate_action = _fspars_immediate_actions;\n\
        and  gotos = _fspars_gotos;\n\
        and  gotos_row_offsets = _fspars_gotos_row_offsets;\n\
        and  tagof_error_term = _fspars_tagof_error_term;\n\
        and  parse_error = parse_error;\n\
        and  nonterms = _fspars_nonterms  }\n\
\n\
let engine lexer lexbuf startState = (tables ()).Interpret(lexer, lexbuf, startState)\n\
");                                                                                                         
  Printf.fprintf os "let token_to_string (t:token) = _fspars_token_to_string t\n";                          
  Printf.fprintf osi "val token_to_string: token -> string\n";
  List.iter2 
    (fun id startState -> 
      if not (List.mem_assoc id spec.types) then 
        failwith ("a %type declaration is required for for start token "^id);
      let ty = List.assoc id spec.types in 
      Printf.fprintf os "let %s (lexer : Microsoft.FSharp.Tools.FsLex.LexBuffer<_,_> -> token) (lexbuf:  Microsoft.FSharp.Tools.FsLex.LexBuffer<_,_>) : (%s) = \n(Microsoft.FSharp.Core.Operators.unbox (engine lexer lexbuf %d) : %s) \n" id ty startState ty)
    spec.starts
    startStates;


  close_out os;
  close_out osi;
  logf (fun oso -> close_out oso)

let _ = 
  try main()
  with e -> 
    Printf.eprintf "Error: %s\n" (match e with Failure s -> s | e -> Printexc.to_string e);
    exit 1

