{-# LANGUAGE NoImplicitPrelude #-}

{-
    BNF Converter: C flex generator
    Copyright (C) 2004  Author:  Michael Pellauer

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
-}

{-
   **************************************************************
    BNF Converter Module

    Description   : This module generates the Flex file. It is
                    similar to JLex but with a few peculiarities.

    Author        : Michael Pellauer (pellauer@cs.chalmers.se)

    License       : GPL (GNU General Public License)

    Created       : 5 August, 2003

    Modified      : 10 August, 2003


   **************************************************************
-}
module BNFC.Backend.C.CFtoFlexC (cf2flex, lexComments, cMacros) where

import Prelude'
import Data.Maybe (fromMaybe)

import BNFC.CF
import BNFC.Backend.C.RegToFlex
import BNFC.Backend.Common.NamedVariables
import BNFC.PrettyPrint
import BNFC.Utils (cstring)

--The environment must be returned for the parser to use.
cf2flex :: String -> CF -> (String, SymEnv)
cf2flex name cf = (unlines
 [
  prelude name,
  cMacros,
  lexSymbols env,
  restOfFlex cf env'
 ], env')
  where
   env = makeSymEnv (cfgSymbols cf ++ reservedWords cf) (0 :: Int)
   env' = env ++ (makeSymEnv (tokenNames cf) (length env))
   makeSymEnv [] _ = []
   makeSymEnv (s:symbs) n = (s, "_SYMB_" ++ (show n)) : (makeSymEnv symbs (n+1))

prelude :: String -> String
prelude name = unlines
  [
   "/* -*- c -*- This FLex file was machine-generated by the BNF converter */",
   -- noinput and nounput are most often unused
   -- https://stackoverflow.com/questions/39075510/option-noinput-nounput-what-are-they-for
   "%option noyywrap noinput nounput",
   "%{",
   "#define yylval " ++ name ++ "lval",
   "#define yylloc " ++ name ++ "lloc",
   "#define YY_BUFFER_APPEND " ++ name ++ "_BUFFER_APPEND",
   "#define YY_BUFFER_RESET " ++ name ++ "_BUFFER_RESET",
   "#define init_lexer " ++ name ++ "_init_lexer",
   "#include <string.h>",
   "#include \"Parser.h\"",
   "#define YY_BUFFER_LENGTH 4096",
   "char YY_PARSED_STRING[YY_BUFFER_LENGTH];",
   "void YY_BUFFER_APPEND(char *s)",
   "{",
   "  strcat(YY_PARSED_STRING, s); //Do something better here!",
   "}",
   "void YY_BUFFER_RESET(void)",
   "{",
   "  int x;",
   "  for(x = 0; x < YY_BUFFER_LENGTH; x++)",
   "    YY_PARSED_STRING[x] = 0;",
   "}",
   -- https://www.gnu.org/software/bison/manual/html_node/Token-Locations.html
   -- Flex is responsible for keeping tracking of the yylloc for Bison.
   -- Flex also doesn't do this automatically so we need this function
   -- https://stackoverflow.com/a/22125500/425756
   "static void update_loc(YYLTYPE* loc, char* text)",
   "{",
   "  loc->first_line = loc->last_line;",
   "  loc->first_column = loc->last_column;",
   "  for (int i = 0; text[i] != '\\0'; ++i) {",
   "      if (text[i] == '\\n') {",
   "          ++loc->last_line;",
   "          loc->last_column = 0; ",
   "      } else {",
   "          ++loc->last_column; ",
   "      }",
   "  }",
   "}",
   "#define YY_USER_ACTION update_loc(&yylloc, yytext);",
   "",
   "%}"
  ]

-- For now all categories are included.
-- Optimally only the ones that are used should be generated.
cMacros :: String
cMacros = unlines
  [
  "LETTER [a-zA-Z]",
  "CAPITAL [A-Z]",
  "SMALL [a-z]",
  "DIGIT [0-9]",
  "IDENT [a-zA-Z0-9'_]",
  "%START YYINITIAL COMMENT CHAR CHARESC CHAREND STRING ESCAPED",
  "%%"
  ]

lexSymbols :: SymEnv -> String
lexSymbols ss = concatMap transSym ss
  where
    transSym (s,r) =
      "<YYINITIAL>\"" ++ s' ++ "\"      \t return " ++ r ++ ";\n"
        where
         s' = escapeChars s

restOfFlex :: CF -> SymEnv -> String
restOfFlex cf env = concat
  [
   render $ lexComments Nothing (comments cf),
   "\n\n",
   userDefTokens,
   ifC catString  strStates,
   ifC catChar    chStates,
   ifC catDouble  "<YYINITIAL>{DIGIT}+\".\"{DIGIT}+(\"e\"(\\-)?{DIGIT}+)?      \t yylval.double_ = atof(yytext); return _DOUBLE_;\n",
   ifC catInteger "<YYINITIAL>{DIGIT}+      \t yylval.int_ = atoi(yytext); return _INTEGER_;\n",
   ifC catIdent   "<YYINITIAL>{LETTER}{IDENT}*      \t yylval.string_ = strdup(yytext); return _IDENT_;\n",
   "<YYINITIAL>[ \\t\\r\\n\\f]      \t /* ignore white space. */;\n",
   "<YYINITIAL>.      \t return _ERROR_;\n",
   "%%\n",
   footer
  ]
  where
   ifC cat s = if isUsedCat cf (TokenCat cat) then s else ""
   userDefTokens = unlines $
     ["<YYINITIAL>" ++ printRegFlex exp ++
      "     \t yylval.string_ = strdup(yytext); return " ++ sName name ++ ";"
       | (name, exp) <- tokenPragmas cf]
      where
        sName n = fromMaybe n $ lookup n env
   strStates = unlines --These handle escaped characters in Strings.
    [
     "<YYINITIAL>\"\\\"\"      \t BEGIN STRING;",
     "<STRING>\\\\      \t BEGIN ESCAPED;",
     "<STRING>\\\"      \t yylval.string_ = strdup(YY_PARSED_STRING); YY_BUFFER_RESET(); BEGIN YYINITIAL; return _STRING_;",
     "<STRING>.      \t YY_BUFFER_APPEND(yytext);",
     "<ESCAPED>n      \t YY_BUFFER_APPEND(\"\\n\"); BEGIN STRING;",
     "<ESCAPED>\\\"      \t YY_BUFFER_APPEND(\"\\\"\"); BEGIN STRING ;",
     "<ESCAPED>\\\\      \t YY_BUFFER_APPEND(\"\\\\\"); BEGIN STRING;",
     "<ESCAPED>t       \t YY_BUFFER_APPEND(\"\\t\"); BEGIN STRING;",
     "<ESCAPED>.       \t YY_BUFFER_APPEND(yytext); BEGIN STRING;"
    ]
   chStates = unlines --These handle escaped characters in Chars.
    [
     "<YYINITIAL>\"'\" \tBEGIN CHAR;",
     "<CHAR>\\\\      \t BEGIN CHARESC;",
     "<CHAR>[^']      \t BEGIN CHAREND; yylval.char_ = yytext[0]; return _CHAR_;",
     "<CHARESC>n      \t BEGIN CHAREND; yylval.char_ = '\\n'; return _CHAR_;",
     "<CHARESC>t      \t BEGIN CHAREND; yylval.char_ = '\\t'; return _CHAR_;",
     "<CHARESC>.      \t BEGIN CHAREND; yylval.char_ = yytext[0]; return _CHAR_;",
     "<CHAREND>\"'\"      \t BEGIN YYINITIAL;"
    ]
   footer = unlines
    [
     "void init_lexer(FILE *inp)",
     "{",
     "  yyrestart(inp);",
     "  yylloc.first_line = 1;",
     "  yylloc.first_column = 1;",
     "  yylloc.last_line = 1;",
     "  yylloc.last_column = 1;",
     "  BEGIN YYINITIAL;",
     "}"
    ]

-- ---------------------------------------------------------------------------
-- Comments

-- | Create flex rules for single-line and multi-lines comments.
-- The first argument is an optional namespace (for C++); the second
-- argument is the set of comment delimiters as returned by BNFC.CF.comments.
--
-- This function is only compiling the results of applying either
-- lexSingleComment or lexMultiComment on each comment delimiter or pair of
-- delimiters.
--
-- >>> lexComments (Just "myns.") ([("{-","-}")],["--"])
-- <YYINITIAL>"--"[^\n]*\n /* skip */; // BNFC: comment "--";
-- <YYINITIAL>"{-" BEGIN COMMENT; // BNFC: comment "{-" "-}";
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] /* skip */;
lexComments :: Maybe String -> ([(String, String)], [String]) -> Doc
lexComments _ (m,s) =
    vcat (map lexSingleComment s ++ map lexMultiComment m)

-- | Create a lexer rule for single-line comments.
-- The first argument is -- an optional c++ namespace
-- The second argument is the delimiter that marks the beginning of the
-- comment.
--
-- >>> lexSingleComment "--"
-- <YYINITIAL>"--"[^\n]*\n /* skip */; // BNFC: comment "--";
--
-- >>> lexSingleComment "\""
-- <YYINITIAL>"\""[^\n]*\n /* skip */; // BNFC: comment "\"";
lexSingleComment :: String -> Doc
lexSingleComment c =
    "<YYINITIAL>" <> cstring c <> "[^\\n]*\\n"
    <+> "/* skip */;"
    <+> "// BNFC: comment" <+> cstring c <> ";"

-- | Create a lexer rule for multi-lines comments.
-- The first argument is -- an optional c++ namespace
-- The second arguments is the pair of delimiter for the multi-lines comment:
-- start deleminiter and end delimiter.
-- There might be a possible bug here if a language includes 2 multi-line
-- comments. They could possibly start a comment with one character and end it
-- with another.  However this seems rare.
--
-- >>> lexMultiComment ("{-", "-}")
-- <YYINITIAL>"{-" BEGIN COMMENT; // BNFC: comment "{-" "-}";
-- <COMMENT>"-}" BEGIN YYINITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] /* skip */;
--
-- >>> lexMultiComment ("\"'", "'\"")
-- <YYINITIAL>"\"'" BEGIN COMMENT; // BNFC: comment "\"'" "'\"";
-- <COMMENT>"'\"" BEGIN YYINITIAL;
-- <COMMENT>. /* skip */;
-- <COMMENT>[\n] /* skip */;
lexMultiComment :: (String, String) -> Doc
lexMultiComment (b,e) = vcat
    [ "<YYINITIAL>" <> cstring b <+> "BEGIN COMMENT;"
        <+> "// BNFC: comment" <+> cstring b <+> cstring e <> ";"
    , "<COMMENT>" <> cstring e <+> "BEGIN YYINITIAL;"
    , "<COMMENT>. /* skip */;"
    , "<COMMENT>[\\n] /* skip */;"
    ]

-- --There might be a possible bug here if a language includes 2 multi-line comments.
-- --They could possibly start a comment with one character and end it with another.
-- --However this seems rare.
-- --
-- lexMultiComment :: Maybe String -> (String, String) -> String
-- lexMultiComment inPackage (b,e) = unlines [
--   "<YYINITIAL>\"" ++ b ++ "\"      \t BEGIN COMMENT;",
--   "<COMMENT>\"" ++ e ++ "\"      \t BEGIN YYINITIAL;",
--   "<COMMENT>.      \t /* BNFC multi-line comment */;",
--   "<COMMENT>[\\n]   ++" ++ nsString inPackage ++ "yy_mylinenumber ; \t /* BNFC multi-line comment */;"
--  ---- "\\n  ++yy_mylinenumber ;"
--   ]
--Helper function that escapes characters in strings
escapeChars :: String -> String
escapeChars [] = []
escapeChars ('\\':xs) = '\\' : ('\\' : (escapeChars xs))
escapeChars ('\"':xs) = '\\' : ('\"' : (escapeChars xs))
escapeChars (x:xs) = x : (escapeChars xs)
