open FStar_Parser_Parse
open FStar_Parser_Util

module Option  = BatOption
module String  = BatString
module Hashtbl = BatHashtbl
module Ulexing = FStar_Ulexing
module L = Ulexing
module E = FStar_Errors

(* [ba_of_string s] is the array holding the character code of every
   character of [s], in order. *)
let ba_of_string s =
  let len = String.length s in
  Array.init len (fun idx -> Char.code s.[idx])
(* [array_trim_both a n m] is a fresh array made of the elements of [a]
   without its first [n] and last [m] elements.  [Array.sub] rejects the
   request (Invalid_argument) when the resulting slice is not valid. *)
let array_trim_both a n m =
  let kept = Array.length a - n - m in
  Array.sub a n kept
(* [string_trim_both s n m] is [s] without its first [n] and last [m]
   characters. *)
let string_trim_both s n m =
  let kept = String.length s - (n + m) in
  BatString.sub s n kept
(* Same trimming applied to the current lexeme of [lexbuf], taken as a string. *)
let trim_both   lexbuf n m =
  let text = L.lexeme lexbuf in
  string_trim_both text n m
(* Same trimming applied to the current lexeme of [lexbuf], taken in the
   array form returned by [L.ulexeme]. *)
let utrim_both  lexbuf n m =
  let codes = L.ulexeme lexbuf in
  array_trim_both codes n m
(* Current lexeme without its last [n] characters. *)
let trim_right  lexbuf n = string_trim_both (L.lexeme lexbuf) 0 n
(* Current lexeme without its first [n] characters. *)
let trim_left   lexbuf n = string_trim_both (L.lexeme lexbuf) n 0

(* [unescape a] decodes the character denoted by the code array [a].

   When [a] starts with a backslash (code 92) the next code selects the escape:
   the single-letter escapes 0, b, t, n, v, f and r map to the codes 0, 8, 9,
   10, 11, 12 and 13; u is followed by four hexadecimal digits and x by two;
   any other escaped code stands for itself.  When [a] does not start with a
   backslash its first code is returned unchanged.

   Indexing is unchecked beyond what [Array.get] does, so [a] must hold at
   least one code, and at least two when it starts with a backslash. *)
let unescape (a:int array) : int =
  (* Read [len] codes starting at index 2 as text and parse them as a
     hexadecimal number. *)
  let hex_payload len = int_of_string ("0x" ^ Utf8.from_int_array a 2 len) in
  let first = a.(0) in
  if first <> 92 (* \ *) then first
  else
    match a.(1) with
    | 48  (*0*) -> 0
    | 98  (*b*) -> 8
    | 116 (*t*) -> 9
    | 110 (*n*) -> 10
    | 118 (*v*) -> 11
    | 102 (*f*) -> 12
    | 114 (*r*) -> 13
    | 117 (*u*) -> hex_payload 4
    | 120 (*x*) -> hex_payload 2
    | other -> other

(* Lookup tables keyed by source spelling.  All three are created empty (the
   size argument 0 is only an initial hint) and are filled by the
   initialisation block that follows. *)
(* Reserved words and reserved Greek letters. *)
let keywords = Hashtbl.create 0
(* Unicode shorthands that stand for named identifiers. *)
let constructors = Hashtbl.create 0
(* Punctuation and operator spellings, ASCII and Unicode. *)
let operators = Hashtbl.create 0

(* Fill [keywords], [constructors] and [operators] once, at module
   initialisation.  Each table is described by an association list and the
   entries are inserted in list order with [Hashtbl.add]. *)
let () =
  (* Bind every (spelling, token) pair of [entries] in [table]. *)
  let register table entries =
    List.iter (fun (spelling, tok) -> Hashtbl.add table spelling tok) entries in
  register keywords [
    "abstract",              ABSTRACT;
    "attributes",            ATTRIBUTES;
    "noeq",                  NOEQUALITY;
    "unopteq",               UNOPTEQUALITY;
    "and",                   AND;
    "assert",                ASSERT;
    "assume",                ASSUME;
    "begin",                 BEGIN;
    "by",                    BY;
    "calc",                  CALC;
    "class",                 CLASS;
    "default",               DEFAULT;
    "effect",                EFFECT;
    "else",                  ELSE;
    "end",                   END;
    "ensures",               ENSURES;
    "exception",             EXCEPTION;
    "exists",                EXISTS;
    "false",                 FALSE;
    "friend",                FRIEND;
    "forall",                FORALL;
    "fun",                   FUN;
    "λ",                     FUN;
    "function",              FUNCTION;
    "if",                    IF;
    "in",                    IN;
    "include",               INCLUDE;
    "inline",                INLINE;
    "inline_for_extraction", INLINE_FOR_EXTRACTION;
    "instance",              INSTANCE;
    "irreducible",           IRREDUCIBLE;
    "let",                   LET false;
    "logic",                 LOGIC;
    "match",                 MATCH;
    "module",                MODULE;
    "new",                   NEW;
    "new_effect",            NEW_EFFECT;
    "layered_effect",        LAYERED_EFFECT;
    "polymonadic_bind",      POLYMONADIC_BIND;
    "noextract",             NOEXTRACT;
    "of",                    OF;
    "open",                  OPEN;
    "opaque",                OPAQUE;
    "private",               PRIVATE;
    "range_of",              RANGE_OF;
    "rec",                   REC;
    "reifiable",             REIFIABLE;
    "reify",                 REIFY;
    "reflectable",           REFLECTABLE;
    "requires",              REQUIRES;
    "set_range_of",          SET_RANGE_OF;
    "sub_effect",            SUB_EFFECT;
    "synth",                 SYNTH;
    "then",                  THEN;
    "total",                 TOTAL;
    "true",                  TRUE;
    "try",                   TRY;
    "type",                  TYPE;
    "unfold",                UNFOLD;
    "unfoldable",            UNFOLDABLE;
    "val",                   VAL;
    "when",                  WHEN;
    "with",                  WITH;
    "_",                     UNDERSCORE;
    (* Greek letters are reserved as type variables *)
    "α",                     TVAR "a";
    "β",                     TVAR "b";
    "γ",                     TVAR "c";
    "δ",                     TVAR "d";
    "ε",                     TVAR "e";
    "φ",                     TVAR "f";
    "χ",                     TVAR "g";
    "η",                     TVAR "h";
    "ι",                     TVAR "i";
    "κ",                     TVAR "k";
    "μ",                     TVAR "m";
    "ν",                     TVAR "n";
    "π",                     TVAR "p";
    "θ",                     TVAR "q";
    "ρ",                     TVAR "r";
    "σ",                     TVAR "s";
    "τ",                     TVAR "t";
    "ψ",                     TVAR "u";
    "ω",                     TVAR "w";
    "ξ",                     TVAR "x";
    "ζ",                     TVAR "z";
  ];
  register constructors [
    "ℕ", IDENT "nat";
    "ℤ", IDENT "int";
    "𝔹", IDENT "bool";
  ];
  register operators [
    "~",         TILDE "~";
    "-",         MINUS;
    "/\\",       CONJUNCTION;
    "\\/",       DISJUNCTION;
    "<:",        SUBTYPE;
    "<@",        SUBKIND;
    "(|",        LENS_PAREN_LEFT;
    "|)",        LENS_PAREN_RIGHT;
    "#",         HASH;
    "u#",        UNIV_HASH;
    "&",         AMP;
    "()",        LPAREN_RPAREN;
    "(",         LPAREN;
    ")",         RPAREN;
    ",",         COMMA;
    "~>",        SQUIGGLY_RARROW;
    "->",        RARROW;
    "<--",       LONG_LEFT_ARROW;
    "<-",        LARROW;
    "<==>",      IFF;
    "==>",       IMPLIES;
    ".",         DOT;
    "?.",        QMARK_DOT;
    "?",         QMARK;
    ".[",        DOT_LBRACK;
    ".(|",       DOT_LENS_PAREN_LEFT;
    ".(",        DOT_LPAREN;
    ".[|",       DOT_LBRACK_BAR;
    "{:pattern", LBRACE_COLON_PATTERN;
    ":",         COLON;
    "::",        COLON_COLON;
    ":=",        COLON_EQUALS;
    ";;",        SEMICOLON_SEMICOLON;
    ";",         SEMICOLON;
    "=",         EQUALS;
    "%[",        PERCENT_LBRACK;
    "!{",        BANG_LBRACE;
    "[@",        LBRACK_AT;
    "[",         LBRACK;
    "[|",        LBRACK_BAR;
    "|>",        PIPE_RIGHT;
    "]",         RBRACK;
    "|]",        BAR_RBRACK;
    "{",         LBRACE;
    "|",         BAR;
    "}",         RBRACE;
    "$",         DOLLAR;
    (* New Unicode equivalents *)
    "∀",         FORALL;
    "∃",         EXISTS;
    "⊤",         NAME "True";
    "⊥",         NAME "False";
    "⟹",         IMPLIES;
    "⟺",         IFF;
    "→",         RARROW;
    "←",         LARROW;
    "⟵",         LONG_LEFT_ARROW;
    "↝",         SQUIGGLY_RARROW;
    "≔",         COLON_EQUALS;
    "∧",         CONJUNCTION;
    "∨",         DISJUNCTION;
    "¬",         TILDE "~";
    "⸬",         COLON_COLON;
    "▹",         PIPE_RIGHT;
    "÷",         OPINFIX3 "÷";
    "‖",         OPINFIX0a "||";
    "×",         IDENT "op_Multiply";
    "∗",         OPINFIX3 "*";
    "⇒",         OPINFIX0c "=>";
    "≥",         OPINFIX0c ">=";
    "≤",         OPINFIX0c "<=";
    "≠",         OPINFIX0c "<>";
    "≪",         OPINFIX0c "<<";
    "◃",         OPINFIX0c "<|";
    "±",         OPPREFIX "±";
    "∁",         OPPREFIX "∁";
    "∂",         OPPREFIX "∂";
    "√",         OPPREFIX "√";
  ]

(* Source range of the current lexeme of [lexbuf], built from the two
   positions returned by [L.range]. *)
let current_range lexbuf =
  let (start_pos, end_pos) = L.range lexbuf in
  FStar_Parser_Util.mksyn_range start_pos end_pos

(* Report the error (code, message) through [E.raise_error], located at the
   range of the current lexeme of [lexbuf]. *)
let fail lexbuf ((_code, _msg) as err) =
  E.raise_error err (current_range lexbuf)

(* Mutable nesting counters: [angle] counts currently open angle brackets and
   [paren] currently open parentheses.  Used by the scan in [is_typ_app]. *)
type delimiters = { angle:int ref; paren:int ref; }
(* Number of type applications recognised by [is_typ_app] that have not yet
   been consumed by [is_typ_app_gt]. *)
let n_typ_apps = ref 0

(* [is_typ_app lexbuf] decides, by scanning ahead, whether the angle bracket
   under the cursor opens a bracketed type application.

   It answers [false] straight away unless [FStar_Options.fs_typ_app] is set
   for the source file of [lexbuf].  Otherwise it scans the text returned by
   [L.lookahead] and answers [true] when the brackets balance; in that case
   [n_typ_apps] is incremented so that [is_typ_app_gt] can later recognise the
   matching closing bracket.

   Every exception raised during the scan, including the error reported by
   [fail] and any out-of-bounds access, is caught: a message is printed on
   standard output and the answer is [false]. *)
(* ADL: unicode identifiers won't work with --fs_typ_app
   Since this is only used for bootstrapping I am not going to bother fixing this *)
let is_typ_app lexbuf =
  if not (FStar_Options.fs_typ_app (L.source_file lexbuf)) then false
  else
   try
    (* Characters allowed between the brackets: ASCII letters, digits and a
       small set of punctuation and blanks.  Anything else aborts the scan. *)
    let char_ok = function
      | '(' | ')' | '<' | '>' | '*' | '-' | '\'' | '_' | ',' | '.' | ' ' | '\t' -> true
      | c when c >= 'A' && c <= 'Z' -> true
      | c when c >= 'a' && c <= 'z' -> true
      | c when c >= '0' && c <= '9' -> true
      | _ -> false in
    (* [balanced contents pos] expects an opening angle bracket at [pos] and
       scans forward from [pos + 1] until both counters are back to zero. *)
    let balanced (contents:string) pos =
      if contents.[pos] <> '<' then (fail lexbuf (E.Fatal_SyntaxError, "Unexpected position in is_typ_lapp"));
      (* The bracket at [pos] is already open, hence [angle] starts at 1. *)
      let d = {angle=ref 1; paren=ref 0} in
      (* Update the counters for the character at index [i].  A closing angle
         bracket preceded by a dash is part of an arrow and is not counted. *)
      let upd i = match contents.[i] with
        | '(' -> incr d.paren
        | ')' -> decr d.paren
        | '<' -> incr d.angle
        | '>' when contents.[i-1] <> '-' -> decr d.angle
        | _ -> () in
      (* No counter has gone negative, i.e. nothing was closed before being opened. *)
      let ok () = !(d.angle) >= 0 && !(d.paren) >= 0 in
      (* Success as soon as everything is closed.  Failure at the end of the
         text, on a negative counter, on a disallowed character, or when the
         remaining text starts with the keyword "then". *)
      let rec aux i =
        if !(d.angle)=0 && !(d.paren)=0 then true
        else if i >= String.length contents || not (ok ()) || (not (char_ok (contents.[i]))) || FStar_Util.(starts_with (substring_from contents (Z.of_int i)) "then") then false
        else (upd i; aux (i + 1))
      in aux (pos + 1)
    in
    (* NOTE(review): assumes the text returned by [L.lookahead] starts at the
       opening bracket, one position before the cursor; confirm in Ulexing. *)
    let res = balanced (L.lookahead lexbuf (L.get_cur lexbuf - 1)) 0 in
    if res then incr n_typ_apps; res
   with e -> Printf.printf "Resolving typ_app<...> syntax failed.\n"; false

(* [is_typ_app_gt ()] tells whether a type application recorded in
   [n_typ_apps] is still pending; when one is, it is consumed (the counter is
   decremented) and the answer is [true]. *)
let is_typ_app_gt () =
  let pending = !n_typ_apps > 0 in
  if pending then decr n_typ_apps;
  pending

(* [mknewline n lexbuf] calls [L.new_line lexbuf] exactly [n] times.

   A non-positive [n] is a no-op.  The previous recursive version only stopped
   on [n = 0], so a negative count never reached its base case. *)
let mknewline n lexbuf =
  for _i = 1 to n do
    L.new_line lexbuf
  done

(* [clean_number x] removes, from both ends of [x], every character that
   belongs to the set u z y s l L U n I N ([String] is [BatString] in this
   file).  Presumably these are the suffix letters of numeric literals;
   confirm against the lexer rules that call this function. *)
let clean_number x = String.strip ~chars:"uzyslLUnIN" x

(* Try to trim the continuation lines of [comment] by the amount of space
   that precedes the comment on its first line, if possible.

   [start_column] is the column at which the comment starts in the source and
   [comment] is the comment text starting at its opening delimiter.  The first
   line therefore carries no indentation and is returned unchanged.  Every
   following line loses the same number of leading characters: the smallest
   count of leading spaces found on a continuation line, capped at
   [start_column].  Lines made only of spaces (or empty) that are not longer
   than the cap do not lower that width; they simply become empty when they
   are shorter than it.

   The comment is returned unchanged when [start_column] is not positive or
   when it fits on one line.

   The previous version also fed the first line to the width computation.
   That line starts with the comment opener, not with a space, so the width
   was always 0 and nothing was ever trimmed. *)
(* TODO : apply this to FSDOC too *)
let maybe_trim_lines start_column comment =
  if start_column <= 0 then comment
  else
    match String.split_on_char '\n' comment with
    | [] | [_] -> comment
    | first_line :: continuation ->
      (* Lower [cap] to the number of leading spaces of [line], unless the
         examined prefix of [line] holds nothing but spaces. *)
      let narrow cap line =
        let len = String.length line in
        let limit = min cap len in
        let rec scan i =
          if i < limit && line.[i] = ' ' then scan (i + 1) else i in
        let spaces = scan 0 in
        if spaces >= len then cap else spaces in
      let trim_width = List.fold_left narrow start_column continuation in
      (* Drop the first [trim_width] characters; shorter lines become empty. *)
      let drop line =
        let len = String.length line in
        if trim_width >= len then ""
        else String.sub line trim_width (len - trim_width) in
      String.concat "\n" (first_line :: List.map drop continuation)

(* Buffer in which the text of the comment being lexed is accumulated. *)
let comment_buffer = Buffer.create 128

(* [start_comment lexbuf] appends the comment opener to [comment_buffer] and
   returns the triple made of the flag [false], the buffer itself, and the
   start position of the current lexeme. *)
let start_comment lexbuf =
  let opening_pos = fst (L.range lexbuf) in
  Buffer.add_string comment_buffer "(*";
  (false, comment_buffer, opening_pos)

(* [terminate_comment buffer startpos lexbuf] closes the comment accumulated
   in [buffer]: the closing delimiter is appended, the text is passed through
   [maybe_trim_lines] with the column of [startpos], [buffer] is emptied, and
   the comment is recorded with [add_comment] together with the range running
   from [startpos] to the end of the current lexeme. *)
let terminate_comment buffer startpos lexbuf =
  let endpos = snd (L.range lexbuf) in
  Buffer.add_string buffer "*)";
  (* Column of the comment start = offset in the file minus offset of the
     beginning of its line. *)
  let { Lexing.pos_cnum; pos_bol; _ } = startpos in
  let text = maybe_trim_lines (pos_cnum - pos_bol) (Buffer.contents buffer) in
  Buffer.clear buffer;
  add_comment (text, FStar_Parser_Util.mksyn_range startpos endpos)

(* [push_one_line_comment pre lexbuf] records, with [add_comment], the text
   [pre] followed by the current lexeme, located at the range of that lexeme.
   The lexeme must start and end on the same line (checked by an assertion). *)
let push_one_line_comment pre lexbuf =
  let (startpos, endpos) = L.range lexbuf in
  assert (startpos.Lexing.pos_lnum = endpos.Lexing.pos_lnum);
  let text = pre ^ L.lexeme lexbuf in
  let span = FStar_Parser_Util.mksyn_range startpos endpos in
  add_comment (text, span)

(** Unicode class definitions
  Auto-generated from http:/ /www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt **)
(** Ll: Letter, lowercase *)
let regexp u_lower = [0x0061-0x007a 0x00b5 0x00df-0x00f6 0x00f8-0x00ff 0x0101 0x0103 0x0105 0x0107 0x0109 0x010b 0x010d 0x010f 0x0111 0x0113 0x0115 0x0117 0x0119 0x011b 0x011d 0x011f 0x0121 0x0123 0x0125 0x0127 0x0129 0x012b 0x012d 0x012f 0x0131 0x0133 0x0135 0x0137-0x0138 0x013a 0x013c 0x013e 0x0140 0x0142 0x0144 0x0146 0x0148-0x0149 0x014b 0x014d 0x014f 0x0151 0x0153 0x0155 0x0157 0x0159 0x015b 0x015d 0x015f 0x0161 0x0163 0x0165 0x0167 0x0169 0x016b 0x016d 0x016f 0x0171 0x0173 0x0175 0x0177 0x017a 0x017c 0x017e-0x0180 0x0183 0x0185 0x0188 0x018c-0x018d 0x0192 0x0195 0x0199-0x019b 0x019e 0x01a1 0x01a3 0x01a5 0x01a8 0x01aa-0x01ab 0x01ad 0x01b0 0x01b4 0x01b6 0x01b9-0x01ba 0x01bd-0x01bf 0x01c6 0x01c9 0x01cc 0x01ce 0x01d0 0x01d2 0x01d4 0x01d6 0x01d8 0x01da 0x01dc-0x01dd 0x01df 0x01e1 0x01e3 0x01e5 0x01e7 0x01e9 0x01eb 0x01ed 0x01ef-0x01f0 0x01f3 0x01f5 0x01f9 0x01fb 0x01fd 0x01ff 0x0201 0x0203 0x0205 0x0207 0x0209 0x020b 0x020d 0x020f 0x0211 0x0213 0x0215 0x0217 0x0219 0x021b 0x021d 0x021f 0x0221 0x0223 0x0225 0x0227 0x0229 0x022b 0x022d 0x022f 0x0231 0x0233-0x0239 0x023c 0x023f-0x0240 0x0242 0x0247 0x0249 0x024b 0x024d 0x024f-0x0293 0x0295-0x02af 0x0371 0x0373 0x0377 0x037b-0x037d 0x0390 0x03ac-0x03ce 0x03d0-0x03d1 0x03d5-0x03d7 0x03d9 0x03db 0x03dd 0x03df 0x03e1 0x03e3 0x03e5 0x03e7 0x03e9 0x03eb 0x03ed 0x03ef-0x03f3 0x03f5 0x03f8 0x03fb-0x03fc 0x0430-0x045f 0x0461 0x0463 0x0465 0x0467 0x0469 0x046b 0x046d 0x046f 0x0471 0x0473 0x0475 0x0477 0x0479 0x047b 0x047d 0x047f 0x0481 0x048b 0x048d 0x048f 0x0491 0x0493 0x0495 0x0497 0x0499 0x049b 0x049d 0x049f 0x04a1 0x04a3 0x04a5 0x04a7 0x04a9 0x04ab 0x04ad 0x04af 0x04b1 0x04b3 0x04b5 0x04b7 0x04b9 0x04bb 0x04bd 0x04bf 0x04c2 0x04c4 0x04c6 0x04c8 0x04ca 0x04cc 0x04ce-0x04cf 0x04d1 0x04d3 0x04d5 0x04d7 0x04d9 0x04db 0x04dd 0x04df 0x04e1 0x04e3 0x04e5 0x04e7 0x04e9 0x04eb 0x04ed 0x04ef 0x04f1 0x04f3 0x04f5 0x04f7 0x04f9 0x04fb 0x04fd 0x04ff 0x0501 0x0503 0x0505 0x0507 0x0509 0x050b 0x050d 0x050f 0x0511 0x0513 0x0515 0x0517 
0x0519 0x051b 0x051d 0x051f 0x0521 0x0523 0x0525 0x0527 0x0529 0x052b 0x052d 0x052f 0x0561-0x0587 0x13f8-0x13fd 0x1d00-0x1d2b 0x1d6b-0x1d77 0x1d79-0x1d9a 0x1e01 0x1e03 0x1e05 0x1e07 0x1e09 0x1e0b 0x1e0d 0x1e0f 0x1e11 0x1e13 0x1e15 0x1e17 0x1e19 0x1e1b 0x1e1d 0x1e1f 0x1e21 0x1e23 0x1e25 0x1e27 0x1e29 0x1e2b 0x1e2d 0x1e2f 0x1e31 0x1e33 0x1e35 0x1e37 0x1e39 0x1e3b 0x1e3d 0x1e3f 0x1e41 0x1e43 0x1e45 0x1e47 0x1e49 0x1e4b 0x1e4d 0x1e4f 0x1e51 0x1e53 0x1e55 0x1e57 0x1e59 0x1e5b 0x1e5d 0x1e5f 0x1e61 0x1e63 0x1e65 0x1e67 0x1e69 0x1e6b 0x1e6d 0x1e6f 0x1e71 0x1e73 0x1e75 0x1e77 0x1e79 0x1e7b 0x1e7d 0x1e7f 0x1e81 0x1e83 0x1e85 0x1e87 0x1e89 0x1e8b 0x1e8d 0x1e8f 0x1e91 0x1e93 0x1e95-0x1e9d 0x1e9f 0x1ea1 0x1ea3 0x1ea5 0x1ea7 0x1ea9 0x1eab 0x1ead 0x1eaf 0x1eb1 0x1eb3 0x1eb5 0x1eb7 0x1eb9 0x1ebb 0x1ebd 0x1ebf 0x1ec1 0x1ec3 0x1ec5 0x1ec7 0x1ec9 0x1ecb 0x1ecd 0x1ecf 0x1ed1 0x1ed3 0x1ed5 0x1ed7 0x1ed9 0x1edb 0x1edd 0x1edf 0x1ee1 0x1ee3 0x1ee5 0x1ee7 0x1ee9 0x1eeb 0x1eed 0x1eef 0x1ef1 0x1ef3 0x1ef5 0x1ef7 0x1ef9 0x1efb 0x1efd 0x1eff-0x1f07 0x1f10-0x1f15 0x1f20-0x1f27 0x1f30-0x1f37 0x1f40-0x1f45 0x1f50-0x1f57 0x1f60-0x1f67 0x1f70-0x1f7d 0x1f80-0x1f87 0x1f90-0x1f97 0x1fa0-0x1fa7 0x1fb0-0x1fb4 0x1fb6-0x1fb7 0x1fbe 0x1fc2-0x1fc4 0x1fc6-0x1fc7 0x1fd0-0x1fd3 0x1fd6-0x1fd7 0x1fe0-0x1fe7 0x1ff2-0x1ff4 0x1ff6-0x1ff7 0x210a 0x210e-0x210f 0x2113 0x212f 0x2134 0x2139 0x213c-0x213d 0x2146-0x2149 0x214e 0x2184 0x2c30-0x2c5e 0x2c61 0x2c65-0x2c66 0x2c68 0x2c6a 0x2c6c 0x2c71 0x2c73-0x2c74 0x2c76-0x2c7b 0x2c81 0x2c83 0x2c85 0x2c87 0x2c89 0x2c8b 0x2c8d 0x2c8f 0x2c91 0x2c93 0x2c95 0x2c97 0x2c99 0x2c9b 0x2c9d 0x2c9f 0x2ca1 0x2ca3 0x2ca5 0x2ca7 0x2ca9 0x2cab 0x2cad 0x2caf 0x2cb1 0x2cb3 0x2cb5 0x2cb7 0x2cb9 0x2cbb 0x2cbd 0x2cbf 0x2cc1 0x2cc3 0x2cc5 0x2cc7 0x2cc9 0x2ccb 0x2ccd 0x2ccf 0x2cd1 0x2cd3 0x2cd5 0x2cd7 0x2cd9 0x2cdb 0x2cdd 0x2cdf 0x2ce1 0x2ce3-0x2ce4 0x2cec 0x2cee 0x2cf3 0x2d00-0x2d25 0x2d27 0x2d2d 0xa641 0xa643 0xa645 0xa647 0xa649 0xa64b 0xa64d 0xa64f 0xa651 0xa653 0xa655 0xa657 0xa659 0xa65b 
0xa65d 0xa65f 0xa661 0xa663 0xa665 0xa667 0xa669 0xa66b 0xa66d 0xa681 0xa683 0xa685 0xa687 0xa689 0xa68b 0xa68d 0xa68f 0xa691 0xa693 0xa695 0xa697 0xa699 0xa69b 0xa723 0xa725 0xa727 0xa729 0xa72b 0xa72d 0xa72f-0xa731 0xa733 0xa735 0xa737 0xa739 0xa73b 0xa73d 0xa73f 0xa741 0xa743 0xa745 0xa747 0xa749 0xa74b 0xa74d 0xa74f 0xa751 0xa753 0xa755 0xa757 0xa759 0xa75b 0xa75d 0xa75f 0xa761 0xa763 0xa765 0xa767 0xa769 0xa76b 0xa76d 0xa76f 0xa771-0xa778 0xa77a 0xa77c 0xa77f 0xa781 0xa783 0xa785 0xa787 0xa78c 0xa78e 0xa791 0xa793-0xa795 0xa797 0xa799 0xa79b 0xa79d 0xa79f 0xa7a1 0xa7a3 0xa7a5 0xa7a7 0xa7a9 0xa7b5 0xa7b7 0xa7fa 0xab30-0xab5a 0xab60-0xab65 0xab70-0xabbf 0xfb00-0xfb06 0xfb13-0xfb17 0xff41-0xff5a 0x10428-0x1044f 0x10cc0-0x10cf2 0x118c0-0x118df 0x1d41a-0x1d433 0x1d44e-0x1d454 0x1d456-0x1d467 0x1d482-0x1d49b 0x1d4b6-0x1d4b9 0x1d4bb 0x1d4bd-0x1d4c3 0x1d4c5-0x1d4cf 0x1d4ea-0x1d503 0x1d51e-0x1d537 0x1d552-0x1d56b 0x1d586-0x1d59f 0x1d5ba-0x1d5d3 0x1d5ee-0x1d607 0x1d622-0x1d63b 0x1d656-0x1d66f 0x1d68a-0x1d6a5 0x1d6c2-0x1d6da 0x1d6dc-0x1d6e1 0x1d6fc-0x1d714 0x1d716-0x1d71b 0x1d736-0x1d74e 0x1d750-0x1d755 0x1d770-0x1d788 0x1d78a-0x1d78f 0x1d7aa-0x1d7c2 0x1d7c4-0x1d7c9 0x1d7cb]
(** Lu: Letter, uppercase *)
let regexp u_upper = [0x0041-0x005a 0x00c0-0x00d6 0x00d8-0x00de 0x0100 0x0102 0x0104 0x0106 0x0108 0x010a 0x010c 0x010e 0x0110 0x0112 0x0114 0x0116 0x0118 0x011a 0x011c 0x011e 0x0120 0x0122 0x0124 0x0126 0x0128 0x012a 0x012c 0x012e 0x0130 0x0132 0x0134 0x0136 0x0139 0x013b 0x013d 0x013f 0x0141 0x0143 0x0145 0x0147 0x014a 0x014c 0x014e 0x0150 0x0152 0x0154 0x0156 0x0158 0x015a 0x015c 0x015e 0x0160 0x0162 0x0164 0x0166 0x0168 0x016a 0x016c 0x016e 0x0170 0x0172 0x0174 0x0176 0x0178-0x0179 0x017b 0x017d 0x0181-0x0182 0x0184 0x0186-0x0187 0x0189-0x018b 0x018e-0x0191 0x0193-0x0194 0x0196-0x0198 0x019c-0x019d 0x019f-0x01a0 0x01a2 0x01a4 0x01a6-0x01a7 0x01a9 0x01ac 0x01ae-0x01af 0x01b1-0x01b3 0x01b5 0x01b7-0x01b8 0x01bc 0x01c4 0x01c7 0x01ca 0x01cd 0x01cf 0x01d1 0x01d3 0x01d5 0x01d7 0x01d9 0x01db 0x01de 0x01e0 0x01e2 0x01e4 0x01e6 0x01e8 0x01ea 0x01ec 0x01ee 0x01f1 0x01f4 0x01f6-0x01f8 0x01fa 0x01fc 0x01fe 0x0200 0x0202 0x0204 0x0206 0x0208 0x020a 0x020c 0x020e 0x0210 0x0212 0x0214 0x0216 0x0218 0x021a 0x021c 0x021e 0x0220 0x0222 0x0224 0x0226 0x0228 0x022a 0x022c 0x022e 0x0230 0x0232 0x023a-0x023b 0x023d-0x023e 0x0241 0x0243-0x0246 0x0248 0x024a 0x024c 0x024e 0x0370 0x0372 0x0376 0x037f 0x0386 0x0388-0x038a 0x038c 0x038e-0x038f 0x0391-0x03a1 0x03a3-0x03ab 0x03cf 0x03d2-0x03d4 0x03d8 0x03da 0x03dc 0x03de 0x03e0 0x03e2 0x03e4 0x03e6 0x03e8 0x03ea 0x03ec 0x03ee 0x03f4 0x03f7 0x03f9-0x03fa 0x03fd-0x042f 0x0460 0x0462 0x0464 0x0466 0x0468 0x046a 0x046c 0x046e 0x0470 0x0472 0x0474 0x0476 0x0478 0x047a 0x047c 0x047e 0x0480 0x048a 0x048c 0x048e 0x0490 0x0492 0x0494 0x0496 0x0498 0x049a 0x049c 0x049e 0x04a0 0x04a2 0x04a4 0x04a6 0x04a8 0x04aa 0x04ac 0x04ae 0x04b0 0x04b2 0x04b4 0x04b6 0x04b8 0x04ba 0x04bc 0x04be 0x04c0-0x04c1 0x04c3 0x04c5 0x04c7 0x04c9 0x04cb 0x04cd 0x04d0 0x04d2 0x04d4 0x04d6 0x04d8 0x04da 0x04dc 0x04de 0x04e0 0x04e2 0x04e4 0x04e6 0x04e8 0x04ea 0x04ec 0x04ee 0x04f0 0x04f2 0x04f4 0x04f6 0x04f8 0x04fa 0x04fc 0x04fe 0x0500 0x0502 0x0504 0x0506 0x0508 0x050a 0x050c 
0x050e 0x0510 0x0512 0x0514 0x0516 0x0518 0x051a 0x051c 0x051e 0x0520 0x0522 0x0524 0x0526 0x0528 0x052a 0x052c 0x052e 0x0531-0x0556 0x10a0-0x10c5 0x10c7 0x10cd 0x13a0-0x13f5 0x1e00 0x1e02 0x1e04 0x1e06 0x1e08 0x1e0a 0x1e0c 0x1e0e 0x1e10 0x1e12 0x1e14 0x1e16 0x1e18 0x1e1a 0x1e1c 0x1e1e 0x1e20 0x1e22 0x1e24 0x1e26 0x1e28 0x1e2a 0x1e2c 0x1e2e 0x1e30 0x1e32 0x1e34 0x1e36 0x1e38 0x1e3a 0x1e3c 0x1e3e 0x1e40 0x1e42 0x1e44 0x1e46 0x1e48 0x1e4a 0x1e4c 0x1e4e 0x1e50 0x1e52 0x1e54 0x1e56 0x1e58 0x1e5a 0x1e5c 0x1e5e 0x1e60 0x1e62 0x1e64 0x1e66 0x1e68 0x1e6a 0x1e6c 0x1e6e 0x1e70 0x1e72 0x1e74 0x1e76 0x1e78 0x1e7a 0x1e7c 0x1e7e 0x1e80 0x1e82 0x1e84 0x1e86 0x1e88 0x1e8a 0x1e8c 0x1e8e 0x1e90 0x1e92 0x1e94 0x1e9e 0x1ea0 0x1ea2 0x1ea4 0x1ea6 0x1ea8 0x1eaa 0x1eac 0x1eae 0x1eb0 0x1eb2 0x1eb4 0x1eb6 0x1eb8 0x1eba 0x1ebc 0x1ebe 0x1ec0 0x1ec2 0x1ec4 0x1ec6 0x1ec8 0x1eca 0x1ecc 0x1ece 0x1ed0 0x1ed2 0x1ed4 0x1ed6 0x1ed8 0x1eda 0x1edc 0x1ede 0x1ee0 0x1ee2 0x1ee4 0x1ee6 0x1ee8 0x1eea 0x1eec 0x1eee 0x1ef0 0x1ef2 0x1ef4 0x1ef6 0x1ef8 0x1efa 0x1efc 0x1efe 0x1f08-0x1f0f 0x1f18-0x1f1d 0x1f28-0x1f2f 0x1f38-0x1f3f 0x1f48-0x1f4d 0x1f59 0x1f5b 0x1f5d 0x1f5f 0x1f68-0x1f6f 0x1fb8-0x1fbb 0x1fc8-0x1fcb 0x1fd8-0x1fdb 0x1fe8-0x1fec 0x1ff8-0x1ffb 0x2102 0x2107 0x210b-0x210d 0x2110-0x2112 0x2115 0x2119-0x211d 0x2124 0x2126 0x2128 0x212a-0x212d 0x2130-0x2133 0x213e-0x213f 0x2145 0x2183 0x2c00-0x2c2e 0x2c60 0x2c62-0x2c64 0x2c67 0x2c69 0x2c6b 0x2c6d-0x2c70 0x2c72 0x2c75 0x2c7e-0x2c80 0x2c82 0x2c84 0x2c86 0x2c88 0x2c8a 0x2c8c 0x2c8e 0x2c90 0x2c92 0x2c94 0x2c96 0x2c98 0x2c9a 0x2c9c 0x2c9e 0x2ca0 0x2ca2 0x2ca4 0x2ca6 0x2ca8 0x2caa 0x2cac 0x2cae 0x2cb0 0x2cb2 0x2cb4 0x2cb6 0x2cb8 0x2cba 0x2cbc 0x2cbe 0x2cc0 0x2cc2 0x2cc4 0x2cc6 0x2cc8 0x2cca 0x2ccc 0x2cce 0x2cd0 0x2cd2 0x2cd4 0x2cd6 0x2cd8 0x2cda 0x2cdc 0x2cde 0x2ce0 0x2ce2 0x2ceb 0x2ced 0x2cf2 0xa640 0xa642 0xa644 0xa646 0xa648 0xa64a 0xa64c 0xa64e 0xa650 0xa652 0xa654 0xa656 0xa658 0xa65a 0xa65c 0xa65e 0xa660 0xa662 0xa664 0xa666 0xa668 0xa66a 0xa66c 0xa680 
0xa682 0xa684 0xa686 0xa688 0xa68a 0xa68c 0xa68e 0xa690 0xa692 0xa694 0xa696 0xa698 0xa69a 0xa722 0xa724 0xa726 0xa728 0xa72a 0xa72c 0xa72e 0xa732 0xa734 0xa736 0xa738 0xa73a 0xa73c 0xa73e 0xa740 0xa742 0xa744 0xa746 0xa748 0xa74a 0xa74c 0xa74e 0xa750 0xa752 0xa754 0xa756 0xa758 0xa75a 0xa75c 0xa75e 0xa760 0xa762 0xa764 0xa766 0xa768 0xa76a 0xa76c 0xa76e 0xa779 0xa77b 0xa77d-0xa77e 0xa780 0xa782 0xa784 0xa786 0xa78b 0xa78d 0xa790 0xa792 0xa796 0xa798 0xa79a 0xa79c 0xa79e 0xa7a0 0xa7a2 0xa7a4 0xa7a6 0xa7a8 0xa7aa-0xa7ad 0xa7b0-0xa7b4 0xa7b6 0xff21-0xff3a 0x10400-0x10427 0x10c80-0x10cb2 0x118a0-0x118bf 0x1d400-0x1d419 0x1d434-0x1d44d 0x1d468-0x1d481 0x1d49c 0x1d49e-0x1d49f 0x1d4a2 0x1d4a5-0x1d4a6 0x1d4a9-0x1d4ac 0x1d4ae-0x1d4b5 0x1d4d0-0x1d4e9 0x1d504-0x1d505 0x1d507-0x1d50a 0x1d50d-0x1d514 0x1d516-0x1d51c 0x1d538-0x1d539 0x1d53b-0x1d53e 0x1d540-0x1d544 0x1d546 0x1d54a-0x1d550 0x1d56c-0x1d585 0x1d5a0-0x1d5b9 0x1d5d4-0x1d5ed 0x1d608-0x1d621 0x1d63c-0x1d655 0x1d670-0x1d689 0x1d6a8-0x1d6c0 0x1d6e2-0x1d6fa 0x1d71c-0x1d734 0x1d756-0x1d76e 0x1d790-0x1d7a8 0x1d7ca]
(** Lo: Letter, other.
    UnicodeData.txt describes the large ideograph and syllable blocks with a
    pair of rows (First / Last) rather than one row per code point.  Those
    pairs are ranges and are written as such here: CJK Extension A
    (0x3400-0x4db5), CJK Unified Ideographs (0x4e00-0x9fd5), Hangul Syllables
    (0xac00-0xd7a3) and CJK Extensions B to E (0x20000-0x2a6d6,
    0x2a700-0x2b734, 0x2b740-0x2b81d, 0x2b820-0x2cea1). *)
let regexp u_other = [0x00aa 0x00ba 0x01bb 0x01c0-0x01c3 0x0294 0x05d0-0x05ea 0x05f0-0x05f2 0x0620-0x063f 0x0641-0x064a 0x066e-0x066f 0x0671-0x06d3 0x06d5 0x06ee-0x06ef 0x06fa-0x06fc 0x06ff 0x0710 0x0712-0x072f 0x074d-0x07a5 0x07b1 0x07ca-0x07ea 0x0800-0x0815 0x0840-0x0858 0x08a0-0x08b4 0x0904-0x0939 0x093d 0x0950 0x0958-0x0961 0x0972-0x0980 0x0985-0x098c 0x098f-0x0990 0x0993-0x09a8 0x09aa-0x09b0 0x09b2 0x09b6-0x09b9 0x09bd 0x09ce 0x09dc-0x09dd 0x09df-0x09e1 0x09f0-0x09f1 0x0a05-0x0a0a 0x0a0f-0x0a10 0x0a13-0x0a28 0x0a2a-0x0a30 0x0a32-0x0a33 0x0a35-0x0a36 0x0a38-0x0a39 0x0a59-0x0a5c 0x0a5e 0x0a72-0x0a74 0x0a85-0x0a8d 0x0a8f-0x0a91 0x0a93-0x0aa8 0x0aaa-0x0ab0 0x0ab2-0x0ab3 0x0ab5-0x0ab9 0x0abd 0x0ad0 0x0ae0-0x0ae1 0x0af9 0x0b05-0x0b0c 0x0b0f-0x0b10 0x0b13-0x0b28 0x0b2a-0x0b30 0x0b32-0x0b33 0x0b35-0x0b39 0x0b3d 0x0b5c-0x0b5d 0x0b5f-0x0b61 0x0b71 0x0b83 0x0b85-0x0b8a 0x0b8e-0x0b90 0x0b92-0x0b95 0x0b99-0x0b9a 0x0b9c 0x0b9e-0x0b9f 0x0ba3-0x0ba4 0x0ba8-0x0baa 0x0bae-0x0bb9 0x0bd0 0x0c05-0x0c0c 0x0c0e-0x0c10 0x0c12-0x0c28 0x0c2a-0x0c39 0x0c3d 0x0c58-0x0c5a 0x0c60-0x0c61 0x0c85-0x0c8c 0x0c8e-0x0c90 0x0c92-0x0ca8 0x0caa-0x0cb3 0x0cb5-0x0cb9 0x0cbd 0x0cde 0x0ce0-0x0ce1 0x0cf1-0x0cf2 0x0d05-0x0d0c 0x0d0e-0x0d10 0x0d12-0x0d3a 0x0d3d 0x0d4e 0x0d5f-0x0d61 0x0d7a-0x0d7f 0x0d85-0x0d96 0x0d9a-0x0db1 0x0db3-0x0dbb 0x0dbd 0x0dc0-0x0dc6 0x0e01-0x0e30 0x0e32-0x0e33 0x0e40-0x0e45 0x0e81-0x0e82 0x0e84 0x0e87-0x0e88 0x0e8a 0x0e8d 0x0e94-0x0e97 0x0e99-0x0e9f 0x0ea1-0x0ea3 0x0ea5 0x0ea7 0x0eaa-0x0eab 0x0ead-0x0eb0 0x0eb2-0x0eb3 0x0ebd 0x0ec0-0x0ec4 0x0edc-0x0edf 0x0f00 0x0f40-0x0f47 0x0f49-0x0f6c 0x0f88-0x0f8c 0x1000-0x102a 0x103f 0x1050-0x1055 0x105a-0x105d 0x1061 0x1065-0x1066 0x106e-0x1070 0x1075-0x1081 0x108e 0x10d0-0x10fa 0x10fd-0x1248 0x124a-0x124d 0x1250-0x1256 0x1258 0x125a-0x125d 0x1260-0x1288 0x128a-0x128d 0x1290-0x12b0 0x12b2-0x12b5 0x12b8-0x12be 0x12c0 0x12c2-0x12c5 0x12c8-0x12d6 0x12d8-0x1310 0x1312-0x1315 0x1318-0x135a 0x1380-0x138f 0x1401-0x166c 0x166f-0x167f 0x1681-0x169a 
0x16a0-0x16ea 0x16f1-0x16f8 0x1700-0x170c 0x170e-0x1711 0x1720-0x1731 0x1740-0x1751 0x1760-0x176c 0x176e-0x1770 0x1780-0x17b3 0x17dc 0x1820-0x1842 0x1844-0x1877 0x1880-0x18a8 0x18aa 0x18b0-0x18f5 0x1900-0x191e 0x1950-0x196d 0x1970-0x1974 0x1980-0x19ab 0x19b0-0x19c9 0x1a00-0x1a16 0x1a20-0x1a54 0x1b05-0x1b33 0x1b45-0x1b4b 0x1b83-0x1ba0 0x1bae-0x1baf 0x1bba-0x1be5 0x1c00-0x1c23 0x1c4d-0x1c4f 0x1c5a-0x1c77 0x1ce9-0x1cec 0x1cee-0x1cf1 0x1cf5-0x1cf6 0x2135-0x2138 0x2d30-0x2d67 0x2d80-0x2d96 0x2da0-0x2da6 0x2da8-0x2dae 0x2db0-0x2db6 0x2db8-0x2dbe 0x2dc0-0x2dc6 0x2dc8-0x2dce 0x2dd0-0x2dd6 0x2dd8-0x2dde 0x3006 0x303c 0x3041-0x3096 0x309f 0x30a1-0x30fa 0x30ff 0x3105-0x312d 0x3131-0x318e 0x31a0-0x31ba 0x31f0-0x31ff 0x3400-0x4db5 0x4e00-0x9fd5 0xa000-0xa014 0xa016-0xa48c 0xa4d0-0xa4f7 0xa500-0xa60b 0xa610-0xa61f 0xa62a-0xa62b 0xa66e 0xa6a0-0xa6e5 0xa78f 0xa7f7 0xa7fb-0xa801 0xa803-0xa805 0xa807-0xa80a 0xa80c-0xa822 0xa840-0xa873 0xa882-0xa8b3 0xa8f2-0xa8f7 0xa8fb 0xa8fd 0xa90a-0xa925 0xa930-0xa946 0xa960-0xa97c 0xa984-0xa9b2 0xa9e0-0xa9e4 0xa9e7-0xa9ef 0xa9fa-0xa9fe 0xaa00-0xaa28 0xaa40-0xaa42 0xaa44-0xaa4b 0xaa60-0xaa6f 0xaa71-0xaa76 0xaa7a 0xaa7e-0xaaaf 0xaab1 0xaab5-0xaab6 0xaab9-0xaabd 0xaac0 0xaac2 0xaadb-0xaadc 0xaae0-0xaaea 0xaaf2 0xab01-0xab06 0xab09-0xab0e 0xab11-0xab16 0xab20-0xab26 0xab28-0xab2e 0xabc0-0xabe2 0xac00-0xd7a3 0xd7b0-0xd7c6 0xd7cb-0xd7fb 0xf900-0xfa6d 0xfa70-0xfad9 0xfb1d 0xfb1f-0xfb28 0xfb2a-0xfb36 0xfb38-0xfb3c 0xfb3e 0xfb40-0xfb41 0xfb43-0xfb44 0xfb46-0xfbb1 0xfbd3-0xfd3d 0xfd50-0xfd8f 0xfd92-0xfdc7 0xfdf0-0xfdfb 0xfe70-0xfe74 0xfe76-0xfefc 0xff66-0xff6f 0xff71-0xff9d 0xffa0-0xffbe 0xffc2-0xffc7 0xffca-0xffcf 0xffd2-0xffd7 0xffda-0xffdc 0x10000-0x1000b 0x1000d-0x10026 0x10028-0x1003a 0x1003c-0x1003d 0x1003f-0x1004d 0x10050-0x1005d 0x10080-0x100fa 0x10280-0x1029c 0x102a0-0x102d0 0x10300-0x1031f 0x10330-0x10340 0x10342-0x10349 0x10350-0x10375 0x10380-0x1039d 0x103a0-0x103c3 0x103c8-0x103cf 0x10450-0x1049d 0x10500-0x10527 0x10530-0x10563 0x10600-0x10736 
0x10740-0x10755 0x10760-0x10767 0x10800-0x10805 0x10808 0x1080a-0x10835 0x10837-0x10838 0x1083c 0x1083f-0x10855 0x10860-0x10876 0x10880-0x1089e 0x108e0-0x108f2 0x108f4-0x108f5 0x10900-0x10915 0x10920-0x10939 0x10980-0x109b7 0x109be-0x109bf 0x10a00 0x10a10-0x10a13 0x10a15-0x10a17 0x10a19-0x10a33 0x10a60-0x10a7c 0x10a80-0x10a9c 0x10ac0-0x10ac7 0x10ac9-0x10ae4 0x10b00-0x10b35 0x10b40-0x10b55 0x10b60-0x10b72 0x10b80-0x10b91 0x10c00-0x10c48 0x11003-0x11037 0x11083-0x110af 0x110d0-0x110e8 0x11103-0x11126 0x11150-0x11172 0x11176 0x11183-0x111b2 0x111c1-0x111c4 0x111da 0x111dc 0x11200-0x11211 0x11213-0x1122b 0x11280-0x11286 0x11288 0x1128a-0x1128d 0x1128f-0x1129d 0x1129f-0x112a8 0x112b0-0x112de 0x11305-0x1130c 0x1130f-0x11310 0x11313-0x11328 0x1132a-0x11330 0x11332-0x11333 0x11335-0x11339 0x1133d 0x11350 0x1135d-0x11361 0x11480-0x114af 0x114c4-0x114c5 0x114c7 0x11580-0x115ae 0x115d8-0x115db 0x11600-0x1162f 0x11644 0x11680-0x116aa 0x11700-0x11719 0x118ff 0x11ac0-0x11af8 0x12000-0x12399 0x12480-0x12543 0x13000-0x1342e 0x14400-0x14646 0x16800-0x16a38 0x16a40-0x16a5e 0x16ad0-0x16aed 0x16b00-0x16b2f 0x16b63-0x16b77 0x16b7d-0x16b8f 0x16f00-0x16f44 0x16f50 0x1b000-0x1b001 0x1bc00-0x1bc6a 0x1bc70-0x1bc7c 0x1bc80-0x1bc88 0x1bc90-0x1bc99 0x1e800-0x1e8c4 0x1ee00-0x1ee03 0x1ee05-0x1ee1f 0x1ee21-0x1ee22 0x1ee24 0x1ee27 0x1ee29-0x1ee32 0x1ee34-0x1ee37 0x1ee39 0x1ee3b 0x1ee42 0x1ee47 0x1ee49 0x1ee4b 0x1ee4d-0x1ee4f 0x1ee51-0x1ee52 0x1ee54 0x1ee57 0x1ee59 0x1ee5b 0x1ee5d 0x1ee5f 0x1ee61-0x1ee62 0x1ee64 0x1ee67-0x1ee6a 0x1ee6c-0x1ee72 0x1ee74-0x1ee77 0x1ee79-0x1ee7c 0x1ee7e 0x1ee80-0x1ee89 0x1ee8b-0x1ee9b 0x1eea1-0x1eea3 0x1eea5-0x1eea9 0x1eeab-0x1eebb 0x20000-0x2a6d6 0x2a700-0x2b734 0x2b740-0x2b81d 0x2b820-0x2cea1 0x2f800-0x2fa1d]
(** Lm: Letter, modifier *)
let regexp u_modifier = [0x02b0-0x02c1 0x02c6-0x02d1 0x02e0-0x02e4 0x02ec 0x02ee 0x0374 0x037a 0x0559 0x0640 0x06e5-0x06e6 0x07f4-0x07f5 0x07fa 0x081a 0x0824 0x0828 0x0971 0x0e46 0x0ec6 0x10fc 0x17d7 0x1843 0x1aa7 0x1c78-0x1c7d 0x1d2c-0x1d6a 0x1d78 0x1d9b-0x1dbf 0x2071 0x207f 0x2090-0x209c 0x2c7c-0x2c7d 0x2d6f 0x2e2f 0x3005 0x3031-0x3035 0x303b 0x309d-0x309e 0x30fc-0x30fe 0xa015 0xa4f8-0xa4fd 0xa60c 0xa67f 0xa69c-0xa69d 0xa717-0xa71f 0xa770 0xa788 0xa7f8-0xa7f9 0xa9cf 0xa9e6 0xaa70 0xaadd 0xaaf3-0xaaf4 0xab5c-0xab5f 0xff70 0xff9e-0xff9f 0x16b40-0x16b43 0x16f93-0x16f9f]
(** Lt: Letter, titlecase *)
let regexp u_title = [0x01c5 0x01c8 0x01cb 0x01f2 0x1f88-0x1f8f 0x1f98-0x1f9f 0x1fa8-0x1faf 0x1fbc 0x1fcc 0x1ffc]
(** Zs: Separator, space *)
let regexp u_space = [0x0020 0x00a0 0x1680 0x2000-0x200a 0x202f 0x205f 0x3000]
(** These are not unicode spaces but we accept as whitespace in F* source (e.g. tab and BOM) *)
let regexp u_space_extra = ['\t' '\x0B' '\x0C' '\xA0' 0xfeff]
(** Zl (Separator, line) and Zp (Separator, paragraph) *)
let regexp u_line_sep = [0x2028]
let regexp u_par_sep = [0x2029]
(** Sm math symbols *)
let regexp u_math_nonascii = [0x00ac 0x00b1 0x00d7 0x00f7 0x03f6 0x0606-0x0608 0x2044 0x2052 0x207a-0x207c 0x208a-0x208c 0x2118 0x2140-0x2144 0x214b 0x2190-0x2194 0x219a-0x219b 0x21a0 0x21a3 0x21a6 0x21ae 0x21ce-0x21cf 0x21d2 0x21d4 0x21f4-0x22ff 0x2320-0x2321 0x237c 0x239b-0x23b3 0x23dc-0x23e1 0x25b7 0x25c1 0x25f8-0x25ff 0x266f 0x27c0-0x27c4 0x27c7-0x27e5 0x27f0-0x27ff 0x2900-0x2982 0x2999-0x29d7 0x29dc-0x29fb 0x29fe-0x2aff 0x2b30-0x2b44 0x2b47-0x2b4c 0xfb29 0xfe62 0xfe64-0xfe66 0xff0b 0xff1c-0xff1e 0xff5c 0xff5e 0xffe2 0xffe9-0xffec 0x1d6c1 0x1d6db 0x1d6fb 0x1d715 0x1d735 0x1d74f 0x1d76f 0x1d789 0x1d7a9 0x1d7c3 0x1eef0-0x1eef1]
let regexp u_math = [0x002b 0x003c-0x003e 0x007c 0x007e] | u_math_nonascii
(** Sc: Symbol, currency *)
let regexp u_currency = [0x0024 0x00a2-0x00a5 0x058f 0x060b 0x09f2-0x09f3 0x09fb 0x0af1 0x0bf9 0x0e3f 0x17db 0x20a0-0x20be 0xa838 0xfdfc 0xfe69 0xff04 0xffe0-0xffe1 0xffe5-0xffe6]
(** Sk: Symbol, modifier *)
let regexp u_modifier_symbol = [0x005e 0x0060 0x00a8 0x00af 0x00b4 0x00b8 0x02c2-0x02c5 0x02d2-0x02df 0x02e5-0x02eb 0x02ed 0x02ef-0x02ff 0x0375 0x0384-0x0385 0x1fbd 0x1fbf-0x1fc1 0x1fcd-0x1fcf 0x1fdd-0x1fdf 0x1fed-0x1fef 0x1ffd-0x1ffe 0x309b-0x309c 0xa700-0xa716 0xa720-0xa721 0xa789-0xa78a 0xab5b 0xfbb2-0xfbc1 0xff3e 0xff40 0xffe3 0x1f3fb-0x1f3ff]
(** So: Symbol, other *)
let regexp u_other_symbol = [0x00a6 0x00a9 0x00ae 0x00b0 0x0482 0x058d-0x058e 0x060e-0x060f 0x06de 0x06e9 0x06fd-0x06fe 0x07f6 0x09fa 0x0b70 0x0bf3-0x0bf8 0x0bfa 0x0c7f 0x0d79 0x0f01-0x0f03 0x0f13 0x0f15-0x0f17 0x0f1a-0x0f1f 0x0f34 0x0f36 0x0f38 0x0fbe-0x0fc5 0x0fc7-0x0fcc 0x0fce-0x0fcf 0x0fd5-0x0fd8 0x109e-0x109f 0x1390-0x1399 0x1940 0x19de-0x19ff 0x1b61-0x1b6a 0x1b74-0x1b7c 0x2100-0x2101 0x2103-0x2106 0x2108-0x2109 0x2114 0x2116-0x2117 0x211e-0x2123 0x2125 0x2127 0x2129 0x212e 0x213a-0x213b 0x214a 0x214c-0x214d 0x214f 0x218a-0x218b 0x2195-0x2199 0x219c-0x219f 0x21a1-0x21a2 0x21a4-0x21a5 0x21a7-0x21ad 0x21af-0x21cd 0x21d0-0x21d1 0x21d3 0x21d5-0x21f3 0x2300-0x2307 0x230c-0x231f 0x2322-0x2328 0x232b-0x237b 0x237d-0x239a 0x23b4-0x23db 0x23e2-0x23fa 0x2400-0x2426 0x2440-0x244a 0x249c-0x24e9 0x2500-0x25b6 0x25b8-0x25c0 0x25c2-0x25f7 0x2600-0x266e 0x2670-0x2767 0x2794-0x27bf 0x2800-0x28ff 0x2b00-0x2b2f 0x2b45-0x2b46 0x2b4d-0x2b73 0x2b76-0x2b95 0x2b98-0x2bb9 0x2bbd-0x2bc8 0x2bca-0x2bd1 0x2bec-0x2bef 0x2ce5-0x2cea 0x2e80-0x2e99 0x2e9b-0x2ef3 0x2f00-0x2fd5 0x2ff0-0x2ffb 0x3004 0x3012-0x3013 0x3020 0x3036-0x3037 0x303e-0x303f 0x3190-0x3191 0x3196-0x319f 0x31c0-0x31e3 0x3200-0x321e 0x322a-0x3247 0x3250 0x3260-0x327f 0x328a-0x32b0 0x32c0-0x32fe 0x3300-0x33ff 0x4dc0-0x4dff 0xa490-0xa4c6 0xa828-0xa82b 0xa836-0xa837 0xa839 0xaa77-0xaa79 0xfdfd 0xffe4 0xffe8 0xffed-0xffee 0xfffc-0xfffd 0x10137-0x1013f 0x10179-0x10189 0x1018c 0x10190-0x1019b 0x101a0 0x101d0-0x101fc 0x10877-0x10878 0x10ac8 0x1173f 0x16b3c-0x16b3f 0x16b45 0x1bc9c 0x1d000-0x1d0f5 0x1d100-0x1d126 0x1d129-0x1d164 0x1d16a-0x1d16c 0x1d183-0x1d184 0x1d18c-0x1d1a9 0x1d1ae-0x1d1e8 0x1d200-0x1d241 0x1d245 0x1d300-0x1d356 0x1d800-0x1d9ff 0x1da37-0x1da3a 0x1da6d-0x1da74 0x1da76-0x1da83 0x1da85-0x1da86 0x1f000-0x1f02b 0x1f030-0x1f093 0x1f0a0-0x1f0ae 0x1f0b1-0x1f0bf 0x1f0c1-0x1f0cf 0x1f0d1-0x1f0f5 0x1f110-0x1f12e 0x1f130-0x1f16b 0x1f170-0x1f19a 0x1f1e6-0x1f202 0x1f210-0x1f23a 0x1f240-0x1f248 0x1f250-0x1f251 0x1f300-0x1f3fa 
0x1f400-0x1f579 0x1f57b-0x1f5a3 0x1f5a5-0x1f6d0 0x1f6e0-0x1f6ec 0x1f6f0-0x1f6f3 0x1f700-0x1f773 0x1f780-0x1f7d4 0x1f800-0x1f80b 0x1f810-0x1f847 0x1f850-0x1f859 0x1f860-0x1f887 0x1f890-0x1f8ad 0x1f910-0x1f918 0x1f980-0x1f984 0x1f9c0]
(** Nd: Number, decimal digit *)
let regexp u_decimal_digit = [0x0030-0x0039 0x0660-0x0669 0x06f0-0x06f9 0x07c0-0x07c9 0x0966-0x096f 0x09e6-0x09ef 0x0a66-0x0a6f 0x0ae6-0x0aef 0x0b66-0x0b6f 0x0be6-0x0bef 0x0c66-0x0c6f 0x0ce6-0x0cef 0x0d66-0x0d6f 0x0de6-0x0def 0x0e50-0x0e59 0x0ed0-0x0ed9 0x0f20-0x0f29 0x1040-0x1049 0x1090-0x1099 0x17e0-0x17e9 0x1810-0x1819 0x1946-0x194f 0x19d0-0x19d9 0x1a80-0x1a89 0x1a90-0x1a99 0x1b50-0x1b59 0x1bb0-0x1bb9 0x1c40-0x1c49 0x1c50-0x1c59 0xa620-0xa629 0xa8d0-0xa8d9 0xa900-0xa909 0xa9d0-0xa9d9 0xa9f0-0xa9f9 0xaa50-0xaa59 0xabf0-0xabf9 0xff10-0xff19 0x104a0-0x104a9 0x11066-0x1106f 0x110f0-0x110f9 0x11136-0x1113f 0x111d0-0x111d9 0x112f0-0x112f9 0x114d0-0x114d9 0x11650-0x11659 0x116c0-0x116c9 0x11730-0x11739 0x118e0-0x118e9 0x16a60-0x16a69 0x16b50-0x16b59 0x1d7ce-0x1d7ff]
(** Nl -- Unicode general category Letter_Number: numerals written with
    letter-like symbols (for example the Roman numeral block at 0x2160).
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_digit_letter = [0x16ee-0x16f0 0x2160-0x2182 0x2185-0x2188 0x3007 0x3021-0x3029 0x3038-0x303a 0xa6e6-0xa6ef 0x10140-0x10174 0x10341 0x1034a 0x103d1-0x103d5 0x12400-0x1246e]
(** No -- Unicode general category Other_Number: superscript and subscript
    digits, vulgar fractions, circled numbers and similar numeric symbols.
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_other_digit = [0x00b2-0x00b3 0x00b9 0x00bc-0x00be 0x09f4-0x09f9 0x0b72-0x0b77 0x0bf0-0x0bf2 0x0c78-0x0c7e 0x0d70-0x0d75 0x0f2a-0x0f33 0x1369-0x137c 0x17f0-0x17f9 0x19da 0x2070 0x2074-0x2079 0x2080-0x2089 0x2150-0x215f 0x2189 0x2460-0x249b 0x24ea-0x24ff 0x2776-0x2793 0x2cfd 0x3192-0x3195 0x3220-0x3229 0x3248-0x324f 0x3251-0x325f 0x3280-0x3289 0x32b1-0x32bf 0xa830-0xa835 0x10107-0x10133 0x10175-0x10178 0x1018a-0x1018b 0x102e1-0x102fb 0x10320-0x10323 0x10858-0x1085f 0x10879-0x1087f 0x108a7-0x108af 0x108fb-0x108ff 0x10916-0x1091b 0x109bc-0x109bd 0x109c0-0x109cf 0x109d2-0x109ff 0x10a40-0x10a47 0x10a7d-0x10a7e 0x10a9d-0x10a9f 0x10aeb-0x10aef 0x10b58-0x10b5f 0x10b78-0x10b7f 0x10ba9-0x10baf 0x10cfa-0x10cff 0x10e60-0x10e7e 0x11052-0x11065 0x111e1-0x111f4 0x1173a-0x1173b 0x118ea-0x118f2 0x16b5b-0x16b61 0x1d360-0x1d371 0x1e8c7-0x1e8cf 0x1f100-0x1f10c]
(** Pd -- Unicode general category Dash_Punctuation: hyphens and dashes,
    starting with the ASCII hyphen-minus 0x002d.
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_punct_hyphen = [0x002d 0x058a 0x05be 0x1400 0x1806 0x2010-0x2015 0x2e17 0x2e1a 0x2e3a-0x2e3b 0x2e40 0x301c 0x3030 0x30a0 0xfe31-0xfe32 0xfe58 0xfe63 0xff0d]
(** Ps -- Unicode general category Open_Punctuation: opening brackets,
    starting with the ASCII left parenthesis, square bracket and brace.
    Every entry is a single code point. *)
let regexp u_punct_obra = [0x0028 0x005b 0x007b 0x0f3a 0x0f3c 0x169b 0x201a 0x201e 0x2045 0x207d 0x208d 0x2308 0x230a 0x2329 0x2768 0x276a 0x276c 0x276e 0x2770 0x2772 0x2774 0x27c5 0x27e6 0x27e8 0x27ea 0x27ec 0x27ee 0x2983 0x2985 0x2987 0x2989 0x298b 0x298d 0x298f 0x2991 0x2993 0x2995 0x2997 0x29d8 0x29da 0x29fc 0x2e22 0x2e24 0x2e26 0x2e28 0x2e42 0x3008 0x300a 0x300c 0x300e 0x3010 0x3014 0x3016 0x3018 0x301a 0x301d 0xfd3f 0xfe17 0xfe35 0xfe37 0xfe39 0xfe3b 0xfe3d 0xfe3f 0xfe41 0xfe43 0xfe47 0xfe59 0xfe5b 0xfe5d 0xff08 0xff3b 0xff5b 0xff5f 0xff62]
(** Pe -- Unicode general category Close_Punctuation: closing brackets,
    starting with the ASCII right parenthesis, square bracket and brace.
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_punct_cbra = [0x0029 0x005d 0x007d 0x0f3b 0x0f3d 0x169c 0x2046 0x207e 0x208e 0x2309 0x230b 0x232a 0x2769 0x276b 0x276d 0x276f 0x2771 0x2773 0x2775 0x27c6 0x27e7 0x27e9 0x27eb 0x27ed 0x27ef 0x2984 0x2986 0x2988 0x298a 0x298c 0x298e 0x2990 0x2992 0x2994 0x2996 0x2998 0x29d9 0x29db 0x29fd 0x2e23 0x2e25 0x2e27 0x2e29 0x3009 0x300b 0x300d 0x300f 0x3011 0x3015 0x3017 0x3019 0x301b 0x301e-0x301f 0xfd3e 0xfe18 0xfe36 0xfe38 0xfe3a 0xfe3c 0xfe3e 0xfe40 0xfe42 0xfe44 0xfe48 0xfe5a 0xfe5c 0xfe5e 0xff09 0xff3d 0xff5d 0xff60 0xff63]
(** Pi -- Unicode general category Initial_Punctuation: opening quotation
    marks.  Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_punct_oquot = [0x00ab 0x2018 0x201b-0x201c 0x201f 0x2039 0x2e02 0x2e04 0x2e09 0x2e0c 0x2e1c 0x2e20]
(** Pf -- Unicode general category Final_Punctuation: closing quotation
    marks.  Every entry is a single code point. *)
let regexp u_punct_cquot = [0x00bb 0x2019 0x201d 0x203a 0x2e03 0x2e05 0x2e0a 0x2e0d 0x2e1d 0x2e21]
(** Pc -- Unicode general category Connector_Punctuation: the ASCII
    underscore 0x005f and its non-ASCII relatives.
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_punct_connect = [0x005f 0x203f-0x2040 0x2054 0xfe33-0xfe34 0xfe4d-0xfe4f 0xff3f]
(** Po -- Unicode general category Other_Punctuation: punctuation that is
    not a dash, bracket, quotation mark or connector.
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_punct_other = [0x0021-0x0023 0x0025-0x0027 0x002a 0x002c 0x002e-0x002f 0x003a-0x003b 0x003f-0x0040 0x005c 0x00a1 0x00a7 0x00b6-0x00b7 0x00bf 0x037e 0x0387 0x055a-0x055f 0x0589 0x05c0 0x05c3 0x05c6 0x05f3-0x05f4 0x0609-0x060a 0x060c-0x060d 0x061b 0x061e-0x061f 0x066a-0x066d 0x06d4 0x0700-0x070d 0x07f7-0x07f9 0x0830-0x083e 0x085e 0x0964-0x0965 0x0970 0x0af0 0x0df4 0x0e4f 0x0e5a-0x0e5b 0x0f04-0x0f12 0x0f14 0x0f85 0x0fd0-0x0fd4 0x0fd9-0x0fda 0x104a-0x104f 0x10fb 0x1360-0x1368 0x166d-0x166e 0x16eb-0x16ed 0x1735-0x1736 0x17d4-0x17d6 0x17d8-0x17da 0x1800-0x1805 0x1807-0x180a 0x1944-0x1945 0x1a1e-0x1a1f 0x1aa0-0x1aa6 0x1aa8-0x1aad 0x1b5a-0x1b60 0x1bfc-0x1bff 0x1c3b-0x1c3f 0x1c7e-0x1c7f 0x1cc0-0x1cc7 0x1cd3 0x2016-0x2017 0x2020-0x2027 0x2030-0x2038 0x203b-0x203e 0x2041-0x2043 0x2047-0x2051 0x2053 0x2055-0x205e 0x2cf9-0x2cfc 0x2cfe-0x2cff 0x2d70 0x2e00-0x2e01 0x2e06-0x2e08 0x2e0b 0x2e0e-0x2e16 0x2e18-0x2e19 0x2e1b 0x2e1e-0x2e1f 0x2e2a-0x2e2e 0x2e30-0x2e39 0x2e3c-0x2e3f 0x2e41 0x3001-0x3003 0x303d 0x30fb 0xa4fe-0xa4ff 0xa60d-0xa60f 0xa673 0xa67e 0xa6f2-0xa6f7 0xa874-0xa877 0xa8ce-0xa8cf 0xa8f8-0xa8fa 0xa8fc 0xa92e-0xa92f 0xa95f 0xa9c1-0xa9cd 0xa9de-0xa9df 0xaa5c-0xaa5f 0xaade-0xaadf 0xaaf0-0xaaf1 0xabeb 0xfe10-0xfe16 0xfe19 0xfe30 0xfe45-0xfe46 0xfe49-0xfe4c 0xfe50-0xfe52 0xfe54-0xfe57 0xfe5f-0xfe61 0xfe68 0xfe6a-0xfe6b 0xff01-0xff03 0xff05-0xff07 0xff0a 0xff0c 0xff0e-0xff0f 0xff1a-0xff1b 0xff1f-0xff20 0xff3c 0xff61 0xff64-0xff65 0x10100-0x10102 0x1039f 0x103d0 0x1056f 0x10857 0x1091f 0x1093f 0x10a50-0x10a58 0x10a7f 0x10af0-0x10af6 0x10b39-0x10b3f 0x10b99-0x10b9c 0x11047-0x1104d 0x110bb-0x110bc 0x110be-0x110c1 0x11140-0x11143 0x11174-0x11175 0x111c5-0x111c9 0x111cd 0x111db 0x111dd-0x111df 0x11238-0x1123d 0x112a9 0x114c6 0x115c1-0x115d7 0x11641-0x11643 0x1173c-0x1173e 0x12470-0x12474 0x16a6e-0x16a6f 0x16af5 0x16b37-0x16b3b 0x16b44 0x1bc9f 0x1da87-0x1da8b]
(** Mn -- Unicode general category Nonspacing_Mark: combining marks that
    do not advance the cursor (accents, vowel signs, variation selectors).
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_mod_nospace = [0x0300-0x036f 0x0483-0x0487 0x0591-0x05bd 0x05bf 0x05c1-0x05c2 0x05c4-0x05c5 0x05c7 0x0610-0x061a 0x064b-0x065f 0x0670 0x06d6-0x06dc 0x06df-0x06e4 0x06e7-0x06e8 0x06ea-0x06ed 0x0711 0x0730-0x074a 0x07a6-0x07b0 0x07eb-0x07f3 0x0816-0x0819 0x081b-0x0823 0x0825-0x0827 0x0829-0x082d 0x0859-0x085b 0x08e3-0x0902 0x093a 0x093c 0x0941-0x0948 0x094d 0x0951-0x0957 0x0962-0x0963 0x0981 0x09bc 0x09c1-0x09c4 0x09cd 0x09e2-0x09e3 0x0a01-0x0a02 0x0a3c 0x0a41-0x0a42 0x0a47-0x0a48 0x0a4b-0x0a4d 0x0a51 0x0a70-0x0a71 0x0a75 0x0a81-0x0a82 0x0abc 0x0ac1-0x0ac5 0x0ac7-0x0ac8 0x0acd 0x0ae2-0x0ae3 0x0b01 0x0b3c 0x0b3f 0x0b41-0x0b44 0x0b4d 0x0b56 0x0b62-0x0b63 0x0b82 0x0bc0 0x0bcd 0x0c00 0x0c3e-0x0c40 0x0c46-0x0c48 0x0c4a-0x0c4d 0x0c55-0x0c56 0x0c62-0x0c63 0x0c81 0x0cbc 0x0cbf 0x0cc6 0x0ccc-0x0ccd 0x0ce2-0x0ce3 0x0d01 0x0d41-0x0d44 0x0d4d 0x0d62-0x0d63 0x0dca 0x0dd2-0x0dd4 0x0dd6 0x0e31 0x0e34-0x0e3a 0x0e47-0x0e4e 0x0eb1 0x0eb4-0x0eb9 0x0ebb-0x0ebc 0x0ec8-0x0ecd 0x0f18-0x0f19 0x0f35 0x0f37 0x0f39 0x0f71-0x0f7e 0x0f80-0x0f84 0x0f86-0x0f87 0x0f8d-0x0f97 0x0f99-0x0fbc 0x0fc6 0x102d-0x1030 0x1032-0x1037 0x1039-0x103a 0x103d-0x103e 0x1058-0x1059 0x105e-0x1060 0x1071-0x1074 0x1082 0x1085-0x1086 0x108d 0x109d 0x135d-0x135f 0x1712-0x1714 0x1732-0x1734 0x1752-0x1753 0x1772-0x1773 0x17b4-0x17b5 0x17b7-0x17bd 0x17c6 0x17c9-0x17d3 0x17dd 0x180b-0x180d 0x18a9 0x1920-0x1922 0x1927-0x1928 0x1932 0x1939-0x193b 0x1a17-0x1a18 0x1a1b 0x1a56 0x1a58-0x1a5e 0x1a60 0x1a62 0x1a65-0x1a6c 0x1a73-0x1a7c 0x1a7f 0x1ab0-0x1abd 0x1b00-0x1b03 0x1b34 0x1b36-0x1b3a 0x1b3c 0x1b42 0x1b6b-0x1b73 0x1b80-0x1b81 0x1ba2-0x1ba5 0x1ba8-0x1ba9 0x1bab-0x1bad 0x1be6 0x1be8-0x1be9 0x1bed 0x1bef-0x1bf1 0x1c2c-0x1c33 0x1c36-0x1c37 0x1cd0-0x1cd2 0x1cd4-0x1ce0 0x1ce2-0x1ce8 0x1ced 0x1cf4 0x1cf8-0x1cf9 0x1dc0-0x1df5 0x1dfc-0x1dff 0x20d0-0x20dc 0x20e1 0x20e5-0x20f0 0x2cef-0x2cf1 0x2d7f 0x2de0-0x2dff 0x302a-0x302d 0x3099-0x309a 0xa66f 0xa674-0xa67d 0xa69e-0xa69f 0xa6f0-0xa6f1 0xa802 0xa806 0xa80b 0xa825-0xa826 
0xa8c4 0xa8e0-0xa8f1 0xa926-0xa92d 0xa947-0xa951 0xa980-0xa982 0xa9b3 0xa9b6-0xa9b9 0xa9bc 0xa9e5 0xaa29-0xaa2e 0xaa31-0xaa32 0xaa35-0xaa36 0xaa43 0xaa4c 0xaa7c 0xaab0 0xaab2-0xaab4 0xaab7-0xaab8 0xaabe-0xaabf 0xaac1 0xaaec-0xaaed 0xaaf6 0xabe5 0xabe8 0xabed 0xfb1e 0xfe00-0xfe0f 0xfe20-0xfe2f 0x101fd 0x102e0 0x10376-0x1037a 0x10a01-0x10a03 0x10a05-0x10a06 0x10a0c-0x10a0f 0x10a38-0x10a3a 0x10a3f 0x10ae5-0x10ae6 0x11001 0x11038-0x11046 0x1107f-0x11081 0x110b3-0x110b6 0x110b9-0x110ba 0x11100-0x11102 0x11127-0x1112b 0x1112d-0x11134 0x11173 0x11180-0x11181 0x111b6-0x111be 0x111ca-0x111cc 0x1122f-0x11231 0x11234 0x11236-0x11237 0x112df 0x112e3-0x112ea 0x11300-0x11301 0x1133c 0x11340 0x11366-0x1136c 0x11370-0x11374 0x114b3-0x114b8 0x114ba 0x114bf-0x114c0 0x114c2-0x114c3 0x115b2-0x115b5 0x115bc-0x115bd 0x115bf-0x115c0 0x115dc-0x115dd 0x11633-0x1163a 0x1163d 0x1163f-0x11640 0x116ab 0x116ad 0x116b0-0x116b5 0x116b7 0x1171d-0x1171f 0x11722-0x11725 0x11727-0x1172b 0x16af0-0x16af4 0x16b30-0x16b36 0x16f8f-0x16f92 0x1bc9d-0x1bc9e 0x1d167-0x1d169 0x1d17b-0x1d182 0x1d185-0x1d18b 0x1d1aa-0x1d1ad 0x1d242-0x1d244 0x1da00-0x1da36 0x1da3b-0x1da6c 0x1da75 0x1da84 0x1da9b-0x1da9f 0x1daa1-0x1daaf 0x1e8d0-0x1e8d6 0xe0100-0xe01ef]
(** Mc -- Unicode general category Spacing_Mark: combining marks that do
    take up horizontal space (mostly dependent vowel signs).
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_mod = [0x0903 0x093b 0x093e-0x0940 0x0949-0x094c 0x094e-0x094f 0x0982-0x0983 0x09be-0x09c0 0x09c7-0x09c8 0x09cb-0x09cc 0x09d7 0x0a03 0x0a3e-0x0a40 0x0a83 0x0abe-0x0ac0 0x0ac9 0x0acb-0x0acc 0x0b02-0x0b03 0x0b3e 0x0b40 0x0b47-0x0b48 0x0b4b-0x0b4c 0x0b57 0x0bbe-0x0bbf 0x0bc1-0x0bc2 0x0bc6-0x0bc8 0x0bca-0x0bcc 0x0bd7 0x0c01-0x0c03 0x0c41-0x0c44 0x0c82-0x0c83 0x0cbe 0x0cc0-0x0cc4 0x0cc7-0x0cc8 0x0cca-0x0ccb 0x0cd5-0x0cd6 0x0d02-0x0d03 0x0d3e-0x0d40 0x0d46-0x0d48 0x0d4a-0x0d4c 0x0d57 0x0d82-0x0d83 0x0dcf-0x0dd1 0x0dd8-0x0ddf 0x0df2-0x0df3 0x0f3e-0x0f3f 0x0f7f 0x102b-0x102c 0x1031 0x1038 0x103b-0x103c 0x1056-0x1057 0x1062-0x1064 0x1067-0x106d 0x1083-0x1084 0x1087-0x108c 0x108f 0x109a-0x109c 0x17b6 0x17be-0x17c5 0x17c7-0x17c8 0x1923-0x1926 0x1929-0x192b 0x1930-0x1931 0x1933-0x1938 0x1a19-0x1a1a 0x1a55 0x1a57 0x1a61 0x1a63-0x1a64 0x1a6d-0x1a72 0x1b04 0x1b35 0x1b3b 0x1b3d-0x1b41 0x1b43-0x1b44 0x1b82 0x1ba1 0x1ba6-0x1ba7 0x1baa 0x1be7 0x1bea-0x1bec 0x1bee 0x1bf2-0x1bf3 0x1c24-0x1c2b 0x1c34-0x1c35 0x1ce1 0x1cf2-0x1cf3 0x302e-0x302f 0xa823-0xa824 0xa827 0xa880-0xa881 0xa8b4-0xa8c3 0xa952-0xa953 0xa983 0xa9b4-0xa9b5 0xa9ba-0xa9bb 0xa9bd-0xa9c0 0xaa2f-0xaa30 0xaa33-0xaa34 0xaa4d 0xaa7b 0xaa7d 0xaaeb 0xaaee-0xaaef 0xaaf5 0xabe3-0xabe4 0xabe6-0xabe7 0xabe9-0xabea 0xabec 0x11000 0x11002 0x11082 0x110b0-0x110b2 0x110b7-0x110b8 0x1112c 0x11182 0x111b3-0x111b5 0x111bf-0x111c0 0x1122c-0x1122e 0x11232-0x11233 0x11235 0x112e0-0x112e2 0x11302-0x11303 0x1133e-0x1133f 0x11341-0x11344 0x11347-0x11348 0x1134b-0x1134d 0x11357 0x11362-0x11363 0x114b0-0x114b2 0x114b9 0x114bb-0x114be 0x114c1 0x115af-0x115b1 0x115b8-0x115bb 0x115be 0x11630-0x11632 0x1163b-0x1163c 0x1163e 0x116ac 0x116ae-0x116af 0x116b6 0x11720-0x11721 0x11726 0x16f51-0x16f7e 0x1d165-0x1d166 0x1d16d-0x1d172]
(** Me -- Unicode general category Enclosing_Mark: combining marks that
    surround the character they apply to (enclosing circle, square, ...).
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_mod_enclose = [0x0488-0x0489 0x1abe 0x20dd-0x20e0 0x20e2-0x20e4 0xa670-0xa672]
(** Cc -- Unicode general category Control.  Despite the name of the
    regexp, the set holds both the ASCII controls (0x00-0x1f and DEL 0x7f)
    and the C1 controls 0x80-0x9f, which lie outside ASCII. *)
let regexp u_ascii_control = [0x0000-0x001f 0x007f-0x009f]
(** Cf -- Unicode general category Format: invisible formatting characters
    such as the soft hyphen 0x00ad, zero-width and bidirectional controls
    (0x200b-0x200f, 0x202a-0x202e) and the byte order mark 0xfeff.
    Entries are single code points or inclusive lo-hi ranges. *)
let regexp u_format_control = [0x00ad 0x0600-0x0605 0x061c 0x06dd 0x070f 0x180e 0x200b-0x200f 0x202a-0x202e 0x2060-0x2064 0x2066-0x206f 0xfeff 0xfff9-0xfffb 0x110bd 0x1bca0-0x1bca3 0x1d173-0x1d17a 0xe0001 0xe0020-0xe007f]
(** Co -- Unicode general category Private_Use.  The category consists of
    three contiguous areas: 0xe000-0xf8ff in the Basic Multilingual Plane
    and planes 15 and 16 (each up to its last code point but two, the final
    two code points of a plane being noncharacters).  They are written as
    inclusive ranges; listing only the six end points, as the character
    database does with its First/Last marker entries, would leave out
    almost every private-use code point. *)
let regexp u_private_use = [0xe000-0xf8ff 0xf0000-0xffffd 0x100000-0x10fffd]
(** Cs -- Unicode general category Surrogate.  The high surrogates
    (0xd800-0xdb7f), high private-use surrogates (0xdb80-0xdbff) and low
    surrogates (0xdc00-0xdfff) are adjacent, so the whole category is the
    single inclusive range below.  Listing only the First/Last marker
    entries of the character database would cover just six code points. *)
let regexp u_surrogate = [0xd800-0xdfff]

(* -------------------------------------------------------------------- *)
(* Letter and digit classes shared by the identifier and literal rules.
   [lower] and [upper] select the first character of identifiers and
   constructors respectively; [upper] accepts [u_title] as well as [u_upper].
   [letter] is the class allowed after the first character of a name.
   NOTE(review): [letter] leaves out [u_title], so such a character may start
   a constructor but cannot appear later in a name -- confirm this is wanted.
   [digit] and [hex] are restricted to ASCII. *)
let regexp lower  = u_lower
let regexp upper  = u_title | u_upper
let regexp letter = u_modifier | u_other | u_upper | u_lower
let regexp digit  = ['0'-'9']
let regexp hex    = digit | ['a'-'f' 'A'-'F']

(* -------------------------------------------------------------------- *)
(* [anywhite]: one whitespace character that is not a line terminator
   (union of the [u_space] and [u_space_extra] tables defined earlier).
   [newline]: one line terminator -- LF (10), CR (13), U+2028 LINE SEPARATOR,
   U+2029 PARAGRAPH SEPARATOR, or the two-character CR LF pair, which the
   longest-match rule treats as a single terminator. *)
let regexp anywhite  = u_space_extra | u_space
let regexp newline   = [10 13 0x2028 0x2029] | "\r\n"

(* -------------------------------------------------------------------- *)
(* [op_char] and [ignored_op_char] are small ASCII character classes; neither
   is referenced by the lexer rules in this part of the file.
   [op_token] enumerates the punctuation lexemes with a fixed meaning.  The
   order of the alternatives is irrelevant (the longest match wins); they are
   grouped here by kind: simple symbols, arrows, logical and typing symbols,
   colons and semicolons, parentheses, square brackets, indexing forms and
   braces. *)
let regexp op_char = ["!$%&*+-./:<=?^|~"]
let regexp ignored_op_char = ["$."]
let regexp op_token =
    "#" | "u#" | "$" | "&" | "," | "." | "?" | "?." | "~" | "-" | "="
  | "~>" | "->" | "<-" | "<--" | "==>" | "<==>"
  | "/\\" | "\\/" | "<:" | "<@" | "|>" | "|"
  | ":" | "::" | ":=" | ";" | ";;"
  | "()" | "(" | ")" | "(|" | "|)"
  | "[" | "]" | "[|" | "|]" | "[@" | "%["
  | ".[" | ".[|" | ".(" | ".(|"
  | "{" | "}" | "!{" | "{:pattern"

(* -------------------------------------------------------------------- *)
(* Integer literals.
   [xinteger]: a zero followed by a radix letter in either case (x for
   hexadecimal, o for octal, b for binary) and at least one digit of that
   radix.  [integer]: one or more decimal digits.  [any_integer]: either.
   The sized classes append a one-letter suffix to [any_integer], optionally
   preceded by u or U for the unsigned variant: y for 8 bits, s for 16 bits,
   l for 32 bits and L for 64 bits.  [char8] uses the suffix z. *)
let regexp xinteger =
  '0' ( ["xX"] hex+
      | ["oO"] ['0'-'7']+
      | ["bB"] ['0'-'1']+ )
let regexp integer = digit+
let regexp any_integer = integer | xinteger
let regexp unsigned = 'u' | 'U'

(* Signed variants. *)
let regexp int8 = any_integer 'y'
let regexp int16 = any_integer 's'
let regexp int32 = any_integer 'l'
let regexp int64 = any_integer 'L'

(* Unsigned variants and 8-bit characters. *)
let regexp uint8 = any_integer unsigned 'y'
let regexp uint16 = any_integer unsigned 's'
let regexp uint32 = any_integer unsigned 'l'
let regexp uint64 = any_integer unsigned 'L'
let regexp char8 = any_integer 'z'

(* Floating-point, real and range literals.
   [floatp]: digits, a dot, then optional digits.
   [floate]: digits with an optional fractional part, followed by an exponent
   (e or E, optional sign, at least one digit).
   [real]: a [floatp] with the suffix R.
   [ieee64]: either float form.  [xieee64]: an [xinteger] with the suffix LF.
   [range]: two decimal numbers separated by two dots. *)
let regexp floatp     = integer '.' digit*
let regexp floate     = (integer | floatp) ["eE"] ('+' | '-')? digit+
let regexp real       = floatp 'R'
let regexp ieee64     = floate | floatp
let regexp xieee64    = xinteger "LF"
let regexp range      = integer ".." integer

(* Operator character classes.  Each op_* class holds the characters that can
   START an operator of the corresponding kind; the trailing comments record
   the associativity noted for that kind.  [op_infix0c_nogt] is [op_infix0c]
   without the greater-than sign, which the lexer handles in a rule of its
   own.  [symbolchar] is any character that may CONTINUE an operator, and
   [uoperator] is a single non-ASCII mathematical symbol. *)
let regexp op_prefix  = ["?~!"]
let regexp op_infix0a = '|' (* left *)
let regexp op_infix0b = '&' (* left *)
let regexp op_infix0c_nogt = ["<="] (* left *)
let regexp op_infix0c = op_infix0c_nogt | '>' (* left *)
let regexp op_infix0d = '$' (* left *)

let regexp op_infix0  = op_infix0a | op_infix0b | op_infix0c | op_infix0d
let regexp op_infix1  = ["^@"] (* right *)
let regexp op_infix2  = ["-+"] (* left *)
let regexp op_infix3  = ["%/*"] (* left *)
let regexp symbolchar =
    op_prefix
  | op_infix0 | op_infix1 | op_infix2 | op_infix3
  | [":."]
let regexp uoperator  = u_math_nonascii

(* -------------------------------------------------------------------- *)
(* [escape_char]: a backslash followed by either a single-character escape
   (backslash, double quote, apostrophe, b, f, n, t, r, v or 0), the letter x
   and exactly two hex digits, or the letter u and exactly four hex digits.
   [char]: the contents of a character literal -- one escape sequence or any
   single character other than a backslash. *)
let regexp escape_char = '\\' ("x" hex hex | "u" hex hex hex hex | ["\\\"'bfntrv0"])
let regexp char        = escape_char | [^'\\']

(* -------------------------------------------------------------------- *)
(* Names.
   A constructor starts with an [upper] character, an identifier with a
   [lower] character or an underscore; both continue with any number of
   [ident_char] (letters, ASCII digits, apostrophes and underscores).
   A type variable is an apostrophe, then a character that could start either
   kind of name, then any number of [tvar_char], which is the same class as
   [ident_char]. *)
let regexp constructor_start_char = upper
let regexp ident_start_char       = '_' | lower
let regexp ident_char             = letter | digit | ['\'' '_']
let regexp tvar_char              = ident_char

let regexp constructor = constructor_start_char ident_char*
let regexp ident       = ident_start_char ident_char*
let regexp tvar        = '\'' (constructor_start_char | ident_start_char) tvar_char*

(* Main lexer: reads one lexeme from [lexbuf] and returns the matching parser
   token.  Whitespace, newlines and comments yield no token of their own;
   their rules re-enter the lexer until a token or EOF is produced.
   The longest match is preferred and, as the ordering comments below rely
   on, an earlier rule wins over a later one that matches the same text, so
   the order of the rules is significant. *)
let rec token = lexer
 (* Fixed lexemes: splice, quotation markers, the quote keyword and pragmas. *)
 | "%splice" -> SPLICE
 | "`%" -> BACKTICK_PERC
 | "`#" -> BACKTICK_HASH
 | "`@" -> BACKTICK_AT
 | "quote" -> QUOTE
 (* Side effect: the current source file is passed to
    FStar_Options.add_light_off_file before the token is returned. *)
 | "#light" -> FStar_Options.add_light_off_file (L.source_file lexbuf); PRAGMALIGHT
 | "#set-options" -> PRAGMA_SET_OPTIONS
 | "#reset-options" -> PRAGMA_RESET_OPTIONS
 | "#push-options" -> PRAGMA_PUSH_OPTIONS
 | "#pop-options" -> PRAGMA_POP_OPTIONS
 | "#restart-solver" -> PRAGMA_RESTART_SOLVER
 (* Placeholders replaced by the lexer with the file name and line number. *)
 | "__SOURCE_FILE__" -> STRING (L.source_file lexbuf)
 | "__LINE__" -> INT (string_of_int (L.current_line lexbuf), false)

 (* Whitespace is skipped; a newline additionally updates the line count. *)
 | anywhite+ -> token lexbuf
 | newline -> L.new_line lexbuf; token lexbuf

 (* Must appear before tvar to avoid 'a <-> 'a' conflict *)
 (* The trim arguments drop the surrounding apostrophes and, in the second
    rule, the trailing B as well; both rules produce a CHAR token. *)
 | ('\'' char '\'') -> CHAR (unescape (utrim_both lexbuf 1 1))
 | ('\'' char '\'' 'B') -> CHAR (unescape (utrim_both lexbuf 1 2))
 | '`' -> BACKTICK

 (* Identifiers: a name starting with the reserved prefix is a fatal error;
    otherwise the keyword table decides between a keyword token and IDENT. *)
 | ident -> let id = L.lexeme lexbuf in
   if FStar_Util.starts_with id FStar_Ident.reserved_prefix
   then FStar_Errors.raise_error
                    (FStar_Errors.Fatal_ReservedPrefix,
                     FStar_Ident.reserved_prefix  ^ " is a reserved prefix for an identifier")
                    (current_range lexbuf);
   Hashtbl.find_option keywords id |> Option.default (IDENT id)
 (* Constructors: looked up in the constructors table, NAME by default. *)
 | constructor -> let id = L.lexeme lexbuf in
   Hashtbl.find_option constructors id |> Option.default (NAME id)

 | tvar -> TVAR (L.lexeme lexbuf)
 (* Numeric literals.  The lexeme is passed through [clean_number] before
    being stored in the token. *)
 | (integer | xinteger) -> INT (clean_number (L.lexeme lexbuf), false)
 (* 8-bit unsigned values and char8 literals must fit in 0..255. *)
 | (uint8 | char8) ->
   let c = clean_number (L.lexeme lexbuf) in
   let cv = int_of_string c in
   if cv < 0 || cv > 255 then fail lexbuf (E.Fatal_SyntaxError, "Out-of-range character literal")
   else UINT8 (c)
 | int8 -> INT8 (clean_number (L.lexeme lexbuf), false)
 | uint16 -> UINT16 (clean_number (L.lexeme lexbuf))
 | int16 -> INT16 (clean_number (L.lexeme lexbuf), false)
 | uint32 -> UINT32 (clean_number (L.lexeme lexbuf))
 | int32 -> INT32 (clean_number (L.lexeme lexbuf), false)
 | uint64 -> UINT64 (clean_number (L.lexeme lexbuf))
 | int64 -> INT64 (clean_number (L.lexeme lexbuf), false)
 | range -> RANGE (L.lexeme lexbuf)
 (* The trailing R is removed from the text kept in the REAL token. *)
 | real -> REAL(trim_right lexbuf 1)
 (* NOTE(review): an xieee64 lexeme still carries its LF suffix when it
    reaches float_of_string, which presumably rejects it -- confirm. *)
 | (ieee64 | xieee64) -> IEEE64 (float_of_string (L.lexeme lexbuf))
 
 (* A number immediately followed by name characters is rejected as a whole,
    instead of being split into a number and an identifier. *)
 | (integer | xinteger | ieee64 | xieee64) ident_char+ ->
   fail lexbuf (E.Fatal_SyntaxError, "This is not a valid numeric literal: " ^ L.lexeme lexbuf)

 (* Block comment: [start_comment] supplies the initial state, then the
    [comment] lexer consumes the body. *)
 | "(*" ->
   let inner, buffer, startpos = start_comment lexbuf in
   comment inner buffer startpos lexbuf

 (* This marker is dropped on its own, so the rest of the line is lexed as
    ordinary code. *)
 | "// IN F*:" -> token lexbuf
 | "//" ->
     (* Only match on "//" to allow the longest-match rule to catch IN F*. This
      * creates a lexing conflict with op_infix3 which is caught below. *)
     one_line_comment (L.lexeme lexbuf) lexbuf

 (* String literal: the body is accumulated by the [string] lexer. *)
 | '"' -> string (Buffer.create 0) lexbuf

 (* Identifier quoted with double backticks: anything on one line that does
    not contain two consecutive backticks; the delimiters are stripped. *)
 | '`' '`' (([^'`' 10 13 0x2028 0x2029] | '`' [^'`' 10 13 0x2028 0x2029])+) '`' '`' ->
   IDENT (trim_both lexbuf 2 2)

 (* Fixed punctuation.  Hashtbl.find raises Not_found for a missing key, so
    every op_token string is expected to be present in [operators]. *)
 | op_token  -> L.lexeme lexbuf |> Hashtbl.find operators
 (* Angle brackets are either type-application delimiters or operators,
    as decided by [is_typ_app] and [is_typ_app_gt]. *)
 | "<"       -> if is_typ_app lexbuf then TYP_APP_LESS else OPINFIX0c("<")
 | ">"       -> if is_typ_app_gt () then TYP_APP_GREATER else symbolchar_parser lexbuf
 
 (* Operators. *)
 | op_prefix  symbolchar* -> OPPREFIX (L.lexeme lexbuf)
 | op_infix0a symbolchar* -> OPINFIX0a (L.lexeme lexbuf)
 | op_infix0b symbolchar* -> OPINFIX0b (L.lexeme lexbuf)
 | op_infix0c_nogt symbolchar* -> OPINFIX0c (L.lexeme lexbuf)
 | op_infix0d symbolchar* -> OPINFIX0d (L.lexeme lexbuf)
 | op_infix1  symbolchar* -> OPINFIX1 (L.lexeme lexbuf)
 | op_infix2  symbolchar* -> OPINFIX2 (L.lexeme lexbuf)
 | "**"       symbolchar* -> OPINFIX4 (L.lexeme lexbuf)

 (* Unicode Operators *)
 (* A single non-ASCII math symbol: taken from [operators] when registered
    there, otherwise an OPINFIX4 operator. *)
 | uoperator -> let id = L.lexeme lexbuf in
   Hashtbl.find_option operators id |> Option.default (OPINFIX4 id)

 (* An operator made of symbol characters can begin with two slashes and be
    longer than the plain comment rule above; such a lexeme is still treated
    as the start of a one-line comment. *)
 | op_infix3  symbolchar* -> 
     let l = L.lexeme lexbuf in
     if String.length l >= 2 && String.sub l 0 2 = "//" then
       one_line_comment l lexbuf
     else
        OPINFIX3 l
 (* Mixfix indexing operators, in assignment and access form. *)
 | ".[]<-"                 -> OP_MIXFIX_ASSIGNMENT (L.lexeme lexbuf)
 | ".()<-"                 -> OP_MIXFIX_ASSIGNMENT (L.lexeme lexbuf)
 | ".(||)<-"                -> OP_MIXFIX_ASSIGNMENT (L.lexeme lexbuf)
 | ".[||]<-"                 -> OP_MIXFIX_ASSIGNMENT (L.lexeme lexbuf)
 | ".[]"                  -> OP_MIXFIX_ACCESS (L.lexeme lexbuf)
 | ".()"                  -> OP_MIXFIX_ACCESS (L.lexeme lexbuf)
 | ".(||)"                 -> OP_MIXFIX_ACCESS (L.lexeme lexbuf)
 | ".[||]"                  -> OP_MIXFIX_ACCESS (L.lexeme lexbuf)

 (* End of input, and the fallback for any character no rule accepts. *)
 | eof -> EOF
 | _ -> fail lexbuf (E.Fatal_SyntaxError, "unexpected char")

(* Lexes the remainder of a one-line comment.  [pre] is the text the caller
   has already consumed (the lexeme that opened the comment).  The single
   rule matches every character up to, but not including, the next line
   terminator -- possibly nothing at all -- then hands [pre] and [lexbuf] to
   [push_one_line_comment] and resumes normal lexing with [token]. *)
and one_line_comment pre = lexer
 | [^ 10 13 0x2028 0x2029]* ->
   begin
     push_one_line_comment pre lexbuf;
     token lexbuf
   end

(* Continuation used after a lone greater-than sign has been consumed and
   found not to close a type application: reads any further symbol
   characters (possibly none) and returns the complete operator, with the
   greater-than sign put back in front, as an OPINFIX0c token. *)
and symbolchar_parser = lexer
 | symbolchar* ->
   let rest = L.lexeme lexbuf in
   OPINFIX0c (">" ^ rest)

(* Lexes the body of a string literal whose opening double quote has already
   been consumed.  Decoded contents are accumulated in [buffer] until the
   closing quote is found:
   - a backslash directly followed by a line terminator drops the terminator
     and any whitespace that follows it (the line count is still updated);
   - a bare line terminator is kept verbatim;
   - an escape sequence is decoded with [unescape] and appended as one
     UTF-8 encoded character;
   - a closing quote yields STRING, a closing quote followed by B yields
     BYTEARRAY with one array cell per byte of the accumulated text;
   - any other character is copied as is;
   - reaching the end of input is reported as an unterminated string. *)
and string buffer = lexer
 | '\\' newline anywhite* ->
   L.new_line lexbuf;
   string buffer lexbuf
 | newline ->
   let terminator = L.lexeme lexbuf in
   Buffer.add_string buffer terminator;
   L.new_line lexbuf;
   string buffer lexbuf
 | escape_char ->
   let code = unescape (L.ulexeme lexbuf) in
   Buffer.add_string buffer (BatUTF8.init 1 (fun _ -> BatUChar.chr code));
   string buffer lexbuf
 | '"' ->
   let contents = Buffer.contents buffer in
   STRING contents
 | '"''B' ->
   let contents = Buffer.contents buffer in
   BYTEARRAY (ba_of_string contents)
 | _ ->
   let raw = L.lexeme lexbuf in
   Buffer.add_string buffer raw;
   string buffer lexbuf
 | eof -> fail lexbuf (E.Fatal_SyntaxError, "unterminated string")

(* Lexes the body of a block comment whose opener has already been consumed.
   [buffer] accumulates the comment text and [startpos] is forwarded
   unchanged to [terminate_comment].  [inner] is true while lexing a nested
   comment: in that case the closer returns a dummy EOF to the enclosing
   invocation, which discards it and carries on; at the outermost level the
   closer resumes [token].
   Line terminators update the line count and are kept in the text.
   Reaching the end of input also calls [terminate_comment] and returns EOF.
   NOTE(review): [terminate_comment] is called for nested closers too;
   confirm it copes with being invoked more than once per comment. *)
and comment inner buffer startpos = lexer
 | "(*" ->
   Buffer.add_string buffer "(*" ;
   (* The result of the nested run is a placeholder and is dropped. *)
   let _nested = comment true buffer startpos lexbuf in
   comment inner buffer startpos lexbuf
 | newline ->
   L.new_line lexbuf;
   let terminator = L.lexeme lexbuf in
   Buffer.add_string buffer terminator;
   comment inner buffer startpos lexbuf
 | "*)" ->
   terminate_comment buffer startpos lexbuf;
   (match inner with
    | true -> EOF
    | false -> token lexbuf)
 | _ ->
   let raw = L.lexeme lexbuf in
   Buffer.add_string buffer raw;
   comment inner buffer startpos lexbuf
 | eof ->
   terminate_comment buffer startpos lexbuf;
   EOF

(* Skips any number of space characters followed by one line terminator and
   then continues with [token].
   NOTE(review): unlike the newline rule of [token], this rule does not call
   L.new_line, and there is no fallback rule for input that does not start
   with spaces and a line terminator -- confirm both are intended. *)
and ignore_endline = lexer
 | ' '* newline -> token lexbuf

