(*************************************************************************** * Copyright (C) 2007 Antoine Bodin * * * * This program is free software; you can redistribute it and/or * * modify it under the terms of the GNU General Public License * * as published by the Free Software Foundation; either version 2 * * of the License, or (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA * ***************************************************************************) { exception Html_error of string*string exception Html_illegal_character of char exception Html_illegal_value of string type cnt = Content of string | Tag of tag and opt = String of string | Float of float | Int of int and tag = { name:string; options:(string*opt) list; content:cnt list } } let digit = ['0'-'9'] let alnum = ['A'-'Z''a'-'z'] let variable = alnum (alnum|digit|'_'|'-')* let space = [' ' '\t' '\r' '\n'] rule lexer name = parse | [^'<']+ as s { Content s::lexer name lexbuf } | "' { (*if var=name then [] else raise (Html_error (name,var))*) []} | '<' space* (variable as var) { let t = lextag var [] lexbuf in Tag t::lexer name lexbuf } | '<' space* (_ as c) { raise (Html_illegal_character c) } | eof { [] } and lextag nm optl = parse | space { lextag nm optl lexbuf } | '>' { {name=nm;options=optl;content=lexer nm lexbuf} } | "/>" { {name=nm;options=optl;content=[]} } | (variable as oname) space* '=' space* { let oval = value lexbuf in lextag nm ((oname,oval)::optl) lexbuf } | _ as c { raise (Html_illegal_character c) } and value = parse | digit+ as n { Int (int_of_string n) } | digit+'.'digit* as f { Float (float_of_string f) } | '\"' ([^'\"']* as s)'\"' { String s } | [^'>'' ' '\t' '\r' '\n']* as s { raise (Html_illegal_value s) } and comment = parse '>' {} | _ {comment lexbuf} { let parse path = let fd = open_in_bin path in let t = lexer "root" (Lexing.from_channel fd) in close_in fd; t }