# Copyright (c) 2003-2007 LOGILAB S.A. (Paris, FRANCE).
# http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
"""
checker for Page Template files

some code comes from the ZChecker project
"""

import re

from apycot import register, IChecker
from apycot.checkers import AbstractFilteredFileChecker

class HTMLChecker(AbstractFilteredFileChecker):
    """Check the syntax of HTML / Page Template files using mx.Tidy.

    Each file is passed to ``tidy`` and every line of its error report is
    logged as an error, except for lines matching one of the ``ignore``
    substrings or the ``unknown_attr_rgx`` pattern.
    """

    __implements__ = IChecker
    __name__ = 'html_tidy'

    # Substrings of tidy messages that must not be reported as errors
    # (mostly noise caused by Page Template namespaces and markup).
    ignore = [
        'unknown attribute "xmlns:metal"',
        'unknown attribute "xmlns:tal"',
        'unknown attribute "xmlns:i18n"',

        '<tal:block> is not recognized',
        '<metal:block> is not recognized',
        'tal:block is not recognized',
        'metal:block is not recognized',

        '<html> has XML attribute "xml:lang"',
        'inserting missing \'title\' element',
        'discarding unexpected ',
        'trimming empty ',

        'This document has errors that must be fixed before',
        'using HTML Tidy to generate a tidied up version.',
    ]

    # Any attribute in the tal / metal / i18n namespaces is ignored as well.
    unknown_attr_rgx = re.compile('unknown attribute "(tal|metal|i18n):')

    def __init__(self, extensions=('.htm', '.html')):
        """Initialize the checker with the file extensions it handles."""
        AbstractFilteredFileChecker.__init__(self, extensions)

    def _is_ignored(self, line):
        """Return true if the tidy message `line` must not be reported."""
        for msg in self.ignore:
            if msg in line:
                return True
        return self.unknown_attr_rgx.search(line) is not None

    def check_file(self, filepath, writer):
        """Check a single file.

        :param filepath: path of the file to run through tidy
        :param writer: object whose ``log`` method receives each error line
        :return: 1 if the test succeeded (or tidy is unavailable), else 0
        """
        if not HAS_TIDY:
            return 1
        # Read the whole file and make sure the handle is released, even if
        # reading fails.
        stream = open(filepath)
        try:
            data = stream.read()
        finally:
            stream.close()
        # The last item of tidy's result is used as the error report text.
        errors = tidy(data, output_markup=0, quiet=1)[-1]
        status = 1
        if errors:
            for line in errors.splitlines():
                line = line.strip()
                # skip blank lines and messages known to be harmless
                # (ignore list idea: thanks Alan Runyan)
                if not line or self._is_ignored(line):
                    continue
                # that's really an error
                status = 0
                # FIXME line no
                # NOTE(review): ERROR is not imported in the visible part of
                # this module -- confirm where it is expected to come from.
                writer.log(ERROR, filepath, None, line)
        return status

    def version_info(self, writer):
        """Hook for checkers to add their version information.

        Writes the tidy version through ``writer.raw``; does nothing when
        mx.Tidy is not available.
        """
        if not HAS_TIDY:
            return
        # FIXME: confirm that the imported `tidy` object really exposes
        # a __version__ attribute.
        version = tidy.__version__
        writer.raw('mx_tidy_version', version)

# mx.Tidy is an optional dependency: the checker is registered only when the
# import succeeds, and HAS_TIDY records the outcome.
try:
    from mx.Tidy import tidy
    # reached only if the import above did not raise
    register('checker', HTMLChecker)
    HAS_TIDY = 1
except ImportError:
    # HTMLChecker.check_file and version_info test this flag and return
    # early when it is false
    HAS_TIDY = 0
