<?php

/**
 * Lucene document built from a content row, plus the text-cleaning helpers
 * used to prepare that row for indexing.
 *
 * The constructor reads the keys 'ctdId', 'ctdCorpo', 'itnNome' and 'itnUrl'
 * from the array it receives and registers six fields on the document:
 * the id, an accent-free lower-case copy of body and title (for searching),
 * the original body/title text, and the URL.
 */
class MinhasClasses_IndiceLuceneBusca extends Zend_Search_Lucene_Document {

    /**
     * Accented letter => unaccented ASCII letter.
     *
     * Kept as a single key/value map so a search letter and its replacement
     * can never drift out of alignment (the previous parallel-array version
     * had a stray element that shifted every upper-case replacement by one).
     *
     * @var array
     */
    private static $_mapaAcentos = array(
        "á" => "a", "à" => "a", "â" => "a", "ã" => "a", "ä" => "a",
        "é" => "e", "è" => "e", "ê" => "e", "ë" => "e",
        "í" => "i", "ì" => "i", "î" => "i", "ï" => "i",
        "ó" => "o", "ò" => "o", "ô" => "o", "õ" => "o", "ö" => "o",
        "ú" => "u", "ù" => "u", "û" => "u", "ü" => "u",
        "ç" => "c",
        "Á" => "A", "À" => "A", "Â" => "A", "Ã" => "A", "Ä" => "A",
        "É" => "E", "È" => "E", "Ê" => "E", "Ë" => "E",
        "Í" => "I", "Ì" => "I", "Î" => "I", "Ï" => "I",
        "Ó" => "O", "Ò" => "O", "Ô" => "O", "Õ" => "O", "Ö" => "O",
        "Ú" => "U", "Ù" => "U", "Û" => "U", "Ü" => "U",
        "Ç" => "C",
    );

    /**
     * Elements whose whole content (not just the tags) is replaced by a space.
     *
     * The patterns only contain ASCII literals, so they match the same spans
     * with or without the PCRE "u" flag on valid UTF-8; the flag is left out
     * so that input with invalid UTF-8 bytes does not make preg_replace fail.
     *
     * @var array
     */
    private static $_padroesConteudoInvisivel = array(
        '@<head[^>]*?>.*?</head>@si',
        '@<style[^>]*?>.*?</style>@si',
        '@<script[^>]*?.*?</script>@si',
        '@<object[^>]*?.*?</object>@si',
        '@<embed[^>]*?.*?</embed>@si',
        '@<applet[^>]*?.*?</applet>@si',
        '@<noframes[^>]*?.*?</noframes>@si',
        '@<noscript[^>]*?.*?</noscript>@si',
        '@<noembed[^>]*?.*?</noembed>@si',
    );

    /**
     * Opening/closing block-level tags that get a line break inserted in
     * front of them, so words from adjacent blocks are not glued together
     * once the tags are stripped.
     *
     * @var array
     */
    private static $_padroesBloco = array(
        '@</?((address)|(blockquote)|(center)|(del))@i',
        '@</?((div)|(h[1-9])|(ins)|(isindex)|(p)|(pre))@i',
        '@</?((dir)|(dl)|(dt)|(dd)|(li)|(menu)|(ol)|(ul))@i',
        '@</?((table)|(th)|(td)|(caption))@i',
        '@</?((form)|(button)|(fieldset)|(legend)|(input))@i',
        '@</?((label)|(select)|(optgroup)|(option)|(textarea))@i',
        '@</?((frameset)|(frame)|(iframe))@i',
    );

    /**
     * Replaces the accented letters listed in the accent map by their plain
     * ASCII counterpart and lower-cases the result.
     *
     * @param string $texto UTF-8 text.
     * @return string Accent-free, lower-case text.
     */
    public function retira_acentos($texto) {
        return $this->_semAcentosMinusculo($texto);
    }

    /**
     * Strips every HTML/PHP tag from the given text.
     *
     * @param string $textoComTag Text that may contain markup.
     * @return string Text without tags.
     */
    public function retiraTagHTML($textoComTag) {
        // The second argument of strip_tags() is an allow-list of tag names,
        // not a pattern; the former '<(.*?)>' value was not a valid tag list,
        // so it is simply omitted to strip all tags.
        return strip_tags($textoComTag);
    }

    /**
     * Removes markup (including the content of invisible elements such as
     * script/style), then removes accents and lower-cases the text.
     *
     * @param string $text HTML text.
     * @return string Plain, accent-free, lower-case text.
     */
    public function limpar_por_completo($text) {
        return $this->_semAcentosMinusculo($this->_removerMarcacaoHtml($text));
    }

    /**
     * Removes markup (including the content of invisible elements such as
     * script/style) and returns the remaining text with its original case
     * and accents.
     *
     * @param string $text HTML text.
     * @return string Plain text.
     */
    public function limpar_somente($text) {
        return $this->_removerMarcacaoHtml($text);
    }

    /**
     * Returns a window of at most $length characters of the trimmed string,
     * decorated with $prefix / $postfix on the sides where text was cut off.
     *
     * @param string $string  Text to truncate (UTF-8).
     * @param int    $start   Character offset where the window starts.
     * @param int    $length  Maximum window length; values below 1 disable truncation.
     * @param string $prefix  Marker prepended when the window does not start at offset 0.
     * @param string $postfix Marker appended when the window stops before the end.
     * @return string
     */
    public function Truncate($string, $start = 0, $length = 467, $prefix = '...', $postfix = '...') {
        return $this->_truncar($string, $start, $length, $prefix, $postfix);
    }

    /**
     * Same contract as Truncate(); kept as a separate entry point for
     * existing callers.
     *
     * @param string $string  Text to truncate (UTF-8).
     * @param int    $start   Character offset where the window starts.
     * @param int    $length  Maximum window length; values below 1 disable truncation.
     * @param string $prefix  Marker prepended when the window does not start at offset 0.
     * @param string $postfix Marker appended when the window stops before the end.
     * @return string
     */
    public function truncateModoDoisIndexacao($string, $start = 0, $length = 467, $prefix = '...', $postfix = '...') {
        return $this->_truncar($string, $start, $length, $prefix, $postfix);
    }

    /**
     * Same contract as limpar_somente(): strips markup, keeps case and
     * accents. Used by the constructor to prepare the body text.
     *
     * @param string $text HTML text.
     * @return string Plain text.
     */
    public function limpar_por_completoModoDoisIndexacao($text) {
        return $this->_removerMarcacaoHtml($text);
    }

    /**
     * Builds the document fields from a content row.
     *
     * @param array $document Row with the keys 'ctdId', 'ctdCorpo',
     *                        'itnNome' and 'itnUrl'.
     */
    public function __construct($document) {

        $this->addField(Zend_Search_Lucene_Field::Keyword('ctdId', $document ['ctdId']));

        // Modified by ricardo: the body is no longer indexed as raw HTML.
        // It is stripped of markup first; the accent-free lower-case copy is
        // what gets searched, the stripped original is kept for display.
        $texto_armazenavel = $this->limpar_por_completoModoDoisIndexacao($document ['ctdCorpo']);

        $this->addField(Zend_Search_Lucene_Field::Text('ctdCorpo', $this->retira_acentos($texto_armazenavel), 'utf-8'));
        $this->addField(Zend_Search_Lucene_Field::UnIndexed('ctdCorpoOriginal', $texto_armazenavel, 'utf-8'));

        $this->addField(Zend_Search_Lucene_Field::Text('titulo', $this->retira_acentos($document ['itnNome']), 'utf-8'));
        $this->addField(Zend_Search_Lucene_Field::UnIndexed('tituloOriginal', $document ['itnNome'], 'utf-8'));

        $this->addField(Zend_Search_Lucene_Field::UnIndexed('url', $document['itnUrl'], 'utf-8'));
    }

    /**
     * Accent removal + lower-casing shared by retira_acentos() and
     * limpar_por_completo().
     *
     * @param string $texto UTF-8 text.
     * @return string
     */
    private function _semAcentosMinusculo($texto) {
        $semAcentos = strtr((string) $texto, self::$_mapaAcentos);
        // mb_strtolower is UTF-8 aware; the byte-wise strtolower() can damage
        // multibyte characters under single-byte locales on older PHP versions.
        return mb_strtolower($semAcentos, 'UTF-8');
    }

    /**
     * Drops invisible elements, inserts line breaks before block-level tags
     * and strips all remaining tags.
     *
     * @param string $text HTML text.
     * @return string Plain text.
     */
    private function _removerMarcacaoHtml($text) {
        $original = (string) $text;

        $limpo = preg_replace(self::$_padroesConteudoInvisivel, ' ', $original);
        if ($limpo !== null) {
            $limpo = preg_replace(self::$_padroesBloco, "\n\$0", $limpo);
        }

        // preg_replace() returns null on a PCRE error (e.g. backtrack limit on
        // a very large document). Fall back to the untouched input so the
        // document text is not silently replaced by an empty string.
        if ($limpo === null) {
            $limpo = $original;
        }

        return strip_tags($limpo);
    }

    /**
     * Truncation logic shared by Truncate() and truncateModoDoisIndexacao().
     *
     * @param string $string
     * @param int    $start
     * @param int    $length
     * @param string $prefix
     * @param string $postfix
     * @return string
     */
    private function _truncar($string, $start, $length, $prefix, $postfix) {
        // Retained from the original implementation: the mb_* calls below
        // rely on the internal encoding being UTF-8.
        mb_internal_encoding("UTF-8");

        $truncated = trim((string) $string);
        $start = (int) $start;
        $length = (int) $length;

        // A maximum length below 1 means "do not truncate".
        if ($length < 1) {
            return $truncated;
        }

        // Measured with mbstring (not iconv) so the length and the later
        // mb_substr() cut are computed with the same encoding.
        $full_length = mb_strlen($truncated);
        if ($full_length <= $length) {
            return $truncated;
        }

        // Right-clipped: the window reaches (or passes) the end of the text,
        // so pull it back to the last $length characters and drop the
        // trailing marker. ">=" also covers a window ending exactly at the
        // end, where nothing is cut on the right.
        if ($length + $start >= $full_length) {
            $start = $full_length - $length;
            $postfix = '';
        }

        // Left-clipped: nothing was cut before the window.
        if ($start == 0) {
            $prefix = '';
        }

        return $prefix . trim(mb_substr($truncated, $start, $length)) . $postfix;
    }

}