php clean html代码(设置过滤及保留属性)

发布时间:2020-03-03 编辑:脚本学堂
php实现的可以设置过滤及保留相关属性的代码,有需要的朋友可以参考下。

1、代码如下:
 

<?php
/**
 * clean html 设置过滤 保留属性
 * by http://www.jb200.com
 *
*/
/**
 * Escapes a string so it can be embedded literally inside a "/"-delimited PCRE pattern.
 *
 * The previous hand-written conversion table had lost its backslashes, so it
 * mapped every character to itself and did not escape anything. preg_quote()
 * escapes every PCRE metacharacter, and passing "/" as the delimiter makes it
 * escape forward slashes as well.
 *
 * @param string $str Text to be matched literally.
 * @return string The text with all regex-special characters backslash-escaped.
 */
function reg_escape( $str )
{
    return preg_quote( $str, '/' );
}
      
/**
 * Strips attributes from the tags of an HTML string, keeping only whitelisted ones.
 *
 * Configure the public properties, then call strip():
 *  - $allow:      attribute names kept on every element.
 *  - $exceptions: map of element name => list of attribute names that are
 *                 additionally kept on that element.
 *  - $ignore:     element names whose tags are left completely untouched.
 *
 * Element and attribute names are compared case-sensitively. Closing tags,
 * tags starting with "!" (comments, doctype) and tags that carry nothing after
 * the element name are never rewritten. A rewritten tag is emitted as
 * <name attr="value" ...>; the trailing "/" of a self-closing tag is dropped.
 */
class cleanHtml
{
    /** @var string Working copy of the markup passed to strip(). */
    public $str         = '';
    /** @var array Attribute names allowed on every element. */
    public $allow       = array();
    /** @var array Element name => array of attribute names allowed on that element only. */
    public $exceptions  = array();
    /** @var array Element names that must not be modified. */
    public $ignore      = array();

    /**
     * Removes every attribute that is not whitelisted from the given markup.
     *
     * @param mixed $str Markup to clean. Non-string or empty input is returned unchanged.
     * @return mixed The cleaned markup (or the input itself when there was nothing to clean).
     */
    public function strip( $str )
    {
        $this->str = $str;

        if( is_string( $str ) && strlen( $str ) > 0 )
        {
            $res = $this->findElements();
            // findElements() hands back the untouched string when no tag needs work.
            if( is_string( $res ) )
                return $res;
            $nodes = $this->findAttributes( $res );
            $this->removeAttributes( $nodes );
        }

        return $this->str;
    }

    /**
     * Collects the opening tags of $this->str that have something after the element name.
     *
     * @return array|string List of nodes (keys: literal, name, attributes), or
     *                      $this->str itself when no tag qualifies.
     */
    private function findElements()
    {
        // A non-array $ignore is treated as "ignore nothing" so that a
        // misconfiguration cannot silently switch the cleaning off.
        $ignored = is_array( $this->ignore ) ? $this->ignore : array();

        // Group 1: element name (stops at space, "!", "/", ">" or newline, which
        // also rules out closing tags and comments). Group 2: the rest up to ">".
        if( !preg_match_all( '/<([^ !\/>\n]+)([^>]*)>/i', $this->str, $elements ) )
            return $this->str;

        $nodes = array();
        foreach( $elements[1] as $el_key => $element_name )
        {
            if( $elements[2][$el_key] === '' )
                continue;
            if( in_array( $element_name, $ignored, true ) )
                continue;

            $nodes[] = array(
                'literal'    => $elements[0][$el_key],
                'name'       => $element_name,
                'attributes' => $elements[2][$el_key],
            );
        }

        // Return the markup unchanged if there is nothing to rewrite.
        if( empty( $nodes ) )
            return $this->str;

        return $nodes;
    }

    /**
     * Parses the raw attribute text of each node into name/value pairs.
     *
     * @param array $nodes Nodes as produced by findElements().
     * @return array The same nodes with 'attributes' replaced by a list of
     *               arrays (keys: literal, name, value), or null when the tag
     *               contains no name=value pair.
     */
    private function findAttributes( $nodes )
    {
        // The value is captured by a branch-reset group "(?|...)", so whichever
        // alternative matches (double-quoted, single-quoted or unquoted), the
        // value always lands in group 2. Quoted values may contain the other
        // quote character.
        $pattern = '/([^\s=]+)\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]*))/';

        foreach( $nodes as $node_key => $node )
        {
            $found = array();
            if( preg_match_all( $pattern, $node['attributes'], $attributes ) )
            {
                foreach( $attributes[1] as $att_key => $attribute_name )
                {
                    $found[] = array(
                        'literal' => $attributes[0][$att_key],
                        'name'    => $attribute_name,
                        'value'   => $attributes[2][$att_key],
                    );
                }
            }

            $nodes[$node_key]['attributes'] = $found ? $found : null;
        }

        return $nodes;
    }

    /**
     * Rewrites each node's tag in $this->str so that only permitted attributes remain.
     *
     * @param array $nodes Nodes as produced by findAttributes().
     * @return void
     */
    private function removeAttributes( $nodes )
    {
        foreach( $nodes as $node )
        {
            $node_name = $node['name'];
            $new_attributes = '';

            if( is_array( $node['attributes'] ) )
            {
                foreach( $node['attributes'] as $attribute )
                {
                    $allowed_everywhere = is_array( $this->allow )
                        && in_array( $attribute['name'], $this->allow, true );

                    if( $allowed_everywhere || $this->isException( $node_name, $attribute['name'], $this->exceptions ) )
                        $new_attributes = $this->createAttributes( $new_attributes, $attribute['name'], $attribute['value'] );
                }
            }

            $replacement = ( $new_attributes !== '' ) ? "<$node_name $new_attributes>" : "<$node_name>";

            // Plain string replacement: a regex replacement would interpret "$1"
            // or "\1" inside a kept attribute value as a backreference.
            $this->str = str_replace( $node['literal'], $replacement, $this->str );
        }
    }

    /**
     * Tells whether an attribute is specifically permitted on the given element.
     *
     * @param string $element_name   Element (tag) name.
     * @param string $attribute_name Attribute name.
     * @param mixed  $exceptions     Map of element name => allowed attribute names.
     * @return bool True when $exceptions lists the attribute for that element.
     */
    private function isException( $element_name, $attribute_name, $exceptions )
    {
        return is_array( $exceptions )
            && isset( $exceptions[$element_name] )
            && is_array( $exceptions[$element_name] )
            && in_array( $attribute_name, $exceptions[$element_name], true );
    }

    /**
     * Appends one name="value" pair to an attribute string.
     *
     * @param string $new_attributes Attribute string built so far (may be empty).
     * @param string $name           Attribute name.
     * @param string $value          Attribute value as captured from the source tag.
     * @return string The attribute string with the new pair appended.
     */
    private function createAttributes( $new_attributes, $name, $value )
    {
        if( $new_attributes )
            $new_attributes .= ' ';

        // The value is always re-emitted inside double quotes, so a literal
        // double quote (possible in a single-quoted source value) must be
        // encoded or it would terminate the attribute early.
        $new_attributes .= $name . '="' . str_replace( '"', '&quot;', $value ) . '"';

        return $new_attributes;
    }
}
?>
2、调用示例:
<?php
// Build and configure the cleaner first.
$sa = new cleanHtml();  // create the cleaner instance

// Attributes kept only on specific elements.
$sa->exceptions = array(
    'img'    => array( 'src', 'alt' ),
    'a'      => array( 'href', 'title' ),
    'iframe' => array( 'src', 'frameborder' ),
);

// Attributes kept on every element.
$sa->allow = array( 'id' );

// Sample markup carrying a mix of permitted and unwanted attributes.
$str = 'Here is some sample html that is <span id="good" class="aClass" style="abc" font="3"> <font color="red"> getting broken </font> </span> <iframe width="540" height="304" src="http://www.youtube.com/embed/YacZqlFz2bI?fs=1&#038;feature=oembed" frameborder="0" allowfullscreen></iframe> <img id="featuredon" src="http://www.goodfinancialcents.com/wp-content/themes/thesis_182/custom/images/featuredon.jpg" height="23" width="265" />';

// Clean the markup, keep the result in $str and print it.
$str = $sa->strip( $str );
echo $str;
?>