/**
 * Class for configuring which attributes are kept when cleaning markup.
 * @class cleanHtml
 * @link www.jb200.com
 * @date 2013/2/28
 */
/**
 * Escape a string so it can be embedded literally in a "/"-delimited PCRE pattern.
 *
 * Delegates to preg_quote() instead of a hand-maintained conversion table, so
 * every regex metacharacter (including the backslash and the "/" delimiter)
 * is escaped consistently.
 *
 * @param string $str Raw text that should be matched literally.
 * @return string The text with regex metacharacters backslash-escaped.
 */
function reg_escape( $str )
{
    // Cast first so a null/scalar argument behaves like the empty/stringified value.
    return preg_quote( (string) $str, '/' );
}
/**
 * Strip attribute class.
 *
 * Removes attributes from XML/HTML elements, keeping only attributes that are
 * listed in $allow (for every element) or in $exceptions (per element).
 *
 * Known limitations of the regex-based approach used here:
 *  - a ">" inside an attribute value ends the tag match early;
 *  - element and attribute names are compared case-sensitively;
 *  - valueless attributes (e.g. "allowfullscreen") are always dropped;
 *  - the trailing "/" of a self-closing tag is not kept in the rewritten tag.
 *
 * @author David (semlabs.co.uk)
 * @version 0.2.1
 */
class cleanHtml
{
    /** @var mixed Markup being processed; holds the cleaned markup after strip(). */
    public $str = '';

    /** @var array Attribute names that are kept on every element. */
    public $allow = array();

    /** @var array Map of element name => list of attribute names kept on that element. */
    public $exceptions = array();

    /** @var array Element names whose tags are left untouched. */
    public $ignore = array();

    /**
     * Remove all non-permitted attributes from the given markup.
     *
     * @param mixed $str Markup to clean. Non-string or empty input is returned unchanged.
     * @return mixed The cleaned markup (also stored in $this->str).
     */
    public function strip( $str )
    {
        $this->str = $str;

        if( !is_string( $str ) || $str === '' )
        {
            return $this->str;
        }

        $nodes = $this->findElements();
        if( empty( $nodes ) )
        {
            // Nothing carries attributes, so there is nothing to rewrite.
            return $this->str;
        }

        $this->removeAttributes( $this->findAttributes( $nodes ) );

        return $this->str;
    }

    /**
     * Collect every opening tag in $this->str that has something after its name.
     *
     * @return array List of array( 'literal' => full tag text, 'name' => element name,
     *               'attributes' => raw text between the name and ">" ); empty when
     *               no such tag exists.
     */
    private function findElements()
    {
        $nodes = array();

        // A non-array ignore list is treated as "ignore nothing".
        $ignored = is_array( $this->ignore ) ? $this->ignore : array();

        // Group 1: element name (stops at whitespace, "!", "/" or ">", so closing
        // tags and "<!...>" constructs never match). Group 2: everything up to ">".
        if( !preg_match_all( '/<([^\s!\/>]+)([^>]*)>/', $this->str, $elements, PREG_SET_ORDER ) )
        {
            return $nodes;
        }

        foreach( $elements as $element )
        {
            if( $element[2] === '' || in_array( $element[1], $ignored, true ) )
            {
                continue;
            }

            $nodes[] = array( 'literal' => $element[0], 'name' => $element[1], 'attributes' => $element[2] );
        }

        return $nodes;
    }

    /**
     * Parse the raw attribute text of each node into name/value pairs.
     *
     * @param array $nodes Nodes as produced by findElements().
     * @return array The same nodes with 'attributes' replaced by a (possibly empty)
     *               list of array( 'literal' => ..., 'name' => ..., 'value' => ... ).
     */
    private function findAttributes( $nodes )
    {
        // name = value, where value is "double quoted", 'single quoted' or unquoted.
        // The branch-reset group (?|...) puts the value in group 2 for all three forms.
        $pattern = '/([^\s=\/"\'<>]+)\s*=\s*(?|"([^"]*)"|\'([^\']*)\'|([^\s"\'>]*))/';

        foreach( $nodes as $key => $node )
        {
            // Start from an empty list for every node so attributes never leak between nodes.
            $atts = array();

            if( preg_match_all( $pattern, $node['attributes'], $matches, PREG_SET_ORDER ) )
            {
                foreach( $matches as $match )
                {
                    $atts[] = array( 'literal' => $match[0], 'name' => $match[1], 'value' => $match[2] );
                }
            }

            $nodes[$key]['attributes'] = $atts;
        }

        return $nodes;
    }

    /**
     * Rewrite each node's tag in $this->str so that only permitted attributes remain.
     *
     * @param array $nodes Nodes as produced by findAttributes().
     * @return void
     */
    private function removeAttributes( $nodes )
    {
        $allowed = is_array( $this->allow ) ? $this->allow : array();

        foreach( $nodes as $node )
        {
            $node_name = $node['name'];
            $new_attributes = '';

            if( is_array( $node['attributes'] ) )
            {
                foreach( $node['attributes'] as $attribute )
                {
                    if( in_array( $attribute['name'], $allowed, true ) || $this->isException( $node_name, $attribute['name'], $this->exceptions ) )
                    {
                        $new_attributes = $this->createAttributes( $new_attributes, $attribute['name'], $attribute['value'] );
                    }
                }
            }

            $replacement = ( $new_attributes !== '' ) ? "<$node_name $new_attributes>" : "<$node_name>";

            // Plain string replacement: the tag is a literal, and a regex replacement
            // would interpret "$1" / "\1" inside kept attribute values as backreferences.
            $this->str = str_replace( $node['literal'], $replacement, $this->str );
        }
    }

    /**
     * Tell whether an attribute is explicitly permitted for a specific element.
     *
     * @param string $element_name   Element the attribute belongs to.
     * @param string $attribute_name Attribute being checked.
     * @param mixed  $exceptions     Map of element name => list of permitted attribute names.
     * @return bool True when $attribute_name is listed for $element_name.
     */
    private function isException( $element_name, $attribute_name, $exceptions )
    {
        return is_array( $exceptions )
            && isset( $exceptions[$element_name] )
            && is_array( $exceptions[$element_name] )
            && in_array( $attribute_name, $exceptions[$element_name], true );
    }

    /**
     * Append one name="value" pair to an attribute string.
     *
     * @param string $new_attributes Attribute string built so far (may be empty).
     * @param string $name           Attribute name.
     * @param string $value          Attribute value.
     * @return string The attribute string with the new pair appended, space separated.
     */
    private function createAttributes( $new_attributes, $name, $value )
    {
        if( $new_attributes !== '' )
        {
            $new_attributes .= ' ';
        }

        // A single-quoted source value may contain double quotes; encode them so
        // the double-quoted output stays well formed.
        $new_attributes .= $name . '="' . str_replace( '"', '&quot;', $value ) . '"';

        return $new_attributes;
    }
}
// Demo: keep "id" on every element, plus src/alt on <img> and href/title on <a>;
// every other attribute is stripped from the sample markup before it is printed.
// The sample string is kept on one line so that "<iframe" is not split by a line break.
$str = 'Here is some sample html that is <span id="good" class="aClass" style="abc" font="3"> <font color="red"> getting broken </font> </span> <iframe width="540" height="304" src="http://www.youtube.com/embed/YacZqlFz2bI?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe> <img id="featuredon" src="http://www.goodfinancialcents.com/wp-content/themes/thesis_182/custom/images/featuredon.jpg" height="23" width="265" />';
$sa = new cleanHtml;
$sa->allow = array( 'id' );
$sa->exceptions = array(
    'img' => array( 'src', 'alt' ),
    'a' => array( 'href', 'title' ),
);
echo $str = $sa->strip( $str );