HTML and DOM Parsing


// Create a new DOMDocument object.
$dom = new DOMDocument();
// Load the HTML page into the object.
// NOTE(review): the load call is missing from the original snippet; $html is
// assumed to hold the page markup as a string -- confirm against the source.
$dom->loadHTML($html);

// Once the HTML is loaded into the object, access nodes and child elements:

// Get an element by its ID (null when no element carries that ID).
$my_div_obj = $dom->getElementById('mydiv');

// Get all elements of a type.
$anchors = $dom->getElementsByTagName('a');
foreach ($anchors as $anchor) {
    // saveHTML($node) serializes the given node, so this prints each anchor's
    // markup (tag, attributes and children), not just its text.
    // Use $anchor->textContent instead when only the text is wanted.
    echo $dom->saveHTML($anchor);
}

Remove HTML Tag Attribute

/**
 * Remove a named attribute, together with the whitespace in front of it,
 * from the tags of an HTML string.
 *
 * This is a regex-based helper meant for simple markup; it is not an HTML
 * parser. The attribute name is matched case-insensitively and only as a
 * whole name, so removing "style" leaves "data-style" untouched. Both
 * double-quoted and single-quoted values are recognised.
 *
 * @param string $html           Markup to process.
 * @param string $attribute_name Name of the attribute to strip.
 * @return string|null The markup without the attribute, or null when
 *                     preg_replace() reports an error.
 */
function remove_html_attribute($html, $attribute_name) {
    // Escape the name so characters such as "." or "/" cannot alter the pattern.
    $name = preg_quote($attribute_name, '/');

    // $1 lazily captures the tag up to (but excluding) the whitespace before the
    // attribute, so no stray space is left behind. The lookbehind rejects matches
    // where the name is merely the tail of a longer attribute name.
    $pattern = '/(<[^>]+?)\s*(?<![\w:.-])' . $name . '\s*=\s*(?:"[^"]*"|\'[^\']*\')/i';

    return preg_replace($pattern, '$1', $html);
}

echo remove_html_attribute('<div style="color:#CCC;"></div>', 'style'); // Prints '<div></div>'

Get all image tags

// Collect every <img ...> tag found in $html. The pattern matches "<img", then
// one or more characters that are not ">", then the closing ">", ignoring case.
// After the call, $result[0] holds the list of full-tag matches.
preg_match_all('/<img[^>]+>/i', $html, $result);