diff --git a/main/inc/lib/opengraph b/main/inc/lib/opengraph deleted file mode 160000 index faba415deb..0000000000 --- a/main/inc/lib/opengraph +++ /dev/null @@ -1 +0,0 @@ -Subproject commit faba415deb9a84ae534dce78afdd5ca6535bdf12 diff --git a/main/inc/lib/opengraph/OpenGraph.php b/main/inc/lib/opengraph/OpenGraph.php new file mode 100755 index 0000000000..b3ebb2bc92 --- /dev/null +++ b/main/inc/lib/opengraph/OpenGraph.php @@ -0,0 +1,209 @@ + array('activity', 'sport'), + 'business' => array('bar', 'company', 'cafe', 'hotel', 'restaurant'), + 'group' => array('cause', 'sports_league', 'sports_team'), + 'organization' => array('band', 'government', 'non_profit', 'school', 'university'), + 'person' => array('actor', 'athlete', 'author', 'director', 'musician', 'politician', 'public_figure'), + 'place' => array('city', 'country', 'landmark', 'state_province'), + 'product' => array('album', 'book', 'drink', 'food', 'game', 'movie', 'product', 'song', 'tv_show'), + 'website' => array('blog', 'website'), + ); + + /** + * Holds all the Open Graph values we've parsed from a page + * + */ + private $_values = array(); + + /** + * Fetches a URI and parses it for Open Graph data, returns + * false on error. 
+     *
+     * The request follows redirects, fails on HTTP error statuses and gives
+     * up after 15 seconds.
+     *
+     * NOTE(review): TLS host and peer verification are switched off below, so
+     * HTTPS responses are not authenticated. Left unchanged to avoid breaking
+     * existing callers; confirm whether it is still required.
+     *
+     * @param $URI URI to page to parse for Open Graph data
+     * @return OpenGraph|false Parsed page, or false when the request fails,
+     *                         the body is empty or nothing could be extracted
+     */
+    public static function fetch($URI) {
+        $curl = curl_init($URI);
+        if ($curl === false) {
+            return false;
+        }
+
+        curl_setopt($curl, CURLOPT_FAILONERROR, true);
+        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
+        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
+        curl_setopt($curl, CURLOPT_TIMEOUT, 15);
+        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
+        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
+
+        // The User-Agent header is missing on the CLI and on some requests;
+        // only forward it when the client actually sent one.
+        if (!empty($_SERVER['HTTP_USER_AGENT'])) {
+            curl_setopt($curl, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
+        }
+
+        $response = curl_exec($curl);
+
+        curl_close($curl);
+
+        if (empty($response)) {
+            return false;
+        }
+
+        return self::_parse($response);
+    }
+
+    /**
+     * Parses HTML and extracts Open Graph data, this assumes
+     * the document is at least well formed.
+     *
+     * Besides the og:* meta tags, the page <title>, the plain "description"
+     * meta tag and <link rel="image_src"> are used as fallbacks for the
+     * title, description and image values when those are not set.
+     *
+     * @param $HTML HTML to parse
+     * @return OpenGraph|false False when the document has no meta tags or
+     *                         no value could be extracted
+     */
+    private static function _parse($HTML) {
+        // Collect libxml parse errors internally instead of emitting warnings
+        // for sloppy real-world markup, then restore the previous setting.
+        $old_libxml_error = libxml_use_internal_errors(true);
+
+        $doc = new DOMDocument();
+        $doc->loadHTML($HTML);
+
+        libxml_use_internal_errors($old_libxml_error);
+
+        $tags = $doc->getElementsByTagName('meta');
+        if (!$tags || $tags->length === 0) {
+            return false;
+        }
+
+        $page = new self();
+
+        $nonOgDescription = null;
+
+        foreach ($tags as $tag) {
+            if ($tag->hasAttribute('property') &&
+                strpos($tag->getAttribute('property'), 'og:') === 0) {
+                // "og:site-name" is stored under the key "site_name".
+                $key = strtr(substr($tag->getAttribute('property'), 3), '-', '_');
+
+                // Some sites (e.g. the New York Times) put the data in a
+                // "value" attribute instead of "content"; it wins when present.
+                $page->_values[$key] = $tag->hasAttribute('value')
+                    ? $tag->getAttribute('value')
+                    : $tag->getAttribute('content');
+            }
+
+            if ($tag->hasAttribute('name') && $tag->getAttribute('name') === 'description') {
+                $nonOgDescription = $tag->getAttribute('content');
+            }
+        }
+
+        if (!isset($page->_values['title'])) {
+            $titles = $doc->getElementsByTagName('title');
+            if ($titles->length > 0) {
+                $page->_values['title'] = $titles->item(0)->textContent;
+            }
+        }
+
+        if (!isset($page->_values['description']) && $nonOgDescription) {
+            $page->_values['description'] = $nonOgDescription;
+        }
+
+        // Fallback to use image_src if og:image isn't set. This must test
+        // $_values: a non-existent "values" property is never set, which made
+        // the fallback overwrite a real og:image.
+        if (!isset($page->_values['image'])) {
+            $domxpath = new DOMXPath($doc);
+            $elements = $domxpath->query("//link[@rel='image_src']");
+
+            if ($elements->length > 0) {
+                $domattr = $elements->item(0)->attributes->getNamedItem('href');
+                if ($domattr) {
+                    $page->_values['image'] = $domattr->value;
+                    $page->_values['image_src'] = $domattr->value;
+                }
+            }
+        }
+
+        if (empty($page->_values)) {
+            return false;
+        }
+
+        return $page;
+    }
+
+    /**
+     * Helper method to access attributes directly
+     * Example:
+     * $graph->title
+     *
+     * When no "schema" value was parsed, the key "schema" is resolved by
+     * looking up the page's "type" value in self::$TYPES.
+     *
+     * @param $key Key to fetch from the lookup
+     * @return mixed The stored value, the schema name, or null when unknown
+     */
+    public function __get($key) {
+        if (array_key_exists($key, $this->_values)) {
+            return $this->_values[$key];
+        }
+
+        if ($key === 'schema' && isset($this->_values['type'])) {
+            foreach (self::$TYPES as $schema => $types) {
+                // in_array() rather than a truthiness test on array_search():
+                // a match at index 0 must still count as found.
+                if (in_array($this->_values['type'], $types, true)) {
+                    return $schema;
+                }
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Return all the keys found on the page
+     *
+     * @return array
+     */
+    public function keys() {
+        return array_keys($this->_values);
+    }
+
+    /**
+     * Helper method to check an attribute exists
+     *
+     * @param $key
+     * @return boolean True when a value was parsed for $key
+     */
+    public function __isset($key) {
+        return array_key_exists($key, $this->_values);
+    }
+
+    /**
+     * Will return true if the page has location data 
embedded
+     *
+     * Location data means either a latitude/longitude pair or a complete
+     * address: street_address, locality, region, postal_code and
+     * country_name all present.
+     *
+     * @return boolean Check if the page has location data
+     */
+    public function hasLocation() {
+        $has_coordinates = array_key_exists('latitude', $this->_values)
+            && array_key_exists('longitude', $this->_values);
+        if ($has_coordinates) {
+            return true;
+        }
+
+        // Every address component has to be present for the address to count.
+        foreach (array('street_address', 'locality', 'region', 'postal_code', 'country_name') as $part) {
+            if (!array_key_exists($part, $this->_values)) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * Iterator code: walks $_values through the array's internal pointer,
+     * while $_position counts the steps taken since the last rewind().
+     */
+    private $_position = 0;
+
+    public function rewind() {
+        $this->_position = 0;
+        reset($this->_values);
+    }
+
+    public function current() {
+        return current($this->_values);
+    }
+
+    public function key() {
+        return key($this->_values);
+    }
+
+    public function next() {
+        $this->_position++;
+        next($this->_values);
+    }
+
+    public function valid() {
+        return count($this->_values) > $this->_position;
+    }
+}