parent
634287c866
commit
90f59ee0c3
@ -1 +0,0 @@ |
||||
kellan <kellan@protest.net> |
||||
@ -1,41 +0,0 @@ |
||||
Version 0.72 |
||||
----------- |
||||
- fix security exploit: http://www.sec-consult.com/216.html |
||||
|
||||
Version 0.7 |
||||
----------- |
||||
- support for input and output charset encoding |
||||
based on the work in FoF, uses iconv or mbstring if available |
||||
- |
||||
|
||||
Version 0.6 |
||||
----------- |
||||
- basic support for Atom syndication format |
||||
including support for Atom content constructs |
||||
- fixed support for private feeds (HTTP Auth and SSL) |
||||
(thanks to silverorange.com for providing test feeds) |
||||
- support for some broken webservers |
||||
|
||||
Version 0.52 |
||||
----------- |
||||
- support GZIP content negotiation |
||||
- PHP 4.3.2 support |
||||
|
||||
Version 0.4 |
||||
----------- |
||||
- improved error handling, better access for script authors |
||||
- included example scripts of working with MagpieRSS |
||||
- new Smarty plugin for RSS date parsing |
||||
|
||||
Version 0.3 |
||||
----------- |
||||
- added support for conditional gets (Last-Modified, ETag) |
||||
- now use Snoopy to handle fetching RSS files |
||||
|
||||
Version 0.2 |
||||
----------- |
||||
- MAJOR CLEAN UP |
||||
- removed kludgy $options array in favour of constants |
||||
- phased out returning arrays |
||||
- added better error handling |
||||
- re-worked comments |
||||
@ -1,405 +0,0 @@ |
||||
2005-10-28 14:11 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: a better solution |
||||
|
||||
2005-10-28 11:51 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: fix arbitrary code execution |
||||
vulnerability when using curl+ssl |
||||
|
||||
http://www.sec-consult.com/216.html |
||||
|
||||
2005-03-08 10:46 kellan |
||||
|
||||
* rss_parse.inc: fix bug w/ atom and date normalization |
||||
|
||||
2005-02-09 14:59 kellan |
||||
|
||||
* rss_fetch.inc: fix stale cache bug |
||||
|
||||
2005-01-28 02:27 kellan |
||||
|
||||
* rss_parse.inc: support php w/o array_change_case |
||||
|
||||
2005-01-23 20:02 kellan |
||||
|
||||
* rss_fetch.inc: fix cache bug introduced by charset encoding |
||||
|
||||
2005-01-12 09:14 kellan |
||||
|
||||
* rss_cache.inc, rss_fetch.inc: more sanity checks for when things |
||||
go wrong |
||||
|
||||
2004-12-12 13:44 kellan |
||||
|
||||
* INSTALL, rss_cache.inc, rss_utils.inc: detab |
||||
|
||||
2004-11-23 20:15 kellan |
||||
|
||||
* rss_parse.inc: fix calling iconv instead of mb_convert_encoding |
||||
|
||||
2004-11-22 02:11 kellan |
||||
|
||||
* CHANGES, ChangeLog, rss_parse.inc, scripts/magpie_debug.php: last |
||||
bit of tidying |
||||
|
||||
2004-11-22 01:45 kellan |
||||
|
||||
* rss_fetch.inc: detab, bump version |
||||
|
||||
2004-11-22 01:43 kellan |
||||
|
||||
* rss_parse.inc: was filtering too much |
||||
|
||||
2004-11-22 00:03 kellan |
||||
|
||||
* rss_fetch.inc, rss_parse.inc: cache on $url . $output_encoding |
||||
otherwise we can get munged output |
||||
|
||||
2004-11-21 23:52 kellan |
||||
|
||||
* rss_parse.inc: add WARNING |
||||
|
||||
2004-11-21 23:45 kellan |
||||
|
||||
* rss_parse.inc: don't set ERROR on notice or warning (rss_fetch |
||||
dies on parse errors) |
||||
|
||||
2004-11-21 23:44 kellan |
||||
|
||||
* rss_fetch.inc: add encoding defines (fix timeout error reporting) |
||||
|
||||
2004-11-21 20:21 kellan |
||||
|
||||
* rss_parse.inc: incorporate steve's patch |
||||
|
||||
2004-11-21 19:26 kellan |
||||
|
||||
* rss_parse.inc: remove old debugging functions, totally |
||||
arbitrarily. might break stuff. can't really explain why i'm |
||||
doing this. |
||||
|
||||
2004-10-28 15:52 kellan |
||||
|
||||
* rss_parse.inc: fixed '=' instead of '==' |
||||
|
||||
2004-10-26 00:48 kellan |
||||
|
||||
* rss_parse.inc: chance epoch to timestamp to conform w/ php naming |
||||
conventions |
||||
|
||||
2004-06-15 12:00 kellan |
||||
|
||||
* rss_parse.inc: [no log message] |
||||
|
||||
2004-04-26 14:16 kellan |
||||
|
||||
* rss_fetch.inc: bump version |
||||
|
||||
2004-04-26 12:36 kellan |
||||
|
||||
* rss_parse.inc: fix field doubling |
||||
|
||||
2004-04-24 17:47 kellan |
||||
|
||||
* CHANGES, ChangeLog: updated |
||||
|
||||
2004-04-24 17:35 kellan |
||||
|
||||
* rss_fetch.inc: bumped version |
||||
|
||||
2004-04-24 16:52 kellan |
||||
|
||||
* rss_parse.inc: support arbitrary atom content constructs |
||||
|
||||
some refactoring |
||||
|
||||
2004-04-24 16:15 kellan |
||||
|
||||
* rss_parse.inc: support summary content contstruct. add normalize |
||||
function |
||||
|
||||
2004-03-27 16:29 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: accept self-signed certs |
||||
|
||||
2004-03-27 12:53 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: fixed SSL support * set status * set |
||||
error on bad curl |
||||
|
||||
(also ripped out big chunks of dead weight (submit_form) which |
||||
were getting in my way |
||||
|
||||
2004-01-25 02:25 kellan |
||||
|
||||
* rss_parse.inc: make RSS 1.0's rdf:about available |
||||
|
||||
2004-01-25 02:07 kellan |
||||
|
||||
* rss_parse.inc: clean up text, and line formats. add support item |
||||
rdf:about |
||||
|
||||
2004-01-24 23:40 kellan |
||||
|
||||
* CHANGES, ChangeLog: update changes |
||||
|
||||
2004-01-24 23:37 kellan |
||||
|
||||
* rss_fetch.inc: updated version |
||||
|
||||
2004-01-24 23:35 kellan |
||||
|
||||
* rss_parse.inc: whitespace |
||||
|
||||
2004-01-24 23:23 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: support badly formatted http headers |
||||
|
||||
2004-01-24 23:20 kellan |
||||
|
||||
* rss_parse.inc: added alpha atom parsing support |
||||
|
||||
2003-06-25 22:34 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: fixed fread 4.3.2 compatibility problems |
||||
|
||||
2003-06-13 11:31 kellan |
||||
|
||||
* rss_fetch.inc: reset cache on 304 |
||||
|
||||
2003-06-12 21:37 kellan |
||||
|
||||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: |
||||
bumped up version numbers |
||||
|
||||
2003-06-12 21:32 kellan |
||||
|
||||
* htdocs/index.html: updated news |
||||
|
||||
2003-06-12 21:27 kellan |
||||
|
||||
* NEWS: a manual blog :) |
||||
|
||||
2003-06-12 21:22 kellan |
||||
|
||||
* htdocs/index.html: fully qualified img |
||||
|
||||
2003-06-12 21:20 kellan |
||||
|
||||
* htdocs/index.html: clean up. added badge. |
||||
|
||||
2003-06-12 21:04 kellan |
||||
|
||||
* rss_utils.inc: clean up regex |
||||
|
||||
2003-06-12 21:02 kellan |
||||
|
||||
* rss_cache.inc: suppress some warnings |
||||
|
||||
2003-05-30 20:44 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: more comments, cleaned up notice |
||||
|
||||
2003-05-30 15:14 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: don't advertise gzip support if the user |
||||
hasn't built php with gzinflate support |
||||
|
||||
2003-05-12 22:32 kellan |
||||
|
||||
* ChangeLog: changes |
||||
|
||||
2003-05-12 22:11 kellan |
||||
|
||||
* htdocs/index.html: announce 0.5 |
||||
|
||||
2003-05-12 21:42 kellan |
||||
|
||||
* htdocs/index.html: change |
||||
|
||||
2003-05-12 21:39 kellan |
||||
|
||||
* rss_fetch.inc: use gzip |
||||
|
||||
2003-05-12 21:37 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: added support gzip encoded content |
||||
negoiation |
||||
|
||||
2003-05-12 21:32 kellan |
||||
|
||||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: fixed |
||||
typoes |
||||
|
||||
2003-04-26 21:44 kellan |
||||
|
||||
* rss_parse.inc: fix minor typo |
||||
|
||||
2003-04-18 08:19 kellan |
||||
|
||||
* htdocs/cookbook.html: updated cookbook to show more code for |
||||
limiting items |
||||
|
||||
2003-03-03 16:02 kellan |
||||
|
||||
* rss_parse.inc, scripts/magpie_slashbox.php: committed (or |
||||
adpated) patch from Nicola (www.technick.com) to quell 'Undefined |
||||
Indexes' notices |
||||
|
||||
2003-03-03 15:59 kellan |
||||
|
||||
* rss_fetch.inc: commited patch from nicola (www.technick.com) to |
||||
quell 'undefined indexes' notices. |
||||
|
||||
* Magpie now automatically includes its version in the |
||||
user-agent, & whether cacheing is turned on. |
||||
|
||||
2003-02-12 01:22 kellan |
||||
|
||||
* CHANGES, ChangeLog: ChangeLog now auto-generated by cvs2cl |
||||
|
||||
2003-02-12 00:21 kellan |
||||
|
||||
* rss_fetch.inc: better errors, hopefully stomped on pesky notices |
||||
|
||||
2003-02-12 00:19 kellan |
||||
|
||||
* rss_parse.inc: check to see is xml is supported, if not die |
||||
|
||||
also throw better xml errors |
||||
|
||||
2003-02-12 00:18 kellan |
||||
|
||||
* rss_cache.inc: hopefully cleared up some notices that were being |
||||
thrown into the log |
||||
|
||||
fixed a debug statement that was being called as an error |
||||
|
||||
2003-02-12 00:15 kellan |
||||
|
||||
* scripts/: magpie_simple.php, magpie_slashbox.php: moved |
||||
magpie_simple to magpie_slashbox, and replaced it with a simpler |
||||
demo. |
||||
|
||||
2003-02-12 00:02 kellan |
||||
|
||||
* INSTALL, README, TROUBLESHOOTING: Improved documentation. Better |
||||
install instructions. |
||||
|
||||
TROUBLESHOOTING cover common installation and usage problems |
||||
|
||||
2003-01-22 14:40 kellan |
||||
|
||||
* htdocs/cookbook.html: added cookbook.html |
||||
|
||||
2003-01-21 23:47 kellan |
||||
|
||||
* cookbook: a magpie cookbook |
||||
|
||||
2003-01-20 10:09 kellan |
||||
|
||||
* ChangeLog: updated |
||||
|
||||
2003-01-20 09:23 kellan |
||||
|
||||
* scripts/simple_smarty.php: minor clean up |
||||
|
||||
2003-01-20 09:15 kellan |
||||
|
||||
* scripts/README: added smarty url |
||||
|
||||
2003-01-20 09:14 kellan |
||||
|
||||
* magpie_simple.php, htdocs/index.html, scripts/README, |
||||
scripts/magpie_debug.php, scripts/magpie_simple.php, |
||||
scripts/simple_smarty.php, |
||||
scripts/smarty_plugin/modifier.rss_date_parse.php, |
||||
scripts/templates/simple.smarty: Added scripts directory for |
||||
examples on how to use MagpieRSS |
||||
|
||||
magpie_simple - is a simple example magpie_debug - spew all the |
||||
information from a parsed RSS feed simple_smary - example of |
||||
using magpie with Smarty template system |
||||
smarty_plugin/modifier.rss_date_parse.php - support file for the |
||||
smarty demo templates/simple.smary - template for the smarty demo |
||||
|
||||
2003-01-20 09:11 kellan |
||||
|
||||
* rss_fetch.inc, rss_parse.inc: changes to error handling to give |
||||
script authors more access to magpie's errors. |
||||
|
||||
added method magpie_error() to retrieve global MAGPIE_ERROR |
||||
variable for when fetch_rss() returns false |
||||
|
||||
2002-10-26 19:02 kellan |
||||
|
||||
* htdocs/index.html: putting the website under source control |
||||
|
||||
2002-10-26 18:43 kellan |
||||
|
||||
* AUTHORS, ChangeLog, INSTALL, README: some documentation to make |
||||
it all look official :) |
||||
|
||||
2002-10-25 23:04 kellan |
||||
|
||||
* magpie_simple.php: quxx |
||||
|
||||
2002-10-25 23:04 kellan |
||||
|
||||
* rss_parse.inc: added support for textinput and image |
||||
|
||||
2002-10-25 19:23 kellan |
||||
|
||||
* magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc, |
||||
rss_utils.inc: switched to using Snoopy for fetching remote RSS |
||||
files. |
||||
|
||||
added support for conditional gets |
||||
|
||||
2002-10-25 19:22 kellan |
||||
|
||||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: |
||||
Change comment style to slavishly imitate the phpinsider style |
||||
found in Smarty and Snoopy :) |
||||
|
||||
2002-10-25 19:18 kellan |
||||
|
||||
* extlib/Snoopy.class.inc: added Snoopy in order to support |
||||
conditional gets |
||||
|
||||
2002-10-23 23:19 kellan |
||||
|
||||
* magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc: |
||||
MAJOR CLEANUP! |
||||
|
||||
* rss_fetch got rid of the options array, replaced it with a more |
||||
PHP-like solution of using defines. constants are setup, with |
||||
defaults, in the function init() |
||||
|
||||
got rid of the idiom of passing back an array, its was awkward to |
||||
deal with in PHP, and unusual (and consquently confusing to |
||||
people). now i return true/false values, and try to setup error |
||||
string where appropiate (rss_cache has the most complete example |
||||
of this) |
||||
|
||||
change the logic for interacting with the cache |
||||
|
||||
* rss_cache major re-working of how error are handled. tried to |
||||
make the code more resillient. the cache is now much more aware |
||||
of MAX_AGE, where before this was being driven out of rss_fetch |
||||
(which was silly) |
||||
|
||||
* rss_parse properly handles xml parse errors. used to sail |
||||
along blithely unaware. |
||||
|
||||
2002-09-11 11:11 kellan |
||||
|
||||
* rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, |
||||
rss_utils.inc: Initial revision |
||||
|
||||
2002-09-11 11:11 kellan |
||||
|
||||
* rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, |
||||
rss_utils.inc: initial import |
||||
|
||||
@ -1,143 +0,0 @@ |
||||
REQUIREMENTS |
||||
|
||||
MagpieRSS requires a recent PHP 4+ (developed with 4.2.0) |
||||
with xml (expat) support. |
||||
|
||||
Optionally: |
||||
* PHP5 with libxml2 support. |
||||
* cURL for SSL support |
||||
* iconv (preferred) or mb_string for expanded character set support |
||||
|
||||
QUICK START |
||||
|
||||
Magpie consists of 4 files (rss_fetch.inc, rss_parse.inc, rss_cache.inc, |
||||
and rss_utils.inc), and the directory extlib (which contains a modified |
||||
version of the Snoopy HTTP client) |
||||
|
||||
Copy these 5 resources to a directory named 'magpierss' in the same |
||||
directory as your PHP script. |
||||
|
||||
At the top of your script add the following line: |
||||
|
||||
require_once('magpierss/rss_fetch.inc'); |
||||
|
||||
Now you can use the fetch_rss() method: |
||||
|
||||
$rss = fetch_rss($url); |
||||
|
||||
Done. That's it. See README for more details on using MagpieRSS. |
||||
|
||||
NEXT STEPS |
||||
|
||||
Important: you'll probably want to get the cache directory working in |
||||
order to speed up your application, and not abuse the webserver you're |
||||
downloading the RSS from. |
||||
|
||||
Optionally you can install MagpieRSS in your PHP include path in order to |
||||
make it available server wide. |
||||
|
||||
Lastly you might want to look through the constants in rss_fetch.inc to see if |
||||
there is anything you want to override (the defaults are pretty good) |
||||
|
||||
For more info, or if you have trouble, see TROUBLESHOOTING |
||||
|
||||
SETTING UP CACHING |
||||
|
||||
Magpie has built-in transparent caching. With caching Magpie will only |
||||
fetch and parse RSS feeds when there is new content. Without this feature |
||||
your pages will be slow, and the sites serving the RSS feed will be annoyed |
||||
with you. |
||||
|
||||
** Simple and Automatic ** |
||||
|
||||
By default Magpie will try to create a cache directory named 'cache' in the |
||||
same directory as your PHP script. |
||||
|
||||
** Creating a Local Cache Directory ** |
||||
|
||||
Often this will fail, because your webserver doesn't have sufficient |
||||
permissions to create the directory. |
||||
|
||||
Exact instructions for how to do this will vary from install to install and |
||||
platform to platform. The steps are: |
||||
|
||||
1. Make a directory named 'cache' |
||||
2. Give the web server write access to that directory. |
||||
|
||||
An example of how to do this on Debian would be: |
||||
|
||||
1. mkdir /path/to/script/cache |
||||
2. chgrp www-data /path/to/script/cache |
||||
3. chmod 775 /path/to/script/cache |
||||
|
||||
On other Unixes you'll need to change 'www-data' to what ever user Apache |
||||
runs as. (on MacOS X the user would be 'www') |
||||
|
||||
** Cache in /tmp ** |
||||
|
||||
Sometimes you won't be able to create a local cache directory. Some reasons |
||||
might be: |
||||
|
||||
1. No shell account |
||||
2. Insufficient permissions to change ownership of a directory |
||||
3. Webserver runs as 'nobody' |
||||
|
||||
In these situations using a cache directory in /tmp can often be a good |
||||
option. |
||||
|
||||
The drawback is /tmp is public, so anyone on the box can read the cache |
||||
files. Usually RSS feeds are public information, so you'll have to decide |
||||
how much of an issue that is. |
||||
|
||||
To use /tmp as your cache directory you need to add the following line to |
||||
your script: |
||||
|
||||
define('MAGPIE_CACHE_DIR', '/tmp/magpie_cache'); |
||||
|
||||
** Global Cache ** |
||||
|
||||
If you have several applications using Magpie, you can create a single |
||||
shared cache directory, either using the /tmp cache, or somewhere else on |
||||
the system. |
||||
|
||||
The upside is that you'll distribute fetching and parsing feeds across |
||||
several applications. |
||||
|
||||
INSTALLING MAGPIE SERVER WIDE |
||||
|
||||
Rather than following the Quick Start instructions, which require you to have |
||||
a copy of Magpie per application, alternately you can place it in some |
||||
shared location. |
||||
|
||||
** Adding Magpie to Your Include Path ** |
||||
|
||||
Copy the 5 resources (rss_fetch.inc, rss_parse.inc, rss_cache.inc, |
||||
rss_utils.inc, and extlib) to a directory named 'magpierss' in your include |
||||
path. Now any PHP file on your system can use Magpie with: |
||||
|
||||
require_once('magpierss/rss_fetch.inc'); |
||||
|
||||
Different installs have different include paths, and you'll have to figure |
||||
out what your include_path is. |
||||
|
||||
From shell you can try: |
||||
|
||||
php -i | grep 'include_path' |
||||
|
||||
Alternately you can create a phpinfo.php file which contains: |
||||
|
||||
<?php phpinfo(); ?> |
||||
|
||||
Debian's default is: |
||||
|
||||
/usr/share/php |
||||
|
||||
(though a more ideologically pure location would be /usr/local/share/php) |
||||
|
||||
Apple's default include path is: |
||||
|
||||
/usr/lib/php |
||||
|
||||
While the Entropy PHP build seems to use: |
||||
|
||||
/usr/local/php/lib/php |
||||
@ -1,53 +0,0 @@ |
||||
MagpieRSS News |
||||
|
||||
MAGPIERSS 0.51 RELEASED |
||||
* important bugfix! |
||||
* fix "silent failure" when PHP doesn't have zlib |
||||
|
||||
FEED ON FEEDS USES MAGPIE |
||||
* web-based RSS aggregator built with Magpie |
||||
* easy to install, easy to use. |
||||
http://minutillo.com/steve/feedonfeeds/ |
||||
|
||||
MAGPIERSS 0.5 RELEASED |
||||
* supports transparent HTTP gzip content negotiation for reduced bandwidth usage |
||||
* quashed some undefined index notices |
||||
|
||||
MAGPIERSS 0.46 RELEASED |
||||
* minor release, more error handling clean up |
||||
* documentation fixes, simpler example |
||||
* new trouble shooting guide for installation and usage problems |
||||
http://magpierss.sourceforge.net/TROUBLESHOOTING |
||||
|
||||
MAGPIE NEWS AS RSS |
||||
* releases, bug fixes, related stories in RSS |
||||
|
||||
MAGPIERSS COOKBOOK: SIMPLE PHP RSS HOW TOS |
||||
* answers some of the most frequently asked Magpie questions |
||||
* feedback, suggestions, requests, recipes welcome |
||||
http://magpierss.sourceforge.net/cookbook.html |
||||
|
||||
MAGPIERSS 0.4 RELEASED! |
||||
* improved error handling, more flexibility for script authors, backwards compatible |
||||
* new and better examples! including using MagpieRSS and Smarty |
||||
* new Smarty plugin for RSS date parsing |
||||
http://smarty.php.net |
||||
|
||||
INFINITE PENGUIN NOW SUPPORTS MAGPIE 0.3 |
||||
* simple, sophisticated RSS viewer |
||||
* includes auto-generated javascript ticker from RSS feed |
||||
http://www.infinitepenguins.net/rss/ |
||||
|
||||
TRAUMWIND RELEASES REX BACKEND FOR MAGPIERSS |
||||
* drop in support using regex based XML parser |
||||
* parses improperly formed XML that chokes expat |
||||
http://traumwind.de/blog/magpie/magpie_alike.php |
||||
|
||||
MAGPIERSS 0.3 RELEASED! |
||||
* Support added for HTTP Conditional GETs. |
||||
http://fishbowl.pastiche.org/archives/001132.html |
||||
|
||||
MAGPIERSS 0.2! |
||||
* Major clean up of the code. Easier to use. |
||||
* Simpler install on shared hosts. |
||||
* Better documentation and comments. |
||||
@ -1,48 +0,0 @@ |
||||
NAME |
||||
|
||||
MagpieRSS - a simple RSS integration tool |
||||
|
||||
SYNOPSIS |
||||
|
||||
require_once('rss_fetch.inc'); |
||||
$url = $_GET['url']; |
||||
$rss = fetch_rss( $url ); |
||||
|
||||
echo "Channel Title: " . $rss->channel['title'] . "<p>"; |
||||
echo "<ul>"; |
||||
foreach ($rss->items as $item) { |
||||
$href = $item['link']; |
||||
$title = $item['title']; |
||||
echo "<li><a href=$href>$title</a></li>"; |
||||
} |
||||
echo "</ul>"; |
||||
|
||||
DESCRIPTION |
||||
|
||||
MagpieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like", |
||||
and simple to use. |
||||
|
||||
Some features include: |
||||
|
||||
* supports RSS 0.9 - 1.0, with limited RSS 2.0 support |
||||
* supports namespaces, and modules, including mod_content and mod_event |
||||
* open minded [1] |
||||
* simple, functional interface, to object oriented backend parser |
||||
* automatic caching of parsed RSS objects makes it easy to integrate |
||||
* supports conditional GET with Last-Modified, and ETag |
||||
* uses constants for easy override of default behaviour |
||||
* heavily commented |
||||
|
||||
|
||||
1. By open minded I mean Magpie will accept any tag it finds in good faith that |
||||
it was supposed to be here. For strict validation, look elsewhere. |
||||
|
||||
|
||||
GETTING STARTED |
||||
|
||||
|
||||
|
||||
COPYRIGHT: |
||||
Copyright(c) 2002 kellan@protest.net. All rights reserved. |
||||
This software is released under the GNU General Public License. |
||||
Please read the disclaimer at the top of the Snoopy.class.inc file. |
||||
@ -1,152 +0,0 @@ |
||||
TROUBLESHOOTING |
||||
|
||||
|
||||
Trouble Installing MagpieRSS: |
||||
|
||||
1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' |
||||
(include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') |
||||
|
||||
2. Cache couldn't make dir './cache'. |
||||
|
||||
3. Fatal error: Failed to load PHP's XML Extension. |
||||
http://www.php.net/manual/en/ref.xml.php |
||||
|
||||
Trouble Using MagpieRSS |
||||
|
||||
4. Warning: MagpieRSS: Failed to fetch example.com/index.rdf. |
||||
(HTTP Error: Invalid protocol "") |
||||
|
||||
5. Warning: MagpieRSS: Failed to parse RSS file. |
||||
(not well-formed (invalid token) at line 19, column 98) |
||||
|
||||
6. Warning: MagpieRSS: Failed to fetch http://localhost/rss/features.1-0.rss. |
||||
(HTTP Response: HTTP/1.1 404 Not Found) |
||||
|
||||
If you would rather provide a custom error, see the COOKBOOK |
||||
(http://magpierss.sf.net/cookbook.html) recipe 2. |
||||
|
||||
************************************************************************* |
||||
1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' |
||||
(include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') |
||||
|
||||
This could mean that: |
||||
|
||||
a) PHP can't find the MagpieRSS files. |
||||
b) PHP found the MagpieRSS files, but can't read them. |
||||
|
||||
a. Telling PHP where to look for MagpieRSS file. |
||||
|
||||
This might mean your PHP program can't find the MagpieRSS libraries. |
||||
Magpie relies on 4 include files, rss_fetch.inc, rss_parse.inc, |
||||
rss_cache.inc, rss_utils.inc, and for normal use you'll need all 4 (see the |
||||
cookbook for exceptions). |
||||
|
||||
This can be fixed by making sure the MagpieRSS files are in your include |
||||
path. |
||||
|
||||
If you can't edit your include path (for example you're on a shared host) then |
||||
you need to replace: |
||||
|
||||
require_once('rss_fetch.inc'); |
||||
|
||||
-with- |
||||
|
||||
define('MAGPIE_DIR', '/path/to/magpierss/'); |
||||
require_once(MAGPIE_DIR.'rss_fetch.inc'); |
||||
|
||||
b. PHP can't read the MagpieRSS files |
||||
|
||||
All PHP libraries need to be readable by your webserver. |
||||
|
||||
On Unix you can accomplish this with: |
||||
|
||||
chmod 755 rss_fetch.inc rss_parse.inc rss_cache.inc rss_utils.inc |
||||
|
||||
************************************************************************* |
||||
2. Cache couldn't make dir './cache'. |
||||
|
||||
MagpieRSS caches the results of fetched and parsed RSS to reduce the load on |
||||
both your server, and the remote server providing the RSS. It does this by |
||||
writing files to a cache directory. |
||||
|
||||
This error means the webserver doesn't have write access to the current |
||||
directory. |
||||
|
||||
a. Make a webserver writeable cache directory |
||||
|
||||
Find the webserver's group. (on my system it is 'www') |
||||
|
||||
mkdir ./cache |
||||
chgrp www directory_name |
||||
chmod g+w directory_name |
||||
|
||||
(this is the best, and desired solution) |
||||
|
||||
b. Tell MagpieRSS to create the cache directory somewhere the webserver can |
||||
write to. |
||||
|
||||
define('MAGPIE_CACHE_DIR', '/tmp/magpierss'); |
||||
|
||||
(this is not a great solution, and might have security considerations) |
||||
|
||||
c. Turn off cacheing. |
||||
|
||||
Magpie can work fine without cacheing, but it will be slower, and you might |
||||
become a nuisance to the RSS provider, but it is an option. |
||||
|
||||
define('MAGPIE_CACHE_ON', 0); |
||||
|
||||
d. And lastly, do NOT |
||||
|
||||
chmod 777 ./cache |
||||
|
||||
Any of the above solutions are better than this. |
||||
|
||||
NOTE: If none of this works for you, let me know. I've got root, and a |
||||
custom compiled Apache on almost any box I ever touch, so I can be a little |
||||
out of touch with reality. But I won't know that if I don't get feedback. |
||||
|
||||
************************************************************************* |
||||
3. Fatal error: Failed to load PHP's XML Extension. |
||||
http://www.php.net/manual/en/ref.xml.php |
||||
|
||||
-or- |
||||
|
||||
Fatal error: Failed to create an instance of PHP's XML parser. |
||||
http://www.php.net/manual/en/ref.xml.php |
||||
|
||||
Make sure your PHP was built with --with-xml |
||||
|
||||
This has been turned on by default for several versions of PHP, but it might |
||||
be turned off in your build. |
||||
|
||||
See php.net for details on building and configuring PHP. |
||||
|
||||
|
||||
************************************************************************* |
||||
4. Warning: MagpieRSS: Failed to fetch index.rdf. |
||||
(HTTP Error: Invalid protocol "") |
||||
|
||||
You need to put http:// in front of the URL to your RSS feed |
||||
|
||||
************************************************************************* |
||||
5. Warning: MagpieRSS: Failed to parse RSS file. |
||||
(not well-formed (invalid token) at line 19, column 98) |
||||
|
||||
There is a problem with the RSS feed you are trying to read. |
||||
MagpieRSS is an XML parser, and therefore can't parse RSS feed with invalid |
||||
characters. Some RSS parsers are based on regular expressions, and can |
||||
parse invalid RSS but they have their own problems. |
||||
|
||||
You could try contacting the author of the RSS feed, and pointing them to |
||||
the online RSS validator at: |
||||
|
||||
http://feeds.archive.org/validator/ |
||||
|
||||
************************************************************************* |
||||
6. Warning: MagpieRSS: Failed to fetch http://example.com/index.rdf |
||||
(HTTP Response: HTTP/1.1 404 Not Found) |
||||
|
||||
It's a 404! The RSS file ain't there. |
||||
|
||||
|
||||
@ -1,125 +0,0 @@ |
||||
MAGPIERSS RECIPES: Cooking with Corbies |
||||
|
||||
"Four and twenty blackbirds baked in a pie." |
||||
|
||||
1. LIMIT THE NUMBER OF HEADLINES(AKA ITEMS) RETURNED. |
||||
|
||||
PROBLEM: |
||||
|
||||
You want to display the 10 (or 3) most recent headlines, but the RSS feed |
||||
contains 15. |
||||
|
||||
SOLUTION: |
||||
|
||||
$num_items = 10; |
||||
$rss = fetch_rss($url); |
||||
|
||||
$items = array_slice($rss->items, 0, $num_items); |
||||
|
||||
DISCUSSION: |
||||
|
||||
Rather than trying to limit the number of items Magpie parses, a much simpler, |
||||
and more flexible approach is to take a "slice" of the array of items. And |
||||
array_slice() is smart enough to do the right thing if the feed has fewer items |
||||
than $num_items. |
||||
|
||||
See: http://www.php.net/array_slice |
||||
|
||||
|
||||
2. DISPLAY A CUSTOM ERROR MESSAGE IF SOMETHING GOES WRONG |
||||
|
||||
PROBLEM: |
||||
|
||||
You don't want Magpie's error messages showing up if something goes wrong. |
||||
|
||||
SOLUTION: |
||||
|
||||
# Magpie throws USER_WARNINGS only |
||||
# so you can cloak these, by only showing ERRORs |
||||
error_reporting(E_ERROR); |
||||
|
||||
# check the return value of fetch_rss() |
||||
|
||||
$rss = fetch_rss($url); |
||||
|
||||
if ( $rss ) { |
||||
...display rss feed... |
||||
} |
||||
else { |
||||
echo "An error occurred! " . |
||||
"Consider donating more $$$ for restoration of services." . |
||||
"<br>Error Message: " . magpie_error(); |
||||
} |
||||
|
||||
DISCUSSION: |
||||
|
||||
MagpieRSS triggers a warning in a number of circumstances. The 2 most common |
||||
circumstances are: if the specified RSS file isn't properly formed (usually |
||||
because it includes illegal HTML), or if Magpie can't download the remote RSS |
||||
file, and there is no cached version. |
||||
|
||||
If you don't want your users to see these warnings change your error_reporting |
||||
settings to only display ERRORs. Another option is to turn off display_errors, |
||||
so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages. |
||||
|
||||
You can do this with: |
||||
|
||||
ini_set('display_errors', 0); |
||||
|
||||
See: http://www.php.net/error_reporting, |
||||
http://www.php.net/ini_set, |
||||
http://www.php.net/manual/en/ref.errorfunc.php |
||||
|
||||
3. GENERATE A NEW RSS FEED |
||||
|
||||
PROBLEM: |
||||
|
||||
Create an RSS feed for other people to use. |
||||
|
||||
SOLUTION: |
||||
|
||||
Use Useful Inc's RSSWriter (http://usefulinc.com/rss/rsswriter/) |
||||
|
||||
DISCUSSION: |
||||
|
||||
An example of turning a Magpie parsed RSS object back into an RSS file is |
||||
forthcoming. In the meantime RSSWriter has great documentation. |
||||
|
||||
4. DISPLAY HEADLINES MORE RECENT THAN X DATE |
||||
|
||||
PROBLEM: |
||||
|
||||
You only want to display headlines that were published on, or after a certain |
||||
date. |
||||
|
||||
|
||||
SOLUTION: |
||||
|
||||
require 'rss_utils.inc'; |
||||
|
||||
# get all headlines published today |
||||
$today = getdate(); |
||||
|
||||
# today, 12AM |
||||
$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']); |
||||
|
||||
$rss = fetch_rss($url); |
||||
|
||||
foreach ( $rss->items as $item ) { |
||||
$published = parse_w3cdtf($item['dc']['date']); |
||||
if ( $published >= $date ) { |
||||
echo "Title: " . $item['title']; |
||||
echo "Published: " . date("h:i:s A", $published); |
||||
echo "<p>"; |
||||
} |
||||
} |
||||
|
||||
DISCUSSION: |
||||
|
||||
This recipe only works for RSS 1.0 feeds that include the <dc:date> field. |
||||
(which is very good RSS style) |
||||
|
||||
parse_w3cdtf is defined in rss_utils.inc, and parses RSS style dates into Unix |
||||
epoch seconds. |
||||
|
||||
See: http://www.php.net/manual/en/ref.datetime.php |
||||
@ -1,896 +0,0 @@ |
||||
<?php |
||||
|
||||
/************************************************* |
||||
|
||||
Snoopy - the PHP net client |
||||
Author: Monte Ohrt <monte@ispi.net> |
||||
Copyright (c): 1999-2000 ispi, all rights reserved |
||||
Version: 1.0 |
||||
|
||||
* This library is free software; you can redistribute it and/or |
||||
* modify it under the terms of the GNU Lesser General Public |
||||
* License as published by the Free Software Foundation; either |
||||
* version 2.1 of the License, or (at your option) any later version. |
||||
* |
||||
* This library is distributed in the hope that it will be useful, |
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||||
* Lesser General Public License for more details. |
||||
* |
||||
* You should have received a copy of the GNU Lesser General Public |
||||
* License along with this library; if not, write to the Free Software |
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||||
|
||||
You may contact the author of Snoopy by e-mail at: |
||||
monte@ispi.net |
||||
|
||||
Or, write to: |
||||
Monte Ohrt |
||||
CTO, ispi |
||||
237 S. 70th suite 220 |
||||
Lincoln, NE 68510 |
||||
|
||||
The latest version of Snoopy can be obtained from: |
||||
http://snoopy.sourceforge.com |
||||
|
||||
*************************************************/ |
||||
|
||||
class Snoopy { |
||||
/**** Public variables ****/ |
||||
|
||||
/* user definable vars */ |
||||
|
||||
public $host = "www.php.net"; // host name we are connecting to |
||||
public $port = 80; // port we are connecting to |
||||
public $proxy_host = ""; // proxy host to use |
||||
public $proxy_port = ""; // proxy port to use |
||||
public $agent = "Snoopy v1.0"; // agent we masquerade as |
||||
public $referer = ""; // referer info to pass |
||||
public $cookies = array(); // array of cookies to pass |
||||
// $cookies["username"]="joe"; |
||||
public $rawheaders = array(); // array of raw headers to send |
||||
// $rawheaders["Content-type"]="text/html"; |
||||
|
||||
public $maxredirs = 5; // http redirection depth maximum. 0 = disallow |
||||
public $lastredirectaddr = ""; // contains address of last redirected address |
||||
public $offsiteok = true; // allows redirection off-site |
||||
public $maxframes = 0; // frame content depth maximum. 0 = disallow |
||||
public $expandlinks = true; // expand links to fully qualified URLs. |
||||
// this only applies to fetchlinks() |
||||
// or submitlinks() |
||||
public $passcookies = true; // pass set cookies back through redirects |
||||
// NOTE: this currently does not respect |
||||
// dates, domains or paths. |
||||
|
||||
public $user = ""; // user for http authentication |
||||
public $pass = ""; // password for http authentication |
||||
|
||||
// http accept types |
||||
public $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; |
||||
|
||||
public $results = ""; // where the content is put |
||||
|
||||
public $error = ""; // error messages sent here |
||||
public $response_code = ""; // response code returned from server |
||||
public $headers = array(); // headers returned from server sent here |
||||
public $maxlength = 500000; // max return data length (body) |
||||
public $read_timeout = 0; // timeout on read operations, in seconds |
||||
// supported only since PHP 4 Beta 4 |
||||
// set to 0 to disallow timeouts |
||||
public $timed_out = false; // if a read operation timed out |
||||
public $status = 0; // http request status |
||||
|
||||
public $curl_path = "/usr/bin/curl"; |
||||
// Snoopy will use cURL for fetching |
||||
// SSL content if a full system path to |
||||
// the cURL binary is supplied here. |
||||
// set to false if you do not have |
||||
// cURL installed. See http://curl.haxx.se |
||||
// for details on installing cURL. |
||||
// Snoopy does *not* use the cURL |
||||
// library functions built into php, |
||||
// as these functions are not stable |
||||
// as of this Snoopy release. |
||||
|
||||
// send Accept-encoding: gzip? |
||||
public $use_gzip = true; |
||||
|
||||
/**** Private variables ****/ |
||||
|
||||
private $_maxlinelen = 4096; // max line length (headers) |
||||
|
||||
private $_httpmethod = "GET"; // default http request method |
||||
private $_httpversion = "HTTP/1.0"; // default http request version |
||||
private $_submit_method = "POST"; // default submit method |
||||
private $_submit_type = "application/x-www-form-urlencoded"; // default submit type |
||||
private $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type |
||||
private $_redirectaddr = false; // will be set if page fetched is a redirect |
||||
private $_redirectdepth = 0; // increments on an http redirect |
||||
private $_frameurls = array(); // frame src urls |
||||
private $_framedepth = 0; // increments on frame depth |
||||
|
||||
private $_isproxy = false; // set if using a proxy server |
||||
private $_fp_timeout = 30; // timeout for socket connection |
||||
|
||||
/*======================================================================*\ |
||||
Function: fetch |
||||
Purpose: fetch the contents of a web page |
||||
(and possibly other protocols in the |
||||
future like ftp, nntp, gopher, etc.) |
||||
Input: $URI the location of the page to fetch |
||||
Output: $this->results the output text from the fetch |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Fetch the contents of a web page over http or https.
 *
 * Follows redirects (up to $this->maxredirs) and fetches frame sources
 * (up to $this->maxframes) by calling itself recursively.
 *
 * @param string $URI the location of the page to fetch
 * @return bool false when the protocol is unsupported, the socket cannot be
 *              opened (http) or curl is unusable (https); true otherwise.
 *              The fetched text is left in $this->results.
 */
public function fetch($URI) {
    // parse_url() returns false on seriously malformed URLs.
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";

    // Request target used when no proxy is involved. "path" and "query"
    // are both optional in parse_url() output.
    $path = (isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "")
          . (isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : "");

    switch ($scheme) {
        case "http":
            $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $fp = false;
            if (!$this->_connect($fp))
                return false;

            // _connect() decides whether a proxy is in use; a proxy needs
            // the entire URI, a direct connection only the path.
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);

            $this->_follow_redirect();
            $this->_fetch_frames();
            return true;

        case "https":
            if (!$this->curl_path || !is_executable($this->curl_path)) {
                $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                return false;
            }
            $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);

            $this->_follow_redirect();
            $this->_fetch_frames();
            return true;

        default:
            // not a valid protocol
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }
}

/**
 * Follow the redirect recorded by the last request, if any, provided the
 * redirect depth limit is not reached and the target is on the current
 * host (http or https) or off-site redirects are allowed.
 */
private function _follow_redirect() {
    if (!$this->_redirectaddr)
        return;
    if ($this->maxredirs <= $this->_redirectdepth)
        return;

    $onsite = preg_match("|^https?://".preg_quote($this->host, "|")."|i", $this->_redirectaddr);
    if ($onsite || $this->offsiteok) {
        $this->_redirectdepth++;
        $this->lastredirectaddr = $this->_redirectaddr;
        $this->fetch($this->_redirectaddr);
    }
}

/**
 * Fetch the frame URLs collected by the last request while the frame
 * depth limit allows it.
 */
private function _fetch_frames() {
    if ($this->_framedepth >= $this->maxframes || count($this->_frameurls) == 0)
        return;

    // Take a copy and clear the queue: nested fetches refill it.
    $frameurls = $this->_frameurls;
    $this->_frameurls = array();

    foreach ($frameurls as $frameurl) {
        if ($this->_framedepth >= $this->maxframes)
            break;
        $this->fetch($frameurl);
        $this->_framedepth++;
    }
}
||||
|
||||
|
||||
|
||||
/*======================================================================*\ |
||||
Private functions |
||||
\*======================================================================*/ |
||||
|
||||
|
||||
/*======================================================================*\ |
||||
Function: _striplinks |
||||
Purpose: strip the hyperlinks from an html document |
||||
Input: $document document to strip. |
||||
Output: $match an array of the links |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Extract the href targets of the anchors in an HTML document.
 *
 * @param string $document document to scan
 * @return array the link targets: quoted href values first, then unquoted
 *               ones; an empty array when the document has no links
 */
private function _striplinks($document)
{
    // ".*?" must be lazy: with /s a greedy ".*" runs to the LAST href in
    // the document, so only a single link would ever be reported.
    preg_match_all("'<\s*a\s+.*?href\s*=\s*          # find <a ... href=
                    ([\"\'])?                        # optional opening quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))     # quoted: up to the matching quote, otherwise up to the next space
                    'isx", $document, $links);

    $match = array();

    // collect the non-empty matches from both arms of the conditional
    foreach (array(2, 3) as $group) {
        foreach ($links[$group] as $val) {
            if (!empty($val))
                $match[] = $val;
        }
    }

    return $match;
}
||||
|
||||
/*======================================================================*\ |
||||
Function: _stripform |
||||
Purpose: strip the form elements from an html document |
||||
Input: $document document to strip. |
||||
Output: $match the matched form elements, joined by CRLF |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Pull the form-related tags (form, input, select, textarea, option) out
 * of an HTML document.
 *
 * @param string $document document to scan
 * @return string the matched elements joined with CRLF
 */
private function _stripform($document)
{
    $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    $elements = array();
    preg_match_all($pattern, $document, $elements);

    // index 0 holds the full text of every match
    return implode("\r\n", $elements[0]);
}
||||
|
||||
|
||||
|
||||
/*======================================================================*\ |
||||
Function: _striptext |
||||
Purpose: strip the text from an html document |
||||
Input: $document document to strip. |
||||
Output: $text the resulting text |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Reduce an HTML document to its text: drops scripts and tags, collapses
 * whitespace after line breaks and decodes a handful of common entities.
 *
 * @param string $document document to strip
 * @return string the resulting text
 */
private function _striptext($document)
{
    // Entities are listed one by one; only the more common ones are here.
    // Pattern => replacement, applied in this order.
    $rules = array(
        "'<script[^>]*?>.*?</script>'si" => "",       // strip out javascript
        "'<[\/\!]*?[^<>]*?>'si"          => "",       // strip out html tags
        "'([\r\n])[\s]+'"                => "\\1",    // strip out white space
        // The HTML entity is &quot;; the historical misspelling &quote;
        // is still accepted for backward compatibility.
        "'&(quote?|#34);'i"              => "\"",
        "'&(amp|#38);'i"                 => "&",
        "'&(lt|#60);'i"                  => "<",
        "'&(gt|#62);'i"                  => ">",
        "'&(nbsp|#160);'i"               => " ",
        "'&(iexcl|#161);'i"              => chr(161),
        "'&(cent|#162);'i"               => chr(162),
        "'&(pound|#163);'i"              => chr(163),
        "'&(copy|#169);'i"               => chr(169)
    );

    return preg_replace(array_keys($rules), array_values($rules), $document);
}
||||
|
||||
/*======================================================================*\ |
||||
Function: _expandlinks |
||||
Purpose: expand each link into a fully qualified URL |
||||
Input: $links the links to qualify |
||||
$URI the full URI to get the base from |
||||
Output: $expandedLinks the expanded links |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Turn relative links into fully qualified URLs.
 *
 * @param string|array $links the link(s) to qualify
 * @param string       $URI   the full URI the base is derived from
 * @return string|array the expanded link(s)
 */
private function _expandlinks($links,$URI)
{
    // Base = URI without its query string and without a trailing
    // "/name.ext" segment.
    preg_match("/^[^\?]+/", $URI, $uri_no_query);
    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $uri_no_query[0]);

    $patterns = array();
    $substitutes = array();

    $patterns[]    = "|^http://".preg_quote($this->host)."|i";
    $substitutes[] = "";

    $patterns[]    = "|^(?!http://)(\/)?(?!mailto:)|i";
    $substitutes[] = $base."/";

    $patterns[]    = "|/\./|";
    $substitutes[] = "/";

    $patterns[]    = "|/[^\/]+/\.\./|";
    $substitutes[] = "/";

    return preg_replace($patterns, $substitutes, $links);
}
||||
|
||||
/*======================================================================*\ |
||||
Function: _httprequest |
||||
Purpose: go get the http data from the server |
||||
Input: $url the url to fetch |
||||
$fp the current open file pointer |
||||
$URI the full URI |
||||
$body body contents to send if any (POST) |
||||
Output: |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Send an HTTP request over an already open socket and read the response.
 *
 * Populates $this->headers, $this->status, $this->response_code,
 * $this->results, $this->_redirectaddr and $this->_frameurls.
 *
 * @param string   $url          request target (path, or full URI via proxy)
 * @param resource $fp           the currently open socket
 * @param string   $URI          the full URI
 * @param string   $http_method  request method, e.g. "GET"
 * @param string   $content_type Content-type header value, if any
 * @param string   $body         body contents to send, if any (POST)
 * @return bool false when a read timed out (status is set to -100),
 *              true otherwise
 */
private function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    // On a redirect, pass back the cookies set by the previous response.
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if (empty($url))
        $url = "/";

    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if (!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if (!empty($this->host) && !isset($this->rawheaders['Host']))
        $headers .= "Host: ".$this->host."\r\n";
    if (!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";

    if ($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if (function_exists('gzinflate')) {
            $headers .= "Accept-encoding: gzip\r\n";
        }
        else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support.".
                " Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }

    if (!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";

    if (!empty($this->cookies)) {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal)
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
    }

    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first element, so the raw headers
        // are also sent on the follow-up requests of a redirect chain.
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }

    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if (!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if (!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        stream_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    $request = $headers.$body;
    fwrite($fp, $request, strlen($request));

    $this->_redirectaddr = false;
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
            $this->status = -100;
            return false;
        }

        // a blank line ends the header section
        if (preg_match("/^\r?\n$/", $currentHeader))
            break;

        // a header beginning with Location: or URI: sets the redirect
        if (preg_match("/^(Location:|URI:)\s*(.*)/i", chop($currentHeader), $matches)) {
            // look for :// in the header to see if a hostname is included
            if (!preg_match("|\:\/\/|", $matches[2])) {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if (preg_match("|^HTTP/|", $currentHeader)) {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
                $this->status = $status[1];
            $this->response_code = $currentHeader;
        }

        // header names are case-insensitive
        if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader))
            $is_gzipped = true;

        $this->headers[] = $currentHeader;
    }

    // read the body, stopping once more than maxlength bytes were gathered
    $results = "";
    while ($data = fread($fp, $this->maxlength)) {
        $results .= $data;
        if (strlen($results) > $this->maxlength)
            break;
    }

    // gunzip
    if ($is_gzipped && function_exists('gzinflate')) {
        // per http://www.php.net/manual/en/function.gzencode.php
        // skip the 10 byte gzip header before inflating
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match)) {
        // appending with [] to a string is a fatal error on PHP 7.1+
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        foreach ($match[1] as $frame_src)
            $this->_frameurls[] = $this->_expandlinks($frame_src, $URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
||||
|
||||
/*======================================================================*\ |
||||
Function: _httpsrequest |
||||
Purpose: go get the https data from the server using curl |
||||
Input: $url the url to fetch |
||||
$URI the full URI |
||||
$body body contents to send if any (POST) |
||||
Output: |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Fetch an https document by running the external curl binary.
 *
 * Populates $this->headers, $this->status, $this->response_code,
 * $this->results, $this->_redirectaddr and $this->_frameurls.
 *
 * @param string $url          request target; only used for the empty check,
 *                             curl is always given the full $URI
 * @param string $URI          the full URI
 * @param string $http_method  request method (not passed on to curl)
 * @param string $content_type Content-type header value, if any
 * @param string $body         body contents to send, if any (POST)
 * @return bool false when the temporary header file cannot be created or
 *              curl exits non-zero ($this->error is set), true otherwise
 */
private function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // On a redirect, pass back the cookies set by the previous response.
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    if (empty($url))
        $url = "/";
    // GET ... header not needed for curl
    if (!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if (!empty($this->host))
        $headers[] = "Host: ".$this->host;
    if (!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if (!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;

    if (!empty($this->cookies)) {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal)
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
    }

    if (!empty($this->rawheaders)) {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }

    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if (!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if (!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Every value that reaches the shell is passed as ONE quoted argument
    // via escapeshellarg(): header values, body and URI can all carry
    // attacker-influenced text (e.g. a redirect target).
    // NOTE(review): on Windows escapeshellarg() replaces %, ! and " with
    // spaces — confirm if Windows hosts must be supported.
    $cmdline_params = "";
    foreach ($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if (!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if ($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    # accept self-signed certs
    $cmdline_params .= " -k";

    // tempnam() creates a unique file atomically, unlike a guessable
    // name under /tmp.
    $headerfile = tempnam(sys_get_temp_dir(), "snoopy");
    if ($headerfile === false) {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    $results = array();
    $return = 0;
    exec(escapeshellarg($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI), $results, $return);

    if ($return) {
        // do not leave the header file behind on failure
        @unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    // exec() returns the output line by line
    $results = implode("\r\n", $results);

    $result_headers = file($headerfile);
    @unlink($headerfile);
    if (!is_array($result_headers))
        $result_headers = array();

    $this->_redirectaddr = false;
    $this->headers = array();

    foreach ($result_headers as $header_line) {
        // a header beginning with Location: or URI: sets the redirect
        if (preg_match("/^(Location:|URI:)\s*(.*)/i", chop($header_line), $matches)) {
            // look for :// in the header to see if a hostname is included
            if (!preg_match("|\:\/\/|", $matches[2])) {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        if (preg_match("|^HTTP/|", $header_line)) {
            $this->response_code = $header_line;
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match))
                $this->status = $match[1];
        }

        $this->headers[] = $header_line;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match)) {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match)) {
        // appending with [] to a string is a fatal error on PHP 7.1+
        if (!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        foreach ($match[1] as $frame_src)
            $this->_frameurls[] = $this->_expandlinks($frame_src, $URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
||||
|
||||
/*======================================================================*\ |
||||
Function: setcookies() |
||||
Purpose: set cookies for a redirection |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Copy the cookies announced by the last response's Set-Cookie headers
 * into $this->cookies so they are sent along on a redirect.
 */
public function setcookies()
{
    $index = 0;
    while ($index < count($this->headers)) {
        $header_line = $this->headers[$index];
        $index++;

        // name is everything up to "=", value everything up to the first ";"
        if (!preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $header_line, $cookie))
            continue;

        $this->cookies[$cookie[1]] = $cookie[2];
    }
}
||||
|
||||
|
||||
/*======================================================================*\ |
||||
Function: _check_timeout |
||||
Purpose: checks whether timeout has occurred |
||||
Input: $fp file pointer |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Report whether the last read on the socket timed out; records the fact
 * in $this->timed_out. The socket is only inspected when a read timeout
 * is configured.
 *
 * @param resource $fp file pointer
 * @return bool true if a timeout occurred
 */
private function _check_timeout($fp)
{
    $timeout_enabled = ($this->read_timeout > 0);
    if (!$timeout_enabled)
        return false;

    $fp_status = socket_get_status($fp);
    if (!$fp_status["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
||||
|
||||
/*======================================================================*\ |
||||
Function: _connect |
||||
Purpose: make a socket connection |
||||
Input: $fp file pointer |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Open the socket connection, to the proxy when one is configured and to
 * $this->host:$this->port otherwise.
 *
 * @param resource $fp receives the opened socket (passed by reference)
 * @return bool true on success; on failure $this->status holds the errno
 *              and $this->error a description
 */
private function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else {
        // reset, in case a proxy was configured for an earlier request
        $this->_isproxy = false;
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $errno = 0;
    $errstr = "";
    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch ($errno) {
        // each case needs its break, otherwise the default message
        // overwrites the specific one
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (".$errno.")";
    }
    return false;
}
||||
/*======================================================================*\ |
||||
Function: _disconnect |
||||
Purpose: disconnect a socket connection |
||||
Input: $fp file pointer |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Close a socket connection.
 *
 * @param resource $fp file pointer
 * @return bool the result of fclose()
 */
private function _disconnect($fp)
{
    $closed = fclose($fp);
    return $closed;
}
||||
|
||||
|
||||
/*======================================================================*\ |
||||
Function: _prepare_post_body |
||||
Purpose: Prepare post body according to encoding type |
||||
Input: $formvars - form variables |
||||
$formfiles - form upload files |
||||
Output: post body |
||||
\*======================================================================*/ |
||||
|
||||
/**
 * Build the POST body for the current submit type
 * ($this->_submit_type).
 *
 * @param mixed $formvars  form variables (name => value, or name => list)
 * @param mixed $formfiles form upload files (field name => file name(s));
 *                         only used for multipart/form-data
 * @return string|null the post body; null when there is nothing to send;
 *                     an empty string for an unknown submit type
 */
private function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    $postdata = "";

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val)
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // {$key}[] keeps the brackets literal without
                        // emitting backslashes into the field name
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // file_get_contents() also copes with empty files,
                    // where fread() with a zero length fails
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false)
                        $file_content = "";
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
||||
} |
||||
@ -1,6 +0,0 @@ |
||||
<html> |
||||
<head> |
||||
</head> |
||||
<body> |
||||
</body> |
||||
</html> |
||||
@ -1,6 +0,0 @@ |
||||
<html> |
||||
<head> |
||||
</head> |
||||
<body> |
||||
</body> |
||||
</html> |
||||
@ -1,200 +0,0 @@ |
||||
<?php |
||||
/** |
||||
* Project: MagpieRSS: a simple RSS integration tool |
||||
* File: rss_cache.inc, a simple, rolling(no GC), cache |
||||
* for RSS objects, keyed on URL. |
||||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
||||
* Version: 0.51 |
||||
* License: GPL |
||||
* |
||||
* The latest version of MagpieRSS can be obtained from: |
||||
* http://magpierss.sourceforge.net |
||||
* |
||||
* For questions, help, comments, discussion, etc., please join the |
||||
* Magpie mailing list: |
||||
* http://lists.sourceforge.net/lists/listinfo/magpierss-general |
||||
* @package chamilo.include.rss |
||||
*/ |
||||
/**
 * Simple rolling (no garbage collection) file cache for parsed RSS objects.
 *
 * Each entry is stored as one file inside BASE_CACHE; the file name is the
 * md5 hash of the cache key (normally the feed URL). The age of an entry is
 * derived from the modification time of its file.
 *
 * @package chamilo.include.rss
 */
class RSSCache {
    public $BASE_CACHE = './cache';    // where the cache files are stored
    public $MAX_AGE    = 3600;         // when are files stale, default one hour
    public $ERROR      = "";           // most recent error message

    /**
     * Configure the cache and try to create the cache directory.
     *
     * A failure to create the directory is reported through error() and is
     * left in $this->ERROR for the caller to inspect.
     *
     * @param string $base cache directory; an empty value keeps the default
     * @param int    $age  maximum age in seconds; an empty value keeps the default
     */
    public function __construct ($base='', $age='') {
        if ( $base ) {
            $this->BASE_CACHE = $base;
        }
        if ( $age ) {
            $this->MAX_AGE = $age;
        }

        // attempt to make the cache directory
        if ( ! file_exists( $this->BASE_CACHE ) ) {
            $status = @mkdir( $this->BASE_CACHE, 0755 );

            // if make failed
            if ( ! $status ) {
                $this->error(
                    "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
                );
            }
        }
    }

    /**
     * Legacy PHP 4 style constructor name. Declared after __construct() so
     * that it is an ordinary method which simply forwards to it; kept for
     * code that calls it explicitly.
     *
     * @param string $base see __construct()
     * @param int    $age  see __construct()
     */
    public function RSSCache ($base='', $age='') {
        $this->__construct($base, $age);
    }

    /**
     * Add an item to the cache, keyed on url.
     *
     * @param string $url key of the entry (url from which the rss file was fetched)
     * @param mixed  $rss value to store; it is passed through serialize()
     * @return string|int path of the cache file on success, 0 when the file
     *                    could not be opened for writing
     */
    public function set ($url, $rss) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );
        $fp = @fopen( $cache_file, 'w' );

        if ( ! $fp ) {
            $this->error(
                "Cache unable to open file for writing: $cache_file"
            );
            return 0;
        }

        $data = $this->serialize( $rss );
        fwrite( $fp, $data );
        fclose( $fp );

        return $cache_file;
    }

    /**
     * Fetch an item from the cache.
     *
     * @param string $url key of the entry (url from which the rss file was fetched)
     * @return mixed the unserialized value on a hit; 0 when the file is
     *               missing, cannot be opened, or is empty
     */
    public function get ($url) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );

        if ( ! file_exists( $cache_file ) ) {
            $this->debug(
                "Cache doesn't contain: $url (cache file: $cache_file)"
            );
            return 0;
        }

        $fp = @fopen($cache_file, 'r');
        if ( ! $fp ) {
            $this->error(
                "Failed to open cache file for reading: $cache_file"
            );
            return 0;
        }

        // read the size once, and always release the handle before returning
        $filesize = filesize($cache_file);
        $data = $filesize ? fread( $fp, $filesize ) : '';
        fclose( $fp );

        if ( $filesize ) {
            return $this->unserialize( $data );
        }

        return 0;
    }

    /**
     * Check a url for membership in the cache and whether the entry is
     * older than MAX_AGE.
     *
     * @param string $url key of the entry
     * @return string 'HIT' (exists and is current), 'STALE' (exists but is
     *                old) or 'MISS' (does not exist)
     */
    public function check_cache ( $url ) {
        $this->ERROR = "";
        $filename = $this->file_name( $url );

        if ( file_exists( $filename ) ) {
            // find how long ago the file was added to the cache
            // and whether that is longer than MAX_AGE
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            if ( $this->MAX_AGE > $age ) {
                // object exists and is current
                return 'HIT';
            }
            else {
                // object exists but is old
                return 'STALE';
            }
        }
        else {
            // object does not exist
            return 'MISS';
        }
    }

    /**
     * Age of a cache entry.
     *
     * @param string $url key of the entry
     * @return int seconds since the cache file was last modified, or -1 when
     *             there is no cache file for this key
     */
    public function cache_age( $url ) {
        $filename = $this->file_name( $url);
        if ( file_exists( $filename ) ) {
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            return $age;
        }
        else {
            return -1;
        }
    }

    /**
     * Turn a value into the string stored in the cache file.
     *
     * @param mixed $rss value to store
     * @return string result of PHP's serialize()
     */
    public function serialize ( $rss ) {
        return serialize( $rss );
    }

    /**
     * Turn the contents of a cache file back into a value.
     *
     * NOTE(review): unserialize() is applied to whatever the cache file
     * contains, so the cache directory must not be writable by untrusted
     * parties.
     *
     * @param string $data contents of a cache file
     * @return mixed result of PHP's unserialize()
     */
    public function unserialize ( $data ) {
        return unserialize( $data );
    }

    /**
     * Map a url to its location in the cache.
     *
     * @param string $url key of the entry
     * @return string BASE_CACHE joined with the md5 hash of $url
     */
    public function file_name ($url) {
        $filename = md5( $url );
        return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
    }

    /**
     * Register an error: store it in $this->ERROR, then raise it with
     * trigger_error() when MAGPIE_DEBUG is on, or write it to the error log
     * otherwise.
     *
     * @param string $errormsg message to record
     * @param int    $lvl      error level used with trigger_error()
     */
    public function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        // NOTE(review): $php_errormsg is local to this method and is not
        // assigned here, so this branch looks unreachable - confirm before
        // relying on it.
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        $this->ERROR = $errormsg;
        if ( MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }
    }

    /**
     * Report a debugging message through error(); does nothing unless
     * MAGPIE_DEBUG is on.
     *
     * @param string $debugmsg message to report
     * @param int    $lvl      error level handed on to error()
     */
    public function debug ($debugmsg, $lvl=E_USER_NOTICE) {
        if ( MAGPIE_DEBUG ) {
            $this->error("MagpieRSS [debug] $debugmsg", $lvl);
        }
    }

}
||||
@ -1,459 +0,0 @@ |
||||
<?php |
||||
/** |
||||
* Project: MagpieRSS: a simple RSS integration tool |
||||
* File: rss_fetch.inc, a simple functional interface |
||||
to fetching and parsing RSS files, via the |
||||
function fetch_rss() |
||||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
||||
* License: GPL |
||||
* |
||||
* The latest version of MagpieRSS can be obtained from: |
||||
* http://magpierss.sourceforge.net |
||||
* |
||||
* For questions, help, comments, discussion, etc., please join the |
||||
* Magpie mailing list: |
||||
* magpierss-general@lists.sourceforge.net |
||||
* @package chamilo.include.rss |
||||
*/ |
||||
/** |
||||
* Code |
||||
*/ |
||||
// Setup MAGPIE_DIR for use on hosts that don't include |
||||
// the current path in include_path. |
||||
// with thanks to rajiv and smarty |
||||
if (!defined('DIR_SEP')) { |
||||
define('DIR_SEP', DIRECTORY_SEPARATOR); |
||||
} |
||||
|
||||
if (!defined('MAGPIE_DIR')) { |
||||
define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP); |
||||
} |
||||
|
||||
require_once( MAGPIE_DIR . 'rss_parse.inc' ); |
||||
require_once( MAGPIE_DIR . 'rss_cache.inc' ); |
||||
|
||||
// for including 3rd party libraries |
||||
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); |
||||
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); |
||||
define('MAGPIE_CACHE_DIR', api_get_path(SYS_ARCHIVE_PATH)); |
||||
|
||||
/* |
||||
* CONSTANTS - redefine these in your script to change the |
||||
* behaviour of fetch_rss() currently, most options effect the cache |
||||
* |
||||
* MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? |
||||
* For me a built in cache was essential to creating a "PHP-like" |
||||
* feel to Magpie, see rss_cache.inc for rationale |
||||
* |
||||
* |
||||
* MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects? |
||||
* This should be a location that the webserver can write to. If this |
||||
* directory does not already exist Mapie will try to be smart and create |
||||
* it. This will often fail for permissions reasons. |
||||
* |
||||
* |
||||
* MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds. |
||||
* |
||||
* |
||||
* MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error |
||||
* instead of returning stale object? |
||||
* |
||||
* MAGPIE_DEBUG - Display debugging notices? |
||||
* |
||||
*/ |
||||
|
||||
|
||||
/*=======================================================================*\ |
||||
Function: fetch_rss: |
||||
Purpose: return RSS object for the give url |
||||
maintain the cache |
||||
Input: url of RSS file |
||||
Output: parsed RSS object (see rss_parse.inc) |
||||
|
||||
NOTES ON CACHEING: |
||||
If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. |
||||
|
||||
NOTES ON RETRIEVING REMOTE FILES: |
||||
If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
||||
return a cached object, and touch the cache object upon receiving a |
||||
304. |
||||
|
||||
NOTES ON FAILED REQUESTS: |
||||
If there is an HTTP error while fetching an RSS object, the cached |
||||
version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) |
||||
\*=======================================================================*/ |
||||
|
||||
define('MAGPIE_VERSION', '0.72'); |
||||
|
||||
$MAGPIE_ERROR = ""; |
||||
|
||||
/**
 * Return the parsed RSS object for a url, maintaining the cache.
 *
 * With the cache off the feed is fetched and parsed on every call. With the
 * cache on, a fresh cached object is returned directly; otherwise the feed
 * is fetched (as a conditional GET when the stale object carries both an
 * ETag and a Last-Modified value) and the cache is refreshed. When the fetch
 * fails and a stale object is available, the stale object is returned.
 * MAGPIE_CACHE_FRESH_ONLY is not consulted in this function.
 *
 * @param string $url url of the RSS file
 * @return mixed parsed RSS object, or false on failure
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }
        else {
            error("Failed to fetch $url and cache is off");
            return false;
        }
    }

    // else cache is ON
    // Flow
    // 1. check cache
    // 2. if there is a hit, make sure its fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    // A string, compared strictly below: a numeric 0 compares loosely equal
    // to 'HIT' and 'STALE' on PHP versions before 8.
    $cache_status    = '';       // response of check_cache
    $request_headers = array();  // HTTP headers to send with fetch
    $rss             = 0;        // parsed RSS object
    $errormsg        = 0;        // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get

    // setup headers
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        // empty() also covers objects on which these properties were never set
        if ( $rss and !empty($rss->etag) and !empty($rss->last_modified) ) {
            $request_headers['If-None-Match'] = $rss->etag;
            // the validator that pairs with Last-Modified is If-Modified-Since
            $request_headers['If-Modified-Since'] = $rss->last_modified;
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        if ( $resp->status == '304' and $rss ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $fresh );
                return $fresh;
            }
            // the new copy could not be parsed: nothing usable is left
            $rss = $fresh;
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                # compensate for Snoopy's annoying habbit to tacking
                # on '\n'
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .= "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return cached object
    if ($rss) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    // the message built above is deliberately not reported (error hidden)
    //error( $errormsg );

    return false;
} // end fetch_rss()
||||
|
||||
/*=======================================================================*\
    Function:   error
    Purpose:    store a "MagpieRSS: "-prefixed message in the global
                $MAGPIE_ERROR and raise it with trigger_error()
    Input:      the message, and the error level to raise it with
                (optional, defaults to E_USER_WARNING)
    Output:     none; an empty message is ignored
\*=======================================================================*/
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // append PHP's error message if track_errors enabled
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    // nothing to record or raise for an empty message
    if ( !$errormsg ) {
        return;
    }

    $prefixed = "MagpieRSS: " . $errormsg;
    $MAGPIE_ERROR = $prefixed;
    trigger_error( $prefixed, $lvl );
}
||||
|
||||
/**
 * Raise a debugging message, prefixed with "MagpieRSS [debug] ", through
 * trigger_error().
 *
 * @param string $debugmsg text of the message
 * @param int    $lvl      error level to raise it with
 */
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    $message = "MagpieRSS [debug] " . $debugmsg;
    trigger_error( $message, $lvl );
}
||||
|
||||
/*=======================================================================*\
    Function:   magpie_error
    Purpose:    accessor for the global $MAGPIE_ERROR variable
    Input:      a new message (optional); a non-empty value replaces the
                stored one
    Output:     the message stored in $MAGPIE_ERROR after the call
\*=======================================================================*/
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    // only a non-empty argument overwrites the stored message
    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
||||
|
||||
/*=======================================================================*\
    Function:   _fetch_remote_file
    Purpose:    retrieve an arbitrary remote file with a Snoopy client
    Input:      url of the remote file
                array of headers to send along with the request (optional;
                any non-array value is ignored)
    Output:     the Snoopy client object after the fetch, which serves as
                the HTTP response object (see Snoopy.class.inc)
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is an HTTP client in PHP
    $snoopy = new Snoopy();

    // extra request headers are only passed on when given as an array
    if ( is_array($headers) ) {
        $snoopy->rawheaders = $headers;
    }

    $snoopy->use_gzip     = MAGPIE_USE_GZIP;
    $snoopy->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $snoopy->agent        = MAGPIE_USER_AGENT;

    // errors raised by the fetch itself are suppressed; the outcome is read
    // from the client object by the caller
    @$snoopy->fetch($url);

    return $snoopy;
}
||||
|
||||
/*=======================================================================*\
    Function:   _response_to_rss
    Purpose:    parse an HTTP response object into an RSS object, and copy
                the response's ETag and Last-Modified header values onto it
                (as ->etag and ->last_modified)
    Input:      an HTTP response object (see Snoopy); ->results is parsed,
                ->headers is scanned
    Output:     parsed RSS object (see rss_parse), or false when parsing
                failed (the failure is reported through error())
\*=======================================================================*/
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // parsing failed: construct error message
    if ( !$rss or $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";

        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg, E_USER_NOTICE);

        return false;
    }

    // find Etag, and Last-Modified
    foreach ($resp->headers as $h) {
        // lines without a "name: value" shape (e.g. the status line) carry
        // neither of the two headers
        $parts = explode(": ", $h, 2);
        if ( count($parts) < 2 ) {
            continue;
        }

        // header field names are case-insensitive, so compare in lower case
        $field = strtolower($parts[0]);
        // NOTE(review): the raw header lines may still end with their line
        // terminator - confirm against Snoopy; trimming keeps the stored
        // values safe to send back as request headers either way
        $val = trim($parts[1]);

        if ( $field === 'etag' ) {
            $rss->etag = $val;
        }
        elseif ( $field === 'last-modified' ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}
||||
|
||||
/*=======================================================================*\
    Function:   init
    Purpose:    define every MAGPIE_* constant that the calling script has
                not defined itself, using the default values below; only
                the first call does any work
\*=======================================================================*/
function init () {
    // the marker constant makes every later call a no-op
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // constant name => default value
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,     // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_OUTPUT_ENCODING'  => 'UTF-8',
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_FETCH_TIME_OUT'   => 5,         // 5 second timeout
        'MAGPIE_USE_GZIP'         => true,      // use gzip encoding to fetch rss files if supported?
    );

    foreach ( $defaults as $name => $value ) {
        if ( !defined($name) ) {
            define($name, $value);
        }
    }

    // the default user agent says whether the cache is in use, so it is
    // built after MAGPIE_CACHE_ON has its final value
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $suffix = MAGPIE_CACHE_ON ? ')' : '; No cache)';
        define('MAGPIE_USER_AGENT',
            'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $suffix);
    }
}
||||
|
||||
// NOTE: the following code should really be in Snoopy, or at least |
||||
// somewhere other then rss_fetch! |
||||
|
||||
/*=======================================================================*\ |
||||
HTTP STATUS CODE PREDICATES |
||||
These functions attempt to classify an HTTP status code |
||||
based on RFC 2616 and RFC 2518. |
||||
|
||||
All of them take an HTTP status code as input, and return true or false |
||||
|
||||
All this code is adapted from LWP's HTTP::Status. |
||||
\*=======================================================================*/ |
||||
|
||||
|
||||
/*=======================================================================*\
    Function:   is_info
    Purpose:    return true if the status code is Informational,
                i.e. from 100 up to, but not including, 200
\*=======================================================================*/
function is_info ($sc) {
    if ( !($sc >= 100) ) {
        return false;
    }
    return $sc < 200;
}
||||
|
||||
/*=======================================================================*\
    Function:   is_success
    Purpose:    return true if the status code is Successful,
                i.e. from 200 up to, but not including, 300
\*=======================================================================*/
function is_success ($sc) {
    if ( !($sc >= 200) ) {
        return false;
    }
    return $sc < 300;
}
||||
|
||||
/*=======================================================================*\
    Function:   is_redirect
    Purpose:    return true if the status code is a Redirection,
                i.e. from 300 up to, but not including, 400
\*=======================================================================*/
function is_redirect ($sc) {
    if ( !($sc >= 300) ) {
        return false;
    }
    return $sc < 400;
}
||||
|
||||
/*=======================================================================*\
    Function:   is_error
    Purpose:    return true if the status code is an Error (client or
                server), i.e. from 400 up to, but not including, 600
\*=======================================================================*/
function is_error ($sc) {
    if ( !($sc >= 400) ) {
        return false;
    }
    return $sc < 600;
}
||||
|
||||
/*=======================================================================*\
    Function:   is_client_error
    Purpose:    return true if the status code is a client error,
                i.e. from 400 up to, but not including, 500
\*=======================================================================*/
function is_client_error ($sc) {
    if ( !($sc >= 400) ) {
        return false;
    }
    return $sc < 500;
}
||||
|
||||
/*=======================================================================*\
    Function:   is_server_error
    Purpose:    return true if the status code is a server error,
                i.e. from 500 up to, but not including, 600
\*=======================================================================*/
function is_server_error ($sc) {
    if ( !($sc >= 500) ) {
        return false;
    }
    return $sc < 600;
}
||||
@ -1,605 +0,0 @@ |
||||
<?php |
||||
/** |
||||
* Project: MagpieRSS: a simple RSS integration tool |
||||
* File: rss_parse.inc - parse an RSS or Atom feed |
||||
* return as a simple object. |
||||
* |
||||
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3 |
||||
* |
||||
* The latest version of MagpieRSS can be obtained from: |
||||
* http://magpierss.sourceforge.net |
||||
* |
||||
* For questions, help, comments, discussion, etc., please join the |
||||
* Magpie mailing list: |
||||
* magpierss-general@lists.sourceforge.net |
||||
* |
||||
* @author Kellan Elliott-McCrea <kellan@protest.net> |
||||
* @version 0.7a |
||||
* @license GPL |
||||
* @package chamilo.include.rss |
||||
*/ |
||||
/** |
||||
* Code |
||||
*/ |
||||
define('RSS', 'RSS'); |
||||
define('ATOM', 'Atom'); |
||||
|
||||
require_once (MAGPIE_DIR . 'rss_utils.inc'); |
||||
|
||||
/** |
||||
* Hybrid parser, and object, takes RSS as a string and returns a simple object. |
||||
* |
||||
* see: rss_fetch.inc for a simpler interface with integrated caching support |
||||
* |
||||
* @package chamilo.include.rss |
||||
*/ |
||||
class MagpieRSS { |
||||
public $parser; |
||||
|
||||
public $current_item = array(); // item currently being parsed |
||||
public $items = array(); // collection of parsed items |
||||
public $channel = array(); // hash of channel fields |
||||
public $textinput = array(); |
||||
public $image = array(); |
||||
public $feed_type; |
||||
public $feed_version; |
||||
public $encoding = ''; // output encoding of parsed rss |
||||
|
||||
private $_source_encoding = ''; // only set if we have to parse xml prolog |
||||
|
||||
public $ERROR = ""; |
||||
public $WARNING = ""; |
||||
|
||||
// define some constants |
||||
|
||||
private $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright'); |
||||
private $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1'); |
||||
|
||||
// parser variables, useless if you're not a parser, treat as private |
||||
public $stack = array(); // parser stack |
||||
public $inchannel = false; |
||||
public $initem = false; |
||||
public $incontent = false; // if in Atom <content mode="xml"> field |
||||
public $intextinput = false; |
||||
public $inimage = false; |
||||
public $current_namespace = false; |
||||
|
||||
|
||||
/**
 * Set up the XML parser, parse the source, and populate this RSS object.
 *
 * When the XML extension is missing or no parser could be created, the
 * problem is reported through $this->error() and the object is left
 * unpopulated.
 *
 * NOTE: Probably a good idea to leave the encoding options alone unless
 *       you know what you're doing as PHP's character set support is
 *       a little weird.
 *
 * @param string $source          string containing the RSS to be parsed
 * @param string $output_encoding character set to output the parsed RSS in;
 *                                defaults to ISO-8859-1
 * @param string $input_encoding  character set of the incoming RSS source;
 *                                handed to create_parser() unchanged
 * @param bool   $detect_encoding handed to create_parser() unchanged
 *                                (if false, no attempt to detect the source
 *                                encoding - caveat emptor)
 */
public function __construct ($source, $output_encoding='ISO-8859-1',
                             $input_encoding=null, $detect_encoding=true)
{
    # if PHP xml isn't compiled in, report it and stop: none of the xml_*
    # functions used below exist in that case
    #
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                       E_USER_ERROR );
        return;
    }

    list($parser, $source) = $this->create_parser($source,
            $output_encoding, $input_encoding, $detect_encoding);

    # a parser is a resource on older PHP versions and an XMLParser object
    # as of PHP 8, so accept either
    #
    if (!is_resource($parser) and !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        return;
    }

    $this->parser = $parser;

    # pass in parser, and a reference to this object
    # setup handlers
    #
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
            'feed_start_element', 'feed_end_element' );

    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    $status = xml_parse( $this->parser, $source );

    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            $xml_error = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col = xml_get_current_column_number($this->parser);
            $errormsg = "$xml_error at line $error_line, column $error_col";

            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );

    $this->normalize();
}

/**
 * Legacy PHP 4 style constructor name. Declared after __construct() so that
 * it is an ordinary method which simply forwards to it; kept for code that
 * calls it explicitly.
 *
 * @param string $source          see __construct()
 * @param string $output_encoding see __construct()
 * @param string $input_encoding  see __construct()
 * @param bool   $detect_encoding see __construct()
 */
public function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                           $input_encoding=null, $detect_encoding=true)
{
    $this->__construct($source, $output_encoding, $input_encoding, $detect_encoding);
}
||||
|
||||
/**
 * Start-tag handler registered with the XML parser.
 *
 * The first element seen identifies the feed type and version. Later
 * elements either switch one of the in* state flags, are flattened into
 * text (inside an Atom content construct), are recorded as links (Atom
 * <link>), or are pushed onto the element stack.
 *
 * @param mixed  $p       the XML parser (unused)
 * @param string $element element name as reported by the parser
 * @param array  $attrs   attributes of the element; keys are lower-cased in place
 */
public function feed_start_element($p, $element, &$attrs) {
    $el = $element = strtolower($element);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    // check for a namespace, and split if found
    $ns = false;
    if ( strpos( $element, ':' ) ) {
        // explode() replaces split(), which no longer exists as of PHP 7
        list($ns, $el) = explode( ':', $element, 2);
    }
    if ( $ns and $ns != 'rdf' ) {
        $this->current_namespace = $ns;
    }

    # if feed type isn't set, then this is first element of feed
    # identify feed from root element
    #
    if (!isset($this->feed_type) ) {
        // not every root element carries a version attribute
        $version = isset($attrs['version']) ? $attrs['version'] : null;

        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            $this->feed_version = $version;
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;
            $this->feed_version = $version;
            $this->inchannel = true;
        }
        return;
    }

    if ( $el == 'channel' )
    {
        $this->inchannel = true;
    }
    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }

    // if we're in the default namespace of an RSS feed,
    //  record textinput or image fields
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        $this->intextinput = true;
    }

    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }

    # handle atom content constructs
    elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
    {
        // avoid clashing w/ RSS mod_content
        if ($el == 'content' ) {
            $el = 'atom_content';
        }

        $this->incontent = $el;
    }

    // if inside an Atom content construct (e.g. content or summary) field treat tags as text
    elseif ($this->feed_type == ATOM and $this->incontent )
    {
        // if tags are inlined, then flatten
        $attrs_str = join(' ',
                array_map('map_attrs',
                array_keys($attrs),
                array_values($attrs) ) );

        $this->append_content( "<$element $attrs_str>" );

        array_unshift( $this->stack, $el );
    }

    // Atom support many links per containing element.
    // Magpie treats link elements of type rel='alternate'
    // as being equivalent to RSS's simple link element.
    //
    elseif ($this->feed_type == ATOM and $el == 'link' )
    {
        // a link may lack rel and/or href; read both without raising notices
        $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
        $href = isset($attrs['href']) ? $attrs['href'] : '';

        if ( $rel == 'alternate' )
        {
            $link_el = 'link';
        }
        else {
            $link_el = 'link_' . $rel;
        }

        $this->append($link_el, $href);
    }
    // set stack[0] to current element
    else {
        array_unshift($this->stack, $el);
    }
}
||||
|
||||
|
||||
|
||||
/**
 * Character-data handler registered with the XML parser.
 *
 * Inside an Atom content construct the text goes to append_content();
 * anywhere else it is appended under a field name built from the element
 * stack, outermost element first, joined with '_'.
 *
 * @param mixed  $p    the XML parser (unused)
 * @param string $text the character data
 */
public function feed_cdata ($p, $text) {
    $in_atom_content = ($this->feed_type == ATOM and $this->incontent);

    if ( !$in_atom_content ) {
        $path = array_reverse($this->stack);
        $this->append( join('_', $path), $text );
        return;
    }

    $this->append_content( $text );
}
||||
|
||||
/**
 * End-tag handler registered with the XML parser.
 *
 * Closes whatever the matching start tag opened: stores the finished
 * item, clears the corresponding in* flag, or pops the element stack
 * (emitting the tag as text first while inside an Atom content construct).
 * The current namespace is reset at the end of every call.
 *
 * @param mixed  $p  the XML parser (unused)
 * @param string $el element name as reported by the parser
 */
public function feed_end_element ($p, $el) {
    $el = strtolower($el);

    $is_rss      = ($this->feed_type == RSS);
    $is_atom     = ($this->feed_type == ATOM);
    $default_ns  = ($this->current_namespace == '');

    if ( $el == 'item' or $el == 'entry' ) {
        // the item is complete: store it and start over with an empty one
        $this->items[] = $this->current_item;
        $this->current_item = array();
        $this->initem = false;
    }
    elseif ( $is_rss and $default_ns and $el == 'textinput' ) {
        $this->intextinput = false;
    }
    elseif ( $is_rss and $default_ns and $el == 'image' ) {
        $this->inimage = false;
    }
    elseif ( $is_atom and in_array($el, $this->_CONTENT_CONSTRUCTS) ) {
        $this->incontent = false;
    }
    elseif ( $el == 'channel' or $el == 'feed' ) {
        $this->inchannel = false;
    }
    else {
        if ( $is_atom and $this->incontent ) {
            // balance tags properly
            // note: i don't think this is actually necessary
            $tag = ( $this->stack[0] == $el ) ? "</$el>" : "<$el />";
            $this->append_content( $tag );
        }

        // in every remaining case the element leaves the stack
        array_shift( $this->stack );
    }

    $this->current_namespace = false;
}
||||
|
||||
/**
 * Append $str2 to $str1 in place, treating a not yet set $str1 as an
 * empty string.
 *
 * @param string $str1 target, modified by reference
 * @param string $str2 text to append
 */
public function concat (&$str1, $str2="") {
    if ( $str1 === null ) {
        $str1 = "";
    }

    $str1 .= $str2;
}
||||
|
||||
|
||||
|
||||
/**
 * Append text to the field named by $this->incontent, on the current
 * item when inside an item, otherwise on the channel when inside the
 * channel. Outside both, the text is dropped.
 *
 * @param string $text text to append
 */
public function append_content($text) {
    $field = $this->incontent;

    if ( $this->initem ) {
        $this->concat( $this->current_item[ $field ], $text );
        return;
    }

    if ( $this->inchannel ) {
        $this->concat( $this->channel[ $field ], $text );
    }
}
||||
|
||||
/**
 * Smart append - field and namespace aware.
 *
 * Appends $text to field $el of whichever container is currently open.
 * With a current namespace the field lives one level down, under the
 * namespace name, and the containers are tried in the order item,
 * channel, textinput, image; without one the order is item, textinput,
 * image, channel. Nothing happens for an empty field name or when no
 * container is open.
 *
 * @param string $el   field name
 * @param string $text text to append
 */
public function append($el, $text) {
    if ( !$el ) {
        return;
    }

    $ns = $this->current_namespace;

    if ( !$ns ) {
        // default namespace: fields sit directly on the container
        if ( $this->initem ) {
            $this->concat( $this->current_item[ $el ], $text );
        }
        elseif ( $this->intextinput ) {
            $this->concat( $this->textinput[ $el ], $text );
        }
        elseif ( $this->inimage ) {
            $this->concat( $this->image[ $el ], $text );
        }
        elseif ( $this->inchannel ) {
            $this->concat( $this->channel[ $el ], $text );
        }
        return;
    }

    // namespaced: fields are grouped under the namespace name
    if ( $this->initem ) {
        $this->concat( $this->current_item[ $ns ][ $el ], $text );
    }
    elseif ( $this->inchannel ) {
        $this->concat( $this->channel[ $ns ][ $el ], $text );
    }
    elseif ( $this->intextinput ) {
        $this->concat( $this->textinput[ $ns ][ $el ], $text );
    }
    elseif ( $this->inimage ) {
        $this->concat( $this->image[ $ns ][ $el ], $text );
    }
}
||||
|
||||
/**
 * Cross-populate RSS and Atom field names so callers can use either set.
 *
 * For Atom feeds: channel 'tagline' is copied to 'description', item
 * 'summary' to 'description', item 'atom_content' to ['content']['encoded'],
 * and 'issued' (or else 'modified') is parsed into 'date_timestamp'.
 *
 * For RSS feeds the copies go the other way, and 'date_timestamp' comes
 * from ['dc']['date'] (RSS 1.0) or 'pubdate'.
 *
 * Missing source fields are read as null instead of triggering
 * undefined-index notices.
 */
public function normalize() {
    if ($this->is_atom()) {
        $this->channel['description'] =
            isset($this->channel['tagline']) ? $this->channel['tagline'] : null;

        foreach ($this->items as $i => $item) {
            if (isset($item['summary'])) {
                $item['description'] = $item['summary'];
            }
            if (isset($item['atom_content'])) {
                $item['content']['encoded'] = $item['atom_content'];
            }

            // Prefer 'issued'; fall back to 'modified' only when it exists.
            $atom_date = null;
            if (isset($item['issued'])) {
                $atom_date = $item['issued'];
            } elseif (isset($item['modified'])) {
                $atom_date = $item['modified'];
            }

            if ($atom_date) {
                $epoch = @parse_w3cdtf($atom_date);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
    elseif ($this->is_rss()) {
        $this->channel['tagline'] =
            isset($this->channel['description']) ? $this->channel['description'] : null;

        // is_rss() returns the feed version; evaluate it once for the loop.
        $is_rss_10 = ($this->is_rss() == '1.0');

        foreach ($this->items as $i => $item) {
            if (isset($item['description'])) {
                $item['summary'] = $item['description'];
            }
            if (isset($item['content']['encoded'])) {
                $item['atom_content'] = $item['content']['encoded'];
            }

            if ($is_rss_10 and isset($item['dc']['date'])) {
                $epoch = @parse_w3cdtf($item['dc']['date']);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }
            elseif (isset($item['pubdate'])) {
                // strtotime() yields false on failure, which fails the > 0 test.
                $epoch = @strtotime($item['pubdate']);
                if ($epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
}
||||
|
||||
|
||||
/**
 * Report whether the parsed feed is RSS.
 *
 * @return mixed $this->feed_version when feed_type equals RSS, otherwise false.
 */
public function is_rss() {
    return ($this->feed_type == RSS) ? $this->feed_version : false;
}
||||
|
||||
/**
 * Report whether the parsed feed is Atom.
 *
 * @return mixed $this->feed_version when feed_type equals ATOM, otherwise false.
 */
public function is_atom() {
    return ($this->feed_type == ATOM) ? $this->feed_version : false;
}
||||
|
||||
/** |
||||
* return XML parser, and possibly re-encoded source |
||||
* |
||||
*/ |
||||
public function create_parser($source, $out_enc, $in_enc, $detect) {
    // $source  : raw feed XML (may come back re-encoded on the PHP4 path)
    // $out_enc : target encoding for parsed data; skipped when empty
    // $in_enc  : caller-supplied input encoding, if any
    // $detect  : whether to auto-detect the input encoding
    //
    // Returns array($parser, $source).

    // Compare the whole version rather than its first character, so that
    // PHP 7, 8 and later are routed to the modern parser path as well.
    if (version_compare(PHP_VERSION, '5.0.0', '>=')) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }

    if ($out_enc) {
        $this->encoding = $out_enc;
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
    }

    return array($parser, $source);
}
||||
|
||||
/** |
||||
* Instantiate an XML parser under PHP5 |
||||
* |
||||
* PHP5 will do a fine job of detecting input encoding |
||||
* if passed an empty string as the encoding. |
||||
* |
||||
* All hail libxml2! |
||||
* |
||||
*/ |
||||
public function php5_create_parser($in_enc, $detect) {
    // Use the caller's encoding only when detection is switched off and an
    // encoding was actually given; otherwise pass '' so the XML extension
    // detects the input encoding itself.
    $encoding = (!$detect && $in_enc) ? $in_enc : '';

    return xml_parser_create($encoding);
}
||||
|
||||
/** |
||||
* Instantiate an XML parser under PHP4 |
||||
* |
||||
* Unfortunately PHP4's support for character encodings |
||||
* and especially XML and character encodings sucks. As |
||||
* long as the documents you parse only contain characters |
||||
* from the ISO-8859-1 character set (a superset of ASCII, |
||||
* and a subset of UTF-8) you're fine. However once you |
||||
* step out of that comfy little world things get mad, bad, |
||||
* and dangerous to know. |
||||
* |
||||
* The following code is based on SJM's work with FoF |
||||
* @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss |
||||
* |
||||
*/ |
||||
public function php4_create_parser($source, $in_enc, $detect) {
    // Returns array($parser, $source); $source is replaced by a UTF-8
    // re-encoded copy when a conversion was needed and succeeded.

    if (!$detect) {
        return array(xml_parser_create($in_enc), $source);
    }

    if (!$in_enc) {
        // Look for the encoding pseudo-attribute of the XML declaration.
        // The "?" must be escaped: unescaped, "<?" merely makes "<"
        // optional and the pattern would match any "xml" substring.
        if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
            $in_enc = strtoupper($m[1]);
            $this->source_encoding = $in_enc;
        }
        else {
            $in_enc = 'UTF-8';
        }
    }

    if ($this->known_encoding($in_enc)) {
        return array(xml_parser_create($in_enc), $source);
    }

    // The detected encoding is not one of the simple encodings PHP knows.

    // Attempt to use the iconv extension to cast the XML to a known encoding.
    // @see http://php.net/iconv
    if (function_exists('iconv')) {
        $encoded_source = iconv($in_enc, 'UTF-8', $source);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // iconv didn't work, try mb_convert_encoding.
    // @see http://php.net/mbstring
    if (function_exists('mb_convert_encoding')) {
        $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // Neither conversion worked: report it and parse the source as-is.
    $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                 "You may see strange artifacts, and mangled characters.",
                 E_USER_NOTICE);

    return array(xml_parser_create(), $source);
}
||||
|
||||
/**
 * Check an encoding name against $this->_KNOWN_ENCODINGS.
 *
 * @param string $enc Encoding name; compared after upper-casing.
 * @return mixed The upper-cased name when it is listed, otherwise false.
 */
public function known_encoding($enc) {
    $upper = strtoupper($enc);

    return in_array($upper, $this->_KNOWN_ENCODINGS) ? $upper : false;
}
||||
|
||||
/**
 * Report a problem and remember it on the object.
 *
 * The message is raised with trigger_error() when MAGPIE_DEBUG is truthy
 * and written with error_log() otherwise. Notice-level messages are then
 * stored in $this->WARNING, everything else in $this->ERROR.
 *
 * @param string $errormsg Message text.
 * @param int    $lvl      PHP error level (defaults to E_USER_WARNING).
 */
public function error($errormsg, $lvl = E_USER_WARNING) {
    // append PHP's error message if track_errors enabled
    if (isset($php_errormsg)) {
        $errormsg .= " ($php_errormsg)";
    }

    if (!MAGPIE_DEBUG) {
        error_log($errormsg, 0);
    }
    else {
        trigger_error($errormsg, $lvl);
    }

    // Any overlap with the notice bits classifies the message as a warning.
    $is_notice = $lvl & (E_USER_NOTICE | E_NOTICE);
    $slot = $is_notice ? 'WARNING' : 'ERROR';
    $this->$slot = $errormsg;
}
||||
|
||||
|
||||
} // end class RSS |
||||
|
||||
/**
 * Format one attribute as name="value".
 *
 * The value is inserted verbatim; no escaping is applied.
 *
 * @param string $k Attribute name.
 * @param string $v Attribute value.
 * @return string
 */
function map_attrs($k, $v) {
    return $k . '="' . $v . '"';
}
||||
|
||||
// patch to support medieval versions of PHP4.1.x, |
||||
// courtesy, Ryan Currie, ryan@digibliss.com |
||||
|
||||
if (!function_exists('array_change_key_case')) {
    // Define the case flags only if the runtime does not already have them.
    if (!defined('CASE_UPPER')) {
        define("CASE_UPPER", 1);
    }
    if (!defined('CASE_LOWER')) {
        define("CASE_LOWER", 0);
    }

    /**
     * Fallback for array_change_key_case() on PHP versions that lack it.
     *
     * @param array $array Input array.
     * @param int   $case  CASE_LOWER (default) or CASE_UPPER.
     * @return array Copy of $array with every key passed through
     *               strtolower() or strtoupper().
     */
    function array_change_key_case($array, $case = CASE_LOWER) {
        // Compare (==) rather than assign (=), and name the converter as a
        // string so it can be called as a variable function.
        $cmd = ($case == CASE_UPPER) ? 'strtoupper' : 'strtolower';

        // Start from an empty array so empty input returns array(), not null.
        $output = array();
        foreach ($array as $key => $value) {
            $output[$cmd($key)] = $value;
        }
        return $output;
    }
}
||||
@ -1,65 +0,0 @@ |
||||
<?php |
||||
/** |
||||
* Project: MagpieRSS: a simple RSS integration tool |
||||
* File: rss_utils.inc, utility methods for working with RSS |
||||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
||||
* Version: 0.51 |
||||
* License: GPL |
||||
* |
||||
* The latest version of MagpieRSS can be obtained from: |
||||
* http://magpierss.sourceforge.net |
||||
* |
||||
* For questions, help, comments, discussion, etc., please join the |
||||
* Magpie mailing list: |
||||
* magpierss-general@lists.sourceforge.net |
||||
* @package chamilo.include.rss |
||||
*/ |
||||
|
||||
|
||||
/**
 * Function: parse_w3cdtf
 * Purpose:  parse a W3CDTF date into unix epoch
 *
 * Accepts YYYY-MM-DDThh:mm with optional :ss and an optional zone that is
 * either "Z" or a +hh:mm / -hh:mm offset (the colon in the offset is
 * optional). A date without a zone is treated as GMT.
 *
 * NOTE: http://www.w3.org/TR/NOTE-datetime
 *
 * @param string $date_str Date string to parse.
 * @return int Unix timestamp, or -1 when the string does not match.
 */
function parse_w3cdtf($date_str) {
    // Capture groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes,
    // 6 ":ss", 7 ss, 8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z".
    $pat = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';

    if (!preg_match($pat, $date_str, $match)) {
        return -1;
    }

    // preg_match() omits trailing groups that did not take part in the
    // match; pad so every optional group can be read safely.
    $match = array_pad($match, 12, '');

    // Seconds live in group 7; group 6 still carries the leading colon.
    $seconds = ($match[7] !== '') ? (int) $match[7] : 0;

    // calc epoch for the given date assuming GMT
    $epoch = gmmktime(
        (int) $match[4], (int) $match[5], $seconds,
        (int) $match[2], (int) $match[3], (int) $match[1]
    );

    $offset = 0;
    // "Z" (group 11) is zulu time, aka GMT, and needs no adjustment.
    if ($match[11] !== 'Z' && $match[8] !== '') {
        $offset_secs = (((int) $match[9] * 60) + (int) $match[10]) * 60;

        // is timezone ahead of GMT? then subtract offset
        $offset = ($match[8] == '+') ? -$offset_secs : $offset_secs;
    }

    return $epoch + $offset;
}
||||
Loading…
Reference in new issue