parent
634287c866
commit
90f59ee0c3
@ -1 +0,0 @@ |
|||||||
kellan <kellan@protest.net> |
|
||||||
@ -1,41 +0,0 @@ |
|||||||
Version 0.72 |
|
||||||
----------- |
|
||||||
- fix security exploit: http://www.sec-consult.com/216.html |
|
||||||
|
|
||||||
Version 0.7 |
|
||||||
----------- |
|
||||||
- support for input and output charset encoding |
|
||||||
based on the work in FoF, uses iconv or mbstring if available |
|
||||||
- |
|
||||||
|
|
||||||
Version 0.6 |
|
||||||
----------- |
|
||||||
- basic support for Atom syndication format |
|
||||||
including support for Atom content constructs |
|
||||||
- fixed support for private feeds (HTTP Auth and SSL) |
|
||||||
(thanks to silverorange.com for providing test feeds) |
|
||||||
- support for some broken webservers |
|
||||||
|
|
||||||
Version 0.52 |
|
||||||
----------- |
|
||||||
- support GZIP content negotiation |
|
||||||
- PHP 4.3.2 support |
|
||||||
|
|
||||||
Version 0.4 |
|
||||||
----------- |
|
||||||
- improved error handling, better access for script authors |
|
||||||
- included example scripts of working with MagpieRSS |
|
||||||
- new Smarty plugin for RSS date parsing |
|
||||||
|
|
||||||
Version 0.3 |
|
||||||
----------- |
|
||||||
- added support for conditional gets (Last-Modified, ETag) |
|
||||||
- now use Snoopy to handle fetching RSS files |
|
||||||
|
|
||||||
Version 0.2 |
|
||||||
----------- |
|
||||||
- MAJOR CLEAN UP |
|
||||||
- removed kludgy $options array in favour of constants |
|
||||||
- phased out returning arrays |
|
||||||
- added better error handling |
|
||||||
- re-worked comments |
|
||||||
@ -1,405 +0,0 @@ |
|||||||
2005-10-28 14:11 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: a better solution |
|
||||||
|
|
||||||
2005-10-28 11:51 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: fix arbitrary code execution |
|
||||||
vulnerability when using curl+ssl |
|
||||||
|
|
||||||
http://www.sec-consult.com/216.html |
|
||||||
|
|
||||||
2005-03-08 10:46 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: fix bug w/ atom and date normalization |
|
||||||
|
|
||||||
2005-02-09 14:59 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: fix stale cache bug |
|
||||||
|
|
||||||
2005-01-28 02:27 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: support php w/o array_change_case |
|
||||||
|
|
||||||
2005-01-23 20:02 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: fix cache bug introduced by charset encoding |
|
||||||
|
|
||||||
2005-01-12 09:14 kellan |
|
||||||
|
|
||||||
* rss_cache.inc, rss_fetch.inc: more sanity checks for when things |
|
||||||
go wrong |
|
||||||
|
|
||||||
2004-12-12 13:44 kellan |
|
||||||
|
|
||||||
* INSTALL, rss_cache.inc, rss_utils.inc: detab |
|
||||||
|
|
||||||
2004-11-23 20:15 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: fix calling iconv instead of mb_convert_encoding |
|
||||||
|
|
||||||
2004-11-22 02:11 kellan |
|
||||||
|
|
||||||
* CHANGES, ChangeLog, rss_parse.inc, scripts/magpie_debug.php: last |
|
||||||
bit of tidying |
|
||||||
|
|
||||||
2004-11-22 01:45 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: detab, bump version |
|
||||||
|
|
||||||
2004-11-22 01:43 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: was filtering too much |
|
||||||
|
|
||||||
2004-11-22 00:03 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc, rss_parse.inc: cache on $url . $output_encoding |
|
||||||
otherwise we can get munged output |
|
||||||
|
|
||||||
2004-11-21 23:52 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: add WARNING |
|
||||||
|
|
||||||
2004-11-21 23:45 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: don't set ERROR on notice or warning (rss_fetch |
|
||||||
dies on parse errors) |
|
||||||
|
|
||||||
2004-11-21 23:44 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: add encoding defines (fix timeout error reporting) |
|
||||||
|
|
||||||
2004-11-21 20:21 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: incorporate steve's patch |
|
||||||
|
|
||||||
2004-11-21 19:26 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: remove old debugging functions, totally |
|
||||||
arbitrarily. might break stuff. can't really explain why i'm |
|
||||||
doing this. |
|
||||||
|
|
||||||
2004-10-28 15:52 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: fixed '=' instead of '==' |
|
||||||
|
|
||||||
2004-10-26 00:48 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: change epoch to timestamp to conform w/ php naming |
|
||||||
conventions |
|
||||||
|
|
||||||
2004-06-15 12:00 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: [no log message] |
|
||||||
|
|
||||||
2004-04-26 14:16 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: bump version |
|
||||||
|
|
||||||
2004-04-26 12:36 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: fix field doubling |
|
||||||
|
|
||||||
2004-04-24 17:47 kellan |
|
||||||
|
|
||||||
* CHANGES, ChangeLog: updated |
|
||||||
|
|
||||||
2004-04-24 17:35 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: bumped version |
|
||||||
|
|
||||||
2004-04-24 16:52 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: support arbitrary atom content constructs |
|
||||||
|
|
||||||
some refactoring |
|
||||||
|
|
||||||
2004-04-24 16:15 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: support summary content construct. add normalize |
|
||||||
function |
|
||||||
|
|
||||||
2004-03-27 16:29 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: accept self-signed certs |
|
||||||
|
|
||||||
2004-03-27 12:53 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: fixed SSL support * set status * set |
|
||||||
error on bad curl |
|
||||||
|
|
||||||
(also ripped out big chunks of dead weight (submit_form) which |
|
||||||
were getting in my way |
|
||||||
|
|
||||||
2004-01-25 02:25 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: make RSS 1.0's rdf:about available |
|
||||||
|
|
||||||
2004-01-25 02:07 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: clean up text, and line formats. add support item |
|
||||||
rdf:about |
|
||||||
|
|
||||||
2004-01-24 23:40 kellan |
|
||||||
|
|
||||||
* CHANGES, ChangeLog: update changes |
|
||||||
|
|
||||||
2004-01-24 23:37 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: updated version |
|
||||||
|
|
||||||
2004-01-24 23:35 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: whitespace |
|
||||||
|
|
||||||
2004-01-24 23:23 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: support badly formatted http headers |
|
||||||
|
|
||||||
2004-01-24 23:20 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: added alpha atom parsing support |
|
||||||
|
|
||||||
2003-06-25 22:34 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: fixed fread 4.3.2 compatibility problems |
|
||||||
|
|
||||||
2003-06-13 11:31 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: reset cache on 304 |
|
||||||
|
|
||||||
2003-06-12 21:37 kellan |
|
||||||
|
|
||||||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: |
|
||||||
bumped up version numbers |
|
||||||
|
|
||||||
2003-06-12 21:32 kellan |
|
||||||
|
|
||||||
* htdocs/index.html: updated news |
|
||||||
|
|
||||||
2003-06-12 21:27 kellan |
|
||||||
|
|
||||||
* NEWS: a manual blog :) |
|
||||||
|
|
||||||
2003-06-12 21:22 kellan |
|
||||||
|
|
||||||
* htdocs/index.html: fully qualified img |
|
||||||
|
|
||||||
2003-06-12 21:20 kellan |
|
||||||
|
|
||||||
* htdocs/index.html: clean up. added badge. |
|
||||||
|
|
||||||
2003-06-12 21:04 kellan |
|
||||||
|
|
||||||
* rss_utils.inc: clean up regex |
|
||||||
|
|
||||||
2003-06-12 21:02 kellan |
|
||||||
|
|
||||||
* rss_cache.inc: suppress some warnings |
|
||||||
|
|
||||||
2003-05-30 20:44 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: more comments, cleaned up notice |
|
||||||
|
|
||||||
2003-05-30 15:14 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: don't advertise gzip support if the user |
|
||||||
hasn't built php with gzinflate support |
|
||||||
|
|
||||||
2003-05-12 22:32 kellan |
|
||||||
|
|
||||||
* ChangeLog: changes |
|
||||||
|
|
||||||
2003-05-12 22:11 kellan |
|
||||||
|
|
||||||
* htdocs/index.html: announce 0.5 |
|
||||||
|
|
||||||
2003-05-12 21:42 kellan |
|
||||||
|
|
||||||
* htdocs/index.html: change |
|
||||||
|
|
||||||
2003-05-12 21:39 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: use gzip |
|
||||||
|
|
||||||
2003-05-12 21:37 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: added support gzip encoded content |
|
||||||
negotiation |
|
||||||
|
|
||||||
2003-05-12 21:32 kellan |
|
||||||
|
|
||||||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: fixed |
|
||||||
typoes |
|
||||||
|
|
||||||
2003-04-26 21:44 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: fix minor typo |
|
||||||
|
|
||||||
2003-04-18 08:19 kellan |
|
||||||
|
|
||||||
* htdocs/cookbook.html: updated cookbook to show more code for |
|
||||||
limiting items |
|
||||||
|
|
||||||
2003-03-03 16:02 kellan |
|
||||||
|
|
||||||
* rss_parse.inc, scripts/magpie_slashbox.php: committed (or |
|
||||||
adapted) patch from Nicola (www.technick.com) to quell 'Undefined |
|
||||||
Indexes' notices |
|
||||||
|
|
||||||
2003-03-03 15:59 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: committed patch from nicola (www.technick.com) to |
|
||||||
quell 'undefined indexes' notices. |
|
||||||
|
|
||||||
* Magpie now automatically includes its version in the |
|
||||||
user-agent, & whether cacheing is turned on. |
|
||||||
|
|
||||||
2003-02-12 01:22 kellan |
|
||||||
|
|
||||||
* CHANGES, ChangeLog: ChangeLog now auto-generated by cvs2cl |
|
||||||
|
|
||||||
2003-02-12 00:21 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc: better errors, hopefully stomped on pesky notices |
|
||||||
|
|
||||||
2003-02-12 00:19 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: check to see if xml is supported, if not die |
|
||||||
|
|
||||||
also throw better xml errors |
|
||||||
|
|
||||||
2003-02-12 00:18 kellan |
|
||||||
|
|
||||||
* rss_cache.inc: hopefully cleared up some notices that were being |
|
||||||
thrown into the log |
|
||||||
|
|
||||||
fixed a debug statement that was being called as an error |
|
||||||
|
|
||||||
2003-02-12 00:15 kellan |
|
||||||
|
|
||||||
* scripts/: magpie_simple.php, magpie_slashbox.php: moved |
|
||||||
magpie_simple to magpie_slashbox, and replaced it with a simpler |
|
||||||
demo. |
|
||||||
|
|
||||||
2003-02-12 00:02 kellan |
|
||||||
|
|
||||||
* INSTALL, README, TROUBLESHOOTING: Improved documentation. Better |
|
||||||
install instructions. |
|
||||||
|
|
||||||
TROUBLESHOOTING covers common installation and usage problems |
|
||||||
|
|
||||||
2003-01-22 14:40 kellan |
|
||||||
|
|
||||||
* htdocs/cookbook.html: added cookbook.html |
|
||||||
|
|
||||||
2003-01-21 23:47 kellan |
|
||||||
|
|
||||||
* cookbook: a magpie cookbook |
|
||||||
|
|
||||||
2003-01-20 10:09 kellan |
|
||||||
|
|
||||||
* ChangeLog: updated |
|
||||||
|
|
||||||
2003-01-20 09:23 kellan |
|
||||||
|
|
||||||
* scripts/simple_smarty.php: minor clean up |
|
||||||
|
|
||||||
2003-01-20 09:15 kellan |
|
||||||
|
|
||||||
* scripts/README: added smarty url |
|
||||||
|
|
||||||
2003-01-20 09:14 kellan |
|
||||||
|
|
||||||
* magpie_simple.php, htdocs/index.html, scripts/README, |
|
||||||
scripts/magpie_debug.php, scripts/magpie_simple.php, |
|
||||||
scripts/simple_smarty.php, |
|
||||||
scripts/smarty_plugin/modifier.rss_date_parse.php, |
|
||||||
scripts/templates/simple.smarty: Added scripts directory for |
|
||||||
examples on how to use MagpieRSS |
|
||||||
|
|
||||||
magpie_simple - is a simple example magpie_debug - spew all the |
|
||||||
information from a parsed RSS feed simple_smarty - example of |
|
||||||
using magpie with Smarty template system |
|
||||||
smarty_plugin/modifier.rss_date_parse.php - support file for the |
|
||||||
smarty demo templates/simple.smarty - template for the smarty demo |
|
||||||
|
|
||||||
2003-01-20 09:11 kellan |
|
||||||
|
|
||||||
* rss_fetch.inc, rss_parse.inc: changes to error handling to give |
|
||||||
script authors more access to magpie's errors. |
|
||||||
|
|
||||||
added method magpie_error() to retrieve global MAGPIE_ERROR |
|
||||||
variable for when fetch_rss() returns false |
|
||||||
|
|
||||||
2002-10-26 19:02 kellan |
|
||||||
|
|
||||||
* htdocs/index.html: putting the website under source control |
|
||||||
|
|
||||||
2002-10-26 18:43 kellan |
|
||||||
|
|
||||||
* AUTHORS, ChangeLog, INSTALL, README: some documentation to make |
|
||||||
it all look official :) |
|
||||||
|
|
||||||
2002-10-25 23:04 kellan |
|
||||||
|
|
||||||
* magpie_simple.php: quxx |
|
||||||
|
|
||||||
2002-10-25 23:04 kellan |
|
||||||
|
|
||||||
* rss_parse.inc: added support for textinput and image |
|
||||||
|
|
||||||
2002-10-25 19:23 kellan |
|
||||||
|
|
||||||
* magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc, |
|
||||||
rss_utils.inc: switched to using Snoopy for fetching remote RSS |
|
||||||
files. |
|
||||||
|
|
||||||
added support for conditional gets |
|
||||||
|
|
||||||
2002-10-25 19:22 kellan |
|
||||||
|
|
||||||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: |
|
||||||
Change comment style to slavishly imitate the phpinsider style |
|
||||||
found in Smarty and Snoopy :) |
|
||||||
|
|
||||||
2002-10-25 19:18 kellan |
|
||||||
|
|
||||||
* extlib/Snoopy.class.inc: added Snoopy in order to support |
|
||||||
conditional gets |
|
||||||
|
|
||||||
2002-10-23 23:19 kellan |
|
||||||
|
|
||||||
* magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc: |
|
||||||
MAJOR CLEANUP! |
|
||||||
|
|
||||||
* rss_fetch got rid of the options array, replaced it with a more |
|
||||||
PHP-like solution of using defines. constants are setup, with |
|
||||||
defaults, in the function init() |
|
||||||
|
|
||||||
got rid of the idiom of passing back an array, it was awkward to |
|
||||||
deal with in PHP, and unusual (and consequently confusing to |
|
||||||
people). now i return true/false values, and try to setup error |
|
||||||
string where appropriate (rss_cache has the most complete example |
|
||||||
of this) |
|
||||||
|
|
||||||
change the logic for interacting with the cache |
|
||||||
|
|
||||||
* rss_cache major re-working of how errors are handled. tried to |
|
||||||
make the code more resilient. the cache is now much more aware |
|
||||||
of MAX_AGE, where before this was being driven out of rss_fetch |
|
||||||
(which was silly) |
|
||||||
|
|
||||||
* rss_parse properly handles xml parse errors. used to sail |
|
||||||
along blithely unaware. |
|
||||||
|
|
||||||
2002-09-11 11:11 kellan |
|
||||||
|
|
||||||
* rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, |
|
||||||
rss_utils.inc: Initial revision |
|
||||||
|
|
||||||
2002-09-11 11:11 kellan |
|
||||||
|
|
||||||
* rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, |
|
||||||
rss_utils.inc: initial import |
|
||||||
|
|
||||||
@ -1,143 +0,0 @@ |
|||||||
REQUIREMENTS |
|
||||||
|
|
||||||
MagpieRSS requires a recent PHP 4+ (developed with 4.2.0) |
|
||||||
with xml (expat) support. |
|
||||||
|
|
||||||
Optionally: |
|
||||||
* PHP5 with libxml2 support. |
|
||||||
* cURL for SSL support |
|
||||||
* iconv (preferred) or mb_string for expanded character set support |
|
||||||
|
|
||||||
QUICK START |
|
||||||
|
|
||||||
Magpie consists of 4 files (rss_fetch.inc, rss_parse.inc, rss_cache.inc, |
|
||||||
and rss_utils.inc), and the directory extlib (which contains a modified |
|
||||||
version of the Snoopy HTTP client) |
|
||||||
|
|
||||||
Copy these 5 resources to a directory named 'magpierss' in the same |
|
||||||
directory as your PHP script. |
|
||||||
|
|
||||||
At the top of your script add the following line: |
|
||||||
|
|
||||||
require_once('magpierss/rss_fetch.inc'); |
|
||||||
|
|
||||||
Now you can use the fetch_rss() method: |
|
||||||
|
|
||||||
$rss = fetch_rss($url); |
|
||||||
|
|
||||||
Done. That's it. See README for more details on using MagpieRSS. |
|
||||||
|
|
||||||
NEXT STEPS |
|
||||||
|
|
||||||
Important: you'll probably want to get the cache directory working in |
|
||||||
order to speed up your application, and not abuse the webserver you're |
|
||||||
downloading the RSS from. |
|
||||||
|
|
||||||
Optionally you can install MagpieRSS in your PHP include path in order to |
|
||||||
make it available server wide. |
|
||||||
|
|
||||||
Lastly you might want to look through the constants in rss_fetch.inc see if |
|
||||||
there is anything you want to override (the defaults are pretty good) |
|
||||||
|
|
||||||
For more info, or if you have trouble, see TROUBLESHOOTING |
|
||||||
|
|
||||||
SETTING UP CACHING |
|
||||||
|
|
||||||
Magpie has built-in transparent caching. With caching Magpie will only |
|
||||||
fetch and parse RSS feeds when there is new content. Without this feature |
|
||||||
your pages will be slow, and the sites serving the RSS feed will be annoyed |
|
||||||
with you. |
|
||||||
|
|
||||||
** Simple and Automatic ** |
|
||||||
|
|
||||||
By default Magpie will try to create a cache directory named 'cache' in the |
|
||||||
same directory as your PHP script. |
|
||||||
|
|
||||||
** Creating a Local Cache Directory ** |
|
||||||
|
|
||||||
Often this will fail, because your webserver doesn't have sufficient |
|
||||||
permissions to create the directory. |
|
||||||
|
|
||||||
Exact instructions for how to do this will vary from install to install and |
|
||||||
platform to platform. The steps are: |
|
||||||
|
|
||||||
1. Make a directory named 'cache' |
|
||||||
2. Give the web server write access to that directory. |
|
||||||
|
|
||||||
An example of how to do this on Debian would be: |
|
||||||
|
|
||||||
1. mkdir /path/to/script/cache |
|
||||||
2. chgrp www-data /path/to/script/cache |
|
||||||
3. chmod 775 /path/to/script/cache |
|
||||||
|
|
||||||
On other Unixes you'll need to change 'www-data' to what ever user Apache |
|
||||||
runs as. (on MacOS X the user would be 'www') |
|
||||||
|
|
||||||
** Cache in /tmp ** |
|
||||||
|
|
||||||
Sometimes you won't be able to create a local cache directory. Some reasons |
|
||||||
might be: |
|
||||||
|
|
||||||
1. No shell account |
|
||||||
2. Insufficient permissions to change ownership of a directory |
|
||||||
3. Webserver runs as 'nobody' |
|
||||||
|
|
||||||
In these situations using a cache directory in /tmp can often be a good |
|
||||||
option. |
|
||||||
|
|
||||||
The drawback is /tmp is public, so anyone on the box can read the cache |
|
||||||
files. Usually RSS feeds are public information, so you'll have to decide |
|
||||||
how much of an issue that is. |
|
||||||
|
|
||||||
To use /tmp as your cache directory you need to add the following line to |
|
||||||
your script: |
|
||||||
|
|
||||||
define('MAGPIE_CACHE_DIR', '/tmp/magpie_cache'); |
|
||||||
|
|
||||||
** Global Cache ** |
|
||||||
|
|
||||||
If you have several applications using Magpie, you can create a single |
|
||||||
shared cache directory, either using the /tmp cache, or somewhere else on |
|
||||||
the system. |
|
||||||
|
|
||||||
The upside is that you'll distribute fetching and parsing feeds across |
|
||||||
several applications. |
|
||||||
|
|
||||||
INSTALLING MAGPIE SERVER WIDE |
|
||||||
|
|
||||||
Rather than following the Quickstart instructions, which require you to have |
|
||||||
a copy of Magpie per application, alternately you can place it in some |
|
||||||
shared location. |
|
||||||
|
|
||||||
** Adding Magpie to Your Include Path ** |
|
||||||
|
|
||||||
Copy the 5 resources (rss_fetch.inc, rss_parse.inc, rss_cache.inc, |
|
||||||
rss_utils.inc, and extlib) to a directory named 'magpierss' in your include |
|
||||||
path. Now any PHP file on your system can use Magpie with: |
|
||||||
|
|
||||||
require_once('magpierss/rss_fetch.inc'); |
|
||||||
|
|
||||||
Different installs have different include paths, and you'll have to figure |
|
||||||
out what your include_path is. |
|
||||||
|
|
||||||
From shell you can try: |
|
||||||
|
|
||||||
php -i | grep 'include_path' |
|
||||||
|
|
||||||
Alternatively you can create a phpinfo.php file which contains: |
|
||||||
|
|
||||||
<?php phpinfo(); ?> |
|
||||||
|
|
||||||
Debian's default is: |
|
||||||
|
|
||||||
/usr/share/php |
|
||||||
|
|
||||||
(though a more ideologically pure location would be /usr/local/share/php) |
|
||||||
|
|
||||||
Apple's default include path is: |
|
||||||
|
|
||||||
/usr/lib/php |
|
||||||
|
|
||||||
While the Entropy PHP build seems to use: |
|
||||||
|
|
||||||
/usr/local/php/lib/php |
|
||||||
@ -1,53 +0,0 @@ |
|||||||
MagpieRSS News |
|
||||||
|
|
||||||
MAGPIERSS 0.51 RELEASED |
|
||||||
* important bugfix! |
|
||||||
* fix "silent failure" when PHP doesn't have zlib |
|
||||||
|
|
||||||
FEED ON FEEDS USES MAGPIE |
|
||||||
* web-based RSS aggregator built with Magpie |
|
||||||
* easy to install, easy to use. |
|
||||||
http://minutillo.com/steve/feedonfeeds/ |
|
||||||
|
|
||||||
MAGPIERSS 0.5 RELEASED |
|
||||||
* supports transparent HTTP gzip content negotiation for reduced bandwidth usage |
|
||||||
* quashed some undefined index notices |
|
||||||
|
|
||||||
MAGPIERSS 0.46 RELEASED |
|
||||||
* minor release, more error handling clean up |
|
||||||
* documentation fixes, simpler example |
|
||||||
* new trouble shooting guide for installation and usage problems |
|
||||||
http://magpierss.sourceforge.net/TROUBLESHOOTING |
|
||||||
|
|
||||||
MAGPIE NEWS AS RSS |
|
||||||
* releases, bug fixes, related stories in RSS |
|
||||||
|
|
||||||
MAGPIERSS COOKBOOK: SIMPLE PHP RSS HOW TOS |
|
||||||
* answers some of the most frequently asked Magpie questions |
|
||||||
* feedback, suggestions, requests, recipes welcome |
|
||||||
http://magpierss.sourceforge.net/cookbook.html |
|
||||||
|
|
||||||
MAGPIERSS 0.4 RELEASED! |
|
||||||
* improved error handling, more flexibility for script authors, backwards compatible |
|
||||||
* new and better examples! including using MagpieRSS and Smarty |
|
||||||
* new Smarty plugin for RSS date parsing |
|
||||||
http://smarty.php.net |
|
||||||
|
|
||||||
INFINITE PENGUIN NOW SUPPORTS MAGPIE 0.3 |
|
||||||
* simple, sophisticated RSS viewer |
|
||||||
* includes auto-generated javascript ticker from RSS feed |
|
||||||
http://www.infinitepenguins.net/rss/ |
|
||||||
|
|
||||||
TRAUMWIND RELEASES REX BACKEND FOR MAGPIERSS |
|
||||||
* drop in support using regex based XML parser |
|
||||||
* parses improperly formed XML that chokes expat |
|
||||||
http://traumwind.de/blog/magpie/magpie_alike.php |
|
||||||
|
|
||||||
MAGPIERSS 0.3 RELEASED! |
|
||||||
* Support added for HTTP Conditional GETs. |
|
||||||
http://fishbowl.pastiche.org/archives/001132.html |
|
||||||
|
|
||||||
MAGPIERSS 0.2! |
|
||||||
* Major clean up of the code. Easier to use. |
|
||||||
* Simpler install on shared hosts. |
|
||||||
* Better documentation and comments. |
|
||||||
@ -1,48 +0,0 @@ |
|||||||
NAME |
|
||||||
|
|
||||||
MagpieRSS - a simple RSS integration tool |
|
||||||
|
|
||||||
SYNOPSIS |
|
||||||
|
|
||||||
require_once('rss_fetch.inc'); |
|
||||||
$url = $_GET['url']; |
|
||||||
$rss = fetch_rss( $url ); |
|
||||||
|
|
||||||
echo "Channel Title: " . $rss->channel['title'] . "<p>"; |
|
||||||
echo "<ul>"; |
|
||||||
foreach ($rss->items as $item) { |
|
||||||
$href = $item['link']; |
|
||||||
$title = $item['title']; |
|
||||||
echo "<li><a href=$href>$title</a></li>"; |
|
||||||
} |
|
||||||
echo "</ul>"; |
|
||||||
|
|
||||||
DESCRIPTION |
|
||||||
|
|
||||||
MagpieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like", |
|
||||||
and simple to use. |
|
||||||
|
|
||||||
Some features include: |
|
||||||
|
|
||||||
* supports RSS 0.9 - 1.0, with limited RSS 2.0 support |
|
||||||
* supports namespaces, and modules, including mod_content and mod_event |
|
||||||
* open minded [1] |
|
||||||
* simple, functional interface, to object oriented backend parser |
|
||||||
* automatic caching of parsed RSS objects makes it easy to integrate |
|
||||||
* supports conditional GET with Last-Modified, and ETag |
|
||||||
* uses constants for easy override of default behaviour |
|
||||||
* heavily commented |
|
||||||
|
|
||||||
|
|
||||||
1. By open minded I mean Magpie will accept any tag it finds in good faith that |
|
||||||
it was supposed to be here. For strict validation, look elsewhere. |
|
||||||
|
|
||||||
|
|
||||||
GETTING STARTED |
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
COPYRIGHT: |
|
||||||
Copyright(c) 2002 kellan@protest.net. All rights reserved. |
|
||||||
This software is released under the GNU General Public License. |
|
||||||
Please read the disclaimer at the top of the Snoopy.class.inc file. |
|
||||||
@ -1,152 +0,0 @@ |
|||||||
TROUBLESHOOTING |
|
||||||
|
|
||||||
|
|
||||||
Trouble Installing MagpieRSS: |
|
||||||
|
|
||||||
1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' |
|
||||||
(include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') |
|
||||||
|
|
||||||
2. Cache couldn't make dir './cache'. |
|
||||||
|
|
||||||
3. Fatal error: Failed to load PHP's XML Extension. |
|
||||||
http://www.php.net/manual/en/ref.xml.php |
|
||||||
|
|
||||||
Trouble Using MagpieRSS |
|
||||||
|
|
||||||
4. Warning: MagpieRSS: Failed to fetch example.com/index.rdf. |
|
||||||
(HTTP Error: Invalid protocol "") |
|
||||||
|
|
||||||
5. Warning: MagpieRSS: Failed to parse RSS file. |
|
||||||
(not well-formed (invalid token) at line 19, column 98) |
|
||||||
|
|
||||||
6. Warning: MagpieRSS: Failed to fetch http://localhost/rss/features.1-0.rss. |
|
||||||
(HTTP Response: HTTP/1.1 404 Not Found) |
|
||||||
|
|
||||||
If you would rather provide a custom error, see the COOKBOOK |
|
||||||
(http://magpierss.sf.net/cookbook.html) recipe 2. |
|
||||||
|
|
||||||
************************************************************************* |
|
||||||
1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' |
|
||||||
(include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') |
|
||||||
|
|
||||||
This could mean that: |
|
||||||
|
|
||||||
a) PHP can't find the MagpieRSS files. |
|
||||||
b) PHP found the MagpieRSS files, but can't read them. |
|
||||||
|
|
||||||
a. Telling PHP where to look for MagpieRSS files. |
|
||||||
|
|
||||||
This might mean your PHP program can't find the MagpieRSS libraries. |
|
||||||
Magpie relies on 4 include files, rss_fetch.inc, rss_parse.inc, |
|
||||||
rss_cache.inc, rss_utils.inc, and for normal use you'll need all 4 (see the |
|
||||||
cookbook for exceptions). |
|
||||||
|
|
||||||
This can be fixed by making sure the MagpieRSS files are in your include |
|
||||||
path. |
|
||||||
|
|
||||||
If you can't edit your include path (for example you're on a shared host) then |
|
||||||
you need to replace: |
|
||||||
|
|
||||||
require_once('rss_fetch.inc'); |
|
||||||
|
|
||||||
-with- |
|
||||||
|
|
||||||
define('MAGPIE_DIR', '/path/to/magpierss/'); |
|
||||||
require_once(MAGPIE_DIR.'rss_fetch.inc'); |
|
||||||
|
|
||||||
b. PHP can't read the MagpieRSS files |
|
||||||
|
|
||||||
All PHP libraries need to be readable by your webserver. |
|
||||||
|
|
||||||
On Unix you can accomplish this with: |
|
||||||
|
|
||||||
chmod 755 rss_fetch.inc rss_parse.inc rss_cache.inc rss_utils.inc |
|
||||||
|
|
||||||
************************************************************************* |
|
||||||
2. Cache couldn't make dir './cache'. |
|
||||||
|
|
||||||
MagpieRSS caches the results of fetched and parsed RSS to reduce the load on |
|
||||||
both your server, and the remote server providing the RSS. It does this by |
|
||||||
writing files to a cache directory. |
|
||||||
|
|
||||||
This error means the webserver doesn't have write access to the current |
|
||||||
directory. |
|
||||||
|
|
||||||
a. Make a webserver writeable cache directory |
|
||||||
|
|
||||||
Find the webserver's group. (on my system it is 'www') |
|
||||||
|
|
||||||
mkdir ./cache |
|
||||||
chgrp www directory_name |
|
||||||
chmod g+w directory_name |
|
||||||
|
|
||||||
(this is the best, and desired solution) |
|
||||||
|
|
||||||
b. Tell MagpieRSS to create the cache directory somewhere the webserver can |
|
||||||
write to. |
|
||||||
|
|
||||||
define('MAGPIE_CACHE_DIR', '/tmp/magpierss'); |
|
||||||
|
|
||||||
(this is not a great solution, and might have security considerations) |
|
||||||
|
|
||||||
c. Turn off cacheing. |
|
||||||
|
|
||||||
Magpie can work fine without caching, but it will be slower, and you might |
|
||||||
become a nuisance to the RSS provider, but it is an option. |
|
||||||
|
|
||||||
define('MAGPIE_CACHE_ON', 0); |
|
||||||
|
|
||||||
d. And lastly, do NOT |
|
||||||
|
|
||||||
chmod 777 ./cache |
|
||||||
|
|
||||||
Any of the above solutions are better than this. |
|
||||||
|
|
||||||
NOTE: If none of this works for you, let me know. I've got root, and a |
|
||||||
custom compiled Apache on almost any box I ever touch, so I can be a little |
|
||||||
out of touch with reality. But I won't know that if I don't get feedback. |
|
||||||
|
|
||||||
************************************************************************* |
|
||||||
3. Fatal error: Failed to load PHP's XML Extension. |
|
||||||
http://www.php.net/manual/en/ref.xml.php |
|
||||||
|
|
||||||
-or- |
|
||||||
|
|
||||||
Fatal error: Failed to create an instance of PHP's XML parser. |
|
||||||
http://www.php.net/manual/en/ref.xml.php |
|
||||||
|
|
||||||
Make sure your PHP was built with --with-xml |
|
||||||
|
|
||||||
This has been turned on by default for several versions of PHP, but it might |
|
||||||
be turned off in your build. |
|
||||||
|
|
||||||
See php.net for details on building and configuring PHP. |
|
||||||
|
|
||||||
|
|
||||||
************************************************************************* |
|
||||||
4. Warning: MagpieRSS: Failed to fetch index.rdf. |
|
||||||
(HTTP Error: Invalid protocol "") |
|
||||||
|
|
||||||
You need to put http:// in front of the URL to your RSS feed |
|
||||||
|
|
||||||
************************************************************************* |
|
||||||
5. Warning: MagpieRSS: Failed to parse RSS file. |
|
||||||
(not well-formed (invalid token) at line 19, column 98) |
|
||||||
|
|
||||||
There is a problem with the RSS feed you are trying to read. |
|
||||||
MagpieRSS is an XML parser, and therefore can't parse RSS feeds with invalid |
|
||||||
characters. Some RSS parsers are based on regular expressions, and can |
|
||||||
parse invalid RSS but they have their own problems. |
|
||||||
|
|
||||||
You could try contacting the author of the RSS feed, and pointing them to |
|
||||||
the online RSS validator at: |
|
||||||
|
|
||||||
http://feeds.archive.org/validator/ |
|
||||||
|
|
||||||
************************************************************************* |
|
||||||
6. Warning: MagpieRSS: Failed to fetch http://example.com/index.rdf |
|
||||||
(HTTP Response: HTTP/1.1 404 Not Found) |
|
||||||
|
|
||||||
It's a 404! The RSS file ain't there. |
|
||||||
|
|
||||||
|
|
||||||
@ -1,125 +0,0 @@ |
|||||||
MAGPIERSS RECIPES: Cooking with Corbies |
|
||||||
|
|
||||||
"Four and twenty blackbirds baked in a pie." |
|
||||||
|
|
||||||
1. LIMIT THE NUMBER OF HEADLINES(AKA ITEMS) RETURNED. |
|
||||||
|
|
||||||
PROBLEM: |
|
||||||
|
|
||||||
You want to display the 10 (or 3) most recent headlines, but the RSS feed |
|
||||||
contains 15. |
|
||||||
|
|
||||||
SOLUTION: |
|
||||||
|
|
||||||
$num_items = 10; |
|
||||||
$rss = fetch_rss($url); |
|
||||||
|
|
||||||
$items = array_slice($rss->items, 0, $num_items); |
|
||||||
|
|
||||||
DISCUSSION: |
|
||||||
|
|
||||||
Rather than trying to limit the number of items Magpie parses, a much simpler, |
|
||||||
and more flexible approach is to take a "slice" of the array of items. And |
|
||||||
array_slice() is smart enough to do the right thing if the feed has fewer items |
|
||||||
than $num_items. |
|
||||||
|
|
||||||
See: http://www.php.net/array_slice |
|
||||||
|
|
||||||
|
|
||||||
2. DISPLAY A CUSTOM ERROR MESSAGE IF SOMETHING GOES WRONG |
|
||||||
|
|
||||||
PROBLEM: |
|
||||||
|
|
||||||
You don't want Magpie's error messages showing up if something goes wrong. |
|
||||||
|
|
||||||
SOLUTION: |
|
||||||
|
|
||||||
# Magpie throws USER_WARNINGS only |
|
||||||
# so you can cloak these, by only showing ERRORs |
|
||||||
error_reporting(E_ERROR); |
|
||||||
|
|
||||||
# check the return value of fetch_rss() |
|
||||||
|
|
||||||
$rss = fetch_rss($url); |
|
||||||
|
|
||||||
if ( $rss ) { |
|
||||||
...display rss feed... |
|
||||||
} |
|
||||||
else { |
|
||||||
echo "An error occurred! " . |
|
||||||
"Consider donating more $$$ for restoration of services." . |
|
||||||
"<br>Error Message: " . magpie_error(); |
|
||||||
} |
|
||||||
|
|
||||||
DISCUSSION: |
|
||||||
|
|
||||||
MagpieRSS triggers a warning in a number of circumstances. The 2 most common |
|
||||||
circumstances are: if the specified RSS file isn't properly formed (usually |
|
||||||
because it includes illegal HTML), or if Magpie can't download the remote RSS |
|
||||||
file, and there is no cached version. |
|
||||||
|
|
||||||
If you don't want your users to see these warnings change your error_reporting |
|
||||||
settings to only display ERRORs. Another option is to turn off display_errors, |
|
||||||
so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages. |
|
||||||
|
|
||||||
You can do this with: |
|
||||||
|
|
||||||
ini_set('display_errors', 0); |
|
||||||
|
|
||||||
See: http://www.php.net/error_reporting, |
|
||||||
http://www.php.net/ini_set, |
|
||||||
http://www.php.net/manual/en/ref.errorfunc.php |
|
||||||
|
|
||||||
3. GENERATE A NEW RSS FEED |
|
||||||
|
|
||||||
PROBLEM: |
|
||||||
|
|
||||||
Create an RSS feed for other people to use. |
|
||||||
|
|
||||||
SOLUTION: |
|
||||||
|
|
||||||
Use Useful Inc's RSSWriter (http://usefulinc.com/rss/rsswriter/) |
|
||||||
|
|
||||||
DISCUSSION: |
|
||||||
|
|
||||||
An example of turning a Magpie parsed RSS object back into an RSS file is |
|
||||||
forthcoming. In the meantime RSSWriter has great documentation. |
|
||||||
|
|
||||||
4. DISPLAY HEADLINES MORE RECENT THAN X DATE |
|
||||||
|
|
||||||
PROBLEM: |
|
||||||
|
|
||||||
You only want to display headlines that were published on, or after a certain |
|
||||||
date. |
|
||||||
|
|
||||||
|
|
||||||
SOLUTION: |
|
||||||
|
|
||||||
require 'rss_utils.inc'; |
|
||||||
|
|
||||||
# get all headlines published today |
|
||||||
$today = getdate(); |
|
||||||
|
|
||||||
# today, 12AM |
|
||||||
$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']); |
|
||||||
|
|
||||||
$rss = fetch_rss($url); |
|
||||||
|
|
||||||
foreach ( $rss->items as $item ) { |
|
||||||
$published = parse_w3cdtf($item['dc']['date']); |
|
||||||
if ( $published >= $date ) { |
|
||||||
echo "Title: " . $item['title']; |
|
||||||
echo "Published: " . date("h:i:s A", $published); |
|
||||||
echo "<p>"; |
|
||||||
} |
|
||||||
} |
|
||||||
|
|
||||||
DISCUSSION: |
|
||||||
|
|
||||||
This recipe only works for RSS 1.0 feeds that include the <dc:date> field. |
|
||||||
(which is very good RSS style) |
|
||||||
|
|
||||||
parse_w3cdtf is defined in rss_utils.inc, and parses RSS style dates into Unix |
|
||||||
epoch seconds. |
|
||||||
|
|
||||||
See: http://www.php.net/manual/en/ref.datetime.php |
|
||||||
@ -1,896 +0,0 @@ |
|||||||
<?php |
|
||||||
|
|
||||||
/************************************************* |
|
||||||
|
|
||||||
Snoopy - the PHP net client |
|
||||||
Author: Monte Ohrt <monte@ispi.net> |
|
||||||
Copyright (c): 1999-2000 ispi, all rights reserved |
|
||||||
Version: 1.0 |
|
||||||
|
|
||||||
* This library is free software; you can redistribute it and/or |
|
||||||
* modify it under the terms of the GNU Lesser General Public |
|
||||||
* License as published by the Free Software Foundation; either |
|
||||||
* version 2.1 of the License, or (at your option) any later version. |
|
||||||
* |
|
||||||
* This library is distributed in the hope that it will be useful, |
|
||||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
||||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
||||||
* Lesser General Public License for more details. |
|
||||||
* |
|
||||||
* You should have received a copy of the GNU Lesser General Public |
|
||||||
* License along with this library; if not, write to the Free Software |
|
||||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
||||||
|
|
||||||
You may contact the author of Snoopy by e-mail at: |
|
||||||
monte@ispi.net |
|
||||||
|
|
||||||
Or, write to: |
|
||||||
Monte Ohrt |
|
||||||
CTO, ispi |
|
||||||
237 S. 70th suite 220 |
|
||||||
Lincoln, NE 68510 |
|
||||||
|
|
||||||
The latest version of Snoopy can be obtained from: |
|
||||||
http://snoopy.sourceforge.com |
|
||||||
|
|
||||||
*************************************************/ |
|
||||||
|
|
||||||
class Snoopy { |
|
||||||
/**** Public variables ****/ |
|
||||||
|
|
||||||
/* user definable vars */ |
|
||||||
|
|
||||||
public $host = "www.php.net"; // host name we are connecting to |
|
||||||
public $port = 80; // port we are connecting to |
|
||||||
public $proxy_host = ""; // proxy host to use |
|
||||||
public $proxy_port = ""; // proxy port to use |
|
||||||
public $agent = "Snoopy v1.0"; // agent we masquerade as |
|
||||||
public $referer = ""; // referer info to pass |
|
||||||
public $cookies = array(); // array of cookies to pass |
|
||||||
// $cookies["username"]="joe"; |
|
||||||
public $rawheaders = array(); // array of raw headers to send |
|
||||||
// $rawheaders["Content-type"]="text/html"; |
|
||||||
|
|
||||||
public $maxredirs = 5; // http redirection depth maximum. 0 = disallow |
|
||||||
public $lastredirectaddr = ""; // contains address of last redirected address |
|
||||||
public $offsiteok = true; // allows redirection off-site |
|
||||||
public $maxframes = 0; // frame content depth maximum. 0 = disallow |
|
||||||
public $expandlinks = true; // expand links to fully qualified URLs. |
|
||||||
// this only applies to fetchlinks() |
|
||||||
// or submitlinks() |
|
||||||
public $passcookies = true; // pass set cookies back through redirects |
|
||||||
// NOTE: this currently does not respect |
|
||||||
// dates, domains or paths. |
|
||||||
|
|
||||||
public $user = ""; // user for http authentication |
|
||||||
public $pass = ""; // password for http authentication |
|
||||||
|
|
||||||
// http accept types |
|
||||||
public $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; |
|
||||||
|
|
||||||
public $results = ""; // where the content is put |
|
||||||
|
|
||||||
public $error = ""; // error messages sent here |
|
||||||
public $response_code = ""; // response code returned from server |
|
||||||
public $headers = array(); // headers returned from server sent here |
|
||||||
public $maxlength = 500000; // max return data length (body) |
|
||||||
public $read_timeout = 0; // timeout on read operations, in seconds |
|
||||||
// supported only since PHP 4 Beta 4 |
|
||||||
// set to 0 to disallow timeouts |
|
||||||
public $timed_out = false; // if a read operation timed out |
|
||||||
public $status = 0; // http request status |
|
||||||
|
|
||||||
public $curl_path = "/usr/bin/curl"; |
|
||||||
// Snoopy will use cURL for fetching |
|
||||||
// SSL content if a full system path to |
|
||||||
// the cURL binary is supplied here. |
|
||||||
// set to false if you do not have |
|
||||||
// cURL installed. See http://curl.haxx.se |
|
||||||
// for details on installing cURL. |
|
||||||
// Snoopy does *not* use the cURL |
|
||||||
// library functions built into php, |
|
||||||
// as these functions are not stable |
|
||||||
// as of this Snoopy release. |
|
||||||
|
|
||||||
// send Accept-encoding: gzip? |
|
||||||
public $use_gzip = true; |
|
||||||
|
|
||||||
/**** Private variables ****/ |
|
||||||
|
|
||||||
private $_maxlinelen = 4096; // max line length (headers) |
|
||||||
|
|
||||||
private $_httpmethod = "GET"; // default http request method |
|
||||||
private $_httpversion = "HTTP/1.0"; // default http request version |
|
||||||
private $_submit_method = "POST"; // default submit method |
|
||||||
private $_submit_type = "application/x-www-form-urlencoded"; // default submit type |
|
||||||
private $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type |
|
||||||
private $_redirectaddr = false; // will be set if page fetched is a redirect |
|
||||||
private $_redirectdepth = 0; // increments on an http redirect |
|
||||||
private $_frameurls = array(); // frame src urls |
|
||||||
private $_framedepth = 0; // increments on frame depth |
|
||||||
|
|
||||||
private $_isproxy = false; // set if using a proxy server |
|
||||||
private $_fp_timeout = 30; // timeout for socket connection |
|
||||||
|
|
||||||
/*======================================================================*\
    Function:   fetch
    Purpose:    fetch the contents of a web page over http or https,
                then follow any redirect (bounded by $maxredirs) and
                any frame sources (bounded by $maxframes)
    Input:      $URI    the location of the page to fetch
    Output:     true once a request has been attempted; false when the
                scheme is neither http nor https, when _connect()
                reports failure, or when https is requested and
                $curl_path is not an executable file
                $this->results  the output text from the fetch
                $this->error    set for a bad scheme / bad curl path
\*======================================================================*/

public function fetch($URI) {
    // parse_url() returns false for badly malformed input and omits every
    // component the URI does not contain, so never index it blindly.
    $URI_PARTS = parse_url($URI);
    if (!is_array($URI_PARTS))
        $URI_PARTS = array();

    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";

    // Request target used when talking to the origin server directly.
    // isset() (not truthiness) so that a query string of "0" survives.
    $path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
    if (isset($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];

    switch ($scheme) {
        case "http":
            $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $fp = false;
            if (!$this->_connect($fp))
                return false;

            // a proxy needs the entire URI, an origin server only the path
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);

            $this->_fetch_followups();
            return true;

        case "https":
            if (!$this->curl_path || (!is_executable($this->curl_path))) {
                $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                return false;
            }
            $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            // a proxy needs the entire URI, an origin server only the path
            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);

            $this->_fetch_followups();
            return true;

        default:
            // not a valid protocol (this is also where a URL given
            // without any "http://" prefix ends up)
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }
}

/*======================================================================*\
    Function:   _fetch_followups
    Purpose:    after a request has completed, follow a pending redirect
                and fetch any frame sources that were collected, each
                within its configured depth limit
\*======================================================================*/

private function _fetch_followups() {
    /* url was redirected, check if we've hit the max depth */
    if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
        // only follow the redirect if it stays on this host (either
        // scheme), or if off-site redirects are allowed
        $onsite = preg_match("|^https?://".preg_quote($this->host, "|")."|i", $this->_redirectaddr);
        if ($onsite || $this->offsiteok) {
            $this->_redirectdepth++;
            $this->lastredirectaddr = $this->_redirectaddr;
            $this->fetch($this->_redirectaddr);
        }
    }

    if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
        // take a private copy: the nested fetch() calls refill _frameurls
        $frameurls = $this->_frameurls;
        $this->_frameurls = array();

        foreach ($frameurls as $frameurl) {
            if ($this->_framedepth >= $this->maxframes)
                break;
            $this->fetch($frameurl);
            $this->_framedepth++;
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*======================================================================*\ |
|
||||||
Private functions |
|
||||||
\*======================================================================*/ |
|
||||||
|
|
||||||
|
|
||||||
/*======================================================================*\
    Function:   _striplinks
    Purpose:    strip the hyperlinks from an html document
    Input:      $document   document to strip.
    Output:     an array of the href values found: first all quoted
                ones, then all unquoted ones; an empty array when the
                document contains no links
\*======================================================================*/

private function _striplinks($document)
{
    // ".*?" is deliberately lazy: under the /s flag a greedy ".*" runs to
    // the LAST href in the document, so only one link would ever be found.
    preg_match_all("'<\s*a\s+.*?href\s*=\s*     # find <a href=
                    ([\"\'])?                   # find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                # quote, otherwise match up to next space
                    'isx",$document,$links);

    // catenate the non-empty matches from the conditional subpattern:
    // group 2 holds quoted values, group 3 unquoted ones
    $match = array();
    foreach (array(2, 3) as $group) {
        foreach ($links[$group] as $val) {
            if (!empty($val))
                $match[] = $val;
        }
    }

    // return the links
    return $match;
}
|
||||||
|
|
||||||
/*======================================================================*\
    Function:   _stripform
    Purpose:    pull the form-related tags (form, input, select,
                textarea, option) out of an html document
    Input:      $document   document to strip.
    Output:     a single string holding every matched element,
                joined with CRLF
\*======================================================================*/

private function _stripform($document)
{
    $formTags = array();
    preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$formTags);

    // index 0 carries the full text of each match
    return implode("\r\n",$formTags[0]);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*======================================================================*\
    Function:   _striptext
    Purpose:    strip the markup from an html document, leaving text
    Input:      $document   document to strip.
    Output:     the document with script blocks and tags removed,
                whitespace after a line break collapsed, and a small
                fixed set of entities decoded
\*======================================================================*/

private function _striptext($document)
{
    // Entities are listed one by one; only the more common ones are
    // covered. Patterns and replacements are paired by position.
    $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                    "'([\r\n])[\s]+'",                  // strip out white space
                    // the real entity is &quot;; the historical misspelling
                    // "quote" is still accepted for compatibility
                    "'&(quote?|#34);'i",                // replace html entities
                    "'&(amp|#38);'i",
                    "'&(lt|#60);'i",
                    "'&(gt|#62);'i",
                    "'&(nbsp|#160);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i"
                    );
    $replace = array(   "",
                        "",
                        "\\1",          // keep the line break itself
                        "\"",
                        "&",
                        "<",
                        ">",
                        " ",
                        chr(161),       // single-byte character codes
                        chr(162),
                        chr(163),
                        chr(169));

    $text = preg_replace($search,$replace,$document);

    return $text;
}
|
||||||
|
|
||||||
/*======================================================================*\
    Function:   _expandlinks
    Purpose:    turn each link into a fully qualified URL, resolved
                against the directory of $URI
    Input:      $links  the links to qualify (string or array)
                $URI    the full URI to get the base from
    Output:     the expanded link(s), as returned by preg_replace()
\*======================================================================*/

private function _expandlinks($links,$URI)
{
    // the URI up to (not including) any query string
    preg_match("/^[^\?]+/",$URI,$uriNoQuery);

    // drop a trailing "/name.ext" segment to get the base location
    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$uriNoQuery[0]);

    $patterns = array();
    $substitutes = array();

    // 1. remove an explicit reference to the current host
    $patterns[] = "|^http://".preg_quote($this->host)."|i";
    $substitutes[] = "";

    // 2. prefix the base onto anything that is not http:// or mailto:
    $patterns[] = "|^(?!http://)(\/)?(?!mailto:)|i";
    $substitutes[] = $base."/";

    // 3. collapse "/./"
    $patterns[] = "|/\./|";
    $substitutes[] = "/";

    // 4. collapse "/segment/../"
    $patterns[] = "|/[^\/]+/\.\./|";
    $substitutes[] = "/";

    return preg_replace($patterns,$substitutes,$links);
}
|
||||||
|
|
||||||
/*======================================================================*\
    Function:   _httprequest
    Purpose:    send one http request over an open connection and read
                back the response headers and body
    Input:      $url            the request target to put on the request line
                $fp             the current open file pointer
                $URI            the full URI
                $http_method    request method, e.g. "GET"
                $content_type   Content-type to send, if any
                $body           body contents to send if any (POST)
    Output:     true when the response was read; false (with
                $this->status = -100) when a read timed out
                $this->headers, $this->status, $this->response_code,
                $this->results and $this->_redirectaddr are updated
\*======================================================================*/

private function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    // parse_url() may return false or leave "scheme" out
    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";

    if(empty($url))
        $url = "/";

    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if(!empty($this->host) && !isset($this->rawheaders['Host']))
        $headers .= "Host: ".$this->host."\r\n";
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";

    if($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        if ( function_exists('gzinflate') ) {
            $headers .= "Accept-encoding: gzip\r\n";
        }
        else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support.".
                " Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }

    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";

    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if ( count($this->cookies) > 0 ) {
            $cookie_pairs = array();
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
            }
            $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
        }
    }

    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach always starts at the first element, so the raw headers
        // are sent on every request, including redirects
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }

    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

    // blank line terminates the header section
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        stream_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp, $headers.$body);

    $this->_redirectaddr = false;
    // reset rather than unset(), so the property stays an array even
    // when the server sends no header lines at all
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while(($currentHeader = fgets($fp,$this->_maxlinelen)) !== false)
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // an empty line marks the end of the headers
        if(preg_match("/^\r?\n$/", $currentHeader) )
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // Header names are case-insensitive, so detect and extract with
        // one case-insensitive pattern.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($currentHeader),$matches))
        {
            $target = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader) ) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks, stopping once more than maxlength
    // bytes have been collected
    $results = "";
    while ( $data = fread($fp, $this->maxlength) ) {
        $results .= $data;
        if ( strlen($results) > $this->maxlength ) {
            break;
        }
    }

    // gunzip, but only when zlib is actually available; otherwise the
    // body is left exactly as received
    if ( $is_gzipped && function_exists('gzinflate') ) {
        // per http://www.php.net/manual/en/function.gzencode.php
        // NOTE(review): skipping 10 bytes assumes a gzip header with no
        // optional fields -- confirm against the servers in use
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        // results starts life as a string; "[]" on a string is an error
        if(!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
|
||||||
|
|
||||||
/*======================================================================*\
    Function:   _httpsrequest
    Purpose:    go get the https data from the server by running the
                curl binary at $this->curl_path
    Input:      $url            the request target (not used here: curl
                                is always given the full $URI)
                $URI            the full URI
                $http_method    request method (not passed to curl)
                $content_type   Content-type to send, if any
                $body           body contents to send if any (POST)
    Output:     true on success; false with $this->error set when the
                temporary header file cannot be created or curl exits
                with a non-zero status
                $this->headers, $this->status, $this->response_code,
                $this->results and $this->_redirectaddr are updated
\*======================================================================*/

private function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    // parse_url() may return false or leave "scheme" out
    $URI_PARTS = parse_url($URI);
    $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";

    if(empty($url))
        $url = "/";

    // GET ... header not needed for curl
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
        $headers[] = "Host: ".$this->host;
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;

    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        if ( count($this->cookies) > 0 ) {
            $cookie_pairs = array();
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
            }
            $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
        }
    }

    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }

    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Every caller-influenced value is passed through escapeshellarg():
    // header values, the body and the URI can all carry remote data
    // (redirect targets, cookies), and wrapping them in plain double
    // quotes lets the shell expand $(...) and backticks.
    $cmdline_params = "";
    foreach ($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    // accept self-signed certs
    $cmdline_params .= " -k";

    // curl dumps the response headers here; tempnam() creates a fresh,
    // uniquely named file instead of a guessable name under /tmp
    $headerfile = tempnam(sys_get_temp_dir(), "snoopy");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    $results = array();
    $return = 0;
    exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        // do not leave the header file behind on failure
        unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    // exec() hands back the body as an array of lines
    $results = implode("\r\n",$results);

    $result_headers = file($headerfile);
    unlink($headerfile);
    if($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    // reset rather than unset(), so the property is always an array
    $this->headers = array();

    foreach ($result_headers as $headerLine)
    {
        // if a header begins with Location: or URI:, set the redirect.
        // Header names are case-insensitive, so detect and extract with
        // one case-insensitive pattern.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($headerLine),$matches))
        {
            $target = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        if(preg_match("|^HTTP/|",$headerLine))
        {
            $this->response_code = $headerLine;
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
            {
                $this->status= $match[1];
            }
        }
        $this->headers[] = $headerLine;
    }

    // check if there is a redirect meta tag
    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        // results starts life as a string; "[]" on a string is an error
        if(!is_array($this->results))
            $this->results = array();
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
|
||||||
|
|
||||||
/*======================================================================*\ |
|
||||||
Function: setcookies() |
|
||||||
Purpose: set cookies for a redirection |
|
||||||
\*======================================================================*/ |
|
||||||
|
|
||||||
/**
 * Collect cookies from the stored response headers so they can be sent
 * along with a follow-up (redirected) request.
 *
 * Each header line of the form "Set-Cookie: name=value[; ...]" stores
 * value under name in $this->cookies; everything after the first ';'
 * is ignored.
 *
 * @return void
 */
public function setcookies()
{
    // The headers property can be missing or empty when a response
    // carried no header lines; counting it would then fail, so bail out.
    if (empty($this->headers) || !is_array($this->headers)) {
        return;
    }

    foreach ($this->headers as $header) {
        if (preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", $header, $match)) {
            $this->cookies[$match[1]] = $match[2];
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
/*======================================================================*\ |
|
||||||
Function: _check_timeout |
|
||||||
Purpose: checks whether timeout has occurred |
|
||||||
Input: $fp file pointer |
|
||||||
\*======================================================================*/ |
|
||||||
|
|
||||||
/**
 * Tell whether the given socket has timed out while reading.
 *
 * The check is only performed when $this->read_timeout is positive.
 * On a timeout $this->timed_out is set to true.
 *
 * @param resource $fp socket/stream pointer to inspect
 * @return bool true when the stream reports a timeout, false otherwise
 */
private function _check_timeout($fp)
{
    // a non-positive read_timeout switches the check off
    if (!($this->read_timeout > 0)) {
        return false;
    }

    $stream_state = socket_get_status($fp);
    if (!$stream_state["timed_out"]) {
        return false;
    }

    $this->timed_out = true;
    return true;
}
|
||||||
|
|
||||||
/*======================================================================*\ |
|
||||||
Function: _connect |
|
||||||
Purpose: make a socket connection |
|
||||||
Input: $fp file pointer |
|
||||||
\*======================================================================*/ |
|
||||||
|
|
||||||
/**
 * Open a socket connection to the target host, or to the proxy when both
 * proxy_host and proxy_port are configured.
 *
 * On success $fp holds the opened socket. On failure $this->status is set
 * to the error number reported by fsockopen() and $this->error to a
 * matching human-readable message.
 *
 * @param resource $fp receives the socket pointer (passed by reference)
 * @return bool true when the connection was established, false otherwise
 */
private function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    } else {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if ($fp) {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;

    // Each case must end with break: without it every branch falls
    // through to the default and the specific message is overwritten.
    switch ($errno) {
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (".$errno.")";
            break;
    }

    return false;
}
|
||||||
/*======================================================================*\ |
|
||||||
Function: _disconnect |
|
||||||
Purpose: disconnect a socket connection |
|
||||||
Input: $fp file pointer |
|
||||||
\*======================================================================*/ |
|
||||||
|
|
||||||
/**
 * Close a socket previously opened for a request.
 *
 * @param resource $fp socket pointer to close
 * @return bool the result of fclose()
 */
private function _disconnect($fp)
{
    $closed = fclose($fp);

    return $closed;
}
|
||||||
|
|
||||||
|
|
||||||
/*======================================================================*\ |
|
||||||
Function: _prepare_post_body |
|
||||||
Purpose: Prepare post body according to encoding type |
|
||||||
Input: $formvars - form variables |
|
||||||
$formfiles - form upload files |
|
||||||
Output: post body |
|
||||||
\*======================================================================*/ |
|
||||||
|
|
||||||
/**
 * Build the request body for a form submission, encoded according to
 * $this->_submit_type.
 *
 * Supported encodings are "application/x-www-form-urlencoded" and
 * "multipart/form-data". For multipart bodies a fresh boundary is stored
 * in $this->_mime_boundary. Array/object values are sent as repeated
 * "name[]" fields. Files that cannot be read are skipped.
 *
 * @param mixed $formvars  form variables (name => value, or name => list of values)
 * @param mixed $formfiles upload files (field name => file path or list of paths)
 * @return string|null the encoded body; null when there is nothing to send;
 *                     an empty string for an unsupported submit type
 */
private function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    // start from an empty string so the .= appends below never touch an
    // undefined variable
    $postdata = '';

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else {
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // NOTE(review): PHP does not treat \[ or \] as escapes,
                        // so the backslashes are sent literally in the field
                        // name; kept byte-for-byte - confirm whether this is
                        // intended before changing it.
                        $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // file_get_contents copes with empty files and reports
                    // failure instead of handing back a broken handle
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;

                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
|
||||||
} |
|
||||||
@ -1,6 +0,0 @@ |
|||||||
<html> |
|
||||||
<head> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
</body> |
|
||||||
</html> |
|
||||||
@ -1,6 +0,0 @@ |
|||||||
<html> |
|
||||||
<head> |
|
||||||
</head> |
|
||||||
<body> |
|
||||||
</body> |
|
||||||
</html> |
|
||||||
@ -1,200 +0,0 @@ |
|||||||
<?php |
|
||||||
/** |
|
||||||
* Project: MagpieRSS: a simple RSS integration tool |
|
||||||
* File: rss_cache.inc, a simple, rolling(no GC), cache |
|
||||||
* for RSS objects, keyed on URL. |
|
||||||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
|
||||||
* Version: 0.51 |
|
||||||
* License: GPL |
|
||||||
* |
|
||||||
* The latest version of MagpieRSS can be obtained from: |
|
||||||
* http://magpierss.sourceforge.net |
|
||||||
* |
|
||||||
* For questions, help, comments, discussion, etc., please join the |
|
||||||
* Magpie mailing list: |
|
||||||
* http://lists.sourceforge.net/lists/listinfo/magpierss-general |
|
||||||
* @package chamilo.include.rss |
|
||||||
*/ |
|
||||||
/** |
|
||||||
* @package chamilo.include.rss |
|
||||||
*/ |
|
||||||
/**
 * Simple file-based cache for parsed RSS objects, keyed on the feed URL.
 *
 * Each entry is stored as a serialized blob in BASE_CACHE under the md5
 * of its key. There is no garbage collection; freshness is judged from
 * the file modification time against MAX_AGE.
 */
class RSSCache {
    public $BASE_CACHE = './cache';    // where the cache files are stored
    public $MAX_AGE    = 3600;         // when are files stale, default one hour
    public $ERROR      = "";           // last error message

    /**
     * Configure the cache and try to create the cache directory.
     *
     * @param string $base cache directory; empty keeps the default
     * @param mixed  $age  maximum entry age in seconds; empty keeps the default
     */
    public function __construct ($base='', $age='') {
        if ( $base ) {
            $this->BASE_CACHE = $base;
        }
        if ( $age ) {
            $this->MAX_AGE = $age;
        }

        // attempt to make the cache directory
        if ( ! file_exists( $this->BASE_CACHE ) ) {
            $status = @mkdir( $this->BASE_CACHE, 0755 );

            // if make failed
            if ( ! $status ) {
                $this->error(
                    "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
                );
            }
        }
    }

    /**
     * Legacy PHP4-style constructor name, kept so existing explicit calls
     * keep working; it simply forwards to __construct().
     *
     * @param string $base cache directory
     * @param mixed  $age  maximum entry age in seconds
     */
    public function RSSCache ($base='', $age='') {
        $this->__construct($base, $age);
    }

    /**
     * Add an item to the cache, keyed on url.
     *
     * @param string $url key (url from which the rss file was fetched)
     * @param mixed  $rss object to store
     * @return string|int path of the cache file on success, 0 on failure
     */
    public function set ($url, $rss) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );
        $fp = @fopen( $cache_file, 'w' );

        if ( ! $fp ) {
            $this->error(
                "Cache unable to open file for writing: $cache_file"
            );
            return 0;
        }

        $data = $this->serialize( $rss );
        fwrite( $fp, $data );
        fclose( $fp );

        return $cache_file;
    }

    /**
     * Fetch an item from the cache.
     *
     * @param string $url key (url from which the rss file was fetched)
     * @return mixed cached object on a hit; 0 when the entry is missing,
     *               unreadable or empty
     */
    public function get ($url) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );

        if ( ! file_exists( $cache_file ) ) {
            $this->debug(
                "Cache doesn't contain: $url (cache file: $cache_file)"
            );
            return 0;
        }

        // read in one call so no file handle is left open
        $data = @file_get_contents( $cache_file );
        if ( $data === false ) {
            $this->error(
                "Failed to open cache file for reading: $cache_file"
            );
            return 0;
        }

        if ( $data === '' ) {
            return 0;
        }

        return $this->unserialize( $data );
    }

    /**
     * Check a url for membership in the cache and whether the entry is
     * older than MAX_AGE.
     *
     * @param string $url key (url from which the rss file was fetched)
     * @return string 'HIT' (present and current), 'STALE' (present but old)
     *                or 'MISS' (absent)
     */
    public function check_cache ( $url ) {
        $this->ERROR = "";
        $filename = $this->file_name( $url );

        if ( ! file_exists( $filename ) ) {
            // object does not exist
            return 'MISS';
        }

        // find how long ago the file was added to the cache
        // and whether that is longer than MAX_AGE
        $age = time() - filemtime( $filename );

        return ( $this->MAX_AGE > $age ) ? 'HIT' : 'STALE';
    }

    /**
     * Age of a cache entry.
     *
     * @param string $url key (url from which the rss file was fetched)
     * @return int seconds since the entry was written, or -1 when absent
     */
    public function cache_age( $url ) {
        $filename = $this->file_name( $url );
        if ( ! file_exists( $filename ) ) {
            return -1;
        }

        return time() - filemtime( $filename );
    }

    /**
     * Turn an object into its stored representation.
     *
     * @param mixed $rss object to store
     * @return string serialized form
     */
    public function serialize ( $rss ) {
        return serialize( $rss );
    }

    /**
     * Rebuild an object from its stored representation.
     *
     * NOTE(review): unserialize() on data that someone else can write is
     * unsafe; this presumes the cache directory is only written by this
     * class - confirm the directory permissions.
     *
     * @param string $data serialized form
     * @return mixed the restored value
     */
    public function unserialize ( $data ) {
        return unserialize( $data );
    }

    /**
     * Map a url to its location in the cache.
     *
     * @param string $url key (url from which the rss file was fetched)
     * @return string path of the cache file
     */
    public function file_name ($url) {
        $filename = md5( $url );
        return implode( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
    }

    /**
     * Register an error: remember it in ERROR and report it, through
     * trigger_error() when MAGPIE_DEBUG is on, otherwise via error_log().
     *
     * @param string $errormsg message to record
     * @param int    $lvl      error level used with trigger_error()
     * @return void
     */
    public function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        // NOTE(review): $php_errormsg looks never to be set in this scope,
        // so this branch is probably inert - confirm before relying on it.
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        $this->ERROR = $errormsg;
        if ( MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }
    }

    /**
     * Report a debug message; does nothing unless MAGPIE_DEBUG is on.
     *
     * @param string $debugmsg message to report
     * @param int    $lvl      error level passed on to error()
     * @return void
     */
    public function debug ($debugmsg, $lvl=E_USER_NOTICE) {
        if ( MAGPIE_DEBUG ) {
            $this->error("MagpieRSS [debug] $debugmsg", $lvl);
        }
    }

}
|
||||||
@ -1,459 +0,0 @@ |
|||||||
<?php |
|
||||||
/** |
|
||||||
* Project: MagpieRSS: a simple RSS integration tool |
|
||||||
* File: rss_fetch.inc, a simple functional interface |
|
||||||
to fetching and parsing RSS files, via the |
|
||||||
function fetch_rss() |
|
||||||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
|
||||||
* License: GPL |
|
||||||
* |
|
||||||
* The latest version of MagpieRSS can be obtained from: |
|
||||||
* http://magpierss.sourceforge.net |
|
||||||
* |
|
||||||
* For questions, help, comments, discussion, etc., please join the |
|
||||||
* Magpie mailing list: |
|
||||||
* magpierss-general@lists.sourceforge.net |
|
||||||
* @package chamilo.include.rss |
|
||||||
*/ |
|
||||||
/** |
|
||||||
* Code |
|
||||||
*/ |
|
||||||
// Setup MAGPIE_DIR for use on hosts that don't include |
|
||||||
// the current path in include_path. |
|
||||||
// with thanks to rajiv and smarty |
|
||||||
if (!defined('DIR_SEP')) { |
|
||||||
define('DIR_SEP', DIRECTORY_SEPARATOR); |
|
||||||
} |
|
||||||
|
|
||||||
if (!defined('MAGPIE_DIR')) { |
|
||||||
define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP); |
|
||||||
} |
|
||||||
|
|
||||||
require_once( MAGPIE_DIR . 'rss_parse.inc' ); |
|
||||||
require_once( MAGPIE_DIR . 'rss_cache.inc' ); |
|
||||||
|
|
||||||
// for including 3rd party libraries |
|
||||||
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); |
|
||||||
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); |
|
||||||
define('MAGPIE_CACHE_DIR', api_get_path(SYS_ARCHIVE_PATH)); |
|
||||||
|
|
||||||
/* |
|
||||||
* CONSTANTS - redefine these in your script to change the |
|
||||||
* behaviour of fetch_rss() currently, most options effect the cache |
|
||||||
* |
|
||||||
* MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? |
|
||||||
* For me a built in cache was essential to creating a "PHP-like" |
|
||||||
* feel to Magpie, see rss_cache.inc for rationale |
|
||||||
* |
|
||||||
* |
|
||||||
* MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects? |
|
||||||
* This should be a location that the webserver can write to. If this |
|
||||||
* directory does not already exist Magpie will try to be smart and create |
|
||||||
* it. This will often fail for permissions reasons. |
|
||||||
* |
|
||||||
* |
|
||||||
* MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds. |
|
||||||
* |
|
||||||
* |
|
||||||
* MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error |
|
||||||
* instead of returning stale object? |
|
||||||
* |
|
||||||
* MAGPIE_DEBUG - Display debugging notices? |
|
||||||
* |
|
||||||
*/ |
|
||||||
|
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: fetch_rss: |
|
||||||
Purpose: return RSS object for the given url |
|
||||||
maintain the cache |
|
||||||
Input: url of RSS file |
|
||||||
Output: parsed RSS object (see rss_parse.inc) |
|
||||||
|
|
||||||
NOTES ON CACHEING: |
|
||||||
If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. |
|
||||||
|
|
||||||
NOTES ON RETRIEVING REMOTE FILES: |
|
||||||
If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
|
||||||
return a cached object, and touch the cache object upon receiving a |
|
||||||
304. |
|
||||||
|
|
||||||
NOTES ON FAILED REQUESTS: |
|
||||||
If there is an HTTP error while fetching an RSS object, the cached |
|
||||||
version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) |
|
||||||
\*=======================================================================*/ |
|
||||||
|
|
||||||
define('MAGPIE_VERSION', '0.72'); |
|
||||||
|
|
||||||
$MAGPIE_ERROR = ""; |
|
||||||
|
|
||||||
/**
 * Return the parsed RSS object for a url, maintaining the cache.
 *
 * With the cache off the feed is fetched and parsed directly. With the
 * cache on the flow is:
 *   1. check the cache
 *   2. on a fresh hit, return the cached object
 *   3. otherwise fetch the remote file (conditionally when a stale copy
 *      with validators exists)
 *   4. if the fetch fails, return the stale object unless
 *      MAGPIE_CACHE_FRESH_ONLY is set, else false
 *
 * @param string $url url of the RSS file
 * @return mixed parsed RSS object, or false on failure
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // if cache is disabled: fetch file, and parse it
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }
        error("Failed to fetch $url and cache is off");
        return false;
    }

    // else cache is ON
    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    $cache_status    = 0;       // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss             = 0;       // parsed RSS object
    $errormsg        = 0;       // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // Strict comparisons: before PHP 8 the initial integer 0 compares
    // loosely equal to any non-numeric string, which would send a broken
    // cache down the HIT and STALE paths.

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get: send whichever validators the
    // stale copy carries
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss ) {
            if ( !empty($rss->etag) ) {
                $request_headers['If-None-Match'] = $rss->etag;
            }
            if ( !empty($rss->last_modified) ) {
                // the standard conditional header is If-Modified-Since
                $request_headers['If-Modified-Since'] = $rss->last_modified;
            }
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        if ($resp->status == '304' ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            $rss = _response_to_rss( $resp );
            if ( $rss ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $rss );
                return $rss;
            }
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                // compensate for Snoopy's annoying habit of tacking
                // on '\n'
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .= "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return cached object, unless the caller asked for
    // fresh data only
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    //hide the error
    //error( $errormsg );

    return false;
} // end fetch_rss()
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: error |
|
||||||
Purpose: set MAGPIE_ERROR, and trigger error |
|
||||||
\*=======================================================================*/ |
|
||||||
|
|
||||||
/**
 * Record a Magpie error in the global $MAGPIE_ERROR and raise it through
 * trigger_error(). An empty message is ignored.
 *
 * @param string $errormsg message to report (gets a "MagpieRSS: " prefix)
 * @param int    $lvl      error level handed to trigger_error()
 * @return void
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // append PHP's error message if track_errors enabled
    if ( isset($php_errormsg) ) {
        $errormsg .= " ($php_errormsg)";
    }

    // nothing to report
    if ( !$errormsg ) {
        return;
    }

    $MAGPIE_ERROR = "MagpieRSS: $errormsg";
    trigger_error( $MAGPIE_ERROR, $lvl);
}
|
||||||
|
|
||||||
/**
 * Raise a debug message through trigger_error(), prefixed with
 * "MagpieRSS [debug] ".
 *
 * @param string $debugmsg message to report
 * @param int    $lvl      error level handed to trigger_error()
 * @return void
 */
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    $message = "MagpieRSS [debug] " . $debugmsg;

    trigger_error($message, $lvl);
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: magpie_error |
|
||||||
Purpose: accessor for the magpie error variable |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * Accessor for the global Magpie error message.
 *
 * When a non-empty message is passed it replaces the stored one first.
 *
 * @param string $errormsg optional new message
 * @return mixed the current value of $MAGPIE_ERROR
 */
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    // only a set, truthy message overwrites the stored one
    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: _fetch_remote_file |
|
||||||
Purpose: retrieve an arbitrary remote file |
|
||||||
Input: url of the remote file |
|
||||||
headers to send along with the request (optional) |
|
||||||
Output: an HTTP response object (see Snoopy.class.inc) |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * Retrieve an arbitrary remote file with Snoopy.
 *
 * @param string $url     url of the remote file
 * @param mixed  $headers headers to send along with the request; only
 *                        used when given as an array
 * @return Snoopy the client object after the fetch, acting as the
 *                HTTP response
 */
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is an HTTP client in PHP
    $snoopy = new Snoopy();

    $snoopy->agent        = MAGPIE_USER_AGENT;
    $snoopy->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $snoopy->use_gzip     = MAGPIE_USE_GZIP;

    if ( is_array($headers) ) {
        $snoopy->rawheaders = $headers;
    }

    // errors raised by the fetch are suppressed; callers inspect the
    // returned client instead
    @$snoopy->fetch($url);

    return $snoopy;
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: _response_to_rss |
|
||||||
Purpose: parse an HTTP response object into an RSS object |
|
||||||
Input: an HTTP response object (see Snoopy) |
|
||||||
Output: parsed RSS object (see rss_parse) |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * Parse an HTTP response object into an RSS object.
 *
 * On success the ETag and Last-Modified response headers, when present,
 * are copied onto the RSS object as etag and last_modified.
 *
 * @param object $resp HTTP response object (see Snoopy); its results
 *                     and headers properties are read
 * @return mixed parsed RSS object, or false when parsing failed
 */
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // parsing failed: construct error message
    if ( !$rss or $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";

        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg, E_USER_NOTICE);

        return false;
    }

    // a response without header lines may have no usable headers list
    $headers = ( isset($resp->headers) && is_array($resp->headers) )
        ? $resp->headers
        : array();

    // find ETag and Last-Modified
    foreach ($headers as $h) {
        // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
        if (strpos($h, ": ")) {
            list($field, $val) = explode(": ", $h, 2);
        }
        else {
            $field = $h;
            $val = "";
        }

        // header lines may still carry their line ending; strip it so the
        // value can be sent back as a request header later
        $field = trim($field);
        $val   = trim($val);

        // header field names are case-insensitive
        if ( strcasecmp($field, 'ETag') === 0 ) {
            $rss->etag = $val;
        }

        if ( strcasecmp($field, 'Last-Modified') === 0 ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: init |
|
||||||
Purpose: setup constants with default values |
|
||||||
check for user overrides |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * Set up the MAGPIE_* constants with their default values, leaving any
 * constant the calling script has already defined untouched.
 *
 * Runs only once; later calls return immediately.
 *
 * @return void
 */
function init () {
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // defaults that do not depend on any other constant
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,  // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_OUTPUT_ENCODING'  => 'UTF-8',
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_FETCH_TIME_OUT'   => 5,      // 5 second timeout
        // use gzip encoding to fetch rss files if supported?
        'MAGPIE_USE_GZIP'         => true,
    );

    foreach ($defaults as $name => $value) {
        if ( !defined($name) ) {
            define($name, $value);
        }
    }

    // the user agent advertises whether the cache is in use, so it is
    // built after MAGPIE_CACHE_ON has a value
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $suffix = MAGPIE_CACHE_ON ? ')' : '; No cache)';
        define(
            'MAGPIE_USER_AGENT',
            'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $suffix
        );
    }
}
|
||||||
|
|
||||||
// NOTE: the following code should really be in Snoopy, or at least |
|
||||||
// somewhere other then rss_fetch! |
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
HTTP STATUS CODE PREDICATES |
|
||||||
These functions attempt to classify an HTTP status code |
|
||||||
based on RFC 2616 and RFC 2518. |
|
||||||
|
|
||||||
All of them take an HTTP status code as input, and return true or false |
|
||||||
|
|
||||||
All this code is adapted from LWP's HTTP::Status. |
|
||||||
\*=======================================================================*/ |
|
||||||
|
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: is_info |
|
||||||
Purpose: return true if Informational status code |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * True for an Informational status code (100 up to, not including, 200).
 *
 * @param mixed $sc HTTP status code
 * @return bool
 */
function is_info ($sc) {
    if ( !($sc >= 100) ) {
        return false;
    }

    return $sc < 200;
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: is_success |
|
||||||
Purpose: return true if Successful status code |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * True for a Successful status code (200 up to, not including, 300).
 *
 * @param mixed $sc HTTP status code
 * @return bool
 */
function is_success ($sc) {
    if ( !($sc >= 200) ) {
        return false;
    }

    return $sc < 300;
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: is_redirect |
|
||||||
Purpose: return true if Redirection status code |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * True for a Redirection status code (300 up to, not including, 400).
 *
 * @param mixed $sc HTTP status code
 * @return bool
 */
function is_redirect ($sc) {
    if ( !($sc >= 300) ) {
        return false;
    }

    return $sc < 400;
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: is_error |
|
||||||
Purpose: return true if Error status code |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * True for an Error status code (400 up to, not including, 600).
 *
 * @param mixed $sc HTTP status code
 * @return bool
 */
function is_error ($sc) {
    if ( !($sc >= 400) ) {
        return false;
    }

    return $sc < 600;
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: is_client_error |
|
||||||
Purpose: return true if Error status code, and its a client error |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * True for a client Error status code (400 up to, not including, 500).
 *
 * @param mixed $sc HTTP status code
 * @return bool
 */
function is_client_error ($sc) {
    if ( !($sc >= 400) ) {
        return false;
    }

    return $sc < 500;
}
|
||||||
|
|
||||||
/*=======================================================================*\ |
|
||||||
Function: is_server_error |
|
||||||
Purpose: return true if Error status code, and its a server error |
|
||||||
\*=======================================================================*/ |
|
||||||
/**
 * True for a server Error status code (500 up to, not including, 600).
 *
 * @param mixed $sc HTTP status code
 * @return bool
 */
function is_server_error ($sc) {
    if ( !($sc >= 500) ) {
        return false;
    }

    return $sc < 600;
}
|
||||||
@ -1,605 +0,0 @@ |
|||||||
<?php |
|
||||||
/** |
|
||||||
* Project: MagpieRSS: a simple RSS integration tool |
|
||||||
* File: rss_parse.inc - parse an RSS or Atom feed |
|
||||||
* return as a simple object. |
|
||||||
* |
|
||||||
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3 |
|
||||||
* |
|
||||||
* The latest version of MagpieRSS can be obtained from: |
|
||||||
* http://magpierss.sourceforge.net |
|
||||||
* |
|
||||||
* For questions, help, comments, discussion, etc., please join the |
|
||||||
* Magpie mailing list: |
|
||||||
* magpierss-general@lists.sourceforge.net |
|
||||||
* |
|
||||||
* @author Kellan Elliott-McCrea <kellan@protest.net> |
|
||||||
* @version 0.7a |
|
||||||
* @license GPL |
|
||||||
* @package chamilo.include.rss |
|
||||||
*/ |
|
||||||
/** |
|
||||||
* Code |
|
||||||
*/ |
|
||||||
// Feed-type markers stored in MagpieRSS::$feed_type.
define('RSS', 'RSS');
define('ATOM', 'Atom');

// Date helper (parse_w3cdtf) used when normalizing parsed items.
require_once MAGPIE_DIR . 'rss_utils.inc';
|
||||||
|
|
||||||
/** |
|
||||||
* Hybrid parser, and object, takes RSS as a string and returns a simple object. |
|
||||||
* |
|
||||||
* see: rss_fetch.inc for a simpler interface with integrated caching support |
|
||||||
* |
|
||||||
* @package chamilo.include.rss |
|
||||||
*/ |
|
||||||
class MagpieRSS {
    /** XML parser handle created by create_parser() (resource, or XMLParser object on PHP 8+). */
    public $parser;

    public $current_item = array(); // item currently being parsed
    public $items        = array(); // collection of parsed items
    public $channel      = array(); // hash of channel fields
    public $textinput    = array();
    public $image        = array();
    public $feed_type;              // RSS or ATOM, set from the root element
    public $feed_version;           // value of the root element's "version" attribute ('1.0' for RDF)
    public $encoding     = '';      // output encoding of parsed rss

    private $_source_encoding = ''; // only set if we have to parse xml prolog

    public $ERROR   = "";
    public $WARNING = "";

    // Atom elements whose children are flattened into a single text value.
    private $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');
    // Encodings that can be passed to xml_parser_create() directly.
    private $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1');

    // parser variables, useless if you're not a parser, treat as private
    public $stack             = array(); // parser stack, stack[0] is the current element
    public $inchannel         = false;
    public $initem            = false;
    public $incontent         = false;   // name of the Atom content construct being read, or false
    public $intextinput       = false;
    public $inimage           = false;
    public $current_namespace = false;

    /**
     * Set up the XML parser, parse $source, and populate this object.
     *
     * On a setup failure the message is recorded through error() and the
     * object is left unpopulated; check $this->ERROR after construction.
     *
     * @param string $source          string containing the RSS/Atom to be parsed
     * @param string $output_encoding character set for the parsed values
     *                                (defaults to ISO-8859-1)
     * @param string $input_encoding  character set of the incoming source;
     *                                leave empty to let it be detected
     * @param bool   $detect_encoding if false no attempt is made to detect
     *                                the source encoding
     */
    public function __construct($source, $output_encoding='ISO-8859-1',
                                $input_encoding=null, $detect_encoding=true)
    {
        // Without the XML extension nothing below can work, so stop here
        // instead of running into an undefined-function fatal error.
        if (!function_exists('xml_parser_create')) {
            $this->error( "Failed to load PHP's XML Extension. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        list($parser, $source) = $this->create_parser($source,
            $output_encoding, $input_encoding, $detect_encoding);

        // xml_parser_create() returns a resource before PHP 8 and an
        // XMLParser object from PHP 8 on; accept both.
        if (!is_resource($parser) && !is_object($parser)) {
            $this->error( "Failed to create an instance of PHP's XML parser. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        $this->parser = $parser;

        // Register the handlers as (object, method) callables so that no
        // separate xml_set_object() call is needed.
        xml_set_element_handler($this->parser,
            array($this, 'feed_start_element'),
            array($this, 'feed_end_element'));
        xml_set_character_data_handler($this->parser, array($this, 'feed_cdata'));

        $status = xml_parse( $this->parser, $source );

        if (! $status ) {
            $errorcode = xml_get_error_code( $this->parser );
            if ( $errorcode != XML_ERROR_NONE ) {
                $xml_error  = xml_error_string( $errorcode );
                $error_line = xml_get_current_line_number($this->parser);
                $error_col  = xml_get_current_column_number($this->parser);
                $errormsg   = "$xml_error at line $error_line, column $error_col";

                $this->error( $errormsg );
            }
        }

        xml_parser_free( $this->parser );

        $this->normalize();
    }

    /**
     * Legacy PHP4-style constructor name, kept so existing code that calls
     * it explicitly keeps working. Forwards to __construct().
     */
    public function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                               $input_encoding=null, $detect_encoding=true)
    {
        self::__construct($source, $output_encoding, $input_encoding, $detect_encoding);
    }

    /**
     * Start-tag handler: identifies the feed type from the root element and
     * tracks which section (channel, item, textinput, image, Atom content)
     * subsequent character data belongs to.
     *
     * @param mixed  $p       parser handle (unused)
     * @param string $element element name as reported by the parser
     * @param array  $attrs   attribute name => value
     */
    public function feed_start_element($p, $element, $attrs) {
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }

        // if feed type isn't set, then this is first element of feed
        // identify feed from root element
        if (!isset($this->feed_type) ) {
            $version = isset($attrs['version']) ? $attrs['version'] : null;
            if ( $el == 'rdf' ) {
                $this->feed_type = RSS;
                $this->feed_version = '1.0';
            }
            elseif ( $el == 'rss' ) {
                $this->feed_type = RSS;
                $this->feed_version = $version;
            }
            elseif ( $el == 'feed' ) {
                $this->feed_type = ATOM;
                $this->feed_version = $version;
                $this->inchannel = true;
            }
            return;
        }

        if ( $el == 'channel' )
        {
            $this->inchannel = true;
        }
        elseif ($el == 'item' or $el == 'entry' )
        {
            $this->initem = true;
            if ( isset($attrs['rdf:about']) ) {
                $this->current_item['about'] = $attrs['rdf:about'];
            }
        }

        // if we're in the default namespace of an RSS feed,
        // record textinput or image fields
        elseif (
            $this->feed_type == RSS and
            $this->current_namespace == '' and
            $el == 'textinput' )
        {
            $this->intextinput = true;
        }

        elseif (
            $this->feed_type == RSS and
            $this->current_namespace == '' and
            $el == 'image' )
        {
            $this->inimage = true;
        }

        // handle atom content constructs
        elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            $this->incontent = $el;
        }

        // if inside an Atom content construct (e.g. content or summary) field treat tags as text
        elseif ($this->feed_type == ATOM and $this->incontent )
        {
            // if tags are inlined, then flatten
            $attrs_str = join(' ',
                array_map('map_attrs',
                    array_keys($attrs),
                    array_values($attrs) ) );

            $this->append_content( "<$element $attrs_str>" );

            array_unshift( $this->stack, $el );
        }

        // Atom supports many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.
        elseif ($this->feed_type == ATOM and $el == 'link' )
        {
            $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
            $href = isset($attrs['href']) ? $attrs['href'] : '';

            if ( $rel == 'alternate' ) {
                $link_el = 'link';
            }
            else {
                $link_el = 'link_' . $rel;
            }

            $this->append($link_el, $href);
        }
        // set stack[0] to current element
        else {
            array_unshift($this->stack, $el);
        }
    }

    /**
     * Character-data handler: routes text either into the open Atom content
     * construct or into the field named by the current element stack.
     *
     * @param mixed  $p    parser handle (unused)
     * @param string $text character data
     */
    public function feed_cdata ($p, $text) {
        if ($this->feed_type == ATOM and $this->incontent)
        {
            $this->append_content( $text );
        }
        else {
            // field name is the stack joined outermost-first, e.g. "author_name"
            $current_el = join('_', array_reverse($this->stack));
            $this->append($current_el, $text);
        }
    }

    /**
     * End-tag handler: closes the section opened by feed_start_element()
     * and resets the current namespace.
     *
     * @param mixed  $p  parser handle (unused)
     * @param string $el element name as reported by the parser
     */
    public function feed_end_element ($p, $el) {
        $el = strtolower($el);

        if ( $el == 'item' or $el == 'entry' )
        {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
        {
            $this->intextinput = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
        {
            $this->inimage = false;
        }
        elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            $this->incontent = false;
        }
        elseif ($el == 'channel' or $el == 'feed' )
        {
            $this->inchannel = false;
        }
        elseif ($this->feed_type == ATOM and $this->incontent ) {
            // balance tags properly
            // note: i don't think this is actually necessary
            if ( isset($this->stack[0]) and $this->stack[0] == $el )
            {
                $this->append_content("</$el>");
            }
            else {
                $this->append_content("<$el />");
            }

            array_shift( $this->stack );
        }
        else {
            array_shift( $this->stack );
        }

        $this->current_namespace = false;
    }

    /**
     * Append $str2 to $str1 in place, initialising $str1 to "" when unset.
     *
     * @param string $str1 target, passed by reference
     * @param string $str2 text to append
     */
    public function concat (&$str1, $str2="") {
        if (!isset($str1) ) {
            $str1="";
        }
        $str1 .= $str2;
    }

    /**
     * Append text to the Atom content construct currently being read
     * ($this->incontent), on the current item or else on the channel.
     *
     * @param string $text text or flattened markup to append
     */
    public function append_content($text) {
        if ( $this->initem ) {
            $this->concat( $this->current_item[ $this->incontent ], $text );
        }
        elseif ( $this->inchannel ) {
            $this->concat( $this->channel[ $this->incontent ], $text );
        }
    }

    /**
     * Smart append - field and namespace aware.
     *
     * Adds $text to field $el of whichever section is currently open; when a
     * namespace is active the field is nested under the namespace key.
     *
     * @param string $el   field name; nothing happens when empty
     * @param string $text text to append
     */
    public function append($el, $text) {
        if (!$el) {
            return;
        }
        if ( $this->current_namespace )
        {
            if ( $this->initem ) {
                $this->concat(
                    $this->current_item[ $this->current_namespace ][ $el ], $text);
            }
            elseif ($this->inchannel) {
                $this->concat(
                    $this->channel[ $this->current_namespace][ $el ], $text );
            }
            elseif ($this->intextinput) {
                $this->concat(
                    $this->textinput[ $this->current_namespace][ $el ], $text );
            }
            elseif ($this->inimage) {
                $this->concat(
                    $this->image[ $this->current_namespace ][ $el ], $text );
            }
        }
        else {
            if ( $this->initem ) {
                $this->concat(
                    $this->current_item[ $el ], $text);
            }
            elseif ($this->intextinput) {
                $this->concat(
                    $this->textinput[ $el ], $text );
            }
            elseif ($this->inimage) {
                $this->concat(
                    $this->image[ $el ], $text );
            }
            elseif ($this->inchannel) {
                $this->concat(
                    $this->channel[ $el ], $text );
            }
        }
    }

    /**
     * Mirror Atom fields into their RSS names (and vice versa) and compute
     * a 'date_timestamp' for each item where a date can be parsed.
     */
    public function normalize () {
        // if atom populate rss fields
        if ( $this->is_atom() ) {
            $this->channel['description'] =
                isset($this->channel['tagline']) ? $this->channel['tagline'] : null;

            foreach ( array_keys($this->items) as $i ) {
                $item = $this->items[$i];
                if ( isset($item['summary']) )
                    $item['description'] = $item['summary'];
                if ( isset($item['atom_content']))
                    $item['content']['encoded'] = $item['atom_content'];

                // prefer 'issued', fall back to 'modified' when present
                $atom_date = null;
                if ( isset($item['issued']) ) {
                    $atom_date = $item['issued'];
                }
                elseif ( isset($item['modified']) ) {
                    $atom_date = $item['modified'];
                }

                if ( $atom_date ) {
                    $epoch = @parse_w3cdtf($atom_date);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }

                $this->items[$i] = $item;
            }
        }
        elseif ( $this->is_rss() ) {
            $this->channel['tagline'] =
                isset($this->channel['description']) ? $this->channel['description'] : null;

            foreach ( array_keys($this->items) as $i ) {
                $item = $this->items[$i];
                if ( isset($item['description']))
                    $item['summary'] = $item['description'];
                if ( isset($item['content']['encoded'] ) )
                    $item['atom_content'] = $item['content']['encoded'];

                if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                    $epoch = @parse_w3cdtf($item['dc']['date']);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }
                elseif ( isset($item['pubdate']) ) {
                    $epoch = @strtotime($item['pubdate']);
                    if ($epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }

                $this->items[$i] = $item;
            }
        }
    }

    /**
     * @return mixed the feed version when the feed is RSS, false otherwise
     */
    public function is_rss () {
        if ( $this->feed_type == RSS ) {
            return $this->feed_version;
        }
        else {
            return false;
        }
    }

    /**
     * @return mixed the feed version when the feed is Atom, false otherwise
     */
    public function is_atom() {
        if ( $this->feed_type == ATOM ) {
            return $this->feed_version;
        }
        else {
            return false;
        }
    }

    /**
     * Return XML parser, and possibly re-encoded source.
     *
     * @param string $source  feed source
     * @param string $out_enc target encoding for parsed values (may be empty)
     * @param string $in_enc  source encoding (may be empty)
     * @param bool   $detect  whether to detect the source encoding
     * @return array array($parser, $source)
     */
    public function create_parser($source, $out_enc, $in_enc, $detect) {
        // Compare the whole version number: looking only at the first
        // character would misroute PHP 7, 8 and later to the PHP4 path.
        if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
            $parser = $this->php5_create_parser($in_enc, $detect);
        }
        else {
            list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
        }
        if ($out_enc) {
            $this->encoding = $out_enc;
            // only touch the parser when one was actually created
            if ($parser) {
                xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
            }
        }

        return array($parser, $source);
    }

    /**
     * Instantiate an XML parser under PHP5 and later.
     *
     * An empty encoding string is passed unless detection is disabled and an
     * explicit input encoding was given, leaving detection to the parser.
     *
     * @param string $in_enc source encoding (may be empty)
     * @param bool   $detect whether to let the parser detect the encoding
     * @return mixed parser handle
     */
    public function php5_create_parser($in_enc, $detect) {
        // by default php5 does a fine job of detecting input encodings
        if(!$detect && $in_enc) {
            return xml_parser_create($in_enc);
        }
        else {
            return xml_parser_create('');
        }
    }

    /**
     * Instantiate an XML parser under PHP4.
     *
     * Reads the encoding from the XML prolog when none is given; if it is
     * not one of $_KNOWN_ENCODINGS the source is converted to UTF-8 with
     * iconv or mbstring when available.
     *
     * The approach is based on SJM's work with FoF
     * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
     *
     * @param string $source feed source
     * @param string $in_enc source encoding (may be empty)
     * @param bool   $detect whether to detect the source encoding
     * @return array array($parser, $source) where $source may be re-encoded
     */
    public function php4_create_parser($source, $in_enc, $detect) {
        if ( !$detect ) {
            return array(xml_parser_create($in_enc), $source);
        }

        if (!$in_enc) {
            // '?' is escaped so the pattern matches a literal "<?xml ... ?>" prolog
            if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?\?>/m', $source, $m)) {
                $in_enc = strtoupper($m[1]);
                $this->_source_encoding = $in_enc;
            }
            else {
                $in_enc = 'UTF-8';
            }
        }

        if ($this->known_encoding($in_enc)) {
            return array(xml_parser_create($in_enc), $source);
        }

        // the detected encoding is not one of the simple encodings PHP knows

        // attempt to use the iconv extension to
        // cast the XML to a known encoding
        // @see http://php.net/iconv
        if (function_exists('iconv')) {
            $encoded_source = iconv($in_enc,'UTF-8', $source);
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }

        // iconv didn't work, try mb_convert_encoding
        // @see http://php.net/mbstring
        if(function_exists('mb_convert_encoding')) {
            $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }

        // else
        $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                     "You may see strange artifacts, and mangled characters.",
                     E_USER_NOTICE);

        return array(xml_parser_create(), $source);
    }

    /**
     * @param string $enc encoding name, case-insensitive
     * @return mixed the upper-cased name when listed in $_KNOWN_ENCODINGS, false otherwise
     */
    public function known_encoding($enc) {
        $enc = strtoupper($enc);
        if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) {
            return $enc;
        }
        else {
            return false;
        }
    }

    /**
     * Report a problem and remember it on the object.
     *
     * The message goes to trigger_error() when MAGPIE_DEBUG is defined and
     * true, otherwise to error_log(). Notice-level messages are stored in
     * $this->WARNING, everything else in $this->ERROR.
     *
     * @param string $errormsg message text
     * @param int    $lvl      one of PHP's E_* levels
     */
    public function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        // guard with defined() so a missing constant is not itself an error
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }

        $notices = E_USER_NOTICE|E_NOTICE;
        if ( $lvl&$notices ) {
            $this->WARNING = $errormsg;
        } else {
            $this->ERROR = $errormsg;
        }
    }

} // end class RSS
|
||||||
|
|
||||||
/**
 * Render one attribute as name="value" text.
 *
 * The value is inserted as given, without escaping.
 *
 * @param string $k attribute name
 * @param string $v attribute value
 * @return string
 */
function map_attrs($k, $v) {
    $quoted_value = '"' . $v . '"';
    return $k . '=' . $quoted_value;
}
|
||||||
|
|
||||||
// patch to support medieval versions of PHP4.1.x, |
|
||||||
// courtesy, Ryan Currie, ryan@digibliss.com |
|
||||||
|
|
||||||
if (!function_exists('array_change_key_case')) {
    // Only define the constants if the runtime does not already have them.
    if (!defined('CASE_UPPER')) {
        define("CASE_UPPER",1);
    }
    if (!defined('CASE_LOWER')) {
        define("CASE_LOWER",0);
    }

    /**
     * Fallback for PHP builds that lack array_change_key_case().
     *
     * @param array $array input array
     * @param int   $case  CASE_LOWER (default) or CASE_UPPER
     * @return array copy of $array with every key passed through
     *               strtolower() or strtoupper()
     */
    function array_change_key_case($array,$case=CASE_LOWER) {
        // Compare (==) rather than assign (=): the assignment form made
        // every call upper-case the keys regardless of $case.
        $cmd = ($case == CASE_UPPER) ? 'strtoupper' : 'strtolower';

        // Start from an empty array so empty input yields array(), not null.
        $output = array();
        foreach($array as $key=>$value) {
            $output[$cmd($key)]=$value;
        }
        return $output;
    }
}
|
||||||
@ -1,65 +0,0 @@ |
|||||||
<?php |
|
||||||
/** |
|
||||||
* Project: MagpieRSS: a simple RSS integration tool |
|
||||||
* File: rss_utils.inc, utility methods for working with RSS |
|
||||||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
|
||||||
* Version: 0.51 |
|
||||||
* License: GPL |
|
||||||
* |
|
||||||
* The latest version of MagpieRSS can be obtained from: |
|
||||||
* http://magpierss.sourceforge.net |
|
||||||
* |
|
||||||
* For questions, help, comments, discussion, etc., please join the |
|
||||||
* Magpie mailing list: |
|
||||||
* magpierss-general@lists.sourceforge.net |
|
||||||
* @package chamilo.include.rss |
|
||||||
*/ |
|
||||||
|
|
||||||
|
|
||||||
/** |
|
||||||
* Function: parse_w3cdtf |
|
||||||
* Purpose: parse a W3CDTF date into unix epoch |
|
||||||
* |
|
||||||
* NOTE: http://www.w3.org/TR/NOTE-datetime |
|
||||||
*/ |
|
||||||
/**
 * Parse a W3CDTF date-time (http://www.w3.org/TR/NOTE-datetime) into a
 * unix epoch.
 *
 * Accepts YYYY-MM-DDThh:mm with optional :ss and an optional zone that is
 * either "Z" or a +hh:mm / -hh:mm offset (the colon in the offset is
 * optional). A value without a zone is treated as GMT.
 *
 * @param string $date_str date-time text
 * @return int unix timestamp, or -1 when $date_str does not match
 */
function parse_w3cdtf ( $date_str ) {
    // Groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 seconds,
    //         7 offset sign, 8 offset hours, 9 offset minutes, 10 "Z".
    // The seconds group captures the digits only, not the leading colon.
    $pat = '/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(?::(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/';

    if ( !preg_match( $pat, $date_str, $match ) ) {
        return -1;
    }

    // preg_match omits trailing groups that did not take part in the match,
    // so optional groups are checked before they are read.
    $seconds = (isset($match[6]) && $match[6] !== '') ? (int) $match[6] : 0;

    // calc epoch for the given date assuming GMT
    $epoch = gmmktime( (int) $match[4], (int) $match[5], $seconds,
                       (int) $match[2], (int) $match[3], (int) $match[1] );

    // "Z" or no zone at all means GMT, i.e. no adjustment.
    $offset = 0;
    if ( isset($match[7]) && $match[7] !== '' ) {
        $offset = (((int) $match[8] * 60) + (int) $match[9]) * 60;

        // is timezone ahead of GMT? then subtract offset
        if ( $match[7] == '+' ) {
            $offset = $offset * -1;
        }
    }

    return $epoch + $offset;
}
|
||||||
Loading…
Reference in new issue