Bug #3850: fix wiki word count

skala
Juan Carlos Raña 15 years ago
parent 89de7fbf15
commit 8485433022
  1. 16
      main/wiki/wiki.inc.php

@ -912,6 +912,7 @@ function word_count($document) {
$search = array(
'@<script[^>]*?>.*?</script>@si',
'@<style[^>]*?>.*?</style>@siU',
'@<div id="player.[^>]*?>.*?</div>@',
'@<![\s\S]*?--[ \t\n\r]*>@'
);
@ -919,15 +920,16 @@ function word_count($document) {
# strip all html tags
$wc = strip_tags($document);
$wc = html_entity_decode(utf8_encode($wc)); //html_entity_decode($wc,ENT_NOQUOTES, 'UTF-8') does not work ok
// Removal of non-word tokens and of one-letter words is temporarily commented out because of problems with UTF-8 support. TODO: fix and re-enable
# remove 'words' that don't consist of alphanumerical characters or punctuation
//$pattern = "#[^(\w|\d|\'|\"|\.|\!|\?|;|,|\\|\/|\-|:|\&|@)]+#";
//$wc = trim(preg_replace($pattern, " ", $wc));
# remove one-letter 'words' that consist only of punctuation
//$wc = trim(preg_replace("#\s*[(\'|\"|\.|\!|\?|;|,|\\|\/|\-|:|\&|@)]\s*#", " ", $wc)); //
# remove 'words' that don't consist of alphanumerical characters or punctuation
$pattern = "#[^(\w|\d|\'|\"|\.|\!|\?|;|,|\\|\/|\-|:|\&|@)]+#";
$wc = trim(preg_replace($pattern, " ", $wc));
# remove superfluous whitespace
# remove one-letter 'words' that consist only of punctuation
$wc = trim(preg_replace("#\s*[(\'|\"|\.|\!|\?|;|,|\\|\/|\-|:|\&|@)]\s*#", " ", $wc));
# remove superfluous whitespace
$wc = preg_replace("/\s\s+/", " ", $wc);
# split string into an array of words

Loading…
Cancel
Save