一个改进的UBB类
发布时间:2006-10-14 2:41:35   收集提供:gaoqian
<?php
/*
如有转载,请注明作者

原作者: 何志强
改进: SonyMusic[ sonymusic@163.net ]
文件: ubb.php
备注: 说是改进,其实核心函数parse()已经完全重写了,而且思路也是不一样的。
不过仍是受何志强的例子的启发,而且测试的例子还有URLCHECK等几个函数也是沿用的何志强的程序,谢谢何志强。
目前还没有颜色的功能,但我会加入的。
如果在程序上有什么BUG或不便的地方,请给我MAIL。
谢谢!
改进功能:
对字符串进行UBB编码,该类目前只支持下列几个简单且实用的编码:
1. URL裢接
[url] http://phpuser.com/ [/url]
http://头可以不需要
如[url]phpuser.com[/url]也是可以的。
2. Email裢接
[email] sonymusic@163.net [/email]
3. 图片裢接
[img] http://www.phpchina.com/images/logo.gif [/img]
同URL链接一样,前面的http也可以不要。
4. 文字方面
[b]粗体字[/b]
[i]斜体字[/i]
[u]加下划线[/u]
[h1]1号标题字[/h1] ... [h6]6号标题字[/h6]
[sup][/sup]
[sub][/sub]
[tt][/tt]
[s][/s]
[strike][/strike]
[em][/em]
[strong][/strong]
[code][/code]
[samp][/samp]
[kbd][/kbd]
[var][/var]
[dfn][/dfn]
[cite][/cite]
[small][/small]
[big][/big]
[blink][/blink]
注意以下几点:
1. url,email,img等标签是不分大小写的.
2. 在标签中不允许有TAB键出现,但空格允许。
3. 该类要调用htmlencode,htmlencode4textarea,emailcheck函数和urlcheck类.
4. 修改后支持嵌套,但url,email,img这三个标签不是允许嵌套的。
技术资料:
Ultimate Bulletin Board
http://www.ultimatebb.com/  
What is UBB Code
http://www.scriptkeeper.com/ubb/ubbcode.html  
*/

include("urlcheck.php");
include("otherfunc.php"); //这两个文件的内容,附在最后。

//ubbcode类
class ubbcode{
var $call_time=0;
//可处理标签及处理函数对应表
var $tags = array( //小写的标签 => 对应的处理函数
'url' => '$this->url',
'email' => '$this->email',
'img' => '$this->img',
'b' => '$this->simple',
'i' => '$this->simple',
'u' => '$this->simple',
'tt' => '$this->simple',
's' => '$this->simple',
'strike' => '$this->simple',
'h1' => '$this->simple',
'h2' => '$this->simple',
'h3' => '$this->simple',
'h4' => '$this->simple',
'h5' => '$this->simple',
'h6' => '$this->simple',
'sup' => '$this->simple',
'sub' => '$this->simple',
'em' => '$this->simple',
'strong' => '$this->simple',
'code' => '$this->simple',
'samp' => '$this->simple',
'kbd' => '$this->simple',
'var' => '$this->simple',
'dfn' => '$this->simple',
'cite' => '$this->simple',
'small' => '$this->simple',
'big' => '$this->simple',
'blink' => '$this->simple'
);
//url裢接属性
var $attr_url;
//url合法性检查对象
var $urlcheck;

function ubbcode($attr_url){
$this->attr_url = ''.$attr_url;
$this->urlcheck = new urlcheck();
}

//对$str进行UBB编码解析
function parse($str){
$this->call_time++;
$parse = ''.htmlencode($str);

$ret = '';
while(true){
$eregi_ret=eregi("[[#]{0,1}[[:alnum:]]{1,7}]",$parse,$eregi_arr); //查找[xx]
if(!$eregi_ret){
$ret .= $parse;
break; //如果没有,返回
}
$pos = @strpos ($parse,$eregi_arr[0]);
$tag_len=strlen($eregi_arr[0])-2;//标记长度
$tag_start=substr($eregi_arr[0],1,$tag_len);
$tag=strtolower($tag_start);

if((($tag=="url") or ($tag=="email") or ($tag=="img")) and ($this->call_time>1)){
echo $this->call_time."<br>";
return $parse;//如果不能是不能嵌套的标记,直接返回
}

$parse2 = substr($parse,0,$pos);//标记之前
$parse = substr($parse,$pos+$tag_len+2);//标记之后
if(!isset($this->tags[$tag])){
echo "$tag_start<br>";
$ret .= $parse2.'['.$tag_start.']';
continue;//如果是不支持的标记
}

//查找对对应的结束标记
$eregi_ret=eregi("[/".$tag."]",$parse,$eregi_arr);
if(!$eregi_ret){
$ret .= $parse2.'['.$tag_start.']';
continue;//如果没有对应该的结束标记
}
$pos=strpos($parse,$eregi_arr[0]);
$value=substr($parse,0,$pos);//这是起止标记之间的内容
$tag_end=substr($parse,$pos+2,$tag_len);
$parse=substr($parse,$pos+$tag_len+3);//结束标记之后的内容

if(($tag!="url") and ($tag!="email") and ($tag!="img")){
$value=$this->parse($value);
}

$ret .= $parse2;
eval('$ret .= '.$this->tags[$tag].'("'.$tag_start.'","'.$tag_end.'","'.$value.'");');
}
$this->call_time--;
return $ret;
}

function simple($start,$end,$value){
return '<'.$start.'>'.$value.'</'.$end.'>';
}

function url($start,$end,$value){
$trim_value=trim($value);
if (strtolower(substr($trim_value,0,7))!="http://")
$trim_value="http://".$trim_value;
if($this->urlcheck->check($trim_value)) return '<a href="'.$trim_value.'" '.$this->attr_url.'>'.$value.'</a>';
else return '['.$start.']'.$value.'[/'.$end.']';
}

function email($start,$end,$value){
if(emailcheck($value)) return '<a href="mailto:'.$value.'">'.$value.'</a>';
else return '['.$start.']'.$value.'[/'.$end.']';
}

function img($start,$end,$value){
$trim_value=trim($value);
if ((strtolower(substr($trim_value,0,7))!="http://") or ($this->urlcheck->check($trim_value)))
return '<img src="'.$trim_value.'"></img>';
else return '['.$start.']'.$value.'[/'.$end.']';
}
}

//测试
echo '<html>';
echo '<head><title>测试</title></head>';
echo '<body>';
echo '<form action="'.str2url($PATH_INFO).'" method="post">';
echo '<textarea cols="100" rows="10" name="ubb">'.htmlencode4textarea($ubb).'</textarea><br>';
echo '<input type="submit" value="转换">';
echo '</form>';

if(isset($ubb)){
$ubbcode = new ubbcode('target="_blank"');
echo '<hr>'.$ubbcode->parse($ubb);
}

echo '</body>';
echo '</html>';

?>


文件urlcheck.php的内容:
<?php
//urlcheck.php
class urlcheck{
var $regex = array(//协议名(注意在这里必须写成小写) => 对应的正则表达式
'ftp' => '$this->ftpurl',
'file' => '$this->fileurl',
'http' => '$this->httpurl',
'https' => '$this->httpurl',
'gopher' => '$this->gopherurl',
'news' => '$this->newsurl',
'nntp' => '$this->nntpurl',
'telnet' => '$this->telneturl',
'wais' => '$this->waisurl'
);

var $lowalpha;
var $hialpha;
var $alpha;
var $digit;
var $safe;
var $extra;
var $national;
var $punctuation;
var $reserved;
var $hex;
var $escape;
var $unreserved;
var $uchar;
var $xchar;
var $digits;

var $urlpath;
var $password;
var $user;
var $port;
var $hostnumber;
var $alphadigit;
var $toplabel;
var $domainlabel;
var $hostname;
var $host;
var $hostport;
var $login;

//ftp
var $ftptype;
var $fsegment;
var $fpath;
var $ftpurl;

//file
var $fileurl;

//http,https
var $search;
var $hsegment;
var $hpath;
var $httpurl;

//gopher
var $gopher_string;
var $selector;
var $gtype;
var $gopherurl;

//news
var $article;
var $group;
var $grouppart;
var $newsurl;

//nntp
var $nntpurl;

//telnet
var $telneturl;

//wais
var $wpath;
var $wtype;
var $database;
var $waisdoc;
var $waisindex;
var $waisdatabase;
var $waisurl;

function check($url){
$pos = @strpos ($url,':',1);
if($pos<1) return false;
$prot = substr($url,0,$pos);
if(!isset($this->regex[$prot])) return false;
eval('$regex = '.$this->regex[$prot].';');
return ereg('^'.$regex.'$',$url);
}

function urlcheck(){
$this->lowalpha = '[a-z]';
$this->hialpha = '[A-Z]';
$this->alpha = '('.$this->lowalpha.'|'.$this->hialpha.')';
$this->digit = '[0-9]';
$this->safe = '[$.+_-]';
$this->extra = '[*()'!,]';
$this->national = '([{}|^~`]|\[|\])';
$this->punctuation = '[<>#%"]';
$this->reserved = '[?;/: @&= ]';
$this->hex = '('.$this->digit.'|[a-fA-F])';
$this->escape = '(%'.$this->hex.'{2})';
$this->unreserved = '('.$this->alpha.'|'.$this->digit.'|'.$this->safe.'|'.$this->extra.')';
$this->uchar = '('.$this->unreserved.'|'.$this->escape.')';
$this->xchar = '('.$this->unreserved.'|'.$this->reserved.'|'.$this->escape.')';
$this->digits = '('.$this->digit.'+)';

$this->urlpath = '('.$this->xchar.'*)';
$this->password = '(('.$this->uchar.'|[?;&=]'.')*)';
$this->user = '(('.$this->uchar.'|[?;&=]'.')*)';
$this->port = $this->digits;
$this->hostnumber = '('.$this->digits.'.'.$this->digits.'.'.$this->digits.'.'.$this->digits.')';
$this->alphadigit = '('.$this->alpha.'|'.$this->digit.')';
$this->toplabel = '('.$this->alpha.'|('.$this->alpha.'('.$this->alphadigit.'|-)*'.$this->alphadigit.'))';
$this->domainlabel = '('.$this->alphadigit.'|('.$this->alphadigit.'('.$this->alphadigit.'|-)*'.$this->alphadigit.'))';
$this->hostname = '(('.$this->domainlabel.'\.)*'.$this->toplabel.')';
$this->host = '('.$this->hostname.'|'.$this->hostnumber.')';
$this->hostport = '('.$this->host.'(:'.$this->port.')?)';
$this->login = '(('.$this->user.'(:'.$this->password.')?@)?'.$this->hostport.')';

$this->ftptype = '[aidAID]';
$this->fsegment = '(('.$this->uchar.'|[?: @&= ])*)';
$this->fpath = '('.$this->fsegment.'(/'.$this->fsegment.')*)';
$this->ftpurl = '([fF][tT][pP]://'.$this->login.'(/'.$this->fpath.'(;[tT][yY][pP][eE]='.$this->ftptype.')?)?)';

$this->fileurl = '([fF][iI][lL][eE]://('.$this->host.'|[lL][oO][cC][aA][lL][hH][oO][sS][tT])?/'.$this->fpath.')';

$this->search = '(('.$this->uchar.'|[;: @&= ])*)';
$this->hsegment = '(('.$this->uchar.'|[;: @&= ])*)';
$this->hpath = '('.$this->hsegment.'(/'.$this->hsegment.')*)';
$this->httpurl = '([hH][tT][tT][pP][sS]?://'.$this->hostport.'(/'.$this->hpath.'([?]'.$this->search.')?)?)';

$this->gopher_string = '('.$this->xchar.'*)';
$this->selector = '('.$this->xchar.'*)';
$this->gtype = $this->xchar;
$this->gopherurl = '([gG][oO][pP][hH][eE][rR]://'.$this->hostport.'(/('.$this->gtype.'('.$this->selector.'(%09'.$this->search.'(%09'.$this->gopher_string.')?)?)?)?)?)';

$this->article = '(('.$this->uchar.'|[;/?:&=]) +@'.$this- >host.')';
$this->group = '('.$this->alpha.'('.$this->alpha.'|'.$this->digit.'|[-.+_])*)';
$this->grouppart = '([*]|'.$this->group.'|'.$this->article.')';
$this->newsurl = '([nN][eE][wW][sS]:'.$this->grouppart.')';

$this->nntpurl = '([nN][nN][tT][pP]://'.$this->hostport.'/'.$this->group.'(/'.$this->digits.')?)';

$this->telneturl = '([tT][eE][lL][nN][eE][tT]://'.$this->login.'/?)';

$this->wpath = '('.$this->uchar.'*)';
$this->wtype = '('.$this->uchar.'*)';
$this->database = '('.$this->uchar.'*)';
$this->waisdoc = '([wW][aA][iI][sS]://'.$this->hostport.'/'.$this->database.'/'.$this->wtype.'/'.$this->wpath.')';
$this->waisindex = '([wW][aA][iI][sS]://'.$this->hostport.'/'.$this->database.'[?]'$this->search.')';
$this->waisdatabase = '([wW][aA][iI][sS]://'.$this->hostport.'/'.$this->database.')';
$this->waisurl = '('.$this->waisdatabase.'|'.$this->waisindex.'|'.$this->waisdoc.')';
}
}

?>


文件otherfunc.php的内容:
<?php
//otherfunc.php
function htmlencode($str){
$str = (string)$str;

$ret = '';
$len = strlen($str);
$nl = false;
for($i=0;$i<$len;$i++){
$chr = $str[$i];
switch($chr){
case '<':
$ret .= '&lt;';
$nl = false;
break;
case '>':
$ret .= '&gt;';
$nl = false;
break;
case '"':
$ret .= '&quot;';
$nl = false;
break;
case '&':
$ret .= '&amp;';
$nl = false;
break;
/*
case ' ':
$ret .= '&nbsp;';
$nl = false;
break;
*/  
case chr(9):
$ret .= '&nbsp;&nbsp;&nbsp;&nbsp;';
$nl = false;
break;
case chr(10):
if($nl) $nl = false;
else{
$ret .= '<br>';
$nl = true;
}
break;
case chr(13):
if($nl) $nl = false;
else{
$ret .= '<br>';
$nl = true;
}
break;
default:
$ret .= $chr;
$nl = false;
break;
}
}

return $ret;
}


function htmlencode4textarea($str){
$str = (string)$str;

$ret = '';
$len = strlen($str);
for($i=0;$i<$len;$i++){
$chr = $str[$i];
switch($chr){
case '<':
$ret .= '&lt;';
break;
case '>':
$ret .= '&gt;';
break;
case '"':
$ret .= '&quot;';
break;
case '&':
$ret .= '&amp;';
break;
case ' ':
$ret .= '&nbsp;';
break;
case chr(9):
$ret .= '&nbsp;&nbsp;&nbsp;&nbsp;';
break;
default:
$ret .= $chr;
break;
}
}

return $ret;
}

function emailcheck($email){
$ret=false;
if(strstr($email, '@' ) && strstr($email, '.')){
if(eregi("^([_a-z0-9]+([\._a-z0-9-]+)*)@([a-z0-9]{2,}(\.[a-z0-9-]{2,})*\.[a-z]{2,3})$", $email)){
$ret=true;
}
}
return $ret;
}

function str2url($path){
return eregi_replace("%2f","/",urlencode($path));
}
?>
 
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50