func('file');
load()->func('communication');
// Scraping rules
$html = ihttp_request($url, '', array('CURLOPT_REFERER' => 'http://www.qq.com'));
// $html = file_get_contents($url);
$html = str_replace("", "", $html['content']);
$reg = array(
// Article title
'title' => array('#activity-name', 'text'),
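// NOTE: the original comment here described a publish-date rule (using QueryList's filter to drop span and a tags), but no such rule is defined in this array.
// Article body: read from #js_content via the 'html' selector attribute. The original comment mentions filtering out hyperlinks (keeping their text), copyright notices and JS, but no filter argument is passed in this rule.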
'content' => array('#js_content', 'html'),
'nickname' => array('.profile_nickname', 'text'),
'video' => array('.video_iframe', 'data-src', '', function ($video) {
$video = explode('vid=', $video);
$video = explode('&', $video['1']);
return $video['0'];
}),
'logo' => array(':contains(msg_cdn_url)', 'text', '', function ($logo) {
$logo = explode('var msg_cdn_url = "', $logo);
$logo = explode('";', $logo['1']);
$logo = 'web/index.php?c=utility&a=wxcode&do=image&attach=' . $logo['0'];
return $logo;
}),
'desc' => array(':contains(msg_cdn_url)', 'text', '', function ($desc) {
$desc = explode('var msg_desc = "', $desc);
$desc = explode('";', $desc['1']);
return $desc['0'];
}),
);
$rang = 'body';
$ql = QueryList::Query($html, $reg, $rang, 'UTF-8');
$con = $ql->getData();
$contents = $con['0']['content'];
// If the Chinese text comes out garbled, use the conversion below
//$getcontent = iconv("gb2312", "utf-8",$contents);
preg_match_all('/<\s*img\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $contents, $match);
$pic1 = $match['0'];
$img = $match['2'];
foreach ($pic1 as $key => $value) {
$url = $value;
$path = $_W['siteroot'] . 'web/index.php?c=utility&a=wxcode&do=image&attach=' . $img[$key];
// $imgarr = getimagesize($path);
// if ($imgarr['0'] > 300 && $imgarr['1'] > 10) {
// $fileurl = '
';
// } else {
// $fileurl = '
';
// }
// if ($imgarr['0'] > 300 && $imgarr['1'] > 200) {
// if ($key < 4) {
// $pic .= tomedia($path) . ',';
// }
// }
$fileurl = '
';
$pic .= tomedia($path) . ',';
$contents = str_replace("{$url}", $fileurl, $contents);
}
preg_match_all('/<\s*iframe\s+[^>]*?src\s*=\s*(\'|\")(.*?)\\1[^>]*?\/?\s*>/i', $contents, $match);
$fs = $match['0'];
$fskey = $match['2'];
foreach ($fs as $key => $value) {
$fileurl = "";
$contents = str_replace("$value", $fileurl, $contents);
}
$pic = rtrim($pic, ",");
$pic = explode(",", $pic);
if (count($pic) == 3) {
$pic = iserializer($pic);
} else {
$pic = null;
}
$data = array(
'title' => $con['0']['title'],
'contents' => $contents,
'desc' => $con['0']['desc'],
'pic' => $pic,
'vid' => $con['0']['video'],
'thumb' => $_W['siteroot'] . $con['0']['logo'],
'nickname' => $con['0']['nickname']
);
return $data;
}
}