';
    /*
     * Collect one page: download $caijiurl[$i], extract title / keywords /
     * description / source / body according to the project's rules in $info,
     * then store the result through M('collect_content').
     *
     * Reads from the enclosing scope: $info, $caijiurl, $i, $c_caijiurl.
     * Writes: $i (incremented once per pass); terminates the script with die()
     * once $i exceeds $c_caijiurl.
     */
    $data = $info;
    $queryURL = $caijiurl[$i];
    set_time_limit(0);
    vendor('phpQuery.phpQuery');

    // Randomised addresses sent in the forged CLIENT-IP / X-FORWARDED-FOR headers.
    $cip = '123.125.68.'.mt_rand(0,254);
    $xip = '125.90.88.'.mt_rand(0,254);
    // User agent string sent with the request.
    $useragent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)";
    // Forged request headers.
    $header = array('Accept-Language: zh-cn','Connection: Keep-Alive','Cache-Control: no-cache','CLIENT-IP:'.$cip, 'X-FORWARDED-FOR:'.$xip);

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_REFERER, $queryURL);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
    curl_setopt($ch, CURLOPT_URL, $queryURL);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    $result = curl_exec($ch);
    curl_close($ch);

    // iconv() takes (from, to, string). The document is parsed as utf-8 below,
    // so a gb2312 page has to be converted from GB2312 to UTF-8 (the arguments
    // were previously the other way round).
    if ($info['charsets'] == 'gb2312') {
        $result = iconv("GB2312", "UTF-8//IGNORE", $result);
    }
    // NOTE(review): with an empty search string this call leaves $result
    // unchanged; the original search text was presumably lost - TODO confirm.
    $result = str_ireplace('', ' ', $result);
    \phpQuery::newDocumentHTML($result, 'utf-8');

    // Tags handed to the project helper _strip_tags() for every extracted field.
    $stripTags = array('div','ins','ul','script','object','li','span','iframe','style');

    /* title */
    // empty() instead of "== 0": before PHP 8 the loose comparison 'id' == 0 is
    // true, which made the id/class branches unreachable.
    $titleMode = $data['content_title_html'];
    if (empty($titleMode)) {
        $title = pq('title')->html();
    } else {
        // 'id' selects "#name"; any other non-empty mode selects ".name".
        $titlePrefix = ($titleMode === 'id') ? '#' : '.';
        $title = pq($titlePrefix.$data['content_title_name'])->html();
    }
    // Every branch uses _strip_tags(); the id/class branches used to call the
    // built-in strip_tags() with an array as its first argument.
    $title = _strip_tags($stripTags, str_replace($data['content_title_filter'], "", $title));
    /* title */

    /* keywords */
    $keywordsMode = $data['content_keywords_html'];
    if (empty($keywordsMode)) {
        // Default mode: the configured selector's "content" attribute.
        $keywords = pq($data['content_keywords_name'])->attr('content');
    } else {
        $keywordsPrefix = ($keywordsMode === 'id') ? '#' : '.';
        $keywords = pq($keywordsPrefix.$data['content_keywords_name'])->html();
    }
    // Filter the extracted keywords themselves (the id/class branches used to
    // filter $title here, discarding what had just been extracted).
    $keywords = _strip_tags($stripTags, str_replace($data['content_keywords_filter'], "", $keywords));
    /* keywords */

    /* description */
    $descriptionMode = $data['content_description_html'];
    if (empty($descriptionMode)) {
        $description = pq($data['content_description_name'])->attr('content');
    } else {
        $descriptionPrefix = ($descriptionMode === 'id') ? '#' : '.';
        $description = pq($descriptionPrefix.$data['content_description_name'])->html();
    }
    $description = _strip_tags($stripTags, str_replace($data['content_description_filter'], "", $description));
    /* description */

    /* source */
    $sourceMode = $data['content_source_html'];
    if ($sourceMode === 'id' || $sourceMode === 'class') {
        $sourcePrefix = ($sourceMode === 'id') ? '#' : '.';
        $source = pq($sourcePrefix.$data['content_source_name'])->html();
        $source = _strip_tags($stripTags, str_replace($data['content_source_filter'], "", $source));
    } else {
        // No source rule configured.
        $source = '';
    }
    /* source */

    /* body content */
    $contentPrefix = ($data['content_contenttext_html'] === 'id') ? '#' : '.';
    $content = pq($contentPrefix.$data['content_contenttext_name'])->html();
    $content = _strip_tags($stripTags, str_replace($data['content_contenttext_filter'], "", $content));
    $content = htmlspecialchars(addslashes($content));

    // Optional post-processing. Each helper receives stripslashess($content) and
    // its output is re-encoded the same way as above.
    // NOTE(review): stripslashess() is a project helper; presumably it undoes the
    // encoding applied above - confirm.
    if ($info['mypic'] == 1) {
        $content = auto_save_image(stripslashess($content));
        $content = htmlspecialchars(addslashes($content));
    }
    $c = S('config');
    if ($info['mylink'] == 1) {
        $content = Replace_Links(stripslashess($content), array($c['web_url'], $c['web_m_url']));
        $content = htmlspecialchars(addslashes($content));
    }
    $data = '';

    // Default to null so 'litpic' is always defined, including when the option
    // is off or auto_return_image() yields no first element.
    $litpic = null;
    if ($info['mypic_one'] == 1) {
        $picarray = auto_return_image(stripslashess($content));
        $litpic = isset($picarray[0]) ? $picarray[0] : null;
    }
    /* body content */

    if ($title != '') {
        // NOTE(review): the duplicate check reads 'article_content' while the
        // insert below targets 'collect_content' - confirm this is intended.
        $check = M('article_content')->where(array('title' => $title))->find();
        if (!$check) {
            // Must start as an array: string-key assignment on '' fails on PHP >= 7.1.
            $odata = array();
            $odata['title'] = $title;
            $odata['litpic'] = $litpic;
            $odata['keywords'] = $keywords;
            $odata['description'] = $description;
            $odata['source'] = $source;
            $odata['time'] = time();
            $odata['typeid'] = $info['typeid'];
            $odata['content'] = $content;
            $odata['project_id'] = $info['id'];
            M('collect_content')->add($odata);
            echo '采集 '.$title.'成功';
        } else {
            echo '采集 '.$title.'失败,标题重复!';
        }
    } else {
        echo '采集失败,标题为空,请检查本项目的采集规则!';
    }

    $i++;
    if ($i > $c_caijiurl) {
        // Past the last index: clear the 'caijiurl' entry via S(), stamp the
        // project's edit_time and stop the script.
        S('caijiurl', null);
        M('collect_project')->where(array('id' => $info['id']))->save(array('edit_time' => time()));
        // The message literal spans two lines on purpose; its bytes are unchanged.
        die('
采集完毕,全部内容已经采入临时库!');
    }
}