www.xuxule.com 全站采集程序
www.xuxule.com 全站采集程序
把歌手、歌手档案、专辑、介绍图片等下载下来,
还有歌词。这个网站没有音乐文件。
<?php
// Setup notes from the original author:
//  - Import the schema below into the database first, then set the picture
//    directory used by xuxule2.php.
//  - Run this script first. It collects the album list; the second script then
//    fills in the remaining album details and fetches the lyrics. The two
//    files may be merged into one.
//  - The code was written quickly. To reuse it, replace the database class
//    calls with direct MySQL calls; nothing else should need changing.
//    Cleanup contributions are welcome. Free to use, no copyright is claimed.
define('ROOT_PATH', './');
require_once ROOT_PATH . 'Includes/Global.php';
// The crawl takes a long time, so lift the execution time limit.
set_time_limit(0);
/*
-- Schema used by both scraper stages; import it into MySQL before running.
-- ENGINE= replaces the TYPE= table option, which current MySQL versions reject.
-- Each auto-increment id is declared as the primary key; the original also
-- carried a second, redundant index on the same id column, which is dropped.
CREATE TABLE `xl_lyric` (
`lyricID` int(10) unsigned NOT NULL auto_increment,
`Name` varchar(120) NOT NULL default '',
`songerID` int(10) unsigned NOT NULL default '0',
`songerName` varchar(50) NOT NULL default '',
`specialID` int(10) unsigned NOT NULL default '0',
`specialName` varchar(120) NOT NULL default '',
`lyric` text NOT NULL,
`createDate` int(10) unsigned NOT NULL default '0',
PRIMARY KEY (`lyricID`)
) ENGINE=MyISAM AUTO_INCREMENT=1 ;
CREATE TABLE `xl_songer` (
`songerID` smallint(6) unsigned NOT NULL auto_increment,
`songerName` varchar(50) NOT NULL default '',
`enName` varchar(100) NOT NULL default '',
`str` varchar(50) NOT NULL default '',
`specialNum` tinyint(4) unsigned NOT NULL default '0',
`musicNum` smallint(6) unsigned NOT NULL default '0',
`description` varchar(255) NOT NULL default '',
`typeID` tinyint(2) unsigned NOT NULL default '0',
`typeName` varchar(20) NOT NULL default '',
`firstP` char(2) NOT NULL default 'a',
`usersID` smallint(6) unsigned NOT NULL default '0',
`viewNum` int(10) unsigned NOT NULL default '0',
`vote` mediumint(8) unsigned NOT NULL default '0',
`commentNum` smallint(6) unsigned NOT NULL default '0',
`hotNum` int(11) unsigned NOT NULL default '0',
`isPublic` enum('y','n') NOT NULL default 'y',
`createDate` int(11) unsigned NOT NULL default '0',
PRIMARY KEY (`songerID`),
KEY `songerName` (`songerName`)
) ENGINE=MyISAM AUTO_INCREMENT=1 ;
CREATE TABLE `xl_special` (
`specialID` mediumint(8) unsigned NOT NULL auto_increment,
`typeID` tinyint(2) unsigned NOT NULL default '0',
`typeName` varchar(20) NOT NULL default '',
`specialName` varchar(100) NOT NULL default '',
`enName` varchar(255) NOT NULL default '',
`str` varchar(30) NOT NULL default '',
`description` text NOT NULL,
`songerName` varchar(50) NOT NULL default '',
`songerID` smallint(6) unsigned NOT NULL default '0',
`language` varchar(50) NOT NULL default '',
`date` varchar(30) NOT NULL default '',
`company` varchar(200) NOT NULL default '',
`musicNum` tinyint(3) unsigned NOT NULL default '0',
`viewNum` int(10) unsigned NOT NULL default '0',
`picture` varchar(200) NOT NULL default '',
`vote` mediumint(8) unsigned NOT NULL default '0',
`commentNum` smallint(6) unsigned NOT NULL default '0',
`createDate` int(11) unsigned NOT NULL default '0',
PRIMARY KEY (`specialID`),
KEY `specialName` (`specialName`)
) ENGINE=MyISAM AUTO_INCREMENT=1 ;
*/
// Stage 1: walk the yearly "update" listings (1989-2005) and record every
// album found. For each listing page the album id/name, the singer id/name and
// the track count are scraped; the singer row is created (or its counters are
// increased) and one xl_special row is inserted per album.
// NOTE(review): the patterns contain Chinese literals, so this file presumably
// has to be saved in the same character encoding the site serves - confirm.
for ($i = 1989; $i <= 2005; $i++) // 1989-2005
{
    $url = "http://www.xuxule.com/update.asp?y=" . $i;
    $File = @file_get_contents($url);
    if (!$File) {
        continue; // year listing unavailable
    }

    // Number of listing pages for this year. When no pager text is present we
    // still try page 1 instead of silently skipping the whole year.
    $pageCount = preg_match("/共有<b>(\d+)<\/b>页/", $File, $pages) ? (int) $pages[1] : 1;

    for ($p = 1; $p <= $pageCount; $p++) {
        $url = "http://www.xuxule.com/update.asp?y=" . $i . "&page=" . $p;
        $File = @file_get_contents($url);
        if (!$File) {
            continue; // page unavailable
        }

        // All three patterns must run against the page that was just fetched.
        preg_match_all("/cd\.asp\?cdID=(.{1,30})><b>(.{1,100})<\/b><\/a>/", $File, $name);          #special name
        preg_match_all("/singer_cd\.asp\?pyab=(.{1,30})>(.{1,50})<\/a>/", $File, $singer);          #singer
        preg_match_all("/<font color=\"#666666\">([0-9]{1,4})<\/font>/", $File, $num);              #music num

        $albumCount = count($name[1]);
        for ($j = 0; $j < $albumCount; $j++) {
            // The three lists are matched independently and paired by index;
            // fall back to empty values if one of them came up short.
            $specialStr  = addslashes($name[1][$j]);
            $specialName = addslashes($name[2][$j]);
            $singerStr   = isset($singer[1][$j]) ? addslashes($singer[1][$j]) : '';
            $singerName  = isset($singer[2][$j]) ? addslashes($singer[2][$j]) : '';
            $musicNum    = isset($num[1][$j]) ? (int) $num[1][$j] : 0;

            #get singer ID
            $SQL = " SELECT songerID FROM xl_songer WHERE songerName='" . $singerName . "' ";
            $Row = $DB->queryFirstRow($SQL);
            if (empty($Row['songerID'])) {
                // Unknown singer: create the row with this album as the first one.
                $SQL = " INSERT INTO xl_songer SET songerName='" . $singerName . "',str='" . $singerStr . "',musicNum='" . $musicNum . "',specialNum='1',createDate='" . time() . "' ";
                $DB->query($SQL);
                $songerID = $DB->_GetInsertID();
            } else {
                // Known singer: add this album to the running counters.
                $SQL = " UPDATE xl_songer SET musicNum=musicNum+" . $musicNum . ",specialNum=specialNum+1 WHERE songerName='" . $singerName . "' ";
                $DB->query($SQL);
                $songerID = $Row['songerID'];
            }

            #insert special
            // description is NOT NULL without a default, so set it explicitly;
            // strict SQL mode rejects the insert otherwise.
            $SQL = " INSERT INTO xl_special SET str='" . $specialStr . "',specialName='" . $specialName . "',songerID='$songerID',songerName='" . $singerName . "',musicNum='" . $musicNum . "',description='',createDate='" . time() . "' ";
            $DB->query($SQL);
            // NOTE(review): UpdateOrderID() and $lastOrderID are not defined in
            // this file; presumably they come from Includes/Global.php - confirm.
            UpdateOrderID('special');#get special last ID
            $specialID = $lastOrderID;
            echo $SQL . "<BR><bR>";
        }
    }
}
?>
<?php
// Stage 2 bootstrap. ROOT_PATH is only defined when it does not exist yet, so
// this stage can live in the same file as stage 1 without triggering a
// "constant already defined" warning.
defined('ROOT_PATH') || define('ROOT_PATH', './');
require_once ROOT_PATH . 'Includes/Global.php';
// The crawl takes a long time, so lift the execution time limit.
set_time_limit(0);
// Directory the downloaded album covers are written to.
$pic_dir = ROOT_PATH . 'xuxule_pic/';
/**
 * Download one album cover from www.xuxule.com and store it in $pic_dir.
 *
 * The request carries a Referer header; according to the original author's
 * note the site refuses image downloads that do not send one.
 *
 * @param string $filename Path of the image on the site, relative to the web root.
 * @param string $str      Album identifier, used as part of the local file name.
 * @return string Local file name ("<timestamp>_<str>.jpg"), or '' when the
 *                image could not be fetched or written.
 */
function sava_pic($filename = '', $str = '')
{
    global $pic_dir;

    // Drop whitespace/quotes a scraped href may carry and avoid a double
    // leading slash; with nothing left there is no image to fetch.
    $filename = ltrim(trim((string) $filename, " \t\r\n\"'"), '/');
    if ($filename === '') {
        return '';
    }

    $server = 'www.xuxule.com';
    $host = 'www.xuxule.com';
    $port = 80;
    // A raw space would break the request line.
    $target = "/" . str_replace(' ', '%20', $filename);
    $referer = 'http://' . $host . $target; // Referer

    $File = fsockopen($server, $port, $errno, $errstr, 30);
    if (!$File) {
        echo "$errstr ($errno)<br />\n";
        return '';
    }

    // HTTP/1.0 keeps the server from replying with chunked transfer encoding,
    // which this raw socket read cannot decode.
    $out = "GET $target HTTP/1.0\r\n";
    $out .= "Host: $host\r\n";
    $out .= "Referer: $referer\r\n";
    $out .= "Connection: Close\r\n\r\n";
    fputs($File, $out);

    // Read until the server closes the connection. The comparison is strict so
    // that a chunk consisting of "0" does not end the loop early.
    $response = '';
    while (!feof($File)) {
        $buffer = fread($File, 4096);
        if ($buffer === false || $buffer === '') {
            break;
        }
        $response .= $buffer;
    }
    fclose($File);

    // Headers end at the first blank line; everything after it is the image.
    // The body is taken verbatim - trimming binary data would corrupt it.
    $headerEnd = strpos($response, "\r\n\r\n");
    if ($headerEnd === false) {
        return '';
    }
    $headers = substr($response, 0, $headerEnd);
    $body = (string) substr($response, $headerEnd + 4);

    // Only store successful, non-empty responses (not error or redirect pages).
    if (!preg_match('#^HTTP/\d\.\d\s+200\b#', $headers) || $body === '') {
        return '';
    }

    $pic_name = time() . "_" . $str . ".jpg";
    $fp = fopen($pic_dir . $pic_name, 'wb'); // binary-safe write
    if (!$fp) {
        return '';
    }
    fwrite($fp, $body);
    fclose($fp);

    return $pic_name;
}
// Stage 2: complete every album that has not been processed yet (rows whose
// `date` is still empty). For each album page the cover is downloaded, the
// description / release date / company / category are scraped, one xl_lyric
// row is inserted per track (with the lyric text when a lyric page exists),
// and finally the xl_special row is updated.
// NOTE(review): the original query carried a trailing "05-0248" comment,
// presumably a sample cdID used while testing - confirm.
$SQL = " SELECT specialID, str, specialName, songerName, songerID FROM xl_special WHERE `date`='' ORDER BY specialID DESC ";
$Result = $DB->query($SQL);
while ($Row = $DB->queryArray($Result)) {
    $url = "http://www.xuxule.com/cd.asp?cdID=" . $Row['str'];
    $File = @file_get_contents($url);
    if (!$File) {
        continue; // album page unavailable
    }

    // Values shared by every statement written for this album.
    $specialID   = (int) $Row['specialID'];
    $songerID    = (int) $Row['songerID'];
    $specialName = addslashes($Row['specialName']);
    $songerName  = addslashes($Row['songerName']);

    #save picture
    // Only attempt the download when a cover link was actually found.
    $picture = '';
    if (preg_match("/width=\"70\"(.*)border=1/s", $File, $pic2)
        && preg_match("/href=(.{1,110}) target=_blank/", $pic2[1], $pic)) {
        $picture = sava_pic($pic[1], $Row['str']);
    }

    #description
    $descriptionText = '';
    if (preg_match("/专辑介绍(.*)<td> <\/td>/s", $File, $description2)
        && preg_match("/<TD>(.*)<\/TD>/s", $description2[1], $description)) {
        $descriptionText = $description[1];
    }

    #date
    // Two page layouts are handled: the value is followed either by another
    // height="20" cell or directly by the closing </TD>.
    $dateText = '';
    if (preg_match("/出版年月(.*)<TD height=\"20\">&/s", $File, $date3)) {
        if (preg_match("/:(.*)<TD height=\"20/s", $date3[1], $date2) == 1) {
            if (preg_match("/(.*)<\/TD>/s", $date2[1], $date)) {
                $dateText = trim($date[1]);
            }
        } elseif (preg_match("/:(.*)<\/TD>/s", $date3[1], $datex)) {
            $dateText = trim($datex[1]);
        }
    }

    #company
    $companyText = preg_match("/发行公司:(.{1,100})<\/TD>/", $File, $company) ? trim($company[1]) : '';
    #language
    $languageText = preg_match("/专辑类别:(.{1,100})<\/TD>/", $File, $language) ? trim($language[1]) : '';

    #track list: $music holds tracks with a lyric page, $music_no tracks without
    $music = array(1 => array(), 2 => array());
    $music_no = array(1 => array());
    if (preg_match("/专辑曲目(.*)b_back/s", $File, $music2)) {
        preg_match_all("/&songNo=(\d+) class=c>(.{1,100})<\/a>/", $music2[1], $music);
        preg_match_all("/class=nolyric>(.{1,100})<\/span>/", $music2[1], $music_no);
    }

    $trackCount = count($music[1]);
    for ($i = 0; $i < $trackCount; $i++) {
        // lyric is NOT NULL without a default, so set it explicitly; strict
        // SQL mode rejects the insert otherwise.
        $SQL = " INSERT INTO xl_lyric SET Name='" . addslashes(trim($music[2][$i])) . "',specialID='$specialID',specialName='" . $specialName . "',songerName='" . $songerName . "',songerID='$songerID',lyric='',createDate='" . time() . "' ";
        $DB->query($SQL);
        $lyricID = $DB->_GetInsertID();

        $lyricFile = @file_get_contents("http://www.xuxule.com/lyric.asp?cdID=" . $Row['str'] . "&songNo=" . $music[1][$i]);
        // Narrow the page down in three steps; store the text only when every
        // step matched, otherwise the row keeps its empty lyric.
        if ($lyricFile
            && preg_match("/width=500>(.*)<td height=\"30/s", $lyricFile, $lyric3)
            && preg_match("/align=left>(.*)<\/tbody>/s", $lyric3[1], $lyric2)
            && preg_match("/(.*)<\/tr>/s", $lyric2[1], $lyric)) {
            $lyricText = trim(str_replace("</td>", "", $lyric[1]));
            $SQL = " UPDATE xl_lyric SET lyric='" . addslashes($lyricText) . "' WHERE lyricID='$lyricID' ";
            $DB->query($SQL);
        }
    }

    // Tracks listed without a lyric page are recorded by name only.
    $noLyricCount = count($music_no[1]);
    for ($j = 0; $j < $noLyricCount; $j++) {
        $SQL = " INSERT INTO xl_lyric SET Name='" . addslashes(trim($music_no[1][$j])) . "',specialID='$specialID',specialName='" . $specialName . "',songerName='" . $songerName . "',songerID='$songerID',lyric='',createDate='" . time() . "' ";
        $DB->query($SQL);
    }

    #insert into SQL
    $SQL = " UPDATE xl_special SET picture='" . addslashes($picture) . "',
description='" . addslashes($descriptionText) . "',
company='" . addslashes($companyText) . "',
`date`='" . addslashes($dateText) . "',
`language`='" . addslashes($languageText) . "'
WHERE specialID='$specialID' ";
    $DB->query($SQL);
    echo $SQL . "<BR><bR>";
}
?>
Tag: 采集程序
文章整理:iocblog
版权申明:本站文章均来自网络,如有侵权,请联系我们,我们收到后立即删除,谢谢!
特别注意:本站所有转载文章言论不代表本站观点,本站所提供的摄影照片,插画,设计作品,如需使用,请与原作者联系,版权归原作者所有。