返回列表 发帖

[讨论] [ASP]起点文学采集入库程序

   注意:
      1、本程序在XP下调试会出现无法入库的情况,特此说明!
      2、本程序目前只支持单ID采集,多ID采集的功能如果您有兴趣,可以自己加上去。
<!--#include file="conn.asp" -->
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" c>
<meta http-equiv="Content-Language" c>
<title>文章采集分析程序</title>
<style type="text/css">
.style1 {
font-size: small;
}
.style2 {
text-align: center;
}
.newStyle1 {
border: 1px solid #00CC00;
}
</style>
</head>
<body>
<%
'根据关键字符串得到内容
Public Function FindData(str,strStart,strEnd)
Str1=Right(Str,Len(Str)-Instr(Lcase(Str),Lcase(strstart))-Len(strStart)+1)
Str1=Left(str1,Instr(Lcase(str1),Lcase(strEnd))-1)
Finddata=Str1
End function
'利用流进行中文编码
Private Function BytesToBstr(strBody)
Set ADOS = Server.CreateObject("ADODB.Stream")
        Dim Bdat
        Bdat=strBody
        ADOS.Type = 1
        ADOS.Mode =3
        ADOS.Open
        ADOS.Write Bdat
        ADOS.Position = 0
        ADOS.Type = 2
        ADOS.Charset = "GB2312"
        BytesToBstr = ADOS.ReadText  
        ADOS.Close
End Function
'利用OXML得到数据
Public Function GetData(strURL)
        'on error resume next  
        SourceCode = OXML.open ("GET",strURL,False)
        OXML.send()  
        if OXML.readystate<>4 then exit function
        GetData = BytesToBstr(OXML.responseBody)
        if err.number<>0 then err.Clear
End Function
Dim Content,strStart,strEnd
Dim strTitle,strResult,strClass,strAuthor,strPicture
if Request("Action")="Analyse" Then
Server.ScriptTimeOut=999999999
BookID=CLng(Trim(Request("BookID")))
'链接到书页,得到源文件
strBookPage="http://www.cmfu.com/showbook.asp?bl_id=" & BookID
Set OXML = server.CreateObject("Microsoft.XMLHTTP")
Content=GetData(strBookPage)
Set OXML=Nothing
'标题
strStart="<td colspan=2 class=zt valign=top align=center width=100% ><h1 style=""FONT-SIZE: 18.5pt;color:#FF6666;font-family:隶书"">"
strEnd="</h1></td>"
strTitle=Finddata(Content,strStart,strEnd)
Response.write "作品:" & strTitle
'得到一般信息
strStart="<td class=xt align=center colspan=2 valign=top style=""border-top: green solid double;border-bottom: green 1px solid;"">"
strEnd="</div></td>"
Result=Finddata(Content,strStart,strEnd)
arrResult=Split(Result,"|")
'分类
strClass=Finddata(arrResult(0),"target=""_blank"">","</a>")
Response.write "<br>作品分类:" & strClass
'作者
strAuthor=FindData(arrResult(1),"target=_blank>","</a>")
Response.write "<br>作者:" & strAuthor
'图片
strStart="<td class=xt width=20% align=center valign=top style=""padding-top:15;padding-boottom:5;border-bottom: green 1px solid;"">"
strEnd="width=100 height=125><br>"
strPicture=Finddata(Content,strStart,strEnd)
strPicture=Finddata(strPicture,"<img src="," ")
Response.write "<br>图片:<img src=" & strPicture & ">"
'内容简介
strStart="<div style=""padding-top:5;padding-bottom:5;padding-left:10;padding-right:5;"">"
strEnd="<span style=""color:#0000FF"">[快乐阅读忠告 ]<br>"
strCont=Finddata(Content,strStart,strEnd)
Response.write "<br>内容简介:" & strCont
'基本信息入库处
Set rs=Server.CreateObject("ADODB.RecordSet")
sql="select * from Books where BookID='"&BookID&"'"
rs.open sql,conn,1,3
  Response.write conn.state & "<br><br><br><br>"
if rs.eof and rs.bof Then
  rs.AddNew
  rs("BookID")=BookID
  rs("BookName")=strTitle
  rs("Author")=strAuthor
  rs("BookCont")=strCont
  rs("BookImg")=strPicture
  rs("Class")=strClass
  rs.Update
  Response.write "<b>"&strTitle & "</b>基本信息入库成功!<br>"
Else
  Response.write "本书已录入!"
  Response.End
End if
'进入章节列表,得到源文件
strListPage="http://www.cmfu.com/readbook.asp?bl_id=" & BookID
Set OXML = server.CreateObject("Microsoft.XMLHTTP")
Content=GetData(strListPage)
strStart="<table border=1 cellPadding=0 cellSpacing=0 width=100% valign=top align=center bordercolorlight=""#CCCCCC"" bordercolordark=""#ccffcc"">"
strEnd="<table border=1 cellPadding=0 cellSpacing=0 width=100% valign=top align=center bordercolorlight=""#CCCCCC"" bordercolordark=""#ccffcc"">"
strListCont=strStart & Finddata(Content,strStart,strEnd)
strSplit="<tr height=26 bgcolor=#E3E3E3>"
arrListCont=Split(strListCont,strSplit)
'本书卷数目
intTome=Ubound(arrListCont)
strChapterName=""
strChapterNum=""
For i = 1 to intTome
  '得到卷标题
  strStart="<font color=black>"
  strEnd="   [ <a href='#'"
  strTome=Finddata(arrListCont(i),strStart,strEnd)
  '分卷标题入库
  strSql="Insert Into Tomes (BookID,TomeName) Values ('"&BookID&"','"&strTome&"')"
  conn.Execute(strSql)
  '取卷ID
  Set rs=Server.CreateObject("ADODB.RecordSet")
  sql="select * from Tomes where TomeName='"&strTome&"' and BookID="&BookID
  rs.open sql,conn,1,1
  TomeID=rs("ID")
  rs.close
  Response.write "卷:<b>"&strTome&"</b>入库成功!<br>"
  '反复取得章节名和章节号
  arrListCont(i)=Replace(arrListCont(i),"'","""")
  Do while Instr(arrListCont(i),"<a href=")<>0
   'Response.write Server.HTMLEncode(arrListCont(i)) & "<br><br><br><br>"
   strStart="<td width=""25%"">"
   strEnd="</td>"
   arrListCont(i)=Right(arrListCont(i),Len(arrListCont(i))-Instr(Lcase(arrListCont(i)),Lcase(strstart))-Len(strStart)+1)
   strTemp=Left(arrListCont(i),Instr(Lcase(arrListCont(i)),Lcase(strEnd))-1)
   arrListCont(i)=Right(arrListCont(i),Len(arrListCont(i))-Instr(Lcase(arrListCont(i)),Lcase(strEnd))-Len(strEnd)+1)
   '取章节号,各章节号间用“||||”分隔
   strStart="<a href=""javascript:gotopage("
   strEnd=")"
   strChapterNum=FindData(strTemp,strStart,strEnd)
   strChapterURL="http://Author4.cmfu.com/books/"&BookID&"/" & strChapterNum & ".txt"
   strChapterCont=GetData(strChapterURL)
   strChapterCont=Replace(strChapterCont,"document.write('","")
   strChapterCont=Replace(strChapterCont,"');","")
   strChapterCont=Replace(strChapterCont,"http://www.cmfu.com","http://www.hfdsoft.com.cn")
   strChapterCont=Replace(strChapterCont,"起点中文网","CodeWorld科技")
   strChapterCont=Replace(strChapterCont,"起点","CodeWorld")
   '取章节名,各章节名间用“||||”分隔
   strStart="target=_top>"
   strEnd="</a>"
   strChapterName=FindData(strTemp,strStart,strEnd)
   '章节入库
   strSql="Insert Into Chapters (BookID,TomeID,ChapterName,ChapterCont,ChapterURL)"
   strSql=strSql & " Values ('"&BookID&"','"&TomeID&"','"&strChapterName&"','"&strChapterCont&"','"&strChapterURL&"')"
   conn.Execute(strSql)
   Response.write "章节:<b>"&strChapterName&"</b>入库成功!<br>"
  Loop
Next
End if
%>
<form action="Default.asp?Action=Analyse" method="post">
<table style="width: 500px" align="center">
<tr>
  <td class="style1"><strong>书号:<input name="BookID" type="text" class="newStyle1">  </strong>
  <input name="Submit1" type="submit" value="开始采集" class="newStyle1"></td>
</tr>
<tr>
  <td class="style2">
   </td>
</tr>
<tr>
  <td class="style1"> </td>
</tr>
</table>
</form>
</body>
</html>



[ 本帖最后由 chinanic 于 2007-3-21 15:43 编辑 ]
附件: 您需要登录才可以下载或查看附件。没有帐号?注册
天行健,君子以自强不息
地势坤,君子以厚德载物
黑色海岸线欢迎您

QQ群:7212260
致力于探索WEB技术精髓:http://www.bitechcn.com
点这里加我!

看不懂啊!不会改啊!
加我Q284339234教我把!!

TOP

返回列表 回复 发帖