<!--#include file="conn.asp" --> <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> <html> <head> <meta http-equiv="Content-Type" c> <meta http-equiv="Content-Language" c> <title>文章采集分析程序</title> <style type="text/css"> .style1 { font-size: small; } .style2 { text-align: center; } .newStyle1 { border: 1px solid #00CC00; } </style> </head> <body> <% '根据关键字符串得到内容 Public Function FindData(str,strStart,strEnd) Str1=Right(Str,Len(Str)-Instr(Lcase(Str),Lcase(strstart))-Len(strStart)+1) Str1=Left(str1,Instr(Lcase(str1),Lcase(strEnd))-1) Finddata=Str1 End function '利用流进行中文编码 Private Function BytesToBstr(strBody) Set ADOS = Server.CreateObject("ADODB.Stream") Dim Bdat Bdat=strBody ADOS.Type = 1 ADOS.Mode =3 ADOS.Open ADOS.Write Bdat ADOS.Position = 0 ADOS.Type = 2 ADOS.Charset = "GB2312" BytesToBstr = ADOS.ReadText ADOS.Close End Function '利用OXML得到数据 Public Function GetData(strURL) 'on error resume next SourceCode = OXML.open ("GET",strURL,False) OXML.send() if OXML.readystate<>4 then exit function GetData = BytesToBstr(OXML.responseBody) if err.number<>0 then err.Clear End Function Dim Content,strStart,strEnd Dim strTitle,strResult,strClass,strAuthor,strPicture if Request("Action")="Analyse" Then Server.ScriptTimeOut=999999999 BookID=CLng(Trim(Request("BookID"))) '链接到书页,得到源文件 strBookPage="http://www.cmfu.com/showbook.asp?bl_id=" & BookID Set OXML = server.CreateObject("Microsoft.XMLHTTP") Content=GetData(strBookPage) Set OXML=Nothing '标题 strStart="<td colspan=2 class=zt valign=top align=center width=100% ><h1 style=""FONT-SIZE: 18.5pt;color:#FF6666;font-family:隶书"">" strEnd="</h1></td>" strTitle=Finddata(Content,strStart,strEnd) Response.write "作品:" & strTitle '得到一般信息 strStart="<td class=xt align=center colspan=2 valign=top style=""border-top: green solid double;border-bottom: green 1px solid;"">" strEnd="</div></td>" Result=Finddata(Content,strStart,strEnd) arrResult=Split(Result,"|") '分类 strClass=Finddata(arrResult(0),"target=""_blank"">","</a>") Response.write "<br>作品分类:" & strClass '作者 strAuthor=FindData(arrResult(1),"target=_blank>","</a>") Response.write "<br>作者:" & strAuthor '图片 strStart="<td class=xt width=20% align=center valign=top style=""padding-top:15;padding-boottom:5;border-bottom: green 1px solid;"">" strEnd="width=100 height=125><br>" strPicture=Finddata(Content,strStart,strEnd) strPicture=Finddata(strPicture,"<img src="," ") Response.write "<br>图片:<img src=" & strPicture & ">" '内容简介 strStart="<div style=""padding-top:5;padding-bottom:5;padding-left:10;padding-right:5;"">" strEnd="<span style=""color:#0000FF"">[快乐阅读忠告 ]<br>" strCont=Finddata(Content,strStart,strEnd) Response.write "<br>内容简介:" & strCont '基本信息入库处 Set rs=Server.CreateObject("ADODB.RecordSet") sql="select * from Books where BookID='"&BookID&"'" rs.open sql,conn,1,3 Response.write conn.state & "<br><br><br><br>" if rs.eof and rs.bof Then rs.AddNew rs("BookID")=BookID rs("BookName")=strTitle rs("Author")=strAuthor rs("BookCont")=strCont rs("BookImg")=strPicture rs("Class")=strClass rs.Update Response.write "<b>"&strTitle & "</b>基本信息入库成功!<br>" Else Response.write "本书已录入!" Response.End End if '进入章节列表,得到源文件 strListPage="http://www.cmfu.com/readbook.asp?bl_id=" & BookID Set OXML = server.CreateObject("Microsoft.XMLHTTP") Content=GetData(strListPage) strStart="<table border=1 cellPadding=0 cellSpacing=0 width=100% valign=top align=center bordercolorlight=""#CCCCCC"" bordercolordark=""#ccffcc"">" strEnd="<table border=1 cellPadding=0 cellSpacing=0 width=100% valign=top align=center bordercolorlight=""#CCCCCC"" bordercolordark=""#ccffcc"">" strListCont=strStart & Finddata(Content,strStart,strEnd) strSplit="<tr height=26 bgcolor=#E3E3E3>" arrListCont=Split(strListCont,strSplit) '本书卷数目 intTome=Ubound(arrListCont) strChapterName="" strChapterNum="" For i = 1 to intTome '得到卷标题 strStart="<font color=black>" strEnd=" [ <a href='#'" strTome=Finddata(arrListCont(i),strStart,strEnd) '分卷标题入库 strSql="Insert Into Tomes (BookID,TomeName) Values ('"&BookID&"','"&strTome&"')" conn.Execute(strSql) '取卷ID Set rs=Server.CreateObject("ADODB.RecordSet") sql="select * from Tomes where TomeName='"&strTome&"' and BookID="&BookID rs.open sql,conn,1,1 TomeID=rs("ID") rs.close Response.write "卷:<b>"&strTome&"</b>入库成功!<br>" '反复取得章节名和章节号 arrListCont(i)=Replace(arrListCont(i),"'","""") Do while Instr(arrListCont(i),"<a href=")<>0 'Response.write Server.HTMLEncode(arrListCont(i)) & "<br><br><br><br>" strStart="<td width=""25%"">" strEnd="</td>" arrListCont(i)=Right(arrListCont(i),Len(arrListCont(i))-Instr(Lcase(arrListCont(i)),Lcase(strstart))-Len(strStart)+1) strTemp=Left(arrListCont(i),Instr(Lcase(arrListCont(i)),Lcase(strEnd))-1) arrListCont(i)=Right(arrListCont(i),Len(arrListCont(i))-Instr(Lcase(arrListCont(i)),Lcase(strEnd))-Len(strEnd)+1) '取章节号,各章节号间用“||||”分隔 strStart="<a href=""javascript:gotopage(" strEnd=")" strChapterNum=FindData(strTemp,strStart,strEnd) strChapterURL="http://Author4.cmfu.com/books/"&BookID&"/" & strChapterNum & ".txt" strChapterCont=GetData(strChapterURL) strChapterCont=Replace(strChapterCont,"document.write('","") strChapterCont=Replace(strChapterCont,"');","") strChapterCont=Replace(strChapterCont,"http://www.cmfu.com","http://www.hfdsoft.com.cn") strChapterCont=Replace(strChapterCont,"起点中文网","CodeWorld科技") strChapterCont=Replace(strChapterCont,"起点","CodeWorld") '取章节名,各章节名间用“||||”分隔 strStart="target=_top>" strEnd="</a>" strChapterName=FindData(strTemp,strStart,strEnd) '章节入库 strSql="Insert Into Chapters (BookID,TomeID,ChapterName,ChapterCont,ChapterURL)" strSql=strSql & " Values ('"&BookID&"','"&TomeID&"','"&strChapterName&"','"&strChapterCont&"','"&strChapterURL&"')" conn.Execute(strSql) Response.write "章节:<b>"&strChapterName&"</b>入库成功!<br>" Loop Next End if %> <form action="Default.asp?Action=Analyse" method="post"> <table style="width: 500px" align="center"> <tr> <td class="style1"><strong>书号:<input name="BookID" type="text" class="newStyle1"> </strong> <input name="Submit1" type="submit" value="开始采集" class="newStyle1"></td> </tr> <tr> <td class="style2"> </td> </tr> <tr> <td class="style1"> </td> </tr> </table> </form> </body> </html> |
欢迎光临 黑色海岸线论坛 (http://bbs.thysea.com/) | Powered by Discuz! 7.2 |