JAVA发送HTTP请求
首先,向一个Web站点发送POST请求只需要简单的几步:
注意,这里不需要导入任何第三方包
package com.test;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;

/**
 * Minimal example of posting form data to a web page with
 * {@link java.net.URLConnection} and printing the response body.
 * Only the JDK is used; no third-party library is required.
 */
public class TestPost {

    /**
     * Posts a fixed form body to a fixed URL and prints the response to
     * standard output, one line at a time, each terminated with CRLF.
     *
     * @throws IOException if the connection cannot be opened, written to, or read from
     */
    public static void testPost() throws IOException {
        // A URLConnection is obtained directly from the URL.
        URL url = new URL("http://www.faircanton.com/message/check.asp");
        URLConnection connection = url.openConnection();

        // Connections are read-only by default (e.g. for downloading a page);
        // enabling output lets us send a request body to the page.
        connection.setDoOutput(true);

        // Wrap the output stream in a Writer and send the form fields.
        // The body is the whole point of the POST.
        OutputStreamWriter out = new OutputStreamWriter(connection.getOutputStream(), "8859_1");
        try {
            out.write("username=kevin&password=*********");
            out.flush();
        } finally {
            // Close even when write/flush fails so the stream is not leaked.
            out.close();
        }

        // Once the request has been sent, read back the server's reply.
        StringBuilder response = new StringBuilder();
        BufferedReader reader = new BufferedReader(openResponseReader(connection));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                response.append(line).append("\r\n");
            }
        } finally {
            reader.close();
        }
        System.out.println(response.toString());
    }

    /**
     * Opens a character reader over the response body. The charset announced in
     * the Content-Type header is used when it is present and supported by this
     * JVM; otherwise the platform default charset is used.
     */
    private static Reader openResponseReader(URLConnection connection) throws IOException {
        // Fetch the stream first so that a failed request surfaces as an IOException here.
        InputStream in = connection.getInputStream();
        String charsetName = charsetFromContentType(connection.getContentType());
        if (charsetName != null) {
            return new InputStreamReader(in, charsetName);
        }
        return new InputStreamReader(in);
    }

    /**
     * Extracts a supported charset name from a Content-Type header value such as
     * {@code text/html; charset=gb2312}. Returns {@code null} when the header is
     * missing, has no charset parameter, or names a charset this JVM cannot decode.
     */
    private static String charsetFromContentType(String contentType) {
        if (contentType == null) {
            return null;
        }
        String prefix = "charset=";
        String[] parts = contentType.split(";");
        // parts[0] is the media type itself; parameters start at index 1.
        for (int i = 1; i < parts.length; i++) {
            String part = parts[i].trim();
            if (!part.regionMatches(true, 0, prefix, 0, prefix.length())) {
                continue;
            }
            String name = part.substring(prefix.length()).trim();
            if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                name = name.substring(1, name.length() - 1);
            }
            try {
                if (name.length() > 0 && Charset.isSupported(name)) {
                    return name;
                }
            } catch (IllegalArgumentException ignored) {
                // Syntactically illegal charset name: fall back to the default charset.
            }
            return null;
        }
        return null;
    }

    /** Runs the POST example. */
    public static void main(String[] args) throws IOException {
        testPost();
    }
}
执行后返回的HTML
<html> <head> <meta http-equiv="Content-Type" content="text/html; charset=gb2312" /> <title>账户已经冻结</title> <style type="text/css"> <!-- .temp { font-family: Arial, Helvetica, sans-serif; font-size: 14px; font-weight: bold; color: #666666; margin: 10px; padding: 10px; border: 1px solid #999999; } .STYLE1 {color: #FF0000} --> </style> </head> <body> <p> </p> <p> </p> <p> </p> <table width="700" border="0" align="center" cellpadding="0" cellspacing="0" class="temp"> <tr> <td width="135" height="192"><div align="center"><img src="images/err.jpg" width="54" height="58"></div></td> <td width="563"><p><span class="STYLE1">登录失败</span><br> <br> 您的帐户活跃指数低于系统限制,您的帐户已经被暂时冻结。<br> 请您联系网络主管或者人事主管重新激活您的帐户。</p> </td> </tr> </table> <p> </p> </body> </html>
一些Web站点使用POST而不是GET来提交数据,这是因为POST能够携带更多的数据,而且数据放在请求体中而不是拼接在URL里,这使得URL看起来不那么庞大。使用上面列出的代码,Java程序可以轻松地和这些站点实现对话。
得到HTML以后,分析内容就相对轻松了。现在可以使用htmlparser(一个第三方库,需要先加入classpath)来解析。下面是一个简单的示例程序,这里不做过多解释,相信代码能够说明一切!
package com.test; import org.htmlparser.Node; import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.filters.TagNameFilter; import org.htmlparser.tags.TableTag; import org.htmlparser.util.NodeList; /** * 标题:利用htmlparser提取网页纯文本的例子 */ public class TestHTMLParser { public static void testHtml() { try { String sCurrentLine; String sTotalString; sCurrentLine = ""; sTotalString = ""; java.io.InputStream l_urlStream; java.net.URL l_url = new java.net.URL("http://www.ideagrace.com/html/doc/2006/07/04/00929.html"); java.net.HttpURLConnection l_connection = (java.net.HttpURLConnection) l_url.openConnection(); l_connection.connect(); l_urlStream = l_connection.getInputStream(); java.io.BufferedReader l_reader = new java.io.BufferedReader(new java.io.InputStreamReader(l_urlStream)); while ((sCurrentLine = l_reader.readLine()) != null) { sTotalString += sCurrentLine+"\r\n"; // System.out.println(sTotalString); } String testText = extractText(sTotalString); System.out.println( testText ); } catch (Exception e) { e.printStackTrace(); } } public static String extractText(String inputHtml) throws Exception { StringBuffer text = new StringBuffer(); Parser parser = Parser.createParser(new String(inputHtml.getBytes(),"GBK"), "GBK"); // 遍历所有的节点 NodeList nodes = parser.extractAllNodesThatMatch(new NodeFilter() { public boolean accept(Node node) { return true; } }); System.out.println(nodes.size()); //打印节点的数量 for (int i=0;i<nodes.size();i++){ Node nodet = nodes.elementAt(i); //System.out.println(nodet.getText()); text.append(new String(nodet.toPlainTextString().getBytes("GBK"))+"\r\n"); } return text.toString(); } public static void test5(String resource) throws Exception { Parser myParser = new Parser(resource); myParser.setEncoding("GBK"); String filterStr = "table"; NodeFilter filter = new TagNameFilter(filterStr); NodeList nodeList = myParser.extractAllNodesThatMatch(filter); TableTag tabletag = (TableTag) nodeList.elementAt(11); } public static void 
main(String[] args) throws Exception { // test5("http://www.ggdig.com"); testHtml(); } }
相关推荐
knightwatch 2020-07-19
标题无所谓 2020-03-23
似水流年梦 2020-03-04
Guanjs0 2020-11-09
wmsjlihuan 2020-09-15
shishengsoft 2020-09-15
poplpsure 2020-08-17
CyborgLin 2020-08-15
Richardxx 2020-07-26
sunnyhappy0 2020-07-26
wcqwcq 2020-07-04
chichichi0 2020-06-16
YAruli 2020-06-13
JF0 2020-06-13
84423067 2020-06-12
心丨悦 2020-06-11
zkwgpp 2020-06-04
stoneechogx 2020-06-04