了解HttpClient:使用HttpClient获取指定的网页
引用参考:
--HttpClient超时设置详解
http://blog.csdn.net/u011191463/article/details/78664896
--HttpClient4.5版本设置连接超时时间
https://my.oschina.net/wallechen/blog/526642
--HttpClient PoolingClientConnectionManager参数含义
http://geniuszhe.blog.163.com/blog/static/11934682014102521241984/
package com.tender.news.crawler;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;

/**
 * Fetches the body of a web page with an HTTP GET request using
 * Apache Commons HttpClient.
 */
public class HttpClientGet {

    /** Charset name that triggers the GBK fallback when the server reports it. */
    private static final String DEFAULT_HTTP_CHARSET = "ISO-8859-1";

    /** Charset used for decoding when the server reports {@link #DEFAULT_HTTP_CHARSET} or nothing. */
    private static final String FALLBACK_CHARSET = "gbk";

    /**
     * Downloads the page at {@code url} and returns its body as text.
     *
     * <p>The body is decoded with the charset reported by the response, except
     * that a reported charset of ISO-8859-1 (or none at all) is replaced by GBK.
     * Lines are concatenated without their line terminators. A non-200 status
     * is reported on {@code System.err}, but the body is still read.
     *
     * @param url address of the page to fetch
     * @return the page content, or an empty string when the request fails,
     *         the response has no body, or an I/O error occurs
     */
    public String getHtml(String url) {
        String result = "";
        HttpClient httpClient = new HttpClient();
        GetMethod getMethod = new GetMethod(url);
        // Use the library's default retry policy.
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
                new DefaultHttpMethodRetryHandler());

        InputStream ins = null;
        BufferedReader br = null;
        try {
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("方法失败: " + getMethod.getStatusLine());
            }

            // Stream the body instead of buffering it as a byte array.
            ins = getMethod.getResponseBodyAsStream();
            String serverCharset = getMethod.getResponseCharSet();
            System.out.println("编码是?" + serverCharset);

            // equalsIgnoreCase avoids the locale-sensitive toUpperCase() and
            // tolerates a null charset.
            String charset = serverCharset;
            if (charset == null || DEFAULT_HTTP_CHARSET.equalsIgnoreCase(charset)) {
                charset = FALLBACK_CHARSET;
            }

            // The stream may be null when the response carries no body.
            if (ins != null) {
                // Decode once with the effective charset rather than decoding
                // with the server charset and re-encoding afterwards.
                br = new BufferedReader(new InputStreamReader(ins, charset));
                StringBuilder content = new StringBuilder();
                String line;
                while ((line = br.readLine()) != null) {
                    content.append(line);
                }
                result = content.toString();
            }

            System.out.println("服务器编码是:" + serverCharset);
        } catch (HttpException e) {
            // Fatal protocol-level failure: bad protocol or malformed response.
            System.out.println("请检查您所提供的HTTP地址!");
            e.printStackTrace();
        } catch (IOException e) {
            // Transport-level failure.
            e.printStackTrace();
        } finally {
            try {
                // Closing the reader also closes the wrapped stream; close the
                // raw stream directly only when no reader was created.
                if (br != null) {
                    br.close();
                } else if (ins != null) {
                    ins.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            } finally {
                // Always hand the connection back, even if closing failed.
                getMethod.releaseConnection();
            }
        }
        return result;
    }
}
相关推荐
84487600 2020-08-16
似水流年梦 2020-08-09
knightwatch 2020-07-26
fengchao000 2020-06-16
标题无所谓 2020-06-14
sicceer 2020-06-12
yanghui0 2020-06-09
yanghui0 2020-06-09
创建一个 HttpClient 实例,这个实例需要调用 Dispose 方法释放资源,这里使用了 using 语句。接着调用 GetAsync,给它传递要调用的方法的地址,向服务器发送 Get 请求。
wanghongsha 2020-06-04
jiaguoquan00 2020-05-26
zhaolisha 2020-05-16
wanghongsha 2020-05-05
wanghongsha 2020-04-14
knightwatch 2020-04-11
hygbuaa 2020-03-27
zergxixi 2020-03-24