HttpClient 模拟登录并解析网页数据
//Post Method 例子(模拟登录) public class PostExample { private static HttpClient client; private static Cookie[] cookies; private static String WEB_SITE = "http://www.xxx.cn"; private static String LOGIN = "http://www.xxx.cn/login.html"; private static String INDEX = "http://www.xxx.cn/index.html"; private static int WEB_PORT = 80; private static String USER_NAME = "username"; private static String PASSWORD = "password"; static { client = new HttpClient(); client.getHttpConnectionManager().getParams().setSoTimeout(15000); client.getHttpConnectionManager().getParams().setConnectionTimeout(15000); cookies = client.getState().getCookies(); } private static void testLogin() { client.getHostConfiguration().setHost(WEB_SITE, WEB_PORT); GetMethod get = new GetMethod(LOGIN); GetExample.processGet(client, get, cookies, false, false); PostMethod post = new PostMethod(LOGIN); NameValuePair[] params = new NameValuePair[] { new NameValuePair("email", USER_NAME), new NameValuePair("pass", PASSWORD), new NameValuePair("remember", "1"), new NameValuePair("goto", "/index.html") }; processPost(client, post, params, cookies, false, false); Header header = post.getResponseHeader("location"); String url = header.getValue(); if (url.equals(INDEX)) { System.out.println("登录成功!"); } else { System.out.println("登录失败,请检查请求参数以及url是否正确..."); return; } get = new GetMethod(url); String result = GetExample.processGet(client, get, cookies, false, true); //将得到的结果集写到文件里 String filePath = Util.writerFile(result); //解析html //ParserExample.parserHtml(filePath); } public static String processPost(HttpClient client, PostMethod post, NameValuePair[] params, Cookie[] cookies, boolean needAppendCookies, boolean needResponse) { try { post.setRequestHeader("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.2.10) Gecko/20100914 Firefox/3.6.10"); if (params != null && params.length > 0) { post.setRequestBody(params); } if (cookies != null) { post.setRequestHeader("cookie", cookies.toString()); } // 
设置post方法请求超时为 10秒 post.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, 10000); client.executeMethod(post); if (needAppendCookies) { cookies = client.getState().getCookies(); client.getState().addCookies(cookies); } if (needResponse) { return post.getResponseBodyAsString(); } } catch (HttpException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { client.getParams().clear(); post.releaseConnection(); try { Thread.sleep(3000); } catch (InterruptedException e) { e.printStackTrace(); } } return null; } public static void main(String[] args) { testLogin(); } }
// Writes the result of a GET/POST response to a file.
public class Util {

    /**
     * Writes the given text to a new, randomly named .html file under "d:\".
     * The text is encoded with the platform default charset.
     *
     * @param result the text to write; may be null
     * @return the absolute path of the written file, or null when
     *         {@code result} is null or the file could not be written
     */
    public static String writerFile(String result) {
        if (result == null) {
            // Nothing to write; report failure the same way as an I/O error.
            return null;
        }
        File file = new File("d:\\" + UUID.randomUUID() + ".html");
        byte[] bytes = result.getBytes();
        // try-with-resources closes the stream even when write/flush throws.
        try (FileOutputStream fos = new FileOutputStream(file)) {
            fos.write(bytes, 0, bytes.length);
            fos.flush();
            return file.getAbsolutePath();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }
}
//以htmlparser和正则两种方式解析得到网页上的内容 public class ParserExample { public static void parserHtml(String filePath) { parserName(filePath); parserMemberCount(); } //解析会员名 private static void parserName(String filePath) { try { Parser parser = new Parser(filePath); parser.setEncoding("gbk"); NodeFilter filter = new AndFilter(new TagNameFilter("a"), new HasAttributeFilter("href", "http://www.xxx.cn/member/index.html")); NodeList nodeList = parser.extractAllNodesThatMatch(filter); if (nodeList != null) { LinkTag node = (LinkTag) nodeList.elementAt(0); String name = node.getChildren().elementAt(0).toPlainTextString(); if (name == null) { System.out.println("解析姓名出错,请检查网页结构是否发生变化"); } else { System.out.println("姓名:" + name); } } } catch (ParserException e) { e.printStackTrace(); } } //解析网站会员数 private static void parserMemberCount() { HttpClient client = new HttpClient(); Cookie[] cookies = client.getState().getCookies(); client.getHostConfiguration().setHost("http://www.xxx.cn", 80); GetMethod get = new GetMethod("http://www.xxx.cn/ajax/memberCount.html"); String result = GetExample.processGet(client, get, cookies, false, true); Pattern pattern = Pattern.compile("var value=(.*?);"); Matcher matcher = pattern.matcher(result); if (matcher.find()) { String memberCount = matcher.group(1); System.out.println("会员:" + memberCount); } } }
相关推荐
似水流年梦 2019-12-19
84487600 2020-08-16
似水流年梦 2020-08-09
knightwatch 2020-07-26
fengchao000 2020-06-16
标题无所谓 2020-06-14
sicceer 2020-06-12
yanghui0 2020-06-09
yanghui0 2020-06-09
创建一个 HttpClient 实例,这个实例需要调用 Dispose 方法释放资源,这里使用了 using 语句。接着调用 GetAsync,给它传递要调用的方法的地址,向服务器发送 Get 请求。
wanghongsha 2020-06-04
jiaguoquan00 2020-05-26
zhaolisha 2020-05-16
wanghongsha 2020-05-05
wanghongsha 2020-04-14
knightwatch 2020-04-11
hygbuaa 2020-03-27