httpclient获取百度真实url(java)
百度搜索结果页中显示的 URL 只是临时的跳转链接,我们真正想要的是点击该链接后浏览器地址栏中显示的最终网址。
如:
https://www.baidu.com/link?url=a2VZ4Xp9ukhitVl8mvA9gjMEQXz02cI51SVwYmtbFaLv61OTHeSzmrk0CkX-UN6tqAvrvpGTppbygbyuRyXf0Y__Jy404M0S4-aJ1b_DSDS&wd=&eqid=8761145d00014ab10000000359549e56
转换为
http://muzhi.baidu.com/question/1366450327137412899.html
需要的jar包:
commons-codec-1.6.jar
commons-httpclient-3.1.jar
commons-logging.jar
--------------------------------------代码
packagehttpclient;
importjava.io.IOException;
importorg.apache.commons.httpclient.HttpClient;
importorg.apache.commons.httpclient.HttpException;
importorg.apache.commons.httpclient.HttpStatus;
importorg.apache.commons.httpclient.methods.GetMethod;
publicclassHttpClient_Get_Url{
/**
*根据百度url,获取原本url
*@throwsIOException
*@throwsHttpException
**/
publicstaticStringGetTrueUrlByBaiduUrl(Stringbaidu_url)throwsHttpException,IOException{
//---------------------------1
HttpClientclient=newHttpClient();
//设置代理IP
//client.getHostConfiguration().setProxy("172.22.40.20",8080);
GetMethodgetMethod=newGetMethod(baidu_url);
//获取状态码
intstateCode=client.executeMethod(getMethod);
Stringtext=getMethod.getResponseBodyAsString();
//释放
getMethod.releaseConnection();
if(stateCode==HttpStatus.SC_OK){
text=text.split("URL='")[1].split("'")[0];
//System.out.println("访问成功,网址:"+text);
returntext;
}
returnnull;
}
publicstaticvoidmain(String[]args)throwsHttpException,IOException{
Stringurl="https://www.baidu.com/link?url=a2VZ4Xp9ukhitVl8mvA9gjMEQXz02cI51SVwYmtbFaLv61OTHeSzmrk0CkX-UN6tqAvrvpGTppbygbyuRyXf0Y__Jy404M0S4-aJ1b_DSDS&wd=&eqid=8761145d00014ab10000000359549e56";
System.out.println(GetTrueUrlByBaiduUrl(url));;
}
}