httpclient获取百度真实url(java)

百度搜索结果页中显示的 url 只是临时的跳转链接,我们真正想要的是点进去之后浏览器地址栏里显示的那个真实网址。

如:

https://www.baidu.com/link?url=a2VZ4Xp9ukhitVl8mvA9gjMEQXz02cI51SVwYmtbFaLv61OTHeSzmrk0CkX-UN6tqAvrvpGTppbygbyuRyXf0Y__Jy404M0S4-aJ1b_DSDS&wd=&eqid=8761145d00014ab10000000359549e56

转换为

http://muzhi.baidu.com/question/1366450327137412899.html

需要的jar包:

commons-codec-1.6.jar

commons-httpclient-3.1.jar

commons-logging.jar

--------------------------------------代码

package httpclient;

import java.io.IOException;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;

publicclassHttpClient_Get_Url{

/**

*根据百度url,获取原本url

*@throwsIOException

*@throwsHttpException

**/

publicstaticStringGetTrueUrlByBaiduUrl(Stringbaidu_url)throwsHttpException,IOException{

//---------------------------1

HttpClientclient=newHttpClient();

//设置代理IP

//client.getHostConfiguration().setProxy("172.22.40.20",8080);

GetMethodgetMethod=newGetMethod(baidu_url);

//获取状态码

intstateCode=client.executeMethod(getMethod);

Stringtext=getMethod.getResponseBodyAsString();

//释放

getMethod.releaseConnection();

if(stateCode==HttpStatus.SC_OK){

text=text.split("URL='")[1].split("'")[0];

//System.out.println("访问成功,网址:"+text);

returntext;

}

returnnull;

}

publicstaticvoidmain(String[]args)throwsHttpException,IOException{

Stringurl="https://www.baidu.com/link?url=a2VZ4Xp9ukhitVl8mvA9gjMEQXz02cI51SVwYmtbFaLv61OTHeSzmrk0CkX-UN6tqAvrvpGTppbygbyuRyXf0Y__Jy404M0S4-aJ1b_DSDS&wd=&eqid=8761145d00014ab10000000359549e56";

System.out.println(GetTrueUrlByBaiduUrl(url));;

}

}

相关推荐