使用fastjson解析json抓取新浪新闻文章

首先看两个简单的fastjson使用示例

例子一

package ivyy.taobao.com.domain.json;

import java.util.Iterator;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;


/**
 * Example 1: assembles a small JSON array by hand, parses it with fastjson
 * and prints the {@code age} value of every element.
 *
 * @author jilongliang
 * @version 1.0 (2014-12-19)
 */
public class JsonTest1 {
	public static void main(String[] args) {
		// Assembled text:
		// [{'age':22,'sex':'男','userName':'周伯通'},{'age':22,'sex':'男','userName':'令狐冲'}]
		StringBuilder json = new StringBuilder();
		json.append("[");
		appendPerson(json, "'周伯通'");
		json.append(",");// separator between the two array elements
		appendPerson(json, "'令狐冲'");
		json.append("]");

		JSONArray people = JSONArray.parseArray(json.toString());
		for (Object element : people) {
			JSONObject person = (JSONObject) element;
			System.out.println(person.get("age").toString());
		}
	}

	/**
	 * Appends one object literal with fixed {@code age}/{@code sex} values and
	 * the given, already quoted, user name.
	 */
	private static void appendPerson(StringBuilder out, String quotedName) {
		out.append("{");
		out.append("'age'").append(":").append("22").append(",");
		out.append("'sex'").append(":").append("'男'").append(",");
		out.append("'userName'").append(":").append(quotedName);
		out.append("}");
	}
}

 例子二

package ivyy.taobao.com.domain.json;

import ivyy.taobao.com.entity.Classz;
import ivyy.taobao.com.entity.Student;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;


/**
 * Example 2: serializes a {@code Classz} holding one {@code Student} to JSON
 * with fastjson, prints the generically parsed result, then maps the JSON
 * back to a {@code Classz} and prints the first student's sex.
 *
 * @author jilongliang
 * @version 1.0 (2014-12-19)
 */
public class JsonTest2 {
	public static void main(String[] args) {
		Student student = new Student();
		student.setAge(22);
		student.setUserName("xiaoliang");
		student.setSex("男");

		Classz source = new Classz();
		source.getStudents().add(student);

		String json = JSON.toJSONString(source);

		// Untyped parse: print whatever object fastjson builds for the text.
		System.out.println(JSON.parse(json));

		// Typed parse: bind the same text back onto the entity classes.
		Classz restored = JSON.parseObject(json, Classz.class);
		Student first = restored.getStudents().get(0);
		System.out.println(first.getSex());
	}
}

 例子一和例子二用到的实体类

package ivyy.taobao.com.entity;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * Serializable entity that groups a list of {@link Student} objects.
 *
 * @author liangjl
 * @version 1.0 (2014-12-19)
 */
public class Classz implements Serializable {

	/** Members of this class; initialised to an empty, mutable list. */
	private List<Student> students = new ArrayList<Student>();

	/**
	 * @return the internal student list itself (not a copy), so callers may
	 *         add to it directly
	 */
	public List<Student> getStudents() {
		return this.students;
	}

	/**
	 * @param students list that replaces the current one
	 */
	public void setStudents(List<Student> students) {
		this.students = students;
	}
}
package ivyy.taobao.com.entity;

import java.io.Serializable;

/**
 * Serializable entity describing a student by age, sex and user name.
 * All properties start out as {@code null} until set.
 *
 * @author liangjl
 * @version 1.0 (2014-12-19)
 */
public class Student implements Serializable {

	private Integer age;
	private String sex;
	private String userName;

	/** @return the age, or {@code null} if never set */
	public Integer getAge() {
		return this.age;
	}

	/** @param age the age to store */
	public void setAge(Integer age) {
		this.age = age;
	}

	/** @return the sex, or {@code null} if never set */
	public String getSex() {
		return this.sex;
	}

	/** @param sex the sex to store */
	public void setSex(String sex) {
		this.sex = sex;
	}

	/** @return the user name, or {@code null} if never set */
	public String getUserName() {
		return this.userName;
	}

	/** @param userName the user name to store */
	public void setUserName(String userName) {
		this.userName = userName;
	}
}

例子三

package ivyy.taobao.com.domain.json;

import ivyy.taobao.com.utils.GlobalConstants;
import ivyy.taobao.com.utils.HttpRequestUtils;

import java.util.Iterator;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;

/**
 * Example 3: downloads one page of the Sina news list, unwraps the
 * callback-wrapped JSON response, and prints title, url, keywords, image and
 * media name of every entry using fastjson.
 *
 * @author liangjilong
 * @version 1.0 (2015-1-4)
 */
public class SinaNew {

	/**
	 * Runs the example against page 2 of the list in JSON format.
	 *
	 * @param args unused
	 * @throws Exception if the response is empty or not wrapped as expected,
	 *         if it lacks the {@code result.data} array, or if fetching an
	 *         article page fails
	 */
	public static void main(String[] args) throws Exception {

		String requestURL = GlobalConstants.getUrl(2, "json");
		String jsonText = HttpRequestUtils.HttpURLConnRequest(requestURL, "GET");

		// The request helper yields an empty string when the call failed;
		// stop here with a clear message instead of failing inside the parser.
		if (jsonText.isEmpty()) {
			throw new IllegalStateException("Empty response from " + requestURL);
		}

		// The payload is expected in the form callback(...): keep only the
		// text between the first '(' and the last ')'.
		int start = jsonText.indexOf("(") + 1;
		int end = jsonText.lastIndexOf(")");
		if (start <= 0 || end < start) {
			throw new IllegalStateException("Response is not wrapped in callback(...): " + jsonText);
		}

		JSONObject root = JSONObject.parseObject(jsonText.substring(start, end));
		// Navigate the parsed tree directly instead of serialising each level
		// back to a string and parsing it again.
		JSONObject resObj = root == null ? null : root.getJSONObject("result");
		JSONArray dataArr = resObj == null ? null : resObj.getJSONArray("data");
		if (dataArr == null) {
			throw new IllegalStateException("Response has no result.data array: " + jsonText);
		}

		for (Object element : dataArr) {
			JSONObject object = (JSONObject) element;

			// getString returns null for an absent key rather than throwing.
			String title = object.getString("title");
			String url = object.getString("url");
			String keywords = object.getString("keywords");
			String img = object.getString("img");
			String media_name = object.getString("media_name");

			// Fetch the article body as the example always did; the text
			// itself is not part of the printed summary.
			GlobalConstants.getNewsContent(url);

			// The fourth line is the image address (previously the url was
			// printed a second time and img was never shown).
			System.out.println(title + "\n" + url + "\n" + keywords + "\n" + img + "\n" + media_name);
		}
	}
}

 

package ivyy.taobao.com.utils;

import java.net.URL;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/**
 * Helpers for the Sina news example: builds the list-API request URL and
 * extracts the article body from a news page.
 *
 * @author liangjilong
 * @version 1.0 (2015-1-4)
 */
public class GlobalConstants {

	/**
	 * Builds the request URL for the Sina roll-news list API.
	 *
	 * @param page   page number, appended as the {@code page} query parameter
	 * @param format response format, appended as the {@code format} query
	 *               parameter (e.g. XML or JSON); it is appended as given,
	 *               without URL encoding
	 * @return the complete URL, including the fixed filters and
	 *         {@code callback=newsloader}
	 */
	public static String getUrl(Integer page,String format){
		StringBuilder buffer=new StringBuilder("http://api.roll.news.sina.com.cn/zt_list?channel=news");
		buffer.append("&cat_1=shxw");// news category filter
		buffer.append("&cat_2==zqsk||=qwys||=shwx||=fz-shyf");
		buffer.append("&level==1||=2");// level filter
		buffer.append("&show_ext=1");
		buffer.append("&show_all=1");// show everything
		buffer.append("&show_num=22");// entries per page
		buffer.append("&tag=1");
		buffer.append("&format=").append(format);
		buffer.append("&page=").append(page);
		buffer.append("&callback=newsloader");
		return buffer.toString();
	}


	/**
	 * Downloads a news page and returns the inner HTML of the element whose
	 * id is {@code artibody}, which holds the article text on Sina pages.
	 *
	 * @param url address of the article page
	 * @return the inner HTML of the {@code artibody} element; an empty string
	 *         when the page contains no such element
	 * @throws Exception if the URL is malformed or the page cannot be fetched
	 *         and parsed within the 3000 ms timeout
	 */
	public static String getNewsContent(String url) throws Exception{
		Document doc=Jsoup.parse(new URL(url), 3000);
		if(doc==null){
			return "网络异常";
		}
		// getElementById yields null when the page has no element with that
		// id; report that as "no content" instead of failing with an NPE.
		if(doc.getElementById("artibody")==null){
			return "";
		}
		return doc.getElementById("artibody").html();
	}
}

 源代码:http://download.csdn.net/detail/jilongliang/8324543

package ivyy.taobao.com.utils;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Minimal HTTP helper built on {@link HttpURLConnection}.
 *
 * @author liangjilong
 * @version 1.0 (2015-1-4)
 */
public class HttpRequestUtils {
	/**
	 * Sends an HTTP request and returns the response body decoded as UTF-8.
	 * <p>
	 * The body is read line by line and the lines are concatenated without
	 * their line terminators. The call is best effort: any failure is printed
	 * to standard error and whatever was read up to that point (possibly the
	 * empty string) is returned. No request body is written, so a POST is sent
	 * without payload.
	 *
	 * @param requestUrl address to request
	 * @param method     HTTP method to use, e.g. {@code GET} or {@code POST}
	 * @return the response text read so far; never {@code null}
	 */
	public static String HttpURLConnRequest(String requestUrl,String method) {
		StringBuilder buffer = new StringBuilder();
		HttpURLConnection httpUrlConn = null;
		BufferedReader bufferedReader = null;
		try {
			URL url = new URL(requestUrl);
			httpUrlConn = (HttpURLConnection) url.openConnection();
			httpUrlConn.setDoInput(true);
			httpUrlConn.setRequestMethod(method);
			httpUrlConn.setUseCaches(false);
			httpUrlConn.setInstanceFollowRedirects(true); // follow redirects
			httpUrlConn.connect();

			// Turn the response stream into text.
			InputStream inputStream = httpUrlConn.getInputStream();
			bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "utf-8"));

			String line;
			while ((line = bufferedReader.readLine()) != null) {
				buffer.append(line);
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Release the reader and the connection on every path, including
			// failures part-way through the request or the read loop.
			if (bufferedReader != null) {
				try {
					bufferedReader.close();
				} catch (java.io.IOException ignored) {
					// nothing useful can be done if closing fails
				}
			}
			if (httpUrlConn != null) {
				httpUrlConn.disconnect();
			}
		}
		return buffer.toString();
	}
}

 
使用fastjson解析json抓取新浪新闻文章
 
使用fastjson解析json抓取新浪新闻文章
 源代码:
http://download.csdn.net/detail/jilongliang/8328255

相关推荐