Solrj JavaBinCodec分析
solr请求回来的数据为字节流,solrj通过JavaBinCodec对其进行解析。
一、JavaBinCodec的主要标识tag
各tag的定义:
/*
 * Tag bytes used by the JavaBin format. readVal() reads one tag byte and
 * dispatches on it. Tags below 32 are plain type markers; tags whose top
 * three bits are non-zero also carry a size (or part of a value) in their
 * low bits.
 */
public static final byte
    NULL = 0,
    BOOL_TRUE = 1,
    BOOL_FALSE = 2,
    BYTE = 3,
    SHORT = 4,
    DOUBLE = 5,
    INT = 6,
    LONG = 7,
    FLOAT = 8,
    DATE = 9,
    MAP = 10,
    SOLRDOC = 11,       // SolrDocument: readSolrDocument reads the next value as a NamedList
                        // and copies every name/value pair into the document with setField
    SOLRDOCLST = 12,    // SolrDocumentList: readSolrDocumentList first reads a 3-element list
                        // (numFound, start, maxScore), then a second list holding the documents
    BYTEARR = 13,
    ITERATOR = 14,
    /**
     * this is a special tag signals an end. No value is associated with it
     */
    END = 15,
    SOLRINPUTDOC = 16,
    SOLRINPUTDOC_CHILDS = 17,
    ENUM_FIELD_VALUE = 18,

    // types that combine tag + length (or other info) in a single byte:
    // the top 3 bits select the type, the low bits carry the child count or other data
    TAG_AND_LEN = (byte) (1 << 5),
    STR = (byte) (1 << 5),           // string + length; tag bytes 32..63. The worked example
                                     // masks with 0x1f to get the length (readStr itself is not
                                     // shown in this article -- presumably it uses readSize)
    SINT = (byte) (2 << 5),          // small int; tag bytes 64..95. readSmallInt takes the low
                                     // 4 bits (& 0x0F) as the low bits of the value; bit 0x10
                                     // means a VInt with the higher bits follows
    SLONG = (byte) (3 << 5),         // small long; tag bytes 96..127. Presumably decoded like
                                     // SINT (readSmallLong is not shown here); the worked
                                     // example decodes 97 & 0x0f as the value 1
    ARR = (byte) (4 << 5),           // array + element count; tag bytes -128..-97 as a signed
                                     // byte. The count comes from readSize (& 0x1f)
    ORDERED_MAP = (byte) (5 << 5),   // SimpleOrderedMap (a NamedList subclass, and more common);
                                     // tag bytes -96..-65. Each entry is a key followed by its
                                     // value; in the worked example every key is an
                                     // EXTERN_STRING tag followed by the actual string
    NAMED_LST = (byte) (6 << 5),     // NamedList; tag bytes -64..-33
    EXTERN_STRING = (byte) (7 << 5); // used for map keys; tag bytes -32..-1. readExternString
                                     // treats the size as a 1-based index into the strings seen
                                     // so far, with 0 meaning a new string value follows
二、JavaBinCodec主要方法
1、最主要的方法:readVal()
public Object readVal(DataInputInputStream dis) throws IOException { tagByte = dis.readByte(); // if ((tagByte & 0xe0) == 0) { // if top 3 bits are clear, this is a normal tag // OK, try type + size in single byte switch (tagByte >>> 5) { case STR >>> 5: return readStr(dis); case SINT >>> 5: return readSmallInt(dis); case SLONG >>> 5: return readSmallLong(dis); case ARR >>> 5: return readArray(dis); case ORDERED_MAP >>> 5: return readOrderedMap(dis); case NAMED_LST >>> 5: return readNamedList(dis); case EXTERN_STRING >>> 5: return readExternString(dis); } switch (tagByte) { case NULL: return null; case DATE: return new Date(dis.readLong()); case INT: return dis.readInt(); case BOOL_TRUE: return Boolean.TRUE; case BOOL_FALSE: return Boolean.FALSE; case FLOAT: return dis.readFloat(); case DOUBLE: return dis.readDouble(); case LONG: return dis.readLong(); case BYTE: return dis.readByte(); case SHORT: return dis.readShort(); case MAP: return readMap(dis); case SOLRDOC: return readSolrDocument(dis); case SOLRDOCLST: return readSolrDocumentList(dis); case BYTEARR: return readByteArray(dis); case ITERATOR: return readIterator(dis); case END: return END_OBJ; case SOLRINPUTDOC: return readSolrInputDocument(dis); case ENUM_FIELD_VALUE: return readEnumFieldValue(dis); } throw new RuntimeException("Unknown type " + tagByte); }
2、readSolrDocumentList:读SolrDocumentList
//先得到numFound,start,maxScore三个属性的值,存于List中,设置到solrdocumentlist对象中,再找其包含的doclist public SolrDocumentList readSolrDocumentList(DataInputInputStream dis) throws IOException { SolrDocumentList solrDocs = new SolrDocumentList(); List list = (List) readVal(dis); solrDocs.setNumFound((Long) list.get(0)); solrDocs.setStart((Long) list.get(1)); solrDocs.setMaxScore((Float) list.get(2)); @SuppressWarnings("unchecked") List<SolrDocument> l = (List<SolrDocument>) readVal(dis); solrDocs.addAll(l); return solrDocs; }
3、readSolrDocument:读一个doc
//先获得NameList,再放到doc中 public SolrDocument readSolrDocument(DataInputInputStream dis) throws IOException { NamedList nl = (NamedList) readVal(dis);//结果其实是OrderedMap,会被转成NamedList SolrDocument doc = new SolrDocument(); for (int i = 0; i < nl.size(); i++) { String name = nl.getName(i); Object val = nl.getVal(i); doc.setField(name, val); } return doc; }
4、readOrderedMap:读map
//先key后value,key时有EXTERN_STRING的Tag,再是Str_tag public SimpleOrderedMap<Object> readOrderedMap(DataInputInputStream dis) throws IOException { int sz = readSize(dis); SimpleOrderedMap<Object> nl = new SimpleOrderedMap<Object>(); for (int i = 0; i < sz; i++) { String name = (String) readVal(dis); Object val = readVal(dis);//读完key后会读value nl.add(name, val); } return nl; }
5、readArray:读数组list
// public List<Object> readArray(DataInputInputStream dis) throws IOException { int sz = readSize(dis); ArrayList<Object> l = new ArrayList<Object>(sz); for (int i = 0; i < sz; i++) { l.add(readVal(dis)); } return l; }
6、readSize:将tag &0x1f获得大小
public int readSize(DataInputInputStream in) throws IOException { int sz = tagByte & 0x1f; //即如果tagByte<31 (0x1f是31), 则tag还表示个数。 if (sz == 0x1f) sz += readVInt(in); //如果太大,则下一个内容就是大小 return sz; }
7、readSmallInt: &0x0f得到数值的低4位(不是长度);若tag的0x10位为1,则后面还有一个VInt,左移4位后与低4位合并
/**
 * Decodes a small int from the current tag byte.
 *
 * The low 4 bits of {@code tagByte} are the low 4 bits of the value. If bit
 * 0x10 is set, a VInt follows in the stream and supplies the higher bits.
 *
 * @param dis the stream to read the continuation VInt from, if needed
 * @return the decoded int
 * @throws IOException if reading from the stream fails
 */
public int readSmallInt(DataInputInputStream dis) throws IOException {
  final int lowBits = tagByte & 0x0F;
  final boolean hasContinuation = (tagByte & 0x10) != 0;
  if (!hasContinuation) {
    return lowBits;
  }
  return (readVInt(dis) << 4) | lowBits;
}
8、readExternString
// public String readExternString(DataInputInputStream fis) throws IOException { int idx = readSize(fis); if (idx != 0) {// idx != 0 is the index of the extern string 字符串索引 return stringsList.get(idx - 1); } else {// idx == 0 means it has a string value 即后面是字符串 String s = (String) readVal(fis); if (stringsList == null) stringsList = new ArrayList<String>(); stringsList.add(s); return s; } }
三、举例
返回的byte:
2, -94, -32, 46, 114, 101, 115, 112, 111, 110, 115, 101, 72, 101, 97, 100, 101, 114, -93, -32, 38, 115, 116, 97, 116, 117, 115, 6, 0, 0, 0, 0, -32, 37, 81, 84, 105, 109, 101, 6, 0, 0, 0, 0, -32, 38, 112, 97, 114, 97, 109, 115, -93, -32, 33, 113, 36, 105, 100, 58, 49, -32, 34, 119, 116, 39, 106, 97, 118, 97, 98, 105, 110, -32, 39, 118, 101, 114, 115, 105, 111, 110, 33, 50, -32, 40, 114, 101, 115, 112, 111, 110, 115, 101, 12, -125, 97, 96, 0, -127, 11, -93, -32, 34, 105, 100, 33, 49, -32, 36, 110, 97, 109, 101, 33, 49, -32, 41, 95, 118, 101, 114, 115, 105, 111, 110, 95, 7, 20, 43, 47, -61, -44, 64, 0, 0
解析完的结果:
{responseHeader={status=0,QTime=0,params={q=id:1,wt=javabin,version=2}},response={numFound=1,start=0,docs=[SolrDocument{id=1, name=1, _version_=1453307822883209216}]}}
解析过程分析:
版本(首字节2), ORDERED_MAP Tag(read size: -94&0x1f结果为2,即有两个keyvalue), //第一个keyvalue EXTERN_STRING Tag, 字符串长度(46&0x1f=14), responseHeader, ORDERED_MAP Tag(read size: -93&0x1f结果为3), EXTERN_STRING Tag, Str tag(32是str的tag)字符串长度(38&0x1f=6), status, 下个值是int型(6表示int)(ordermap的key读取完后就会读value), 0(4byte), EXTERN_STRING Tag, 字符串长度(37&0x1f=5), QTime, 下个值是int型(6表示int)(ordermap的key读取完后就会读value), 0(4byte), EXTERN_STRING Tag, 字符串长度(38&0x1f=6), params, ORDERED_MAP Tag(read size: -93&0x1f结果为3), EXTERN_STRING Tag, 字符串长度(33&0x1f=1), q, STR TAG(下个值是string)(36&0x1f=4,即有4个字符), id:1, EXTERN_STRING Tag, 字符串长度(34&0x1f=2), wt, STR TAG(下个值是string)(39&0x1f=7,即有7个字符), javabin, EXTERN_STRING Tag, 字符串长度(39&0x1f=7), version, STR TAG(下个值是string)(33&0x1f=1,即有1个字符), 2, //第二个keyvalue EXTERN_STRING Tag, 字符串长度(40&0x1f=8), response, SOLRDOCLST TAG(12是doclist,即SolrDocumentList对象,有属性numFound,start,maxScore,以及自身是ArrayList<SolrDocument>), ARRAY TAG(-128是arr tag)(-125&0x1f=3, 即有3个属性)(后面的三个属性分别是numFound,start,maxScore,详见readSolrDocumentList), SLONG Tag(96是smalllong tag)(97&0x0f=1)结果是1, SLONG Tag(96&0x0f=0)结果是0, NULL Tag(0表示null), ARRAY Tag(-127&0x1f=1,即有一个元素)(该产生的list会被前面的solrdocumentlist.addAll(list)), SOLRDOC Tag(11是solrdoc tag), ORDERED_MAP Tag(read size: -93&0x1f结果为3), EXTERN_STRING Tag, Str Tag字符串长度(34&0x1f=2), id, Str Tag字符串长度(33&0x1f=1), 1, EXTERN_STRING Tag, Str Tag字符串长度(36&0x1f=4), name, Str Tag字符串长度(33&0x1f=1), 1, EXTERN_STRING Tag, Str Tag字符串长度(41&0x1f=9), _version_, Long Tag(7是Long Tag), 1453307822883209216
注:
(1453307822883209216=1010000101011001011111100001111010100010000000000000000000000)(20=00010100,43=00101011,47=00101111,-61=11000011,-44=11010100,64=01000000,0=00000000,0=00000000)