hbase rowkey设计之hash的使用和md5的使用

在组拼 HBase rowkey 时,为了防止热点(hotspot),可以使用 MD5 方式或者 hash 方式对 rowkey 进行散列。

这里先介绍 hash 方式:

0 work_regionmax_student=10000

1 hashfun (stu.class_id, ${work_regionmax_student}) 

2 hashfun : hashfun(string, int) 获取 string 的 hashCode,对 int 取余并取绝对值,然后在左侧补 0 到固定位数(位数等于 int-1 的十进制长度,例如 int=10000 时为 4 位)

3 自定义函数中的写法:



import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF that turns a string into a fixed-width, zero-padded bucket number derived from
 * the string's {@link String#hashCode()}.
 *
 * <p>Originally created by zm on 16/6/30.
 */
public class CreateHashId extends UDF {

    /**
     * Computes {@code |value.hashCode() % number|} and left-pads it with zeros so that every
     * result has the same number of digits as {@code number - 1}.
     *
     * @param value  the string to hash
     * @param number the modulus, i.e. the number of buckets
     * @return the zero-padded bucket number, or {@code null} when {@code value} is
     *         {@code null} or empty, or when {@code number} is less than 1
     */
    public String evaluate(String value, long number) {
        if (value == null || value.isEmpty() || number < 1) {
            return null;
        }

        // The int hash code is widened to long before the remainder is taken, so the
        // remainder's magnitude is below `number` and Math.abs cannot overflow.
        long bucket = Math.abs(value.hashCode() % number);

        // Width of the largest possible bucket value (number - 1).
        int width = String.valueOf(number - 1).length();

        // Locale.ROOT keeps the digits ASCII regardless of the JVM's default locale;
        // a result narrower than `width` is padded on the left with '0'.
        return String.format(java.util.Locale.ROOT, "%0" + width + "d", bucket);
    }

    /**
     * Small manual check that prints the bucket for two sample ids.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        CreateHashId testid = new CreateHashId();

        // Recorded result from the original author's run: 3754
        // (already 4 digits wide, so no padding is added).
        System.out.println(testid.evaluate("010f5ae14d604b729456009a4c806bc0", 10000));

        // Recorded result from the original author's run: 0027
        // (the raw bucket 27 is shorter than 4 digits, so it is zero-padded).
        System.out.println(testid.evaluate("01110158d9f041f6a9c75520f3e91b53", 10000));
    }
}

这里介绍下md5的使用:

import com.google.common.base.Charsets;
import com.google.common.hash.Hashing;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF that converts a string to its MD5 digest.
 *
 * <p>Originally created by pc on 2016/12/23.
 */
public class MD5 extends UDF {

    /**
     * Hashes the given string with MD5.
     *
     * <p>The input is encoded as UTF-16LE (not UTF-8) before hashing; changing the charset
     * would change every digest this function produces.
     *
     * @param userId the string to hash
     * @return the digest rendered by Guava's {@code HashCode.toString()}, or {@code null}
     *         when {@code userId} is {@code null}
     */
    public String evaluate(String userId) {
        // Return null for a null input instead of failing inside the hasher.
        if (userId == null) {
            return null;
        }
        return Hashing.md5().hashString(userId, Charsets.UTF_16LE).toString();
    }

    /**
     * Small manual check: hashes two identical JSON payloads and prints both digests
     * separated by a colon.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str1="{\"actId\":\"1600005\",\"classId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"className\":\"四年级(1)班\",\"countyId\":\"370725\",\"countyName\":\"昌乐县\",\"gradeId\":\"4\",\"originCode\":\"2\",\"role\":\"STUDENT\",\"schoolId\":\"460958\",\"schoolName\":\"昌乐行知双语实验学校\",\"time\":1511059347806,\"userIcon\":\"de28f0bde3800375c3cf23fc7d03d0bc\",\"userId\":\"38085844\",\"userIp\":\"111.37.45.234\",\"userName\":\"孙豪章\",\"data\":{\"workMode\":2,\"publishClassType\":1,\"subjectName\":\"昌乐行知双语实验学校\",\"questionId\":\"tch_a285a3cb93bb4695b018b4e58e6a4ee2_v1_WT_3\",\"subjectId\":\"110\",\"workId\":\"172630abe0a949018b00fad7d80673cc\",\"publishClassId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"unitId\":\"110002001065100001001\"}}";

        String str2="{\"actId\":\"1600005\",\"classId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"className\":\"四年级(1)班\",\"countyId\":\"370725\",\"countyName\":\"昌乐县\",\"gradeId\":\"4\",\"originCode\":\"2\",\"role\":\"STUDENT\",\"schoolId\":\"460958\",\"schoolName\":\"昌乐行知双语实验学校\",\"time\":1511059347806,\"userIcon\":\"de28f0bde3800375c3cf23fc7d03d0bc\",\"userId\":\"38085844\",\"userIp\":\"111.37.45.234\",\"userName\":\"孙豪章\",\"data\":{\"workMode\":2,\"publishClassType\":1,\"subjectName\":\"昌乐行知双语实验学校\",\"questionId\":\"tch_a285a3cb93bb4695b018b4e58e6a4ee2_v1_WT_3\",\"subjectId\":\"110\",\"workId\":\"172630abe0a949018b00fad7d80673cc\",\"publishClassId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"unitId\":\"110002001065100001001\"}}";

        String digest1 = Hashing.md5().hashString(str1, Charsets.UTF_16LE).toString();
        String digest2 = Hashing.md5().hashString(str2, Charsets.UTF_16LE).toString();
        System.out.println(digest1 + ":" + digest2);
    }
}

相关推荐