hive examples测试

herryyy

2014-09-23

hive> create table pokes (foo int, bar string);
OK
Time taken: 0.251 seconds
hive> create table invites (foo INT, bar STRING) partitioned by (ds string);
OK
Time taken: 0.106 seconds
hive> show tables;
OK
invites pokes
Time taken: 0.107 seconds
hive> describe invites;
OK
foo     int
bar     string
ds      string
Time taken: 0.151 seconds
hive> alter table pokes add columns (new_col int);
OK
Time taken: 0.117 seconds
hive> alter table invites add columns (new_col2 int);
OK
Time taken: 0.152 seconds
hive> LOAD DATA LOCAL INPATH './examples/files/kv1.txt' OVERWRITE INTO TABLE pokes;
Copying data from file:/home/hadoop/hadoop-0.19.1/contrib/hive/examples/files/kv1.txt
Loading data to table pokes
OK
Time taken: 0.288 seconds
hive> load data local inpath './examples/files/kv2.txt' overwrite into table invites partition (ds='2008-08-15');
Copying data from file:/home/hadoop/hadoop-0.19.1/contrib/hive/examples/files/kv2.txt
Loading data to table invites partition {ds=2008-08-15}
OK
Time taken: 0.524 seconds
hive> LOAD DATA LOCAL INPATH './examples/files/kv3.txt' OVERWRITE INTO TABLE invites PARTITION (ds='2008-08-08');
Copying data from file:/home/hadoop/hadoop-0.19.1/contrib/hive/examples/files/kv3.txt
Loading data to table invites partition {ds=2008-08-08}
OK
Time taken: 0.406 seconds

hive> INSERT OVERWRITE DIRECTORY '/tmp/hdfs_out' SELECT a.* FROM invites a;
Total MapReduce jobs = 1
Starting Job = job_200902261245_0002, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0002
Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job  -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0002
map = 0%,  reduce =0%
map = 50%,  reduce =0%
map = 100%,  reduce =0%
Ended Job = job_200902261245_0002
Moving data to: /tmp/hdfs_out
OK
Time taken: 18.551 seconds

hive> select count(1) from pokes;
Total MapReduce jobs = 2
Number of reducers = 1
In order to change numer of reducers use:
set mapred.reduce.tasks = <number>
Starting Job = job_200902261245_0003, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0003
Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job  -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0003
map = 0%,  reduce =0%
map = 50%,  reduce =0%
map = 100%,  reduce =0%
map = 100%,  reduce =17%
map = 100%,  reduce =100%
Ended Job = job_200902261245_0003
Starting Job = job_200902261245_0004, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0004
Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job  -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0004
map = 0%,  reduce =0%
map = 50%,  reduce =0%
map = 100%,  reduce =0%
map = 100%,  reduce =100%
Ended Job = job_200902261245_0004
OK
500
Time taken: 57.285 seconds

hive> INSERT OVERWRITE DIRECTORY '/tmp/hdfs_out' SELECT a.* FROM invites a;
Total MapReduce jobs = 1
Starting Job = job_200902261245_0005, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0005
Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job  -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0005
map = 0%,  reduce =0%
map = 50%,  reduce =0%
map = 100%,  reduce =0%
Ended Job = job_200902261245_0005
Moving data to: /tmp/hdfs_out
OK
Time taken: 18.349 seconds

hive> INSERT OVERWRITE DIRECTORY '/tmp/reg_5' SELECT COUNT(1) FROM invites a;
Total MapReduce jobs = 2
Number of reducers = 1
In order to change numer of reducers use:
set mapred.reduce.tasks = <number>
Starting Job = job_200902261245_0006, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0006
Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job  -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0006
map = 0%,  reduce =0%
map = 50%,  reduce =0%
map = 100%,  reduce =0%
map = 100%,  reduce =17%
map = 100%,  reduce =100%
Ended Job = job_200902261245_0006
Starting Job = job_200902261245_0007, Tracking URL = http://gp1:50030/jobdetails.jsp?jobid=job_200902261245_0007
Kill Command = /home/hadoop/hadoop-0.19.1/bin/hadoop job  -Dmapred.job.tracker=gp1:9001 -kill job_200902261245_0007
map = 0%,  reduce =0%
map = 50%,  reduce =0%
map = 100%,  reduce =0%
map = 100%,  reduce =17%
map = 100%,  reduce =100%
Ended Job = job_200902261245_0007
Moving data to: /tmp/reg_5
OK
Time taken: 70.956 seconds

自定义分隔符

-- user_info: delimited text table; fields are separated by a tab
-- character and each row ends with a newline.
CREATE TABLE user_info (
    user_id  INT,
    cid      STRING,
    ckid     STRING,
    username STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    LINES TERMINATED BY '\n';
导入数据表的数据格式是：字段之间是tab键分割，行之间是断行。

即要求我们的文件内容格式如下：

100636  100890  c5c86f4cddc15eb7        yyyvybtvt
100612  100865  97cc70d411c18b6f        gyvcycy
100078  100087  ecd6026a15ffddf5        qa000100

hive table hadoop