hive表操作（转）

转载于：http://blog.csdn.net/lovelovelovelovelo/article/details/52234971

数据类型
基本数据类型
集合类型，array、map、struct
文件格式，textfile、sequencefile、rcfile

创建表（内部表）

create table employee(

    name string comment 'name',

    salary float,

    subordinates array<string>,

    deductions map<string,float>,

    address struct<street:string,city:string,state:string,zip:int>

)

row format delimited fields termited by '\t' lines terminated by '\n' stored as textfile;

从文件加载数据，覆盖源表

load data local infile 'path' overwrite into table 'table'

创建外部表

create external table employee(

    name string comment 'name',

    salary float,

    subordinates array<string>,

    deductions map<string,float>,

    address struct<street:string,city:string,state:string,zip:int>

)

row format delimited fields terminated by '\t'

collection items terminated by ','

map keys terminated by ':'

lines terminated by '\n'

stored as textfile

location '/data/';

表中数据

lucy 11000 tom,jack,dave,kate  tom:1200,jack:1560 beijing,changanjie,xichengqu,10000

lily 13000 dave,kate  dave:1300,kate:1260 beijing,changanjie,xichengqu,10000

和我们熟悉的关系型数据库不一样，Hive现在还不支持在insert语句里面直接给出一组记录的文字形式，也就是说，hive并不支持INSERT INTO …. VALUES形式的语句。

新建employee.txt，将数据存入文件中，注意字段间用tab，行间换行enter
通过hive命令加载数据

hive> load data local inpath '/root/employee.txt' into table employee;

hive> select * from employee;

OK

lucy    11000.0 ["tom","jack","dave","kate"]    {"tom":1200.0,"jack":1560.0}    {"street":"beijing","city":"changanjie","state":"xichengqu","zip":10000}

lily    13000.0 ["dave","kate"] {"dave":1300.0,"kate":1260.0}   {"street":"beijing","city":"changanjie","state":"xichengqu","zip":10000}

Time taken: 0.054 seconds, Fetched: 2 row(s)

select * from table不走mapreduce

由一个表创建另一个表

create table table2 like table1;

从其他表查询创建表

create table table2 as select name,age,add from table1;

hive不同文件读取

stored as textfile:

    hadoop fs -text

stored as sequencefile:

    hadoop fs -text

stored as rcfile:

    hive -service rcfilecat path

stored as input format 'class':

    outformat 'class'

分区表操作

alter table employee add if not exists partition(country='')

alter table employee drop if exists partition(country='')

hive分桶

create table bucket_table(

    id int,

    name string

)

clustered by(id) sorted by(name) into 4 buckets

row format  delimited fields terminated by '\t' stored as textfile;

set hive.enforce.bucketing=true;

创建分区表

create table partitionTable(

    name string,

    age int

)

partitioned by(dt string)

row format delimited fields terminated by '\t'

lines terminated by '\n'

stored as textfile;