摘要:熟悉Hive文件格式和数据导入导出,对数据的查询和快速处理有很大的帮助。
TEXTFILE
SEQUENCEFILE
RCFILE
ORCFILE
PARQUET
本地文件导入到Hive表,需提前创建表
-- Load a local (client-side) file into a pre-created Hive table.
LOAD DATA LOCAL INPATH "/tmp/user/data/demo_local.parquet"
INTO TABLE db_tmp.demo_local;
HDFS文件导入到Hive表,需提前创建表
load data inpath "/tmp/user/data/demo_hdfs.parquet" into table db_tmp.demo_hdfs;
Hive表导入到Hive表
insert into table demo_hive select * from demo_hive_b;
创建表时从其他Hive表导入
create table demo_a as select * from demo_hive_b;
通过Sqoop将MySQL表导入到Hive表
# Import a MySQL table into Hive via Sqoop.
# Fixes: the original fused comments onto command lines and omitted the `\`
# line continuations, so the multi-line commands could not actually run.
# NOTE(review): passing --password on the command line leaks it to the shell
# history/process list; prefer -P (prompt) or --password-file in practice.

# Import into the default Hive database
sqoop import \
    --connect jdbc:mysql://10.168.225.1:3306/casedb \
    --username root \
    --password password \
    --table demo \
    --hive-import \
    --create-hive-table \
    -m 1

# Import into a specific Hive database/table
sqoop import \
    --connect jdbc:mysql://10.168.225.1:3306/casedb \
    --username root \
    --password root \
    --table demo \
    --hive-import \
    --create-hive-table \
    --hive-table database.demo \
    -m 1
导出到本地
insert overwrite local directory "/home/hadoop/data/" row format dilimited fields terminated by "," select * from demo_hive_b;
导出到HDFS
insert overwrite directory "/home/hadoop/data/" row format dilimited fields terminated by "," select * from demo_hive_b;
Hive命令行导出
# Linux bash终端 # 重定向方式 hive -e "select * from demo_hive_b" >> /home/hadoop/data/demo_output.txt # sql文件方式 echo "select * from demo_hive_b" > /home/hadoop/data/demo_output.sql hive -f /home/hadoop/data/demo_output.sql >> /home/hadoop/data/demo_output.txt