This article explains how to retrieve the columns of every table in a Hive database; it should be a useful reference for anyone facing the same problem.
Problem Description
I want to write the equivalent of this SQL request in Hive:
select * from information_schema.columns where table_schema='database_name'
How can I access Hive's metastore and retrieve all the columns of all the tables stored in a specific database? I know that we can do it table by table with describe [table_name], but is there any way to get all the columns of all the tables in a database in a single request?
Solution
One way is to connect to the metastore with HiveMetaStoreClient; the getTableColumnsInformation method below retrieves the columns. The same class shows how to extract other information as well, such as partitions. See the example client and sample methods.
import org.apache.hadoop.hive.conf.HiveConf;
// test program
public class Test {
public static void main(String[] args){
HiveConf hiveConf = new HiveConf();
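// retry the thrift connection a few times, then point the client at the metastore service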
hiveConf.setIntVar(HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, 3);
hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://host:port");
HiveMetaStoreConnector hiveMetaStoreConnector = new HiveMetaStoreConnector(hiveConf);
if(hiveMetaStoreConnector != null){
// note: getAllPartitionInfo takes a database name, not a table name
System.out.print(hiveMetaStoreConnector.getAllPartitionInfo("database_name"));
// and the columns of every table (the information_schema.columns equivalent)
System.out.print(hiveMetaStoreConnector.getAllTableStatistic("database_name"));
}
}
}
// define a class like this
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.thrift.TException;
import org.joda.time.DateTime;
import java.util.Arrays;
import java.util.List;
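// Connects to the Hive metastore over thrift and exposes helpers that list
// partition and column metadata for every table in a database.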
public class HiveMetaStoreConnector {
private HiveConf hiveConf;
HiveMetaStoreClient hiveMetaStoreClient;
public HiveMetaStoreConnector(String msAddr, String msPort){
try {
hiveConf = new HiveConf();
hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, msAddr+":"+ msPort);
hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
} catch (MetaException e) {
e.printStackTrace();
System.err.println("Constructor error");
System.err.println(e.toString());
System.exit(-100);
}
}
public HiveMetaStoreConnector(HiveConf hiveConf){
try {
this.hiveConf = hiveConf;
hiveMetaStoreClient = new HiveMetaStoreClient(hiveConf);
} catch (MetaException e) {
e.printStackTrace();
System.err.println("Constructor error");
System.err.println(e.toString());
System.exit(-100);
}
}
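// One line per partition of every table in the given database.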
public String getAllPartitionInfo(String dbName){
List<String> res = Lists.newArrayList();
try {
List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
for(String tableName:tableList){
res.addAll(getTablePartitionInformation(dbName,tableName));
}
} catch (MetaException e) {
e.printStackTrace();
System.out.println("getAllTableStatistic error");
System.out.println(e.toString());
System.exit(-100);
}
return Joiner.on("
").join(res);
}
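// Formats each partition of one table: table name, up to four partition values, creation time.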
public List<String> getTablePartitionInformation(String dbName, String tableName){
List<String> partitionsInfo = Lists.newArrayList();
try {
List<String> partitionNames = hiveMetaStoreClient.listPartitionNames(dbName,tableName, (short) 10000);
List<Partition> partitions = hiveMetaStoreClient.listPartitions(dbName,tableName, (short) 10000);
for(Partition partition:partitions){
StringBuffer sb = new StringBuffer();
sb.append(tableName);
sb.append(" ");
List<String> partitionValues = partition.getValues();
if(partitionValues.size()<4){
int size = partitionValues.size();
for(int j=0; j<4-size;j++){
partitionValues.add("null");
}
}
sb.append(Joiner.on(" ").join(partitionValues));
sb.append(" ");
DateTime createDate = new DateTime((long)partition.getCreateTime()*1000);
sb.append(createDate.toString("yyyy-MM-dd HH:mm:ss"));
partitionsInfo.add(sb.toString());
}
} catch (TException e) {
e.printStackTrace();
return Arrays.asList("error for request on " + tableName);
}
return partitionsInfo;
}
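// One line per column of every table in the given database;
// this is the information_schema.columns equivalent.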
public String getAllTableStatistic(String dbName){
List<String> res = Lists.newArrayList();
try {
List<String> tableList = hiveMetaStoreClient.getAllTables(dbName);
for(String tableName:tableList){
res.addAll(getTableColumnsInformation(dbName,tableName));
}
} catch (MetaException e) {
e.printStackTrace();
System.out.println("getAllTableStatistic error");
System.out.println(e.toString());
System.exit(-100);
}
return Joiner.on("
").join(res);
}
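// Lists each column of one table: name, ordinal position, type and comment.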
public List<String> getTableColumnsInformation(String dbName, String tableName){
try {
List<FieldSchema> fields = hiveMetaStoreClient.getFields(dbName, tableName);
List<String> infs = Lists.newArrayList();
int cnt = 0;
for(FieldSchema fs : fields){
StringBuffer sb = new StringBuffer();
sb.append(tableName);
sb.append(" ");
sb.append(cnt);
sb.append(" ");
cnt++;
sb.append(fs.getName());
sb.append(" ");
sb.append(fs.getType());
sb.append(" ");
sb.append(fs.getComment());
infs.add(sb.toString());
}
return infs;
} catch (TException e) {
e.printStackTrace();
System.out.println("getTableColumnsInformation error");
System.out.println(e.toString());
System.exit(-100);
return null;
}
}
}
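For reference, the same idea fits in a short, self-contained program. The sketch below is a minimal version and not part of the original answer; the thrift URI, the database name, and the ListAllColumns class name are placeholders to adapt. It connects once, enumerates the tables of one database, and prints every column, which is the closest equivalent of the information_schema.columns query.
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.thrift.TException;
// Minimal sketch: print every column of every table in one database, mirroring
// select * from information_schema.columns where table_schema='database_name'
public class ListAllColumns {
    public static void main(String[] args) throws TException {
        HiveConf conf = new HiveConf();
        conf.setVar(HiveConf.ConfVars.METASTOREURIS, "thrift://host:port"); // placeholder URI
        HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
        try {
            for (String table : client.getAllTables("database_name")) {
                // getFields returns the table's non-partition columns
                for (FieldSchema fs : client.getFields("database_name", table)) {
                    System.out.println(table + "\t" + fs.getName() + "\t" + fs.getType());
                }
            }
        } finally {
            client.close();
        }
    }
}
One caveat: getFields returns only the non-partition columns; partition keys would have to be read separately, for example from the table object returned by getTable.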
This concludes the article on retrieving the columns of every table in a Hive database. We hope the answer above is helpful.