我正在尝试在 Pig 中通过 HCatalog 加载我的 Hive 表,为此编写了下面的代码,但运行时出现错误。我是使用 pig -useHCatalog 启动 Pig shell 的。

代码:

-- Load the Hive table patient_info into relation A via HCatalog's Pig loader (HCatLoader).
A = LOAD 'patient_info' USING org.apache.hive.hcatalog.pig.HCatLoader();

错误:



更新:

我在 hive 中存储数据的命令如下。
-- Original (failing) table setup from the question, kept verbatim for reference.
-- Registers the XML SerDe jar that provides the SerDe / InputFormat classes named below.
add jar /home/cloudera/hivexmlserde-1.0.5.3.jar;
-- NOTE(review): problems visible in this definition when compared with the sample XML
-- shown after it:
--   * The last SERDEPROPERTIES entry (frequencyUnits) ends with a comma directly before
--     the closing parenthesis; Hive presumably rejects this trailing comma -- confirm.
--   * xmlinput.start / xmlinput.end use "medicationsInfo", while the sample spells the
--     element "Medicationsinfo"; XML element names are case-sensitive.
--   * The XPath expressions point at paths ("medications/code/code", "medications/xxx/...")
--     and fields (frequencyValue, frequencyUnits) that do not occur in the sample, which
--     instead has strengthValue / strengthUnits under Medicationsinfo/entryInfo.
CREATE EXTERNAL TABLE patient_info (
statusCode string,
title string,
startTime string,
endTime string,
frequencyValue string,
frequencyUnits string

)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
"column.xpath.statusCode"="medicationsInfo/entryInfo/statusCode/text()",
"column.xpath.title"="medications/code/code/text()",
"column.xpath.startTime"="medications/xxx/startTime/text()",
"column.xpath.endTime"="medications/xxx/endTime/text()",
"column.xpath.frequencyValue"="medications/xxx/frequencyValue/text()",
"column.xpath.frequencyUnits"="medications/xxx/frequencyUnits/text()",
)
STORED AS
INPUTFORMAT 'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
TBLPROPERTIES (
"xmlinput.start"="<medicationsInfo",
"xmlinput.end"="</medicationsInfo>");


-- Loads the XML data found under /user/cloudera/xml into the table defined above.
load data inpath '/user/cloudera/xml' into table patient_info ;

样本:
 <Document>
    <ProductCode>
     <code>10160-0</code>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20110729</startTime>
        <endTime>20110822</endTime>
        <strengthValue>24</strengthValue>
        <strengthUnits>h</strengthUnits>
     </entryInfo>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20120130</startTime>
        <endTime>20120326</endTime>
        <strengthValue>12</strengthValue>
        <strengthUnits>h</strengthUnits>
     </entryInfo>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20100412</startTime>
        <endTime>20110822</endTime>
        <strengthValue>8</strengthValue>
        <strengthUnits>d</strengthUnits>
     </entryInfo>
    </ProductCode>
    <ProductCode>
     <code>10160-0</code>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20110729</startTime>
        <endTime>20110822</endTime>
        <strengthValue>24</strengthValue>
        <strengthUnits>h</strengthUnits>
     </entryInfo>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20120130</startTime>
        <endTime>20120326</endTime>
        <strengthValue>12</strengthValue>
        <strengthUnits>h</strengthUnits>
     </entryInfo>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20100412</startTime>
        <endTime>20110822</endTime>
        <strengthValue>8</strengthValue>
        <strengthUnits>d</strengthUnits>
     </entryInfo>
    </ProductCode>
   <Medicationsinfo>
     <code>10160-0</code>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20110729</startTime>
        <endTime>20110822</endTime>
        <strengthValue>24</strengthValue>
        <strengthUnits>h</strengthUnits>
     </entryInfo>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20120130</startTime>
        <endTime>20120326</endTime>
        <strengthValue>12</strengthValue>
        <strengthUnits>h</strengthUnits>
     </entryInfo>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20100412</startTime>
        <endTime>20110822</endTime>
        <strengthValue>8</strengthValue>
        <strengthUnits>d</strengthUnits>
     </entryInfo>
    </Medicationsinfo>
    <Medicationsinfo>
     <code>10160-0</code>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20110729</startTime>
        <endTime>20110822</endTime>
        <strengthValue>24</strengthValue>
        <strengthUnits>h</strengthUnits>
     </entryInfo>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20120130</startTime>
        <endTime>20120326</endTime>
        <strengthValue>12</strengthValue>
        <strengthUnits>h</strengthUnits>
     </entryInfo>
     <entryInfo>
        <statusCode>completed</statusCode>
        <startTime>20100412</startTime>
        <endTime>20110822</endTime>
        <strengthValue>8</strengthValue>
        <strengthUnits>d</strengthUnits>
     </entryInfo>
    </Medicationsinfo>
    </Document>

最佳答案

您的外部表的定义无效。
以下是一些选项:

选项1

-- Option 1: one row per <Medicationsinfo> element; the entryInfo children are
-- kept together as a single raw XML string column.
CREATE EXTERNAL TABLE patient_info (
    code      STRING,
    entryInfo STRING
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
    "column.xpath.code"      = "/Medicationsinfo/code/text()",
    "column.xpath.entryInfo" = "/Medicationsinfo/entryInfo"
)
STORED AS
    INPUTFORMAT  'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
LOCATION '/user/hive/warehouse/patient_info'
TBLPROPERTIES (
    "xmlinput.start" = "<Medicationsinfo",
    "xmlinput.end"   = "</Medicationsinfo>"
);

-- Inspect the parsed rows.
SELECT *
FROM patient_info;
+-------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| patient_info.code | patient_info.entryinfo                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+-------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 10160-0           | <string><entryInfo><statusCode>completed</statusCode><startTime>20110729</startTime><endTime>20110822</endTime><strengthValue>24</strengthValue><strengthUnits>h</strengthUnits></entryInfo><entryInfo><statusCode>completed</statusCode><startTime>20120130</startTime><endTime>20120326</endTime><strengthValue>12</strengthValue><strengthUnits>h</strengthUnits></entryInfo><entryInfo><statusCode>completed</statusCode><startTime>20100412</startTime><endTime>20110822</endTime><strengthValue>8</strengthValue><strengthUnits>d</strengthUnits></entryInfo></string> |
+-------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 10160-0           | <string><entryInfo><statusCode>completed</statusCode><startTime>20110729</startTime><endTime>20110822</endTime><strengthValue>24</strengthValue><strengthUnits>h</strengthUnits></entryInfo><entryInfo><statusCode>completed</statusCode><startTime>20120130</startTime><endTime>20120326</endTime><strengthValue>12</strengthValue><strengthUnits>h</strengthUnits></entryInfo><entryInfo><statusCode>completed</statusCode><startTime>20100412</startTime><endTime>20110822</endTime><strengthValue>8</strengthValue><strengthUnits>d</strengthUnits></entryInfo></string> |
+-------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

选项2
-- Option 2: same record splitting as before, but entryInfo is typed as an array
-- of maps, each keyed by the element name ("entryInfo") and holding a
-- string-to-string map of its child fields.
CREATE EXTERNAL TABLE patient_info (
    code      STRING,
    entryInfo ARRAY<MAP<STRING, MAP<STRING, STRING>>>
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
    "column.xpath.code"      = "/Medicationsinfo/code/text()",
    "column.xpath.entryInfo" = "/Medicationsinfo/entryInfo"
)
STORED AS
    INPUTFORMAT  'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
LOCATION '/user/hive/warehouse/patient_info'
TBLPROPERTIES (
    "xmlinput.start" = "<Medicationsinfo",
    "xmlinput.end"   = "</Medicationsinfo>"
);

-- Inspect the parsed rows.
SELECT *
FROM patient_info;
+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| patient_info.code |                                                                                                                                                                                   patient_info.entryinfo                                                                                                                                                                                   |
+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 10160-0           | [{"entryInfo":{"statusCode":"completed","startTime":"20110729","strengthUnits":"h","endTime":"20110822","strengthValue":"24"}},{"entryInfo":{"statusCode":"completed","startTime":"20120130","strengthUnits":"h","endTime":"20120326","strengthValue":"12"}},{"entryInfo":{"statusCode":"completed","startTime":"20100412","strengthUnits":"d","endTime":"20110822","strengthValue":"8"}}] |
| 10160-0           | [{"entryInfo":{"statusCode":"completed","startTime":"20110729","strengthUnits":"h","endTime":"20110822","strengthValue":"24"}},{"entryInfo":{"statusCode":"completed","startTime":"20120130","strengthUnits":"h","endTime":"20120326","strengthValue":"12"}},{"entryInfo":{"statusCode":"completed","startTime":"20100412","strengthUnits":"d","endTime":"20110822","strengthValue":"8"}}] |
+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

选项3
-- Option 3: entryInfo is an array of maps whose values are typed structs, so
-- individual fields can be addressed by name and strengthValue is an INT.
CREATE EXTERNAL TABLE patient_info (
    code      STRING,
    entryInfo ARRAY<MAP<STRING, STRUCT<
                  statusCode:STRING,
                  startTime:STRING,
                  endTime:STRING,
                  strengthValue:INT,
                  strengthUnits:STRING
              >>>
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
WITH SERDEPROPERTIES (
    "column.xpath.code"      = "/Medicationsinfo/code/text()",
    "column.xpath.entryInfo" = "/Medicationsinfo/entryInfo"
)
STORED AS
    INPUTFORMAT  'com.ibm.spss.hive.serde2.xml.XmlInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat'
LOCATION '/user/hive/warehouse/patient_info'
TBLPROPERTIES (
    "xmlinput.start" = "<Medicationsinfo",
    "xmlinput.end"   = "</Medicationsinfo>"
);

-- Inspect the parsed rows.
SELECT *
FROM patient_info;
+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| patient_info.code |                                                                                                                                                                                patient_info.entryinfo                                                                                                                                                                                |
+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 10160-0           | [{"entryInfo":{"statuscode":"completed","starttime":"20110729","endtime":"20110822","strengthvalue":24,"strengthunits":"h"}},{"entryInfo":{"statuscode":"completed","starttime":"20120130","endtime":"20120326","strengthvalue":12,"strengthunits":"h"}},{"entryInfo":{"statuscode":"completed","starttime":"20100412","endtime":"20110822","strengthvalue":8,"strengthunits":"d"}}] |
| 10160-0           | [{"entryInfo":{"statuscode":"completed","starttime":"20110729","endtime":"20110822","strengthvalue":24,"strengthunits":"h"}},{"entryInfo":{"statuscode":"completed","starttime":"20120130","endtime":"20120326","strengthvalue":12,"strengthunits":"h"}},{"entryInfo":{"statuscode":"completed","starttime":"20100412","endtime":"20110822","strengthvalue":8,"strengthunits":"d"}}] |
+-------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

展开(explode)选项3 的结果
-- Flatten the option-3 table: one output row per element of the entryInfo array.
-- POSEXPLODE yields a 0-based position, so 1 is added to number entries from 1.
SELECT
    pi.code,
    entry.pos + 1 AS i,
    entry.item["entryInfo"].statusCode,
    entry.item["entryInfo"].startTime,
    entry.item["entryInfo"].endTime,
    entry.item["entryInfo"].strengthValue,
    entry.item["entryInfo"].strengthUnits
FROM patient_info pi
LATERAL VIEW POSEXPLODE(pi.entryInfo) entry AS pos, item;
+---------+---+------------+-----------+----------+---------------+---------------+
| pi.code | i | statuscode | starttime | endtime  | strengthvalue | strengthunits |
+---------+---+------------+-----------+----------+---------------+---------------+
| 10160-0 | 1 | completed  | 20110729  | 20110822 | 24            | h             |
+---------+---+------------+-----------+----------+---------------+---------------+
| 10160-0 | 2 | completed  | 20120130  | 20120326 | 12            | h             |
+---------+---+------------+-----------+----------+---------------+---------------+
| 10160-0 | 3 | completed  | 20100412  | 20110822 | 8             | d             |
+---------+---+------------+-----------+----------+---------------+---------------+
| 10160-0 | 1 | completed  | 20110729  | 20110822 | 24            | h             |
+---------+---+------------+-----------+----------+---------------+---------------+
| 10160-0 | 2 | completed  | 20120130  | 20120326 | 12            | h             |
+---------+---+------------+-----------+----------+---------------+---------------+
| 10160-0 | 3 | completed  | 20100412  | 20110822 | 8             | d             |
+---------+---+------------+-----------+----------+---------------+---------------+

关于 xml - 在 Pig 中使用 HCatalog 加载 Hive 表时出错,我们在 Stack Overflow 上找到一个类似的问题:https://stackoverflow.com/questions/42227789/

10-11 22:41
查看更多