问题:对包含分类(category)列的 DataFrame 使用 where 会引发 ValueError: Wrong number of dimensions
我实在不知道自己哪里做错了。
df=pd.read_csv("F:/python/projects/mail/Inbox_20160911-1646/rows.csv",header=0,sep=",",quotechar="'",quoting=1)
df.where(df > 100) # WORKS !!!!
for c in [x for x in df.columns[2:] if df[x].dtype == "object" ]:
cl="c"+c
df[cl]=df[c].astype("category")
df.where(df > 100) # ---> ValueError: Wrong number of dimensions
df.where(df > 100)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-278-7469c620cf83> in <module>()
----> 1 df.where(df > 100)
F:\python\anaconda3\lib\site-packages\pandas\core\ops.py in f(self, other)
1182 # straight boolean comparisions we want to allow all columns
1183 # (regardless of dtype to pass thru) See #4537 for discussion.
-> 1184 res = self._combine_const(other, func, raise_on_error=False)
1185 return res.fillna(True).astype(bool)
1186
F:\python\anaconda3\lib\site-packages\pandas\core\frame.py in _combine_const(self, other, func, raise_on_error)
3553
3554 new_data = self._data.eval(func=func, other=other,
-> 3555 raise_on_error=raise_on_error)
3556 return self._constructor(new_data)
3557
F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, **kwargs)
2909
2910 def eval(self, **kwargs):
-> 2911 return self.apply('eval', **kwargs)
2912
2913 def quantile(self, **kwargs):
F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, raw, **kwargs)
2888
2889 kwargs['mgr'] = self
-> 2890 applied = getattr(b, f)(**kwargs)
2891 result_blocks = _extend_blocks(applied, result_blocks)
2892
F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, func, other, raise_on_error, try_cast, mgr)
1160 result = self._try_cast_result(result)
1161
-> 1162 return [self.make_block(result, fastpath=True, )]
1163
1164 def where(self, other, cond, align=True, raise_on_error=True,
F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in make_block(self, values, placement, ndim, **kwargs)
179 ndim = self.ndim
180
--> 181 return make_block(values, placement=placement, ndim=ndim, **kwargs)
182
183 def make_block_same_class(self, values, placement=None, fastpath=True,
F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
2516 placement=placement, dtype=dtype)
2517
-> 2518 return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
2519
2520 # TODO: flexible with index=None and/or items=None
F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in __init__(self, values, ndim, fastpath, placement, **kwargs)
1661
1662 super(ObjectBlock, self).__init__(values, ndim=ndim, fastpath=fastpath,
-> 1663 placement=placement, **kwargs)
1664
1665 @property
F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in __init__(self, values, placement, ndim, fastpath)
79 ndim = values.ndim
80 elif values.ndim != ndim:
---> 81 raise ValueError('Wrong number of dimensions')
82 self.ndim = ndim
83
ValueError: Wrong number of dimensions
最佳答案
这是一个小演示,它重现了您的错误:
In [11]: df = pd.DataFrame(np.random.randint(0, 10, (5,3)), columns=list('abc'))
In [12]: df
Out[12]:
a b c
0 9 9 8
1 5 6 1
2 2 9 8
3 8 1 3
4 1 5 1
这有效:
In [13]: df > 1
Out[13]:
a b c
0 True True True
1 True True False
2 True True True
3 True False True
4 False True False
In [14]: df['cat'] = df.c.astype('category')
In [15]: df
Out[15]:
a b c cat
0 9 9 8 8
1 5 6 1 1
2 2 9 8 8
3 8 1 3 3
4 1 5 1 1
这会引发 Wrong number of dimensions 异常:
In [16]: df > 1
...skipped...
ValueError: Wrong number of dimensions
下面才是上述错误的真正原因(未排序的分类列只支持相等/不等比较):
In [19]: df.cat > 1
...skipped...
TypeError: Unordered Categoricals can only compare equality or not
解决方法(只对数值列,或排除分类列后再做比较):
In [22]: df.select_dtypes(include=['number']) > 1
Out[22]:
a b c
0 True True True
1 True True False
2 True True True
3 True False True
4 False True False
In [23]: np.where(df.select_dtypes(exclude=['category']) > 1)
Out[23]:
(array([0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4], dtype=int64),
array([0, 1, 2, 0, 1, 0, 1, 2, 0, 2, 1], dtype=int64))
关于 python - 在 Pandas 中对包含分类列的 DataFrame 使用 where 时出错,我们在 Stack Overflow 上找到一个类似的问题:https://stackoverflow.com/questions/39685764/