问题:对包含分类(category)列的 DataFrame 使用 where 会引发 ValueError: Wrong number of dimensions

我实在不知道自己哪里做错了。

df=pd.read_csv("F:/python/projects/mail/Inbox_20160911-1646/rows.csv",header=0,sep=",",quotechar="'",quoting=1)
df.where(df > 100)  # WORKS !!!!

for c in [x for x in df.columns[2:] if df[x].dtype == "object" ]:
    cl="c"+c
    df[cl]=df[c].astype("category")

df.where(df > 100) # ---> ValueError: Wrong number of dimensions

    df.where(df > 100)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-278-7469c620cf83> in <module>()
----> 1 df.where(df > 100)

F:\python\anaconda3\lib\site-packages\pandas\core\ops.py in f(self, other)
   1182             # straight boolean comparisions we want to allow all columns
   1183             # (regardless of dtype to pass thru) See #4537 for discussion.
-> 1184             res = self._combine_const(other, func, raise_on_error=False)
   1185             return res.fillna(True).astype(bool)
   1186

F:\python\anaconda3\lib\site-packages\pandas\core\frame.py in _combine_const(self, other, func, raise_on_error)
   3553
   3554         new_data = self._data.eval(func=func, other=other,
-> 3555                                    raise_on_error=raise_on_error)
   3556         return self._constructor(new_data)
   3557

F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, **kwargs)
   2909
   2910     def eval(self, **kwargs):
-> 2911         return self.apply('eval', **kwargs)
   2912
   2913     def quantile(self, **kwargs):

F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, raw, **kwargs)
   2888
   2889             kwargs['mgr'] = self
-> 2890             applied = getattr(b, f)(**kwargs)
   2891             result_blocks = _extend_blocks(applied, result_blocks)
   2892

F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in eval(self, func, other, raise_on_error, try_cast, mgr)
   1160             result = self._try_cast_result(result)
   1161
-> 1162         return [self.make_block(result, fastpath=True, )]
   1163
   1164     def where(self, other, cond, align=True, raise_on_error=True,

F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in make_block(self, values, placement, ndim, **kwargs)
    179             ndim = self.ndim
    180
--> 181         return make_block(values, placement=placement, ndim=ndim, **kwargs)
    182
    183     def make_block_same_class(self, values, placement=None, fastpath=True,

F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   2516                      placement=placement, dtype=dtype)
   2517
-> 2518     return klass(values, ndim=ndim, fastpath=fastpath, placement=placement)
   2519
   2520 # TODO: flexible with index=None and/or items=None

F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in __init__(self, values, ndim, fastpath, placement, **kwargs)
   1661
   1662         super(ObjectBlock, self).__init__(values, ndim=ndim, fastpath=fastpath,
-> 1663                                           placement=placement, **kwargs)
   1664
   1665     @property

F:\python\anaconda3\lib\site-packages\pandas\core\internals.py in __init__(self, values, placement, ndim, fastpath)
     79             ndim = values.ndim
     80         elif values.ndim != ndim:
---> 81             raise ValueError('Wrong number of dimensions')
     82         self.ndim = ndim
     83


ValueError: Wrong number of dimensions

最佳答案

这是一个小演示,它重现了您的错误:

In [11]: df = pd.DataFrame(np.random.randint(0, 10, (5,3)), columns=list('abc'))

In [12]: df
Out[12]:
   a  b  c
0  9  9  8
1  5  6  1
2  2  9  8
3  8  1  3
4  1  5  1


这有效:

In [13]: df > 1
Out[13]:
       a      b      c
0   True   True   True
1   True   True  False
2   True   True   True
3   True  False   True
4  False   True  False

In [14]: df['cat'] = df.c.astype('category')

In [15]: df
Out[15]:
   a  b  c cat
0  9  9  8   8
1  5  6  1   1
2  2  9  8   8
3  8  1  3   3
4  1  5  1   1


这会引发Wrong number of dimensions异常:

In [16]: df > 1
...skipped...
ValueError: Wrong number of dimensions


这才是上述错误的真正原因:无序的分类列不支持大小比较(只能比较相等或不相等):

In [19]: df.cat > 1
...skipped...
TypeError: Unordered Categoricals can only compare equality or not


解决方案:只对数值列(或排除分类列之后的列)进行比较:

In [22]: df.select_dtypes(include=['number']) > 1
Out[22]:
       a      b      c
0   True   True   True
1   True   True  False
2   True   True   True
3   True  False   True
4  False   True  False

In [23]: np.where(df.select_dtypes(exclude=['category']) > 1)
Out[23]:
(array([0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4], dtype=int64),
 array([0, 1, 2, 0, 1, 0, 1, 2, 0, 2, 1], dtype=int64))

关于 python - 将 where 与 Pandas 和分类列一起使用时出错,我们在 Stack Overflow 上找到一个类似的问题:https://stackoverflow.com/questions/39685764/

10-12 14:28