我想在 Jupyter Notebook 中使用 ggplot2。但是,当我尝试创建 R magic 单元格并传入变量时,出现错误。

这是代码(一段表示一个单元格):

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import rpy2

%matplotlib inline
from rpy2.robjects import pandas2ri
pandas2ri.activate()
%load_ext rpy2.ipython

%%R
library(ggplot2)

data = pd.read_csv('train_titanic.csv')

%%R -i data -w 900 -h 480 -u px


运行最后一个单元格时,我收到以下错误(包括回溯):
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
     54         try:
---> 55             od[name] = conversion.py2rpy(values)
     56         except Exception as e:

~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
    839
--> 840         return dispatch(args[0].__class__)(*args, **kw)
    841

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasseries(obj)
    125             if type(x) is not homogeneous_type:
--> 126                 raise ValueError('Series can only be of one type, or None.')
    127         # TODO: Could this be merged with obj.type.name == 'O' case above ?

ValueError: Series can only be of one type, or None.

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
    367         try:
--> 368             mv = memoryview(obj)
    369             res = cls.from_memoryview(mv)

TypeError: memoryview: a bytes-like object is required, not 'Series'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<ipython-input-14-75e210679e4a> in <module>
----> 1 get_ipython().run_cell_magic('R', '-i data -w 900 -h 480 -u px', '\n\n')

~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/interactiveshell.py in run_cell_magic(self, magic_name, line, cell)
   2360             with self.builtin_trap:
   2361                 args = (magic_arg_s, cell)
-> 2362                 result = fn(*args, **kwargs)
   2363             return result
   2364

</home/morgan/anaconda3/envs/catenv/lib/python3.7/site-packages/decorator.py:decorator-gen-130> in R(self, line, cell, local_ns)

~/anaconda3/envs/catenv/lib/python3.7/site-packages/IPython/core/magic.py in <lambda>(f, *a, **k)
    185     # but it's overkill for just that one bit of state.
    186     def magic_deco(arg):
--> 187         call = lambda f, *a, **k: f(*a, **k)
    188
    189         if callable(arg):

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/ipython/rmagic.py in R(self, line, cell, local_ns)
    721                         raise NameError("name '%s' is not defined" % input)
    722                 with localconverter(converter) as cv:
--> 723                     ro.r.assign(input, val)
    724
    725         tmpd = self.setup_graphics(args)

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
    190                 kwargs[r_k] = v
    191         return (super(SignatureTranslatedFunction, self)
--> 192                 .__call__(*args, **kwargs))
    193
    194

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in __call__(self, *args, **kwargs)
    111
    112     def __call__(self, *args, **kwargs):
--> 113         new_args = [conversion.py2rpy(a) for a in args]
    114         new_kwargs = {}
    115         for k, v in kwargs.items():

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/functions.py in <listcomp>(.0)
    111
    112     def __call__(self, *args, **kwargs):
--> 113         new_args = [conversion.py2rpy(a) for a in args]
    114         new_kwargs = {}
    115         for k, v in kwargs.items():

~/anaconda3/envs/catenv/lib/python3.7/functools.py in wrapper(*args, **kw)
    838                             '1 positional argument')
    839
--> 840         return dispatch(args[0].__class__)(*args, **kw)
    841
    842     funcname = getattr(func, '__name__', 'singledispatch function')

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py in py2rpy_pandasdataframe(obj)
     59                           'The error is: %s'
     60                           % (name, str(e)))
---> 61             od[name] = StrVector(values)
     62
     63     return DataFrame(od)

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/robjects/vectors.py in __init__(self, obj)
    382
    383     def __init__(self, obj):
--> 384         super().__init__(obj)
    385         self._add_rops()
    386

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in __init__(self, obj)
    286             super().__init__(obj)
    287         elif isinstance(obj, collections.abc.Sized):
--> 288             super().__init__(type(self).from_object(obj).__sexp__)
    289         else:
    290             raise TypeError('The constructor must be called '

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
    370         except (TypeError, ValueError):
    371             try:
--> 372                 res = cls.from_iterable(obj)
    373             except ValueError:
    374                 msg = ('The class methods from_memoryview() and '

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _(*args, **kwargs)
     26 def _cdata_res_to_rinterface(function):
     27     def _(*args, **kwargs):
---> 28         cdata = function(*args, **kwargs)
     29         # TODO: test cdata is of the expected CType
     30         return _cdata_to_rinterface(cdata)

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_iterable(cls, iterable, populate_func)
    317             if populate_func is None:
    318                 cls._populate_r_vector(iterable,
--> 319                                        r_vector)
    320             else:
    321                 populate_func(iterable, r_vector)

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(cls, iterable, r_vector)
    300                                   r_vector,
    301                                   cls._R_SET_VECTOR_ELT,
--> 302                                   cls._CAST_IN)
    303
    304     @classmethod

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(iterable, r_vector, set_elt, cast_value)
    237 def _populate_r_vector(iterable, r_vector, set_elt, cast_value):
    238     for i, v in enumerate(iterable):
--> 239         set_elt(r_vector, i, cast_value(v))
    240
    241

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _as_charsxp_cdata(x)
    430         return x.__sexp__._cdata
    431     else:
--> 432         return conversion._str_to_charsxp(x)
    433
    434

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_charsxp(val)
    118         s = rlib.R_NaString
    119     else:
--> 120         cchar = _str_to_cchar(val)
    121         s = rlib.Rf_mkCharCE(cchar, _CE_UTF8)
    122     return s

~/anaconda3/envs/catenv/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_cchar(s, encoding)
     97 def _str_to_cchar(s, encoding: str = 'utf-8'):
     98     # TODO: use isStrinb and installTrChar
---> 99     b = s.encode(encoding)
    100     return ffi.new('char[]', b)
    101

AttributeError: 'float' object has no attribute 'encode'


也就是说,只要传入我的 pandas 数据帧对象,R magic 单元格就根本无法运行。不过,我尝试过在单元格内直接创建 R 向量,发现用 ggplot2 绘制这些向量没有任何问题。

我正在使用 Python 3.7.6、rpy2 3.1.0、jupyter-notebook 6.0.3,运行环境是 Windows Subsystem for Linux (WSL) 上的 Ubuntu 18.04.2 LTS。

最佳答案

问题最有可能是某一列(或多列)包含不止一种类型的数据 - 因此无法将该列数据转换为 R 向量(R 向量只能保存一种数据类型)。回溯信息很长,可能让人不知所措,但相关的部分是这一行:

ValueError: Series can only be of one type, or None.

是哪一列出了问题?如果不查看您加载的数据集很难说,但我的一般解决方案是检查每一列中出现的类型:
types = data.applymap(type).apply(set)
types[types.apply(len) > 1]

以上代码段返回的任何内容都将成为候选罪魁祸首。有许多不同的方法来处理这个问题,这取决于数据的确切性质。我经常使用的解决方法包括:
  • 调用 data = data.infer_objects() - 如果 pandas 没有察觉到 dtype 的变化,仍然使用(次优的)Python 对象存储数据,这样做会有帮助
  • 如果字符串列中缺少值(例如 NaN ),则用空字符串或字符串常量填充 str_columns = str_columns.fillna('')
  • 如果您的数据是 datetime 对象但 dtype 是 object,则使用 dates.apply(pd.to_datetime, axis=1)
  • 如果您混合使用了 date 和 datetime 对象,则使用 df.applymap(lambda x: datetime.combine(x, datetime.min.time()) if not isinstance(x, datetime) else x)

  • 在一些罕见的情况下,pandas 存储的数据与 rpy2 预期的不同(遵循某些操作);然后将数据帧写入 csv 文件并再次从磁盘读取它会有所帮助 - 但这可能不是您在这里面临的情况,因为您从新读取的数据帧开始。

    关于python - 在 R 单元格、rpy2、Jupyter Notebook 中使用 Pandas 数据帧时出错,我们在Stack Overflow上找到一个类似的问题:https://stackoverflow.com/questions/60197294/

    10-12 17:01
    查看更多