我有一个需要旋转的数据框,但该数据框具有重复的标识符,因此spread
函数给出错误Error: Duplicate identifiers for rows (5, 6)
Dimension = c("A","A","B","B","A","A")
Date = c("Mon","Tue","Mon","Wed","Fri","Fri")
Metric = c(23,25,7,9,7,8)
df = data.frame(Dimension,Date,Metric)
df
Dimension Date Metric
1 A Mon 23
2 A Tue 25
3 B Mon 7
4 B Wed 9
5 A Fri 7
6 A Fri 8
library(tidyr)
df1 = spread(df, Date, Metric, fill = " ")
Error: Duplicate identifiers for rows (5, 6)
然后,我合并行并粘贴
Metric
:dfa = aggregate(df[3], df[-3],
FUN = function(X) paste(unique(X), collapse=", "))
Dimension Date Metric
1 A Fri 7, 8
2 A Mon 23
3 B Mon 7
4 A Tue 25
5 B Wed 9
然后重复,当然现在可以使用:
df1 = spread(dfa, Date, Metric, fill = " ")
df1
Dimension Fri Mon Tue Wed
1 A 7, 8 23 25
2 B 7 9
问题:是否有“更轻松”的方法来执行此操作,或者我的方法是否足够有效,所以我不需要为此而昏昏欲睡?谢谢!
编辑。
所有代码-我的代码和2 akrun在这个小型数据集上都可以正常工作。但是,akrun的
dplyr
版本在我的真实数据集上损坏。这是dput
。structure(list(Dimension = c(10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 10994030020, 10994030020, 10994030020, 10994030020,
10994030020, 12300245685, 12300245685, 12300245685, 12300245685,
12300245685, 12300245685, 12300245685, 12300245685, 12300245685,
12300245685, 12300245685, 12300245685, 12300245685, 12300245685,
12300245685, 12300245685, 12300245685, 12300245685, 12300245685,
12300245685, 12300245685, 12300245685, 12300245685, 12300245685,
12300245685, 12300245685, 12300245685, 12300245685, 12300245685,
12300245685, 12300245685, 12300245685, 12300245685, 12300245685,
12303485675, 12303485675, 12303485675, 12303485675, 12303485675,
12701670437, 12701670437, 12701670437, 12701670437, 12701670437,
12701670437, 12701670437, 12701670437, 12701670437, 12701670437,
12701670437, 12701670437, 12701670437, 12701670437, 12701670437,
12701670437), Date = structure(c(1L, 3L, 5L, 7L, 9L, 10L, 11L,
12L, 13L, 14L, 16L, 18L, 19L, 20L, 22L, 23L, 24L, 26L, 27L, 28L,
30L, 32L, 33L, 34L, 40L, 41L, 42L, 47L, 48L, 49L, 51L, 52L, 53L,
54L, 55L, 58L, 59L, 60L, 61L, 62L, 63L, 65L, 66L, 68L, 69L, 70L,
74L, 75L, 76L, 2L, 3L, 5L, 7L, 8L, 10L, 11L, 15L, 17L, 20L, 21L,
24L, 25L, 28L, 30L, 31L, 34L, 36L, 42L, 43L, 46L, 48L, 49L, 53L,
54L, 56L, 65L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 7L, 24L, 30L,
38L, 65L, 4L, 6L, 7L, 24L, 28L, 29L, 30L, 35L, 37L, 39L, 44L,
45L, 50L, 57L, 64L, 65L), .Label = c("16", "analog tuner", "aspect ratio",
"assembled in country of origin", "backlight technology", "battery type",
"brand", "brightness", "color class", "component video", "composite video",
"country of origin", "depth w/ stand", "digital audio output",
"digital tuner", "display technology", "features", "green compliance certificate/authority",
"green compliant", "hdmi", "headphone jack", "height w/ stand",
"limited warranty", "manufacturer", "maximum resolution", "media player",
"motion interpolation technology", "mpn", "multi pack indicator",
"name", "native contrast ratio", "number of hdmi ports", "number of usb ports",
"operating power consumption", "origin of components", "package contents",
"primary color", "product dimensions", "product in in (l x w x h)",
"product model", "product series", "product type", "remote control incl",
"remote included", "resolution", "response time", "rms output power",
"scan format", "screen size", "shipping weight (in lb)", "sound system",
"speaker output power (w)", "speakers", "standard refresh rate",
"standby power consumption", "total number of hdmi ports", "tv definition",
"tv features", "tv refresh rate (hz)", "tv resolution", "tv screen size (in)",
"tv screen size range", "tv speakers", "tv technology", "unspsc",
"usb", "vertical viewing angle", "vesa mount standard", "vga",
"video signal standard", "viewing angle", "warranty length",
"wattage", "weight (approx)", "weight w/ stand (approx)", "width w/ stand"
), class = "factor"), Metric = structure(c(40L, 13L, 57L, 69L,
43L, 72L, 72L, 45L, 38L, 72L, 55L, 44L, 72L, 72L, 15L, 3L, 69L,
72L, 46L, 26L, 70L, 27L, 1L, 29L, 26L, 54L, 58L, 12L, 39L, 25L,
42L, 11L, 72L, 37L, 28L, 52L, 36L, 39L, 24L, 19L, 72L, 33L, 72L,
18L, 72L, 49L, 6L, 10L, 23L, 62L, 13L, 48L, 64L, 31L, 72L, 72L,
41L, 66L, 72L, 72L, 64L, 16L, 63L, 65L, 4L, 32L, 21L, 58L, 71L,
35L, 8L, 20L, 72L, 37L, 17L, 33L, 14L, 7L, 72L, 50L, 14L, 2L,
34L, 59L, 59L, 60L, 5L, 33L, 51L, 47L, 67L, 67L, 53L, 61L, 68L,
51L, 43L, 30L, 72L, 9L, 22L, 49L, 56L, 33L), .Label = c("1",
"1-year limited", "1 Year", "1,000:1", "1,140 x 145 x 705 in ; 65.6 lb",
"10.40 lb", "100 x 100", "1080p", "1080p (HDTV)", "11.20 lb",
"14", "14 W", "16:9", "178 degrees", "18.30 in", "1920 x 1080",
"2", "200 x 100", "21", "22 in", "22 in FHD LED TV; Remote Control",
"25.4", "26.20 in", "29", "29 in", "29L1350U", "3", "300 mW",
"33.80 W", "36.5 x 6.5 x 23.0", "365 Nit", "50 W", "52161505",
"6 W", "6.50 ms", "60", "60 Hz", "7.10 in", "720p", "9", "ATSC",
"Audyssey EQ", "Black", "CEC", "China", "ClearScan 120 Hz", "Does Not Contain a Battery",
"Edge LED", "HDTV", "HDTV 1080p", "Imported", "Internet Apps",
"KDL40W600B", "L1350U", "LCD", "LCD, Internet Connected, LED",
"LED", "LED-LCD TV", "LG", "LG 47LY340C - 47 in - commercial use LED-backlit L",
"No", "NTSC", "PLED2243A", "ProScan", "PROSCAN PLED2243A 22 in 1080p 60 Hz LED HDTV - PTR",
"Sleep Timer; Auto Program", "Sony", "Sony KDL40W600B 40 in 1080p 60 Hz Smart LED TV (20",
"Toshiba", "Toshiba 29L1350U 29 in 720p LED-LCD TV - 16:9 - HD",
"yes", "Yes"), class = "factor")), .Names = c("Dimension", "Date",
"Metric"), class = c("data.table", "data.frame"), row.names = c(NA,
-104L), .internal.selfref = <pointer: 0x00000000003d0788>)
最佳答案
您可以使用dcast
的开发版本中的data.table
,即。 v1.9.5
。安装说明为here
library(data.table)#v1.9.5+
dcast(setDT(df), Dimension~Date, value.var='Metric',
fun.aggregate=function(x) toString(unique(x)))
# Dimension Fri Mon Tue Wed
#1: A 7, 8 23 25
#2: B 7 9
要么
library(dplyr)
library(tidyr)
df %>%
group_by(Dimension, Date) %>%
summarise(Metric=toString(unique(Metric))) %>%
spread(Date, Metric, fill='')
# Dimension Fri Mon Tue Wed
#1 A 7, 8 23 25
#2 B 7 9
更新资料
使用来自OP的新数据集
setDF(df2)
df2 %>%
group_by(Dimension, Date) %>%
summarise(Metric=toString(unique(Metric))) %>%
spread(Date, Metric, fill='') %>%
head(2) %>%
select(1:3)
# Dimension 16 analog tuner
#1 10994030020 9
#2 12300245685 NTSC