这是我的代码。对于 2001、2002、2003 等年份,每个年份都只返回同一个值。我希望每一行都能得到不同的值。使用 axis=1 可以解决吗?
# Define a function to map the values
def set_value(row_number, assigned_value):
    """Return the value that ``assigned_value`` stores for ``row_number``.

    Intended for ``Series.apply``: pandas passes each element of the series
    as ``row_number`` and the extra ``args`` entry as ``assigned_value``.

    Args:
        row_number: Key to look up. Despite the name, the call site in this
            file passes the row's year label, not a positional index.
        assigned_value: Mapping (or other subscriptable) holding the values.

    Returns:
        ``assigned_value[row_number]``, unchanged.

    Raises:
        KeyError: If ``assigned_value`` is a dict and has no such key.
    """
    return assigned_value[row_number]
# Build the year -> mileage lookup table.
# NOTE: np.random.randint is called exactly once per year, while the
# dictionary is being built, so each year ends up with a single fixed number
# that every later lookup for that year returns.
year_dictionary = {
    year: np.random.randint(1, upper_bound)
    for year, upper_bound in (
        ('2001', 202335),
        ('2002', 188846),
        ('2003', 175357),
        ('2004', 161868),
        ('2005', 148379),
        ('2006', 134890),
        ('2007', 121401),
        ('2008', 107912),
        ('2009', 94423),
        ('2010', 80934),
        ('2011', 67445),
        ('2012', 53956),
        ('2013', 40467),
        ('2014', 26978),
        ('2015', 13489),
    )
}
# Fill the 'mileage' column: each row's year is handed to set_value together
# with the year lookup table, and the returned number is stored for that row.
df5['mileage'] = df5['year'].apply(lambda year: set_value(year, year_dictionary))
这是创建 df5 示例的代码:
df5 = pd.DataFrame(columns=["year", "fuel", "status", "sex", "licence_type", "mileage"])
# Number of sample rows generated for every column.
n_rows = 100

# Populate the data with the number of cars per year.
# All year labels are strings. The original list mixed str and int literals
# and only produced string labels because NumPy coerces a mixed list to a
# string array; spelling them uniformly makes the key type explicit.
years = [str(year) for year in range(2001, 2016)]
# Selection probability of each year, in the same order as `years`.
year_weights = [0.004, 0.02, 0.044, 0.087, 0.149, 0.187, 0.221, 0.091,
                0.012, 0.031, 0.009, 0.036, 0.013, 0.083, 0.013]
df5['year'] = np.random.choice(years, size=n_rows, p=year_weights)
# Populate the Fuel column with petrol and diesel
df5['fuel'] = np.random.choice(['petrol', 'diesel'], n_rows, p=[0.24, 0.76])
# Populate the status column
df5['status'] = np.random.choice(["pass", "fail", "incomplete"], n_rows, p=[0.36, 0.63, 0.01])
# Populate the sex column
df5['sex'] = np.random.choice(['male', 'female'], n_rows, p=[0.53, 0.47])
# Populate licence_type column
df5['licence_type'] = np.random.choice(["full", "learner"], n_rows, p=[0.92, 0.08])
print(df5)
最佳答案
将您的year_dictionary更改为
# Per-year upper bound for the mileage, keyed by the year as a string.
# The bounds are multiples of 13489: 15 * 13489 = 202335 for '2001', one
# multiple less for each following year, down to 1 * 13489 for '2015'.
year_dictionary = {str(2016 - multiple): 13489 * multiple for multiple in range(15, 0, -1)}
并将 set_value 函数改为:
def set_value(row_number, assigned_value):
    """Draw a fresh random integer for one row.

    The upper bound is looked up in ``assigned_value`` and a new number is
    drawn on every call, so rows sharing the same key still get independent
    values.

    Args:
        row_number: Key to look up. Despite the name, it is whatever element
            ``Series.apply`` passes in (a year label at the call site shown
            in this file).
        assigned_value: Mapping from key to the exclusive upper bound of the
            draw.

    Returns:
        A random integer ``n`` with ``1 <= n < assigned_value[row_number]``.

    Raises:
        KeyError: If ``assigned_value`` is a dict and has no such key.
        ValueError: Raised by NumPy when the bound is not greater than 1.
    """
    return np.random.randint(1, assigned_value[row_number])