# pandas-15: how to use the df['one_col'].apply() method
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Load the demo CSV and keep only its first five rows.
df = pd.read_csv('apply_demo.csv').head(n=5)
# .size is the number of rows for a Series and rows * columns for a DataFrame.
print(df.size)
print(df)
''' 原始数据
time data
0 1473411962 Symbol: APPL Seqno: 0 Price: 1623
1 1473411962 Symbol: APPL Seqno: 0 Price: 1623
2 1473411963 Symbol: APPL Seqno: 0 Price: 1623
3 1473411963 Symbol: APPL Seqno: 0 Price: 1623
4 1473411963 Symbol: APPL Seqno: 1 Price: 1649
'''
# Build a five-element Series of 'a' and attach it to df as a new column 'A'.
s1 = Series(['a' for _ in range(5)])
df['A'] = s1
print(df)
'''
time data A
0 1473411962 Symbol: APPL Seqno: 0 Price: 1623 a
1 1473411962 Symbol: APPL Seqno: 0 Price: 1623 a
2 1473411963 Symbol: APPL Seqno: 0 Price: 1623 a
3 1473411963 Symbol: APPL Seqno: 0 Price: 1623 a
4 1473411963 Symbol: APPL Seqno: 1 Price: 1649 a
'''
# Upper-case column 'A': Series.apply calls str.upper on each element and the
# result is written back over the same column.
df['A'] = df['A'].apply(func=str.upper)
print(df)
'''
time data A
0 1473411962 Symbol: APPL Seqno: 0 Price: 1623 A
1 1473411962 Symbol: APPL Seqno: 0 Price: 1623 A
2 1473411963 Symbol: APPL Seqno: 0 Price: 1623 A
3 1473411963 Symbol: APPL Seqno: 0 Price: 1623 A
4 1473411963 Symbol: APPL Seqno: 1 Price: 1649 A
'''
# Preview how one raw 'data' string breaks into tokens: take the row labelled
# 0, trim surrounding whitespace, then split on single spaces.
l = df.loc[0, 'data'].strip().split(' ')
# Expected: ['Symbol:', 'APPL', 'Seqno:', '0', 'Price:', '1623']
print(l)
def foo(line: str) -> Series:
    """Extract the symbol, sequence number and price from one raw record.

    Args:
        line: Text made of alternating ``label value`` tokens, e.g.
            ``'Symbol: APPL Seqno: 0 Price: 1623'``.

    Returns:
        A Series ``[symbol, seqno, price]`` holding the 2nd, 4th and 6th
        tokens as strings, with the default 0..2 index.

    Raises:
        IndexError: If ``line`` contains fewer than six tokens.
    """
    # split() with no argument splits on any run of whitespace (and ignores
    # leading/trailing whitespace), so repeated spaces or tabs cannot shift
    # the positional fields the way split(' ') would by emitting empty tokens.
    items = line.split()
    return Series([items[1], items[3], items[5]])
# Run foo over every 'data' string. foo returns a Series per row, so the
# result is a DataFrame with one column per extracted field.
df_tmp = df.loc[:, 'data'].apply(foo)
print(df_tmp)
'''
0 1 2
0 APPL 0 1623
1 APPL 0 1623
2 APPL 0 1623
3 APPL 0 1623
4 APPL 1 1649
'''
# Give the positional columns 0, 1, 2 their field names, in order.
df_tmp = df_tmp.rename(columns=dict(enumerate(('Symbol', 'Seqno', 'Price'))))
print(df_tmp)
'''
Symbol Seqno Price
0 APPL 0 1623
1 APPL 0 1623
2 APPL 0 1623
3 APPL 0 1623
4 APPL 1 1649
'''
# Merge the parsed fields back onto the original frame, then drop the raw
# 'data' text and the demo column 'A'. The pipeline is evaluated once and the
# result is reused for both the preview and the CSV export.
# NOTE(review): per the sample output below, the combined frame has its
# columns in alphabetical order and shows Seqno/Price as floats; if the
# original column order and string values should be kept, df.join(df_tmp)
# may be the better fit — confirm intent before changing the export.
df_combined = df.combine_first(df_tmp).drop(['data', 'A'], axis=1)
print(df_combined)
'''
Price Seqno Symbol time
0 1623.0 0.0 APPL 1473411962
1 1623.0 0.0 APPL 1473411962
2 1623.0 0.0 APPL 1473411963
3 1623.0 0.0 APPL 1473411963
4 1649.0 1.0 APPL 1473411963
'''
df_combined.to_csv('./demo_duplicate.csv', index=False)