# 比较Pandas DataFrame中的上一行值

jth359：
import pandas as pd
data={'col1':[1,3,3,1,2,3,2,2]}
df=pd.DataFrame(data,columns=['col1'])
print df

col1
0     1
1     3
2     3
3     1
4     2
5     3
6     2
7     2

col1  match
0     1   False
1     3   False
2     3   True
3     1   False
4     2   False
5     3   False
6     2   False
7     2   True

df['match'] = df.col1.eq(df.col1.shift())
print (df)
col1  match
0     1  False
1     3  False
2     3   True
3     1  False
4     2  False
5     3  False
6     2  False
7     2   True

df['match'] = df.col1 == df.col1.shift()
print (df)
col1  match
0     1  False
1     3  False
2     3   True
3     1  False
4     2  False
5     3  False
6     2  False
7     2   True

import pandas as pd
data={'col1':[1,3,3,1,2,3,2,2]}
df=pd.DataFrame(data,columns=['col1'])
print (df)
#[80000 rows x 1 columns]
df = pd.concat([df]*10000).reset_index(drop=True)

df['match'] = df.col1 == df.col1.shift()
df['match1'] = df.col1.eq(df.col1.shift())
print (df)

In [208]: %timeit df.col1.eq(df.col1.shift())
The slowest run took 4.83 times longer than the fastest. This could mean that an intermediate result is being cached.
1000 loops, best of 3: 933 µs per loop

In [209]: %timeit df.col1 == df.col1.shift()
1000 loops, best of 3: 1 ms per loop

0 条评论