# pandas:按计数将一列拆分为矩阵

``````
Column A
--------
x-y: 1
x-y: 2
x-y: 2
x-x: 1
y-x: 2
y-y: 3
y-y: 3
``````

``````
     1     2     3      *columns are based on the range of numbers in Column A
--------------
x-x  1     0     0      because there's 1 'x-x: 1'
x-y  1     2     0      because there's 1 'x-y: 1' and 2 'x-y: 2'
y-x  0     1     0      because there's 1 'y-x: 2'
y-y  0     0     2      because there's 2 'y-y: 3'
``````

``````
print (df)
Column A
x-y         1
x-y         2
x-y         2
x-x         1
y-x         2
y-y         3
y-y         3

print (df.reset_index())
index  Column A
0   x-y         1
1   x-y         2
2   x-y         2
3   x-x         1
4   y-x         2
5   y-y         3
6   y-y         3

df = df.reset_index().groupby(['index','Column A']).size().unstack(fill_value=0)
print (df)
Column A  1  2  3
index
x-x       1  0  0
x-y       1  2  0
y-x       0  1  0
y-y       0  0  2
``````

``````
df = pd.crosstab(df.index, df['Column A'])
print (df)
Column A  1  2  3
row_0
x-x       1  0  0
x-y       1  2  0
y-x       0  1  0
y-y       0  0  2
``````

``````
print (df)
Column A
0   x-y: 1
1   x-y: 2
2   x-y: 2
3   x-x: 1
4   y-x: 2
5   y-y: 3
6   y-y: 3

df[['a','b']] = df['Column A'].str.split(':\s+', expand=True)
print (df)

Column A    a  b
0   x-y: 1  x-y  1
1   x-y: 2  x-y  2
2   x-y: 2  x-y  2
3   x-x: 1  x-x  1
4   y-x: 2  y-x  2
5   y-y: 3  y-y  3
6   y-y: 3  y-y  3

df = df.groupby(['a','b']).size().unstack(fill_value=0)
print (df)
b    1  2  3
a
x-x  1  0  0
x-y  1  2  0
y-x  0  1  0
y-y  0  0  2
``````

0 条评论