I'm fairly new to Python and I have an issue with DataFrame manipulation on data read from Excel:
This is a snippet of the excel:
I was able to drop the duplicates for the datetime rows, and get one DataFrame with only the datetime rows and another with only the descriptions;
I was able to drop the last row as well:
What I want to do is 'shift' the dates in column A into column B of the row above.
If both DataFrames matched one-to-one it would be easy, but I have a row (in yellow) that does not have any datetime below it.
Does anyone have any idea how to do it?
The result should be something like this:
# Read the sheet without a header row and assign explicit column names.
df_cdms_labour = pd.read_excel(
    test_cdms,
    header=None,
    names=[
        'start_date', 'end_date', 'price', 'percent', 'comment',
        'rate', 'rate_comment', 'number_1', 'markup', 'markup_number',
    ],
)
# NOTE(review): tail() with no argument selects the last 5 rows, so five
# trailing rows are removed here -- use tail(1) if only the very last row
# should be dropped.
df_cdms_labour.drop(index=df_cdms_labour.tail(5).index, inplace=True)
df_cdms_labour
def get_rate_text(df, row=4):
    """Return the ``start_date`` cell stored at index label ``row``.

    Args:
        df: DataFrame that has a ``start_date`` column.
        row: Index label of the row to read. Defaults to 4, the label the
            original snippet hard-coded.

    Returns:
        The value found at ``df.loc[row, 'start_date']``.

    Raises:
        KeyError: If ``row`` or the ``start_date`` column is missing.
    """
    return df.loc[row, 'start_date']
def get_rates(df):
    """Return the unique rows whose ``start_date`` cell is a datetime.

    Args:
        df: DataFrame that has a ``start_date`` column.

    Returns:
        A new DataFrame containing only the rows where ``start_date`` holds a
        ``datetime`` instance, with duplicate rows removed and the index
        renumbered from 0.
    """
    # Imported locally so the function does not rely on a module-level
    # ``datetime`` name, which the visible file never imports.
    from datetime import datetime

    # astype(bool) keeps the mask boolean even for an empty frame, so the
    # selection below always filters rows instead of selecting columns.
    is_datetime = (
        df['start_date']
        .map(lambda value: isinstance(value, datetime))
        .astype(bool)
    )
    return df[is_datetime].drop_duplicates().reset_index(drop=True)
# Keep only the de-duplicated rows whose start_date cell is a datetime.
rates = get_rates(df_cdms_labour)
Here is a proposal using standard pandas DataFrame
functions:
import pandas as pd
import numpy as np
def flag_delete(df):
    """Flag rows whose ``Col_A`` value occurs exactly once.

    On every such row, all columns other than ``Col_A`` are overwritten with
    the string ``"DELETE"``.

    Args:
        df: DataFrame that has a ``Col_A`` column. It is modified in place.

    Returns:
        The same DataFrame object, to allow use with ``DataFrame.pipe``.
    """
    # Per-row size of the Col_A group the row belongs to. Computed directly
    # rather than through a temporary column, so an existing "temp_col"
    # column cannot collide with it.
    occurrences = df.groupby("Col_A")["Col_A"].transform("count")
    appears_once = occurrences.eq(1)
    other_columns = df.columns != "Col_A"
    df.loc[appears_once, other_columns] = "DELETE"
    return df
def format_dates(df):
    """Convert every ``datetime64`` column to ``DD-Mon-YYYY`` strings.

    Args:
        df: DataFrame to reformat. It is modified in place.

    Returns:
        The same DataFrame object, to allow use with ``DataFrame.pipe``.
        Columns of any other dtype are left untouched; a frame without
        datetime columns is returned unchanged.
    """
    # Iterating per column makes the no-datetime-columns case a plain no-op
    # instead of relying on apply/assignment behaviour for an empty frame.
    for column in df.select_dtypes('datetime64').columns:
        df[column] = df[column].dt.strftime('%d-%b-%Y')
    return df
# ISO-style date such as 2023-01-31. A raw string is used because "\d" in a
# plain string literal is an invalid escape sequence.
_DATE_PATTERN = r"\d{4}-\d{2}-\d{2}"


def _is_date(cells):
    """Return a boolean mask of the cells that contain an ISO-style date."""
    # na=False maps empty cells to False so the mask stays boolean and can be
    # negated with ~.
    return cells.str.contains(_DATE_PATTERN, regex=True, na=False)


def _to_datetime_or_keep(column):
    """Parse a column as %Y-%m-%d dates; return it unchanged if that fails."""
    # Explicit replacement for the deprecated errors="ignore" option.
    try:
        return pd.to_datetime(column, format='%Y-%m-%d')
    except (ValueError, TypeError):
        return column


df = (
    pd.read_excel("BrunoA.xlsx", header=None, dtype=str)
    .assign(
        # Non-date cells, forward-filled onto the date rows below them.
        Col_A=lambda x: x[0].where(~_is_date(x[0])).ffill(),
        # Date cells only; every other row becomes NaN.
        Col_B=lambda x: x[0].where(_is_date(x[0])),
    )
    .drop(columns=0)
    .drop_duplicates()
    .apply(_to_datetime_or_keep)
    .pipe(format_dates)
    # Rows whose Col_A value occurs once get "DELETE" in Col_B, so the
    # dropna() below only removes rows that still have a NaN.
    .pipe(flag_delete)
    .dropna()
    .rename(columns={"Col_A": -1, "Col_B": 0})
    .sort_index(axis=1)
    .reset_index(drop=True)
)
display(df)
Collected from the Internet
Please contact [email protected] to delete if infringement.
Comments