from pyspark.sql.functions import date_format

# Add a "year_month" column by formatting the "Ddate" column with the
# datetime pattern "yyyyMM" (four-digit year followed by two-digit month).
# DataFrames are immutable, so withColumn returns a new DataFrame that is
# rebound to `df`.
# NOTE(review): assumes `df` is an existing Spark DataFrame whose "Ddate"
# column is a date/timestamp (or a string Spark can cast to one) -- confirm
# against the code that builds `df`.
df = df.withColumn("year_month", date_format(df["Ddate"], "yyyyMM"))