79411884

Date: 2025-02-04 13:40:52
Score: 0.5
Natty:
Report link

ZygD's solution doesn't work because the MONTH field is zero-indexed. Here is a simple fix for it:

from pyspark.sql.functions import lpad, concat, lit, regexp_replace, col

def convert_gregorian_datetime_to_date(df, col_name):
    """Replace a GregorianCalendar string dump column with a ``date`` column.

    The column ``col_name`` is expected to hold text containing
    ``YEAR=..``, ``MONTH=..``, ``DAY_OF_MONTH=..``, ``HOUR=..``,
    ``MINUTE=..`` and ``SECOND=..`` fields, in that order. The year, month
    and day are extracted, the month is shifted by one, and the result is
    cast to a date that overwrites ``col_name`` in place.

    Args:
        df: DataFrame containing the column to convert.
        col_name: Name of the string column to convert.

    Returns:
        A DataFrame with the same columns as ``df`` where ``col_name`` has
        been replaced by the parsed date.
    """
    # Raw string: "\d" inside a normal literal is an invalid escape sequence.
    # The time-of-day groups are matched so the pattern consumes the whole
    # value, but only groups 1-3 are used below.
    gregorian_regex = r'(?:.*)YEAR=(\d+).+?MONTH=(\d+).+?DAY_OF_MONTH=(\d+).+?HOUR=(\d+).+?MINUTE=(\d+).+?SECOND=(\d+).+'
    source = col(col_name)

    year = regexp_replace(source, gregorian_regex, '$1')
    # MONTH is zero indexed in the input, so add 1 before zero-padding it
    # to two digits.
    month_number = regexp_replace(source, gregorian_regex, '$2').cast('integer') + 1
    month = lpad(month_number.cast("string"), 2, "0")
    day = regexp_replace(source, gregorian_regex, '$3')

    # Build the date from column expressions directly instead of temporary
    # "year_tmp"/"month_tmp"/"day_tmp" columns, so columns of the caller's
    # DataFrame that happen to carry those names are not overwritten and
    # dropped.
    return df.withColumn(
        col_name,
        concat(year, lit("-"), month, lit("-"), day).cast('date'),
    )

# Demo: a one-row DataFrame whose only column holds the text dump of a
# GregorianCalendar (YEAR=2017, MONTH=7, DAY_OF_MONTH=18).
df = spark.createDataFrame(
    data=[
        ('java.util.GregorianCalendar[time=?,areFieldsSet=false,areAllFieldsSet=false,lenient=true,zone=sun.util.calendar.ZoneInfo[id="Etc/UTC",offset=0,dstSavings=0,useDaylight=false,transitions=0,lastRule=null],firstDayOfWeek=1,minimalDaysInFirstWeek=1,ERA=?,YEAR=2017,MONTH=7,WEEK_OF_YEAR=?,WEEK_OF_MONTH=?,DAY_OF_MONTH=18,DAY_OF_YEAR=?,DAY_OF_WEEK=?,DAY_OF_WEEK_IN_MONTH=?,AM_PM=0,HOUR=9,HOUR_OF_DAY=9,MINUTE=1,SECOND=52,MILLISECOND=0,ZONE_OFFSET=?,DST_OFFSET=?]',),
    ],
    schema=['GregorianCalendar'],
)

# Convert the column in place and print the resulting DataFrame.
df.transform(convert_gregorian_datetime_to_date, 'GregorianCalendar').show()

+-----------------+
|GregorianCalendar|
+-----------------+
|       2017-08-18|
+-----------------+
Reasons:
  • Blacklisted phrase (1): doesnt work
  • Long answer (-1):
  • Has code block (-0.5):
  • Low reputation (1):
Posted by: Filippo Adessi