I have tried using the following commands.
I version my Python script for each change and push it to S3 under a new versioned name:
aws s3 cp aws_glue_script_v1.0.3_1.py s3://mytestcicdglue/glue-scripts/aws_glue_script_v1.0.3_1.py
I have a skeleton JSON of the Glue job downloaded, and I keep changing the S3 script location in it to apply changes to the Glue job:
aws glue update-job --job-name glue-cicd-test --job-update file://glue_job_updated_sample.json
Issue with this approach: even after a new script file is pushed, the Glue job keeps pointing at the previously configured script, i.e. the old code, until the full job definition is re-uploaded. Is there another solution or approach to update only the Glue job's script while retaining all of its other properties (data sources, connections, etc.)?
Sample: glue_job_updated_sample.json (the "ScriptLocation" value under "Command" is the field that must be changed for each new script version)
{
"JobMode": "VISUAL",
"JobRunQueuingEnabled": false,
"Description": "",
"Role": "arn:aws:iam::xxxxx:role/Glue-cicd-test-role",
"ExecutionProperty": {
"MaxConcurrentRuns": 1
},
"Command": {
"Name": "gluestreaming",
"ScriptLocation": "s3://mytestcicdglue/glue-scripts/aws_glue_script_v1.0.3_1.py",
"PythonVersion": "3"
},
"DefaultArguments": {
"--enable-metrics": "true",
"--enable-spark-ui": "true",
"--extra-py-files": "s3://aws-glue-studio-transforms-xxxxx-prod-ap-southeast-1/misc/openpyxl.zip,s3://aws-glue-studio-transforms-xxxxxx-prod-ap-southeast-1/misc/et_xmlfile.zip",
"--spark-event-logs-path": "s3://aws-glue-assets-xxxxx-ap-southeast-1/sparkHistoryLogs/",
"--enable-job-insights": "true",
"--enable-observability-metrics": "false",
"--enable-glue-datacatalog": "true",
"--job-bookmark-option": "job-bookmark-disable",
"--job-language": "python",
"--TempDir": "s3://aws-glue-assets-xxxxx-ap-southeast-1/temporary/"
},
"MaxRetries": 0,
"Timeout": 480,
"WorkerType": "G.1X",
"NumberOfWorkers": 10,
"GlueVersion": "5.0",
"CodeGenConfigurationNodes": {
"node-1759912958741": {
"CatalogKafkaSource": {
"Name": "Apache Kafka",
"WindowSize": 100,
"DetectSchema": true,
"Table": "glue-cicd-test",
"Database": "testdb",
"StreamingOptions": {
"StartingOffsets": "earliest"
}
}
},
"node-1759909124967": {
"S3ExcelSource": {
"Name": "Amazon S3",
"Paths": [
"s3://xxxxx/access-list_v1.1.xlsx"
],
"Exclusions": [],
"Recurse": true,
"AdditionalOptions": {
"EnableSamplePath": false,
"SamplePath": "s3://xxxx/access-list_v1.1.xlsx"
},
"OutputSchemas": [
{
"Columns": []
}
]
}
}
},
"ExecutionClass": "STANDARD"
}