@@ -1314,6 +1314,7 @@ def _retry_zones(
1314
1314
prev_cluster_status : Optional [status_lib .ClusterStatus ],
1315
1315
prev_handle : Optional ['CloudVmRayResourceHandle' ],
1316
1316
prev_cluster_ever_up : bool ,
1317
+ skip_if_config_hash_matches : Optional [str ],
1317
1318
) -> Dict [str , Any ]:
1318
1319
"""The provision retry loop."""
1319
1320
# Get log_path name
@@ -1424,8 +1425,16 @@ def _retry_zones(
1424
1425
raise exceptions .ResourcesUnavailableError (
1425
1426
f'Failed to provision on cloud { to_provision .cloud } due to '
1426
1427
f'invalid cloud config: { common_utils .format_exception (e )} ' )
1428
+
1429
+ if skip_if_config_hash_matches == config_dict ['config_hash' ]:
1430
+ logger .info (
1431
+ 'Skipping provisioning of cluster with matching config hash.'
1432
+ )
1433
+ return config_dict
1434
+
1427
1435
if dryrun :
1428
1436
return config_dict
1437
+
1429
1438
cluster_config_file = config_dict ['ray' ]
1430
1439
1431
1440
launched_resources = to_provision .copy (region = region .name )
@@ -1937,6 +1946,7 @@ def provision_with_retries(
1937
1946
to_provision_config : ToProvisionConfig ,
1938
1947
dryrun : bool ,
1939
1948
stream_logs : bool ,
1949
+ skip_if_config_hash_matches : Optional [str ],
1940
1950
) -> Dict [str , Any ]:
1941
1951
"""Provision with retries for all launchable resources."""
1942
1952
cluster_name = to_provision_config .cluster_name
@@ -1986,7 +1996,8 @@ def provision_with_retries(
1986
1996
cloud_user_identity = cloud_user ,
1987
1997
prev_cluster_status = prev_cluster_status ,
1988
1998
prev_handle = prev_handle ,
1989
- prev_cluster_ever_up = prev_cluster_ever_up )
1999
+ prev_cluster_ever_up = prev_cluster_ever_up ,
2000
+ skip_if_config_hash_matches = skip_if_config_hash_matches )
1990
2001
if dryrun :
1991
2002
return config_dict
1992
2003
except (exceptions .InvalidClusterNameError ,
@@ -2687,13 +2698,15 @@ def check_resources_fit_cluster(
2687
2698
return valid_resource
2688
2699
2689
2700
def _provision (
2690
- self ,
2691
- task : task_lib .Task ,
2692
- to_provision : Optional [resources_lib .Resources ],
2693
- dryrun : bool ,
2694
- stream_logs : bool ,
2695
- cluster_name : str ,
2696
- retry_until_up : bool = False ) -> Optional [CloudVmRayResourceHandle ]:
2701
+ self ,
2702
+ task : task_lib .Task ,
2703
+ to_provision : Optional [resources_lib .Resources ],
2704
+ dryrun : bool ,
2705
+ stream_logs : bool ,
2706
+ cluster_name : str ,
2707
+ retry_until_up : bool = False ,
2708
+ skip_if_config_hash_matches : Optional [str ] = None
2709
+ ) -> Optional [CloudVmRayResourceHandle ]:
2697
2710
"""Provisions using 'ray up'.
2698
2711
2699
2712
Raises:
@@ -2779,7 +2792,8 @@ def _provision(
2779
2792
rich_utils .force_update_status (
2780
2793
ux_utils .spinner_message ('Launching' , log_path ))
2781
2794
config_dict = retry_provisioner .provision_with_retries (
2782
- task , to_provision_config , dryrun , stream_logs )
2795
+ task , to_provision_config , dryrun , stream_logs ,
2796
+ skip_if_config_hash_matches )
2783
2797
break
2784
2798
except exceptions .ResourcesUnavailableError as e :
2785
2799
# Do not remove the stopped cluster from the global state
@@ -2829,6 +2843,15 @@ def _provision(
2829
2843
record = global_user_state .get_cluster_from_name (cluster_name )
2830
2844
return record ['handle' ] if record is not None else None
2831
2845
2846
+ config_hash = config_dict ['config_hash' ]
2847
+
2848
+ if skip_if_config_hash_matches is not None :
2849
+ record = global_user_state .get_cluster_from_name (cluster_name )
2850
+ if (record is not None and skip_if_config_hash_matches ==
2851
+ config_hash == record ['config_hash' ]):
2852
+ logger .info ('skip remaining' )
2853
+ return record ['handle' ]
2854
+
2832
2855
if 'provision_record' in config_dict :
2833
2856
# New provisioner is used here.
2834
2857
handle = config_dict ['handle' ]
@@ -2868,7 +2891,7 @@ def _provision(
2868
2891
self ._update_after_cluster_provisioned (
2869
2892
handle , to_provision_config .prev_handle , task ,
2870
2893
prev_cluster_status , handle .external_ips (),
2871
- handle .external_ssh_ports (), lock_path )
2894
+ handle .external_ssh_ports (), lock_path , config_hash )
2872
2895
return handle
2873
2896
2874
2897
cluster_config_file = config_dict ['ray' ]
@@ -2940,7 +2963,8 @@ def _get_zone(runner):
2940
2963
2941
2964
self ._update_after_cluster_provisioned (
2942
2965
handle , to_provision_config .prev_handle , task ,
2943
- prev_cluster_status , ip_list , ssh_port_list , lock_path )
2966
+ prev_cluster_status , ip_list , ssh_port_list , lock_path ,
2967
+ config_hash )
2944
2968
return handle
2945
2969
2946
2970
def _open_ports (self , handle : CloudVmRayResourceHandle ) -> None :
@@ -2958,8 +2982,8 @@ def _update_after_cluster_provisioned(
2958
2982
prev_handle : Optional [CloudVmRayResourceHandle ],
2959
2983
task : task_lib .Task ,
2960
2984
prev_cluster_status : Optional [status_lib .ClusterStatus ],
2961
- ip_list : List [str ], ssh_port_list : List [int ],
2962
- lock_path : str ) -> None :
2985
+ ip_list : List [str ], ssh_port_list : List [int ], lock_path : str ,
2986
+ config_hash : str ) -> None :
2963
2987
usage_lib .messages .usage .update_cluster_resources (
2964
2988
handle .launched_nodes , handle .launched_resources )
2965
2989
usage_lib .messages .usage .update_final_cluster_status (
@@ -3019,6 +3043,7 @@ def _update_after_cluster_provisioned(
3019
3043
handle ,
3020
3044
set (task .resources ),
3021
3045
ready = True ,
3046
+ config_hash = config_hash ,
3022
3047
)
3023
3048
usage_lib .messages .usage .update_final_cluster_status (
3024
3049
status_lib .ClusterStatus .UP )
0 commit comments