@@ -110,8 +110,9 @@ def get_gke_accelerator_name(accelerator: str) -> str:
110
110
if accelerator == 'H100' :
111
111
# H100 is named as H100-80GB in GKE.
112
112
accelerator = 'H100-80GB'
113
- if accelerator in ('A100-80GB' , 'L4' , 'H100-80GB' ):
114
- # A100-80GB, L4 and H100-80GB have a different name pattern.
113
+ if accelerator in ('A100-80GB' , 'L4' , 'H100-80GB' , 'H100-MEGA-80GB' ):
114
+ # A100-80GB, L4, H100-80GB and H100-MEGA-80GB
115
+ # have a different name pattern.
115
116
return 'nvidia-{}' .format (accelerator .lower ())
116
117
else :
117
118
return 'nvidia-tesla-{}' .format (accelerator .lower ())
@@ -194,13 +195,10 @@ def get_accelerator_from_label_value(cls, value: str) -> str:
194
195
return value .replace ('nvidia-tesla-' , '' ).upper ()
195
196
elif value .startswith ('nvidia-' ):
196
197
acc = value .replace ('nvidia-' , '' ).upper ()
197
- if acc in ['H100-80GB' , 'H100-MEGA-80GB' ]:
198
- # H100 is named H100-80GB or H100-MEGA-80GB in GKE,
199
- # where the latter has improved bandwidth.
200
- # See a3-mega instances on GCP.
201
- # TODO: we do not distinguish the two GPUs for simplicity,
202
- # but we can evaluate whether we should distinguish
203
- # them based on users' requests.
198
+ if acc == 'H100-80GB' :
199
+ # H100 can be either H100-80GB or H100-MEGA-80GB in GKE.
200
+ # We map H100-80GB ---> H100 and keep H100-MEGA-80GB as is
201
+ # to distinguish between a3-high and a3-mega instances.
204
202
return 'H100'
205
203
return acc
206
204
else :
0 commit comments