Skip to content

[SkyServe] New method to install cloud dependencies #2822

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions sky/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from sky import spot
from sky import task as task_lib
from sky.backends import backend_utils
from sky.clouds import gcp
from sky.skylet import constants
from sky.usage import usage_lib
from sky.utils import common_utils
Expand Down Expand Up @@ -688,8 +687,6 @@ def spot_launch(
'spot_controller': controller_name,
# Note: actual spot cluster name will be <task.name>-<spot job ID>
'dag_name': dag.name,
'google_sdk_installation_commands':
gcp.GOOGLE_SDK_INSTALLATION_COMMAND,
'retry_until_up': retry_until_up,
**extra_vars,
}
Expand Down
3 changes: 0 additions & 3 deletions sky/serve/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from sky import status_lib
from sky import task as task_lib
from sky.backends import backend_utils
from sky.clouds import gcp
from sky.serve import constants as serve_constants
from sky.serve import serve_utils
from sky.skylet import constants
Expand Down Expand Up @@ -103,8 +102,6 @@ def up(
vars_to_fill = {
'remote_task_yaml_path': remote_tmp_task_yaml_path,
'local_task_yaml_path': service_file.name,
'google_sdk_installation_commands':
gcp.GOOGLE_SDK_INSTALLATION_COMMAND,
'service_name': service_name,
'controller_log_file': controller_log_file,
**extra_vars,
Expand Down
3 changes: 1 addition & 2 deletions sky/setup_files/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,10 @@ def parse_readme(readme: str) -> str:
'docker': ['docker'] + local_ray,
'lambda': local_ray,
'cloudflare': aws_dependencies,
'scp': [] + local_ray,
'scp': local_ray,
'oci': ['oci'] + local_ray,
'kubernetes': ['kubernetes'] + local_ray,
'remote': remote,
'serve': ['uvicorn', 'fastapi'],
}

extras_require['all'] = sum(extras_require.values(), [])
Expand Down
9 changes: 5 additions & 4 deletions sky/templates/sky-serve-controller.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ setup: |
# Install all cloud dependencies.
# This is for multicloud support. To allow controller launch on all clouds,
# we need to install all cloud dependencies.
# This also includes all serve dependencies.
pip install skypilot[all] > /dev/null 2>&1
{%- for cmd in cloud_dependencies_installation_commands %}
{{cmd}}
{%- endfor %}

# Install gcloud CLI.
{{google_sdk_installation_commands}}
# Install serve dependencies.
pip install uvicorn fastapi 2>&1 > /dev/null

file_mounts:
{{remote_task_yaml_path}}: {{local_task_yaml_path}}
Expand Down
18 changes: 4 additions & 14 deletions sky/templates/spot-controller.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,12 @@ file_mounts:
{% endif %}

setup: |
# Install cli dependencies
# Not using SkyPilot wheels because the wheel can be cleaned up by another process.
# TODO(zhwu): Keep the dependencies align with the ones in setup.py
(pip list | grep boto3 > /dev/null 2>&1 && \
pip list | grep google-api-python-client > /dev/null 2>&1 ) || \
pip install boto3 awscli pycryptodome==3.12.0 google-api-python-client google-cloud-storage 2>&1 > /dev/null
{%- for cmd in cloud_dependencies_installation_commands %}
{{cmd}}
{%- endfor %}

# We do not install azure dependencies for now since our subscription does not support spot instances.
# pip list | grep azure-cli > /dev/null 2>&1 || \
# pip3 install azure-cli==2.31.0 azure-core
{{google_sdk_installation_commands}}

pip list | grep oci > /dev/null 2>&1 || pip install oci 2>&1 > /dev/null

# Internal: disable logging for manually logging into the spot controller for debugging.
{% if is_dev %}
# Internal: disable logging for manually logging into the spot controller for debugging.
echo 'export SKYPILOT_DEV=1' >> ~/.bashrc
{% endif %}

Expand Down
44 changes: 42 additions & 2 deletions sky/utils/controller_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
import os
import tempfile
import typing
from typing import Any, Dict, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple

import colorama

from sky import exceptions
from sky import sky_logging
from sky import skypilot_config
from sky.clouds import gcp
from sky.data import data_utils
from sky.data import storage as storage_lib
from sky.serve import serve_utils
Expand Down Expand Up @@ -122,6 +123,42 @@ def from_name(cls, name: Optional[str]) -> Optional['Controllers']:
return None


# Install cli dependencies. Not using SkyPilot wheels because the wheel
# can be cleaned up by another process.
# TODO(zhwu): Keep the dependencies aligned with the ones in setup.py
def _get_cloud_dependencies_installation_commands(
        controller_type: str) -> List[str]:
    """Returns shell commands that install cloud dependencies.

    Each pip-based command first greps `pip list` for a representative
    package and only runs the install when that package is not found.

    Args:
        controller_type: Type of the controller. Azure dependencies are
            skipped when this is 'spot'.

    Returns:
        A list of shell command strings, one entry per dependency group
        (plus the gcloud SDK installation command).
    """
    # NOTE: Version specifiers must be quoted. An unquoted `pkg>=1.0` is
    # parsed by the shell as `pkg` with stdout redirected to a file named
    # `=1.0`, which silently drops the version constraint.
    commands = [
        # aws
        'pip list | grep boto3 > /dev/null 2>&1 || '
        'pip install "urllib3<2" "awscli>=1.27.10" "botocore>=1.29.10" '
        '"boto3>=1.26.1" > /dev/null 2>&1',
        # gcp
        'pip list | grep google-api-python-client > /dev/null 2>&1 || '
        'pip install "google-api-python-client>=2.69.0" google-cloud-storage '
        '> /dev/null 2>&1',
        f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}',
        # ibm
        'pip list | grep ibm-cloud-sdk-core > /dev/null 2>&1 || '
        'pip install ibm-cloud-sdk-core ibm-vpc ibm-platform-services '
        'ibm-cos-sdk > /dev/null 2>&1',
        # oci
        # The trailing comma matters: without it this entry is implicitly
        # concatenated with the kubernetes command into one broken string.
        'pip list | grep oci > /dev/null 2>&1 || '
        'pip install oci > /dev/null 2>&1',
        # kubernetes
        'pip list | grep kubernetes > /dev/null 2>&1 || '
        'pip install kubernetes > /dev/null 2>&1',
    ]
    # We do not install azure dependencies for spot controller since our
    # subscription does not support spot instances.
    if controller_type != 'spot':
        commands.append(
            'pip list | grep azure-cli > /dev/null 2>&1 || '
            'pip3 install "azure-cli>=2.31.0" azure-core '
            '"azure-identity>=1.13.0" azure-mgmt-network > /dev/null')
    return commands


def check_cluster_name_not_controller(
cluster_name: Optional[str],
operation_str: Optional[str] = None) -> None:
Expand Down Expand Up @@ -214,7 +251,10 @@ def skypilot_config_setup(
The controller_resources_config is the resources config that will be
used to launch the controller.
"""
vars_to_fill: Dict[str, Any] = {}
vars_to_fill: Dict[str, Any] = {
'cloud_dependencies_installation_commands':
_get_cloud_dependencies_installation_commands(controller_type)
}
controller_envs = _shared_controller_env_vars()
controller_resources_config_copied: Dict[str, Any] = copy.copy(
controller_resources_config)
Expand Down