From 3520ae82ce85daa4cf1a02484b341df865366fb5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=B4=E9=9B=A8=E7=BE=B2?=
Date: Mon, 24 Jan 2022 15:12:39 +0800
Subject: [PATCH 1/4] fix incompatible dependencies in fairing install

---
 .gitignore                                    |  3 +-
 kubeflow/fairing/constants/constants.py      |  2 +-
 .../fairing/deployers/kfserving/kfserving.py | 81 ++++++++-----------
 requirements.txt                             | 13 ++-
 4 files changed, 42 insertions(+), 57 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4fa2425e..90b7d0cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,8 @@
 .metaparticle
 *.bak
 .vscode
-
+.idea
+.env
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/kubeflow/fairing/constants/constants.py b/kubeflow/fairing/constants/constants.py
index c5f73e72..bf34e9fe 100644
--- a/kubeflow/fairing/constants/constants.py
+++ b/kubeflow/fairing/constants/constants.py
@@ -58,7 +58,7 @@
 KFSERVING_GROUP = "serving.kubeflow.org"
 KFSERVING_KIND = "InferenceService"
 KFSERVING_PLURAL = "inferenceservices"
-KFSERVING_VERSION = 'v1alpha2'
+KFSERVING_VERSION = 'v1beta1'
 KFSERVING_DEFAULT_NAME = 'fairing-kfserving-'
 KFSERVING_DEPLOYER_TYPE = 'kfservice'
 KFSERVING_CONTAINER_NAME = 'user-container'
diff --git a/kubeflow/fairing/deployers/kfserving/kfserving.py b/kubeflow/fairing/deployers/kfserving/kfserving.py
index ad5f8dd9..173f451e 100644
--- a/kubeflow/fairing/deployers/kfserving/kfserving.py
+++ b/kubeflow/fairing/deployers/kfserving/kfserving.py
@@ -12,27 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import uuid
 import logging
+import uuid
 
-from kubernetes import client as k8s_client
-
-from kfserving import V1alpha2EndpointSpec
-from kfserving import V1alpha2PredictorSpec
-from kfserving import V1alpha2TensorflowSpec
 from kfserving import V1alpha2ONNXSpec
-from kfserving import V1alpha2PyTorchSpec
-from kfserving import V1alpha2SKLearnSpec
-from kfserving import V1alpha2TritonSpec
-from kfserving import V1alpha2XGBoostSpec
-from kfserving import V1alpha2CustomSpec
-from kfserving import V1alpha2InferenceServiceSpec
-from kfserving import V1alpha2InferenceService
-
+from kfserving import V1beta1InferenceService
+from kfserving import V1beta1InferenceServiceSpec
+from kfserving import V1beta1PredictorSpec
+from kfserving import V1beta1SKLearnSpec
+from kfserving import V1beta1TFServingSpec
+from kfserving import V1beta1TorchServeSpec
+from kfserving import V1beta1TritonSpec
+from kfserving import V1beta1XGBoostSpec
+from kubeflow.fairing import utils
 from kubeflow.fairing.constants import constants
 from kubeflow.fairing.deployers.deployer import DeployerInterface
 from kubeflow.fairing.kubernetes.manager import KubeManager
-from kubeflow.fairing import utils
+from kubernetes import client as k8s_client
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -153,50 +149,41 @@ def generate_isvc(self):
         canary_predictor = self.generate_predictor_spec(
             self.framework, container=self.custom_canary_container)
 
-        if canary_predictor:
-            isvc_spec = V1alpha2InferenceServiceSpec(
-                default=V1alpha2EndpointSpec(predictor=default_predictor),
-                canary=V1alpha2EndpointSpec(predictor=canary_predictor),
-                canary_traffic_percent=self.canary_traffic_percent)
-        else:
-            isvc_spec = V1alpha2InferenceServiceSpec(
-                default=V1alpha2EndpointSpec(predictor=default_predictor),
-                canary_traffic_percent=self.canary_traffic_percent)
-
-        return V1alpha2InferenceService(api_version=api_version,
-                                        kind=constants.KFSERVING_KIND,
-                                        metadata=k8s_client.V1ObjectMeta(
-                                            name=self.isvc_name,
-                                            generate_name=constants.KFSERVING_DEFAULT_NAME,
-                                            namespace=self.namespace),
-                                        spec=isvc_spec)
-
+        return V1beta1InferenceService(api_version=api_version,
+                                       kind=constants.KFSERVING_KIND,
+                                       metadata=k8s_client.V1ObjectMeta(
+                                           name=self.isvc_name,
+                                           generate_name=constants.KFSERVING_DEFAULT_NAME,
+                                           namespace=self.namespace),
+                                       spec=V1beta1InferenceServiceSpec(
+                                           predictor=default_predictor
+                                       ))
 
     def generate_predictor_spec(self, framework, storage_uri=None, container=None):
         '''Generate predictor spec according to framework and
            default_storage_uri or custom container.
         '''
         if self.framework == 'tensorflow':
-            predictor = V1alpha2PredictorSpec(
-                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
+            predictor = V1beta1PredictorSpec(
+                tensorflow=V1beta1TFServingSpec(storage_uri=storage_uri))
         elif self.framework == 'onnx':
-            predictor = V1alpha2PredictorSpec(
+            predictor = V1beta1PredictorSpec(
                 onnx=V1alpha2ONNXSpec(storage_uri=storage_uri))
         elif self.framework == 'pytorch':
-            predictor = V1alpha2PredictorSpec(
-                pytorch=V1alpha2PyTorchSpec(storage_uri=storage_uri))
+            predictor = V1beta1PredictorSpec(
+                pytorch=V1beta1TorchServeSpec(storage_uri=storage_uri))
         elif self.framework == 'sklearn':
-            predictor = V1alpha2PredictorSpec(
-                sklearn=V1alpha2SKLearnSpec(storage_uri=storage_uri))
+            predictor = V1beta1PredictorSpec(
+                sklearn=V1beta1SKLearnSpec(storage_uri=storage_uri))
         elif self.framework == 'triton':
-            predictor = V1alpha2PredictorSpec(
-                triton=V1alpha2TritonSpec(storage_uri=storage_uri))
+            predictor = V1beta1PredictorSpec(
+                triton=V1beta1TritonSpec(storage_uri=storage_uri))
         elif self.framework == 'xgboost':
-            predictor = V1alpha2PredictorSpec(
-                xgboost=V1alpha2XGBoostSpec(storage_uri=storage_uri))
-        elif self.framework == 'custom':
-            predictor = V1alpha2PredictorSpec(
-                custom=V1alpha2CustomSpec(container=container))
+            predictor = V1beta1PredictorSpec(
+                xgboost=V1beta1XGBoostSpec(storage_uri=storage_uri))
+        # elif self.framework == 'custom':
+        #     predictor = V1beta1PredictorSpec(
+        #         custom=V1alpha2CustomSpec(container=container))
         else:
             raise RuntimeError("Unsupported framework {}".format(framework))
         return predictor
diff --git a/requirements.txt b/requirements.txt
index e27cd42e..94b2b2bd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,14 +1,12 @@
-python-dateutil>=2.1,<=2.8.0
-numpy>=1.17.3
-kfserving>=0.3.0.2
+kfserving>=0.6.1
 docker>=3.4.1
 notebook>=5.6.0
-kubernetes==10.0.1
+kubernetes>=12.0.0
 future>=0.17.1
 six>=1.11.0
 google-cloud-storage>=1.13.2
 google-cloud-logging>=1.13.0
-requests>=2.21.0,<2.23
+requests>=2.21.0
 setuptools>=34.0.0
 google-auth>=1.6.2
 httplib2>=0.12.0
@@ -16,13 +14,12 @@ oauth2client>=4.0.0
 tornado>=6.0.1
 google-api-python-client>=1.7.8
 cloudpickle>=0.8,<=1.4.1
-urllib3==1.24.2
 boto3>=1.9.0
 azure-storage-file>=2.1.0
 azure-mgmt-storage>=9.0.0
 retrying>=1.3.3
-kubeflow-tfjob>=0.1.1
-kubeflow-pytorchjob>=0.1.1
+kubeflow-training>=1.3.0
 ibm-cos-sdk>=2.6.0
 grpcio>=1.27.2
 nbconvert>=5.6.1
+msrestazure>=0.6.4
\ No newline at end of file
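Note: with KFSERVING_VERSION bumped to v1beta1, generate_isvc() now emits a plain v1beta1 InferenceService. A minimal standalone sketch of the same construction, using the group/version and naming constants from constants.py above; the namespace and storage URI are hypothetical placeholders:

from kubernetes import client as k8s_client
from kfserving import V1beta1InferenceService
from kfserving import V1beta1InferenceServiceSpec
from kfserving import V1beta1PredictorSpec
from kfserving import V1beta1SKLearnSpec

# Mirrors the sklearn path of KFServing.generate_isvc(); api_version, kind
# and generate_name correspond to the constants changed in this patch.
isvc = V1beta1InferenceService(
    api_version='serving.kubeflow.org/v1beta1',
    kind='InferenceService',
    metadata=k8s_client.V1ObjectMeta(
        generate_name='fairing-kfserving-',
        namespace='kubeflow-user'),  # hypothetical namespace
    spec=V1beta1InferenceServiceSpec(
        predictor=V1beta1PredictorSpec(
            sklearn=V1beta1SKLearnSpec(
                storage_uri='gs://example-bucket/sklearn/model'))))  # hypothetical URI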
From 5fcc1bdd8cdfdaeaf0209e772885416af0d06be0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=B4=E9=9B=A8=E7=BE=B2?=
Date: Mon, 24 Jan 2022 18:41:33 +0800
Subject: [PATCH 2/4] adapt to the training operator

---
 .../fairing/deployers/pytorchjob/pytorchjob.py | 14 +++++++-------
 kubeflow/fairing/deployers/tfjob/tfjob.py      | 14 ++++++++------
 kubeflow/fairing/kubernetes/manager.py         |  4 ++--
 3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/kubeflow/fairing/deployers/pytorchjob/pytorchjob.py b/kubeflow/fairing/deployers/pytorchjob/pytorchjob.py
index 8d70bda9..f6d6dfb9 100644
--- a/kubeflow/fairing/deployers/pytorchjob/pytorchjob.py
+++ b/kubeflow/fairing/deployers/pytorchjob/pytorchjob.py
@@ -1,12 +1,11 @@
 import logging
 
-from kubernetes import client as k8s_client
-
-from kubeflow.pytorchjob import V1ReplicaSpec
-from kubeflow.pytorchjob import V1PyTorchJob
-from kubeflow.pytorchjob import V1PyTorchJobSpec
 from kubeflow.fairing.constants import constants
 from kubeflow.fairing.deployers.job.job import Job
+from kubeflow.training import V1PyTorchJob
+from kubeflow.training import V1PyTorchJobSpec
+from kubeflow.training import V1ReplicaSpec, V1RunPolicy
+from kubernetes import client as k8s_client
 
 logger = logging.getLogger(__name__)
 
@@ -14,6 +13,7 @@
 class PyTorchJob(Job):
     """ Handle all the k8s' template building to create pytorch training job using
     Kubeflow PyTorch Operator"""
+
     def __init__(self, namespace=None, master_count=1, worker_count=1, runs=1,
                  job_name=None, stream_log=True, labels=None,
                  pod_spec_mutators=None, cleanup=False, annotations=None,
@@ -81,12 +81,12 @@ def generate_deployment_spec(self, pod_template_spec):
         pytorchjob = V1PyTorchJob(
             api_version=constants.PYTORCH_JOB_GROUP + "/" + \
-                        constants.PYTORCH_JOB_VERSION,
+            constants.PYTORCH_JOB_VERSION,
             kind=constants.PYTORCH_JOB_KIND,
             metadata=k8s_client.V1ObjectMeta(name=self.job_name,
                                              generate_name=constants.PYTORCH_JOB_DEFAULT_NAME,
                                              labels=self.labels),
-            spec=V1PyTorchJobSpec(pytorch_replica_specs=pytorch_replica_specs)
+            spec=V1PyTorchJobSpec(pytorch_replica_specs=pytorch_replica_specs, run_policy=V1RunPolicy(clean_pod_policy="None"))
         )
 
         return pytorchjob
diff --git a/kubeflow/fairing/deployers/tfjob/tfjob.py b/kubeflow/fairing/deployers/tfjob/tfjob.py
index 6a2f0b0d..4de92924 100644
--- a/kubeflow/fairing/deployers/tfjob/tfjob.py
+++ b/kubeflow/fairing/deployers/tfjob/tfjob.py
@@ -1,13 +1,13 @@
-import logging
 import copy
+import logging
 
 from kubernetes import client as k8s_client
 
-from kubeflow.tfjob import V1ReplicaSpec
-from kubeflow.tfjob import V1TFJob
-from kubeflow.tfjob import V1TFJobSpec
 from kubeflow.fairing.constants import constants
 from kubeflow.fairing.deployers.job.job import Job
+from kubeflow.training import V1ReplicaSpec, V1RunPolicy
+from kubeflow.training import V1TFJob
+from kubeflow.training import V1TFJobSpec
 
 logger = logging.getLogger(__name__)
 
@@ -15,6 +15,7 @@
 class TfJob(Job):
     """ Handle all the k8s' template building to create tensorflow
         training job using Kubeflow TFOperator"""
+
     def __init__(self, namespace=None, worker_count=1, ps_count=0, chief_count=0,
                  runs=1, job_name=None, stream_log=True, labels=None,
                  pod_spec_mutators=None, cleanup=False, annotations=None,
@@ -90,7 +91,8 @@ def generate_deployment_spec(self, pod_template_spec):
             metadata=k8s_client.V1ObjectMeta(name=self.job_name,
                                              generate_name=constants.TF_JOB_DEFAULT_NAME,
                                              labels=self.labels),
-            spec=V1TFJobSpec(tf_replica_specs=tf_replica_specs)
+            spec=V1TFJobSpec(tf_replica_specs=tf_replica_specs, run_policy=V1RunPolicy(clean_pod_policy="None"))
+
         )
 
         return tfjob
diff --git a/kubeflow/fairing/kubernetes/manager.py b/kubeflow/fairing/kubernetes/manager.py
index 71bb5e7c..d18bcb39 100644
--- a/kubeflow/fairing/kubernetes/manager.py
+++ b/kubeflow/fairing/kubernetes/manager.py
@@ -5,8 +5,8 @@
 from kubernetes import client, config, watch
 from kfserving import KFServingClient
 
-from kubeflow.tfjob import TFJobClient
-from kubeflow.pytorchjob import PyTorchJobClient
+from kubeflow.training import TFJobClient
+from kubeflow.training import PyTorchJobClient
 
 from kubeflow.fairing.utils import is_running_in_k8s, camel_to_snake
 from kubeflow.fairing.constants import constants
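Note: kubeflow-training moved clean_pod_policy out of the per-framework job specs and into a shared V1RunPolicy, which is why both deployers above now pass run_policy explicitly. A self-contained sketch of the equivalent TFJob construction; the image, replica count, and generate_name are hypothetical stand-ins for values fairing derives at run time:

from kubernetes import client as k8s_client
from kubeflow.training import V1ReplicaSpec, V1RunPolicy
from kubeflow.training import V1TFJob
from kubeflow.training import V1TFJobSpec

# Single-worker pod template; the training operator expects the primary
# container of a TFJob to be named 'tensorflow'.
pod_template = k8s_client.V1PodTemplateSpec(
    spec=k8s_client.V1PodSpec(containers=[k8s_client.V1Container(
        name='tensorflow', image='tensorflow/tensorflow:2.4.1')]))

tfjob = V1TFJob(
    api_version='kubeflow.org/v1',  # assuming TF_JOB_GROUP/TF_JOB_VERSION resolve to the v1 API
    kind='TFJob',
    metadata=k8s_client.V1ObjectMeta(generate_name='fairing-tfjob-'),
    spec=V1TFJobSpec(
        tf_replica_specs={'Worker': V1ReplicaSpec(replicas=1,
                                                  template=pod_template)},
        run_policy=V1RunPolicy(clean_pod_policy='None')))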
From 5d9de8a8b1bf3d4eeed4b71eeb334567b23476b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=B4=E9=9B=A8=E7=BE=B2?=
Date: Mon, 24 Jan 2022 19:27:43 +0800
Subject: [PATCH 3/4] fix kfserving incompatibility with ray 1.9

---
 requirements.txt | 1 +
 setup.py         | 1 +
 2 files changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 94b2b2bd..d2841f09 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+ray[serve]==1.5.0
 kfserving>=0.6.1
 docker>=3.4.1
 notebook>=5.6.0
diff --git a/setup.py b/setup.py
index f6a4d326..07148b88 100644
--- a/setup.py
+++ b/setup.py
@@ -21,6 +21,7 @@
     'Programming Language :: Python :: 3.5',
     'Programming Language :: Python :: 3.6',
     'Programming Language :: Python :: 3.7',
+    'Programming Language :: Python :: 3.9',
     "License :: OSI Approved :: Apache Software License",
     "Operating System :: OS Independent",
     'Topic :: Scientific/Engineering',
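Note: per the subject above, kfserving pulls in ray[serve] and cannot work against ray 1.9, so the pin keeps pip from resolving a broken combination. A quick sanity check for a freshly built environment; a sketch, assuming ray and its serve extra were installed from this requirements.txt:

import ray
from ray import serve  # fails to import if the [serve] extra is missing

# The exact pin in requirements.txt should make this assertion hold.
assert ray.__version__ == '1.5.0', ray.__version__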
From 56c4e9c5b9dacec2b181565750c5204a0fce5939 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=90=B4=E9=9B=A8=E7=BE=B2?=
Date: Tue, 25 Jan 2022 15:20:48 +0800
Subject: [PATCH 4/4] clean up the KFServing deployer class

---
 .../fairing/deployers/kfserving/kfserving.py | 32 +++----------
 1 file changed, 4 insertions(+), 28 deletions(-)

diff --git a/kubeflow/fairing/deployers/kfserving/kfserving.py b/kubeflow/fairing/deployers/kfserving/kfserving.py
index 173f451e..5ae403d7 100644
--- a/kubeflow/fairing/deployers/kfserving/kfserving.py
+++ b/kubeflow/fairing/deployers/kfserving/kfserving.py
@@ -15,7 +15,6 @@
 import logging
 import uuid
 
-from kfserving import V1alpha2ONNXSpec
 from kfserving import V1beta1InferenceService
 from kfserving import V1beta1InferenceServiceSpec
 from kfserving import V1beta1PredictorSpec
@@ -37,7 +36,7 @@ class KFServing(DeployerInterface):
     """Serves a prediction endpoint using Kubeflow KFServing."""
 
-    def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None,
+    def __init__(self, framework, default_storage_uri=None,
                  canary_traffic_percent=0, namespace=None, labels=None, annotations=None,
                  custom_default_container=None, custom_canary_container=None,
                  isvc_name=None, stream_log=False, cleanup=False, config_file=None,
@@ -46,7 +45,6 @@ def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None,
         :param framework: The framework for the InferenceService, such as Tensorflow,
             XGBoost and ScikitLearn etc.
         :param default_storage_uri: URI pointing to Saved Model assets for default service.
-        :param canary_storage_uri: URI pointing to Saved Model assets for canary service.
         :param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
         :param namespace: The k8s namespace where the InferenceService will be deployed.
         :param labels: Labels for the InferenceService, separate with commas if have more than one.
@@ -69,7 +67,6 @@ def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None,
         self.framework = framework
         self.isvc_name = isvc_name
         self.default_storage_uri = default_storage_uri
-        self.canary_storage_uri = canary_storage_uri
         self.canary_traffic_percent = canary_traffic_percent
         self.annotations = annotations
         self.set_labels(labels)
@@ -131,24 +128,10 @@ def deploy(self, isvc): # pylint:disable=arguments-differ,unused-argument
     def generate_isvc(self):
         """ generate InferenceService
         """
         api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
-        default_predictor, canary_predictor = None, None
-
-        if self.framework == 'custom':
-            default_predictor = self.generate_predictor_spec(
-                self.framework, container=self.custom_default_container)
-        else:
-            default_predictor = self.generate_predictor_spec(
-                self.framework, storage_uri=self.default_storage_uri)
-
-        if self.framework != 'custom' and self.canary_storage_uri is not None:
-            canary_predictor = self.generate_predictor_spec(
-                self.framework, storage_uri=self.canary_storage_uri)
-        if self.framework == 'custom' and self.custom_canary_container is not None:
-            canary_predictor = self.generate_predictor_spec(
-                self.framework, container=self.custom_canary_container)
-
+        default_predictor = self.generate_predictor_spec(
+            self.framework, storage_uri=self.default_storage_uri)
         return V1beta1InferenceService(api_version=api_version,
                                        kind=constants.KFSERVING_KIND,
                                        metadata=k8s_client.V1ObjectMeta(
                                            name=self.isvc_name,
@@ -155,34 +138,28 @@ def generate_isvc(self):
                                            generate_name=constants.KFSERVING_DEFAULT_NAME,
                                            namespace=self.namespace),
                                        spec=V1beta1InferenceServiceSpec(
                                            predictor=default_predictor
                                        ))
 
-    def generate_predictor_spec(self, framework, storage_uri=None, container=None):
+    def generate_predictor_spec(self, framework, storage_uri=None):
         '''Generate predictor spec according to framework and
            default_storage_uri or custom container.
         '''
         if self.framework == 'tensorflow':
             predictor = V1beta1PredictorSpec(
                 tensorflow=V1beta1TFServingSpec(storage_uri=storage_uri))
-        elif self.framework == 'onnx':
-            predictor = V1beta1PredictorSpec(
-                onnx=V1alpha2ONNXSpec(storage_uri=storage_uri))
         elif self.framework == 'pytorch':
             predictor = V1beta1PredictorSpec(
                 pytorch=V1beta1TorchServeSpec(storage_uri=storage_uri))
         elif self.framework == 'sklearn':
             predictor = V1beta1PredictorSpec(
                 sklearn=V1beta1SKLearnSpec(storage_uri=storage_uri))
         elif self.framework == 'triton':
             predictor = V1beta1PredictorSpec(
                 triton=V1beta1TritonSpec(storage_uri=storage_uri))
         elif self.framework == 'xgboost':
             predictor = V1beta1PredictorSpec(
                 xgboost=V1beta1XGBoostSpec(storage_uri=storage_uri))
-        # elif self.framework == 'custom':
-        #     predictor = V1beta1PredictorSpec(
-        #         custom=V1alpha2CustomSpec(container=container))
         else:
             raise RuntimeError("Unsupported framework {}".format(framework))
         return predictor
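Note: after this cleanup every supported framework reduces to the same two-object construction, and anything else fails fast. A minimal sketch of the xgboost branch; the URI is a hypothetical placeholder:

from kfserving import V1beta1PredictorSpec
from kfserving import V1beta1XGBoostSpec

# What the xgboost branch of generate_predictor_spec() now returns.
predictor = V1beta1PredictorSpec(
    xgboost=V1beta1XGBoostSpec(storage_uri='s3://example-bucket/xgb/model'))
print(predictor.xgboost.storage_uri)

With the onnx branch and the commented-out custom branch removed, only 'tensorflow', 'pytorch', 'sklearn', 'triton' and 'xgboost' are handled; any other framework value now raises RuntimeError("Unsupported framework ...").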