Documentation
¶
Overview ¶
Copyright 2018 The Kubeflow Authors
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License
Copyright 2018 The Kubeflow Authors ¶
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License
Index ¶
- Constants
- Variables
- type AdvancedGpuMetric
- type AllNodeInfo
- type ArenaClientArgs
- type CommonCronArgs
- type CommonGPUNodeInfo
- type CommonModelArgs
- type CommonNodeInfo
- type CommonServingArgs
- type CommonSubmitArgs
- type CommonUpdateServingArgs
- type ConcurrencyPolicy
- type ConfigFileInfo
- type CronHistoryInfo
- type CronInfo
- type CronTFJobArgs
- type CronType
- type CustomServingArgs
- type DataDirVolume
- type Destination
- type DestinationRuleCRD
- type DestinationWeight
- type Driver
- type Endpoint
- type EvaluateJobArgs
- type EvaluateJobInfo
- type EvaluateJobType
- type Executor
- type FormatStyle
- type GPUDeviceInfo
- type GPUExclusiveNodeInfo
- type GPUExclusivePodInfo
- type GPUShareNodeDevice
- type GPUShareNodeInfo
- type GPUSharePodInfo
- type GPUTopology
- type GPUTopologyNodeDevice
- type GPUTopologyNodeInfo
- type GPUTopologyPodInfo
- type GpuMetric
- type GpuMetricInfo
- type HTTPMatchRequest
- type HTTPRoute
- type JobConditionType
- type JobGpuMetric
- type K8sObject
- type KFServingArgs
- type KServeArgs
- type LimitedPodSecurityContext
- type LogArgs
- type LogLevel
- type ModelBenchmarkArgs
- type ModelEvaluateArgs
- type ModelFormat
- type ModelJobInfo
- type ModelJobInstance
- type ModelJobStatus
- type ModelJobType
- type ModelOptimizeArgs
- type ModelProfileArgs
- type ModelTypeInfo
- type ModelVersion
- type ModelVersionStatus
- type ModelVersionTag
- type NodeGpuMetric
- type NodeType
- type NodeTypeInfo
- type NormalNodeInfo
- type PodGpuMetric
- type PortSelector
- type PreprocesObject
- type PrometheusMetric
- type PrometheusMetricData
- type PrometheusMetricResult
- type PrometheusMetricValue
- type PrometheusServer
- type RegisteredModel
- type RegisteredModelAlias
- type RegisteredModelTag
- type Runtime
- type ScaleETJobArgs
- type ScaleInETJobArgs
- type ScaleOutETJobArgs
- type SeldonServingArgs
- type ServingInstance
- type ServingJobInfo
- type ServingJobType
- type ServingTypeInfo
- type ServingVersionWeight
- type StringMatchPrefix
- type SubmitDeepSpeedJobArgs
- type SubmitETJobArgs
- type SubmitHorovodJobArgs
- type SubmitMPIJobArgs
- type SubmitPyTorchJobArgs
- type SubmitSparkJobArgs
- type SubmitSyncCodeArgs
- type SubmitTFJobArgs
- type SubmitTensorboardArgs
- type SubmitVolcanoJobArgs
- type TFRuntime
- type TensorFlowServingArgs
- type TensorRTServingArgs
- type TolerationArgs
- type TrafficRouterSplitArgs
- type TrainingJobInfo
- type TrainingJobInstance
- type TrainingJobStatus
- type TrainingJobType
- type TrainingJobTypeInfo
- type TritonServingArgs
- type UpdateCustomServingArgs
- type UpdateKServeArgs
- type UpdateTensorFlowServingArgs
- type UpdateTritonServingArgs
- type VirtualService
- type VirtualServiceCRD
Constants ¶
const ( )
const ( AliyunGPUResourceName = "aliyun.com/gpu" GPUTopologyAllocationLabel = "topology.kubernetes.io/gpu-group" GPUTopologyVisibleGPULabel = "topology.kubernetes.io/gpu-visible" GPUTopologyNodeLabels = "ack.node.gpu.schedule=topology" )
const ( MultiTenantIsolationLabel = "arena.kubeflow.org/isolate-user" UserNameIdLabel = "arena.kubeflow.org/uid" UserNameNameLabel = "arena.kubeflow.org/username" SSHSecretName = "arena.kubeflow.org/ssh-secret" )
const (
CPUResourceName = "cpu"
)
const KUBEFLOW_NAMESPACE = "kubeflow"
const KUBE_SYSTEM_NAMESPACE = "kube-system"
const NODE_METRIC_TMP = `{__name__=~"%s", node_name=~"%s"}`
const (
// defines the nvidia resource name
NvidiaGPUResourceName = "nvidia.com/gpu"
)
const POD_METRIC_TMP = `{__name__=~"%s", pod_name=~"%s"}`
const PROMETHEUS_INSTALL_DOC_URL = "https://github.com/kubeflow/arena/blob/master/docs/userguide/9-top-job-gpu-metric.md"
const PROMETHEUS_SCHEME = "http"
const PROMETHEUS_SVC_LABEL = "kubernetes.io/name=Prometheus"
const (
RequestGPUsOfJobAnnoKey = "requestGPUsOfJobOwner"
)
Variables ¶
var ( ErrTrainingJobNotFound = errors.New("training job not found,please use 'arena list' to make sure job is existed.") ErrNoPrivilegesToOperateJob = errors.New("you have no privileges to operate the job,because the owner of job is not you") )
var GPU_METRIC_LIST = []string{"nvidia_gpu_duty_cycle", "nvidia_gpu_memory_used_bytes", "nvidia_gpu_memory_total_bytes"}
var ModelTypeMap = map[ModelJobType]ModelTypeInfo{ ModelProfileJob: { Name: ModelProfileJob, Alias: "Profile", Shorthand: "profile", }, ModelOptimizeJob: { Name: ModelOptimizeJob, Alias: "Optimize", Shorthand: "optimize", }, ModelBenchmarkJob: { Name: ModelBenchmarkJob, Alias: "Benchmark", Shorthand: "benchmark", }, ModelEvaluateJob: { Name: ModelEvaluateJob, Alias: "Evaluate", Shorthand: "evaluate", }, }
ModelTypeMap maps each model job type to its name, alias, and shorthand.
var NodeTypeSlice = []NodeTypeInfo{ { Name: NormalNode, Alias: "none", Shorthand: "n", }, { Name: GPUExclusiveNode, Alias: "exclusive", Shorthand: "e", }, { Name: GPUTopologyNode, Alias: "topology", Shorthand: "t", }, { Name: GPUShareNode, Alias: "share", Shorthand: "s", }, }
var SUPPORT_PROMETHEUS_SERVERS = []*PrometheusServer{ { Name: "arms-prometheus-admin", ServiceLabels: "kubernetes.io/service-name=prometheus-admin", Protocol: "http", Port: "9335", Path: "api/v1/query", MetricList: []string{ "nvidia_gpu_duty_cycle", "nvidia_gpu_memory_used_bytes", "nvidia_gpu_memory_total_bytes", }, }, { Name: "default", ServiceLabels: "kubernetes.io/service-name=prometheus-server", Protocol: "http", Port: "9090", Path: "api/v1/query", MetricList: []string{ "nvidia_gpu_duty_cycle", "nvidia_gpu_memory_used_bytes", "nvidia_gpu_memory_total_bytes", }, }, { Name: "default-old", ServiceLabels: "kubernetes.io/name=Prometheus", Protocol: "http", Port: "9090", Path: "api/v1/query", MetricList: []string{ "nvidia_gpu_duty_cycle", "nvidia_gpu_memory_used_bytes", "nvidia_gpu_memory_total_bytes", }, }, }
var ServingTypeMap = map[ServingJobType]ServingTypeInfo{ CustomServingJob: { Name: CustomServingJob, Alias: "Custom", Shorthand: "custom", }, KFServingJob: { Name: KFServingJob, Alias: "KFServing", Shorthand: "kf", }, KServeJob: { Name: KServeJob, Alias: "KServe", Shorthand: "kserve", }, TFServingJob: { Name: TFServingJob, Alias: "Tensorflow", Shorthand: "tf", }, TRTServingJob: { Name: TRTServingJob, Alias: "Tensorrt", Shorthand: "trt", }, TritonServingJob: { Name: TritonServingJob, Alias: "Triton", Shorthand: "Triton", }, SeldonServingJob: { Name: SeldonServingJob, Alias: "Seldon", Shorthand: "seldon", }, }
ServingTypeMap maps each serving job type to its name, alias, and shorthand.
var TrainingTypeMap = map[TrainingJobType]TrainingJobTypeInfo{ TFTrainingJob: { Name: TFTrainingJob, Alias: "Tensorflow", Shorthand: "tf", }, MPITrainingJob: { Name: MPITrainingJob, Alias: "MPI", Shorthand: "mpi", }, PytorchTrainingJob: { Name: PytorchTrainingJob, Alias: "Pytorch", Shorthand: "py", }, HorovodTrainingJob: { Name: HorovodTrainingJob, Alias: "Horovod", Shorthand: "horovod", }, VolcanoTrainingJob: { Name: VolcanoTrainingJob, Alias: "Volcano", Shorthand: "volcano", }, ETTrainingJob: { Name: ETTrainingJob, Alias: "ElasticTraining", Shorthand: "et", }, SparkTrainingJob: { Name: SparkTrainingJob, Alias: "Spark", Shorthand: "spark", }, DeepSpeedTrainingJob: { Name: DeepSpeedTrainingJob, Alias: "DeepSpeed", Shorthand: "dp", }, }
TrainingTypeMap maps each training job type to its name, alias, and shorthand.
Functions ¶
This section is empty.
Types ¶
type AdvancedGpuMetric ¶
type AdvancedGpuMetric struct {
Id string `json:"id" yaml:"id"`
UUID string `json:"uuid" yaml:"uuid"`
GpuDutyCycle float64 `json:"gpuDutyCycle" yaml:"gpuDutyCycle"`
GpuMemoryUsed float64 `json:"usedGPUMemory" yaml:"usedGPUMemory"`
GpuMemoryTotal float64 `json:"totalGPUMemory" yaml:"totalGPUMemory"`
// Each entry of PodNames combines the namespace and the pod name, like 'namespace/pod_name'
PodNames []string `json:"podNames" yaml:"podNames"`
}
type AllNodeInfo ¶
type AllNodeInfo map[string][]interface{}
type ArenaClientArgs ¶
type CommonCronArgs ¶ added in v0.8.2
type CommonCronArgs struct {
// The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
Schedule string `yaml:"schedule"` // --schedule
// Specifies how to treat concurrent executions of a Job.
// Valid values are:
// - "Allow" (default): allows CronJobs to run concurrently;
// - "Forbid": forbids concurrent runs, skipping next run if previous run hasn't finished yet;
// - "Replace": cancels currently running job and replaces it with a new one
// +optional
ConcurrencyPolicy string `yaml:"concurrencyPolicy"` // --concurrency-policy
// This flag tells the controller to suspend subsequent executions, it does
// not apply to already started executions. Defaults to false.
// +optional
Suspend bool `yaml:"suspend"` // --suspend
// Deadline is the timestamp until which the cron job can keep scheduling.
Deadline string `yaml:"deadline"` // --deadline
// The number of finished job history to retain.
// NOTE: this field is a plain int (not a pointer), so an explicit zero cannot be distinguished from "not specified".
// +optional
HistoryLimit int `yaml:"historyLimit"` // --history-limit
}
type CommonGPUNodeInfo ¶
type CommonGPUNodeInfo struct {
TotalGPUs float64 `json:"totalGPUs" yaml:"totalGPUs"`
AllocatedGPUs float64 `json:"allocatedGPUs" yaml:"allocatedGPUs"`
UnhealthyGPUs float64 `json:"unhealthyGPUs" yaml:"unhealthyGPUs"`
GPUMetrics []*AdvancedGpuMetric `json:"gpuMetrics" yaml:"gpuMetrics"`
}
type CommonModelArgs ¶ added in v0.9.0
type CommonModelArgs struct {
Name string `yaml:"name"` // --name
Namespace string `yaml:"namespace"` // --namespace
ModelConfigFile string `yaml:"modelConfigFile"` // --model-config-file
ModelName string `yaml:"modelName"` // --model-name
ModelPath string `yaml:"modelPath"` // --model-path
Inputs string `yaml:"inputs"` // --inputs
Outputs string `yaml:"outputs"` // --outputs
Image string `yaml:"image"` // --image
ImagePullPolicy string `yaml:"imagePullPolicy"` // --image-pull-policy
// ImagePullSecrets stores image pull secrets,match option --image-pull-secrets
ImagePullSecrets []string `yaml:"imagePullSecrets"`
GPUCount int `yaml:"gpuCount"` // --gpus
GPUMemory int `yaml:"gpuMemory"` // --gpumemory
GPUCore int `yaml:"gpuCore"` // --gpucore
Cpu string `yaml:"cpu"` // --cpu
Memory string `yaml:"memory"` // --memory
// DataSet stores the kubernetes pvc names
DataSet map[string]string `yaml:"dataset"` // --data
// DataDirs stores the files(or directories) in k8s node which will map to containers
DataDirs []DataDirVolume `yaml:"dataDirs"` // --data-dir
Envs map[string]string `yaml:"envs"` // --env
NodeSelectors map[string]string `yaml:"nodeSelectors"` // --selector
Tolerations []TolerationArgs `yaml:"tolerations"` // --toleration
Annotations map[string]string `yaml:"annotations"` // --annotation
Labels map[string]string `yaml:"labels"` // --label
Shell string `yaml:"shell"` // --shell
Command string `yaml:"command"`
Type ModelJobType `yaml:"type"`
// HelmOptions stores the helm options
HelmOptions []string `yaml:"-"`
}
type CommonNodeInfo ¶
type CommonServingArgs ¶
type CommonServingArgs struct {
Name string `yaml:"servingName"`
Version string `yaml:"servingVersion"`
Namespace string `yaml:"-"`
Type ServingJobType `yaml:"-"`
Image string `yaml:"image"`
ImagePullPolicy string `yaml:"imagePullPolicy"` // --imagePullPolicy
GPUCount int `yaml:"gpuCount"` // --gpus
GPUMemory int `yaml:"gpuMemory"` // --gpumemory
GPUCore int `yaml:"gpuCore"` // --gpucore
Devices map[string]string `yaml:"devices"` // --device
Cpu string `yaml:"cpu"` // --cpu
Memory string `yaml:"memory"` // --memory
Envs map[string]string `yaml:"envs"` // --envs
EnvsFromSecret map[string]string `yaml:"envsFromSecret"` // --env-from-secret
Shell string `yaml:"shell"` // --shell
Command string `yaml:"command"` // --command
Replicas int `yaml:"replicas"` // --replicas
EnableIstio bool `yaml:"enableIstio"` // --enableIstio
ExposeService bool `yaml:"exposeService"` // --exposeService
ModelDirs map[string]string `yaml:"modelDirs"` // --data
DataSubpathExprs map[string]string `yaml:"dataSubPathExprs"` // --data-subpath-expr
TempDirSubpathExpr map[string]string `yaml:"tempDirSubPathExprs"` // --temp-dir-subpath-expr
TempDirs map[string]string `yaml:"tempDirs"` // --temp-dir
ImagePullSecrets []string `yaml:"imagePullSecrets"` //--image-pull-secrets
HostVolumes []DataDirVolume `yaml:"dataDirs"` // --data-dir
NodeSelectors map[string]string `yaml:"nodeSelectors"` // --selector
Tolerations []TolerationArgs `yaml:"tolerations"` // --toleration
Annotations map[string]string `yaml:"annotations"`
Labels map[string]string `yaml:"labels"` // --label
// ConfigFiles stores the config file which is existed in client host node
// and map it to container,match option --config-file
ConfigFiles map[string]map[string]ConfigFileInfo `yaml:"configFiles"`
// HelmOptions stores the helm options
HelmOptions []string `yaml:"-"`
ModelServiceExists bool `yaml:"modelServiceExists"` // --modelServiceExists
ModelName string `yaml:"modelName"` // --model-name
ModelVersion string `yaml:"modelVersion"` // --model-version
}
type CommonSubmitArgs ¶
type CommonSubmitArgs struct {
// Name stores the job name,match option --name
Name string `yaml:"-"`
// Namespace stores the namespace of job,match option --namespace
Namespace string `yaml:"-"`
// TrainingType stores the trainingType
TrainingType TrainingJobType `yaml:"trainingType"`
// NodeSelectors defines the node selectors,match option --selector
NodeSelectors map[string]string `yaml:"nodeSelectors"`
// ConfigFiles stores the config file which is existed in client host node
// and map it to container,match option --config-file
ConfigFiles map[string]map[string]ConfigFileInfo `yaml:"configFiles"`
// Tolerations defines the tolerations which tolerates node taints
// match option --toleration
Tolerations []TolerationArgs `yaml:"tolerations"`
// Image stores the docker image of job,match option --image
Image string `yaml:"image"`
// ImagePullPolicy stores the docker image pull policy of job,match option --image-pull-policy
ImagePullPolicy string `yaml:"imagePullPolicy"`
// GPUCount stores the gpu count of the job needs,match option --gpus
GPUCount int `yaml:"gpuCount"`
// Devices stores chip vendors and count that used for resources, such as amd.com/gpu=1 gpu.intel.com/i915=1,match option --device
Devices map[string]string `yaml:"devices"`
// Envs stores the envs of container in job, match option --env
Envs map[string]string `yaml:"envs"`
// WorkingDir stores the working directory of container in job,match option --working-dir
WorkingDir string `yaml:"workingDir"`
// Shell specify the linux shell type
Shell string `yaml:"shell"`
// Command stores the command of job
Command string `yaml:"command"`
// Mode is used for horovod,match option --sync-mode
Mode string `yaml:"mode"`
// WorkerCount stores the count of job worker,match option --workers
WorkerCount int `yaml:"workers"`
// Retry defines the retry times
Retry int `yaml:"retry"`
// DataSet stores the kubernetes pvc names
DataSet map[string]string `yaml:"dataset"`
// DataDirs stores the files(or directories) in k8s node which will map to containers
// match option --data-dir
DataDirs []DataDirVolume `yaml:"dataDirs"`
// EnableRDMA enable rdma or not,match option --rdma
EnableRDMA bool `yaml:"enableRDMA"`
// EnableQueue enables the feature to queue jobs after they are scheduled.
EnableQueue bool `yaml:"enableQueue"`
// UseENI defines using eni or not
UseENI bool `yaml:"useENI"`
// Annotations defines pod annotations of job,match option --annotation
Annotations map[string]string `yaml:"annotations"`
// Labels specify the job labels and it is work for pods
Labels map[string]string `yaml:"labels"`
// IsNonRoot is root user or not
IsNonRoot bool `yaml:"isNonRoot"`
// PodSecurityContext defines the pod security context
PodSecurityContext LimitedPodSecurityContext `yaml:"podSecurityContext"`
// PriorityClassName defines the priority class
PriorityClassName string `yaml:"priorityClassName"`
// Coscheduling defines using Coscheduling
Coscheduling bool
// PodGroupName stores pod group name
PodGroupName string `yaml:"podGroupName"`
// PodGroupMinAvailable stores pod group min available
PodGroupMinAvailable string `yaml:"podGroupMinAvailable"`
// ImagePullSecrets stores image pull secrets,match option --image-pull-secrets
ImagePullSecrets []string `yaml:"imagePullSecrets"`
// HelmOptions stores the helm options
HelmOptions []string `yaml:"-"`
// EnableSpotInstance enables the feature of SuperVisor manage spot instance training.
EnableSpotInstance bool `yaml:"enableSpotInstance"`
// MaxWaitTime stores the maximum length of time a job waits for resources
MaxWaitTime int `yaml:"maxWaitTime"`
// SchedulerName stores the scheduler name,match option --scheduler
SchedulerName string `yaml:"schedulerName"`
// UseHostNetwork defines using useHostNetwork
UseHostNetwork bool `yaml:"useHostNetwork"`
// UseHostPID defines using useHostPID
UseHostPID bool `yaml:"useHostPID"`
// UseHostIPC defines using useHostIPC
UseHostIPC bool `yaml:"useHostIPC"`
// ModelName defines the model name associates with the job
ModelName string `yaml:"modelName"`
// ModelSource defines the model source
ModelSource string `yaml:"modelSource"`
}
CommonSubmitArgs defines the common parts of the submit arguments.
type CommonUpdateServingArgs ¶ added in v0.8.9
type CommonUpdateServingArgs struct {
Name string `yaml:"servingName"`
Version string `yaml:"servingVersion"`
Namespace string `yaml:"-"`
Type ServingJobType `yaml:"-"`
Image string `yaml:"image"`
GPUCount int `yaml:"gpuCount"` // --gpus
GPUMemory int `yaml:"gpuMemory"` // --gpumemory
GPUCore int `yaml:"gpuCore"` // --gpucore
Cpu string `yaml:"cpu"` // --cpu
Memory string `yaml:"memory"` // --memory
Replicas int `yaml:"replicas"` // --replicas
Envs map[string]string `yaml:"envs"` // --envs
Annotations map[string]string `yaml:"annotations"` // --annotation
Labels map[string]string `yaml:"labels"` // --label
NodeSelectors map[string]string `yaml:"nodeSelectors"` // --selector
Tolerations []TolerationArgs `yaml:"tolerations"` // --toleration
Shell string `yaml:"shell"` // --shell
Command string `yaml:"command"` // --command
ModelDirs map[string]string `yaml:"modelDirs"` // --data
}
type ConcurrencyPolicy ¶ added in v0.8.2
type ConcurrencyPolicy string
ConcurrencyPolicy describes how the job will be handled. Only one of the following concurrency policies may be specified. If none of them is specified, the default is ConcurrencyAllow ("Allow").
const ( ConcurrencyAllow ConcurrencyPolicy = "Allow" ConcurrencyForbid ConcurrencyPolicy = "Forbid" ConcurrencyReplace ConcurrencyPolicy = "Replace" )
type ConfigFileInfo ¶
type ConfigFileInfo struct {
ContainerFileName string `yaml:"containerFileName"`
HostFile string `yaml:"hostFile"`
Key string `yaml:"key"`
ContainerFilePath string `yaml:"containerFilePath"`
}
ConfigFileInfo defines the config files which will be mounted to containers
type CronHistoryInfo ¶ added in v0.8.2
type CronHistoryInfo struct {
Name string `json:"name" yaml:"name"`
Namespace string `json:"namespace" yaml:"namespace"`
Group string `json:"group" yaml:"group"`
Kind string `json:"kind" yaml:"kind"`
Status string `json:"status" yaml:"status"`
CreateTime string `json:"createTime" yaml:"createTime"`
FinishTime string `json:"finishTime" yaml:"finishTime"`
}
type CronInfo ¶ added in v0.8.2
type CronInfo struct {
UUID string `json:"uuid" yaml:"uuid"`
Name string `json:"name" yaml:"name"`
Namespace string `json:"namespace" yaml:"namespace"`
// Type is the job type, like TFJob, PyTorchJob
Type string `json:"type" yaml:"type"`
// The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
Schedule string `json:"schedule" yaml:"schedule"`
// Specifies how to treat concurrent executions of a Job.
// Valid values are:
// - "Allow" (default): allows CronJobs to run concurrently;
// - "Forbid": forbids concurrent runs, skipping next run if previous run hasn't finished yet;
// - "Replace": cancels currently running job and replaces it with a new one
// +optional
ConcurrencyPolicy string `json:"concurrencyPolicy" yaml:"concurrencyPolicy"` // --concurrency-policy
// This flag tells the controller to suspend subsequent executions, it does
// not apply to already started executions. Defaults to false.
// +optional
Suspend bool `json:"suspend" yaml:"suspend"` // --suspend
// Deadline is the timestamp until which the cron job can keep scheduling.
Deadline string `json:"deadline" yaml:"deadline"` // --deadline
// The number of finished job history to retain.
// NOTE: this field is a plain int64 (not a pointer), so an explicit zero cannot be distinguished from "not specified".
// +optional
HistoryLimit int64 `json:"historyLimit" yaml:"historyLimit"` // --history-limit
// Information when was the last time the job was successfully scheduled.
// +optional
LastScheduleTime string `json:"lastScheduleTime" yaml:"lastScheduleTime"`
// CreationTimestamp stores the creation timestamp of job
CreationTimestamp string `json:"creationTimestamp" yaml:"creationTimestamp"`
History []CronHistoryInfo `json:"cronHistory" yaml:"cronHistory"`
}
type CronTFJobArgs ¶ added in v0.8.2
type CronTFJobArgs struct {
CommonCronArgs `yaml:"cron"`
SubmitTFJobArgs `yaml:"tfjob"`
}
type CronType ¶ added in v0.8.2
type CronType string
CronType defines the supporting job type
const ( // CronTFTrainingJob defines the cron tfjob CronTFTrainingJob CronType = "tfjob" )
type CustomServingArgs ¶
type CustomServingArgs struct {
Port int `yaml:"port"` // --port
RestfulPort int `yaml:"restApiPort"` // --restfulPort
MetricsPort int `yaml:"metricsPort"` // --metrics-port
MaxSurge string `yaml:"maxSurge"` // --maxSurge
LivenessProbeAction string `yaml:"livenessProbeAction"` // --liveness-probe-action
LivenessProbeActionOption []string `yaml:"livenessProbeActionOption"` // --liveness-probe-action-option
LivenessProbeOption []string `yaml:"livenessProbeOption"` // --liveness-probe-option
ReadinessProbeAction string `yaml:"readinessProbeAction"` // --readiness-probe-action
ReadinessProbeActionOption []string `yaml:"readinessProbeActionOption"` // --readiness-probe-action-option
ReadinessProbeOption []string `yaml:"readinessProbeOption"` // --readiness-probe-option
StartupProbeAction string `yaml:"startupProbeAction"` // --startup-probe-action
StartupProbeActionOption []string `yaml:"startupProbeActionOption"` // --startup-probe-action-option
StartupProbeOption []string `yaml:"startupProbeOption"` // --startup-probe-option
CommonServingArgs `yaml:",inline"`
}
type DataDirVolume ¶
type DataDirVolume struct {
// HostPath defines the host path
HostPath string `yaml:"hostPath"`
// ContainerPath defines container path
ContainerPath string `yaml:"containerPath"`
// Name defines the volume name
Name string `yaml:"name"`
}
DataDirVolume defines the volume of kubernetes
type Destination ¶
type Destination struct {
*istiov1alpha3.Destination
Port *PortSelector `protobuf:"bytes,3,opt,name=port" json:"port,omitempty"`
}
type DestinationRuleCRD ¶
type DestinationRuleCRD struct {
// Kind is a string value representing the REST resource this object represents.
// Servers may infer this from the endpoint the client submits requests to.
// Cannot be updated.
// In CamelCase.
// More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds
// +optional
Kind string `json:"kind,omitempty" protobuf:"bytes,1,opt,name=kind"`
// APIVersion defines the versioned schema of this representation of an object.
// Servers should convert recognized schemas to the latest internal value, and
// may reject unrecognized values.
// More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources
// +optional
APIVersion string `json:"apiVersion,omitempty" protobuf:"bytes,2,opt,name=apiVersion"`
metav1.ObjectMeta `json:"metadata,omitempty" yaml:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"`
Spec *istiov1alpha3.DestinationRule `json:"spec,omitempty" yaml:"spec,omitempty" protobuf:"bytes,2,opt,name=spec"`
}
type DestinationWeight ¶
type DestinationWeight struct {
Destination *Destination `protobuf:"bytes,1,opt,name=destination" json:"destination,omitempty"`
Weight int32 `protobuf:"varint,2,opt,name=weight,proto3" json:"weight"`
}
type EvaluateJobArgs ¶ added in v0.8.8
type EvaluateJobArgs struct {
// Name stores the job name,match option --name
Name string `yaml:"-"`
// Namespace stores the namespace of job,match option --namespace
Namespace string `yaml:"-"`
// NodeSelectors defines the node selectors,match option --selector
NodeSelectors map[string]string `yaml:"nodeSelectors"`
// Tolerations defines the tolerations which tolerates node taints
// match option --toleration
Tolerations []TolerationArgs `yaml:"tolerations"`
// Image stores the docker image of job,match option --image
Image string `yaml:"image"`
// Envs stores the envs of container in job, match option --env
Envs map[string]string `yaml:"envs"`
WorkingDir string `yaml:"workingDir"`
// Command stores the command of job
Command string `yaml:"command"`
// DataDirs stores the files(or directories) in k8s node which will map to containers
// match option --data-dir
DataDirs []DataDirVolume `yaml:"dataDirs"`
// DataSources stores the kubernetes pvc names
DataSources map[string]string `yaml:"dataSources"`
// Annotations defines pod annotations of job,match option --annotation
Annotations map[string]string `yaml:"annotations"`
// Labels specify the job labels and it is work for pods
Labels map[string]string `yaml:"labels"`
// ImagePullSecrets stores image pull secrets,match option --image-pull-secrets
ImagePullSecrets []string `yaml:"imagePullSecrets"`
// HelmOptions stores the helm options
HelmOptions []string `yaml:"-"`
ModelName string `yaml:"modelName"` // --model-name
ModelPath string `yaml:"modelPath"` // --model-path
ModelVersion string `yaml:"modelVersion"` // --model-version
MetricsPath string `yaml:"metricsPath"` // --metrics-path
DatasetPath string `yaml:"datasetPath"` // --dataset-path
Cpu string `yaml:"cpu"` // --cpu
Memory string `yaml:"memory"` // --memory
GPUCount int `yaml:"gpuCount"` // --gpus
// for sync up source code
SubmitSyncCodeArgs `yaml:",inline"`
}
type EvaluateJobInfo ¶ added in v0.8.8
type EvaluateJobInfo struct {
UUID string `json:"uuid" yaml:"uuid"`
JobID string `json:"jobId" yaml:"jobId"`
Name string `json:"name" yaml:"name"`
Namespace string `json:"namespace" yaml:"namespace"`
ModelName string `json:"modelName" yaml:"modelName"`
ModelPath string `json:"modelPath" yaml:"modelPath"`
ModelVersion string `json:"modelVersion" yaml:"modelVersion"`
MetricsPath string `json:"metricsPath" yaml:"metricsPath"`
DatasetPath string `json:"datasetPath" yaml:"datasetPath"`
Status string `json:"status" yaml:"status"`
CreationTimestamp string `json:"creationTimestamp" yaml:"creationTimestamp"`
}
type EvaluateJobType ¶ added in v0.8.8
type EvaluateJobType string
const ( // EvaluateJob defines the evaluate job EvaluateJob EvaluateJobType = "evaluatejob" )
type FormatStyle ¶
type FormatStyle string
FormatStyle defines the format of output; it is only used in cmd.
const ( // WideFormat defines the wide format WideFormat FormatStyle = "wide" // JsonFormat defines the json format JsonFormat FormatStyle = "json" // YamlFormat defines the yaml format YamlFormat FormatStyle = "yaml" // UnknownFormat defines the unknown format UnknownFormat FormatStyle = "unknown" )
type GPUDeviceInfo ¶
type GPUDeviceInfo struct {
ID string `json:"id" yaml:"id"`
TotalGPUMemory float64 `json:"totalGPUMemory" yaml:"totalGPUMemory"`
AllocatedGPUMemory float64 `json:"allocatedGPUMemory" yaml:"allocatedGPUMemory"`
UsedGPUMemory float64 `json:"usedGPUMemory" yaml:"usedGPUMemory"`
DutyCycle float64 `json:"dutyCycle" yaml:"dutyCycle"`
}
type GPUExclusiveNodeInfo ¶
type GPUExclusiveNodeInfo struct {
PodInfos []GPUExclusivePodInfo `json:"instances" yaml:"instances"`
CommonNodeInfo `yaml:",inline" json:",inline"`
CommonGPUNodeInfo `yaml:",inline" json:",inline"`
}
type GPUExclusivePodInfo ¶
type GPUShareNodeDevice ¶
type GPUShareNodeDevice struct {
}
type GPUShareNodeInfo ¶
type GPUShareNodeInfo struct {
}
type GPUSharePodInfo ¶
type GPUSharePodInfo struct {
}
type GPUTopology ¶
type GPUTopologyNodeDevice ¶
type GPUTopologyNodeInfo ¶
type GPUTopologyNodeInfo struct {
PodInfos []GPUTopologyPodInfo `json:"instances" yaml:"instances"`
GPUTopology GPUTopology `json:"gpuTopology" yaml:"gpuTopology"`
CommonGPUNodeInfo `yaml:",inline" json:",inline"`
CommonNodeInfo `yaml:",inline" json:",inline"`
Devices []GPUTopologyNodeDevice `json:"devices" yaml:"devices"`
}
type GPUTopologyPodInfo ¶
type GPUTopologyPodInfo struct {
Name string `json:"name" yaml:"name"`
Namespace string `json:"namespace" yaml:"namespace"`
Status string `json:"status" yaml:"status"`
RequestGPU int `json:"requestGPUs" yaml:"requestGPUs"`
Allocation []string `json:"allocation" yaml:"allocation"`
VisibleGPUs []string `json:"visibleGPUs" yaml:"visibleGPUs"`
}
type GpuMetricInfo ¶
type HTTPMatchRequest ¶
type HTTPMatchRequest struct {
*istiov1alpha3.HTTPMatchRequest
Uri *StringMatchPrefix `protobuf:"bytes,1,opt,name=uri" json:"uri,omitempty"`
}
type HTTPRoute ¶
type HTTPRoute struct {
*istiov1alpha3.HTTPRoute
Match []*HTTPMatchRequest `protobuf:"bytes,1,rep,name=match" json:"match,omitempty"`
Route []*DestinationWeight `protobuf:"bytes,2,rep,name=route" json:"route,omitempty"`
}
type JobConditionType ¶ added in v0.8.2
type JobConditionType string
JobConditionType defines all kinds of types of JobStatus.
const ( // JobCreated means the job has been accepted by the system, // but one or more of the pods/services has not been started. // This includes time before pods being scheduled and launched. JobCreated JobConditionType = "Created" // JobRunning means all sub-resources (e.g. services/pods) of this job // have been successfully scheduled and launched. // The training is running without error. JobRunning JobConditionType = "Running" // JobRestarting means one or more sub-resources (e.g. services/pods) of this job // reached phase failed but may be restarted according to its restart policy // which is specified by the user in v1.PodTemplateSpec. // The training is freezing/pending. JobRestarting JobConditionType = "Restarting" // JobSucceeded means all sub-resources (e.g. services/pods) of this job // reached phase have terminated in success. // The training is complete without error. JobSucceeded JobConditionType = "Succeeded" // JobFailed means one or more sub-resources (e.g. services/pods) of this job // reached phase failed with no restarting. // The training has failed its execution. JobFailed JobConditionType = "Failed" )
type JobGpuMetric ¶
type JobGpuMetric map[string]PodGpuMetric
type K8sObject ¶ added in v0.8.7
type K8sObject struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
}
type KFServingArgs ¶
type KFServingArgs struct {
Port int `yaml:"port"` // --port
ModelType string `yaml:"modelType"` // --modelType
CanaryPercent int `yaml:"canaryPercent"` // --canaryTrafficPercent
StorageUri string `yaml:"storageUri"` // --storageUri
CommonServingArgs `yaml:",inline"`
}
type KServeArgs ¶ added in v0.9.11
// KServeArgs holds the options specific to a kserve job in addition to the
// inlined CommonServingArgs.
type KServeArgs struct {
	// ModelFormat matches option --model-format.
	ModelFormat *ModelFormat `yaml:"modelFormat"`
	// Runtime matches option --runtime.
	Runtime string `yaml:"runtime"`
	// StorageUri matches option --storageUri.
	StorageUri string `yaml:"storageUri"`
	// RuntimeVersion matches option --runtime-version.
	RuntimeVersion string `yaml:"runtimeVersion"`
	// ProtocolVersion matches option --protocol-version.
	ProtocolVersion string `yaml:"protocolVersion"`
	// MinReplicas matches option --min-replicas.
	MinReplicas int `yaml:"minReplicas"`
	// MaxReplicas matches option --max-replicas.
	MaxReplicas int `yaml:"maxReplicas"`
	// ScaleTarget matches option --scale-target.
	ScaleTarget int `yaml:"scaleTarget"`
	// ScaleMetric matches option --scale-metric.
	ScaleMetric string `yaml:"scaleMetric"`
	// ContainerConcurrency matches option --container-concurrency.
	ContainerConcurrency int64 `yaml:"containerConcurrency"`
	// TimeoutSeconds matches option --timeout; note the YAML key is "timeout".
	TimeoutSeconds int64 `yaml:"timeout"`
	// CanaryTrafficPercent matches option --canary-traffic-percent.
	CanaryTrafficPercent int64 `yaml:"canaryTrafficPercent,omitempty"`
	// Port matches option --port.
	Port int `yaml:"port"`
	// EnablePrometheus matches option --enable-prometheus.
	EnablePrometheus bool `yaml:"enablePrometheus,omitempty"`
	// MetricsPort matches option --metrics-port.
	MetricsPort int `yaml:"metricsPort,omitempty"`
	// SecurityContext matches option --security-context.
	SecurityContext map[string]string `yaml:"securityContext,omitempty"`
	// CommonServingArgs holds the options shared with the other serving
	// argument types; its fields are inlined into the YAML output.
	CommonServingArgs `yaml:",inline"`
}
type LimitedPodSecurityContext ¶
// LimitedPodSecurityContext defines the Kubernetes pod security context.
// NOTE(review): the fields are presumably a subset of the Kubernetes
// PodSecurityContext fields of the same names — confirm.
type LimitedPodSecurityContext struct {
	RunAsUser          int64   `yaml:"runAsUser"`
	RunAsNonRoot       bool    `yaml:"runAsNonRoot"`
	RunAsGroup         int64   `yaml:"runAsGroup"`
	SupplementalGroups []int64 `yaml:"supplementalGroups"`
}
LimitedPodSecurityContext defines the Kubernetes pod security context.
type ModelBenchmarkArgs ¶ added in v0.9.0
// ModelBenchmarkArgs holds the options specific to a model benchmark job in
// addition to the inlined CommonModelArgs.
type ModelBenchmarkArgs struct {
	// Concurrency matches option --concurrency.
	Concurrency int `yaml:"concurrency"`
	// Requests matches option --requests.
	Requests int `yaml:"requests"`
	// Duration matches option --duration and is expressed in seconds.
	Duration int `yaml:"duration"`
	// ReportPath matches option --report-path.
	ReportPath string `yaml:"reportPath"`
	// CommonModelArgs holds the options shared with the other model job
	// argument types; its fields are inlined into the YAML output.
	CommonModelArgs `yaml:",inline"`
}
type ModelEvaluateArgs ¶ added in v0.9.0
// ModelEvaluateArgs holds the options specific to a model evaluate job in
// addition to the inlined CommonModelArgs and SubmitSyncCodeArgs.
type ModelEvaluateArgs struct {
	// ModelPlatform matches option --model-platform.
	ModelPlatform string `yaml:"modelPlatform"`
	// DatasetPath matches option --dataset-path.
	DatasetPath string `yaml:"datasetPath"`
	// ReportPath matches option --report-path.
	ReportPath string `yaml:"reportPath"`
	// BatchSize matches option --batch-size.
	BatchSize int `yaml:"batchSize"`
	// CommonModelArgs holds the options shared with the other model job
	// argument types; its fields are inlined into the YAML output.
	CommonModelArgs `yaml:",inline"`
	// SubmitSyncCodeArgs is used to sync up source code.
	SubmitSyncCodeArgs `yaml:",inline"`
}
type ModelFormat ¶ added in v0.9.11
// ModelFormat identifies a model format by name and optional version.
type ModelFormat struct {
	// Name of the model format.
	// +required
	Name string `yaml:"name"`
	// Version of the model format.
	// Used in validating that a predictor is supported by a runtime.
	// Can be "major", "major.minor" or "major.minor.patch".
	// A nil Version is omitted from the YAML output.
	// +optional
	Version *string `yaml:"version,omitempty"`
}
type ModelJobInfo ¶ added in v0.9.0
// ModelJobInfo stores the information of a model job.
type ModelJobInfo struct {
	// UUID is the unique identity of the model job
	UUID string `json:"uuid" yaml:"uuid"`
	// Name is the name of the model job
	Name string `json:"name" yaml:"name"`
	// Namespace is the namespace of the model job
	Namespace string `json:"namespace" yaml:"namespace"`
	// Duration is the time of the model job
	Duration string `json:"duration" yaml:"duration"`
	// Age specifies the model job age
	Age string `json:"age" yaml:"age"`
	// Status is the status of the model job
	Status string `json:"status" yaml:"status"`
	// Type is the model type of the model job
	Type string `json:"type" yaml:"type"`
	// Instances lists the instances under the model job
	Instances []ModelJobInstance `json:"instances" yaml:"instances"`
	// RequestCPUs stores the request cpus
	RequestCPUs int64 `json:"requestCPUs" yaml:"requestCPUs"`
	// RequestGPUs stores the request gpus
	RequestGPUs int64 `json:"requestGPUs" yaml:"requestGPUs"`
	// RequestGPUMemory stores the request gpu memory
	RequestGPUMemory int64 `json:"requestGPUMemory" yaml:"requestGPUMemory"`
	// RequestGPUCore stores the request gpu core
	RequestGPUCore int64 `json:"requestGPUCore" yaml:"requestGPUCore"`
	// CreationTimestamp stores the creation timestamp of job
	CreationTimestamp int64 `json:"creationTimestamp" yaml:"creationTimestamp"`
	// Params stores the job parameters
	Params map[string]string `json:"params" yaml:"params"`
}
type ModelJobInstance ¶ added in v0.9.0
// ModelJobInstance stores the information of one instance of a model job.
type ModelJobInstance struct {
	// Name gives the instance name
	Name string `json:"name" yaml:"name"`
	// Status gives the instance status
	Status string `json:"status" yaml:"status"`
	// Age gives the instance age
	Age string `json:"age" yaml:"age"`
	// ReadyContainer represents the count of ready containers
	ReadyContainer int `json:"readyContainers" yaml:"readyContainers"`
	// TotalContainer represents the count of total containers
	TotalContainer int `json:"totalContainers" yaml:"totalContainers"`
	// RestartCount represents the count of instance restarts
	RestartCount int `json:"restartCount" yaml:"restartCount"`
	// NodeIP specifies the host ip of the instance
	NodeIP string `json:"nodeIP" yaml:"nodeIP"`
	// NodeName returns the node name
	NodeName string `json:"nodeName" yaml:"nodeName"`
	// IP returns the instance ip
	IP string `json:"ip" yaml:"ip"`
	// RequestGPUs returns the request gpus
	RequestGPUs float64 `json:"requestGPUs" yaml:"requestGPUs"`
	// RequestGPUMemory returns the request gpu memory
	RequestGPUMemory int `json:"requestGPUMemory" yaml:"requestGPUMemory"`
	// RequestGPUCore returns the request gpu core
	RequestGPUCore int `json:"requestGPUCore" yaml:"requestGPUCore"`
	// CreationTimestamp returns the creation timestamp of instance
	CreationTimestamp int64 `json:"creationTimestamp" yaml:"creationTimestamp"`
}
type ModelJobStatus ¶ added in v0.9.0
// ModelJobStatus defines all the kinds of JobStatus
type ModelJobStatus string

const (
	// ModelJobPending means the job is pending
	ModelJobPending ModelJobStatus = "PENDING"
	// ModelJobRunning means the job is running
	ModelJobRunning ModelJobStatus = "RUNNING"
	// ModelJobComplete means the job is complete
	ModelJobComplete ModelJobStatus = "COMPLETE"
	// ModelJobFailed means the job is failed
	ModelJobFailed ModelJobStatus = "FAILED"
	// ModelJobUnknown means the job status is unknown
	ModelJobUnknown ModelJobStatus = "UNKNOWN"
)
type ModelJobType ¶ added in v0.9.0
// ModelJobType defines the supporting model job type
type ModelJobType string

const (
	// ModelProfileJob defines the model profile job
	ModelProfileJob ModelJobType = "profile"
	// ModelOptimizeJob defines the model optimize job
	ModelOptimizeJob ModelJobType = "optimize"
	// ModelBenchmarkJob defines the model benchmark job
	ModelBenchmarkJob ModelJobType = "benchmark"
	// ModelEvaluateJob defines the model evaluate job
	ModelEvaluateJob ModelJobType = "evaluate"
	// AllModelJob defines all model job; it is the empty string
	AllModelJob ModelJobType = ""
	// UnknownModelJob defines the unknown model job
	UnknownModelJob ModelJobType = "unknown"
)
type ModelOptimizeArgs ¶ added in v0.9.0
// ModelOptimizeArgs holds the options specific to a model optimize job in
// addition to the inlined CommonModelArgs.
type ModelOptimizeArgs struct {
	// Optimizer matches option --optimizer.
	Optimizer string `yaml:"optimizer"`
	// TargetDevice matches option --target-device.
	TargetDevice string `yaml:"targetDevice"`
	// ExportPath matches option --export-path.
	ExportPath string `yaml:"exportPath"`
	// CommonModelArgs holds the options shared with the other model job
	// argument types; its fields are inlined into the YAML output.
	CommonModelArgs `yaml:",inline"`
}
type ModelProfileArgs ¶ added in v0.9.0
// ModelProfileArgs holds the options specific to a model profile job in
// addition to the inlined CommonModelArgs.
type ModelProfileArgs struct {
	// ReportPath matches option --report-path.
	ReportPath string `yaml:"reportPath"`
	// UseTensorboard matches option --tensorboard.
	UseTensorboard bool `yaml:"useTensorboard"`
	// TensorboardImage matches option --tensorboardImage.
	TensorboardImage string `yaml:"tensorboardImage"`
	// CommonModelArgs holds the options shared with the other model job
	// argument types; its fields are inlined into the YAML output.
	CommonModelArgs `yaml:",inline"`
}
type ModelTypeInfo ¶ added in v0.9.0
// ModelTypeInfo describes a model job type by its name, alias and shorthand.
type ModelTypeInfo struct {
	Name      ModelJobType
	Alias     string
	Shorthand string
}
type ModelVersion ¶ added in v0.9.14
// ModelVersion describes one version of a registered model. Only Name is
// always present in the JSON output; every other field is omitted when empty.
// NOTE(review): the snake_case JSON keys presumably mirror an external model
// registry API — confirm which one before changing any tag.
type ModelVersion struct {
	Name                 string             `json:"name"`
	Version              string             `json:"version,omitempty"`
	CreationTimestamp    int64              `json:"creation_timestamp,omitempty"`
	LastUpdatedTimestamp int64              `json:"last_updated_timestamp,omitempty"`
	Description          string             `json:"description,omitempty"`
	UserId               string             `json:"user_id,omitempty"`
	CurrentStage         string             `json:"current_stage,omitempty"`
	Source               string             `json:"source,omitempty"`
	RunId                string             `json:"run_id,omitempty"`
	Status               ModelVersionStatus `json:"status,omitempty"`
	StatusMessage        string             `json:"status_message,omitempty"`
	Tags                 []*ModelVersionTag `json:"tags,omitempty"`
	RunLink              string             `json:"run_link,omitempty"`
	Aliases              []string           `json:"aliases,omitempty"`
}
type ModelVersionStatus ¶ added in v0.9.14
// ModelVersionStatus is the registration status of a model version.
type ModelVersionStatus string

// The constant names keep their upper-case form because they are part of the
// exported API; each value is identical to its name.
const (
	// PENDING_REGISTRATION marks a model version whose registration is pending.
	PENDING_REGISTRATION ModelVersionStatus = "PENDING_REGISTRATION"
	// FAILED_REGISTRATION marks a model version whose registration failed.
	FAILED_REGISTRATION ModelVersionStatus = "FAILED_REGISTRATION"
	// READY marks a model version that is ready.
	READY ModelVersionStatus = "READY"
)
type ModelVersionTag ¶ added in v0.9.14
type NodeTypeInfo ¶
type NormalNodeInfo ¶
// NormalNodeInfo consists solely of the embedded CommonNodeInfo.
type NormalNodeInfo struct {
	// CommonNodeInfo is embedded and tagged as inline for both YAML and JSON.
	CommonNodeInfo `yaml:",inline" json:",inline"`
}
type PodGpuMetric ¶
type PortSelector ¶
// PortSelector embeds *istiov1alpha3.PortSelector and declares its own Number
// field.
type PortSelector struct {
	// Embedded port selector from the istiov1alpha3 package.
	*istiov1alpha3.PortSelector
	// Number is the port number (protobuf varint field 1, member of a oneof);
	// it is omitted from JSON when zero.
	Number uint32 `protobuf:"varint,1,opt,name=number,proto3,oneof" json:"number,omitempty"`
}
type PreprocesObject ¶
// PreprocesObject bundles a service name and namespace with a
// DestinationRuleCRD and a VirtualServiceCRD.
// NOTE(review): presumably the input for traffic-router preprocessing —
// confirm against callers.
type PreprocesObject struct {
	ServiceName     string
	Namespace       string
	DestinationRule DestinationRuleCRD
	VirtualService  VirtualServiceCRD
}
type PrometheusMetric ¶
// PrometheusMetric is the top-level object decoded from a Prometheus query
// response: a status string plus the data payload.
type PrometheusMetric struct {
	// Status is stored under the "status" key.
	// NOTE(review): "inline" is not an option documented by encoding/json and
	// looks like a leftover here — confirm it can be dropped.
	Status string `json:"status,inline"`
	// Data is stored under the "data" key.
	Data PrometheusMetricData `json:"data,omitempty"`
}
type PrometheusMetricData ¶
// PrometheusMetricData is the "data" payload of a PrometheusMetric: the list
// of results and the result type string.
type PrometheusMetricData struct {
	Result     []PrometheusMetricResult `json:"result"`
	ResultType string                   `json:"resultType"`
}
type PrometheusMetricResult ¶
// PrometheusMetricResult is one entry of PrometheusMetricData.Result: a set of
// string key/value pairs under "metric" and a list of values under "value".
type PrometheusMetricResult struct {
	Metric map[string]string       `json:"metric"`
	Value  []PrometheusMetricValue `json:"value"`
}
type PrometheusMetricValue ¶
// PrometheusMetricValue is an untyped element of PrometheusMetricResult.Value;
// being an empty interface it accepts any decoded JSON value.
type PrometheusMetricValue interface{}
type PrometheusServer ¶
// PrometheusServer is used to define prometheus server
type PrometheusServer struct {
	Name          string
	ServiceLabels string
	Protocol      string
	Port          string
	Path          string
	MetricList    []string
	// Service points at a v1.Service.
	// NOTE(review): presumably the service exposing this server — confirm.
	Service *v1.Service
}
PrometheusServer is used to define prometheus server
type RegisteredModel ¶ added in v0.9.14
// RegisteredModel describes a registered model together with its latest
// versions, tags and aliases. Only Name is always present in the JSON output;
// every other field is omitted when empty.
type RegisteredModel struct {
	Name                 string                  `json:"name"`
	CreationTimestamp    int64                   `json:"creation_timestamp,omitempty"`
	LastUpdatedTimestamp int64                   `json:"last_updated_timestamp,omitempty"`
	Description          string                  `json:"description,omitempty"`
	LatestVersions       []*ModelVersion         `json:"latest_versions,omitempty"`
	Tags                 []*RegisteredModelTag   `json:"tags,omitempty"`
	Aliases              []*RegisteredModelAlias `json:"aliases,omitempty"`
}
Model Management
type RegisteredModelAlias ¶ added in v0.9.14
type RegisteredModelTag ¶ added in v0.9.14
func (RegisteredModelTag) String ¶ added in v0.9.14
func (t RegisteredModelTag) String() string
type ScaleETJobArgs ¶
// ScaleETJobArgs holds the arguments shared by the scale-in and scale-out
// operations of an et job.
type ScaleETJobArgs struct {
	// Name is the et job name; required, matches option --name.
	Name string `yaml:"etName"`
	// JobType stores the training job type; excluded from YAML.
	JobType TrainingJobType `yaml:"-"`
	// Namespace stores the namespace of job, matches option --namespace;
	// excluded from YAML.
	Namespace string `yaml:"-"`
	// Timeout is the timeout of the callback scaler script, matches option --timeout.
	Timeout int `yaml:"timeout"`
	// Retry is the number of retries, matches option --retry.
	Retry int `yaml:"retry"`
	// Count is the number of workers to add or delete, matches option --count.
	Count int `yaml:"count"`
	// Script is the scaling script, matches option --script.
	Script string `yaml:"script"`
	// Envs stores the environment variables, matches option -e, --env.
	Envs map[string]string `yaml:"envs"`
}
type ScaleInETJobArgs ¶
// ScaleInETJobArgs holds the arguments of a scale-in operation; it currently
// consists solely of the inlined ScaleETJobArgs.
type ScaleInETJobArgs struct {
	// common args
	ScaleETJobArgs `yaml:",inline"`
}
type ScaleOutETJobArgs ¶
// ScaleOutETJobArgs holds the arguments of a scale-out operation; it currently
// consists solely of the inlined ScaleETJobArgs.
type ScaleOutETJobArgs struct {
	// common args
	ScaleETJobArgs `yaml:",inline"`
}
type SeldonServingArgs ¶ added in v0.8.0
// SeldonServingArgs holds the options specific to a seldon serving job in
// addition to the inlined CommonServingArgs.
type SeldonServingArgs struct {
	// Implementation matches option --implementation.
	Implementation string `yaml:"implementation"`
	// ModelUri matches option --modelUri.
	ModelUri string `yaml:"modelUri"`
	// CommonServingArgs holds the options shared with the other serving
	// argument types; its fields are inlined into the YAML output.
	CommonServingArgs `yaml:",inline"`
}
type ServingInstance ¶
// ServingInstance stores the information of one instance of a serving job.
type ServingInstance struct {
	// Name gives the instance name
	Name string `json:"name" yaml:"name"`
	// Status gives the instance status
	Status string `json:"status" yaml:"status"`
	// Age gives the instance age
	Age string `json:"age" yaml:"age"`
	// ReadyContainer represents the count of ready containers
	ReadyContainer int `json:"readyContainers" yaml:"readyContainers"`
	// TotalContainer represents the count of total containers
	TotalContainer int `json:"totalContainers" yaml:"totalContainers"`
	// RestartCount represents the count of instance restarts
	RestartCount int `json:"restartCount" yaml:"restartCount"`
	// NodeIP specifies the host ip of the instance
	NodeIP string `json:"nodeIP" yaml:"nodeIP"`
	// NodeName returns the node name
	NodeName string `json:"nodeName" yaml:"nodeName"`
	// IP returns the instance ip
	IP string `json:"ip" yaml:"ip"`
	// RequestGPUs returns the request gpus
	RequestGPUs float64 `json:"requestGPUs" yaml:"requestGPUs"`
	// RequestGPUMemory returns the request gpu memory
	RequestGPUMemory int `json:"requestGPUMemory" yaml:"requestGPUMemory"`
	// RequestGPUCore specifies the request gpu core, only for gpushare
	RequestGPUCore int `json:"requestGPUCore" yaml:"requestGPUCore"`
	// CreationTimestamp returns the creation timestamp of instance
	CreationTimestamp int64 `json:"creationTimestamp" yaml:"creationTimestamp"`
}
type ServingJobInfo ¶
// ServingJobInfo displays serving job information.
type ServingJobInfo struct {
	// UUID specifies the unique identity of the serving job
	UUID string `json:"uuid" yaml:"uuid"`
	// Name specifies serving job name
	Name string `json:"name" yaml:"name"`
	// Namespace specifies serving job namespace
	Namespace string `json:"namespace" yaml:"namespace"`
	// Type specifies serving job type
	Type string `json:"type" yaml:"type"`
	// Version specifies serving job version
	Version string `json:"version" yaml:"version"`
	// Age specifies the serving job age
	Age string `json:"age" yaml:"age"`
	// Desired specifies the desired instances
	Desired int `json:"desiredInstances" yaml:"desiredInstances"`
	// Available specifies the available instances
	Available int `json:"availableInstances" yaml:"availableInstances"`
	// Endpoints specifies the endpoints
	Endpoints []Endpoint `json:"endpoints" yaml:"endpoints"`
	// IPAddress specifies the ip address
	IPAddress string `json:"ip" yaml:"ip"`
	// Instances gives the instance information
	Instances []ServingInstance `json:"instances" yaml:"instances"`
	// RequestCPUs specifies the request cpus
	RequestCPUs float64 `json:"requestCPUs" yaml:"requestCPUs"`
	// RequestGPUs specifies the request gpus
	RequestGPUs float64 `json:"requestGPUs" yaml:"requestGPUs"`
	// RequestGPUMemory specifies the request gpu memory, only for gpushare
	RequestGPUMemory int `json:"requestGPUMemory" yaml:"requestGPUMemory"`
	// RequestGPUCore specifies the request gpu core, only for gpushare
	RequestGPUCore int `json:"requestGPUCore" yaml:"requestGPUCore"`
	// CreationTimestamp stores the creation timestamp of job
	CreationTimestamp int64 `json:"creationTimestamp" yaml:"creationTimestamp"`
}
ServingJobInfo displays serving job information.
type ServingJobType ¶
// ServingJobType defines the serving job type. The name must look like
// shorthand + "-serving".
// NOTE(review): KServeJob ("kserve") does not follow that pattern — confirm
// whether the rule or the value is the exception.
type ServingJobType string

const (
	// TFServingJob defines the tensorflow serving job
	TFServingJob ServingJobType = "tf-serving"
	// TRTServingJob defines the tensorrt serving job
	TRTServingJob ServingJobType = "trt-serving"
	// KFServingJob defines the kfserving job
	KFServingJob ServingJobType = "kf-serving"
	// KServeJob defines the kserve job
	KServeJob ServingJobType = "kserve"
	// SeldonServingJob defines the seldon core job
	SeldonServingJob ServingJobType = "seldon-serving"
	// TritonServingJob defines the nvidia triton server job
	TritonServingJob ServingJobType = "triton-serving"
	// CustomServingJob defines the custom serving job
	CustomServingJob ServingJobType = "custom-serving"
	// AllServingJob represents all serving job type; it is the empty string
	AllServingJob ServingJobType = ""
	// UnknownServingJob defines the unknown serving job
	UnknownServingJob ServingJobType = "unknown"
)
type ServingTypeInfo ¶
// ServingTypeInfo describes a serving job type by its name, alias and
// shorthand.
type ServingTypeInfo struct {
	Name      ServingJobType
	Alias     string
	Shorthand string
}
type ServingVersionWeight ¶
type StringMatchPrefix ¶
// StringMatchPrefix holds the prefix variant of a string match.
type StringMatchPrefix struct {
	// Prefix is the prefix to match (protobuf field 2, member of a oneof);
	// it is omitted from JSON when empty.
	Prefix string `protobuf:"bytes,2,opt,name=prefix,proto3,oneof" json:"prefix,omitempty"`
}
type SubmitDeepSpeedJobArgs ¶ added in v0.9.9
// SubmitDeepSpeedJobArgs holds the arguments for submitting a deepspeed job.
type SubmitDeepSpeedJobArgs struct {
	// Cpu matches option --cpu.
	Cpu string `yaml:"cpu"`
	// Memory matches option --memory.
	Memory string `yaml:"memory"`
	// for common args
	CommonSubmitArgs `yaml:",inline"`
	// SubmitTensorboardArgs stores tensorboard information
	SubmitTensorboardArgs `yaml:",inline"`
	// SubmitSyncCodeArgs stores syncing code information
	SubmitSyncCodeArgs `yaml:",inline"`
	// LauncherSelectors matches option --launcher-selector.
	LauncherSelectors map[string]string `yaml:"launcherSelectors"`
	// JobRestartPolicy matches option --job-restart-policy.
	JobRestartPolicy string `yaml:"jobRestartPolicy"`
	// JobBackoffLimit matches option --job-backoff-limit.
	JobBackoffLimit int `yaml:"jobBackoffLimit"`
	// SSHSecret enables creating a secret for the job.
	SSHSecret string `yaml:"sshSecret"`
	// SecretData is serialized as secretData.
	// NOTE(review): presumably the content of the SSH secret — confirm.
	SecretData map[string]string `yaml:"secretData"`
	// LauncherAnnotations defines launcher pod annotations of job, match option --launcher-annotation
	LauncherAnnotations map[string]string `yaml:"launcherAnnotations"`
	// WorkerAnnotations defines worker pod annotations of job, match option --worker-annotation
	WorkerAnnotations map[string]string `yaml:"workerAnnotations"`
}
type SubmitETJobArgs ¶
// SubmitETJobArgs holds the arguments for submitting an et job.
type SubmitETJobArgs struct {
	// Cpu matches option --cpu.
	Cpu string `yaml:"cpu"`
	// Memory matches option --memory.
	Memory string `yaml:"memory"`
	// for common args
	CommonSubmitArgs `yaml:",inline"`
	// SubmitTensorboardArgs stores tensorboard information
	SubmitTensorboardArgs `yaml:",inline"`
	// SubmitSyncCodeArgs stores syncing code information
	SubmitSyncCodeArgs `yaml:",inline"`
	// MaxWorkers is serialized as maxWorkers.
	MaxWorkers int `yaml:"maxWorkers"`
	// MinWorkers is serialized as minWorkers.
	MinWorkers int `yaml:"minWorkers"`
	// LauncherSelectors matches option --launcher-selector.
	LauncherSelectors map[string]string `yaml:"launcherSelectors"`
	// JobRestartPolicy matches option --job-restart-policy.
	JobRestartPolicy string `yaml:"jobRestartPolicy"`
	// WorkerRestartPolicy matches option --worker-restart-policy.
	WorkerRestartPolicy string `yaml:"workerRestartPolicy"`
	// JobBackoffLimit matches option --job-backoff-limit.
	JobBackoffLimit int `yaml:"jobBackoffLimit"`
	// SSHSecret enables creating a secret for the job.
	SSHSecret string `yaml:"sshSecret"`
	// SecretData is serialized as secretData.
	// NOTE(review): presumably the content of the SSH secret — confirm.
	SecretData map[string]string `yaml:"secretData"`
	// LauncherAnnotations defines launcher pod annotations of job, match option --launcher-annotation
	LauncherAnnotations map[string]string `yaml:"launcherAnnotations"`
	// WorkerAnnotations defines worker pod annotations of job, match option --worker-annotation
	WorkerAnnotations map[string]string `yaml:"workerAnnotations"`
}
type SubmitHorovodJobArgs ¶
// SubmitHorovodJobArgs holds the arguments for submitting a horovod job.
type SubmitHorovodJobArgs struct {
	// SSHPort is serialized as sshPort.
	SSHPort int `yaml:"sshPort"`
	// Cpu matches option --cpu.
	Cpu string `yaml:"cpu"`
	// Memory matches option --memory.
	Memory string `yaml:"memory"`
	// for common args
	CommonSubmitArgs `yaml:",inline"`
	// for tensorboard
	SubmitTensorboardArgs `yaml:",inline"`
	// for sync up source code
	SubmitSyncCodeArgs `yaml:",inline"`
}
type SubmitMPIJobArgs ¶
// SubmitMPIJobArgs holds the arguments for submitting an mpijob.
type SubmitMPIJobArgs struct {
	// Cpu matches option --cpu.
	Cpu string `yaml:"cpu"`
	// Memory matches option --memory.
	Memory string `yaml:"memory"`
	// for common args
	CommonSubmitArgs `yaml:",inline"`
	// for tensorboard
	SubmitTensorboardArgs `yaml:",inline"`
	// for sync up source code
	SubmitSyncCodeArgs `yaml:",inline"`
	// GPUTopology enables gpu topology scheduling
	GPUTopology bool `yaml:"gputopology"`
	// GPUTopologyReplica is serialized as gputopologyreplica.
	GPUTopologyReplica string `yaml:"gputopologyreplica"`
	// MountsOnLauncher is serialized as mountsOnLauncher.
	MountsOnLauncher bool `yaml:"mountsOnLauncher"`
	// CleanPodPolicy stores the clean-task-policy
	CleanPodPolicy string `yaml:"cleanPodPolicy"`
}
type SubmitPyTorchJobArgs ¶
// SubmitPyTorchJobArgs holds the arguments for submitting a pytorchjob.
type SubmitPyTorchJobArgs struct {
	// Cpu matches option --cpu.
	Cpu string `yaml:"cpu"`
	// Memory matches option --memory.
	Memory string `yaml:"memory"`
	// for common args
	CommonSubmitArgs `yaml:",inline"`
	// for tensorboard
	SubmitTensorboardArgs `yaml:",inline"`
	// for sync up source code
	SubmitSyncCodeArgs `yaml:",inline"`
	// CleanPodPolicy stores the clean-task-policy
	CleanPodPolicy string `yaml:"cleanPodPolicy"`
	// ActiveDeadlineSeconds Specifies the duration (in seconds) since startTime during which the job can remain active
	// before it is terminated
	ActiveDeadlineSeconds int64 `yaml:"activeDeadlineSeconds,omitempty"`
	// Defines the TTL for cleaning up finished PytorchJobs. Defaults to infinite.
	TTLSecondsAfterFinished int32 `yaml:"ttlSecondsAfterFinished,omitempty"`
	// TrainingOperatorCRD compatible with training-operator crd.
	TrainingOperatorCRD bool `yaml:"trainingOperatorCRD,omitempty"`
	// ShareMemory is serialized as shareMemory.
	// NOTE(review): presumably the shared-memory size for the pods — confirm.
	ShareMemory string `yaml:"shareMemory"`
}
type SubmitSparkJobArgs ¶
// SubmitSparkJobArgs holds the arguments for submitting a spark job.
type SubmitSparkJobArgs struct {
	// Name, Namespace and TrainingType are excluded from the YAML output; the
	// remaining untagged-comment fields use capitalized YAML keys.
	Name         string          `yaml:"-"`
	Namespace    string          `yaml:"-"`
	TrainingType TrainingJobType `yaml:"-"`
	Image        string          `yaml:"Image"`
	MainClass    string          `yaml:"MainClass"`
	Jar          string          `yaml:"Jar"`
	SparkVersion string          `yaml:"SparkVersion"`
	Driver       *Driver         `yaml:"Driver"`
	Executor     *Executor       `yaml:"Executor"`
	// Annotations defines pod annotations of job, match option --annotation
	Annotations map[string]string `yaml:"annotations"`
	// Labels specifies the job labels; they also apply to the pods
	Labels map[string]string `yaml:"labels"`
}
type SubmitSyncCodeArgs ¶
// SubmitSyncCodeArgs stores the options for syncing source code into a job.
type SubmitSyncCodeArgs struct {
	// SyncMode matches option --syncMode: rsync, hdfs, git.
	SyncMode string `yaml:"syncMode"`
	// SyncSource matches option --syncSource.
	SyncSource string `yaml:"syncSource"`
	// SyncImage matches option --syncImage; omitted from YAML when empty.
	SyncImage string `yaml:"syncImage,omitempty"`
	// SyncGitProjectName is serialized as syncGitProjectName and omitted from
	// YAML when empty.
	// NOTE(review): the matching command-line option is not visible here —
	// confirm its name.
	SyncGitProjectName string `yaml:"syncGitProjectName,omitempty"`
}
type SubmitTFJobArgs ¶
// SubmitTFJobArgs holds the arguments for submitting a tfjob.
type SubmitTFJobArgs struct {
	// TFNodeSelectors assigns tfjob node selectors
	TFNodeSelectors map[string]map[string]string `yaml:"tfNodeSelectors"`
	// Port defines the default port if workerPort and PSPort are not set
	Port int
	// WorkerImage assigns worker image, match option --worker-image
	WorkerImage string `yaml:"workerImage"`
	// WorkerPort stores worker port, match option --work-port
	WorkerPort int `yaml:"workerPort"`
	// PSPort stores the ps port, match option --ps-port
	PSPort int `yaml:"psPort"`
	// PSCount stores the ps count, match option --ps-count
	PSCount int `yaml:"ps"`
	// PSImage stores the ps image, match option --ps-image
	PSImage string `yaml:"psImage"`
	// WorkerCpu stores the cpu of job worker, match option --worker-cpu
	WorkerCpu string `yaml:"workerCPU"`
	// WorkerCpuLimit stores the cpu limit of job worker, match option --worker-cpu-limit
	WorkerCpuLimit string `yaml:"workerCPULimit"`
	//WorkerNodeSelectors map[string]string `yaml:"workerNodeSelectors"` // --worker-selector
	// WorkerMemory stores worker memory, match option --worker-memory
	WorkerMemory string `yaml:"workerMemory"`
	// WorkerMemoryLimit stores worker memory limit, match option --worker-memory-limit
	WorkerMemoryLimit string `yaml:"workerMemoryLimit"`
	// PSCpu stores ps cpu, match option --ps-cpu
	PSCpu string `yaml:"psCPU"`
	// PSCpuLimit stores ps cpu limit, match option --ps-cpu-limit
	PSCpuLimit string `yaml:"psCPULimit"`
	// PSGpu stores ps gpu, match option --ps-gpus
	PSGpu int `yaml:"psGPU"` // --ps-gpus
	// PSMemory stores the ps memory, match option --ps-memory
	PSMemory string `yaml:"psMemory"`
	// PSMemoryLimit stores the ps memory limit, match option --ps-memory-limit
	PSMemoryLimit string `yaml:"psMemoryLimit"`
	// SuccessPolicy defines the policy to mark the TFJob as succeeded.
	SuccessPolicy string `yaml:"successPolicy"`
	// CleanPodPolicy stores the cleaning pod policy, match option --clean-task-policy
	CleanPodPolicy string `yaml:"cleanPodPolicy"`
	// UseChief stores the using chief or not, match option --chief
	UseChief bool `yaml:",omitempty"` // --chief
	// ChiefCount stores the chief count of job, match option --chief-count
	ChiefCount int `yaml:"chief"`
	// UseEvaluator is used to enable evaluator or not, match option --evaluator
	UseEvaluator bool `yaml:",omitempty"`
	// ChiefPort stores the chief port, match option --chief-port
	ChiefPort int `yaml:"chiefPort"`
	//ChiefNodeSelectors map[string]string `yaml:"chiefNodeSelectors"` // --chief-selector
	// ChiefCpu stores the chief pod cpu, match option --chief-cpu
	ChiefCpu string `yaml:"chiefCPU"`
	// ChiefCpuLimit stores the chief pod cpu limit, match option --chief-cpu-limit
	ChiefCpuLimit string `yaml:"chiefCPULimit"`
	// ChiefMemory stores the chief pod memory, match option --chief-memory
	ChiefMemory string `yaml:"chiefMemory"`
	// ChiefMemoryLimit stores the chief pod memory limit, match option --chief-memory-limit
	ChiefMemoryLimit string `yaml:"chiefMemoryLimit"`
	// EvaluatorCpu stores the evaluator pod cpu, match option --evaluator-cpu
	EvaluatorCpu string `yaml:"evaluatorCPU"`
	// EvaluatorCpuLimit stores the evaluator pod cpu limit, match option --evaluator-cpu-limit
	EvaluatorCpuLimit string `yaml:"evaluatorCPULimit"`
	//EvaluatorNodeSelectors map[string]string `yaml:"evaluatorNodeSelectors"` // --evaluator-selector
	// EvaluatorMemory stores the evaluator pod memory, match option --evaluator-memory
	EvaluatorMemory string `yaml:"evaluatorMemory"` // --evaluatorMemory
	// EvaluatorMemoryLimit stores the evaluator pod memory limit, match option --evaluator-memory-limit
	EvaluatorMemoryLimit string `yaml:"evaluatorMemoryLimit"` // --evaluatorMemoryLimit
	// EvaluatorCount stores the evaluator pod count, match option --evaluator-count
	EvaluatorCount int `yaml:"evaluator"`
	// HasGangScheduler determines if it has gang scheduler
	HasGangScheduler bool `yaml:"hasGangScheduler"`
	// ActiveDeadlineSeconds Specifies the duration (in seconds) since startTime during which the job can remain active
	// before it is terminated
	ActiveDeadlineSeconds int64 `yaml:"activeDeadlineSeconds,omitempty"`
	// StartingDeadlineSeconds Specifies the duration (in seconds) since startTime during which the job can remain pending
	// before it is terminated
	StartingDeadlineSeconds int64 `yaml:"startingDeadlineSeconds,omitempty"`
	// Defines the TTL for cleaning up finished TFJobs. Defaults to infinite.
	TTLSecondsAfterFinished int32 `yaml:"ttlSecondsAfterFinished,omitempty"`
	// ShareMemory is serialized as shareMemory.
	// NOTE(review): presumably the shared-memory size for the pods — confirm.
	ShareMemory string `yaml:"shareMemory"`
	// for common args
	CommonSubmitArgs `yaml:",inline"`
	// SubmitTensorboardArgs stores tensorboard information
	SubmitTensorboardArgs `yaml:",inline"`
	// SubmitSyncCodeArgs stores syncing code information
	SubmitSyncCodeArgs `yaml:",inline"`
	// TFRuntime stores the runtime; it is excluded from the YAML output
	TFRuntime `yaml:"-"`
	// TrainingOperatorCRD compatible with training-operator crd.
	TrainingOperatorCRD bool `yaml:"trainingOperatorCRD,omitempty"`
}
type SubmitTensorboardArgs ¶
// SubmitTensorboardArgs is used to store tensorboard information
type SubmitTensorboardArgs struct {
	// UseTensorboard matches option --tensorboard.
	UseTensorboard bool `yaml:"useTensorboard"`
	// TensorboardImage matches option --tensorboardImage; omitted from YAML
	// when empty.
	TensorboardImage string `yaml:"tensorboardImage,omitempty"`
	// TrainingLogdir matches option --logdir.
	TrainingLogdir string `yaml:"trainingLogdir"`
	// HostLogPath is serialized as hostLogPath.
	HostLogPath string `yaml:"hostLogPath"`
	// IsLocalLogging is serialized as isLocalLogging.
	IsLocalLogging bool `yaml:"isLocalLogging"`
}
SubmitTensorboardArgs is used to store tensorboard information
type SubmitVolcanoJobArgs ¶
// SubmitVolcanoJobArgs holds the arguments for submitting a volcano job.
type SubmitVolcanoJobArgs struct {
	// Name stores the job name
	Name string
	// Namespace stores the namespace of job
	Namespace string
	// TrainingType is used to accept job type
	TrainingType TrainingJobType
	// Command defines the job command
	Command string
	// MinAvailable is the minimum number of available pods to run for this Job
	MinAvailable int `yaml:"minAvailable"`
	// Queue specifies the queue that will be used in the scheduler; the
	// "default" queue is used if this is left empty.
	Queue string `yaml:"queue"`
	// SchedulerName is the default value of `tasks.template.spec.schedulerName`.
	SchedulerName string `yaml:"schedulerName"`
	// TaskName specifies the name of task
	TaskName string `yaml:"taskName"`
	// TaskImages specifies the task image
	TaskImages []string `yaml:"taskImages"`
	// TaskReplicas specifies the replicas of this Task in Job
	TaskReplicas int `yaml:"taskReplicas"`
	// TaskCPU specifies the cpu resource required for each replica of Task in Job. default is 250m
	TaskCPU string `yaml:"taskCPU"`
	// TaskMemory specifies the memory resource required for each replica of Task in Job. default is 128Mi
	TaskMemory string `yaml:"taskMemory"`
	// TaskPort specifies the task port
	TaskPort int `yaml:"taskPort"`
	// Annotations defines pod annotations of job, match option --annotation
	Annotations map[string]string `yaml:"annotations"`
	// Labels specifies the job labels; they also apply to the pods
	Labels map[string]string `yaml:"labels"`
}
type TFRuntime ¶
// TFRuntime is the customized runtime for tf training. It extends Runtime with
// two hooks that receive the tfjob submit arguments.
type TFRuntime interface {
	// Check checks the tfjob args and reports a problem as err.
	Check(tf *SubmitTFJobArgs) (err error)
	// Transform transforms the tfjob and reports a failure as err.
	Transform(tf *SubmitTFJobArgs) (err error)
	// Runtime is the embedded base interface.
	Runtime
}
Customized runtime for tf training
type TensorFlowServingArgs ¶
// TensorFlowServingArgs holds the options specific to a tensorflow serving job
// in addition to the inlined CommonServingArgs.
type TensorFlowServingArgs struct {
	// VersionPolicy matches option --version-policy.
	VersionPolicy string `yaml:"versionPolicy"`
	// ModelConfigFile matches option --model-config-file.
	ModelConfigFile string `yaml:"modelConfigFile"`
	// MonitoringConfigFile matches option --monitoring-config-file.
	MonitoringConfigFile string `yaml:"monitoringConfigFile"`
	// ModelPath matches option --model-path.
	ModelPath string `yaml:"modelPath"`
	// Port matches option --port.
	Port int `yaml:"port"`
	// RestfulPort matches option --restful-port; note the YAML key is
	// "restApiPort".
	RestfulPort int `yaml:"restApiPort"`
	// CommonServingArgs holds the options shared with the other serving
	// argument types; its fields are inlined into the YAML output.
	CommonServingArgs `yaml:",inline"`
}
type TensorRTServingArgs ¶
// TensorRTServingArgs holds the options specific to a tensorrt serving job in
// addition to the inlined CommonServingArgs.
type TensorRTServingArgs struct {
	// ModelStore matches option --modelStore.
	ModelStore string `yaml:"modelStore"`
	// MetricsPort matches option --metricsPort.
	MetricsPort int `yaml:"metricsPort"`
	// HttpPort matches option --httpPort.
	HttpPort int `yaml:"httpPort"`
	// GrpcPort matches option --grpcPort.
	GrpcPort int `yaml:"grpcPort"`
	// AllowMetrics matches option --allowMetrics.
	AllowMetrics bool `yaml:"allowMetrics"`
	// CommonServingArgs holds the options shared with the other serving
	// argument types; its fields are inlined into the YAML output.
	CommonServingArgs `yaml:",inline"`
}
type TolerationArgs ¶ added in v0.9.2
type TrafficRouterSplitArgs ¶
// TrafficRouterSplitArgs holds the arguments for splitting traffic between
// the versions of a serving job.
type TrafficRouterSplitArgs struct {
	// ServingName matches option --name.
	ServingName string `yaml:"servingName,omitempty"`
	// Namespace matches option --namespace.
	Namespace string `yaml:"namespace,omitempty"`
	// Versions matches option --versions.
	Versions string `yaml:"versions,omitempty"`
	// Weights matches option --weights.
	Weights string `yaml:"weights,omitempty"`
	// VersionWeights holds one ServingVersionWeight per entry.
	// NOTE(review): presumably derived from Versions and Weights — confirm.
	VersionWeights []ServingVersionWeight
}
type TrainingJobInfo ¶
// TrainingJobInfo stores training job information
type TrainingJobInfo struct {
	// UUID is the unique identity of the training job
	UUID string `json:"uuid" yaml:"uuid"`
	// Name is the name of the training job
	Name string `json:"name" yaml:"name"`
	// Namespace is the namespace of the training job
	Namespace string `json:"namespace" yaml:"namespace"`
	// Duration is the time of the training job
	Duration string `json:"duration" yaml:"duration"`
	// Status is the status of the training job
	Status TrainingJobStatus `json:"status" yaml:"status"`
	// Trainer is the training type of the training job
	Trainer TrainingJobType `json:"trainer" yaml:"trainer"`
	// Tensorboard is the tensorboard of the training job
	Tensorboard string `json:"tensorboard" yaml:"tensorboard"`
	// ChiefName is the name of the chief instance
	ChiefName string `json:"chiefName" yaml:"chiefName"`
	// Instances lists the instances under the training job
	Instances []TrainingJobInstance `json:"instances" yaml:"instances"`
	// Priority is the priority of the training job
	Priority string `json:"priority" yaml:"priority"`
	// RequestGPU stores the request gpus
	RequestGPU int64 `json:"requestGPUs" yaml:"requestGPUs"`
	// AllocatedGPU stores the allocated gpus
	AllocatedGPU int64 `json:"allocatedGPUs" yaml:"allocatedGPUs"`
	// CreationTimestamp stores the creation timestamp of job
	CreationTimestamp int64 `json:"creationTimestamp" yaml:"creationTimestamp"`
	// Model information associated with this job.
	// NOTE(review): unlike the fields above, these three carry no yaml tag —
	// confirm whether that is intentional.
	ModelName    string `json:"modelName"`
	ModelVersion string `json:"modelVersion"`
	ModelSource  string `json:"modelSource"`
}
TrainingJobInfo stores training job information
type TrainingJobInstance ¶
// TrainingJobInstance defines the instance of training job
// NOTE(review): Status, Name, Age and Node carry only a json tag while the
// other fields also have a yaml tag — confirm whether that is intentional.
type TrainingJobInstance struct {
	// IP defines the instance ip
	IP string `json:"ip" yaml:"ip"`
	// Status is the status of the instance
	Status string `json:"status"`
	// Name is the name of the instance
	Name string `json:"name"`
	// Age is the age of the instance
	Age string `json:"age"`
	// Node is the node the instance runs on
	Node string `json:"node"`
	// NodeIP stores the node ip
	NodeIP string `json:"nodeIP" yaml:"nodeIP"`
	// IsChief tells whether the instance is chief or not
	IsChief bool `json:"chief" yaml:"chief"`
	// RequestGPUs is used to store request gpu count
	RequestGPUs int `json:"requestGPUs" yaml:"requestGPUs"`
	// GPUMetrics stores the gpu metrics
	GPUMetrics map[string]GpuMetric `json:"gpuMetrics" yaml:"gpuMetrics"`
	// CreationTimestamp returns the creation timestamp of instance
	CreationTimestamp int64 `json:"creationTimestamp" yaml:"creationTimestamp"`
}
TrainingJobInstance defines the instance of training job
type TrainingJobStatus ¶
// TrainingJobStatus defines all the kinds of JobStatus
type TrainingJobStatus string

const (
	// TrainingJobQueuing means the job is queuing
	TrainingJobQueuing TrainingJobStatus = "QUEUING"
	// TrainingJobPending means the job is pending
	TrainingJobPending TrainingJobStatus = "PENDING"
	// TrainingJobRunning means the job is running
	TrainingJobRunning TrainingJobStatus = "RUNNING"
	// TrainingJobSucceeded means the job is Succeeded
	TrainingJobSucceeded TrainingJobStatus = "SUCCEEDED"
	// TrainingJobFailed means the job is failed
	TrainingJobFailed TrainingJobStatus = "FAILED"
)
type TrainingJobType ¶
// TrainingJobType defines the supporting training job type
type TrainingJobType string

const (
	// TFTrainingJob defines the tfjob
	TFTrainingJob TrainingJobType = "tfjob"
	// MPITrainingJob defines the mpijob
	MPITrainingJob TrainingJobType = "mpijob"
	// PytorchTrainingJob defines the pytorchjob
	PytorchTrainingJob TrainingJobType = "pytorchjob"
	// HorovodTrainingJob defines the horovod job
	HorovodTrainingJob TrainingJobType = "horovodjob"
	// VolcanoTrainingJob defines the volcano job
	VolcanoTrainingJob TrainingJobType = "volcanojob"
	// ETTrainingJob defines the etjob
	ETTrainingJob TrainingJobType = "etjob"
	// SparkTrainingJob defines the spark job
	SparkTrainingJob TrainingJobType = "sparkjob"
	// DeepSpeedTrainingJob defines the deepspeed job
	DeepSpeedTrainingJob TrainingJobType = "deepspeedjob"
	// AllTrainingJob represents all job types; it is the empty string
	AllTrainingJob TrainingJobType = ""
	// UnknownTrainingJob defines the unknown training
	UnknownTrainingJob TrainingJobType = "unknown"
)
type TrainingJobTypeInfo ¶
// TrainingJobTypeInfo groups a TrainingJobType with two additional string
// labels, an alias and a shorthand.
type TrainingJobTypeInfo struct {
	// Name is the training job type this entry describes.
	Name TrainingJobType
	// Alias is an alternative label for the job type.
	// NOTE(review): how it is matched or displayed is not visible here — confirm against callers.
	Alias string
	// Shorthand is a further label for the job type.
	// NOTE(review): presumably an abbreviated form of Name — confirm against callers.
	Shorthand string
}
type TritonServingArgs ¶ added in v0.8.5
// TritonServingArgs collects the Triton-specific serving options. The trailing
// comment on each field names the command-line flag associated with it, and the
// yaml tag gives the key the field is serialized under.
type TritonServingArgs struct {
	Backend         string   `yaml:"backend"`         // --backend
	ModelRepository string   `yaml:"modelRepository"` // --model-repository
	MetricsPort     int      `yaml:"metricsPort"`     // --metrics-port
	HttpPort        int      `yaml:"httpPort"`        // --http-port
	GrpcPort        int      `yaml:"grpcPort"`        // --grpc-port
	AllowMetrics    bool     `yaml:"allowMetrics"`    // --allow-metrics
	LoadModels      []string `yaml:"loadModels"`      // --load-model
	ExtendCommand   string   `yaml:"extendCommand"`   // --extend-command
	// CommonServingArgs is embedded with the yaml "inline" option, so its
	// fields are serialized at the same level as the fields above.
	CommonServingArgs `yaml:",inline"`
}
type UpdateCustomServingArgs ¶ added in v0.8.9
// UpdateCustomServingArgs declares no fields of its own; it only embeds
// CommonUpdateServingArgs.
type UpdateCustomServingArgs struct {
	// CommonUpdateServingArgs is embedded with the yaml "inline" option, so its
	// fields are serialized at the top level of this struct's YAML mapping.
	CommonUpdateServingArgs `yaml:",inline"`
}
type UpdateKServeArgs ¶ added in v0.9.11
// UpdateKServeArgs collects the KServe-specific update options. The trailing
// comment on each field names the command-line flag associated with it, and the
// yaml tag gives the key the field is serialized under.
type UpdateKServeArgs struct {
	ModelFormat *ModelFormat `yaml:"modelFormat"` // --model-format
	Runtime     string       `yaml:"runtime"`     // --runtime
	// NOTE(review): every other flag comment in this struct is kebab-case;
	// confirm whether the real flag is spelled --storage-uri.
	StorageUri           string `yaml:"storageUri"`           // --storageUri
	RuntimeVersion       string `yaml:"runtimeVersion"`       // --runtime-version
	ProtocolVersion      string `yaml:"protocolVersion"`      // --protocol-version
	MinReplicas          int    `yaml:"minReplicas"`          // --min-replicas
	MaxReplicas          int    `yaml:"maxReplicas"`          // --max-replicas
	ScaleTarget          int    `yaml:"scaleTarget"`          // --scale-target
	ScaleMetric          string `yaml:"scaleMetric"`          // --scale-metric
	ContainerConcurrency int64  `yaml:"containerConcurrency"` // --container-concurrency
	// TimeoutSeconds is serialized under the YAML key "timeout", not
	// "timeoutSeconds".
	TimeoutSeconds int64 `yaml:"timeout"` // --timeout
	// CanaryTrafficPercent is the only field here tagged omitempty, so a zero
	// value is left out of the YAML output.
	CanaryTrafficPercent int64 `yaml:"canaryTrafficPercent,omitempty"` // --canary-traffic-percent
	Port                 int   `yaml:"port"`                           // --port
	// CommonUpdateServingArgs is embedded with the yaml "inline" option, so its
	// fields are serialized at the same level as the fields above.
	CommonUpdateServingArgs `yaml:",inline"`
}
type UpdateTensorFlowServingArgs ¶ added in v0.8.9
// UpdateTensorFlowServingArgs collects the TensorFlow-serving-specific update
// options. The trailing comment on each field names the command-line flag
// associated with it, and the yaml tag gives the key it is serialized under.
type UpdateTensorFlowServingArgs struct {
	ModelConfigFile      string `yaml:"modelConfigFile"`      // --model-config-file
	MonitoringConfigFile string `yaml:"monitoringConfigFile"` // --monitoring-config-file
	ModelName            string `yaml:"modelName"`            // --model-name
	ModelPath            string `yaml:"modelPath"`            // --model-path
	// CommonUpdateServingArgs is embedded with the yaml "inline" option, so its
	// fields are serialized at the same level as the fields above.
	CommonUpdateServingArgs `yaml:",inline"`
}
type UpdateTritonServingArgs ¶ added in v0.8.9
// UpdateTritonServingArgs collects the Triton-specific update options. The
// trailing comment on each field names the command-line flag associated with
// it, and the yaml tag gives the key it is serialized under.
type UpdateTritonServingArgs struct {
	ModelRepository string `yaml:"modelRepository"` // --model-repository
	AllowMetrics    bool   `yaml:"allowMetrics"`    // --allow-metrics
	// CommonUpdateServingArgs is embedded with the yaml "inline" option, so its
	// fields are serialized at the same level as the fields above.
	CommonUpdateServingArgs `yaml:",inline"`
}
type VirtualService ¶
// VirtualService embeds *istiov1alpha3.VirtualService and additionally declares
// its own list of HTTP routes.
type VirtualService struct {
	*istiov1alpha3.VirtualService
	// Http is serialized as "http" in JSON and omitted when empty.
	// NOTE(review): presumably declared at this level so that it takes
	// precedence over a same-named field promoted from the embedded type —
	// confirm against the istiov1alpha3 definition.
	Http []*HTTPRoute `protobuf:"bytes,3,rep,name=http" json:"http,omitempty"`
}
type VirtualServiceCRD ¶
// VirtualServiceCRD combines kind and apiVersion strings, embedded object
// metadata, and a VirtualService spec in a single struct.
//
// NOTE(review): the protobuf tags reuse field numbers — Kind and ObjectMeta are
// both tagged 1, APIVersion and Spec are both tagged 2. Confirm that no
// protobuf encoder consumes these tags.
type VirtualServiceCRD struct {
	// Kind is a string value representing the REST resource this object represents.
	// Servers may infer this from the endpoint the client submits requests to.
	// Cannot be updated.
	// In CamelCase.
	// More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#types-kinds
	// +optional
	Kind string `json:"kind,omitempty" protobuf:"bytes,1,opt,name=kind"`
	// APIVersion defines the versioned schema of this representation of an object.
	// Servers should convert recognized schemas to the latest internal value, and
	// may reject unrecognized values.
	// More info: https://git.k8s.io/community/contributors/devel/api-conventions.md#resources
	// +optional
	APIVersion string `json:"apiVersion,omitempty" protobuf:"bytes,2,opt,name=apiVersion"`
	// ObjectMeta is embedded and serialized under the "metadata" key in both
	// JSON and YAML; it is omitted when empty.
	metav1.ObjectMeta `json:"metadata,omitempty" yaml:"metadata,omitempty" protobuf:"bytes,1,opt,name=metadata"`
	// Spec holds the VirtualService definition and is serialized under the
	// "spec" key in both JSON and YAML.
	Spec VirtualService `json:"spec,omitempty" yaml:"spec,omitempty" protobuf:"bytes,2,opt,name=spec"`
}