Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
View Source
// Sentinel errors describing how many jobs matched the assigned conditions.
var (
	// ErrNotFoundJobs reports that no job was found under the assigned conditions.
	ErrNotFoundJobs = errors.New(`not found jobs under the assigned conditions.`)
	// ErrTooManyJobs reports that more than one job was found; its message asks
	// the user to narrow the match with --version or --type.
	ErrTooManyJobs = errors.New(`found jobs more than one,please use --version or --type to filter.`)
)
View Source
// KubeConfig is a package-level string variable.
// NOTE(review): presumably the path to the kubeconfig file used to reach the
// cluster — confirm against the code that sets and reads it.
var KubeConfig string
View Source
// SERVING_CHARTS maps a versioned serving chart name (for example
// "tensorflow-serving-0.2.0") to a label for the serving framework
// (for example "Tensorflow").
// NOTE(review): the ALL_CAPS name is not idiomatic Go (MixedCaps is preferred),
// but the identifier is exported, so renaming it would break callers.
var SERVING_CHARTS = map[string]string{
"tensorflow-serving-0.2.0": "Tensorflow",
"tensorrt-inference-server-0.0.1": "TensorRT",
}
Functions ¶
func DisplayGPUShareDetails ¶ added in v0.3.1
func DisplayGPUShareDetails(nodeInfos []*GPUShareNodeInfo)
func DisplayGPUShareSummary ¶ added in v0.3.1
func DisplayGPUShareSummary(nodeInfos []*GPUShareNodeInfo)
func GetGPUMemoryInPod ¶ added in v0.3.1
func GetGPUMemoryInPod(pod v1.Pod) int
Types ¶
type DeviceInfo ¶ added in v0.3.1
// DeviceInfo holds a list of pods together with used and total GPU memory figures.
// NOTE(review): presumably describes one GPU device on a GPU-share node; the
// memory unit (MiB or GiB) is not visible in this listing — confirm.
type DeviceInfo struct {
// Pods is the list of pods recorded for this device.
Pods []v1.Pod
// UsedGPUMem is the used GPU memory (unit not shown here).
UsedGPUMem int
// TotalGPUMem is the total GPU memory (unit not shown here).
TotalGPUMem int
// contains filtered or unexported fields
}
func (*DeviceInfo) String ¶ added in v0.3.1
func (d *DeviceInfo) String() string
type GPUShareNodeInfo ¶ added in v0.3.1
type GPUShareNodeInfo struct {
// contains filtered or unexported fields
}
func BuildAllGPUShareNodeInfos ¶ added in v0.3.1
func BuildAllGPUShareNodeInfos(allPods []v1.Pod, nodes []v1.Node) ([]*GPUShareNodeInfo, error)
For all GPUShare nodes, decide whether GPU memory is measured in MiB or GiB.
func BuildGPUShareNodeInfo ¶ added in v0.3.1
func BuildGPUShareNodeInfo(allPods []v1.Pod, node v1.Node) (*GPUShareNodeInfo, error)
For one GPUShare node, decide whether GPU memory is measured in MiB or GiB.
type Instance ¶
// Instance describes one instance of a training job: its status, name, age,
// the node it runs on, and whether it is the chief instance.
type Instance struct {
// The status of the instance
Status string `json:"status"`
// The name of the instance
Name string `json:"name"`
// The age of the instance
Age string `json:"age"`
// The node the instance runs on
Node string `json:"node"`
// Whether the instance is the chief or not
IsChief bool `json:"chief" yaml:"chief"`
}
type JobInfo ¶
// JobInfo is the serializable summary of a training job, including its
// identity, status, trainer type, and the instances that belong to it.
type JobInfo struct {
// The name of the training job
Name string `json:"name"`
// The namespace of the training job
Namespace string `json:"namespace"`
// The duration of the training job, held as a string
Duration string `json:"duration"`
// The status of the training job
Status JobStatus `json:"status"`
// The training type of the training job
Trainer string `json:"trainer"`
// The tensorboard of the training job; omitted from JSON when empty
Tensorboard string `json:"tensorboard,omitempty"`
// The name of the chief instance
ChiefName string `json:"chiefName" yaml:"chiefName"`
// The instances under the training job
Instances []Instance `json:"instances"`
// The priority of the training job
Priority string `json:"priority"`
}
type JobStatus ¶
// JobStatus is the status of a training job, represented as a string.
type JobStatus string

// All the kinds of JobStatus.
const (
	// JobPending means the job is pending.
	JobPending JobStatus = "PENDING"
	// JobRunning means the job is running.
	JobRunning JobStatus = "RUNNING"
	// JobSucceeded means the job has succeeded.
	JobSucceeded JobStatus = "SUCCEEDED"
	// JobFailed means the job has failed.
	JobFailed JobStatus = "FAILED"
)
type ServingType ¶ added in v0.3.0
// ServingType names a serving type, represented as a string.
type ServingType string

const (
	// ServingTF is the tensorflow serving type.
	ServingTF ServingType = "TENSORFLOW"
	// ServingTRT is the tensorrt serving type.
	ServingTRT ServingType = "TENSORRT"
	// ServingCustom is the custom serving type.
	ServingCustom ServingType = "CUSTOM"
	// KFServing is the kfserving serving type.
	KFServing ServingType = "KFSERVING"
)
The supported serving types.
type TrainingJobInfo ¶
Click to show internal directories.
Click to hide internal directories.