Documentation
¶
Index ¶
- Constants
- Variables
- func FaultConfigBadControlInterface(iface string, err error) *fault.Fault
- func FaultConfigBdevCountMismatch(curIdx, curCount, seenIdx, seenCount int) *fault.Fault
- func FaultConfigControlInterfaceMismatch(ifaceAddr, replicaAddr string) *fault.Fault
- func FaultConfigDuplicateFabric(curIdx, seenIdx int) *fault.Fault
- func FaultConfigDuplicateLogFile(curIdx, seenIdx int) *fault.Fault
- func FaultConfigDuplicateScmDeviceList(curIdx, seenIdx int) *fault.Fault
- func FaultConfigDuplicateScmMount(curIdx, seenIdx int) *fault.Fault
- func FaultConfigEngineNUMAImbalance(nodeMap map[int]int) *fault.Fault
- func FaultConfigFaultCallbackFailed(err error) *fault.Fault
- func FaultConfigFaultCallbackInsecure(requiredDir string) *fault.Fault
- func FaultConfigFaultDomainInvalid(err error) *fault.Fault
- func FaultConfigHelperStreamCountMismatch(curIdx, curCount, seenIdx, seenCount int) *fault.Fault
- func FaultConfigInvalidNetDevClass(curIdx int, primaryDevClass, thisDevClass hardware.NetDevClass, iface string) *fault.Fault
- func FaultConfigNrHugepagesOutOfRange(req, max int) *fault.Fault
- func FaultConfigOverlappingBdevDeviceList(curIdx, seenIdx int) *fault.Fault
- func FaultConfigRamdiskOverMaxMem(confSize, ramSize, memRamdiskMin uint64) *fault.Fault
- func FaultConfigScmDiffClass(curIdx, seenIdx int) *fault.Fault
- func FaultConfigScmDiffHugeEnabled(curIdx, seenIdx int) *fault.Fault
- func FaultConfigTargetCountMismatch(curIdx, curCount, seenIdx, seenCount int) *fault.Fault
- func GetMSReplicaPort(log logging.Logger, addr string) (int, error)
- type EngineAffinityFn
- type Server
- func (cfg *Server) GetBdevConfigs() (bdevCfgs storage.TierConfigs)
- func (cfg *Server) GetNumaNodes() ([]int, error)
- func (cfg *Server) HasPMem() bool
- func (cfg *Server) Load(log logging.Logger) error
- func (cfg *Server) SaveActiveConfig(log logging.Logger)
- func (cfg *Server) SaveToFile(filename string) error
- func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineAffinityFn) error
- func (cfg *Server) SetNrHugepages(log logging.Logger, hugepageSizeKiB int) error
- func (cfg *Server) SetPath(inPath string) error
- func (cfg *Server) SetRamdiskSize(log logging.Logger, smi *common.SysMemInfo) error
- func (cfg *Server) Validate(log logging.Logger) (err error)
- func (cfg *Server) WithAllowNumaImbalance(allowed bool) *Server
- func (cfg *Server) WithAllowTHP(allowed bool) *Server
- func (cfg *Server) WithBdevExclude(bList ...string) *Server
- func (cfg *Server) WithClientEnvVars(envVars []string) *Server
- func (cfg *Server) WithControlInterface(iface string) *Server
- func (cfg *Server) WithControlLogFile(filePath string) *Server
- func (cfg *Server) WithControlLogJSON(enabled bool) *Server
- func (cfg *Server) WithControlLogMask(lvl common.ControlLogLevel) *Server
- func (cfg *Server) WithControlMetadata(md storage.ControlMetadata) *Server
- func (cfg *Server) WithControlPort(port int) *Server
- func (cfg *Server) WithCoreDumpFilter(filter uint8) *Server
- func (cfg *Server) WithCrtTimeout(timeout uint32) *Server
- func (cfg *Server) WithDisableHotplug(disabled bool) *Server
- func (cfg *Server) WithDisableHugepages(disabled bool) *Server
- func (cfg *Server) WithDisableVFIO(disabled bool) *Server
- func (cfg *Server) WithDisableVMD(disabled bool) *Server
- func (cfg *Server) WithEngines(engineList ...*engine.Config) *Server
- func (cfg *Server) WithFabricAuthKey(key string) *Server
- func (cfg *Server) WithFabricProvider(provider string) *Server
- func (cfg *Server) WithFaultCb(cb string) *Server
- func (cfg *Server) WithFaultPath(fp string) *Server
- func (cfg *Server) WithFirmwareHelperLogFile(filePath string) *Server
- func (cfg *Server) WithHelperLogFile(filePath string) *Server
- func (cfg *Server) WithHyperthreads(enabled bool) *Server
- func (cfg *Server) WithMgmtSvcReplicas(reps ...string) *Server
- func (cfg *Server) WithModules(mList string) *Server
- func (cfg *Server) WithNrHugepages(nr int) *Server
- func (cfg *Server) WithNumSecondaryEndpoints(nr []int) *Server
- func (cfg *Server) WithSocketDir(sockDir string) *Server
- func (cfg *Server) WithSystemName(name string) *Server
- func (cfg *Server) WithSystemRamReserved(nr int) *Server
- func (cfg *Server) WithTelemetryPort(port int) *Server
- func (cfg *Server) WithTransportConfig(cfgTransport *security.TransportConfig) *Server
- type SupportConfig
Constants ¶
const ( ConfigOut = ".daos_server.active.yml" // ScanMinHugepageCount is the minimum number of hugepages to allocate in order to satisfy // SPDK memory requirements when performing a NVMe device scan. ScanMinHugepageCount = 128 )
Variables ¶
var ( FaultUnknown = serverConfigFault( code.ServerConfigUnknown, "unknown control server error", "", ) FaultBadConfig = serverConfigFault( code.ServerBadConfig, "insufficient information in configuration", "supply path to valid configuration file, use examples for reference", ) FaultConfigNoPath = serverConfigFault( code.ServerNoConfigPath, "configuration file path not set", "supply the path to a server configuration file when restarting the control server with commandline option '-o'", ) FaultConfigBadControlPort = serverConfigFault( code.ServerConfigBadControlPort, "invalid control port in configuration", "specify a positive non-zero network port in configuration ('port' parameter) and restart the control server", ) FaultConfigBadTelemetryPort = serverConfigFault( code.ServerConfigBadTelemetryPort, "invalid telemetry port in configuration", "specify a positive non-zero network port in configuration ('telemetry_port' parameter) and restart the control server", ) FaultConfigBadMgmtSvcReplicas = serverConfigFault( code.ServerConfigBadMgmtSvcReplicas, "invalid list of MS replicas in configuration", "'mgmt_svc_replicas' must contain resolvable addresses; fix the configuration and restart the control server", ) FaultConfigEvenMgmtSvcReplicas = serverConfigFault( code.ServerConfigEvenMgmtSvcReplicas, "non-odd number of MS replicas in configuration", "'mgmt_svc_replicas' must contain an odd number (e.g. 1, 3, 5, etc.) 
of addresses; fix the configuration and restart the control server", ) FaultConfigNoProvider = serverConfigFault( code.ServerConfigBadProvider, "provider not specified in server configuration", "specify a valid network provider in configuration ('provider' parameter) and restart the control server", ) FaultConfigNoEngines = serverConfigFault( code.ServerConfigNoEngines, "no DAOS IO Engines specified in configuration", "specify at least one IO Engine configuration ('engines' list parameter) and restart the control server", ) FaultConfigFaultCallbackNotFound = serverConfigFault( code.ServerConfigFaultCallbackNotFound, "fault domain callback script not found", "specify a valid fault domain callback script ('fault_cb' parameter) and restart the control server", ) FaultConfigFaultCallbackBadPerms = serverConfigFault( code.ServerConfigFaultCallbackBadPerms, "fault domain callback cannot be executed", "ensure that permissions for the DAOS server user are properly set on the fault domain callback script ('fault_cb' parameter) and restart the control server", ) FaultConfigBothFaultPathAndCb = serverConfigFault( code.ServerConfigBothFaultPathAndCb, "both fault domain and fault path are defined in the configuration", "remove either the fault domain ('fault_path' parameter) or callback script ('fault_cb' parameter) and restart the control server", ) FaultConfigFaultCallbackEmpty = serverConfigFault( code.ServerConfigFaultCallbackEmpty, "fault domain callback executed but did not generate output", "specify a valid fault domain callback script ('fault_cb' parameter) and restart the control server", ) FaultConfigTooManyLayersInFaultDomain = serverConfigFault( code.ServerConfigFaultDomainTooManyLayers, "the fault domain path may have a maximum of 2 levels below the root", "update either the fault domain ('fault_path' parameter) or callback script ('fault_cb' parameter) and restart the control server", ) FaultConfigHugepagesDisabledWithNvmeBdevs = serverConfigFault( 
code.ServerConfigHugepagesDisabledWithNvmeBdevs, "hugepages cannot be disabled if nvme-bdevs have been specified in config", "either set false (or remove) disable_hugepages parameter or remove nvme storage assignment in config and restart the control server", ) FaultConfigHugepagesDisabledWithNrSet = serverConfigFault( code.ServerConfigHugepagesDisabledWithNrSet, "hugepages cannot be disabled if non-zero number has been specified in config", "either set false (or remove) disable_hugepages parameter or remove nr_hugepages assignment in config and restart the control server", ) FaultConfigControlMetadataNoPath = serverConfigFault( code.ServerConfigControlMetadataNoPath, "using a control_metadata device requires a path to use as the mount point", "add a valid 'path' to the 'control_metadata' section of the config", ) FaultConfigEngineBdevRolesMismatch = serverConfigFault( code.ServerConfigEngineBdevRolesMismatch, "md-on-ssd bdev roles have been set in some but not all engine configs", "set bdev roles on all engines or remove all bdev role assignments in config", ) FaultConfigSysRsvdZero = serverConfigFault( code.ServerConfigSysRsvdZero, "`system_ram_reserved` is set to zero in server config", "set `system_ram_reserved` to a positive integer value in config", ) FaultConfigEnableHotplugDeprecated = serverConfigFault( code.ServerConfigEnableHotplugDeprecated, "'enable_hotplug' setting is deprecated and no longer supported", "set 'disable_hotplug: true' in server config file to disable hotplug", ) FaultConfigBdevExcludeClash = serverConfigFault( code.ServerConfigBdevExcludeClash, "'bdev_exclude' list includes address used in engine config bdev_list", "make sure addresses excluded are not included in engine storage configs then restart daos_server", ) )
var ( // ErrNoAffinityDetected is a sentinel error used to indicate that no affinity was detected. ErrNoAffinityDetected = errors.New("no NUMA affinity detected") )
Functions ¶
func FaultConfigBadControlInterface ¶
FaultConfigBadControlInterface creates a fault for an invalid control plane network interface.
func FaultConfigControlInterfaceMismatch ¶
FaultConfigControlInterfaceMismatch creates a fault when the control interface address doesn't match the configured MS replica address.
func FaultConfigEngineNUMAImbalance ¶
FaultConfigEngineNUMAImbalance creates a fault indicating that engines are not distributed evenly across NUMA nodes.
func FaultConfigFaultCallbackFailed ¶
FaultConfigFaultCallbackFailed creates a Fault for the scenario where the fault domain callback failed with some error.
func FaultConfigFaultCallbackInsecure ¶
FaultConfigFaultCallbackInsecure creates a fault for the scenario where the fault domain callback path doesn't meet security requirements.
func FaultConfigNrHugepagesOutOfRange ¶
FaultConfigNrHugepagesOutOfRange creates a fault for the scenario where the number of configured huge pages is smaller than zero or larger than the maximum value allowed.
func FaultConfigRamdiskOverMaxMem ¶
FaultConfigRamdiskOverMaxMem indicates that the tmpfs size requested in the config is larger than the maximum allowed.
func FaultConfigScmDiffClass ¶
Types ¶
type EngineAffinityFn ¶
EngineAffinityFn defines a function which returns the NUMA node affinity of a given engine.
type Server ¶
type Server struct {
// control-specific
ControlPort int `yaml:"port"`
ControlInterface string `yaml:"control_iface,omitempty"`
TransportConfig *security.TransportConfig `yaml:"transport_config"`
Engines []*engine.Config `yaml:"engines"`
BdevExclude []string `yaml:"bdev_exclude,omitempty"`
DisableVFIO bool `yaml:"disable_vfio"`
DisableVMD *bool `yaml:"disable_vmd"`
DisableHotplug *bool `yaml:"disable_hotplug"`
NrHugepages int `yaml:"nr_hugepages"` // total for all engines
SystemRamReserved int `yaml:"system_ram_reserved"` // total for all engines
DisableHugepages bool `yaml:"disable_hugepages"`
AllowNumaImbalance bool `yaml:"allow_numa_imbalance"`
AllowTHP bool `yaml:"allow_thp"`
ControlLogMask common.ControlLogLevel `yaml:"control_log_mask"`
ControlLogFile string `yaml:"control_log_file,omitempty"`
ControlLogJSON bool `yaml:"control_log_json,omitempty"`
HelperLogFile string `yaml:"helper_log_file,omitempty"`
FWHelperLogFile string `yaml:"firmware_helper_log_file,omitempty"`
FaultPath string `yaml:"fault_path,omitempty"`
TelemetryPort int `yaml:"telemetry_port,omitempty"`
CoreDumpFilter uint8 `yaml:"core_dump_filter,omitempty"`
ClientEnvVars []string `yaml:"client_env_vars,omitempty"`
SupportConfig SupportConfig `yaml:"support_config,omitempty"`
// duplicated in engine.Config
SystemName string `yaml:"name"`
SocketDir string `yaml:"socket_dir"`
Fabric engine.FabricConfig `yaml:",inline"`
Modules string `yaml:"-"`
MgmtSvcReplicas []string `yaml:"mgmt_svc_replicas"`
Metadata storage.ControlMetadata `yaml:"control_metadata,omitempty"`
// unused (?)
FaultCb string `yaml:"fault_cb"`
Hyperthreads bool `yaml:"hyperthreads"`
Path string `yaml:"-"` // path to config file
// Behavior flags
AutoFormat bool `yaml:"-"`
// contains filtered or unexported fields
}
Server describes configuration options for the DAOS control plane. See utils/config/daos_server.yml for parameter descriptions.
func DefaultServer ¶
func DefaultServer() *Server
DefaultServer creates a new instance of the configuration struct populated with defaults.
func (*Server) GetBdevConfigs ¶
func (cfg *Server) GetBdevConfigs() (bdevCfgs storage.TierConfigs)
GetBdevConfigs retrieves all engine bdev storage tier configs from a server configuration.
func (*Server) GetNumaNodes ¶
GetNumaNodes returns the in-use NUMA nodes based on engine configurations. It detects the number of engine configs assigned to each NUMA node and returns an error if engines are distributed unevenly across NUMA nodes. Otherwise it returns a sorted list of the NUMA nodes in use. Configurations where all engines are on a single NUMA node are allowed.
func (*Server) HasPMem ¶
HasPMem returns true if any engine storage config contains a DCPM-class SCM-tier.
func (*Server) SaveActiveConfig ¶
SaveActiveConfig saves the read-only active config, trying the config directory first and then /tmp/.
func (*Server) SaveToFile ¶
SaveToFile serializes the configuration and saves it to the specified filename.
func (*Server) SetEngineAffinities ¶
func (cfg *Server) SetEngineAffinities(log logging.Logger, affSources ...EngineAffinityFn) error
SetEngineAffinities sets the NUMA node affinity for all engines in the configuration.
func (*Server) SetNrHugepages ¶
SetNrHugepages calculates the minimum hugepage count based on the total target count if using NVMe. It handles the scenarios of hugepages being disabled and of no bdevs being configured by setting the config request value (NrHugepages) appropriately. Hugepage allocation requests will be validated in prepBdevStorage().
func (*Server) SetRamdiskSize ¶
SetRamdiskSize calculates the maximum RAM-disk size using total memory as reported by /proc/meminfo. It then either validates the configured engine storage values or assigns them if not already set.
func (*Server) WithAllowNumaImbalance ¶
WithAllowNumaImbalance allows engine count mismatch between NUMA-nodes.
func (*Server) WithAllowTHP ¶
WithAllowTHP allows DAOS server to run with transparent hugepage support enabled.
func (*Server) WithBdevExclude ¶
WithBdevExclude sets the block device exclude list.
func (*Server) WithClientEnvVars ¶
WithClientEnvVars sets the environment variables to be sent to the client.
func (*Server) WithControlInterface ¶
WithControlInterface sets the network interface for the control plane listener.
func (*Server) WithControlLogFile ¶
WithControlLogFile sets the path to the daos_server logfile.
func (*Server) WithControlLogJSON ¶
WithControlLogJSON enables or disables JSON output.
func (*Server) WithControlLogMask ¶
func (cfg *Server) WithControlLogMask(lvl common.ControlLogLevel) *Server
WithControlLogMask sets the daos_server log level.
func (*Server) WithControlMetadata ¶
func (cfg *Server) WithControlMetadata(md storage.ControlMetadata) *Server
WithControlMetadata sets the control plane metadata.
func (*Server) WithControlPort ¶
WithControlPort sets the gRPC listener port.
func (*Server) WithCoreDumpFilter ¶
WithCoreDumpFilter sets the core dump filter written to /proc/self/coredump_filter.
func (*Server) WithCrtTimeout ¶
WithCrtTimeout sets the top-level CrtTimeout.
func (*Server) WithDisableHotplug ¶
WithDisableHotplug can be used to disable hotplug.
func (*Server) WithDisableHugepages ¶
WithDisableHugepages disables the use of huge pages.
func (*Server) WithDisableVFIO ¶
WithDisableVFIO indicates that the vfio-pci driver should not be used by SPDK even if an IOMMU is detected. Note that this option requires that DAOS be run as root.
func (*Server) WithDisableVMD ¶
WithDisableVMD can be used to set the state of VMD functionality; if disabled, VMD devices will not be used even if they exist.
func (*Server) WithEngines ¶
WithEngines sets the list of engine configurations.
func (*Server) WithFabricAuthKey ¶
WithFabricAuthKey sets the top-level fabric authorization key.
func (*Server) WithFabricProvider ¶
WithFabricProvider sets the top-level fabric provider.
func (*Server) WithFaultCb ¶
WithFaultCb sets the path to the fault callback script.
func (*Server) WithFaultPath ¶
WithFaultPath sets the fault path (identification string e.g. rack/shelf/node).
func (*Server) WithFirmwareHelperLogFile ¶
WithFirmwareHelperLogFile sets the path to the daos_firmware_helper logfile.
func (*Server) WithHelperLogFile ¶
WithHelperLogFile sets the path to the daos_server_helper logfile.
func (*Server) WithHyperthreads ¶
WithHyperthreads enables or disables hyperthread support.
func (*Server) WithMgmtSvcReplicas ¶
WithMgmtSvcReplicas sets the MS replicas list.
func (*Server) WithModules ¶
WithModules sets a list of server modules to load.
func (*Server) WithNrHugepages ¶
WithNrHugepages sets the number of huge pages to be used (total for all engines).
func (*Server) WithNumSecondaryEndpoints ¶
WithNumSecondaryEndpoints sets the number of network endpoints for each engine's secondary provider.
func (*Server) WithSocketDir ¶
WithSocketDir sets the default socket directory.
func (*Server) WithSystemName ¶
WithSystemName sets the system name.
func (*Server) WithSystemRamReserved ¶
WithSystemRamReserved sets the amount of system memory to reserve for system (non-DAOS) use. In units of GiB.
func (*Server) WithTelemetryPort ¶
WithTelemetryPort sets the port for the telemetry exporter.
func (*Server) WithTransportConfig ¶
func (cfg *Server) WithTransportConfig(cfgTransport *security.TransportConfig) *Server
WithTransportConfig sets the gRPC transport configuration.
type SupportConfig ¶
type SupportConfig struct {
FileTransferExec string `yaml:"file_transfer_exec,omitempty"`
}
SupportConfig is defined here to avoid an import cycle.