Documentation
¶
Overview ¶
Package document provides a generic document processor demonstrating the Graphable implementation pattern for text-rich content like documents, maintenance records, and observations.
This package serves as a reference implementation showing how to:
- Create domain-specific payloads that implement the Graphable interface
- Generate federated 6-part entity IDs with organizational context
- Produce semantic triples using registered vocabulary predicates
- Transform incoming JSON into meaningful graph structures
- Support multiple document types with a single processor
The document processor handles:
- General documents (manuals, reports, guides)
- Maintenance records (work orders, repairs)
- Observations (safety reports, inspections)
- Sensor documents (rich-text sensor descriptions)
Payload types are split into separate files:
- payload_document.go: Document struct
- payload_maintenance.go: Maintenance struct
- payload_observation.go: Observation struct
- payload_sensor.go: SensorDocument struct
Index ¶
- Constants
- func CheckPayloadRegistration() error
- func NewComponent(rawConfig json.RawMessage, deps component.Dependencies) (component.Discoverable, error)
- func Register(registry *component.Registry) error
- func RegisterVocabulary()
- type Component
- func (c *Component) ConfigSchema() component.ConfigSchema
- func (c *Component) DataFlow() component.FlowMetrics
- func (c *Component) Health() component.HealthStatus
- func (c *Component) Initialize() error
- func (c *Component) InputPorts() []component.Port
- func (c *Component) IsStarted() bool
- func (c *Component) Meta() component.Metadata
- func (c *Component) OutputPorts() []component.Port
- func (c *Component) SetContentStore(store *objectstore.Store)
- func (c *Component) Start(ctx context.Context) error
- func (c *Component) Stop(timeout time.Duration) error
- type ComponentConfig
- type Config
- type Document
- func (d *Document) ContentFields() map[string]string
- func (d *Document) EntityID() string
- func (d *Document) MarshalJSON() ([]byte, error)
- func (d *Document) RawContent() map[string]string
- func (d *Document) Schema() message.Type
- func (d *Document) SetStorageRef(ref *message.StorageReference)
- func (d *Document) StorageRef() *message.StorageReference
- func (d *Document) Triples() []message.Triple
- func (d *Document) UnmarshalJSON(data []byte) error
- func (d *Document) Validate() error
- type Maintenance
- func (m *Maintenance) ContentFields() map[string]string
- func (m *Maintenance) EntityID() string
- func (m *Maintenance) MarshalJSON() ([]byte, error)
- func (m *Maintenance) RawContent() map[string]string
- func (m *Maintenance) Schema() message.Type
- func (m *Maintenance) SetStorageRef(ref *message.StorageReference)
- func (m *Maintenance) StorageRef() *message.StorageReference
- func (m *Maintenance) Triples() []message.Triple
- func (m *Maintenance) UnmarshalJSON(data []byte) error
- func (m *Maintenance) Validate() error
- type Observation
- func (o *Observation) ContentFields() map[string]string
- func (o *Observation) EntityID() string
- func (o *Observation) MarshalJSON() ([]byte, error)
- func (o *Observation) RawContent() map[string]string
- func (o *Observation) Schema() message.Type
- func (o *Observation) SetStorageRef(ref *message.StorageReference)
- func (o *Observation) StorageRef() *message.StorageReference
- func (o *Observation) Triples() []message.Triple
- func (o *Observation) UnmarshalJSON(data []byte) error
- func (o *Observation) Validate() error
- type Payload
- type Processor
- type SensorDocument
- func (s *SensorDocument) ContentFields() map[string]string
- func (s *SensorDocument) EntityID() string
- func (s *SensorDocument) MarshalJSON() ([]byte, error)
- func (s *SensorDocument) RawContent() map[string]string
- func (s *SensorDocument) Schema() message.Type
- func (s *SensorDocument) SetStorageRef(ref *message.StorageReference)
- func (s *SensorDocument) StorageRef() *message.StorageReference
- func (s *SensorDocument) Triples() []message.Triple
- func (s *SensorDocument) UnmarshalJSON(data []byte) error
- func (s *SensorDocument) Validate() error
Constants ¶
const (
	// Dublin Core metadata predicates (for triple metadata - NOT content body)
	// See: https://www.dublincore.org/specifications/dublin-core/dcmi-terms/
	PredicateDCTitle      = "dc.terms.title"      // Document title
	PredicateDCCreator    = "dc.terms.creator"    // Author or creator
	PredicateDCSubject    = "dc.terms.subject"    // Topic/category of content
	PredicateDCType       = "dc.terms.type"       // Nature/genre of content
	PredicateDCDate       = "dc.terms.date"       // Relevant date (creation, publication)
	PredicateDCIdentifier = "dc.terms.identifier" // Unique identifier
	PredicateDCFormat     = "dc.terms.format"     // File format or media type
	PredicateDCLanguage   = "dc.terms.language"   // Language of content

	// Core content predicates (legacy - prefer Dublin Core for metadata)
	// NOTE: content.text.body should NOT be used in triples for ContentStorable types.
	// Body content belongs in ObjectStore, not in triples.
	PredicateContentTitle       = "content.text.title"
	PredicateContentDescription = "content.text.description"
	PredicateContentBody        = "content.text.body" // DEPRECATED for ContentStorable
	PredicateContentSummary     = "content.text.summary"

	// Classification predicates
	PredicateContentType     = "content.classification.type"
	PredicateContentCategory = "content.classification.category"
	PredicateContentTag      = "content.classification.tag"

	// Maintenance-specific predicates
	PredicateMaintenanceTechnician = "maintenance.work.technician"
	PredicateMaintenanceDate       = "maintenance.work.completion_date"
	PredicateMaintenanceStatus     = "maintenance.work.status"

	// Observation-specific predicates
	PredicateObservationObserver   = "observation.record.observer"
	PredicateObservationSeverity   = "observation.record.severity"
	PredicateObservationObservedAt = "observation.record.observed_at"

	// Sensor document predicates (for rich-text sensor descriptions)
	PredicateSensorLocation = "sensor.document.location"
	PredicateSensorReading  = "sensor.document.reading"
	PredicateSensorUnit     = "sensor.document.unit"

	// Time predicates
	PredicateTimeCreated = "time.document.created"
	PredicateTimeUpdated = "time.document.updated"
)
Predicate constants for the content domain. These follow the three-level dotted notation: domain.category.property
Variables ¶
This section is empty.
Functions ¶
func CheckPayloadRegistration ¶
func CheckPayloadRegistration() error
CheckPayloadRegistration returns any errors that occurred during payload registration. Call this during component initialization to verify all payloads registered correctly.
func NewComponent ¶
func NewComponent(rawConfig json.RawMessage, deps component.Dependencies) (component.Discoverable, error)
NewComponent creates a new document processor component from configuration.
func Register ¶
Register registers the document processor component with the given registry. This enables the component to be discovered and instantiated by the component management system.
The registration includes:
- Component factory function for creating instances
- Configuration schema for validation and UI generation
- Type information (processor, domain: content)
- Protocol identifier for component routing
- Version information for compatibility tracking
func RegisterVocabulary ¶
func RegisterVocabulary()
RegisterVocabulary registers all content domain predicates with the vocabulary system.
Types ¶
type Component ¶
type Component struct {
// contains filtered or unexported fields
}
Component wraps the domain-specific document processor with component lifecycle.
func (*Component) ConfigSchema ¶
func (c *Component) ConfigSchema() component.ConfigSchema
ConfigSchema returns the configuration schema for this processor.
func (*Component) DataFlow ¶
func (c *Component) DataFlow() component.FlowMetrics
DataFlow returns current data flow metrics for this processor.
func (*Component) Health ¶
func (c *Component) Health() component.HealthStatus
Health returns the current health status of this processor.
func (*Component) Initialize ¶
Initialize prepares the component (a no-op for the document processor).
func (*Component) InputPorts ¶
InputPorts returns the NATS input ports this processor subscribes to.
func (*Component) OutputPorts ¶
OutputPorts returns the NATS output port for Graphable documents.
func (*Component) SetContentStore ¶
func (c *Component) SetContentStore(store *objectstore.Store)
SetContentStore sets the ObjectStore for content storage. When set, ContentStorable payloads will have their content stored before publishing.
type ComponentConfig ¶
type ComponentConfig struct {
// Ports defines NATS input/output subjects for message routing
Ports *component.PortConfig `json:"ports" schema:"type:ports,description:Port configuration,category:basic"`
// OrgID is the organization identifier for federated entity IDs
OrgID string `json:"org_id" schema:"type:string,description:Organization identifier,category:basic,required:true"`
// Platform is the platform/product identifier for federated entity IDs
Platform string `json:"platform" schema:"type:string,description:Platform identifier,category:basic,required:true"`
}
ComponentConfig holds configuration for the document processor component.
func DefaultConfig ¶
func DefaultConfig() ComponentConfig
DefaultConfig returns the default configuration for the document processor.
type Config ¶
type Config struct {
// OrgID is the organization identifier (e.g., "acme")
// This becomes the first part of federated entity IDs.
OrgID string
// Platform is the platform/product identifier (e.g., "logistics")
// This becomes the second part of federated entity IDs.
Platform string
}
Config holds the configuration for the document processor. It provides the organizational context applied to all processed documents.
type Document ¶
type Document struct {
// Input fields (from incoming JSON)
ID string `json:"id"` // e.g., "doc-001"
Title string `json:"title"` // e.g., "Safety Manual"
Description string `json:"description"` // Primary text for semantic search
Body string `json:"body"` // Full text content (stored in ObjectStore, NOT in triples)
Summary string `json:"summary"` // Brief summary
Category string `json:"category"` // e.g., "safety", "operations"
Tags []string `json:"tags"` // Classification tags
CreatedAt string `json:"created_at"` // ISO timestamp
UpdatedAt string `json:"updated_at"` // ISO timestamp
// Context fields (set by processor from config, preserved through JSON for NATS transport)
OrgID string `json:"org_id,omitempty"` // e.g., "acme"
Platform string `json:"platform,omitempty"` // e.g., "logistics"
// contains filtered or unexported fields
}
Document represents a generic document entity. It implements the ContentStorable interface with federated entity IDs, semantic predicates, and content storage.
ContentStorable pattern:
- Triples() returns metadata ONLY (Dublin Core predicates, NO body)
- ContentFields() maps semantic roles to field names in stored content
- RawContent() returns content to store in ObjectStore
- StorageRef() returns reference to stored content (set by processor)
func (*Document) ContentFields ¶
ContentFields implements message.ContentStorable interface. Returns semantic role → field name mapping for content stored in ObjectStore. Embedding workers use these roles to find text for embedding generation.
func (*Document) EntityID ¶
EntityID returns a deterministic 6-part federated entity ID following the pattern: {org}.{platform}.{domain}.{system}.{type}.{instance}
Example: "acme.logistics.content.document.safety.doc-001"
func (*Document) MarshalJSON ¶
MarshalJSON implements json.Marshaler for Document.
func (*Document) RawContent ¶
RawContent implements message.ContentStorable interface. Returns content to store in ObjectStore. Field names here match values in ContentFields().
func (*Document) SetStorageRef ¶
func (d *Document) SetStorageRef(ref *message.StorageReference)
SetStorageRef is called by processor after storing content in ObjectStore.
func (*Document) StorageRef ¶
func (d *Document) StorageRef() *message.StorageReference
StorageRef implements message.Storable interface. Returns reference to where content is stored in ObjectStore.
func (*Document) Triples ¶
Triples returns METADATA ONLY facts about this document using Dublin Core predicates. Large content fields (body, description) are stored in ObjectStore, NOT in triples. This prevents bloating entity state and enables efficient embedding extraction.
func (*Document) UnmarshalJSON ¶
UnmarshalJSON implements json.Unmarshaler for Document.
type Maintenance ¶
type Maintenance struct {
// Input fields
ID string `json:"id"` // e.g., "maint-001"
Title string `json:"title"` // e.g., "Pump Repair"
Description string `json:"description"` // Work description
Body string `json:"body"` // Detailed work log (stored in ObjectStore)
Technician string `json:"technician"` // Who performed the work
Status string `json:"status"` // completed, pending, in_progress
CompletionDate string `json:"completion_date"` // ISO timestamp
Category string `json:"category"` // equipment, facility, etc.
Tags []string `json:"tags"`
// Context fields (set by processor from config, preserved through JSON for NATS transport)
OrgID string `json:"org_id,omitempty"`
Platform string `json:"platform,omitempty"`
// contains filtered or unexported fields
}
Maintenance represents a maintenance record entity. It implements ContentStorable.
func (*Maintenance) ContentFields ¶
func (m *Maintenance) ContentFields() map[string]string
ContentFields implements message.ContentStorable interface.
func (*Maintenance) EntityID ¶
func (m *Maintenance) EntityID() string
EntityID returns a federated entity ID for the maintenance record. Example: "acme.logistics.maintenance.work.completed.maint-001"
func (*Maintenance) MarshalJSON ¶
func (m *Maintenance) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*Maintenance) RawContent ¶
func (m *Maintenance) RawContent() map[string]string
RawContent implements message.ContentStorable interface.
func (*Maintenance) Schema ¶
func (m *Maintenance) Schema() message.Type
Schema returns the message type for maintenance records.
func (*Maintenance) SetStorageRef ¶
func (m *Maintenance) SetStorageRef(ref *message.StorageReference)
SetStorageRef is called by processor after storing content.
func (*Maintenance) StorageRef ¶
func (m *Maintenance) StorageRef() *message.StorageReference
StorageRef implements message.Storable interface.
func (*Maintenance) Triples ¶
func (m *Maintenance) Triples() []message.Triple
Triples returns METADATA ONLY facts about this maintenance record. Body content is stored in ObjectStore, NOT in triples.
func (*Maintenance) UnmarshalJSON ¶
func (m *Maintenance) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
func (*Maintenance) Validate ¶
func (m *Maintenance) Validate() error
Validate checks required fields.
type Observation ¶
type Observation struct {
// Input fields
ID string `json:"id"` // e.g., "obs-001"
Title string `json:"title"` // e.g., "Safety Hazard Report"
Description string `json:"description"` // What was observed
Body string `json:"body"` // Detailed notes (stored in ObjectStore)
Observer string `json:"observer"` // Who made the observation
Severity string `json:"severity"` // low, medium, high, critical
ObservedAt string `json:"observed_at"` // ISO timestamp
Category string `json:"category"` // safety, quality, environment
Tags []string `json:"tags"`
// Context fields (set by processor from config, preserved through JSON for NATS transport)
OrgID string `json:"org_id,omitempty"`
Platform string `json:"platform,omitempty"`
// contains filtered or unexported fields
}
Observation represents an observation or inspection record. It implements ContentStorable.
func (*Observation) ContentFields ¶
func (o *Observation) ContentFields() map[string]string
ContentFields implements message.ContentStorable interface.
func (*Observation) EntityID ¶
func (o *Observation) EntityID() string
EntityID returns a federated entity ID for the observation. Example: "acme.logistics.observation.record.high.obs-001"
func (*Observation) MarshalJSON ¶
func (o *Observation) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*Observation) RawContent ¶
func (o *Observation) RawContent() map[string]string
RawContent implements message.ContentStorable interface.
func (*Observation) Schema ¶
func (o *Observation) Schema() message.Type
Schema returns the message type for observations.
func (*Observation) SetStorageRef ¶
func (o *Observation) SetStorageRef(ref *message.StorageReference)
SetStorageRef is called by processor after storing content.
func (*Observation) StorageRef ¶
func (o *Observation) StorageRef() *message.StorageReference
StorageRef implements message.Storable interface.
func (*Observation) Triples ¶
func (o *Observation) Triples() []message.Triple
Triples returns METADATA ONLY facts about this observation. Body content is stored in ObjectStore, NOT in triples.
func (*Observation) UnmarshalJSON ¶
func (o *Observation) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
func (*Observation) Validate ¶
func (o *Observation) Validate() error
Validate checks required fields.
type Payload ¶
Payload represents a document that implements both Graphable and Payload interfaces. Used as a common return type from the processor.
type Processor ¶
type Processor struct {
// contains filtered or unexported fields
}
Processor transforms incoming JSON document data into Graphable payloads. It applies organizational context from configuration and produces document instances with proper federated entity IDs and semantic triples.
func NewProcessor ¶
NewProcessor creates a new document processor with the given configuration.
func (*Processor) Process ¶
Process transforms incoming JSON data into a Graphable document payload. It determines the document type from the "type" field and creates the appropriate payload.
Expected JSON format (common fields):
{
"id": "doc-001",
"type": "document|maintenance|observation|sensor_doc",
"title": "Document Title",
"description": "Document description for semantic search",
...type-specific fields...
}
type SensorDocument ¶
type SensorDocument struct {
// Input fields
ID string `json:"id"` // e.g., "sensor-doc-001"
Title string `json:"title"` // e.g., "Temperature Sensor T-42"
Description string `json:"description"` // Sensor description
Body string `json:"body"` // Detailed documentation (stored in ObjectStore)
Location string `json:"location"` // Physical location description
Reading float64 `json:"reading"` // Current/reference reading
Unit string `json:"unit"` // Unit of measurement
Category string `json:"category"` // temperature, pressure, humidity
Tags []string `json:"tags"`
// Context fields (set by processor from config, preserved through JSON for NATS transport)
OrgID string `json:"org_id,omitempty"`
Platform string `json:"platform,omitempty"`
// contains filtered or unexported fields
}
SensorDocument represents rich-text documentation for a sensor. It implements ContentStorable.
func (*SensorDocument) ContentFields ¶
func (s *SensorDocument) ContentFields() map[string]string
ContentFields implements message.ContentStorable interface.
func (*SensorDocument) EntityID ¶
func (s *SensorDocument) EntityID() string
EntityID returns a federated entity ID for the sensor document. Example: "acme.logistics.sensor.document.temperature.sensor-doc-001"
func (*SensorDocument) MarshalJSON ¶
func (s *SensorDocument) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler.
func (*SensorDocument) RawContent ¶
func (s *SensorDocument) RawContent() map[string]string
RawContent implements message.ContentStorable interface.
func (*SensorDocument) Schema ¶
func (s *SensorDocument) Schema() message.Type
Schema returns the message type for sensor documents.
func (*SensorDocument) SetStorageRef ¶
func (s *SensorDocument) SetStorageRef(ref *message.StorageReference)
SetStorageRef is called by processor after storing content.
func (*SensorDocument) StorageRef ¶
func (s *SensorDocument) StorageRef() *message.StorageReference
StorageRef implements message.Storable interface.
func (*SensorDocument) Triples ¶
func (s *SensorDocument) Triples() []message.Triple
Triples returns METADATA ONLY facts about this sensor document. Body content is stored in ObjectStore, NOT in triples.
func (*SensorDocument) UnmarshalJSON ¶
func (s *SensorDocument) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler.
func (*SensorDocument) Validate ¶
func (s *SensorDocument) Validate() error
Validate checks required fields.