Documentation
¶
Overview ¶
Package shape provides unified body shape analysis with content-type dispatch. It follows the same engine pattern as pkg/textquery for query dispatch.
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type CSVColumn ¶
type CSVColumn struct {
Name string `json:"name"` // Column name (per ExtractCSVColumns: from the header row, or generated when the first row looks like data)
Type string `json:"type"` // Detected value type: string, number, boolean
Format string `json:"format,omitempty"` // Detected format, omitted from JSON when empty: uuid, iso8601, url, email, enum
EmptyFrequency float64 `json:"empty_frequency"` // Fraction of null/empty values
Examples []string `json:"examples,omitempty"` // Up to 3 example values
EnumValues []string `json:"enum_values,omitempty"` // Populated when Format is "enum"
}
CSVColumn describes a single CSV column.
type CSVColumnStats ¶
type CSVColumnStats struct {
Columns []CSVColumn `json:"columns"` // One description per CSV column
RowCount int `json:"row_count"` // Number of rows — NOTE(review): confirm whether the header row is included
HasHeaders bool `json:"has_headers"` // Presumably true when the first row was treated as headers rather than data — confirm
SampleCount int `json:"sample_count"` // Presumably the number of CSV bodies analyzed (see ExtractCSVColumnsMerged) — TODO confirm
}
CSVColumnStats represents the column structure of a CSV document.
func ExtractCSVColumns ¶
func ExtractCSVColumns(body []byte) (*CSVColumnStats, error)
ExtractCSVColumns parses a CSV body and detects column types and formats. It uses the first row as headers, falling back to generated column names if the first row appears to be data.
func ExtractCSVColumnsMerged ¶
func ExtractCSVColumnsMerged(bodies [][]byte) (*CSVColumnStats, error)
ExtractCSVColumnsMerged parses multiple CSV bodies and combines their rows for more accurate type and format detection.
type Engine ¶
type Engine struct{} // Empty struct: the type declares no fields
Engine dispatches shape analysis by content type. It follows the same unified-engine pattern as textquery.Engine.
type FormKeyStat ¶
type FormKeyStat struct {
Key string `json:"key"` // Form field key
Frequency float64 `json:"frequency"` // 0.0-1.0; presumably the fraction of analyzed bodies containing Key — TODO confirm
Examples []string `json:"examples,omitempty"` // Example values; omitted from JSON when empty
}
FormKeyStat describes a form field in form-urlencoded bodies.
type HTMLDOMOutline ¶
type HTMLDOMOutline struct {
Title string `json:"title,omitempty"` // Presumably the document's <title> text — confirm against the extractor
TagCounts map[string]int `json:"tag_counts"` // Tag counts; presumably keyed by tag name — confirm
ElementIDs []HTMLElementID `json:"element_ids,omitempty"` // Elements that carry an id attribute
Forms []HTMLFormOutline `json:"forms,omitempty"` // Form elements found in the document
MetaTags []HTMLMetaTag `json:"meta_tags,omitempty"` // Meta elements found in the document
Truncated bool `json:"truncated,omitempty"` // NOTE(review): truncation condition is not visible here — confirm what limit sets this
SampleCount int `json:"sample_count"` // Presumably the number of HTML bodies analyzed — TODO confirm
}
HTMLDOMOutline represents the structural summary of an HTML document.
func ExtractHTMLOutline ¶
func ExtractHTMLOutline(body []byte) (*HTMLDOMOutline, error)
ExtractHTMLOutline parses an HTML body and returns a structural summary including tag counts, elements with IDs, forms, and meta tags.
type HTMLElementID ¶
HTMLElementID records an element with an id attribute.
type HTMLFormInput ¶
type HTMLFormInput struct {
Name string `json:"name,omitempty"` // Presumably the input's name attribute — confirm; omitted from JSON when empty
Type string `json:"type,omitempty"` // Presumably the input's type attribute — confirm; omitted from JSON when empty
}
HTMLFormInput describes an input element within a form.
type HTMLFormOutline ¶
type HTMLFormOutline struct {
Action string `json:"action,omitempty"` // Presumably the form's action attribute — confirm
Method string `json:"method,omitempty"` // Presumably the form's method attribute — confirm
Inputs []HTMLFormInput `json:"inputs,omitempty"` // Input elements within the form
}
HTMLFormOutline describes a form element in an HTML document.
type HTMLMetaTag ¶
type HTMLMetaTag struct {
Name string `json:"name,omitempty"` // Presumably the meta element's name attribute — confirm
Content string `json:"content,omitempty"` // Presumably the meta element's content attribute — confirm
}
HTMLMetaTag describes a meta element.
type Result ¶
type Result struct {
ContentCategory string `json:"content_category"` // Which shape engine was used: json, yaml, xml, csv, html, form
// JSON/YAML fields
Schema *jsonschema.Schema `json:"schema,omitempty"`
FieldStats []js.FieldStat `json:"field_stats,omitempty"`
SampleCount int `json:"sample_count,omitempty"` // Presumably the number of bodies analyzed — TODO confirm
AllMatch bool `json:"all_match,omitempty"` // NOTE(review): semantics not visible here — confirm what is being matched
// XML fields
XMLHierarchy *XMLElementHierarchy `json:"xml_hierarchy,omitempty"`
// CSV fields
CSVColumns *CSVColumnStats `json:"csv_columns,omitempty"`
// HTML fields
HTMLOutline *HTMLDOMOutline `json:"html_outline,omitempty"`
// Form fields
FormKeys []FormKeyStat `json:"form_keys,omitempty"`
// Skip info (for binary or unsupported types)
Skipped bool `json:"skipped,omitempty"`
SkipReason string `json:"skip_reason,omitempty"` // Presumably set only when Skipped is true — confirm
}
Result is a union envelope for shape analysis output. The ContentCategory field indicates which shape engine was used, and the corresponding format-specific field is populated.
type XMLElement ¶
type XMLElement struct {
Name string `json:"name"` // Element tag name
Attributes []string `json:"attributes,omitempty"` // Presumably attribute names only (values are not represented in this type) — confirm
Children []*XMLElement `json:"children,omitempty"` // Child elements in the outline
ChildCount int `json:"child_count"` // Child count — NOTE(review): may differ from len(Children) if the outline is depth-limited; confirm
Repeated bool `json:"repeated,omitempty"` // Appears multiple times as sibling
}
XMLElement represents a single element in the XML hierarchy.
type XMLElementHierarchy ¶
type XMLElementHierarchy struct {
Root *XMLElement `json:"root"` // Root element of the outline
MaxDepth int `json:"max_depth"` // NOTE(review): unclear whether this is the deepest level observed or the applied depth limit — confirm
Truncated bool `json:"truncated,omitempty"` // Presumably set when the depth limit cut the outline short — confirm
SampleCount int `json:"sample_count"` // Presumably the number of XML bodies analyzed — TODO confirm
}
XMLElementHierarchy represents the structural outline of an XML document.
func ExtractXMLHierarchy ¶
func ExtractXMLHierarchy(body []byte) (*XMLElementHierarchy, error)
ExtractXMLHierarchy parses an XML body and returns a structural outline of the element tree with tag names, attributes, child counts, and repeated element flags. It limits the depth of the outline to prevent excessive output.