gontenttype

package module
v0.3.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 27, 2025 License: Apache-2.0 Imports: 5 Imported by: 0

README

gontenttype

  1. Detect the content type of a given string

    1. JSON (application/json)
    2. JSON Lines (application/jsonl)
    3. XML (application/xml)
    4. CSV (text/csv)
    5. Parquet (application/vnd.apache.parquet)
  2. Validate the syntax of a given string against any of the supported content types

Detect content type

Example
Usage
package main

import (
	"fmt"
    
	"github.com/costinmrr/gontenttype"
)

// main passes one sample string per supported format to gontenttype.Detect
// and prints the content type reported for each. The trailing comment on
// every Println is the expected output line.
func main() {
	// json
	myStr := `{"foo":"bar"}`
	contentType := gontenttype.Detect(myStr)
	fmt.Println(contentType) // application/json

	// jsonl
	// An interpreted (double-quoted) literal is required here: inside a raw
	// backtick literal, \n is a backslash followed by the letter n rather
	// than a line break, so the two records would sit on a single line.
	myStr = "{\"foo\":\"bar\"}\n{\"foo\":\"baz\"}"
	contentType = gontenttype.Detect(myStr)
	fmt.Println(contentType) // application/jsonl

	// xml
	myStr = `<foo>bar</foo>`
	contentType = gontenttype.Detect(myStr)
	fmt.Println(contentType) // application/xml

	// csv
	myStr = `foo,bar`
	contentType = gontenttype.Detect(myStr)
	fmt.Println(contentType) // text/csv

	// parquet
	myStr = `PAR1...[parquet content]...PAR1` // placeholder: substitute valid Parquet content
	contentType = gontenttype.Detect(myStr)
	fmt.Println(contentType) // application/vnd.apache.parquet
}
Output
application/json
application/jsonl
application/xml
text/csv
application/vnd.apache.parquet

Validate syntax

Example
package main

import (
	"fmt"
	
	"github.com/costinmrr/gontenttype/types/json"
	"github.com/costinmrr/gontenttype/types/jsonlines"
	"github.com/costinmrr/gontenttype/types/xml"
	"github.com/costinmrr/gontenttype/types/csv"
	"github.com/costinmrr/gontenttype/types/parquet"
)

// main runs each syntax validator twice, first on a well-formed sample and
// then on a malformed one, printing the returned error each time. The
// trailing comment on every line is the expected output.
func main() {
	// json
	fmt.Println(json.IsJSON(`{"foo":"bar"}`)) // <nil>
	fmt.Println(json.IsJSON(`{"foo":"bar"`))  // unexpected end of JSON input

	// jsonl
	fmt.Println(jsonlines.IsJSONLines("{\"foo\":\"bar\"}\n{\"foo\":\"baz\"}")) // <nil>
	fmt.Println(jsonlines.IsJSONLines("{\"foo\":\"bar\"}\n{\"foo\":\"baz\""))  // error on line 2: unexpected end of JSON input

	// xml
	fmt.Println(xml.IsXML(`<foo>bar</foo>`)) // <nil>
	fmt.Println(xml.IsXML(`<foo>bar</foo`))  // XML syntax error on line 1: unexpected EOF

	// csv
	fmt.Println(csv.IsCSV(`foo,bar`))                 // <nil>
	fmt.Println(csv.IsCSV("col1,col2\nfoo,bar,baz")) // record on line 2: wrong number of fields

	// parquet
	// The first input is a placeholder: use a valid parquet content.
	fmt.Println(parquet.IsParquet(`PAR1...[parquet content]...PAR1`)) // <nil>
	fmt.Println(parquet.IsParquet(`PAR1invalidPAR1`))                 // invalid parquet file: reading footer of parquet file: strings.Reader.ReadAt: negative offset
}
Output
<nil>
unexpected end of JSON input
<nil>
error on line 2: unexpected end of JSON input
<nil>
XML syntax error on line 1: unexpected EOF
<nil>
record on line 2: wrong number of fields
<nil>
invalid parquet file: reading footer of parquet file: strings.Reader.ReadAt: negative offset

Benchmarks

go test -bench=.
Output
goos: darwin
goarch: arm64
pkg: github.com/costinmrr/gontenttype
cpu: Apple M1 Pro
BenchmarkDetectJSON_SimpleString-8               3261396               362.7 ns/op
BenchmarkDetectJSON_1KB-8                         125338              9585 ns/op
BenchmarkDetectJSON_100KB-8                         1310            909954 ns/op
BenchmarkDetectJSON_1MB-8                            153           7776861 ns/op
BenchmarkDetectJSON_10MB-8                            14          77368839 ns/op
BenchmarkDetectJSONLines_SimpleString-8           764656              1426 ns/op
BenchmarkDetectJSONLines_1KB-8                  12037003                99.02 ns/op
BenchmarkDetectJSONLines_100KB-8                12053284                98.60 ns/op
BenchmarkDetectJSONLines_1MB-8                  12237555                98.79 ns/op
BenchmarkDetectJSONLines_10MB-8                 12056943                98.68 ns/op
BenchmarkDetectXML_SimpleString-8                1545378               775.3 ns/op
BenchmarkDetectXML_1KB-8                           45462             26035 ns/op
BenchmarkDetectXML_100KB-8                           511           2334531 ns/op
BenchmarkDetectXML_1MB-8                              48          24450518 ns/op
BenchmarkDetectXML_10MB-8                              5         248890150 ns/op
BenchmarkDetectCSV_SimpleString-8                 679507              1603 ns/op
BenchmarkDetectCSV_1KB-8                           56437             20994 ns/op
BenchmarkDetectCSV_100KB-8                           648           1887705 ns/op
BenchmarkDetectCSV_1MB-8                              58          20405615 ns/op
BenchmarkDetectCSV_10MB-8                              6         194822194 ns/op
BenchmarkDetectUnsupported_SimpleString-8         748053              1501 ns/op
goos: linux
goarch: amd64
pkg: github.com/costinmrr/gontenttype
cpu: AMD EPYC 7763 64-Core Processor                
BenchmarkDetectJSON_SimpleString-4          	 2135098	       580.3 ns/op
BenchmarkDetectJSON_1KB-4                   	   89289	     13288 ns/op
BenchmarkDetectJSON_100KB-4                 	     901	   1303257 ns/op
BenchmarkDetectJSON_1MB-4                   	     100	  10029120 ns/op
BenchmarkDetectJSON_10MB-4                  	       9	 111325401 ns/op
BenchmarkDetectJSONLines_SimpleString-4     	  503854	      2244 ns/op
BenchmarkDetectJSONLines_1KB-4              	 9440169	       127.4 ns/op
BenchmarkDetectJSONLines_100KB-4            	 9543016	       129.8 ns/op
BenchmarkDetectJSONLines_1MB-4              	 9292095	       126.8 ns/op
BenchmarkDetectJSONLines_10MB-4             	 9430472	       127.0 ns/op
BenchmarkDetectXML_SimpleString-4           	  465271	      2395 ns/op
BenchmarkDetectXML_1KB-4                    	   30892	     38756 ns/op
BenchmarkDetectXML_100KB-4                  	     348	   3417666 ns/op
BenchmarkDetectXML_1MB-4                    	      31	  36768652 ns/op
BenchmarkDetectXML_10MB-4                   	       3	 343983510 ns/op
BenchmarkDetectCSV_SimpleString-4           	  363416	      3219 ns/op
BenchmarkDetectCSV_1KB-4                    	   42157	     29058 ns/op
BenchmarkDetectCSV_100KB-4                  	     447	   2558199 ns/op
BenchmarkDetectCSV_1MB-4                    	      30	  34082715 ns/op
BenchmarkDetectCSV_10MB-4                   	       4	 302733827 ns/op
BenchmarkDetectUnsupported_SimpleString-4   	  331250	      3157 ns/op

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ContentType

// ContentType is a string naming the media type of a piece of content.
type ContentType string

// Known ContentType values. Unsupported is the empty string; every other
// constant holds the media type string for its format.
const (
	Unsupported = ContentType("")
	JSON        = ContentType("application/json")
	JSONLines   = ContentType("application/jsonl")
	XML         = ContentType("application/xml")
	CSV         = ContentType("text/csv")
	Parquet     = ContentType("application/vnd.apache.parquet")
)

func Detect added in v0.1.1

func Detect(content string) ContentType

Directories

Path Synopsis
types
csv
xml

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL