Documentation
¶
Overview ¶
Package byline implements the io.Reader interface for processing an io.Reader line by line. You can apply UNIX text-processing principles to its Reader (as with awk, grep, sed, ...).
Install
go get -u github.com/msoap/byline
Usage
import "github.com/msoap/byline"
// Create new line-by-line Reader from io.Reader:
lr := byline.NewReader(reader)
// Add a stack of filter functions to the Reader:
lr.MapString(func(line string) string {return "prefix_" + line}).GrepByRegexp(regexp.MustCompile("only this"))
// Read all content
result, err := lr.ReadAll()
// Use everywhere instead of io.Reader
_, err := io.Copy(os.Stdout, lr)
// Or in one place
result, err := byline.NewReader(reader).MapString(func(line string) string {return "prefix_" + line}).ReadAll()
Example ¶
package main
import (
"bytes"
"fmt"
"io"
"regexp"
"strings"
"github.com/msoap/byline"
)
// main demonstrates a multi-stage byline pipeline over a small CSV-like
// document: empty lines and the "CSV ..." preamble are dropped, fields are
// split on ',' or ';', the header is omitted, reading stops at the footer,
// every remaining line is prefixed with "Z" and finally reformatted.
func main() {
	// The blank line after the first record is deliberate: the empty-line
	// filter removes it, but it still advances NR, which is why the
	// documented output goes from line:4 to line:6.
	reader := strings.NewReader(`CSV Title
CSV description
ID,NAME,PRICE
A001,name one,12.3

A002,second row;7.1
A003,three row;15.51
Total: ....
Some text
`)

	lr := byline.NewReader(reader).
		GrepString(func(line string) bool {
			// skip empty lines
			return line != "" && line != "\n"
		}).
		Grep(func(line []byte) bool {
			// drop the "CSV Title" / "CSV description" preamble
			return !bytes.HasPrefix(line, []byte("CSV"))
		}).
		SetFS(regexp.MustCompile(`[,;]`)).
		AWKMode(func(line string, fields []string, vars byline.AWKVars) (string, error) {
			// skip header
			if strings.HasPrefix(fields[0], "ID") {
				return "", byline.ErrOmitLine
			}

			// skip footer: io.EOF ends the stream, so "Some text" is never read
			if strings.HasPrefix(fields[0], "Total:") {
				return "", io.EOF
			}

			return line, nil
		}).
		MapString(func(line string) string {
			return "Z" + line
		}).
		AWKMode(func(line string, fields []string, vars byline.AWKVars) (string, error) {
			// every surviving record must have ID, NAME and PRICE
			if vars.NF < 3 {
				return "", fmt.Errorf("csv parse failed for %q", line)
			}

			return fmt.Sprintf("%s - %s (line:%d)", fields[0], fields[1], vars.NR), nil
		})

	result, err := lr.ReadAllString()
	fmt.Print("\n", result, err)
}
Output: ZA001 - name one (line:4) ZA002 - second row (line:6) ZA003 - three row (line:7) <nil>
Index ¶
- Variables
- type AWKVars
- type Reader
- func (lr *Reader) AWKMode(filterFn func(line string, fields []string, vars AWKVars) (string, error)) *Reader
- func (lr *Reader) Discard() error
- func (lr *Reader) Each(filterFn func([]byte)) *Reader
- func (lr *Reader) EachString(filterFn func(string)) *Reader
- func (lr *Reader) Grep(filterFn func([]byte) bool) *Reader
- func (lr *Reader) GrepByRegexp(re *regexp.Regexp) *Reader
- func (lr *Reader) GrepString(filterFn func(string) bool) *Reader
- func (lr *Reader) Map(filterFn func([]byte) []byte) *Reader
- func (lr *Reader) MapErr(filterFn func([]byte) ([]byte, error)) *Reader
- func (lr *Reader) MapString(filterFn func(string) string) *Reader
- func (lr *Reader) MapStringErr(filterFn func(string) (string, error)) *Reader
- func (lr *Reader) Read(p []byte) (n int, err error)
- func (lr *Reader) ReadAll() ([]byte, error)
- func (lr *Reader) ReadAllSlice() ([][]byte, error)
- func (lr *Reader) ReadAllSliceString() ([]string, error)
- func (lr *Reader) ReadAllString() (string, error)
- func (lr *Reader) SetFS(fs *regexp.Regexp) *Reader
- func (lr *Reader) SetRS(rs byte) *Reader
Examples ¶
Constants ¶
This section is empty.
Variables ¶
var (
	// ErrOmitLine - error for Map*Err/AWKMode, for omitting current line
	ErrOmitLine = errors.New("ErrOmitLine")

	// ErrNilReader - error for provided reader being nil
	ErrNilReader = errors.New("nil reader")
)
Functions ¶
This section is empty.
Types ¶
type AWKVars ¶
// AWKVars holds the AWK-style variables that are passed to the filter
// function given to Reader.AWKMode.
type AWKVars struct {
NR int // number of the current line (begin from 1)
NF int // number of fields in the current line
RS byte // record separator, default is '\n'
FS *regexp.Regexp // field separator, default is `\s+`
}
AWKVars - settings for AWK mode, see man awk
type Reader ¶
// Reader is the line-by-line reader; all of its fields are unexported and
// are therefore omitted from this listing.
type Reader struct {
// contains filtered or unexported fields
}
Reader - line by line Reader
func (*Reader) AWKMode ¶
func (lr *Reader) AWKMode(filterFn func(line string, fields []string, vars AWKVars) (string, error)) *Reader
AWKMode - process lines in an AWK-like mode
Example ¶
package main
import (
"fmt"
"io"
"regexp"
"strconv"
"strings"
"github.com/msoap/byline"
)
// main demonstrates AWKMode: it skips the header line, stops at the
// "Total:" footer, omits records priced below 10 and sums the prices of the
// records that are kept.
func main() {
	reader := strings.NewReader(`ID,NAME,PRICE
A001,name one,12.3
A002,second row;7.1
A003,three row;15.51
Total: ....
Some text
`)

	sum := 0.0
	lr := byline.NewReader(reader).
		SetFS(regexp.MustCompile(`[,;]`)).
		AWKMode(func(line string, fields []string, vars byline.AWKVars) (string, error) {
			if vars.NR == 1 {
				// skip first line
				return "", byline.ErrOmitLine
			}

			if vars.NF > 0 && strings.HasPrefix(fields[0], "Total:") {
				// skip rest of file
				return "", io.EOF
			}

			if vars.NF < 3 {
				return "", fmt.Errorf("csv parse failed for %q", line)
			}

			// bitSize must be 32 or 64; 64 parses into a full float64.
			price, err := strconv.ParseFloat(fields[2], 64)
			if err != nil {
				return "", err
			}
			if price < 10 {
				// cheap items are left out of both the output and the sum
				return "", byline.ErrOmitLine
			}
			sum += price

			return fmt.Sprintf("line:%d. %s - %s", vars.NR, fields[0], fields[1]), nil
		})

	result, err := lr.ReadAllString()
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Print(result)
	fmt.Printf("Sum: %.2f", sum)
}
Output: line:2. A001 - name one line:4. A003 - three row Sum: 27.81
func (*Reader) Discard ¶
Discard - read all content from the Reader for the side effects of its filter functions
func (*Reader) Each ¶
Each - processing each line. Do not save the value of the byte slice, since it can change in the next filter-steps.
Example ¶
package main
import (
"fmt"
"strings"
"github.com/msoap/byline"
)
// main demonstrates Each: it tallies spaces, bytes and lines of the input
// purely through the callback's side effects, then prints the totals if the
// stream was consumed without error.
func main() {
	input := strings.NewReader(`1 1 1
2 2 2
3 3 3
`)

	var spaces, size, lines int

	// tally inspects the line in place; the slice is not retained.
	tally := func(line []byte) {
		lines++
		size += len(line)
		for i := 0; i < len(line); i++ {
			if line[i] == ' ' {
				spaces++
			}
		}
	}

	if err := byline.NewReader(input).Each(tally).Discard(); err != nil {
		return
	}

	fmt.Printf("spaces: %d, bytes: %d, lines: %d\n", spaces, size, lines)
}
Output: spaces: 6, bytes: 18, lines: 3
func (*Reader) EachString ¶
EachString - processing each line as string
Example ¶
package main
import (
"fmt"
"strings"
"github.com/msoap/byline"
)
// main demonstrates EachString: every line is appended to a slice from
// inside the callback, and the collected lines are printed quoted once the
// stream has been drained successfully.
func main() {
	input := strings.NewReader(`111
222
333
`)

	collected := []string{}
	collect := func(line string) {
		collected = append(collected, line)
	}

	err := byline.NewReader(input).EachString(collect).Discard()
	if err != nil {
		return
	}

	fmt.Printf("%q\n", collected)
}
Output: ["111\n" "222\n" "333\n"]
func (*Reader) Grep ¶
Grep - grep lines by func
Example ¶
package main
import (
"fmt"
"os"
"regexp"
"github.com/msoap/byline"
)
// StateMachine selects blocks of lines delimited by a begin and an end
// pattern, remembering between calls whether it is currently inside a block.
type StateMachine struct {
	beginRe *regexp.Regexp // matches the first line of a block
	endRe   *regexp.Regexp // matches the last line of a block
	inBlock bool           // true while between a begin and an end line
}

// SMFilter reports whether line belongs to a block. The begin line, the end
// line and everything in between are accepted; all other lines are rejected.
func (sm *StateMachine) SMFilter(line []byte) bool {
	// A begin line always opens (or re-opens) a block and is itself kept.
	if sm.beginRe.Match(line) {
		sm.inBlock = true
		return true
	}

	// Outside a block nothing is kept; the end pattern is not consulted.
	if !sm.inBlock {
		return false
	}

	// Inside a block every line is kept; the end line also closes the block.
	if sm.endRe.Match(line) {
		sm.inBlock = false
	}
	return true
}
// main demonstrates Grep with a stateful filter: it prints every block of
// "byline.go" (looked up in the current directory) that starts with a
// "type " line and ends with a closing brace, with trailing line comments
// removed.
func main() {
	file, err := os.Open("byline.go")
	if err != nil {
		fmt.Println(err)
		return
	}
	// The file is only read, so a failure on Close cannot lose data and is
	// deliberately ignored.
	defer file.Close()

	// get all lines between "^type..." and "^}"
	sm := StateMachine{
		beginRe: regexp.MustCompile(`^type `),
		endRe:   regexp.MustCompile(`^}\s+$`),
	}

	// Compiled once here rather than inside the Map callback, which runs for
	// every line.
	commentRe := regexp.MustCompile(`\s+//.+`)

	lr := byline.NewReader(file).Grep(sm.SMFilter).Map(func(line []byte) []byte {
		// and remove comments
		return commentRe.ReplaceAll(line, []byte{})
	})

	result, err := lr.ReadAllString()
	if err != nil {
		fmt.Println(err)
		return
	}

	fmt.Print("\n" + result)
}
Output: type Reader struct { scanner *bufio.Scanner buffer bytes.Buffer existsData bool filterFuncs []func(line []byte) ([]byte, error) awkVars AWKVars } type AWKVars struct { NR int NF int RS byte FS *regexp.Regexp }
func (*Reader) GrepByRegexp ¶
GrepByRegexp - grep lines by regexp
Example ¶
package main
import (
"fmt"
"regexp"
"strings"
"github.com/msoap/byline"
)
// main demonstrates GrepByRegexp: only the lines that start with an
// "A<digits>," record ID are kept; the header, footer and trailing text are
// filtered out.
func main() {
	input := strings.NewReader(`ID,NAME,PRICE
A001,name one,12.3
A002,second row;7.1
A003,three row;15.51
Total: ....
Some text
`)

	recordRe := regexp.MustCompile(`^A\d+,`)
	lr := byline.NewReader(input).GrepByRegexp(recordRe)

	result, err := lr.ReadAllString()
	fmt.Print("\n"+result, err)
}
Output: A001,name one,12.3 A002,second row;7.1 A003,three row;15.51 <nil>
func (*Reader) GrepString ¶
GrepString - grep lines as string by func
func (*Reader) MapErr ¶
MapErr - set a filter function for processing each line; it returns an error if needed (io.EOF, for example)
func (*Reader) MapStringErr ¶
MapStringErr - set a filter function for processing each line as a string; it returns an error if needed (io.EOF, for example)
Example ¶
package main
import (
"fmt"
"io"
"strings"
"github.com/msoap/byline"
)
// main demonstrates MapStringErr: empty lines are omitted, reading stops at
// the first line starting with "end ", and every other line is prefixed
// with "<".
func main() {
	input := strings.NewReader(`
100000
200000
300000
end ...
Some text
`)

	mark := func(line string) (string, error) {
		if line == "" || line == "\n" {
			// nothing to show for an empty line
			return "", byline.ErrOmitLine
		}
		if strings.HasPrefix(line, "end ") {
			// io.EOF ends the stream early; the remaining input is ignored
			return "", io.EOF
		}
		return "<" + line, nil
	}

	result, err := byline.NewReader(input).MapStringErr(mark).ReadAllString()
	fmt.Print("\n"+result, err)
}
Output: <100000 <200000 <300000 <nil>
func (*Reader) ReadAllSlice ¶
ReadAllSlice - read all content from Reader by lines to slice of []byte
func (*Reader) ReadAllSliceString ¶
ReadAllSliceString - read all content from Reader to string slice by lines
func (*Reader) ReadAllString ¶
ReadAllString - read all content from Reader to one string