normalizer

package

v1.1.3 Latest Latest Go to latest Published: Jul 31, 2020 License: AGPL-3.0 Imports: 13 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/edgewhere/rooot-node-lib

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func Normalize(data string, with interface{}, params ...string) (string, error)
func Uniformize(input string) string
type Dictionary
- func (d Dictionary) TranslateText(input string) string
- func (d Dictionary) TranslateWord(input string) string
type SimpleNormalizer
type VariadicNormalizer

Constants ¶

View Source

const (
	// FRENCH_FORMAT is the day/month/year date layout; DateOfBirth uses it as
	// its default output format.
	FRENCH_FORMAT = "DD/MM/YYYY"

	// ISO_FORMAT is the compact year-month-day date layout; DateOfBirth uses
	// it as its default input format.
	ISO_FORMAT = "YYYYMMDD"

	// TIMESTAMP is the input-format keyword telling DateOfBirth that the input
	// is a Unix timestamp expressed in seconds.
	TIMESTAMP = "timestamp"

	// TIMESTAMP_MILLIS is the input-format keyword telling DateOfBirth that
	// the input is a Unix timestamp expressed in milliseconds.
	TIMESTAMP_MILLIS = "timestamp_millis"
)

View Source

const (
	// CODE_F is the code returned by Title for a female title.
	CODE_F = "2"

	// CODE_M is the code returned by Title for a male title.
	CODE_M = "1"

	// CODE_U is the code returned by Title when the title is unknown or
	// undefined.
	CODE_U = "0"
)

Variables ¶

This section is empty.

Functions ¶

func Normalize ¶

func Normalize(data string, with interface{}, params ...string) (string, error)

Normalize is the main function for the adaptation of the normalizing process developed for the Empreinte Sociometrique™ by Edgewhere: it takes the data to normalize and the normalizing function to use as well as some optional parameters, and returns the normalized string and an error if any.

func Uniformize ¶

func Uniformize(input string) string

Uniformize applies the basic normalizing process: trim, capitalize, ...

Types ¶

type Dictionary ¶

// Dictionary maps a word to its translation. Lookups are done through
// TranslateWord (single word) and TranslateText (every word of a text).
type Dictionary map[string]string

Dictionary ...

func (Dictionary) TranslateText ¶

func (d Dictionary) TranslateText(input string) string

TranslateText returns the passed string with all its words passed through the TranslateWord function. The input should have been uniformized beforehand.

func (Dictionary) TranslateWord ¶

func (d Dictionary) TranslateWord(input string) string

TranslateWord returns the translated word if found in the dictionary, the original string otherwise. The input should have been uniformized beforehand.

type SimpleNormalizer ¶

// SimpleNormalizer is the signature of a normalizing function that only needs
// the raw input: it returns the normalized string, or an error.
type SimpleNormalizer func(string) (string, error)

SimpleNormalizer ...

// AddressLine normalizes one address line: the input is uniformized, then run
// through the address dictionary with TranslateText.
// It returns an error when the input is empty or when the translation yields
// an empty string.
var AddressLine SimpleNormalizer = func(input string) (string, error) {
	if input == "" {
		return "", errors.New("invalid empty string")
	}
	if normalized := addressDictionary.TranslateText(Uniformize(input)); normalized != "" {
		return normalized, nil
	}
	return "", errors.New("unable to build a normalized string")
}

AddressLine returns a normalized address line (for lines 2 to 6 in the French postal convention, but not only). TODO: become international in the address.dico file.

// Any is the fallback normalizer: it only applies Uniformize.
// An empty input is returned as-is without error; a non-empty input that
// uniformizes to an empty string is reported as an error.
var Any SimpleNormalizer = func(input string) (string, error) {
	result := Uniformize(input)
	if result != "" || input == "" {
		return result, nil
	}
	return "", errors.New("unable to normalize input string")
}

Any is the normalizing function to use as normalizer argument for any kind of data when no specific normalizer exists

// reCitySpaces matches a run of whitespace. It is compiled once at package
// initialization rather than on every call to City.
var reCitySpaces = regexp.MustCompile(`\s+`)

// City normalizes a city name.
//
// The input is uniformized and translated through the address dictionary;
// everything matched by reCity is then removed, runs of whitespace are
// collapsed to a single space and the result is trimmed.
//
// It returns an error when the input is empty or when the dictionary
// translation yields an empty string.
// NOTE(review): the result can still be empty (with a nil error) if reCity
// strips the whole translated string — confirm whether that is intended.
var City SimpleNormalizer = func(input string) (string, error) {
	if len(input) == 0 {
		return "", errors.New("invalid empty string")
	}
	found := addressDictionary.TranslateText(Uniformize(input))
	if found == "" {
		return "", errors.New("unable to build a normalized string")
	}
	// Removing the reCity matches can leave consecutive spaces behind, hence
	// the whitespace collapse afterwards.
	found = reCitySpaces.ReplaceAllString(reCity.ReplaceAllString(found, ""), " ")
	return strings.TrimSpace(found), nil
}

City returns a normalized city name

// CodePostalFrance normalizes a French postal code.
//
// The uniformized input must match reCPF, otherwise an error is returned.
// When it also matches reCorse (presumably the Corsican "2A"/"2B" notation —
// confirm against the pattern), every "A" and "B" is replaced by "0".
var CodePostalFrance SimpleNormalizer = func(input string) (string, error) {
	code := Uniformize(input)
	if !reCPF.MatchString(code) {
		return "", errors.New("invalid code postal")
	}
	if !reCorse.MatchString(code) {
		return code, nil
	}
	code = strings.ReplaceAll(code, "A", "0")
	code = strings.ReplaceAll(code, "B", "0")
	return code, nil
}

CodePostalFrance returns a normalized French zip code

// DepartementFrance extracts the "département" code from the input.
//
// The uniformized input must match reDF with exactly two capture groups; the
// first group is the code. A code of "20" is refined using the postal code
// returned by CodePostalFrance: 20000-20199 gives "2A", 20200-20620 gives
// "2B"; otherwise "20" is returned unchanged.
var DepartementFrance SimpleNormalizer = func(input string) (string, error) {
	groups := reDF.FindStringSubmatch(Uniformize(input))
	if len(groups) != 3 {
		return "", errors.New("not a valid departement")
	}
	code := groups[1]
	if code != "20" {
		return code, nil
	}
	postal, err := CodePostalFrance(input)
	if err != nil {
		// Not a usable postal code: keep the raw "20".
		return code, nil
	}
	// The parse error is deliberately ignored: on failure ParseInt yields 0 or
	// an int64 extreme, none of which falls inside the ranges tested below.
	n, _ := strconv.ParseInt(postal, 10, 64)
	switch {
	case n > 19999 && n < 20200:
		return "2A", nil
	case n > 20199 && n < 20621:
		return "2B", nil
	}
	return code, nil
}

DepartementFrance returns the two-character code of a French "département" (e.g. "2A" or "2B" for Corsica)

// Email normalizes an e-mail address by trimming surrounding whitespace and
// lower-casing it. The result must be at most 255 bytes long and match
// reEmail, otherwise an error is returned.
var Email SimpleNormalizer = func(input string) (string, error) {
	candidate := strings.TrimSpace(input)
	candidate = strings.ToLower(candidate)
	if len(candidate) <= 255 && reEmail.MatchString(candidate) {
		return candidate, nil
	}
	return "", errors.New("invalid email")
}

Email returns a normalized e-mail

// FirstName normalizes a first name: the input is uniformized, then looked up
// as a single word in the first-name dictionary with TranslateWord.
// It returns an error when the input is empty or when the lookup yields an
// empty string.
var FirstName SimpleNormalizer = func(input string) (string, error) {
	if input == "" {
		return "", errors.New("invalid empty string")
	}
	if normalized := firstNameDictionary.TranslateWord(Uniformize(input)); normalized != "" {
		return normalized, nil
	}
	return "", errors.New("unable to build a normalized string")
}

FirstName returns a normalized first name

// Mobile normalizes a mobile phone number into the form
// "+<country> (<trunk>) <3rd part> <4th part> <5th part>".
//
// The input is uniformized, stripped of spaces and matched against reMob.
// Capture groups are used as follows:
//   - group 4: international code, "33" when empty;
//   - group 5: trunk prefix, "0" when empty;
//   - group 6: must be "6" or "7", otherwise the number is not a mobile;
//   - groups 7, 8 and 9: remaining parts of the number, all mandatory.
//
// It returns an error when the input does not match, when group 6 is not a
// mobile digit, or when any mandatory group is missing or empty.
var Mobile SimpleNormalizer = func(input string) (string, error) {
	compact := strings.ReplaceAll(Uniformize(input), " ", "")
	groups := reMob.FindStringSubmatch(compact)
	if len(groups) == 0 {
		return "", errors.New("invalid mobile string")
	}
	// The mobile-digit check takes precedence over the completeness check.
	if len(groups) > 6 && groups[6] != "6" && groups[6] != "7" {
		return "", errors.New("not a mobile phone")
	}
	if len(groups) < 10 || groups[7] == "" || groups[8] == "" || groups[9] == "" {
		return "", errors.New("unable to build normalized mobile")
	}

	country := groups[4]
	if country == "" {
		country = "33"
	}
	trunk := groups[5]
	if trunk == "" {
		trunk = "0"
	}
	normalized := []string{
		"+" + country,
		"(" + trunk + ")",
		groups[6] + groups[7],
		groups[8],
		groups[9],
	}
	return strings.Join(normalized, " "), nil
}

Mobile returns a normalized mobile, or an empty string and an error if failed. WARNING: The current implementation is specific to French mobile phones TODO Become international

// PhoneNumber normalizes a phone number into the form
// "+<country> (<trunk>) <3rd part> <4th part> <5th part>".
//
// The input is uniformized, stripped of spaces and matched against reTel.
// Capture groups are used as follows:
//   - group 4: international code, "33" when empty;
//   - group 9: trunk prefix, "0" when empty;
//   - groups 11, 12 and 13: remaining parts of the number, all mandatory.
//
// It returns an error when the input does not match or when any mandatory
// group is missing or empty.
var PhoneNumber SimpleNormalizer = func(input string) (string, error) {
	compact := strings.ReplaceAll(Uniformize(input), " ", "")
	groups := reTel.FindStringSubmatch(compact)
	if len(groups) == 0 {
		return "", errors.New("invalid phone number")
	}
	if len(groups) < 14 || groups[11] == "" || groups[12] == "" || groups[13] == "" {
		return "", errors.New("unable to build normalized phone number")
	}

	country := groups[4]
	if country == "" {
		country = "33"
	}
	trunk := groups[9]
	if trunk == "" {
		trunk = "0"
	}
	normalized := []string{
		"+" + country,
		"(" + trunk + ")",
		groups[11],
		groups[12],
		groups[13],
	}
	return strings.Join(normalized, " "), nil
}

PhoneNumber returns a normalized landline phone number, or an empty string and an error if failed. WARNING: The current implementation is specific to French phone numbers. TODO: become international.

// StreetNumber sanitizes a street number.
//
// The trimmed input must match reSN with exactly two capture groups: the
// number itself and an optional complement. A non-empty complement is
// uniformized and, if still non-empty, translated through the address
// dictionary before being appended to the number.
var StreetNumber SimpleNormalizer = func(input string) (string, error) {
	groups := reSN.FindStringSubmatch(strings.TrimSpace(input))
	if len(groups) != 3 {
		return "", errors.New("probably not a street number")
	}
	number, complement := groups[1], groups[2]
	if complement == "" {
		return number, nil
	}
	uniformized := Uniformize(complement)
	if uniformized == "" {
		return number, nil
	}
	return number + addressDictionary.TranslateText(uniformized), nil
}

StreetNumber returns a sanitized street number

// Title converts a civility/title into a gender code: CODE_M, CODE_F or
// CODE_U.
//
// The single letters "M" or "H", "F" and "U" (after Uniformize) map directly
// to a code. Anything else is translated through the title dictionary and
// searched with reCiv: exactly one match is returned as the code. Otherwise
// CODE_U is returned, unless the uniformized input is empty, which is an
// error. An empty raw input is always an error.
var Title SimpleNormalizer = func(input string) (string, error) {
	if input == "" {
		return "", errors.New("invalid empty string")
	}
	key := Uniformize(input)
	switch key {
	case "M", "H":
		return CODE_M, nil
	case "F":
		return CODE_F, nil
	case "U":
		return CODE_U, nil
	}
	codes := reCiv.FindAllString(titleDictionary.TranslateText(key), 2)
	if len(codes) == 1 {
		return codes[0], nil
	}
	if key == "" {
		return "", errors.New("unable to build a normalized string")
	}
	// Zero or several matches: the title is ambiguous or unknown.
	return CODE_U, nil
}

Title returns a code string: `0` for undefined, `1` for male, `2` for female

type VariadicNormalizer ¶

// VariadicNormalizer is the signature of a normalizing function that takes
// the raw input plus optional string parameters (see DateOfBirth), and returns
// the normalized string, or an error.
type VariadicNormalizer func(string, ...string) (string, error)

VariadicNormalizer ...

// DateOfBirth parses a date and re-formats it.
//
// Parameters:
//   - input: the date to normalize; surrounding whitespace is ignored.
//   - params[0] (optional): the input format. TIMESTAMP and TIMESTAMP_MILLIS
//     (case-insensitive) select a Unix timestamp in seconds or milliseconds;
//     any other value is upper-cased and handed to fmtdate.Parse. Defaults to
//     ISO_FORMAT.
//   - params[1] (optional): the output format, upper-cased and handed to
//     fmtdate.Format. Defaults to FRENCH_FORMAT.
//
// It returns the formatted date, or an error when the input cannot be parsed
// with the selected input format.
//
// Millisecond timestamps are parsed as a single signed integer, so the
// fractional part is applied as milliseconds with the correct sign. This
// matters for dates before 1970, and values shorter than four digits are
// accepted.
//
// NOTE(review): time.Unix returns a time in the local zone, so for timestamp
// inputs the resulting calendar date presumably depends on the process time
// zone — confirm this is intended.
var DateOfBirth VariadicNormalizer = func(input string, params ...string) (string, error) {
	input = strings.TrimSpace(input)

	inputFormat := ISO_FORMAT
	if len(params) > 0 {
		switch strings.ToLower(params[0]) {
		case TIMESTAMP:
			inputFormat = TIMESTAMP
		case TIMESTAMP_MILLIS:
			inputFormat = TIMESTAMP_MILLIS
		default:
			inputFormat = strings.ToUpper(params[0])
		}
	}
	outputFormat := FRENCH_FORMAT
	if len(params) > 1 {
		outputFormat = strings.ToUpper(params[1])
	}

	var d time.Time
	switch inputFormat {
	case TIMESTAMP:
		secs, err := strconv.ParseInt(input, 10, 64)
		if err != nil {
			return "", err
		}
		d = time.Unix(secs, 0)
	case TIMESTAMP_MILLIS:
		if input == "" {
			return "", errors.New("invalid timestamp in milliseconds")
		}
		millis, err := strconv.ParseInt(input, 10, 64)
		if err != nil {
			return "", err
		}
		// Go's / and % truncate toward zero, so a negative timestamp yields a
		// negative remainder; time.Unix accepts nsec outside [0, 1e9) and
		// normalizes it.
		d = time.Unix(millis/1000, (millis%1000)*int64(time.Millisecond))
	default:
		parsed, err := fmtdate.Parse(inputFormat, input)
		if err != nil {
			return "", err
		}
		d = parsed
	}
	return fmtdate.Format(outputFormat, d), nil
}

DateOfBirth returns a normalized date using the `params` arguments, the latter being a list of optional arguments used to parse the input and format the output appropriately: - the first item is the string format of the input string (defaults to the ISO format: `YYYYMMDD`); - the second item is the string format for the output (defaults to the French date format: `DD/MM/YYYY`). The input format could also be a `timestamp` or a `timestamp_millis`.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL