normalizer

package
v1.1.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 31, 2020 License: AGPL-3.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// FRENCH_FORMAT is the day/month/year date layout; DateOfBirth uses it as
	// the output format when no output format is passed.
	FRENCH_FORMAT = "DD/MM/YYYY"

	// ISO_FORMAT is the compact year-month-day date layout; DateOfBirth uses
	// it as the input format when no input format is passed.
	ISO_FORMAT = "YYYYMMDD"

	// TIMESTAMP is the input-format keyword that makes DateOfBirth read its
	// input as a Unix timestamp expressed in seconds.
	TIMESTAMP = "timestamp"

	// TIMESTAMP_MILLIS is the input-format keyword that makes DateOfBirth read
	// its input as a Unix timestamp expressed in milliseconds.
	TIMESTAMP_MILLIS = "timestamp_millis"
)
View Source
const (
	// CODE_F is the code returned by Title for female.
	CODE_F = "2"

	// CODE_M is the code returned by Title for male.
	CODE_M = "1"

	// CODE_U is the code returned by Title for unknown or undefined.
	CODE_U = "0"
)

Variables

This section is empty.

Functions

func Normalize

func Normalize(data string, with interface{}, params ...string) (string, error)

Normalize is the main function for the adaptation of the normalizing process developed for the Empreinte Sociometrique™ by Edgewhere: it takes the data to normalize and the normalizing function to use as well as some optional parameters, and returns the normalized string and an error if any.

func Uniformize

func Uniformize(input string) string

Uniformize applies the basic normalizing process: trim, capitalize, ...

Types

type Dictionary

// Dictionary maps a word to its replacement string; TranslateWord and
// TranslateText perform their lookups in it.
type Dictionary map[string]string

Dictionary ...

func (Dictionary) TranslateText

func (d Dictionary) TranslateText(input string) string

TranslateText returns the passed string with all its words passed through the TranslateWord function. The input should have been uniformized beforehand.

func (Dictionary) TranslateWord

func (d Dictionary) TranslateWord(input string) string

TranslateWord returns the translated word if found in the dictionary, the original string otherwise. The input should have been uniformized beforehand.

type SimpleNormalizer

// SimpleNormalizer is the signature of a normalizing function that takes only
// the string to normalize and returns the normalized string or an error.
type SimpleNormalizer func(string) (string, error)

SimpleNormalizer ...

// AddressLine normalizes an address line: the input is uniformized and then
// translated through the address dictionary. An empty input, or an empty
// translation result, is reported as an error.
var AddressLine SimpleNormalizer = func(input string) (string, error) {
	if input == "" {
		return "", errors.New("invalid empty string")
	}
	normalized := addressDictionary.TranslateText(Uniformize(input))
	if normalized != "" {
		return normalized, nil
	}
	return "", errors.New("unable to build a normalized string")
}

AddressLine returns a normalized address line (for lines 2 to 6 in the French postal convention, but not only). TODO: become international in the address.dico file.

// Any is the fallback normalizer for data without a dedicated normalizer: it
// returns the uniformized input. A non-empty input that uniformizes to an
// empty string is reported as an error; an empty input yields "" and no error.
var Any SimpleNormalizer = func(input string) (string, error) {
	result := Uniformize(input)
	if result != "" || input == "" {
		return result, nil
	}
	return "", errors.New("unable to normalize input string")
}

Any is the normalizing function to use as normalizer argument for any kind of data when no specific normalizer exists

// reCityWhitespace matches a run of whitespace. It is compiled once at package
// scope so that City does not recompile the pattern on every call.
var reCityWhitespace = regexp.MustCompile(`\s+`)

// City returns a normalized city name: the input is uniformized, translated
// through the address dictionary, stripped of whatever reCity matches, and
// finally has its whitespace runs collapsed to single spaces and its ends
// trimmed.
//
// It returns an error when the input is empty or when the dictionary
// translation yields an empty string.
var City SimpleNormalizer = func(input string) (string, error) {
	if len(input) == 0 {
		return "", errors.New("invalid empty string")
	}
	found := addressDictionary.TranslateText(Uniformize(input))
	if found == "" {
		return "", errors.New("unable to build a normalized string")
	}
	// Removing the reCity matches can leave gaps, hence the whitespace
	// collapsing afterwards.
	found = reCityWhitespace.ReplaceAllString(reCity.ReplaceAllString(found, ""), " ")
	return strings.TrimSpace(found), nil
}

City returns a normalized city name

// CodePostalFrance returns a normalized French zip code. The uniformized input
// must match reCPF, otherwise an error is returned. When it also matches
// reCorse (presumably the Corsican notation — confirm against the pattern),
// every "A" and "B" is replaced by "0".
var CodePostalFrance SimpleNormalizer = func(input string) (string, error) {
	code := Uniformize(input)
	if !reCPF.MatchString(code) {
		return "", errors.New("invalid code postal")
	}
	if !reCorse.MatchString(code) {
		return code, nil
	}
	return strings.NewReplacer("A", "0", "B", "0").Replace(code), nil
}

CodePostalFrance returns a normalized French zip code

// DepartementFrance returns the code of a French "département" extracted from
// the uniformized input by reDF. When the extracted code is "20" and the input
// is also a valid French zip code, the code is refined to "2A" (20000-20199)
// or "2B" (20200-20620).
var DepartementFrance SimpleNormalizer = func(input string) (string, error) {
	groups := reDF.FindStringSubmatch(Uniformize(input))
	if len(groups) != 3 {
		return "", errors.New("not a valid departement")
	}
	code := groups[1]
	if code != "20" {
		return code, nil
	}
	zip, err := CodePostalFrance(input)
	if err != nil {
		// Not a full zip code: nothing to refine.
		return code, nil
	}
	// The parse error is deliberately ignored: on failure n falls outside
	// both ranges below and the code stays "20".
	n, _ := strconv.ParseInt(zip, 10, 64)
	switch {
	case n >= 20000 && n < 20200:
		code = "2A"
	case n >= 20200 && n <= 20620:
		code = "2B"
	}
	return code, nil
}

DepartementFrance returns the two-letter code of a French "département"

// Email returns a normalized e-mail: the input is trimmed and lowercased, and
// is accepted only if it is at most 255 bytes long and matches reEmail.
var Email SimpleNormalizer = func(input string) (string, error) {
	candidate := strings.ToLower(strings.TrimSpace(input))
	if len(candidate) <= 255 && reEmail.MatchString(candidate) {
		return candidate, nil
	}
	return "", errors.New("invalid email")
}

Email returns a normalized e-mail

// FirstName returns a normalized first name: the input is uniformized and
// looked up as a single word in the first-name dictionary. An empty input, or
// an empty result, is reported as an error.
var FirstName SimpleNormalizer = func(input string) (string, error) {
	if input == "" {
		return "", errors.New("invalid empty string")
	}
	normalized := firstNameDictionary.TranslateWord(Uniformize(input))
	if normalized != "" {
		return normalized, nil
	}
	return "", errors.New("unable to build a normalized string")
}

FirstName returns a normalized first name

// Mobile returns a normalized mobile number of the form
// "+CC (P) XX XXXX XXXX"-like five space-separated parts built from the
// capture groups of reMob, or an empty string and an error on failure.
//
// Groups used (indices into the reMob submatches):
//   - 4: international code, "33" when empty;
//   - 5: prefix placed between parentheses, "0" when empty;
//   - 6: first digit of the number, which must be "6" or "7";
//   - 7, 8, 9: the remaining parts, all of which must be non-empty.
var Mobile SimpleNormalizer = func(input string) (string, error) {
	compact := strings.ReplaceAll(Uniformize(input), " ", "")
	groups := reMob.FindStringSubmatch(compact)
	if len(groups) == 0 {
		return "", errors.New("invalid mobile string")
	}
	// Only numbers starting with 6 or 7 are accepted as mobile numbers.
	if len(groups) > 6 && groups[6] != "6" && groups[6] != "7" {
		return "", errors.New("not a mobile phone")
	}
	if len(groups) < 10 || groups[7] == "" || groups[8] == "" || groups[9] == "" {
		return "", errors.New("unable to build normalized mobile")
	}
	country := groups[4]
	if country == "" {
		country = "33"
	}
	prefix := groups[5]
	if prefix == "" {
		prefix = "0"
	}
	return strings.Join([]string{
		"+" + country,
		"(" + prefix + ")",
		groups[6] + groups[7],
		groups[8],
		groups[9],
	}, " "), nil
}

Mobile returns a normalized mobile phone number, or an empty string and an error on failure. WARNING: the current implementation is specific to French mobile phones. TODO: become international.

// PhoneNumber returns a normalized phone number made of five space-separated
// parts built from the capture groups of reTel, or an empty string and an
// error on failure.
//
// Groups used (indices into the reTel submatches):
//   - 4: international code, "33" when empty;
//   - 9: prefix placed between parentheses, "0" when empty;
//   - 11, 12, 13: the number parts, all of which must be non-empty.
var PhoneNumber SimpleNormalizer = func(input string) (string, error) {
	compact := strings.ReplaceAll(Uniformize(input), " ", "")
	groups := reTel.FindStringSubmatch(compact)
	if len(groups) == 0 {
		return "", errors.New("invalid phone number")
	}
	if len(groups) < 14 || groups[11] == "" || groups[12] == "" || groups[13] == "" {
		return "", errors.New("unable to build normalized phone number")
	}
	country := groups[4]
	if country == "" {
		country = "33"
	}
	prefix := groups[9]
	if prefix == "" {
		prefix = "0"
	}
	return strings.Join([]string{
		"+" + country,
		"(" + prefix + ")",
		groups[11],
		groups[12],
		groups[13],
	}, " "), nil
}

PhoneNumber returns a normalized landline phone number, or an empty string and an error on failure. WARNING: the current implementation is specific to French phone numbers. TODO: become international.

// StreetNumber returns a sanitized street number: the number captured by reSN
// from the trimmed input, followed by its optional complement once the latter
// has been uniformized and translated through the address dictionary.
var StreetNumber SimpleNormalizer = func(input string) (string, error) {
	groups := reSN.FindStringSubmatch(strings.TrimSpace(input))
	if len(groups) != 3 {
		return "", errors.New("probably not a street number")
	}
	number, complement := groups[1], groups[2]
	if complement == "" {
		return number, nil
	}
	uniformized := Uniformize(complement)
	if uniformized == "" {
		return number, nil
	}
	return number + addressDictionary.TranslateText(uniformized), nil
}

StreetNumber returns a sanitized street number

// Title returns a code string: CODE_U for undefined, CODE_M for male, CODE_F
// for female. The single letters "M"/"H", "F" and "U" (after uniformization)
// map directly to their code; any other input is translated through the title
// dictionary and must then contain exactly one reCiv match, otherwise CODE_U
// is returned. An empty input is an error.
var Title SimpleNormalizer = func(input string) (string, error) {
	if input == "" {
		return "", errors.New("invalid empty string")
	}
	key := Uniformize(input)
	switch key {
	case "M", "H":
		return CODE_M, nil
	case "F":
		return CODE_F, nil
	case "U":
		return CODE_U, nil
	}
	codes := reCiv.FindAllString(titleDictionary.TranslateText(key), 2)
	if len(codes) == 1 {
		return codes[0], nil
	}
	if key == "" {
		return "", errors.New("unable to build a normalized string")
	}
	return CODE_U, nil
}

Title returns a code string: `0` for undefined, `1` for male, `2` for female

type VariadicNormalizer

// VariadicNormalizer is the signature of a normalizing function that takes the
// string to normalize plus optional string parameters, and returns the
// normalized string or an error.
type VariadicNormalizer func(string, ...string) (string, error)

VariadicNormalizer ...

// DateOfBirth returns a normalized date.
//
// The optional params are:
//   - params[0]: the format of the input string (defaults to ISO_FORMAT); it
//     may also be TIMESTAMP (Unix seconds) or TIMESTAMP_MILLIS (Unix
//     milliseconds), matched case-insensitively;
//   - params[1]: the format of the output string (defaults to FRENCH_FORMAT).
//
// Date formats are upper-cased before being handed to fmtdate. An error is
// returned when the input cannot be parsed according to the input format.
//
// NOTE(review): timestamps are converted with time.Unix, which yields a time
// in the local time zone, so the resulting calendar date depends on the zone
// of the running process — confirm this is intended.
var DateOfBirth VariadicNormalizer = func(input string, params ...string) (string, error) {
	input = strings.TrimSpace(input)
	inputFormat := ISO_FORMAT
	if len(params) > 0 {
		switch strings.ToLower(params[0]) {
		case TIMESTAMP:
			inputFormat = TIMESTAMP
		case TIMESTAMP_MILLIS:
			inputFormat = TIMESTAMP_MILLIS
		default:
			inputFormat = strings.ToUpper(params[0])
		}
	}
	outputFormat := FRENCH_FORMAT
	if len(params) > 1 {
		outputFormat = strings.ToUpper(params[1])
	}

	var d time.Time
	switch inputFormat {
	case TIMESTAMP:
		secs, e := strconv.ParseInt(input, 10, 64)
		if e != nil {
			return "", e
		}
		d = time.Unix(secs, 0)
	case TIMESTAMP_MILLIS:
		if input == "" {
			return "", errors.New("invalid timestamp in milliseconds")
		}
		// Parse the value as a whole rather than slicing off the last three
		// characters: this keeps short values (e.g. "500") and negative,
		// pre-1970 values correct.
		millis, e := strconv.ParseInt(input, 10, 64)
		if e != nil {
			return "", e
		}
		// time.Unix expects nanoseconds as its second argument, so the
		// millisecond remainder must be scaled; a negative remainder is
		// accepted by time.Unix.
		d = time.Unix(millis/1000, (millis%1000)*int64(time.Millisecond))
	default:
		parsed, e := fmtdate.Parse(inputFormat, input)
		if e != nil {
			return "", e
		}
		d = parsed
	}
	return fmtdate.Format(outputFormat, d), nil
}

DateOfBirth returns a normalized date using the `params` arguments, a list of optional arguments that control parsing and formatting: the first item is the format of the input string (defaults to the ISO format `YYYYMMDD`); the second item is the format of the output string (defaults to the French date format `DD/MM/YYYY`). The input format may also be `timestamp` or `timestamp_millis`.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL