textparse

package

v0.9.1 Latest Latest Go to latest Published: Sep 4, 2021 License: BSD-3-Clause Imports: 9 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/emer/gospeech

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func AllToLower(word []rune)
func AllUpper(word string) bool
func AnotherWordFollows(rs []rune, i, length, mode int) bool
func CheckTonic(input []rune, startPos int, endPos int) (output []rune)
func ConditionInput(input string) (output []rune)
func ConvertDash(rs []rune, idx *int, len int) bool
func ConvertSecondaryStress(rs []rune) bool
func ConvertSilence(buf []rune, input []rune) (silence float64, output []rune)
func DeleteEllipsis(rs []rune, idx *int, len int)
func ExpandAbbreviation(input []rune, i, length int) (success bool, output []rune)
func ExpandLetterMode(input []rune, cp *int, len int, status *int) (output []rune)
func ExpandRawMode(buf []rune, j *int, length int, input []rune) (success bool, output []rune)
func ExpandTagNumber(input []rune, j *int, length int) (success bool, output []rune)
func GetState(buf []rune, i *int, mode, nextMode, curState, nextState, rawModeFlag *int, ...) (word []rune, output []rune, success bool)
func HasPrimaryStress(rs []rune) bool
func IllegalSlashCode(code string) bool
func IllegalToken(token []rune) bool
func InsertChunkMarker(input []rune, insertPos int, tgType []rune) (output []rune)
func InsertRunes(input []rune, insert []rune, pos int) (output []rune)
func InsertTag(input []rune, insertPt int, word []rune) (output []rune)
func IsAcronym(word string) string
func IsIsolated(rs []rune, i, len int) bool
func IsMode(b byte) bool
func IsPunctuation(r rune) bool
func IsTelephoneNumber(rs []rune, i, len int) bool
func MarkModes(input []rune) (success bool, output []rune)
func NumberFollows(rs []rune, idx int, len int) bool
func PartOfNumber(rs []rune, idx int, len int) bool
func SetToneGroup(input []rune, tgPos int, word string) (success bool, output []rune)
func ShiftSilence(buf []rune, i, length, mode int, input []rune) (shift bool, output []rune)
func StripPunctuation(buf []rune) (output []rune)
func WordFollows(rs []rune, i, length int) bool
type NumParseMode
type NumParser
- func (np *NumParser) DegenerateString(word []rune) []rune
- func (np *NumParser) ErrorCheck(mode NumParseMode) int
- func (np *NumParser) InitialParse()
- func (np *NumParser) NumberParser()
- func (np *NumParser) ParseNum(word string, mode NumParseMode) string
type TextParser
- func NewTextParser(configPath string, fns []string) *TextParser
- func (tp *TextParser) ExpandWord(word string, isTonic bool, input []rune) (output []rune)
- func (tp *TextParser) FinalConversion(input []rune) (success bool, output []rune)
- func (tp *TextParser) LookupWord(word string) (pron string)
- func (tp *TextParser) Parse(rawtext string) string

Constants ¶

View Source

const (
	Normal = iota

	OverrideYears

	ForceSpell

	NumParseModeN
)

View Source

const Abbreviation = 0

View Source

const And = "and"

View Source

const AreaCode = 4

View Source

const At = "at"

View Source

const Begin = 0

View Source

const ChunkBoundary = "/c"

View Source

const ClockMax = 2 // MAX # OF COLONS IN CLOCK TIMES

Todo: should this be an enum? (is in c++)

View Source

const CommasMax = 33 // MAX # OF COMMAS

View Source

const DefaultEndPunc = "."

View Source

const DefaultEscapeCharacter = 27

View Source

const Degenerate = 1

View Source

const Deleted = -11

View Source

const ElevenDigitCode = 3

View Source

const EmphasisMode = 2

View Source

const EmphasisModeBegin = -5

View Source

const EmphasisModeEnd = -6

View Source

const End = 1

View Source

const Equals = "equals"

View Source

const Expansion = 1

View Source

const FootBegin = "/_"

View Source

const FractionalDigitsMax = 100 // MAX # OF FRACTIONAL DIGITS

View Source

const HalfFlag = 2

View Source

const IntegerDigitsMax = 100 // MAX # OF INTEGER DIGITS

View Source

const IsGreaterThan = "is greater than"

View Source

const IsLessThan = "is less than"

View Source

const LastWord = "/l"

View Source

const LetterMode = 1

View Source

const LetterModeBegin = -3

View Source

const LetterModeEnd = -4

View Source

const LongMedialPause = "^ ^ ^"

View Source

const MaxFeetPerChunk = 100

View Source

const MaxPhonesPerChunk = 1500

View Source

const MedialPause = "^"

View Source

const Minus = "minus"

View Source

const ModeNestMax = 100

View Source

const NegativeMax = 3 // MAX # OF NEGATIVE SIGNS (-)

View Source

const No = 0 // GENERAL PURPOSE FLAGS

View Source

const NoNumerals = 0 // FLAGS RETURNED BY error_check()

View Source

const NonPhoneme = 0

View Source

const NonZero = 1

View Source

const NormalMode = -1

View Source

const Ok = 3

View Source

const OutputMax = 8192 // OUTPUT BUFFER SIZE IN CHARS

View Source

const Phoneme = 1

View Source

const Plus = "plus"

View Source

const Pronounciation = 1

View Source

const Punctuation = 1

View Source

const QuarterFlag = 3

View Source

const RawMode = 0

View Source

const RawModeBegin = -1

View Source

const RawModeEnd = -2

View Source

const SecondaryStress = "/\""

View Source

const SecondthFlag = 1 // FLAGS FOR special_flag

View Source

const SevenDigitCode = 1 // TELEPHONE FLAGS

View Source

const SilenceMax = 5.0

View Source

const SilenceMode = 4

View Source

const SilenceModeBegin = -9

View Source

const SilenceModeEnd = -10

View Source

const SilencePhone = "^"

View Source

const SilencePhoneLength = 0.1 // silence phone is 100ms

View Source

const StateBegin = 0

View Source

const StateEnd = 4

View Source

const StateFinalPunc = 3

View Source

const StateMedialPunc = 2

View Source

const StateSilence = 5

View Source

const StateTagging = 6

View Source

const StateUndefined = -1

View Source

const StateWord = 1

View Source

const SymbolLengthMax = 12

View Source

const TagBegin = "/t"

View Source

const TaggingMode = 3

View Source

const TaggingModeBegin = -7

View Source

const TaggingModeEnd = -8

View Source

const TenDigitCode = 2

View Source

const TgContinuation = "/3"

View Source

const TgExclamation = "/1"

View Source

const TgHalfPeriod = "/4"

View Source

const TgQuestion = "/2"

View Source

const TgStatement = "/0"

View Source

const TgUndefined = "/x"

View Source

const ToneGroupBoundary = "//"

View Source

const TonicBegin = "/*"

View Source

const TtsDictionary1 = 2

View Source

const TtsDictionary2 = 3

View Source

const TtsDictionary3 = 4

View Source

const TtsEmpty = 0

Dictionary Ordering Definitions

View Source

const TtsFalse = 0

View Source

const TtsLetterToSound = 5

View Source

const TtsNo = 0

View Source

const TtsNumberParser = 1

View Source

const TtsParserFailure = false

View Source

const TtsParserSuccess = true

View Source

const TtsTrue = 1

View Source

const TtsYes = 1

View Source

const UndefinedMode = -2

View Source

const UndefinedPosition = -1

View Source

const UtteranceBoundary = "#"

View Source

const Word = 0

View Source

const WordBegin = "/w"

View Source

const WordLengthMax = 1024

View Source

const Yes = 1

Variables ¶

View Source

var Acronyms [][]string

View Source

var Escape = rune(DefaultEscapeCharacter)

View Source

var Kit_SymbolType = kit.Enums.AddEnum(NumParseModeN, kit.NotBitFlag, nil)

View Source

var Triads = [][]string{
	{"NULL_STRING", "THOUSAND", "MILLION", "BILLION", "TRILLION", "QUADRILLION", "QUINTILLION",
		"SEXTILLION", "SEPTILLION", "OCTILLION", "NONILLION", "DECILLION", "UNDECILLION",
		"DUODECILLION", "TREDECILLION", "QUATTUORDECILLION", "QUINDECILLION", "SEXDECILLION",
		"SEPTENDECILLION", "OCTODECILLION", "NOVEMDECILLION", "VIGINTILLION"},
	{"NULL_STRING", "THOUSANDTH", "MILLIONTH", "BILLIONTH", "TRILLIONTH", "QUADRILLIONTH",
		"QUINTILLIONTH", "SEXTILLIONTH", "SEPTILLIONTH", "OCTILLIONTH", "NONILLIONTH",
		"DECILLIONTH", "UNDECILLIONTH", "DUODECILLIONTH", "TREDECILLIONTH",
		"QUATTUORDECILLIONTH", "QUINDECILLIONTH", "SEXDECILLIONTH", "SEPTENDECILLIONTH",
		"OCTODECILLIONTH", "NOVEMDECILLIONTH", "VIGINTILLIONTH"},
	{"NULL_STRING", "THOUSANDTHS", "MILLIONTHS", "BILLIONTHS", "TRILLIONTHS",
		"QUADRILLIONTHS", "QUINTILLIONTHS", "SEXTILLIONTHS", "SEPTILLIONTHS", "OCTILLIONTHS",
		"NONILLIONTHS", "DECILLIONTHS", "UNDECILLIONTHS", "DUODECILLIONTHS",
		"TREDECILLIONTHS", "QUATTUORDECILLIONTHS", "QUINDECILLIONTHS", "SEXDECILLIONTHS",
		"SEPTENDECILLIONTHS", "OCTODECILLIONTHS", "NOVEMDECILLIONTHS", "VIGINTILLIONTHS"}}

VARIABLES PERTAINING TO TRIADS AND TRIAD NAMES

Functions ¶

func AllUpper ¶

func AllUpper(word string) bool

func AnotherWordFollows ¶

func AnotherWordFollows(rs []rune, i, length, mode int) bool

AnotherWordFollows returns true if another word follows in buffer after position i; otherwise false is returned.

func CheckTonic ¶

func CheckTonic(input []rune, startPos int, endPos int) (output []rune)

CheckTonic checks whether a tonic marker is present in the stream between the start and end positions. If no tonic is present, one is put in at the last foot marker, if it exists.

func ConditionInput ¶

func ConditionInput(input string) (output []rune)

ConditionInput converts all non-printable characters (except the escape character) to blanks. It also connects words hyphenated over a newline.

func ConvertDash ¶

func ConvertDash(rs []rune, idx *int, len int) bool

ConvertDash converts "--" to ", ", and "---" to ", ". Returns true if this is done, false otherwise.

func ConvertSecondaryStress ¶

func ConvertSecondaryStress(rs []rune) bool

ConvertSecondaryStress returns true if the pronunciation contains " (and ` for backwards compatibility).

func ConvertSilence ¶

func ConvertSilence(buf []rune, input []rune) (silence float64, output []rune)

ConvertSilence converts numeric quantity in "buffer" to appropriate number of silence phones, which are written onto the end of stream. Rounding is performed. Returns actual length of silence.

func DeleteEllipsis ¶

func DeleteEllipsis(rs []rune, idx *int, len int)

DeleteEllipsis deletes three dots in a row (disregarding whitespace). If four dots, then the last three are deleted.

func ExpandAbbreviation ¶

func ExpandAbbreviation(input []rune, i, length int) (success bool, output []rune)

ExpandAbbreviation expands listed abbreviations. Two lists are used (see abbreviations.h): one list expands unconditionally, the other only if the abbreviation is followed by a number. The abbreviation p. is expanded to page. Single alphabetic characters have periods deleted, but no expansion is made. They are also capitalized. Returns true if an expansion is made (i.e. the period is deleted).

func ExpandLetterMode ¶

func ExpandLetterMode(input []rune, cp *int, len int, status *int) (output []rune)

ExpandLetterMode expands contents of letter mode string to word or words. A comma is added after each expansion, except the last letter when it is followed by punctuation. cp is current position

func ExpandRawMode ¶

func ExpandRawMode(buf []rune, j *int, length int, input []rune) (success bool, output []rune)

ExpandRawMode writes raw mode contents to stream, checking phones and markers. Todo: What's up with passing token without ... (note left unfinished in the original source).

func ExpandTagNumber ¶

func ExpandTagNumber(input []rune, j *int, length int) (success bool, output []rune)

ExpandTagNumber expands the tag number in buffer at position j and writes it to stream. It performs error checking, reporting failure if the format of the tag number is illegal.

func GetState ¶

func GetState(buf []rune, i *int, mode, nextMode, curState, nextState, rawModeFlag *int, input []rune) (word []rune, output []rune, success bool)

GetState determines the current state and next state in buffer. A word or punctuation is put into word. Raw mode contents are expanded and written to stream.

func HasPrimaryStress ¶

func HasPrimaryStress(rs []rune) bool

HasPrimaryStress returns true if the pronunciation contains ' (and ` for backwards compatibility).

func IllegalSlashCode ¶

func IllegalSlashCode(code string) bool

IllegalSlashCode returns true if code is illegal

func IllegalToken ¶

func IllegalToken(token []rune) bool

IllegalToken returns true if token is not a valid DEGAS phone, otherwise false.

func InsertChunkMarker ¶

func InsertChunkMarker(input []rune, insertPos int, tgType []rune) (output []rune)

/****************************************************************************** * * function: insert_chunk_marker * * purpose: Insert chunk markers and associated markers in the stream at the insert point. Use the tone group type passed in as an argument. * ******************************************************************************/

InsertChunkMarker inserts chunk markers and associated markers in the stream at the insert point. Use the tone group type passed in as an argument.

func InsertRunes ¶

func InsertRunes(input []rune, insert []rune, pos int) (output []rune)

func InsertTag ¶

func InsertTag(input []rune, insertPt int, word []rune) (output []rune)

InsertTag inserts the tag contained in word onto the stream at the insert point.

func IsAcronym ¶

func IsAcronym(word string) string

IsAcronym returns the pronunciation of a special acronym if it is defined in the list.

func IsIsolated ¶

func IsIsolated(rs []rune, i, len int) bool

IsIsolated returns true if character at position i is isolated, i.e. is surrounded by space or mode marker.

func IsMode ¶

func IsMode(b byte) bool

IsMode returns true if character is a mode marker, otherwise false.

func IsPunctuation ¶

func IsPunctuation(r rune) bool

IsPunctuation

func IsTelephoneNumber ¶

func IsTelephoneNumber(rs []rune, i, len int) bool

IsTelephoneNumber returns true if string at position i in buffer is of the form: (ddd)ddd-dddd where each d is a digit.

func MarkModes ¶

func MarkModes(input []rune) (success bool, output []rune)

MarkModes parses input for modes, checking for errors, and marks output with mode start and end points. Tagging and silence mode arguments are checked.

func NumberFollows ¶

func NumberFollows(rs []rune, idx int, len int) bool

NumberFollows returns true if at least one digit follows the character at position i, to white space or mode marker.

func PartOfNumber ¶

func PartOfNumber(rs []rune, idx int, len int) bool

PartOfNumber returns true if character at position i is part of a number (including mixtures with non-numeric characters)

func SetToneGroup ¶

func SetToneGroup(input []rune, tgPos int, word string) (success bool, output []rune)

SetToneGroup sets the tone group marker according to the punctuation passed in as "word". The marker is inserted in the

func ShiftSilence ¶

func ShiftSilence(buf []rune, i, length, mode int, input []rune) (shift bool, output []rune)

ShiftSilence looks past punctuation to see if some silence occurs before the next word (or raw mode contents), and shifts the silence to the current point on the stream. Then the numeric quantity is converted to equivalent silence phones, and true is returned.

func StripPunctuation ¶

func StripPunctuation(buf []rune) (output []rune)

StripPunctuation deletes unnecessary punctuation, and converts some punctuation to another form.

func WordFollows ¶

func WordFollows(rs []rune, i, length int) bool

WordFollows returns true if a word or speakable symbol (letter mode) follows the position i in buffer. Raw, tagging, and silence mode contents are ignored. Returns false if any punctuation (except . as part of a number) follows.

Types ¶

type NumParseMode ¶

type NumParseMode int

type NumParser ¶

type NumParser struct {
	// contains filtered or unexported fields
}

func (*NumParser) DegenerateString ¶

func (np *NumParser) DegenerateString(word []rune) []rune

DegenerateString returns a string which contains a character-by-character pronunciation

func (*NumParser) ErrorCheck ¶

func (np *NumParser) ErrorCheck(mode NumParseMode) int

***************************************************************************** * * function: error_check * * purpose: Checks the initially parsed word for format errors. * Returns NoNumerals if the word contains no digits, * Degenerate if the word contains errors, OK otherwise. * ***************************************************************************** ErrorCheck checks the initially parsed word for format errors. Returns "NoNumerals" if the word contains no digits. Returns "Degenerate" if the word contains errors, returns "Ok" otherwise.

func (*NumParser) InitialParse ¶

func (np *NumParser) InitialParse()

InitialParse Finds positions of numbers, commas, and other symbols within the word.

func (*NumParser) NumberParser ¶

func (np *NumParser) NumberParser()

func (*NumParser) ParseNum ¶

func (np *NumParser) ParseNum(word string, mode NumParseMode) string

{
	// SPECIAL PROCESSING OF WORD;  EACH RETURNS IMMEDIATELY
	// PROCESS CLOCK TIMES
	if (clock_) {
		// HOUR
		if (leftZeroPad_) {
			strcat(&output_[0], OH);
		}
		process_triad(&hour_[0], &output_[0], NO, NO, NO, NO, NO);
		// MINUTE
		if ((minute_[1] == '0') && (minute_[2] == '0')) {
			if (military_) {
				strcat(&output_[0], HUNDRED);
			} else if (!seconds_) {
				strcat(&output_[0], OCLOCK);
			}
		} else {
			if ((minute_[1] == '0') && (minute_[2] != '0')) {
				strcat(&output_[0], OH);
			}
			process_triad(&minute_[0], &output_[0], NO, NO, NO, NO, NO);
		}
		// SECOND
		if (seconds_) {
			strcat(&output_[0], AND);
			if ((second_[1] == '0') && (second_[2] == '0')) {
				strcat(&output_[0], ZERO);
			} else {
				process_triad(&second_[0], &output_[0], NO, NO, NO, NO, NO);
			}

			if ((second_[1] == '0') && (second_[2] == '1')) {
				strcat(&output_[0], SECOND);
			} else {
				strcat(&output_[0], SECONDS);
			}
		}
		return &output_[0];
	}
	// PROCESS TELEPHONE NUMBERS
	if (telephone_ == SEVEN_DIGIT_CODE) {
		for (int i = 0; i < 3; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		strcat(&output_[0], PAUSE);
		for (int i = 3; i < 7; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		return &output_[0];
	} else if (telephone_ == TenDigitCode) {
		for (int i = 0; i < 3; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		strcat(&output_[0], PAUSE);
		for (int i = 3; i < 6; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		strcat(&output_[0], PAUSE);
		for (int i = 6; i < 10; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		return &output_[0];
	} else if (telephone_ == ElevenDigitCode) {
		process_digit(word_[integerDigitsPos_[0]], &output_[0], NO, NO, NO);
		if ((word_[integerDigitsPos_[1]] != '0') &&
				(word_[integerDigitsPos_[2]] == '0') &&
				(word_[integerDigitsPos_[3]] == '0')) {
			process_digit(word_[integerDigitsPos_[1]], &output_[0], NO, NO, NO);
			strcat(&output_[0], HUNDRED);
		} else {
			strcat(&output_[0], PAUSE);
			for (int i = 1; i < 4; i++) {
				process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
			}
		}
		strcat(&output_[0], PAUSE);
		for (int i = 4; i < 7; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		strcat(&output_[0], PAUSE);
		for (int i = 7; i < 11; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		return &output_[0];
	} else if (telephone_ == AreaCode) {
		strcat(&output_[0], AREA);
		strcat(&output_[0], CODE);
		for (int i = 0; i < 3; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		strcat(&output_[0], PAUSE);
		for (int i = 3; i < 6; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		strcat(&output_[0], PAUSE);
		for (int i = 6; i < 10; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
		return &output_[0];
	}
	// PROCESS ZERO DOLLARS AND ZERO CENTS
	if (dollar_ && (!dollarNonzero_) && (!centsNonzero_)) {
		strcat(&output_[0], ZERO);
		strcat(&output_[0], DOLLARS);
		return &output_[0];
	}
	// PROCESS FOR YEAR IF INTEGER IN RANGE 1000 TO 1999
	if ((integerDigits_ == 4) && (wordLength_ == 4) &&
			(word_[integerDigitsPos_[0]] == '1') && (mode != OVERRIDE_YEARS)) {
		triad_[0] = '0';
		triad_[1] = word_[integerDigitsPos_[0]];
		triad_[2] = word_[integerDigitsPos_[1]];
		process_triad(&triad_[0], &output_[0], NO, NO, NO, NO, NO);
		if ((word_[integerDigitsPos_[2]] == '0') && (word_[integerDigitsPos_[3]] == '0')) {
			strcat(&output_[0], HUNDRED);
		} else if (word_[integerDigitsPos_[2]] == '0') {
			strcat(&output_[0], OH);
			process_digit(word_[integerDigitsPos_[3]], &output_[0], NO, NO, NO);
		} else {
			triad_[0] = '0';
			triad_[1] = word_[integerDigitsPos_[2]];
			triad_[2] = word_[integerDigitsPos_[3]];
			process_triad(&triad_[0], &output_[0], NO, NO, NO, NO, NO);
		}
		return &output_[0];
	}

	// ORDINARY SEQUENTIAL PROCESSING
	// APPEND POSITIVE OR NEGATIVE IF INDICATED
	if (positive_) {
		strcat(&output_[0], POSITIVE);
	} else if (negative_) {
		strcat(&output_[0], NEGATIVE);
	}

	// PROCESS SINGLE INTEGER DIGIT
	if (integerDigits_ == 1) {
		if ((word_[integerDigitsPos_[0]] == '0') && dollar_) {
			;
		} else {
			process_digit(word_[integerDigitsPos_[0]], &output_[0], ordinal_, NO, NO);
		}
		ordinalPlural_ = (word_[integerDigitsPos_[0]] == '1') ? NO : YES;
	} else if ((integerDigits_ >= 2) && (integerDigits_ <= (TRIADS_MAX * 3))) {
		// PROCESS INTEGERS AS TRIADS, UP TO MAX LENGTH

		int digit_index = 0, num_digits, triad_index, index, pause_flag = NO;
		for (int i = 0; i < 3; i++) {
			triad_[i] = '0';
		}
		index = (int) ((integerDigits_ - 1) / 3.0);
		num_digits = integerDigits_ - (index * 3);
		triad_index = 3 - num_digits;

		for (int i = index; i >= 0; i--) {
			while (num_digits--) {
				triad_[triad_index++] = word_[integerDigitsPos_[digit_index++]];
			}

			if (process_triad(&triad_[0], &output_[0], pause_flag,
					(ordinal_ && (ordinalTriad_ == i)),
					rightZeroPad_, NO, NO) == NONZERO) {
				if (ordinal_ && (ordinalTriad_ == i)) {
					strcat(&output_[0], triad_name[1][i]);
				} else {
					strcat(&output_[0], triad_name[0][i]);
				}
				pause_flag = YES;
			}
			if ((i == 1) && (word_[integerDigitsPos_[digit_index]] == '0') &&
					((word_[integerDigitsPos_[digit_index + 1]] != '0') ||
					(word_[integerDigitsPos_[digit_index + 2]] != '0'))) {
				strcat(&output_[0], AND);
				pause_flag = NO;
			}
			triad_index = 0;
			num_digits = 3;
		}
	} else if ((integerDigits_ > (TRIADS_MAX * 3)) && (!commas_) && (!ordinal_)) {
		// PROCESS EXTREMELY LARGE NUMBERS AS STREAM OF SINGLE DIGITS

		for (int i = 0; i < integerDigits_; i++) {
			process_digit(*(word_ + integerDigitsPos_[i]), &output_[0], NO, NO, NO);
		}
	}

	// APPEND DOLLAR OR DOLLARS IF NEEDED
	if (dollar_ && dollarNonzero_) {
		if (fractionalDigits_ && (fractionalDigits_ != 2)) {
			;
		} else if (dollarPlural_) {
			strcat(&output_[0], DOLLARS);
		} else if (!dollarPlural_) {
			strcat(&output_[0], DOLLAR);
		}
		if (centsNonzero_ && (fractionalDigits_ == 2)) {
			strcat(&output_[0], AND);
		}
	}

	// APPEND POINT IF FRACTIONAL DIGITS, NO SLASH,
		AND IF NOT .00 DOLLAR FORMAT
	if (fractionalDigits_ && (!slash_) &&
			((!dollar_) || (dollar_ && (fractionalDigits_ != 2)))) {
		strcat(&output_[0], POINT);
		for (int i = 0; i < fractionalDigits_; i++) {
			process_digit(word_[fractionalDigitsPos_[i]], &output_[0], NO, NO, NO);
		}
	} else if (slash_) {
		// PROCESS DENOMINATOR OF FRACTIONS

		char ones_digit = '\0', tens_digit = '\0';

		if (((integerDigits_ >= 3) && (fractionalDigits_ >= 3)) ||
				(word_[integerDigitsPos_[integerDigits_ - 1]] == '0')) {
			strcat(&output_[0], PAUSE);
		}

		ones_digit = word_[fractionalDigitsPos_[fractionalDigits_ - 1]];
		if (fractionalDigits_ >= 2) {
			tens_digit = word_[fractionalDigitsPos_[fractionalDigits_ - 2]];
		}

		ordinal_ = YES;
		int special_flag = NO;
		if ((ones_digit == '0' && tens_digit == '\0') ||
				(ones_digit == '1' && tens_digit != '1')) {
			strcat(&output_[0], OVER);
			ordinal_ = ordinalPlural_ = NO;
		} else if (ones_digit == '2') {
			if (tens_digit == '\0') {
				special_flag = HalfFlag;
			} else if (tens_digit != '1') {
				special_flag = SecondthFlag;
			}
		} else if (ones_digit == '4' && tens_digit == '\0') {
			special_flag = QuarterFlag;
		}

		if (fractionalDigits_ == 1) {
			process_digit(ones_digit, &output_[0], ordinal_, ordinalPlural_, special_flag);
		} else if (fractionalDigits_ >= 2 && (fractionalDigits_ <= (TRIADS_MAX * 3))) {
			int digit_index = 0, num_digits, triad_index, index, pause_flag = NO;
			for (int i = 0; i < 3; i++) {
				triad_[i] = '0';
			}
			index = (int) ((fractionalDigits_ - 1) / 3.0);
			num_digits = fractionalDigits_ - (index * 3);
			triad_index = 3 - num_digits;

			for (int i = index; i >= 0; i--) {
				while (num_digits--) {
					triad_[triad_index++] = word_[fractionalDigitsPos_[digit_index++]];
				}

				if (process_triad(&triad_[0], &output_[0], pause_flag,
						(ordinal_ && (fracOrdinalTriad_ == i)),
						fracRightZeroPad_,
						(ordinalPlural_ && (fracOrdinalTriad_ == i)),
						(special_flag && (fracOrdinalTriad_ == i))) == NONZERO) {
					if (ordinalPlural_ && (fracOrdinalTriad_ == i)) {
						strcat(&output_[0], triad_name[2][i]);
					} else if (ordinal_ && (fracOrdinalTriad_ == i)) {
						strcat(&output_[0], triad_name[1][i]);
					} else {
						strcat(&output_[0], triad_name[0][i]);
					}
					pause_flag = YES;
				}
				if ((i == 1) &&
						(word_[fractionalDigitsPos_[digit_index]] == '0') &&
						((word_[fractionalDigitsPos_[digit_index + 1]] != '0') ||
						(word_[fractionalDigitsPos_[digit_index + 2]] != '0'))) {
					strcat(&output_[0], AND);
					pause_flag = NO;
				}
				triad_index = 0;
				num_digits = 3;
			}
		}
	} else if (dollar_ && centsNonzero_ && (fractionalDigits_ == 2)) {
		// APPEND CENTS

		triad_[0] = '0';
		triad_[1] = word_[fractionalDigitsPos_[0]];
		triad_[2] = word_[fractionalDigitsPos_[1]];
		if (process_triad(&triad_[0], &output_[0], NO, NO, NO, NO, NO) == NONZERO) {
			if (centsPlural_) {
				strcat(&output_[0], CENTS);
			} else {
				strcat(&output_[0], CENT);
			}
		}
	}

	// APPEND DOLLARS IF NOT $.00 FORMAT
	if (dollar_ && fractionalDigits_ && (fractionalDigits_ != 2)) {
		strcat(&output_[0], DOLLARS);
	}

	// APPEND PERCENT IF NECESSARY
	if (percent_) {
		strcat(&output_[0], PERCENT);
	}

	// RETURN OUTPUT TO CALLER
	return &output_[0];
}

/****************************************************************************** * * function: number_parser * * purpose: Returns a pointer to a NULL terminated character string / which contains the pronunciation for the string pointed / at by the argument word_ptr. * ******************************************************************************/ const char* ParseNum returns the pronunciation of word.

type TextParser ¶

type TextParser struct {
	NumParser    NumParser
	Dictionaries []dictionary.DictionarySearch
	// contains filtered or unexported fields
}

func NewTextParser ¶

func NewTextParser(configPath string, fns []string) *TextParser

NewTextParser creates a new TextParser. fns (filenames) refer to possible dictionaries. A different escape character could be passed in in the future.

func (*TextParser) ExpandWord ¶

func (tp *TextParser) ExpandWord(word string, isTonic bool, input []rune) (output []rune)

ExpandWord writes the pronunciation of word to stream. Deals with possessives if necessary. Also deals with single characters and upper case words (including special acronyms) if necessary. Adds special marks if word is tonic.

func (*TextParser) FinalConversion ¶

func (tp *TextParser) FinalConversion(input []rune) (success bool, output []rune)

FinalConversion converts contents of stream1 to stream2. Adds chunk, tone group, and associated markers; expands words to pronunciations; and also expands other modes.

func (*TextParser) LookupWord ¶

func (tp *TextParser) LookupWord(word string) (pron string)

LookupWord returns the pronunciation of word, and sets dict to the dictionary in which it was found. Relies on the global dictionaryOrder. Todo: decide on struct/object members.

func (*TextParser) Parse ¶

func (tp *TextParser) Parse(rawtext string) string

Parse takes plain English input and produces phonetic output suitable for further processing in the TTS system. If a parse error occurs, a value of 0 or above is returned. Usually this will point to the position of the error in the input buffer, but in later stages of the parse only a 0 is returned since positional information is lost. If no parser error, then TtsParserSuccess is returned.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func AllToLower ¶

func AllUpper ¶

func AnotherWordFollows ¶

func CheckTonic ¶

func ConditionInput ¶

func ConvertDash ¶

func ConvertSecondaryStress ¶

func ConvertSilence ¶

func DeleteEllipsis ¶

func ExpandAbbreviation ¶

func ExpandLetterMode ¶

func ExpandRawMode ¶

func ExpandTagNumber ¶

func GetState ¶

func HasPrimaryStress ¶

func IllegalSlashCode ¶

func IllegalToken ¶

func InsertChunkMarker ¶

func InsertRunes ¶

func InsertTag ¶

func IsAcronym ¶

func IsIsolated ¶

func IsMode ¶

func IsPunctuation ¶

func IsTelephoneNumber ¶

func MarkModes ¶

func NumberFollows ¶

func PartOfNumber ¶

func SetToneGroup ¶

func ShiftSilence ¶

func StripPunctuation ¶

func WordFollows ¶

Types ¶

type NumParseMode ¶

type NumParser ¶

func (*NumParser) DegenerateString ¶

func (*NumParser) ErrorCheck ¶

func (*NumParser) InitialParse ¶

func (*NumParser) NumberParser ¶

func (*NumParser) ParseNum ¶

type TextParser ¶

func NewTextParser ¶

func (*TextParser) ExpandWord ¶

func (*TextParser) FinalConversion ¶

func (*TextParser) LookupWord ¶

func (*TextParser) Parse ¶

Source Files ¶