class WordMage::Analysis

Overview

Represents aggregate analysis of multiple words.

Analysis combines data from multiple WordAnalysis instances to provide statistical insights into phonological patterns, frequencies, and structural tendencies. This data can be used to configure generators to produce words with similar characteristics.

This class is JSON-serializable for easy storage and transmission.

Example

analysis = Analysis.new(
  phoneme_frequencies: {"n" => 0.15_f32, "a" => 0.25_f32, "θ" => 0.08_f32},
  syllable_count_distribution: {2 => 0.3_f32, 3 => 0.5_f32, 4 => 0.2_f32},
  average_complexity: 6.2,
  recommended_budget: 6
)

Included Modules

Defined in:

analysis.cr

Constructors

Instance Method Summary

Constructor Detail

def self.new(pull : JSON::PullParser) #

[View source]
def self.new(phoneme_frequencies : Hash(String, Float32) = Hash(String, Float32).new, positional_frequencies : Hash(String, Hash(String, Float32)) = Hash(String, Hash(String, Float32)).new, syllable_count_distribution : Hash(Int32, Float32) = Hash(Int32, Float32).new, syllable_pattern_distribution : Hash(String, Float32) = Hash(String, Float32).new, cluster_patterns : Hash(String, Float32) = Hash(String, Float32).new, hiatus_patterns : Hash(String, Float32) = Hash(String, Float32).new, complexity_distribution : Hash(Int32, Float32) = Hash(Int32, Float32).new, average_complexity : Float32 = 0.0_f32, average_syllable_count : Float32 = 0.0_f32, consonant_vowel_ratio : Float32 = 0.0_f32, recommended_budget : Int32 = 6, recommended_templates : Array(String) = [] of String, recommended_hiatus_probability : Float32 = 0.2_f32, recommended_gemination_probability : Float32 = 0.0_f32, dominant_patterns : Array(String) = [] of String, vowel_transitions : Hash(String, Hash(String, Float32)) = Hash(String, Hash(String, Float32)).new, gemination_patterns : Hash(String, Float32) = Hash(String, Float32).new, vowel_lengthening_patterns : Hash(String, Float32) = Hash(String, Float32).new, phoneme_transitions : Hash(String, Hash(String, Float32)) = Hash(String, Hash(String, Float32)).new, bigram_frequencies : Hash(String, Float32) = Hash(String, Float32).new, trigram_frequencies : Hash(String, Float32) = Hash(String, Float32).new, provided_templates : Array(SyllableTemplate) | Nil = nil) #

Creates a new Analysis with specified parameters.


[View source]

Instance Method Detail

def average_complexity : Float32 #

Average complexity score across all words


[View source]
def average_complexity=(average_complexity : Float32) #

Average complexity score across all words


[View source]
def average_syllable_count : Float32 #

Average syllable count across all words


[View source]
def average_syllable_count=(average_syllable_count : Float32) #

Average syllable count across all words


[View source]
def bigram_frequencies : Hash(String, Float32) #

Bigram frequencies: phoneme_pair -> frequency


[View source]
def bigram_frequencies=(bigram_frequencies : Hash(String, Float32)) #

Bigram frequencies: phoneme_pair -> frequency


[View source]
def bigram_frequency(bigram : String) : Float32 #

Returns the frequency of a specific bigram.

Parameters

  • bigram: The two-phoneme sequence

Returns

Float32 frequency (0.0 if bigram not found)


[View source]
def cluster_patterns : Hash(String, Float32) #

Frequency of consonant clusters


[View source]
def cluster_patterns=(cluster_patterns : Hash(String, Float32)) #

Frequency of consonant clusters


[View source]
def complexity_distribution : Hash(Int32, Float32) #

Distribution of complexity scores


[View source]
def complexity_distribution=(complexity_distribution : Hash(Int32, Float32)) #

Distribution of complexity scores


[View source]
def complexity_preference : Symbol #

Determines whether the analyzed language prefers simple, moderate, or complex structures.

Returns

Symbol indicating complexity preference (:simple, :moderate, :complex)


[View source]
def consonant_vowel_ratio : Float32 #

Consonant to vowel ratio


[View source]
def consonant_vowel_ratio=(consonant_vowel_ratio : Float32) #

Consonant to vowel ratio


[View source]
def dominant_patterns : Array(String) #

Most common syllable patterns


[View source]
def dominant_patterns=(dominant_patterns : Array(String)) #

Most common syllable patterns


[View source]
def final_phonemes(threshold : Float32 = 0.1_f32) : Array(String) #

Returns phonemes that commonly appear in final position.

Parameters

  • threshold: Minimum frequency threshold (default: 0.1)

Returns

Array of phoneme strings that commonly end words


[View source]
def gemination_patterns : Hash(String, Float32) #

Frequency of gemination patterns


[View source]
def gemination_patterns=(gemination_patterns : Hash(String, Float32)) #

Frequency of gemination patterns


[View source]
def generate_vowel_harmony(strength : Float32 = 0.7_f32, threshold : Float32 = 0.1_f32) : VowelHarmony #

Generates a VowelHarmony configuration from the transition data.

Parameters

  • strength: Harmony strength (0.0-1.0, default: 0.7)
  • threshold: Minimum frequency to include in rules (default: 0.1)

Returns

VowelHarmony instance configured from analysis


[View source]
def gusein_zade_deviation : Hash(String, Float32) #

Compares empirical frequencies with Gusein-Zade predictions.

Returns

Hash with deviation metrics between empirical and theoretical frequencies


[View source]
def gusein_zade_weights(phonemes : Array(String) | Nil = nil) : Hash(String, Float32) #

Calculates Gusein-Zade distribution weights for phonemes.

The Gusein-Zade formula: $p_r = C \cdot (\ln(n + 1) - \ln r)$, where $n$ is the total number of phonemes, $r$ is the rank of the phoneme, and $C$ is a normalization constant.

Parameters

  • phonemes: Array of phoneme strings to weight (optional, uses all if not provided)

Returns

Hash mapping phonemes to their Gusein-Zade weights


[View source]
def hiatus_patterns : Hash(String, Float32) #

Frequency of hiatus sequences


[View source]
def hiatus_patterns=(hiatus_patterns : Hash(String, Float32)) #

Frequency of hiatus sequences


[View source]
def initial_phonemes(threshold : Float32 = 0.1_f32) : Array(String) #

Returns phonemes that commonly appear in initial position.

Parameters

  • threshold: Minimum frequency threshold (default: 0.1)

Returns

Array of phoneme strings that commonly start words


[View source]
def most_common_followers(phoneme : String, count : Int32 = 5) : Array(Tuple(String, Float32)) #

Returns the most common phonemes that follow a given phoneme.

Parameters

  • phoneme: The source phoneme
  • count: Number of top transitions to return (default: 5)

Returns

Array of {next_phoneme, frequency} tuples ordered by frequency


[View source]
def most_frequent_bigrams(count : Int32 = 10) : Array(String) #

Returns the most frequent bigrams in order.

Parameters

  • count: Number of top bigrams to return (default: 10)

Returns

Array of bigram strings ordered by frequency


[View source]
def most_frequent_clusters(count : Int32 = 10) : Array(String) #

Returns the most frequent clusters.

Parameters

  • count: Number of top clusters to return (default: 10)

Returns

Array of cluster strings ordered by frequency


[View source]
def most_frequent_patterns(count : Int32 = 5) : Array(String) #

Returns the most frequent syllable patterns.

Parameters

  • count: Number of top patterns to return (default: 5)

Returns

Array of pattern strings ordered by frequency


[View source]
def most_frequent_phonemes(count : Int32 = 10) : Array(String) #

Returns the most frequent phonemes in order.

Parameters

  • count: Number of top phonemes to return (default: 10)

Returns

Array of phoneme strings ordered by frequency


[View source]
def most_frequent_trigrams(count : Int32 = 10) : Array(String) #

Returns the most frequent trigrams in order.

Parameters

  • count: Number of top trigrams to return (default: 10)

Returns

Array of trigram strings ordered by frequency


[View source]
def ngram_diversity : Hash(String, Float32) #

Calculates the n-gram diversity (entropy) of the language.

Returns

Hash with bigram and trigram diversity scores


[View source]
def optimal_syllable_weights : Hash(Int32, Float32) #

Calculates the optimal syllable count weights for generation.

Returns

Hash mapping syllable counts to their optimal weights


[View source]
def phoneme_diversity : Float32 #

Calculates the diversity of phoneme usage.

Returns

Float32 representing phoneme diversity (higher = more diverse)


[View source]
def phoneme_frequencies : Hash(String, Float32) #

Frequency of each phoneme across all analyzed words


[View source]
def phoneme_frequencies=(phoneme_frequencies : Hash(String, Float32)) #

Frequency of each phoneme across all analyzed words


[View source]
def phoneme_frequency_ranking : Array(String) #

Returns phonemes ranked by their empirical frequency.

Returns

Array of phoneme strings ordered by frequency (highest first)


[View source]
def phoneme_rank(phoneme : String) : Int32 #

Calculates the rank of a phoneme in the frequency distribution.

Parameters

  • phoneme: The phoneme to rank

Returns

Int32 rank (1-based) or 0 if phoneme not found


[View source]
def phoneme_transitions : Hash(String, Hash(String, Float32)) #

Phoneme transition frequencies: phoneme -> {next_phoneme -> frequency}


[View source]
def phoneme_transitions=(phoneme_transitions : Hash(String, Hash(String, Float32))) #

Phoneme transition frequencies: phoneme -> {next_phoneme -> frequency}


[View source]
def positional_frequencies : Hash(String, Hash(String, Float32)) #

Positional frequencies: phoneme -> {position -> frequency}


[View source]
def positional_frequencies=(positional_frequencies : Hash(String, Hash(String, Float32))) #

Positional frequencies: phoneme -> {position -> frequency}


[View source]
def preferred_transitions(from_vowel : String, count : Int32 = 3) : Array(Tuple(String, Float32)) #

Returns the most preferred vowel transitions from a given vowel.

Parameters

  • from_vowel: The source vowel
  • count: Number of top transitions to return (default: 3)

Returns

Array of {vowel, frequency} tuples ordered by frequency


[View source]
def provided_templates : Array(SyllableTemplate) | Nil #

Provided SyllableTemplate objects (when analysis uses explicit templates)


[View source]
def provided_templates=(provided_templates : Array(SyllableTemplate) | Nil) #

Provided SyllableTemplate objects (when analysis uses explicit templates)


[View source]
def recommended_budget : Int32 #

Recommended complexity budget for generator


[View source]
def recommended_budget=(recommended_budget : Int32) #

Recommended complexity budget for generator


[View source]
def recommended_gemination_probability : Float32 #

Recommended gemination probability based on detected patterns


[View source]
def recommended_gemination_probability=(recommended_gemination_probability : Float32) #

Recommended gemination probability based on detected patterns


[View source]
def recommended_hiatus_probability : Float32 #

Recommended hiatus probability


[View source]
def recommended_hiatus_probability=(recommended_hiatus_probability : Float32) #

Recommended hiatus probability


[View source]
def recommended_templates : Array(String) #

Recommended syllable templates based on patterns


[View source]
def recommended_templates=(recommended_templates : Array(String)) #

Recommended syllable templates based on patterns


[View source]
def smoothed_phoneme_frequencies(smoothing_factor : Float32 = 0.3_f32, phonemes : Array(String) | Nil = nil) : Hash(String, Float32) #

Generates smoothed phoneme frequencies using Gusein-Zade distribution.

Combines empirical frequencies with theoretical Gusein-Zade weights to create more naturalistic frequency distributions.

Parameters

  • smoothing_factor: Weight given to the theoretical Gusein-Zade weights relative to the empirical frequencies (0.0-1.0, default: 0.3)
  • phonemes: Array of phoneme strings to smooth (optional, uses all if not provided)

Returns

Hash mapping phonemes to their smoothed frequencies


[View source]
def structural_complexity : Float32 #

Calculates the structural complexity index.

Returns

Float32 representing overall structural complexity


[View source]
def summary : String #

Generates a summary report of the analysis.

Returns

String containing a human-readable summary


[View source]
def syllable_count_distribution : Hash(Int32, Float32) #

Distribution of syllable counts


[View source]
def syllable_count_distribution=(syllable_count_distribution : Hash(Int32, Float32)) #

Distribution of syllable counts


[View source]
def syllable_pattern_distribution : Hash(String, Float32) #

Distribution of syllable patterns (CV, CVC, etc.)


[View source]
def syllable_pattern_distribution=(syllable_pattern_distribution : Hash(String, Float32)) #

Distribution of syllable patterns (CV, CVC, etc.)


[View source]
def transition_probability(from_phoneme : String, to_phoneme : String) : Float32 #

Returns the transition probability between two phonemes.

Parameters

  • from_phoneme: The source phoneme
  • to_phoneme: The target phoneme

Returns

Float32 probability (0.0 if transition not found)


[View source]
def trigram_frequencies : Hash(String, Float32) #

Trigram frequencies: phoneme_triple -> frequency


[View source]
def trigram_frequencies=(trigram_frequencies : Hash(String, Float32)) #

Trigram frequencies: phoneme_triple -> frequency


[View source]
def trigram_frequency(trigram : String) : Float32 #

Returns the frequency of a specific trigram.

Parameters

  • trigram: The three-phoneme sequence

Returns

Float32 frequency (0.0 if trigram not found)


[View source]
def valid? : Bool #

Validates the analysis data for consistency.

Returns

true if the analysis data is valid, false otherwise


[View source]
def vowel_harmony_strength : Symbol #

Classifies how strongly the analyzed language exhibits vowel harmony patterns.

Returns

Symbol indicating harmony strength (:none, :weak, :moderate, :strong)


[View source]
def vowel_lengthening_patterns : Hash(String, Float32) #

Frequency of vowel lengthening patterns


[View source]
def vowel_lengthening_patterns=(vowel_lengthening_patterns : Hash(String, Float32)) #

Frequency of vowel lengthening patterns


[View source]
def vowel_transition_diversity : Float32 #

Calculates vowel transition diversity.

Returns

Float32 representing how diverse vowel transitions are


[View source]
def vowel_transitions : Hash(String, Hash(String, Float32)) #

Vowel transition frequencies: vowel -> {next_vowel -> frequency}


[View source]
def vowel_transitions=(vowel_transitions : Hash(String, Hash(String, Float32))) #

Vowel transition frequencies: vowel -> {next_vowel -> frequency}


[View source]