# -*- coding: utf8 -*-

# This script marks syllable boundaries with periods in a small lexicon
# of inflected Finnish words. In general syllabification is regular, but
# in some loan words, such as "ate.isti", exceptional syllable boundaries
# have to be marked in the lexicon. For the presentation of the
# syllabification rules, see
# http://www.stanford.edu/~laurik/fsmbook/exercises/FinnishSyllabification.html.
#
# NOTE: comments run from "#" to the end of the physical line, so every
# definition below must start on its own line; joining lines turns the
# code that follows a comment into comment text.

# Test lexicon: a union of literal word forms. "ate.isti" carries a
# boundary that is pre-marked in the lexicon.
define FinnWords [{kala} | {riippuu} | {tietoinen} | {sataa} | {satoi} |
                  {saata} | {saatoin} | {auta} | {laiva} | {leipä} |
                  {häijy} | {koulu} | {köyhä} | {lea} | {viestien} |
                  {tuote} | {virtuositeetti} | {laukaus} | {lakkautan} |
                  {voimistelijoiden} | {heittäen} | {heittäisin} |
                  {laulaen} | {kalastelet} | {kalasteleminen} |
                  {ilmoittautuminen} | {järjestelmättömyydestänsä} |
                  {kalastelemme} | {ilmoittautumisesta} |
                  {järjestelmällisyydelläni} | {järjestelmällistämätöntä} |
                  {voimisteluttelemasta} | {opiskelija} | {opettamassa} |
                  {strukturalismi} | {onnittelemanikin} | {mäki} |
                  {perijä} | {repeämä} | {ergonomia} | {puhelimellani} |
                  {matematiikka} | {puhelimistani} | {rakastajattariansa} |
                  {kuningas} | {kainostelijat} | {ravintolat} |
                  {merkonomin} | {ate.isti} | {poikien} | {poikkeus}];

define HighV [u | y | i];                 # High vowel
define MidV  [e | o | ö];                 # Mid vowel
define LowV  [a | ä];                     # Low vowel
define V     [HighV | MidV | LowV];       # Vowel

# Consonant. Neither V nor C contains ".", so a boundary that is already
# present in a word is never matched as part of a syllable.
define C [b | c | d | f | g | h | j | k | l | m | n | p | q | r | s | t |
          v | w | x | z];

# A diphthong is a combination of two unlike vowels that together form
# the nucleus of a syllable. In general, Finnish diphthongs end in a high
# vowel. However, there are three exceptional high-mid diphthongs,
# ie, uo, and yö, that historically come from long ee, oo, and öö,
# respectively. These special diphthongs occur only in the first syllable.
# All other adjacent vowels must be separated by a syllable boundary.
#
# The rule inserts "." between two adjacent vowels in each of the listed
# contexts. The three "MidV _ [MidV - same vowel]" contexts cover unlike
# mid+mid sequences, which are not diphthongs under the description above;
# identical vowels (long ee, oo, öö) are left together.
define MarkNonDiphthongs [
  [. .] -> "." || [HighV | MidV] _ LowV,                   # i.a, e.a
                  LowV _ MidV,                             # a.e
                  i _ [MidV - e],                          # i.o, i.ö
                  u _ [MidV - o],                          # u.e
                  y _ [MidV - ö],                          # y.e
                  e _ [MidV - e],                          # e.o, e.ö
                  o _ [MidV - o],                          # o.e
                  ö _ [MidV - ö],                          # ö.e
                  $V i _ e,                                # poiki.en
                  $V u _ o,                                # uo after the first vowel
                  $V y _ ö,                                # yö after the first vowel
                  $V [MidV | LowV] _ [u | y] C [C|.#.]];   # oike.us

# The general syllabification rule has exceptions. In particular, loan
# words such as ate.isti 'atheist' must be partially syllabified in the
# lexicon.
#
# Left-to-right, longest-match: take C* V+ C* and put "." after it, but
# only where a consonant followed by a vowel comes next, so that a single
# consonant is always left to start the following syllable.
define Syllabify [ C* V+ C* @-> ... "." || _ C V ];

echo Testing ...

# Compose the lexicon with both rules; the lower side of the result is
# the set of syllabified word forms.
regex FinnWords .o. MarkNonDiphthongs .o. Syllabify;

print lower-words