#
# -*- coding: utf-8 -*-
# finnish-prosody.script
# Copyright (C) 2004 Lauri Karttunen
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# This script maps Finnish words into a prosodic representation
# that splits the words into syllables, adds primary and secondary
# stress marks, and organizes the syllables into feet. For example,
# the input "ilmoittautumisesta" 'registering' (Sg. Elative) becomes
#
#      (íl.moit).(tàu.tu.mi).(sès.ta)
#
# where the acute accent on the first vowel indicates primary stress,
# the grave accents mark secondary stress, and feet are enclosed in
# parentheses.
# Note that this script is encoded in UTF-8. To run it,
# you should start xfst in UTF-8 mode:
#
#      xfst -utf8 -l finnish-prosody.script
#
# The version of xfst that comes with the Book is not utf8-enabled.
# To check about the availability of a utf8-enabled version of xfst,
# please write to karttunen@parc.com.
# The descriptive generalizations come from Paul Kiparsky's paper
# "Finnish Noun Inflection" in Generative Approaches to Finnic and
# Saami Linguistics, Diane Nelson and Satu Manninen (eds.), pp.109-161,
# CSLI Publications, 2003. Kiparsky writes (p. 111): "Speaking for the
# moment in derivational terms, Finnish stress is assigned by laying down
# binary feet from left to right. Final syllables are not stressed if
# they are light, and only optionally if they are heavy. An important
# phenomenon is the LH` effect: when the left-to-right scansion
# encounters a Light-Heavy sequence, the light syllable is skipped
# with the result that a ternary foot is formed. At the edge of a
# word the LH` effect is superseded by the inviolable requirement that
# a word must have initial stress."
# For an OT account of the same generalizations, see the Finnish OT Prosody
# script.
################################## DATA ##################################
# Test vocabulary: the Finnish word forms that are composed with the
# prosody transducer at the end of this script.  The definition is a plain
# union of literal strings, so the order of the entries does not matter
# and every word needs to be listed only once.
define FinnWords [
    {kalastelet}                 | {kalasteleminen}           |
    {kalastelemme}               | {kainostelijat}            |
    {ilmoittautuminen}           | {ilmoittautumisesta}       |
    {järjestelemättömyydestänsä} | {järjestelmällisyydelläni} |
    {järjestelmällistämätöntä}   | {voimisteluttelemasta}     |
    {opiskelija}                 | {opettamassa}              |
    {strukturalismi}             | {onnittelemanikin}         |
    {mäki}                       | {perijä}                   |
    {repeämä}                    | {ergonomia}                |
    {puhelimellani}              | {puhelimistani}            |
    {matematiikka}               | {rakastajattariansa}       |
    {kuningas}                   | {ravintolat}               |
    {merkonomin}
];
######################### BASIC DEFINITIONS #############################
# High vowels.
define HighV [i | u | y];
# Mid vowels.
define MidV [e | ö | o];
# Low vowels.
define LowV [ä | a];
# Unstressed vowel: any of the plain (unaccented) vowel symbols.
define USV [LowV | MidV | HighV];
# Consonants.
define C [ b | c | d | f | g | h | j | k | l | m
         | n | p | q | r | s | t | v | w | x | z ];
# Vowels carrying main stress, written with an acute accent.  The quoted
# forms are each a single symbol.
define MSV [ á | é | í | ó | ú | ý |
             "ä́" | "ö́" ];
# Vowels carrying secondary stress, written with a grave accent.  The
# quoted forms are each a single symbol.
define SSV [ à | è | ì | ò | ù | "ỳ" |
             "ä̀" | "ö̀" ];
# Stressed vowel: main-stressed or secondary-stressed.
define SV [SSV | MSV];
# Vowel: stressed or unstressed.
define V [SV | USV];
# Phone: any consonant or vowel.
define P [C | V];
# Boundary: the word edge, or a non-empty run of symbols that are not phones.
define B [.#. | [\P]+];
# Edge: a syllable boundary "." or the word edge.
define E ["." | .#.];
# Strings that contain exactly one syllable boundary ".": a stretch with
# no ".", then a single ".", then another stretch with no ".".
define SB [ ~[$"."] "." ~[$"."] ];
# Light syllable: any number of onset consonants followed by one vowel.
define Light [C* V];
# Heavy syllable: a light syllable extended by at least one more phone
# (vowel or consonant).
define Heavy [Light P+];
# Syllable: light or heavy.
define S [Light | Heavy];
# Stressed syllable: a syllable that contains a stressed vowel.
define SS [$SV & S];
# Unstressed syllable: a syllable that contains no stressed vowel.
define US [S - $SV];
# Syllable with main stress: a syllable that contains a main-stressed vowel.
define MSS [$MSV & S];
# Binary foot: two syllables separated by a syllable boundary.
define BF [S "." S];
######################### RULES FOR PROSODY #############################
# Insert a syllable boundary "." between two adjacent vowels that are not
# to be treated as a diphthong.  Four parallel contexts share the one
# insertion:
#   1. a high or mid vowel followed by a low vowel
#   2. i followed by a mid vowel other than e
#   3. u followed by a mid vowel other than o
#   4. y followed by a mid vowel other than ö
# Every comment below must stay on the same line as its "#": a wrapped
# comment would leave stray text inside the regular expression.
define MarkNonDiphthongs [ [. .] -> "." || [HighV | MidV] _ LowV,  # i.a, e.a
                                           i _ [MidV - e],         # i.o, i.ö
                                           u _ [MidV - o],         # u.e, u.ö
                                           y _ [MidV - ö] ];       # y.e, y.o
# General syllabification.  Scanning left to right and taking the longest
# match, put a "." after every stretch of the form C* V+ C* that is
# immediately followed by a consonant and a vowel.
#
# The general rule has exceptions.  In particular, loan words such as
# ate.isti 'atheist' must be partially syllabified in the lexicon.
define Syllabify [C* V+ C*] @-> ... "." || _ [C V] ;
# Ternary feet.  Wrap a binary foot plus a following light syllable in
# parentheses when
#   - to the left there is the word edge or the end of an earlier foot
#     (the sequence ")" "."), optionally followed by any number of
#     still-unfooted syllable pairs (BF "."), and
#   - to the right there is either a heavy syllable plus one more
#     syllable, or the word edge.
# The rule uses the "//" context separator rather than "||".
define TernaryFeet [BF "." Light] @-> "(" ... ")"
                   // [ {).} | .#. ] [BF "."]* _
                      [ ["." Heavy "." S] | .#. ] ;
# Binary feet.  Parenthesize every remaining syllable pair that is
# delimited on both sides by a syllable boundary or the word edge.
# Material that is already inside a foot is adjacent to "(" or ")" and
# is therefore left alone.
define BinaryFeet BF @-> "(" ... ")" || [.#. | "."] _ [.#. | "."] ;
# Assign the primary stress to the first vowel of the word: the vowel
# that follows the word edge, an optional opening foot bracket, and any
# onset consonants.
#
# The "(" is optional so that a word too short to be footed (a single
# syllable, which matches neither BF nor a ternary foot) still receives
# its initial stress.  For every footed word the result is the same as
# with an obligatory "(".
define MainStress a -> á , e -> é , i -> í , o -> ó ,
                  u -> ú , y -> ý , ä -> "ä́" , ö -> "ö́"
                  || .#. ("(") C* _ ;
# Assign secondary stress to the first vowel of every foot whose first
# vowel is still unstressed: the vowel that follows an opening foot
# bracket and any onset consonants.  The rule rewrites only plain
# vowels, so a foot whose first vowel already carries an accent is left
# untouched.
define SecondaryStress a -> à , e -> è , i -> ì , o -> ò ,
                       u -> ù , y -> "ỳ" , ä -> "ä̀" , ö -> "ö̀"
                       || "(" C* _ ;
# Assign an optional secondary stress to an unfooted final syllable if
# it is heavy.
#   - Left context  "." C*  : the vowel is the first vowel of a syllable
#     that starts right after a syllable boundary.
#   - Right context P+ .#.  : at least one more phone follows, up to the
#     word edge.  This mirrors Heavy = Light P+, so final syllables with
#     more than one phone after the first vowel are covered too.  A
#     footed final syllable ends in ")", which is not a phone, so it
#     never matches.
# The replacement is optional, so both the stressed and the unstressed
# variant are produced.
define OptFinalStress a (->) à , e (->) è , i (->) ì , o (->) ò ,
                      u (->) ù , y (->) "ỳ" , ä (->) "ä̀" , ö (->) "ö̀"
                      || "." C* _ P+ .#. ;
# The complete prosody transducer: the individual rules composed in the
# order in which they must apply.  Vowel-hiatus boundaries come first,
# then general syllabification, then footing (ternary before binary),
# then stress assignment (main, secondary, optional final).
define FinnProsody [
    MarkNonDiphthongs .o. Syllabify       .o.
    TernaryFeet       .o. BinaryFeet      .o.
    MainStress        .o. SecondaryStress .o.
    OptFinalStress
];
# Apply the prosody transducer to the test vocabulary and list the
# lower-side (output) strings of the resulting network.
echo ### Computing [FinnWords .o. FinnProsody]
read regex FinnWords .o. FinnProsody ;
print lower-words
################################ END ######################################
# Here is the output produced by the script:
# (ón.nit).(tè.le).(mà.ni).kìn
# (ón.nit).(tè.le).(mà.ni).kin
# (ó.pet.ta).(màs.sa)
# (ó.pis).(kè.li.ja)
# (ér.go).(nò.mi.a)
# (íl.moit).(tàu.tu).(mì.nen)
# (íl.moit).(tàu.tu.mi).(sès.ta)
# (vói.mis.te).(lùt.te.le).(màs.ta)
# (strúk.tu.ra).(lìs.mi)
# (rá.kas.ta).(jàt.ta.ri).(àn.sa)
# (rá.vin).(tò.lat)
# (ré.pe).(ä̀.mä)
# (pé.ri.jä)
# (pú.he.li).(mèl.la.ni)
# (pú.he.li).(mìs.ta.ni)
# (mä́.ki)
# (má.te.ma).(tìik.ka)
# (mér.ko).(nò.min)
# (kái.nos).(tè.li).jàt
# (kái.nos).(tè.li).jat
# (ká.las).(tè.let)
# (ká.las).(tè.le).(mì.nen)
# (ká.las.te).(lèm.me)
# (kú.nin).gàs
# (kú.nin).gas
# (jä́r.jes).(tèl.mäl).(lìs.tä.mä).(tö̀n.tä)
# (jä́r.jes).(tèl.mäl.li).(sỳy.del).(lä̀.ni)
# (jä́r.jes).(tè.le).(mä̀t.tö).(mỳy.des).(tä̀n.sä)