#
DateParser.script
# Copyright (C) 2004 Lauri Karttunen
#
# This program is free software; you can redistribute it and/or
modify
# it under the terms of GNU
General Public License
as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later
version.
# This program is distributed in the hope that it will be
useful,
# but WITHOUT ANY WARRANTY; without even the implied
warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the
# GNU General Public License for more details.
# date.script
# define date expressions from "Monday, January 1, 1" to "Sunday,
# December 31, 9999" and build a shallow parser to mark them
# This script constructs a parser that marks up expressions such
# "Friday", "October 15", "October 15, 2004", and
# "Friday, October 15, 2005" by wrapping then inside HTML
# tags "<DATE>Friday</DATE>", "<DATE>February 29,
2004</DATE>", etc.
# See the end of the script usage examples.
define OneToNine [1|2|3|4|5|6|7|8|9];
define ZeroToNine ["0"|OneToNine];
define Even [{0} | 2 | 4 | 6 | 8] ;
define Odd [1 | 3 | 5 | 7 | 9] ;
define N [Even | Odd];
define Day [{Monday} | {Tuesday} | {Wednesday}
| {Thursday} |
{Friday} | {Saturday} | {Sunday}] ;
define Month29 {February};
define Month30 [{April} | {June} | {September} | {December}];
define Month31 [{January} | {March} | {May} | {July} | {August} |
{October} | {December}] ;
define Month [Month29 | Month30 | Month31];
# Numbers from 1 to 31
define Date [OneToNine | [1 | 2] ZeroToNine | 3 [%0 |
1]] ;
# Numbers from 1 to 9999
define Year [OneToNine ZeroToNine^<4];
# Day or [Month and Date] with optional Day and Year
define AllDates [Day | (Day {, }) Month { } Date ({, } Year)];
# Constraints on dates 30 and 31
define MaxDays30 ~$[Month29 { 30}];
define MaxDays31 ~$[[Month29 | Month30] { 31}];
# Combining constraints on dates 30 and 31
define MaxDays [MaxDays30 & MaxDays31];
# Divisible by 4
# Of single digit numbers, 4 and 8 are divisible by 4.
# In larger numbers divisible with 4, if the penultimate
# is even, the last number is 0, 4, or 8. If the penultimate
# is odd, the last number is 2 or 6.
define Div4 [4 | 8 | N* [Even [%0 | 4 | 8] | Odd [2 | 6]]];
# Leap years are divisible by 4 but we subtract centuries
# that are not divisible by 400. Note the double
# subtraction. [[N+ - Div4] {00}] includes 1900 but not 2000.
define LeapYear [Div4 - [[N+ - Div4] {00}]] ;
# Note the [.#. | \N] at the end of the right context.
# 2405 is not a leap year although the year 240 was (in principle).
define LeapDates {February 29, } => _ LeapYear [.#. | \N];
define ValidDates [AllDates & MaxDays & LeapDates];
define DateParser [ValidDates @-> "<DATE>" ...
"</DATE>"] ;
echo
echo Testing DateParser
push DateParser
down Today is Wednesday, October 5, 2004.
down Yesterday was Tuesday.
down February 29, 2000 was a leap day.
down but February 29, 1900 was not a leap day.
down Next leap day is on February 29, 2008