#!/usr/bin/foma -q -f # Construct a DFA for the digit strings of A339803 and try some properties. # Kevin Ryde, January 2021. # Usage: foma -q -f a339803-dfa.foma # # Created for Foma version 0.9.18. "-q" quiet can be omitted to see # detailed progress. Works with HFST too, # # hfst-xfst -q -F a339803-dfa.foma # # The DFA is 1606588 states (omitting a non-accepting sink state) and is # "cyclic" meaning it accepts an infinite set of strings. Foma may take a # minute or so and peak at about 350 mbytes memory (in 32-bits). # # Roughly speaking, states are distinguishable combinations of digit # restrictions which occur. Restrictions are things like 9 not permitted # until after three more digits, or an 8 must occur after exactly four more # digits (so that some preceding 8 isn't isolated). # Note digit 0 quoted "0" since input 0 is the empty regex. define digit "0"|1|2|3|4|5|6|7|8|9; # tooclose9 is strings with somewhere two 9s too close together, # which means < 9 symbols in between. Likewise each other digit. # There's no tooclose0 for digit 0, since consecutive 00 is allowed. # define tooclose1 ?* 1 ?^<1 1 ?*; define tooclose2 ?* 2 ?^<2 2 ?*; define tooclose3 ?* 3 ?^<3 3 ?*; define tooclose4 ?* 4 ?^<4 4 ?*; define tooclose5 ?* 5 ?^<5 5 ?*; define tooclose6 ?* 6 ?^<6 6 ?*; define tooclose7 ?* 7 ?^<7 7 ?*; define tooclose8 ?* 8 ?^<8 8 ?*; define tooclose9 ?* 9 ?^<9 9 ?*; # isolated9 is strings with an isolated 9 somewhere, which means > 9 # symbols not-9 both before and after. The start of string and end of # string are reckoned "infinite" not-9s. # define isolated0 [\"0"* | ?* \"0"^>0] "0" [\"0"^>0 ?* | \"0"*]; define isolated1 [\1* | ?* \1^>1 ] 1 [\1^>1 ?* | \1* ]; define isolated2 [\2* | ?* \2^>2 ] 2 [\2^>2 ?* | \2* ]; define isolated3 [\3* | ?* \3^>3 ] 3 [\3^>3 ?* | \3* ]; define isolated4 [\4* | ?* \4^>4 ] 4 [\4^>4 ?* | \4* ]; define isolated5 [\5* | ?* \5^>5 ] 5 [\5^>5 ?* | \5* ]; define isolated6 [\6* | ?* \6^>6 ] 6 [\6^>6 ?* | \6* ]; define isolated7 [\7* | ?* \7^>7 ] 7 [\7^>7 ?* | \7* ]; define isolated8 [\8* | ?* \8^>8 ] 8 [\8^>8 ?* | \8* ]; define isolated9 [\9* | ?* \9^>9 ] 9 [\9^>9 ?* | \9* ]; # The DFA constructed is for digit strings as they appear in A339803. # This is human-style numbers with zero written as a single 0 digit and # otherwise a string of digits starts not-0. # define not0 digit - "0"; define number "0" | not0 digit*; # Each of the above toocloseX and isolatedX are not permitted. # Build by subtracting them (intersect their complements) from all the # number strings. # # Foma seems quicker (and less memory) minimizing each individual intersect # rather than a single big expression with all subtracts. The echo messages # show progress. Run without "-q" to see the state machine size increasing. # echo start from number strings, and then regex number; echo must not tooclose1 regex ~tooclose1; intersect net echo must not tooclose2 regex ~tooclose2; intersect net echo must not tooclose3 regex ~tooclose3; intersect net echo must not tooclose4 regex ~tooclose4; intersect net echo must not tooclose5 regex ~tooclose5; intersect net echo must not tooclose6 regex ~tooclose6; intersect net echo must not tooclose7 regex ~tooclose7; intersect net echo must not tooclose8 regex ~tooclose8; intersect net echo must not tooclose9 regex ~tooclose9; intersect net echo must not isolated0 regex ~isolated0; intersect net echo must not isolated1 regex ~isolated1; intersect net echo must not isolated2 regex ~isolated2; intersect net echo must not isolated3 regex ~isolated3; intersect net echo must not isolated4 regex ~isolated4; intersect net echo must not isolated5 regex ~isolated5; intersect net echo must not isolated6 regex ~isolated6; intersect net echo must not isolated7 regex ~isolated7; intersect net echo must not isolated8 regex ~isolated8; intersect net echo must not isolated9 regex ~isolated9; intersect net # # Uncomment this to write the DFA to a file. # # AT&T format is text (about 80mbytes) and accepted by various tools. # # "save stack / load stack" is Foma specific but faster. # # # write att a339803-dfa.att # save stack a339803-dfa.stack echo echo A339803 DFA print size define A339803 # ----------------------------------------------------------------------------- # Example Strings echo echo Some example strings accepted: regex A339803; apply down 2002 apply down 231213 apply down 3000300 echo or 002002 not accepted (has leading 0): apply down 002002 echo or 12345 not accepted (wildly wrong): apply down 12345 clear stack echo all words of length 0..6 (there are 9 of them): regex A339803 & ?^{0,6}; print words clear stack # # The b-file b339803.txt by David A. Corneth is all strings to 12 digits. # # The following commented-out lines can make those strings in a file # # length12.txt. There are 4080 of them. They won't be in numerical order. # # # regex A339803 & ?^{0,12}; # print words >length12.txt # clear stack # ----------------------------------------------------------------------------- # Complements # # The various complements intersected above are permitted digit patterns. # These patterns can be formed directly if desired. # Digit 0s are in runs of lengths >1 (ie. 2 or more each), so any # combinations of not-0 and run of 0s of length >1. # This is the complement of isolated0. # echo echo Complement of isolated0 is run lengths >1: regex ~isolated0; regex [ \"0" | "0"^>1 ]*; test equivalent clear stack # Digit 9s are in runs of two or more 9s with a gap of exactly 9 non-9s # between each. Such runs (zero or more) are separated by >9 non-9s. # This is the complement of tooclose9 | isolated9. echo echo Complement of tooclose9 and isolated9 is allowed9: define run9 9 [ \9^9 9 ]^>0; define allowed9 \9* (run9 [ \9^>9 run9 ]* \9*); regex ~tooclose9 & ~isolated9; regex allowed9; test equivalent clear stack # ----------------------------------------------------------------------------- # Always Digit 0..3 # # Every string contains somewhere one of the digits 0..3, ie. there are no # strings at all using only digits 4..9. # # This follows mechanically from the state machine. Or no strings of 5..9 is # clear simply since an initial say 5...5 has a gap of 5 between which the # four further 6..9 cannot fill. Digits 4..9 fail after longer initial # considerations. 4..9 certainly couldn't make strings of unbounded length # since digit d can occur at most 1 in every d+1 digit positions, so maximum # density of filled positions for 4..9 would be only # # 1/5 + 1/6 + 1/7 + 1/8 + 1/9 + 1/10 = 2131/2520 < 1 echo echo no strings using just 4..9, intersect is null: set minimal OFF regex A339803 & [ 4|5|6|7|8|9]*; test null clear stack # The following are digits 4..9 plus any one digit from among 0..3. # Each suffices to make infinite sets of strings (the resulting intersection # is cyclic). echo But any one of 0..3 together with 4..9 suffices, intersect not null: regex A339803 & ["0" |4|5|6|7|8|9]*; test non-null clear stack regex A339803 & [ 1 |4|5|6|7|8|9]*; test non-null clear stack regex A339803 & [ 2 |4|5|6|7|8|9]*; test non-null clear stack regex A339803 & [ 3 |4|5|6|7|8|9]*; test non-null clear stack # ----------------------------------------------------------------------------- # Trailing Zeros # # M. F. Hasler notes in comments in A339803 that any number >=2 of trailing # 0 digits can be added to a string. # # Trailing 0s are optional so long as when present they are length >=2. # The following A339803noend0 is strings not ending 0. Appending optional # 0s of length >=2 gives all strings again. echo echo Trailing 0s of length >=2 always allowed: define A339803noend0 A339803 - [?* "0"]; regex A339803noend0 ("0"^>1); regex A339803; test equivalent clear stack # ----------------------------------------------------------------------------- # Reversible # # M. F. Hasler also notes in comments in A339803 that strings are reversible, # with trailing 0s ignored. # # All the rules for closeness, isolated terms, and runs, are unchanged when # read backwards or forwards. echo echo isolated9 equivalent to own reversal: regex isolated9.r; regex isolated9; test equivalent clear stack echo tooclose9 equivalent to own reversal: regex tooclose9.r; regex tooclose9; test equivalent clear stack # Foma's reversal applied to the full DFA is rather memory-hungry so the # following is left commented out. # # echo A339803noend0 equivalent to own reversal: # regex A339803noend0.r; # regex A339803noend0; # test equivalent # clear stack # ----------------------------------------------------------------------------- echo end