/* Find minimal e-free NFA for a?b?c?...z? for alphabet of size n. See http://www.research.att.com/~njas/sequences/A129403 Russ Cox April 2007 rsc@swtch.com gcc -O2 -Wall a129403.c a.out 6 >graph circo -Tps graph >graph.ps; gv graph.ps n total-edges 1 1 2 3 3 6 4 9 5 13 6 18 7 23 8 <= 28 9 <= 34 10 <= 41 */ #include <sys/types.h> #include <sys/signal.h> #include <stdio.h> #include <stdlib.h> #include <assert.h> #include <unistd.h> #include <string.h> #define uint _uint typedef unsigned int uint; typedef struct NFA NFA; enum { N = 16, Debug = 0, Heuristic = 0, /* set for heuristic construction */ }; int n; struct NFA { uint next[N+1][N]; }; uint next(NFA *nfa, uint state, int c) { int i; uint nstate; nstate = 0; for(i=0; i<n; i++) if(state&(1<<i)) nstate |= nfa->next[i][c]; return nstate; } int accepts(NFA *nfa, char *s) { uint state; state = 1; for(; *s; s++){ state = next(nfa, state, *s-'a'); if(state == 0) return 0; } return 1; } typedef struct Work Work; struct Work { uint state; int c; }; int oknfa(NFA *nfa, int maxletter) { int i, rq, wq; Work *e, *f; static Work q[(1<<N)+1]; rq = 0; wq = 1; q[0].state = 1; q[0].c = 0; while(rq < wq){ e = &q[rq++]; for(i=maxletter-1; i>=e->c; i--){ f = &q[wq++]; f->state = next(nfa, e->state, i); if(f->state == 0) return 0; f->c = i+1; } } assert(rq <= (1<<maxletter)+1); return 1; } /* make list of arrows */ typedef struct Arrow Arrow; struct Arrow { int from; int c; int to; int used; }; Arrow arrow[1000]; int narrow; int best; int nsol; int fixed; /* * Print NFA in digraph mode. */ void printnfa(void) { int i; Arrow *a; printf("{\n"); printf("\trankdir=LR;\n"); printf("\trank=same;\n"); printf("\tsize=\"%d,%d\";\n", 8, 8); printf("\tnode [shape = circle];\n"); for(i=0; i<n; i++) printf("\t%c%d -> %c%d [label = \"%c\"];\n", 'A'+nsol, i, 'A'+nsol, i+1, 'a'+i); for(i=1; i<n; i++){ if(i+1==n/2 && Heuristic) continue; printf("\t%c0 -> %c%d [label = \"%c\"];\n", 'A'+nsol, 'A'+nsol, i+1, 'a'+i); } if(Heuristic){ for(i=0; i<n/2; i++) printf("\t%c0 -> %c%d [label = \"%c\"];\n", 'A'+nsol, 'A'+nsol, n/2, 'a'+i); } for(i=0; i<narrow; i++){ a = arrow+i; if(a->used) printf("\t%c%d -> %c%d [label = \"%c\"];\n", 'A'+nsol, a->from, 'A'+nsol, a->to, 'a'+a->c); } printf("}\n"); } /* * Can this NFA be completed successfully? */ int anyhope(NFA *nfa, int nextarrow) { int i, r; Arrow *a; a = arrow+nextarrow; for(i=nextarrow; i<narrow; i++, a++) nfa->next[a->from][a->c] |= 1<<a->to; r = oknfa(nfa, n); for(i=nextarrow; i<narrow; i++){ a--; nfa->next[a->from][a->c] ^= 1<<a->to; } return r; } void printarrows(void) { int i; uint abits; abits = 0; for(i=0; i<narrow; i++){ abits = (abits<<1) | arrow[i].used; if(i%32 == 31){ fprintf(stderr, "%.8x", abits); abits = 0; } } if(i%32) fprintf(stderr, "%.*x", ((i%32)+3)/4, abits); fprintf(stderr, "\n"); } /* * Try building on top of NFA. The next arrow * to choose whether to use is arrow[nextarrow], * and so far usedarrows have been used. */ void trynfa(NFA *nfa, int nextarrow, int usedarrows, int abits) { Arrow *a; /* * Since arrows are sorted by label, if we are * crossing into the arrows with a given label, * the NFA had better accept all the strings * involving letters before that label. */ a = arrow+nextarrow; if(a > arrow && nextarrow < narrow && a[-1].c != a[0].c && !oknfa(nfa, a[0].c)) return; /* * See if we made a good NFA - if so, no more arrows! */ if(nextarrow == narrow && oknfa(nfa, n)){ if(usedarrows <= best){ if(usedarrows < best || nsol == 0){ nsol = 0; fseek(stdout, 0, 0); ftruncate(1, 0); printf("digraph {\n"); } fprintf(stderr, "best - %d ", usedarrows+fixed); printarrows(); best = usedarrows; printnfa(); nsol++; } return; } if(nextarrow == narrow) return; if(nextarrow%8 == 0){ if(!anyhope(nfa, nextarrow)) return; } /* Try not using the arrow. */ trynfa(nfa, nextarrow+1, usedarrows, abits<<1); /* Then try using the arrow (maybe it will be better!). */ if(usedarrows < best){ nfa->next[a->from][a->c] |= 1<<a->to; a->used = 1; trynfa(nfa, nextarrow+1, usedarrows+1, (abits<<1)|1); nfa->next[a->from][a->c] ^= 1<<a->to; a->used = 0; } } void alrm(int s) { printarrows(); alarm(30); } void quit(int s) { printarrows(); } int main(int argc, char **argv) { int label, start, end; Arrow *a; NFA nfa; signal(SIGALRM, alrm); signal(SIGQUIT, quit); alarm(30); if(argc < 2) n = 4; else n = atoi(argv[1]); if(argc < 3) best = (n*(n-1))/2+1+n; else best = atoi(argv[2]); /* Add required arrows. */ memset(&nfa, 0, sizeof nfa); for(label=0; label<n; label++){ nfa.next[label][label] |= 1<<(label+1); nfa.next[0][label] |= 1<<(label+1); } fixed = n + n-1; if(Heuristic){ for(label=0; label<n/2; label++){ if(!(nfa.next[0][label] & (1<<(n/2)))){ nfa.next[0][label] |= 1<<(n/2); fixed++; } } } best -= fixed; narrow = 0; for(label=0; label<n; label++){ for(start=0; start<=label; start++){ for(end=label+1; end<=n; end++){ // If arrow is required, don't bother listing it again. if(nfa.next[start][label] & (1<<end)) continue; // If arrow cannot help, don't bother listing it. if(label == n-2 && end == n) continue; if(Heuristic){ if(start < n/2 && end > n/2) continue; } a = &arrow[narrow++]; a->from = start; a->to = end; a->c = label; } } } fprintf(stderr, "%d arrows [best %d+%d]\n", narrow, fixed, best); trynfa(&nfa, 0, 0, 0); fprintf(stderr, "best %d\n", best+fixed); printf("}\n"); return 0; }