#!/usr/bin/perl -w

# Generate terms of A339611.
# Kevin Ryde, December 2020.
#
# Usage: perl a339611.pl
#        perl a339611.pl 100     # target number of terms
#        perl a339611.pl -f      # include "full" markers
#
# A number argument like 100 is the target number of terms to find.
# Actual output extends beyond this to the next point of certainty.
# Output is b-file style to standard output
#
#     1 1
#     2 2
#     3 13
#     4 20
#     ...
#
# The strategy is a recursive backtracking search.  Terms are printed when
# they won't be changed by backtracking.  This is when the sequence
# conditions are fully satisfied if the sequence stopped at this point.
# If backtracking wanted to go back earlier (and increase a term there) then
# a finite sequence to this point would be lexicographically smaller.
#
# The sequence is infinite insofar as it's always possible to extend a full
# sequence by appending more digits.  But the question is how long the
# lex-smallest no-duplicates rules might conspire so as not to reach a new
# full point (or maybe other point of certainty by other reasoning).
# In data calculated so far, full points are not very far apart so the
# present code suffices.
#
# Option "-f" includes "# full here" markers in the output to show the
# fully-satisfied places.  These markers won't be wanted in an actual
# b-file, but indicate the sort of progress made.
#
# Option "-v" prints a great deal of impenetrable debug output.
#
#
# Efficiency
# ----------
#
# Digit spacing restrictions are represented by a "state" saying what must
# be avoided or required in subsequent digit positions.  An early check is
# made to see the "required" won't require a particular subsequent position
# be two different digits, since that would be impossible.  This check
# prunes the search tree by excluding immediately some impossible branches.
#
# Backtracking is by recursive calls to a search() function.  When a "full"
# point of certainty is reached, these calls return so as to unwind the
# stack and free memory.  Within the search, prospective new terms (numbers)
# at each position are held in a queue of strings (ordered numerically).
# There's no attempt to minimize memory in the queue (such as breadth-first
# walk of permitted subsequent digits).  Peak memory usage seems reasonable
# with simple queues.
#
# The ASSERTS compile-time option adds consistency checks at various places
# in the code, including some regexps to check the digit restrictions in the
# values, both as they're made and at claimed points of fully-satisfied.
# These checks are slow and are disabled by default.


use 5.006;
use strict;
use warnings;
use List::Util 'max';
no warnings 'recursion';   # potential deep descents by search()
$|=1;

# Change this to 1 to include some time consuming self-checks along the way.
# Leave it 0 for full speed.
use constant ASSERTS => 0;

my $verbose = 0;
my $found_limit = 100;
my $show_full = 0;
foreach my $arg (@ARGV) {
  if ($arg =~ /^\d+$/) {
    $found_limit = $arg;
  } elsif ($arg eq '-v') {
    $verbose++;
  } elsif ($arg eq '-f') {
    $show_full = 1;
  } else {
    die "Unrecognised argument: $arg";
  }
}

my @regexps_tooclose;
my @regexps_isolated_part;
my @regexps_isolated_full;
foreach my $d (0 .. 9) {

  # Cannot have two digits d closer than d apart.
  if ($d > 0) {
    my $dsub1 = $d - 1;
    push @regexps_tooclose, qr/$d [^$d]{0,$dsub1} $d/x;
  }

  # For full complete string:
  # Cannot have an isolated d, ie. gap >d both before and after.
  # Start of string is an infinite gap before.
  # End of string is an infinite gap after.
  my $dplus1 = $d + 1;
  push @regexps_isolated_full, qr/(^[^$d]*  | [^$d]{$dplus1})
                                  $d
                                  ( [^$d]*$ | [^$d]{$dplus1})/x;

  # For initial prefix (not yet full):
  # Relax so end of string is not an infinite gap (because an appropriate
  # further d might come later).
  push @regexps_isolated_part, qr/(^[^$d]* | [^$d]{$dplus1})
                                  $d
                                  (           [^$d]{$dplus1})/x;
}

# string_is_full() returns true if $str satisfies the digit restrictions:
# Nowhere digits too close together, nowhere an isolated digit d.
sub string_is_full {
  my ($str) = @_;
  foreach my $re (@regexps_tooclose, @regexps_isolated_full) {
    if ($str =~ $re) { return 0; }
  }
  return 1;
}

# string_is_prefix() returns true if $str satisfies the digit restrictions,
# as an initial prefix.
# (But no check whether two isolated end digits would both require a partner
# at the same later position, which would be impossible.)
sub string_is_prefix {
  my ($str) = @_;
  foreach my $re (@regexps_tooclose, @regexps_isolated_part) {
    if ($str =~ $re) {
      if ($verbose >= 2) { print "  str $str bad, match $& by $re\n"; }
      return 0;
    }
  }
  if ($verbose >= 2) { print "  str $str ok\n"; }
  return 1;
}

# simple string checks
foreach my $elem (['121', 1,0],
                  ['11', 0,0],
                  ['22', 0,0],
                  ['33', 0,0],
                  ['99', 0,0],
                  ['2342', 1,0],
                  ['23452', 0,0],
                  ['12100', 0,0],
                  ['121011', 0,0],
                  ['200200', 1,1],
                  ['200200200200', 1,1],
                  ['20020000000', 1,1],
                  ['12132003', 1,1],
                  ['131003', 1,1],
                  ['1410014', 1,0],
                  ['14100141', 1,1],
                  ['15120025', 1,1],
                  ['12132003', 1,1],
                  ['30023121310031213200312132003', 1,1],
                 ) {
  my ($str,$want_prefix,$want_full) = @$elem;
  my $got_prefix = string_is_prefix($str);
  my $got_full   = string_is_full($str);
  unless ($got_prefix == $want_prefix) {
    die "string_is_prefix() $str got $got_prefix want $want_prefix";
  }
  unless ($got_full == $want_full) {
    die "string_is_full() $str got $got_full want $want_full";
  }
}

# $state is an arrayref [ g0, g1, ..., g9,       # elems 0 to 9
#                         r0, r1, ..., r9 ].     # elems 10 to 19
# gX is what gap is required to the next occurrence of digit X.
# rX is true if X is required after exactly gX gap.
#
# gX = 0 when the previous occurrence of X was exactly distance X before.
# If rX true then X must appear next.
# If rX false then X can optionally appear next.
#
# gX = -1 when the previous occurrence of X is >X distance before.
# This means X is free to appear at any time.  (Such an appearance will
# then get rX true because it will require later X at distance X.)
#
# state_transition() takes arrayref $state and returns a new arrayref which
# is the state after the given $digit is appended.  If $digit is not
# permitted next (due to gaps etc), then return undef.
#
# A gap gX with rX true constrains the gX'th next term to be X.  If another
# digit Y has rY true and gX=gY then this gX=gY'th future term would have to
# be X and Y at the same time, which is impossible.  state_transition()
# notices when $digit would cause this, and returns undef for not permitted.
#
sub state_transition {
  my ($state, $digit) = @_;
  my @new_state;
  my @required;
  foreach my $d (0 .. 9) {
    if ($d == $digit) {
      if ($state->[$d] > 0) {
        return undef;   # bad, too soon for $digit
      }
      $new_state[$d] = $d;
      if ($state->[$d] < 0) {
        # no preceding $d to pair with, require one later
        $new_state[$d+10] = 1;
      }
    } else {
      if ($state->[$d] == 0 && $state->[$d+10]) {
        return undef;   # bad, required $d here, not $digit
      }
      $new_state[$d]    = max(-1, $state->[$d] - 1);
      $new_state[$d+10] = $state->[$d+10];
    }

    if ($new_state[$d+10] && $required[$new_state[$d]]++) {
      # bad, two different digits required after gap $new_state[$d]
      return undef;
    }
  }
  return \@new_state;
}

# Return true if $state fully satisfies the digit restrictions.
# This means all "rX" are false, no digit required to appear later.
sub state_is_full {
  my ($state) = @_;
  foreach my $i (10 .. 19) {
    if ($state->[$i]) { return 0; }
  }
  return 1;
}

sub state_validate {
  my ($state) = @_;
  my @required;
  foreach my $d (0 .. 9) {
    ($state->[$d] >= -1 && $state->[$d] <= $d)
      or die "oops, state gap out of range";
    if ($state->[$d+10]) {
      if ($required[$state->[$d]]++) {
        die "oops, state requires two different at gap $state->[$d]:  ",
          state_to_string($state);
      }
    }
  }
  return 1;
}

sub state_to_string {
  my ($state) = @_;
  return (defined $state
          ? join(',', map {$state->[$_] . ($state->[$_+10] ? '*' : '')} 0 .. 9)
          : "none");
}

# @values is terms of the sequence, so far.
# %values_used is a hash with keys which are all of @values.
# $values_show_next is the next index in @values to print out when ready.
#
my @values;
my %values_used;
my $values_show_next = 0;

sub values_push {
  my ($t) = @_;
  if (ASSERTS) { if ($values_used{$t}) { die "oops, duplicate"; } }
  push @values, $t;
  $values_used{$t} = 1;

  if (ASSERTS) {
    join(',',sort keys %values_used) eq join(',',sort @values)
      or die "oops, inconsistent values and values_used";
    string_is_prefix(join('',@values))
      or die "oops, values concat fails string_is_prefix()";
  }
}
sub values_pop {
  my ($t) = @_;
  if (ASSERTS) { $values[-1] eq $t or die "oops, wrong value popped";
                 $values_used{$values[-1]} or die "oops, value not in hash"; }
  delete $values_used{pop @values};
}

# search() looks at @values (and %values_used) and tries to add more terms.
# $state is the digit state after all of the current @values.
#
# If unable to extend, then return string 'backtrack' and @values is
# unchanged.
#
# If able to extend with more terms certain to be correct, then @values has
# those values pushed and the return is string 'unwind'.  Global variable
# $unwind_state is the digits state after all of @values.
#
# In the code, @queue is prospective new terms to push onto @values.
# These satisfy the digit conditions insofar as state_transition() has not
# returned undef.  @queue terms might duplicate some of @values.
# Duplicates are not candidates to go into @values, but they do get a
# further digit appended to make more prospective terms in @queue.
#
my $unwind_state;
sub search {
  my ($state) = @_;
  my @queue;
  my @queue_state;

  # $t = 0 to 9 prospective new terms, state permitting
  foreach my $t (0 .. 9) {
    if (my $new_state = state_transition($state,$t)) {
      push @queue, $t;
      push @queue_state, $new_state;
    }
  }

  for (;;) {
    unless (@queue) {
      if ($verbose) { print "  backtrack\n"; }
      return 'backtrack';
    }

    my $t = shift @queue;
    $state = shift @queue_state;
    if ($verbose) {
      my $p = max(0, $#values - 4);
      my $prev = join(',', @values[$p .. $#values]);
      if ($p) { $prev = "...$prev"; }
      if (@values) { $prev .= ', '; }
      print "consider at len=",scalar(@values),
        " terms $prev$t  state ",state_to_string($state),"\n";
    }
    if (ASSERTS) { state_validate($state); }

    if (state_is_full($state) && ! $values_used{$t}) {
      values_push($t);
      if ($verbose) {
        print "  found full, length ",scalar(@values),"\n";
      }
      # b-file style output
      for ( ; $values_show_next <= $#values; $values_show_next++) {
        my $n = $values_show_next + 1;
        print "$n $values[$values_show_next]\n";
      }
      if ($show_full) {
        print "# full here\n";
      }
      if (scalar(@values) >= $found_limit) {
        if ($verbose) { print "stop for limit ($found_limit)\n"; }
        exit 0;
      }
      if (ASSERTS) {
        my $str = join('',@values);
        string_is_prefix($str) or die "oops, regexps say invalid string";
        string_is_full($str) or die "oops, regexps say not full";
      }

      $unwind_state = $state;
      return 'unwind';
    }

    if ($values_used{$t}) {
      if ($verbose) { print "  skip duplicate $t\n"; }
    } else {
      values_push($t);
      if ($verbose) { print "  descend\n"; }

      if (search($state) eq 'unwind') { return 'unwind'; }

      values_pop($t);
      if ($verbose) { print "back to len=",scalar(@values),"\n"; }
    }

    # new digit appended to $t
    unless ($t eq '0') {
      foreach my $d (0 .. 9) {
        if (my $new_state = state_transition($state,$d)) {
          push @queue, $t.$d;
          push @queue_state, $new_state;
        }
      }
    }
  }
}

# Initial state for the empty string.
$unwind_state = [(-1) x 10];
while (search($unwind_state) eq 'unwind') { }

exit 0;