\\ Kevin Ryde, October 2023

\\ This is some PARI/GP code to calculate individual terms of
\\ A366408 and A365624, which are the first start location and the
\\ length of a maximum length block of terms in the Thue-Morse
\\ sequence (A010060) in which every length n subword is distinct.
\\
\\ The strategy is a brute force search through subwords in a large
\\ enough initial portion of the Thue-Morse sequence, tracking the
\\ no-duplicates extent.  This is suitable up to n=2000 or so but
\\ becomes ever slower as n increases.
\\
\\ Results suggest an easy pattern in both sequences.
\\ If proven, that would make searches here completely unnecessary.
\\
\\ Requires PARI/GP 2.9 or higher for Map().


default(strictargs,1);
default(recover,0);

ThueMorse(n) = bitand(hammingweight(n), 1);

\\ Number of different subwords of length n which occur in the
\\ Thue-Morse sequence.
A005942_ThueMorse_subword_complexity(n) =
{
  if(n<=2,1<<n,
     n--;
     my(i=logint(n,2));
     if(bittest(n,i-1),  \\ second-most significant bit
        n<<1 + 2<<i,
        n<<2 - 1<<i));
}

\\ Length of the initial part of the Thue-Morse sequence needed
\\ to contain all subwords of length n which occur.
A334227_ThueMorse_all_subwords_initial_length(n) =
{
  if(n<=2, [0,2,7][n+1],
     n-1 + 6<<logint(n-2,2));
}

\\ Return a 2-element vector [A366408(n), A365624(n)].
A366408_A365624_start_and_length(n) =
{
  my(L = A005942_ThueMorse_subword_complexity(n) + n-1,
     sub = fromdigits(apply(ThueMorse, [0..n-1]), 2),
     seen = Map(Mat([sub,n-1])), \\ sub ends at n-1
     dup = n-2,  \\ no duplicates among endings at dup+1 onward
     mask = bitneg(0,n),
     max_distinct = 1,  \\ initial "sub" only
     max_at_i = n-1,
     pos);

  for(i=n, A334227_ThueMorse_all_subwords_initial_length(L)-1,
     sub = bitor(bitand(sub<<1,mask), ThueMorse(i));
     if(mapisdefined(seen,sub,&pos) && pos>dup,
          dup = pos,
        i-dup > max_distinct,
          max_distinct = i-dup; max_at_i = i);  \\ hew high
     mapput(seen,sub,i));

  my(len = max_distinct + n-1);
  [ max_at_i - len + 1, len ];
}

A366408_start(n) = A366408_A365624_start_and_length(n)[1];
A365624_length(n) = A366408_A365624_start_and_length(n)[2];

{
  print1("A366408 start = ");
  for(n=1,18, print1(A366408_start(n),","));
  print(" ...");
  print1("A365624 length = ");
  for(n=1,18, print1(A365624_length(n),","));
  print(" ...");
}


\\-----------------------------------------------------------------------------

\\ Algorithm
\\ ---------
\\
\\ An upper bound for length A365624(n) follows from the Thue-Morse
\\ subword complexity A005942, which is how many different subwords
\\ of length n occur.  At most all of those could be the distinct
\\ subwords in a block, so
\\
\\     A365624(n) <= L
\\     where L  = A005942_ThueMorse_subword_complexity(n) + n-1
\\
\\ The aim is to examine the length n subwords in enough of the
\\ Thue-Morse sequence to see all blocks of length <= L, knowing
\\ the actual maximum length distinct subwords will be somewhere
\\ in one of them.
\\
\\ A334227_ThueMorse_all_subwords_initial_length(L) is the
\\ length of the initial part of the Thue-Morse sequence needed
\\ to see all length L blocks (and lengths < L too).


\\ Implementation Notes
\\ -------------------
\\
\\ In the code, i is the index of the end of a length n subword in
\\ the Thue-Morse sequence.  A subword of length n is represented
\\ by an n bit integer where each bit is a term of the Thue-Morse
\\ sequence from most to least significant,
\\
\\                   high bit                   low bit
\\     sub = binary ThueMorse(i+n-1), ...,  ThueMorse(i)
\\
\\ At the top of the loop, "sub" is ThueMorse terms ending at i-1.
\\ New bit ThueMorse(i) shifts into sub, and the old highest bit
\\ (smallest ThueMorse index) is discarded by the "mask".
\\
\\ "seen" is a Map() of seen[sub] = pos where pos is the past
\\ value of i where sub was last seen before now (if it was ever
\\ seen before).
\\
\\ "dup" is the maximum end index where a duplicate word occurs,
\\ so that subwords ending at dup+1 .. i-1 have no duplicates.
\\ Initial dup = n-2 is so dup+1 = n-1 is the end of the initial
\\ "sub" and which, being a solitary subword, has no duplicates.
\\
\\ If the new sub ending at i occurred before now at pos > dup,
\\ then increase dup to dup=pos for new maximum duplicate position.
\\
\\ If the new sub occurred at pos <= dup, or never before, then
\\ no change and now the distinct part is dup+1 .. i inclusive
\\ and has increased in length.
\\
\\ "max_distinct" is the maximum difference "i - dup" seen, and so
\\ the maximum number of distinct subwords seen.  After the search,
\\ the resulting maximum block length is
\\
\\      A365624(n) = len = max_distinct + n-1
\\
\\ "max_at_i" is the i where max_distinct was first seen.  This is
\\ the end index of the block (end of the last subword in it) and
\\ so the start index is
\\
\\      A366408(n) = max_at_i - len + 1
\\
\\
\\ Things Not Done
\\ ---------------
\\
\\ The initial "sub" value could be calculated more efficiently
\\ by some length doublings, instead of bits one by one.
\\ But it's a barely noticeable part of the total time.


\\-----------------------------------------------------------------------------
print("end");