\documentstyle[12pt,fullpage]{article}

\hsize=6in
\vsize=9in

\pagestyle{myheadings}
\markright{\sc the electronic journal of combinatorics 5 (1998),\#R27\hfill}
\thispagestyle{empty}

\newcommand{\GN}{\mbox{\rm I$\!$N}}
\newcommand{\GF}{\mbox{\rm I$\!$F}}
\newcommand{\GR}{\mbox{\rm I$\!$R}}
\newcommand{\GZ}{\mbox{\rm Z$\!\!$Z}}
\newcommand{\gN}{\mbox{\rm \scriptsize I$\!$N}}
\newcommand{\gF}{\mbox{\rm \scriptsize I$\!$F}}
\newcommand{\gZ}{\mbox{\rm \scriptsize Z$\!\!$Z}}
\newcommand{\gR}{\mbox{\rm \scriptsize I$\!$R}}
\newcommand{\GQ}{\mbox{\rm I\hskip -1.95mm Q}}

\def \bgno{\medbreak\noindent}
\def \proof{\bgno{\bf Proof.\ \ }}
\def \prend{\vrule depth-1pt height7pt width6pt}
\def \endpf{\ \ \prend \medskip}
\def \mod#1 #2{#1\ (\rm{mod}\ #2)}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{proposition}{Proposition}
\newtheorem{definition}{Definition}

\title{Extremal infinite overlap-free binary words}
\author{
Jean-Paul Allouche \\
CNRS, LRI \\
B\^atiment 490 \\
F-91405 Orsay Cedex (France) \\
{\tt allouche@lri.fr} \\
\and
James Currie \\
Department of Mathematics \\
University of Winnipeg \\
Winnipeg, Manitoba R3B 2E9 (Canada) \\
{\tt currie@io.uwinnipeg.ca} \\
\and Jeffrey Shallit \\
Department of Computer Science\\
University of Waterloo\\
Waterloo, Ontario N2L 3G1 (Canada) \\
{\tt shallit@graceland.uwaterloo.ca} \\
\\
\\
Submitted:  August 29, 1997; Accepted:  May 3, 1998.\\
}

\date{ }

\begin{document}


\maketitle

\begin{abstract}
     Let $\overline{\bf t}$ be the infinite fixed point, starting 
with $1$, of the morphism $\mu: 0 \rightarrow 01$, $1 \rightarrow 10$.
An infinite word over $\lbrace 0, 1 \rbrace$ is said to be
{\sl overlap-free} if it contains no factor of the form $axaxa$, where
$a \in \lbrace 0,1 \rbrace$ and $x \in \lbrace 0,1 \rbrace^*$.
We prove that the lexicographically least infinite overlap-free
binary word beginning with any specified prefix, if it exists, has a 
suffix which is a suffix of \ $\overline{\bf t}$.
In particular, the lexicographically least infinite overlap-free binary 
word is $001001 \overline{\bf t}$.
\end{abstract}

{\it Keywords:}\ Homomorphism, fixed point, overlap-free word.

1991 Mathematics Subject Classification:  Primary 68R15.

\section{Introduction}

Since the pioneering work of Thue \cite{Thue1,Thue2} (see also \cite{Ber2})
the overlap-free words on a finite alphabet, i.e., those words that do
not contain a factor $axaxa$, where $x$ is a word and $a$ a letter, have 
been studied extensively. The question of extremality (for the lexicographic
order) of overlap-free binary infinite words seems to have been addressed 
only once: Berstel proved \cite{Ber1}, (see also \cite{Ber2}),
that the lexicographically {\it greatest\/}
infinite overlap-free word on the binary alphabet $\{0,1\}$ that begins with 
$0$ is the Thue-Morse sequence ${\bf t} = 01101001 \cdots$, which shows 
once more the ubiquity of this sequence.  (Recall that $\bf t$
is one of the fixed points of the morphism $0 \longrightarrow 01$,
$1 \longrightarrow 10$.  We let $\overline{\bf t} = 10010110 \cdots$
denote the other fixed point.)  The following 
natural question then arises:  what is the 
lexicographically {\it least\/} overlap-free word on $\{0,1\}$ that 
begins with $0$?  Computing the first few terms suggests that this word is 
$0010011001011001101001 \cdots = 001001\overline{{\bf t}}$.

The main result of this paper is the following:
{\sl the lexicographically least 
overlap-free sequence with any specified prefix, if it exists, has a suffix
equal to a suffix of \ $\overline{\bf t}$}.
Furthermore, we give an algorithm to construct this sequence.  Of course, 
replacing ``least'' by ``greatest'' and interchanging $0$'s and $1$'s gives 
a dual result. 

\section{Notation} 

In what follows we consider words or infinite words (sequences) on the 
binary alphabet $\{0,1\}$. Words will be denoted by lower-case letters 
(usually $r$, $s$, $\cdots$). The set of all finite words on $\{0,1\}$ is 
denoted by $\{0,1\}^*$. Elements of $\{0,1\}$ will be denoted by $a$, $b$, 
$c$, $d$, $\cdots$.  Infinite words will be denoted by boldface small letters 
${\bf x}$, ${\bf y}$, $\cdots$. The length (number of letters) of 
a (finite) word $w$ is denoted by $|w|$. If $w$ is a word, $\overline{w}$ 
is the word obtained from $w$ by replacing the $0$'s by $1$'s and the $1$'s 
by $0$'s.

We define the morphism $\mu$ on $\{0,1\}^*$ by 
$\mu(0) = 01$, $\mu(1) =10$,
and we extend it to infinite words by continuity.
The infinite fixed point of $\mu$ beginning with $0$ is denoted by 
${\bf t}$, hence
$${\bf t} = 0110100110010110 \cdots.$$
The infinite fixed point of $\mu$ beginning with $1$ is 
$\overline{{\bf t}}$. These sequences are called the
Thue-Morse sequences.

The lexicographic order for infinite words is defined by
${\bf x} < {\bf y}$ if and only if there exists $k \geq 0$ such
that the prefixes of length $k$ of ${\bf x}$ and ${\bf y}$ are
equal, and the $(k+1)$-th letter of ${\bf x}$ is
$0$ while the $(k+1)$-th letter of ${\bf y}$ is $1$. We note that the 
morphism $\mu$ is order-preserving on the set of infinite words;
more precisely, ${\bf x} < {\bf y} \Leftrightarrow 
\mu({\bf x}) < \mu({\bf y})$.

\section{Tools}

In this section we provide some basic tools that will be useful
in the proof of our main result. The three lemmas we 
give demonstrate the close relationship between the morphism $\mu$ and 
overlap-free words.
Parts of the lemmas below can be found in the literature (essentially in
\cite{Thue1,Thue2}; see also \cite{BerSee}). Nevertheless, it seems that
in the factorization lemma we give below (Lemma~\ref{factorization}),
the question of uniqueness of the decomposition
was first addressed in \cite{Kfo}.

\bigskip

We start with a technical lemma.

\begin{lemma}\label{technical}

\mbox{ }

1. If $y, y' \in \{0,1\}^*$, and if there exist 
$c, d \in \{0,1\}$ such that $u = c \mu(y) = \mu(y') d$, then 
$u = c \, (\overline{c} \, c)^{|y|}$.

2. If $y, z \in \{0,1\}^*$ satisfy $zz = \mu(y)$, then
there exists $x \in \{0,1\}^*$ such that $z=\mu(x)$.
\end{lemma}

\proof

1. We first note that $y$ and $y'$ must have the same length.
Let $y=a_1 \, a_2 \, \cdots \,  a_s$ and $y'=b_1 \, b_2 \, \cdots \, b_s$.
Then we have:
$$
c \, a_1 \, \overline{a_1} \, a_2 \, \overline{a_2} \, \cdots \,
a_{s-1} \, \overline{a_{s-1}} \, a_s \, \overline{a_s} =
b_1 \, \overline{b_1} \, b_2 \, \overline{b_2} \, \cdots \,
b_{s-1} \, \overline{b_{s-1}} \, b_s \, \overline{b_s} \, d.
$$
Hence $b_1 = c$, $a_1 = \overline{b_1} = \overline{c}$, 
$b_2 = \overline{a_1} = c$,
$a_2 = \overline{b_2} = \overline{c}$, $\cdots$, 
$b_s = \overline{a_{s-1}} = c$, 
$a_s = \overline{b_s} = \overline{c}$,
and $\overline{a_s} = d$, i.e., $c=d$, and hence finally
$u = c (\overline{c} \, c)^s$.

\bigskip

2. Suppose that $zz=\mu(y)$. If $|z|$ is even, the result is clear, since 
then $|\mu(y)| \equiv \mod{0} {4}$, and hence $|y|$ is even. 
Hence $z$ is the image by $\mu$ of the prefix of $y$ of length $|y|/2$.
Let us show that $|z|$ cannot be odd. If it were, let $z=au=vb$, where
$a,b \in \{0,1\}$ and $u$ and $v$ are binary words of even length. 
Then $\mu(y)=zz=vbau$; hence $u$ and $v$ must be the images by $\mu$ 
of binary words, say $u=\mu(r)$ and $v=\mu(s)$. Hence $zz=\mu(s)ba\mu(r)$,
and $b=\overline{a}$. But we have $z=a\mu(r)=\mu(s)b$, hence from assertion
1 above, $z=a \, (\overline{a} \, a)^{|r|}$. But the last letter of $z$
cannot be simultaneously equal to $a$ and to $\overline{a}$.
\endpf

\bigskip

Next, we prove that the morphism $\mu$ behaves nicely
when applied to overlap-free words.

\begin{lemma}\label{imagebymu}

\mbox{ }

1. Let $w \in \{0,1\}^*$. Then $w$ is overlap-free if and only if 
$\mu(w)$ is overlap-free. 

\bigskip

2. Let ${\bf x}$ be an infinite word on $\{0,1\}$. Then
\begin{itemize}
\item[(a)] $\mu({\bf x})$ is overlap-free if and only if 
${\bf x}$ is overlap-free.
\item[(b)] $0\mu({\bf x})$ is overlap-free if and only if 
$1{\bf x}$ is overlap-free.
\item[(c)] $1\mu({\bf x})$ is overlap-free if and only if 
$0{\bf x}$ is overlap-free.
\item[(d)] $00\mu({\bf x})$ is overlap-free if and only if 
$1{\bf x}$ is overlap-free and ${\bf x}$ begins with $101$.
\item[(e)] $11\mu({\bf x})$ is overlap-free if and only if 
$0{\bf x}$ is overlap-free and ${\bf x}$ begins with $010$.
\end{itemize}

3. The Thue-Morse sequences ${\bf t}$ and $\overline{{\bf t}}$ 
are overlap-free. The sequences $0{\bf t}$, $1{\bf t}$, 
$0 \, \overline{{\bf t}}$, and $1 \, \overline{{\bf t}}$ are 
overlap-free. 

\bigskip

4. The sequences $01 \,\overline{{\bf t}}$, $10 \,\overline{{\bf t}}$,
$110 \,\overline{{\bf t}}$, and $001001 \,\overline{{\bf t}}$ are
overlap-free. 

\end{lemma}

\proof

1. This is proved in \cite{Thue2}. 

\bigskip

2. (a) As in \cite{Thue2}, assertion 1 above immediately extends to infinite 
words, showing that $\mu({\bf x})$ is overlap-free if and only if 
${\bf x}$ is overlap-free. 

\bigskip

(b) Now if the infinite word $0\mu({\bf x})$ contains an overlap, then 
the word ${\bf s}=10\mu({\bf x})$ {\em a fortiori} contains an overlap. 
Since ${\bf s}=\mu(1{\bf x})$, this implies that $1{\bf x}$ 
contains an overlap. If conversely the word $1{\bf x}$ contains an overlap, 
it can occur in two ways: either the overlap is a prefix of $1{\bf x}$, 
hence ${\bf x}$ begins with $z1z1$ for some finite word $z$, or the 
overlap occurs ``inside'' $1{\bf x}$, which means that ${\bf x}$ 
itself contains an overlap. In the latter case, $\mu({\bf x})$ contains an 
overlap, hence $0\mu({\bf x})$ contains an overlap. In the former case, 
$0\mu({\bf x})$ begins with $0\mu(z)10\mu(z)10$ and hence contains an 
overlap. This proves (b). The proof of (c) is similar.

\bigskip

(d) Suppose now that $00\mu({\bf x})$ is overlap-free. Then 
$0\mu({\bf x})$ is also overlap-free; hence, from the preceding argument, 
$1{\bf x}$ is overlap-free. Furthermore, if ${\bf x}$ begins with 
$abc$ where $a,b,c \in \{0,1\}$, then 
$0 \, 0 \, a \, \overline{a} \, b \, \overline{b} \, c \, \overline{c}$ 
and $1abc$ must be overlap-free; hence, easily, $a=1$, $b=0$, and $c=1$. 
Conversely, suppose that $1{\bf x}$ is overlap-free and begins with 
$101$. From the preceding argument this implies that $0\mu({\bf x})$ 
is overlap-free. Hence any overlap of the word $00\mu({\bf x})$ 
must in fact be a prefix.
Hence, in order to show that $00\mu({\bf x})$ is overlap-free, we will
suppose that $00\mu({\bf x})$ begins with $azaza$, with $a \in \{0,1\}$ 
and $z \in \{0,1\}^*$ and obtain a contradiction. By the hypothesis, the 
word ${\bf x}$ begins with $101$, say ${\bf x} = 101 {\bf y}$ 
for some infinite word ${\bf y}$, hence the word $00\mu({\bf x})$ 
is equal to $00100110\mu({\bf y})$, and begins with $azaza$. 
By inspection, we see that $|az| \geq 6$, hence the word $001001$ occurs as 
a factor of $10\mu({\bf y})$. Since this last word begins with $1$, this 
means that there exists a nonempty word $w$ such that 
$10\mu({\bf y}) = w001001\cdots$. But $10\mu({\bf y})$ is 
overlap-free, as it is a factor of the overlap-free word $0\mu({\bf x})$. 
This implies that $001001$ can be extended to the left
(with the last letter of $w$) to an overlap-free word, although $001001$
is clearly not extendable to the left to an overlap-free word.
This proves (d). The proof of (e) is similar.

\bigskip

3. The Thue-Morse sequence $\overline{{\bf t}}$ is the limit, as
$n \rightarrow \infty$,
of the binary words $\mu^n(1)$ which are all overlap-free, 
since $1$ is overlap-free: this is Thue's proof \cite{Thue2}. 

    Let $a \in \lbrace 0, 1 \rbrace$, and suppose that 
$a \, \overline{{\bf t}}$ contains an overlap. Then the overlap must 
be a prefix of $a \, \overline{{\bf t}}$. Hence there exists a finite 
word $z$ such that $\overline{{\bf t}}$ begins with $zaza$. We will 
show that this is impossible. More precisely we show that the sequence 
$\overline{{\bf t}}$ cannot begin with $z0z0$ nor $z1z1$ for any finite 
word $z$. Let us suppose that $\overline{{\bf t}}$ begins with $zaza$ 
with $z$ a finite word and $a \in \{0,1\}$, and take $z$ a word of minimal 
length for which there exists $a \in \{0,1\}$ such that 
$\overline{{\bf t}}$ begins with $zaza$. Since $zaza$ has even length 
and $\overline{{\bf t}} = \mu(\overline{{\bf t}})$, the word $zaza$ 
is the image by $\mu$ of a binary word. Hence, applying Lemma \ref{technical}, 
the word $za$ itself is the image of a binary word by $\mu$, say $za=\mu(y)$. 
Hence $y$ must end with $\overline{a}$, say $y=x \, \overline{a}$. 
Then $\overline{{\bf t}}$ begins with $zaza=\mu(x \, \overline{a} \, x \, 
\overline{a})$. But $\overline{{\bf t}} = \mu(\overline{{\bf t}})$. 
Hence $\overline{{\bf t}}$ begins with 
$x \, \overline{a} \, x \, \overline{a}$, which contradicts the minimality 
of the length of $z$.

\bigskip

4. Since $01 \,\overline{{\bf t}} = \mu (0 \,\overline{{\bf t}})$, 
and $10 \,\overline{{\bf t}} = \mu(1 \,\overline{{\bf t}})$, we 
deduce from 2.(a) and 3 above that these two words are overlap-free.
The word $110 \,\overline{{\bf t}}$ is overlap-free, since it is a 
suffix of the word
$0110 \,\overline{{\bf t}} = \mu^2(0 \,\overline{{\bf t}})$ that 
is overlap-free from 2.(a) and 3 above.
Finally to prove that the word $001001 \,\overline{{\bf t}}$ is
overlap-free, we write $001001 \,\overline{{\bf t}} = 
00 \mu(10 \,\overline{{\bf t}})$. This last word is overlap-free from
(d) above, since we have just proved that $110 \,\overline{{\bf t}}$
is overlap-free.  \endpf

\bigskip

We now give a factorization lemma for both finite and infinite
overlap-free binary words.

\begin{lemma}\label{factorization}

\mbox{ }

1. If $x \in \{0,1\}^*$ is overlap-free, then there exist $u,v,y$
with $u,v \in \{\varepsilon, 0, 1, 00, 11\}$ and $y \in \{0,1\}^*$ an
overlap-free word, such that $x = u \mu(y) v$. Furthermore this decomposition
is unique if $|x| \geq 7$, and $u$ (resp.\ $v$) is completely determined by
the prefix (resp.\ suffix) of length $7$ of $x$.  The bound $7$ is sharp as
shown by the example $x=001011=00\mu(1)11=0\mu(00)1$.

2. If $\bf x$ is an infinite overlap-free word on $\{0,1\}$, then there
exist $u \in \{\varepsilon, 0, 1, 00, 11\}$ and an infinite overlap-free
word $\bf y$ on $\{0,1\}$ such that ${\bf x} = u \mu({\bf y})$. 
The prefix $u$ is completely determined by the prefix of $\bf x$ of
length $4$, except if $\bf x$ begins with $0010$ or $1101$, in which case
the word $u$ is completely determined by the prefix of $\bf x$ of length
$5$.

\end{lemma}

\proof
First note that the word $y$ is necessarily overlap-free by assertion
1 of Lemma \ref{imagebymu}.

\bigskip

The existence of the decomposition for finite words is proved in
\cite{Kob}, in the proof of Theorem 6.4.
The uniqueness is proved in \cite{Kfo}, Lemma 2.2.

Finally in this factorization of $x$, the word $u$ (resp.\ $v$) depends only
on the prefix (resp.\ suffix) of $x$ of length $7$. This is recalled in the
following table of all possible prefixes (resp.\ suffixes) of length $\geq 7$:
by mere inspection, we see that the word $u$ (resp.\ $v$) is uniquely
determined, knowing the decomposition does exist. (Note however that some of
the words below, e.g., $0011011$, might be non-extendable to longer
overlap-free words.)

\bigskip

$$
\begin{array}{lllll}
\begin{tabular}{|c|c|}
\hline
$x$ &$u$ \\ \hline
$0010011 \cdots$ &$00$ \\
$0010110 \cdots$ &$0$ \\
$0011001 \cdots$ &$0$ \\
$0011010 \cdots$ &$0$ \\
$0011011 \cdots$ &$0$ \\
$0100101 \cdots$ &$0$ \\
\hline
\end{tabular}
& \ \ \ \
\begin{tabular}{|c|c|}
\hline 
$x$ &$u$ \\ \hline
$0100110 \cdots$ &$0$ \\
$0101100 \cdots$ &$\varepsilon$ \\
$0101101 \cdots$ &$\varepsilon$ \\
$0110010 \cdots$ &$\varepsilon$ \\
$0110011 \cdots$ &$\varepsilon$ \\
$0110100 \cdots$ &$\varepsilon$ \\
\hline
\end{tabular}
& \ \ \ \
\begin{tabular}{|c|c|}
\hline
$x$ &$u$ \\ \hline
$1001011 \cdots$ &$\varepsilon$ \\
$1001100 \cdots$ &$\varepsilon$ \\
$1001101 \cdots$ &$\varepsilon$ \\
$1010010 \cdots$ &$\varepsilon$ \\
$1010011 \cdots$ &$\varepsilon$ \\
$1011001 \cdots$ &$1$ \\
\hline
\end{tabular}
& \ \ \ \
\begin{tabular}{|c|c|}
\hline
$x$ &$u$ \\ \hline
$1011010 \cdots$ &$1$ \\
$1100100 \cdots$ &$1$ \\
$1100101 \cdots$ &$1$ \\
$1100110 \cdots$ &$1$ \\
$1101001 \cdots$ &$1$ \\
$1101100 \cdots$ &$11$ \\
\hline
\end{tabular}
\end{array}
$$

\bigskip

$$
\begin{array}{lllll}
\begin{tabular}{|c|c|}
\hline
$x$ &$v$ \\ \hline
$\cdots 0010011$ &$1$ \\
$\cdots 0010110$ &$\varepsilon$ \\
$\cdots 0011001$ &$\varepsilon$ \\
$\cdots 0011010$ &$\varepsilon$ \\
$\cdots 0011011$ &$11$ \\
$\cdots 0100101$ &$\varepsilon$ \\
\hline
\end{tabular}
& \ \ \ \
\begin{tabular}{|c|c|}
\hline
$x$ &$v$ \\ \hline
$\cdots 0100110$ &$\varepsilon$ \\
$\cdots 0101100$ &$0$ \\
$\cdots 0101101$ &$1$ \\
$\cdots 0110010$ &$0$ \\
$\cdots 0110011$ &$1$ \\
$\cdots 0110100$ &$0$ \\
\hline
\end{tabular}
& \ \ \ \
\begin{tabular}{|c|c|}
\hline
$x$ &$v$ \\ \hline
$\cdots 1001011$ &$1$ \\
$\cdots 1001100$ &$0$ \\
$\cdots 1001101$ &$1$ \\
$\cdots 1010010$ &$0$ \\
$\cdots 1010011$ &$1$ \\
$\cdots 1011001$ &$\varepsilon$ \\
\hline
\end{tabular}
& \ \ \ \
\begin{tabular}{|c|c|}
\hline
$x$ &$v$ \\ \hline
$\cdots 1011010$ &$\varepsilon$ \\
$\cdots 1100100$ &$00$ \\
$\cdots 1100101$ &$\varepsilon$ \\
$\cdots 1100110$ &$\varepsilon$ \\
$\cdots 1101001$ &$\varepsilon$ \\
$\cdots 1101100$ &$0$ \\
\hline
\end{tabular}
\end{array}
$$

\bigskip

2. For any prefix $x_n$ of ${\bf x}$ such that, say $|x_n|=n$, 
assertion 1 of Lemma \ref{factorization} above gives the existence of $u_n$
and $v_n$ in $\{\varepsilon, 0, 1, 00, 11\}$ and of an overlap-free word
$y_n$ on $\{0,1\}$, such that $x_n=u_n\mu(y_n)v_n$. Furthermore, $u_n$ does
not depend on $n$ for $n \geq 7$. Say $u_n=u$.
Hence ${\bf x} = \displaystyle\lim_{n \to \infty} x_n =
\displaystyle\lim_{n \to \infty} u \mu(y_n) v_n$.
Since $\mu(y_n)$ goes to infinity, this implies
${\bf x} = \displaystyle\lim_{n \to \infty} u \mu(y_n)$. Hence
$\displaystyle\lim_{n \to \infty} \mu(y_n)$ exists, which gives the
existence of ${\bf y} = \displaystyle\lim_{n \to \infty} y_n$. This
sequence is overlap-free as it is a limit of overlap-free words, and we have
${\bf x} = u \mu({\bf y})$. Note that in the decomposition of $x_n$, 
$v_n$ cannot be equal to $00$ or $11$, since these words are
not images of a word by $\mu$.  Hence 
$v_n$ is either empty or is equal to $0$ or $1$. Finally, inspecting the table 
above shows that the prefix of length $4$ of a $7$-letter word determines 
the word $u$ in all cases but the two cases where the word begins with $0010$
or $1101$ for which we need to look at the prefix of length $5$.
\endpf

\bigskip

\section{The main result}
Before proving our main theorem, we need to state two results on
overlap-free sequences and to study eight particular cases. This is 
the purpose of the following proposition.

\begin{proposition}\label{particular}

Let $w$ be a binary word, and let ${\bf x}(w)$ be the lexicographically 
least overlap-free sequence (if it exists) beginning with $w$. 
Let $\overline{{\bf t}}$ be the Thue-Morse sequence beginning with $1$. 
Then 
\begin{itemize}
\item[(a)] ${\bf x}(1) = \overline{{\bf t}}$. 

\item[(b)] If ${\bf x}(w)$ exists, and if $z$ is a finite word such that
$z \, {\bf x}(w)$ is overlap-free, then ${\bf x}(zw)$ exists and
${\bf x}(zw) = z \, {\bf x}(w)$. 
In particular ${\bf x}(01)=0\overline{{\bf t}}$ and
${\bf x}(11)=1\overline{{\bf t}}$.

If ${\bf x}(w)$ exists, and if $ww_1$ is a prefix of ${\bf x}(w)$,
then ${\bf x}(ww_1)$ exists and ${\bf x}(ww_1) = {\bf x}(w)$.
In particular ${\bf x}(10)=\overline{{\bf t}}$.

\item[(c)] We have ${\bf x}(001)=001001\overline{{\bf t}}$.
Furthermore, ${\bf x}(0)={\bf x}(00)=001001\overline{{\bf t}}$.
\item[(d)] ${\bf x}(010)=0\overline{{\bf t}}$.
\item[(e)] ${\bf x}(011)=01\overline{{\bf t}}$.
\item[(f)] ${\bf x}(100)=\overline{{\bf t}}$.
\item[(g)] ${\bf x}(101)=10\overline{{\bf t}}$.
\item[(h)] ${\bf x}(110)=1\overline{{\bf t}}$.
\item[(i)] ${\bf x}(0010)=001001\overline{{\bf t}}$.
\item[(j)] ${\bf x}(1101)=110\overline{{\bf t}}$.
\end{itemize}

\end{proposition}

\proof

(a)
We note that $\overline{{\bf t}}$ is an overlap-free sequence
beginning with $1$. On the other hand, the set of overlap-free sequences 
beginning with a given word is a closed set (a limit of overlap-free 
sequences is also overlap-free). Let ${\bf y}$ be the least overlap-free 
sequence beginning with $1$. Then ${\bf y} \leq \overline{{\bf t}}$.
Hence ${\bf y}$ must start with $1001$. Now, from Lemma 
\ref{factorization} there exists an infinite sequence ${\bf x}$ such
that ${\bf y} = \mu({\bf x})$. The sequence ${\bf x}$ is
overlap-free from assertion 2 (a) in Lemma \ref{imagebymu}. As ${\bf x}$ 
begins with $1$, we have ${\bf y} \leq {\bf x}$. Hence 
$\mu({\bf y}) \leq \mu({\bf x}) = {\bf y}$, since $\mu$ is 
order-preserving.  The sequence $\mu({\bf y})$ is overlap-free
as it is the image under $\mu$ of an overlap-free sequence. Hence we must 
have $\mu({\bf y}) = {\bf y}$ from the minimality of ${\bf y}$. 
Hence ${\bf y} = \overline{{\bf t}}$. 
[We remark that this result 
was already proved by Berstel \cite{Ber1} in the following ``dual'' form:
the lexicographically {\it greatest\/} infinite overlap-free word on the binary 
alphabet $\{0,1\}$ that begins with $0$ is the Thue-Morse sequence 
${\bf t} = 01101001 \cdots$.]

\bigskip

(b) Suppose that ${\bf x}(w)$ exists, and that $z \, {\bf x}(w)$ is 
overlap-free. Hence $z \, {\bf x}(w)$ is an overlap-free sequence 
beginning with $zw$. Let $z \, {\bf y}$ be an infinite overlap-free 
sequence beginning with $zw$, that satisfies 
$z \, {\bf y} \leq z \, {\bf x}(w)$. Then ${\bf y}$ 
is an overlap-free sequence beginning with $w$, and that satisfies 
${\bf y} \leq {\bf x}(w)$. Hence ${\bf y} = {\bf x}(w)$,
and $z \, {\bf y} = z \, {\bf x}(w)$.

\medskip

Suppose now that ${\bf x}(w)$ exists, and that $ww_1$ is a prefix of
${\bf x}(w)$. Suppose that ${\bf y}$ is an overlap-free sequence
that begins with $ww_1$, and that ${\bf y} \leq {\bf x}(w)$. 
Since ${\bf y}$ also begins with $w$, we must have ${\bf y} =
{\bf x}(w)$. 

\bigskip

(c) We note that an overlap-free sequence beginning with $001$ cannot be 
smaller than $00100 \cdots$, hence (no overlap) than $0010011 \cdots$.
Now $001001 \, \overline{{\bf t}}$ is overlap-free from assertion 4
of Lemma \ref{imagebymu}. It then suffices to note that, from the first
assertion in (b) above, $001001 \, \overline{{\bf t}}$ is the 
lexicographically least sequence beginning with $0010011$.
Hence ${\bf x}(001)={\bf x}(0010011)=
001001 \, \overline{{\bf t}}$.
Similarly an overlap-free sequence beginning with $0$ or $00$ cannot be
smaller than $00100 \cdots$, and the same reasoning applies: hence
${\bf x}(0)={\bf x}(00)=001001\overline{{\bf t}}$.

\bigskip

(d) to (j) Using properties 3 and 4 of Lemma \ref{imagebymu}, and assertions
(a), (b) and (c) above, we have:

${\bf x}(010) = 0 \, \overline{{\bf t}}$, 
since ${\bf x}(01) = 0 \, \overline{{\bf t}}$;

${\bf x}(011) = 01 \, \overline{{\bf t}}$;

${\bf x}(100) = \overline{{\bf t}}$, since ${\bf x}(1) = 
\overline{{\bf t}}$; 

${\bf x}(101) = 10 \, \overline{{\bf t}}$;

${\bf x}(110) = 1 \, \overline{{\bf t}}$, 
since ${\bf x}(10) = \overline{{\bf t}}$;

${\bf x}(0010) = 001001 \, \overline{{\bf t}}$, since 
${\bf x}(001) = 001001 \, \overline{{\bf t}}$;

${\bf x}(1101) = 110 \, \overline{{\bf t}}$, since 
${\bf x}(110) = 1 \, \overline{{\bf t}}$.
\endpf

     We are now ready to state and prove our main theorem.

\begin{theorem} \label{main}
Let $w$ be a finite binary word such that there exists an infinite
overlap-free binary sequence which begins with $w$.   Let $\bf x$ be
the lexicographically least infinite overlap-free binary sequence
which begins with $w$.  Then there exists a
suffix of $\bf x$ that is equal to a suffix of the Thue-Morse sequence
$\overline{{\bf t}} = 10010110 \cdots$. The construction of this minimal 
sequence is given by an explicit algorithm.
\end{theorem}

\proof
We will prove the result by induction on the length of $w$. The induction
will show the construction to be algorithmic.

\bigskip

Using Proposition \ref{particular} above, we see that the result has 
already been obtained if the word $w$ has length at most $3$ or is equal 
to $0010$ or $1101$. The result is also true if $w=00100$ and $w=001001$,
using Proposition \ref{particular} (b), and the equality
${\bf x}(0010)=001001\overline{{\bf t}}$: this gives
${\bf x}(00100) = {\bf x}(001001)=001001\overline{{\bf t}}$. 
Furthermore the result holds also true if $w = 11011$ or $w=110110$, since
we obtain, using Lemma \ref{imagebymu} (e) and Proposition \ref{particular} 
(i) and (b), that ${\bf x}(11011) = {\bf x}(110110) = 
110110010110 \overline{{\bf t}}$.

\bigskip

If $w$ has length $ \geq 4$, and is not one of the six words above,
we will prove that ${\bf x}(w)$ ends with $\mu({\bf x}(w'))$ for
some $w'$ such that $|w'| < |w|$. 
Since $\mu(\overline{{\bf t}}) = \overline{{\bf t}}$, the image by $\mu$
of an infinite word having a suffix in common with $\overline{{\bf t}}$
has the same property: hence the induction hypothesis for $w'$ will
imply the required result for $w$.

Using Lemma \ref{factorization}, write $w = u \mu(y) v$.
Note that, from the proof of the second part of Lemma \ref{factorization},
the word $v$ is either empty or has length $1$. Furthermore ${\bf x}(w)$ 
has to begin with $u \mu(y) v \overline{v} = w \overline{v}$, hence
${\bf x}(w) = {\bf x}(w \overline{v})$.

\medskip

- If $u$ is empty, then ${\bf x}(w) = {\bf x}(w\overline{v}) = 
{\bf x}(\mu(yv)) = \mu({\bf x}(yv))$. We have $|yv| < |\mu(y)v|=|w|$, 
since $y$ cannot be empty ($|w| \geq 4$). 

\medskip

- If $u=0$, using Lemma \ref{imagebymu} and Proposition \ref{particular} we 
have $1{\bf x}(w) = 1{\bf x}(w\overline{v}) = 1{\bf x}(0\mu(yv))
= {\bf x}(10\mu(yv)) = {\bf x}(\mu(1yv)) = \mu({\bf x}(1yv))$.
We have $|1yv| < |0 \mu(y) v| = |w|$, since $y$ cannot be empty. The same
reasoning holds for $u=1$.

\medskip

- If $u=00$, we have ${\bf x}(w) = {\bf x}(w \overline{v})
= {\bf x}(00\mu(yv))$. Let ${\bf x}(w) = 00\mu(yv)\mu({\bf z})$.
Using Lemma \ref{imagebymu}, we know that $1yv({\bf z})$ has to be 
overlap-free, and that $yv({\bf z})$ has to begin with $101$.

\begin{itemize} 
\item If $4 \leq |w| \leq 6$, the equality $w=00\mu(y)v$ easily implies that
$w$ is one of the words $0010$, $00100$, $00101$, or $001001$. The cases
$w \in \{0010, 00100, 001001\}$ have been excluded.
The case $w=00101$ is impossible: namely ${\bf x}(00101) = {\bf x}(001011)$, 
since $001010$ contains an overlap. But the word $001011$ is not of the form 
$00\mu(y)v$ with $|v|\leq 1$.
\item If $|w| \geq 7$, then the equality $w=00\mu(y)v$ shows that 
$|yv| \geq 3$, hence $yv$ has to begin with $101$. Let $yv=101y'$.
We thus have ${\bf x}(00\mu(yv)) = {\bf x}(00\mu(101y'))
=00\mu(101y'{\bf z})$. Then, by Lemma \ref{imagebymu},
$1(101y'{\bf z}) = {\bf x}(1101y')$. 
Finally $|1101y'| = |1yv| < |00\mu(y)v| = |w|$.
\end{itemize}

\medskip

The same reasoning holds if $u=11$.  \endpf

\bigskip

\noindent{\bf Remark 1.}

The algorithm we give resembles an algorithm given in \cite{Kfo}
to decide (in linear time) whether a {\it finite} binary word contains an 
overlap.

\bigskip
\noindent{\bf Remark 2.}

- Let us show an example of how the algorithm works. This example also
shows that it is possible for the lexicographically least sequence beginning 
with a word $w$ to have no suffix equal to the Thue-Morse sequence 
$\overline{{\bf t}}$. Let $w=0010110$. If there exists an infinite 
overlap-free sequence ${\bf s}$ beginning with $w$, say 
${\bf s} = w{\bf x}$, then its factorization must be 
${\bf s} = 0\mu(001{\bf y})$. 
Now ${\bf s}$ is the least overlap-free sequence beginning with $w$ if 
and only if $1001{\bf y}$ is the least overlap-free sequence beginning 
with $1001$ (assertion 2 (b) in Lemma \ref{imagebymu}). Now the 
factorization of $1001{\bf y}$ must be 
$\mu(10{\bf z})$, with ${\bf y} = \mu({\bf z})$. 
And $1001{\bf y}$ is the least overlap-free sequence beginning with 
$1001$ if and only if $10{\bf z}$ is the least overlap-free sequence 
beginning with $10$. From Proposition \ref{particular}, we see that the least 
overlap-free sequence beginning with $100$ is $\overline{{\bf t}}$, and 
that the least overlap-free sequence beginning with $101$ is 
$10\overline{{\bf t}}$. Hence the least overlap-free sequence beginning 
with $10$ is $\overline{{\bf t}}$. Hence $10{\bf z} = 
\overline{{\bf t}}$. We thus have successively $1001{\bf y} = 
1001\mu({\bf z}) = \mu(10{\bf z})= \mu(\overline{{\bf t}}) = 
\overline{{\bf t}}$. Then $1{\bf s} = 10\mu(001{\bf y}) = 
\mu(1001{\bf y}) = \mu(\overline{{\bf t}}) = \overline{{\bf t}}$. 
Hence ${\bf s}$ is the sequence obtained by deleting the first letter of 
$\overline{{\bf t}}$.

\bigskip

Finally, we give as a corollary of the above result a new proof of a result
obtained in \cite{All1,AllCos} in relation with iterations of continuous 
functions and kneading sequences.

\begin{corollary}
For a binary sequence ${\bf s}$ let $S({\bf s})$ be the sequence 
obtained by deleting the first letter of ${\bf s}$ ($S$ is also called 
the shift).
Let $S^k$ be the $k$-th iterate of the map $S$. 
Let ${\bf a} = S({\bf t}) = 11010011001 \cdots$. 
Then, for each $k \geq 1$, 
we have $\overline{{\bf a}} < S^k({\bf a}) < {\bf a}$.
\end{corollary}

\proof
Since the sequence ${\bf t}$ is overlap-free, so is the sequence 
${\bf a}$. Hence ${\bf a}$ cannot be periodic. This implies that 
$S^k({\bf a})$ cannot be equal to either ${\bf a}$ or 
$\overline{{\bf a}}$ for $k \geq 1$. We thus have only to prove that, 
for all $k \geq 1$ (actually this is true for all $k \geq 0$), 
$\overline{{\bf a}} \leq S^k({\bf a}) \leq {\bf a}$.
We first prove that, for each $k \geq 0$, we have 
$\overline{{\bf a}} \leq S^k({\bf a})$. 
Since $\overline{{\bf a}}=0010110\cdots$ we are done if 
$S^k({\bf a})$ begins with $1$ or with $01$ or with $0011$. 
If $S^k({\bf a})$ begins with $0010$, it cannot begin with $00100$. 
For if this were the case, $S^k({\bf a})$ would begin with $001001$ 
since it is overlap-free. But the word $001001$ is not extendable to the 
left into an overlap-free word, hence cannot occur in the sequence 
${\bf a}$, since this sequence is extendable to the left into 
${\bf t}$ which is overlap-free.

Hence $S^k({\bf a})$ begins with $00101$, hence with $001011$, hence with 
$0010110$. But, as shown by the remark above, the least overlap-free sequence 
beginning with $0010110$ is $S(\overline{{\bf t}}) = 
\overline{{\bf a}}$. Hence $S^k({\bf a}) \geq \overline{{\bf a}}$.

\bigskip

We now prove that, for each $k \geq 0$, we have $S^k({\bf a}) \leq 
{\bf a}$. Since ${\bf a} = 11010011\cdots$, we are done if 
$S^k({\bf a})$ begins with $0$, $10$, or $1100$. Hence we can suppose 
that $S^k({\bf a})$ begins with $1101$. 

     It is easy to see that $S^k({\bf a})$ 
cannot begin with $11011$ because the only substrings of $\bf t$
starting at even positions are $10$ and $01$.
Hence $S^k({\bf a})$ begins with 
$11010$, hence with $110100$, hence with $1101001$. But the ``dual'' of the 
above remark shows that the lexicographically greatest overlap-free sequence 
beginning with $110100110$ is $S({\bf t}) = {\bf a}$ and the result 
is proved.  \endpf

\bigskip

\noindent{\bf Remark 3.}

An even simpler argument shows that the sequence 
${\bf b} = 001001\overline{{\bf t}}$ satisfies, for all $k \geq 0$,
${\bf b} \leq S^k({\bf b}) \leq \overline{{\bf b}}$.

\bigskip


We give another simple corollary.

\begin{corollary}
The lexicographically least overlap-free sequence that is extendable to
the left to a doubly-infinite 
overlap-free sequence is $S(\overline{{\bf t}})$.
\end{corollary}

\proof
We have seen that the lexicographically least sequence on $\{0,1\}$ is
the sequence $001001\overline{{\bf t}}$. This sequence cannot be 
extended to the left to an overlap-free sequence, since concatenating
either $0$ or $1$ to the front creates an overlap.
Now let us consider the 
sequence $S(\overline{{\bf t}})$. 
It begins with $0010110$ and is the 
least overlap-free sequence having this prefix from Remark 2 above.
It is not possible to find a smaller sequence that is extendable to the
left to an overlap-free doubly-infinite sequence,
since the prefix $00100$ is forbidden from the
claim above. Hence such a sequence must begin with $00101$, hence with
$0010110$. 

On the other hand,
it is well-known that $\overline{\bf t}$ is extendable to the left to
a doubly-infinite overlap-free sequence 
(\cite[Satz 7]{Thue2}, \cite{MH}, \cite[p.\ 30]{Ber2}).
\endpf

\section{The lexicographically least square-free sequence}

As already proved by Thue, it is possible to construct, on a three-letter 
alphabet, a sequence without squares, i.e., without any factor of the form
$ww$, where $w$ is a nonempty word.
(It is readily checked that there is no square-free word of
length $\geq 4$, and hence no square-free infinite word, on a two-letter
alphabet.) Now take the alphabet to be $\{0,1,2\}$.  What is the
lexicographically least 
square-free sequence over $\{0,1,2\}$?
We do not know a simple description of this sequence. Using the results of
Shelton \cite{Shelton, Currie}
it can be proved that the first twenty terms are
$$ 0 1 0 2 0 1 2 0 2 1 0 1 2 0 1 0 2 0 1 2.$$

     In particular, is it true that this
sequence can be generated by a finite automaton (in the sense of \cite{Cob};
see also \cite{All2,Dek})?  As proved above the lexicographically
least overlap-free sequence over $\lbrace 0, 1\rbrace$ is
$001001\overline{\bf t}$, and hence is $2$-automatic.

\section{Acknowledgments}

We thank the referee 
for many useful remarks, particularly those
concerning our proof of the main theorem.  We also thank Larry Cummings, who
read a draft of this paper and made many valuable suggestions.
The first two authors acknowledge 
with thanks the hospitality of the University of Waterloo, where this paper 
was largely written.

\newpage

\begin{thebibliography}{99}

\bibitem{All1} J.-P. Allouche, Th\'eorie des nombres et automates, 
Th\`ese d'\'Etat, Universit\'e Bordeaux I, 1983.

\bibitem{All2} J.-P. Allouche, Automates finis en th\'eorie des nombres,
{\it Exposition.\ Math.} {\bf 5} (1987), 239--266.

\bibitem{AllCos} J.-P. Allouche and M. Cosnard, It\'erations de fonctions
unimodales et suites engendr\'ees par automates, {\it C.\ R.\ Acad.\ Sci.\ 
Paris, S\'er. A} {\bf 296} (1983), 159--162.

\bibitem{Ber1} J. Berstel, A rewriting of Fife's theorem about 
overlap-free words, in
J. Karhum\"aki, H. Maurer, G. Rozenberg, eds.,
{\it Results and Trends in Theoretical
Computer Science},
Lecture Notes in Computer Science {\bf 812},
Springer-Verlag, 1994, pp.~19--29.

\bibitem{Ber2}
J. Berstel, Axel Thue's papers on repetitions in words: a translation,
Publications du Laboratoire de Combinatoire et d'Informatique Math\'ematique,
Universit\'e du Qu\'ebec \`a Montr\'eal {\bf 20}, 1995.

\bibitem{BerSee} J. Berstel, P. S\'e\'ebold, A characterization of 
overlap-free morphisms, {\it Disc.\ Appl.\ Math.} {\bf 46} (1993), 275--281.
        
\bibitem{Cob} A. Cobham, Uniform tag sequences, {\it Math.\ Systems Theory}
{\bf 6} (1972), 164--192.

\bibitem{Currie} J. D. Currie, On the structure and extendibility of
k-power free words, {\em Eur. J. Comb.} (1995) {\bf 16}, 111--124.

\bibitem{Dek} F. M. Dekking, M. Mend\`es France, and A. van der Poorten,
Folds!, {\it Math.\ Intelligencer} {\bf 4} (1982), 130--138, 173--181, 
190--195.

\bibitem{Kob} Y. Kobayashi, Repetition-free words, {\it Theoret.\ Comput.\ 
Sci.} {\bf 44} (1986), 175--197.

\bibitem{Kfo} A. J. Kfoury, A linear-time algorithm to decide whether
a binary word contains an overlap,  {\it RAIRO Inform.\ Th\'eor.\ App.} 
{\bf 22} (1988), 135--145.

\bibitem{MH} M. Morse and G. A. Hedlund, Unending chess, symbolic
dynamics and a problem in semigroups, {\it Duke Math.\ J.} {\bf 11}
(1944), 1-7.

\bibitem{Shelton}  
R. O. Shelton (I, II, III) and R. P. Soni (II, III), 
Aperiodic words on three symbols 
I, II, III, {\em J. Reine Angew.\ Math.} {\bf 321; 327; 330}
(1981), 195--209; 1--11; 44--52.

\bibitem{Thue1}
A. Thue, \"Uber unendliche Zeichenreihen, {\it Norske vid.\ Selsk.\ Skr.\ 
Mat.\ Nat.\ Kl.} {\bf 7} (1906), 1--22.  Reprinted in T. Nagell, ed.,
{\it Selected Mathematical 
Papers of Axel Thue}, Universitetsforlaget, Oslo, 1977, pp.~139--158.

\bibitem{Thue2}
A. Thue, \"Uber die gegenseitige Lage gleicher Teile gewisser 
Zeichenreihen, {\it Norske vid.\ Selsk.\ Skr.\ Mat.\ Nat.\ Kl.} {\bf 1}
(1912), 1--67. 
Reprinted in T. Nagell, ed., 
{\it Selected Mathematical Papers of Axel Thue},
Universitetsforlaget, Oslo, 1977, pp.~413--478.

\end{thebibliography}

\end{document}




