Library Coq.Strings.String


Contributed by Laurent Théry (INRIA); Adapted to Coq V8 by the Coq Development Team

Require Import Arith.
Require Import Ascii.
Require Import Bool.
Require Import Coq.Strings.Byte.

Definition of strings

Implementation of string as list of ascii characters

Inductive string : Set :=
  | EmptyString : string
  | String : ascii -> string -> string.

Declare Scope string_scope.
Delimit Scope string_scope with string.
Bind Scope string_scope with string.
Local Open Scope string_scope.

Register string as core.string.type.
Register EmptyString as core.string.empty.
Register String as core.string.string.

Equality is decidable

Definition string_dec : forall s1 s2 : string, {s1 = s2} + {s1 <> s2}.

Local Open Scope lazy_bool_scope.

Fixpoint eqb s1 s2 : bool :=
 match s1, s2 with
 | EmptyString, EmptyString => true
 | String c1 s1', String c2 s2' => Ascii.eqb c1 c2 &&& eqb s1' s2'
 | _,_ => false
 end.

Infix "=?" := eqb : string_scope.

Lemma eqb_spec s1 s2 : Bool.reflect (s1 = s2) (s1 =? s2)%string.

Local Ltac t_eqb :=
  repeat first [ congruence
               | progress subst
               | apply conj
               | match goal with
                 | [ |- context[eqb ?x ?y] ] => destruct (eqb_spec x y)
                 end
               | intro ].
Lemma eqb_refl x : (x =? x)%string = true.
Lemma eqb_sym x y : (x =? y)%string = (y =? x)%string.
Lemma eqb_eq n m : (n =? m)%string = true <-> n = m.
Lemma eqb_neq x y : (x =? y)%string = false <-> x <> y.
Lemma eqb_compat: Morphisms.Proper (Morphisms.respectful eq (Morphisms.respectful eq eq)) eqb.

Concatenation of strings


Reserved Notation "x ++ y" (right associativity, at level 60).

Fixpoint append (s1 s2 : string) : string :=
  match s1 with
  | EmptyString => s2
  | String c s1' => String c (s1' ++ s2)
  end
where "s1 ++ s2" := (append s1 s2) : string_scope.

Length

Fixpoint length (s : string) : nat :=
  match s with
  | EmptyString => 0
  | String c s' => S (length s')
  end.

Nth character of a string

Fixpoint get (n : nat) (s : string) {struct s} : option ascii :=
  match s with
  | EmptyString => None
  | String c s' => match n with
                   | O => Some c
                   | S n' => get n' s'
                   end
  end.

Two lists that are identical through get are syntactically equal

Theorem get_correct :
  forall s1 s2 : string, (forall n : nat, get n s1 = get n s2) <-> s1 = s2.

The first elements of s1 ++ s2 are the ones of s1

Theorem append_correct1 :
 forall (s1 s2 : string) (n : nat),
 n < length s1 -> get n s1 = get n (s1 ++ s2).

The last elements of s1 ++ s2 are the ones of s2

Theorem append_correct2 :
 forall (s1 s2 : string) (n : nat),
 get n s2 = get (n + length s1) (s1 ++ s2).

Substrings

substring n m s returns the substring of s that starts at position n and of length m; if this does not make sense it returns ""

Fixpoint substring (n m : nat) (s : string) : string :=
  match n, m, s with
  | O, O, _ => EmptyString
  | O, S m', EmptyString => s
  | O, S m', String c s' => String c (substring 0 m' s')
  | S n', _, EmptyString => s
  | S n', _, String c s' => substring n' m s'
  end.

The substring is included in the initial string

Theorem substring_correct1 :
 forall (s : string) (n m p : nat),
 p < m -> get p (substring n m s) = get (p + n) s.

The substring has at most m elements

Theorem substring_correct2 :
 forall (s : string) (n m p : nat), m <= p -> get p (substring n m s) = None.

Concatenating lists of strings

concat sep sl concatenates the list of strings sl, inserting the separator string sep between each.

Fixpoint concat (sep : string) (ls : list string) :=
  match ls with
  | nil => EmptyString
  | cons x nil => x
  | cons x xs => x ++ sep ++ concat sep xs
  end.

Test functions

Test if s1 is a prefix of s2

Fixpoint prefix (s1 s2 : string) {struct s2} : bool :=
  match s1 with
  | EmptyString => true
  | String a s1' =>
      match s2 with
      | EmptyString => false
      | String b s2' =>
          match ascii_dec a b with
          | left _ => prefix s1' s2'
          | right _ => false
          end
      end
  end.

If s1 is a prefix of s2, it is the substring of length length s1 starting at position O of s2

Theorem prefix_correct :
 forall s1 s2 : string,
 prefix s1 s2 = true <-> substring 0 (length s1) s2 = s1.

Test if, starting at position n, s1 occurs in s2; if so it returns the position

Fixpoint index (n : nat) (s1 s2 : string) : option nat :=
  match s2, n with
  | EmptyString, O =>
      match s1 with
      | EmptyString => Some O
      | String a s1' => None
      end
  | EmptyString, S n' => None
  | String b s2', O =>
      if prefix s1 s2 then Some O
      else
        match index O s1 s2' with
        | Some n => Some (S n)
        | None => None
        end
   | String b s2', S n' =>
      match index n' s1 s2' with
      | Some n => Some (S n)
      | None => None
      end
  end.

Opaque prefix.

If the result of index is Some m, s1 in s2 at position m

Theorem index_correct1 :
 forall (n m : nat) (s1 s2 : string),
 index n s1 s2 = Some m -> substring m (length s1) s2 = s1.

If the result of index is Some m, s1 does not occur in s2 before m

Theorem index_correct2 :
 forall (n m : nat) (s1 s2 : string),
 index n s1 s2 = Some m ->
 forall p : nat, n <= p -> p < m -> substring p (length s1) s2 <> s1.

If the result of index is None, s1 does not occur in s2 after n

Theorem index_correct3 :
 forall (n m : nat) (s1 s2 : string),
 index n s1 s2 = None ->
 s1 <> EmptyString -> n <= m -> substring m (length s1) s2 <> s1.

Transparent prefix.

If we are searching for the Empty string and the answer is no this means that n is greater than the size of s

Theorem index_correct4 :
 forall (n : nat) (s : string),
 index n EmptyString s = None -> length s < n.

Same as index but with no optional type, we return 0 when it does not occur

Definition findex n s1 s2 :=
  match index n s1 s2 with
  | Some n => n
  | None => O
  end.

Conversion to/from list ascii and list byte

Concrete syntax

The concrete syntax for strings in scope string_scope follows the Coq convention for strings: all ascii characters of code less than 128 are literals to the exception of the character `double quote' which must be doubled.
Strings that involve ascii characters of code >= 128 which are not part of a valid utf8 sequence of characters are not representable using the Coq string notation (use explicitly the String constructor with the ascii codes of the characters).

Module Export StringSyntax.
End StringSyntax.

Example HelloWorld := " ""Hello world!"" ".