[comp.lang.prolog] definite clause grammar

tem@ttrdc.UUCP (Gary H. Jones) (04/07/89)

	This is a request for sources of information on Definite Clause
	Grammar. I'm looking for references to books and articles on
	DCGs.

						Thanks in advance,

ken@aiai.ed.ac.uk (Ken Johnson) (06/02/89)

This may be of interest to readers of this group.  It is a version of
the definite clause grammar mechanism.  The dialect of Prolog I have
used is Edinburgh Prolog, but there is nothing in here so wierd it won't
work on most dialects.  I wrote this for a short course in Prolog to
illustrate how the mechanism works: I hope you may find it useful.  I
have tested this code with the example grammars given, but I do not
guarantee it. 

There are four files here

dcg.pl      Definite clause grammar. Instructions for use are in this
            file.

roman.dcg   Definite clause grammar for roman numerals

numbers.dcg Definite clause grammar for numbers

time.dcg    Definite clause grammar for times of day

Be warned that the time and numbers dcg's use British English, i.e.  `a
quarter past two' (not `after'); `three hundred and one' (not `three
hundred one'). 

Enjoy it! (But please do not sell copies at a profit unless I get a
share.)

#	This is a shell archive.
#	Remove everything above and including the cut line.
#	Then run the rest of the file through sh.
-----cut here-----cut here-----cut here-----cut here-----
#!/bin/sh
# shar:	Shell Archiver
#	Run the following text with /bin/sh to create:
#	dcg.pl
#	numbers.dcg
#	time.dcg
#	roman.dcg
# This archive created: Fri Jun  2 17:43:11 1989
cat << \SHAR_EOF > dcg.pl
% This implements a DCG in a Prolog that doesn't have one.
% It is a fairly good example of how to manipulate terms using
% functor/3 and arg/3.
%
% To load a grammar, `load_grammar(File).'
% To use it, `my_phrase(Struct_name,Text)'
% Terminals appear as thing --> [Terminal,...]
% Nonterminals and bits of Prolog are standard.
%
% To rewrite a single term call rewrite/2, e.g.
% rewrite((det-->[the]),Text).
%
% Ken Johnson, 2 June 1989

% In a system that did not have dcgs you would probably also have to
% define
% :- op(1200,xfx,'-->').
% :- op(901,fx,'{').
% :- op(900,xf,'}').

load_grammar(File) :-				% Load the file by
	seeing(S),				% repeatedly loading terms
	see(File),				% and transforming them
	repeat,
		read(Term),
		(
			Term == end_of_file
		;
			process_grammar_term(Term),
			fail
		),
	!,
	seen,
	see(S).


process_grammar_term(Term) :-		% Attempt to rewrite; complain if
	rewrite(Term,Rewritten),	% can't rewrite a term whose principal
	!,				% functor is -->
	assert(Rewritten).

process_grammar_term(Term) :-
	write('Cannot rewrite: '),
	write(Term),
	nl.

% In a system the does not have `phrase' you could rename this
% routine to `phrase'. It is identical to `phrase'; the call sequence
% is e.g. my_phrase(non_terminal(T),[text...])

my_phrase(Term,Text) :-
	add_two_args(Term,New_term,Text,[]),
	New_term.

rewrite((Term-->Sequence),Out) :-
	add_two_args(Term,New_term,A,B),
	transform(Sequence,A,B,Transformed),
	trim_null_output(Transformed,New_term,(New_term :- Transformed),Out),
	!.

transform((P,Q),A,B,U) :-
	transform_2(P,A,C),
	!,
	transform(Q,C,B,U).
	
transform((P,Q),A,B,Out) :-
	transform_1(P,A,C,T),
	!,
	transform(Q,C,B,U),
	trim_null_output(U,T,(T,U),Out).
	
transform(P,A,B,true) :-
	transform_2(P,A,B),
	!.

transform(P,A,B,U) :-
	transform_1(P,A,B,U).

transform_1({Prolog},A,A,Prolog) :- !.

transform_1(T,A,B,New_term) :-
	add_two_args(T,New_term,A,B).

transform_2([H|T],A,B) :-
	split([H|T],A,B),
	!.

transform_2([],A,A) :-
	!.

add_two_args(T,New_term,A,B) :-
	functor(T,F,N),
	N1 is N + 1,
	N2 is N + 2,
	functor(New_term,F,N2),
	copy_args(N,T,New_term),
	arg(N1,New_term,A),
	arg(N2,New_term,B).

copy_args(0,_,_).

copy_args(N,T,U) :-
	arg(N,T,A),
	arg(N,U,A),
	M is N - 1,
	copy_args(M,T,U).



split([H|T],[H|U],V) :-
	split(T,U,V).

split([],V,V).

%
% This is a device to allow me to replace
% x,y,true. by x,y.
% and p :- true. by p.
%

trim_null_output(true,Out,_,Out) :-
	!.

trim_null_output(_,_,Out,Out).

SHAR_EOF
cat << \SHAR_EOF > numbers.dcg
% Grammar for numbers, e.g.
% phrase(number(I),[two,hundred,and,fifty,six]).
% An astonishing characteristic of this code is that it's
% fully bidirectional. The expression
% phrase(number(256),Words)
% will instantiate Words = [two,hundred,and,fifty,six].
% What's more,
% phrase(number(I),Words)
% will eventually instantiate I and Words to all the numbers it knows.
%
% Ken Johnson 17-9-87

number(I) --> '1_to_999'(I).
number(I) --> '1000_to_999999'(I).

 '1_to_999'(I) --> '1_to_99'(I).
 '1_to_999'(I) --> '100_to_999'(I).

% Compound number with thousands

 '1000_to_999999'(I) --> thousands(I).

 '1000_to_999999'(I) --> thousands(K),'100_to_999'(Htu),
	{
		I is K + Htu
	}.

 '1000_to_999999'(I) --> thousands(K),[and],'1_to_99'(Tu),
	{
		I is K + Tu
	}.

% Thousands

thousands(I) --> '1_to_999'(K), [thousand],
	{
		I is K * 1000
	}.

% Compound number with hundreds, tens and units

 '100_to_999'(C) --> one_to_nine(H),[hundred],
	{
		C is 100 * H
	}.

 '100_to_999'(C) --> one_to_nine(H),[hundred],[and],'1_to_99'(Tu),
	{
		C is (100 * H) + Tu
	}.

% Complete number: a single word 1-9 or 10-19

 '1_to_99'(I) --> one_to_nine(I).
 '1_to_99'(I) --> ten_to_nineteen(I).
 '1_to_99'(I) --> multiple_of_ten(I).
 '1_to_99'(I) --> '20_to_99'(I).

% Compound number with tens and units

  '20_to_99'(I) --> multiple_of_ten(T), one_to_nine(U),
	{
		I is T + U
	}.

% Single words (terminal nodes)

one_to_nine(1) --> [one].
one_to_nine(2) --> [two].
one_to_nine(3) --> [three].
one_to_nine(4) --> [four].
one_to_nine(5) --> [five].
one_to_nine(6) --> [six].
one_to_nine(7) --> [seven].
one_to_nine(8) --> [eight].
one_to_nine(9) --> [nine].

ten_to_nineteen(10) --> [ten].
ten_to_nineteen(11) --> [eleven].
ten_to_nineteen(12) --> [twelve].
ten_to_nineteen(13) --> [thirteen].
ten_to_nineteen(14) --> [fourteen].
ten_to_nineteen(15) --> [fifteen].
ten_to_nineteen(16) --> [sixteen].
ten_to_nineteen(17) --> [seventeen].
ten_to_nineteen(18) --> [eighteen].
ten_to_nineteen(19) --> [nineteen].

multiple_of_ten(20) --> [twenty].
multiple_of_ten(30) --> [thirty].
multiple_of_ten(40) --> [forty].
multiple_of_ten(50) --> [fifty].
multiple_of_ten(60) --> [sixty].
multiple_of_ten(70) --> [seventy].
multiple_of_ten(80) --> [eighty].
multiple_of_ten(90) --> [ninety].
SHAR_EOF
cat << \SHAR_EOF > time.dcg
% Telling the time DCG. Needs "numbers.dcg"
% Ken Johnson 18-9-87

% Note that : is declared an operator (prec 600, xfx) and it can therefore
% be used without a declaration.

time(H:0) --> hour(H), [o_clock].
time(H:0) --> named_time(H).
time(H:15) --> [a,quarter,past],hour(H).
time(H:30) --> [half,past],hour(H).
time(H:45) --> [a,quarter,to],hour(Hprev),
	{
	   (
		H = 12 -> Hprev = 1
	   ;
		H is Hprev - 1
	   )
	}.

time(H:M) --> number(M),minutes,[past],hour(H),
	{
		M >= 1,
		M =< 29
	}.

time(H:M) --> number(Mp),minutes,[to],hour(Hp),
	{
		Mp >= 1,
		Mp =< 29,
		M is 60 - Mp,
	   	(
			H = 12 -> Hp = 1
		;
			H is Hp - 1
		)
	}.

hour(I) --> number(I),
	{
		I >= 1,
		I =< 12
	}.

hour(I) --> named_time(I).

named_time(12) --> [midnight].
named_time(12) --> [noon].
named_time(12) --> [midday].

% Minutes -- a word that may be present and may not

minutes --> [minutes].

minutes --> [].

SHAR_EOF
cat << \SHAR_EOF > roman.dcg
% Roman numerals up to 1999. (There is no special notation for 5000, hence
% 4000 is mmmm, 5000 is mmmmm and that is not interesting. So I have
% stopped at allowing at most one 'm').

% Bidirectional. Call with either
%  ?- phrase(roman(1989),List)
% or
%  ?- phrase(roman(N),[c,d,i,i]).

roman(N) --> thousands(K), hundreds(H), tens(T), units(U),
	     {
		N is K * 1000 + H * 100 + T * 10 + U
	     }.

thousands(0) --> [].
thousands(1) --> [m].

hundreds(N) --> legal_sequence(c,d,m,N).
tens(N) --> legal_sequence(x,l,c,N).
units(N) --> legal_sequence(i,v,x,N).

% The predicate `legal_sequence' exploits the fact that each group
% of Roman symbols is used in the same combinations, e.g.
% i, ii, iii, iv, v, vi, vii, viii, ix is paralleled by
% x, xx, xxx, xl, l, lx, lxx, lxxx, xc and by
% c, cc, ccc, cd, d, dc, dcc, dccc, cm

legal_sequence(_,_,_,0) --> [].

legal_sequence(Single,Five,Ten,N) -->
		[Single],
		follows_single(Single,Five,Ten,N).

legal_sequence(Single,Five,Ten,N) -->
		[Five],
		follows_five(Single,Five,Ten,N).

follows_single(_,_,_,1) --> [].
follows_single(Single,_,_,N) -->
		one_or_two(Single,V),
		    {
			N is V + 1
		    }.
follows_single(_,Five,_,4) --> [Five].
follows_single(_,_,Ten,9) --> [Ten].

follows_five(_,_,_,5) --> [].
follows_five(Single,_,_,N) --> one_two_or_three(Single,V),
				{N is V + 5}.

one_two_or_three(Single,N) --> one_or_two(Single,N).
one_two_or_three(Single,3) --> [Single,Single,Single].

one_or_two(Single,1) --> [Single].
one_or_two(Single,2) --> [Single,Single].

SHAR_EOF
#	End of shell archive
exit 0
-- 
Ken Johnson, AI Applications Institute, 80 South Bridge, Edinburgh EH1 1HN
E-mail ken@aiai.ed.ac.uk, phone 031-225 4464 extension 212
`I have read your article, Mr.  Johnson, and I am no wiser than when I
started.' -- `Possibly not, sir, but far better informed.'

thom@sbcs.sunysb.edu (Thom Fruhwirth) (06/10/89)

/*
This is a comment on the posting from ken@aiai.UUCP of Fri Jun 2 12:49:17 1989.
Ken Johnson wrote a definite clause translator "for a short course in Prolog
to illustrate how the mechanism works".
In my opinion, a Prolog program for educational purposes should be
simple, consistent, and specification-like (avoiding built-ins including cut),
so that the main principles can be seen without having to deal with optimized
implementation code.
Therefore I modified Ken Johnsons Prolog code according to these guidelines.
I claim that the following modified version is simpler and shorter than 
the original and therefore more suited for a short course in Prolog.

For those, who have access to the original version, I describe how I modified
the definite clause translator program:
- Because split(A,B,C) is equivalent to append(A,C,B), I replaced split/3 by
  the more familiar append/3.
- Because the second and last clause of transform_2/3 dealing with the empty
  list can also be handled by split/3 and append/3 respectively, I merged
  the two clauses of transform_2/3 into one simpler clause.
- I split trim_null_output/4 into two different predicates, namely
  simplify_clause/3 and simplify_conjunction/3, because making a fact out of
  a rule with the body true and removing true's in conjunctions are two 
  different things.
- Finally I merged the predicates transform_1 and transform_2 into transform,
  so simplifying the code and getting rid of two superfluos predicates.
- I also changed some names of variables and predicates slightly.

The DCG program translates/rewrites/transforms/ *compiles* a DCG rule into
a Prolog clause. Consequently, a DCG *interpreter* must exist. Actually,
the interpreter is very similar to the compiler, its code is given after
that of the compiler. As an interpreter is usually simpler to understand
than a compiler, instruction should maybe start with the interpreter.

Note that I did not take the time to document the code properly.
*/

%----------------------- The DCG Translator -----------------------------------
%
% To load a grammar, call `load_grammar(File)'
% To use it, call `my_phrase(Struct_name,Text)'
% To rewrite it, call `rewrite(Rule,Rewritten)'
%
% Ken Johnson, 2 June 1989, modified Thom Fruehwirth 8 June 1989

% If necessary, define the following operators used in DCGs
% :- op(1200,xfx,'-->').
% :- op(901,fx,'{').
% :- op(900,xf,'}').

% The following is only slightly modified

load_grammar(File) :-			% Load the file by
	seeing(S),			% repeatedly reading terms
	see(File),			% and transforming them
	repeat,
	  read(Term),
	  (
	   Term == end_of_file
	  ;
	   process_grammar_rule(Term),
	   fail
	  ),
	!,
	seen,
	see(S).


process_grammar_rule(Rule) :-		% Attempt to rewrite; complain if
	rewrite(Rule,Rewritten),	% can't rewrite a term whose principal
	!,				% functor is -->
	assert(Rewritten).

process_grammar_rule(Term) :-
	write('Cannot rewrite: '),
	write(Term),
	nl.


my_phrase(Term,Text) :-
	add_two_args(Term,New_term,Text,[]),
	call(New_term).			% wrap by call/1 for clarity


rewrite((Term-->Sequence),Clause) :-
	add_two_args(Term,New_term,A,B),
	transform(Sequence,A,B,Transformed),
	simplify_clause(New_term,Transformed,Clause).	% no cut needed here


% Here the main modifications appear
% transform/4 does the translation from dcg into Prolog clauses

transform((P,Q),A,C,W) :-
	!,
	transform(P,A,B,U),
	transform(Q,B,C,V),
	simplify_conjunction(U,V,W).

transform({Goal},A,A,Goal) :- 
	!.

transform(Terminals,A,B,true):-
	append(Terminals,B,A),		% append/3 also tests for lists,
	!.				% so do not move cut before it.

transform(T,A,B,G) :-
	add_two_args(T,G,A,B).


% Add two arguments to a compound term

add_two_args(T,New_term,A,B) :-
	functor(T,F,N),
	N1 is N + 1,
	N2 is N + 2,
	functor(New_term,F,N2),
	copy_args(N,T,New_term),
	arg(N1,New_term,A),
	arg(N2,New_term,B).

copy_args(0,_,_).
copy_args(N,T,U) :-
	N>0,			% test avoids cut
	arg(N,T,A),
	arg(N,U,A),
	M is N - 1,
	copy_args(M,T,U).

% There is always a need for good ol' append/3

append([],L,L).
append([X|L1],L2,[X|L3]):-
	append(L1,L2,L3).

% remove true from conjunction

simplify_conjunction(true,B,B):-!.
simplify_conjunction(A,true,A):-!.
simplify_conjunction(A,B,(A,B)).

% rules with body true are facts

simplify_clause(H,true,H):-!.
simplify_clause(H,B,(H:-B)).




%----------------------- The DCG Interpreter ---------------------------------
%
% To load a grammar, call `load_file(File)'
% To use it, call `parse(Struct_name,Text)'
%
% Thom Fruehwirth 8 June 1989, based on the ken Johnsons DCG translator

% load_file/1 is like consult/1, but consult/1 might automatically transform
% DCG rules into Prolog clauses (Quintus Prolog does).

load_file(File) :-			% Load the file by
	seeing(S),			% repeatedly asserting clauses
	see(File),
	repeat,
	  read(Clause),
	  (
	   Clause == end_of_file
	  ;
	   assert(Clause),
	   fail
	  ),
	!,
	seen,
	see(S).

% Here is the parser. Note the similarity to transform/4 and my_phrase/2.

parse(Term,Text):-
	parse(Term,Text,[]).

parse((P,Q),A,C) :-
	!,
	parse(P,A,B),
	parse(Q,B,C).

parse({Goal},A,A) :- 
	!,
	call(Goal).

parse(Terminals,A,B) :-
	append(Terminals,B,C),
	!,
	A=C.			% take care when using a cut

parse(T,A,B) :-
	(T-->S),		% access DCG rule
	parse(S,A,B).

% code for append/3 see DCG translator


% the end