1 /*********************************************************************
2 Copyright 2012, Ralph Ronnquist.
4 This file is part of GORITE.
6 GORITE is free software: you can redistribute it and/or modify it
7 under the terms of the Lesser GNU General Public License as published
8 by the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 GORITE is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
16 You should have received a copy of the Lesser GNU General Public
17 License along with GORITE. If not, see <http://www.gnu.org/licenses/>.
18 **********************************************************************/
20 package com.intendico.sdp;
22 import java.util.Vector;
23 import java.util.Hashtable;
24 import java.util.Iterator;
25 import java.util.TreeSet;
28 * The BNFGrammar class is a base class for grammars that make use of
29 * the LLBNF translator for defining their syntax.
31 public class BNFGrammar extends Grammar {
34 * Utility method to scan for blank or comment.
36 public int findBlankOrComment(CharSequence s,int from) {
38 for ( ; from < end; from++ ) {
39 if ( Character.isWhitespace( s.charAt( from ) ) )
41 if ( s.charAt( from ) != '/' )
43 if ( from + 1 >= end )
45 if ( s.charAt( from + 1 ) == '*' )
47 if ( s.charAt( from + 1 ) == '/' )
54 // Predefined lexical scanners.
58 * A Nothing is a lexical token that accepts.
60 public class Nothing extends Lexical {
65 public Nothing(String r) {
73 public int scan(CharSequence s,int from) {
79 * An AnySymbol is a lexical token between blanks or comments.
81 public class AnySymbol extends Lexical {
86 public AnySymbol(String r) {
91 * Scanning anything unto next blank or comment or end.
94 public int scan(CharSequence s,int from) {
95 int to = findBlankOrComment( s, from );
96 return from == to? -1 : to ;
101 * An End is a lexical token for nothing after blanks or comments.
103 public class End extends Lexical {
108 public End(String r) {
113 * Requires there to remain nothing.
115 public int scan(CharSequence s,int from) {
116 return from == s.length()? from : -1 ;
121 * An Identifier is a lexical token of Java identifier characters.
123 public class Identifier extends Lexical {
128 public Identifier(String r) {
133 * Scanning a Java identifier.
135 public int scan(CharSequence s,int from) {
136 int end = s.length();
140 if ( ! Character.isJavaIdentifierStart( s.charAt( from++ ) ) )
143 for ( ; from < end; from++ ) {
144 if ( ! Character.isJavaIdentifierPart( s.charAt( from ) ) )
152 * A PackedChars is a lexical token consisting of a string of
153 * characters of a given collection, in any order.
155 public class PackedChars extends Lexical {
158 * The accepted characters.
160 public String characters;
165 public PackedChars(String r,String set) {
171 * Scanning a PackedChars.
173 public int scan(CharSequence s,int from) {
174 int end = s.length();
178 if ( characters.indexOf( s.charAt( from++ ) ) < 0 )
181 for ( ; from < end; from++ ) {
182 if ( characters.indexOf( s.charAt( from ) ) < 0 )
190 * A Number is a lexical token of digits. Optionally preceded by a
193 public class Number extends Lexical {
198 public Number(String r) {
203 * Scanning an integer, with an optional preceding "-".
205 public int scan(CharSequence s,int from) {
206 int end = s.length();
211 if ( inSet( s, from, end, "+-" ) )
214 int h = scanSet( s, from, end, "0123456789" );
215 return h == 0? -1 : from + h;
220 * A Number is a lexical token of digits. Optionally preceded by a
223 public class RealNumber extends Lexical {
228 public RealNumber(String r) {
233 * Scanning a real number, with an optional preceding "-" or '+'.
235 public int scan(CharSequence s,int from) {
236 int end = s.length();
243 if ( inSet( s, from, end, "+-" ) ) {
247 h = scanSet( s, from, end, "0123456789" );
249 if ( s.charAt( from++ ) == '0' ) {
250 // possible lead-in for 0xh* or otherwise octal?
253 if ( s.charAt( from++ ) == 'x' ) {
255 h = scanSet( s, from+2, end, "01234567abcdefABCDEF" );
261 return from + scanSet( s, from, end, "01234567" );
264 h = scanSet( s, from, end, "0123456789" );
271 return h == 0? -1 : end;
273 if ( s.charAt( from++ ) == '.' ) {
274 // there's a fraction part
275 f = scanSet( s, from, end, "0123456789" );
276 if ( h == 0 && f == 0 )
281 if ( inSet( s, from, end, "fF" ) )
284 // There's no fractional part
288 if ( inSet( s, from, end, "lL" ) )
292 if ( ! inSet( s, from++, end, "eE" ) )
296 if ( inSet( s, from, end, "+-" ) )
298 f = scanSet( s, from, end, "0123456789" );
299 return f == 0? -1 : from + f;
304 * Scans past the given set of characters, and tells how far it
307 static public int scanSet(CharSequence s,int from,int end,String set) {
309 for ( ; from < end; from++ )
310 if ( set.indexOf( s.charAt( from ) ) < 0 )
311 return from - start ;
316 * Tells if the character in s at from is one of those in the set.
318 static public boolean inSet(CharSequence s,int from,int end,String set) {
319 return from < end? set.indexOf( s.charAt( from ) ) >= 0 : false;
323 * Scan a compound character after the '\':
324 * \ a - letter or any non-digit
326 * \ x hh - hexadecimal
329 static public int compound(CharSequence s,int from,int end) {
332 switch ( s.charAt( from ) ) {
337 case '0': case '1': case '2': case '3':
338 case '4': case '5': case '6': case '7':
346 * A String is a lexical token between a mark characters
347 * (double-quotes by default), but without a new-line in it.
349 public class QuotedString extends Lexical {
356 public QuotedString(String r,char m) {
362 * Utility constructor to use double-quote as mark character.
364 public QuotedString(String r) {
371 public int scan(CharSequence s,int from) {
372 int end = s.length();
374 if ( from == end || s.charAt( from++ ) != mark )
377 tryToken( this, from ); // Special marker
379 for ( ; from < end; ) {
380 char c = s.charAt( from++ );
384 return -1; // bad string
386 // lead-in for compound characters.
387 from += compound( s, from, end );
397 * The error name for a buggy string.
399 public String errorName() {
400 return "terminated and well-formed <" + rule + ">" ;
405 * A QuotedCharacter is a lexical token between a pair of
406 * single-quotes, with restrictions.
408 public class QuotedCharacter extends Lexical {
413 public QuotedCharacter(String r) {
420 public int scan(CharSequence s,int from) {
421 int end = s.length();
423 if ( from == end || s.charAt( from++ ) != '\'' )
426 tryToken( this, from ); // Special marker
431 switch ( s.charAt( from++ ) ) {
435 from += compound( s, from, end );
443 if ( s.charAt( from++ ) != '\'' )
449 * The error name for a buggy string.
451 public String errorName() {
452 return "terminated and well-formed <" + rule + ">" ;
458 * A lexical scanner to match multi-word phrases from a given
459 * collection. It greedily consumes its longest match.
461 public class PhraseSet extends Lexical {
464 * Matching phrases. This contains the successive heads of the
465 * matching phrases, mapped to 0, 1, or 2. 0 means that the
466 * phrase ends there, 1 means it must continue, and 2 means it
467 * may end or continue. The scanner always consumes as much as
470 public TreeSet/*<String>*/ phrases = new TreeSet/*<String>*/();
473 * Cache of defining phrases.
475 public Vector/*<String>*/ definition = new Vector/*<String>*/();
478 * Constructor for a given name and collection of phrases.
480 public PhraseSet(String name,Vector/*<String>*/ set) {
486 * Utility method to add a collection of phrases.
488 public void addSet(Vector/*<String>*/ set) {
489 for ( Iterator si = set.iterator(); si.hasNext(); ) {
490 String phrase = (String) si.next();
491 definition.add( phrase );
492 phrases.add( phrase.toLowerCase() );
497 * Returns index to first character following a matching
500 public int scan(CharSequence input,int from) {
501 CharSequence phrase = input.subSequence( from, input.length() );
502 if ( from >= input.length() )
504 char first = phrase.charAt( 0 );
505 for ( Iterator/*<String>*/ pi =
506 phrases.headSet( phrase, true ).descendingIterator();
508 String key = (String) pi.next();
509 if ( key.charAt( 0 ) != first ) {
510 System.err.println( "failed: " + key );
513 if ( startsWith( phrase, 0, key ) ) {
514 return from + key.length();
521 * Returns this collection as a defintion clause:
524 public String toString() {
525 StringBuilder s = new StringBuilder();
528 String separator = "\n ";
529 //for ( String phrase : definition ) {
530 for ( Iterator pi = phrases.iterator(); pi.hasNext(); ) {
531 String phrase = (String) pi.next();
532 s.append( separator );
542 // Predefined utility actions
546 * Utility method to add elements individually from a Vector
547 * operand, if it begins with the operator.
549 static public void addArguments(Vector args,Object op,Object opnd) {
550 if ( opnd instanceof Vector ) {
551 Vector v = (Vector) opnd;
552 if ( v.size() > 0 && op.equals( v.get( 0 ) ) ) {
562 * Helper class for infix syntax
564 public class Infix extends Production {
567 * Default Infix action is to move the middle element to
568 * beginning, if there are three arguments, and otherwise
569 * re-use the Grammar.Production action.
571 public Object action(Vector v) throws Exception {
573 return super.action( v );
575 v.add( 0, v.remove( 1 ) );
581 * Helper class for postfix syntax
583 public class Postfix extends Production {
586 * Default Postfix action is to move the last element to
587 * beginning, if there are more than one argument, and otherwise
588 * re-use the Grammar.Production action.
590 public Object action(Vector v) throws Exception {
592 return super.action( v );
594 v.add( 0, v.remove( v.size() - 1 ) );
600 * Helper class for associative infix syntax
602 public class Associative extends Production {
605 * Default Associative action is to move the middle element to
606 * beginning, if there are three arguments, and then flatten
607 * out the first and last argument of they are vectors with
608 * the same operator. If there are not three arguments, then
609 * re-use the Grammar.Production action.
611 public Object action(Vector v) throws Exception {
613 return super.action( v );
615 Object op = v.get( 1 );
616 Vector args = new Vector();
618 addArguments( args, op, v.get( 0 ) );
619 addArguments( args, op, v.get( 2 ) );
625 * Helper class for lists
627 public class Enlist extends Production {
630 * Default Enlist action is coalesc arguments into a Vector.
632 public Object action(Vector v) {
636 Object last = v.remove( v.size() - 1 );
637 if ( last instanceof Vector ) {
638 v.addAll( (Vector) last );
647 * Helper class for left-associative lists
649 public class Leftlist extends Production {
652 * Default Enlist action is coalesc arguments into a ".."
655 public Object action(Vector v) {
659 Object first = v.remove( 0 );
661 if ( first instanceof Vector ) {
662 ((Vector) first).addAll( v );
672 * Installs a grammar from string rules using LLBNF.
674 public void install(String rules) {
676 new LLBNF().addRules( this, rules );
677 } catch (Throwable t) {
679 throw new Error( "Grammar failure" );
684 * Utility method to clip a string after some length
686 public String clip(String input,int clip) {
687 int x = input.indexOf( "\n", 0 );
688 if ( x > 0 && x < clip )
689 return input.substring( 0, x ) + " ..." ;
690 if ( clip < input.length() )
691 return input.substring( 0, clip ) + "...";
696 * Utility method to time parse and process.
698 public boolean timedProcess(int i,String goal,String input) {
701 "(" + i + ") " + goal + ": " + clip( input, 65 ) );
702 long time = System.currentTimeMillis();
703 ParseTree pr = parse( goal, input );
704 long snap = System.currentTimeMillis() - time;
706 //System.out.println( "--> " + pr );
707 time = System.currentTimeMillis();
708 Object result = process( pr );
709 time = System.currentTimeMillis() - time;
710 System.out.println( "==> " + result );
712 "--------- time = " + snap + "/" + time + " ms" );
715 System.out.println( lastError( input ) );
717 "--------- time = " + snap + "/-- ms" );
720 } catch (Throwable t) {
727 * Utility method to parse and process several tests.
729 public boolean processAll(String [][] args) {
731 for ( int i=0; i<args.length; i++ ) {
732 if ( ! timedProcess( i, args[i][0], args[i][1] ) )