Context-Free Grammar & JSON
JSON 和 Context-free Grammar 的關係
Copy from JSON.org
object
{}
{ members }
members
pair
pair , members
pair
string : value
array
[]
[ elements ]
elements
value
value , elements
value
string
number
object
array
true
false
null
JSON 可以由 Context-free Grammar 所做成。當中的 rule 都是 non-ambiguous,內裡的 leftmost derivation, 都是 deterministic (只有一個方法)。而且可以先做 rule1 再做 rule2。因此他是 LL(1) parser, 可以用 predictive parsing,用 recursive decent method.
每次先看看一個 character 然後決定走那條 rule 下面是 sample code, 當中的 members 和 elements 不用 recursion 那麼深... 可以用 while 取代, syntax error detection 的做法參加了原作者, 一檢測到就 throw exception
Sample Code
public class JSONValidator {
private String jsonString;
private int idx = -1; // next scan character index
public JSONValidator(String jsonString) {
this.jsonString = jsonString;
this.idx = 0;
}
public char peek() {
return jsonString.charAt(idx);
}
public void next(char expectedChar) {
if (peek() != expectedChar) {
// run time exceptions
throw new JSONParseException("Expected" + expectedChar + "at" + idx + "but" + peek() + "is found");
}
idx++;
}
public void next() {
idx++;
}
public void parse() {
object();
}
public void object() {
ws();
next('{');
ws();
if (peek() == '}') {
// System.out.println("Empty JSON Object");
next('}');
} else {
members();
ws();
next('}');
}
}
public void members() {
ws();
pair();
ws();
if (isEnd()){
throw new JSONParseException("Unexpected ending of JSON Object, may be missing }");
}
if (peek() == ',') {
next(',');
members();
}
}
public void pair() {
ws();
String key = string();
System.out.println("Key in json object:" + key);
ws();
next(':');
ws();
String value = value();
System.out.println("Value in json object:" + value);
}
public String value() {
ws();
if (isEnd()){
throw new JSONParseException("Expected a JSON value at" + idx);
}
char ch = peek();
String returnStr = "";
switch (ch){
case '{' :
object();
break;
case '[' :
array();
break;
case '"' :
return string();
case '-':
return number();
default:
returnStr = (Character.isDigit(ch)) ? number() : word() + "";
}
return returnStr;
}
public String word() {
char ch = peek();
switch (ch) {
case 't':
next('t');
next('r');
next('u');
next('e');
return "true";
case 'f':
next('f');
next('a');
next('l');
next('s');
next('e');
return "false";
case 'n':
next('n');
next('u');
next('l');
next('l');
return "null";
default:
throw new JSONParseException("Unknowned token" + ch + "at" + idx);
}
}
public void array() {
ws();
next('[');
ws();
if (peek() == ']'){
next(']'); // empty array;
} else {
elements();
ws();
next(']');
}
}
public void elements() {
ws();
String value = value();
System.out.println("value of array elements:" + value);
ws();
if (peek() == ','){
next(',');
elements();
}
}
public boolean isEnd() {
return idx >= jsonString.length();
}
/** skip white Space */
public void ws() {
while (!isEnd()) {
if (Character.isWhitespace(peek())) {
idx++;
} else {
break;
}
}
}
public String number() {
ws();
int initIdx = idx;
String numberString = "";
if (peek() == '-'){
next('-');
numberString += '-';
}
// digit before .
while (!isEnd()) {
char c = peek();
if (Character.isDigit(c)) {
numberString += c;
idx++;
} else {
break;
}
}
if (isEnd()){
throw new JSONParseException("Unexpected number at the end");
}
// digit after .
if (peek() == '.'){
next('.');
numberString += '.';
while (!isEnd()) {
char c = peek();
if (Character.isDigit(c)) {
numberString += c;
idx++;
} else {
break;
}
}
}
// integer or double
try {
System.out.println("Parse as Int for" + numberString);
return Integer.parseInt(numberString) + "";
} catch (NumberFormatException ex){
try {
System.out.println("Parse as double for" + numberString);
return Double.parseDouble(numberString) + "";
} catch (NumberFormatException ex2){
throw new JSONParseException("Expected number at" + initIdx + "but it is not found");
}
}
}
public int nextInt() {
ws();
String intStr = "";
while (!isEnd()) {
char c = peek();
if (Character.isDigit(c)) {
intStr += c;
idx++;
} else {
break;
}
}
return Integer.parseInt(intStr);
}
/* next string without "" */
public String string() {
ws();
String str = null;
boolean scanning = false;
char c = peek();
if (c == '"') {
scanning = true;
str = "";
next();
}
while (!isEnd() && scanning) {
c = peek();
if (c == '"') {
scanning = false;
next();
} else {
str += c;
next();
}
}
return str;
}
}