Bryan Ford’s Thesis
http://pdos.csail.mit.edu/~baford/packrat/thesis
(see section 2.3.1 of thesis)
Empty String |
'' |
Terminal (fancy word for character) |
'a' |
Non-Terminal (fancy word for a rule) |
rule |
Sequence |
e1 e2 e3 ... |
Choice |
e1 / e2 / e3 / ... |
0 or more |
e* |
1 or more |
e+ |
Optional |
e? |
Followed-by |
&e |
Not followed-by |
!e |
E.g. trivial arithmetic expressions
E ← N / ‘(‘ E ‘+’ E ‘)’ / ‘(‘ E ‘-’ E ‘)’
N ← D N / D
D ← ‘0’ / … / ‘9’
(note the parsing of nested, matched parentheses ‘(‘ … ‘)’)
REGEXP |
PEG |
/a/ |
OneA = "a" |
// Example 1 (REGEXP version): does the pattern /a/ occur in the input?
const text = "a";
const re = /a/;

/**
 * Prints 'Success' when `re` matches `text`, otherwise 'Failure'.
 */
function main () {
  const outcome = re.test (text) ? 'Success' : 'Failure';
  console.log (outcome);
}

main ();
;; Input for example 1: the one-character string "a".
(defparameter text1 "a")
;; Defines the esrap rule oneA (the character #\a, with a :lambda that returns
;; its argument unchanged) and then parses text1 starting from that rule.
(defun example1-esrap ()
(esrap:defrule oneA #\a (:lambda (x) x))
(esrap:parse 'oneA text1))
// Example 1 (Ohm-JS version): the input to parse.
const text = "a";
// Ohm grammar source for example 1. It must be a template literal (backticks):
// the grammar spans several lines, and a '...' string cannot contain raw newlines.
const example1_grammar = `
Example1 {
OneA = "a"
}`;
// Build a parser from the grammar text, match the input, and report the outcome.
const ohm = require ('ohm-js');
const ohmParser = ohm.grammar (example1_grammar);
const result = ohmParser.match (text);
console.log (result.succeeded () ? "Success" : "Failure");
REGEXP |
PEG (Ohm-JS) |
/ab/ |
OneAThenOneB = OneA OneB OneA = "a" OneB = "b" |
// Example 2 (REGEXP version): does "a" immediately followed by "b" occur in the input?
const text = "ab";
const re = /ab/;

/**
 * Prints 'Success' when `re` matches `text`, otherwise 'Failure'.
 */
function main () {
  if (re.test (text)) {
    console.log ('Success');
  } else {
    console.log ('Failure');
  }
}

main ();
;; Input for example 2: the two-character string "ab".
(defparameter text2 "ab")
;; Defines the esrap rules oneA (#\a), oneB (#\b) and oneAThenOneB (oneA followed
;; by oneB); every rule's :lambda returns its argument unchanged. Then parses
;; text2 starting from oneAThenOneB.
(defun example2-esrap ()
(esrap:defrule oneA #\a (:lambda (x) x))
(esrap:defrule oneB #\b (:lambda (x) x))
(esrap:defrule oneAThenOneB (and oneA oneB) (:lambda (x) x))
(esrap:parse 'oneAThenOneB text2))
// Example 2 (Ohm-JS version): the input to parse.
const text = "ab";
// Ohm grammar text: OneAThenOneB is OneA followed by OneB, which match "a" and "b".
const example2_grammar = `
Example2 {
OneAThenOneB = OneA OneB
OneA = "a"
OneB = "b"
}`;
// Build a parser from the grammar text, match the input, and report the outcome.
const ohm = require ('ohm-js');
const ohmParser = ohm.grammar (example2_grammar);
const result = ohmParser.match (text);
console.log (result.succeeded () ? "Ohm matching succeeded" : "Ohm matching failed");
REGEXP |
PEG (Ohm-JS) |
/(a)(b)/ |
OneAThenOneB = OneA OneB OneA = "a" OneB = "b" |
// Example 3 (REGEXP version): capture "a" and "b" separately, then print them swapped.
const text = "ab";
const re = /(a)(b)/;

/**
 * Matches `re` against `text` and prints the two captured groups in reverse
 * order, or 'pattern failed' when there is no match.
 */
function main () {
  const captures = text.match (re);
  if (!captures) {
    console.log (`pattern failed`);
    return;
  }
  // Index 0 is the whole match; groups 1 and 2 are the captures.
  const [, first, second] = captures;
  console.log (`(javascript) flipped is "${second}${first}"`);
}

main ();
;; Input for example 3: the two-character string "ab".
(defparameter text "ab")
;; Defines the esrap rules oneA (#\a) and oneB (#\b), plus oneAThenOneBAndFlip,
;; which matches oneA then oneB and returns the two results as a list in
;; swapped order (b first). Then parses text starting from that rule.
(defun example3-esrap ()
(esrap:defrule oneA #\a (:lambda (x) x))
(esrap:defrule oneB #\b (:lambda (x) x))
(esrap:defrule oneAThenOneBAndFlip (and oneA oneB)
(:destructure (a b) (list b a)))
(esrap:parse 'oneAThenOneBAndFlip text))
// Sample input for example 6: an <svg> element that contains seven <rect> elements.
const text = `
<svg width="800" height="800">
<rect id="id0" x="40" y="120" width="150" height="60"></rect>
<rect id="id1" x="40" y="320" width="150" height="60"></rect>
<rect id="id2" x="280" y="120" width="250" height="130"></rect>
<rect id="id3" x="650" y="120" width="150" height="60"></rect>
<rect id="id4" x="650" y="270" width="150" height="60"></rect>
<rect id="id5" x="650" y="360" width="150" height="60"></rect>
<rect id="id6" x="650" y="440" width="150" height="60"></rect>
</svg>
`;
// Ohm grammar for the sample above. Note that the grammar block is named
// Example1 although the variable is example6_grammar. Because this is a
// template literal, each \\" below reaches Ohm as \" (an escaped double quote
// inside an Ohm string literal).
const example6_grammar = `
Example1 {
SVGElement = "<svg" SVGAttribute* ">" SVGRect+ "</svg>"
SVGAttribute = WidthAttribute | HeightAttribute
SVGRect = "<rect" RectContent+ ">" "</rect>"
RectContent = IDAttribute | XAttribute | YAttribute | WidthAttribute | HeightAttribute
IDAttribute = "id=" string
XAttribute = "x=" integerString
YAttribute = "y=" integerString
WidthAttribute = "width=" integerString
HeightAttribute = "height=" integerString
string = "\\"" notDQuote* "\\""
notDQuote = ~"\\"" any
integerString = "\\"" integerDigit+ "\\""
integerDigit = "0" .. "9"
}`;
// Parse the sample; on success attach both operations and print what each produces.
const ohm = require ('ohm-js');
const ohmParser = ohm.grammar (example6_grammar);
const result = ohmParser.match (text);
if (!result.succeeded ()) {
  console.log ("Ohm matching failed");
} else {
  const semantics = ohmParser.createSemantics ();
  addExample6 (semantics);
  addJSON6 (semantics);
  console.log ('example6 ohm:');
  console.log (semantics (result).example6 ());
  console.log ('example6 ohm JSON:');
  console.log (semantics (result).JSON6 ());
}
/**
 * Registers the 'example6' operation, which re-emits the parsed SVG as text.
 *
 * The separators between a tag name and its attributes are not handed to these
 * actions (Ohm's capitalised, syntactic rules skip whitespace), so a single
 * space is re-inserted before every attribute to keep the output well-formed.
 *
 * @param {object} semantics - object providing addOperation (name, actions);
 *   the surrounding script passes the result of ohmParser.createSemantics ().
 */
function addExample6 (semantics) {
  // Render a list of attribute strings, each preceded by a single space.
  function withLeadingSpaces (renderedAttributes) {
    return renderedAttributes.map (function (attribute) { return ' ' + attribute; }).join ('');
  }
  semantics.addOperation (
    'example6',
    {
      SVGElement: function (_svg, attrs, _gt, rects, _end) {
        return "<svg" + withLeadingSpaces (attrs.example6 ()) + ">\n" + rects.example6 ().join ('\n') + "\n</svg>";
      },
      SVGAttribute: function (attribute) { return attribute.example6 (); },
      SVGRect: function (_rect, contents, _gt, _end) {
        return "<rect" + withLeadingSpaces (contents.example6 ()) + ">" + "</rect>";
      },
      RectContent: function (attribute) { return attribute.example6 (); },
      IDAttribute: function (_id, str) { return "id=" + str.example6 (); },
      XAttribute: function (_x, n) { return "x=" + n.example6 (); },
      YAttribute: function (_y, n) { return "y=" + n.example6 (); },
      WidthAttribute: function (_w, n) { return "width=" + n.example6 (); },
      HeightAttribute: function (_h, n) { return "height=" + n.example6 (); },
      // The surrounding quotes are matched separately, so they are put back here.
      string: function (_q1, characters, _q2) { return '"' + characters.example6 ().join ('') + '"'; },
      notDQuote: function (c) { return c.example6 (); },
      integerString: function (_q1, digits, _q2) { return '"' + digits.example6 ().join ('') + '"'; },
      integerDigit: function (d) { return d.example6 (); },
      // NOTE(review): relies on the node's primitiveValue property; newer ohm-js
      // releases may expose terminal text only as sourceString - confirm against
      // the installed version.
      _terminal: function () { return this.primitiveValue; }
    }
  );
}
/**
 * Registers the 'JSON6' operation, which renders the parsed SVG as a JSON-like
 * text summary: one "{ kind: 'rect', ... }" entry per rectangle, with width and
 * height abbreviated to w and h.
 *
 * @param {object} semantics - object providing addOperation (name, actions).
 */
function addJSON6 (semantics) {
  const actions = {
    SVGElement (_open, attributes, _close, rectangles, _endTag) {
      const header = attributes.JSON6 ().join (',\n');
      const body = rectangles.JSON6 ().join (',\n ');
      return `svg {\n${header},\ncontents: [\n ${body}\n]}`;
    },
    SVGAttribute (inner) { return inner.JSON6 (); },
    SVGRect (_open, parts, _close, _endTag) {
      return `{ kind: 'rect', ${parts.JSON6 ().join (', ')} }`;
    },
    RectContent (inner) { return inner.JSON6 (); },
    IDAttribute (_key, value) { return `id: ${value.JSON6 ()}`; },
    XAttribute (_key, value) { return `x: ${value.JSON6 ()}`; },
    YAttribute (_key, value) { return `y: ${value.JSON6 ()}`; },
    WidthAttribute (_key, value) { return `w: ${value.JSON6 ()}`; },
    HeightAttribute (_key, value) { return `h: ${value.JSON6 ()}`; },
    // Strings are re-quoted with single quotes; integers lose their quotes.
    string (_openQuote, chars, _closeQuote) { return `'${chars.JSON6 ().join ('')}'`; },
    notDQuote (ch) { return ch.JSON6 (); },
    integerString (_openQuote, digits, _closeQuote) { return digits.JSON6 ().join (''); },
    integerDigit (digit) { return digit.JSON6 (); },
    _terminal () { return this.primitiveValue; }
  };
  semantics.addOperation ('JSON6', actions);
}
// Sample input: an HTML page whose <body> holds a heading and an <svg> drawing
// (seven <rect> and seven <text> elements).
const text = `
<html>
<title>Top</title>
<head>
<style>
rect { fill:#DAE8FC }
</style>
</head>
<body>
<h1>Top Part (Schematic)</h1>
<svg width="800" height="800">
<rect id="id0" x="40" y="120" width="150" height="60"></rect>
<rect id="id1" x="40" y="320" width="150" height="60"></rect>
<rect id="id2" x="280" y="120" width="250" height="130"></rect>
<rect id="id3" x="650" y="120" width="150" height="60"></rect>
<rect id="id4" x="650" y="270" width="150" height="60"></rect>
<rect id="id5" x="650" y="360" width="150" height="60"></rect>
<rect id="id6" x="650" y="440" width="150" height="60"></rect>
<text id="id7" x="50" y="150">FileSelector</text>
<text id="id8" x="50" y="350">TimeoutTimer</text>
<text id="id9" x="290" y="185">CallbackLogic</text>
<text id="id10" x="660" y="150">Display</text>
<text id="id11" x="660" y="320">ErrorHandler</text>
<text id="id12" x="660" y="410">AbortHandler</text>
<text id="id13" x="660" y="490">NoResponseHandler</text>
</svg>
</body>
</html>
`;
// Ohm grammar that splits the page into: leading whitespace, the <html> tag,
// everything before <body>, the <body> tag, everything before </body>, and the
// closing </body> and </html> tags. Because this is a template literal, the
// \\t and \\n below reach Ohm as \t and \n.
const unityGrammar = `
htmlUnity {
html = ws* htmlElement headerStuff bodyElement bodyStuff bodyElementEnd htmlEnd
htmlElement = "<html>" ws*
headerStuff = notBody*
bodyElement = "<body>" ws*
bodyStuff = notBodyEnd*
notBody = ~"<body>" any
notBodyEnd = ~"</body>" any
bodyElementEnd = "</body>" ws*
htmlEnd = "</html>" ws*
ws = " " | "\\t" | "\\n"
}
`;
// Parse the page; on success attach the 'unity' operation and print its result.
const ohm = require ('ohm-js');
const ohmParser = ohm.grammar (unityGrammar);
const result = ohmParser.match (text);
if (!result.succeeded ()) {
  console.log ("Ohm matching failed");
} else {
  console.log ("Ohm matching succeeded");
  const semantics = ohmParser.createSemantics ();
  addUnity (semantics);
  console.log ('unity ohm:');
  console.log (semantics (result).unity ());
}
/**
 * Registers the 'unity' operation: every rule returns the concatenation of
 * what its parts return, and terminals return their own value.
 *
 * @param {object} semantics - object providing addOperation (name, actions).
 */
function addUnity (semantics) {
  // Repeated parts (ws*, notBody*, notBodyEnd*) yield arrays; glue them together.
  const joined = (repeated) => repeated.unity ().join ('');
  const actions = {
    html (leadingWs, open, header, bodyOpen, body, bodyClose, close) {
      return joined (leadingWs) + open.unity () + header.unity () +
        bodyOpen.unity () + body.unity () + bodyClose.unity () + close.unity ();
    },
    htmlElement (tag, trailingWs) { return tag.unity () + joined (trailingWs); },
    headerStuff (characters) { return joined (characters); },
    bodyElement (tag, trailingWs) { return tag.unity () + joined (trailingWs); },
    bodyStuff (characters) { return joined (characters); },
    notBody (character) { return character.unity (); },
    notBodyEnd (character) { return character.unity (); },
    bodyElementEnd (tag, trailingWs) { return tag.unity () + joined (trailingWs); },
    htmlEnd (tag, trailingWs) { return tag.unity () + joined (trailingWs); },
    ws (character) { return character.unity (); },
    _terminal () { return this.primitiveValue; }
  };
  semantics.addOperation ('unity', actions);
}
// Sample input: an HTML page whose <body> holds a heading and an <svg> drawing
// (seven <rect> and seven <text> elements).
const text = `
<html>
<title>Top</title>
<head>
<style>
rect { fill:#DAE8FC }
</style>
</head>
<body>
<h1>Top Part (Schematic)</h1>
<svg width="800" height="800">
<rect id="id0" x="40" y="120" width="150" height="60"></rect>
<rect id="id1" x="40" y="320" width="150" height="60"></rect>
<rect id="id2" x="280" y="120" width="250" height="130"></rect>
<rect id="id3" x="650" y="120" width="150" height="60"></rect>
<rect id="id4" x="650" y="270" width="150" height="60"></rect>
<rect id="id5" x="650" y="360" width="150" height="60"></rect>
<rect id="id6" x="650" y="440" width="150" height="60"></rect>
<text id="id7" x="50" y="150">FileSelector</text>
<text id="id8" x="50" y="350">TimeoutTimer</text>
<text id="id9" x="290" y="185">CallbackLogic</text>
<text id="id10" x="660" y="150">Display</text>
<text id="id11" x="660" y="320">ErrorHandler</text>
<text id="id12" x="660" y="410">AbortHandler</text>
<text id="id13" x="660" y="490">NoResponseHandler</text>
</svg>
</body>
</html>
`;
// Ohm grammar that splits the page into: leading whitespace, the <html> tag,
// everything before <body>, the <body> tag, everything before </body>, and the
// closing </body> and </html> tags. Because this is a template literal, the
// \\t and \\n below reach Ohm as \t and \n.
const unityGrammar = `
htmlUnity {
html = ws* htmlElement headerStuff bodyElement bodyStuff bodyElementEnd htmlEnd
htmlElement = "<html>" ws*
headerStuff = notBody*
bodyElement = "<body>" ws*
bodyStuff = notBodyEnd*
notBody = ~"<body>" any
notBodyEnd = ~"</body>" any
bodyElementEnd = "</body>" ws*
htmlEnd = "</html>" ws*
ws = " " | "\\t" | "\\n"
}
`;
// Parse the page; on success attach the 'unity' operation and print its result.
const ohm = require ('ohm-js');
const ohmParser = ohm.grammar (unityGrammar);
const result = ohmParser.match (text);
if (!result.succeeded ()) {
  console.log ("Ohm matching failed");
} else {
  console.log ("Ohm matching succeeded");
  const semantics = ohmParser.createSemantics ();
  addUnity (semantics);
  console.log ('unity ohm:');
  console.log (semantics (result).unity ());
}
/**
 * Registers the 'unity' operation: every rule returns the concatenation of
 * what its parts return, and terminals return their own value.
 *
 * @param {object} semantics - object providing addOperation (name, actions).
 */
function addUnity (semantics) {
  // Repeated parts (ws*, notBody*, notBodyEnd*) yield arrays; glue them together.
  const joined = (repeated) => repeated.unity ().join ('');
  const actions = {
    html (leadingWs, open, header, bodyOpen, body, bodyClose, close) {
      return joined (leadingWs) + open.unity () + header.unity () +
        bodyOpen.unity () + body.unity () + bodyClose.unity () + close.unity ();
    },
    htmlElement (tag, trailingWs) { return tag.unity () + joined (trailingWs); },
    headerStuff (characters) { return joined (characters); },
    bodyElement (tag, trailingWs) { return tag.unity () + joined (trailingWs); },
    bodyStuff (characters) { return joined (characters); },
    notBody (character) { return character.unity (); },
    notBodyEnd (character) { return character.unity (); },
    bodyElementEnd (tag, trailingWs) { return tag.unity () + joined (trailingWs); },
    htmlEnd (tag, trailingWs) { return tag.unity () + joined (trailingWs); },
    ws (character) { return character.unity (); },
    _terminal () { return this.primitiveValue; }
  };
  semantics.addOperation ('unity', actions);
}
(can't do this in REGEXP)
// Sample input for example 6: an <svg> element that contains seven <rect> elements.
const text = `
<svg width="800" height="800">
<rect id="id0" x="40" y="120" width="150" height="60"></rect>
<rect id="id1" x="40" y="320" width="150" height="60"></rect>
<rect id="id2" x="280" y="120" width="250" height="130"></rect>
<rect id="id3" x="650" y="120" width="150" height="60"></rect>
<rect id="id4" x="650" y="270" width="150" height="60"></rect>
<rect id="id5" x="650" y="360" width="150" height="60"></rect>
<rect id="id6" x="650" y="440" width="150" height="60"></rect>
</svg>
`;
// Ohm grammar for the sample above. Note that the grammar block is named
// Example1 although the variable is example6_grammar. Because this is a
// template literal, each \\" below reaches Ohm as \" (an escaped double quote
// inside an Ohm string literal).
const example6_grammar = `
Example1 {
SVGElement = "<svg" SVGAttribute* ">" SVGRect+ "</svg>"
SVGAttribute = WidthAttribute | HeightAttribute
SVGRect = "<rect" RectContent+ ">" "</rect>"
RectContent = IDAttribute | XAttribute | YAttribute | WidthAttribute | HeightAttribute
IDAttribute = "id=" string
XAttribute = "x=" integerString
YAttribute = "y=" integerString
WidthAttribute = "width=" integerString
HeightAttribute = "height=" integerString
string = "\\"" notDQuote* "\\""
notDQuote = ~"\\"" any
integerString = "\\"" integerDigit+ "\\""
integerDigit = "0" .. "9"
}`;
// Parse the sample; on success attach both operations and print what each produces.
const ohm = require ('ohm-js');
const ohmParser = ohm.grammar (example6_grammar);
const result = ohmParser.match (text);
if (!result.succeeded ()) {
  console.log ("Ohm matching failed");
} else {
  const semantics = ohmParser.createSemantics ();
  addExample6 (semantics);
  addJSON6 (semantics);
  console.log ('example6 ohm:');
  console.log (semantics (result).example6 ());
  console.log ('example6 ohm JSON:');
  console.log (semantics (result).JSON6 ());
}
/**
 * Registers the 'example6' operation, which re-emits the parsed SVG as text.
 *
 * The separators between a tag name and its attributes are not handed to these
 * actions (Ohm's capitalised, syntactic rules skip whitespace), so a single
 * space is re-inserted before every attribute to keep the output well-formed.
 *
 * @param {object} semantics - object providing addOperation (name, actions);
 *   the surrounding script passes the result of ohmParser.createSemantics ().
 */
function addExample6 (semantics) {
  // Render a list of attribute strings, each preceded by a single space.
  function withLeadingSpaces (renderedAttributes) {
    return renderedAttributes.map (function (attribute) { return ' ' + attribute; }).join ('');
  }
  semantics.addOperation (
    'example6',
    {
      SVGElement: function (_svg, attrs, _gt, rects, _end) {
        return "<svg" + withLeadingSpaces (attrs.example6 ()) + ">\n" + rects.example6 ().join ('\n') + "\n</svg>";
      },
      SVGAttribute: function (attribute) { return attribute.example6 (); },
      SVGRect: function (_rect, contents, _gt, _end) {
        return "<rect" + withLeadingSpaces (contents.example6 ()) + ">" + "</rect>";
      },
      RectContent: function (attribute) { return attribute.example6 (); },
      IDAttribute: function (_id, str) { return "id=" + str.example6 (); },
      XAttribute: function (_x, n) { return "x=" + n.example6 (); },
      YAttribute: function (_y, n) { return "y=" + n.example6 (); },
      WidthAttribute: function (_w, n) { return "width=" + n.example6 (); },
      HeightAttribute: function (_h, n) { return "height=" + n.example6 (); },
      // The surrounding quotes are matched separately, so they are put back here.
      string: function (_q1, characters, _q2) { return '"' + characters.example6 ().join ('') + '"'; },
      notDQuote: function (c) { return c.example6 (); },
      integerString: function (_q1, digits, _q2) { return '"' + digits.example6 ().join ('') + '"'; },
      integerDigit: function (d) { return d.example6 (); },
      // NOTE(review): relies on the node's primitiveValue property; newer ohm-js
      // releases may expose terminal text only as sourceString - confirm against
      // the installed version.
      _terminal: function () { return this.primitiveValue; }
    }
  );
}
/**
 * Registers the 'JSON6' operation, which renders the parsed SVG as a JSON-like
 * text summary: one "{ kind: 'rect', ... }" entry per rectangle, with width and
 * height abbreviated to w and h.
 *
 * @param {object} semantics - object providing addOperation (name, actions).
 */
function addJSON6 (semantics) {
  const actions = {
    SVGElement (_open, attributes, _close, rectangles, _endTag) {
      const header = attributes.JSON6 ().join (',\n');
      const body = rectangles.JSON6 ().join (',\n ');
      return `svg {\n${header},\ncontents: [\n ${body}\n]}`;
    },
    SVGAttribute (inner) { return inner.JSON6 (); },
    SVGRect (_open, parts, _close, _endTag) {
      return `{ kind: 'rect', ${parts.JSON6 ().join (', ')} }`;
    },
    RectContent (inner) { return inner.JSON6 (); },
    IDAttribute (_key, value) { return `id: ${value.JSON6 ()}`; },
    XAttribute (_key, value) { return `x: ${value.JSON6 ()}`; },
    YAttribute (_key, value) { return `y: ${value.JSON6 ()}`; },
    WidthAttribute (_key, value) { return `w: ${value.JSON6 ()}`; },
    HeightAttribute (_key, value) { return `h: ${value.JSON6 ()}`; },
    // Strings are re-quoted with single quotes; integers lose their quotes.
    string (_openQuote, chars, _closeQuote) { return `'${chars.JSON6 ().join ('')}'`; },
    notDQuote (ch) { return ch.JSON6 (); },
    integerString (_openQuote, digits, _closeQuote) { return digits.JSON6 ().join (''); },
    integerDigit (digit) { return digit.JSON6 (); },
    _terminal () { return this.primitiveValue; }
  };
  semantics.addOperation ('JSON6', actions);
}
// Sample input for example 6: an <svg> element that contains seven <rect> elements.
const text = `
<svg width="800" height="800">
<rect id="id0" x="40" y="120" width="150" height="60"></rect>
<rect id="id1" x="40" y="320" width="150" height="60"></rect>
<rect id="id2" x="280" y="120" width="250" height="130"></rect>
<rect id="id3" x="650" y="120" width="150" height="60"></rect>
<rect id="id4" x="650" y="270" width="150" height="60"></rect>
<rect id="id5" x="650" y="360" width="150" height="60"></rect>
<rect id="id6" x="650" y="440" width="150" height="60"></rect>
</svg>
`;
// Ohm grammar for the sample above. Note that the grammar block is named
// Example1 although the variable is example6_grammar. Because this is a
// template literal, each \\" below reaches Ohm as \" (an escaped double quote
// inside an Ohm string literal).
const example6_grammar = `
Example1 {
SVGElement = "<svg" SVGAttribute* ">" SVGRect+ "</svg>"
SVGAttribute = WidthAttribute | HeightAttribute
SVGRect = "<rect" RectContent+ ">" "</rect>"
RectContent = IDAttribute | XAttribute | YAttribute | WidthAttribute | HeightAttribute
IDAttribute = "id=" string
XAttribute = "x=" integerString
YAttribute = "y=" integerString
WidthAttribute = "width=" integerString
HeightAttribute = "height=" integerString
string = "\\"" notDQuote* "\\""
notDQuote = ~"\\"" any
integerString = "\\"" integerDigit+ "\\""
integerDigit = "0" .. "9"
}`;
// Parse the sample; on success attach both operations and print what each produces.
const ohm = require ('ohm-js');
const ohmParser = ohm.grammar (example6_grammar);
const result = ohmParser.match (text);
if (!result.succeeded ()) {
  console.log ("Ohm matching failed");
} else {
  const semantics = ohmParser.createSemantics ();
  addExample6 (semantics);
  addJSON6 (semantics);
  console.log ('example6 ohm:');
  console.log (semantics (result).example6 ());
  console.log ('example6 ohm JSON:');
  console.log (semantics (result).JSON6 ());
}
/**
 * Registers the 'example6' operation, which re-emits the parsed SVG as text.
 *
 * The separators between a tag name and its attributes are not handed to these
 * actions (Ohm's capitalised, syntactic rules skip whitespace), so a single
 * space is re-inserted before every attribute to keep the output well-formed.
 *
 * @param {object} semantics - object providing addOperation (name, actions);
 *   the surrounding script passes the result of ohmParser.createSemantics ().
 */
function addExample6 (semantics) {
  // Render a list of attribute strings, each preceded by a single space.
  function withLeadingSpaces (renderedAttributes) {
    return renderedAttributes.map (function (attribute) { return ' ' + attribute; }).join ('');
  }
  semantics.addOperation (
    'example6',
    {
      SVGElement: function (_svg, attrs, _gt, rects, _end) {
        return "<svg" + withLeadingSpaces (attrs.example6 ()) + ">\n" + rects.example6 ().join ('\n') + "\n</svg>";
      },
      SVGAttribute: function (attribute) { return attribute.example6 (); },
      SVGRect: function (_rect, contents, _gt, _end) {
        return "<rect" + withLeadingSpaces (contents.example6 ()) + ">" + "</rect>";
      },
      RectContent: function (attribute) { return attribute.example6 (); },
      IDAttribute: function (_id, str) { return "id=" + str.example6 (); },
      XAttribute: function (_x, n) { return "x=" + n.example6 (); },
      YAttribute: function (_y, n) { return "y=" + n.example6 (); },
      WidthAttribute: function (_w, n) { return "width=" + n.example6 (); },
      HeightAttribute: function (_h, n) { return "height=" + n.example6 (); },
      // The surrounding quotes are matched separately, so they are put back here.
      string: function (_q1, characters, _q2) { return '"' + characters.example6 ().join ('') + '"'; },
      notDQuote: function (c) { return c.example6 (); },
      integerString: function (_q1, digits, _q2) { return '"' + digits.example6 ().join ('') + '"'; },
      integerDigit: function (d) { return d.example6 (); },
      // NOTE(review): relies on the node's primitiveValue property; newer ohm-js
      // releases may expose terminal text only as sourceString - confirm against
      // the installed version.
      _terminal: function () { return this.primitiveValue; }
    }
  );
}
/**
 * Registers the 'JSON6' operation, which renders the parsed SVG as a JSON-like
 * text summary: one "{ kind: 'rect', ... }" entry per rectangle, with width and
 * height abbreviated to w and h.
 *
 * @param {object} semantics - object providing addOperation (name, actions).
 */
function addJSON6 (semantics) {
  const actions = {
    SVGElement (_open, attributes, _close, rectangles, _endTag) {
      const header = attributes.JSON6 ().join (',\n');
      const body = rectangles.JSON6 ().join (',\n ');
      return `svg {\n${header},\ncontents: [\n ${body}\n]}`;
    },
    SVGAttribute (inner) { return inner.JSON6 (); },
    SVGRect (_open, parts, _close, _endTag) {
      return `{ kind: 'rect', ${parts.JSON6 ().join (', ')} }`;
    },
    RectContent (inner) { return inner.JSON6 (); },
    IDAttribute (_key, value) { return `id: ${value.JSON6 ()}`; },
    XAttribute (_key, value) { return `x: ${value.JSON6 ()}`; },
    YAttribute (_key, value) { return `y: ${value.JSON6 ()}`; },
    WidthAttribute (_key, value) { return `w: ${value.JSON6 ()}`; },
    HeightAttribute (_key, value) { return `h: ${value.JSON6 ()}`; },
    // Strings are re-quoted with single quotes; integers lose their quotes.
    string (_openQuote, chars, _closeQuote) { return `'${chars.JSON6 ().join ('')}'`; },
    notDQuote (ch) { return ch.JSON6 (); },
    integerString (_openQuote, digits, _closeQuote) { return digits.JSON6 ().join (''); },
    integerDigit (digit) { return digit.JSON6 (); },
    _terminal () { return this.primitiveValue; }
  };
  semantics.addOperation ('JSON6', actions);
}
(see next instalment)
(see next instalment)