# blob: 592bf91393d840809235f0440d3308fb23b978cd [file] [log] [blame]
# Copyright (C) 2013 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Parser for Blink IDL.
The parser uses the PLY (Python Lex-Yacc) library to build a set of parsing
rules which understand the Blink dialect of Web IDL.
It derives from a standard Web IDL parser, overriding rules where Blink IDL
differs syntactically or semantically from the base parser, or where the base
parser diverges from the Web IDL standard.
Web IDL:
http://www.w3.org/TR/WebIDL/
Web IDL Grammar:
http://www.w3.org/TR/WebIDL/#idl-grammar
PLY:
http://www.dabeaz.com/ply/
Design doc:
http://www.chromium.org/developers/design-documents/idl-compiler#TOC-Front-end
"""
# Disable check for line length and Member as Function due to how grammar rules
# are defined with PLY
#
# pylint: disable=R0201
# pylint: disable=C0301
#
# Disable attribute validation, as lint can't import parent class to check
# pylint: disable=E1101
import os.path
import sys
# PLY is in Chromium src/third_party/ply
# Split this file's path into directory and file name; the directory is the
# anchor for the relative paths computed below.
module_path, module_name = os.path.split(__file__)
third_party = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir)
# Insert at front to override system libraries, and after path[0] == script dir
sys.path.insert(1, third_party)
# NOTE: this import must stay below the sys.path.insert() call above.
from ply import yacc
# Base parser is in Chromium src/tools/idl_parser
tools_dir = os.path.join(module_path, os.pardir, os.pardir, os.pardir, os.pardir, os.pardir, 'tools')
sys.path.append(tools_dir)
# NOTE: these imports must stay below the sys.path.append() call above.
from idl_parser.idl_parser import IDLParser, ListFromConcat
from idl_parser.idl_parser import ParseFile as parse_file
from blink_idl_lexer import BlinkIDLLexer
import blink_idl_lexer
# Explicitly set starting symbol to rule defined only in base parser.
# BEWARE that the starting symbol should NOT be defined in both the base parser
# and the derived one, as otherwise which is used depends on which line number
# is lower, which is fragile. Instead, either use one in base parser or
# create a new symbol, so that this is unambiguous.
# FIXME: unfortunately, this doesn't work in PLY 3.4, so need to duplicate the
# rule below.
# Passed as the 'start' argument to yacc.yacc() when the parser is built.
STARTING_SYMBOL = 'Definitions'
# We ignore comments (and hence don't need 'Top') but base parser preserves them
# FIXME: Upstream: comments should be removed in base parser
REMOVED_RULES = ['Top', # [0]
                 'Comments', # [0.1]
                 'CommentsRest', # [0.2]
                ]
# Remove rules from base class
# FIXME: add a class method upstream: @classmethod IDLParser._RemoveRules
# This runs at import time and deletes the p_<rule> attributes from the
# IDLParser class object itself (not from a copy); delattr raises
# AttributeError if the base class does not define one of them.
for rule in REMOVED_RULES:
    production_name = 'p_' + rule
    delattr(IDLParser, production_name)
class BlinkIDLParser(IDLParser):
    """Parser for the Blink dialect of Web IDL, layered on IDLParser."""

    # [1]
    # FIXME: Need to duplicate rule for starting symbol here, with line number
    # *lower* than in the base parser (idl_parser.py).
    # This is a bug in PLY: it determines starting symbol by lowest line number.
    # This can be overridden by the 'start' parameter, but as of PLY 3.4 this
    # doesn't work correctly.
    def p_Definitions(self, p):
        """Definitions : ExtendedAttributeList Definition Definitions
| """
        # Empty alternative: leave p[0] unset (ignore production).
        if len(p) > 1:
            # Attach the extended attributes to the definition they precede.
            p[2].AddChildren(p[1])
            p[0] = ListFromConcat(p[2], p[3])

    # Below are grammar rules used by yacc, given by functions named p_<RULE>.
    # * The docstring is the production rule in BNF (grammar).
    # * The body is the yacc action (semantics).
    #
    # The PLY framework builds the actual low-level parser by introspecting this
    # parser object, selecting all attributes named p_<RULE> as grammar rules.
    # It extracts the docstrings and uses them as the production rules, building
    # the table of a LALR parser, and uses the body of the functions as actions.
    #
    # Because the docstrings of p_<RULE> functions are the grammar itself, they
    # must not be reworded or used for ordinary documentation; explanatory text
    # for a rule goes in '#' comments instead.
    #
    # Reference:
    # http://www.dabeaz.com/ply/ply.html#ply_nn23
    #
    # Review of yacc:
    # Yacc parses a token stream, internally producing a Concrete Syntax Tree
    # (CST), where each node corresponds to a production rule in the grammar.
    # At each node, it runs an action, which is usually "produce a node in the
    # Abstract Syntax Tree (AST)" or "ignore this node" (for nodes in the CST
    # that aren't included in the AST, since only needed for parsing).
    #
    # The rules use pseudo-variables; in PLY syntax:
    # p[0] is the left side: assign return value to p[0] instead of returning,
    # p[1] ... p[n] are the right side: the values can be accessed, and they
    # can be modified.
    # (In yacc these are $$ and $1 ... $n.)
    #
    # The rules can look cryptic at first, but there are a few standard
    # transforms from the CST to AST. With these in mind, the actions should
    # be reasonably legible.
    #
    # * Ignore production
    #   Discard this branch. Primarily used when one alternative is empty.
    #
    #   Sample code:
    #   if len(p) > 1:
    #       p[0] = ...
    #   # Note no assignment if len(p) == 1
    #
    # * Eliminate singleton production
    #   Discard this node in the CST, pass the next level down up the tree.
    #   Used to ignore productions only necessary for parsing, but not needed
    #   in the AST.
    #
    #   Sample code:
    #   p[0] = p[1]
    #
    # * Build node
    #   The key type of rule. In this parser, produces object of class IDLNode.
    #   There are several helper functions:
    #   * BuildProduction: actually builds an IDLNode, based on a production.
    #   * BuildAttribute: builds an IDLAttribute, which is a temporary
    #                     object to hold a name-value pair, which is then
    #                     set as a Property of the IDLNode when the IDLNode
    #                     is built.
    #   * BuildNamed: Same as BuildProduction, and sets the 'NAME' property.
    #   * BuildTrue: BuildAttribute with value True, for flags.
    #   See base idl_parser.py for definitions and more examples of use.
    #
    #   Sample code:
    #   # Build node of type NodeType, with value p[1], and children.
    #   p[0] = self.BuildProduction('NodeType', p, 1, children)
    #
    #   # Build named node of type NodeType, with name and value p[1].
    #   # (children optional)
    #   p[0] = self.BuildNamed('NodeType', p, 1)
    #
    #   # Make a list
    #   # Used if one node has several children.
    #   children = ListFromConcat(p[2], p[3])
    #   p[0] = self.BuildProduction('NodeType', p, 1, children)
    #
    #   # Also used to collapse the right-associative tree
    #   # produced by parsing a list back into a single list.
    #   """Foos : Foo Foos
    #           |"""
    #   if len(p) > 1:
    #       p[0] = ListFromConcat(p[1], p[2])
    #
    #   # Add children.
    #   # Primarily used to add attributes, produced via BuildTrue.
    #   # p_StaticAttribute
    #   """StaticAttribute : STATIC Attribute"""
    #   p[2].AddChildren(self.BuildTrue('STATIC'))
    #   p[0] = p[2]
    #
    # Numbering scheme for the rules is:
    # [1] for Web IDL spec (or additions in base parser)
    #     These should all be upstreamed to the base parser.
    # [b1] for Blink IDL changes (overrides Web IDL)
    # [b1.1] for Blink IDL additions, auxiliary rules for [b1]
    # Numbers are as per Candidate Recommendation 19 April 2012:
    # http://www.w3.org/TR/2012/CR-WebIDL-20120419/

    # [3] Override action, since we distinguish callbacks
    # FIXME: Upstream
    def p_CallbackOrInterface(self, p):
        """CallbackOrInterface : CALLBACK CallbackRestOrInterface
| Interface"""
        if len(p) > 2:
            # CALLBACK alternative: flag the node so callbacks stay
            # distinguishable from plain interfaces.
            p[2].AddChildren(self.BuildTrue('CALLBACK'))
            p[0] = p[2]
        else:
            p[0] = p[1]

    # [b27] Add strings, more 'Literal' productions
    # 'Literal's needed because integers and strings are both internally strings
    def p_ConstValue(self, p):
        """ConstValue : BooleanLiteral
| FloatLiteral
| IntegerLiteral
| StringLiteral
| null"""
        # Standard is (no 'string', fewer 'Literal's):
        # ConstValue : BooleanLiteral
        #            | FloatLiteral
        #            | integer
        #            | NULL
        p[0] = p[1]

    # [b27.1]
    def p_IntegerLiteral(self, p):
        """IntegerLiteral : integer"""
        # Result is a two-item list: a TYPE attribute, then a NAME attribute
        # holding the token text.
        p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'integer'),
                              self.BuildAttribute('NAME', p[1]))

    # [b27.2]
    def p_StringLiteral(self, p):
        """StringLiteral : string"""
        # Same [TYPE, NAME] layout as IntegerLiteral; _string_literal_value()
        # relies on NAME being at index 1.
        p[0] = ListFromConcat(self.BuildAttribute('TYPE', 'DOMString'),
                              self.BuildAttribute('NAME', p[1]))

    # [b47]
    def p_ExceptionMember(self, p):
        """ExceptionMember : Const
| ExceptionField
| Attribute
| ExceptionOperation"""
        # Standard is (no Attribute, no ExceptionOperation):
        # ExceptionMember : Const
        #                 | ExceptionField
        # FIXME: In DOMException.idl, Attributes should be changed to
        # ExceptionFields, and Attribute removed from this rule.
        p[0] = p[1]

    # [b47.1]
    def p_ExceptionOperation(self, p):
        """ExceptionOperation : Type identifier '(' ')' ';'"""
        # Needed to handle one case in DOMException.idl:
        # // Override in a Mozilla compatible format
        # [NotEnumerable] DOMString toString();
        # Limited form of Operation to prevent others from being added.
        # FIXME: Should be a stringifier instead.
        p[0] = self.BuildNamed('ExceptionOperation', p, 2, p[1])

    # Extended attributes
    # [b49] Override base parser: remove comment field, since comments stripped
    # FIXME: Upstream
    def p_ExtendedAttributeList(self, p):
        """ExtendedAttributeList : '[' ExtendedAttribute ExtendedAttributes ']'
| '[' ']'
| """
        # Only the non-empty bracketed form produces a node; '[' ']' and the
        # empty alternative leave p[0] unset.
        if len(p) > 3:
            items = ListFromConcat(p[2], p[3])
            p[0] = self.BuildProduction('ExtAttributes', p, 1, items)

    # [b50] Allow optional trailing comma
    # Blink-only, marked as WONTFIX in Web IDL spec:
    # https://www.w3.org/Bugs/Public/show_bug.cgi?id=22156
    def p_ExtendedAttributes(self, p):
        """ExtendedAttributes : ',' ExtendedAttribute ExtendedAttributes
| ','
|"""
        # A lone trailing ',' and the empty alternative contribute nothing.
        if len(p) > 3:
            p[0] = ListFromConcat(p[2], p[3])

    # [b51] Add ExtendedAttributeStringLiteral and ExtendedAttributeStringLiteralList
    def p_ExtendedAttribute(self, p):
        """ExtendedAttribute : ExtendedAttributeNoArgs
| ExtendedAttributeArgList
| ExtendedAttributeIdent
| ExtendedAttributeIdentList
| ExtendedAttributeNamedArgList
| ExtendedAttributeStringLiteral
| ExtendedAttributeStringLiteralList"""
        p[0] = p[1]

    # [59]
    # FIXME: Upstream UnionType
    def p_UnionType(self, p):
        """UnionType : '(' UnionMemberType OR UnionMemberType UnionMemberTypes ')'"""
        # p[3] is the OR token; the member types are p[2], p[4] and the
        # (possibly empty) tail p[5].
        members = ListFromConcat(p[2], p[4], p[5])
        p[0] = self.BuildProduction('UnionType', p, 1, members)

    # [60]
    def p_UnionMemberType(self, p):
        """UnionMemberType : NonAnyType
| UnionType TypeSuffix
| ANY '[' ']' TypeSuffix"""
        # len(p) identifies the alternative: 2 = NonAnyType,
        # 3 = UnionType TypeSuffix, otherwise ANY '[' ']' TypeSuffix.
        if len(p) == 2:
            p[0] = self.BuildProduction('Type', p, 1, p[1])
        elif len(p) == 3:
            p[0] = self.BuildProduction('Type', p, 1, ListFromConcat(p[1], p[2]))
        else:
            any_node = ListFromConcat(self.BuildProduction('Any', p, 1), p[4])
            p[0] = self.BuildProduction('Type', p, 1, any_node)

    # [61]
    def p_UnionMemberTypes(self, p):
        """UnionMemberTypes : OR UnionMemberType UnionMemberTypes
|"""
        if len(p) > 2:
            p[0] = ListFromConcat(p[2], p[3])

    # [70] Override base parser to remove non-standard sized array
    # FIXME: Upstream
    def p_TypeSuffix(self, p):
        """TypeSuffix : '[' ']' TypeSuffix
| '?' TypeSuffixStartingWithArray
|"""
        # len(p) identifies the alternative: 4 = array suffix,
        # 3 = nullable suffix, 1 = empty (p[0] left unset).
        if len(p) == 4:
            p[0] = self.BuildProduction('Array', p, 1, p[3])
        elif len(p) == 3:
            p[0] = ListFromConcat(self.BuildTrue('NULLABLE'), p[2])

    # Blink extension: Add support for string literal Extended Attribute values
    def p_ExtendedAttributeStringLiteral(self, p):
        """ExtendedAttributeStringLiteral : identifier '=' StringLiteral """
        value = self.BuildAttribute('VALUE', self._string_literal_value(p[3]))
        p[0] = self.BuildNamed('ExtAttribute', p, 1, value)

    # Blink extension: Add support for compound Extended Attribute values over string literals ("A","B")
    def p_ExtendedAttributeStringLiteralList(self, p):
        """ExtendedAttributeStringLiteralList : identifier '=' '(' StringLiteralList ')' """
        # p[4] is already a list of plain values (see p_StringLiteralList).
        value = self.BuildAttribute('VALUE', p[4])
        p[0] = self.BuildNamed('ExtAttribute', p, 1, value)

    # Blink extension: one or more string literals. The values aren't propagated as literals,
    # but by their value only.
    def p_StringLiteralList(self, p):
        """StringLiteralList : StringLiteral ',' StringLiteralList
| StringLiteral"""
        head = self._string_literal_value(p[1])
        if len(p) > 3:
            p[0] = ListFromConcat(head, p[3])
        else:
            p[0] = ListFromConcat(head)

    @staticmethod
    def _string_literal_value(literal):
        """Return the value of the "NAME" attribute of a StringLiteral result.

        p_StringLiteral produces a two-item list whose second item is the
        'NAME' attribute; this reaches in and returns that attribute's value.
        Not a grammar rule: the name has no p_ prefix, so PLY does not treat
        it as a production.
        """
        return literal[1].value

    def __init__(self,
                 # common parameters
                 debug=False,
                 # local parameters
                 rewrite_tables=False,
                 # idl_parser parameters
                 lexer=None, verbose=False, mute_error=False,
                 # yacc parameters
                 outputdir='', optimize=True, write_tables=False,
                 picklefile=None):
        """Build the lexer and the PLY parser tables for Blink IDL.

        Args:
            debug: If true, disables optimization and table caching and
                forces tables to be written; also passed to the lexer and to
                yacc, and stored as self.parse_debug.
            rewrite_tables: If true, delete the cached pickle file (when a
                path for it is known) before building, so that it is
                regenerated rather than loaded.
            lexer: Lexer to use; a BlinkIDLLexer is created if not given.
            verbose: Stored as self.verbose.
            mute_error: Stored as self.mute_error.
            outputdir: Directory for cached tables. When set, the default
                pickle file is 'parsetab.pickle' inside it. Also passed to
                the lexer.
            optimize: Passed to the lexer and to yacc.yacc().
            write_tables: Passed to yacc.yacc().
            picklefile: Explicit path of the pickled parse table; overrides
                the default derived from outputdir.
        """
        if debug:
            # Turn off optimization and caching, and write out tables,
            # to help debugging
            optimize = False
            outputdir = None
            picklefile = None
            write_tables = True
        if outputdir:
            picklefile = picklefile or os.path.join(outputdir, 'parsetab.pickle')
        # Only attempt the delete when there is a path to delete: with no
        # pickle file configured (e.g. debug mode) os.unlink(None) would raise
        # TypeError, which the OSError handler below does not catch.
        if rewrite_tables and picklefile:
            try:
                os.unlink(picklefile)
            except OSError:
                # Best effort: a missing or undeletable cache file is ignored.
                pass

        lexer = lexer or BlinkIDLLexer(debug=debug,
                                       outputdir=outputdir,
                                       optimize=optimize)
        self.lexer = lexer
        self.tokens = lexer.KnownTokens()
        # Using SLR (instead of LALR) generates the table faster,
        # but produces the same output. This is ok b/c Web IDL (and Blink IDL)
        # is an SLR grammar (as is often the case for simple LL(1) grammars).
        #
        # Optimized mode substantially decreases startup time (by disabling
        # error checking), and also allows use of Python's optimized mode.
        # See: Using Python's Optimized Mode
        # http://www.dabeaz.com/ply/ply.html#ply_nn38
        #
        # |picklefile| allows simpler importing than |tabmodule| (parsetab.py),
        # as we don't need to modify sys.path; virtually identical speed.
        # See: CHANGES, Version 3.2
        # http://ply.googlecode.com/svn/trunk/CHANGES
        self.yaccobj = yacc.yacc(module=self,
                                 start=STARTING_SYMBOL,
                                 method='SLR',
                                 debug=debug,
                                 optimize=optimize,
                                 write_tables=write_tables,
                                 picklefile=picklefile)
        # NOTE(review): IDLParser.__init__ is not called here; the attributes
        # below presumably mirror the state it would set up - confirm against
        # idl_parser.py.
        self.parse_debug = debug
        self.verbose = verbose
        self.mute_error = mute_error
        self._parse_errors = 0
        self._parse_warnings = 0
        self._last_error_msg = None
        self._last_error_lineno = 0
        self._last_error_pos = 0
################################################################################
def main(argv):
    """Rebuild the cached lex/parse tables when this file is run as a script.

    Args:
        argv: Command-line argument list; argv[0] is the program name and
            argv[1] the output directory for the cached tables.

    Returns:
        1 after printing a usage message if no output directory was given,
        otherwise 0.
    """
    # If file itself executed, cache lex/parse tables
    try:
        outputdir = argv[1]
    except IndexError:
        # Parenthesised single-argument form prints the same text whether
        # print is a statement (Python 2) or a function (Python 3).
        print('Usage: %s OUTPUT_DIR' % argv[0])
        return 1
    blink_idl_lexer.main(argv)
    # Important: rewrite_tables=True causes the cache file to be deleted if it
    # exists, thus making sure that PLY doesn't load it instead of regenerating
    # the parse table.
    # The instance itself is not needed; it is constructed only for that effect.
    BlinkIDLParser(outputdir=outputdir, rewrite_tables=True)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))