blob: 489cd53cc492c1d40e78efd7905bfb1c7b66856b [file] [log] [blame]
# Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
# for details. All rights reserved. Use of this source code is governed by a
# BSD-style license that can be found in the LICENSE file.
#
"""Clang based C++ code indexer which produces xref.json."""
from __future__ import annotations
import glob
import json
import logging
import os
import platform
import re
import sys
import subprocess
from contextlib import contextmanager
from dataclasses import dataclass, field
from typing import Dict, Optional, Set
from marshmallow_dataclass import class_schema
from progress.bar import ShadyBar
# Path of the libclang shared library and of Clang's builtin include
# directory. Both stay None unless the platform-specific probing below
# fills them in.
_libclang_path = None
_clang_include_dir = None

if platform.system() == 'Darwin':
    # Ask Homebrew for the prefix of its LLVM installation.
    _path_to_llvm = subprocess.check_output(
        ['brew', '--prefix', 'llvm'], encoding='utf-8').strip()
    _clang_include_dir = glob.glob(
        f'{_path_to_llvm}/lib/clang/**/include')[0]
    _libclang_path = f'{_path_to_llvm}/lib/libclang.dylib'
    # Extend the module search path before clang.cindex is imported below
    # (presumably the Python bindings live under the LLVM prefix).
    sys.path.append(f'{_path_to_llvm}/lib/python'
                    f'{sys.version_info.major}.{sys.version_info.minor}'
                    '/site-packages')

# pylint: disable=wrong-import-position,line-too-long
from clang.cindex import Config, CompilationDatabase, Cursor, CursorKind, Index, SourceLocation, TranslationUnit

# Point the bindings at the discovered library, if any was found.
if _libclang_path is not None:
    Config.set_library_file(_libclang_path)
# Name of the build configuration to index: 'ReleaseARM64' or 'ReleaseX64'.
# 64-bit ARM hosts report their machine as 'arm64' (macOS) or 'aarch64'
# (Linux); both must select the ARM64 configuration.
_DART_CONFIGURATION = 'Release' + (
    'ARM64' if platform.uname().machine in ('arm64', 'aarch64') else 'X64')

# Relative path of the build output directory for that configuration:
# 'xcodebuild/<configuration>' on macOS, 'out/<configuration>' elsewhere.
_DART_BUILD_DIR = ('xcodebuild' if platform.system() == 'Darwin' else
                   'out') + '/' + _DART_CONFIGURATION
@contextmanager
def _change_working_directory_to(path):
    """Run the body of a `with` statement with `path` as working directory.

    The working directory that was current on entry is restored on exit,
    including when the body raises.
    """
    previous_dir = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        # Always go back, whatever happened inside the `with` body.
        os.chdir(previous_dir)
def _create_compilation_database():
    """Create compilation database for the default configuration.

    Extracts the compilation database (compile_commands.json) for the
    default build configuration and filters it down to the commands that
    build just the core VM pieces in the host configuration.

    The filtered database is written to compile_commands.json inside the
    build directory.
    """
    logging.info('Extracting compilation commands from build files for %s',
                 _DART_CONFIGURATION)
    with _change_working_directory_to(_DART_BUILD_DIR):
        # Let ninja dump every C++ compile command of this build directory.
        ninja_output = subprocess.check_output(
            ['ninja', '-C', '.', '-t', 'compdb', 'cxx'])
        all_commands = json.loads(ninja_output)

        # Only commands mentioning one of these library targets are kept.
        pattern = re.compile(
            r'libdart(_vm|_compiler)?_precompiler_host_targeting_host\.')
        with open('compile_commands.json', 'w', encoding='utf-8') as outfile:
            vm_commands = [
                entry for entry in all_commands
                if pattern.search(entry['command'])
            ]
            json.dump(vm_commands, outfile)
def _get_current_commit_hash():
    """Return the output of `git merge-base main HEAD`, stripped.

    Raises subprocess.CalledProcessError if git exits with an error.
    """
    merge_base = subprocess.check_output(
        ['git', 'merge-base', 'main', 'HEAD'], text=True)
    return merge_base.strip()
@dataclass
class _ClassInfo:
    """Index entry describing a single C++ class."""

    # Location of the class definition, encoded as '<file index>:<line>'.
    location: str

    # Maps a member name to its location, using the same encoding.
    members: Dict[str, str] = field(default_factory=dict)
@dataclass
class Location:
    """Symbol location referring to a line in a specific file."""

    # Name of the file containing the symbol.
    filename: str

    # Line number of the symbol within that file.
    lineno: int
@dataclass
class SymbolsIndex:
    """Index of C++ symbols extracted from source."""

    # Commit hash the index was generated for.
    commit: str

    # File names; encoded locations refer to entries of this list by index.
    files: list[str] = field(default_factory=list)

    # Class name -> class entry (definition location plus members).
    classes: Dict[str, _ClassInfo] = field(default_factory=dict)

    # Function name -> encoded location ('<file index>:<line>').
    functions: Dict[str, str] = field(default_factory=dict)

    def try_resolve(self, ref: str) -> Optional[Location]:
        """Resolve the location of the given reference.

        Returns None when `ref` names neither a known function, a known
        class, nor a known member of a known class.
        """
        encoded = self._try_resolve_impl(ref)
        return None if encoded is None else self._location_from_string(
            encoded)

    def _try_resolve_impl(self, ref: str) -> Optional[str]:
        """Return the encoded location for `ref`, or None if unknown."""
        # Functions take precedence over classes with the same name.
        try:
            return self.functions[ref]
        except KeyError:
            pass

        try:
            return self.classes[ref].location
        except KeyError:
            pass

        # Otherwise treat everything before the last '::' as a class name
        # and the rest as the name of one of its members.
        class_name, separator, member_name = ref.rpartition('::')
        if separator and class_name in self.classes:
            return self.classes[class_name].members.get(member_name)
        return None

    def _location_from_string(self, loc: str) -> Location:
        """Decode a '<file index>:<line>' string into a Location."""
        (file_idx, line_idx) = loc.split(':', 1)
        return Location(self.files[int(file_idx)], int(line_idx))
class _Indexer:
    """Walks translation units and accumulates symbols in a SymbolsIndex."""

    # The index being built.
    symbols_index: SymbolsIndex
    # Files fully handled by previously indexed translation units.
    processed_files: Set[str]
    # Class entries keyed by the USR of the class cursor.
    classes_by_usr: Dict[str, _ClassInfo]
    # File name -> position of that file in symbols_index.files.
    files_index: Dict[str, int]
    # Names of the namespaces/classes currently being traversed.
    name_stack: list[str]
    # Entries of the classes currently being traversed.
    info_stack: list[_ClassInfo]
    # Files encountered while traversing the current translation unit.
    files_seen_in_unit: Set[str]

    def __init__(self):
        self.symbols_index = SymbolsIndex(commit=_get_current_commit_hash())
        self.processed_files = set()
        self.classes_by_usr = {}
        self.files_index = {}
        self.name_stack = []
        self.info_stack = []
        self.files_seen_in_unit = set()

    def index(self, unit: TranslationUnit):
        """Index the given translation unit and append new symbols to index."""
        self.name_stack.clear()
        self.info_stack.clear()
        self.files_seen_in_unit.clear()
        self._recurse(unit.cursor)
        # Files of this unit are skipped when later units include them.
        self.processed_files |= self.files_seen_in_unit

    def _recurse(self, cursor: Cursor):
        """Record the symbol at `cursor` (if any) and visit its children."""
        kind = cursor.kind
        source_file = cursor.location.file
        if source_file is not None:
            path = source_file.name
            # Skip files handled by an earlier unit and files whose path
            # does not start with '../..'.
            if path in self.processed_files or not path.startswith('../..'):
                return
            self.files_seen_in_unit.add(path)

        if kind == CursorKind.CLASS_DECL:
            if not cursor.is_definition():
                return
            usr = cursor.get_usr()
            if usr in self.classes_by_usr:
                return
            self.name_stack.append(cursor.spelling)
            qualified_name = '::'.join(self.name_stack)
            entry = _ClassInfo(self._format_location(cursor.location))
            self.info_stack.append(entry)
            self.symbols_index.classes[qualified_name] = entry
            self.classes_by_usr[usr] = entry
        elif kind == CursorKind.NAMESPACE:
            self.name_stack.append(cursor.spelling)
        elif kind == CursorKind.FUNCTION_DECL and cursor.is_definition():
            # Function bodies are not traversed.
            self._record_function(cursor)
            return
        elif kind == CursorKind.CXX_METHOD and cursor.is_definition():
            # Method bodies are not traversed either.
            self._record_member(cursor)
            return
        elif kind == CursorKind.VAR_DECL and cursor.is_definition():
            if not self._record_member(cursor):
                return

        for child in cursor.get_children():
            self._recurse(child)

        # Leave the scope that was entered above.
        if kind in (CursorKind.NAMESPACE, CursorKind.CLASS_DECL):
            self.name_stack.pop()
            if kind == CursorKind.CLASS_DECL:
                self.info_stack.pop()

    def _record_function(self, cursor: Cursor):
        """Add the function defined at `cursor` to the functions table."""
        qualifier = ""
        if cursor.semantic_parent.kind == CursorKind.NAMESPACE:
            # Every enclosing scope name followed by '::'.
            qualifier = ''.join(f'{scope}::' for scope in self.name_stack)
        self.symbols_index.functions[
            qualifier + cursor.spelling] = self._format_location(
                cursor.location)

    def _record_member(self, cursor: Cursor) -> bool:
        """Record `cursor` as a member of its semantic parent class.

        Cursors whose semantic parent is not a class declaration are left
        alone. Returns False only when the parent is a class declaration
        that has no entry in classes_by_usr; True otherwise.
        """
        parent = cursor.semantic_parent
        if parent.kind != CursorKind.CLASS_DECL:
            return True
        owner = self.classes_by_usr.get(parent.get_usr())
        if owner is None:
            return False
        owner.members[cursor.spelling] = self._format_location(
            cursor.location)
        return True

    def _format_location(self, loc: SourceLocation):
        """Encode `loc` as '<file index>:<line>'."""
        path, line = loc.file.name, loc.line
        return f'{self._get_file_index(path)}:{line}'

    def _get_file_index(self, file_name: str):
        """Return the index of `file_name`, registering it on first use."""
        try:
            return self.files_index[file_name]
        except KeyError:
            pass

        files = self.symbols_index.files
        new_index = len(files)
        self.files_index[file_name] = new_index
        # Store the path relative to the directory two levels above the
        # current working directory.
        files.append(
            os.path.relpath(os.path.abspath(file_name),
                            os.path.abspath('../..')))
        return new_index
def _index_source() -> SymbolsIndex:
    """Index every translation unit of the compilation database.

    Regenerates the compilation database, parses each command's source
    file with libclang and feeds the resulting translation unit to a fresh
    indexer. Diagnostics reported by the parser are printed.

    Returns the populated symbols index.
    """
    # Only compiler arguments starting with one of these are forwarded.
    forwarded_prefixes = ('-I', '-W', '-D', '-i', 'sdk/', '-std')
    # Warnings that are silenced for every parse.
    silenced_warnings = [
        '-Wno-macro-redefined', '-Wno-unused-const-variable',
        '-Wno-unused-function', '-Wno-unused-variable'
    ]

    indexer = _Indexer()
    _create_compilation_database()
    with _change_working_directory_to(_DART_BUILD_DIR):
        index = Index.create()
        compdb = CompilationDatabase.fromDirectory('.')
        commands = list(compdb.getAllCompileCommands())
        with ShadyBar('Indexing',
                      max=len(commands),
                      suffix='%(percent)d%% eta %(eta_td)s') as progress_bar:
            for command in commands:
                args = [
                    arg for arg in command.arguments
                    if arg.startswith(forwarded_prefixes)
                ]
                args.extend(silenced_warnings)
                if _clang_include_dir is not None:
                    args.append(f'-I{_clang_include_dir}')
                unit = index.parse(command.filename, args=args)
                for diag in unit.diagnostics:
                    print(diag.format())
                indexer.index(unit)
                progress_bar.next()
    return indexer.symbols_index
# Schema instance used to serialize SymbolsIndex objects to JSON text and to
# load them back (see load_index).
_SymbolsIndexSchema = class_schema(SymbolsIndex)()
def load_index(filename: str) -> SymbolsIndex:
    """Load symbols index from the given file.

    If the index is out of date or missing it will be regenerated from
    source and written to `filename`.

    Args:
        filename: path of the JSON file holding the serialized index.

    Returns:
        The loaded or freshly generated symbols index.
    """
    index: SymbolsIndex
    if os.path.exists(filename):
        with open(filename, 'r', encoding='utf-8') as json_file:
            index = _SymbolsIndexSchema.loads(json_file.read())
        # Query git once and reuse the answer for both the comparison and
        # the warning, rather than spawning the subprocess twice.
        current_commit = _get_current_commit_hash()
        if current_commit == index.commit:
            logging.info('Loaded symbols index from %s', filename)
            return index
        logging.warning(
            '%s is generated for commit %s while current commit is %s',
            filename, index.commit, current_commit)

    # Missing or stale index: rebuild it and persist the result.
    index = _index_source()
    with open(filename, 'w', encoding='utf-8') as json_file:
        json_file.write(_SymbolsIndexSchema.dumps(index))
    logging.info(
        'Successfully indexed C++ source and written symbols index into %s',
        filename)
    return index