| // Copyright (c) 2022, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| import 'dart:async'; |
| import 'dart:convert'; |
| |
| import 'package:analyzer/file_system/file_system.dart'; |
| import 'package:collection/collection.dart'; |
| import 'package:dartdoc/src/dartdoc_options.dart'; |
| import 'package:dartdoc/src/logging.dart'; |
| import 'package:dartdoc/src/model/canonicalization.dart'; |
| import 'package:dartdoc/src/model/model_element.dart'; |
| import 'package:dartdoc/src/model/package_graph.dart'; |
| import 'package:dartdoc/src/runtime_stats.dart'; |
| import 'package:dartdoc/src/warnings.dart'; |
| import 'package:html/parser.dart' show parse; |
| import 'package:path/path.dart' as path; |
| |
| class Validator { |
| final PackageGraph _packageGraph; |
| final DartdocOptionContext _config; |
| final String _origin; |
| final Set<String> _writtenFiles; |
| final StreamController<String> _onCheckProgress; |
| |
| final Map<String, Set<ModelElement>> _hrefs; |
| |
| final Set<String> _visited = {}; |
| |
| Validator(this._packageGraph, this._config, String origin, this._writtenFiles, |
| this._onCheckProgress) |
| : _origin = path.normalize(origin), |
| _hrefs = _packageGraph.allHrefs; |
| |
| /// Don't call this method more than once, and only after you've |
| /// generated all docs for the Package. |
| void validateLinks() { |
| logInfo('Validating...'); |
| runtimeStats.resetAccumulators({ |
| 'readCountForLinkValidation', |
| 'readCountForIndexValidation', |
| }); |
| _collectLinks('index.html'); |
| _checkForOrphans(); |
| _checkSearchIndex(); |
| } |
| |
| void _collectLinks(String pathToCheck, [String? source, String? fullPath]) { |
| fullPath ??= path.join(_origin, pathToCheck); |
| |
| final pageLinks = _getLinksAndBaseHref(fullPath); |
| if (pageLinks == null) { |
| // The file could not be read. |
| _warn(PackageWarning.brokenLink, pathToCheck, _origin, |
| referredFrom: source); |
| _onCheckProgress.add(pathToCheck); |
| // Remove so that we properly count that the file doesn't exist for |
| // the orphan check. |
| _visited.remove(fullPath); |
| return; |
| } |
| _visited.add(fullPath); |
| final links = pageLinks.links; |
| final baseHref = pageLinks.baseHref; |
| |
| // Prevent extremely large stacks by storing the paths we are using |
| // here instead -- occasionally, very large jobs have overflowed |
| // the stack without this. |
| final toVisit = <(String newPathToCheck, String newFullPath)>{}; |
| final pathDirectory = baseHref == null |
| ? path.dirname(pathToCheck) |
| : '${path.dirname(pathToCheck)}/$baseHref'; |
| |
| for (final href in links) { |
| final uri = Uri.tryParse(href); |
| if (uri == null || !uri.hasAuthority && !uri.hasFragment) { |
| var linkPath = '$pathDirectory/$href'; |
| |
| linkPath = path.normalize(linkPath); |
| final newFullPath = path.join(_origin, linkPath); |
| if (!_visited.contains(newFullPath)) { |
| toVisit.add((linkPath, newFullPath)); |
| _visited.add(newFullPath); |
| } |
| } |
| } |
| for (final visitPaths in toVisit) { |
| var (linkPath, fullPath) = visitPaths; |
| _collectLinks(linkPath, pathToCheck, fullPath); |
| } |
| _onCheckProgress.add(pathToCheck); |
| } |
| |
| void _checkForOrphans() { |
| final staticAssets = path.join(_origin, 'static-assets', ''); |
| final indexJson = path.join(_origin, 'index.json'); |
| var foundIndexJson = false; |
| |
| void checkDirectory(Folder dir) { |
| for (final child in dir.getChildren()) { |
| final fullPath = path.normalize(child.path); |
| if (_visited.contains(fullPath)) { |
| continue; |
| } |
| if (child is Folder) { |
| checkDirectory(child); |
| continue; |
| } |
| if (fullPath.startsWith(staticAssets)) { |
| continue; |
| } |
| if (path.equals(fullPath, indexJson)) { |
| foundIndexJson = true; |
| _onCheckProgress.add(fullPath); |
| continue; |
| } |
| final relativeFullPath = path.relative(fullPath, from: _origin); |
| if (!_writtenFiles.contains(relativeFullPath)) { |
| // This isn't a file we wrote (this time); don't claim we did. |
| _warn(PackageWarning.unknownFile, fullPath, _origin); |
| } else { |
| // Error messages are orphaned by design and do not appear in the |
| // search index. |
| if (const {'__404error.html', 'search.html', 'categories.json'} |
| .contains(fullPath)) { |
| _warn(PackageWarning.orphanedFile, fullPath, _origin); |
| } |
| } |
| _onCheckProgress.add(fullPath); |
| } |
| } |
| |
| checkDirectory(_config.resourceProvider.getFolder(_origin)); |
| |
| if (!foundIndexJson) { |
| _warn(PackageWarning.brokenLink, indexJson, _origin); |
| _onCheckProgress.add(indexJson); |
| } |
| } |
| |
| void _checkSearchIndex() { |
| final fullPath = path.join(_origin, 'index.json'); |
| final indexPath = path.join(_origin, 'index.html'); |
| final file = _config.resourceProvider.getFile(fullPath); |
| if (!file.exists) { |
| return; |
| } |
| final jsonData = json.decode(file.readAsStringSync()) as List; |
| runtimeStats.incrementAccumulator('readCountForIndexValidation'); |
| |
| final found = <String>{}; |
| found.add(fullPath); |
| // The package index isn't supposed to be in the search, so suppress the |
| // warning. |
| found.add(indexPath); |
| for (var entry in jsonData.cast<Map<String, dynamic>>()) { |
| if (entry.containsKey('href')) { |
| final entryPath = path |
| .joinAll([_origin, ...path.posix.split(entry['href'] as String)]); |
| if (!_visited.contains(entryPath)) { |
| _warn(PackageWarning.brokenLink, entryPath, _origin, |
| referredFrom: fullPath); |
| } |
| found.add(entryPath); |
| } |
| } |
| final missingFromSearch = _visited.difference(found); |
| for (final missingFile in missingFromSearch) { |
| _warn(PackageWarning.missingFromSearchIndex, missingFile, _origin, |
| referredFrom: fullPath); |
| } |
| } |
| |
| /// Gets all link destinations, and the base link destination, if one is found |
| /// on the page. |
| /// |
| /// This is extracted to save memory during the check; be careful not to hang |
| /// on to anything referencing the full file and doc tree. |
| _PageLinks? _getLinksAndBaseHref(String fullPath) { |
| final file = _config.resourceProvider.getFile(fullPath); |
| if (!file.exists) { |
| return null; |
| } |
| // TODO(srawlins): It is possible that instantiating an HtmlParser using |
| // `lowercaseElementName: false` and `lowercaseAttrName: false` may save |
| // time or memory. |
| final doc = parse(file.readAsBytesSync()); |
| runtimeStats.incrementAccumulator('readCountForLinkValidation'); |
| final base = doc.querySelector('base'); |
| String? baseHref; |
| if (base != null) { |
| baseHref = base.attributes['href']; |
| } |
| final links = doc.querySelectorAll('a'); |
| final stringLinks = links |
| .map((link) => link.attributes['href']) |
| .whereType<String>() |
| .where((href) => |
| href.isNotEmpty && |
| !href.startsWith('https:') && |
| !href.startsWith('http:') && |
| !href.startsWith('mailto:') && |
| !href.startsWith('ftp:')) |
| .toSet(); |
| |
| return _PageLinks(stringLinks, baseHref); |
| } |
| |
| /// Warns on erroneous file paths. |
| void _warn( |
| PackageWarning kind, |
| String warnOn, |
| String origin, { |
| String? referredFrom, |
| }) { |
| final referredFromElements = <Canonicalization>{}; |
| |
| // Make all paths relative to origin. |
| if (path.isWithin(origin, warnOn)) { |
| warnOn = path.relative(warnOn, from: origin); |
| } |
| if (referredFrom != null) { |
| if (path.isWithin(origin, referredFrom)) { |
| referredFrom = path.relative(referredFrom, from: origin); |
| } |
| final hrefReferredFrom = _hrefs[referredFrom]; |
| // Source paths are always relative. |
| if (hrefReferredFrom != null) { |
| referredFromElements.addAll(hrefReferredFrom); |
| } |
| } |
| var warnOnElements = _hrefs[warnOn]; |
| |
| if (referredFromElements.any((e) => e.isCanonical)) { |
| referredFromElements.removeWhere((e) => !e.isCanonical); |
| } |
| var warnOnElement = warnOnElements?.firstWhereOrNull((e) => e.isCanonical); |
| |
| if (referredFromElements.isEmpty && referredFrom == 'index.html') { |
| referredFromElements.add(_packageGraph.defaultPackage); |
| } |
| final message = |
| referredFrom == 'index.json' ? '$warnOn (from index.json)' : warnOn; |
| _packageGraph.warnOnElement(warnOnElement, kind, |
| message: message, referredFrom: referredFromElements); |
| } |
| } |
| |
| /// Stores all links found in a page, and the base href. |
| class _PageLinks { |
| final Set<String> links; |
| final String? baseHref; |
| |
| const _PageLinks(this.links, this.baseHref); |
| } |