Merge pull request #3 from danschubert/master
Fixed the broken markdown package and switched from a callback to a syntax list for matching classifiers.
diff --git a/AUTHORS b/AUTHORS
index 74ad8d8..302a1de 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -6,3 +6,4 @@
Google Inc.
David Peek <ninjascript@gmail.com>
+Daniel Schubert <daniel.schubert+github.com@gmail.com>
diff --git a/README.md b/README.md
index a41870d..5359546 100644
--- a/README.md
+++ b/README.md
@@ -32,20 +32,15 @@
```
Version 0.4 adds support for GitHub style triple backtick code blocks, with
-built in Dart syntax coloring. Custom classifiers can be added using a callback:
+built in Dart syntax coloring. Custom classifiers can be added using a syntax list:
```dart
-import 'package:markdown/markdown.dart' show markdownToHtml;
+import 'package:markdown/markdown.dart';
main() {
- print(markdownToHtml("Hello *Markdown*"), (syntax, source) {
- if (syntax == 'mysyntax') return classifySyntax(source);
- return source;
- });
-}
-
-String classifySyntax(String source) {
- return '<span class="mysyntax">$source</span>';
+ List<InlineSyntax> nyanSyntax =
+ [new TextSyntax('nyan', sub: '~=[,,_,,]:3')];
+ print(markdownToHtml('nyan', inlineSyntaxes: nyanSyntax));
}
```
diff --git a/lib/markdown.dart b/lib/markdown.dart
index 0bcc777..01de970 100644
--- a/lib/markdown.dart
+++ b/lib/markdown.dart
@@ -5,19 +5,18 @@
/// Parses text in a markdown-like format and renders to HTML.
library markdown;
-import 'src/classify/dart.dart';
-
// TODO(rnystrom): Use "package:" URL (#4968).
part 'src/markdown/ast.dart';
part 'src/markdown/block_parser.dart';
part 'src/markdown/html_renderer.dart';
part 'src/markdown/inline_parser.dart';
-typedef String ClassifierFunction(String syntax, String source);
+typedef Node Resolver(String name);
/// Converts the given string of markdown to HTML.
-String markdownToHtml(String markdown, [ClassifierFunction classifier]) {
- final document = new Document(classifier);
+String markdownToHtml(String markdown, {inlineSyntaxes, linkResolver}) {
+ final document = new Document(inlineSyntaxes: inlineSyntaxes,
+ linkResolver: linkResolver);
// Replace windows line endings with unix line endings, and split.
final lines = markdown.replaceAll('\r\n','\n').split('\n');
@@ -33,26 +32,15 @@
.replaceAll('>', '&gt;');
}
-var _implicitLinkResolver;
-
-Node setImplicitLinkResolver(Node resolver(String text)) {
- _implicitLinkResolver = resolver;
-}
-
/// Maintains the context needed to parse a markdown document.
class Document {
final Map<String, Link> refLinks;
- final ClassifierFunction classifier;
-
- Document(this.classifier)
+ List<InlineSyntax> inlineSyntaxes;
+ Resolver linkResolver;
+
+ Document({this.inlineSyntaxes, this.linkResolver})
: refLinks = <String, Link>{};
-
- String classify(String syntax, String source) {
- if (syntax == 'dart') return classifyDart(source);
- if (classifier == null) return source;
- return classifier(syntax, source);
- }
-
+
parseRefLinks(List<String> lines) {
// This is a hideous regex. It matches:
// [id]: http:foo.com "some title"
diff --git a/lib/src/classify/dart.dart b/lib/src/classify/dart.dart
deleted file mode 100644
index bf9c942..0000000
--- a/lib/src/classify/dart.dart
+++ /dev/null
@@ -1,179 +0,0 @@
-// Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
-// for details. All rights reserved. Use of this source code is governed by a
-// BSD-style license that can be found in the LICENSE file.
-
-library classify;
-
-import 'package:analyzer_experimental/src/generated/java_core.dart';
-import 'package:analyzer_experimental/src/generated/scanner.dart';
-
-class Classification {
- static const NONE = "";
- static const ERROR = "e";
- static const COMMENT = "c";
- static const IDENTIFIER = "i";
- static const KEYWORD = "k";
- static const OPERATOR = "o";
- static const STRING = "s";
- static const NUMBER = "n";
- static const PUNCTUATION = "p";
- static const TYPE_IDENTIFIER = "t";
- static const SPECIAL_IDENTIFIER = "r";
- static const ARROW_OPERATOR = "a";
- static const STRING_INTERPOLATION = 'si';
-}
-
-String classifyDart(String src) {
- var scanner = new StringScanner(null, src, null);
- var token = scanner.tokenize();
- var out = new StringBuffer();
- var pos = 0;
- while (token.type != TokenType.EOF) {
- // If not a token and not whitespace assume comment.
- var comment = src.slice(pos, token.offset);
- if (comment.trim().length > 0) {
- out.add('<span class="${Classification.COMMENT}">$comment</span>');
- }
- else out.add(comment);
- pos = token.end;
-
- var inString = (token.type == TokenType.STRING
- || token.type == TokenType.STRING_INTERPOLATION_EXPRESSION
- || token.type == TokenType.STRING_INTERPOLATION_IDENTIFIER);
- var stringClass = inString ? ' ${Classification.STRING_INTERPOLATION}' : '';
- var kind = classify(token);
- out.add('<span class="$kind$stringClass">$token</span>');
- token = token.next;
- }
- return out.toString();
-}
-
-Map createTokenMap() {
- var map = new Map();
- [ TokenType.OPEN_PAREN,
- TokenType.CLOSE_PAREN,
- TokenType.OPEN_CURLY_BRACKET,
- TokenType.CLOSE_CURLY_BRACKET,
- TokenType.OPEN_SQUARE_BRACKET,
- TokenType.OPEN_SQUARE_BRACKET,
- TokenType.COLON,
- TokenType.SEMICOLON,
- TokenType.COMMA,
- TokenType.PERIOD,
- TokenType.PERIOD_PERIOD
- ].forEach((t) => map[t] = Classification.PUNCTUATION);
-
- [ TokenType.INT,
- TokenType.HEXADECIMAL,
- TokenType.DOUBLE
- ].forEach((t) => map[t] = Classification.NUMBER);
-
- [ TokenType.STRING,
- TokenType.STRING_INTERPOLATION_IDENTIFIER,
- TokenType.STRING_INTERPOLATION_EXPRESSION,
- TokenType.DOUBLE
- ].forEach((t) => map[t] = Classification.STRING);
-
- [ TokenType.PLUS_PLUS,
- TokenType.MINUS_MINUS,
- TokenType.TILDE,
- TokenType.BANG,
- TokenType.EQ,
- TokenType.BAR_EQ,
- TokenType.CARET_EQ,
- TokenType.AMPERSAND_EQ,
- TokenType.LT_LT_EQ,
- TokenType.GT_GT_EQ,
- TokenType.PLUS_EQ,
- TokenType.MINUS_EQ,
- TokenType.STAR_EQ,
- TokenType.SLASH_EQ,
- TokenType.TILDE_SLASH_EQ,
- TokenType.PERCENT_EQ,
- TokenType.QUESTION,
- TokenType.BAR_BAR,
- TokenType.AMPERSAND_AMPERSAND,
- TokenType.BAR,
- TokenType.CARET,
- TokenType.AMPERSAND,
- TokenType.LT_LT,
- TokenType.GT_GT,
- TokenType.PLUS,
- TokenType.MINUS,
- TokenType.STAR,
- TokenType.SLASH,
- TokenType.TILDE_SLASH,
- TokenType.PERCENT,
- TokenType.EQ_EQ,
- TokenType.BANG_EQ,
- TokenType.LT,
- TokenType.GT,
- TokenType.LT_EQ,
- TokenType.GT_EQ,
- TokenType.INDEX,
- TokenType.INDEX_EQ,
- ].forEach((t) => map[t] = Classification.OPERATOR);
-
- // => is so awesome it is in a class of its own.
- map[TokenType.FUNCTION] = Classification.ARROW_OPERATOR;
-
- map[TokenType.IDENTIFIER] = Classification.IDENTIFIER;
- map[TokenType.KEYWORD] = Classification.KEYWORD;
- map[TokenType.HASH] = Classification.KEYWORD;
-
- return map;
-}
-var _tokenMap = createTokenMap();
-
-String classify(Token token) {
- if (!_tokenMap.containsKey(token.type)) {
- return Classification.NONE;
- }
- var classification = _tokenMap[token.type];
-
- // Special case for names that look like types.
- if (classification == Classification.IDENTIFIER) {
- final text = token.lexeme;
- if (_looksLikeType(text)
- || text == 'num'
- || text == 'bool'
- || text == 'int'
- || text == 'double') {
- return Classification.TYPE_IDENTIFIER;
- }
- }
-
- // Color keyword token. Most are colored as keywords.
- if (classification == Classification.KEYWORD) {
- if (token.lexeme == 'void') {
- // Color "void" as a type.
- return Classification.TYPE_IDENTIFIER;
- }
- if (token.lexeme == 'this' || token.lexeme == 'super') {
- // Color "this" and "super" as identifiers.
- return Classification.SPECIAL_IDENTIFIER;
- }
- }
-
- return classification;
-}
-
-bool _looksLikeType(String name) {
- // If the name looks like an UppercaseName, assume it's a type.
- return _looksLikePublicType(name) || _looksLikePrivateType(name);
-}
-
-bool _looksLikePublicType(String name) {
- // If the name looks like an UppercaseName, assume it's a type.
- return name.length >= 2 && isUpper(name[0]) && isLower(name[1]);
-}
-
-bool _looksLikePrivateType(String name) {
- // If the name looks like an _UppercaseName, assume it's a type.
- return (name.length >= 3 && name[0] == '_' && isUpper(name[1])
- && isLower(name[2]));
-}
-
-// These ensure that they don't return "true" if the string only has symbols.
-bool isUpper(String s) => s.toLowerCase() != s;
-bool isLower(String s) => s.toUpperCase() != s;
diff --git a/lib/src/markdown/block_parser.dart b/lib/src/markdown/block_parser.dart
index b59c795..42b9353 100644
--- a/lib/src/markdown/block_parser.dart
+++ b/lib/src/markdown/block_parser.dart
@@ -20,7 +20,7 @@
final _RE_INDENT = new RegExp(r'^(?: |\t)(.*)$');
/// GitHub style triple quoted code block.
-final _RE_CODE = new RegExp(r'^```(\w*)$');
+final _RE_CODE = new RegExp(r'^```(.*)$');
/// Three or more hyphens, asterisks or underscores by themselves. Note that
/// a line like `----` is valid as both HR and SETEXT. In case of a tie,
@@ -238,9 +238,9 @@
childLines.add('');
// Escape the code.
- final escaped = childLines.join('n');//classifySource();
+ final escaped = escapeHtml(childLines.join('\n'));
- return new Element.text('pre', escaped);
+ return new Element('pre', [new Element.text('code', escaped)]);
}
}
@@ -274,9 +274,9 @@
childLines.add('');
// Escape the code.
- final escaped = parser.document.classify(syntax, childLines.join('\n').trim());
+ final escaped = escapeHtml(childLines.join('\n'));
- return new Element.text('pre', escaped);
+ return new Element('pre', [new Element.text('code', escaped)]);
}
}
@@ -315,7 +315,7 @@
parser.advance();
}
- return new Text(Strings.join(childLines, '\n'));
+ return new Text(childLines.join('\n'));
}
}
@@ -497,8 +497,7 @@
parser.advance();
}
- final contents = parser.document.parseInline(
- Strings.join(childLines, '\n'));
+ final contents = parser.document.parseInline(childLines.join('\n'));
return new Element('p', contents);
}
}
diff --git a/lib/src/markdown/html_renderer.dart b/lib/src/markdown/html_renderer.dart
index 1695e96..5494394 100644
--- a/lib/src/markdown/html_renderer.dart
+++ b/lib/src/markdown/html_renderer.dart
@@ -24,17 +24,17 @@
}
void visitText(Text text) {
- buffer.add(text.text);
+ buffer.write(text.text);
}
bool visitElementBefore(Element element) {
// Hackish. Separate block-level elements with newlines.
if (!buffer.isEmpty &&
_BLOCK_TAGS.firstMatch(element.tag) != null) {
- buffer.add('\n');
+ buffer.write('\n');
}
- buffer.add('<${element.tag}');
+ buffer.write('<${element.tag}');
// Sort the keys so that we generate stable output.
// TODO(rnystrom): This assumes keys returns a fresh mutable
@@ -42,20 +42,20 @@
final attributeNames = element.attributes.keys.toList();
attributeNames.sort((a, b) => a.compareTo(b));
for (final name in attributeNames) {
- buffer.add(' $name="${element.attributes[name]}"');
+ buffer.write(' $name="${element.attributes[name]}"');
}
if (element.isEmpty) {
// Empty element like <hr/>.
- buffer.add(' />');
+ buffer.write(' />');
return false;
} else {
- buffer.add('>');
+ buffer.write('>');
return true;
}
}
void visitElementAfter(Element element) {
- buffer.add('</${element.tag}>');
+ buffer.write('</${element.tag}>');
}
}
diff --git a/lib/src/markdown/inline_parser.dart b/lib/src/markdown/inline_parser.dart
index af42e3e..d3ec7ae 100644
--- a/lib/src/markdown/inline_parser.dart
+++ b/lib/src/markdown/inline_parser.dart
@@ -7,56 +7,51 @@
/// Maintains the internal state needed to parse inline span elements in
/// markdown.
class InlineParser {
- static List<InlineSyntax> get syntaxes {
- // Lazy initialize.
- if (_syntaxes == null) {
- _syntaxes = <InlineSyntax>[
- // This first regexp matches plain text to accelerate parsing. It must
- // be written so that it does not match any prefix of any following
- // syntax. Most markdown is plain text, so it is faster to match one
- // regexp per 'word' rather than fail to match all the following regexps
- // at each non-syntax character position. It is much more important
- // that the regexp is fast than complete (for example, adding grouping
- // is likely to slow the regexp down enough to negate its benefit).
- // Since it is purely for optimization, it can be removed for debugging.
- new TextSyntax(r'\s*[A-Za-z0-9]+'),
+ static List<InlineSyntax> defaultSyntaxes = <InlineSyntax>[
+ // This first regexp matches plain text to accelerate parsing. It must
+ // be written so that it does not match any prefix of any following
+ // syntax. Most markdown is plain text, so it is faster to match one
+ // regexp per 'word' rather than fail to match all the following regexps
+ // at each non-syntax character position. It is much more important
+ // that the regexp is fast than complete (for example, adding grouping
+ // is likely to slow the regexp down enough to negate its benefit).
+ // Since it is purely for optimization, it can be removed for debugging.
- // The real syntaxes.
+ // TODO(amouravski): this regex will glom up any custom syntaxes unless
+ // they're at the beginning.
+ new TextSyntax(r'\s*[A-Za-z0-9]+'),
- new AutolinkSyntax(),
- new LinkSyntax(),
- // "*" surrounded by spaces is left alone.
- new TextSyntax(r' \* '),
- // "_" surrounded by spaces is left alone.
- new TextSyntax(r' _ '),
- // Leave already-encoded HTML entities alone. Ensures we don't turn
- // "&amp;" into "&amp;amp;"
- new TextSyntax(r'&[#a-zA-Z0-9]*;'),
- // Encode "&".
- new TextSyntax(r'&', sub: '&amp;'),
- // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
- new TextSyntax(r'<', sub: '&lt;'),
- // Parse "**strong**" tags.
- new TagSyntax(r'\*\*', tag: 'strong'),
- // Parse "__strong__" tags.
- new TagSyntax(r'__', tag: 'strong'),
- // Parse "*emphasis*" tags.
- new TagSyntax(r'\*', tag: 'em'),
- // Parse "_emphasis_" tags.
- // TODO(rnystrom): Underscores in the middle of a word should not be
- // parsed as emphasis like_in_this.
- new TagSyntax(r'_', tag: 'em'),
- // Parse inline code within double backticks: "``code``".
- new CodeSyntax(r'``\s?((?:.|\n)*?)\s?``'),
- // Parse inline code within backticks: "`code`".
- new CodeSyntax(r'`([^`]*)`')
- ];
- }
+ // The real syntaxes.
- return _syntaxes;
- }
-
- static List<InlineSyntax> _syntaxes;
+ new AutolinkSyntax(),
+ new LinkSyntax(),
+ // "*" surrounded by spaces is left alone.
+ new TextSyntax(r' \* '),
+ // "_" surrounded by spaces is left alone.
+ new TextSyntax(r' _ '),
+ // Leave already-encoded HTML entities alone. Ensures we don't turn
+ // "&amp;" into "&amp;amp;"
+ new TextSyntax(r'&[#a-zA-Z0-9]*;'),
+ // Encode "&".
+ new TextSyntax(r'&', sub: '&amp;'),
+ // Encode "<". (Why not encode ">" too? Gruber is toying with us.)
+ new TextSyntax(r'<', sub: '&lt;'),
+ // Parse "**strong**" tags.
+ new TagSyntax(r'\*\*', tag: 'strong'),
+ // Parse "__strong__" tags.
+ new TagSyntax(r'__', tag: 'strong'),
+ // Parse "*emphasis*" tags.
+ new TagSyntax(r'\*', tag: 'em'),
+ // Parse "_emphasis_" tags.
+ // TODO(rnystrom): Underscores in the middle of a word should not be
+ // parsed as emphasis like_in_this.
+ new TagSyntax(r'_', tag: 'em'),
+ // Parse inline code within double backticks: "``code``".
+ new CodeSyntax(r'``\s?((?:.|\n)*?)\s?``'),
+ // Parse inline code within backticks: "`code`".
+ new CodeSyntax(r'`([^`]*)`')
+ // We will add the LinkSyntax once we know about the specific link resolver.
+ ];
/// The string of markdown being parsed.
final String source;
@@ -64,6 +59,8 @@
/// The markdown document this parser is parsing.
final Document document;
+ List<InlineSyntax> syntaxes;
+
/// The current read position.
int pos = 0;
@@ -73,7 +70,18 @@
final List<TagState> _stack;
InlineParser(this.source, this.document)
- : _stack = <TagState>[];
+ : _stack = <TagState>[] {
+ /// User specified syntaxes will be the first syntaxes to be evaluated.
+ if (document.inlineSyntaxes != null) {
+ syntaxes = [];
+ syntaxes.addAll(document.inlineSyntaxes);
+ syntaxes.addAll(defaultSyntaxes);
+ } else {
+ syntaxes = defaultSyntaxes;
+ }
+ // Custom link resolver goes after the generic text syntax.
+ syntaxes.insert(1, new LinkSyntax(linkResolver: document.linkResolver));
+ }
List<Node> parse() {
// Make a fake top tag to hold the results.
@@ -236,6 +244,8 @@
/// Matches inline links like `[blah] [id]` and `[blah] (url)`.
class LinkSyntax extends TagSyntax {
+ Resolver linkResolver;
+
/// The regex for the end of a link needs to handle both reference style and
/// inline styles as well as optional titles for inline links. To make that
/// a bit more palatable, this breaks it into pieces.
@@ -253,7 +263,7 @@
// 4: Contains the title, if present, for an inline link.
}
- LinkSyntax()
+ LinkSyntax({this.linkResolver})
: super(r'\[', end: linkPattern);
bool onMatchEnd(InlineParser parser, Match match, TagState state) {
@@ -263,10 +273,10 @@
// If we didn't match refLink or inlineLink, then it means there was
// nothing after the first square bracket, so it isn't a normal markdown
// link at all. Instead, we allow users of the library to specify a special
- // resolver function ([setImplicitLinkResolver]) that may choose to handle
+ // resolver function ([linkResolver]) that may choose to handle
// this. Otherwise, it's just treated as plain text.
if ((match[1] == null) || (match[1] == '')) {
- if (_implicitLinkResolver == null) return false;
+ if (linkResolver == null) return false;
// Only allow implicit links if the content is just text.
// TODO(rnystrom): Do we want to relax this?
@@ -276,7 +286,7 @@
Text link = state.children[0];
// See if we have a resolver that will generate a link for us.
- final node = _implicitLinkResolver(link.text);
+ final node = linkResolver(link.text);
if (node == null) return false;
parser.addNode(node);
@@ -376,9 +386,8 @@
int index = parser._stack.indexOf(this);
// Remove the unmatched children.
- final unmatchedTags = parser._stack.getRange(index + 1,
- parser._stack.length - index - 1);
- parser._stack.removeRange(index + 1, parser._stack.length - index - 1);
+ final unmatchedTags = parser._stack.sublist(index + 1);
+ parser._stack.removeRange(index + 1, parser._stack.length);
// Flatten them out onto this tag.
for (final unmatched in unmatchedTags) {
diff --git a/pubspec.yaml b/pubspec.yaml
index 595e292..3ed10d5 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -3,6 +3,5 @@
author: Dart Team <misc@dartlang.org>
description: A library for converting markdown to HTML.
homepage: https://github.com/dpeek/dart-markdown
-dependencies:
+dev_dependencies:
unittest: any
- analyzer_experimental: ">=0.3.7+6 <0.3.8"
diff --git a/test/markdown_test.dart b/test/markdown_test.dart
index 3875de0..fcf76d1 100644
--- a/test/markdown_test.dart
+++ b/test/markdown_test.dart
@@ -5,12 +5,11 @@
/// Unit tests for markdown.
library markdownTests;
+import 'package:unittest/unittest.dart';
+
// TODO(rnystrom): Use "package:" URL (#4968).
import '../lib/markdown.dart';
-// TODO(rnystrom): Better path to unittest.
-import 'package:unittest/unittest.dart';
-
/// Most of these tests are based on observing how showdown behaves:
/// http://softwaremaniacs.org/playground/showdown-highlight/
void main() {
@@ -425,6 +424,35 @@
''');
});
+ group('Fenced code blocks', () {
+ validate('without an optional language identifier', '''
+ ```
+ code
+ ```
+ ''', '''
+ <pre><code>code
+ </code></pre>
+ ''');
+
+ validate('with an optional language identifier', '''
+ ```dart
+ code
+ ```
+ ''', '''
+ <pre><code>code
+ </code></pre>
+ ''');
+
+ validate('escape HTML characters', '''
+ ```
+ <&>
+ ```
+ ''', '''
+ <pre><code>&lt;&amp;&gt;
+ </code></pre>
+ ''');
+ });
+
group('Horizontal rules', () {
validate('from dashes', '''
---
@@ -649,6 +677,12 @@
second</code> after</p>
''');
+ validate('simple double backticks', '''
+ before ``source`` after
+ ''', '''
+ <p>before <code>source</code> after</p>
+ ''');
+
validate('double backticks', '''
before ``can `contain` backticks`` after
''', '''
@@ -798,6 +832,29 @@
<p>links <a href="http://foo.com"><em>are</em></a> awesome</p>
''');
});
+
+ group('Resolver', () {
+ var nyanResolver = (text) => new Text('~=[,,_${text}_,,]:3');
+ validate('simple resolver', '''
+ resolve [this] thing
+ ''', '''
+ <p>resolve ~=[,,_this_,,]:3 thing</p>
+ ''', linkResolver: nyanResolver);
+ });
+
+ group('Custom inline syntax', () {
+ List<InlineSyntax> nyanSyntax =
+ [new TextSyntax('nyan', sub: '~=[,,_,,]:3')];
+ validate('simple inline syntax', '''
+ nyan
+ ''', '''
+ <p>~=[,,_,,]:3</p>
+ ''', inlineSyntaxes: nyanSyntax);
+
+ // TODO(amouravski): need more tests here for custom syntaxes, as some
+ // things are not quite working properly. The regexps are sometimes a little
+ // too greedy, I think.
+ });
}
/**
@@ -825,16 +882,17 @@
}
}
- return Strings.join(lines, '\n');
+ return lines.join('\n');
}
validate(String description, String markdown, String html,
- {bool verbose: false}) {
+ {bool verbose: false, inlineSyntaxes, linkResolver}) {
test(description, () {
markdown = cleanUpLiteral(markdown);
html = cleanUpLiteral(html);
- var result = markdownToHtml(markdown);
+ var result = markdownToHtml(markdown, inlineSyntaxes: inlineSyntaxes,
+ linkResolver: linkResolver);
var passed = compareOutput(html, result);
if (!passed) {