| // Copyright (c) 2024, the Dart project authors. Please see the AUTHORS file |
| // for details. All rights reserved. Use of this source code is governed by a |
| // BSD-style license that can be found in the LICENSE file. |
| |
| #include "bin/uri.h" |
| |
| #include <memory> |
| #include <utility> |
| |
| #include "platform/allocation.h" |
| #include "platform/utils.h" |
| |
| namespace dart { |
| |
| static CStringUniquePtr MakeCopyOfString(const char* str) { |
| if (str == nullptr) { |
| return CStringUniquePtr(); |
| } |
| intptr_t len = strlen(str) + 1; // '\0'-terminated. |
| char* copy = static_cast<char*>(malloc(len)); |
| strncpy(copy, str, len); |
| return CStringUniquePtr(copy); |
| } |
| |
| static CStringUniquePtr MakeCopyOfStringN(const char* str, intptr_t len) { |
| ASSERT(len >= 0); |
| for (intptr_t i = 0; i < len; i++) { |
| if (str[i] == '\0') { |
| len = i; |
| break; |
| } |
| } |
| char* copy = static_cast<char*>(malloc(len + 1)); // +1 for '\0' |
| strncpy(copy, str, len); |
| copy[len] = '\0'; |
| return CStringUniquePtr(copy); |
| } |
| |
| static CStringUniquePtr PrintToString(const char* format, ...) { |
| va_list args; |
| va_start(args, format); |
| char* buffer = Utils::VSCreate(format, args); |
| va_end(args); |
| return CStringUniquePtr(buffer); |
| } |
| |
// Returns true if |value| is an ASCII letter, an ASCII digit, or one of
// '-', '.', '_', '~'.
static bool IsUnreservedChar(intptr_t value) {
  const bool is_lowercase = ('a' <= value) && (value <= 'z');
  const bool is_uppercase = ('A' <= value) && (value <= 'Z');
  const bool is_digit = ('0' <= value) && (value <= '9');
  if (is_lowercase || is_uppercase || is_digit) {
    return true;
  }
  switch (value) {
    case '-':
    case '.':
    case '_':
    case '~':
      return true;
    default:
      return false;
  }
}
| |
// Returns true if |value| is one of the delimiter characters
//   : / ? # [ ] @ ! $ & ' ( ) * + , ; =
static bool IsDelimiter(intptr_t value) {
  static const char kDelimiters[] = {':', '/', '?', '#', '[',  ']',
                                     '@', '!', '$', '&', '\'', '(',
                                     ')', '*', '+', ',', ';',  '='};
  for (const char delimiter : kDelimiters) {
    if (value == delimiter) {
      return true;
    }
  }
  return false;
}
| |
// Returns true if |value| is an ASCII hexadecimal digit (0-9, A-F, a-f).
static bool IsHexDigit(char value) {
  if ('0' <= value && value <= '9') {
    return true;
  }
  if ('A' <= value && value <= 'F') {
    return true;
  }
  return 'a' <= value && value <= 'f';
}
| |
| static int HexValue(char digit) { |
| if ((digit >= '0' && digit <= '9')) { |
| return digit - '0'; |
| } |
| if ((digit >= 'A' && digit <= 'F')) { |
| return digit - 'A' + 10; |
| } |
| if ((digit >= 'a' && digit <= 'f')) { |
| return digit - 'a' + 10; |
| } |
| UNREACHABLE(); |
| return 0; |
| } |
| |
| static int GetEscapedValue(const char* str, intptr_t pos, intptr_t len) { |
| if (pos + 2 >= len) { |
| // Not enough room for a valid escape sequence. |
| return -1; |
| } |
| if (str[pos] != '%') { |
| // Escape sequences start with '%'. |
| return -1; |
| } |
| |
| char digit1 = str[pos + 1]; |
| char digit2 = str[pos + 2]; |
| if (!IsHexDigit(digit1) || !IsHexDigit(digit2)) { |
| // Invalid escape sequence. Ignore it. |
| return -1; |
| } |
| return HexValue(digit1) * 16 + HexValue(digit2); |
| } |
| |
| CStringUniquePtr NormalizeEscapes(const char* str, intptr_t len) { |
| // Allocate the buffer. |
| // We multiply len by three because a percent-escape sequence is |
| // three characters long (e.g. ' ' -> '%20). +1 for '\0'. We could |
| // take two passes through the string and avoid the excess |
| // allocation, but it's zone-memory so it doesn't seem necessary. |
| char* buffer = static_cast<char*>(malloc(len * 3 + 1)); |
| |
| // Copy the string, normalizing as we go. |
| intptr_t buffer_pos = 0; |
| intptr_t pos = 0; |
| while (pos < len) { |
| int escaped_value = GetEscapedValue(str, pos, len); |
| if (escaped_value >= 0) { |
| // If one of the special "unreserved" characters has been |
| // escaped, revert the escaping. Otherwise preserve the |
| // escaping. |
| if (IsUnreservedChar(escaped_value)) { |
| buffer[buffer_pos] = escaped_value; |
| buffer_pos++; |
| } else { |
| Utils::SNPrint(buffer + buffer_pos, 4, "%%%02X", escaped_value); |
| buffer_pos += 3; |
| } |
| pos += 3; |
| } else { |
| char c = str[pos]; |
| // If a delimiter or unreserved character is currently not |
| // escaped, preserve that. If there is a busted %-sequence in |
| // the input, preserve that too. |
| if (c == '%' || IsDelimiter(c) || IsUnreservedChar(c)) { |
| buffer[buffer_pos] = c; |
| buffer_pos++; |
| } else { |
| // Escape funky characters. |
| Utils::SNPrint(buffer + buffer_pos, 4, "%%%02X", c); |
| buffer_pos += 3; |
| } |
| pos++; |
| } |
| } |
| buffer[buffer_pos] = '\0'; |
| return CStringUniquePtr(buffer); |
| } |
| |
| // Lower-case a string in place. |
| static void StringLower(char* str) { |
| const intptr_t len = strlen(str); |
| intptr_t i = 0; |
| while (i < len) { |
| int escaped_value = GetEscapedValue(str, i, len); |
| if (escaped_value >= 0) { |
| // Don't lowercase escape sequences. |
| i += 3; |
| } else { |
| // I don't use tolower() because I don't want the locale |
| // transforming any non-ascii characters. |
| char c = str[i]; |
| if (c >= 'A' && c <= 'Z') { |
| str[i] = c + ('a' - 'A'); |
| } |
| i++; |
| } |
| } |
| } |
| |
| static intptr_t ParseAuthority(const char* authority, ParsedUri& parsed_uri) { |
| const char* current = authority; |
| intptr_t len = 0; |
| |
| size_t userinfo_len = strcspn(current, "@/"); |
| if (current[userinfo_len] == '@') { |
| // The '@' character follows the optional userinfo string. |
| parsed_uri.userinfo = NormalizeEscapes(current, userinfo_len); |
| current += userinfo_len + 1; |
| len += userinfo_len + 1; |
| } |
| |
| size_t host_len = strcspn(current, ":/"); |
| CStringUniquePtr host = NormalizeEscapes(current, host_len); |
| StringLower(host.get()); |
| parsed_uri.host = std::move(host); |
| len += host_len; |
| |
| if (current[host_len] == ':') { |
| // The ':' character precedes the optional port string. |
| const char* port_start = current + host_len + 1; // +1 for ':' |
| size_t port_len = strcspn(port_start, "/"); |
| parsed_uri.port = MakeCopyOfStringN(port_start, port_len); |
| len += 1 + port_len; // +1 for ':' |
| } |
| return len; |
| } |
| |
| // Performs a simple parse of a uri into its components. |
| // See RFC 3986 Section 3: Syntax. |
| std::unique_ptr<ParsedUri> ParseUri(const char* uri) { |
| auto parsed_uri = std::make_unique<ParsedUri>(); |
| |
| // The first ':' separates the scheme from the rest of the uri. If |
| // a ':' occurs after the first '/' it doesn't count. |
| size_t scheme_len = strcspn(uri, ":/"); |
| const char* rest = uri; |
| if (uri[scheme_len] == ':') { |
| CStringUniquePtr scheme = MakeCopyOfStringN(uri, scheme_len); |
| StringLower(scheme.get()); |
| parsed_uri->scheme = std::move(scheme); |
| rest = uri + scheme_len + 1; |
| } |
| |
| // The first '#' separates the optional fragment |
| const char* hash_pos = rest + strcspn(rest, "#"); |
| if (*hash_pos == '#') { |
| // There is a fragment part. |
| const char* fragment_start = hash_pos + 1; |
| parsed_uri->fragment = |
| NormalizeEscapes(fragment_start, strlen(fragment_start)); |
| } |
| |
| // The first '?' or '#' separates the hierarchical part from the |
| // optional query. |
| const char* question_pos = rest + strcspn(rest, "?#"); |
| if (*question_pos == '?') { |
| // There is a query part. |
| const char* query_start = question_pos + 1; |
| parsed_uri->query = NormalizeEscapes(query_start, (hash_pos - query_start)); |
| } |
| |
| const char* path_start = rest; |
| if (rest[0] == '/' && rest[1] == '/') { |
| // There is an authority part. |
| const char* authority_start = rest + 2; // 2 for '//'. |
| |
| intptr_t authority_len = ParseAuthority(authority_start, *parsed_uri.get()); |
| if (authority_len < 0) { |
| return std::unique_ptr<ParsedUri>(); |
| } |
| path_start = authority_start + authority_len; |
| } |
| |
| // The path is the substring between the authority and the query. |
| parsed_uri->path = NormalizeEscapes(path_start, (question_pos - path_start)); |
| return parsed_uri; |
| } |
| |
| static char* RemoveLastSegment(char* current, char* base) { |
| if (current == base) { |
| return current; |
| } |
| ASSERT(current > base); |
| for (current--; current > base; current--) { |
| if (*current == '/') { |
| // We have found the beginning of the last segment. |
| return current; |
| } |
| } |
| ASSERT(current == base); |
| return current; |
| } |
| |
// Returns the length of the first path segment of |input|. A leading '/'
// counts as part of the segment; the '/' that ends it does not.
static intptr_t SegmentLength(const char* input) {
  const intptr_t leading_slash = (input[0] == '/') ? 1 : 0;
  const intptr_t body_len = strcspn(input + leading_slash, "/");
  return leading_slash + body_len;
}
| |
| // See RFC 3986 Section 5.2.4: Remove Dot Segments. |
| CStringUniquePtr RemoveDotSegments(const char* path) { |
| const char* input = path; |
| |
| // The output path will always be less than or equal to the size of |
| // the input path. |
| |
| char* buffer = static_cast<char*>(malloc(strlen(path) + 1)); // +1 for '\0' |
| char* output = buffer; |
| |
| while (*input != '\0') { |
| if (strncmp("../", input, 3) == 0) { |
| // Discard initial "../" from the input. It's junk. |
| input += 3; |
| |
| } else if (strncmp("./", input, 3) == 0) { |
| // Discard initial "./" from the input. It's junk. |
| input += 2; |
| |
| } else if (strncmp("/./", input, 3) == 0) { |
| // Advance past the "/." part of the input. |
| input += 2; |
| |
| } else if (strcmp("/.", input) == 0) { |
| // Pretend the input just contains a "/". |
| input = "/"; |
| |
| } else if (strncmp("/../", input, 4) == 0) { |
| // Advance past the "/.." part of the input and remove one |
| // segment from the output. |
| input += 3; |
| output = RemoveLastSegment(output, buffer); |
| |
| } else if (strcmp("/..", input) == 0) { |
| // Pretend the input contains a "/" and remove one segment from |
| // the output. |
| input = "/"; |
| output = RemoveLastSegment(output, buffer); |
| |
| } else if (strcmp("..", input) == 0) { |
| // The input has been reduced to nothing useful. |
| input += 2; |
| |
| } else if (strcmp(".", input) == 0) { |
| // The input has been reduced to nothing useful. |
| input += 1; |
| |
| } else { |
| intptr_t segment_len = SegmentLength(input); |
| if (input[0] != '/' && output != buffer) { |
| *output = '/'; |
| output++; |
| } |
| strncpy(output, input, segment_len); |
| output += segment_len; |
| input += segment_len; |
| } |
| } |
| *output = '\0'; |
| return CStringUniquePtr(buffer); |
| } |
| |
| // See RFC 3986 Section 5.2.3: Merge Paths. |
| CStringUniquePtr MergePaths(const char* base_path, const char* ref_path) { |
| if (base_path[0] == '\0') { |
| // If the base_path is empty, we prepend '/'. |
| return PrintToString("/%s", ref_path); |
| } |
| |
| // We need to find the last '/' in base_path. |
| const char* last_slash = strrchr(base_path, '/'); |
| if (last_slash == nullptr) { |
| // There is no slash in the base_path. Return the ref_path unchanged. |
| return MakeCopyOfString(ref_path); |
| } |
| |
| // We found a '/' in the base_path. Cut off everything after it and |
| // add the ref_path. |
| intptr_t truncated_base_len = last_slash - base_path; |
| intptr_t ref_path_len = strlen(ref_path); |
| intptr_t len = truncated_base_len + ref_path_len + 1; // +1 for '/' |
| char* buffer = static_cast<char*>(malloc(len + 1)); // +1 for '\0' |
| |
| // Copy truncated base. |
| strncpy(buffer, base_path, truncated_base_len); |
| |
| // Add a slash. |
| buffer[truncated_base_len] = '/'; |
| |
| // Copy the ref_path. |
| strncpy((buffer + truncated_base_len + 1), ref_path, ref_path_len + 1); |
| |
| return CStringUniquePtr(buffer); |
| } |
| |
| CStringUniquePtr BuildUri(const ParsedUri& uri) { |
| ASSERT(uri.path != nullptr); |
| |
| const char* fragment = uri.fragment == nullptr ? "" : uri.fragment.get(); |
| const char* fragment_separator = uri.fragment == nullptr ? "" : "#"; |
| const char* query = uri.query == nullptr ? "" : uri.query.get(); |
| const char* query_separator = uri.query == nullptr ? "" : "?"; |
| |
| // If there is no scheme for this uri, just build a relative uri of |
| // the form: "path[?query][#fragment]". This occurs when we resolve |
| // relative urls inside a "dart:" library. |
| if (uri.scheme == nullptr) { |
| ASSERT(uri.userinfo == nullptr && uri.host == nullptr && |
| uri.port == nullptr); |
| return PrintToString("%s%s%s%s%s", uri.path.get(), query_separator, query, |
| fragment_separator, fragment); |
| } |
| |
| // Uri with no authority: "scheme:path[?query][#fragment]" |
| if (uri.host == nullptr) { |
| ASSERT(uri.userinfo == nullptr && uri.port == nullptr); |
| return PrintToString("%s:%s%s%s%s%s", uri.scheme.get(), uri.path.get(), |
| query_separator, query, fragment_separator, fragment); |
| } |
| |
| const char* user = uri.userinfo == nullptr ? "" : uri.userinfo.get(); |
| const char* user_separator = uri.userinfo == nullptr ? "" : "@"; |
| const char* port = uri.port == nullptr ? "" : uri.port.get(); |
| const char* port_separator = uri.port == nullptr ? "" : ":"; |
| |
| // If the path doesn't start with a '/', add one. We need it to |
| // separate the path from the authority. |
| const char* path_separator = |
| ((uri.path.get()[0] == '\0' || uri.path.get()[0] == '/') ? "" : "/"); |
| |
| // Uri with authority: |
| // "scheme://[userinfo@]host[:port][/]path[?query][#fragment]" |
| return PrintToString( |
| "%s://%s%s%s%s%s%s%s%s%s%s%s", // There is *nothing* wrong with this. |
| uri.scheme.get(), user, user_separator, uri.host.get(), port_separator, |
| port, path_separator, uri.path.get(), query_separator, query, |
| fragment_separator, fragment); |
| } |
| |
| // See RFC 3986 Section 5: Reference Resolution |
| CStringUniquePtr ResolveUri(const char* ref_uri, const char* base_uri) { |
| // Parse the reference uri. |
| std::unique_ptr<ParsedUri> ref = ParseUri(ref_uri); |
| if (!ref) { |
| return CStringUniquePtr(); |
| } |
| |
| ParsedUri target; |
| if (ref->scheme != nullptr) { |
| if (strcmp(ref->scheme.get(), "dart") == 0) { |
| return MakeCopyOfString(ref_uri); |
| } |
| |
| // When the ref_uri specifies a scheme, the base_uri is ignored. |
| target.scheme = std::move(ref->scheme); |
| target.userinfo = std::move(ref->userinfo); |
| target.host = std::move(ref->host); |
| target.port = std::move(ref->port); |
| target.path = std::move(ref->path); |
| target.query = std::move(ref->query); |
| target.fragment = std::move(ref->fragment); |
| return BuildUri(target); |
| } |
| |
| // Parse the base uri. |
| std::unique_ptr<ParsedUri> base = ParseUri(base_uri); |
| if (!base) { |
| return CStringUniquePtr(); |
| } |
| |
| if ((base->scheme != nullptr) && strcmp(base->scheme.get(), "dart") == 0) { |
| return MakeCopyOfString(ref_uri); |
| } |
| |
| if (ref->host != nullptr) { |
| // When the ref_uri specifies an authority, we only use the base scheme. |
| target.scheme = std::move(base->scheme); |
| target.userinfo = std::move(ref->userinfo); |
| target.host = std::move(ref->host); |
| target.port = std::move(ref->port); |
| target.path = RemoveDotSegments(ref->path.get()); |
| target.query = std::move(ref->query); |
| target.fragment = std::move(ref->fragment); |
| return BuildUri(target); |
| } |
| |
| if (ref->path.get()[0] == '\0') { |
| // Empty path. Use most parts of base_uri. |
| target.scheme = std::move(base->scheme); |
| target.userinfo = std::move(base->userinfo); |
| target.host = std::move(base->host); |
| target.port = std::move(base->port); |
| target.path = std::move(base->path); |
| target.query = ((ref->query == nullptr) ? std::move(base->query) |
| : std::move(ref->query)); |
| target.fragment = std::move(ref->fragment); |
| return BuildUri(target); |
| |
| } else if (ref->path.get()[0] == '/') { |
| // Absolute path. ref_path wins. |
| target.scheme = std::move(base->scheme); |
| target.userinfo = std::move(base->userinfo); |
| target.host = std::move(base->host); |
| target.port = std::move(base->port); |
| target.path = RemoveDotSegments(ref->path.get()); |
| target.query = std::move(ref->query); |
| target.fragment = std::move(ref->fragment); |
| return BuildUri(target); |
| |
| } else { |
| // Relative path. We need to merge the base path and the ref path. |
| |
| if (base->scheme == nullptr && base->host == nullptr && |
| base->path.get()[0] != '/') { |
| // The dart:core Uri class handles resolving a relative uri |
| // against a second relative uri specially, in a way not |
| // described in the RFC. We do not need to support this for |
| // library resolution. If we need to implement this later, we |
| // can. |
| return CStringUniquePtr(); |
| } |
| |
| target.scheme = std::move(base->scheme); |
| target.userinfo = std::move(base->userinfo); |
| target.host = std::move(base->host); |
| target.port = std::move(base->port); |
| CStringUniquePtr merged_paths = |
| MergePaths(base->path.get(), ref->path.get()); |
| target.path = RemoveDotSegments(merged_paths.get()); |
| target.query = std::move(ref->query); |
| target.fragment = std::move(ref->fragment); |
| return BuildUri(target); |
| } |
| } |
| |
| } // namespace dart |