// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"bytes"
"strings"
"unicode/utf8"
"git.sr.ht/~rj/gemplate/internal/css"
)
// isCSSNmchar reports whether r may appear anywhere in a CSS identifier.
//
// The accepted set follows the CSS3 nmchar production, except that
// multi-rune escape sequences are not recognized.
// https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		return true
	case r == '-', r == '_':
		return true
	// Non-ASCII ranges.
	case 0x80 <= r && r <= 0xd7ff, 0xe000 <= r && r <= 0xfffd, 0x10000 <= r && r <= 0x10ffff:
		return true
	}
	return false
}
// EscapeCSS escapes HTML and CSS special characters in s using \<hex>+
// escapes taken from cssReplacementTable. When s contains nothing that needs
// escaping it is returned unchanged.
func EscapeCSS(s string) string {
	var out strings.Builder
	flushed := 0 // Count of leading bytes of s already copied to out.
	for pos, width := 0, 0; pos < len(s); pos += width {
		// Decode by hand instead of ranging over s so that width is the
		// number of input bytes consumed, even on a decoding error.
		var r rune
		r, width = utf8.DecodeRuneInString(s[pos:])
		if int(r) >= len(cssReplacementTable) {
			continue
		}
		esc := cssReplacementTable[r]
		if esc == "" {
			continue
		}
		if flushed == 0 {
			out.Grow(len(s))
		}
		out.WriteString(s[flushed:pos])
		out.WriteString(esc)
		flushed = pos + width
		if esc == `\\` {
			continue
		}
		// A hex escape is followed by a space when it ends the input or
		// when the next byte is a hex digit or CSS whitespace.
		if flushed == len(s) || isHex(s[flushed]) || css.IsSpace(s[flushed]) {
			out.WriteByte(' ')
		}
	}
	if flushed == 0 {
		return s
	}
	out.WriteString(s[flushed:])
	return out.String()
}
// cssReplacementTable maps a rune (used as the slice index) to the CSS escape
// that EscapeCSS substitutes for it. Indices with an empty string are left
// untouched by EscapeCSS.
var cssReplacementTable = []string{
0: `\0`,
'\t': `\9`,
'\n': `\a`,
'\f': `\c`,
'\r': `\d`,
// Encode HTML specials as hex so the output can be embedded
// in HTML attributes without further encoding.
'"': `\22`,
'&': `\26`,
'\'': `\27`,
'(': `\28`,
')': `\29`,
'+': `\2b`,
'/': `\2f`,
':': `\3a`,
';': `\3b`,
'<': `\3c`,
'>': `\3e`,
'\\': `\\`,
'{': `\7b`,
'}': `\7d`,
}
// expressionBytes is one of the lower-case substrings that FilterCSSValues
// rejects when it appears in the identifier characters of a value.
var expressionBytes = []byte("expression")
// mozBindingBytes is the other lower-case substring that FilterCSSValues
// rejects when it appears in the identifier characters of a value.
var mozBindingBytes = []byte("mozbinding")
// FilterCSSValues allows innocuous CSS values in the output including CSS
// quantities (10px or 25%), ID or class literals (#foo, .bar), keyword values
// (inherit, blue), and colors (#888).
// It filters out unsafe values, such as those that affect token boundaries,
// and anything that might execute scripts. Rejected input is replaced by
// filterFailsafe; accepted input is returned in its decoded form.
func FilterCSSValues(s string) string {
	//if t == contentTypeCSS {
	//	return s
	//}
	decoded := css.DecodeCSS([]byte(s))
	// ident collects the ASCII identifier characters seen, so that banned
	// keywords can be found even when split up by other characters.
	ident := make([]byte, 0, 64)

	// CSS3 error handling is specified as honoring string boundaries per
	// https://www.w3.org/TR/css3-syntax/#error-handling :
	//     Malformed declarations. User agents must handle unexpected
	//     tokens encountered while parsing a declaration by reading until
	//     the end of the declaration, while observing the rules for
	//     matching pairs of (), [], {}, "", and '', and correctly handling
	//     escapes. For example, a malformed declaration may be missing a
	//     property, colon (:) or value.
	// So we need to make sure that values do not have mismatched bracket
	// or quote characters to prevent the browser from restarting parsing
	// inside a string that might embed JavaScript source.
	for pos := 0; pos < len(decoded); pos++ {
		ch := decoded[pos]
		switch ch {
		case 0, '"', '\'', '(', ')', '/', ';', '@', '[', '\\', ']', '`', '{', '}', '<', '>':
			return filterFailsafe
		case '-':
			// Disallow <!-- or -->.
			// -- should not appear in valid identifiers.
			if pos > 0 && decoded[pos-1] == '-' {
				return filterFailsafe
			}
		default:
			if ch < utf8.RuneSelf && isCSSNmchar(rune(ch)) {
				ident = append(ident, ch)
			}
		}
	}

	ident = bytes.ToLower(ident)
	for _, banned := range [][]byte{expressionBytes, mozBindingBytes} {
		if bytes.Contains(ident, banned) {
			return filterFailsafe
		}
	}
	return string(decoded)
}
package htmlescape
// isHex reports whether c is an ASCII hexadecimal digit (0-9, a-f, or A-F).
func isHex(c byte) bool {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		return true
	}
	return false
}
package htmlescape
import (
"fmt"
"html/template"
"strings"
"unicode/utf8"
"git.sr.ht/~rj/gemplate/internal/content"
"git.sr.ht/~rj/gemplate/internal/htmlescape"
)
// EscapeAttr escapes s using htmlReplacementTable, the table documented as
// covering quoted attribute values and text nodes.
func EscapeAttr(s string) string {
return htmlReplacer(s, htmlReplacementTable, true)
}
// EscapeAttrNorm passes s through htmlescape.StripTags and then escapes the
// result using htmlNormReplacementTable, which has no entry for '&' so that
// existing entities are not over-encoded.
func EscapeAttrNorm(s template.HTML) string {
return htmlReplacer(htmlescape.StripTags(string(s)), htmlNormReplacementTable, true)
}
// EscapeHTML escapes a string for inclusion in HTML text.
// It applies htmlReplacementTable through htmlReplacer.
func EscapeHTML(s string) string {
return htmlReplacer(s, htmlReplacementTable, true)
}
// EscapeHTMLNoSpace escapes a string for inclusion in unquoted attribute
// values. An empty input yields filterFailsafe instead of an empty string.
func EscapeHTMLNoSpace(s string) string {
if s == "" {
return filterFailsafe
}
return htmlReplacer(s, htmlNospaceReplacementTable, false)
}
// EscapeHTMLNoSpaceNorm is the template.HTML counterpart of
// EscapeHTMLNoSpace: it passes s through htmlescape.StripTags and escapes the
// result using htmlNospaceNormReplacementTable, which has no entry for '&'.
// An empty input yields filterFailsafe instead of an empty string.
func EscapeHTMLNoSpaceNorm(s template.HTML) string {
if s == "" {
return filterFailsafe
}
return htmlReplacer(htmlescape.StripTags(string(s)), htmlNospaceNormReplacementTable, false)
}
// htmlReplacementTable contains the runes that need to be escaped
// inside a quoted attribute value or in a text node.
var htmlReplacementTable = []string{
// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
// CHARACTER character to the current attribute's value.
// "
// and similarly
// https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
0: "\uFFFD",
'"': """,
'&': "&",
'\'': "'",
'+': "+",
'<': "<",
'>': ">",
}
// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
// avoid over-encoding existing entities.
var htmlNormReplacementTable = []string{
	0: "\uFFFD",
	// The replacements must be spelled as character references; a literal
	// copy of the character would make the escaper a no-op.
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
// htmlNospaceReplacementTable contains the runes that need to be escaped
// inside an unquoted attribute value.
// The set of runes escaped is the union of the HTML specials and
// those determined by running the JS below in browsers:
//
//	<div id=d></div>
//	<script>(function () {
//	var a = [], d = document.getElementById("d"), i, c, s;
//	for (i = 0; i < 0x10000; ++i) {
//
//	  c = String.fromCharCode(i);
//	  d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
//	  s = d.getElementsByTagName("SPAN")[0];
//	  if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
//	}
//	document.write(a.join(", "));
//	})()</script>
var htmlNospaceReplacementTable = []string{
	// Every replacement is a character reference: in an unquoted attribute
	// value a literal space, quote, or '=' would end or corrupt the value.
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
// without '&' to avoid over-encoding existing entities.
var htmlNospaceNormReplacementTable = []string{
	// Every replacement is a character reference: in an unquoted attribute
	// value a literal space, quote, or '=' would end or corrupt the value.
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
// htmlReplacer returns s with each rune that has a non-empty entry in
// replacementTable replaced by that entry. When badRunes is false, runes in
// U+FDD0..U+FDEF and U+FFF0..U+FFFF that lie beyond the table are additionally
// written as hexadecimal character references; when badRunes is true they
// pass through unescaped. s itself is returned if nothing was replaced.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	out := new(strings.Builder)
	flushed := 0 // Count of leading bytes of s already copied to out.
	for pos, width := 0, 0; pos < len(s); pos += width {
		// Cannot use 'for range s' because we need to preserve the width
		// of the runes in the input. If we see a decoding error, the input
		// width will not be utf8.RuneLen(r) and we will overrun the buffer.
		var r rune
		r, width = utf8.DecodeRuneInString(s[pos:])
		switch {
		case int(r) < len(replacementTable):
			esc := replacementTable[r]
			if len(esc) == 0 {
				break
			}
			if flushed == 0 {
				out.Grow(len(s))
			}
			out.WriteString(s[flushed:pos])
			out.WriteString(esc)
			flushed = pos + width
		case badRunes:
			// No-op.
		case 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff:
			// IE does not allow these ranges in unquoted attrs.
			if flushed == 0 {
				out.Grow(len(s))
			}
			fmt.Fprintf(out, "%s&#x%x;", s[flushed:pos], r)
			flushed = pos + width
		}
	}
	if flushed == 0 {
		return s
	}
	out.WriteString(s[flushed:])
	return out.String()
}
// EscapeComments returns the empty string regardless of input.
// Comment content does not correspond to any parsed structure or
// human-readable content, so the simplest and most secure policy is to drop
// content interpolated into comments.
// This approach is equally valid whether or not static comment content is
// removed from the template.
// The parameter is unnamed because its value is never read.
func EscapeComments(string) string {
return ""
}
// FilterHTMLNames returns the lower-cased form of s when it is a non-empty
// name made up only of ASCII letters and digits whose attribute type, as
// reported by content.AttrType, is content.TypePlain. Any other input yields
// filterFailsafe.
func FilterHTMLNames(s string) string {
	if s == "" {
		// Avoid violation of structure preservation.
		// <input checked {{.K}}={{.V}}>.
		// Without this, if .K is empty then .V is the value of
		// checked, but otherwise .V is the value of the attribute
		// named .K.
		return filterFailsafe
	}
	s = strings.ToLower(s)
	// TODO: Split attr and element name part filters so we can recognize known attributes.
	if content.AttrType(s) != content.TypePlain {
		return filterFailsafe
	}
	for _, r := range s {
		isDigit := '0' <= r && r <= '9'
		isLower := 'a' <= r && r <= 'z'
		if !isDigit && !isLower {
			return filterFailsafe
		}
	}
	return s
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"encoding/json"
"fmt"
"html/template"
"reflect"
"regexp"
"strings"
"unicode/utf8"
)
// jsonMarshalType is the reflect.Type of the json.Marshaler interface, used
// by indirectToJSONMarshaler to test whether a value implements it.
var jsonMarshalType = reflect.TypeFor[json.Marshaler]()
// indirectToJSONMarshaler dereferences a until it reaches a value that
// implements json.Marshaler, a non-pointer value, or a nil pointer, and
// returns that value. An untyped nil is returned as nil.
func indirectToJSONMarshaler(a any) any {
	// text/template now supports passing untyped nil as a func call
	// argument, so we must support it. Otherwise we'd panic below, as one
	// cannot call the Type or Interface methods on an invalid
	// reflect.Value. See golang.org/issue/18716.
	if a == nil {
		return nil
	}

	v := reflect.ValueOf(a)
	for {
		if v.Type().Implements(jsonMarshalType) {
			break
		}
		if v.Kind() != reflect.Pointer || v.IsNil() {
			break
		}
		v = v.Elem()
	}
	return v.Interface()
}
// scriptTagRe matches, case-insensitively, the start of an opening or closing
// script tag; the optional "/" is captured as group 1.
var scriptTagRe = regexp.MustCompile("(?i)<(/?)script")
// EscapeJSVal escapes its inputs to a JS Expression (section 11.14) that has
// neither side-effects nor free variables outside (NaN, Infinity).
//
// A single argument is dereferenced with indirectToJSONMarshaler and then
// JSON-encoded, except that template.JS is returned verbatim, template.JSStr
// is returned wrapped in double quotes, and a fmt.Stringer that is not a
// json.Marshaler is encoded via its String method. Any other number of
// arguments is formatted with fmt.Sprint and the resulting string is encoded.
// If encoding fails, the result is a sanitized comment holding the error
// text followed by null.
func EscapeJSVal(args ...any) string {
var a any
if len(args) == 1 {
a = indirectToJSONMarshaler(args[0])
switch t := a.(type) {
case template.JS:
return string(t)
case template.JSStr:
// TODO: normalize quotes.
return `"` + string(t) + `"`
case json.Marshaler:
// Do not treat as a Stringer.
case fmt.Stringer:
a = t.String()
}
} else {
// Note that this overwrites the elements of the caller-visible args slice.
for i, arg := range args {
args[i] = indirectToJSONMarshaler(arg)
}
a = fmt.Sprint(args...)
}
// TODO: detect cycles before calling Marshal which loops infinitely on
// cyclic data. This may be an unacceptable DoS risk.
b, err := json.Marshal(a)
if err != nil {
// While the standard JSON marshaler does not include user controlled
// information in the error message, if a type has a MarshalJSON method,
// the content of the error message is not guaranteed. Since we insert
// the error into the template, as part of a comment, we attempt to
// prevent the error from either terminating the comment, or the script
// block itself.
//
// In particular we:
// * replace "*/" comment end tokens with "* /", which does not
// terminate the comment
// * replace "<script" and "</script" with "\x3Cscript" and "\x3C/script"
// (case insensitively), and "<!--" with "\x3C!--", which prevents
// confusing script block termination semantics
//
// We also put a space before the comment so that if it is flush against
// a division operator it is not turned into a line comment:
// x/{{y}}
// turning into
// x//* error marshaling y:
// second line of error message */null
errStr := err.Error()
errStr = string(scriptTagRe.ReplaceAll([]byte(errStr), []byte(`\x3C${1}script`)))
errStr = strings.ReplaceAll(errStr, "*/", "* /")
errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`)
return fmt.Sprintf(" /* %s */null ", errStr)
}
// TODO: maybe post-process output to prevent it from containing
// "<!--", "-->", "<![CDATA[", "]]>", or "</script"
// in case custom marshalers produce output containing those.
// Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
// supports ld+json content-type.
if len(b) == 0 {
// In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
// not cause the output `x=y/*z`.
return " null "
}
first, _ := utf8.DecodeRune(b)
last, _ := utf8.DecodeLastRune(b)
var buf strings.Builder
// Prevent IdentifierNames and NumericLiterals from running into
// keywords: in, instanceof, typeof, void
pad := isJSIdentPart(first) || isJSIdentPart(last)
if pad {
buf.WriteByte(' ')
}
written := 0
// Make sure that json.Marshal escapes codepoints U+2028 & U+2029
// so it falls within the subset of JSON which is valid JS.
for i := 0; i < len(b); {
// The local name rune shadows the builtin type within this loop body.
rune, n := utf8.DecodeRune(b[i:])
repl := ""
if rune == 0x2028 {
repl = `\u2028`
} else if rune == 0x2029 {
repl = `\u2029`
}
if repl != "" {
buf.Write(b[written:i])
buf.WriteString(repl)
written = i + n
}
i += n
}
// buf is non-empty when padding was written or a replacement was made;
// otherwise b can be returned as is.
if buf.Len() != 0 {
buf.Write(b[written:])
if pad {
buf.WriteByte(' ')
}
return buf.String()
}
return string(b)
}
// EscapeJSStr produces a string that can be included between quotes in
// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
// or in an HTML5 event handler attribute such as onclick.
// It applies jsStrReplacementTable through replace.
func EscapeJSStr(s string) string {
return replace(s, jsStrReplacementTable)
}
// EscapeJSStrNorm is like EscapeJSStr but applies jsStrNormReplacementTable,
// which has no entry for `\` so that existing escapes in s are not
// over-encoded.
func EscapeJSStrNorm(s template.JSStr) string {
return replace(string(s), jsStrNormReplacementTable)
}
// EscapeJSTmpLit escapes s using jsBqStrReplacementTable, which extends the
// JS string table with the template-literal specials $, {, and }.
func EscapeJSTmpLit(s string) string {
return replace(s, jsBqStrReplacementTable)
}
// EscapeJSRegexp behaves like EscapeJSStr but also escapes regular expression
// specials, so the result is treated literally when included in a regular
// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
// the literal text of {{.X}} followed by the string "bar".
func EscapeJSRegexp(s string) string {
	if escaped := replace(s, jsRegexpReplacementTable); escaped != "" {
		return escaped
	}
	// /{{.X}}/ should not produce a line comment when .X == "".
	return "(?:)"
}
// replace returns s with runes substituted as follows, in order of priority:
// a rune below len(lowUnicodeReplacementTable) is replaced by its entry in
// that table; otherwise a rune r < len(replacementTable) with a non-empty
// replacementTable[r] is replaced by that entry; otherwise U+2028 and U+2029
// are replaced by the raw strings `\u2028` and `\u2029`. All other runes are
// copied through, and s itself is returned if nothing was replaced.
func replace(s string, replacementTable []string) string {
	var out strings.Builder
	flushed := 0 // Count of leading bytes of s already copied to out.
	for pos, width := 0, 0; pos < len(s); pos += width {
		// Decode by hand instead of ranging over s so that width is the
		// number of input bytes consumed, even on a decoding error.
		var r rune
		r, width = utf8.DecodeRuneInString(s[pos:])

		var esc string
		if idx := int(r); idx < len(lowUnicodeReplacementTable) {
			esc = lowUnicodeReplacementTable[r]
		} else if idx < len(replacementTable) && replacementTable[r] != "" {
			esc = replacementTable[r]
		} else if r == '\u2028' {
			esc = `\u2028`
		} else if r == '\u2029' {
			esc = `\u2029`
		} else {
			continue
		}

		if flushed == 0 {
			out.Grow(len(s))
		}
		out.WriteString(s[flushed:pos])
		out.WriteString(esc)
		flushed = pos + width
	}
	if flushed == 0 {
		return s
	}
	out.WriteString(s[flushed:])
	return out.String()
}
// lowUnicodeReplacementTable maps each rune in U+0000..U+001F (the slice
// index) to a JavaScript escape. replace consults it before the table
// supplied by its caller.
var lowUnicodeReplacementTable = []string{
0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
'\a': `\u0007`,
'\b': `\u0008`,
'\t': `\t`,
'\n': `\n`,
'\v': `\u000b`, // "\v" == "v" on IE 6.
'\f': `\f`,
'\r': `\r`,
0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
}
// jsStrReplacementTable maps a rune (the slice index) to the escape that
// EscapeJSStr substitutes for it inside a quoted JavaScript string.
var jsStrReplacementTable = []string{
0: `\u0000`,
'\t': `\t`,
'\n': `\n`,
'\v': `\u000b`, // "\v" == "v" on IE 6.
'\f': `\f`,
'\r': `\r`,
// Encode HTML specials as hex so the output can be embedded
// in HTML attributes without further encoding.
'"': `\u0022`,
'`': `\u0060`,
'&': `\u0026`,
'\'': `\u0027`,
'+': `\u002b`,
'/': `\/`,
'<': `\u003c`,
'>': `\u003e`,
'\\': `\\`,
}
// jsBqStrReplacementTable is like jsStrReplacementTable except it also contains
// the special characters for JS template literals: $, {, and }.
// It is the table used by EscapeJSTmpLit.
var jsBqStrReplacementTable = []string{
0: `\u0000`,
'\t': `\t`,
'\n': `\n`,
'\v': `\u000b`, // "\v" == "v" on IE 6.
'\f': `\f`,
'\r': `\r`,
// Encode HTML specials as hex so the output can be embedded
// in HTML attributes without further encoding.
'"': `\u0022`,
'`': `\u0060`,
'&': `\u0026`,
'\'': `\u0027`,
'+': `\u002b`,
'/': `\/`,
'<': `\u003c`,
'>': `\u003e`,
'\\': `\\`,
'$': `\u0024`,
'{': `\u007b`,
'}': `\u007d`,
}
// jsStrNormReplacementTable is like jsStrReplacementTable but does not
// overencode existing escapes since this table has no entry for `\`.
// It is the table used by EscapeJSStrNorm.
var jsStrNormReplacementTable = []string{
0: `\u0000`,
'\t': `\t`,
'\n': `\n`,
'\v': `\u000b`, // "\v" == "v" on IE 6.
'\f': `\f`,
'\r': `\r`,
// Encode HTML specials as hex so the output can be embedded
// in HTML attributes without further encoding.
'"': `\u0022`,
'&': `\u0026`,
'\'': `\u0027`,
'`': `\u0060`,
'+': `\u002b`,
'/': `\/`,
'<': `\u003c`,
'>': `\u003e`,
}
// jsRegexpReplacementTable maps a rune (the slice index) to the escape that
// EscapeJSRegexp substitutes for it. Besides the HTML specials it
// backslash-escapes the regular expression metacharacters listed below.
var jsRegexpReplacementTable = []string{
0: `\u0000`,
'\t': `\t`,
'\n': `\n`,
'\v': `\u000b`, // "\v" == "v" on IE 6.
'\f': `\f`,
'\r': `\r`,
// Encode HTML specials as hex so the output can be embedded
// in HTML attributes without further encoding.
'"': `\u0022`,
'$': `\$`,
'&': `\u0026`,
'\'': `\u0027`,
'(': `\(`,
')': `\)`,
'*': `\*`,
'+': `\u002b`,
'-': `\-`,
'.': `\.`,
'/': `\/`,
'<': `\u003c`,
'>': `\u003e`,
'?': `\?`,
'[': `\[`,
'\\': `\\`,
']': `\]`,
'^': `\^`,
'{': `\{`,
'|': `\|`,
'}': `\}`,
}
// isJSIdentPart reports whether r is a JS identifier part.
// It does not handle all the non-Latin letters, joiners, and combining marks,
// but it does handle every codepoint that can occur in a numeric literal or
// a keyword: '$', '_', and the ASCII letters and digits.
func isJSIdentPart(r rune) bool {
	return r == '$' || r == '_' ||
		('0' <= r && r <= '9') ||
		('A' <= r && r <= 'Z') ||
		('a' <= r && r <= 'z')
}
package htmlescape
import "html/template"
// EscapeRCData escapes for inclusion in an RCDATA element body.
// It applies htmlReplacementTable through htmlReplacer.
func EscapeRCData(s string) string {
return htmlReplacer(s, htmlReplacementTable, true)
}
// EscapeRCDataNorm is like EscapeRCData but applies htmlNormReplacementTable,
// which has no entry for '&' so that existing entities are not over-encoded.
// Unlike EscapeAttrNorm it does not strip tags from s first.
func EscapeRCDataNorm(s template.HTML) string {
return htmlReplacer(string(s), htmlNormReplacementTable, true)
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"fmt"
"html/template"
"strings"
)
// FilterURL returns its input unless it contains an unsafe scheme in which
// case it defangs the entire URL by returning "#" followed by filterFailsafe.
//
// Schemes that cause unintended side effects that are irreversible without user
// interaction are considered unsafe. For example, clicking on a "javascript:"
// link can immediately trigger JavaScript code execution.
//
// This filter conservatively assumes that all schemes other than the following
// are unsafe:
//   - http:   Navigates to a new website, and may open a new window or tab.
//     These side effects can be reversed by navigating back to the
//     previous website, or closing the window or tab. No irreversible
//     changes will take place without further user interaction with
//     the new website.
//   - https:  Same as http.
//   - mailto: Opens an email program and starts a new draft. This side effect
//     is not irreversible until the user explicitly clicks send; it
//     can be undone by closing the email program.
//
// To allow URLs containing other schemes to bypass this filter, developers must
// explicitly indicate that such a URL is expected and safe by encapsulating it
// in a template.URL value.
func FilterURL(s string) string {
	if isSafeURL(s) {
		return s
	}
	return "#" + filterFailsafe
}
// isSafeURL reports whether s is a relative URL or has one of the protocols
// http, https, or mailto (compared case-insensitively). Text before the first
// ':' counts as a protocol only if it contains no '/'.
func isSafeURL(s string) bool {
	scheme, _, found := strings.Cut(s, ":")
	if !found || strings.Contains(scheme, "/") {
		// No protocol present: treat as a relative URL.
		return true
	}
	for _, allowed := range [...]string{"http", "https", "mailto"} {
		if strings.EqualFold(scheme, allowed) {
			return true
		}
	}
	return false
}
// EscapeURL produces an output that can be embedded in a URL query.
// The output can be embedded in an HTML attribute without further escaping.
// It calls urlProcessor with norm set to false.
func EscapeURL(s string) string {
return urlProcessor(false, s)
}
// EscapeURLNorm is the template.URL counterpart of EscapeURL. It calls
// urlProcessor with norm set to true, so s is normalized rather than escaped.
func EscapeURLNorm(s template.URL) string {
return urlProcessor(true, string(s))
}
// NormalizeURL normalizes URL content so it can be embedded in a quote-delimited
// string or parenthesis delimited url(...).
// The normalizer does not encode all HTML specials. Specifically, it does not
// encode '&' so correct embedding in an HTML attribute requires escaping of
// '&' to '&amp;'.
func NormalizeURL(s string) string {
return urlProcessor(true, s)
}
// urlProcessor normalizes (when norm is true) or escapes its input to produce
// a valid hierarchical or opaque URL part. The input string itself is
// returned when processURLOnto reports that nothing had to change.
func urlProcessor(norm bool, s string) string {
	//if t == contentTypeURL {
	//	norm = true
	//}
	var out strings.Builder
	if !processURLOnto(s, norm, &out) {
		return s
	}
	return out.String()
}
// processURLOnto appends a normalized URL corresponding to its input to b
// and reports whether the appended content differs from s.
//
// Each byte is either copied through or written as a lower-case %xx escape.
// When norm is true, the reserved characters listed in the first case and
// any '%' that starts a valid two-digit hex escape are copied through; when
// norm is false they are escaped.
func processURLOnto(s string, norm bool, b *strings.Builder) bool {
b.Grow(len(s) + 16)
// written is the number of leading bytes of s already appended to b.
written := 0
// The byte loop below assumes that all URLs use UTF-8 as the
// content-encoding. This is similar to the URI to IRI encoding scheme
// defined in section 3.1 of RFC 3987, and behaves the same as the
// EcmaScript builtin encodeURIComponent.
// It should not cause any misencoding of URLs in pages with
// Content-type: text/html;charset=UTF-8.
for i, n := 0, len(s); i < n; i++ {
c := s[i]
// A case that does not continue falls out of the switch to the
// escaping code below it.
switch c {
// Single quote and parens are sub-delims in RFC 3986, but we
// escape them so the output can be embedded in single
// quoted attributes and unquoted CSS url(...) constructs.
// Single quotes are reserved in URLs, but are only used in
// the obsolete "mark" rule in an appendix in RFC 3986
// so can be safely encoded.
case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
if norm {
continue
}
// Unreserved according to RFC 3986 sec 2.3
// "For consistency, percent-encoded octets in the ranges of
// ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D),
// period (%2E), underscore (%5F), or tilde (%7E) should not be
// created by URI producers
case '-', '.', '_', '~':
continue
case '%':
// When normalizing do not re-encode valid escapes.
if norm && i+2 < len(s) && isHex(s[i+1]) && isHex(s[i+2]) {
continue
}
default:
// Unreserved according to RFC 3986 sec 2.3
if 'a' <= c && c <= 'z' {
continue
}
if 'A' <= c && c <= 'Z' {
continue
}
if '0' <= c && c <= '9' {
continue
}
}
// Flush the pending unescaped run, then write c as %xx.
b.WriteString(s[written:i])
fmt.Fprintf(b, "%%%02x", c)
written = i + 1
}
b.WriteString(s[written:])
// written stays zero only when no byte was escaped.
return written != 0
}
// FilterAndEscapeSrcset filters and normalizes srcset values, which are comma
// separated URLs followed by metadata. Each comma-delimited element is passed
// to filterSrcsetElement and the commas are copied through unchanged.
func FilterAndEscapeSrcset(s string) string {
	/*	switch t {
		case contentTypeSrcset:
			return s
		case contentTypeURL:
			// Normalizing gets rid of all HTML whitespace
			// which separate the image URL from its metadata.
			var b strings.Builder
			if processURLOnto(s, true, &b) {
				s = b.String()
			}
			// Additionally, commas separate one source from another.
			return strings.ReplaceAll(s, ",", "%2c")
		}*/
	var out strings.Builder
	elemStart := 0
	for {
		offset := strings.IndexByte(s[elemStart:], ',')
		if offset < 0 {
			break
		}
		elemEnd := elemStart + offset
		filterSrcsetElement(s, elemStart, elemEnd, &out)
		out.WriteString(",")
		elemStart = elemEnd + 1
	}
	// The final element has no trailing comma.
	filterSrcsetElement(s, elemStart, len(s), &out)
	return out.String()
}
// FilterAndEscapeSrcsetURL is the exported template.URL entry point for
// filterAndEscapeSrcsetURL.
func FilterAndEscapeSrcsetURL(s template.URL) string {
return filterAndEscapeSrcsetURL(string(s))
}
// filterAndEscapeSrcsetURL normalizes s as a URL with processURLOnto and then
// replaces every remaining comma with "%2c".
func filterAndEscapeSrcsetURL(s string) string {
// Normalizing gets rid of all HTML whitespace
// which separate the image URL from its metadata.
var b strings.Builder
if processURLOnto(s, true, &b) {
s = b.String()
}
// Additionally, commas separate one source from another.
return strings.ReplaceAll(s, ",", "%2c")
}
// htmlSpaceAndASCIIAlnumBytes is a 128-bit bitmap indexed by ASCII code: bit
// c%8 of byte c/8 is set when c is HTML whitespace or an ASCII letter or
// digit.
// Derived from https://play.golang.org/p/Dhmj7FORT5
const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"

// isHTMLSpace is true iff c is a whitespace character per
// https://infra.spec.whatwg.org/#ascii-whitespace
func isHTMLSpace(c byte) bool {
	if c > 0x20 {
		return false
	}
	return htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<(c&0x7)) != 0
}

// isHTMLSpaceOrASCIIAlnum is true iff c is HTML whitespace or an ASCII letter
// or digit. Bytes of 0x80 and above are never accepted.
func isHTMLSpaceOrASCIIAlnum(c byte) bool {
	if c >= 0x80 {
		return false
	}
	return htmlSpaceAndASCIIAlnumBytes[c>>3]&(1<<(c&0x7)) != 0
}
// filterSrcsetElement appends to b the filtered form of the srcset element
// s[left:right]. The element is split into leading HTML whitespace, a URL
// that runs up to the next HTML whitespace, and trailing metadata. If the URL
// is safe and the metadata holds only whitespace and ASCII alphanumerics, the
// element is written with its URL normalized; otherwise "#" followed by
// filterFailsafe is written in its place.
func filterSrcsetElement(s string, left int, right int, b *strings.Builder) {
	// Skip leading whitespace to find where the URL begins.
	urlStart := left
	for urlStart < right && isHTMLSpace(s[urlStart]) {
		urlStart++
	}
	// The URL ends at the next whitespace byte, or at right.
	urlEnd := urlStart
	for urlEnd < right && !isHTMLSpace(s[urlEnd]) {
		urlEnd++
	}

	candidate := s[urlStart:urlEnd]
	if isSafeURL(candidate) {
		// If image metadata is only spaces or alnums then
		// we don't need to URL normalize it.
		metaEnd := urlEnd
		for metaEnd < right && isHTMLSpaceOrASCIIAlnum(s[metaEnd]) {
			metaEnd++
		}
		if metaEnd == right {
			b.WriteString(s[left:urlStart])
			processURLOnto(candidate, true, b)
			b.WriteString(s[urlEnd:right])
			return
		}
	}
	b.WriteString("#")
	b.WriteString(filterFailsafe)
}
package coverage
//go:generate go run ../../main.go -o markdown.go markdown.tmpl
// Coverage holds information about the coverage of some element over some scope.
// Typically elements are either individual lines, or functions. Typically, the
// scope is a source file or executable.
type Coverage struct {
	Hits  int // Count of lines or functions that were executed.
	Total int // Count of lines or functions in the scope.
}

// P returns the percentage of lines or functions that were executed.
// The result is only meaningful when Valid reports true.
func (c Coverage) P() float32 {
	hits, total := float32(c.Hits), float32(c.Total)
	return hits * 100 / total
}

// Q returns the percentage of lines or functions that were not executed.
// The result is only meaningful when Valid reports true.
func (c Coverage) Q() float32 {
	return 100 - c.P()
}

// Rating returns the rating (low, medium, or high) for this coverage:
// high at 90% or more, medium at 75% or more, and low otherwise.
func (c Coverage) Rating() Rating {
	switch pct := c.P(); {
	case pct >= 90:
		return HighCoverage
	case pct >= 75:
		return MediumCoverage
	default:
		return LowCoverage
	}
}

// Valid returns true if data was collected. In other words, unless the
// coverage is valid, any calculations may lead to divide by zero.
func (c Coverage) Valid() bool {
	return c.Total > 0
}

// Rating is a classification of the coverage into low, medium or high.
type Rating uint8

// These constants provide a rough classification for the amount of coverage in a scope.
const (
	LowCoverage Rating = iota
	MediumCoverage
	HighCoverage
)

// String returns a string representation of the rating. Any value other than
// LowCoverage or MediumCoverage is reported as "high".
func (cr Rating) String() string {
	switch cr {
	case LowCoverage:
		return "low"
	case MediumCoverage:
		return "medium"
	default:
		return "high"
	}
}
package coverage
import (
"fmt"
"io"
)
// Suppress errors if either imported package is not used.
// Assigning a function from each package to the blank identifier counts as a
// use of that import.
var (
_ = fmt.Fprint
_ = io.WriteString
)
// Sparkbar writes a one-line summary of dot to out: "Hits/Total (P%)" with
// the percentage shown to one decimal place, or "No Data" when dot is not
// valid. Write errors are discarded and the returned error is always nil.
func Sparkbar(out io.Writer, dot Coverage) error {
	if !dot.Valid() {
		_, _ = io.WriteString(out, "No Data\n")
		return nil
	}
	_, _ = fmt.Fprint(out, dot.Hits)
	_, _ = io.WriteString(out, "/")
	_, _ = fmt.Fprint(out, dot.Total)
	_, _ = io.WriteString(out, " (")
	_, _ = fmt.Fprintf(out, "%.1f", dot.P())
	_, _ = io.WriteString(out, "%)\n")
	return nil
}
package compile
import (
"bytes"
"fmt"
"go/types"
"io"
"git.sr.ht/~rj/gemplate/internal/parsecond"
"git.sr.ht/~rj/gemplate/parse"
)
// Compile writes to out the Go source of a function named node.Name, taking
// an io.Writer and a dot value of type node.DotType, whose body is generated
// from the template tree node. It returns an *Error attached to the root node
// when node.DotType cannot be compiled as a type expression in pkg, and
// otherwise the error from compiling the root list node.
func Compile(out io.Writer, node *parse.Tree, pkg *types.Package, funcs ...map[string]Func) error {
	ctx := NewContext(out, pkg, funcs...)

	dotType, err := CompileTypeExpr(pkg, node.DotType)
	if err != nil {
		return &Error{
			Node:    node.Root,
			Message: err.Error(),
		}
	}
	ctx.dot = dotType

	usesDollar := parsecond.HasDollar(node.Root)
	fmt.Fprintf(ctx.out, "func %s(out io.Writer, dot %s) error {\n", node.Name, node.DotType)
	if usesDollar {
		// Keep a copy of the initial dot and register it as "$".
		fmt.Fprint(ctx.out, "dot0 := dot\n")
		ctx.AddVariable("$", ctx.dot)
	}

	// The function epilogue is emitted after the body, even if the body
	// failed to compile.
	defer fmt.Fprint(ctx.out, "\nreturn nil\n}\n\n")
	return ctx.CompileListNode(node.Root)
}
// CompileListNode compiles each child of node in order, dispatching on the
// child's node type. Compilation continues after a child fails; the errors
// from all children are combined with AppendError and returned together.
// It panics on a node type that has no case below.
func (ctx *Context) CompileListNode(node *parse.ListNode) error {
var err error
for _, v := range node.Nodes {
switch v.Type() {
case parse.NodeText:
ctx.CompileTextNode(v.(*parse.TextNode))
case parse.NodeAction:
err2 := ctx.CompileActionNode(v.(*parse.ActionNode))
err = AppendError(err, err2)
case parse.NodeBreak:
// Break and continue are emitted verbatim as Go statements.
fmt.Fprint(ctx.out, "break\n")
case parse.NodeContinue:
fmt.Fprint(ctx.out, "continue\n")
case parse.NodeIf:
err2 := ctx.CompileIfNode(v.(*parse.IfNode))
err = AppendError(err, err2)
case parse.NodeRange:
err2 := ctx.CompileRangeNode(v.(*parse.RangeNode))
err = AppendError(err, err2)
case parse.NodeTemplate:
err2 := ctx.CompileTemplateNode(v.(*parse.TemplateNode))
err = AppendError(err, err2)
case parse.NodeWith:
err2 := ctx.CompileWithNode(v.(*parse.WithNode))
err = AppendError(err, err2)
default:
panic("unreachable")
}
}
return err
}
// CompileTextNode emits a statement that writes the node's text to out.
// The text is rendered with %#v, i.e. as a Go string literal.
func (ctx *Context) CompileTextNode(node *parse.TextNode) {
fmt.Fprintf(ctx.out, "_, _ = io.WriteString(out, %#v)\n", string(node.Text))
}
// isPrintAction reports whether the final command of the action's pipeline is
// a call to print, printf, or println and the pipeline declares no variables.
// These commands can be compiled into an optimized call directly to the
// matching function in the standard library.
func isPrintAction(node *parse.ActionNode) bool {
	last := node.Pipe.Cmds[len(node.Pipe.Cmds)-1]
	if len(node.Pipe.Decl) != 0 {
		return false
	}
	head := last.Args[0]
	if head.Type() != parse.NodeIdentifier {
		return false
	}
	switch head.String() {
	case "print", "printf", "println":
		return true
	}
	return false
}
// CompileActionNode emits the Go statement for a template action.
//
// Three shapes are handled:
//   - a pipeline ending in print, printf, or println becomes a direct call to
//     the matching fmt.Fprint* function, with the earlier pipeline stages
//     folded in as trailing arguments;
//   - a pipeline that declares variables is compiled as-is;
//   - any other pipeline is compiled into a scratch buffer and its value is
//     written with io.WriteString when assignable to string, or fmt.Fprint
//     otherwise. A pipeline whose result is untyped nil is an error.
//
// Note that the print case rewrites node.Pipe.Cmds in place.
func (ctx *Context) CompileActionNode(node *parse.ActionNode) error {
	// Special case calls to print, printf, and println.
	if isPrintAction(node) {
		// Rearrange pipeline into a single command.
		for len(node.Pipe.Cmds) > 1 {
			// Move the leading command as the last argument to the next
			// command.
			node.Pipe.Cmds[1].Args = append(node.Pipe.Cmds[1].Args, node.Pipe.Cmds[0])
			node.Pipe.Cmds = node.Pipe.Cmds[1:]
		}

		cmd := node.Pipe.Cmds[0]
		fmt.Fprintf(ctx.out, "_, _ = fmt.F%s(out", cmd.Args[0].String())
		// Args[0] is the function name. Ranging over the remainder keeps a
		// bare {{print}} or {{println}} from indexing past the end of Args.
		var err error
		for _, arg := range cmd.Args[1:] {
			fmt.Fprint(ctx.out, ", ")
			_, err2 := ctx.CompileCommandNodeArg(arg, true)
			err = AppendError(err, err2)
		}
		fmt.Fprint(ctx.out, ")\n")
		return err
	}

	// Assigning to variables
	if len(node.Pipe.Decl) > 0 {
		_, err := ctx.CompilePipeNode(node.Pipe)
		fmt.Fprint(ctx.out, "\n")
		return err
	}

	// Compile into a scratch buffer first: the wrapper to emit depends on the
	// pipeline's result type, which is only known after compiling it.
	buf := bytes.Buffer{}
	newctx := ctx.WithWriter(&buf)
	dot, err := newctx.CompilePipeNode(node.Pipe)
	if err != nil {
		return err
	}

	if dot != nil && types.AssignableTo(dot, types.Typ[types.String]) {
		fmt.Fprint(ctx.out, "_, _ = io.WriteString(out, ", buf.String(), ")\n")
	} else if bt, ok := dot.(*types.Basic); ok && bt.Kind() == types.UntypedNil {
		err = AppendError(err, &Error{
			Node:    node,
			Message: "result of action node is nil",
		})
	} else {
		fmt.Fprint(ctx.out, "_, _ = fmt.Fprint(out, ", buf.String(), ")\n")
	}
	return err
}
// CompileChainNode compiles the chain's base expression and then indexes into
// the result with each field identifier in turn. Every intermediate result
// except the last is passed through implicitCall. It returns the type of the
// final expression, or an *Error naming the first identifier that cannot be
// resolved on the current type.
func (ctx *Context) CompileChainNode(node *parse.ChainNode) (types.Type, error) {
	cur, err := ctx.CompileCommandNodeArg(node.Node, true)
	if err != nil {
		return nil, err
	}

	lastIdx := len(node.Field) - 1
	for idx, name := range node.Field {
		next := indexInto(ctx.out, ctx.pkg, cur, name)
		if next == nil {
			return nil, &Error{
				Node:    node,
				Message: fmt.Sprintf("cannot index into type '%s' with identifier '%s'", cur, name),
			}
		}
		cur = next

		// Implicitly call functions except for last identifier
		if idx < lastIdx {
			cur = implicitCall(ctx.out, cur)
		}
	}
	return cur, nil
}
// CompileCommandNode compiles a single command of a pipeline and returns the
// type of the expression it emitted.
//
// When the first argument is an identifier, the command is handed to the
// function registered under that name; an unregistered name is reported as an
// *Error. Otherwise the first argument is compiled as an expression. If that
// expression has a function type, the remaining arguments are emitted as a
// call and the type of the first result is returned; if it does not, any
// extra arguments are reported as an error.
func (ctx *Context) CompileCommandNode(node *parse.CommandNode) (types.Type, error) {
	if node.Args[0].Type() == parse.NodeIdentifier {
		name := node.Args[0].(*parse.IdentifierNode).Ident
		fn := ctx.FindFunction(name)
		// Report an unknown identifier the same way CompileCommandNodeArg
		// does instead of calling Compile on a nil function.
		if fn == nil {
			return nil, &Error{
				Node:    node.Args[0],
				Message: fmt.Sprintf("identifier not found: %s", name),
			}
		}
		return fn.Compile(ctx, node)
	}

	dot, err := ctx.CompileCommandNodeArg(node.Args[0], false)
	if err != nil {
		return nil, err
	}

	if ft, ok := dot.(*types.Signature); ok {
		// NOTE(review): At(0) assumes the callee has at least one result —
		// confirm that callers never reach here with a result-less function.
		if len(node.Args) == 1 {
			fmt.Fprint(ctx.out, "()")
			return ft.Results().At(0).Type(), nil
		}

		fmt.Fprint(ctx.out, "(")
		_, err2 := ctx.CompileCommandNodeArg(node.Args[1], true)
		err = AppendError(err, err2)
		for _, arg := range node.Args[2:] {
			fmt.Fprint(ctx.out, ", ")
			_, err2 := ctx.CompileCommandNodeArg(arg, true)
			err = AppendError(err, err2)
		}
		fmt.Fprint(ctx.out, ")")
		return ft.Results().At(0).Type(), err
	}

	if len(node.Args) > 1 {
		err = AppendError(err, &Error{
			Node:    node,
			Message: fmt.Sprintf("expected 0 arguments for command, found %d", len(node.Args)-1),
		})
	}
	return dot, err
}
// CompileCommandNodeArg emits the Go expression for a single command argument
// and returns its static type. When implicit is true, identifier, dot and
// field arguments that evaluate to a function are additionally called with no
// arguments. It panics on a node type it does not know how to compile.
func (ctx *Context) CompileCommandNodeArg(node parse.Node, implicit bool) (types.Type, error) {
	// maybeCall applies the implicit call only when the caller asked for it.
	maybeCall := func(t types.Type) types.Type {
		if !implicit {
			return t
		}
		return implicitCall(ctx.out, t)
	}

	switch node.Type() {
	case parse.NodeIdentifier:
		name := node.(*parse.IdentifierNode).Ident
		fn := ctx.FindFunction(name)
		if fn == nil {
			return nil, &Error{Node: node, Message: fmt.Sprintf("identifier not found: %s", name)}
		}
		// A bare identifier is compiled as a command without arguments.
		cmd := &parse.CommandNode{
			NodeType: parse.NodeCommand,
			Args:     []parse.Node{node},
		}
		result, err := fn.Compile(ctx, cmd)
		if err != nil {
			return nil, err
		}
		return maybeCall(result), nil

	case parse.NodeField:
		result, err := ctx.CompileFieldNode(node.(*parse.FieldNode))
		if err != nil {
			return nil, err
		}
		return maybeCall(result), nil

	case parse.NodeDot:
		_, _ = io.WriteString(ctx.out, "dot")
		return maybeCall(ctx.dot), nil

	case parse.NodeChain:
		return ctx.CompileChainNode(node.(*parse.ChainNode))
	case parse.NodeCommand:
		return ctx.CompileCommandNode(node.(*parse.CommandNode))
	case parse.NodeNumber:
		return ctx.CompileNumberNode(node.(*parse.NumberNode))
	case parse.NodePipe:
		return ctx.CompilePipeNode(node.(*parse.PipeNode))
	case parse.NodeVariable:
		return ctx.CompileVariableNode(node.(*parse.VariableNode))

	// Literals are written out directly.
	case parse.NodeBool:
		fmt.Fprint(ctx.out, node)
		return types.Typ[types.UntypedBool], nil
	case parse.NodeNil:
		fmt.Fprint(ctx.out, "nil")
		return types.Typ[types.UntypedNil], nil
	case parse.NodeString:
		fmt.Fprint(ctx.out, node.(*parse.StringNode).Quoted)
		return types.Typ[types.UntypedString], nil
	}

	panic(fmt.Sprintf("command-node-arg-%d: %s\n", node.Type(), node.String()))
}
// CompileCommandNodeArgTruthy compiles node into a scratch buffer and then
// writes a boolean Go expression testing that value for "truthiness" to the
// context's writer. It returns the type of the argument itself; an error is
// returned when the type has no truthiness rule.
func (ctx *Context) CompileCommandNodeArgTruthy(node parse.Node) (types.Type, error) {
	var expr bytes.Buffer
	argType, err := ctx.WithWriter(&expr).CompileCommandNodeArg(node, true)
	if err != nil {
		return argType, err
	}
	if compileAsTruthy(ctx.out, argType, expr.String()) {
		return argType, nil
	}
	return argType, &Error{Node: node, Message: "expected a 'truthy' value"}
}
// CompileFieldNode emits "dot" followed by one selector (or map index) per
// identifier of the field node, starting from the context's current dot type.
// It returns the static type of the resulting expression.
func (ctx *Context) CompileFieldNode(node *parse.FieldNode) (types.Type, error) {
	fmt.Fprint(ctx.out, "dot")

	current := ctx.dot
	last := len(node.Ident) - 1
	for pos, name := range node.Ident {
		next := indexInto(ctx.out, ctx.pkg, current, name)
		if next == nil {
			msg := fmt.Sprintf("cannot index into type '%s' with identifier '%s'", current, name)
			return nil, &Error{Node: node, Message: msg}
		}
		// Every identifier but the last is implicitly called when it
		// resolves to a function.
		if pos != last {
			next = implicitCall(ctx.out, next)
		}
		current = next
	}
	return current, nil
}
// CompileIfNode emits a Go if statement for the template's if action. The
// condition and the main list are compiled in a new variable scope. An else
// list consisting of exactly one nested if node is emitted as "else if".
//
// All errors found while compiling the condition, the body, and any else
// branch are accumulated and returned together.
func (ctx *Context) CompileIfNode(node *parse.IfNode) error {
	var err error

	newctx := ctx.WithNewScope()

	fmt.Fprint(ctx.out, "if ")
	_, err2 := newctx.CompileIfNodePipe(node.Pipe)
	err = AppendError(err, err2)
	fmt.Fprint(ctx.out, " {\n")
	err = AppendError(err, newctx.CompileListNode(node.List))

	if node.ElseList != nil && len(node.ElseList.Nodes) == 1 {
		if in, ok := node.ElseList.Nodes[0].(*parse.IfNode); ok {
			fmt.Fprint(ctx.out, "} else ")
			// Keep the errors collected so far instead of returning only
			// the nested result.
			return AppendError(err, ctx.CompileIfNode(in))
		}
	}

	if node.ElseList != nil {
		fmt.Fprint(ctx.out, "} else {\n")
		err = AppendError(err, ctx.CompileListNode(node.ElseList))
	}
	fmt.Fprint(ctx.out, "}\n")
	return err
}
// CompileIfNodePipe emits the condition part of an if statement for the pipe.
// When the pipe declares variables, the declaration is written first as the
// if statement's init clause ("decl; ") and the first declared variable is
// then used as the value under test. The value is wrapped in a truthiness
// test; an error is returned if its type has no truthiness rule.
func (ctx *Context) CompileIfNodePipe(node *parse.PipeNode) (types.Type, error) {
	var cond bytes.Buffer
	scratch := ctx.WithWriter(&cond)

	pipeType, err := scratch.CompilePipeNode(node)
	if err != nil {
		return nil, err
	}

	if len(node.Decl) > 0 {
		// Flush the declaration, then re-use the scratch buffer to build a
		// reference to the first declared variable.
		fmt.Fprint(ctx.out, cond.String(), "; ")
		cond.Reset()

		varRef := &parse.VariableNode{
			NodeType: parse.NodeVariable,
			Ident:    node.Decl[0].Ident,
		}
		_, _ = scratch.CompilePipeNode(&parse.PipeNode{
			NodeType: node.NodeType,
			Pos:      node.Pos,
			Cmds: []*parse.CommandNode{{
				NodeType: parse.NodeCommand,
				Args:     []parse.Node{varRef},
			}},
		})
	}

	if compileAsTruthy(ctx.out, pipeType, cond.String()) {
		return pipeType, nil
	}
	return pipeType, &Error{Node: node, Message: "expected a 'truthy' value"}
}
// compileAsTruthy writes to out a boolean Go expression that tests expr, an
// expression of type dot, for "truthiness". The test depends on the
// underlying type:
//
//   - booleans are used as-is;
//   - numbers are compared against 0 and strings against "";
//   - slices and maps are tested for a non-zero length;
//   - pointers, interfaces and channels are compared against nil.
//
// It reports false, writing nothing, for any other type.
func compileAsTruthy(out io.Writer, dot types.Type, expr string) bool {
	switch under := dot.Underlying().(type) {
	case *types.Basic:
		info := under.Info()
		switch {
		case info&types.IsBoolean != 0:
			fmt.Fprint(out, expr)
		case info&types.IsNumeric != 0:
			fmt.Fprint(out, expr, " != 0")
		case info&types.IsString != 0:
			fmt.Fprint(out, expr, " != \"\"")
		default:
			return false
		}
	case *types.Slice, *types.Map:
		fmt.Fprint(out, "len(", expr, ") > 0")
	case *types.Pointer, *types.Interface, *types.Chan:
		fmt.Fprint(out, expr, " != nil")
	default:
		return false
	}
	return true
}
// CompileNumberNode writes the literal text of the number and returns the
// matching untyped constant type: integer (signed or unsigned), float, or
// complex when none of the other flags is set.
func (ctx *Context) CompileNumberNode(node *parse.NumberNode) (types.Type, error) {
	fmt.Fprint(ctx.out, node.Text)

	kind := types.UntypedComplex
	switch {
	case node.IsInt, node.IsUint:
		kind = types.UntypedInt
	case node.IsFloat:
		kind = types.UntypedFloat
	}
	return types.Typ[kind], nil
}
// CompilePipeNode emits the Go expression (or declaration/assignment) for a
// pipeline and returns the type of its value.
//
// The pipeline is first rewritten in place so that each command becomes the
// last argument of the command that follows it, leaving a single command.
// If the pipe declares variables, the left-hand side and the ":=" or "="
// operator are emitted before the command, and the first declared variable is
// recorded in the context with the command's type.
func (ctx *Context) CompilePipeNode(node *parse.PipeNode) (types.Type, error) {
	// Fold "a | b" into "b a".
	for len(node.Cmds) > 1 {
		head, next := node.Cmds[0], node.Cmds[1]
		next.Args = append(next.Args, head)
		node.Cmds = node.Cmds[1:]
	}

	if len(node.Decl) == 0 {
		return ctx.CompileCommandNode(node.Cmds[0])
	}

	// Templates only ever bind a single value; ranges handle their own
	// declarations.
	ctx.CompilePipeNodeDecl(node)
	op := " := "
	if node.IsAssign {
		op = " = "
	}
	fmt.Fprint(ctx.out, op)

	valueType, err := ctx.CompileCommandNode(node.Cmds[0])
	if err != nil {
		return nil, err
	}
	ctx.AddVariable(node.Decl[0].Ident[0], valueType)
	return valueType, nil
}
// CompilePipeNodeDecl writes the comma-separated list of variables declared
// by the pipe, i.e. the left-hand side of the declaration. The pipe must
// declare at least one variable.
func (ctx *Context) CompilePipeNodeDecl(node *parse.PipeNode) {
	first, rest := node.Decl[0], node.Decl[1:]
	ctx.compileVariableNodeLHS(first)
	for i := range rest {
		fmt.Fprint(ctx.out, ", ")
		ctx.compileVariableNodeLHS(rest[i])
	}
}
// CompileRangeNode emits a Go for-range loop for the template's range action.
//
// When the node has an else list, the loop is wrapped in an if statement that
// tests the pipe for truthiness, and the else list becomes that statement's
// else branch. The loop body is compiled in a new scope whose dot is the
// element type of the ranged value (rune for strings). Ranging over anything
// other than an array, slice, map, string or channel is reported as an error.
//
// All errors, including those found in the else list, are accumulated and
// returned together.
func (ctx *Context) CompileRangeNode(node *parse.RangeNode) error {
	var err error

	hasElse := node.ElseList != nil
	if hasElse {
		fmt.Fprint(ctx.out, "if ")
		_, err2 := ctx.CompileIfNodePipe(node.Pipe)
		err = AppendError(err, err2)
		fmt.Fprint(ctx.out, " {\n")
	}

	newctx := ctx.WithNewScope()

	fmt.Fprint(ctx.out, "for ")
	switch len(node.Pipe.Decl) {
	case 1:
		fmt.Fprint(newctx.out, "_, ")
		newctx.CompilePipeNodeDecl(node.Pipe)
		fmt.Fprint(newctx.out, " := range ")
		// Compile the pipe without its declarations; they were emitted above.
		newpipe := *node.Pipe
		newpipe.Decl = nil
		dot, err2 := newctx.CompilePipeNode(&newpipe)
		err = AppendError(err, err2)
		if dot == nil {
			panic("dot == nil")
		}
		newctx.dot = dot
		newctx.AddVariable(node.Pipe.Decl[0].Ident[0], dot)
	case 2:
		newctx.CompilePipeNodeDecl(node.Pipe)
		fmt.Fprint(ctx.out, " := range ")
		newpipe := *node.Pipe
		newpipe.Decl = nil
		dot, err2 := newctx.CompilePipeNode(&newpipe)
		err = AppendError(err, err2)
		if dot == nil {
			panic("dot == nil")
		}
		newctx.dot = dot
		newctx.AddVariable(node.Pipe.Decl[0].Ident[0], types.Typ[types.Int])
		newctx.AddVariable(node.Pipe.Decl[1].Ident[0], dot)
	default:
		// len(node.Pipe.Decl) == 0
		if parsecond.HasDot(node.List) {
			fmt.Fprint(ctx.out, "_, dot := range ")
		} else {
			fmt.Fprint(ctx.out, "range ")
		}
		dot, err2 := ctx.CompilePipeNode(node.Pipe)
		err = AppendError(err, err2)
		newctx.dot = dot
	}
	fmt.Fprint(ctx.out, " {\n")

	// Inside the loop, dot is the element of the ranged value.
	if st, ok := newctx.dot.(*types.Slice); ok {
		newctx.dot = st.Elem()
	} else if mt, ok := newctx.dot.(*types.Map); ok {
		newctx.dot = mt.Elem()
	} else if ct, ok := newctx.dot.(*types.Chan); ok {
		newctx.dot = ct.Elem()
	} else if at, ok := newctx.dot.(*types.Array); ok {
		newctx.dot = at.Elem()
	} else if bt, ok := newctx.dot.(*types.Basic); ok && (bt.Info()&types.IsString) != 0 {
		newctx.dot = types.Typ[types.Rune]
	} else {
		err = AppendError(err, &Error{
			Node:    node.Pipe,
			Message: "pipe for range must be an array, slice, map, string, or channel",
		})
	}

	err2 := newctx.CompileListNode(node.List)
	err = AppendError(err, err2)
	fmt.Fprint(ctx.out, "}\n")

	// Emit the else branch here rather than in a defer: a deferred closure
	// cannot change the value returned through an unnamed result, so its
	// errors would be lost.
	if hasElse {
		fmt.Fprint(ctx.out, "} else {\n")
		err = AppendError(err, ctx.CompileListNode(node.ElseList))
		fmt.Fprint(ctx.out, "}\n")
	}
	return err
}
// CompileTemplateNode emits a call to the function named after the invoked
// template, passing "out" and either the compiled pipe or nil when the
// template action has no pipe.
func (ctx *Context) CompileTemplateNode(node *parse.TemplateNode) error {
	if node.Pipe != nil {
		fmt.Fprintf(ctx.out, "%s(out, ", node.Name)
		_, err := ctx.CompilePipeNode(node.Pipe)
		fmt.Fprint(ctx.out, ")\n")
		return err
	}

	fmt.Fprintf(ctx.out, "%s(out, nil)\n", node.Name)
	return nil
}
// CompileVariableNode emits a reference to a template variable followed by
// any field selectors, and returns the type of the resulting expression.
// The root variable "$" is written as "dot0"; any other variable is written
// without its leading character. It panics if the variable is not known to
// the context.
func (ctx *Context) CompileVariableNode(node *parse.VariableNode) (types.Type, error) {
	root := node.Ident[0]
	if root == "$" {
		fmt.Fprint(ctx.out, "dot0")
	} else {
		fmt.Fprint(ctx.out, root[1:])
	}

	current := ctx.vars[root]
	if current == nil {
		panic("internal error: " + root)
	}

	fields := node.Ident[1:]
	for pos, name := range fields {
		next := indexInto(ctx.out, ctx.pkg, current, name)
		if next == nil {
			msg := fmt.Sprintf("cannot index into type '%s' with identifier '%s'", current, name)
			return nil, &Error{Node: node, Message: msg}
		}
		// Every field but the last is implicitly called when it resolves to
		// a function.
		if pos < len(fields)-1 {
			next = implicitCall(ctx.out, next)
		}
		current = next
	}
	return current, nil
}
// compileVariableNodeLHS writes the Go name of a variable that appears on the
// left-hand side of a declaration or assignment. Such a variable must be a
// single identifier other than "$"; anything else is an internal error and
// panics.
func (ctx *Context) compileVariableNodeLHS(node *parse.VariableNode) {
	if len(node.Ident) != 1 || node.Ident[0] == "$" {
		panic("internal error")
	}
	name := node.Ident[0]
	fmt.Fprint(ctx.out, name[1:])
}
// CompileWithNode emits a Go if statement for the template's with action.
//
// The pipe is evaluated in the if statement's init clause (bound to the
// declared variable, or to "withdot" when the pipe declares none) and then
// tested for truthiness. Inside the body, "dot" is rebound to that value when
// the body refers to dot. An else list consisting of exactly one nested with
// node is emitted as an "else if" chain.
//
// All errors found while compiling the pipe, the body, and any else branch
// are accumulated and returned together.
func (ctx *Context) CompileWithNode(node *parse.WithNode) error {
	var err error

	newctx := ctx.WithNewScope()

	if len(node.Pipe.Decl) > 0 {
		fmt.Fprint(ctx.out, "if ")
		dot, err2 := newctx.CompilePipeNode(node.Pipe)
		err = AppendError(err, err2)
		newctx.dot = dot
		fmt.Fprint(ctx.out, "; ")
		// The condition tests the last declared variable.
		_, err2 = newctx.CompileIfNodePipe(&parse.PipeNode{
			Cmds: []*parse.CommandNode{{
				Args: []parse.Node{node.Pipe.Decl[len(node.Pipe.Decl)-1]},
			}},
		})
		err = AppendError(err, err2)
		fmt.Fprint(ctx.out, " {\n")
		if parsecond.HasDot(node.List) {
			fmt.Fprint(ctx.out, "dot := ")
			_, err2 := newctx.CompileVariableNode(node.Pipe.Decl[len(node.Pipe.Decl)-1])
			err = AppendError(err, err2)
			fmt.Fprint(ctx.out, "\n")
		}
	} else {
		fmt.Fprint(ctx.out, "if withdot := ")
		dot, err2 := ctx.CompilePipeNode(node.Pipe)
		err = AppendError(err, err2)
		newctx.dot = dot
		newctx.AddVariable("$withdot", dot)
		fmt.Fprint(ctx.out, "; ")
		_, err2 = newctx.CompileIfNodePipe(&parse.PipeNode{
			Cmds: []*parse.CommandNode{{
				NodeType: parse.NodeCommand,
				Args: []parse.Node{&parse.VariableNode{
					NodeType: parse.NodeVariable,
					Ident:    []string{"$withdot"},
				}},
			}},
		})
		err = AppendError(err, err2)
		fmt.Fprint(ctx.out, " {\n")
		if node.List != nil && parsecond.HasDot(node.List) {
			fmt.Fprint(ctx.out, "dot := withdot\n")
		}
	}

	err2 := newctx.CompileListNode(node.List)
	err = AppendError(err, err2)

	if node.ElseList != nil && len(node.ElseList.Nodes) == 1 {
		if wn, ok := node.ElseList.Nodes[0].(*parse.WithNode); ok {
			fmt.Fprint(ctx.out, "} else ")
			// Keep the errors collected so far instead of replacing them
			// with the nested result.
			return AppendError(err, ctx.CompileWithNode(wn))
		}
	}

	if node.ElseList != nil {
		fmt.Fprint(ctx.out, "} else {\n")
		// NOTE(review): the else list is compiled with newctx, whose dot is
		// the type of the with-pipe, while the generated else block does not
		// rebind "dot" -- confirm this is intended.
		err2 := newctx.CompileListNode(node.ElseList)
		err = AppendError(err, err2)
	}
	fmt.Fprint(ctx.out, "}\n")
	return err
}
package compile
import (
"go/types"
"io"
)
// Context carries the state needed while compiling template nodes to Go
// source: the static type of the current dot, the known variables, and the
// environment (output writer, package, and function tables).
type Context struct {
// Current state
// dot is the static type of the value that "dot" refers to.
dot types.Type
// vars maps a variable name (as it appears in the template) to its type.
vars map[string]types.Type
// Environment
// out receives the generated Go source.
out io.Writer
// pkg is the package used to resolve fields and methods.
pkg *types.Package
// funcs are the function tables, searched in order by FindFunction.
funcs []map[string]Func
}
// NewContext returns a Context that writes generated code to out, resolves
// identifiers against pkg, and looks up template functions in funcs (in the
// order given). The dot type and the variable table start out unset.
func NewContext(out io.Writer, pkg *types.Package, funcs ...map[string]Func) *Context {
	ctx := new(Context)
	ctx.out = out
	ctx.pkg = pkg
	ctx.funcs = funcs
	return ctx
}
// SetDot sets the static type of the value that "dot" refers to.
func (ctx *Context) SetDot(dot types.Type) {
ctx.dot = dot
}
// WithNewScope returns a copy of the context with its own variable table.
// The new table starts with all variables of ctx; variables added to the
// returned context are not visible through ctx. The writer, package,
// function tables and dot type are shared with ctx.
func (ctx *Context) WithNewScope() *Context {
	// Allocate the table once, sized for the variables being copied.
	vars := make(map[string]types.Type, len(ctx.vars))
	for name, t := range ctx.vars {
		vars[name] = t
	}
	return &Context{
		dot:   ctx.dot,
		vars:  vars,
		out:   ctx.out,
		pkg:   ctx.pkg,
		funcs: ctx.funcs,
	}
}
// WithWriter returns a copy of the context that writes to out instead. All
// other state, including the variable table itself (not a copy of it), is
// shared with ctx.
func (ctx *Context) WithWriter(out io.Writer) *Context {
	clone := *ctx
	clone.out = out
	return &clone
}
// FindFunction returns the function registered under name in the first
// function table that contains it, or nil if no table does.
//
// The parser already checks function names, but parsing and compiling are
// separate calls, so a missing name is still possible here.
func (ctx *Context) FindFunction(name string) Func {
	for _, table := range ctx.funcs {
		// Looking up a key in a nil map is safe and reports "not found".
		if fn, found := table[name]; found {
			return fn
		}
	}
	return nil
}
// AddVariable records that the variable name has type t, creating the
// variable table on first use. An existing entry for name is replaced.
func (ctx *Context) AddVariable(name string, t types.Type) {
	if ctx.vars != nil {
		ctx.vars[name] = t
		return
	}
	ctx.vars = map[string]types.Type{name: t}
}
// Writer returns the writer that receives the generated Go source.
func (ctx *Context) Writer() io.Writer {
return ctx.out
}
package compile
import (
"strings"
"git.sr.ht/~rj/gemplate/parse"
)
// Error is an error found while trying to compile a node of the AST.
type Error struct {
// Node is the AST node the error refers to; it provides the location.
Node parse.Node
// Message describes the problem.
Message string
}
// Error implements the error interface. The result is the node's location,
// as reported by parse.ErrorContext, followed by ": " and the message.
func (err *Error) Error() string {
	where, _ := parse.ErrorContext(err.Node)
	return where + ": " + err.Message
}
// AppendError combines two errors into one, treating nil as "no error":
// if either argument is nil the other one is returned unchanged. If err is
// already a combined error produced by AppendError, err2 is added to it in
// place and err is returned; otherwise a new combined error is created.
//
// A combined error formats as the messages of its parts, in order, separated
// by newlines, and implements Unwrap() []error.
func AppendError(err, err2 error) error {
	switch {
	case err2 == nil:
		return err
	case err == nil:
		return err2
	}

	if multi, ok := err.(*multiError); ok {
		multi.errs = append(multi.errs, err2)
		return multi
	}
	return &multiError{errs: []error{err, err2}}
}

// multiError is the combined error built by AppendError.
type multiError struct {
	errs []error
}

// Error joins the messages of all contained errors with newlines.
// AppendError only ever builds values holding at least two errors.
func (e *multiError) Error() string {
	parts := make([]string, len(e.errs))
	for i, err := range e.errs {
		parts[i] = err.Error()
	}
	return strings.Join(parts, "\n")
}

// Unwrap returns the contained errors.
func (e *multiError) Unwrap() []error {
	return e.errs
}
package compile
import (
"fmt"
"go/types"
"git.sr.ht/~rj/gemplate/parse"
)
// Func compiles the command node to a Go expression.
//
// Compile writes the expression to the context's writer and returns the
// static type of the expression. The first argument of the command node is
// the function's own identifier; the remaining ones are its arguments.
type Func interface {
Compile(*Context, *parse.CommandNode) (types.Type, error)
}
// Funcs is the table of built-in template functions, keyed by the name used
// in templates.
//
// The comparison, logical and print entries are integer values that pack the
// Go token (or function-name suffix) to emit into at most two bytes, with the
// first byte in the high 8 bits; the String method of each type unpacks it.
var Funcs = map[string]Func{
// Comparisons, compiled to the corresponding Go binary operator.
"eq": binaryComparison('='<<8 | '='),
"ne": binaryComparison('!'<<8 | '='),
"gt": binaryComparison('>'),
"lt": binaryComparison('<'),
"ge": binaryComparison('>'<<8 | '='),
"le": binaryComparison('<'<<8 | '='),
// Boolean logic over "truthy" values.
"not": (*notFn)(nil),
"or": logicalFn('|'<<8 | '|'),
"and": logicalFn('&'<<8 | '&'),
"call": (*callFn)(nil),
"len": (*lenFn)(nil),
"index": (*indexFn)(nil),
"slice": (*sliceFn)(nil),
// Formatting, compiled to fmt.Sprint, fmt.Sprintf and fmt.Sprintln.
"print": printFn(0),
"printf": printFn('f'),
"println": printFn('l'<<8 | 'n'),
}
// MakeParseFuncs returns the set of all function names found in the given
// function tables. The result is never nil.
func MakeParseFuncs(funcs ...map[string]Func) map[string]struct{} {
	names := make(map[string]struct{})
	for i := range funcs {
		for name := range funcs[i] {
			names[name] = struct{}{}
		}
	}
	return names
}
// binaryComparison is a template comparison function that compiles to a Go
// binary operator. The value packs the operator's one or two characters; see
// String.
type binaryComparison uint

// Compile emits "<arg1> <op> <arg2>" and returns the untyped boolean type.
// The command must have exactly two arguments. Errors from both operands are
// accumulated.
func (f binaryComparison) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	if argc := len(node.Args) - 1; argc != 2 {
		return nil, &Error{
			Node:    node,
			Message: fmt.Sprintf("expected 2 arguments, found %d", argc),
		}
	}

	lhs, rhs := node.Args[1], node.Args[2]
	_, lhsErr := ctx.CompileCommandNodeArg(lhs, true)
	fmt.Fprint(ctx.out, " ", f.String(), " ")
	_, rhsErr := ctx.CompileCommandNodeArg(rhs, true)

	return types.Typ[types.UntypedBool], AppendError(lhsErr, rhsErr)
}
// String returns the Go operator encoded in f: a single character when f fits
// in one byte, otherwise the two characters stored in the high and low bytes.
func (f binaryComparison) String() string {
	if f <= 0xff {
		return string(rune(f))
	}
	return string([]byte{uint8(f >> 8), uint8(f & 0xff)})
}
// notFn implements the template function "not".
type notFn struct{}

// Compile emits "!(<truthiness test of the argument>)" and returns the
// untyped boolean type. The command must have exactly one argument.
func (*notFn) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	if argc := len(node.Args) - 1; argc != 1 {
		return nil, &Error{
			Node:    node,
			Message: fmt.Sprintf("expected 1 arguments, found %d", argc),
		}
	}

	operand := node.Args[1]
	fmt.Fprint(ctx.out, "!(")
	_, err := ctx.CompileCommandNodeArgTruthy(operand)
	fmt.Fprint(ctx.out, ")")

	return types.Typ[types.UntypedBool], err
}
// logicalFn is a template logic function ("and", "or") that compiles to a Go
// binary logical operator. The value packs the operator's two characters; see
// String.
type logicalFn uint

// Compile emits the truthiness tests of all arguments joined by the operator
// and returns the untyped boolean type. At least two arguments are required.
// Errors from all operands are accumulated.
func (f logicalFn) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	if argc := len(node.Args) - 1; argc < 2 {
		return nil, &Error{
			Node:    node,
			Message: fmt.Sprintf("expect at least 2 arguments, found %d", argc),
		}
	}

	_, err := ctx.CompileCommandNodeArgTruthy(node.Args[1])
	for _, operand := range node.Args[2:] {
		fmt.Fprint(ctx.out, " ", f.String(), " ")
		_, operandErr := ctx.CompileCommandNodeArgTruthy(operand)
		err = AppendError(err, operandErr)
	}
	return types.Typ[types.UntypedBool], err
}
// String returns the two-character Go operator encoded in f: the high byte
// followed by the low byte.
func (f logicalFn) String() string {
	hi, lo := uint8(f>>8), uint8(f&0xff)
	return string([]byte{hi, lo})
}
// callFn implements the template function "call".
type callFn struct{}

// Compile emits a call of the function value given as the first argument,
// passing the remaining arguments, and returns the type of the function's
// first result.
//
// The first argument must compile to a function. The number of remaining
// arguments must match the function's parameters (at least the non-variadic
// ones for a variadic function), and each argument must be assignable to its
// parameter. Errors found in the arguments are accumulated.
func (*callFn) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	if len(node.Args) < 2 {
		return nil, &Error{
			Node:    node,
			Message: fmt.Sprintf("expected at least 1 argument, found %d", len(node.Args)-1),
		}
	}

	callee := node.Args[1]
	calleeType, err := ctx.CompileCommandNodeArg(callee, false)
	if err != nil {
		return nil, err
	}
	sig, isFunc := calleeType.(*types.Signature)
	if !isFunc {
		return nil, &Error{Node: callee, Message: "expected a function"}
	}

	// The counts in the messages below include "call" and the callee.
	callArgs := node.Args[2:]
	paramCount := sig.Params().Len()
	switch {
	case sig.Variadic() && len(callArgs) < paramCount-1:
		return calleeType, &Error{
			Node: node,
			Message: fmt.Sprintf("expected at least %d arguments, found %d",
				1+paramCount,
				len(node.Args),
			),
		}
	case !sig.Variadic() && len(callArgs) != paramCount:
		return calleeType, &Error{
			Node: node,
			Message: fmt.Sprintf("expected %d arguments, found %d",
				2+paramCount,
				len(node.Args),
			),
		}
	}

	fmt.Fprint(ctx.out, "(")
	for i, arg := range callArgs {
		if i > 0 {
			fmt.Fprint(ctx.out, ", ")
		}
		argType, argErr := ctx.CompileCommandNodeArg(arg, true)
		err = AppendError(err, argErr)
		if argType != nil && !assignableToParam(sig, i, argType) {
			err = AppendError(err, &Error{
				Node:    arg,
				Message: "argument not assignable to parameter",
			})
		}
	}
	fmt.Fprint(ctx.out, ")")

	return sig.Results().At(0).Type(), err
}
// lenFn implements the template function "len".
type lenFn struct{}

// Compile emits "len(<arg>)" and returns the int type. The command must have
// exactly one argument.
func (*lenFn) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	if argc := len(node.Args) - 1; argc != 1 {
		return nil, &Error{
			Node:    node,
			Message: fmt.Sprintf("expected 1 argument, found %d", argc),
		}
	}

	fmt.Fprint(ctx.out, "len(")
	_, err := ctx.CompileCommandNodeArg(node.Args[1], true)
	fmt.Fprint(ctx.out, ")")

	return types.Typ[types.Int], err
}
// indexFn implements the template function "index".
type indexFn struct{}

// Compile emits the first argument followed by one "[<index>]" per remaining
// argument and returns the element type reached after the last index. At
// least two arguments are required, and every indexed value must be a slice
// or a map.
func (*indexFn) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	if argc := len(node.Args) - 1; argc < 2 {
		return nil, &Error{
			Node:    node,
			Message: fmt.Sprintf("expected at least 2 arguments, found %d", argc),
		}
	}

	current, err := ctx.CompileCommandNodeArg(node.Args[1], true)
	if err != nil {
		return nil, err
	}

	for _, index := range node.Args[2:] {
		// Determine the element type first so that nothing is written when
		// the value cannot be indexed.
		var elem types.Type
		switch container := current.(type) {
		case *types.Slice:
			elem = container.Elem()
		case *types.Map:
			elem = container.Elem()
		default:
			return nil, &Error{
				Node:    index,
				Message: "expected a slice or a map in call to index",
			}
		}

		fmt.Fprint(ctx.out, "[")
		if _, err := ctx.CompileCommandNodeArg(index, true); err != nil {
			return nil, err
		}
		fmt.Fprint(ctx.out, "]")
		current = elem
	}
	return current, nil
}
// sliceFn implements the template function "slice".
type sliceFn struct{}

// Compile emits the first argument followed by a Go slice expression built
// from the remaining zero to three index arguments:
//
//	slice x        ->  x[:]
//	slice x a      ->  x[a:]
//	slice x a b    ->  x[a:b]
//	slice x a b c  ->  x[a:b:c]
//
// The value being sliced must be a slice, or have an array or string
// underlying type. Slicing an array yields a slice of its element type;
// otherwise the result type is the type of the value itself.
func (*sliceFn) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	if count := len(node.Args); count < 2 || count > 5 {
		return nil, &Error{
			Node:    node,
			Message: fmt.Sprintf("expected between 1 and 4 arguments, found %d", len(node.Args)-1),
		}
	}

	operand := node.Args[1]
	result, err := ctx.CompileCommandNodeArg(operand, true)
	if err != nil {
		return nil, err
	}

	arr, isArray := result.Underlying().(*types.Array)
	basic, isBasic := result.Underlying().(*types.Basic)
	_, isSlice := result.(*types.Slice)
	switch {
	case isArray:
		result = types.NewSlice(arr.Elem())
	case isBasic && basic.Kind() == types.String:
		// Slicing a string yields the same type.
	case !isSlice:
		return nil, &Error{
			Node:    operand,
			Message: "expected a slice or a map",
		}
	}

	indices := node.Args[2:]
	if len(indices) == 0 {
		fmt.Fprint(ctx.out, "[:]")
		return result, nil
	}

	fmt.Fprint(ctx.out, "[")
	for i, index := range indices {
		if i > 0 {
			fmt.Fprint(ctx.out, ":")
		}
		if _, err := ctx.CompileCommandNodeArg(index, true); err != nil {
			return result, err
		}
	}
	// A single index is the low bound and needs the trailing colon.
	if len(indices) == 1 {
		fmt.Fprint(ctx.out, ":]")
	} else {
		fmt.Fprint(ctx.out, "]")
	}
	return result, nil
}
// printFn is a template print function ("print", "printf", "println") that
// compiles to the matching fmt.Sprint* call. The value packs the suffix to
// append to "Sprint"; see String.
type printFn uint

// Compile emits "fmt.<Sprint variant>(<args...>)" and returns the string
// type. Errors from all arguments are accumulated.
//
// The command may have no arguments, in which case an empty argument list is
// emitted. The exception is printf, which needs at least a format argument
// and reports an error without one.
func (f printFn) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	args := node.Args[1:]
	if f == 'f' && len(args) == 0 {
		return nil, &Error{
			Node:    node,
			Message: "expected at least 1 argument, found 0",
		}
	}

	var err error
	fmt.Fprint(ctx.out, "fmt.", f.String(), "(")
	for i, arg := range args {
		if i > 0 {
			fmt.Fprint(ctx.out, ", ")
		}
		_, err2 := ctx.CompileCommandNodeArg(arg, true)
		err = AppendError(err, err2)
	}
	fmt.Fprint(ctx.out, ")")

	return types.Typ[types.String], err
}
// String returns the name of the fmt function selected by f: "Sprint"
// followed by the zero, one or two characters encoded in f (nothing for 0,
// one character when f fits in a byte, otherwise the high and low bytes).
func (f printFn) String() string {
	const base = "Sprint"
	switch {
	case f == 0:
		return base
	case f <= 0xff:
		return base + string(rune(f))
	default:
		return base + string([]byte{uint8(f >> 8), uint8(f & 0xff)})
	}
}
package compile
import (
"fmt"
"go/types"
"io"
)
// indexInto resolves ident against a value of type dot, writes the Go code
// that performs the access to out, and returns the type of the result.
//
// For a map whose key type accepts a string, ident is used as the key and
// [ident] (quoted) is written. For any other type, ident is looked up as a
// field or method visible from pkg and ".ident" is written. It returns nil,
// writing nothing, when ident cannot be resolved.
func indexInto(out io.Writer, pkg *types.Package, dot types.Type, ident string) types.Type {
	if mt, isMap := dot.(*types.Map); isMap {
		if types.AssignableTo(types.Typ[types.String], mt.Key()) {
			fmt.Fprintf(out, "[%q]", ident)
			return mt.Elem()
		}
		return nil
	}

	obj, _, _ := types.LookupFieldOrMethod(dot, true, pkg, ident)
	if obj == nil {
		return nil
	}
	fmt.Fprintf(out, ".%s", ident)
	return obj.Type()
}
package compile
import (
"fmt"
"go/types"
"git.sr.ht/~rj/gemplate/parse"
)
// MakePackageFuncs builds a function table from the functions declared at
// package scope in pkg. Only functions accepted by makePackageFunc are
// included. The result is an empty, non-nil table when pkg is nil.
func MakePackageFuncs(pkg *types.Package) map[string]Func {
	funcs := map[string]Func{}
	if pkg == nil {
		return funcs
	}

	scope := pkg.Scope()
	for _, name := range scope.Names() {
		fn, isFunc := scope.Lookup(name).(*types.Func)
		if !isFunc {
			continue
		}
		makePackageFunc(funcs, name, fn)
	}
	return funcs
}
// makePackageFunc registers fn in funcs under name, provided that fn has no
// receiver and returns exactly one result. Other functions are ignored.
func makePackageFunc(funcs map[string]Func, name string, fn *types.Func) {
	sig := fn.Type().(*types.Signature)
	results := sig.Results()
	if results == nil || results.Len() != 1 || sig.Recv() != nil {
		return
	}
	funcs[name] = externFunction{sig}
}
// externFunction is a template function backed by a Go function with the
// embedded signature.
type externFunction struct {
	*types.Signature
}

// Compile emits a call "<name>(<args...>)" where name is the command's first
// argument, and returns the type of the function's first result.
//
// The number of arguments must equal the number of parameters. For a variadic
// function, the final parameter may receive any number of arguments,
// including none, so at least Params().Len()-1 arguments are required.
func (sig externFunction) Compile(ctx *Context, node *parse.CommandNode) (types.Type, error) {
	args := node.Args[1:]
	paramCount := sig.Params().Len()

	if sig.Variadic() {
		if required := paramCount - 1; len(args) < required {
			return nil, &Error{
				Node:    node,
				Message: fmt.Sprintf("expected %d or more arguments, found %d", required, len(args)),
			}
		}
	} else if len(args) != paramCount {
		return nil, &Error{
			Node:    node,
			Message: fmt.Sprintf("expected %d arguments, found %d", paramCount, len(args)),
		}
	}

	fmt.Fprint(ctx.out, node.Args[0], "(")
	for i, arg := range args {
		if i > 0 {
			fmt.Fprint(ctx.out, ", ")
		}
		if _, err := ctx.CompileCommandNodeArg(arg, true); err != nil {
			return nil, err
		}
	}
	fmt.Fprint(ctx.out, ")")

	return sig.Results().At(0).Type(), nil
}
package compile
import (
"fmt"
"go/ast"
"go/parser"
"go/token"
"go/types"
"io"
"strings"
)
// CompileTypeExpr parses ident as a Go type expression and returns the
// types.Type it denotes, resolving names against pkg.
//
// An empty ident is treated as "any".
func CompileTypeExpr(pkg *types.Package, ident string) (types.Type, error) {
	src := ident
	if src == "" {
		src = "any"
	}

	expr, err := parser.ParseExpr(src)
	if err == nil {
		return compileTypeAST(pkg, expr)
	}
	return nil, fmt.Errorf("failed to parse type expression '%s': %w", strings.TrimSpace(src), err)
}
// compileTypeAST converts the AST of a type expression into a types.Type.
//
// Supported forms are identifiers (looked up in pkg's scope and its parents),
// pointers, array/slice syntax, maps, the empty struct, interfaces, function
// types, and qualified identifiers naming a type in one of pkg's imports.
// Any other expression is reported as an error.
func compileTypeAST(pkg *types.Package, node ast.Node) (types.Type, error) {
	switch n := node.(type) {
	case *ast.Ident:
		_, obj := pkg.Scope().LookupParent(n.String(), token.NoPos)
		if obj == nil {
			return nil, fmt.Errorf("failed to lookup type '%s'", n)
		}
		return obj.Type(), nil

	case *ast.StarExpr:
		elem, err := compileTypeAST(pkg, n.X)
		if err != nil {
			return nil, err
		}
		return types.NewPointer(elem), nil

	case *ast.ArrayType:
		// NOTE(review): n.Len is not inspected, so a fixed-size array
		// expression also yields a slice type -- confirm this is intended.
		elem, err := compileTypeAST(pkg, n.Elt)
		if err != nil {
			return nil, err
		}
		return types.NewSlice(elem), nil

	case *ast.MapType:
		key, err := compileTypeAST(pkg, n.Key)
		value, err2 := compileTypeAST(pkg, n.Value)
		if err = AppendError(err, err2); err != nil {
			return nil, err
		}
		return types.NewMap(key, value), nil

	case *ast.StructType:
		if n.Fields.NumFields() != 0 {
			return nil, fmt.Errorf("inline definition of struct types not support in template definition")
		}
		return types.NewStruct(nil, nil), nil

	case *ast.InterfaceType:
		methods := []*types.Func(nil)
		embeddeds := []types.Type(nil)
		for _, m := range n.Methods.List {
			elemType, err := compileTypeAST(pkg, m.Type)
			if err != nil {
				return nil, err
			}
			// An element without a name is an embedded type, not a method.
			if len(m.Names) == 0 {
				embeddeds = append(embeddeds, elemType)
				continue
			}
			sig, ok := elemType.(*types.Signature)
			if !ok {
				return nil, fmt.Errorf("interface method '%s' is not a function", m.Names[0])
			}
			methods = append(methods,
				types.NewFunc(token.NoPos, pkg, m.Names[0].String(), sig))
		}
		return types.NewInterfaceType(methods, embeddeds), nil

	case *ast.FuncType:
		params := []*types.Var(nil)
		results := []*types.Var(nil)
		variadic := false
		if n.Params != nil {
			for _, field := range n.Params.List {
				paramType := field.Type
				if et, ok := paramType.(*ast.Ellipsis); ok {
					variadic = true
					paramType = et.Elt
				}
				pt, err := compileTypeAST(pkg, paramType)
				if err != nil {
					return nil, err
				}
				if variadic {
					pt = types.NewSlice(pt)
				}
				// A field such as "a, b int" declares one parameter per
				// name; an unnamed field declares exactly one.
				for i := fieldArity(field); i > 0; i-- {
					params = append(params, types.NewParam(token.NoPos, pkg, "", pt))
				}
			}
		}
		if n.Results != nil {
			for _, field := range n.Results.List {
				rt, err := compileTypeAST(pkg, field.Type)
				if err != nil {
					return nil, err
				}
				for i := fieldArity(field); i > 0; i-- {
					results = append(results, types.NewVar(token.NoPos, pkg, "", rt))
				}
			}
		}
		return types.NewSignatureType(nil, nil, nil,
				types.NewTuple(params...), types.NewTuple(results...), variadic),
			nil

	case *ast.SelectorExpr:
		// Only "pkg.Name" is supported; any other operand is rejected
		// instead of panicking on the type assertion.
		qualifier, ok := n.X.(*ast.Ident)
		if !ok {
			return nil, fmt.Errorf("failed to compile type expression")
		}
		pkgname := qualifier.String()
		selname := n.Sel.String()

		var ipkg *types.Package
		for _, imported := range pkg.Imports() {
			if imported.Name() == pkgname {
				ipkg = imported
				break
			}
		}
		if ipkg == nil {
			return nil, fmt.Errorf("failed to lookup package'%s'", pkgname)
		}
		_, obj := ipkg.Scope().LookupParent(selname, token.NoPos)
		if obj == nil {
			return nil, fmt.Errorf("failed to lookup type '%s.%s'", pkgname, selname)
		}
		return obj.Type(), nil
	}

	return nil, fmt.Errorf("failed to compile type expression")
}

// fieldArity returns how many parameters or results a field declares: one per
// name, or one when the field is unnamed.
func fieldArity(field *ast.Field) int {
	if len(field.Names) == 0 {
		return 1
	}
	return len(field.Names)
}
// assignableToParam reports whether a value of type dot can be passed as the
// argument at position index of a call to sig. For a variadic signature, any
// argument at or beyond the final parameter is checked against the element
// type of that parameter.
func assignableToParam(sig *types.Signature, index int, dot types.Type) bool {
	params := sig.Params()
	last := params.Len() - 1

	var target types.Type
	if sig.Variadic() && index >= last {
		target = params.At(last).Type().(*types.Slice).Elem()
	} else {
		target = params.At(index).Type()
	}
	return types.AssignableTo(dot, target)
}
// implicitCall turns a function value into a call: when dot is a function
// type, it writes "()" to out and returns the type of the function's first
// result. Any other type is returned unchanged and nothing is written.
//
// The function is assumed to take no arguments and to return at least one
// value.
func implicitCall(out io.Writer, dot types.Type) types.Type {
	sig, isFunc := dot.(*types.Signature)
	if !isFunc {
		return dot
	}
	fmt.Fprint(out, "()")
	return sig.Results().At(0).Type()
}
package content
import (
"strings"
)
// attrTypeMap[n] describes the content type of the value of the attribute
// named n (in lower case).
// If an attribute affects (or can mask) the encoding or interpretation of
// other content, or affects the contents, idempotency, or credentials of a
// network message, then the value in this map is TypeUnsafe.
// This map is derived from HTML5, specifically
// https://www.w3.org/TR/html5/Overview.html#attributes-1
// as well as "%URI"-typed attributes from
// https://www.w3.org/TR/html4/index/attributes.html
//
// Attributes that are not listed here are classified by the heuristics in
// AttrType.
var attrTypeMap = map[string]Type{
"accept": TypePlain,
"accept-charset": TypeUnsafe,
"action": TypeURL,
"alt": TypePlain,
"archive": TypeURL,
"async": TypeUnsafe,
"autocomplete": TypePlain,
"autofocus": TypePlain,
"autoplay": TypePlain,
"background": TypeURL,
"border": TypePlain,
"checked": TypePlain,
"cite": TypeURL,
"challenge": TypeUnsafe,
"charset": TypeUnsafe,
"class": TypePlain,
"classid": TypeURL,
"codebase": TypeURL,
"cols": TypePlain,
"colspan": TypePlain,
"content": TypeUnsafe,
"contenteditable": TypePlain,
"contextmenu": TypePlain,
"controls": TypePlain,
"coords": TypePlain,
"crossorigin": TypeUnsafe,
"data": TypeURL,
"datetime": TypePlain,
"default": TypePlain,
"defer": TypeUnsafe,
"dir": TypePlain,
"dirname": TypePlain,
"disabled": TypePlain,
"draggable": TypePlain,
"dropzone": TypePlain,
"enctype": TypeUnsafe,
"for": TypePlain,
"form": TypeUnsafe,
"formaction": TypeURL,
"formenctype": TypeUnsafe,
"formmethod": TypeUnsafe,
"formnovalidate": TypeUnsafe,
"formtarget": TypePlain,
"headers": TypePlain,
"height": TypePlain,
"hidden": TypePlain,
"high": TypePlain,
"href": TypeURL,
"hreflang": TypePlain,
"http-equiv": TypeUnsafe,
"icon": TypeURL,
"id": TypePlain,
"ismap": TypePlain,
"keytype": TypeUnsafe,
"kind": TypePlain,
"label": TypePlain,
"lang": TypePlain,
"language": TypeUnsafe,
"list": TypePlain,
"longdesc": TypeURL,
"loop": TypePlain,
"low": TypePlain,
"manifest": TypeURL,
"max": TypePlain,
"maxlength": TypePlain,
"media": TypePlain,
"mediagroup": TypePlain,
"method": TypeUnsafe,
"min": TypePlain,
"multiple": TypePlain,
"name": TypePlain,
"novalidate": TypeUnsafe,
// Skip handler names from
// https://www.w3.org/TR/html5/webappapis.html#event-handlers-on-elements,-document-objects,-and-window-objects
// since we have special handling in AttrType.
"open": TypePlain,
"optimum": TypePlain,
"pattern": TypeUnsafe,
"placeholder": TypePlain,
"poster": TypeURL,
"profile": TypeURL,
"preload": TypePlain,
"pubdate": TypePlain,
"radiogroup": TypePlain,
"readonly": TypePlain,
"rel": TypeUnsafe,
"required": TypePlain,
"reversed": TypePlain,
"rows": TypePlain,
"rowspan": TypePlain,
"sandbox": TypeUnsafe,
"spellcheck": TypePlain,
"scope": TypePlain,
"scoped": TypePlain,
"seamless": TypePlain,
"selected": TypePlain,
"shape": TypePlain,
"size": TypePlain,
"sizes": TypePlain,
"span": TypePlain,
"src": TypeURL,
"srcdoc": TypeHTML,
"srclang": TypePlain,
"srcset": TypeSrcset,
"start": TypePlain,
"step": TypePlain,
"style": TypeCSS,
"tabindex": TypePlain,
"target": TypePlain,
"title": TypePlain,
"type": TypeUnsafe,
"usemap": TypeURL,
"value": TypeUnsafe,
"width": TypePlain,
"wrap": TypePlain,
"xmlns": TypeURL,
}
// AttrType returns a conservative (upper-bound on authority) guess at the
// content type of the attribute with the given name. The name is expected to
// be in lower case.
//
// A "data-" prefix is stripped before classification, as is a namespace
// prefix ("svg:href" is treated as "href"); any attribute in the "xmlns"
// namespace is a URL. Names not found in attrTypeMap are classified
// heuristically: names starting with "on" are script, names containing
// "src", "uri" or "url" are URLs, and everything else is plain text.
func AttrType(name string) Type {
	const dataPrefix = "data-"

	if strings.HasPrefix(name, dataPrefix) {
		// Drop the prefix so that the heuristics below also cover custom
		// data attributes, e.g. data-action is treated like action.
		name = name[len(dataPrefix):]
	} else if namespace, local, found := strings.Cut(name, ":"); found {
		if namespace == "xmlns" {
			return TypeURL
		}
		// svg:href and xlink:href are classified like href.
		name = local
	}

	if known, ok := attrTypeMap[name]; ok {
		return known
	}

	// Anything that looks like an event handler is treated as script.
	if strings.HasPrefix(name, "on") {
		return TypeJS
	}

	// Heuristics to prevent "javascript:..." injection in custom data
	// attributes and custom attributes like g:tweetUrl.
	// https://www.w3.org/TR/html5/dom.html#embedding-custom-non-visible-data-with-the-data-*-attributes
	// Developers seem to store URL content in attributes whose names
	// mention "src", "uri" or "url".
	for _, hint := range [...]string{"src", "uri", "url"} {
		if strings.Contains(name, hint) {
			return TypeURL
		}
	}
	return TypePlain
}
package css
import (
"bytes"
"unicode"
"unicode/utf8"
)
// DecodeCSS decodes CSS3 escapes given a sequence of stringchars.
// If there is no change, it returns the input, otherwise it returns a slice
// backed by a new array.
// https://www.w3.org/TR/css3-syntax/#SUBTOK-stringchar defines stringchar.
//
// A backslash that is the last byte of the input is dropped.
func DecodeCSS(s []byte) []byte {
// Fast path: without a backslash there is nothing to decode.
i := bytes.IndexByte(s, '\\')
if i == -1 {
return s
}
// The UTF-8 sequence for a codepoint is never longer than 1 + the
// number of hex digits needed to represent that codepoint, so len(s) is
// an upper bound on the output length.
b := make([]byte, 0, len(s))
for len(s) != 0 {
// Copy everything up to the next backslash (or the end) verbatim.
i := bytes.IndexByte(s, '\\')
if i == -1 {
i = len(s)
}
b, s = append(b, s[:i]...), s[i:]
// Stop when the input is exhausted or only a lone backslash remains.
if len(s) < 2 {
break
}
// https://www.w3.org/TR/css3-syntax/#SUBTOK-escape
// escape ::= unicode | '\' [#x20-#x7E#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
if IsHex(s[1]) {
// https://www.w3.org/TR/css3-syntax/#SUBTOK-unicode
// unicode ::= '\' [0-9a-fA-F]{1,6} wc?
// j ends up one past the last hex digit consumed (at most six digits).
j := 2
for j < len(s) && j < 7 && IsHex(s[j]) {
j++
}
r := HexDecode(s[1:j])
// If the value is not a valid code point, drop the last hex digit
// and leave it in the input as a literal character.
if r > unicode.MaxRune {
r, j = r/16, j-1
}
// Encode directly into the spare capacity of b.
n := utf8.EncodeRune(b[len(b):cap(b)], r)
// The optional space at the end allows a hex
// sequence to be followed by a literal hex.
// string(DecodeCSS([]byte(`\A B`))) == "\nB"
b, s = b[:len(b)+n], SkipSpace(s[j:])
} else {
// `\\` decodes to `\` and `\"` to `"`.
_, n := utf8.DecodeRune(s[1:])
b, s = append(b, s[1:1+n]...), s[1+n:]
}
}
return b
}
package css
import "fmt"
// IsHex reports whether c is an ASCII hexadecimal digit (0-9, a-f or A-F).
func IsHex(c byte) bool {
	switch {
	case c >= '0' && c <= '9':
		return true
	case c >= 'a' && c <= 'f':
		return true
	case c >= 'A' && c <= 'F':
		return true
	}
	return false
}
// HexDecode decodes a short sequence of hex digits into its numeric value,
// e.g. "10" -> 16. An empty sequence decodes to 0.
// It panics if s contains a byte that is not a hex digit.
func HexDecode(s []byte) rune {
	var value rune
	for _, c := range s {
		var digit byte
		switch {
		case c >= '0' && c <= '9':
			digit = c - '0'
		case c >= 'a' && c <= 'f':
			digit = c - 'a' + 10
		case c >= 'A' && c <= 'F':
			digit = c - 'A' + 10
		default:
			panic(fmt.Sprintf("Bad hex digit in %q", s))
		}
		// Make room for the next nibble and merge it in.
		value = value<<4 | rune(digit)
	}
	return value
}
package css
// SkipSpace returns the suffix of c that remains after skipping at most one
// leading CSS whitespace sequence; c is returned as is when it does not start
// with whitespace.
//
//	wc ::= #x9 | #xA | #xC | #xD | #x20
//
// Unlike the CSS3 wc production, a CRLF pair is skipped as a single unit: the
// spec's wc lists every single-byte newline sequence but omits CRLF, which is
// probably a spec error.
func SkipSpace(c []byte) []byte {
	if len(c) == 0 {
		return c
	}
	first := c[0]
	if first == '\r' {
		if len(c) > 1 && c[1] == '\n' {
			return c[2:]
		}
		return c[1:]
	}
	if first == '\t' || first == '\n' || first == '\f' || first == ' ' {
		return c[1:]
	}
	return c
}
// IsSpace reports whether b is one of the CSS whitespace bytes listed in the
// wc production: tab, line feed, form feed, carriage return or space.
func IsSpace(b byte) bool {
	return b == '\t' || b == '\n' || b == '\f' || b == '\r' || b == ' '
}
// Code generated by "stringer -type attr"; DO NOT EDIT.
package htmlescape
import "strconv"
// _ is a compile-time guard: each index expression below is in range only
// while the named constant still has the value the tables were generated for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[attrNone-0]
_ = x[attrScript-1]
_ = x[attrScriptType-2]
_ = x[attrStyle-3]
_ = x[attrURL-4]
_ = x[attrSrcset-5]
}
// _attr_name is the concatenation of all attr constant names; _attr_index
// holds the offset at which each name starts, followed by the total length.
const _attr_name = "attrNoneattrScriptattrScriptTypeattrStyleattrURLattrSrcset"
var _attr_index = [...]uint8{0, 8, 18, 32, 41, 48, 58}
// String returns the name of the constant, or "attr(N)" with N in decimal when
// i lies outside the generated table.
func (i attr) String() string {
if i >= attr(len(_attr_index)-1) {
return "attr(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _attr_name[_attr_index[i]:_attr_index[i+1]]
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"fmt"
"git.sr.ht/~rj/gemplate/parse"
)
// context describes the state an HTML parser must be in when it reaches the
// portion of HTML produced by evaluating a particular template node.
//
// The zero value of type context is the start context for a template that
// produces an HTML fragment as defined at
// https://www.w3.org/TR/html5/syntax.html#the-end
// where the context element is null.
//
// Equality (see eq) is decided by state, delim, urlPart, jsCtx, attr, element
// and err only; jsBraceDepth and n do not take part in it.
type context struct {
// state is the parser state; stateError and stateDead mark contexts that
// carry an error or follow a break/continue.
state state
// delim is the delimiter that ends the current attribute value, or
// delimNone outside of one.
delim delim
// urlPart is the part of a URL being produced; it matters only in the
// URL and CSS string/URL states.
urlPart urlPart
// jsCtx records whether a following '/' would start a regexp or a
// division operator.
jsCtx jsCtx
// jsBraceDepth contains the current depth, for each JS template literal
// string interpolation expression, of braces we've seen. This is used to
// determine if the next } will close a JS template literal string
// interpolation expression or not.
jsBraceDepth []int
// attr is the kind of attribute whose value is being, or is about to be,
// parsed.
attr attr
// element is the special element (script, style, ...) being parsed, if any.
element element
n parse.Node // for range break/continue
// err is the error attached to a stateError context.
err *Error
}
// String renders the comparable parts of the context, in the order state,
// delim, urlPart, jsCtx, attr, element, err, for use in error messages.
func (c context) String() string {
	// Copy the *Error into an error interface only when it is set, so that
	// an absent error prints as a plain nil interface.
	var cause error
	if c.err != nil {
		cause = c.err
	}
	fields := []any{c.state, c.delim, c.urlPart, c.jsCtx, c.attr, c.element, cause}
	return fmt.Sprintf("{%v %v %v %v %v %v %v}", fields...)
}
// eq reports whether two contexts are equal. Only state, delim, urlPart,
// jsCtx, attr, element and err are compared; err is compared by pointer.
func (c context) eq(d context) bool {
	switch {
	case c.state != d.state,
		c.delim != d.delim,
		c.urlPart != d.urlPart,
		c.jsCtx != d.jsCtx,
		c.attr != d.attr,
		c.element != d.element,
		c.err != d.err:
		return false
	}
	return true
}
// mangle produces an identifier made of templateName plus a suffix that
// distinguishes it from the same name mangled under a different context.
// The state is always part of the suffix; delim, urlPart, jsCtx, attr and
// element are appended only when they differ from their default values.
func (c context) mangle(templateName string) string {
	parts := []string{templateName, c.state.String()}
	if c.delim != delimNone {
		parts = append(parts, c.delim.String())
	}
	if c.urlPart != urlPartNone {
		parts = append(parts, c.urlPart.String())
	}
	if c.jsCtx != jsCtxRegexp {
		parts = append(parts, c.jsCtx.String())
	}
	if c.attr != attrNone {
		parts = append(parts, c.attr.String())
	}
	if c.element != elementNone {
		parts = append(parts, c.element.String())
	}
	mangled := parts[0]
	for _, part := range parts[1:] {
		mangled += "_" + part
	}
	return mangled
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"bytes"
"unicode/utf8"
)
// endsWithCSSKeyword reports whether b ends with an identifier that
// case-insensitively matches kw, which must be given in lower case.
// The match must be a whole identifier: the rune before it may not be one
// that can appear in a CSS identifier.
func endsWithCSSKeyword(b []byte, kw string) bool {
	start := len(b) - len(kw)
	switch {
	case start < 0:
		// b is shorter than the keyword.
		return false
	case start > 0:
		// The candidate is the tail of a longer identifier.
		if prev, _ := utf8.DecodeLastRune(b[:start]); isCSSNmchar(prev) {
			return false
		}
	}
	// Many CSS keywords, such as "!important", may have characters encoded,
	// but the URI production does not allow that according to
	// https://www.w3.org/TR/css3-syntax/#TOK-URI
	// Encoded keywords are deliberately not recognized: given "\75\72\6c"
	// and "url" this returns false.
	tail := bytes.ToLower(b[start:])
	return string(tail) == kw
}
// isCSSNmchar reports whether r may appear anywhere in a CSS identifier.
// It follows the CSS3 nmchar production but ignores multi-rune escape
// sequences: https://www.w3.org/TR/css3-syntax/#SUBTOK-nmchar
func isCSSNmchar(r rune) bool {
	switch {
	case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9':
		return true
	case r == '-', r == '_':
		return true
	// Non-ASCII ranges.
	case r >= 0x80 && r <= 0xd7ff,
		r >= 0xe000 && r <= 0xfffd,
		r >= 0x10000 && r <= 0x10ffff:
		return true
	}
	return false
}
// Code generated by "stringer -type delim"; DO NOT EDIT.
package htmlescape
import "strconv"
// _ is a compile-time guard: each index expression below is in range only
// while the named constant still has the value the tables were generated for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[delimNone-0]
_ = x[delimDoubleQuote-1]
_ = x[delimSingleQuote-2]
_ = x[delimSpaceOrTagEnd-3]
}
// _delim_name is the concatenation of all delim constant names; _delim_index
// holds the offset at which each name starts, followed by the total length.
const _delim_name = "delimNonedelimDoubleQuotedelimSingleQuotedelimSpaceOrTagEnd"
var _delim_index = [...]uint8{0, 9, 25, 41, 59}
// String returns the name of the constant, or "delim(N)" with N in decimal
// when i lies outside the generated table.
func (i delim) String() string {
if i >= delim(len(_delim_index)-1) {
return "delim(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _delim_name[_delim_index[i]:_delim_index[i+1]]
}
package htmlescape
import "regexp"
// delimEnds maps each delim to a string of characters that terminate it.
// No entry is given for delimNone, so its slot holds the empty string.
var delimEnds = [...]string{
delimDoubleQuote: `"`,
delimSingleQuote: "'",
// Determined empirically by running the below in various browsers.
// var div = document.createElement("DIV");
// for (var i = 0; i < 0x10000; ++i) {
// div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>";
// if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0)
// document.write("<p>U+" + i.toString(16));
// }
delimSpaceOrTagEnd: " \t\n\f\r>",
}
// Per WHATWG HTML specification, section 4.12.1.3, there are extremely
// complicated rules for how to handle the opening tags <!--, <script and
// </script when they appear in JS literals (i.e. strings, regexps and
// comments). Rather than implementing the arcane ABNF, the specification
// suggests simply escaping the opening bracket with \x3C.
//
// specialScriptTagRE matches those three openers case-insensitively and
// captures the text that follows the '<'.
var specialScriptTagRE = regexp.MustCompile("(?i)<(script|/script|!--)")

// specialScriptTagReplacement rewrites a match to \x3C followed by the
// captured text.
var specialScriptTagReplacement = []byte(`\x3C$1`)

// containsSpecialScriptTag reports whether s contains <!--, <script or
// </script in any letter case.
func containsSpecialScriptTag(s []byte) bool {
	found := specialScriptTagRE.Match(s)
	return found
}

// escapeSpecialScriptTags returns a copy of s in which the '<' of every
// <!--, <script and </script occurrence is replaced by \x3C.
func escapeSpecialScriptTags(s []byte) []byte {
	escaped := specialScriptTagRE.ReplaceAll(s, specialScriptTagReplacement)
	return escaped
}
// Code generated by "stringer -type element"; DO NOT EDIT.
package htmlescape
import "strconv"
// _ is a compile-time guard: each index expression below is in range only
// while the named constant still has the value the tables were generated for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[elementNone-0]
_ = x[elementScript-1]
_ = x[elementStyle-2]
_ = x[elementTextarea-3]
_ = x[elementTitle-4]
}
// _element_name is the concatenation of all element constant names;
// _element_index holds the offset at which each name starts, followed by the
// total length.
const _element_name = "elementNoneelementScriptelementStyleelementTextareaelementTitle"
var _element_index = [...]uint8{0, 11, 24, 36, 51, 63}
// String returns the name of the constant, or "element(N)" with N in decimal
// when i lies outside the generated table.
func (i element) String() string {
if i >= element(len(_element_index)-1) {
return "element(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _element_name[_element_index[i]:_element_index[i+1]]
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"fmt"
"git.sr.ht/~rj/gemplate/parse"
)
// Error describes a problem encountered during template Escaping.
//
// NOTE: errorf builds values of this type with an unkeyed struct literal, so
// the order of the fields below must not change without updating it.
type Error struct {
// ErrorCode describes the kind of error.
ErrorCode ErrorCode
// Node is the node that caused the problem, if known.
// If not nil, it overrides Name and Line.
Node parse.Node
// Name is the name of the template in which the error was encountered.
Name string
// Line is the line number of the error in the template source or 0.
Line int
// Description is a human-readable description of the problem.
Description string
}
// ErrorCode is a code for a kind of error.
type ErrorCode int
// We define codes for each error that manifests while escaping templates, but
// escaped templates may also fail at runtime.
//
// Output: "ZgotmplZ"
// Example:
//
// <img src="{{.X}}">
// where {{.X}} evaluates to `javascript:...`
//
// Discussion:
//
// "ZgotmplZ" is a special value that indicates that unsafe content reached a
// CSS or URL context at runtime. The output of the example will be
// <img src="#ZgotmplZ">
// If the data comes from a trusted source, use content types to exempt it
// from filtering: URL(`javascript:...`).
const (
// OK indicates the lack of an error.
OK ErrorCode = iota
// ErrAmbigContext: "... appears in an ambiguous context within a URL"
// Example:
// <a href="
// {{if .C}}
// /path/
// {{else}}
// /search?q=
// {{end}}
// {{.X}}
// ">
// Discussion:
// {{.X}} is in an ambiguous URL context since, depending on {{.C}},
// it may be either a URL suffix or a query parameter.
// Moving {{.X}} into the condition removes the ambiguity:
// <a href="{{if .C}}/path/{{.X}}{{else}}/search?q={{.X}}">
ErrAmbigContext
// ErrBadHTML: "expected space, attr name, or end of tag, but got ...",
// "... in unquoted attr", "... in attribute name"
// Example:
// <a href = /search?q=foo>
// <href=foo>
// <form na<e=...>
// <option selected<
// Discussion:
// This is often due to a typo in an HTML element, but some runes
// are banned in tag names, attribute names, and unquoted attribute
// values because they can tickle parser ambiguities.
// Quoting all attributes is the best policy.
ErrBadHTML
// ErrBranchEnd: "{{if}} branches end in different contexts"
// Examples:
// {{if .C}}<a href="{{end}}{{.X}}
// <script {{with .T}}type="{{.}}"{{end}}>
// Discussion:
// Package html/template statically examines each path through an
// {{if}}, {{range}}, or {{with}} to escape any following pipelines.
// The first example is ambiguous since {{.X}} might be an HTML text node,
// or a URL prefix in an HTML attribute. The context of {{.X}} is
// used to figure out how to escape it, but that context depends on
// the run-time value of {{.C}} which is not statically known.
// The second example is ambiguous as the script type attribute
// can change the type of escaping needed for the script contents.
//
// The problem is usually something like missing quotes or angle
// brackets, or can be avoided by refactoring to put the two contexts
// into different branches of an if, range or with. If the problem
// is in a {{range}} over a collection that should never be empty,
// adding a dummy {{else}} can help.
ErrBranchEnd
// ErrEndContext: "... ends in a non-text context: ..."
// Examples:
// <div
// <div title="no close quote>
// <script>f()
// Discussion:
// Executed templates should produce a DocumentFragment of HTML.
// Templates that end without closing tags will trigger this error.
// Templates that should not be used in an HTML context or that
// produce incomplete Fragments should not be executed directly.
//
// {{define "main"}} <script>{{template "helper"}}</script> {{end}}
// {{define "helper"}} document.write(' <div title=" ') {{end}}
//
// "helper" does not produce a valid document fragment, so should
// not be Executed directly.
ErrEndContext
// ErrNoSuchTemplate: "no such template ..."
// Examples:
// {{define "main"}}<div {{template "attrs"}}>{{end}}
// {{define "attrs"}}href="{{.URL}}"{{end}}
// Discussion:
// Package html/template looks through template calls to compute the
// context.
// Here the {{.URL}} in "attrs" must be treated as a URL when called
// from "main", but you will get this error if "attrs" is not defined
// when "main" is parsed.
ErrNoSuchTemplate
// ErrOutputContext: "cannot compute output context for template ..."
// Examples:
// {{define "t"}}{{if .T}}{{template "t" .T}}{{end}}{{.H}}",{{end}}
// Discussion:
// A recursive template does not end in the same context in which it
// starts, and a reliable output context cannot be computed.
// Look for typos in the named template.
// If the template should not be called in the named start context,
// look for calls to that template in unexpected contexts.
// Maybe refactor recursive templates to not be recursive.
ErrOutputContext
// ErrPartialCharset: "unfinished JS regexp charset in ..."
// Example:
// <script>var pattern = /foo[{{.Chars}}]/</script>
// Discussion:
// Package html/template does not support interpolation into regular
// expression literal character sets.
ErrPartialCharset
// ErrPartialEscape: "unfinished escape sequence in ..."
// Example:
// <script>alert("\{{.X}}")</script>
// Discussion:
// Package html/template does not support actions following a
// backslash.
// This is usually an error and there are better solutions; for
// example
// <script>alert("{{.X}}")</script>
// should work, and if {{.X}} is a partial escape sequence such as
// "xA0", mark the whole sequence as safe content: JSStr(`\xA0`)
ErrPartialEscape
// ErrRangeLoopReentry: "on range loop re-entry: ..."
// Example:
// <script>var x = [{{range .}}'{{.}},{{end}}]</script>
// Discussion:
// If an iteration through a range would cause it to end in a
// different context than an earlier pass, there is no single context.
// In the example, a closing quote is missing, so it is not clear
// whether {{.}} is meant to be inside a JS string or in a JS value
// context. The second iteration would produce something like
//
// <script>var x = ['firstValue,'secondValue]</script>
ErrRangeLoopReentry
// ErrSlashAmbig: '/' could start a division or regexp.
// Example:
// <script>
// {{if .C}}var x = 1{{end}}
// /-{{.N}}/i.test(x) ? doThis : doThat();
// </script>
// Discussion:
// The example above could produce `var x = 1/-2/i.test(s)...`
// in which the first '/' is a mathematical division operator or it
// could produce `/-2/i.test(s)` in which the first '/' starts a
// regexp literal.
// Look for missing semicolons inside branches, and maybe add
// parentheses to make it clear which interpretation you intend.
ErrSlashAmbig
// ErrPredefinedEscaper: "predefined escaper ... disallowed in template"
// Example:
// <div class={{. | html}}>Hello<div>
// Discussion:
// Package html/template already contextually escapes all pipelines to
// produce HTML output safe against code injection. Manually escaping
// pipeline output using the predefined escapers "html" or "urlquery" is
// unnecessary, and may affect the correctness or safety of the escaped
// pipeline output in Go 1.8 and earlier.
//
// In most cases, such as the given example, this error can be resolved by
// simply removing the predefined escaper from the pipeline and letting the
// contextual autoescaper handle the escaping of the pipeline. In other
// instances, where the predefined escaper occurs in the middle of a
// pipeline where subsequent commands expect escaped input, e.g.
// {{.X | html | makeALink}}
// where makeALink does
// return `<a href="`+input+`">link</a>`
// consider refactoring the surrounding template to make use of the
// contextual autoescaper, i.e.
// <a href="{{.X}}">link</a>
//
// To ease migration to Go 1.9 and beyond, "html" and "urlquery" will
// continue to be allowed as the last command in a pipeline. However, if the
// pipeline occurs in an unquoted attribute value context, "html" is
// disallowed. Avoid using "html" and "urlquery" entirely in new templates.
ErrPredefinedEscaper
// ErrJSTemplate: "... appears in a JS template literal"
// Example:
// <script>var tmpl = `{{.Interp}}`</script>
// Discussion:
// Package html/template does not support actions inside of JS template
// literals.
//
// Deprecated: ErrJSTemplate is no longer returned when an action is present
// in a JS template literal. Actions inside of JS template literals are now
// escaped as expected.
ErrJSTemplate
)
// Error formats the error as "html/template:<location>: <description>".
// The location is taken from the most specific information available: the
// offending node, then the template name with line, then the template name
// alone; with none of these only the description is reported.
func (e *Error) Error() string {
	if e.Node != nil {
		// ErrorContext is invoked on a nil tree; only the location string
		// it returns is used.
		loc, _ := (*parse.Tree)(nil).ErrorContext(e.Node)
		return fmt.Sprintf("html/template:%s: %s", loc, e.Description)
	}
	if e.Line != 0 {
		return fmt.Sprintf("html/template:%s:%d: %s", e.Name, e.Line, e.Description)
	}
	if e.Name != "" {
		return fmt.Sprintf("html/template:%s: %s", e.Name, e.Description)
	}
	return "html/template: " + e.Description
}
// errorf creates an *Error of kind k for the given node and line, with a
// description formatted from f and args.
// The template Name is left empty and still needs to be supplied.
func errorf(k ErrorCode, node parse.Node, line int, f string, args ...any) *Error {
	return &Error{
		ErrorCode:   k,
		Node:        node,
		Name:        "",
		Line:        line,
		Description: fmt.Sprintf(f, args...),
	}
}
package htmlescape
import (
"bytes"
"fmt"
"html"
"git.sr.ht/~rj/gemplate/parse"
)
// escaper collects type inferences about templates and changes needed to make
// templates injection safe.
type escaper struct {
// ns is the nameSpace that this escaper is associated with.
// escapeTree looks up already escaped trees in it by mangled name.
ns map[string]*parse.Tree
// rangeContext holds context about the current range loop.
// It is nil outside of any {{range}}; escapeBranch pushes a fresh value
// while a range body is being escaped and pops it afterwards.
rangeContext *rangeContext
}
// makeEscaper creates a blank escaper bound to the given template set.
// The new escaper starts outside of any range loop.
func makeEscaper(ns map[string]*parse.Tree) escaper {
	var e escaper
	e.ns = ns
	return e
}
// Escape contextually escapes the template t, starting from the zero
// (HTML fragment) context, and returns the escaped tree.
//
// tree is the set of known templates. On success the escaped tree is stored
// in it under its own (mangled) name. On failure the returned tree is nil and
// the error is the *Error that put the escaper into the error state.
func Escape(tree map[string]*parse.Tree, t *parse.Tree) (*parse.Tree, error) {
	esc := makeEscaper(tree)
	end, escaped := esc.escapeTree(context{}, t)
	if end.state == stateError {
		return nil, end.err
	}
	tree[escaped.Name] = escaped
	return escaped, nil
}
// escapeTree escapes a copy of t for the start context ctx and returns the
// resulting output context together with the escaped copy, which is renamed
// to the context-mangled template name. If a tree with that mangled name is
// already in the name space it is returned as is.
func (e *escaper) escapeTree(ctx context, t *parse.Tree) (context, *parse.Tree) {
	mangled := ctx.mangle(t.Name)
	if cached, found := e.ns[mangled]; found {
		// TODO: Provide the correct output context.
		return context{}, cached
	}
	// Work on a private, renamed copy of the template.
	dup := t.Copy()
	dup.Name = mangled
	// Propagate context over the body.
	out, ok := e.escapeTemplateBody(ctx, dup)
	if !ok {
		// Look for a fixed point by assuming out as the output context.
		// If that fails too, out remains as the error context.
		if retry, retryOK := e.escapeTemplateBody(out, dup); retryOK {
			out, ok = retry, true
		}
	}
	if ok || out.state == stateError {
		return out, dup
	}
	return context{
		state: stateError,
		err:   errorf(ErrOutputContext, dup.Root, 0, "cannot compute output context for template %s", dup.Name),
	}, nil
}
// escapeTemplateBody escapes the body of t starting in ctx and returns the
// context at the end of the body. The boolean result is always true at
// present.
//
// TODO: the filter that rejected error contexts, and output contexts that
// disagree with the assumed one for recursively called templates, has not
// been carried over; it would be applied through escapeListConditionally.
func (e *escaper) escapeTemplateBody(ctx context, t *parse.Tree) (context, bool) {
	end := e.escapeList(ctx, t.Root)
	return end, true
}
// escapeListConditionally escapes list starting in ctx and returns the
// resulting context together with the verdict of filter on that context.
// A nil filter never accepts, so the boolean result is then false.
//
// Accepting a result requires copying the inferences and edits made by the
// scratch escaper back into e, which is not implemented; the function panics
// with a descriptive message if filter accepts.
func (e *escaper) escapeListConditionally(ctx context, list *parse.ListNode, filter func(*escaper, context) bool) (context, bool) {
	// e1 is the scratch escaper handed to filter; it shares the name space
	// and the current range context with e.
	e1 := makeEscaper(e.ns)
	e1.rangeContext = e.rangeContext
	c := e.escapeList(ctx, list)
	ok := filter != nil && filter(&e1, c)
	if ok {
		// TODO: copy inferences and edits (output, derived, called, and the
		// action/template/text node edits) from e1 back into e.
		// A bare panic(nil) carries no diagnostic and cannot be told apart
		// from "no panic" via recover() before Go 1.21, so name the problem.
		panic("htmlescape: escapeListConditionally: merging accepted edits is unimplemented")
	}
	return c, ok
}
// escape escapes a template node, dispatching on its concrete type, and
// returns the context that follows the node.
//
// For {{break}} and {{continue}} the context at the statement is recorded in
// the enclosing range context, so that joinRange can check it against the
// context at the end of the loop body, and a dead context is returned because
// nothing after the statement is reached in that iteration.
// It panics for node types it does not know how to escape.
func (e *escaper) escape(c context, n parse.Node) context {
	switch n := n.(type) {
	case *parse.ActionNode:
		return e.escapeAction(c, n)
	case *parse.BreakNode:
		c.n = n
		// Previously c was dropped here, so joinRange never saw the context
		// at the break. The nil check keeps a break that is not inside a
		// range being escaped from dereferencing a nil rangeContext.
		if e.rangeContext != nil {
			e.rangeContext.breaks = append(e.rangeContext.breaks, c)
		}
		return context{state: stateDead}
	case *parse.CommentNode:
		return c
	case *parse.ContinueNode:
		c.n = n
		// See the BreakNode case.
		if e.rangeContext != nil {
			e.rangeContext.continues = append(e.rangeContext.continues, c)
		}
		return context{state: stateDead}
	case *parse.IfNode:
		return e.escapeBranch(c, &n.BranchNode, "if")
	case *parse.ListNode:
		return e.escapeList(c, n)
	case *parse.RangeNode:
		return e.escapeBranch(c, &n.BranchNode, "range")
	case *parse.TemplateNode:
		return e.escapeTemplate(c, n)
	case *parse.TextNode:
		return e.escapeText(c, n)
	case *parse.WithNode:
		return e.escapeBranch(c, &n.BranchNode, "with")
	}
	panic("escaping " + n.String() + " is unimplemented")
}
// escapeAction escapes an action template node.
// It rejects disallowed uses of the predefined escapers, selects the escaper
// functions required by the context c, and appends them to the node's
// pipeline. It returns the context that follows the action, or an error
// context.
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context {
if len(n.Pipe.Decl) != 0 {
// A local variable assignment, not an interpolation.
return c
}
// Follow any pending empty-string transition so that the action is escaped
// for the state its output actually lands in.
c = nudge(c)
// Check for disallowed use of predefined escapers in the pipeline.
for pos, idNode := range n.Pipe.Cmds {
node, ok := idNode.Args[0].(*parse.IdentifierNode)
if !ok {
// A predefined escaper "esc" will never be found as an identifier in a
// Chain or Field node, since:
// - "esc.x ..." is invalid, since predefined escapers return strings, and
// strings do not have methods, keys or fields.
// - "... .esc" is invalid, since predefined escapers are global functions,
// not methods or fields of any types.
// Therefore, it is safe to ignore these two node types.
continue
}
ident := node.Ident
if _, ok := predefinedEscapers[ident]; ok {
// A predefined escaper is only tolerated as the last command, and
// "html" not even there inside an unquoted attribute value.
if pos < len(n.Pipe.Cmds)-1 ||
c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" {
return context{
state: stateError,
err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident),
}
}
}
}
// s collects, in application order, the names of the escapers to append.
s := make([]string, 0, 3)
switch c.state {
case stateError:
return c
case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL:
switch c.urlPart {
case urlPartNone:
s = append(s, "_html_template_urlfilter")
// At the start of a URL the value is filtered first and then also
// escaped or normalized like a value before the query.
fallthrough
case urlPartPreQuery:
switch c.state {
case stateCSSDqStr, stateCSSSqStr:
s = append(s, "_html_template_cssescaper")
default:
s = append(s, "_html_template_urlnormalizer")
}
case urlPartQueryOrFrag:
s = append(s, "_html_template_urlescaper")
case urlPartUnknown:
return context{
state: stateError,
err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n),
}
default:
panic(c.urlPart.String())
}
case stateJS:
s = append(s, "_html_template_jsvalescaper")
// A slash after a value starts a div operator.
c.jsCtx = jsCtxDivOp
case stateJSDqStr, stateJSSqStr:
s = append(s, "_html_template_jsstrescaper")
case stateJSTmplLit:
s = append(s, "_html_template_jstmpllitescaper")
case stateJSRegexp:
s = append(s, "_html_template_jsregexpescaper")
case stateCSS:
s = append(s, "_html_template_cssvaluefilter")
case stateText:
s = append(s, "_html_template_htmlescaper")
case stateRCDATA:
s = append(s, "_html_template_rcdataescaper")
case stateAttr:
// Handled below in delim check.
case stateAttrName, stateTag:
c.state = stateAttrName
s = append(s, "_html_template_htmlnamefilter")
case stateSrcset:
s = append(s, "_html_template_srcsetescaper")
default:
if c.state.isComment() {
s = append(s, "_html_template_commentescaper")
} else {
panic("unexpected state " + c.state.String())
}
}
// Inside an attribute value the output must additionally be escaped for the
// way the value is delimited.
switch c.delim {
case delimNone:
// No extra-escaping needed for raw text content.
case delimSpaceOrTagEnd:
s = append(s, "_html_template_nospaceescaper")
default:
s = append(s, "_html_template_attrescaper")
}
ensurePipelineContains(n.Pipe, s)
return c
}
// ensurePipelineContains ensures that the pipeline ends with the commands with
// the identifiers in s in order. If the pipeline ends with a predefined escaper
// (i.e. "html" or "urlquery"), merge it with the identifiers in s.
// The pipeline p is modified in place; entries of s may be overwritten.
func ensurePipelineContains(p *parse.PipeNode, s []string) {
if len(s) == 0 {
// Do not rewrite pipeline if we have no escapers to insert.
return
}
// Precondition: p.Cmds contains at most one predefined escaper and the
// escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is
// always true because of the checks in escapeAction.
// pipelineLen is the number of existing commands that are copied into the
// rewritten pipeline.
pipelineLen := len(p.Cmds)
if pipelineLen > 0 {
lastCmd := p.Cmds[pipelineLen-1]
if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok {
if esc := idNode.Ident; predefinedEscapers[esc] {
// Pipeline ends with a predefined escaper.
if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 {
// Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }},
// where esc is the predefined escaper, and arg1...argN are its arguments.
// Convert this into the equivalent form
// {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily
// merged with the escapers in s.
lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position())
p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position()))
pipelineLen++
}
// If any of the commands in s that we are about to insert is equivalent
// to the predefined escaper, use the predefined escaper instead.
dup := false
for i, escaper := range s {
if escFnsEq(esc, escaper) {
s[i] = idNode.Ident
dup = true
}
}
if dup {
// The predefined escaper will already be inserted along with the
// escapers in s, so do not copy it to the rewritten pipeline.
pipelineLen--
}
}
}
}
// Rewrite the pipeline, creating the escapers in s at the end of the pipeline.
newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s))
// insertedIdents holds the normalized names of the identifier commands that
// are kept from the existing pipeline.
insertedIdents := make(map[string]bool)
for i := 0; i < pipelineLen; i++ {
cmd := p.Cmds[i]
newCmds[i] = cmd
if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok {
insertedIdents[normalizeEscFn(idNode.Ident)] = true
}
}
for _, name := range s {
if !insertedIdents[normalizeEscFn(name)] {
// When two templates share an underlying parse tree via the use of
// AddParseTree and one template is executed after the other, this check
// ensures that escapers that were already inserted into the pipeline on
// the first escaping pass do not get inserted again.
newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position()))
}
}
p.Cmds = newCmds
}
// escFnsEq reports whether the escaping functions named a and b are
// equivalent, i.e. whether both names normalize to the same escaper.
func escFnsEq(a, b string) bool {
	left := normalizeEscFn(a)
	right := normalizeEscFn(b)
	return left == right
}
// normalizeEscFn maps the name of an escaper function to a canonical name:
// normalizeEscFn(a) == normalizeEscFn(b) for any two equivalent escapers a
// and b. Names without a non-empty entry in equivEscapers are returned as is.
func normalizeEscFn(e string) string {
	canonical := equivEscapers[e]
	if canonical == "" {
		return e
	}
	return canonical
}
// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x.
// appendCmd consults it to avoid adding escaper b directly after escaper a.
var redundantFuncs = map[string]map[string]bool{
"_html_template_commentescaper": {
"_html_template_attrescaper": true,
"_html_template_htmlescaper": true,
},
"_html_template_cssescaper": {
"_html_template_attrescaper": true,
},
"_html_template_jsregexpescaper": {
"_html_template_attrescaper": true,
},
"_html_template_jsstrescaper": {
"_html_template_attrescaper": true,
},
"_html_template_jstmpllitescaper": {
"_html_template_attrescaper": true,
},
"_html_template_urlescaper": {
"_html_template_urlnormalizer": true,
},
}
// appendCmd appends cmd to the end of the command pipeline cmds and returns
// the result. When both the last command and cmd are plain identifiers and
// redundantFuncs marks cmd as redundant after the last one, cmds is returned
// unchanged.
func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
	if len(cmds) == 0 {
		return append(cmds, cmd)
	}
	prev, prevIsIdent := cmds[len(cmds)-1].Args[0].(*parse.IdentifierNode)
	cur, curIsIdent := cmd.Args[0].(*parse.IdentifierNode)
	if prevIsIdent && curIsIdent && redundantFuncs[prev.Ident][cur.Ident] {
		return cmds
	}
	return append(cmds, cmd)
}
// newIdentCmd produces a command whose only argument is an identifier node
// with the given name and position.
func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode {
	ident := parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos) // TODO: SetTree.
	cmd := &parse.CommandNode{NodeType: parse.NodeCommand}
	cmd.Args = []parse.Node{ident}
	return cmd
}
// nudge returns the context that would result from following empty string
// transitions from the input context.
//
// For example, parsing `<a href=` ends in context{stateBeforeValue, attrURL},
// but parsing one extra rune, `<a href=x`, ends in
// context{stateURL, delimSpaceOrTagEnd, ...}. Two transitions happen when the
// 'x' is seen:
//
//  1. a transition from a before-value state to a start-of-value state
//     without consuming any character, and
//  2. consuming 'x' and moving past the first value character.
//
// nudge produces the context after step 1.
func nudge(c context) context {
	if c.state == stateTag {
		// In `<foo {{.}}`, the action should emit an attribute.
		c.state = stateAttrName
		return c
	}
	if c.state == stateBeforeValue {
		// In `<foo bar={{.}}`, the action is an undelimited value.
		// The start state depends on c.attr, so read it before clearing it.
		c.state = attrStartStates[c.attr]
		c.delim = delimSpaceOrTagEnd
		c.attr = attrNone
		return c
	}
	if c.state == stateAfterName {
		// In `<foo bar {{.}}`, the action is an attribute name.
		c.state = stateAttrName
		c.attr = attrNone
	}
	return c
}
// join joins the two contexts of a branch template node. The result is an
// error context if either input is an error context, or if the inputs differ
// in a way that cannot be reconciled. A dead input (one that follows a break
// or continue) yields the other input.
func join(a, b context, node parse.Node, nodeName string) context {
	switch {
	case a.state == stateError:
		return a
	case b.state == stateError:
		return b
	case a.state == stateDead:
		return b
	case b.state == stateDead:
		return a
	case a.eq(b):
		return a
	}

	// Contexts that differ only by urlPart join to an unknown URL part.
	viaURLPart := a
	viaURLPart.urlPart = b.urlPart
	if viaURLPart.eq(b) {
		viaURLPart.urlPart = urlPartUnknown
		return viaURLPart
	}

	// Contexts that differ only by jsCtx join to an unknown JS context.
	viaJSCtx := a
	viaJSCtx.jsCtx = b.jsCtx
	if viaJSCtx.eq(b) {
		viaJSCtx.jsCtx = jsCtxUnknown
		return viaJSCtx
	}

	// Allow a nudged context to join with an unnudged one.
	// This means that
	//   <p title={{if .C}}{{.}}{{end}}
	// ends in an unquoted value state even though the else branch
	// ends in stateBeforeValue.
	na, nb := nudge(a), nudge(b)
	if !na.eq(a) || !nb.eq(b) {
		if joined := join(na, nb, node, nodeName); joined.state != stateError {
			return joined
		}
	}

	return context{
		state: stateError,
		err:   errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b),
	}
}
// escapeBranch escapes a branch template node: "if", "range" and "with".
// It returns the join of the contexts at the end of the main list and of the
// else list. For "range" the body is escaped a second time, starting from its
// own output context, to check that re-entering the loop is consistent.
func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context {
if nodeName == "range" {
// Push a fresh range context for the first pass over the body.
e.rangeContext = &rangeContext{outer: e.rangeContext}
}
c0 := e.escapeList(c, n.List)
if nodeName == "range" {
if c0.state != stateError {
c0 = joinRange(c0, e.rangeContext)
}
// Pop the range context of the first pass.
e.rangeContext = e.rangeContext.outer
if c0.state == stateError {
return c0
}
// The "true" branch of a "range" node can execute multiple times.
// We check that executing n.List once results in the same context
// as executing n.List twice.
e.rangeContext = &rangeContext{outer: e.rangeContext}
c1, _ := e.escapeListConditionally(c0, n.List, nil)
c0 = join(c0, c1, n, nodeName)
if c0.state == stateError {
e.rangeContext = e.rangeContext.outer
// Make clear that this is a problem on loop re-entry
// since developers tend to overlook that branch when
// debugging templates.
c0.err.Line = n.Line
c0.err.Description = "on range loop re-entry: " + c0.err.Description
return c0
}
c0 = joinRange(c0, e.rangeContext)
// Pop the range context of the second pass.
e.rangeContext = e.rangeContext.outer
if c0.state == stateError {
return c0
}
}
// The else list starts in the context in which the branch node was reached.
c1 := e.escapeList(c, n.ElseList)
return join(c0, c1, n, nodeName)
}
// joinRange folds the contexts recorded in rc.breaks and rc.continues into
// c0, the context reached at the end of a range body.
//
// Breaks could in theory be treated differently from continues, but for now
// both are handled as jumps back to the start of the loop (which may then
// stop). The first join that fails is returned as an error context whose line
// and description point at the offending break or continue node.
func joinRange(c0 context, rc *rangeContext) context {
	for _, brk := range rc.breaks {
		if c0 = join(c0, brk, brk.n, "range"); c0.state == stateError {
			c0.err.Line = brk.n.(*parse.BreakNode).Line
			c0.err.Description = "at range loop break: " + c0.err.Description
			return c0
		}
	}
	for _, cont := range rc.continues {
		if c0 = join(c0, cont, cont.n, "range"); c0.state == stateError {
			c0.err.Line = cont.n.(*parse.ContinueNode).Line
			c0.err.Description = "at range loop continue: " + c0.err.Description
			return c0
		}
	}
	return c0
}
// escapeList escapes the nodes of n in order, feeding the context produced by
// each node into the next one, and returns the final context. A nil list
// leaves ctx unchanged. Escaping stops as soon as a node yields stateDead,
// because the nodes after it are unreachable.
func (e *escaper) escapeList(ctx context, n *parse.ListNode) context {
	if n == nil {
		return ctx
	}
	nodes := n.Nodes
	for i := range nodes {
		if ctx = e.escape(ctx, nodes[i]); ctx.state == stateDead {
			return ctx
		}
	}
	return ctx
}
// escapeTemplate escapes a {{template}} call node.
//
// Template calls are not supported by this escaper yet: the method panics
// unconditionally and never returns a context. The panic carries a message
// on purpose. A bare panic(nil) gives no hint about what went wrong, and
// before Go 1.21 a recover() of it returned nil, so a recovering caller could
// not even tell that a panic had happened.
//
// The commented-out body below is the intended implementation, kept for
// reference.
func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context {
	panic("htmlescape: escaping of {{template}} nodes is not implemented")
	//c, name := e.escapeTree(c, n, n.Name, n.Line)
	//if name != n.Name {
	//	e.editTemplateNode(n, name)
	//}
	//return c
}
// doctypeBytes is the upper-case prefix of a doctype declaration. escapeText
// compares it against upper-cased template text so that the '<' of a doctype
// is not treated as a stray '<'.
var doctypeBytes = []byte("<!DOCTYPE")
// escapeText escapes a text template node.
//
// Starting in context c, it walks n.Text one transition at a time (see
// contextAfterText) and returns the context reached at the end of the text.
// Along the way it builds a rewritten copy of the text in which
//   - a '<' in text or RCDATA content that does not start a tag, comment or
//     doctype is replaced by "&lt;",
//   - comments are dropped (JS block comments become a newline or a space,
//     CSS block comments a space),
//   - special script tags inside JS literals are escaped.
//
// n.Text is replaced by the rewritten copy only if something was actually
// rewritten and the walk did not end in an error context.
func (e *escaper) escapeText(c context, n *parse.TextNode) context {
	s, written, i, b := n.Text, 0, 0, new(bytes.Buffer)
	for i != len(s) {
		c1, nread := contextAfterText(c, s[i:])
		i1 := i + nread
		if c.state == stateText || c.state == stateRCDATA {
			end := i1
			if c1.state != c.state {
				// The state changed, so the last '<' in this span starts
				// the tag or comment that caused the change. Leave it alone.
				for j := end - 1; j >= i; j-- {
					if s[j] == '<' {
						end = j
						break
					}
				}
			}
			for j := i; j < end; j++ {
				if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) {
					b.Write(s[written:j])
					// Emit the entity, not the raw byte: writing "<" back
					// would leave the stray '<' unescaped.
					b.WriteString("&lt;")
					written = j + 1
				}
			}
		} else if c.state.isComment() && c.delim == delimNone {
			switch c.state {
			case stateJSBlockCmt:
				// https://es5.github.io/#x7.4:
				// "Comments behave like white space and are
				// discarded except that, if a MultiLineComment
				// contains a line terminator character, then
				// the entire comment is considered to be a
				// LineTerminator for purposes of parsing by
				// the syntactic grammar."
				if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") {
					b.WriteByte('\n')
				} else {
					b.WriteByte(' ')
				}
			case stateCSSBlockCmt:
				b.WriteByte(' ')
			}
			// Skip the comment body itself.
			written = i1
		}
		if c.state != c1.state && c1.state.isComment() && c1.delim == delimNone {
			// Preserve the portion between written and the comment start.
			cs := i1 - 2
			if c1.state == stateHTMLCmt || c1.state == stateJSHTMLOpenCmt {
				// "<!--" instead of "/*" or "//"
				cs -= 2
			} else if c1.state == stateJSHTMLCloseCmt {
				// "-->" instead of "/*" or "//"
				cs -= 1
			}
			b.Write(s[written:cs])
			written = i1
		}
		if c.state.isInScriptLiteral() && containsSpecialScriptTag(s[i:i1]) {
			b.Write(s[written:i])
			b.Write(escapeSpecialScriptTags(s[i:i1]))
			written = i1
		}
		if i == i1 && c.state == c1.state {
			// No input consumed and no state change: the loop would spin
			// forever, which indicates a bug in a transition function.
			panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:]))
		}
		c, i = c1, i1
	}

	if written != 0 && c.state != stateError {
		if !c.state.isComment() || c.delim != delimNone {
			b.Write(n.Text[written:])
		}
		// Edit the text node.
		n.Text = b.Bytes()
	}
	return c
}
// contextAfterText starts in context c, consumes some tokens from the front of
// s, then returns the context after those tokens and the number of bytes of s
// that were consumed.
//
// Outside an attribute value (c.delim == delimNone) it defers to the
// transition function for c.state, limited to the text before any special
// end tag. Inside an attribute value it looks for the end of the value:
// if the value does not end in s, the whole of s is consumed and processed
// in entity-decoded form; otherwise the context resets to stateTag.
func contextAfterText(c context, s []byte) (context, int) {
if c.delim == delimNone {
c1, i := tSpecialTagEnd(c, s)
if i == 0 {
// A special end tag (`</script>`) has been seen and
// all content preceding it has been consumed.
return c1, 0
}
// Consider all content up to any end tag.
return transitionFunc[c.state](c, s[:i])
}
// We are at the beginning of an attribute value.
i := bytes.IndexAny(s, delimEnds[c.delim])
if i == -1 {
i = len(s)
}
if c.delim == delimSpaceOrTagEnd {
// https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
// lists the runes below as error characters.
// Error out because HTML parsers may differ on whether
// "<a id= onclick=f(" ends inside id's or onclick's value,
// "<a class=`foo " ends inside a value,
// "<a style=font:'Arial'" needs open-quote fixup.
// IE treats '`' as a quotation character.
if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 {
return context{
state: stateError,
err: errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]),
}, len(s)
}
}
if i == len(s) {
// Remain inside the attribute.
// Decode the value so non-HTML rules can easily handle
// <button onclick="alert(&quot;Hi!&quot;)">
// without having to entity decode token boundaries.
for u := []byte(html.UnescapeString(string(s))); len(u) != 0; {
c1, i1 := transitionFunc[c.state](c, u)
c, u = c1, u[i1:]
}
return c, len(s)
}
element := c.element
// If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS.
if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) {
element = elementNone
}
if c.delim != delimSpaceOrTagEnd {
// Consume any quote.
i++
}
// On exiting an attribute, we discard all state information
// except the state and element.
return context{state: stateTag, element: element}, i
}
package htmlescape
import (
"bytes"
"fmt"
"go/types"
"git.sr.ht/~rj/gemplate/internal/compile"
"git.sr.ht/~rj/gemplate/internal/content"
"git.sr.ht/~rj/gemplate/parse"
)
// Funcs maps the names of the escaper pseudo-functions ("_html_template_*")
// to the compile.Func that generates the matching escaping call. Each entry
// is an escapeFn that picks the concrete htmlescape helper from the content
// type of its argument.
// NOTE(review): presumably these names are the ones the escaper inserts into
// template pipelines; confirm against the code that edits the pipelines.
var Funcs = map[string]compile.Func{
"_html_template_attrescaper": escapeFn(attrEscaper),
"_html_template_htmlescaper": escapeFn(htmlEscaper),
"_html_template_htmlnamefilter": escapeFn(htmlNameFilter),
"_html_template_cssvaluefilter": escapeFn(cssvaluefilter),
"_html_template_jsstrescaper": escapeFn(jsStrEscaper),
"_html_template_srcsetescaper": escapeFn(srcsetFilterAndEscaper),
"_html_template_nospaceescaper": escapeFn(htmlNospaceEscaper),
"_html_template_rcdataescaper": escapeFn(rcdataEscaper),
"_html_template_jsvalescaper": escapeFn(jsValEscaper),
"_html_template_urlescaper": escapeFn(urlEscaper),
}
// escapeFn selects the htmlescape helper to use for a value of the given
// content type. It returns the helper's name and a "direct" flag. As used by
// Compile: an empty name means the value only needs a string conversion, and
// direct means the helper is applied to the argument expression as is,
// without first converting it to a string.
type escapeFn func(content.Type) (string, bool)
// Compile generates the Go expression that escapes the single argument of
// node and writes it to ctx's writer.
//
// The argument is first compiled into a scratch buffer so that its static
// type is known; f then chooses the helper for that type's content type.
// The generated expression is one of:
//   - string(arg) when no helper is needed,
//   - htmlescape.F(arg) when the helper is direct or arg is a string type,
//   - arg unchanged when it is a non-string basic type,
//   - htmlescape.F(arg.String()) when arg has a String() string method,
//   - htmlescape.F(fmt.Sprint(arg)) otherwise.
//
// It returns the untyped string type on success. A wrong argument count
// yields a *compile.Error; a failure to compile the argument is returned
// as is.
func (f escapeFn) Compile(ctx *compile.Context, node *parse.CommandNode) (types.Type, error) {
	if argc := len(node.Args) - 1; argc != 1 {
		return nil, &compile.Error{
			Node:    node,
			Message: fmt.Sprintf("expected 1 arguments, found %d", argc),
		}
	}

	var expr bytes.Buffer
	argType, err := ctx.WithWriter(&expr).CompileCommandNodeArg(node.Args[1], true)
	if err != nil {
		return argType, err
	}

	fn, direct := f(getContentType(argType))
	basic, isBasic := argType.(*types.Basic)
	switch {
	case fn == "":
		fmt.Fprintf(ctx.Writer(), "string(%s)", expr.Bytes())
	case direct:
		fmt.Fprintf(ctx.Writer(), "htmlescape.%s(%s)", fn, expr.Bytes())
	case isBasic && basic.Info()&types.IsString == 0:
		// Non-string basic values are emitted without any escaping call.
		_, _ = ctx.Writer().Write(expr.Bytes())
	case isBasic:
		fmt.Fprintf(ctx.Writer(), "htmlescape.%s(%s)", fn, expr.Bytes())
	case types.AssignableTo(argType, types.NewInterfaceType([]*types.Func{
		types.NewFunc(0, nil, "String", types.NewSignatureType(nil, nil, nil, nil, types.NewTuple(types.NewVar(0, nil, "", types.Typ[types.String])), false)),
	}, nil).Complete()):
		// The argument satisfies interface{ String() string }.
		fmt.Fprintf(ctx.Writer(), "htmlescape.%s(%s.String())", fn, expr.Bytes())
	default:
		fmt.Fprintf(ctx.Writer(), "htmlescape.%s(fmt.Sprint(%s))", fn, expr.Bytes())
	}
	return types.Typ[types.UntypedString], err
}
// getContentType maps a Go type to the content type it carries. The typed
// strings of html/template (CSS, HTML, HTMLAttr, JS, JSStr, Srcset, URL) are
// recognised by their fully qualified name; every other type is plain text.
func getContentType(t types.Type) content.Type {
	switch name := t.String(); name {
	case "html/template.CSS":
		return content.TypeCSS
	case "html/template.HTML":
		return content.TypeHTML
	case "html/template.HTMLAttr":
		return content.TypeHTMLAttr
	case "html/template.JS":
		return content.TypeJS
	case "html/template.JSStr":
		return content.TypeJSStr
	case "html/template.Srcset":
		return content.TypeSrcset
	case "html/template.URL":
		return content.TypeURL
	default:
		return content.TypePlain
	}
}
// attrEscaper picks the attribute-value escaper: the direct "EscapeAttrNorm"
// for HTML content, "EscapeAttr" for everything else.
func attrEscaper(typ content.Type) (string, bool) {
	switch typ {
	case content.TypeHTML:
		return "EscapeAttrNorm", true
	default:
		return "EscapeAttr", false
	}
}
// htmlEscaper picks the HTML text escaper: none (empty name) for content that
// is already HTML, "EscapeHTML" for everything else.
func htmlEscaper(typ content.Type) (string, bool) {
	switch typ {
	case content.TypeHTML:
		return "", false
	default:
		return "EscapeHTML", false
	}
}
// cssvaluefilter picks the CSS value filter: none (empty name) for content
// that is already CSS, "FilterCSSValues" for everything else.
func cssvaluefilter(typ content.Type) (string, bool) {
	switch typ {
	case content.TypeCSS:
		return "", false
	default:
		return "FilterCSSValues", false
	}
}
// htmlNameFilter picks the HTML name filter: none (empty name) for content
// typed as an HTML attribute, "FilterHTMLNames" for everything else.
func htmlNameFilter(typ content.Type) (string, bool) {
	switch typ {
	case content.TypeHTMLAttr:
		return "", false
	default:
		return "FilterHTMLNames", false
	}
}
// jsStrEscaper picks the JS string escaper: the direct "EscapeJSStrNorm" for
// content that is already a JS string, "EscapeJSStr" for everything else.
func jsStrEscaper(typ content.Type) (string, bool) {
	switch typ {
	case content.TypeJSStr:
		return "EscapeJSStrNorm", true
	default:
		return "EscapeJSStr", false
	}
}
// srcsetFilterAndEscaper picks the srcset escaper: none (empty name) for
// content that is already a srcset, the direct "FilterAndEscapeSrcsetURL"
// for URL content, and "FilterAndEscapeSrcset" for everything else.
func srcsetFilterAndEscaper(typ content.Type) (string, bool) {
	if typ == content.TypeSrcset {
		return "", false
	}
	if typ == content.TypeURL {
		return "FilterAndEscapeSrcsetURL", true
	}
	return "FilterAndEscapeSrcset", false
}
// htmlNospaceEscaper picks the escaper for unquoted attribute values: the
// direct "EscapeHTMLNoSpaceNorm" for HTML content, "EscapeHTMLNoSpace" for
// everything else.
func htmlNospaceEscaper(typ content.Type) (string, bool) {
	switch typ {
	case content.TypeHTML:
		return "EscapeHTMLNoSpaceNorm", true
	default:
		return "EscapeHTMLNoSpace", false
	}
}
// rcdataEscaper picks the RCDATA escaper: the direct "EscapeRCDataNorm" for
// HTML content, "EscapeRCData" for everything else.
func rcdataEscaper(typ content.Type) (string, bool) {
	switch typ {
	case content.TypeHTML:
		return "EscapeRCDataNorm", true
	default:
		return "EscapeRCData", false
	}
}
// jsValEscaper picks the JS value escaper. The content type is ignored: the
// result is always the direct "EscapeJSVal", so the helper receives the
// argument expression itself rather than a string conversion of it.
func jsValEscaper(content.Type) (string, bool) {
return "EscapeJSVal", true
}
// urlEscaper picks the URL escaper: the direct "EscapeURLNorm" for content
// that is already a URL, "EscapeURL" for everything else.
func urlEscaper(typ content.Type) (string, bool) {
	switch typ {
	case content.TypeURL:
		return "EscapeURLNorm", true
	default:
		return "EscapeURL", false
	}
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"bytes"
"strings"
)
// StripTags takes a snippet of HTML and returns only the text content.
// For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `.
//
// The input is walked with the same transition functions the escaper uses,
// and only bytes seen in the text and RCDATA states are copied to the
// output. If the whole input turns out to be text, html is returned
// unchanged.
func StripTags(html string) string {
var b strings.Builder
s, c, i, allText := []byte(html), context{}, 0, true
// Using the transition funcs helps us avoid mangling
// `<div title="1>2">` or `I <3 Ponies!`.
for i != len(s) {
if c.delim == delimNone {
st := c.state
// Use RCDATA instead of parsing into JS or CSS styles.
if c.element != elementNone && !st.isInTag() {
st = stateRCDATA
}
d, nread := transitionFunc[st](c, s[i:])
i1 := i + nread
if c.state == stateText || c.state == stateRCDATA {
// Emit text up to the start of the tag or comment.
j := i1
if d.state != c.state {
// Back up to the '<' that triggered the state change.
for j1 := j - 1; j1 >= i; j1-- {
if s[j1] == '<' {
j = j1
break
}
}
}
b.Write(s[i:j])
} else {
allText = false
}
c, i = d, i1
continue
}
// Inside an attribute value: skip ahead to the end of the value.
i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
if i1 < i {
// The value is never terminated; drop the rest of the input.
break
}
if c.delim != delimSpaceOrTagEnd {
// Consume any quote.
i1++
}
c, i = context{state: stateTag, element: c.element}, i1
}
if allText {
return html
} else if c.state == stateText || c.state == stateRCDATA {
b.Write(s[i:])
}
return b.String()
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"bytes"
"strings"
)
// jsWhitespace contains all of the JS whitespace characters, as defined
// by the \s character class.
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes.
// nextJSCtx uses it as the cutset for trimming trailing whitespace.
const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff"
// nextJSCtx decides how a '/' that follows the token run s must be read: as
// the start of a regular expression literal (jsCtxRegexp) or as a division
// operator, / or /= (jsCtxDivOp). If s holds nothing but whitespace the
// decision made for the preceding run is returned unchanged.
//
// The run must not contain string, comment or regular expression literal
// tokens, nor division operators.
//
// The decision needs a single token of lookbehind and follows the draft
// JavaScript 2.0 lexical grammar:
// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
// It is wrong for some valid but nonsensical programs such as
// "x = ++/foo/i" (which is quite different from "x++/foo/i"), but is not
// known to fail on any useful program.
func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
	// Only the last significant token matters, so drop trailing whitespace.
	s = bytes.TrimRight(s, jsWhitespace)
	if len(s) == 0 {
		return preceding
	}

	// Every byte inspected below is in the single-byte UTF-8 group.
	n := len(s)
	last := s[n-1]
	switch last {
	case '+', '-':
		// A lone + or - precedes a regexp whether it is used as an infix
		// or as a prefix operator, while ++ and -- do not. Measure the run
		// of identical signs: an odd run ends in a lone sign, since "---"
		// is the same as "-- -".
		run := 1
		for run < n && s[n-1-run] == last {
			run++
		}
		if run%2 == 1 {
			return jsCtxRegexp
		}
		return jsCtxDivOp

	case '.':
		// A dot that closes a number such as "42." is followed by a
		// division.
		if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
			return jsCtxDivOp
		}
		return jsCtxRegexp

	case
		// Endings of the remaining binary operators from section 7.7 of
		// the language spec.
		',', '<', '>', '=', '*', '%', '&', '|', '^', '?',
		// Remaining prefix operators.
		'!', '~',
		// Open brackets.
		'(', '[',
		// Punctuators that precede the start of an expression.
		':', ';', '{',
		// CAVEAT: the close punctuators ']' and ')' precede division
		// operators and fall through to the code below. '}' is the
		// exception. It can precede a division, as in
		//     ({ valueOf: function () { return 42 } } / 2
		// but in practice developers do not divide object literals, so
		// treating '}' as a regexp preceder works well for code like
		//     function () { ... } /foo/.test(x) && sideEffect();
		// Likewise ')' can precede a regular expression, as in
		//     if (b) /foo/.test(x) && ...
		// but that is much less likely than
		//     (a + b) / c
		'}':
		return jsCtxRegexp
	}

	// Anything else: take the trailing IdentifierName, if any, and check
	// whether it is a keyword that can precede a regular expression.
	j := n
	for j > 0 && isJSIdentPart(rune(s[j-1])) {
		j--
	}
	if regexpPrecederKeywords[string(s[j:])] {
		return jsCtxRegexp
	}
	// What is left is a punctuator not listed above, a string or an
	// ordinary identifier, all of which precede a division operator.
	return jsCtxDivOp
}
// regexpPrecederKeywords is a set of reserved JS keywords that can precede a
// regular expression in JS source.
// nextJSCtx looks up the identifier that ends a token run in this set; a hit
// means a following '/' starts a regular expression rather than a division.
var regexpPrecederKeywords = map[string]bool{
"break": true,
"case": true,
"continue": true,
"delete": true,
"do": true,
"else": true,
"finally": true,
"in": true,
"instanceof": true,
"return": true,
"throw": true,
"try": true,
"typeof": true,
"void": true,
}
// isJSIdentPart reports whether r may appear inside a JS identifier.
// Only '$', '_', ASCII digits and ASCII letters are recognised. Non-Latin
// letters, joiners and combining marks are not handled, but every code point
// that can occur in a numeric literal or a keyword is.
func isJSIdentPart(r rune) bool {
	return r == '$' || r == '_' ||
		('0' <= r && r <= '9') ||
		('A' <= r && r <= 'Z') ||
		('a' <= r && r <= 'z')
}
// isJSType reports whether the given MIME type should be considered
// JavaScript.
//
// It is used to decide whether a script tag with a type attribute is a
// JavaScript container. Parameters (anything from the first ';' on) are
// discarded, and the comparison ignores case and surrounding white space.
//
// References:
//
//	https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
//	https://tools.ietf.org/html/rfc7231#section-3.1.1
//	https://tools.ietf.org/html/rfc4329#section-3
//	https://www.ietf.org/rfc/rfc4627.txt
func isJSType(mimeType string) bool {
	essence, _, _ := strings.Cut(mimeType, ";")
	essence = strings.TrimSpace(strings.ToLower(essence))
	switch essence {
	case
		"application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript",
		"module",
		"text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	}
	return false
}
// Code generated by "stringer -type jsCtx"; DO NOT EDIT.
package htmlescape
import "strconv"
// This function is never called. It exists so that the build breaks if the
// jsCtx constant values drift from the tables below.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[jsCtxRegexp-0]
_ = x[jsCtxDivOp-1]
_ = x[jsCtxUnknown-2]
}
// _jsCtx_name holds the constant names back to back; _jsCtx_index holds the
// offset at which each name starts, plus the total length as a final entry.
const _jsCtx_name = "jsCtxRegexpjsCtxDivOpjsCtxUnknown"
var _jsCtx_index = [...]uint8{0, 11, 21, 33}
// String returns the name of the jsCtx constant, or "jsCtx(N)" for a value N
// that has no name in the table.
func (i jsCtx) String() string {
if i >= jsCtx(len(_jsCtx_index)-1) {
return "jsCtx(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _jsCtx_name[_jsCtx_index[i]:_jsCtx_index[i+1]]
}
package htmlescape
// state describes a high-level HTML parser state.
//
// It bounds the top of the element stack, and by extension the HTML insertion
// mode, but also contains state that does not correspond to anything in the
// HTML5 parsing algorithm because a single token production in the HTML
// grammar may contain embedded actions in a template. For instance, the quoted
// HTML attribute produced by
//
// <div title="Hello {{.World}}">
//
// is a single token in HTML's grammar but in a template spans several nodes.
//
// The zero value is stateText.
type state uint8
//go:generate stringer -type state
const (
// NOTE: the numeric values of these constants are relied on elsewhere in
// the package: the generated String method checks them at compile time
// and transitionFunc is indexed by them. Re-run stringer after adding,
// removing or reordering states.

// stateText is parsed character data. An HTML parser is in
// this state when its parse position is outside an HTML tag,
// directive, comment, and special element body.
stateText state = iota
// stateTag occurs before an HTML attribute or the end of a tag.
stateTag
// stateAttrName occurs inside an attribute name.
// It occurs between the ^'s in ` ^name^ = value`.
stateAttrName
// stateAfterName occurs after an attr name has ended but before any
// equals sign. It occurs between the ^'s in ` name^ ^= value`.
stateAfterName
// stateBeforeValue occurs after the equals sign but before the value.
// It occurs between the ^'s in ` name =^ ^value`.
stateBeforeValue
// stateHTMLCmt occurs inside an <!-- HTML comment -->.
stateHTMLCmt
// stateRCDATA occurs inside an RCDATA element (<textarea> or <title>)
// as described at https://www.w3.org/TR/html5/syntax.html#elements-0
stateRCDATA
// stateAttr occurs inside an HTML attribute whose content is text.
stateAttr
// stateURL occurs inside an HTML attribute whose content is a URL.
stateURL
// stateSrcset occurs inside an HTML srcset attribute.
stateSrcset
// stateJS occurs inside an event handler or script element.
stateJS
// stateJSDqStr occurs inside a JavaScript double quoted string.
stateJSDqStr
// stateJSSqStr occurs inside a JavaScript single quoted string.
stateJSSqStr
// stateJSTmplLit occurs inside a JavaScript back quoted string.
stateJSTmplLit
// stateJSRegexp occurs inside a JavaScript regexp literal.
stateJSRegexp
// stateJSBlockCmt occurs inside a JavaScript /* block comment */.
stateJSBlockCmt
// stateJSLineCmt occurs inside a JavaScript // line comment.
stateJSLineCmt
// stateJSHTMLOpenCmt occurs inside a JavaScript <!-- HTML-like comment.
stateJSHTMLOpenCmt
// stateJSHTMLCloseCmt occurs inside a JavaScript --> HTML-like comment.
stateJSHTMLCloseCmt
// stateCSS occurs inside a <style> element or style attribute.
stateCSS
// stateCSSDqStr occurs inside a CSS double quoted string.
stateCSSDqStr
// stateCSSSqStr occurs inside a CSS single quoted string.
stateCSSSqStr
// stateCSSDqURL occurs inside a CSS double quoted url("...").
stateCSSDqURL
// stateCSSSqURL occurs inside a CSS single quoted url('...').
stateCSSSqURL
// stateCSSURL occurs inside a CSS unquoted url(...).
stateCSSURL
// stateCSSBlockCmt occurs inside a CSS /* block comment */.
stateCSSBlockCmt
// stateCSSLineCmt occurs inside a CSS // line comment.
stateCSSLineCmt
// stateError is an infectious error state outside any valid
// HTML/CSS/JS construct.
stateError
// stateDead marks unreachable code after a {{break}} or {{continue}}.
stateDead
)
// isComment reports whether s is one of the comment states (HTML, JS or
// CSS), i.e. a state whose content is meant for template authors and
// maintainers rather than for end-users or machines.
func (s state) isComment() bool {
	return s == stateHTMLCmt ||
		s == stateJSBlockCmt ||
		s == stateJSLineCmt ||
		s == stateJSHTMLOpenCmt ||
		s == stateJSHTMLCloseCmt ||
		s == stateCSSBlockCmt ||
		s == stateCSSLineCmt
}
// isInTag reports whether s occurs solely inside an HTML tag.
func (s state) isInTag() bool {
	return s == stateTag ||
		s == stateAttrName ||
		s == stateAfterName ||
		s == stateBeforeValue ||
		s == stateAttr
}
// isInScriptLiteral reports whether s is one of the literal states within a
// <script> tag (string, template literal or regexp), in which occurrences of
// "<!--", "<script" and "</script" need to be treated specially.
//
// The JS comment states (stateJSBlockCmt, stateJSLineCmt,
// stateJSHTMLOpenCmt, stateJSHTMLCloseCmt) are deliberately excluded because
// their content is already omitted from the output.
func (s state) isInScriptLiteral() bool {
	return s == stateJSDqStr ||
		s == stateJSSqStr ||
		s == stateJSTmplLit ||
		s == stateJSRegexp
}
// Code generated by "stringer -type state"; DO NOT EDIT.
package htmlescape
import "strconv"
// This function is never called. It exists so that the build breaks if the
// state constant values drift from the tables below.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[stateText-0]
_ = x[stateTag-1]
_ = x[stateAttrName-2]
_ = x[stateAfterName-3]
_ = x[stateBeforeValue-4]
_ = x[stateHTMLCmt-5]
_ = x[stateRCDATA-6]
_ = x[stateAttr-7]
_ = x[stateURL-8]
_ = x[stateSrcset-9]
_ = x[stateJS-10]
_ = x[stateJSDqStr-11]
_ = x[stateJSSqStr-12]
_ = x[stateJSTmplLit-13]
_ = x[stateJSRegexp-14]
_ = x[stateJSBlockCmt-15]
_ = x[stateJSLineCmt-16]
_ = x[stateJSHTMLOpenCmt-17]
_ = x[stateJSHTMLCloseCmt-18]
_ = x[stateCSS-19]
_ = x[stateCSSDqStr-20]
_ = x[stateCSSSqStr-21]
_ = x[stateCSSDqURL-22]
_ = x[stateCSSSqURL-23]
_ = x[stateCSSURL-24]
_ = x[stateCSSBlockCmt-25]
_ = x[stateCSSLineCmt-26]
_ = x[stateError-27]
_ = x[stateDead-28]
}
// _state_name holds the constant names back to back; _state_index holds the
// offset at which each name starts, plus the total length as a final entry.
const _state_name = "stateTextstateTagstateAttrNamestateAfterNamestateBeforeValuestateHTMLCmtstateRCDATAstateAttrstateURLstateSrcsetstateJSstateJSDqStrstateJSSqStrstateJSTmplLitstateJSRegexpstateJSBlockCmtstateJSLineCmtstateJSHTMLOpenCmtstateJSHTMLCloseCmtstateCSSstateCSSDqStrstateCSSSqStrstateCSSDqURLstateCSSSqURLstateCSSURLstateCSSBlockCmtstateCSSLineCmtstateErrorstateDead"
var _state_index = [...]uint16{0, 9, 17, 30, 44, 60, 72, 83, 92, 100, 111, 118, 130, 142, 156, 169, 184, 198, 216, 235, 243, 256, 269, 282, 295, 306, 322, 337, 347, 356}
// String returns the name of the state constant, or "state(N)" for a value N
// that has no name in the table.
func (i state) String() string {
if i >= state(len(_state_index)-1) {
return "state(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _state_name[_state_index[i]:_state_index[i+1]]
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlescape
import (
"bytes"
"strings"
"git.sr.ht/~rj/gemplate/internal/content"
"git.sr.ht/~rj/gemplate/internal/css"
)
// transitionFunc is the array of context transition functions for text nodes.
// A transition function takes a context and template text input, and returns
// the updated context and the number of bytes consumed from the front of the
// input.
//
// The array is indexed by state. Its highest index is stateError, so there
// is no entry for stateDead and indexing with stateDead is out of range.
var transitionFunc = [...]func(context, []byte) (context, int){
stateText: tText,
stateTag: tTag,
stateAttrName: tAttrName,
stateAfterName: tAfterName,
stateBeforeValue: tBeforeValue,
stateHTMLCmt: tHTMLCmt,
stateRCDATA: tSpecialTagEnd,
stateAttr: tAttr,
stateURL: tURL,
stateSrcset: tURL,
stateJS: tJS,
stateJSDqStr: tJSDelimited,
stateJSSqStr: tJSDelimited,
stateJSRegexp: tJSDelimited,
stateJSTmplLit: tJSTmpl,
stateJSBlockCmt: tBlockCmt,
stateJSLineCmt: tLineCmt,
stateJSHTMLOpenCmt: tLineCmt,
stateJSHTMLCloseCmt: tLineCmt,
stateCSS: tCSS,
stateCSSDqStr: tCSSStr,
stateCSSSqStr: tCSSStr,
stateCSSDqURL: tCSSStr,
stateCSSSqURL: tCSSStr,
stateCSSURL: tCSSStr,
stateCSSBlockCmt: tBlockCmt,
stateCSSLineCmt: tLineCmt,
stateError: tError,
}
// commentStart and commentEnd are the delimiters of an HTML comment. They
// are also matched inside JS, where they introduce HTML-like line comments.
var commentStart = []byte("<!--")
var commentEnd = []byte("-->")
// tText is the context transition function for the text state.
//
// It scans s for the next construct that leaves plain text: "<!--" moves to
// stateHTMLCmt, and a '<' or "</" followed by a tag name moves to stateTag.
// For an end tag the element is reset to elementNone. A '<' that starts
// neither is skipped. If nothing is found, or s ends right after '<' or
// "</", the whole input is consumed without a state change.
func tText(c context, s []byte) (context, int) {
	from := 0
	for {
		rel := bytes.IndexByte(s[from:], '<')
		if rel < 0 {
			return c, len(s)
		}
		lt := from + rel
		if lt+1 == len(s) {
			// '<' is the last byte: there is nothing to classify yet.
			return c, len(s)
		}
		if lt+4 <= len(s) && bytes.Equal(commentStart, s[lt:lt+4]) {
			return context{state: stateHTMLCmt}, lt + 4
		}

		nameStart := lt + 1
		isEndTag := s[nameStart] == '/'
		if isEndTag {
			if nameStart+1 == len(s) {
				return c, len(s)
			}
			nameStart++
		}

		nameEnd, elem := eatTagName(s, nameStart)
		if nameEnd != nameStart {
			if isEndTag {
				elem = elementNone
			}
			// We've found an HTML tag.
			return context{state: stateTag, element: elem}, nameEnd
		}
		// Not a tag after all; keep looking after this '<'.
		from = nameEnd
	}
}
// elementContentType maps an element kind to the state entered once the
// element's start tag has been closed with '>' (see tTag).
var elementContentType = [...]state{
elementNone: stateText,
elementScript: stateJS,
elementStyle: stateCSS,
elementTextarea: stateRCDATA,
elementTitle: stateRCDATA,
}
// tTag is the context transition function for the tag state.
//
// After optional white space it expects either '>', which closes the tag and
// enters the content state of the current element, or an attribute name. The
// attribute kind is derived from the lower-cased name; "type" on a script
// element is tracked separately. Anything else yields an error context that
// consumes the whole input.
func tTag(c context, s []byte) (context, int) {
	// Find the attribute name.
	nameStart := eatWhiteSpace(s, 0)
	if nameStart == len(s) {
		return c, len(s)
	}
	if s[nameStart] == '>' {
		return context{
			state:   elementContentType[c.element],
			element: c.element,
		}, nameStart + 1
	}

	nameEnd, err := eatAttrName(s, nameStart)
	if err != nil {
		return context{state: stateError, err: err}, len(s)
	}
	if nameEnd == nameStart {
		return context{
			state: stateError,
			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[nameStart:]),
		}, len(s)
	}

	kind := attrNone
	name := strings.ToLower(string(s[nameStart:nameEnd]))
	switch {
	case c.element == elementScript && name == "type":
		kind = attrScriptType
	default:
		switch content.AttrType(name) {
		case content.TypeURL:
			kind = attrURL
		case content.TypeCSS:
			kind = attrStyle
		case content.TypeJS:
			kind = attrScript
		case content.TypeSrcset:
			kind = attrSrcset
		}
	}

	// If the name runs to the end of the input it may continue in the next
	// chunk of text, so stay inside the name.
	next := stateAfterName
	if nameEnd == len(s) {
		next = stateAttrName
	}
	return context{state: next, element: c.element, attr: kind}, nameEnd
}
// tAttrName is the context transition function for stateAttrName.
//
// It consumes the rest of the attribute name. If the name ends before the
// end of s the state becomes stateAfterName; a malformed name yields an
// error context that consumes the whole input.
func tAttrName(c context, s []byte) (context, int) {
	end, err := eatAttrName(s, 0)
	switch {
	case err != nil:
		return context{state: stateError, err: err}, len(s)
	case end != len(s):
		c.state = stateAfterName
	}
	return c, end
}
// tAfterName is the context transition function for stateAfterName.
//
// It skips white space and looks for the '=' that introduces the value. With
// an '=' the state becomes stateBeforeValue and the '=' is consumed. With
// any other byte (the tag ending in '>', or a valueless attribute) the state
// returns to stateTag and that byte is left unconsumed.
func tAfterName(c context, s []byte) (context, int) {
	i := eatWhiteSpace(s, 0)
	switch {
	case i == len(s):
		return c, len(s)
	case s[i] == '=':
		c.state = stateBeforeValue
		return c, i + 1
	}
	c.state = stateTag
	return c, i
}
// attrStartStates maps an attribute kind to the state entered at the start
// of that attribute's value (see tBeforeValue).
var attrStartStates = [...]state{
attrNone: stateAttr,
attrScript: stateJS,
attrScriptType: stateAttr,
attrStyle: stateCSS,
attrURL: stateURL,
attrSrcset: stateSrcset,
}
// tBeforeValue is the context transition function for stateBeforeValue.
//
// It skips white space, determines how the value is delimited (single quote,
// double quote, or unquoted, which ends at a space or the end of the tag),
// consumes an opening quote if there is one, and enters the start state for
// the attribute kind recorded in c.attr.
func tBeforeValue(c context, s []byte) (context, int) {
	i := eatWhiteSpace(s, 0)
	if i == len(s) {
		return c, len(s)
	}
	c.state = attrStartStates[c.attr]
	// Find the attribute delimiter.
	switch s[i] {
	case '\'':
		c.delim = delimSingleQuote
		i++
	case '"':
		c.delim = delimDoubleQuote
		i++
	default:
		c.delim = delimSpaceOrTagEnd
	}
	return c, i
}
// tHTMLCmt is the context transition function for stateHTMLCmt.
//
// It consumes input up to and including the closing "-->" and resets to the
// zero context. Without a "-->" the whole input is consumed and the comment
// state is kept.
func tHTMLCmt(c context, s []byte) (context, int) {
	end := bytes.Index(s, commentEnd)
	if end == -1 {
		return c, len(s)
	}
	return context{}, end + 3
}
// specialTagEndMarkers maps element types to the character sequence that
// case-insensitively signals the end of the special tag body.
// Only the tag name is stored; indexTagEnd adds the "</" prefix and checks
// the separator that follows.
var specialTagEndMarkers = [...][]byte{
elementScript: []byte("script"),
elementStyle: []byte("style"),
elementTextarea: []byte("textarea"),
elementTitle: []byte("title"),
}
var (
// specialTagEndPrefix is the byte sequence that opens every end tag.
specialTagEndPrefix = []byte("</")
// tagEndSeparators lists the bytes that may directly follow the tag name
// of an end tag for it to count as one (see indexTagEnd).
tagEndSeparators = []byte("> \t\n\f/")
)
// tSpecialTagEnd is the context transition function for raw text and RCDATA
// element states.
//
// If the end tag of the current special element is found, it returns the
// zero context and the index at which that end tag starts. Otherwise the
// whole input is consumed and c is returned unchanged.
func tSpecialTagEnd(c context, s []byte) (context, int) {
	if c.element == elementNone {
		return c, len(s)
	}
	// Script end tags ("</script") within script literals and comments are
	// ignored, so that they can be escaped properly instead.
	if c.element == elementScript && (c.state.isInScriptLiteral() || c.state.isComment()) {
		return c, len(s)
	}
	if end := indexTagEnd(s, specialTagEndMarkers[c.element]); end != -1 {
		return context{}, end
	}
	return c, len(s)
}
// indexTagEnd returns the index in s of the first end tag for tag, matched
// case-insensitively, or -1 if there is none. A match consists of "</", the
// tag name and then one of the bytes in tagEndSeparators; the returned index
// is that of the '<'.
func indexTagEnd(s []byte, tag []byte) int {
	pos := 0
	for pos < len(s) {
		// Locate the next "</".
		rel := bytes.Index(s[pos:], specialTagEndPrefix)
		if rel == -1 {
			return -1
		}
		start := pos + rel
		pos = start + len(specialTagEndPrefix)

		// The tag name must follow, if there is still room for it.
		rest := s[pos:]
		if len(rest) < len(tag) || !bytes.EqualFold(tag, rest[:len(tag)]) {
			continue
		}
		pos += len(tag)

		// The name must be followed by a proper separator.
		if pos < len(s) && bytes.IndexByte(tagEndSeparators, s[pos]) != -1 {
			return start
		}
	}
	return -1
}
// tAttr is the context transition function for the attribute state.
// It consumes the whole input and leaves the context unchanged.
func tAttr(c context, s []byte) (context, int) {
return c, len(s)
}
// tURL is the context transition function for the URL state.
//
// It always consumes the whole input and only updates c.urlPart: a '#' or
// '?' anywhere in s moves to the query-or-fragment part, and otherwise any
// non-space content moves a URL that has not started yet to the pre-query
// part.
func tURL(c context, s []byte) (context, int) {
	switch {
	case bytes.ContainsAny(s, "#?"):
		c.urlPart = urlPartQueryOrFrag
	case eatWhiteSpace(s, 0) != len(s) && c.urlPart == urlPartNone:
		// HTML5 uses "Valid URL potentially surrounded by spaces" for
		// attrs: https://www.w3.org/TR/html5/index.html#attributes-1
		c.urlPart = urlPartPreQuery
	}
	return c, len(s)
}
// tJS is the context transition function for the JS state.
//
// It scans s for the first byte that can start a string, template literal,
// comment or regexp, or that changes the brace depth inside a template
// literal substitution. The tokens before that byte update c.jsCtx. The byte
// itself (plus any further bytes of a comment opener) is consumed and the
// state is changed accordingly. A '/' whose meaning cannot be decided yields
// an ErrSlashAmbig error context.
func tJS(c context, s []byte) (context, int) {
i := bytes.IndexAny(s, "\"`'/{}<-#")
if i == -1 {
// Entire input is non string, comment, regexp tokens.
c.jsCtx = nextJSCtx(s, c.jsCtx)
return c, len(s)
}
c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
switch s[i] {
case '"':
c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
case '\'':
c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
case '`':
c.state, c.jsCtx = stateJSTmplLit, jsCtxRegexp
case '/':
switch {
case i+1 < len(s) && s[i+1] == '/':
c.state, i = stateJSLineCmt, i+1
case i+1 < len(s) && s[i+1] == '*':
c.state, i = stateJSBlockCmt, i+1
case c.jsCtx == jsCtxRegexp:
c.state = stateJSRegexp
case c.jsCtx == jsCtxDivOp:
// A division operator; whatever follows it may be a regexp.
c.jsCtx = jsCtxRegexp
default:
return context{
state: stateError,
err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
}, len(s)
}
// ECMAScript supports HTML style comments for legacy reasons, see Appendix
// B.1.1 "HTML-like Comments". The handling of these comments is somewhat
// confusing. Multi-line comments are not supported, i.e. anything on lines
// between the opening and closing tokens is not considered a comment, but
// anything following the opening or closing token, on the same line, is
// ignored. As such we simply treat any line prefixed with "<!--" or "-->"
// as if it were actually prefixed with "//" and move on.
case '<':
if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
c.state, i = stateJSHTMLOpenCmt, i+3
}
case '-':
if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
c.state, i = stateJSHTMLCloseCmt, i+2
}
// ECMAScript also supports "hashbang" comment lines, see Section 12.5.
case '#':
if i+1 < len(s) && s[i+1] == '!' {
c.state, i = stateJSLineCmt, i+1
}
case '{':
// We only care about tracking brace depth if we are inside of a
// template literal.
if len(c.jsBraceDepth) == 0 {
return c, i + 1
}
c.jsBraceDepth[len(c.jsBraceDepth)-1]++
case '}':
if len(c.jsBraceDepth) == 0 {
return c, i + 1
}
// There are no cases where a brace can be escaped in the JS context
// that are not syntax errors, it seems. Because of this we can just
// count "\}" as "}" and move on, the script is already broken as
// fully fledged parsers will just fail anyway.
c.jsBraceDepth[len(c.jsBraceDepth)-1]--
if c.jsBraceDepth[len(c.jsBraceDepth)-1] >= 0 {
return c, i + 1
}
// The depth went negative: this '}' closes the substitution opened by
// tJSTmpl, so pop its counter and resume the template literal.
c.jsBraceDepth = c.jsBraceDepth[:len(c.jsBraceDepth)-1]
c.state = stateJSTmplLit
default:
panic("unreachable")
}
return c, i + 1
}
// tJSTmpl is the context transition function for the JS template literal
// state.
//
// It looks for the closing backtick, a "${" that opens a substitution, or a
// backslash escape. A closing backtick or an opening "${" switches the state
// back to stateJS; for "${" a fresh brace-depth counter is pushed onto
// c.jsBraceDepth. If s ends right after a backslash an error context is
// returned. Otherwise all of s is consumed with the state unchanged.
func tJSTmpl(c context, s []byte) (context, int) {
	for offset := 0; ; {
		rel := bytes.IndexAny(s[offset:], "`\\$")
		if rel < 0 {
			return c, len(s)
		}
		pos := offset + rel
		switch s[pos] {
		case '`':
			// End of the template literal.
			c.state = stateJS
			return c, pos + 1
		case '$':
			if pos+2 <= len(s) && s[pos+1] == '{' {
				c.jsBraceDepth = append(c.jsBraceDepth, 0)
				c.state = stateJS
				return c, pos + 2
			}
		case '\\':
			// Step over the escaped byte; a trailing backslash is an error.
			pos++
			if pos == len(s) {
				return context{
					state: stateError,
					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
				}, len(s)
			}
		}
		offset = pos + 1
	}
}
// tJSDelimited is the context transition function for the JS string and regexp
// states.
//
// It scans s for the delimiter that closes the current double-quoted string,
// single-quoted string, or regexp literal, stepping over backslash escapes.
// When the closing delimiter is found outside a regexp character set, it
// returns a context in stateJS with jsCtx set to jsCtxDivOp, together with the
// index just past the delimiter. If no closing delimiter is found, all of s is
// consumed and the state is left unchanged. An error context is returned when
// s ends directly after a backslash or inside a regexp character set.
func tJSDelimited(c context, s []byte) (context, int) {
// Bytes of interest; the double-quoted string set is the default.
specials := `\"`
switch c.state {
case stateJSSqStr:
specials = `\'`
case stateJSRegexp:
specials = `\/[]`
}
// k is the offset at which the next search starts; inCharset tracks whether
// we are between '[' and ']' of a regexp character set.
k, inCharset := 0, false
for {
i := k + bytes.IndexAny(s[k:], specials)
if i < k {
// IndexAny returned -1: no more special bytes.
break
}
switch s[i] {
case '\\':
// Skip the escaped byte.
i++
if i == len(s) {
return context{
state: stateError,
err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
}, len(s)
}
case '[':
inCharset = true
case ']':
inCharset = false
case '/':
// If "</script" appears in a regex literal, the '/' should not
// close the regex literal, and it will later be escaped to
// "\x3C/script" in escapeText.
if i > 0 && i+7 <= len(s) && bytes.Equal(bytes.ToLower(s[i-1:i+7]), []byte("</script")) {
i++
} else if !inCharset {
c.state, c.jsCtx = stateJS, jsCtxDivOp
return c, i + 1
}
default:
// end delimiter
if !inCharset {
c.state, c.jsCtx = stateJS, jsCtxDivOp
return c, i + 1
}
}
k = i + 1
}
if inCharset {
// This can be fixed by making context richer if interpolation
// into charsets is desired.
return context{
state: stateError,
err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
}, len(s)
}
return c, len(s)
}
// blockCommentEnd is the byte sequence that terminates a /*comment*/.
var blockCommentEnd = []byte("*/")

// tBlockCmt is the context transition function for /*comment*/ states.
//
// If s contains the end-of-comment marker, the state returns to stateJS or
// stateCSS (depending on which comment state was active) and the index just
// past the marker is returned. Otherwise all of s is consumed. It panics when
// called in any state other than the two block comment states.
func tBlockCmt(c context, s []byte) (context, int) {
	end := bytes.Index(s, blockCommentEnd)
	if end < 0 {
		return c, len(s)
	}
	if c.state == stateJSBlockCmt {
		c.state = stateJS
	} else if c.state == stateCSSBlockCmt {
		c.state = stateCSS
	} else {
		panic(c.state.String())
	}
	return c, end + 2
}
// tLineCmt is the context transition function for //comment states, and the JS HTML-like comment state.
//
// The comment ends at the first line terminator in s. The terminator itself
// is not consumed: the returned index points at it. When s contains no line
// terminator, all of s is consumed and the state is unchanged. It panics when
// called in a state that is not a line comment state.
func tLineCmt(c context, s []byte) (context, int) {
	var (
		terminators string
		after       state
	)
	switch c.state {
	case stateCSSLineCmt:
		// Line comments are not part of any published CSS standard but
		// are supported by the 4 major browsers.
		// This defines line comments as
		//     LINECOMMENT ::= "//" [^\n\f\d]*
		// since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
		// newlines:
		//     nl ::= #xA | #xD #xA | #xD | #xC
		terminators, after = "\n\f\r", stateCSS
	case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
		terminators, after = "\n\r\u2028\u2029", stateJS
	default:
		panic(c.state.String())
	}

	if end := bytes.IndexAny(s, terminators); end >= 0 {
		// Per section 7.4 of EcmaScript 5 : https://es5.github.io/#x7.4
		// "However, the LineTerminator at the end of the line is not
		// considered to be part of the single-line comment; it is
		// recognized separately by the lexical grammar and becomes part
		// of the stream of input elements for the syntactic grammar."
		c.state = after
		return c, end
	}
	return c, len(s)
}
// tCSS is the context transition function for the CSS state.
//
// It scans for the start of a url(...) token, a quoted string, or a comment,
// and returns the matching state together with the index just past the
// opening token. When none is found all of s is consumed.
func tCSS(c context, s []byte) (context, int) {
	// Quoted strings are rare in CSS. They show up mainly as:
	// (1) URLs, e.g. background: "/foo.png"
	// (2) Multiword font names, e.g. font-family: "Times New Roman"
	// (3) List separators in content values, as in inline lists:
	//     <style>
	//     ul.inlineList { list-style: none; padding:0 }
	//     ul.inlineList > li { display: inline }
	//     ul.inlineList > li:before { content: ", " }
	//     ul.inlineList > li:first-child:before { content: "" }
	//     </style>
	//     <ul class=inlineList><li>One<li>Two<li>Three</ul>
	// (4) Attribute value selectors, e.g. a[href="http://example.com/"]
	//
	// All strings are conservatively treated as URLs, with some allowances
	// to avoid confusion.
	//
	// For (1) the conservative assumption is justified.
	// For (2), valid font names contain no ':', '?', or '#', so we never
	// transition past urlPartPreQuery and the assumption is harmless.
	// For (3), the protocol heuristic should not trigger and no non-space
	// content should follow a '?' or '#', so we are fine as long as only
	// RFC 3986 reserved characters are %-encoded.
	// For (4), URL attributes should be URL escaped anyway, and for other
	// attributes the attribute name is available should the conservative
	// assumption prove problematic for real code.
	const cssSpace = "\t\n\f\r "

	for start := 0; ; {
		rel := bytes.IndexAny(s[start:], `("'/`)
		if rel < 0 {
			return c, len(s)
		}
		at := start + rel
		switch s[at] {
		case '"':
			c.state = stateCSSDqStr
			return c, at + 1
		case '\'':
			c.state = stateCSSSqStr
			return c, at + 1
		case '/':
			if at+1 < len(s) {
				if s[at+1] == '/' {
					c.state = stateCSSLineCmt
					return c, at + 2
				}
				if s[at+1] == '*' {
					c.state = stateCSSBlockCmt
					return c, at + 2
				}
			}
		case '(':
			// Only an opening parenthesis preceded by "url" matters.
			before := bytes.TrimRight(s[:at], cssSpace)
			if endsWithCSSKeyword(before, "url") {
				// arg indexes the first non-space byte after '('.
				arg := len(s) - len(bytes.TrimLeft(s[at+1:], cssSpace))
				c.state = stateCSSURL
				if arg != len(s) {
					switch s[arg] {
					case '"':
						c.state, arg = stateCSSDqURL, arg+1
					case '\'':
						c.state, arg = stateCSSSqURL, arg+1
					}
				}
				return c, arg
			}
		}
		start = at + 1
	}
}
// tCSSStr is the context transition function for the CSS string and URL states.
//
// It scans for the byte that ends the current string or URL, stepping over
// backslash escapes. Content seen along the way is decoded with
// css.DecodeCSS and fed through tURL so that the URL part of the context is
// kept up to date. On the terminator the state returns to stateCSS. If s ends
// directly after a backslash an error context is returned. It panics when
// called in a state that is not a CSS string or URL state.
func tCSSStr(c context, s []byte) (context, int) {
	var stops string
	switch c.state {
	case stateCSSURL:
		// Unquoted URLs end with a newline or close parenthesis.
		// The below includes the wc (whitespace character) and nl.
		stops = "\\\t\n\f\r )"
	case stateCSSSqStr, stateCSSSqURL:
		stops = `\'`
	case stateCSSDqStr, stateCSSDqURL:
		stops = `\"`
	default:
		panic(c.state.String())
	}

	for start := 0; ; {
		rel := bytes.IndexAny(s[start:], stops)
		if rel < 0 {
			// No terminator in the remainder: hand it all to tURL.
			next, n := tURL(c, css.DecodeCSS(s[start:]))
			return next, start + n
		}
		at := start + rel
		if s[at] != '\\' {
			c.state = stateCSS
			return c, at + 1
		}
		// Backslash: the following byte is escaped.
		esc := at + 1
		if esc == len(s) {
			return context{
				state: stateError,
				err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
			}, len(s)
		}
		c, _ = tURL(c, css.DecodeCSS(s[:esc+1]))
		start = esc + 1
	}
}
// tError is the context transition function for the error state.
// It leaves the context untouched and consumes all of s.
func tError(c context, s []byte) (context, int) {
	consumed := len(s)
	return c, consumed
}
// eatAttrName returns the largest j such that s[i:j] is an attribute name.
// It returns an error if s[i:] does not look like it begins with an
// attribute name, such as encountering a quote mark without a preceding
// equals sign. When no terminating byte is found, len(s) is returned.
func eatAttrName(s []byte, i int) (int, *Error) {
	for j := i; j < len(s); j++ {
		switch s[j] {
		case '\'', '"', '<':
			// These result in a parse warning in HTML5 and are
			// indicative of serious problems if seen in an attr
			// name in a template.
			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
		case '=', '>', ' ', '\t', '\n', '\f', '\r':
			// The attribute name ends just before this byte.
			return j, nil
		}
	}
	return len(s), nil
}
// elementNameMap maps the lower-case names of the elements that get special
// treatment to their element value. Names that are absent yield the zero
// value of element on lookup.
var elementNameMap = map[string]element{
"script": elementScript,
"style": elementStyle,
"textarea": elementTextarea,
"title": elementTitle,
}
// asciiAlpha reports whether c is an ASCII letter.
func asciiAlpha(c byte) bool {
	// Setting bit 0x20 folds 'A'..'Z' onto 'a'..'z'; no other byte lands
	// in that range.
	folded := c | 0x20
	return 'a' <= folded && folded <= 'z'
}
// asciiAlphaNum reports whether c is an ASCII letter or digit.
func asciiAlphaNum(c byte) bool {
	if '0' <= c && c <= '9' {
		return true
	}
	return asciiAlpha(c)
}
// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
// If s[i:] does not start with an ASCII letter, it returns i and elementNone.
func eatTagName(s []byte, i int) (int, element) {
	if i == len(s) || !asciiAlpha(s[i]) {
		return i, elementNone
	}
	end := i + 1
scan:
	for end < len(s) {
		switch b := s[end]; {
		case asciiAlphaNum(b):
			end++
		case (b == ':' || b == '-') && end+1 < len(s) && asciiAlphaNum(s[end+1]):
			// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
			end += 2
		default:
			break scan
		}
	}
	name := strings.ToLower(string(s[i:end]))
	return end, elementNameMap[name]
}
// eatWhiteSpace returns the largest j such that s[i:j] is white space.
// White space is any of space, tab, newline, form feed and carriage return.
func eatWhiteSpace(s []byte, i int) int {
	for j := i; j < len(s); j++ {
		if b := s[j]; b != ' ' && b != '\t' && b != '\n' && b != '\f' && b != '\r' {
			return j
		}
	}
	return len(s)
}
// Code generated by "stringer -type urlPart"; DO NOT EDIT.
package htmlescape
import "strconv"
// _ is never called. Its body only compiles while the urlPart constants keep
// the values this file was generated for.
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[urlPartNone-0]
_ = x[urlPartPreQuery-1]
_ = x[urlPartQueryOrFrag-2]
_ = x[urlPartUnknown-3]
}
// _urlPart_name is the concatenation of the names of all urlPart constants.
const _urlPart_name = "urlPartNoneurlPartPreQueryurlPartQueryOrFragurlPartUnknown"
// _urlPart_index holds the offset in _urlPart_name at which each name starts,
// followed by the total length.
var _urlPart_index = [...]uint8{0, 11, 26, 44, 58}
// String returns the name of the constant, or "urlPart(N)" when i is not one
// of the values covered by _urlPart_index.
func (i urlPart) String() string {
if i >= urlPart(len(_urlPart_index)-1) {
return "urlPart(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _urlPart_name[_urlPart_index[i]:_urlPart_index[i+1]]
}
package parsecond
import (
"git.sr.ht/~rj/gemplate/parse"
)
// HasDot reports whether the tree rooted at node contains a dot node or a
// field node.
//
// The search descends through action, chain, command, list and pipe nodes as
// well as the pipeline and both branches of if, range and with nodes. Every
// other node type, and a nil node, yields false.
func HasDot(node parse.Node) bool {
	if node == nil {
		return false
	}

	switch node.Type() {
	case parse.NodeDot, parse.NodeField:
		return true

	case parse.NodeAction:
		return HasDot(node.(*parse.ActionNode).Pipe)

	case parse.NodeChain:
		return HasDot(node.(*parse.ChainNode).Node)

	case parse.NodeCommand:
		for _, arg := range node.(*parse.CommandNode).Args {
			if HasDot(arg) {
				return true
			}
		}
		return false

	case parse.NodeList:
		for _, child := range node.(*parse.ListNode).Nodes {
			if HasDot(child) {
				return true
			}
		}
		return false

	case parse.NodePipe:
		for _, command := range node.(*parse.PipeNode).Cmds {
			if HasDot(command) {
				return true
			}
		}
		return false

	case parse.NodeIf:
		// The explicit nil checks keep nil pointers from being wrapped in
		// a non-nil parse.Node interface value.
		branch := node.(*parse.IfNode)
		if branch.Pipe != nil && HasDot(branch.Pipe) {
			return true
		}
		if branch.List != nil && HasDot(branch.List) {
			return true
		}
		return branch.ElseList != nil && HasDot(branch.ElseList)

	case parse.NodeRange:
		branch := node.(*parse.RangeNode)
		if branch.Pipe != nil && HasDot(branch.Pipe) {
			return true
		}
		if branch.List != nil && HasDot(branch.List) {
			return true
		}
		return branch.ElseList != nil && HasDot(branch.ElseList)

	case parse.NodeWith:
		branch := node.(*parse.WithNode)
		if branch.Pipe != nil && HasDot(branch.Pipe) {
			return true
		}
		if branch.List != nil && HasDot(branch.List) {
			return true
		}
		return branch.ElseList != nil && HasDot(branch.ElseList)
	}
	return false
}
// HasDollar reports whether the tree rooted at node contains a variable node
// whose first identifier is "$".
//
// The search descends through action, chain, command, list and pipe nodes as
// well as the pipeline and both branches of if, range and with nodes. Every
// other node type, and a nil node, yields false.
func HasDollar(node parse.Node) bool {
	if node == nil {
		return false
	}

	switch node.Type() {
	case parse.NodeVariable:
		variable := node.(*parse.VariableNode)
		return variable.Ident[0] == "$"

	case parse.NodeAction:
		return HasDollar(node.(*parse.ActionNode).Pipe)

	case parse.NodeChain:
		return HasDollar(node.(*parse.ChainNode).Node)

	case parse.NodeCommand:
		for _, arg := range node.(*parse.CommandNode).Args {
			if HasDollar(arg) {
				return true
			}
		}
		return false

	case parse.NodeList:
		for _, child := range node.(*parse.ListNode).Nodes {
			if HasDollar(child) {
				return true
			}
		}
		return false

	case parse.NodePipe:
		for _, command := range node.(*parse.PipeNode).Cmds {
			if HasDollar(command) {
				return true
			}
		}
		return false

	case parse.NodeIf:
		// The explicit nil checks keep nil pointers from being wrapped in
		// a non-nil parse.Node interface value.
		branch := node.(*parse.IfNode)
		if branch.Pipe != nil && HasDollar(branch.Pipe) {
			return true
		}
		if branch.List != nil && HasDollar(branch.List) {
			return true
		}
		return branch.ElseList != nil && HasDollar(branch.ElseList)

	case parse.NodeRange:
		branch := node.(*parse.RangeNode)
		if branch.Pipe != nil && HasDollar(branch.Pipe) {
			return true
		}
		if branch.List != nil && HasDollar(branch.List) {
			return true
		}
		return branch.ElseList != nil && HasDollar(branch.ElseList)

	case parse.NodeWith:
		branch := node.(*parse.WithNode)
		if branch.Pipe != nil && HasDollar(branch.Pipe) {
			return true
		}
		if branch.List != nil && HasDollar(branch.List) {
			return true
		}
		return branch.ElseList != nil && HasDollar(branch.ElseList)
	}
	return false
}
package main
import (
"errors"
"fmt"
"go/ast"
"go/build"
"go/importer"
"go/parser"
"go/token"
"go/types"
"io"
"os"
"path/filepath"
"sort"
"git.sr.ht/~rj/flags"
"git.sr.ht/~rj/gemplate/internal/compile"
"git.sr.ht/~rj/gemplate/internal/htmlescape"
"git.sr.ht/~rj/gemplate/parse"
)
var (
// version is the version string passed to the flags package in main.
// NOTE(review): the "development" default is presumably overridden at
// build time (e.g. via -ldflags) — confirm against the build scripts.
version = "development"
)
func main() {
cmd := &Command{
In: []string{},
Out: "-",
Package: "",
}
flags.RunSingle(cmd,
flags.Name("gemplate"),
flags.Version(version),
)
}
// Command holds the command-line configuration of the template compiler.
// The struct tags describe how each field is exposed on the command line.
type Command struct {
// In lists the template files to compile.
In []string `flags:"" description:"Input filename or filenames. Use - to read from standard input."`
// Out is the destination of the generated Go code; "-" selects standard output.
Out string `flags:"-o,--output" description:"Output filename. Use - to write to standard output."`
// Package is the package name for the generated file; when empty, Run
// takes it from the type-checked package.
Package string `flags:"-p,--package" description:"Package name. Only required if it cannot be inferred."`
// Dot is the static type of dot for the file-scope template; when empty,
// RunFilename discards the file-scope template.
Dot string `flags:"-d,--dot" description:"Static type for the file scope's template's cursor."`
// HTML enables escaping of the templates through the htmlescape package.
HTML bool `flags:"-h,--html" description:"Compile templates that are safe against code injection."`
// Verbose enables the messages printed by Info.
Verbose bool `flags:"-v,--verbose" description:"Verbose output." env:"VERBOSE"`
}
// Run compiles the input templates into Go source code.
//
// It checks that all input files live in the same directory, type-checks the
// Go package found there (type errors are reported through Info and
// otherwise tolerated), parses every template, applies HTML escaping when
// cmd.HTML is set, and writes the generated code to cmd.Out. It returns an
// error when no input file was given or when any of these steps fails.
func (cmd *Command) Run() error {
	// Without this guard the cmd.In[0] accesses below would panic when the
	// command is run without input files.
	if len(cmd.In) == 0 {
		return fmt.Errorf("no input files")
	}

	// All of the templates must be in the same directory.
	packageDir := filepath.Dir(cmd.In[0])
	for _, v := range cmd.In {
		if rel, err := filepath.Rel(packageDir, v); err != nil {
			return err
		} else if rel != filepath.Base(v) {
			return fmt.Errorf("all input files must be in the same directory")
		}
	}

	// Get type checker for the package
	cmd.Info("info:", "package dir:", packageDir)
	pkg, err := cmd.readPackage(packageDir)
	if err != nil {
		// Type errors are only reported; anything else is fatal.
		var typesError types.Error
		if !errors.As(err, &typesError) {
			return err
		}
		cmd.Info("warn:", err)
	}

	// Parse all templates.
	tree, err := cmd.RunFilename(pkg, cmd.In[0])
	if err != nil {
		return err
	}
	for _, filename := range cmd.In[1:] {
		tree2, err := cmd.RunFilename(pkg, filename)
		if err != nil {
			return err
		}

		// Copy templates into the first tree.
		for k, v := range tree2 {
			tree[k] = v
		}
	}

	// Escape template for HTML if requested.
	if cmd.HTML {
		// Escaping the templates will generate new entries in the tree map, but
		// only the original templates need to be escaped.
		toplevel := make([]*parse.Tree, 0, len(tree))
		for _, v := range tree {
			toplevel = append(toplevel, v)
		}

		// Now, we can actually escape the templates.
		for _, v := range toplevel {
			_, err := htmlescape.Escape(tree, v)
			if err != nil {
				return err
			}
			// Gross hack: bit 7 of Mode marks the original (pre-escape)
			// templates so that they are emitted as thin wrappers below.
			v.Mode |= 1 << 7
		}
	}

	// Prepare file for writing
	cmd.Info("info:", "output:", cmd.Out)
	out, closer, err := open(cmd.Out)
	if err != nil {
		return err
	}
	defer closer()

	if cmd.Package == "" {
		if pkg == nil {
			return fmt.Errorf("package name unknown")
		}
		cmd.Package = pkg.Name()
		cmd.Info("info: using '", cmd.Package, "' as package name\n")
	}

	// Compile
	fmt.Fprintf(out, "package %s\n\n", cmd.Package)
	if cmd.HTML {
		fmt.Fprint(out, "import(\n\t\"fmt\"\n\t\"io\"\n\t\"git.sr.ht/~rj/gemplate/htmlescape\"\n)\n\n")
	} else {
		fmt.Fprint(out, "import(\n\t\"fmt\"\n\t\"io\"\n)\n\n")
	}
	fmt.Fprint(out,
		"// Suppress errors if package fmt is not used.\n",
		"var (\n\t_ = fmt.Fprint\n)\n\n",
	)
	if cmd.HTML {
		fmt.Fprint(out,
			"// Suppress errors if package htmlescape is not used.\n",
			"var (\n\t_ = htmlescape.EscapeHTML\n)\n\n",
		)
	}

	// To have reproducible output, need to have a reproducible sorting of the
	// templates.
	templates := make([]*parse.Tree, 0, len(tree))
	for _, v := range tree {
		templates = append(templates, v)
	}
	sort.Slice(templates, func(i, j int) bool {
		return templates[i].Name < templates[j].Name
	})

	// Should use errors.Join here, but it is not available in the standard
	// library until Go version 1.20.
	for _, v := range templates {
		err := func() error {
			if !cmd.HTML {
				return compile.Compile(out, v, pkg, compile.Funcs, compile.MakePackageFuncs(pkg))
			}
			if v.Mode&(1<<7) == 0 {
				// Template generated by the escaper.
				return compile.Compile(out, v, pkg, compile.Funcs, htmlescape.Funcs, compile.MakePackageFuncs(pkg))
			}
			// Original template: emit a wrapper that forwards to the
			// escaped variant for the text state.
			_, err := fmt.Fprintf(out, "func %s(w io.Writer, dot %s) error {\nreturn %s(w, dot)\n}\n\n",
				v.Name,
				v.DotType,
				v.Name+"_stateText",
			)
			return err
		}()
		if err != nil {
			return err
		}
	}

	return nil
}
// RunFilename reads and parses the template file filename and returns the
// templates it defines, keyed by name.
//
// Parsing also creates a template for the file scope, stored under the key
// filename. When cmd.Dot is empty that template is dropped. Otherwise it is
// renamed to a Go identifier derived from the file name and its DotType is
// set to cmd.Dot; an error is returned if no identifier can be derived.
func (cmd *Command) RunFilename(pkg *types.Package, filename string) (map[string]*parse.Tree, error) {
	cmd.Info("info:", "parse template:", filename)

	// Read and parse the template
	data, err := readInput(filename)
	if err != nil {
		return nil, err
	}
	tree, err := parse.Parse(filename, string(data), "{{", "}}",
		compile.MakeParseFuncs(compile.Funcs),
		compile.MakeParseFuncs(compile.MakePackageFuncs(pkg)),
	)
	if err != nil {
		return nil, err
	}

	// Parsing will create a template for the file-scope. Only keep this
	// template if the user has requested it.
	if cmd.Dot == "" {
		delete(tree, filename)
		return tree, nil
	}

	name := guessTemplateName(filename)
	if name == "" {
		return nil, fmt.Errorf("could not create valid Go identifier from %q", filename)
	}
	node := tree[filename]
	node.Name = name
	node.DotType = cmd.Dot

	return tree, nil
}
// Info prints its arguments as one line of diagnostic output when cmd.Verbose
// is set, and does nothing otherwise.
//
// The line goes to standard error: standard output is the default
// destination of the generated code (Out "-"), so diagnostics written there
// would end up inside the generated Go file.
func (cmd *Command) Info(a ...interface{}) {
	if !cmd.Verbose {
		return
	}
	fmt.Fprintln(os.Stderr, a...)
}
// readInput returns the complete contents of the input named by path.
// The special path "-" reads from standard input, as documented for the
// command's input arguments; any other path is read as a file.
func readInput(path string) ([]byte, error) {
	if path == "-" {
		return io.ReadAll(os.Stdin)
	}
	return os.ReadFile(path)
}
// readPackage parses and type-checks the Go package in the directory path.
//
// It returns (nil, nil) when the directory contains no Go files. The file
// that cmd.Out refers to is left out, so that previously generated output is
// not fed back into the type checker. Function bodies are ignored during
// type checking. The result of the type checker is returned as is, including
// any error it reports.
func (cmd *Command) readPackage(path string) (*types.Package, error) {
	buildPkg, err := build.ImportDir(path, 0)
	if err != nil {
		var noGo *build.NoGoError
		if !errors.As(err, &noGo) {
			return nil, err
		}
		return nil, nil
	}

	fileSet := token.NewFileSet()
	parsed := []*ast.File{}
	for _, name := range buildPkg.GoFiles {
		full := filepath.Join(path, name)
		// Skip the file we are about to (re)generate.
		rel, relErr := filepath.Rel(full, cmd.Out)
		if relErr == nil && rel == "." {
			continue
		}

		cmd.Info("info:", "parsing Go file:", full)
		file, err := parser.ParseFile(fileSet, full, nil, parser.SkipObjectResolution|parser.AllErrors)
		if err != nil {
			return nil, err
		}
		parsed = append(parsed, file)
	}

	// Type-check the package.
	checker := types.Config{
		IgnoreFuncBodies: true,
		Importer:         importer.Default(),
	}
	return checker.Check("cmd/hello", fileSet, parsed, nil)
}
// open returns the file to write generated code to, together with a function
// that releases it.
//
// The path "-" selects standard output, for which the release function does
// nothing. Any other path is created or truncated with mode 0600, and the
// release function closes the file. A non-nil release function is returned
// even on error.
func open(path string) (*os.File, func(), error) {
	noop := func() {}
	if path == "-" {
		return os.Stdout, noop, nil
	}

	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600)
	if err != nil {
		return nil, noop, err
	}
	closeFile := func() { f.Close() }
	return f, closeFile, nil
}
// guessTemplateName derives a Go identifier from a file name: it returns the
// longest prefix of the base name of name that token.IsIdentifier accepts, or
// "" if there is none.
func guessTemplateName(name string) string {
	base := filepath.Base(name)
	for end := len(base); end > 0; end-- {
		if candidate := base[:end]; token.IsIdentifier(candidate) {
			return candidate
		}
	}
	return ""
}
package parse
// ErrorContext returns the error context of node as reported by the tree the
// node belongs to. For a nil node, or a node that has no tree, it returns
// "?:?" and an empty string.
func ErrorContext(node Node) (string, string) {
	if node == nil {
		return "?:?", ""
	}
	if node.tree() == nil {
		return "?:?", ""
	}
	return node.tree().ErrorContext(node)
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package parse
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
// item represents a token or text string returned from the scanner.
// The lexer builds items with positional composite literals, so the order of
// the fields matters.
type item struct {
typ itemType // The type of this item.
pos Pos // The starting position, in bytes, of this item in the input string.
val string // The value of this item.
line int // The line number at the start of this item.
}
// String returns a printable form of the item: "EOF" for end of input, the
// message itself for errors, "<keyword>" for keywords, and the quoted value
// for everything else, cut to ten characters and followed by "..." when the
// value is longer than ten bytes.
func (i item) String() string {
	if i.typ == itemEOF {
		return "EOF"
	}
	if i.typ == itemError {
		return i.val
	}
	if i.typ > itemKeyword {
		return fmt.Sprintf("<%s>", i.val)
	}
	if len(i.val) > 10 {
		return fmt.Sprintf("%.10q...", i.val)
	}
	return fmt.Sprintf("%q", i.val)
}
// itemType identifies the type of lex items.
type itemType int
// The item types. The order matters: every keyword is declared after
// itemKeyword, and the lexer tests "typ > itemKeyword" to recognize keywords.
const (
itemError itemType = iota // error occurred; value is text of error
itemBool // boolean constant
itemChar // printable ASCII character; grab bag for comma etc.
itemCharConstant // character constant
itemComment // comment text
itemComplex // complex constant (1+2i); imaginary is just a number
itemAssign // equals ('=') introducing an assignment
itemDeclare // colon-equals (':=') introducing a declaration
itemEOF // end of input
itemField // alphanumeric identifier starting with '.'
itemIdentifier // alphanumeric identifier not starting with '.'
itemLeftDelim // left action delimiter
itemLeftParen // '(' inside action
itemNumber // simple number, including imaginary
itemPipe // pipe symbol
itemRawString // raw quoted string (includes quotes)
itemRightDelim // right action delimiter
itemRightParen // ')' inside action
itemSpace // run of spaces separating arguments
itemString // quoted string (includes quotes)
itemText // plain text
itemVariable // variable starting with '$', such as '$' or '$1' or '$hello'
// Keywords appear after all the rest.
itemKeyword // used only to delimit the keywords
itemBlock // block keyword
itemBreak // break keyword
itemContinue // continue keyword
itemDot // the cursor, spelled '.'
itemDefine // define keyword
itemElse // else keyword
itemEnd // end keyword
itemIf // if keyword
itemNil // the untyped nil constant, easiest to treat as a keyword
itemRange // range keyword
itemTemplate // template keyword
itemWith // with keyword
)
// key maps the spelling of each keyword to its item type. Looking up a word
// that is not a keyword yields the zero value, itemError, which does not
// compare greater than itemKeyword.
var key = map[string]itemType{
".": itemDot,
"block": itemBlock,
"break": itemBreak,
"continue": itemContinue,
"define": itemDefine,
"else": itemElse,
"end": itemEnd,
"if": itemIf,
"range": itemRange,
"nil": itemNil,
"template": itemTemplate,
"with": itemWith,
}
// eof is the value returned by lexer.next once the input is exhausted.
const eof = -1
// Trimming spaces.
// If the action begins "{{- " rather than "{{", then all space/tab/newlines
// preceding the action are trimmed; conversely if it ends " -}}" the
// leading spaces of the text following the action are trimmed. This is done
// entirely in the lexer; the parser never sees it happen. We require an ASCII
// space (' ', \t, \r, \n) to be present to avoid ambiguity with things like
// "{{-3}}". It reads better with the space present anyway. For simplicity,
// only ASCII does the job.
const (
spaceChars = " \t\r\n" // These are the space characters defined by Go itself.
trimMarker = '-' // Attached to left/right delimiter, trims trailing spaces from preceding/following text.
trimMarkerLen = Pos(1 + 1) // marker plus space before or after
)
// stateFn represents the state of the scanner as a function that returns the next state.
// A nil stateFn tells nextItem that an item is ready to be returned.
type stateFn func(*lexer) stateFn
// lexer holds the state of the scanner.
// The pending item covers input[start:pos]; line and startLine are the line
// numbers that correspond to pos and start.
type lexer struct {
name string // the name of the input; used only for error reports
input string // the string being scanned
leftDelim string // start of action marker
rightDelim string // end of action marker
pos Pos // current position in the input
start Pos // start position of this item
atEOF bool // we have hit the end of input and returned eof
parenDepth int // nesting depth of ( ) exprs
line int // 1+number of newlines seen
startLine int // start line of this item
item item // item to return to parser
insideAction bool // are we inside an action?
options lexOptions
}
// lexOptions control behavior of the lexer. All default to false.
// When breakOK or continueOK is false, the corresponding word is lexed as a
// plain identifier instead of a keyword.
type lexOptions struct {
emitComment bool // emit itemComment tokens.
breakOK bool // break keyword allowed
continueOK bool // continue keyword allowed
}
// next consumes and returns the next rune in the input, keeping the line
// count up to date. At the end of the input it records that fact in atEOF
// and returns eof without moving.
func (l *lexer) next() rune {
	if len(l.input) <= int(l.pos) {
		l.atEOF = true
		return eof
	}
	r, size := utf8.DecodeRuneInString(l.input[l.pos:])
	if r == '\n' {
		l.line++
	}
	l.pos += Pos(size)
	return r
}
// peek returns the next rune in the input without consuming it: it reads the
// rune with next and immediately undoes that step with backup.
func (l *lexer) peek() rune {
	ahead := l.next()
	l.backup()
	return ahead
}
// backup steps back one rune. It does nothing once the end of the input has
// been reached or when the lexer is at the start of the input.
func (l *lexer) backup() {
	if l.atEOF || l.pos <= 0 {
		return
	}
	r, size := utf8.DecodeLastRuneInString(l.input[:l.pos])
	l.pos -= Pos(size)
	// Correct newline count.
	if r == '\n' {
		l.line--
	}
}
// thisItem builds an item of type t from the pending input (from start up to
// pos) and then moves the start of the next item to the current position.
func (l *lexer) thisItem(t itemType) item {
	pending := item{
		typ:  t,
		pos:  l.start,
		val:  l.input[l.start:l.pos],
		line: l.startLine,
	}
	l.start, l.startLine = l.pos, l.line
	return pending
}
// emit turns the pending input into an item of type t and hands it to the
// parser. The returned state is whatever emitItem returns.
func (l *lexer) emit(t itemType) stateFn {
	pending := l.thisItem(t)
	return l.emitItem(pending)
}
// emitItem stores i as the item to return to the parser. It returns a nil
// state, which makes nextItem stop and return the stored item.
func (l *lexer) emitItem(i item) stateFn {
	l.item = i
	return stateFn(nil)
}
// ignore discards the pending input before the current position.
// It adds the newlines in the discarded text to the line count, so use it
// only for text that was skipped without calling l.next.
func (l *lexer) ignore() {
	skipped := l.input[l.start:l.pos]
	l.line += strings.Count(skipped, "\n")
	l.start, l.startLine = l.pos, l.line
}
// accept consumes the next rune if it is one of the runes in valid and
// reports whether it did so.
func (l *lexer) accept(valid string) bool {
	if !strings.ContainsRune(valid, l.next()) {
		l.backup()
		return false
	}
	return true
}
// acceptRun consumes runes for as long as they are part of valid.
func (l *lexer) acceptRun(valid string) {
	for {
		if !strings.ContainsRune(valid, l.next()) {
			break
		}
	}
	// The rune that ended the run does not belong to it.
	l.backup()
}
// errorf stores an error item carrying the formatted message and ends the
// scan: the input is emptied and the positions are reset, and the returned
// nil state terminates l.nextItem.
func (l *lexer) errorf(format string, args ...any) stateFn {
	msg := fmt.Sprintf(format, args...)
	l.item = item{typ: itemError, pos: l.start, val: msg, line: l.startLine}
	l.start, l.pos = 0, 0
	l.input = l.input[:0]
	return nil
}
// nextItem returns the next item from the input. It is called by the parser.
//
// The state machine starts in lexInsideAction when the previous item left
// the lexer inside an action, and in lexText otherwise. It runs until a
// state function returns nil, which means an item has been stored. The
// stored item is preset to EOF before the machine starts.
func (l *lexer) nextItem() item {
	l.item = item{typ: itemEOF, pos: l.pos, val: "EOF", line: l.startLine}

	var state stateFn = lexText
	if l.insideAction {
		state = lexInsideAction
	}
	for state != nil {
		state = state(l)
	}
	return l.item
}
// lex creates a new scanner for the input string. Empty left or right
// delimiters are replaced by the defaults "{{" and "}}". Line counting starts
// at 1 and the scanner starts outside of any action.
func lex(name, input, left, right string) *lexer {
	l := &lexer{
		name:       name,
		input:      input,
		leftDelim:  leftDelim,
		rightDelim: rightDelim,
		line:       1,
		startLine:  1,
	}
	if left != "" {
		l.leftDelim = left
	}
	if right != "" {
		l.rightDelim = right
	}
	return l
}
// state functions
// Default action delimiters and the comment markers recognized inside an
// action.
const (
leftDelim = "{{"
rightDelim = "}}"
leftComment = "/*"
rightComment = "*/"
)
// lexText scans until an opening action delimiter, "{{".
//
// Text in front of the delimiter is emitted as an itemText. If
// hasLeftTrimMarker reports a trim marker right after the delimiter, the
// trailing spaces of that text are dropped first. Empty text is not emitted;
// the lexer goes straight on to lexLeftDelim instead. When no delimiter is
// left, the remaining input is emitted as itemText, or itemEOF if nothing
// remains.
func lexText(l *lexer) stateFn {
if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
if x > 0 {
l.pos += Pos(x)
// Do we trim any trailing space?
trimLength := Pos(0)
delimEnd := l.pos + Pos(len(l.leftDelim))
if hasLeftTrimMarker(l.input[delimEnd:]) {
trimLength = rightTrimLength(l.input[l.start:l.pos])
}
// Step back over the spaces to trim so that they are not part of the
// text item, then move forward again and drop them with ignore.
l.pos -= trimLength
// The text was skipped without calling next, so count its newlines here.
l.line += strings.Count(l.input[l.start:l.pos], "\n")
i := l.thisItem(itemText)
l.pos += trimLength
l.ignore()
if len(i.val) > 0 {
return l.emitItem(i)
}
}
return lexLeftDelim
}
l.pos = Pos(len(l.input))
// Correctly reached EOF.
if l.pos > l.start {
l.line += strings.Count(l.input[l.start:l.pos], "\n")
return l.emit(itemText)
}
return l.emit(itemEOF)
}
// rightTrimLength returns the number of bytes of trailing space characters
// (see spaceChars) at the end of s.
func rightTrimLength(s string) Pos {
	kept := strings.TrimRight(s, spaceChars)
	return Pos(len(s) - len(kept))
}
// atRightDelim reports whether the lexer is at a right delimiter, possibly
// preceded by a trim marker. trimSpaces tells whether the trim marker is
// present.
func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
	rest := l.input[l.pos:]
	switch {
	case hasRightTrimMarker(rest) && strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim):
		// With trim marker.
		return true, true
	case strings.HasPrefix(rest, l.rightDelim):
		// Without trim marker.
		return true, false
	default:
		return false, false
	}
}
// leftTrimLength returns the number of bytes of leading space characters
// (see spaceChars) at the beginning of s.
func leftTrimLength(s string) Pos {
	kept := strings.TrimLeft(s, spaceChars)
	return Pos(len(s) - len(kept))
}
// lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
// (The text to be trimmed has already been emitted.)
//
// If a comment marker follows the delimiter (and the optional trim marker),
// the delimiter is dropped and scanning continues in lexComment. Otherwise an
// itemLeftDelim is emitted, the lexer is marked as being inside an action,
// and the paren depth is reset.
func lexLeftDelim(l *lexer) stateFn {
l.pos += Pos(len(l.leftDelim))
trimSpace := hasLeftTrimMarker(l.input[l.pos:])
// afterMarker is the number of bytes taken up by the trim marker, if any.
afterMarker := Pos(0)
if trimSpace {
afterMarker = trimMarkerLen
}
if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
l.pos += afterMarker
l.ignore()
return lexComment
}
// Build the delimiter item before skipping the trim marker so that the
// marker is not part of the item's value.
i := l.thisItem(itemLeftDelim)
l.insideAction = true
l.pos += afterMarker
l.ignore()
l.parenDepth = 0
return l.emitItem(i)
}
// lexComment scans a comment. The left comment marker is known to be present.
//
// The comment must be closed and must be followed directly by the right
// delimiter (optionally with a trim marker); otherwise an error is reported.
// The comment item is emitted only if the emitComment option is set. In any
// case the right delimiter, and with a trim marker the spaces after it, are
// skipped.
func lexComment(l *lexer) stateFn {
l.pos += Pos(len(leftComment))
x := strings.Index(l.input[l.pos:], rightComment)
if x < 0 {
return l.errorf("unclosed comment")
}
l.pos += Pos(x + len(rightComment))
delim, trimSpace := l.atRightDelim()
if !delim {
return l.errorf("comment ends before closing delimiter")
}
// The comment was skipped without calling next, so count its newlines
// before thisItem moves the start of the pending input.
l.line += strings.Count(l.input[l.start:l.pos], "\n")
i := l.thisItem(itemComment)
if trimSpace {
l.pos += trimMarkerLen
}
l.pos += Pos(len(l.rightDelim))
if trimSpace {
l.pos += leftTrimLength(l.input[l.pos:])
}
l.ignore()
if l.options.emitComment {
return l.emitItem(i)
}
return lexText
}
// lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
//
// It emits an itemRightDelim that covers the delimiter only and marks the
// lexer as being outside an action. With a trim marker, the marker and the
// spaces following the delimiter are skipped.
func lexRightDelim(l *lexer) stateFn {
_, trimSpace := l.atRightDelim()
if trimSpace {
// Drop the trim marker so that it is not part of the delimiter item.
l.pos += trimMarkerLen
l.ignore()
}
l.pos += Pos(len(l.rightDelim))
i := l.thisItem(itemRightDelim)
if trimSpace {
// Drop the spaces at the start of the text that follows.
l.pos += leftTrimLength(l.input[l.pos:])
l.ignore()
}
l.insideAction = false
return l.emitItem(i)
}
// lexInsideAction scans the elements inside action delimiters.
//
// It first checks for the right delimiter, which is an error while a
// parenthesis is still open. Otherwise it consumes one rune and either emits
// a single-rune item directly or returns the state function for the kind of
// token that the rune starts. Reaching the end of the input inside an action
// is an error.
func lexInsideAction(l *lexer) stateFn {
// Either number, quoted string, or identifier.
// Spaces separate arguments; runs of spaces turn into itemSpace.
// Pipe symbols separate and are emitted.
delim, _ := l.atRightDelim()
if delim {
if l.parenDepth == 0 {
return lexRightDelim
}
return l.errorf("unclosed left paren")
}
switch r := l.next(); {
case r == eof:
return l.errorf("unclosed action")
case isSpace(r):
l.backup() // Put space back in case we have " -}}".
return lexSpace
case r == '=':
return l.emit(itemAssign)
case r == ':':
if l.next() != '=' {
return l.errorf("expected :=")
}
return l.emit(itemDeclare)
case r == '|':
return l.emit(itemPipe)
case r == '"':
return lexQuote
case r == '`':
return lexRawQuote
case r == '$':
return lexVariable
case r == '\'':
return lexChar
case r == '.':
// special look-ahead for ".field" so we don't break l.backup().
if l.pos < Pos(len(l.input)) {
r := l.input[l.pos]
if r < '0' || '9' < r {
return lexField
}
}
// A '.' followed by a digit, or at the very end of the input, is handed
// to the number scanner below.
fallthrough // '.' can start a number.
case r == '+' || r == '-' || ('0' <= r && r <= '9'):
l.backup()
return lexNumber
case isAlphaNumeric(r):
l.backup()
return lexIdentifier
case r == '(':
l.parenDepth++
return l.emit(itemLeftParen)
case r == ')':
l.parenDepth--
if l.parenDepth < 0 {
return l.errorf("unexpected right paren")
}
return l.emit(itemRightParen)
case r <= unicode.MaxASCII && unicode.IsPrint(r):
return l.emit(itemChar)
default:
return l.errorf("unrecognized character in action: %#U", r)
}
}
// lexSpace scans a run of space characters.
// The first space is known to be present but has not been consumed yet.
// A trim-marked right delimiter starts with a space, which needs care: that
// space belongs to the delimiter, not to the run.
func lexSpace(l *lexer) stateFn {
	spaces := 0
	for isSpace(l.peek()) {
		l.next()
		spaces++
	}
	// Be careful about a trim-marked closing delimiter, which has a minus
	// after a space. We know there is a space, so check for the '-' that might follow.
	if hasRightTrimMarker(l.input[l.pos-1:]) && strings.HasPrefix(l.input[l.pos-1+trimMarkerLen:], l.rightDelim) {
		l.backup() // Before the space.
		if spaces == 1 {
			return lexRightDelim // On the delim, so go right to that.
		}
	}
	return l.emit(itemSpace)
}
// lexIdentifier scans an alphanumeric word and emits it as a keyword, field,
// boolean constant or identifier. The word must be followed by a valid
// terminator (see atTerminator); otherwise an error is reported. The words
// "break" and "continue" are emitted as plain identifiers unless the
// corresponding lexer option allows the keyword.
func lexIdentifier(l *lexer) stateFn {
	r := l.next()
	for isAlphaNumeric(r) {
		// absorb.
		r = l.next()
	}
	l.backup()
	if !l.atTerminator() {
		return l.errorf("bad character %#U", r)
	}

	word := l.input[l.start:l.pos]
	switch kw := key[word]; {
	case kw > itemKeyword:
		if (kw == itemBreak && !l.options.breakOK) || (kw == itemContinue && !l.options.continueOK) {
			return l.emit(itemIdentifier)
		}
		return l.emit(kw)
	case word[0] == '.':
		return l.emit(itemField)
	case word == "true" || word == "false":
		return l.emit(itemBool)
	}
	return l.emit(itemIdentifier)
}
// lexField scans a field: .Alphanumeric.
// The leading '.' has already been consumed.
func lexField(l *lexer) stateFn {
	const kind = itemField
	return lexFieldOrVariable(l, kind)
}
// lexVariable scans a Variable: $Alphanumeric.
// The leading '$' has already been consumed.
func lexVariable(l *lexer) stateFn {
	if !l.atTerminator() {
		return lexFieldOrVariable(l, itemVariable)
	}
	// Nothing interesting follows -> "$".
	return l.emit(itemVariable)
}
// lexFieldOrVariable scans a field or variable: [.$]Alphanumeric.
// The leading '.' or '$' has already been consumed. A lone '.' is emitted as
// itemDot and a lone '$' as itemVariable. The name must be followed by a
// valid terminator (see atTerminator); otherwise an error is reported.
func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
	if l.atTerminator() {
		// Nothing interesting follows -> "." or "$".
		bare := itemDot
		if typ == itemVariable {
			bare = itemVariable
		}
		return l.emit(bare)
	}

	r := l.next()
	for isAlphaNumeric(r) {
		r = l.next()
	}
	l.backup()

	if !l.atTerminator() {
		return l.errorf("bad character %#U", r)
	}
	return l.emit(typ)
}
// atTerminator reports whether the input is at a character that may validly
// follow an identifier: a space, the end of the input, one of ". , | : ) (",
// or the right delimiter. This breaks .X.Y into two pieces and also rejects
// input like "$x+2" that lacks a space, in case we decide one day to
// implement arithmetic.
func (l *lexer) atTerminator() bool {
	switch r := l.peek(); {
	case isSpace(r):
		return true
	case r == eof, r == '.', r == ',', r == '|', r == ':', r == ')', r == '(':
		return true
	}
	return strings.HasPrefix(l.input[l.pos:], l.rightDelim)
}
// lexChar scans a character constant. The opening quote has already been
// consumed. Only termination is checked here; syntax checking is done by
// the parser.
func lexChar(l *lexer) stateFn {
	for {
		r := l.next()
		if r == '\\' {
			// Skip the escaped rune, unless the input ends or the line breaks
			// right after the backslash; those fall through to the error below.
			r = l.next()
			if r != eof && r != '\n' {
				continue
			}
		}
		switch r {
		case eof, '\n':
			return l.errorf("unterminated character constant")
		case '\'':
			return l.emit(itemCharConstant)
		}
	}
}
// lexNumber scans a number: decimal, octal, hex, float, or imaginary. It is
// not a perfect number scanner - for instance it accepts "." and "0x0.2"
// and "089" - but when it is wrong the input is invalid and the parser (via
// strconv) will notice.
func lexNumber(l *lexer) stateFn {
	if !l.scanNumber() {
		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
	}
	switch l.peek() {
	case '+', '-':
		// Complex: 1+2i. No spaces, and the second part must end in 'i'.
		if l.scanNumber() && l.input[l.pos-1] == 'i' {
			return l.emit(itemComplex)
		}
		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
	}
	return l.emit(itemNumber)
}
// scanNumber consumes one numeric literal (optionally signed, optionally
// imaginary) and reports whether it is followed by something other than an
// alphanumeric rune. On failure the offending rune is consumed as well.
func (l *lexer) scanNumber() bool {
	const (
		decimalDigits = "0123456789_"
		hexDigits     = "0123456789abcdefABCDEF_"
	)
	// Optional leading sign.
	l.accept("+-")
	// A leading 0 may introduce a base prefix.
	digits := decimalDigits
	if l.accept("0") {
		// Note: Leading 0 does not mean octal in floats.
		switch {
		case l.accept("xX"):
			digits = hexDigits
		case l.accept("oO"):
			digits = "01234567_"
		case l.accept("bB"):
			digits = "01_"
		}
	}
	l.acceptRun(digits)
	if l.accept(".") {
		l.acceptRun(digits)
	}
	// Decimal literals take an 'e' exponent, hex literals a 'p' exponent.
	if digits == decimalDigits && l.accept("eE") {
		l.accept("+-")
		l.acceptRun("0123456789_")
	}
	if digits == hexDigits && l.accept("pP") {
		l.accept("+-")
		l.acceptRun("0123456789_")
	}
	// Is it imaginary?
	l.accept("i")
	// Next thing mustn't be alphanumeric.
	if isAlphaNumeric(l.peek()) {
		l.next()
		return false
	}
	return true
}
// lexQuote scans a double-quoted string up to and including the closing
// quote. A backslash escapes the rune that follows it; end of input or a
// newline before the closing quote is an error.
func lexQuote(l *lexer) stateFn {
	for {
		r := l.next()
		if r == '\\' {
			// Skip the escaped rune, unless the input ends or the line breaks
			// right after the backslash; those fall through to the error below.
			r = l.next()
			if r != eof && r != '\n' {
				continue
			}
		}
		switch r {
		case eof, '\n':
			return l.errorf("unterminated quoted string")
		case '"':
			return l.emit(itemString)
		}
	}
}
// lexRawQuote scans a raw (back-quoted) string up to and including the
// closing back quote. Reaching end of input first is an error.
func lexRawQuote(l *lexer) stateFn {
	for r := l.next(); r != '`'; r = l.next() {
		if r == eof {
			return l.errorf("unterminated raw quoted string")
		}
	}
	return l.emit(itemRawString)
}
// isSpace reports whether r is a space, tab, carriage return, or newline.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// isAlphaNumeric reports whether r is an underscore, a Unicode letter, or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}
// hasLeftTrimMarker reports whether s starts with the trim marker followed
// by a space character.
func hasLeftTrimMarker(s string) bool {
	if len(s) < 2 {
		return false
	}
	return s[0] == trimMarker && isSpace(rune(s[1]))
}
// hasRightTrimMarker reports whether s starts with a space character
// followed by the trim marker.
func hasRightTrimMarker(s string) bool {
	if len(s) < 2 {
		return false
	}
	return isSpace(rune(s[0])) && s[1] == trimMarker
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Parse nodes.
package parse
import (
"fmt"
"strconv"
"strings"
)
// textFormat is the fmt verb TextNode.String uses to render its text.
var textFormat = "%s" // Changed to "%q" in tests for better error messages.
// A Node is an element in the parse tree. The interface is trivial.
// The interface contains an unexported method so that only
// types local to this package can satisfy it.
type Node interface {
// Type reports which kind of node this is.
Type() NodeType
// String returns the node's textual form.
String() string
// Copy does a deep copy of the Node and all its components.
// To avoid type assertions, some XxxNodes also have specialized
// CopyXxx methods that return *XxxNode.
Copy() Node
Position() Pos // byte position of start of node in full original input string
// tree returns the containing *Tree.
// It is unexported so all implementations of Node are in this package.
tree() *Tree
// writeTo writes the String output to the builder.
writeTo(*strings.Builder)
}
// NodeType identifies the kind of a parse tree node.
type NodeType int

// Pos is a byte position in the original input text from which the
// template was parsed.
type Pos int

// Position returns the receiver itself, so that a type embedding Pos gets a
// Position method for free.
func (pos Pos) Position() Pos {
	return pos
}

// Type returns the receiver itself, so that a type embedding NodeType gets
// a default Type method. Embedded in all non-trivial Nodes.
func (nt NodeType) Type() NodeType {
	return nt
}
// The kinds of node. Values are assigned by iota, so the order of this list
// fixes each constant's numeric value; new kinds must be appended at the end
// to keep the existing values stable. nodeElse and nodeEnd are unexported.
const (
NodeText NodeType = iota // Plain text.
NodeAction // A non-control action such as a field evaluation.
NodeBool // A boolean constant.
NodeChain // A sequence of field accesses.
NodeCommand // An element of a pipeline.
NodeDot // The cursor, dot.
nodeElse // An else action. Not added to tree.
nodeEnd // An end action. Not added to tree.
NodeField // A field or method name.
NodeIdentifier // An identifier; always a function name.
NodeIf // An if action.
NodeList // A list of Nodes.
NodeNil // An untyped nil constant.
NodeNumber // A numerical constant.
NodePipe // A pipeline of commands.
NodeRange // A range action.
NodeString // A string constant.
NodeTemplate // A template invocation action.
NodeVariable // A $ variable.
NodeWith // A with action.
NodeComment // A comment.
NodeBreak // A break action.
NodeContinue // A continue action.
)
// Nodes.

// ListNode holds a sequence of nodes.
type ListNode struct {
	NodeType
	Pos
	tr    *Tree
	Nodes []Node // The element nodes in lexical order.
}

// newList returns an empty ListNode at pos that belongs to t.
func (t *Tree) newList(pos Pos) *ListNode {
	list := &ListNode{NodeType: NodeList, Pos: pos}
	list.tr = t
	return list
}

// append adds n to the end of the list.
func (l *ListNode) append(n Node) {
	l.Nodes = append(l.Nodes, n)
}

// tree returns the Tree the list belongs to.
func (l *ListNode) tree() *Tree {
	return l.tr
}

// String returns the concatenated textual form of the list's elements.
func (l *ListNode) String() string {
	sb := new(strings.Builder)
	l.writeTo(sb)
	return sb.String()
}

// writeTo writes each element, in order, to sb.
func (l *ListNode) writeTo(sb *strings.Builder) {
	for i := range l.Nodes {
		l.Nodes[i].writeTo(sb)
	}
}

// CopyList returns a deep copy of the list. A nil receiver yields nil.
func (l *ListNode) CopyList() *ListNode {
	if l == nil {
		return nil
	}
	dup := &ListNode{tr: l.tr, NodeType: NodeList, Pos: l.Pos}
	for _, elem := range l.Nodes {
		dup.Nodes = append(dup.Nodes, elem.Copy())
	}
	return dup
}

// Copy returns the result of CopyList as a Node.
func (l *ListNode) Copy() Node {
	return l.CopyList()
}
// TextNode holds plain text.
type TextNode struct {
	NodeType
	Pos
	tr   *Tree
	Text []byte // The text; may span newlines.
}

// newText returns a TextNode at pos holding a byte copy of text.
func (t *Tree) newText(pos Pos, text string) *TextNode {
	node := &TextNode{NodeType: NodeText, Pos: pos, Text: []byte(text)}
	node.tr = t
	return node
}

// String renders the text using the package-level textFormat verb.
func (t *TextNode) String() string {
	return fmt.Sprintf(textFormat, t.Text)
}

// writeTo writes the String form of the node to sb.
func (t *TextNode) writeTo(sb *strings.Builder) {
	fmt.Fprintf(sb, textFormat, t.Text)
}

// tree returns the Tree the node belongs to.
func (t *TextNode) tree() *Tree {
	return t.tr
}

// Copy returns a new TextNode with its own copy of the text bytes.
func (t *TextNode) Copy() Node {
	text := append([]byte{}, t.Text...)
	return &TextNode{tr: t.tr, NodeType: NodeText, Pos: t.Pos, Text: text}
}
// CommentNode holds a comment.
type CommentNode struct {
	NodeType
	Pos
	tr   *Tree
	Text string // Comment text.
}

// newComment returns a CommentNode at pos holding text.
func (t *Tree) newComment(pos Pos, text string) *CommentNode {
	node := &CommentNode{NodeType: NodeComment, Pos: pos, Text: text}
	node.tr = t
	return node
}

// String returns the comment text wrapped in "{{" and "}}".
func (c *CommentNode) String() string {
	sb := new(strings.Builder)
	c.writeTo(sb)
	return sb.String()
}

// writeTo writes the comment text wrapped in "{{" and "}}" to sb.
func (c *CommentNode) writeTo(sb *strings.Builder) {
	sb.WriteString("{{" + c.Text + "}}")
}

// tree returns the Tree the node belongs to.
func (c *CommentNode) tree() *Tree {
	return c.tr
}

// Copy returns a new CommentNode with the same tree, position and text.
func (c *CommentNode) Copy() Node {
	dup := *c
	dup.NodeType = NodeComment
	return &CommentNode{tr: dup.tr, NodeType: dup.NodeType, Pos: dup.Pos, Text: dup.Text}
}
// PipeNode holds a pipeline with optional declaration.
type PipeNode struct {
	NodeType
	Pos
	tr       *Tree
	Line     int             // The line number in the input. Deprecated: Kept for compatibility.
	IsAssign bool            // The variables are being assigned, not declared.
	Decl     []*VariableNode // Variables in lexical order.
	Cmds     []*CommandNode  // The commands in lexical order.
}

// newPipeline returns a PipeNode at pos with the given line and declared
// variables and no commands.
func (t *Tree) newPipeline(pos Pos, line int, vars []*VariableNode) *PipeNode {
	pipe := &PipeNode{NodeType: NodePipe, Pos: pos, Line: line, Decl: vars}
	pipe.tr = t
	return pipe
}

// append adds command to the end of the pipeline.
func (p *PipeNode) append(command *CommandNode) {
	p.Cmds = append(p.Cmds, command)
}

// String returns the textual form of the pipeline.
func (p *PipeNode) String() string {
	sb := new(strings.Builder)
	p.writeTo(sb)
	return sb.String()
}

// writeTo writes the comma-separated declarations followed by " = " or
// " := " (when there are any), then the commands separated by " | ".
func (p *PipeNode) writeTo(sb *strings.Builder) {
	for i, v := range p.Decl {
		if i > 0 {
			sb.WriteString(", ")
		}
		v.writeTo(sb)
	}
	if len(p.Decl) > 0 {
		op := " := "
		if p.IsAssign {
			op = " = "
		}
		sb.WriteString(op)
	}
	for i, cmd := range p.Cmds {
		if i > 0 {
			sb.WriteString(" | ")
		}
		cmd.writeTo(sb)
	}
}

// tree returns the Tree the node belongs to.
func (p *PipeNode) tree() *Tree {
	return p.tr
}

// CopyPipe returns a deep copy of the pipeline. A nil receiver yields nil.
func (p *PipeNode) CopyPipe() *PipeNode {
	if p == nil {
		return nil
	}
	decl := make([]*VariableNode, 0, len(p.Decl))
	for _, d := range p.Decl {
		decl = append(decl, d.Copy().(*VariableNode))
	}
	dup := &PipeNode{tr: p.tr, NodeType: NodePipe, Pos: p.Pos, Line: p.Line, Decl: decl}
	dup.IsAssign = p.IsAssign
	for _, cmd := range p.Cmds {
		dup.Cmds = append(dup.Cmds, cmd.Copy().(*CommandNode))
	}
	return dup
}

// Copy returns the result of CopyPipe as a Node.
func (p *PipeNode) Copy() Node {
	return p.CopyPipe()
}
// ActionNode holds an action (something bounded by delimiters).
// Control actions have their own nodes; ActionNode represents simple
// ones such as field evaluations and parenthesized pipelines.
type ActionNode struct {
	NodeType
	Pos
	tr   *Tree
	Line int       // The line number in the input. Deprecated: Kept for compatibility.
	Pipe *PipeNode // The pipeline in the action.
}

// newAction returns an ActionNode at pos wrapping pipe.
func (t *Tree) newAction(pos Pos, line int, pipe *PipeNode) *ActionNode {
	action := &ActionNode{NodeType: NodeAction, Pos: pos, Line: line, Pipe: pipe}
	action.tr = t
	return action
}

// String returns the pipeline's text wrapped in "{{" and "}}".
func (a *ActionNode) String() string {
	sb := new(strings.Builder)
	a.writeTo(sb)
	return sb.String()
}

// writeTo writes "{{", the pipeline, and "}}" to sb.
func (a *ActionNode) writeTo(sb *strings.Builder) {
	const open, closing = "{{", "}}"
	sb.WriteString(open)
	a.Pipe.writeTo(sb)
	sb.WriteString(closing)
}

// tree returns the Tree the node belongs to.
func (a *ActionNode) tree() *Tree {
	return a.tr
}

// Copy returns a new ActionNode whose pipeline is a deep copy.
func (a *ActionNode) Copy() Node {
	pipe := a.Pipe.CopyPipe()
	return a.tr.newAction(a.Pos, a.Line, pipe)
}
// CommandNode holds a command (a pipeline inside an evaluating action).
type CommandNode struct {
	NodeType
	Pos
	tr   *Tree
	Args []Node // Arguments in lexical order: Identifier, field, or constant.
}

// newCommand returns a CommandNode at pos with no arguments.
func (t *Tree) newCommand(pos Pos) *CommandNode {
	cmd := &CommandNode{NodeType: NodeCommand, Pos: pos}
	cmd.tr = t
	return cmd
}

// append adds arg to the end of the argument list.
func (c *CommandNode) append(arg Node) {
	c.Args = append(c.Args, arg)
}

// String returns the textual form of the command.
func (c *CommandNode) String() string {
	sb := new(strings.Builder)
	c.writeTo(sb)
	return sb.String()
}

// writeTo writes the arguments separated by single spaces; an argument
// that is a pipeline is wrapped in parentheses.
func (c *CommandNode) writeTo(sb *strings.Builder) {
	for i, arg := range c.Args {
		if i > 0 {
			sb.WriteByte(' ')
		}
		pipe, isPipe := arg.(*PipeNode)
		if !isPipe {
			arg.writeTo(sb)
			continue
		}
		sb.WriteByte('(')
		pipe.writeTo(sb)
		sb.WriteByte(')')
	}
}

// tree returns the Tree the node belongs to.
func (c *CommandNode) tree() *Tree {
	return c.tr
}

// Copy returns a deep copy of the command. A nil receiver is returned
// unchanged (as a Node holding a nil *CommandNode).
func (c *CommandNode) Copy() Node {
	if c == nil {
		return c
	}
	dup := &CommandNode{tr: c.tr, NodeType: NodeCommand, Pos: c.Pos}
	for _, arg := range c.Args {
		dup.Args = append(dup.Args, arg.Copy())
	}
	return dup
}
// IdentifierNode holds an identifier.
type IdentifierNode struct {
	NodeType
	Pos
	tr    *Tree
	Ident string // The identifier's name.
}

// NewIdentifier returns a new [IdentifierNode] with the given identifier name.
// The position and tree are left unset; see SetPos and SetTree.
func NewIdentifier(ident string) *IdentifierNode {
	node := new(IdentifierNode)
	node.NodeType = NodeIdentifier
	node.Ident = ident
	return node
}

// SetPos sets the position and returns the node so calls can be chained.
// It exists because [NewIdentifier] is a public function whose signature
// cannot be changed.
// TODO: fix one day?
func (i *IdentifierNode) SetPos(pos Pos) *IdentifierNode {
	i.Pos = pos
	return i
}

// SetTree sets the parent tree for the node and returns the node so calls
// can be chained. It exists because [NewIdentifier] is a public function
// whose signature cannot be changed.
// TODO: fix one day?
func (i *IdentifierNode) SetTree(t *Tree) *IdentifierNode {
	i.tr = t
	return i
}

// String returns the identifier's name.
func (i *IdentifierNode) String() string {
	return i.Ident
}

// writeTo writes the identifier's name to sb.
func (i *IdentifierNode) writeTo(sb *strings.Builder) {
	sb.WriteString(i.Ident)
}

// tree returns the Tree the node belongs to.
func (i *IdentifierNode) tree() *Tree {
	return i.tr
}

// Copy returns a new IdentifierNode with the same name, tree and position.
func (i *IdentifierNode) Copy() Node {
	return &IdentifierNode{NodeType: NodeIdentifier, Ident: i.Ident, tr: i.tr, Pos: i.Pos}
}
// VariableNode holds a list of variable names, possibly with chained field
// accesses. The dollar sign is part of the (first) name.
type VariableNode struct {
	NodeType
	Pos
	tr    *Tree
	Ident []string // Variable name and fields in lexical order.
}

// newVariable returns a VariableNode at pos whose Ident is ident split on
// periods.
func (t *Tree) newVariable(pos Pos, ident string) *VariableNode {
	parts := strings.Split(ident, ".")
	return &VariableNode{tr: t, NodeType: NodeVariable, Pos: pos, Ident: parts}
}

// String returns the identifiers joined by periods.
func (v *VariableNode) String() string {
	sb := new(strings.Builder)
	v.writeTo(sb)
	return sb.String()
}

// writeTo writes the identifiers joined by periods to sb.
func (v *VariableNode) writeTo(sb *strings.Builder) {
	sb.WriteString(strings.Join(v.Ident, "."))
}

// tree returns the Tree the node belongs to.
func (v *VariableNode) tree() *Tree {
	return v.tr
}

// Copy returns a new VariableNode with its own copy of the identifier slice.
func (v *VariableNode) Copy() Node {
	ident := append([]string{}, v.Ident...)
	return &VariableNode{tr: v.tr, NodeType: NodeVariable, Pos: v.Pos, Ident: ident}
}
// DotNode holds the special identifier '.'.
type DotNode struct {
	NodeType
	Pos
	tr *Tree
}

// newDot returns a DotNode at pos that belongs to t.
func (t *Tree) newDot(pos Pos) *DotNode {
	dot := &DotNode{NodeType: NodeDot, Pos: pos}
	dot.tr = t
	return dot
}

// Type always returns NodeDot.
func (d *DotNode) Type() NodeType {
	// Override method on embedded NodeType for API compatibility.
	// TODO: Not really a problem; could change API without effect but
	// api tool complains.
	return NodeDot
}

// String returns ".".
func (d *DotNode) String() string {
	const dot = "."
	return dot
}

// writeTo writes the String form of the node to sb.
func (d *DotNode) writeTo(sb *strings.Builder) {
	sb.WriteString(d.String())
}

// tree returns the Tree the node belongs to.
func (d *DotNode) tree() *Tree {
	return d.tr
}

// Copy returns a new DotNode with the same tree and position.
func (d *DotNode) Copy() Node {
	return &DotNode{tr: d.tr, NodeType: NodeDot, Pos: d.Pos}
}
// NilNode holds the special identifier 'nil' representing an untyped nil constant.
type NilNode struct {
	NodeType
	Pos
	tr *Tree
}

// newNil returns a NilNode at pos that belongs to t.
func (t *Tree) newNil(pos Pos) *NilNode {
	node := &NilNode{NodeType: NodeNil, Pos: pos}
	node.tr = t
	return node
}

// Type always returns NodeNil.
func (n *NilNode) Type() NodeType {
	// Override method on embedded NodeType for API compatibility.
	// TODO: Not really a problem; could change API without effect but
	// api tool complains.
	return NodeNil
}

// String returns "nil".
func (n *NilNode) String() string {
	const text = "nil"
	return text
}

// writeTo writes the String form of the node to sb.
func (n *NilNode) writeTo(sb *strings.Builder) {
	sb.WriteString(n.String())
}

// tree returns the Tree the node belongs to.
func (n *NilNode) tree() *Tree {
	return n.tr
}

// Copy returns a new NilNode with the same tree and position.
func (n *NilNode) Copy() Node {
	return &NilNode{tr: n.tr, NodeType: NodeNil, Pos: n.Pos}
}
// FieldNode holds a field (identifier starting with '.').
// The names may be chained ('.x.y').
// The period is dropped from each ident.
type FieldNode struct {
	NodeType
	Pos
	tr    *Tree
	Ident []string // The identifiers in lexical order.
}

// newField returns a FieldNode at pos. The first byte of ident (the
// leading period) is dropped and the rest is split on periods.
func (t *Tree) newField(pos Pos, ident string) *FieldNode {
	names := strings.Split(ident[1:], ".") // [1:] to drop leading period
	return &FieldNode{tr: t, NodeType: NodeField, Pos: pos, Ident: names}
}

// String returns the identifiers, each preceded by a period.
func (f *FieldNode) String() string {
	sb := new(strings.Builder)
	f.writeTo(sb)
	return sb.String()
}

// writeTo writes each identifier, preceded by a period, to sb.
func (f *FieldNode) writeTo(sb *strings.Builder) {
	for i := range f.Ident {
		sb.WriteString("." + f.Ident[i])
	}
}

// tree returns the Tree the node belongs to.
func (f *FieldNode) tree() *Tree {
	return f.tr
}

// Copy returns a new FieldNode with its own copy of the identifier slice.
func (f *FieldNode) Copy() Node {
	ident := append([]string{}, f.Ident...)
	return &FieldNode{tr: f.tr, NodeType: NodeField, Pos: f.Pos, Ident: ident}
}
// ChainNode holds a term followed by a chain of field accesses (identifier starting with '.').
// The names may be chained ('.x.y').
// The periods are dropped from each ident.
type ChainNode struct {
	NodeType
	Pos
	tr    *Tree
	Node  Node
	Field []string // The identifiers in lexical order.
}

// newChain returns a ChainNode at pos whose term is node and whose field
// chain is empty.
func (t *Tree) newChain(pos Pos, node Node) *ChainNode {
	return &ChainNode{tr: t, NodeType: NodeChain, Pos: pos, Node: node}
}

// Add adds the named field (which should start with a period) to the end of the chain.
// It panics if field does not start with a period or names nothing after it.
func (c *ChainNode) Add(field string) {
	if len(field) == 0 || field[0] != '.' {
		panic("no dot in field")
	}
	field = field[1:] // Remove leading dot.
	if field == "" {
		panic("empty field")
	}
	c.Field = append(c.Field, field)
}

// String returns the term followed by the field chain.
func (c *ChainNode) String() string {
	var sb strings.Builder
	c.writeTo(&sb)
	return sb.String()
}

// writeTo writes the term (parenthesized when it is a pipeline) followed by
// each field preceded by a period.
func (c *ChainNode) writeTo(sb *strings.Builder) {
	if _, ok := c.Node.(*PipeNode); ok {
		sb.WriteByte('(')
		c.Node.writeTo(sb)
		sb.WriteByte(')')
	} else {
		c.Node.writeTo(sb)
	}
	for _, field := range c.Field {
		sb.WriteByte('.')
		sb.WriteString(field)
	}
}

// tree returns the Tree the node belongs to.
func (c *ChainNode) tree() *Tree {
	return c.tr
}

// Copy returns a deep copy of the chain: the term is copied with its own
// Copy method and the field slice is duplicated, so the result shares no
// mutable state with the receiver.
func (c *ChainNode) Copy() Node {
	var term Node
	if c.Node != nil {
		// Previously the term was shared between the original and the copy,
		// contradicting the deep-copy contract of Node.Copy.
		term = c.Node.Copy()
	}
	return &ChainNode{tr: c.tr, NodeType: NodeChain, Pos: c.Pos, Node: term, Field: append([]string{}, c.Field...)}
}
// BoolNode holds a boolean constant.
type BoolNode struct {
	NodeType
	Pos
	tr   *Tree
	True bool // The value of the boolean constant.
}

// newBool returns a BoolNode at pos holding val.
func (t *Tree) newBool(pos Pos, val bool) *BoolNode {
	node := &BoolNode{NodeType: NodeBool, Pos: pos, True: val}
	node.tr = t
	return node
}

// String returns "true" or "false" according to the node's value.
func (b *BoolNode) String() string {
	if !b.True {
		return "false"
	}
	return "true"
}

// writeTo writes the String form of the node to sb.
func (b *BoolNode) writeTo(sb *strings.Builder) {
	sb.WriteString(b.String())
}

// tree returns the Tree the node belongs to.
func (b *BoolNode) tree() *Tree {
	return b.tr
}

// Copy returns a new BoolNode with the same tree, position and value.
func (b *BoolNode) Copy() Node {
	return &BoolNode{tr: b.tr, NodeType: NodeBool, Pos: b.Pos, True: b.True}
}
// NumberNode holds a number: signed or unsigned integer, float, or complex.
// The value is parsed and stored under all the types that can represent the value.
// This simulates in a small amount of code the behavior of Go's ideal constants.
type NumberNode struct {
NodeType
Pos
tr *Tree
IsInt bool // Number has an integral value.
IsUint bool // Number has an unsigned integral value.
IsFloat bool // Number has a floating-point value.
IsComplex bool // Number is complex.
Int64 int64 // The signed integer value.
Uint64 uint64 // The unsigned integer value.
Float64 float64 // The floating-point value.
Complex128 complex128 // The complex value.
Text string // The original textual representation from the input.
}
// newNumber builds a NumberNode at pos from text. typ selects the parsing
// path: itemCharConstant and itemComplex are handled specially, and any
// other value is parsed as an imaginary, integer, or floating-point literal.
// It returns an error when the text cannot be parsed or when an
// integer-looking literal does not fit in 64 bits.
func (t *Tree) newNumber(pos Pos, text string, typ itemType) (*NumberNode, error) {
n := &NumberNode{tr: t, NodeType: NodeNumber, Pos: pos, Text: text}
switch typ {
case itemCharConstant:
// text[0] is the quote character; after unquoting one character only
// the closing quote may remain.
rune, _, tail, err := strconv.UnquoteChar(text[1:], text[0]) //nolint:revive
if err != nil {
return nil, err
}
if tail != "'" {
return nil, fmt.Errorf("malformed character constant: %s", text)
}
// A character constant is usable as an int, a uint and a float.
n.Int64 = int64(rune)
n.IsInt = true
n.Uint64 = uint64(rune)
n.IsUint = true
n.Float64 = float64(rune) // odd but those are the rules.
n.IsFloat = true
return n, nil
case itemComplex:
// fmt.Sscan can parse the pair, so let it do the work.
if _, err := fmt.Sscan(text, &n.Complex128); err != nil {
return nil, err
}
n.IsComplex = true
n.simplifyComplex()
return n, nil
}
// Imaginary constants can only be complex unless they are zero.
if len(text) > 0 && text[len(text)-1] == 'i' {
f, err := strconv.ParseFloat(text[:len(text)-1], 64)
if err == nil {
n.IsComplex = true
n.Complex128 = complex(0, f)
n.simplifyComplex()
return n, nil
}
// On a parse failure, fall through to the integer/float attempts below.
}
// Do integer test first so we get 0x123 etc.
u, err := strconv.ParseUint(text, 0, 64) // will fail for -0; fixed below.
if err == nil {
n.IsUint = true
n.Uint64 = u
}
i, err := strconv.ParseInt(text, 0, 64)
if err == nil {
n.IsInt = true
n.Int64 = i
if i == 0 {
n.IsUint = true // in case of -0.
n.Uint64 = u
}
}
// If an integer extraction succeeded, promote the float.
if n.IsInt {
n.IsFloat = true
n.Float64 = float64(n.Int64)
} else if n.IsUint {
n.IsFloat = true
n.Float64 = float64(n.Uint64)
} else {
// Neither integer parse succeeded; try the text as a float.
f, err := strconv.ParseFloat(text, 64)
if err == nil {
// If we parsed it as a float but it looks like an integer,
// it's a huge number too large to fit in an int. Reject it.
if !strings.ContainsAny(text, ".eEpP") {
return nil, fmt.Errorf("integer overflow: %q", text)
}
n.IsFloat = true
n.Float64 = f
// If a floating-point extraction succeeded, extract the int if needed.
if !n.IsInt && float64(int64(f)) == f {
n.IsInt = true
n.Int64 = int64(f)
}
if !n.IsUint && float64(uint64(f)) == f {
n.IsUint = true
n.Uint64 = uint64(f)
}
}
}
// Nothing parsed at all: the text is not a number.
if !n.IsInt && !n.IsUint && !n.IsFloat {
return nil, fmt.Errorf("illegal number syntax: %q", text)
}
return n, nil
}
// simplifyComplex records which simpler types can also represent the
// complex value. All of them require the imaginary part to be zero; the
// integer forms additionally require the real part to survive a round trip
// through int64 or uint64.
func (n *NumberNode) simplifyComplex() {
	n.IsFloat = imag(n.Complex128) == 0
	if !n.IsFloat {
		return
	}
	n.Float64 = real(n.Complex128)

	if n.IsInt = float64(int64(n.Float64)) == n.Float64; n.IsInt {
		n.Int64 = int64(n.Float64)
	}
	if n.IsUint = float64(uint64(n.Float64)) == n.Float64; n.IsUint {
		n.Uint64 = uint64(n.Float64)
	}
}
// String returns the number's original text.
func (n *NumberNode) String() string {
	return n.Text
}

// writeTo writes the number's original text to sb.
func (n *NumberNode) writeTo(sb *strings.Builder) {
	sb.WriteString(n.Text)
}

// tree returns the Tree the node belongs to.
func (n *NumberNode) tree() *Tree {
	return n.tr
}

// Copy returns a new NumberNode that is a field-by-field copy of n.
func (n *NumberNode) Copy() Node {
	dup := *n // Easy, fast, correct.
	return &dup
}
// StringNode holds a string constant. The value has been "unquoted".
type StringNode struct {
	NodeType
	Pos
	tr     *Tree
	Quoted string // The original text of the string, with quotes.
	Text   string // The string, after quote processing.
}

// newString returns a StringNode at pos with orig as the quoted form and
// text as the unquoted value.
func (t *Tree) newString(pos Pos, orig, text string) *StringNode {
	node := &StringNode{NodeType: NodeString, Pos: pos, Quoted: orig, Text: text}
	node.tr = t
	return node
}

// String returns the original quoted text.
func (s *StringNode) String() string {
	return s.Quoted
}

// writeTo writes the original quoted text to sb.
func (s *StringNode) writeTo(sb *strings.Builder) {
	sb.WriteString(s.Quoted)
}

// tree returns the Tree the node belongs to.
func (s *StringNode) tree() *Tree {
	return s.tr
}

// Copy returns a new StringNode with the same tree, position and text.
func (s *StringNode) Copy() Node {
	return &StringNode{tr: s.tr, NodeType: NodeString, Pos: s.Pos, Quoted: s.Quoted, Text: s.Text}
}
// endNode represents an {{end}} action.
// It does not appear in the final parse tree.
type endNode struct {
	NodeType
	Pos
	tr *Tree
}

// newEnd returns an endNode at pos that belongs to t.
func (t *Tree) newEnd(pos Pos) *endNode {
	end := &endNode{NodeType: nodeEnd, Pos: pos}
	end.tr = t
	return end
}

// String returns "{{end}}".
func (e *endNode) String() string {
	const text = "{{end}}"
	return text
}

// writeTo writes the String form of the node to sb.
func (e *endNode) writeTo(sb *strings.Builder) {
	sb.WriteString(e.String())
}

// tree returns the Tree the node belongs to.
func (e *endNode) tree() *Tree {
	return e.tr
}

// Copy returns a new endNode with the same tree and position.
func (e *endNode) Copy() Node {
	return &endNode{tr: e.tr, NodeType: nodeEnd, Pos: e.Pos}
}
// elseNode represents an {{else}} action. Does not appear in the final tree.
type elseNode struct {
	NodeType
	Pos
	tr   *Tree
	Line int // The line number in the input. Deprecated: Kept for compatibility.
}

// newElse returns an elseNode at pos with the given line number.
func (t *Tree) newElse(pos Pos, line int) *elseNode {
	node := &elseNode{NodeType: nodeElse, Pos: pos, Line: line}
	node.tr = t
	return node
}

// Type always returns nodeElse.
func (e *elseNode) Type() NodeType {
	return nodeElse
}

// String returns "{{else}}".
func (e *elseNode) String() string {
	const text = "{{else}}"
	return text
}

// writeTo writes the String form of the node to sb.
func (e *elseNode) writeTo(sb *strings.Builder) {
	sb.WriteString(e.String())
}

// tree returns the Tree the node belongs to.
func (e *elseNode) tree() *Tree {
	return e.tr
}

// Copy returns a new elseNode with the same tree, position and line.
func (e *elseNode) Copy() Node {
	return &elseNode{tr: e.tr, NodeType: nodeElse, Pos: e.Pos, Line: e.Line}
}
// BranchNode is the common representation of if, range, and with.
type BranchNode struct {
	NodeType
	Pos
	tr       *Tree
	Line     int       // The line number in the input. Deprecated: Kept for compatibility.
	Pipe     *PipeNode // The pipeline to be evaluated.
	List     *ListNode // What to execute if the value is non-empty.
	ElseList *ListNode // What to execute if the value is empty (nil if absent).
}

// String returns the textual form of the branch action.
func (b *BranchNode) String() string {
	var sb strings.Builder
	b.writeTo(&sb)
	return sb.String()
}

// writeTo writes "{{<keyword> <pipeline>}}<list>", an optional
// "{{else}}<else list>", and a closing "{{end}}" to sb.
// It panics if the node's type is not NodeIf, NodeRange or NodeWith.
func (b *BranchNode) writeTo(sb *strings.Builder) {
	name := ""
	switch b.NodeType {
	case NodeIf:
		name = "if"
	case NodeRange:
		name = "range"
	case NodeWith:
		name = "with"
	default:
		panic("unknown branch type")
	}
	sb.WriteString("{{")
	sb.WriteString(name)
	sb.WriteByte(' ')
	b.Pipe.writeTo(sb)
	sb.WriteString("}}")
	b.List.writeTo(sb)
	if b.ElseList != nil {
		sb.WriteString("{{else}}")
		b.ElseList.writeTo(sb)
	}
	sb.WriteString("{{end}}")
}

// tree returns the Tree the node belongs to.
func (b *BranchNode) tree() *Tree {
	return b.tr
}

// Copy returns a deep copy of the branch as an *IfNode, *RangeNode or
// *WithNode according to the node's type. The pipeline and both lists are
// copied (CopyPipe and CopyList accept nil receivers), matching what
// IfNode.Copy, RangeNode.Copy and WithNode.Copy do, so the result shares no
// children with the receiver. It panics on any other node type.
func (b *BranchNode) Copy() Node {
	switch b.NodeType {
	case NodeIf:
		return b.tr.newIf(b.Pos, b.Line, b.Pipe.CopyPipe(), b.List.CopyList(), b.ElseList.CopyList())
	case NodeRange:
		return b.tr.newRange(b.Pos, b.Line, b.Pipe.CopyPipe(), b.List.CopyList(), b.ElseList.CopyList())
	case NodeWith:
		return b.tr.newWith(b.Pos, b.Line, b.Pipe.CopyPipe(), b.List.CopyList(), b.ElseList.CopyList())
	default:
		panic("unknown branch type")
	}
}
// IfNode represents an {{if}} action and its commands.
type IfNode struct {
	BranchNode
}

// newIf returns an IfNode at pos built from the given pipeline and lists.
func (t *Tree) newIf(pos Pos, line int, pipe *PipeNode, list, elseList *ListNode) *IfNode {
	branch := BranchNode{tr: t, NodeType: NodeIf, Pos: pos, Line: line, Pipe: pipe, List: list, ElseList: elseList}
	return &IfNode{branch}
}

// Copy returns a new IfNode whose pipeline and lists are deep copies.
func (i *IfNode) Copy() Node {
	pipe, list, elseList := i.Pipe.CopyPipe(), i.List.CopyList(), i.ElseList.CopyList()
	return i.tr.newIf(i.Pos, i.Line, pipe, list, elseList)
}
// BreakNode represents a {{break}} action.
type BreakNode struct {
	tr *Tree
	NodeType
	Pos
	Line int
}

// newBreak returns a BreakNode at pos with the given line number.
func (t *Tree) newBreak(pos Pos, line int) *BreakNode {
	node := &BreakNode{NodeType: NodeBreak, Pos: pos, Line: line}
	node.tr = t
	return node
}

// Copy returns a new BreakNode with the same tree, position and line.
func (b *BreakNode) Copy() Node {
	return &BreakNode{tr: b.tr, NodeType: NodeBreak, Pos: b.Pos, Line: b.Line}
}

// String returns "{{break}}".
func (b *BreakNode) String() string {
	return "{{break}}"
}

// tree returns the Tree the node belongs to.
func (b *BreakNode) tree() *Tree {
	return b.tr
}

// writeTo writes "{{break}}" to sb.
func (b *BreakNode) writeTo(sb *strings.Builder) {
	sb.WriteString("{{break}}")
}
// ContinueNode represents a {{continue}} action.
type ContinueNode struct {
	tr *Tree
	NodeType
	Pos
	Line int
}

// newContinue returns a ContinueNode at pos with the given line number.
func (t *Tree) newContinue(pos Pos, line int) *ContinueNode {
	node := &ContinueNode{NodeType: NodeContinue, Pos: pos, Line: line}
	node.tr = t
	return node
}

// Copy returns a new ContinueNode with the same tree, position and line.
func (c *ContinueNode) Copy() Node {
	return &ContinueNode{tr: c.tr, NodeType: NodeContinue, Pos: c.Pos, Line: c.Line}
}

// String returns "{{continue}}".
func (c *ContinueNode) String() string {
	return "{{continue}}"
}

// tree returns the Tree the node belongs to.
func (c *ContinueNode) tree() *Tree {
	return c.tr
}

// writeTo writes "{{continue}}" to sb.
func (c *ContinueNode) writeTo(sb *strings.Builder) {
	sb.WriteString("{{continue}}")
}
// RangeNode represents a {{range}} action and its commands.
type RangeNode struct {
	BranchNode
}

// newRange returns a RangeNode at pos built from the given pipeline and lists.
func (t *Tree) newRange(pos Pos, line int, pipe *PipeNode, list, elseList *ListNode) *RangeNode {
	branch := BranchNode{tr: t, NodeType: NodeRange, Pos: pos, Line: line, Pipe: pipe, List: list, ElseList: elseList}
	return &RangeNode{branch}
}

// Copy returns a new RangeNode whose pipeline and lists are deep copies.
func (r *RangeNode) Copy() Node {
	pipe, list, elseList := r.Pipe.CopyPipe(), r.List.CopyList(), r.ElseList.CopyList()
	return r.tr.newRange(r.Pos, r.Line, pipe, list, elseList)
}
// WithNode represents a {{with}} action and its commands.
type WithNode struct {
	BranchNode
}

// newWith returns a WithNode at pos built from the given pipeline and lists.
func (t *Tree) newWith(pos Pos, line int, pipe *PipeNode, list, elseList *ListNode) *WithNode {
	branch := BranchNode{tr: t, NodeType: NodeWith, Pos: pos, Line: line, Pipe: pipe, List: list, ElseList: elseList}
	return &WithNode{branch}
}

// Copy returns a new WithNode whose pipeline and lists are deep copies.
func (w *WithNode) Copy() Node {
	pipe, list, elseList := w.Pipe.CopyPipe(), w.List.CopyList(), w.ElseList.CopyList()
	return w.tr.newWith(w.Pos, w.Line, pipe, list, elseList)
}
// TemplateNode represents a {{template}} action.
type TemplateNode struct {
	NodeType
	Pos
	tr   *Tree
	Line int       // The line number in the input. Deprecated: Kept for compatibility.
	Name string    // The name of the template (unquoted).
	Pipe *PipeNode // The command to evaluate as dot for the template.
}

// newTemplate returns a TemplateNode at pos invoking the named template
// with the given pipeline.
func (t *Tree) newTemplate(pos Pos, line int, name string, pipe *PipeNode) *TemplateNode {
	node := &TemplateNode{NodeType: NodeTemplate, Pos: pos, Line: line, Name: name, Pipe: pipe}
	node.tr = t
	return node
}

// String returns the textual form of the template action.
func (t *TemplateNode) String() string {
	sb := new(strings.Builder)
	t.writeTo(sb)
	return sb.String()
}

// writeTo writes "{{template <quoted name>", then a space and the pipeline
// when one is present, then "}}".
func (t *TemplateNode) writeTo(sb *strings.Builder) {
	sb.WriteString("{{template " + strconv.Quote(t.Name))
	if pipe := t.Pipe; pipe != nil {
		sb.WriteByte(' ')
		pipe.writeTo(sb)
	}
	sb.WriteString("}}")
}

// tree returns the Tree the node belongs to.
func (t *TemplateNode) tree() *Tree {
	return t.tr
}

// Copy returns a new TemplateNode whose pipeline is a deep copy.
func (t *TemplateNode) Copy() Node {
	pipe := t.Pipe.CopyPipe()
	return &TemplateNode{tr: t.tr, NodeType: NodeTemplate, Pos: t.Pos, Line: t.Line, Name: t.Name, Pipe: pipe}
}
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package parse
import (
"bytes"
"fmt"
"runtime"
"strconv"
"strings"
)
// Tree is the representation of a single parsed template.
// The exported fields describe the parsed result; the unexported fields
// below the "Parsing only" marker are set by startParse and reset to nil by
// stopParse.
type Tree struct {
Name string // name of the template represented by the tree.
ParseName string // name of the top-level template during parsing, for error messages.
DotType string // static type for the dot
Root *ListNode // top-level root of the tree.
Mode Mode // parsing mode.
text string // text parsed to create the template (or its parent)
// Parsing only; cleared after parse.
// funcs holds the function-name sets handed to the parser.
funcs []map[string]struct{}
// lex supplies tokens through nextItem.
lex *lexer
token [3]item // three-token lookahead for parser.
// peekCount is the number of tokens in token waiting to be re-read by
// next and peek.
peekCount int
vars []string // variables defined at the moment.
// treeSet maps template names to the trees added during parsing.
treeSet map[string]*Tree
actionLine int // line of left delim starting action
// NOTE(review): presumably the nesting depth of range actions; its use is
// not visible in this part of the file - confirm.
rangeDepth int
}
// A Mode value is a set of flags (or 0). Modes control parser behavior.
type Mode uint

// The parser mode flags. Each occupies its own bit so they can be OR-ed
// together.
const (
	ParseComments Mode = 1 << 0 // parse comments and add them to AST
	SkipFuncCheck Mode = 1 << 1 // do not check that functions are defined
)
// Copy returns a copy of the [Tree]. Only Name, ParseName, DotType, the
// source text and a deep copy of Root are carried over; any parsing state
// is discarded. A nil receiver yields nil.
func (t *Tree) Copy() *Tree {
	if t == nil {
		return nil
	}
	dup := new(Tree)
	dup.Name = t.Name
	dup.ParseName = t.ParseName
	dup.DotType = t.DotType
	dup.Root = t.Root.CopyList()
	dup.text = t.text
	return dup
}
// Parse returns a map from template name to [Tree], created by parsing the
// templates described in the argument string. The top-level template will be
// given the specified name. If an error is encountered, parsing stops and
// the map is returned as it stands at that point, together with the error.
func Parse(name, text, leftDelim, rightDelim string, funcs ...map[string]struct{}) (map[string]*Tree, error) {
	trees := map[string]*Tree{}
	top := New(name)
	top.text = text
	if _, err := top.Parse(text, leftDelim, rightDelim, trees, funcs...); err != nil {
		return trees, err
	}
	return trees, nil
}
// next returns the next token, taking it from the lookahead buffer when
// tokens have been backed up and from the lexer otherwise.
func (t *Tree) next() item {
	if t.peekCount <= 0 {
		t.token[0] = t.lex.nextItem()
		return t.token[t.peekCount]
	}
	t.peekCount--
	return t.token[t.peekCount]
}
// backup backs the input stream up one token, so the most recently
// returned token is delivered again.
func (t *Tree) backup() {
	t.peekCount += 1
}
// backup2 backs the input stream up two tokens.
// The zeroth token is already in place; t1 is stored in slot 1.
func (t *Tree) backup2(t1 item) {
	t.peekCount = 2
	t.token[1] = t1
}
// backup3 backs the input stream up three tokens.
// The zeroth token is already in place. The parameters are in reverse
// order because they are being pushed back: t1 goes to slot 1, t2 to slot 2.
func (t *Tree) backup3(t2, t1 item) {
	t.token[1], t.token[2] = t1, t2
	t.peekCount = 3
}
// peek returns but does not consume the next token. When nothing is
// buffered it reads one token from the lexer and leaves it buffered.
func (t *Tree) peek() item {
	if t.peekCount <= 0 {
		t.peekCount = 1
		t.token[0] = t.lex.nextItem()
		return t.token[0]
	}
	return t.token[t.peekCount-1]
}
// nextNonSpace consumes tokens until it finds one that is not itemSpace
// and returns it.
func (t *Tree) nextNonSpace() (token item) {
	for token = t.next(); token.typ == itemSpace; token = t.next() {
		// Skip spaces.
	}
	return token
}
// peekNonSpace returns the next non-space token and then backs up one
// token so that it is delivered again by the next read.
func (t *Tree) peekNonSpace() item {
	tok := t.nextNonSpace()
	t.backup()
	return tok
}
// Parsing.

// New allocates a new parse tree with the given name and stores funcs as
// its function-name sets.
func New(name string, funcs ...map[string]struct{}) *Tree {
	t := new(Tree)
	t.Name = name
	t.funcs = funcs
	return t
}
// ErrorContext returns a textual representation of the location of the node in the input text.
// The receiver is only used when the node does not have a pointer to the tree inside,
// which can occur in old code.
//
// location has the form "ParseName:line:col", where line is 1-based and col
// is the byte offset of the node from the start of its line. context is the
// node's String form. A node position that lies outside the tree's text is
// clamped to the nearest valid offset instead of causing an out-of-range
// panic.
func (t *Tree) ErrorContext(n Node) (location, context string) {
	tree := n.tree()
	if tree == nil {
		tree = t
	}
	// The node may carry a position that does not belong to this text (for
	// example a node built with NewIdentifier and SetPos), so bound it
	// before slicing.
	pos := int(n.Position())
	if pos < 0 {
		pos = 0
	}
	if pos > len(tree.text) {
		pos = len(tree.text)
	}
	text := tree.text[:pos]
	byteNum := pos // On first line.
	if nl := strings.LastIndex(text, "\n"); nl >= 0 {
		byteNum = pos - (nl + 1) // Offset from just after the newline.
	}
	lineNum := 1 + strings.Count(text, "\n")
	context = n.String()
	return fmt.Sprintf("%s:%d:%d", tree.ParseName, lineNum, byteNum), context
}
// errorf clears the tree's Root, prefixes the message with
// "template: ParseName:line: " (the line of the current token) and panics
// with the resulting error, which terminates processing.
func (t *Tree) errorf(format string, args ...any) {
	t.Root = nil
	line := t.token[0].line
	format = fmt.Sprintf("template: %s:%d: %s", t.ParseName, line, format)
	err := fmt.Errorf(format, args...)
	panic(err)
}
// error terminates processing by reporting err through errorf.
func (t *Tree) error(err error) {
	const verb = "%s"
	t.errorf(verb, err)
}
// expect consumes the next non-space token and guarantees it has the
// required type; any other token is reported through unexpected.
func (t *Tree) expect(expected itemType, context string) item {
	token := t.nextNonSpace()
	if token.typ == expected {
		return token
	}
	t.unexpected(token, context)
	return token
}
// expectOneOf consumes the next non-space token and guarantees it has one
// of the two required types; any other token is reported through unexpected.
func (t *Tree) expectOneOf(expected1, expected2 itemType, context string) item {
	token := t.nextNonSpace()
	switch token.typ {
	case expected1, expected2:
		return token
	}
	t.unexpected(token, context)
	return token
}
// unexpected complains about the token and terminates processing. An error
// token is reported by itself, with a note about where the enclosing action
// started when that is on a different line; any other token is reported as
// unexpected in the given context.
func (t *Tree) unexpected(token item, context string) {
	if token.typ != itemError {
		t.errorf("unexpected %s in %s", token, context)
	}
	var extra string
	if t.actionLine != 0 && t.actionLine != token.line {
		extra = fmt.Sprintf(" in action started at %s:%d", t.ParseName, t.actionLine)
		if strings.HasSuffix(token.val, " action") {
			extra = strings.TrimPrefix(extra, " in action") // avoid "action in action"
		}
	}
	t.errorf("%s%s", token, extra)
}
// recover is the handler that turns panics into returns from the top level
// of Parse. Runtime errors are re-panicked; anything else stops the parse
// (when t is non-nil) and is stored in *errp as an error.
func (t *Tree) recover(errp *error) {
	e := recover()
	if e == nil {
		return
	}
	if _, isRuntime := e.(runtime.Error); isRuntime {
		panic(e)
	}
	if t != nil {
		t.stopParse()
	}
	*errp = e.(error)
}
// startParse prepares the tree for parsing with the given lexer, function
// maps and tree set. It resets Root, seeds the variable list with "$" and
// configures the lexer's options.
func (t *Tree) startParse(funcs []map[string]struct{}, lex *lexer, treeSet map[string]*Tree) {
	t.Root, t.lex = nil, lex
	t.vars = []string{"$"}
	t.funcs, t.treeSet = funcs, treeSet

	// hasFunction consults t.funcs, so the options are computed only after
	// the function maps are installed. breakOK and continueOK are true only
	// when no function named "break" or "continue" is registered.
	var opts lexOptions
	opts.emitComment = t.Mode&ParseComments != 0
	opts.breakOK = !t.hasFunction("break")
	opts.continueOK = !t.hasFunction("continue")
	lex.options = opts
}
// stopParse ends parsing by dropping the tree's references to the lexer,
// the variable list, the function maps and the tree set.
func (t *Tree) stopParse() {
	t.lex, t.vars = nil, nil
	t.funcs, t.treeSet = nil, nil
}
// Parse parses the template definition string to construct a representation of
// the template for execution. If either action delimiter string is empty, the
// default ("{{" or "}}") is used. Embedded template definitions are added to
// the treeSet map.
//
// Parse errors are raised internally as panics and converted into the
// returned error by the deferred recover handler; in that case the returned
// tree is nil.
func (t *Tree) Parse(text, leftDelim, rightDelim string, treeSet map[string]*Tree, funcs ...map[string]struct{}) (tree *Tree, err error) {
	defer t.recover(&err)

	t.ParseName = t.Name
	t.startParse(funcs, lex(t.Name, text, leftDelim, rightDelim), treeSet)
	t.text = text

	t.parse()
	t.add()
	t.stopParse()
	return t, nil
}
// add registers t in t.treeSet under t.Name. An existing entry is replaced
// only when it is missing or empty. Defining a non-empty tree over a
// non-empty one is an error; an empty t leaves the existing entry alone.
func (t *Tree) add() {
	existing := t.treeSet[t.Name]
	switch {
	case existing == nil || IsEmptyTree(existing.Root):
		t.treeSet[t.Name] = t
	case !IsEmptyTree(t.Root):
		t.errorf("template: multiple definition of template %q", t.Name)
	}
}
// IsEmptyTree reports whether this tree (node) is empty of everything but space or comments.
// A nil node and comments are empty, text is empty when it is all white
// space, and a list is empty when every element is. Action, if, range,
// template and with nodes are never empty. Any other node type panics.
func IsEmptyTree(n Node) bool {
	switch node := n.(type) {
	case nil, *CommentNode:
		return true
	case *TextNode:
		return len(bytes.TrimSpace(node.Text)) == 0
	case *ListNode:
		for _, child := range node.Nodes {
			if !IsEmptyTree(child) {
				return false
			}
		}
		return true
	case *ActionNode, *IfNode, *RangeNode, *TemplateNode, *WithNode:
		return false
	default:
		panic("unknown node: " + n.String())
	}
}
// parse is the top-level parser for a template. It behaves like itemList
// but additionally recognizes {{define}} actions, each of which is parsed
// into its own tree. It runs to EOF; a stray {{end}} or {{else}} is an error.
func (t *Tree) parse() {
	t.Root = t.newList(t.peek().pos)
	for t.peek().typ != itemEOF {
		if t.peek().typ == itemLeftDelim {
			open := t.next()
			if t.nextNonSpace().typ != itemDefine {
				// Not a definition: restore the delimiter and the token
				// after it, then parse as an ordinary action below.
				t.backup2(open)
			} else {
				def := New("definition") // name will be updated once we know it.
				def.text, def.Mode, def.ParseName = t.text, t.Mode, t.ParseName
				def.startParse(t.funcs, t.lex, t.treeSet)
				def.parseDefinition()
				continue
			}
		}
		node := t.textOrAction()
		if kind := node.Type(); kind == nodeEnd || kind == nodeElse {
			t.errorf("unexpected %s", node)
		} else {
			t.Root.append(node)
		}
	}
}
// parseDefinition parses a {{define}} ... {{end}} template definition and
// installs the definition in t.treeSet. The "define" keyword has already
// been scanned. Two quoted strings are required after the keyword: the
// template name, stored in t.Name, and a second string stored in t.DotType.
func (t *Tree) parseDefinition() {
	const context = "define clause"

	var err error
	nameTok := t.expectOneOf(itemString, itemRawString, context)
	if t.Name, err = strconv.Unquote(nameTok.val); err != nil {
		t.error(err)
	}
	dotTok := t.expectOneOf(itemString, itemRawString, context)
	if t.DotType, err = strconv.Unquote(dotTok.val); err != nil {
		t.error(err)
	}
	t.expect(itemRightDelim, context)

	body, end := t.itemList()
	t.Root = body
	if end.Type() != nodeEnd {
		t.errorf("unexpected %s in %s", end, context)
	}
	t.add()
	t.stopParse()
}
// itemList:
//
//	textOrAction*
//
// It collects nodes until an {{end}} or {{else}} is reached. That
// terminating node is returned separately and is not part of the list.
// Reaching EOF first is an error.
func (t *Tree) itemList() (list *ListNode, next Node) {
	list = t.newList(t.peekNonSpace().pos)
	for t.peekNonSpace().typ != itemEOF {
		node := t.textOrAction()
		if kind := node.Type(); kind == nodeEnd || kind == nodeElse {
			return list, node
		}
		list.append(node)
	}
	t.errorf("unexpected EOF")
	return
}
// textOrAction:
//
//	text | comment | action
//
// While an action is being parsed, t.actionLine holds the line of its left
// delimiter; it is reset once the action has been handled.
func (t *Tree) textOrAction() Node {
	tok := t.nextNonSpace()
	if tok.typ == itemText {
		return t.newText(tok.pos, tok.val)
	}
	if tok.typ == itemComment {
		return t.newComment(tok.pos, tok.val)
	}
	if tok.typ == itemLeftDelim {
		t.actionLine = tok.line
		defer t.clearActionLine()
		return t.action()
	}
	t.unexpected(tok, "input")
	return nil
}
// clearActionLine resets t.actionLine to zero, the value unexpected treats
// as "no action line recorded".
func (t *Tree) clearActionLine() {
	t.actionLine = 0
}
// Action:
//
//	control
//	command ("|" command)*
//
// The left delimiter has been consumed. If the first word is a control
// keyword such as range, the matching control parser takes over. Otherwise
// the token is pushed back and the action is parsed as a pipeline.
func (t *Tree) action() (n Node) {
	keyword := t.nextNonSpace()
	switch keyword.typ {
	case itemIf:
		return t.ifControl()
	case itemRange:
		return t.rangeControl()
	case itemWith:
		return t.withControl()
	case itemElse:
		return t.elseControl()
	case itemEnd:
		return t.endControl()
	case itemBlock:
		return t.blockControl()
	case itemTemplate:
		return t.templateControl()
	case itemBreak:
		return t.breakControl(keyword.pos, keyword.line)
	case itemContinue:
		return t.continueControl(keyword.pos, keyword.line)
	default:
		t.backup()
		first := t.peek()
		// Do not pop variables; they persist until "end".
		return t.newAction(first.pos, first.line, t.pipeline("command", itemRightDelim))
	}
}
// Break:
//
//	{{break}}
//
// The break keyword has been consumed. It must be followed directly by the
// right delimiter and must appear inside a {{range}}.
func (t *Tree) breakControl(pos Pos, line int) Node {
	closing := t.nextNonSpace()
	if closing.typ != itemRightDelim {
		t.unexpected(closing, "{{break}}")
	}
	if insideRange := t.rangeDepth != 0; !insideRange {
		t.errorf("{{break}} outside {{range}}")
	}
	return t.newBreak(pos, line)
}
// Continue:
//
//	{{continue}}
//
// The continue keyword has been consumed. It must be followed directly by
// the right delimiter and must appear inside a {{range}}.
func (t *Tree) continueControl(pos Pos, line int) Node {
	closing := t.nextNonSpace()
	if closing.typ != itemRightDelim {
		t.unexpected(closing, "{{continue}}")
	}
	if insideRange := t.rangeDepth != 0; !insideRange {
		t.errorf("{{continue}} outside {{range}}")
	}
	return t.newContinue(pos, line)
}
// Pipeline:
//
// declarations? command ('|' command)*
//
// pipeline parses an optional variable declaration or assignment followed by
// commands, and stops when a token of type end is read. context names the
// enclosing construct; it is used in error messages, and the value "range"
// additionally permits a second, comma-separated variable. Every declared
// or assigned variable name is appended to t.vars. The finished pipeline is
// validated by checkPipeline before it is returned.
func (t *Tree) pipeline(context string, end itemType) (pipe *PipeNode) {
token := t.peekNonSpace()
pipe = t.newPipeline(token.pos, token.line, nil)
// Are there declarations or assignments?
decls:
if v := t.peekNonSpace(); v.typ == itemVariable {
t.next()
// Since space is a token, we need 3-token look-ahead here in the worst case:
// in "$x foo" we need to read "foo" (as opposed to ":=") to know that $x is an
// argument variable rather than a declaration. So remember the token
// adjacent to the variable so we can push it back if necessary.
tokenAfterVariable := t.peek()
next := t.peekNonSpace()
switch {
case next.typ == itemAssign, next.typ == itemDeclare:
// "$x :=" or "$x =": record the variable and remember which form was used.
pipe.IsAssign = next.typ == itemAssign
t.nextNonSpace()
pipe.Decl = append(pipe.Decl, t.newVariable(v.pos, v.val))
t.vars = append(t.vars, v.val)
case next.typ == itemChar && next.val == ",":
// "$x," : the first of two variables. Only a range pipeline may go on
// to a second one; anywhere else the comma means too many declarations.
t.nextNonSpace()
pipe.Decl = append(pipe.Decl, t.newVariable(v.pos, v.val))
t.vars = append(t.vars, v.val)
if context == "range" && len(pipe.Decl) < 2 {
switch t.peekNonSpace().typ {
case itemVariable, itemRightDelim, itemRightParen:
// second initialized variable in a range pipeline
goto decls
default:
t.errorf("range can only initialize variables")
}
}
t.errorf("too many declarations in %s", context)
case tokenAfterVariable.typ == itemSpace:
// The variable is an ordinary operand that was followed by a space:
// push back the variable and the space along with the token after them.
t.backup3(v, tokenAfterVariable)
default:
// The variable is an ordinary operand with no space after it.
t.backup2(v)
}
}
// Parse commands until the terminating token is seen.
for {
switch token := t.nextNonSpace(); token.typ {
case end:
// At this point, the pipeline is complete
t.checkPipeline(pipe, context)
return
case itemBool, itemCharConstant, itemComplex, itemDot, itemField, itemIdentifier,
itemNumber, itemNil, itemRawString, itemString, itemVariable, itemLeftParen:
// The token starts a command: push it back and let command consume it.
t.backup()
pipe.append(t.command())
default:
t.unexpected(token, context)
}
}
}
// checkPipeline validates a completed pipeline. It rejects a pipeline with
// no commands, and any command after the first whose leading argument is a
// bool, dot, nil, number or string, since such a value cannot be executed.
// context is used in the "missing value" message.
func (t *Tree) checkPipeline(pipe *PipeNode, context string) {
	if len(pipe.Cmds) == 0 {
		t.errorf("missing value for %s", context)
	}
	// Stages are reported 1-based: with A|B|C, pipeline stage 2 is B, which
	// sits at index 1.
	for idx := 1; idx < len(pipe.Cmds); idx++ {
		switch pipe.Cmds[idx].Args[0].Type() {
		case NodeBool, NodeDot, NodeNil, NodeNumber, NodeString:
			t.errorf("non executable command in pipeline stage %d", idx+1)
		}
	}
}
// parseControl parses the part shared by the control actions that call it
// with context "if", "range" or "with": the pipeline, the body list, and an
// optional else list. The body must be terminated by {{end}} or {{else}}.
// It returns the pipeline's position and line, the pipeline itself, the
// body list and the else list (nil when there is no else branch).
func (t *Tree) parseControl(context string) (pos Pos, line int, pipe *PipeNode, list, elseList *ListNode) {
// The argument is evaluated now, so on return the variable list is trimmed
// back to the length it had before this control structure was parsed.
defer t.popVars(len(t.vars))
pipe = t.pipeline(context, itemRightDelim)
// rangeDepth is raised only while the range body is parsed; breakControl
// and continueControl reject their keyword when it is zero.
if context == "range" {
t.rangeDepth++
}
var next Node
list, next = t.itemList()
if context == "range" {
t.rangeDepth--
}
switch next.Type() {
case nodeEnd: //done
case nodeElse:
// Special case for "else if" and "else with".
// If the "else" is followed immediately by an "if" or "with",
// the elseControl will have left the "if" or "with" token pending. Treat
// {{if a}}_{{else if b}}_{{end}}
// {{with a}}_{{else with b}}_{{end}}
// as
// {{if a}}_{{else}}{{if b}}_{{end}}{{end}}
// {{with a}}_{{else}}{{with b}}_{{end}}{{end}}.
// To do this, parse the "if" or "with" as usual and stop at its {{end}};
// the subsequent {{end}} is assumed. This technique works even for long if-else-if chains.
if context == "if" && t.peek().typ == itemIf {
t.next() // Consume the "if" token.
elseList = t.newList(next.Position())
elseList.append(t.ifControl())
} else if context == "with" && t.peek().typ == itemWith {
t.next() // Consume the "with" token.
elseList = t.newList(next.Position())
elseList.append(t.withControl())
} else {
// Plain {{else}}: the else list must be closed by {{end}}.
elseList, next = t.itemList()
if next.Type() != nodeEnd {
t.errorf("expected end; found %s", next)
}
}
}
return pipe.Position(), pipe.Line, pipe, list, elseList
}
// If:
//
//	{{if pipeline}} itemList {{end}}
//	{{if pipeline}} itemList {{else}} itemList {{end}}
//
// The if keyword has already been consumed.
func (t *Tree) ifControl() Node {
	pos, line, pipe, list, elseList := t.parseControl("if")
	return t.newIf(pos, line, pipe, list, elseList)
}
// Range:
//
//	{{range pipeline}} itemList {{end}}
//	{{range pipeline}} itemList {{else}} itemList {{end}}
//
// The range keyword has already been consumed.
func (t *Tree) rangeControl() Node {
	pos, line, pipe, list, elseList := t.parseControl("range")
	return t.newRange(pos, line, pipe, list, elseList)
}
// With:
//
//	{{with pipeline}} itemList {{end}}
//	{{with pipeline}} itemList {{else}} itemList {{end}}
//
// The with keyword has already been consumed.
func (t *Tree) withControl() Node {
	pos, line, pipe, list, elseList := t.parseControl("with")
	return t.newWith(pos, line, pipe, list, elseList)
}
// End:
//
//	{{end}}
//
// The end keyword has already been consumed; only the right delimiter may
// follow it.
func (t *Tree) endControl() Node {
	closing := t.expect(itemRightDelim, "end")
	return t.newEnd(closing.pos)
}
// Else:
//
//	{{else}}
//
// The else keyword has already been consumed.
func (t *Tree) elseControl() Node {
	upcoming := t.peekNonSpace()
	switch upcoming.typ {
	case itemIf, itemWith:
		// "{{else if ..." and "{{else with ..." are treated as
		// "{{else}}{{if ..." and "{{else}}{{with ...". Return the else node
		// now and leave the "if" or "with" token pending.
		return t.newElse(upcoming.pos, upcoming.line)
	}
	closing := t.expect(itemRightDelim, "else")
	return t.newElse(closing.pos, closing.line)
}
// Block:
//
//	{{block stringValue pipeline}}
//
// The block keyword has already been consumed. The name must be a string
// literal and the pipeline is mandatory. The block's body, up to its
// {{end}}, is parsed into a separate tree that is added to the tree set
// under the given name. The node returned is a template invocation of that
// name with the pipeline.
func (t *Tree) blockControl() Node {
	const context = "block clause"

	nameTok := t.nextNonSpace()
	name := t.parseTemplateName(nameTok, context)
	pipe := t.pipeline(context, itemRightDelim)

	// The body tree shares this tree's text, mode, parse name, lexer,
	// function maps and tree set.
	def := New(name)
	def.text, def.Mode, def.ParseName = t.text, t.Mode, t.ParseName
	def.startParse(t.funcs, t.lex, t.treeSet)
	body, end := def.itemList()
	def.Root = body
	if end.Type() != nodeEnd {
		t.errorf("unexpected %s in %s", end, context)
	}
	def.add()
	def.stopParse()

	return t.newTemplate(nameTok.pos, nameTok.line, name, pipe)
}
// Template:
//
//	{{template stringValue pipeline}}
//
// The template keyword has already been consumed. The name must be a string
// literal. The pipeline is optional; without one the node's pipe is nil.
func (t *Tree) templateControl() Node {
	const context = "template clause"

	nameTok := t.nextNonSpace()
	name := t.parseTemplateName(nameTok, context)

	var pipe *PipeNode
	if after := t.nextNonSpace(); after.typ != itemRightDelim {
		t.backup()
		// Do not pop variables; they persist until "end".
		pipe = t.pipeline(context, itemRightDelim)
	}
	return t.newTemplate(nameTok.pos, nameTok.line, name, pipe)
}
// parseTemplateName returns the unquoted template name held by token, which
// must be an interpreted or raw string literal. Any other token type, or a
// literal that cannot be unquoted, terminates parsing with an error that
// mentions context.
func (t *Tree) parseTemplateName(token item, context string) (name string) {
	if token.typ != itemString && token.typ != itemRawString {
		t.unexpected(token, context)
		return
	}
	unquoted, err := strconv.Unquote(token.val)
	if err != nil {
		t.error(err)
	}
	return unquoted
}
// command:
//
//	operand (space operand)*
//
// Operands are space-separated arguments running up to a pipe character or
// a closing delimiter. The pipe character is consumed; a right delimiter or
// right parenthesis is pushed back so that the caller can see it. A command
// with no operands is an error.
func (t *Tree) command() *CommandNode {
	cmd := t.newCommand(t.peekNonSpace().pos)
operands:
	for {
		t.peekNonSpace() // skip leading spaces.
		if arg := t.operand(); arg != nil {
			cmd.append(arg)
		}
		tok := t.next()
		switch tok.typ {
		case itemSpace:
			continue
		case itemPipe:
			break operands
		case itemRightDelim, itemRightParen:
			t.backup()
			break operands
		default:
			t.unexpected(tok, "operand")
			break operands
		}
	}
	if len(cmd.Args) == 0 {
		t.errorf("empty command")
	}
	return cmd
}
// operand:
//
//	term .Field*
//
// An operand is one space-separated component of a command: a term,
// optionally followed by a run of field accesses.
// A nil return means the next item is not an operand.
func (t *Tree) operand() Node {
	node := t.term()
	if node == nil {
		return nil
	}
	if t.peek().typ != itemField {
		return node
	}
	chain := t.newChain(t.peek().pos, node)
	for t.peek().typ == itemField {
		chain.Add(t.next().val)
	}
	// Compatibility with original API: a field or variable term absorbs the
	// extra fields into a node of its own kind; any other term keeps the
	// chain node. Field access on a literal value is an obvious error and is
	// rejected here; more complex error cases are left to execution time.
	switch node.Type() {
	case NodeField:
		return t.newField(chain.Position(), chain.String())
	case NodeVariable:
		return t.newVariable(chain.Position(), chain.String())
	case NodeBool, NodeString, NodeNumber, NodeNil, NodeDot:
		t.errorf("unexpected . after term %q", node.String())
	default:
		return chain
	}
	return node
}
// term:
//
//	literal (number, string, nil, boolean)
//	function (identifier)
//	.
//	.Field
//	$
//	'(' pipeline ')'
//
// A term is a simple "expression".
// A nil return means the next item is not a term; in that case the token
// that was read is pushed back.
func (t *Tree) term() Node {
	tok := t.nextNonSpace()
	switch tok.typ {
	case itemDot:
		return t.newDot(tok.pos)
	case itemNil:
		return t.newNil(tok.pos)
	case itemBool:
		return t.newBool(tok.pos, tok.val == "true")
	case itemField:
		return t.newField(tok.pos, tok.val)
	case itemVariable:
		return t.useVar(tok.pos, tok.val)
	case itemIdentifier:
		// Function names are verified unless the SkipFuncCheck mode is set.
		if t.Mode&SkipFuncCheck == 0 && !t.hasFunction(tok.val) {
			t.errorf("function %q not defined", tok.val)
		}
		return NewIdentifier(tok.val).SetTree(t).SetPos(tok.pos)
	case itemString, itemRawString:
		unquoted, err := strconv.Unquote(tok.val)
		if err != nil {
			t.error(err)
		}
		return t.newString(tok.pos, tok.val, unquoted)
	case itemCharConstant, itemComplex, itemNumber:
		num, err := t.newNumber(tok.pos, tok.val, tok.typ)
		if err != nil {
			t.error(err)
		}
		return num
	case itemLeftParen:
		return t.pipeline("parenthesized pipeline", itemRightParen)
	default:
		t.backup()
		return nil
	}
}
// hasFunction reports whether name is a key in any of the tree's function
// maps. Nil maps in the list are harmless: a lookup in a nil map simply
// finds nothing.
func (t *Tree) hasFunction(name string) bool {
	for i := range t.funcs {
		if _, found := t.funcs[i][name]; found {
			return true
		}
	}
	return false
}
// popVars reslices the variable list so that it has exactly n entries.
func (t *Tree) popVars(n int) {
	t.vars = t.vars[0:n]
}
// useVar returns a node for a reference to the named variable. Only the
// first element of the variable's identifier list is checked against the
// variables currently in scope; referencing one that is not defined is an
// error.
func (t *Tree) useVar(pos Pos, name string) Node {
	v := t.newVariable(pos, name)
	root := v.Ident[0]
	for i := range t.vars {
		if t.vars[i] == root {
			return v
		}
	}
	t.errorf("undefined variable %q", root)
	return nil
}