github.com/hashicorp/hcl v1.0.0

This commit is contained in:
Jordan Liggitt
2019-06-22 11:44:03 -07:00
parent 63e5ad08aa
commit 855048b1b3
32 changed files with 514 additions and 152 deletions

View File

@@ -156,7 +156,8 @@ func (o *ObjectKey) Pos() token.Pos {
type LiteralType struct {
Token token.Token
// associated line comment, only when used in a list
// comment types, only used when in a list
LeadComment *CommentGroup
LineComment *CommentGroup
}
@@ -214,4 +215,5 @@ func (c *CommentGroup) Pos() token.Pos {
// GoStringer
//-------------------------------------------------------------------
func (o *ObjectKey) GoString() string { return fmt.Sprintf("*%#v", *o) }
func (o *ObjectKey) GoString() string { return fmt.Sprintf("*%#v", *o) }
func (o *ObjectList) GoString() string { return fmt.Sprintf("*%#v", *o) }

View File

@@ -3,6 +3,7 @@
package parser
import (
"bytes"
"errors"
"fmt"
"strings"
@@ -36,6 +37,11 @@ func newParser(src []byte) *Parser {
// Parse returns the fully parsed source and returns the abstract syntax tree.
func Parse(src []byte) (*ast.File, error) {
// normalize all line endings
// since the scanner and output only work with "\n" line endings, we may
// end up with dangling "\r" characters in the parsed data.
src = bytes.Replace(src, []byte("\r\n"), []byte("\n"), -1)
p := newParser(src)
return p.Parse()
}
@@ -50,7 +56,7 @@ func (p *Parser) Parse() (*ast.File, error) {
scerr = &PosError{Pos: pos, Err: errors.New(msg)}
}
f.Node, err = p.objectList()
f.Node, err = p.objectList(false)
if scerr != nil {
return nil, scerr
}
@@ -62,11 +68,23 @@ func (p *Parser) Parse() (*ast.File, error) {
return f, nil
}
func (p *Parser) objectList() (*ast.ObjectList, error) {
// objectList parses a list of items within an object (generally k/v pairs).
// The parameter" obj" tells this whether to we are within an object (braces:
// '{', '}') or just at the top level. If we're within an object, we end
// at an RBRACE.
func (p *Parser) objectList(obj bool) (*ast.ObjectList, error) {
defer un(trace(p, "ParseObjectList"))
node := &ast.ObjectList{}
for {
if obj {
tok := p.scan()
p.unscan()
if tok.Type == token.RBRACE {
break
}
}
n, err := p.objectItem()
if err == errEofToken {
break // we are finished
@@ -179,9 +197,18 @@ func (p *Parser) objectItem() (*ast.ObjectItem, error) {
keyStr = append(keyStr, k.Token.Text)
}
return nil, fmt.Errorf(
"key '%s' expected start of object ('{') or assignment ('=')",
strings.Join(keyStr, " "))
return nil, &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf(
"key '%s' expected start of object ('{') or assignment ('=')",
strings.Join(keyStr, " ")),
}
}
// key=#comment
// val
if p.lineComment != nil {
o.LineComment, p.lineComment = p.lineComment, nil
}
// do a look-ahead for line comment
@@ -244,7 +271,10 @@ func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
keyCount++
keys = append(keys, &ast.ObjectKey{Token: p.tok})
case token.ILLEGAL:
fmt.Println("illegal")
return keys, &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf("illegal character"),
}
default:
return keys, &PosError{
Pos: p.tok.Pos,
@@ -288,7 +318,7 @@ func (p *Parser) objectType() (*ast.ObjectType, error) {
Lbrace: p.tok.Pos,
}
l, err := p.objectList()
l, err := p.objectList(true)
// if we hit RBRACE, we are good to go (means we parsed all Items), if it's
// not a RBRACE, it's an syntax error and we just return it.
@@ -296,9 +326,12 @@ func (p *Parser) objectType() (*ast.ObjectType, error) {
return nil, err
}
// If there is no error, we should be at a RBRACE to end the object
if p.tok.Type != token.RBRACE {
return nil, fmt.Errorf("object expected closing RBRACE got: %s", p.tok.Type)
// No error, scan and expect the ending to be a brace
if tok := p.scan(); tok.Type != token.RBRACE {
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf("object expected closing RBRACE got: %s", tok.Type),
}
}
o.List = l
@@ -331,12 +364,18 @@ func (p *Parser) listType() (*ast.ListType, error) {
}
}
switch tok.Type {
case token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
case token.BOOL, token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
node, err := p.literalType()
if err != nil {
return nil, err
}
// If there is a lead comment, apply it
if p.leadComment != nil {
node.LeadComment = p.leadComment
p.leadComment = nil
}
l.Add(node)
needComma = true
case token.COMMA:
@@ -367,12 +406,16 @@ func (p *Parser) listType() (*ast.ListType, error) {
}
l.Add(node)
needComma = true
case token.BOOL:
// TODO(arslan) should we support? not supported by HCL yet
case token.LBRACK:
// TODO(arslan) should we support nested lists? Even though it's
// written in README of HCL, it's not a part of the grammar
// (not defined in parse.y)
node, err := p.listType()
if err != nil {
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf(
"error while trying to parse list within list: %s", err),
}
}
l.Add(node)
case token.RBRACK:
// finished
l.Rbrack = p.tok.Pos

View File

@@ -62,6 +62,14 @@ func (p *printer) collectComments(node ast.Node) {
ast.Walk(node, func(nn ast.Node) (ast.Node, bool) {
switch t := nn.(type) {
case *ast.LiteralType:
if t.LeadComment != nil {
for _, comment := range t.LeadComment.List {
if _, ok := standaloneComments[comment.Pos()]; ok {
delete(standaloneComments, comment.Pos())
}
}
}
if t.LineComment != nil {
for _, comment := range t.LineComment.List {
if _, ok := standaloneComments[comment.Pos()]; ok {
@@ -95,7 +103,6 @@ func (p *printer) collectComments(node ast.Node) {
}
sort.Sort(ByPosition(p.standaloneComments))
}
// output prints creates b printable HCL output and returns it.
@@ -104,35 +111,60 @@ func (p *printer) output(n interface{}) []byte {
switch t := n.(type) {
case *ast.File:
// File doesn't trace so we add the tracing here
defer un(trace(p, "File"))
return p.output(t.Node)
case *ast.ObjectList:
var index int
var nextItem token.Pos
var commented bool
for {
// TODO(arslan): refactor below comment printing, we have the same in objectType
for _, c := range p.standaloneComments {
for _, comment := range c.List {
if index != len(t.Items) {
nextItem = t.Items[index].Pos()
} else {
nextItem = token.Pos{Offset: infinity, Line: infinity}
}
defer un(trace(p, "ObjectList"))
var index int
for {
// Determine the location of the next actual non-comment
// item. If we're at the end, the next item is at "infinity"
var nextItem token.Pos
if index != len(t.Items) {
nextItem = t.Items[index].Pos()
} else {
nextItem = token.Pos{Offset: infinity, Line: infinity}
}
// Go through the standalone comments in the file and print out
// the comments that we should be for this object item.
for _, c := range p.standaloneComments {
// Go through all the comments in the group. The group
// should be printed together, not separated by double newlines.
printed := false
newlinePrinted := false
for _, comment := range c.List {
// We only care about comments after the previous item
// we've printed so that comments are printed in the
// correct locations (between two objects for example).
// And before the next item.
if comment.Pos().After(p.prev) && comment.Pos().Before(nextItem) {
// if we hit the end add newlines so we can print the comment
if index == len(t.Items) {
// we don't do this if prev is invalid which means the
// beginning of the file since the first comment should
// be at the first line.
if !newlinePrinted && p.prev.IsValid() && index == len(t.Items) {
buf.Write([]byte{newline, newline})
newlinePrinted = true
}
// Write the actual comment.
buf.WriteString(comment.Text)
buf.WriteByte(newline)
if index != len(t.Items) {
buf.WriteByte(newline)
}
// Set printed to true to note that we printed something
printed = true
}
}
// If we're not at the last item, write a new line so
// that there is a newline separating this comment from
// the next object.
if printed && index != len(t.Items) {
buf.WriteByte(newline)
}
}
if index == len(t.Items) {
@@ -140,8 +172,29 @@ func (p *printer) output(n interface{}) []byte {
}
buf.Write(p.output(t.Items[index]))
if !commented && index != len(t.Items)-1 {
buf.Write([]byte{newline, newline})
if index != len(t.Items)-1 {
// Always write a newline to separate us from the next item
buf.WriteByte(newline)
// Need to determine if we're going to separate the next item
// with a blank line. The logic here is simple, though there
// are a few conditions:
//
// 1. The next object is more than one line away anyways,
// so we need an empty line.
//
// 2. The next object is not a "single line" object, so
// we need an empty line.
//
// 3. This current object is not a single line object,
// so we need an empty line.
current := t.Items[index]
next := t.Items[index+1]
if next.Pos().Line != t.Items[index].Pos().Line+1 ||
!p.isSingleLineObject(next) ||
!p.isSingleLineObject(current) {
buf.WriteByte(newline)
}
}
index++
}
@@ -165,7 +218,8 @@ func (p *printer) output(n interface{}) []byte {
func (p *printer) literalType(lit *ast.LiteralType) []byte {
result := []byte(lit.Token.Text)
if lit.Token.Type == token.HEREDOC {
switch lit.Token.Type {
case token.HEREDOC:
// Clear the trailing newline from heredocs
if result[len(result)-1] == '\n' {
result = result[:len(result)-1]
@@ -173,6 +227,12 @@ func (p *printer) literalType(lit *ast.LiteralType) []byte {
// Poison lines 2+ so that we don't indent them
result = p.heredocIndent(result)
case token.STRING:
// If this is a multiline string, poison lines 2+ so we don't
// indent them.
if bytes.IndexRune(result, '\n') >= 0 {
result = p.heredocIndent(result)
}
}
return result
@@ -192,6 +252,14 @@ func (p *printer) objectItem(o *ast.ObjectItem) []byte {
}
}
// If key and val are on different lines, treat line comments like lead comments.
if o.LineComment != nil && o.Val.Pos().Line != o.Keys[0].Pos().Line {
for _, comment := range o.LineComment.List {
buf.WriteString(comment.Text)
buf.WriteByte(newline)
}
}
for i, k := range o.Keys {
buf.WriteString(k.Token.Text)
buf.WriteByte(blank)
@@ -205,7 +273,7 @@ func (p *printer) objectItem(o *ast.ObjectItem) []byte {
buf.Write(p.output(o.Val))
if o.Val.Pos().Line == o.Keys[0].Pos().Line && o.LineComment != nil {
if o.LineComment != nil && o.Val.Pos().Line == o.Keys[0].Pos().Line {
buf.WriteByte(blank)
for _, comment := range o.LineComment.List {
buf.WriteString(comment.Text)
@@ -226,17 +294,24 @@ func (p *printer) objectType(o *ast.ObjectType) []byte {
var nextItem token.Pos
var commented, newlinePrinted bool
for {
// Determine the location of the next actual non-comment
// item. If we're at the end, the next item is the closing brace
if index != len(o.List.Items) {
nextItem = o.List.Items[index].Pos()
} else {
nextItem = o.Rbrace
}
// Print stand alone comments
// Go through the standalone comments in the file and print out
// the comments that we should be for this object item.
for _, c := range p.standaloneComments {
printed := false
var lastCommentPos token.Pos
for _, comment := range c.List {
// if we hit the end, last item should be the brace
if index != len(o.List.Items) {
nextItem = o.List.Items[index].Pos()
} else {
nextItem = o.Rbrace
}
// We only care about comments after the previous item
// we've printed so that comments are printed in the
// correct locations (between two objects for example).
// And before the next item.
if comment.Pos().After(p.prev) && comment.Pos().Before(nextItem) {
// If there are standalone comments and the initial newline has not
// been printed yet, do it now.
@@ -251,11 +326,33 @@ func (p *printer) objectType(o *ast.ObjectType) []byte {
buf.WriteByte(newline)
}
buf.Write(p.indent([]byte(comment.Text)))
// Store this position
lastCommentPos = comment.Pos()
// output the comment itself
buf.Write(p.indent(p.heredocIndent([]byte(comment.Text))))
// Set printed to true to note that we printed something
printed = true
/*
if index != len(o.List.Items) {
buf.WriteByte(newline) // do not print on the end
}
*/
}
}
// Stuff to do if we had comments
if printed {
// Always write a newline
buf.WriteByte(newline)
// If there is another item in the object and our comment
// didn't hug it directly, then make sure there is a blank
// line separating them.
if nextItem != o.Rbrace && nextItem.Line != lastCommentPos.Line+1 {
buf.WriteByte(newline)
if index != len(o.List.Items) {
buf.WriteByte(newline) // do not print on the end
}
}
}
}
@@ -420,8 +517,13 @@ func (p *printer) alignedItems(items []*ast.ObjectItem) []byte {
// list returns the printable HCL form of an list type.
func (p *printer) list(l *ast.ListType) []byte {
if p.isSingleLineList(l) {
return p.singleLineList(l)
}
var buf bytes.Buffer
buf.WriteString("[")
buf.WriteByte(newline)
var longestLine int
for _, item := range l.List {
@@ -434,45 +536,112 @@ func (p *printer) list(l *ast.ListType) []byte {
}
}
insertSpaceBeforeItem := false
haveEmptyLine := false
for i, item := range l.List {
if item.Pos().Line != l.Lbrack.Line {
// multiline list, add newline before we add each item
buf.WriteByte(newline)
insertSpaceBeforeItem = false
// also indent each line
val := p.output(item)
curLen := len(val)
buf.Write(p.indent(val))
buf.WriteString(",")
// If we have a lead comment, then we want to write that first
leadComment := false
if lit, ok := item.(*ast.LiteralType); ok && lit.LeadComment != nil {
leadComment = true
if lit, ok := item.(*ast.LiteralType); ok && lit.LineComment != nil {
// if the next item doesn't have any comments, do not align
buf.WriteByte(blank) // align one space
for i := 0; i < longestLine-curLen; i++ {
buf.WriteByte(blank)
}
for _, comment := range lit.LineComment.List {
buf.WriteString(comment.Text)
}
}
if i == len(l.List)-1 {
// Ensure an empty line before every element with a
// lead comment (except the first item in a list).
if !haveEmptyLine && i != 0 {
buf.WriteByte(newline)
}
} else {
if insertSpaceBeforeItem {
buf.WriteByte(blank)
insertSpaceBeforeItem = false
}
buf.Write(p.output(item))
if i != len(l.List)-1 {
buf.WriteString(",")
insertSpaceBeforeItem = true
for _, comment := range lit.LeadComment.List {
buf.Write(p.indent([]byte(comment.Text)))
buf.WriteByte(newline)
}
}
// also indent each line
val := p.output(item)
curLen := len(val)
buf.Write(p.indent(val))
// if this item is a heredoc, then we output the comma on
// the next line. This is the only case this happens.
comma := []byte{','}
if lit, ok := item.(*ast.LiteralType); ok && lit.Token.Type == token.HEREDOC {
buf.WriteByte(newline)
comma = p.indent(comma)
}
buf.Write(comma)
if lit, ok := item.(*ast.LiteralType); ok && lit.LineComment != nil {
// if the next item doesn't have any comments, do not align
buf.WriteByte(blank) // align one space
for i := 0; i < longestLine-curLen; i++ {
buf.WriteByte(blank)
}
for _, comment := range lit.LineComment.List {
buf.WriteString(comment.Text)
}
}
buf.WriteByte(newline)
// Ensure an empty line after every element with a
// lead comment (except the first item in a list).
haveEmptyLine = leadComment && i != len(l.List)-1
if haveEmptyLine {
buf.WriteByte(newline)
}
}
buf.WriteString("]")
return buf.Bytes()
}
// isSingleLineList returns true if:
// * they were previously formatted entirely on one line
// * they consist entirely of literals
// * there are either no heredoc strings or the list has exactly one element
// * there are no line comments
func (printer) isSingleLineList(l *ast.ListType) bool {
for _, item := range l.List {
if item.Pos().Line != l.Lbrack.Line {
return false
}
lit, ok := item.(*ast.LiteralType)
if !ok {
return false
}
if lit.Token.Type == token.HEREDOC && len(l.List) != 1 {
return false
}
if lit.LineComment != nil {
return false
}
}
return true
}
// singleLineList prints a simple single line list.
// For a definition of "simple", see isSingleLineList above.
func (p *printer) singleLineList(l *ast.ListType) []byte {
buf := &bytes.Buffer{}
buf.WriteString("[")
for i, item := range l.List {
if i != 0 {
buf.WriteString(", ")
}
// Output the item itself
buf.Write(p.output(item))
// The heredoc marker needs to be at the end of line.
if lit, ok := item.(*ast.LiteralType); ok && lit.Token.Type == token.HEREDOC {
buf.WriteByte(newline)
}
}
buf.WriteString("]")
@@ -547,6 +716,36 @@ func (p *printer) heredocIndent(buf []byte) []byte {
return res
}
// isSingleLineObject tells whether the given object item is a single
// line object such as "obj {}".
//
// A single line object:
//
// * has no lead comments (hence multi-line)
// * has no assignment
// * has no values in the stanza (within {})
//
func (p *printer) isSingleLineObject(val *ast.ObjectItem) bool {
// If there is a lead comment, can't be one line
if val.LeadComment != nil {
return false
}
// If there is assignment, we always break by line
if val.Assign.IsValid() {
return false
}
// If it isn't an object type, then its not a single line object
ot, ok := val.Val.(*ast.ObjectType)
if !ok {
return false
}
// If the object has no items, it is single line!
return len(ot.List.Items) == 0
}
func lines(txt string) int {
endline := 1
for i := 0; i < len(txt); i++ {

View File

@@ -62,6 +62,5 @@ func Format(src []byte) ([]byte, error) {
// Add trailing newline to result
buf.WriteString("\n")
return buf.Bytes(), nil
}

View File

@@ -74,14 +74,6 @@ func (s *Scanner) next() rune {
return eof
}
if ch == utf8.RuneError && size == 1 {
s.srcPos.Column++
s.srcPos.Offset += size
s.lastCharLen = size
s.err("illegal UTF-8 encoding")
return ch
}
// remember last position
s.prevPos = s.srcPos
@@ -89,12 +81,27 @@ func (s *Scanner) next() rune {
s.lastCharLen = size
s.srcPos.Offset += size
if ch == utf8.RuneError && size == 1 {
s.err("illegal UTF-8 encoding")
return ch
}
if ch == '\n' {
s.srcPos.Line++
s.lastLineLen = s.srcPos.Column
s.srcPos.Column = 0
}
if ch == '\x00' {
s.err("unexpected null character (0x00)")
return eof
}
if ch == '\uE123' {
s.err("unicode code point U+E123 reserved for internal use")
return utf8.RuneError
}
// debug
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
return ch
@@ -224,6 +231,11 @@ func (s *Scanner) Scan() token.Token {
func (s *Scanner) scanComment(ch rune) {
// single line comments
if ch == '#' || (ch == '/' && s.peek() != '*') {
if ch == '/' && s.peek() != '/' {
s.err("expected '/' for comment")
return
}
ch = s.next()
for ch != '\n' && ch >= 0 && ch != eof {
ch = s.next()
@@ -340,7 +352,7 @@ func (s *Scanner) scanNumber(ch rune) token.Type {
return token.NUMBER
}
// scanMantissa scans the mantissa begining from the rune. It returns the next
// scanMantissa scans the mantissa beginning from the rune. It returns the next
// non decimal rune. It's used to determine wheter it's a fraction or exponent.
func (s *Scanner) scanMantissa(ch rune) rune {
scanned := false
@@ -421,16 +433,16 @@ func (s *Scanner) scanHeredoc() {
// Read the identifier
identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
if len(identBytes) == 0 {
if len(identBytes) == 0 || (len(identBytes) == 1 && identBytes[0] == '-') {
s.err("zero-length heredoc anchor")
return
}
var identRegexp *regexp.Regexp
if identBytes[0] == '-' {
identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes[1:]))
identRegexp = regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\r*\z`, identBytes[1:]))
} else {
identRegexp = regexp.MustCompile(fmt.Sprintf(`[[:space:]]*%s\z`, identBytes))
identRegexp = regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\r*\z`, identBytes))
}
// Read the actual string value
@@ -469,7 +481,7 @@ func (s *Scanner) scanString() {
// read character after quote
ch := s.next()
if ch < 0 || ch == eof {
if (ch == '\n' && braces == 0) || ch < 0 || ch == eof {
s.err("literal not terminated")
return
}
@@ -540,7 +552,7 @@ func (s *Scanner) scanDigits(ch rune, base, n int) rune {
s.err("illegal char escape")
}
if n != start {
if n != start && ch != eof {
// we scanned all digits, put the last non digit char back,
// only if we read anything at all
s.unread()

View File

@@ -27,6 +27,9 @@ func Unquote(s string) (t string, err error) {
if quote != '"' {
return "", ErrSyntax
}
if !contains(s, '$') && !contains(s, '{') && contains(s, '\n') {
return "", ErrSyntax
}
// Is it trivial? Avoid allocation.
if !contains(s, '\\') && !contains(s, quote) && !contains(s, '$') {
@@ -46,7 +49,7 @@ func Unquote(s string) (t string, err error) {
for len(s) > 0 {
// If we're starting a '${}' then let it through un-unquoted.
// Specifically: we don't unquote any characters within the `${}`
// section, except for escaped backslashes, which we handle specifically.
// section.
if s[0] == '$' && len(s) > 1 && s[1] == '{' {
buf = append(buf, '$', '{')
s = s[2:]
@@ -61,16 +64,6 @@ func Unquote(s string) (t string, err error) {
s = s[size:]
// We special case escaped backslashes in interpolations, converting
// them to their unescaped equivalents.
if r == '\\' {
q, _ := utf8.DecodeRuneInString(s)
switch q {
case '\\':
continue
}
}
n := utf8.EncodeRune(runeTmp[:], r)
buf = append(buf, runeTmp[:n]...)
@@ -94,6 +87,10 @@ func Unquote(s string) (t string, err error) {
}
}
if s[0] == '\n' {
return "", ErrSyntax
}
c, multibyte, ss, err := unquoteChar(s, quote)
if err != nil {
return "", err