// Copyright 2019 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package ast defines AST relevant for the documentation generation.
//
// It recognizes top-level function declarations, top-level assignments (e.g.
// for constants and aliases), load(...) statements (to follow imported
// symbols), and struct(...) declarations.
package ast

import (
	"go.starlark.net/syntax"
)

// Ellipsis represents a complex expression that we don't care about.
//
// A value of Ellipsis type is usually literally just "...".
type Ellipsis string

// Node is a documentation-relevant declaration of something in a file.
//
// Nodes form a tree. This tree is a reduction of a full AST of the starlark
// file to a form we care about when generating the documentation.
//
// The top of the tree is represented by a Module node.
type Node interface {
	// Name is the name of the entity this node defines.
	//
	// E.g. it's the name of a function, variable, constant, etc.
	//
	// It may be a "private" name. Many definitions are defined using their
	// private names first, and then exposed publicly via separate definition
	// (such definitions are represented by Reference or ExternalReference nodes).
	Name() string

	// Span is where this node was defined in the original starlark code.
	Span() (start syntax.Position, end syntax.Position)

	// Comments are full comment lines preceding the definition.
	//
	// Have '#' in them. Empty lines between comment blocks are skipped.
	Comments() []string

	// Doc is a documentation string for this symbol extracted either from a
	// docstring or from comments.
	Doc() string

	// populateFromAST sets the fields based on the given starlark AST node.
	populateFromAST(name string, n syntax.Node)
}

// base is embedded by all node types and implements some Node methods for them.
//
// It carries name of the node, where it is defined, and surrounding comments.
type base struct {
	name string
	ast  syntax.Node // where it was defined in Starlark AST
}

func (b *base) Name() string                             { return b.name }
func (b *base) Span() (syntax.Position, syntax.Position) { return b.ast.Span() }

func (b *base) Comments() (out []string) {
	if all := b.ast.Comments(); all != nil && len(all.Before) != 0 {
		out = make([]string, len(all.Before))
		for i, cm := range all.Before {
			out[i] = cm.Text
		}
	}
	return
}

// Doc extracts the documentation for the symbol from its comments.
func (b *base) Doc() string {
	return "" // TODO: implement
}

func (b *base) populateFromAST(name string, ast syntax.Node) {
	b.name = name
	b.ast = ast
}

// Var is a node that represents '<var> = int|string|<expr>' definition.
//
// This is a "terminal" definition, not a reference to something defined
// elsewhere. Usually a constant or some computation we replace with '...' in
// the docs.
type Var struct {
	base

	Value interface{} // string | int64 | *big.Int | Ellipsis
}

// Function is a node that represents a function definition.
type Function struct {
	base

	docstring string // a doc string, if any
}

// Doc extracts the documentation from the docstring.
func (n *Function) Doc() string { return n.docstring }

// Reference is a node that represents <var> = a.b.c.
//
// It is either a top-level assignment, or a keyword argument in a function call
// (e.g. when defining struct(...)).
type Reference struct {
	base

	Path []string // the ref path on the right hand side, e.g. ['a', 'b', 'c'].
}

// ExternalReference is a node that represents a symbol imported though
// load(...) statement.
//
// For load statement load("file.star", x="y") we get an ExternalReference with
// name "x", ExternalName "y" and Module "file.star".
type ExternalReference struct {
	base

	ExternalName string // name of the symbol in the loaded module
	Module       string // path of the loaded module
}

// Namespace is a node that contains a bunch of definitions grouped together.
//
// Examples of namespaces are top-level module dicts and structs.
type Namespace struct {
	base

	Nodes []Node // nodes defined in the namespace, in order they were defined
}

// NodeByName finds a contained node given its name or returns nil.
func (n *Namespace) NodeByName(name string) Node {
	for _, node := range n.Nodes {
		if node.Name() == name {
			return node
		}
	}
	return nil
}

// Module is a parsed Starlark file.
type Module struct {
	Namespace // all top-level symbols

	docstring string // a doc string, if any
}

// Doc extracts the documentation from the docstring.
func (n *Module) Doc() string { return n.docstring }

// ParseModule parses a single Starlark module.
//
// Filename is only used when recording position information.
func ParseModule(filename, body string) (*Module, error) {
	ast, err := syntax.Parse(filename, body, syntax.RetainComments)
	if err != nil {
		return nil, err
	}

	m := &Module{docstring: extractDocstring(ast.Stmts)}
	m.populateFromAST(filename, ast)

	// emit adds a node to the module.
	emit := func(name string, ast syntax.Node, n Node) {
		n.populateFromAST(name, ast)
		m.Nodes = append(m.Nodes, n)
	}

	// Walk over top-level statements and match them against patterns we recognize
	// as relevant.
	for _, stmt := range ast.Stmts {
		switch st := stmt.(type) {
		case *syntax.LoadStmt:
			// A load(...) statement. Each imported symbol ends up in the module's
			// namespace, so add corresponding ExternalReference nodes.
			for i, nm := range st.To {
				emit(nm.Name, st, &ExternalReference{
					ExternalName: st.From[i].Name,
					Module:       st.Module.Value.(string),
				})
			}

		case *syntax.DefStmt:
			// A function declaration: "def name(...)".
			emit(st.Name.Name, st, &Function{
				docstring: extractDocstring(st.Body),
			})

		case *syntax.AssignStmt:
			// A top level assignment. We care only about <var> = ... (i.e. when LHS
			// is a simple variable, not a tuple or anything like that).
			if st.Op != syntax.EQ {
				continue
			}
			lhs := matchSingleIdent(st.LHS)
			if lhs == "" {
				continue
			}
			if n := parseAssignmentRHS(st.RHS); n != nil {
				emit(lhs, st, n)
			}
		}
	}

	return m, nil
}

// parseAssignmentRHS parses RHS of statements like "<var> = <expr>".
//
// Name of the returned node and Star/End/Comments should be populated by the
// caller.
//
// Only the following forms are recognized:
//   Var: <var> = <literal>|<complex expr>
//   Reference: <var> = <var>[.<field>]*
//   Namespace: <var> = struct(...)
func parseAssignmentRHS(rhs syntax.Expr) Node {
	// <var> = <literal>.
	if literal := matchSingleLiteral(rhs); literal != nil {
		return &Var{Value: literal}
	}

	// <var> = <var>[.<field>]*.
	if path := matchRefPath(rhs); path != nil {
		return &Reference{Path: path}
	}

	// <var> = struct(...).
	if fn, args := matchSimpleCall(rhs); fn == "struct" {
		// Pick all 'k=v' pairs from args and parse them as assignments.
		var nodes []Node
		for _, arg := range args {
			if lhs, rhs := matchEqExpr(arg); lhs != "" {
				if n := parseAssignmentRHS(rhs); n != nil {
					n.populateFromAST(lhs, arg)
					nodes = append(nodes, n)
				}
			}
		}
		return &Namespace{Nodes: nodes}
	}

	// <var> = <expr>.
	return &Var{Value: Ellipsis("...")}
}

// extractDocstring returns a doc string for the given body.
//
// A docstring is a string literal that comes first in the body, if any.
func extractDocstring(body []syntax.Stmt) string {
	if len(body) == 0 {
		return ""
	}
	expr, ok := body[0].(*syntax.ExprStmt)
	if !ok {
		return ""
	}
	literal, ok := expr.X.(*syntax.Literal)
	if !ok || literal.Token != syntax.STRING {
		return ""
	}
	return literal.Value.(string)
}

// matchSingleIdent matches an <Expr> to <Ident>, returning ident's name.
func matchSingleIdent(expr syntax.Expr) string {
	if ident, ok := expr.(*syntax.Ident); ok {
		return ident.Name
	}
	return ""
}

// matchSingleLiteral matches an <Expr> to <Literal>, returning literal's value.
//
// The returned value is string | int64 | *big.Int.
func matchSingleLiteral(expr syntax.Expr) interface{} {
	if literal, ok := expr.(*syntax.Literal); ok {
		return literal.Value
	}
	return nil
}

// matchRefPath matches an <Expr> to <Ident>(.<Ident>)* returning identifier'
// names as a list of strings.
func matchRefPath(expr syntax.Expr) (path []string) {
loop:
	for {
		switch next := expr.(type) {
		case *syntax.DotExpr: // next in chain
			path = append(path, next.Name.Name)
			expr = next.X
		case *syntax.Ident: // last in chain
			path = append(path, next.Name)
			break loop
		default:
			return nil // not a simple ref path, has additional structure, give up
		}
	}
	// Expr "a.b.c" results in ['c', 'b', 'a'], reverse.
	for l, r := 0, len(path)-1; l < r; l, r = l+1, r-1 {
		path[l], path[r] = path[r], path[l]
	}
	return
}

// matchSimpleCall matches an <Expr> to <Ident>(<Expr>*), returning them.
func matchSimpleCall(expr syntax.Expr) (fn string, args []syntax.Expr) {
	call, ok := expr.(*syntax.CallExpr)
	if !ok {
		return "", nil
	}
	if fn = matchSingleIdent(call.Fn); fn == "" {
		return "", nil
	}
	return fn, call.Args
}

// matchEqExpr matches an <Expr> to <Ident>=<Expr>, returning them.
func matchEqExpr(expr syntax.Expr) (lhs string, rhs syntax.Expr) {
	bin, ok := expr.(*syntax.BinaryExpr)
	if !ok || bin.Op != syntax.EQ {
		return "", nil
	}
	if lhs = matchSingleIdent(bin.X); lhs == "" {
		return "", nil
	}
	return lhs, bin.Y
}
