package io

import (
	"fmt"
	"io"
	"regexp"
	"strconv"
	"strings"

	"git.sr.ht/~charles/rq/util"
)

// init registers the raw handler under its canonical name "raw" and under the
// aliases "awk", "tabular" and "lines", each of which hands out a handler that
// is already configured for a common splitting setup.
func init() {
	// Separator patterns and cutset shared by the pre-configured aliases.
	const (
		fieldPattern  = "[ \t]+"
		recordPattern = "[\n]+"
		whitespace    = "\t\n\r "
	)

	// raw: nothing preset; all configuration arrives through SetOption.
	registerInputHandler("raw", func() InputHandler {
		return new(RawInputHandler)
	})

	// awk: records split on newlines, fields split on runs of blanks.
	registerInputHandler("awk", func() InputHandler {
		h := &RawInputHandler{cutset: whitespace, initialized: true}
		h.fs = regexp.MustCompile(fieldPattern)
		h.rs = regexp.MustCompile(recordPattern)
		return h
	})

	// tabular: like awk, but the first record is a header row and scalar
	// values are type-inferred.
	registerInputHandler("tabular", func() InputHandler {
		h := &RawInputHandler{cutset: whitespace, initialized: true}
		h.fs = regexp.MustCompile(fieldPattern)
		h.rs = regexp.MustCompile(recordPattern)
		h.headers = true
		h.infer = true
		return h
	})

	// lines: records split on newlines, no field splitting.
	registerInputHandler("lines", func() InputHandler {
		h := &RawInputHandler{cutset: whitespace, initialized: true}
		h.rs = regexp.MustCompile(recordPattern)
		return h
	})
}

// Compile-time assertion that *RawInputHandler satisfies InputHandler.
var _ InputHandler = (*RawInputHandler)(nil)

// RawInputHandler simply converts the input directly to a string. It supports
// optional AWK-like field and record splitting.
//
// The data shape of this handler depends on how it is configured. If fs and rs
// are both unset, then the result is a string. If only rs is set, the result
// is an array, and if both fs and rs are set, the result is an array of
// arrays. In the latter case the data shape can instead be a list of objects
// if raw.headers is true. Setting fs without rs is an error.
//
// When splitting is enabled, elements that are empty after the cutset(s) are
// applied are dropped, as are records that end up with no fields.
//
// If raw.infer is true, then the type of the innermost scalar values will be
// inferred automatically, otherwise they will be left as strings. Type
// inference happens after the cutset(s) are applied.
//
// The following options are supported:
//
// raw.fs (regex) split records into fields with the given regex as the
// delimiter. If this value is the empty string, then records will remain as
// strings and not be split into fields. (default: "")
//
// raw.rs (regex) split the input into records with the given regex as the
// delimiter. If this value is the empty string, then the input will remain as
// a string and not be split into records. (default: "")
//
// raw.lcutset (string) apply strings.TrimLeft with this cutset to each element
// after splitting is complete, if the cutset is non-empty (default: "").
//
// raw.rcutset (string) apply strings.TrimRight with this cutset to each
// element after splitting is complete, if the cutset is non-empty
// (default: "").
//
// raw.cutset (string) apply strings.Trim with this cutset to each element
// after splitting is complete, if the cutset is non-empty (default: "").
//
// The cutsets are applied in the order lcutset, rcutset, cutset.
//
// raw.coalesce (int) if a positive value n, splitting produces at most n
// elements and the unsplit remainder is kept in the last one. It limits field
// splitting when both fs and rs are set, and record splitting when only rs is
// set (default: 0).
//
// raw.infer (bool) if true, attempt to convert boolean, integer, or floating
// point elements into those Rego types, rather than leaving them as strings
// (default: false).
//
// raw.headers (bool) if true, and rs and fs are both set, then treat the first
// record as a header row (default: false). Fields beyond the last header are
// stored under the key "columnN", where N is the zero-based field index.
type RawInputHandler struct {
	// fs is the field separator; nil means records are not split into fields.
	fs          *regexp.Regexp
	// rs is the record separator; nil means the input is not split into records.
	rs          *regexp.Regexp
	// cutset, lcutset and rcutset are the trim sets used by cut.
	cutset      string
	lcutset     string
	rcutset     string
	// headers selects header-row mode for tables.
	headers     bool
	// infer enables type inference of scalar values.
	infer       bool
	// initialized records that the handler already carries its configuration,
	// so init must not reset it.
	initialized bool
	// coalesce is the split limit; values <= 0 mean unlimited.
	coalesce    int
}

// Name implements InputHandler.Name(). It always reports "raw", regardless of
// how the handler instance was configured.
func (r *RawInputHandler) Name() string {
	const handlerName = "raw"

	return handlerName
}

// init puts the handler into its default configuration the first time it is
// used. Handlers that were constructed with initialized set to true (such as
// the pre-configured aliases) are left untouched, and so is any handler on
// which init has already run.
func (r *RawInputHandler) init() {
	if r.initialized {
		return
	}

	// Assigning a fresh value resets every option to its default at once,
	// including coalesce, so a newly added field cannot be forgotten here.
	*r = RawInputHandler{initialized: true}
}

// inferValue returns s unchanged when type inference is disabled; otherwise
// it returns whatever util.StringToValue produces for s.
func (r *RawInputHandler) inferValue(s string) interface{} {
	if !r.infer {
		return s
	}

	return util.StringToValue(s)
}

// cut trims s with the configured cutsets, in the order lcutset (left side
// only), rcutset (right side only), then cutset (both sides).
func (r *RawInputHandler) cut(s string) string {
	// The strings.Trim* functions return their input unchanged for an empty
	// cutset, so unset cutsets need no explicit guard.
	leftTrimmed := strings.TrimLeft(s, r.lcutset)
	sidesTrimmed := strings.TrimRight(leftTrimmed, r.rcutset)

	return strings.Trim(sidesTrimmed, r.cutset)
}

// parseSingle handles the case where neither fs nor rs is set: the whole input
// is read, trimmed with the configured cutsets and returned as one scalar
// (type-inferred when inference is enabled).
//
// It returns the read error if the input cannot be read, and an error if
// header mode is enabled, since there is no table to take headers from.
func (r *RawInputHandler) parseSingle(reader io.Reader) (interface{}, error) {
	raw, err := io.ReadAll(reader)
	switch {
	case err != nil:
		return nil, err
	case r.headers:
		return nil, fmt.Errorf("headers cannot be inferred unless both fs and rs are set")
	}

	return r.inferValue(r.cut(string(raw))), nil
}

// parseList handles the case where only rs is set: the input is split into
// records, each record is trimmed with the configured cutsets, and records
// that are empty after trimming are dropped.
//
// A positive coalesce value caps the number of pieces the split produces, the
// last piece holding the unsplit remainder. The result is a []string, or a
// []interface{} of inferred values when inference is enabled. Header mode is
// rejected with an error because it needs both fs and rs.
func (r *RawInputHandler) parseList(reader io.Reader) (interface{}, error) {
	raw, err := io.ReadAll(reader)
	switch {
	case err != nil:
		return nil, err
	case r.headers:
		return nil, fmt.Errorf("headers cannot be inferred unless both fs and rs are set")
	}

	// A negative limit asks Split for every piece.
	limit := -1
	if r.coalesce > 0 {
		limit = r.coalesce
	}

	kept := make([]string, 0)
	for _, piece := range r.rs.Split(string(raw), limit) {
		if trimmed := r.cut(piece); trimmed != "" {
			kept = append(kept, trimmed)
		}
	}

	if r.infer {
		values := make([]interface{}, 0, len(kept))
		for _, item := range kept {
			values = append(values, r.inferValue(item))
		}
		return values, nil
	}

	return kept, nil
}

// parseTable handles the case where both fs and rs are set. The input is split
// into records with rs and each record into fields with fs; every field is
// trimmed with the configured cutsets and passed through inferValue. Fields
// that are empty after trimming, and records left without any field, are
// dropped. A positive coalesce value caps the number of pieces each record is
// split into, the last piece holding the unsplit remainder.
//
// Without header mode the result is a [][]interface{}. With header mode the
// first surviving record supplies the keys and every later record becomes a
// map[string]interface{}; fields beyond the last header are stored under
// "columnN", where N is the zero-based field index. If header mode is on but
// there are no records at all, an empty [][]interface{} is returned.
func (r *RawInputHandler) parseTable(reader io.Reader) (interface{}, error) {
	raw, err := io.ReadAll(reader)
	if err != nil {
		return nil, err
	}

	// A negative limit asks Split for every field.
	limit := -1
	if r.coalesce > 0 {
		limit = r.coalesce
	}

	rows := make([][]interface{}, 0)
	for _, line := range r.rs.Split(string(raw), -1) {
		row := make([]interface{}, 0)
		for _, cell := range r.fs.Split(line, limit) {
			if trimmed := r.cut(cell); trimmed != "" {
				row = append(row, r.inferValue(trimmed))
			}
		}
		if len(row) == 0 {
			continue
		}
		rows = append(rows, row)
	}

	switch {
	case !r.headers:
		return rows, nil
	case len(rows) == 0:
		return [][]interface{}{}, nil
	}

	// The header cells may already have been type-inferred, so convert them
	// back to strings before using them as keys.
	names := make([]string, len(rows[0]))
	for i, cell := range rows[0] {
		names[i] = r.cut(util.ValueToString(cell))
	}

	objects := []map[string]interface{}{}
	for _, row := range rows[1:] {
		obj := make(map[string]interface{})
		for col, value := range row {
			var name string
			if col >= len(names) {
				name = fmt.Sprintf("column%d", col)
			} else {
				name = names[col]
			}

			// Keep the entry unless both the key and the value are empty.
			if name != "" || value != "" {
				obj[name] = value
			}
		}

		if len(obj) > 0 {
			objects = append(objects, obj)
		}
	}

	return objects, nil
}

// Parse implements InputHandler.Parse(). The configured separators select the
// parsing mode: neither fs nor rs yields a single scalar, rs alone yields a
// list, and fs together with rs yields a table. Setting fs without rs is
// rejected with an error.
func (r *RawInputHandler) Parse(reader io.Reader) (interface{}, error) {
	r.init()

	hasFS, hasRS := r.fs != nil, r.rs != nil

	switch {
	case hasFS && hasRS:
		return r.parseTable(reader)
	case hasRS:
		return r.parseList(reader)
	case hasFS:
		return nil, fmt.Errorf("fs cannot be used if rs is omitted")
	default:
		return r.parseSingle(reader)
	}
}

// SetOption implements InputHandler.SetOption().
//
// Recognized options are raw.cutset, raw.lcutset, raw.rcutset, raw.fs, raw.rs,
// raw.headers, raw.infer and raw.coalesce; any other name is ignored and nil
// is returned. An error is returned when raw.fs or raw.rs is not a valid
// regex, when raw.headers or raw.infer is not a boolean, or when raw.coalesce
// is not an integer. On error the handler's configuration is left unchanged.
//
// An empty value for raw.fs or raw.rs clears the corresponding separator, so
// that no splitting on it takes place.
func (r *RawInputHandler) SetOption(name string, value string) error {
	r.init()

	switch name {
	case "raw.split", "raw.cutset", "raw.lcutset", "raw.rcutset":
		// Escape sequences are expanded on a best-effort basis: if the value
		// cannot be unescaped it is used verbatim.
		// NOTE(review): "raw.split" is not handled by the switch below;
		// presumably a legacy option name - confirm before removing.
		if unescaped, err := util.Unescape(value); err == nil {
			value = unescaped
		}
	}

	switch name {
	case "raw.cutset":
		r.cutset = value
	case "raw.lcutset":
		r.lcutset = value
	case "raw.rcutset":
		r.rcutset = value
	case "raw.fs":
		re, err := compileRawSeparator(name, value)
		if err != nil {
			return err
		}
		r.fs = re
	case "raw.rs":
		re, err := compileRawSeparator(name, value)
		if err != nil {
			return err
		}
		r.rs = re
	case "raw.headers":
		b, err := parseRawBool(name, value)
		if err != nil {
			return err
		}
		r.headers = b
	case "raw.infer":
		b, err := parseRawBool(name, value)
		if err != nil {
			return err
		}
		r.infer = b
	case "raw.coalesce":
		n, err := strconv.Atoi(value)
		if err != nil {
			return fmt.Errorf("option %s: %w", name, err)
		}
		r.coalesce = n
	}

	return nil
}

// compileRawSeparator compiles the separator regex given for option name. An
// empty value yields a nil regex, meaning "do not split": compiling the empty
// pattern instead would produce a regex that matches between every character.
func compileRawSeparator(name, value string) (*regexp.Regexp, error) {
	if value == "" {
		return nil, nil
	}

	re, err := regexp.Compile(value)
	if err != nil {
		return nil, fmt.Errorf("option %s: %w", name, err)
	}

	return re, nil
}

// parseRawBool interprets value as a boolean for option name. It returns an
// error, rather than panicking, when util.StringToValue does not yield a bool.
func parseRawBool(name, value string) (bool, error) {
	b, ok := util.StringToValue(value).(bool)
	if !ok {
		return false, fmt.Errorf("option %s requires a boolean value, got %q", name, value)
	}

	return b, nil
}
