unidiff-comments/changeset_reader.go
Norwin 71ca77c7f9
restructure package
so parser-only usage has no dependencies
2020-12-19 09:50:07 +01:00

460 lines
11 KiB
Go

package unidiff
import (
"bufio"
"fmt"
"io"
"regexp"
"strconv"
"strings"
"time"
"gitea.com/noerw/unidiff-comments/types"
)
// Parser state names and the ignore marker.
//
// The state values are plain strings equal to their identifier, which makes
// them readable when printed while debugging switchState.
const (
// stateStartOfDiff is the initial state; the parser also returns to it after
// an empty line.
stateStartOfDiff = "stateStartOfDiff"
// stateDiffHeader is entered on "--- ", "+++ ", "diff " and "index " lines and
// is kept until a hunk header is seen.
stateDiffHeader = "stateDiffHeader"
// stateHunkHeader is entered on an "@@ -a,b +c,d @@" line.
stateHunkHeader = "stateHunkHeader"
// stateHunkBody is entered on context (" "), added ("+") and removed ("-")
// lines following a hunk header.
stateHunkBody = "stateHunkBody"
// stateComment, stateCommentDelim and stateCommentHeader classify "#" lines
// that appear after a hunk has started: free text, a "# ---" delimiter and a
// "# [id@version] |author| date" header respectively.
stateComment = "stateComment"
stateCommentDelim = "stateCommentDelim"
stateCommentHeader = "stateCommentHeader"
// stateDiffComment, stateDiffCommentDelim and stateDiffCommentHeader are the
// same three classifications for "#" lines that appear at the start of a diff,
// before any hunk.
stateDiffComment = "stateDiffComment"
stateDiffCommentDelim = "stateDiffCommentDelim"
stateDiffCommentHeader = "stateDiffCommentHeader"
// ignorePrefix marks lines that ReadChangeset skips without parsing.
ignorePrefix = "###"
)
// Line-classification patterns. Every pattern is anchored at the start of the
// line and is applied to a line that still carries its trailing newline.
var (
	// File headers: plain unified diff ("--- "/"+++ ") and git ("diff "/"index ").
	reDiffHeader    = regexp.MustCompile(`^--- |^\+\+\+ `)
	reGitDiffHeader = regexp.MustCompile(`^diff |^index `)

	// "--- <path> <rest>" and "+++ <path> <rest>"; group 1 is the path, group 3
	// is whatever follows the whitespace after it.
	reFromFile = regexp.MustCompile(`^--- (\S+)(\s+(.*))`)
	reToFile   = regexp.MustCompile(`^\+\+\+ (\S+)(\s+(.*))`)

	// Hunk header; groups 1/3 are source start/length, groups 4/6 are
	// destination start/length. Both lengths are optional.
	reHunk = regexp.MustCompile(`^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@`)

	// Hunk body markers.
	reSegmentContext = regexp.MustCompile(`^ `)
	reSegmentAdded   = regexp.MustCompile(`^\+`)
	reSegmentRemoved = regexp.MustCompile(`^-`)

	// Comment lines. reCommentText matches every line starting with "#", so
	// the more specific delimiter and header patterns must be tested first.
	reCommentDelim  = regexp.MustCompile(`^#\s+---`)
	reCommentHeader = regexp.MustCompile(`^#\s+\[(\d+)@(\d+)\]\s+\|([^|]+)\|(.*)`)
	reCommentText   = regexp.MustCompile(`^#(\s*)(.*)\s*`)
	reIndent        = regexp.MustCompile(`^#(\s+)`)

	// A line consisting of nothing but its newline.
	reEmptyLine = regexp.MustCompile(`^\n$`)

	// Lines starting with ignorePrefix are skipped by ReadChangeset.
	reIgnoredLine = regexp.MustCompile(`^` + ignorePrefix)
)
// parser holds the state of one ReadChangeset run: the state machine position,
// the changeset being built and pointers to the nodes currently being filled.
type parser struct {
// state is the current state name (one of the state* constants).
state string
// changeset is the result under construction.
changeset types.Changeset
// diff, hunk, segment, comment and line point at the nodes the current input
// line belongs to; each may be nil when no such node is open.
diff *types.Diff
hunk *types.Hunk
segment *types.Segment
comment *types.Comment
line *types.Line
// lineNumber counts input lines read so far (the first line is 1); it is
// reported in Error values.
lineNumber int
// segmentType is the segment type chosen by switchState for the current hunk
// body line.
segmentType string
// commentsList collects every comment that has been attached, in input
// order; it is searched backwards to find a parent for nested comments.
commentsList []*types.Comment
}
// Error is a parse failure tied to a specific line of the input.
type Error struct {
	// LineNumber is the input line the failure was detected on.
	LineNumber int
	// Message describes what went wrong.
	Message string
}

// Error implements the error interface; the text has the form
// "line <LineNumber>: <Message>".
func (err Error) Error() string {
	number, message := err.LineNumber, err.Message
	return fmt.Sprintf("line %d: %s", number, message)
}
// ReadChangeset parses a unified diff, optionally annotated with "#"-prefixed
// review comments, from r and returns the changeset built from it.
//
// The input is consumed line by line. Lines starting with ignorePrefix are
// skipped; every other line is passed through the parser stages in order:
// switchState, createNodes, locateNodes, parseLine. Once the input is
// exhausted, the text of every collected comment is trimmed of surrounding
// whitespace.
//
// A final line without a trailing newline is parsed as if it ended in "\n".
// A read failure other than io.EOF is returned as-is. Whenever an error is
// returned, the changeset built up to that point is returned with it.
func ReadChangeset(r io.Reader) (types.Changeset, error) {
	buffer := bufio.NewReader(r)

	current := parser{state: stateStartOfDiff}
	stages := []func(string) error{
		current.switchState,
		current.createNodes,
		current.locateNodes,
		current.parseLine,
	}

	for atEOF := false; !atEOF; {
		current.lineNumber++

		line, err := buffer.ReadString('\n')
		if err != nil {
			if err != io.EOF {
				// A genuine I/O failure must not look like a clean parse.
				return current.changeset, err
			}
			atEOF = true
			if line == "" {
				break
			}
			// ReadString hands back the unterminated last line together
			// with io.EOF. The stages assume every line ends in a newline,
			// so normalise it instead of dropping the line.
			line += "\n"
		}

		if reIgnoredLine.MatchString(line) {
			continue
		}

		for _, stage := range stages {
			if err := stage(line); err != nil {
				return current.changeset, err
			}
		}
	}

	for _, comment := range current.commentsList {
		comment.Text = strings.TrimSpace(comment.Text)
	}

	return current.changeset, nil
}
// switchState advances the state machine for one input line.
//
// It works in two passes. The first switch picks the next structural state
// from the current state and the first pattern the line matches; lines that
// look like comment text only set inComment there. The second pass then either
// closes the open comment (non-comment line) or refines the state into the
// delimiter/header/text variant of the comment state family that fits the
// surrounding context (diff-level or hunk-level).
//
// Case order inside each switch is significant because the patterns overlap.
//
// An Error is returned only in stateStartOfDiff, when the line is neither a
// diff header, a comment line nor an empty line.
func (current *parser) switchState(line string) error {
inComment := false
switch current.state {
case stateStartOfDiff:
switch {
case reDiffHeader.MatchString(line), reGitDiffHeader.MatchString(line):
current.state = stateDiffHeader
case reCommentText.MatchString(line):
inComment = true
case reEmptyLine.MatchString(line):
// Empty lines between diffs are allowed; the state does not change.
default:
return Error{
current.lineNumber,
"expected diff header, but none found",
}
}
case stateDiffHeader:
// Only a hunk header leaves the header state; every other line keeps it.
switch {
case reHunk.MatchString(line):
current.state = stateHunkHeader
}
case stateDiffComment, stateDiffCommentDelim, stateDiffCommentHeader:
switch {
case reDiffHeader.MatchString(line), reGitDiffHeader.MatchString(line):
current.state = stateDiffHeader
case reCommentText.MatchString(line):
inComment = true
case reEmptyLine.MatchString(line):
current.state = stateStartOfDiff
}
case stateHunkHeader:
// The line after a hunk header is classified exactly like a line inside
// the hunk body.
current.state = stateHunkBody
fallthrough
case stateHunkBody, stateComment, stateCommentDelim, stateCommentHeader:
switch {
case reSegmentContext.MatchString(line):
current.state = stateHunkBody
current.segmentType = types.SegmentTypeContext
case reSegmentRemoved.MatchString(line):
// Tested before the header patterns, so any line starting with "-"
// (including one starting with "--- ") counts as a removed line here.
current.state = stateHunkBody
current.segmentType = types.SegmentTypeRemoved
case reSegmentAdded.MatchString(line):
current.state = stateHunkBody
current.segmentType = types.SegmentTypeAdded
case reHunk.MatchString(line):
current.state = stateHunkHeader
case reCommentText.MatchString(line):
inComment = true
case reGitDiffHeader.MatchString(line):
// A git header starts the next file: drop all open nodes so that
// createNodes allocates a fresh diff.
current.state = stateDiffHeader
current.diff = nil
current.hunk = nil
current.segment = nil
current.line = nil
case reEmptyLine.MatchString(line):
// An empty line ends the current diff.
current.state = stateStartOfDiff
current.diff = nil
current.hunk = nil
current.segment = nil
current.line = nil
}
}
if !inComment {
// Any non-comment line closes the comment that was being collected.
current.comment = nil
} else {
// reCommentText matches every "#" line, so the delimiter and header
// patterns are tested first in both branches below.
switch current.state {
case stateStartOfDiff:
fallthrough
case stateDiffComment, stateDiffCommentDelim, stateDiffCommentHeader:
switch {
case reCommentDelim.MatchString(line):
current.state = stateDiffCommentDelim
case reCommentHeader.MatchString(line):
current.state = stateDiffCommentHeader
case reCommentText.MatchString(line):
current.state = stateDiffComment
}
case stateHunkBody:
fallthrough
case stateComment, stateCommentDelim, stateCommentHeader:
switch {
case reCommentDelim.MatchString(line):
current.state = stateCommentDelim
case reCommentHeader.MatchString(line):
current.state = stateCommentHeader
case reCommentText.MatchString(line):
current.state = stateComment
}
}
}
// Uncomment to trace state switching while debugging:
// fmt.Printf("%20s : %#v\n", current.state, line)
return nil
}
// createNodes allocates the tree nodes required by the state that switchState
// selected for this line. The line itself is not inspected. It always returns
// nil.
//
//   - diff header: make sure a diff exists.
//   - diff-level comment delimiter/header: start a new comment and make sure a
//     diff exists.
//   - diff-level comment text: the same, but only when no comment is open.
//   - hunk header: start a new hunk and an empty placeholder segment.
//   - hunk-level comment delimiter/header: start a new comment.
//   - hunk-level comment text: start a comment only when none is open.
//   - hunk body: start a new segment when the segment type changes, then add a
//     new line to the current segment.
func (current *parser) createNodes(line string) error {
	// ensureDiff opens a diff and registers it in the changeset if none is open.
	ensureDiff := func() {
		if current.diff != nil {
			return
		}
		current.diff = &types.Diff{}
		current.changeset.Diffs = append(current.changeset.Diffs, current.diff)
	}

	switch current.state {
	case stateDiffHeader:
		ensureDiff()

	case stateDiffCommentDelim, stateDiffCommentHeader:
		current.comment = &types.Comment{}
		ensureDiff()

	case stateDiffComment:
		// Continuation text of an open comment needs no new nodes at all.
		if current.comment == nil {
			current.comment = &types.Comment{}
			ensureDiff()
		}

	case stateHunkHeader:
		current.hunk = &types.Hunk{}
		current.segment = &types.Segment{}

	case stateCommentDelim, stateCommentHeader:
		current.comment = &types.Comment{}

	case stateComment:
		if current.comment == nil {
			current.comment = &types.Comment{}
		}

	case stateHunkBody:
		if wanted := current.segmentType; current.segment.Type != wanted {
			fresh := &types.Segment{Type: wanted}
			current.segment = fresh
			current.hunk.Segments = append(current.hunk.Segments, fresh)
		}
		fresh := &types.Line{}
		current.line = fresh
		current.segment.Lines = append(current.segment.Lines, fresh)
	}

	return nil
}
// locateNodes positions the node created for this line inside the tree:
// comment text lines are attached via locateComment, hunk body lines get
// their line numbers via locateLine. Other states need no positioning.
//
// The helper's result is returned directly; both helpers always return nil.
func (current *parser) locateNodes(line string) error {
	if current.state == stateHunkBody {
		return current.locateLine(line)
	}
	if current.state == stateComment || current.state == stateDiffComment {
		return current.locateComment(line)
	}
	return nil
}
// locateComment attaches the open comment to the tree the first time a
// non-blank text line is seen for it. It always returns nil.
//
// A comment that is already attached (Parented) is left alone, and a line that
// is just "#" never triggers attachment. On attachment the comment is recorded
// in commentsList, anchored to the current segment/line/paths when a hunk is
// open, and given the indent of this line. It then becomes a reply to the
// nearest earlier comment with a smaller indent; if there is none it is stored
// as a line comment (when a line is open) or as a file comment.
func (current *parser) locateComment(line string) error {
	comment := current.comment

	if comment.Parented {
		return nil
	}
	if strings.TrimSpace(line) == "#" {
		return nil
	}

	current.commentsList = append(current.commentsList, comment)
	comment.Parented = true

	if current.hunk != nil {
		anchor := &comment.Anchor
		anchor.LineType = current.segment.Type
		anchor.Line = current.segment.GetLineNum(current.line)
		anchor.Path = current.diff.Destination.ToString
		anchor.SrcPath = current.diff.Source.ToString
	}

	comment.Indent = getIndentSize(line)

	if parent := current.findParentComment(comment); parent != nil {
		parent.Comments = append(parent.Comments, comment)
		return nil
	}

	if current.line == nil {
		current.diff.FileComments = append(current.diff.FileComments, comment)
		return nil
	}

	current.diff.LineComments = append(current.diff.LineComments, comment)
	current.line.Comments = append(current.line.Comments, comment)
	return nil
}
// locateLine computes the source and destination line numbers of the line
// that was just appended to the current segment. It always returns nil.
//
// Numbering continues from the last line of the previous segment of the hunk,
// or from one before the hunk's start lines when this is the first segment.
// The position of the line within its segment is then added on each side the
// line exists on: both sides for context, destination only for added lines,
// source only for removed lines. Lines of any other segment type are left
// unnumbered.
func (current *parser) locateLine(line string) error {
	segments := current.hunk.Segments

	var srcBase, dstBase int64
	if len(segments) > 1 {
		// The current segment is the last entry, so the one before it is
		// the previous segment.
		previous := segments[len(segments)-2]
		tail := previous.Lines[len(previous.Lines)-1]
		srcBase, dstBase = tail.Source, tail.Destination
	} else {
		srcBase = current.hunk.SourceLine - 1
		dstBase = current.hunk.DestinationLine - 1
	}

	position := int64(len(current.segment.Lines))

	var srcStep, dstStep int64
	switch current.segment.Type {
	case types.SegmentTypeContext:
		srcStep, dstStep = position, position
	case types.SegmentTypeAdded:
		dstStep = position
	case types.SegmentTypeRemoved:
		srcStep = position
	default:
		return nil
	}

	current.line.Source = srcBase + srcStep
	current.line.Destination = dstBase + dstStep
	return nil
}
// parseLine extracts the data carried by the line into the current node,
// choosing the sub-parser by state. It always returns nil.
//
// The sub-parsers' results are discarded on purpose of keeping behaviour:
// parseDiffHeader, for instance, reports an Error for every header-state line
// that is not a "--- " or "+++ " line (such as "diff " and "index " lines), so
// propagating it would make such inputs fail.
func (current *parser) parseLine(line string) error {
	switch state := current.state; state {
	case stateDiffHeader:
		_ = current.parseDiffHeader(line)

	case stateHunkHeader:
		_ = current.parseHunkHeader(line)

	case stateHunkBody:
		_ = current.parseHunkBody(line)

	case stateComment, stateDiffComment:
		_ = current.parseComment(line)

	case stateCommentHeader, stateDiffCommentHeader:
		_ = current.parseCommentHeader(line)
	}

	return nil
}
// parseDiffHeader reads a "--- <path> <rest>" or "+++ <path> <rest>" line.
//
// For a "--- " line the path is stored as the changeset path and the diff's
// source path, and the rest of the line as the changeset's and the diff's
// FromHash. For a "+++ " line the path becomes the diff's destination path and
// the rest becomes the ToHash of both. Any other line yields an Error carrying
// the current line number.
func (current *parser) parseDiffHeader(line string) error {
	if from := reFromFile.FindStringSubmatch(line); from != nil {
		path, rest := from[1], from[3]
		current.changeset.Path = path
		current.diff.Source.ToString = path
		current.changeset.FromHash = rest
		current.diff.Attributes.FromHash = []string{rest}
		return nil
	}

	if to := reToFile.FindStringSubmatch(line); to != nil {
		path, rest := to[1], to[3]
		current.diff.Destination.ToString = path
		current.changeset.ToHash = rest
		current.diff.Attributes.ToHash = []string{rest}
		return nil
	}

	return Error{
		current.lineNumber,
		"expected diff header, but not found",
	}
}
// parseHunkHeader reads an "@@ -start[,length] +start[,length] @@" line into
// the current hunk and appends the hunk to the current diff.
//
// The unified format allows the length to be omitted, in which case it is 1;
// such a header therefore produces a span of 1 rather than 0.
//
// An Error is returned if the line is not a hunk header.
func (current *parser) parseHunkHeader(line string) error {
	matches := reHunk.FindStringSubmatch(line)
	if matches == nil {
		return Error{
			LineNumber: current.lineNumber,
			Message:    "expected hunk header, but not found",
		}
	}

	// number converts one captured group, substituting fallback for an
	// omitted (empty) optional group.
	number := func(text string, fallback int64) int64 {
		if text == "" {
			return fallback
		}
		// The pattern only captures digits, so the sole possible failure is
		// a range error, for which ParseInt already returns the clamped
		// value; the error carries no extra information here.
		value, _ := strconv.ParseInt(text, 10, 64)
		return value
	}

	current.hunk.SourceLine = number(matches[1], 0)
	current.hunk.SourceSpan = number(matches[3], 1)
	current.hunk.DestinationLine = number(matches[4], 0)
	current.hunk.DestinationSpan = number(matches[6], 1)

	current.diff.Hunks = append(current.diff.Hunks, current.hunk)

	return nil
}
// parseHunkBody stores the content of a hunk body line on the current line
// node. The leading marker byte (" ", "+" or "-") is dropped, and so is one
// trailing newline if the line has one. It always returns nil.
func (current *parser) parseHunkBody(line string) error {
	if line == "" {
		// Nothing to strip; slicing off the marker would be out of range.
		current.line.Line = ""
		return nil
	}

	// Strip only an actual newline, so a line without one keeps its last
	// character and a marker-only line does not cause a slice panic.
	current.line.Line = strings.TrimSuffix(line[1:], "\n")
	return nil
}
// parseCommentHeader reads a "# [id@version] |author| date" line into the
// open comment: id, version, author display name (trimmed) and the update
// date, which is parsed in time.ANSIC layout in the local time zone and stored
// in milliseconds. It always returns nil.
//
// Conversion errors are not reported: the values returned alongside the
// error by strconv.ParseInt and time.ParseInLocation are used as they are.
func (current *parser) parseCommentHeader(line string) error {
	fields := reCommentHeader.FindStringSubmatch(line)
	idText, versionText := fields[1], fields[2]
	author := strings.TrimSpace(fields[3])
	dateText := strings.TrimSpace(fields[4])

	// Errors deliberately ignored, see the function comment.
	id, _ := strconv.ParseInt(idText, 10, 64)
	version, _ := strconv.ParseInt(versionText, 10, 64)
	stamp, _ := time.ParseInLocation(time.ANSIC, dateText, time.Local)

	comment := current.comment
	comment.Author.DisplayName = author
	comment.Id = id
	comment.UpdatedDate = types.UnixTimestamp(stamp.Unix() * 1000)
	comment.Version = int(version)

	return nil
}
// parseComment appends one line of comment text to the open comment.
//
// The whitespace following "#" must be at least as long as the comment's
// indent; otherwise an Error is returned and nothing is appended. The indent
// is cut off, any additional leading whitespace is kept, and the result is
// added to the comment text on a new line.
func (current *parser) parseComment(line string) error {
	parts := reCommentText.FindStringSubmatch(line)
	leading, body := parts[1], parts[2]
	indent := current.comment.Indent

	if len(leading) < indent {
		message := fmt.Sprintf(
			"unexpected indent, should be at least: %d",
			indent,
		)
		return Error{LineNumber: current.lineNumber, Message: message}
	}

	current.comment.Text += "\n" + leading[indent:] + body
	return nil
}
// findParentComment returns the most recently attached comment whose indent
// is strictly smaller than that of comment, or nil when there is none.
func (current *parser) findParentComment(comment *types.Comment) *types.Comment {
	attached := current.commentsList
	for idx := len(attached); idx > 0; idx-- {
		if candidate := attached[idx-1]; candidate.Indent < comment.Indent {
			return candidate
		}
	}
	return nil
}
// getIndentSize returns the number of whitespace bytes that directly follow
// the leading "#" of line, or 0 when line does not start with "#" followed by
// whitespace.
func getIndentSize(line string) int {
	if groups := reIndent.FindStringSubmatch(line); len(groups) != 0 {
		return len(groups[1])
	}
	return 0
}