Skip to content

Commit 758deef

Browse files
committed
refactor(parser): rework RawMessage into Buffer
The old RawMessage implementation effectively brute-forced the initial processing of a commit message by breaking it down into lines and grouping them into paragraphs. This is useful, but we actually only need the first paragraph, the last paragraph, and everything in between, so there is no need to break the message down into every individual paragraph. In theory, the Buffer implementation is more performant than RawMessage was, but most importantly I think it will be easier to work with.
1 parent e8ca009 commit 758deef

File tree

8 files changed

+1746
-1114
lines changed

8 files changed

+1746
-1114
lines changed

buffer.go

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
package conventionalcommit
2+
3+
import (
4+
"regexp"
5+
)
6+
7+
// footerToken will match against all variations of Conventional Commit footer
8+
// formats.
9+
//
10+
// Examples of valid footer tokens:
11+
//
12+
// Approved-by: John Carter
13+
// ReviewdBy: Noctis
14+
// Fixes #49
15+
// Reverts #SOL-42
16+
// BREAKING CHANGE: Flux capacitor no longer exists.
17+
// BREAKING-CHANGE: Time will flow backwards
18+
//
19+
// Examples of invalid footer tokens:
20+
//
21+
// Approved-by:
22+
// Approved-by:John Carter
23+
// Approved by: John Carter
24+
// ReviewdBy: Noctis
25+
// Fixes#49
26+
// Fixes #
27+
// Fixes 49
28+
// BREAKING CHANGE:Flux capacitor no longer exists.
29+
// Breaking Change: Flux capacitor no longer exists.
30+
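// Breaking-Change: Time will flow backwards
//
// NOTE(review): as written here, "ReviewdBy: Noctis" and "Breaking-Change: ..."
// are both matched by the generic `[\w-]+:` branch of the pattern below, so
// footerToken accepts them as ordinary footer tokens — confirm whether they
// are really meant to be listed as invalid.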
31+
//
32+
var footerToken = regexp.MustCompile(`^(?:([\w-]+)\s+(#.+)|([\w-]+|BREAKING[\s-]CHANGE):\s+(.+))$`)
35+
36+
// Buffer represents a commit message in a more structured form than a simple
37+
// string or byte slice. This makes it easier to process a message for the
38+
// purposes of extracting detailed information, linting, and formatting.
39+
//
40+
// The commit message is conceptually broken down into three separate
41+
// sections:
42+
//
43+
// - Head section holds the commit message subject/description, along with type
44+
// and scope for conventional commits. The head section should only ever be a
45+
// single line according to git convention, but Buffer supports multi-line
46+
// headers so they can be parsed and handled as needed.
47+
//
48+
// - Body section holds the rest of the message. Except if the last paragraph
49+
// starts with a footer token, then the last paragraph is omitted from the
50+
// body section.
51+
//
52+
// - Foot section holds conventional commit footers. It is always the last
53+
// paragraph of a commit message, and is only considered to be the foot
54+
// section if the first line of the paragraph begins with a footer token.
55+
//
56+
// Each section is returned as a Lines type, which provides per-line access to
57+
// the text within the section.
58+
type Buffer struct {
59+
// firstLine is the lines offset for the first line which contains any
60+
// non-whitespace character. It is left at zero when no such line exists.
61+
firstLine int
62+
63+
// lastLine is the lines offset for the last line which contains any
64+
// non-whitespace character. It is left at zero when no such line exists.
65+
lastLine int
66+
67+
// headLen is the number of lines that the head section (first paragraph)
68+
// spans.
69+
headLen int
70+
71+
// footLen is the number of lines that the foot section (last paragraph)
// spans. It stays zero when the last paragraph does not start with a footer
// token, in which case that paragraph belongs to the body.
72+
// spans.
73+
footLen int
74+
75+
// lines is a list of all individual lines of text in the commit message,
76+
// which also includes the original line number, making it easy to pass a
77+
// single Line around while still knowing where in the original commit
78+
// message it belongs.
79+
lines Lines
80+
}
81+
82+
// NewBuffer returns a Buffer, with the given commit message broken down into
83+
// individual lines of text, with sequential non-empty lines grouped into
84+
// paragraphs.
85+
func NewBuffer(message []byte) *Buffer {
86+
buf := &Buffer{
87+
lines: Lines{},
88+
}
89+
90+
if len(message) == 0 {
91+
return buf
92+
}
93+
94+
buf.lines = NewLines(message)
95+
// Find fist non-whitespace line.
96+
if i := buf.lines.FirstTextIndex(); i > -1 {
97+
buf.firstLine = i
98+
}
99+
100+
// Find last non-whitespace line.
101+
if i := buf.lines.LastTextIndex(); i > -1 {
102+
buf.lastLine = i
103+
}
104+
105+
// Determine number of lines in first paragraph (head section).
106+
for i := buf.firstLine; i <= buf.lastLine; i++ {
107+
if buf.lines[i].Blank() {
108+
break
109+
}
110+
buf.headLen++
111+
}
112+
113+
// Determine number of lines in the last paragraph.
114+
lastLen := 0
115+
for i := buf.lastLine; i > buf.firstLine+buf.headLen; i-- {
116+
if buf.lines[i].Blank() {
117+
break
118+
}
119+
lastLen++
120+
}
121+
122+
// If last paragraph starts with a Convention Commit footer token, it is the
123+
// foot section, otherwise it is part of the body.
124+
if lastLen > 0 {
125+
line := buf.lines[buf.lastLine-lastLen+1]
126+
if footerToken.Match(line.Content) {
127+
buf.footLen = lastLen
128+
}
129+
}
130+
131+
return buf
132+
}
133+
134+
// Head returns the first paragraph, defined as the first group of sequential
135+
// lines which contain any non-whitespace characters.
136+
func (s *Buffer) Head() Lines {
137+
return s.lines[s.firstLine : s.firstLine+s.headLen]
138+
}
139+
140+
// Body returns all lines between the first and last paragraphs. If the body is
141+
// surrounded by multiple empty lines, they will be removed, ensuring first and
142+
// last line of body is not a blank whitespace line.
143+
func (s *Buffer) Body() Lines {
144+
if s.firstLine == s.lastLine {
145+
return Lines{}
146+
}
147+
148+
first := s.firstLine + s.headLen + 1
149+
last := s.lastLine + 1
150+
151+
if s.footLen > 0 {
152+
last -= s.footLen
153+
}
154+
155+
return s.lines[first:last].Trim()
156+
}
157+
158+
// Head returns the last paragraph, defined as the last group of sequential
159+
// lines which contain any non-whitespace characters.
160+
func (s *Buffer) Foot() Lines {
161+
if s.footLen == 0 {
162+
return Lines{}
163+
}
164+
165+
return s.lines[s.lastLine-s.footLen+1 : s.lastLine+1]
166+
}
167+
168+
// Lines returns all lines with any blank lines from the beginning and end of
169+
// the buffer removed. Effectively all lines from the first to the last line
170+
// which contain any non-whitespace characters.
171+
func (s *Buffer) Lines() Lines {
172+
if s.lastLine+1 > len(s.lines) || (s.lastLine == 0 && s.lines[0].Blank()) {
173+
return Lines{}
174+
}
175+
176+
return s.lines[s.firstLine : s.lastLine+1]
177+
}
178+
179+
func (s *Buffer) LineCount() int {
180+
if s.headLen == 0 {
181+
return 0
182+
}
183+
184+
return (s.lastLine + 1) - s.firstLine
185+
}
186+
187+
// Bytes renders the Buffer back into a byte slice, without any leading or
188+
// trailing whitespace lines. Leading whitespace on the first line which
189+
// contains non-whitespace characters is retained. It is only whole lines
190+
// consisting of only whitespace which are excluded.
191+
func (s *Buffer) Bytes() []byte {
192+
return s.Lines().Bytes()
193+
}
194+
195+
// String renders the Buffer back into a string, without any leading or trailing
196+
// whitespace lines. Leading whitespace on the first line which contains
197+
// non-whitespace characters is retained. It is only whole lines consisting of
198+
// only whitespace which are excluded.
199+
func (s *Buffer) String() string {
200+
return s.Lines().String()
201+
}
202+
203+
// BytesRaw renders the Buffer back into a byte slice which is identical to the
204+
// original input byte slice given to NewBuffer. This includes retaining the
205+
// original line break types for each line.
206+
func (s *Buffer) BytesRaw() []byte {
207+
return s.lines.Bytes()
208+
}
209+
210+
// StringRaw renders the Buffer back into a string which is identical to the
211+
// original input byte slice given to NewBuffer. This includes retaining the
212+
// original line break types for each line.
213+
func (s *Buffer) StringRaw() string {
214+
return s.lines.String()
215+
}

0 commit comments

Comments
 (0)