Skip to content

Commit 895b720

Browse files
committed
odb: avoid quadratic string building when rendering bodies
1 parent e5e97e0 commit 895b720

File tree

1 file changed

+43 -21 lines changed

1 file changed

+43 -21 lines changed

gitrevise/odb.py

Lines changed: 43 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from types import TracebackType
1717
from typing import (
1818
TYPE_CHECKING,
19+
Iterator,
1920
Dict,
2021
Generic,
2122
Mapping,
@@ -77,7 +78,9 @@ def short(self) -> str:
7778
def for_object(cls, tag: str, body: bytes) -> Oid:
7879
"""Hash an object with the given type tag and body to determine its Oid"""
7980
hasher = hashlib.sha1()
80-
hasher.update(tag.encode() + b" " + str(len(body)).encode() + b"\0" + body)
81+
hasher.update(f"{tag} {len(body)}".encode())
82+
hasher.update(b"\0")
83+
hasher.update(body)
8184
return cls(hasher.digest())
8285

8386
def __repr__(self) -> str:
@@ -301,24 +304,41 @@ def new_commit(
301304
"""Directly create an in-memory commit object, without persisting it.
302305
If a commit object with these properties already exists, it will be
303306
returned instead."""
304-
if author is None:
305-
author = self.default_author
306-
if committer is None:
307-
committer = self.default_committer
308-
309-
body = b"tree " + tree.oid.hex().encode() + b"\n"
310-
for parent in parents:
311-
body += b"parent " + parent.oid.hex().encode() + b"\n"
312-
body += b"author " + author.replace(b"\n", b"\n ") + b"\n"
313-
body += b"committer " + committer.replace(b"\n", b"\n ") + b"\n"
314307

315-
body_tail = b"\n" + message
316-
if self.sign_commits:
317-
gpgsig = self.get_gpgsig(body + body_tail)
318-
body += b"gpgsig " + gpgsig.replace(b"\n", b"\n ") + b"\n"
319-
body += body_tail
320-
321-
return Commit(self, body)
308+
def header_kvs(gpgsig: Optional[bytes]) -> Iterator[Tuple[bytes, bytes]]:
309+
"""Yields each header name and value."""
310+
yield b"tree", tree.oid.hex().encode()
311+
yield from ((b"parent", p.oid.hex().encode()) for p in parents)
312+
yield b"author", author or self.default_author
313+
yield b"committer", committer or self.default_committer
314+
if gpgsig:
315+
yield b"gpgsig", gpgsig
316+
317+
def body_parts(gpgsig: Optional[bytes]) -> Iterator[bytes]:
318+
"""Yields each chunk of the body for rendering into a contiguous buffer."""
319+
for key, value in header_kvs(gpgsig=gpgsig):
320+
# Key, space, value (with embedded newlines indented by space), newline.
321+
yield from (key, b" ", value.replace(b"\n", b"\n "), b"\n")
322+
yield b"\n"
323+
yield message
324+
325+
def build(gpgsig: Optional[bytes] = None) -> bytes:
326+
"""Render the body, optionally including the given gpgsig header."""
327+
return b"".join(body_parts(gpgsig=gpgsig))
328+
329+
def get_body() -> bytes:
330+
# Generate the unsigned body.
331+
unsigned_body = build()
332+
if not self.sign_commits:
333+
return unsigned_body
334+
335+
# Get the signature for the unsigned body.
336+
gpgsig = self.get_gpgsig(unsigned_body)
337+
338+
# Include the signature as a header in the final body.
339+
return build(gpgsig=gpgsig)
340+
341+
return Commit(self, body=get_body())
322342

323343
def get_gpgsig(self, buffer: bytes) -> bytes:
324344
"""Return the text of the signed commit object."""
@@ -361,9 +381,11 @@ def entry_key(pair: Tuple[bytes, Entry]) -> bytes:
361381
return name + b"/"
362382
return name
363383

364-
body = b""
365-
for name, entry in sorted(entries.items(), key=entry_key):
366-
body += cast(bytes, entry.mode.value) + b" " + name + b"\0" + entry.oid
384+
body = b"".join(
385+
field
386+
for name, entry in sorted(entries.items(), key=entry_key)
387+
for field in (cast(bytes, entry.mode.value), b" ", name, b"\0", entry.oid)
388+
)
367389
return Tree(self, body)
368390

369391
def get_obj(self, ref: Union[Oid, str]) -> GitObj:

0 commit comments

Comments (0)