From ea9a3f794a590a63a23999d525650e7e47b763fc Mon Sep 17 00:00:00 2001 From: Benjamin Yeh Date: Fri, 7 Nov 2025 17:59:57 -0800 Subject: [PATCH 1/2] AlignedSegment.set_tag(): validate Hex values for tag type 'H' Added validation for hex values in tag processing. --- pysam/libcalignedsegment.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pysam/libcalignedsegment.pyx b/pysam/libcalignedsegment.pyx index ceffc3e3..f38aa74d 100644 --- a/pysam/libcalignedsegment.pyx +++ b/pysam/libcalignedsegment.pyx @@ -85,6 +85,7 @@ cdef int NCIGAR_CODES = 10 CIGAR2CODE = dict([y, x] for x, y in enumerate(CODE2CIGAR)) CIGAR_REGEX = re.compile("(\d+)([MIDNSHP=XB])") +HEX_VALUE_REGEX = re.compile(r"([0-9A-F][0-9A-F])*") # from SAM specification # names for keys in dictionary representation of an AlignedSegment KEY_NAMES = ["name", "flag", "ref_name", "ref_pos", "map_quality", "cigar", @@ -2520,6 +2521,12 @@ cdef class AlignedSegment: value_ptr = value value_size = len(value)+1 elif typecode == b'H': + # validate Hex values + if not HEX_VALUE_REGEX.fullmatch(value): + raise ValueError( + f"Invalid value {value} for tag {tag.decode()} with value_type 'H': + f"must match the regular expression {HEX_VALUE_REGEX.pattern}" + ) # Note that hex tags are stored the very same # way as Z string.s value = force_bytes(value) From 184b61d2381fab61c42670a06af841ea811c4178 Mon Sep 17 00:00:00 2001 From: Benjamin Yeh Date: Sat, 8 Nov 2025 01:44:02 -0800 Subject: [PATCH 2/2] bugfix for previous commit: missing trailing doublequote --- pysam/libcalignedsegment.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pysam/libcalignedsegment.pyx b/pysam/libcalignedsegment.pyx index f38aa74d..32b65e4c 100644 --- a/pysam/libcalignedsegment.pyx +++ b/pysam/libcalignedsegment.pyx @@ -2524,7 +2524,7 @@ cdef class AlignedSegment: # validate Hex values if not HEX_VALUE_REGEX.fullmatch(value): raise ValueError( - f"Invalid value {value} for tag {tag.decode()} with value_type 'H': + f"Invalid value {value} for tag {tag.decode()} with value_type 'H': " f"must match the regular expression {HEX_VALUE_REGEX.pattern}" ) # Note that hex tags are stored the very same