diff --git a/embed.fnc b/embed.fnc
index 3a16e2ffbe43..e15a894a60b3 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -6194,7 +6194,7 @@ RS	|char * |scan_heredoc	|NN char *s
 S	|char * |scan_ident	|NN char *s				\
 				|SPTR char *dest			\
 				|EPTR char *dest_end			\
-				|bool chk_unary
+				|U32 flags
 RS	|char * |scan_inputsymbol					\
 				|NN char *start
 RS	|char * |scan_pat	|NN char *start 			\
diff --git a/proto.h b/proto.h
index c2908b823fc1..80b0c279021d 100644
--- a/proto.h
+++ b/proto.h
@@ -9500,7 +9500,7 @@ S_scan_heredoc(pTHX_ char *s)
         assert(s)
 
 STATIC char *
-S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary);
+S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags);
 # define PERL_ARGS_ASSERT_SCAN_IDENT            \
         assert(s); assert(dest); assert(dest_end); assert(dest < dest_end)
 
diff --git a/regen/unicode_constants.pl b/regen/unicode_constants.pl
index 538a397edce5..8afc21703464 100644
--- a/regen/unicode_constants.pl
+++ b/regen/unicode_constants.pl
@@ -1039,6 +1039,7 @@ END
 
 U+10FFFF string MAX_UNICODE
 
+SHY native
 NBSP native
 NBSP string
 
diff --git a/t/comp/parser.t b/t/comp/parser.t
index 44d8fef2ced1..658811374f65 100644
--- a/t/comp/parser.t
+++ b/t/comp/parser.t
@@ -8,7 +8,7 @@ BEGIN {
     chdir 't' if -d 't';
 }
 
-print "1..192\n";
+print "1..193\n";
 
 sub failed {
     my ($got, $expected, $name) = @_;
@@ -673,6 +673,18 @@ is $@, "", 'substr keys assignment';
     is ($@, "", "Handles all numeric package component after ::");
 }
 
+{
+    my $expected = "this is the way the identifier ends; not with a bang";
+    my $result;
+    eval "use utf8; my \$e\x{1df8}claire = '$expected'; \$result = \${e\x{1df8}claire}";
+    if ($@) {
+        failed($@, "no error", "Didn't crash");
+    }
+    else {
+        is ($result, $expected, "Parser can handle a continuation as 2nd char");
+    }
+}
+
 # Add new tests HERE (above this line)
 
 # bug #74022: Loop on characters in \p{OtherIDContinue}
diff --git a/toke.c b/toke.c
index f9a20bdd9250..aef854a9e641 100644
--- a/toke.c
+++ b/toke.c
@@ -180,6 +180,8 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
 #define IDFIRST_ONLY                (1 << 3)
 #define STOP_AT_FIRST_NON_DIGIT     (1 << 4)
 #define CHECK_ONLY                  (1 << 5)
+#define CHECK_UNARY                 (1 << 6)
+#define IDCONT_first_OK             (1 << 7)
 
 #ifdef DEBUGGING
 static const char* const lex_state_names[] = {
@@ -4741,12 +4743,13 @@ S_intuit_more(pTHX_ char *s, char *e,
                  * changed since the code was first added */
                 char tmpbuf[ C_ARRAY_LENGTH(PL_tokenbuf) * 4 ];
 
-                /* khw: scan_ident shouldn't be used as-is.  It has side
-                 * effects and can end up calling this function recursively.
-                 *
-                 * khw: If what follows can't be an identifier, say it is too
-                 * long or is $001, then it must be a charclass */
-                scan_ident(s, tmpbuf, C_ARRAY_END(tmpbuf), FALSE);
+                if (! scan_ident(s, tmpbuf, C_ARRAY_END(tmpbuf), CHECK_ONLY))
+                {
+                    /* An illegal identifier means this can't be a subscript;
+                     * it's an error or it could be a charclass */
+                    return false;
+                }
+
                 len = strlen(tmpbuf);
 
                 /* khw: This only looks at global variables; lexicals came
@@ -5606,8 +5609,7 @@ yyl_dollar(pTHX_ char *s)
             || memCHRs("{$:+-@", s[2])))
     {
         PL_tokenbuf[0] = '@';
-        s = scan_ident(s + 1, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf),
-                       FALSE);
+        s = scan_ident(s + 1, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), 0);
         S_warn_expect_operator(aTHX_ "Array length", s, POP_OLDBUFPTR);
         if (!PL_tokenbuf[1])
             PREREF(DOLSHARP);
@@ -5617,7 +5619,7 @@ yyl_dollar(pTHX_ char *s)
     }
 
     PL_tokenbuf[0] = '$';
-    s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), FALSE);
+    s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), 0);
     S_warn_expect_operator(aTHX_ "Scalar", s, POP_OLDBUFPTR);
     if (!PL_tokenbuf[1]) {
         if (s == PL_bufend)
@@ -6282,7 +6284,7 @@ yyl_star(pTHX_ char *s)
         POSTDEREF(PERLY_STAR);
 
     if (PL_expect != XOPERATOR) {
-        s = scan_ident(s, PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), TRUE);
+        s = scan_ident(s, PL_tokenbuf, C_ARRAY_END(PL_tokenbuf), CHECK_UNARY);
         PL_expect = XOPERATOR;
         force_ident(PL_tokenbuf, PERLY_STAR);
         if (!*PL_tokenbuf)
@@ -6330,7 +6332,7 @@ yyl_percent(pTHX_ char *s)
         POSTDEREF(PERLY_PERCENT_SIGN);
 
     PL_tokenbuf[0] = '%';
-    s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), FALSE);
+    s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), 0);
     pl_yylval.ival = 0;
     if (!PL_tokenbuf[1]) {
         PREREF(PERLY_PERCENT_SIGN);
@@ -6867,7 +6869,8 @@ yyl_ampersand(pTHX_ char *s)
     }
 
     PL_tokenbuf[0] = '&';
-    s = scan_ident(s - 1, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), TRUE);
+    s = scan_ident(s - 1, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf),
+                   CHECK_UNARY);
     pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
 
     if (PL_tokenbuf[1])
@@ -6952,7 +6955,7 @@ yyl_snail(pTHX_ char *s)
     if (PL_expect == XPOSTDEREF)
         POSTDEREF(PERLY_SNAIL);
     PL_tokenbuf[0] = '@';
-    s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), FALSE);
+    s = scan_ident(s, PL_tokenbuf + 1, C_ARRAY_END(PL_tokenbuf), 0);
     S_warn_expect_operator(aTHX_ "Array", s, POP_OLDBUFPTR);
     pl_yylval.ival = 0;
     if (!PL_tokenbuf[1]) {
@@ -10596,6 +10599,11 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
      * in things like Foo::$bar */
     const bool check_dollar = flags & CHECK_DOLLAR;
 
+    /* There is a use case for calling this function in the middle of having
+     * parsed a portion of an identifier.  Therefore it should be able to
+     * accept the first character being an IDCont, and not necessarily an
+     * IDFIRST.  The 'IDCONT_first_OK' flag is used to indicate this */
+
     while (s < s_end) {
 
         /* For non-UTF8, variables that match ASCII \w are a superset of
@@ -10603,8 +10611,11 @@ S_parse_ident(pTHX_ const char *s, const char * const s_end,
          * Unicode definition only when UTF-8 is in effect.  We have to check
          * for the subset before checking for the superset. */
         Size_t advance;
-        if (   (advance = isIDFIRST_lazy_if_safe(s, s_end, is_utf8))
-            && (is_utf8 || idfirst_only))
+        if (   (is_utf8 || idfirst_only)
+            && (advance = (flags & IDCONT_first_OK)
+                          ? isIDCONT_lazy_if_safe((U8 *) s, (U8 *) s_end,
+                                                  is_utf8)
+                          : isIDFIRST_lazy_if_safe(s, s_end, is_utf8)))
         {
             const char *this_start = s;
             s += advance;
@@ -10749,43 +10760,71 @@ Perl_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STR
     return s;
 }
 
-/* scan s and extract an identifier ($var) from it if possible
- * into dest.
- * XXX: This function has subtle implications on parsing, and
- * changing how it behaves can cause a variable to change from
- * being a run time rv2sv call or a compile time binding to a
- * specific variable name.
+/* scan 's' and extract an identifier ($var) from it into 'dest' if possible.
+ *
+ * Unlike S_parse_ident which looks for the more usual types of identifiers
+ * (and which this calls if needed), this looks for every possible identifier
+ * type, such as punctuation ones.
+ *
+ * It returns a pointer into the input buffer pointing to just after all the
+ * bytes this function consumed; or croaks if an invalid identifier is found.
+ *
+ * XXX: This function normally has subtle implications on parsing, and
+ * changing how it behaves can cause a variable to change from being a run
+ * time rv2sv call or a compile time binding to a specific variable name.
+ *
+ * However, it can be called with the CHECK_ONLY flag which keeps it from
+ * making any changes besides populating the memory 'dest' points to.  If the
+ * identifier is illegal, it returns NULL instead of croaking.
+ *
+ * And use the CHECK_UNARY flag to cause this to look for ambiguities with
+ * unary operators.  This is silently overriden if CHECK_ONLY is also
+ * specified.
  */
 STATIC char *
-S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
+S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, U32 flags)
 {
     PERL_ARGS_ASSERT_SCAN_IDENT;
 
     I32 herelines = PL_parser->herelines;
-    SSize_t bracket = -1;
+
+#define NO_BRACE -1
+    SSize_t bracket = NO_BRACE;
+
     char funny = *s++;
     char *d = dest;
     char * const e = dest_end - 3;    /* two-character token, ending NUL */
     bool is_utf8 = cBOOL(UTF);
     line_t orig_copline = 0, tmp_copline = 0;
 
+    /* Leave the flag in its position, so can pass this on without needing to
+     * anything extra */
+    const U32 check_only = flags & CHECK_ONLY;
+
+    const bool chk_unary = ! check_only && (flags & CHECK_UNARY);
+
     if (isSPACE(*s) || !*s)
         s = skipspace(s);
 
     /* See if it is a "normal" identifier */
     s = parse_ident(s, PL_bufend, &d, e, is_utf8,
-                    (ALLOW_PACKAGE | STOP_AT_FIRST_NON_DIGIT));
-    d = dest;
+                    (ALLOW_PACKAGE | STOP_AT_FIRST_NON_DIGIT | check_only));
+    if (s == NULL) {
+        return NULL;
+    }
 
+    d = dest;
     if (*d) {
+
         /* Here parse_ident() found a digit variable or an identifier
            (anything valid as a bareword), so job done and return.  */
-        if (PL_lex_state != LEX_NORMAL)
+        if (! check_only && PL_lex_state != LEX_NORMAL)
             PL_lex_state = LEX_INTERPENDMAYBE;
         return s;
     }
 
-    /* Here, it is not a run-of-the-mill identifier name */
+    /* Here, it is not a run-of-the-mill identifier name; maybe not an
+     * identifier at all.  Note *d is a NUL */
 
     if (*s == '$' && s[1]
         && (   isIDFIRST_lazy_if_safe(s+1, PL_bufend, is_utf8)
@@ -10800,9 +10839,12 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
         return s;
     }
 
-    /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}  */
+    /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...}
+     * Skip to the first non-space past the brace */
     if (*s == '{') {
+        /* 'bracket' becomes the offset from the beginning of this chunk */
         bracket = s - SvPVX(PL_linestr);
+
         s++;
         orig_copline = CopLINE(PL_curcop);
         if (s < PL_bufend && isSPACE(*s)) {
@@ -10810,11 +10852,12 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
         }
     }
 
-    /* Extract the first character of the variable name from 's' and
-     * copy it, null terminated into 'd'. Note that this does not
-     * involve checking for just IDFIRST characters, as it allows the
-     * '^' for ${^FOO} type variable names, and it allows all the
-     * characters that are legal in a single character variable name.
+    /* Here, 's' points to the next "interesting" character.
+     * Extract the first character of the potential variable name from 's' and
+     * copy it, NUL terminated, into 'd'. Note that this does not involve
+     * checking for just IDFIRST characters, as it allows the '^' for ${^FOO}
+     * type variable names, and it allows all the characters that are legal in
+     * a single character variable name.
      *
      * The legal ones are any of:
      *  a) all ASCII characters except:
@@ -10829,82 +10872,126 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
      * Because all ASCII characters have the same representation whether
      * encoded in UTF-8 or not, we can use the foo_A macros below and '\0' and
      * '{' without knowing if is UTF-8 or not. */
-
-    if (   (s <= PL_bufend - ((is_utf8) ? UTF8SKIP(s) : 1))
+    STRLEN advance = 1;
+    if (    s < PL_bufend
         && (  isGRAPH_A(*s)
-            || (is_utf8
-               ? isIDFIRST_utf8_safe(s, PL_bufend)
-               : (   isGRAPH_L1(*s)
-                  && LIKELY((U8) *s != LATIN1_TO_NATIVE(0xAD))))))
+            || (is_utf8 ? (advance = isIDFIRST_utf8_safe(s, PL_bufend))
+                        : (isGRAPH_L1(*s) && LIKELY((U8) *s != SHY_NATIVE)))))
     {
-        if (is_utf8) {
-            const STRLEN skip = UTF8SKIP(s);
-            STRLEN i;
-            d[skip] = '\0';
-            for ( i = 0; i < skip; i++ )
-                d[i] = *s++;
-        }
-        else {
-            *d = *s++;
-            d[1] = '\0';
-        }
+        STRLEN i;
+        d[advance] = '\0';
+        for ( i = 0; i < advance; i++ )
+            d[i] = *s++;
     }
 
-    /* special case to handle ${10}, ${11} the same way we handle $1 etc */
+    /* 'd' has not been advanced, but if 's' pointed to a legal identifier
+     * character, it has been advanced to the next character, and the
+     * character it previously pointed to has been copied to where 'd'
+     * continues to point to.
+     *
+     * If that copied character is a digit, it means we have something like
+     * ${10}, ${1547}, etc.  Handle those the same way we handle $1, etc */
     if (isDIGIT(*d)) {
+        assert(bracket != NO_BRACE);
         s = parse_ident(s - 1, PL_bufend, &d, e, is_utf8,
-                        STOP_AT_FIRST_NON_DIGIT);
+                        STOP_AT_FIRST_NON_DIGIT | check_only);
+        if (s == NULL) {
+            return NULL;
+        }
 
         /* The code below is expecting d to point to the final digit */
         d--;
     }
-
-    /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
-    else if (*d == '^' && *s && isCONTROLVAR(*s)) {
+    else  /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
+      if (*d == '^' && *s && isCONTROLVAR(*s)) {
         *d = toCTRL(*s);
         s++;
     }
-    /* Warn about ambiguous code after unary operators if {...} notation isn't
-       used.  There's no difference in ambiguity; it's merely a heuristic
-       about when not to warn.  */
-    else if (chk_unary && bracket == -1)
+    else  /* Warn about ambiguous code after unary operators if {...} notation
+             isn't used.  There's no difference in ambiguity; it's merely a
+             heuristic about when not to warn.  */
+      if (chk_unary && bracket == -1) {
         check_unary();
+    }
 
-    if (bracket != -1) {
-        bool skip;
-        char *s2;
-        /* If we were processing {...} notation then...  */
-        if (   isIDFIRST_lazy_if_safe(d, e, is_utf8)
-            || (  ! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
-                 && isWORDCHAR(*s))
-        ) {
-            /* note we have to check for a normal identifier first,
-             * as it handles utf8 symbols, and only after that has
-             * been ruled out can we look at the caret words */
-            Size_t advance;
-            if ((advance = isIDFIRST_lazy_if_safe(d, e, is_utf8) )) {
-                /* if it starts as a valid identifier, assume that it is one.
-                   (the later check for } being at the expected point will trap
-                   cases where this doesn't pan out.)  */
+    /* Here, 's' points to the next "interesting" character to be parsed. And
+     * *d points to the first byte of the final so-far parsed and copied
+     * character.  This is one of four things:
+     *   1) The only byte of the final character of an all-digit numeric
+     *      variable inside braces. e.g. if the input is ${ 123 }, '123' has
+     *      been copied to 'dest', and 'd' points to the '3'.  We don't know
+     *      yet if there is a closing brace.
+     *   2) A control character
+     *   3) The first (or only) byte of some other identifier
+     *   4) *d is NUL for anything else.
+     */
+
+    if (bracket == NO_BRACE) {
+        if (   ! check_only
+            &&   PL_lex_state == LEX_INTERPNORMAL
+            && ! PL_lex_brackets
+            && ! intuit_more(s, PL_bufend, FROM_IDENT, NULL, 0))
+        PL_lex_state = LEX_INTERPEND;
+    }
+    else {  /* Found a '{' */
+
+        /* Handle the interior of braces.  First look to see if the character
+         * pointed to by 'd' is legal as the start of an identifier. */
+        Size_t advance = isIDFIRST_lazy_if_safe(d, e, is_utf8);
+
+        /* If it isn't a normal identifier, it could be a control-character
+         * one.  Those have to be followed by a \w character. */
+        if (advance || ( ! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
+                        && isWORDCHAR(*s)))
+        {
+            /* Prefer a normal identifier, as UTF-8 strings could erroneously
+             * be conflated with a control character identifier. */
+            if (advance) {
+
+                /* Now parse the normal identifier.  But note, we already have
+                 * parsed and copied the first character of it.  That means we
+                 * are jumping into the middle; so tell that to parse_ident.
+                 * */
                 d += advance;
                 s = parse_ident(s, PL_bufend, &d, e, is_utf8,
-                                (ALLOW_PACKAGE | CHECK_DOLLAR));
+                                (  ALLOW_PACKAGE
+                                 | CHECK_DOLLAR
+                                 | IDCONT_first_OK
+                                 | check_only));
             }
             else { /* caret word: ${^Foo} ${^CAPTURE[0]} */
+
+                /* Now parse the control character identifier.  Again, we have
+                 * already copied the first character.  This routine is
+                 * sufficiently chummy with parse_ident to know that when we
+                 * say the string isn't UTF-8, it will do the right thing in
+                 * looking only for ASCII \w characters as identifier
+                 * continuations */
                 d++;
-                while (isWORDCHAR(*s) && d < e) {
-                    *d++ = *s++;
-                }
-                if (d >= e)
-                    croak("%s", ident_too_long);
-                *d = '\0';
+                s = parse_ident(s, PL_bufend, &d, e,
+                                false,  /* Don't allow UTF-8 */
+                                IDCONT_first_OK);
             }
+
+            if (s == NULL) {    /* Can't be NULL unless is check_only */
+                return NULL;
+            }
+
             tmp_copline = CopLINE(PL_curcop);
             if (s < PL_bufend && isSPACE(*s)) {
                 s = skipspace(s);
             }
-            if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
+
+            if (*s == '[' || (*s == '{' && strNE(dest, "sub"))) {
+
+                /* In this branch, 's' is not changed further.  If only
+                 * checking validity, return now before any state changes */
+                if (check_only) {
+                    return s;
+                }
+
                 /* ${foo[0]} and ${foo{bar}} and ${^CAPTURE[0]} notation.  */
+
                 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
                     const char * const brack =
                         (const char *)
@@ -10926,6 +11013,9 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
 
         if ( !tmp_copline )
             tmp_copline = CopLINE(PL_curcop);
+
+        char *s2;
+        bool skip;
         if ((skip = s < PL_bufend && isSPACE(*s))) {
             /* Avoid incrementing line numbers or resetting PL_linestart,
                in case we have to back up.  */
@@ -10936,13 +11026,35 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
         else
             s2 = s;
 
-        /* Expect to find a closing } after consuming any trailing whitespace.
-         */
-        if (*s2 == '}') {
-            /* Now increment line numbers if applicable.  */
+        /* Expect to find a closing '}' after consuming any trailing
+         * whitespace. */
+        if (*s2 != '}') {
+            /* Didn't find the closing '}' at the point we expected, so
+             * restore the state such that the next thing to process is the
+             * opening '{' and let the parser handle it */
+            s = SvPVX(PL_linestr) + bracket;
+
+            /* The final change to 's' has just been made.  If only validity
+             * checking, return before making any state changes */
+            if (check_only) {
+                return s;
+            }
+            CopLINE_set(PL_curcop, orig_copline);
+            PL_parser->herelines = herelines;
+            *dest = '\0';
+            PL_parser->sub_no_recover = TRUE;
+        }
+        else {  /* Now increment line numbers if applicable.  */
             if (skip)
                 s = skipspace(s);
             s++;
+
+            /* The final change to 's' has just been made.  If only validity
+             * checking, return before making any state changes */
+            if (check_only) {
+                return s;
+            }
+
             if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
                 PL_lex_state = LEX_INTERPEND;
                 PL_expect = XREF;
@@ -10967,20 +11079,8 @@ S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary)
                 }
             }
         }
-        else {
-            /* Didn't find the closing } at the point we expected, so restore
-               state such that the next thing to process is the opening { and */
-            s = SvPVX(PL_linestr) + bracket; /* let the parser handle it */
-            CopLINE_set(PL_curcop, orig_copline);
-            PL_parser->herelines = herelines;
-            *dest = '\0';
-            PL_parser->sub_no_recover = TRUE;
-        }
     }
-    else if (   PL_lex_state == LEX_INTERPNORMAL
-             && !PL_lex_brackets
-             && !intuit_more(s, PL_bufend, FROM_IDENT, NULL, 0))
-        PL_lex_state = LEX_INTERPEND;
+
     return s;
 }
 
diff --git a/unicode_constants.h b/unicode_constants.h
index 78b401683265..574678d9e10f 100644
--- a/unicode_constants.h
+++ b/unicode_constants.h
@@ -76,6 +76,7 @@ bytes.
 
 #   define MAX_UNICODE_UTF8  "\xF4\x8F\xBF\xBF"    /* U+10FFFF */
 
+#   define SHY_NATIVE  0xAD    /* U+00AD */
 #   define NBSP_NATIVE  0xA0    /* U+00A0 */
 #   define NBSP_UTF8  "\xC2\xA0"    /* U+00A0 */
 
@@ -142,6 +143,7 @@ bytes.
 
 #   define MAX_UNICODE_UTF8  "\xEE\x42\x73\x73\x73"    /* U+10FFFF */
 
+#   define SHY_NATIVE  0xCA    /* U+00AD */
 #   define NBSP_NATIVE  0x41    /* U+00A0 */
 #   define NBSP_UTF8  "\x80\x41"    /* U+00A0 */
 
@@ -208,6 +210,7 @@ bytes.
 
 #   define MAX_UNICODE_UTF8  "\xEE\x42\x72\x72\x72"    /* U+10FFFF */
 
+#   define SHY_NATIVE  0xCA    /* U+00AD */
 #   define NBSP_NATIVE  0x41    /* U+00A0 */
 #   define NBSP_UTF8  "\x78\x41"    /* U+00A0 */