LCOV - coverage report for master 98b443d9 - lib/util/charset/util

LCOV - code coverage report

Current view:	top level - lib/util/charset - util_unistr.c (source / functions)		Hit	Total	Coverage
Test:	coverage report for master 98b443d9	Lines:	229	344	66.6 %
Date:	2024-05-31 13:13:24	Functions:	20	24	83.3 %

          Line data    Source code

       1             : /*
       2             :    Unix SMB/CIFS implementation.
       3             :    Samba utility functions
       4             :    Copyright (C) Andrew Tridgell 1992-2001
       5             :    Copyright (C) Simo Sorce 2001
       6             : 
       7             :    This program is free software; you can redistribute it and/or modify
       8             :    it under the terms of the GNU General Public License as published by
       9             :    the Free Software Foundation; either version 3 of the License, or
      10             :    (at your option) any later version.
      11             : 
      12             :    This program is distributed in the hope that it will be useful,
      13             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15             :    GNU General Public License for more details.
      16             : 
      17             :    You should have received a copy of the GNU General Public License
      18             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      19             : */
      20             : 
      21             : #include "replace.h"
      22             : #include "system/locale.h"
      23             : #include "charset.h"
      24             : #include "lib/util/byteorder.h"
      25             : #include "lib/util/fault.h"
      26             : #include "lib/util/tsort.h"
      27             : 
      28             : /**
      29             :  String replace.
      30             :  NOTE: oldc and newc must be 7 bit characters
      31             : **/
      32           5 : _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
      33             : {
      34           5 :         struct smb_iconv_handle *ic = get_iconv_handle();
      35          19 :         while (s && *s) {
      36          14 :                 size_t size;
      37          14 :                 codepoint_t c = next_codepoint_handle(ic, s, &size);
      38          14 :                 if (c == oldc) {
      39           5 :                         *s = newc;
      40             :                 }
      41          14 :                 s += size;
      42             :         }
      43           5 : }
      44             : 
      45             : /**
      46             :  Convert a string to lower case, allocated with talloc
      47             : **/
      48     6079285 : _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
      49             :                                       TALLOC_CTX *ctx, const char *src)
      50             : {
      51     6079285 :         size_t size=0;
      52       18078 :         char *dest;
      53             : 
      54     6079285 :         if(src == NULL) {
      55           0 :                 return NULL;
      56             :         }
      57             : 
      58             :         /* this takes advantage of the fact that upper/lower can't
      59             :            change the length of a character by more than 1 byte */
      60     6079285 :         dest = talloc_array(ctx, char, 2*(strlen(src))+1);
      61     6079285 :         if (dest == NULL) {
      62           0 :                 return NULL;
      63             :         }
      64             : 
      65   134999483 :         while (*src) {
      66      415963 :                 size_t c_size;
      67   128920198 :                 codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
      68   128920198 :                 src += c_size;
      69             : 
      70   128920198 :                 c = tolower_m(c);
      71             : 
      72   128920198 :                 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
      73   128920198 :                 if (c_size == -1) {
      74           0 :                         talloc_free(dest);
      75           0 :                         return NULL;
      76             :                 }
      77   128920198 :                 size += c_size;
      78             :         }
      79             : 
      80     6079285 :         dest[size] = 0;
      81             : 
      82             :         /* trim it so talloc_append_string() works */
      83     6079285 :         dest = talloc_realloc(ctx, dest, char, size+1);
      84             : 
      85     6079285 :         talloc_set_name_const(dest, dest);
      86             : 
      87     6079285 :         return dest;
      88             : }
      89             : 
      90     6079279 : _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
      91             : {
      92     6079279 :         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
      93     6079279 :         return strlower_talloc_handle(iconv_handle, ctx, src);
      94             : }
      95             : 
      96             : /**
      97             :  Convert a string to UPPER case, allocated with talloc
      98             :  source length limited to n bytes, iconv handle supplied
      99             : **/
     100   702803850 : _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
     101             :                                         TALLOC_CTX *ctx, const char *src, size_t n)
     102             : {
     103   702803850 :         size_t size=0;
     104    12973714 :         char *dest;
     105             : 
     106   702803850 :         if (!src) {
     107      192650 :                 return NULL;
     108             :         }
     109             : 
     110             :         /* this takes advantage of the fact that upper/lower can't
     111             :            change the length of a character by more than 1 byte */
     112   702573506 :         dest = talloc_array(ctx, char, 2*(n+1));
     113   702573506 :         if (dest == NULL) {
     114           0 :                 return NULL;
     115             :         }
     116             : 
     117 10749076050 :         while (n && *src) {
     118   118927733 :                 size_t c_size;
     119 10046502546 :                 codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
     120             :                                                           CH_UNIX, &c_size);
     121 10046502546 :                 src += c_size;
     122 10046502546 :                 n -= c_size;
     123             : 
     124 10046502546 :                 c = toupper_m(c);
     125             : 
     126 10046502546 :                 c_size = push_codepoint_handle(iconv_handle, dest+size, c);
     127 10046502546 :                 if (c_size == -1) {
     128           2 :                         talloc_free(dest);
     129           2 :                         return NULL;
     130             :                 }
     131 10046502544 :                 size += c_size;
     132             :         }
     133             : 
     134   702573504 :         dest[size] = 0;
     135             : 
     136             :         /* trim it so talloc_append_string() works */
     137   702573504 :         dest = talloc_realloc(ctx, dest, char, size+1);
     138             : 
     139   702573504 :         talloc_set_name_const(dest, dest);
     140             : 
     141   702573504 :         return dest;
     142             : }
     143             : 
     144             : /**
     145             :  Convert a string to UPPER case, allocated with talloc
     146             :  source length limited to n bytes
     147             : **/
     148   702803844 : _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
     149             : {
     150   702803844 :         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
     151   702803844 :         return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
     152             : }
     153             : /**
     154             :  Convert a string to UPPER case, allocated with talloc
     155             : **/
     156     5988847 : _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
     157             : {
     158     5988847 :         return strupper_talloc_n(ctx, src, src?strlen(src):0);
     159             : }
     160             : 
     161             : /**
     162             :  talloc_strdup() a unix string to upper case.
     163             : **/
     164     3070349 : _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
     165             : {
     166     3070349 :         return strupper_talloc(ctx, src);
     167             : }
     168             : 
     169             : 
     170             : /*
     171             :  * strncasecmp_ldb() works a *bit* like strncasecmp, with various
     172             :  * tricks to suit the way LDB compares strings. The differences are:
     173             :  *
     174             :  * 0. each string has its own length.
     175             :  *
     176             :  * 1. consecutive spaces are collapsed down to one space, so that
     177             :  *    "a  b" equals "a b". (this is why each string needs its own
     178             :  *    length). Leading and trailing spaces are removed altogether.
     179             :  *
     180             :  * 2. Comparisons are done in UPPER CASE, as Windows does, not in
     181             :  *    lowercase as POSIX would have it.
     182             :  *
     183             :  * 3. An invalid byte compares higher than any real character. For example,
     184             :  *    "hello\xc2\xff" would sort higher than "hello\xcd\xb6", because CD
     185             :  *    B6 is a valid sequence and C2 FF is not.
     186             :  *
     187             :  * 4. If two strings become invalid on the same character, the rest
     188             :  *    of the string is compared via ldb ASCII case fold rules.
     189             :  *
     190             :  *    For example, "hellō\xC2\xFFworld" < " hElLŌ\xFE ", because the
     191             :  *    strings are equal up to 'ō' by utf-8 casefold, but the "\xc2\xff" and
     192             :  *    "\xfe" are invalid sequences. At that point, we skip to the byte-by-byte
     193             :  *    (but space-eating, casefolding) comparison, and 0xc2 < 0xfe.
     194             :  */
     195             : 
     196             : #define EAT_SPACE(s, len, ends_in_space)                         \
     197             :         do {                                                     \
     198             :                 while (len) {                                    \
     199             :                         if (*s != ' ') {                         \
     200             :                                 break;                           \
     201             :                         }                                        \
     202             :                         s++;                                     \
     203             :                         len--;                                   \
     204             :                 }                                                \
     205             :                 ends_in_space = (len == 0 || *s == '\0');        \
     206             :         } while(0)
     207             : 
     208             : 
     209   126976050 : _PUBLIC_ int strncasecmp_ldb(const char *s1,
     210             :                              size_t len1,
     211             :                              const char *s2,
     212             :                              size_t len2)
     213             : {
     214   126976050 :         struct smb_iconv_handle *iconv_handle = get_iconv_handle();
     215     1616386 :         codepoint_t c1, c2;
     216     1616386 :         size_t cs1, cs2;
     217     1616386 :         bool ends_in_space1, ends_in_space2;
     218     1616386 :         int ret;
     219     1616386 :         bool end1, end2;
     220             : 
     221   126977107 :         EAT_SPACE(s1, len1, ends_in_space1);
     222   126976417 :         EAT_SPACE(s2, len2, ends_in_space2);
     223             :         /*
     224             :          * if ends_in_space was set, the string was empty or only
     225             :          * spaces (which we treat as equivalent).
     226             :          */
     227   126976050 :         if (ends_in_space1 && ends_in_space2) {
     228           2 :                 return 0;
     229             :         }
     230   126976044 :         if (ends_in_space1) {
     231         858 :                 return -1;
     232             :         }
     233   126975158 :         if (ends_in_space2) {
     234         173 :                 return 1;
     235             :         }
     236             : 
     237    11604934 :         while (true) {
     238             :                 /*
     239             :                  * If the next byte is a space, we eat all the spaces,
     240             :                  * and say we found a single codepoint. If the spaces
     241             :                  * were at the end of the string, the codepoint is 0,
     242             :                  * as if there were no spaces. Otherwise it is 0x20,
     243             :                  * as if there was one space.
     244             :                  *
     245             :                  * Setting the codepoint to 0 will break the loop, but
     246             :                  * only after codepoints have been found in both strings.
     247             :                  */
     248   911993469 :                 if (len1 == 0 || *s1 == 0) {
     249    51184014 :                         c1 = 0;
     250   860066979 :                 } else if (*s1 == ' ') {
     251       16612 :                         EAT_SPACE(s1, len1, ends_in_space1);
     252        8266 :                         c1 = ends_in_space1 ? 0 : ' ';
     253   860058713 :                 } else if ((*s1 & 0x80) == 0) {
     254   859920098 :                         c1 = *s1;
     255   859920098 :                         s1++;
     256   859920098 :                         len1--;
     257             :                 } else {
     258      138615 :                         c1 = next_codepoint_handle_ext(iconv_handle, s1, len1,
     259             :                                                        CH_UNIX, &cs1);
     260      138615 :                         if (c1 != INVALID_CODEPOINT) {
     261      138497 :                                 s1 += cs1;
     262      138497 :                                 len1 -= cs1;
     263             :                         }
     264             :                 }
     265             : 
     266   911993469 :                 if (len2 == 0 || *s2 == 0) {
     267    51155014 :                         c2 = 0;
     268   860096754 :                 } else if (*s2 == ' ') {
     269       15779 :                         EAT_SPACE(s2, len2, ends_in_space2);
     270        7851 :                         c2 = ends_in_space2 ? 0 : ' ';
     271   860088903 :                 } else if ((*s2 & 0x80) == 0) {
     272   859952121 :                         c2 = *s2;
     273   859952121 :                         s2++;
     274   859952121 :                         len2--;
     275             :                 } else {
     276      136782 :                         c2 = next_codepoint_handle_ext(iconv_handle, s2, len2,
     277             :                                                        CH_UNIX, &cs2);
     278      136782 :                         if (c2 != INVALID_CODEPOINT) {
     279      136664 :                                 s2 += cs2;
     280      136664 :                                 len2 -= cs2;
     281             :                         }
     282             :                 }
     283             : 
     284   911993469 :                 if (c1 == 0 || c2 == 0 ||
     285   860042393 :                     c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) {
     286             :                         break;
     287             :                 }
     288             : 
     289   860042239 :                 if (c1 == c2) {
     290   778679286 :                         continue;
     291             :                 }
     292    81362953 :                 c1 = toupper_m(c1);
     293    81362953 :                 c2 = toupper_m(c2);
     294    81362953 :                 if (c1 != c2) {
     295    74150086 :                         break;
     296             :                 }
     297             :         }
     298             : 
     299             :         /*
     300             :          * Either a difference has been found, or one or both strings have
     301             :          * ended or hit invalid codepoints.
     302             :          */
     303   126974957 :         ret = NUMERIC_CMP(c1, c2);
     304             : 
     305   126974957 :         if (ret != 0) {
     306    74228140 :                 return ret;
     307             :         }
     308             :         /*
     309             :          * the strings are equal up to here, but one might be longer.
     310             :          */
     311    51872205 :         end1 = len1 == 0 || *s1 == 0;
     312    51872205 :         end2 = len2 == 0 || *s2 == 0;
     313             : 
     314    51872205 :         if (end1 && end2) {
     315    51130491 :                 return 0;
     316             :         }
     317          54 :         if (end1) {
     318           0 :                 return -1;
     319             :         }
     320          54 :         if (end2) {
     321           0 :                 return -1;
     322             :         }
     323             : 
     324             :         /*
     325             :          * By elimination, if we got here, we have INVALID_CODEPOINT on both
     326             :          * sides.
     327             :          *
     328             :          * There is no perfect option, but what we choose to do is continue on
     329             :          * with ascii case fold (as if calling ldb_comparison_fold_ascii()
     330             :          * which is private to ldb, so we can't just defer to it).
     331             :          */
     332         180 :         while (true) {
     333         180 :                 if (len1 == 0 || *s1 == 0) {
     334           0 :                         c1 = 0;
     335         161 :                 } else if (*s1 == ' ') {
     336         104 :                         EAT_SPACE(s1, len1, ends_in_space1);
     337          31 :                         c1 = ends_in_space1 ? 0 : ' ';
     338             :                 } else {
     339         130 :                         c1 = *s1;
     340         130 :                         s1++;
     341         130 :                         len1--;
     342         130 :                         c1 = ('a' <= c1 && c1 <= 'z') ? c1 ^ 0x20 : c1;
     343             :                 }
     344             : 
     345         180 :                 if (len2 == 0 || *s2 == 0) {
     346           0 :                         c2 = 0;
     347         161 :                 } else if (*s2 == ' ') {
     348         104 :                         EAT_SPACE(s2, len2, ends_in_space2);
     349          31 :                         c2 = ends_in_space2 ? 0 : ' ';
     350             :                 } else {
     351         130 :                         c2 = *s2;
     352         130 :                         s2++;
     353         130 :                         len2--;
     354         130 :                         c2 = ('a' <= c2 && c2 <= 'z') ? c2 ^ 0x20 : c2;
     355             :                 }
     356             : 
     357         180 :                 if (c1 == 0 || c2 == 0 || c1 != c2) {
     358             :                         break;
     359             :                 }
     360             :         }
     361          54 :         return NUMERIC_CMP(c1, c2);
     362             : }
     363             : 
     364             : #undef EAT_SPACE
     365             : 
     366             : 
     367             : /**
     368             :  Find the number of 'c' chars in a string
     369             : **/
     370           4 : _PUBLIC_ size_t count_chars_m(const char *s, char c)
     371             : {
     372           4 :         struct smb_iconv_handle *ic = get_iconv_handle();
     373           4 :         size_t count = 0;
     374             : 
     375          13 :         while (*s) {
     376           9 :                 size_t size;
     377           9 :                 codepoint_t c2 = next_codepoint_handle(ic, s, &size);
     378           9 :                 if (c2 == c) count++;
     379           9 :                 s += size;
     380             :         }
     381             : 
     382           4 :         return count;
     383             : }
     384             : 
     385     3038321 : size_t ucs2_align(const void *base_ptr, const void *p, int flags)
     386             : {
     387     3038321 :         if (flags & (STR_NOALIGN|STR_ASCII)) {
     388      128599 :                 return 0;
     389             :         }
     390     2909628 :         return PTR_DIFF(p, base_ptr) & 1;
     391             : }
     392             : 
     393             : /**
     394             : return the number of bytes occupied by a buffer in CH_UTF16 format
     395             : **/
     396         408 : size_t utf16_len(const void *buf)
     397             : {
     398          17 :         size_t len;
     399             : 
     400       51904 :         for (len = 0; PULL_LE_U16(buf,len); len += 2) ;
     401             : 
     402         408 :         return len;
     403             : }
     404             : 
     405             : /**
     406             : return the number of bytes occupied by a buffer in CH_UTF16 format
     407             : the result includes the null termination
     408             : **/
     409         133 : size_t utf16_null_terminated_len(const void *buf)
     410             : {
     411         133 :         return utf16_len(buf) + 2;
     412             : }
     413             : 
     414             : /**
     415             : return the number of bytes occupied by a buffer in CH_UTF16 format
     416             : limited by 'n' bytes
     417             : **/
     418     1390734 : size_t utf16_len_n(const void *src, size_t n)
     419             : {
     420        6270 :         size_t len;
     421             : 
     422    17742974 :         for (len = 0; (len+2 <= n) && PULL_LE_U16(src, len); len += 2) ;
     423             : 
     424     1390734 :         return len;
     425             : }
     426             : 
     427             : /**
     428             : return the number of bytes occupied by a buffer in CH_UTF16 format
     429             : the result includes the null termination
     430             : limited by 'n' bytes
     431             : **/
     432     1390551 : size_t utf16_null_terminated_len_n(const void *src, size_t n)
     433             : {
     434        6263 :         size_t len;
     435             : 
     436     1390551 :         len = utf16_len_n(src, n);
     437             : 
     438     1390551 :         if (len+2 <= n) {
     439      377158 :                 len += 2;
     440             :         }
     441             : 
     442     1390551 :         return len;
     443             : }
     444             : 
     445         216 : unsigned char *talloc_utf16_strlendup(TALLOC_CTX *mem_ctx, const char *str, size_t len)
     446             : {
     447         216 :         unsigned char *new_str = NULL;
     448             : 
     449             :         /* Check for overflow. */
     450         216 :         if (len > SIZE_MAX - 2) {
     451           0 :                 return NULL;
     452             :         }
     453             : 
     454             :         /*
     455             :          * Allocate the new string, including space for the
     456             :          * UTF‐16 null terminator.
     457             :          */
     458         216 :         new_str = talloc_size(mem_ctx, len + 2);
     459         216 :         if (new_str == NULL) {
     460           0 :                 return NULL;
     461             :         }
     462             : 
     463         216 :         memcpy(new_str, str, len);
     464             : 
     465             :         /*
     466             :          * Ensure that the UTF‐16 string is
     467             :          * null‐terminated.
     468             :          */
     469         216 :         new_str[len] = '\0';
     470         216 :         new_str[len + 1] = '\0';
     471             : 
     472         216 :         return new_str;
     473             : }
     474             : 
     475           0 : unsigned char *talloc_utf16_strdup(TALLOC_CTX *mem_ctx, const char *str)
     476             : {
     477           0 :         if (str == NULL) {
     478           0 :                 return NULL;
     479             :         }
     480           0 :         return talloc_utf16_strlendup(mem_ctx, str, utf16_len(str));
     481             : }
     482             : 
     483           0 : unsigned char *talloc_utf16_strndup(TALLOC_CTX *mem_ctx, const char *str, size_t n)
     484             : {
     485           0 :         if (str == NULL) {
     486           0 :                 return NULL;
     487             :         }
     488           0 :         return talloc_utf16_strlendup(mem_ctx, str, utf16_len_n(str, n));
     489             : }
     490             : 
     491             : /**
     492             :  * Determine the length and validity of a utf-8 string.
     493             :  *
     494             :  * @param input the string pointer
     495             :  * @param maxlen maximum size of the string
     496             :  * @param byte_len receives the length of the valid section
     497             :  * @param char_len receives the number of unicode characters in the valid section
     498             :  * @param utf16_len receives the number of 16-bit code units the string would need in UTF-16 encoding.
     499             :  *
     500             :  * @return true if the input is valid up to maxlen, or a '\0' byte, otherwise false.
     501             :  */
     502           0 : bool utf8_check(const char *input, size_t maxlen,
     503             :                 size_t *byte_len,
     504             :                 size_t *char_len,
     505             :                 size_t *utf16_len)
     506             : {
     507           0 :         const uint8_t *s = (const uint8_t *)input;
     508           0 :         size_t i;
     509           0 :         size_t chars = 0;
     510           0 :         size_t long_chars = 0;
     511           0 :         uint32_t codepoint;
     512           0 :         uint8_t a, b, c, d;
     513           0 :         for (i = 0; i < maxlen; i++, chars++) {
     514           0 :                 if (s[i] == 0) {
     515           0 :                         break;
     516             :                 }
     517           0 :                 if (s[i] < 0x80) {
     518           0 :                         continue;
     519             :                 }
     520           0 :                 if ((s[i] & 0xe0) == 0xc0) {
     521             :                         /* 110xxxxx 10xxxxxx */
     522           0 :                         a = s[i];
     523           0 :                         if (maxlen - i < 2) {
     524           0 :                                 goto error;
     525             :                         }
     526           0 :                         b = s[i + 1];
     527           0 :                         if ((b & 0xc0) != 0x80) {
     528           0 :                                 goto error;
     529             :                         }
     530           0 :                         codepoint = (a & 31) << 6 | (b & 63);
     531           0 :                         if (codepoint < 0x80) {
     532           0 :                                 goto error;
     533             :                         }
     534           0 :                         i++;
     535           0 :                         continue;
     536             :                 }
     537           0 :                 if ((s[i] & 0xf0) == 0xe0) {
     538             :                         /* 1110xxxx 10xxxxxx 10xxxxxx */
     539           0 :                         if (maxlen - i < 3) {
     540           0 :                                 goto error;
     541             :                         }
     542           0 :                         a = s[i];
     543           0 :                         b = s[i + 1];
     544           0 :                         c = s[i + 2];
     545           0 :                         if ((b & 0xc0) != 0x80 || (c & 0xc0) != 0x80) {
     546           0 :                                 goto error;
     547             :                         }
     548           0 :                         codepoint = (c & 63) | (b & 63) << 6 | (a & 15) << 12;
     549             : 
     550           0 :                         if (codepoint < 0x800) {
     551           0 :                                 goto error;
     552             :                         }
     553           0 :                         if (codepoint >= 0xd800 && codepoint <= 0xdfff) {
     554             :                                 /*
     555             :                                  * This is an invalid codepoint, per
     556             :                                  * RFC3629, as it encodes part of a
     557             :                                  * UTF-16 surrogate pair for a
     558             :                                  * character over U+10000, which ought
     559             :                                  * to have been encoded as a four byte
     560             :                                  * utf-8 sequence.
     561             :                                  */
     562           0 :                                 goto error;
     563             :                         }
     564           0 :                         i += 2;
     565           0 :                         continue;
     566             :                 }
     567             : 
     568           0 :                 if ((s[i] & 0xf8) == 0xf0) {
     569             :                         /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
     570           0 :                         if (maxlen - i < 4) {
     571           0 :                                 goto error;
     572             :                         }
     573           0 :                         a = s[i];
     574           0 :                         b = s[i + 1];
     575           0 :                         c = s[i + 2];
     576           0 :                         d = s[i + 3];
     577             : 
     578           0 :                         if ((b & 0xc0) != 0x80 ||
     579           0 :                             (c & 0xc0) != 0x80 ||
     580           0 :                             (d & 0xc0) != 0x80) {
     581           0 :                                 goto error;
     582             :                         }
     583           0 :                         codepoint = (d & 63) | (c & 63) << 6 | (b & 63) << 12 | (a & 7) << 18;
     584             : 
     585           0 :                         if (codepoint < 0x10000 || codepoint > 0x10ffff) {
     586           0 :                                 goto error;
     587             :                         }
     588             :                         /* this one will need two UTF16 characters */
     589           0 :                         long_chars++;
     590           0 :                         i += 3;
     591           0 :                         continue;
     592             :                 }
     593             :                 /*
     594             :                  * If it wasn't handled yet, it's wrong.
     595             :                  */
     596           0 :                 goto error;
     597             :         }
     598           0 :         *byte_len = i;
     599           0 :         *char_len = chars;
     600           0 :         *utf16_len = chars + long_chars;
     601           0 :         return true;
     602             : 
     603           0 : error:
     604           0 :         *byte_len = i;
     605           0 :         *char_len = chars;
     606           0 :         *utf16_len = chars + long_chars;
     607           0 :         return false;
     608             : }
     609             : 
     610             : 
     611             : /**
     612             :  * Copy a string from a char* unix src to a dos codepage string destination.
     613             :  *
     614             :  * @converted_size the number of bytes occupied by the string in the destination.
     615             :  * @return bool true if success.
     616             :  *
     617             :  * @param flags can include
     618             :  * <dl>
     619             :  * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
     620             :  * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
     621             :  * </dl>
     622             :  *
     623             :  * @param dest_len the maximum length in bytes allowed in the
     624             :  * destination.  If @p dest_len is -1 then no maximum is used.
     625             :  **/
     626        4123 : static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
     627             : {
     628         144 :         size_t src_len;
     629         144 :         bool ret;
     630             : 
     631        4123 :         if (flags & STR_UPPER) {
     632           4 :                 char *tmpbuf = strupper_talloc(NULL, src);
     633           4 :                 if (tmpbuf == NULL) {
     634           0 :                         return false;
     635             :                 }
     636           4 :                 ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
     637           4 :                 talloc_free(tmpbuf);
     638           4 :                 return ret;
     639             :         }
     640             : 
     641        4119 :         src_len = strlen(src);
     642             : 
     643        4119 :         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
     644        4075 :                 src_len++;
     645             : 
     646        4119 :         return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
     647             : }
     648             : 
     649             : /**
     650             :  * Copy a string from a dos codepage source to a unix char* destination.
     651             :  *
     652             :  * The resulting string in "dest" is always null terminated.
     653             :  *
     654             :  * @param flags can have:
     655             :  * <dl>
     656             :  * <dt>STR_TERMINATE</dt>
     657             :  * <dd>STR_TERMINATE means the string in @p src
     658             :  * is null terminated, and src_len is ignored.</dd>
     659             :  * </dl>
     660             :  *
     661             :  * @param src_len is the length of the source area in bytes.
     662             :  * @returns the number of bytes occupied by the string in @p src.
     663             :  **/
     664         178 : static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     665             : {
     666         178 :         size_t size = 0;
     667             : 
     668         178 :         if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
     669           0 :                 if (src_len == (size_t)-1) {
     670           0 :                         src_len = strlen((const char *)src) + 1;
     671             :                 } else {
     672           0 :                         size_t len = strnlen((const char *)src, src_len);
     673           0 :                         if (len < src_len)
     674           0 :                                 len++;
     675           0 :                         src_len = len;
     676             :                 }
     677             :         }
     678             : 
     679             :         /* We're ignoring the return here.. */
     680         178 :         (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
     681             : 
     682         178 :         if (dest_len)
     683         178 :                 dest[MIN(size, dest_len-1)] = 0;
     684             : 
     685         178 :         return src_len;
     686             : }
     687             : 
     688             : /**
     689             :  * Copy a string from a char* src to a unicode destination.
     690             :  *
     691             :  * @returns the number of bytes occupied by the string in the destination.
     692             :  *
     693             :  * @param flags can have:
     694             :  *
     695             :  * <dl>
     696             :  * <dt>STR_TERMINATE <dd>means include the null termination.
     697             :  * <dt>STR_UPPER     <dd>means uppercase in the destination.
     698             :  * <dt>STR_NOALIGN   <dd>means don't do alignment.
     699             :  * </dl>
     700             :  *
     701             :  * @param dest_len is the maximum length allowed in the
     702             :  * destination. If dest_len is -1 then no maximum is used.
     703             :  **/
     704      457385 : static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
     705             : {
     706      457385 :         size_t len=0;
     707      457385 :         size_t src_len = strlen(src);
     708      457385 :         size_t size = 0;
     709       11140 :         bool ret;
     710             : 
     711      457385 :         if (flags & STR_UPPER) {
     712        2952 :                 char *tmpbuf = strupper_talloc(NULL, src);
     713         144 :                 ssize_t retval;
     714        2952 :                 if (tmpbuf == NULL) {
     715           0 :                         return -1;
     716             :                 }
     717        2952 :                 retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
     718        2952 :                 talloc_free(tmpbuf);
     719        2952 :                 return retval;
     720             :         }
     721             : 
     722      454433 :         if (flags & STR_TERMINATE)
     723      314831 :                 src_len++;
     724             : 
     725      454433 :         if (ucs2_align(NULL, dest, flags)) {
     726      148065 :                 *(char *)dest = 0;
     727      148065 :                 dest = (void *)((char *)dest + 1);
     728      148065 :                 if (dest_len) dest_len--;
     729      142995 :                 len++;
     730             :         }
     731             : 
     732             :         /* ucs2 is always a multiple of 2 bytes */
     733      454433 :         dest_len &= ~1;
     734             : 
     735      454433 :         ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
     736      454433 :         if (ret == false) {
     737           0 :                 return 0;
     738             :         }
     739             : 
     740      454433 :         len += size;
     741             : 
     742      454433 :         return (ssize_t)len;
     743             : }
     744             : 
     745             : 
     746             : /**
     747             :  Copy a string from a ucs2 source to a unix char* destination.
     748             :  Flags can have:
     749             :   STR_TERMINATE means the string in src is null terminated.
     750             :   STR_NOALIGN   means don't try to align.
     751             :  if STR_TERMINATE is set then src_len is ignored if it is -1.
     752             :  src_len is the length of the source area in bytes
     753             :  Return the number of bytes occupied by the string in src.
     754             :  The resulting string in "dest" is always null terminated.
     755             : **/
     756             : 
     757           0 : static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     758             : {
     759           0 :         size_t size = 0;
     760             : 
     761           0 :         if (ucs2_align(NULL, src, flags)) {
     762           0 :                 src = (const void *)((const char *)src + 1);
     763           0 :                 if (src_len > 0)
     764           0 :                         src_len--;
     765             :         }
     766             : 
     767           0 :         if (flags & STR_TERMINATE) {
     768           0 :                 if (src_len == (size_t)-1) {
     769           0 :                         src_len = utf16_null_terminated_len(src);
     770             :                 } else {
     771           0 :                         src_len = utf16_null_terminated_len_n(src, src_len);
     772             :                 }
     773             :         }
     774             : 
     775             :         /* ucs2 is always a multiple of 2 bytes */
     776           0 :         if (src_len != (size_t)-1)
     777           0 :                 src_len &= ~1;
     778             : 
     779             :         /* We're ignoring the return here.. */
     780           0 :         (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
     781           0 :         if (dest_len)
     782           0 :                 dest[MIN(size, dest_len-1)] = 0;
     783             : 
     784           0 :         return src_len;
     785             : }
     786             : 
     787             : /**
     788             :  Copy a string from a char* src to a unicode or ascii
     789             :  dos codepage destination choosing unicode or ascii based on the
     790             :  flags in the SMB buffer starting at base_ptr.
     791             :  Return the number of bytes occupied by the string in the destination.
     792             :  flags can have:
     793             :   STR_TERMINATE means include the null termination.
     794             :   STR_UPPER     means uppercase in the destination.
     795             :   STR_ASCII     use ascii even with unicode packet.
     796             :   STR_NOALIGN   means don't do alignment.
     797             :  dest_len is the maximum length allowed in the destination. If dest_len
     798             :  is -1 then no maximum is used.
     799             : **/
     800             : 
     801      458552 : _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
     802             : {
     803      458552 :         if (flags & STR_ASCII) {
     804        4119 :                 size_t size = 0;
     805        4119 :                 if (push_ascii_string(dest, src, dest_len, flags, &size)) {
     806        4119 :                         return (ssize_t)size;
     807             :                 } else {
     808           0 :                         return (ssize_t)-1;
     809             :                 }
     810      454433 :         } else if (flags & STR_UNICODE) {
     811      454433 :                 return push_ucs2(dest, src, dest_len, flags);
     812             :         } else {
     813           0 :                 smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
     814             :                 return -1;
     815             :         }
     816             : }
     817             : 
     818             : 
     819             : /**
     820             :  Copy a string from a unicode or ascii source (depending on
     821             :  the packet flags) to a char* destination.
     822             :  Flags can have:
     823             :   STR_TERMINATE means the string in src is null terminated.
     824             :   STR_UNICODE   means to force as unicode.
     825             :   STR_ASCII     use ascii even with unicode packet.
     826             :   STR_NOALIGN   means don't do alignment.
     827             :  if STR_TERMINATE is set then src_len is ignored is it is -1
     828             :  src_len is the length of the source area in bytes.
     829             :  Return the number of bytes occupied by the string in src.
     830             :  The resulting string in "dest" is always null terminated.
     831             : **/
     832             : 
     833         178 : _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
     834             : {
     835         178 :         if (flags & STR_ASCII) {
     836         178 :                 return pull_ascii_string(dest, src, dest_len, src_len, flags);
     837           0 :         } else if (flags & STR_UNICODE) {
     838           0 :                 return pull_ucs2(dest, src, dest_len, src_len, flags);
     839             :         } else {
     840           0 :                 smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
     841             :                 return -1;
     842             :         }
     843             : }

Generated by: LCOV version 1.14