Line data Source code
1 : /*
2 : Unix SMB/CIFS implementation.
3 : Samba utility functions
4 : Copyright (C) Andrew Tridgell 1992-2001
5 : Copyright (C) Simo Sorce 2001
6 :
7 : This program is free software; you can redistribute it and/or modify
8 : it under the terms of the GNU General Public License as published by
9 : the Free Software Foundation; either version 3 of the License, or
10 : (at your option) any later version.
11 :
12 : This program is distributed in the hope that it will be useful,
13 : but WITHOUT ANY WARRANTY; without even the implied warranty of
14 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 : GNU General Public License for more details.
16 :
17 : You should have received a copy of the GNU General Public License
18 : along with this program. If not, see <http://www.gnu.org/licenses/>.
19 : */
20 :
21 : #include "replace.h"
22 : #include "system/locale.h"
23 : #include "charset.h"
24 : #include "lib/util/byteorder.h"
25 : #include "lib/util/fault.h"
26 : #include "lib/util/tsort.h"
27 :
28 : /**
29 : String replace.
30 : NOTE: oldc and newc must be 7 bit characters
31 : **/
32 5 : _PUBLIC_ void string_replace_m(char *s, char oldc, char newc)
33 : {
34 5 : struct smb_iconv_handle *ic = get_iconv_handle();
35 19 : while (s && *s) {
36 14 : size_t size;
37 14 : codepoint_t c = next_codepoint_handle(ic, s, &size);
38 14 : if (c == oldc) {
39 5 : *s = newc;
40 : }
41 14 : s += size;
42 : }
43 5 : }
44 :
45 : /**
46 : Convert a string to lower case, allocated with talloc
47 : **/
48 6079285 : _PUBLIC_ char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle,
49 : TALLOC_CTX *ctx, const char *src)
50 : {
51 6079285 : size_t size=0;
52 18078 : char *dest;
53 :
54 6079285 : if(src == NULL) {
55 0 : return NULL;
56 : }
57 :
58 : /* this takes advantage of the fact that upper/lower can't
59 : change the length of a character by more than 1 byte */
60 6079285 : dest = talloc_array(ctx, char, 2*(strlen(src))+1);
61 6079285 : if (dest == NULL) {
62 0 : return NULL;
63 : }
64 :
65 134999483 : while (*src) {
66 415963 : size_t c_size;
67 128920198 : codepoint_t c = next_codepoint_handle(iconv_handle, src, &c_size);
68 128920198 : src += c_size;
69 :
70 128920198 : c = tolower_m(c);
71 :
72 128920198 : c_size = push_codepoint_handle(iconv_handle, dest+size, c);
73 128920198 : if (c_size == -1) {
74 0 : talloc_free(dest);
75 0 : return NULL;
76 : }
77 128920198 : size += c_size;
78 : }
79 :
80 6079285 : dest[size] = 0;
81 :
82 : /* trim it so talloc_append_string() works */
83 6079285 : dest = talloc_realloc(ctx, dest, char, size+1);
84 :
85 6079285 : talloc_set_name_const(dest, dest);
86 :
87 6079285 : return dest;
88 : }
89 :
90 6079279 : _PUBLIC_ char *strlower_talloc(TALLOC_CTX *ctx, const char *src)
91 : {
92 6079279 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
93 6079279 : return strlower_talloc_handle(iconv_handle, ctx, src);
94 : }
95 :
96 : /**
97 : Convert a string to UPPER case, allocated with talloc
98 : source length limited to n bytes, iconv handle supplied
99 : **/
100 702803850 : _PUBLIC_ char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle,
101 : TALLOC_CTX *ctx, const char *src, size_t n)
102 : {
103 702803850 : size_t size=0;
104 12973714 : char *dest;
105 :
106 702803850 : if (!src) {
107 192650 : return NULL;
108 : }
109 :
110 : /* this takes advantage of the fact that upper/lower can't
111 : change the length of a character by more than 1 byte */
112 702573506 : dest = talloc_array(ctx, char, 2*(n+1));
113 702573506 : if (dest == NULL) {
114 0 : return NULL;
115 : }
116 :
117 10749076050 : while (n && *src) {
118 118927733 : size_t c_size;
119 10046502546 : codepoint_t c = next_codepoint_handle_ext(iconv_handle, src, n,
120 : CH_UNIX, &c_size);
121 10046502546 : src += c_size;
122 10046502546 : n -= c_size;
123 :
124 10046502546 : c = toupper_m(c);
125 :
126 10046502546 : c_size = push_codepoint_handle(iconv_handle, dest+size, c);
127 10046502546 : if (c_size == -1) {
128 2 : talloc_free(dest);
129 2 : return NULL;
130 : }
131 10046502544 : size += c_size;
132 : }
133 :
134 702573504 : dest[size] = 0;
135 :
136 : /* trim it so talloc_append_string() works */
137 702573504 : dest = talloc_realloc(ctx, dest, char, size+1);
138 :
139 702573504 : talloc_set_name_const(dest, dest);
140 :
141 702573504 : return dest;
142 : }
143 :
144 : /**
145 : Convert a string to UPPER case, allocated with talloc
146 : source length limited to n bytes
147 : **/
148 702803844 : _PUBLIC_ char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n)
149 : {
150 702803844 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
151 702803844 : return strupper_talloc_n_handle(iconv_handle, ctx, src, n);
152 : }
153 : /**
154 : Convert a string to UPPER case, allocated with talloc
155 : **/
156 5988847 : _PUBLIC_ char *strupper_talloc(TALLOC_CTX *ctx, const char *src)
157 : {
158 5988847 : return strupper_talloc_n(ctx, src, src?strlen(src):0);
159 : }
160 :
161 : /**
162 : talloc_strdup() a unix string to upper case.
163 : **/
164 3070349 : _PUBLIC_ char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src)
165 : {
166 3070349 : return strupper_talloc(ctx, src);
167 : }
168 :
169 :
170 : /*
171 : * strncasecmp_ldb() works like a *bit* like strncasecmp, with various
172 : * tricks to suit the way LDB compares strings. The differences are:
173 : *
174 : * 0. each string has it's own length.
175 : *
176 : * 1. consecutive spaces are collapsed down to one space, so that
177 : * "a b" equals "a b". (this is why each string needs its own
178 : * length). Leading and trailing spaces are removed altogether.
179 : *
180 : * 2. Comparisons are done in UPPER CASE, as Windows does, not in
181 : * lowercase as POSIX would have it.
182 : *
183 : * 3. An invalid byte compares higher than any real character. For example,
184 : * "hello\xc2\xff" would sort higher than "hello\xcd\xb6", because CD
185 : * B6 is a valid sequence and C2 FF is not.
186 : *
187 : * 4. If two strings become invalid on the same character, the rest
188 : * of the string is compared via ldb ASCII case fold rules.
189 : *
190 : * For example, "hellō\xC2\xFFworld" < " hElLŌ\xFE ", because the the
191 : * strings are equal up to 'ō' by utf-8 casefold, but the "\xc2\xff" and
192 : * "\xfe" are invalid sequences. At that point, we skip to the byte-by-byte
193 : * (but space-eating, casefolding) comparison, and 0xc2 < 0xff.
194 : */
195 :
196 : #define EAT_SPACE(s, len, ends_in_space) \
197 : do { \
198 : while (len) { \
199 : if (*s != ' ') { \
200 : break; \
201 : } \
202 : s++; \
203 : len--; \
204 : } \
205 : ends_in_space = (len == 0 || *s == '\0'); \
206 : } while(0)
207 :
208 :
209 126976050 : _PUBLIC_ int strncasecmp_ldb(const char *s1,
210 : size_t len1,
211 : const char *s2,
212 : size_t len2)
213 : {
214 126976050 : struct smb_iconv_handle *iconv_handle = get_iconv_handle();
215 1616386 : codepoint_t c1, c2;
216 1616386 : size_t cs1, cs2;
217 1616386 : bool ends_in_space1, ends_in_space2;
218 1616386 : int ret;
219 1616386 : bool end1, end2;
220 :
221 126977107 : EAT_SPACE(s1, len1, ends_in_space1);
222 126976417 : EAT_SPACE(s2, len2, ends_in_space2);
223 : /*
224 : * if ends_in_space was set, the string was empty or only
225 : * spaces (which we treat as equivalent).
226 : */
227 126976050 : if (ends_in_space1 && ends_in_space2) {
228 2 : return 0;
229 : }
230 126976044 : if (ends_in_space1) {
231 858 : return -1;
232 : }
233 126975158 : if (ends_in_space2) {
234 173 : return 1;
235 : }
236 :
237 11604934 : while (true) {
238 : /*
239 : * If the next byte is a space, we eat all the spaces,
240 : * and say we found a single codepoint. If the spaces
241 : * were at the end of the string, the codepoint is 0,
242 : * as if there were no spaces. Otherwise it is 0x20,
243 : * as if there was one space.
244 : *
245 : * Setting the codepoint to 0 will break the loop, but
246 : * only after codepoints have been found in both strings.
247 : */
248 911993469 : if (len1 == 0 || *s1 == 0) {
249 51184014 : c1 = 0;
250 860066979 : } else if (*s1 == ' ') {
251 16612 : EAT_SPACE(s1, len1, ends_in_space1);
252 8266 : c1 = ends_in_space1 ? 0 : ' ';
253 860058713 : } else if ((*s1 & 0x80) == 0) {
254 859920098 : c1 = *s1;
255 859920098 : s1++;
256 859920098 : len1--;
257 : } else {
258 138615 : c1 = next_codepoint_handle_ext(iconv_handle, s1, len1,
259 : CH_UNIX, &cs1);
260 138615 : if (c1 != INVALID_CODEPOINT) {
261 138497 : s1 += cs1;
262 138497 : len1 -= cs1;
263 : }
264 : }
265 :
266 911993469 : if (len2 == 0 || *s2 == 0) {
267 51155014 : c2 = 0;
268 860096754 : } else if (*s2 == ' ') {
269 15779 : EAT_SPACE(s2, len2, ends_in_space2);
270 7851 : c2 = ends_in_space2 ? 0 : ' ';
271 860088903 : } else if ((*s2 & 0x80) == 0) {
272 859952121 : c2 = *s2;
273 859952121 : s2++;
274 859952121 : len2--;
275 : } else {
276 136782 : c2 = next_codepoint_handle_ext(iconv_handle, s2, len2,
277 : CH_UNIX, &cs2);
278 136782 : if (c2 != INVALID_CODEPOINT) {
279 136664 : s2 += cs2;
280 136664 : len2 -= cs2;
281 : }
282 : }
283 :
284 911993469 : if (c1 == 0 || c2 == 0 ||
285 860042393 : c1 == INVALID_CODEPOINT || c2 == INVALID_CODEPOINT) {
286 : break;
287 : }
288 :
289 860042239 : if (c1 == c2) {
290 778679286 : continue;
291 : }
292 81362953 : c1 = toupper_m(c1);
293 81362953 : c2 = toupper_m(c2);
294 81362953 : if (c1 != c2) {
295 74150086 : break;
296 : }
297 : }
298 :
299 : /*
300 : * Either a difference has been found, or one or both strings have
301 : * ended or hit invalid codepoints.
302 : */
303 126974957 : ret = NUMERIC_CMP(c1, c2);
304 :
305 126974957 : if (ret != 0) {
306 74228140 : return ret;
307 : }
308 : /*
309 : * the strings are equal up to here, but one might be longer.
310 : */
311 51872205 : end1 = len1 == 0 || *s1 == 0;
312 51872205 : end2 = len2 == 0 || *s2 == 0;
313 :
314 51872205 : if (end1 && end2) {
315 51130491 : return 0;
316 : }
317 54 : if (end1) {
318 0 : return -1;
319 : }
320 54 : if (end2) {
321 0 : return -1;
322 : }
323 :
324 : /*
325 : * By elimination, if we got here, we have INVALID_CODEPOINT on both
326 : * sides.
327 : *
328 : * THere is no perfect option, but what we choose to do is continue on
329 : * with ascii case fold (as if calling ldb_comparison_fold_ascii()
330 : * which is private to ldb, so we can't just defer to it).
331 : */
332 180 : while (true) {
333 180 : if (len1 == 0 || *s1 == 0) {
334 0 : c1 = 0;
335 161 : } else if (*s1 == ' ') {
336 104 : EAT_SPACE(s1, len1, ends_in_space1);
337 31 : c1 = ends_in_space1 ? 0 : ' ';
338 : } else {
339 130 : c1 = *s1;
340 130 : s1++;
341 130 : len1--;
342 130 : c1 = ('a' <= c1 && c1 <= 'z') ? c1 ^ 0x20 : c1;
343 : }
344 :
345 180 : if (len2 == 0 || *s2 == 0) {
346 0 : c2 = 0;
347 161 : } else if (*s2 == ' ') {
348 104 : EAT_SPACE(s2, len2, ends_in_space2);
349 31 : c2 = ends_in_space2 ? 0 : ' ';
350 : } else {
351 130 : c2 = *s2;
352 130 : s2++;
353 130 : len2--;
354 130 : c2 = ('a' <= c2 && c2 <= 'z') ? c2 ^ 0x20 : c2;
355 : }
356 :
357 180 : if (c1 == 0 || c2 == 0 || c1 != c2) {
358 : break;
359 : }
360 : }
361 54 : return NUMERIC_CMP(c1, c2);
362 : }
363 :
364 : #undef EAT_SPACE
365 :
366 :
367 : /**
368 : Find the number of 'c' chars in a string
369 : **/
370 4 : _PUBLIC_ size_t count_chars_m(const char *s, char c)
371 : {
372 4 : struct smb_iconv_handle *ic = get_iconv_handle();
373 4 : size_t count = 0;
374 :
375 13 : while (*s) {
376 9 : size_t size;
377 9 : codepoint_t c2 = next_codepoint_handle(ic, s, &size);
378 9 : if (c2 == c) count++;
379 9 : s += size;
380 : }
381 :
382 4 : return count;
383 : }
384 :
385 3038321 : size_t ucs2_align(const void *base_ptr, const void *p, int flags)
386 : {
387 3038321 : if (flags & (STR_NOALIGN|STR_ASCII)) {
388 128599 : return 0;
389 : }
390 2909628 : return PTR_DIFF(p, base_ptr) & 1;
391 : }
392 :
/**
 return the number of bytes occupied by a buffer in CH_UTF16 format

 The buffer is scanned two bytes at a time until a zero 16-bit unit is
 read; that terminating unit is not counted.
**/
size_t utf16_len(const void *buf)
{
	size_t offset = 0;

	while (PULL_LE_U16(buf, offset) != 0) {
		offset += 2;
	}

	return offset;
}
404 :
/**
 return the number of bytes occupied by a buffer in CH_UTF16 format
 the result includes the null termination (two extra bytes)
**/
size_t utf16_null_terminated_len(const void *buf)
{
	const size_t body_bytes = utf16_len(buf);

	return body_bytes + 2;
}
413 :
/**
 return the number of bytes occupied by a buffer in CH_UTF16 format
 limited by 'n' bytes

 Scanning stops at the first zero 16-bit unit, or when fewer than two
 bytes of the n-byte window remain.
**/
size_t utf16_len_n(const void *src, size_t n)
{
	size_t offset = 0;

	while ((offset + 2 <= n) && PULL_LE_U16(src, offset) != 0) {
		offset += 2;
	}

	return offset;
}
426 :
/**
 return the number of bytes occupied by a buffer in CH_UTF16 format
 the result includes the null termination
 limited by 'n' bytes

 The two terminator bytes are only counted when they fit inside the
 n-byte window.
**/
size_t utf16_null_terminated_len_n(const void *src, size_t n)
{
	const size_t body_bytes = utf16_len_n(src, n);
	const bool terminator_fits = (body_bytes + 2 <= n);

	return terminator_fits ? body_bytes + 2 : body_bytes;
}
444 :
445 216 : unsigned char *talloc_utf16_strlendup(TALLOC_CTX *mem_ctx, const char *str, size_t len)
446 : {
447 216 : unsigned char *new_str = NULL;
448 :
449 : /* Check for overflow. */
450 216 : if (len > SIZE_MAX - 2) {
451 0 : return NULL;
452 : }
453 :
454 : /*
455 : * Allocate the new string, including space for the
456 : * UTF‐16 null terminator.
457 : */
458 216 : new_str = talloc_size(mem_ctx, len + 2);
459 216 : if (new_str == NULL) {
460 0 : return NULL;
461 : }
462 :
463 216 : memcpy(new_str, str, len);
464 :
465 : /*
466 : * Ensure that the UTF‐16 string is
467 : * null‐terminated.
468 : */
469 216 : new_str[len] = '\0';
470 216 : new_str[len + 1] = '\0';
471 :
472 216 : return new_str;
473 : }
474 :
475 0 : unsigned char *talloc_utf16_strdup(TALLOC_CTX *mem_ctx, const char *str)
476 : {
477 0 : if (str == NULL) {
478 0 : return NULL;
479 : }
480 0 : return talloc_utf16_strlendup(mem_ctx, str, utf16_len(str));
481 : }
482 :
483 0 : unsigned char *talloc_utf16_strndup(TALLOC_CTX *mem_ctx, const char *str, size_t n)
484 : {
485 0 : if (str == NULL) {
486 0 : return NULL;
487 : }
488 0 : return talloc_utf16_strlendup(mem_ctx, str, utf16_len_n(str, n));
489 : }
490 :
/**
 * Determine the length and validity of a utf-8 string.
 *
 * Scanning stops at maxlen bytes, at a '\0' byte, or at the first
 * sequence that is malformed: a bad lead or continuation byte, a
 * sequence cut short by maxlen, an overlong encoding, an encoded
 * UTF-16 surrogate, or a value above U+10FFFF.
 *
 * @param input the string pointer
 * @param maxlen maximum size of the string
 * @param byte_len receives the length in bytes of the valid section
 * @param char_len receives the number of unicode characters in the valid section
 * @param utf16_len receives the number of 16-bit units the valid
 *        section would need in UTF16 encoding (characters that need a
 *        four byte utf-8 sequence count twice).
 *
 * The three output values are written on both success and failure.
 *
 * @return true if the input is valid up to maxlen, or a '\0' byte, otherwise false.
 */
bool utf8_check(const char *input, size_t maxlen,
		size_t *byte_len,
		size_t *char_len,
		size_t *utf16_len)
{
	const uint8_t *bytes = (const uint8_t *)input;
	size_t pos = 0;
	size_t n_chars = 0;
	size_t n_four_byte = 0;
	bool valid = true;

	while (pos < maxlen && bytes[pos] != 0) {
		const uint8_t lead = bytes[pos];
		const size_t remaining = maxlen - pos;
		size_t seq_len;

		if (lead < 0x80) {
			/* 0xxxxxxx */
			seq_len = 1;
		} else if ((lead & 0xe0) == 0xc0) {
			/* 110xxxxx 10xxxxxx */
			uint32_t cp;

			if (remaining < 2 ||
			    (bytes[pos + 1] & 0xc0) != 0x80) {
				valid = false;
				break;
			}
			cp = (lead & 31) << 6 | (bytes[pos + 1] & 63);
			if (cp < 0x80) {
				/* overlong encoding */
				valid = false;
				break;
			}
			seq_len = 2;
		} else if ((lead & 0xf0) == 0xe0) {
			/* 1110xxxx 10xxxxxx 10xxxxxx */
			uint32_t cp;

			if (remaining < 3 ||
			    (bytes[pos + 1] & 0xc0) != 0x80 ||
			    (bytes[pos + 2] & 0xc0) != 0x80) {
				valid = false;
				break;
			}
			cp = (bytes[pos + 2] & 63) |
			     (bytes[pos + 1] & 63) << 6 |
			     (lead & 15) << 12;
			if (cp < 0x800) {
				/* overlong encoding */
				valid = false;
				break;
			}
			if (cp >= 0xd800 && cp <= 0xdfff) {
				/*
				 * RFC3629 forbids these: they are the
				 * halves of a UTF-16 surrogate pair, and
				 * the character they stand for ought to
				 * have been written as a four byte
				 * utf-8 sequence.
				 */
				valid = false;
				break;
			}
			seq_len = 3;
		} else if ((lead & 0xf8) == 0xf0) {
			/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			uint32_t cp;

			if (remaining < 4 ||
			    (bytes[pos + 1] & 0xc0) != 0x80 ||
			    (bytes[pos + 2] & 0xc0) != 0x80 ||
			    (bytes[pos + 3] & 0xc0) != 0x80) {
				valid = false;
				break;
			}
			cp = (bytes[pos + 3] & 63) |
			     (bytes[pos + 2] & 63) << 6 |
			     (bytes[pos + 1] & 63) << 12 |
			     (lead & 7) << 18;
			if (cp < 0x10000 || cp > 0x10ffff) {
				valid = false;
				break;
			}
			/* this one will need two UTF16 characters */
			n_four_byte++;
			seq_len = 4;
		} else {
			/* Not a lead byte of any accepted form. */
			valid = false;
			break;
		}

		pos += seq_len;
		n_chars++;
	}

	*byte_len = pos;
	*char_len = n_chars;
	*utf16_len = n_chars + n_four_byte;
	return valid;
}
609 :
610 :
611 : /**
612 : * Copy a string from a char* unix src to a dos codepage string destination.
613 : *
614 : * @converted_size the number of bytes occupied by the string in the destination.
615 : * @return bool true if success.
616 : *
617 : * @param flags can include
618 : * <dl>
619 : * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
620 : * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
621 : * </dl>
622 : *
623 : * @param dest_len the maximum length in bytes allowed in the
624 : * destination. If @p dest_len is -1 then no maximum is used.
625 : **/
626 4123 : static bool push_ascii_string(void *dest, const char *src, size_t dest_len, int flags, size_t *converted_size)
627 : {
628 144 : size_t src_len;
629 144 : bool ret;
630 :
631 4123 : if (flags & STR_UPPER) {
632 4 : char *tmpbuf = strupper_talloc(NULL, src);
633 4 : if (tmpbuf == NULL) {
634 0 : return false;
635 : }
636 4 : ret = push_ascii_string(dest, tmpbuf, dest_len, flags & ~STR_UPPER, converted_size);
637 4 : talloc_free(tmpbuf);
638 4 : return ret;
639 : }
640 :
641 4119 : src_len = strlen(src);
642 :
643 4119 : if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
644 4075 : src_len++;
645 :
646 4119 : return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, converted_size);
647 : }
648 :
649 : /**
650 : * Copy a string from a dos codepage source to a unix char* destination.
651 : *
652 : * The resulting string in "dest" is always null terminated.
653 : *
654 : * @param flags can have:
655 : * <dl>
656 : * <dt>STR_TERMINATE</dt>
657 : * <dd>STR_TERMINATE means the string in @p src
658 : * is null terminated, and src_len is ignored.</dd>
659 : * </dl>
660 : *
661 : * @param src_len is the length of the source area in bytes.
662 : * @returns the number of bytes occupied by the string in @p src.
663 : **/
664 178 : static ssize_t pull_ascii_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
665 : {
666 178 : size_t size = 0;
667 :
668 178 : if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
669 0 : if (src_len == (size_t)-1) {
670 0 : src_len = strlen((const char *)src) + 1;
671 : } else {
672 0 : size_t len = strnlen((const char *)src, src_len);
673 0 : if (len < src_len)
674 0 : len++;
675 0 : src_len = len;
676 : }
677 : }
678 :
679 : /* We're ignoring the return here.. */
680 178 : (void)convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, &size);
681 :
682 178 : if (dest_len)
683 178 : dest[MIN(size, dest_len-1)] = 0;
684 :
685 178 : return src_len;
686 : }
687 :
688 : /**
689 : * Copy a string from a char* src to a unicode destination.
690 : *
691 : * @returns the number of bytes occupied by the string in the destination.
692 : *
693 : * @param flags can have:
694 : *
695 : * <dl>
696 : * <dt>STR_TERMINATE <dd>means include the null termination.
697 : * <dt>STR_UPPER <dd>means uppercase in the destination.
698 : * <dt>STR_NOALIGN <dd>means don't do alignment.
699 : * </dl>
700 : *
701 : * @param dest_len is the maximum length allowed in the
702 : * destination. If dest_len is -1 then no maximum is used.
703 : **/
704 457385 : static ssize_t push_ucs2(void *dest, const char *src, size_t dest_len, int flags)
705 : {
706 457385 : size_t len=0;
707 457385 : size_t src_len = strlen(src);
708 457385 : size_t size = 0;
709 11140 : bool ret;
710 :
711 457385 : if (flags & STR_UPPER) {
712 2952 : char *tmpbuf = strupper_talloc(NULL, src);
713 144 : ssize_t retval;
714 2952 : if (tmpbuf == NULL) {
715 0 : return -1;
716 : }
717 2952 : retval = push_ucs2(dest, tmpbuf, dest_len, flags & ~STR_UPPER);
718 2952 : talloc_free(tmpbuf);
719 2952 : return retval;
720 : }
721 :
722 454433 : if (flags & STR_TERMINATE)
723 314831 : src_len++;
724 :
725 454433 : if (ucs2_align(NULL, dest, flags)) {
726 148065 : *(char *)dest = 0;
727 148065 : dest = (void *)((char *)dest + 1);
728 148065 : if (dest_len) dest_len--;
729 142995 : len++;
730 : }
731 :
732 : /* ucs2 is always a multiple of 2 bytes */
733 454433 : dest_len &= ~1;
734 :
735 454433 : ret = convert_string(CH_UNIX, CH_UTF16, src, src_len, dest, dest_len, &size);
736 454433 : if (ret == false) {
737 0 : return 0;
738 : }
739 :
740 454433 : len += size;
741 :
742 454433 : return (ssize_t)len;
743 : }
744 :
745 :
746 : /**
747 : Copy a string from a ucs2 source to a unix char* destination.
748 : Flags can have:
749 : STR_TERMINATE means the string in src is null terminated.
750 : STR_NOALIGN means don't try to align.
751 : if STR_TERMINATE is set then src_len is ignored if it is -1.
752 : src_len is the length of the source area in bytes
753 : Return the number of bytes occupied by the string in src.
754 : The resulting string in "dest" is always null terminated.
755 : **/
756 :
757 0 : static size_t pull_ucs2(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
758 : {
759 0 : size_t size = 0;
760 :
761 0 : if (ucs2_align(NULL, src, flags)) {
762 0 : src = (const void *)((const char *)src + 1);
763 0 : if (src_len > 0)
764 0 : src_len--;
765 : }
766 :
767 0 : if (flags & STR_TERMINATE) {
768 0 : if (src_len == (size_t)-1) {
769 0 : src_len = utf16_null_terminated_len(src);
770 : } else {
771 0 : src_len = utf16_null_terminated_len_n(src, src_len);
772 : }
773 : }
774 :
775 : /* ucs2 is always a multiple of 2 bytes */
776 0 : if (src_len != (size_t)-1)
777 0 : src_len &= ~1;
778 :
779 : /* We're ignoring the return here.. */
780 0 : (void)convert_string(CH_UTF16, CH_UNIX, src, src_len, dest, dest_len, &size);
781 0 : if (dest_len)
782 0 : dest[MIN(size, dest_len-1)] = 0;
783 :
784 0 : return src_len;
785 : }
786 :
787 : /**
788 : Copy a string from a char* src to a unicode or ascii
789 : dos codepage destination choosing unicode or ascii based on the
790 : flags in the SMB buffer starting at base_ptr.
791 : Return the number of bytes occupied by the string in the destination.
792 : flags can have:
793 : STR_TERMINATE means include the null termination.
794 : STR_UPPER means uppercase in the destination.
795 : STR_ASCII use ascii even with unicode packet.
796 : STR_NOALIGN means don't do alignment.
797 : dest_len is the maximum length allowed in the destination. If dest_len
798 : is -1 then no maximum is used.
799 : **/
800 :
801 458552 : _PUBLIC_ ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
802 : {
803 458552 : if (flags & STR_ASCII) {
804 4119 : size_t size = 0;
805 4119 : if (push_ascii_string(dest, src, dest_len, flags, &size)) {
806 4119 : return (ssize_t)size;
807 : } else {
808 0 : return (ssize_t)-1;
809 : }
810 454433 : } else if (flags & STR_UNICODE) {
811 454433 : return push_ucs2(dest, src, dest_len, flags);
812 : } else {
813 0 : smb_panic("push_string requires either STR_ASCII or STR_UNICODE flag to be set");
814 : return -1;
815 : }
816 : }
817 :
818 :
819 : /**
820 : Copy a string from a unicode or ascii source (depending on
821 : the packet flags) to a char* destination.
822 : Flags can have:
823 : STR_TERMINATE means the string in src is null terminated.
824 : STR_UNICODE means to force as unicode.
825 : STR_ASCII use ascii even with unicode packet.
826 : STR_NOALIGN means don't do alignment.
827 : if STR_TERMINATE is set then src_len is ignored is it is -1
828 : src_len is the length of the source area in bytes.
829 : Return the number of bytes occupied by the string in src.
830 : The resulting string in "dest" is always null terminated.
831 : **/
832 :
833 178 : _PUBLIC_ ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
834 : {
835 178 : if (flags & STR_ASCII) {
836 178 : return pull_ascii_string(dest, src, dest_len, src_len, flags);
837 0 : } else if (flags & STR_UNICODE) {
838 0 : return pull_ucs2(dest, src, dest_len, src_len, flags);
839 : } else {
840 0 : smb_panic("pull_string requires either STR_ASCII or STR_UNICODE flag to be set");
841 : return -1;
842 : }
843 : }
|