LCOV - code coverage report
Current view: top level - lib/ldb/common - ldb_utf8.c (source / functions) Hit Total Coverage
Test: coverage report for master 98b443d9 Lines: 69 80 86.2 %
Date: 2024-05-31 13:13:24 Functions: 9 10 90.0 %

          Line data    Source code
       1             : /*
       2             :    ldb database library
       3             : 
       4             :    Copyright (C) Andrew Tridgell  2004
       5             : 
       6             :      ** NOTE! The following LGPL license applies to the ldb
       7             :      ** library. This does NOT imply that all of Samba is released
       8             :      ** under the LGPL
       9             : 
      10             :    This library is free software; you can redistribute it and/or
      11             :    modify it under the terms of the GNU Lesser General Public
      12             :    License as published by the Free Software Foundation; either
      13             :    version 3 of the License, or (at your option) any later version.
      14             : 
      15             :    This library is distributed in the hope that it will be useful,
      16             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      17             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
      18             :    Lesser General Public License for more details.
      19             : 
      20             :    You should have received a copy of the GNU Lesser General Public
      21             :    License along with this library; if not, see <http://www.gnu.org/licenses/>.
      22             : */
      23             : 
      24             : /*
      25             :  *  Name: ldb
      26             :  *
      27             :  *  Component: ldb utf8 handling
      28             :  *
      29             :  *  Description: case folding and case comparison for UTF8 strings
      30             :  *
      31             :  *  Author: Andrew Tridgell
      32             :  */
      33             : 
      34             : #include "ldb_private.h"
      35             : #include "system/locale.h"
      36             : 
      37             : /*
      38             :  * Set functions for comparing and case-folding case-insensitive ldb val
      39             :  * strings.
      40             :  */
      41     1487556 : void ldb_set_utf8_functions(struct ldb_context *ldb,
      42             :                             void *context,
      43             :                             char *(*casefold)(void *, void *, const char *, size_t),
      44             :                             int (*casecmp)(void *ctx,
      45             :                                            const struct ldb_val *v1,
      46             :                                            const struct ldb_val *v2))
      47             : {
      48     1487556 :         if (context) {
      49           0 :                 ldb->utf8_fns.context = context;
      50             :         }
      51     1487556 :         if (casefold) {
      52     1487556 :                 ldb->utf8_fns.casefold = casefold;
      53             :         }
      54     1487556 :         if (casecmp) {
      55     1487556 :                 ldb->utf8_fns.casecmp = casecmp;
      56             :         }
      57     1487556 : }
      58             : 
      59             : /*
      60             :   this allow the user to pass in a caseless comparison
      61             :   function to handle utf8 caseless comparisons
      62             :  */
      63           0 : void ldb_set_utf8_fns(struct ldb_context *ldb,
      64             :                       void *context,
      65             :                       char *(*casefold)(void *, void *, const char *, size_t))
      66             : {
      67           0 :         ldb_set_utf8_functions(ldb, context, casefold, NULL);
      68           0 : }
      69             : 
      70             : 
      71             : /*
      72             :   a simple case folding function
      73             :   NOTE: does not handle UTF8
      74             : */
      75      938108 : char *ldb_casefold_default(void *context, TALLOC_CTX *mem_ctx, const char *s, size_t n)
      76             : {
      77      127484 :         size_t i;
      78      938108 :         char *ret = talloc_strndup(mem_ctx, s, n);
      79      938108 :         if (!s) {
      80           0 :                 errno = ENOMEM;
      81           0 :                 return NULL;
      82             :         }
      83     6317523 :         for (i=0;ret[i];i++) {
      84     5379415 :                 ret[i] = ldb_ascii_toupper(ret[i]);
      85             :         }
      86      810624 :         return ret;
      87             : }
      88             : 
      89             : 
      90             : /*
      91             :  * The default comparison fold function only knows ASCII. Multiple
      92             :  * spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All
      93             :  * other bytes are compared without casefolding.
      94             :  *
      95             :  * Note that as well as not handling UTF-8, this function does not exactly
      96             :  * implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B).
      97             :  */
      98             : 
      99        7659 : int ldb_comparison_fold_ascii(void *ignored,
     100             :                               const struct ldb_val *v1,
     101             :                               const struct ldb_val *v2)
     102             : {
     103        7659 :         const uint8_t *s1 = v1->data;
     104        7659 :         const uint8_t *s2 = v2->data;
     105        7659 :         size_t n1 = v1->length, n2 = v2->length;
     106             : 
     107        7833 :         while (n1 && *s1 == ' ') { s1++; n1--; };
     108        7833 :         while (n2 && *s2 == ' ') { s2++; n2--; };
     109             : 
     110       40224 :         while (n1 && n2 && *s1 && *s2) {
     111       38403 :                 if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
     112        4892 :                         break;
     113             :                 }
     114       32565 :                 if (*s1 == ' ') {
     115          91 :                         while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
     116          91 :                         while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
     117             :                 }
     118       32565 :                 s1++; s2++;
     119       32565 :                 n1--; n2--;
     120             :         }
     121             : 
     122             :         /* check for trailing spaces only if the other pointers has
     123             :          * reached the end of the strings otherwise we can
     124             :          * mistakenly match.  ex. "domain users" <->
     125             :          * "domainUpdates"
     126             :          */
     127        7659 :         if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
     128          48 :                 while (n1 && *s1 == ' ') { s1++; n1--; }
     129             :         }
     130        7659 :         if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
     131          48 :                 while (n2 && *s2 == ' ') { s2++; n2--; }
     132             :         }
     133        7659 :         if (n1 == 0 && n2 != 0) {
     134          44 :                 return *s2 ? -1 : 0;
     135             :         }
     136        7615 :         if (n2 == 0 && n1 != 0) {
     137         325 :                 return *s1 ? 1 : 0;
     138             :         }
     139        7290 :         if (n1 == 0 && n2 == 0) {
     140         908 :                 return 0;
     141             :         }
     142        5859 :         return NUMERIC_CMP(*s1, *s2);
     143             : }
     144             : 
     145      746826 : void ldb_set_utf8_default(struct ldb_context *ldb)
     146             : {
     147      746826 :         ldb_set_utf8_functions(ldb, NULL,
     148             :                           ldb_casefold_default,
     149             :                           ldb_comparison_fold_ascii);
     150      746826 : }
     151             : 
     152   763590611 : char *ldb_casefold(struct ldb_context *ldb, TALLOC_CTX *mem_ctx, const char *s, size_t n)
     153             : {
     154   763590611 :         return ldb->utf8_fns.casefold(ldb->utf8_fns.context, mem_ctx, s, n);
     155             : }
     156             : 
     157             : /*
     158             :   check the attribute name is valid according to rfc2251
     159             :   returns 1 if the name is ok
     160             :  */
     161             : 
     162      218036 : int ldb_valid_attr_name(const char *s)
     163             : {
     164       38896 :         size_t i;
     165             : 
     166      218036 :         if (!s || !s[0])
     167           0 :                 return 0;
     168             : 
     169             :         /* handle special ldb_tdb wildcard */
     170      218036 :         if (strcmp(s, "*") == 0) return 1;
     171             : 
     172     3974005 :         for (i = 0; s[i]; i++) {
     173     3755971 :                 if (! isascii(s[i])) {
     174           0 :                         return 0;
     175             :                 }
     176     3755971 :                 if (i == 0) { /* first char must be an alpha (or our special '@' identifier) */
     177      218036 :                         if (! (isalpha(s[i]) || (s[i] == '@'))) {
     178           2 :                                 return 0;
     179             :                         }
     180             :                 } else {
     181     3537935 :                         if (! (isalnum(s[i]) || (s[i] == '-'))) {
     182           0 :                                 return 0;
     183             :                         }
     184             :                 }
     185             :         }
     186      179138 :         return 1;
     187             : }
     188             : 
     189   786800176 : char *ldb_attr_casefold(TALLOC_CTX *mem_ctx, const char *s)
     190             : {
     191    14540635 :         size_t i;
     192   786800176 :         char *ret = talloc_strdup(mem_ctx, s);
     193   786800176 :         if (!ret) {
     194           0 :                 errno = ENOMEM;
     195           0 :                 return NULL;
     196             :         }
     197  2693190511 :         for (i = 0; ret[i]; i++) {
     198  1906390335 :                 ret[i] = ldb_ascii_toupper(ret[i]);
     199             :         }
     200   772259541 :         return ret;
     201             : }
     202             : 
     203             : /*
     204             :   we accept either 'dn' or 'distinguishedName' for a distinguishedName
     205             : */
     206   483203649 : int ldb_attr_dn(const char *attr)
     207             : {
     208   483203649 :         if (ldb_attr_cmp(attr, "dn") == 0 ||
     209   483202784 :             ldb_attr_cmp(attr, "distinguishedName") == 0) {
     210     6384958 :                 return 0;
     211             :         }
     212   465879867 :         return -1;
     213             : }
     214             : 
     215  1911846556 : _PRIVATE_ char ldb_ascii_toupper(char c) {
     216             :         /*
     217             :          * We are aiming for a 1970s C-locale toupper(), when all letters
     218             :          * were 7-bit and behaved with true American spirit.
     219             :          *
     220             :          * For example, we don't want the "i" in "<guid=" to be upper-cased to
     221             :          * "İ" as would happen in some locales, or we won't be able to parse
     222             :          * that properly. This is unfortunate for cases where we are dealing
     223             :          * with real text; a search for the name "Ali" would need to be
     224             :          * written "Alİ" to match.
     225             :          */
     226  1911826947 :         return ('a' <= c && c <= 'z') ? c ^ 0x20 : c;
     227             : }

Generated by: LCOV version 1.14