Line data Source code
1 : /*
2 : ldb database library
3 :
4 : Copyright (C) Andrew Tridgell 2004
5 :
6 : ** NOTE! The following LGPL license applies to the ldb
7 : ** library. This does NOT imply that all of Samba is released
8 : ** under the LGPL
9 :
10 : This library is free software; you can redistribute it and/or
11 : modify it under the terms of the GNU Lesser General Public
12 : License as published by the Free Software Foundation; either
13 : version 3 of the License, or (at your option) any later version.
14 :
15 : This library is distributed in the hope that it will be useful,
16 : but WITHOUT ANY WARRANTY; without even the implied warranty of
17 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 : Lesser General Public License for more details.
19 :
20 : You should have received a copy of the GNU Lesser General Public
21 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
22 : */
23 :
24 : /*
25 : * Name: ldb
26 : *
27 : * Component: ldb utf8 handling
28 : *
29 : * Description: case folding and case comparison for UTF8 strings
30 : *
31 : * Author: Andrew Tridgell
32 : */
33 :
34 : #include "ldb_private.h"
35 : #include "system/locale.h"
36 :
37 : /*
38 : * Set functions for comparing and case-folding case-insensitive ldb val
39 : * strings.
40 : */
41 1487556 : void ldb_set_utf8_functions(struct ldb_context *ldb,
42 : void *context,
43 : char *(*casefold)(void *, void *, const char *, size_t),
44 : int (*casecmp)(void *ctx,
45 : const struct ldb_val *v1,
46 : const struct ldb_val *v2))
47 : {
48 1487556 : if (context) {
49 0 : ldb->utf8_fns.context = context;
50 : }
51 1487556 : if (casefold) {
52 1487556 : ldb->utf8_fns.casefold = casefold;
53 : }
54 1487556 : if (casecmp) {
55 1487556 : ldb->utf8_fns.casecmp = casecmp;
56 : }
57 1487556 : }
58 :
/*
  this allows the user to pass in a case folding function
  (and its context pointer) to handle utf8 strings.

  The comparison callback is not touched: NULL is passed for it, and
  ldb_set_utf8_functions() skips NULL arguments.
*/
void ldb_set_utf8_fns(struct ldb_context *ldb,
		      void *context,
		      char *(*casefold)(void *, void *, const char *, size_t))
{
	ldb_set_utf8_functions(ldb,
			       context,
			       casefold,
			       NULL /* keep the current casecmp */);
}
69 :
70 :
71 : /*
72 : a simple case folding function
73 : NOTE: does not handle UTF8
74 : */
75 938108 : char *ldb_casefold_default(void *context, TALLOC_CTX *mem_ctx, const char *s, size_t n)
76 : {
77 127484 : size_t i;
78 938108 : char *ret = talloc_strndup(mem_ctx, s, n);
79 938108 : if (!s) {
80 0 : errno = ENOMEM;
81 0 : return NULL;
82 : }
83 6317523 : for (i=0;ret[i];i++) {
84 5379415 : ret[i] = ldb_ascii_toupper(ret[i]);
85 : }
86 810624 : return ret;
87 : }
88 :
89 :
90 : /*
91 : * The default comparison fold function only knows ASCII. Multiple
92 : * spaces (0x20) are collapsed into one, and [a-z] map to [A-Z]. All
93 : * other bytes are compared without casefolding.
94 : *
95 : * Note that as well as not handling UTF-8, this function does not exactly
96 : * implement RFC 4518 (2.6.1. Insignificant Space Handling and Appendix B).
97 : */
98 :
99 7659 : int ldb_comparison_fold_ascii(void *ignored,
100 : const struct ldb_val *v1,
101 : const struct ldb_val *v2)
102 : {
103 7659 : const uint8_t *s1 = v1->data;
104 7659 : const uint8_t *s2 = v2->data;
105 7659 : size_t n1 = v1->length, n2 = v2->length;
106 :
107 7833 : while (n1 && *s1 == ' ') { s1++; n1--; };
108 7833 : while (n2 && *s2 == ' ') { s2++; n2--; };
109 :
110 40224 : while (n1 && n2 && *s1 && *s2) {
111 38403 : if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) {
112 4892 : break;
113 : }
114 32565 : if (*s1 == ' ') {
115 91 : while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; }
116 91 : while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; }
117 : }
118 32565 : s1++; s2++;
119 32565 : n1--; n2--;
120 : }
121 :
122 : /* check for trailing spaces only if the other pointers has
123 : * reached the end of the strings otherwise we can
124 : * mistakenly match. ex. "domain users" <->
125 : * "domainUpdates"
126 : */
127 7659 : if (n1 && *s1 == ' ' && (!n2 || !*s2)) {
128 48 : while (n1 && *s1 == ' ') { s1++; n1--; }
129 : }
130 7659 : if (n2 && *s2 == ' ' && (!n1 || !*s1)) {
131 48 : while (n2 && *s2 == ' ') { s2++; n2--; }
132 : }
133 7659 : if (n1 == 0 && n2 != 0) {
134 44 : return *s2 ? -1 : 0;
135 : }
136 7615 : if (n2 == 0 && n1 != 0) {
137 325 : return *s1 ? 1 : 0;
138 : }
139 7290 : if (n1 == 0 && n2 == 0) {
140 908 : return 0;
141 : }
142 5859 : return NUMERIC_CMP(*s1, *s2);
143 : }
144 :
145 746826 : void ldb_set_utf8_default(struct ldb_context *ldb)
146 : {
147 746826 : ldb_set_utf8_functions(ldb, NULL,
148 : ldb_casefold_default,
149 : ldb_comparison_fold_ascii);
150 746826 : }
151 :
152 763590611 : char *ldb_casefold(struct ldb_context *ldb, TALLOC_CTX *mem_ctx, const char *s, size_t n)
153 : {
154 763590611 : return ldb->utf8_fns.casefold(ldb->utf8_fns.context, mem_ctx, s, n);
155 : }
156 :
/*
  check the attribute name is valid according to rfc2251
  returns 1 if the name is ok, 0 otherwise

  Accepted names are:
    - the single character "*" (special ldb_tdb wildcard), or
    - an ASCII letter or '@' (our special identifier prefix) followed
      by any number of ASCII letters, digits or '-'.
  NULL and the empty string are rejected.
*/

int ldb_valid_attr_name(const char *s)
{
	const char *p;

	if (s == NULL || s[0] == '\0') {
		return 0;
	}

	/* handle special ldb_tdb wildcard */
	if (s[0] == '*' && s[1] == '\0') {
		return 1;
	}

	/* first char must be a 7-bit alpha (or our special '@' identifier) */
	if (((unsigned char)s[0] & 0x80) != 0) {
		return 0;
	}
	if (s[0] != '@' && !isalpha((unsigned char)s[0])) {
		return 0;
	}

	/* every following char must be a 7-bit alphanumeric or '-' */
	for (p = s + 1; *p != '\0'; p++) {
		if (((unsigned char)*p & 0x80) != 0) {
			return 0;
		}
		if (*p != '-' && !isalnum((unsigned char)*p)) {
			return 0;
		}
	}
	return 1;
}
188 :
189 786800176 : char *ldb_attr_casefold(TALLOC_CTX *mem_ctx, const char *s)
190 : {
191 14540635 : size_t i;
192 786800176 : char *ret = talloc_strdup(mem_ctx, s);
193 786800176 : if (!ret) {
194 0 : errno = ENOMEM;
195 0 : return NULL;
196 : }
197 2693190511 : for (i = 0; ret[i]; i++) {
198 1906390335 : ret[i] = ldb_ascii_toupper(ret[i]);
199 : }
200 772259541 : return ret;
201 : }
202 :
/*
  we accept either 'dn' or 'distinguishedName' for a distinguishedName

  returns 0 if attr compares equal (per ldb_attr_cmp) to one of those
  two names, -1 otherwise
*/
int ldb_attr_dn(const char *attr)
{
	if (ldb_attr_cmp(attr, "dn") == 0) {
		return 0;
	}
	if (ldb_attr_cmp(attr, "distinguishedName") == 0) {
		return 0;
	}
	return -1;
}
214 :
215 1911846556 : _PRIVATE_ char ldb_ascii_toupper(char c) {
216 : /*
217 : * We are aiming for a 1970s C-locale toupper(), when all letters
218 : * were 7-bit and behaved with true American spirit.
219 : *
220 : * For example, we don't want the "i" in "<guid=" to be upper-cased to
221 : * "İ" as would happen in some locales, or we won't be able to parse
222 : * that properly. This is unfortunate for cases where we are dealing
223 : * with real text; a search for the name "Ali" would need to be
224 : * written "Alİ" to match.
225 : */
226 1911826947 : return ('a' <= c && c <= 'z') ? c ^ 0x20 : c;
227 : }
|