提交 61a3364d 编写于 作者: R Rich Felker

overhaul locale internals to treat categories roughly uniformly

previously, LC_MESSAGES was treated specially as the only category
which could be set to a locale name without a definition file, in
order to facilitate gettext message translations when no libc locale
was available. LC_NUMERIC was completely un-settable, and LC_CTYPE
stored a flag intended to be used for a possible future byte-based C
locale, instead of storing a __locale_map pointer like the other
categories use.

this patch changes all categories to be represented by pointers to
__locale_map structures, and allows locale names without definition
files to be treated as valid locales with a trivial definition when used
in any category. outwardly visible functional changes should be minor,
limited mainly to the strings read back from setlocale and the way
gettext handles translations in categories other than LC_MESSAGES.

various internal refactoring has also been performed, and improvements
in const correctness have been made.
上级 63c188ec
......@@ -8,9 +8,7 @@
/* Forward declaration; this header only stores pointers to locale maps. */
struct __locale_map;
/* Per-locale state object.
 * NOTE(review): this listing interleaves the removed and the added lines of
 * the diff, so `cat` is declared twice below. Going by the commit message,
 * the flag, the name pointer and the 4-slot array are the pre-patch members
 * and the 6-slot const array is what the patch leaves in place. */
struct __locale_struct {
/* Pre-patch: LC_CTYPE state kept as a flag rather than a map pointer. */
volatile int ctype_utf8;
/* Pre-patch: separately stored locale name for LC_MESSAGES. */
char *messages_name;
/* Pre-patch: category maps, indexed by (category - 2) in the old code. */
struct __locale_map *volatile cat[4];
/* Post-patch: one map pointer per category, indexed by the category
 * constant itself; a null entry is reported as "C" by the setlocale code. */
const struct __locale_map *volatile cat[6];
};
struct __libc {
......
......@@ -9,20 +9,20 @@ struct __locale_map {
const void *map;
size_t map_size;
char name[LOCALE_NAME_MAX+1];
struct __locale_map *next;
const struct __locale_map *next;
};
/* Pre-patch interface (removed side of the diff): applies a locale name to
 * one category of the given locale object. */
int __setlocalecat(locale_t, int, const char *);
/* Post-patch interface: resolves a (category, name) pair to a locale map.
 * It returns a null pointer for the built-in names outside LC_CTYPE. */
const struct __locale_map *__get_locale(int, const char *);
/* NOTE(review): definition not shown here; presumably looks up a message in
 * a mapped translation image of the given size - confirm. */
const char *__mo_lookup(const void *, size_t, const char *);
/* Translates a message using an explicit locale map. */
const char *__lctrans(const char *, const struct __locale_map *);
/* Translates a message using the calling thread's LC_MESSAGES map. */
const char *__lctrans_cur(const char *);
/* Pre-patch form: the category array started at category 2, hence the -2. */
#define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)-2])
/* Post-patch form: the array is indexed by the category constant directly. */
#define LCTRANS(msg, lc, loc) __lctrans(msg, (loc)->cat[(lc)])
#define LCTRANS_CUR(msg) __lctrans_cur(msg)
/* Locale object of the calling thread. */
#define CURRENT_LOCALE (__pthread_self()->locale)
/* Pre-patch form: reads the dedicated LC_CTYPE flag. */
#define CURRENT_UTF8 (__pthread_self()->locale->ctype_utf8)
/* Post-patch form: true whenever the LC_CTYPE slot holds a non-null map. */
#define CURRENT_UTF8 (!!__pthread_self()->locale->cat[LC_CTYPE])
/* Replace any earlier MB_CUR_MAX: 4 when CURRENT_UTF8 is true, otherwise 1. */
#undef MB_CUR_MAX
#define MB_CUR_MAX (CURRENT_UTF8 ? 4 : 1)
......@@ -16,5 +16,5 @@ const char *__lctrans(const char *msg, const struct __locale_map *lm)
/* Translate msg through the LC_MESSAGES map of the calling thread's current
 * locale and return whatever __lctrans_impl returns for it.
 * NOTE(review): the two return statements are the removed and the added
 * version of the same line in this diff; they differ only in how the
 * category array is indexed (offset by 2 before, direct afterwards). */
const char *__lctrans_cur(const char *msg)
{
/* Pre-patch indexing. */
return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES-2]);
/* Post-patch indexing. */
return __lctrans_impl(msg, CURRENT_LOCALE->cat[LC_MESSAGES]);
}
......@@ -15,24 +15,60 @@ const unsigned char *__map_file(const char *, size_t *);
int __munmap(void *, size_t);
char *__strchrnul(const char *, int);
static struct __locale_map *findlocale(const char *name, size_t n)
/* Environment variable consulted for each category when an empty locale
 * name is requested; the table is indexed by the category number. */
static const char envvars[][12] = {
"LC_CTYPE",
"LC_NUMERIC",
"LC_TIME",
"LC_COLLATE",
"LC_MONETARY",
"LC_MESSAGES",
};
/* Placeholder image used as the map of locales that have no definition
 * file. NOTE(review): the first word looks like the gettext .mo magic
 * number and the remaining words like an empty header - confirm against
 * the .mo lookup code, which is not shown here. */
static const uint32_t empty_mo[] = { 0x950412de, 0, -1, -1, -1 };
/* Built-in "C.UTF-8" map backed by the placeholder image; handed out for
 * LC_CTYPE so that the slot is non-null for anything other than "C" or
 * "POSIX". */
static const struct __locale_map c_dot_utf8 = {
.map = empty_mo,
.map_size = sizeof empty_mo,
.name = "C.UTF-8"
};
const struct __locale_map *__get_locale(int cat, const char *val)
{
static int lock[2];
static void *volatile loc_head;
struct __locale_map *p, *new = 0;
const struct __locale_map *p;
struct __locale_map *new = 0;
const char *path = 0, *z;
char buf[256];
size_t l;
const void *map;
size_t map_size;
size_t l, n;
if (!*val) {
(val = getenv("LC_ALL")) && *val ||
(val = getenv(envvars[cat])) && *val ||
(val = getenv("LANG")) && *val ||
(val = "C.UTF-8");
}
/* Limit name length and forbid leading dot or any slashes. */
for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++);
if (val[0]=='.' || val[n]) val = "C.UTF-8";
int builtin = (val[0]=='C' && !val[1])
|| !strcmp(val, "C.UTF-8")
|| !strcmp(val, "POSIX");
if (builtin) {
if (cat == LC_CTYPE && val[1]=='.')
return (void *)&c_dot_utf8;
return 0;
}
for (p=loc_head; p; p=p->next)
if (!strcmp(name, p->name)) return p;
if (!strcmp(val, p->name)) return p;
LOCK(lock);
for (p=loc_head; p; p=p->next)
if (!strcmp(name, p->name)) {
if (!strcmp(val, p->name)) {
UNLOCK(lock);
return p;
}
......@@ -46,9 +82,10 @@ static struct __locale_map *findlocale(const char *name, size_t n)
if (l >= sizeof buf - n - 2) continue;
memcpy(buf, path, l);
buf[l] = '/';
memcpy(buf+l+1, name, n);
memcpy(buf+l+1, val, n);
buf[l+1+n] = 0;
map = __map_file(buf, &map_size);
size_t map_size;
const void *map = __map_file(buf, &map_size);
if (map) {
new = malloc(sizeof *new);
if (!new) {
......@@ -57,58 +94,31 @@ static struct __locale_map *findlocale(const char *name, size_t n)
}
new->map = map;
new->map_size = map_size;
memcpy(new->name, name, n);
memcpy(new->name, val, n);
new->name[n] = 0;
new->next = loc_head;
loc_head = new;
break;
}
}
UNLOCK(lock);
return new;
}
static const char envvars[][12] = {
"LC_CTYPE",
"LC_NUMERIC",
"LC_TIME",
"LC_COLLATE",
"LC_MONETARY",
"LC_MESSAGES",
};
int __setlocalecat(locale_t loc, int cat, const char *val)
{
if (!*val) {
(val = getenv("LC_ALL")) && *val ||
(val = getenv(envvars[cat])) && *val ||
(val = getenv("LANG")) && *val ||
(val = "C.UTF-8");
/* If no locale definition was found, make a locale map
* object anyway to store the name, which is kept for the
* sake of being able to do message translations at the
* application level. */
if (!new && (new = malloc(sizeof *new))) {
new->map = empty_mo;
new->map_size = sizeof empty_mo;
memcpy(new->name, val, n);
new->name[n] = 0;
new->next = loc_head;
loc_head = new;
}
size_t n;
for (n=0; n<LOCALE_NAME_MAX && val[n] && val[n]!='/'; n++);
if (val[0]=='.' || val[n]) val = "C.UTF-8";
int builtin = (val[0]=='C' && !val[1])
|| !strcmp(val, "C.UTF-8")
|| !strcmp(val, "POSIX");
/* For LC_CTYPE, never return a null pointer unless the
* requested name was "C" or "POSIX". */
if (!new && cat == LC_CTYPE) new = (void *)&c_dot_utf8;
switch (cat) {
case LC_CTYPE:
loc->ctype_utf8 = !builtin || val[1]=='.';
break;
case LC_MESSAGES:
if (builtin) {
loc->messages_name[0] = 0;
} else {
memcpy(loc->messages_name, val, n);
loc->messages_name[n] = 0;
}
/* fall through */
default:
loc->cat[cat-2] = builtin ? 0 : findlocale(val, n);
case LC_NUMERIC:
break;
}
return 0;
UNLOCK(lock);
return new;
}
......@@ -84,13 +84,15 @@ char *bindtextdomain(const char *domainname, const char *dirname)
}
/* Category names used when building message catalog paths. In this listing
 * the table has six entries and is indexed by the category number directly
 * (the older lookup used category - 2). */
static const char catnames[][12] = {
"LC_CTYPE",
"LC_NUMERIC",
"LC_TIME",
"LC_COLLATE",
"LC_MONETARY",
"LC_MESSAGES",
};
/* Pre-patch lengths: strlen of LC_TIME, LC_COLLATE, LC_MONETARY and
 * LC_MESSAGES, matching the old four-entry layout. */
static const char catlens[] = { 7, 10, 11, 11 };
/* Post-patch lengths: catlens[i] equals strlen(catnames[i]) for all six
 * entries, so the path builder does not need to call strlen. */
static const char catlens[] = { 8, 10, 7, 10, 11, 11 };
struct msgcat {
struct msgcat *next;
......@@ -117,10 +119,12 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
static struct msgcat *volatile cats;
struct msgcat *p;
struct __locale_struct *loc = CURRENT_LOCALE;
struct __locale_map *lm;
const struct __locale_map *lm;
const char *dirname, *locname, *catname;
size_t dirlen, loclen, catlen, domlen;
if ((unsigned)category >= LC_ALL) goto notrans;
if (!domainname) domainname = __gettextdomain();
domlen = strlen(domainname);
......@@ -129,25 +133,15 @@ char *dcngettext(const char *domainname, const char *msgid1, const char *msgid2,
dirname = gettextdir(domainname, &dirlen);
if (!dirname) goto notrans;
switch (category) {
case LC_MESSAGES:
locname = loc->messages_name;
if (!locname || !*locname) goto notrans;
break;
case LC_TIME:
case LC_MONETARY:
case LC_COLLATE:
lm = loc->cat[category-2];
if (!lm) goto notrans;
locname = lm->name;
break;
default:
lm = loc->cat[category];
if (!lm) {
notrans:
return (char *) ((n == 1) ? msgid1 : msgid2);
}
locname = lm->name;
catname = catnames[category-2];
catlen = catlens[category-2];
catname = catnames[category];
catlen = catlens[category];
loclen = strlen(locname);
size_t namelen = dirlen+1 + loclen+1 + catlen+1 + domlen+3;
......
......@@ -5,17 +5,10 @@
/* Create a heap-allocated copy of a locale object.
 *
 * old: locale to copy; LC_GLOBAL_LOCALE selects libc.global_locale.
 * Returns the new object, or 0 when the allocation fails.
 *
 * NOTE(review): this listing interleaves removed and added diff lines, so
 * both allocation variants and both copy strategies appear. Judging by the
 * hunk sizes, the calloc call, the messages_name handling, the ctype_utf8
 * copy and the per-slot loop are the pre-patch lines; malloc plus a single
 * struct assignment is the post-patch form. */
locale_t __duplocale(locale_t old)
{
/* Pre-patch: zeroed object with trailing room for a locale name. */
locale_t new = calloc(1, sizeof *new + LOCALE_NAME_MAX + 1);
/* Post-patch: plain object; every field is overwritten by the copy below. */
locale_t new = malloc(sizeof *new);
if (!new) return 0;
/* Pre-patch: the name buffer lives directly behind the struct. */
new->messages_name = (void *)(new+1);
if (old == LC_GLOBAL_LOCALE) old = &libc.global_locale;
/* Pre-patch: field-by-field copy. */
new->ctype_utf8 = old->ctype_utf8;
if (old->messages_name)
strcpy(new->messages_name, old->messages_name);
for (size_t i=0; i<sizeof new->cat/sizeof new->cat[0]; i++)
new->cat[i] = old->cat[i];
/* Post-patch: the struct holds only map pointers, so assignment suffices. */
*new = *old;
return new;
}
......
......@@ -8,17 +8,16 @@ locale_t __newlocale(int mask, const char *name, locale_t loc)
int i;
if (!loc) {
loc = calloc(1, sizeof *loc + LOCALE_NAME_MAX + 1);
loc = malloc(sizeof *loc);
if (!loc) return 0;
loc->messages_name = (void *)(loc+1);
for (i=0; i<LC_ALL; i++)
if (!(mask & (1<<i)))
__setlocalecat(loc, i, "");
loc->cat[i] = __get_locale(i, "");
}
for (i=0; i<LC_ALL; i++)
if (mask & (1<<i))
__setlocalecat(loc, i, name);
loc->cat[i] = __get_locale(i, name);
return loc;
}
......
......@@ -5,38 +5,23 @@
#include "libc.h"
#include "atomic.h"
static char buf[2+4*(LOCALE_NAME_MAX+1)];
static char buf[LC_ALL*(LOCALE_NAME_MAX+1)];
/* Set or query one category of the global locale.
 *
 * cat:  category number.
 * name: locale name to install, or a null pointer to query only.
 * Returns the name now associated with the category, "C" when no map is
 * installed.
 *
 * NOTE(review): the name suggests the caller is expected to hold the
 * setlocale lock - confirm against the caller. This listing interleaves
 * removed and added diff lines: the __setlocalecat call and the switch are
 * the pre-patch body, the __get_locale assignment and the final return are
 * the post-patch body. */
static char *setlocale_one_unlocked(int cat, const char *name)
{
struct __locale_map *lm;
const struct __locale_map *lm;
/* Pre-patch: delegate the update to __setlocalecat. */
if (name) __setlocalecat(&libc.global_locale, cat, name);
/* Post-patch: store the resolved map in the slot, or read the slot back. */
if (name) libc.global_locale.cat[cat] = lm = __get_locale(cat, name);
else lm = libc.global_locale.cat[cat];
/* Pre-patch: each category reported its name in its own way. */
switch (cat) {
case LC_CTYPE:
return libc.global_locale.ctype_utf8 ? "C.UTF-8" : "C";
case LC_NUMERIC:
return "C";
case LC_MESSAGES:
return libc.global_locale.messages_name[0]
? libc.global_locale.messages_name : "C";
default:
lm = libc.global_locale.cat[cat-2];
return lm ? lm->name : "C";
}
/* Post-patch: uniform answer; the cast drops const from the stored name. */
return lm ? (char *)lm->name : "C";
}
char *__strchrnul(const char *, int);
char *setlocale(int cat, const char *name)
{
static volatile int lock[2];
struct __locale_map *lm;
int i, j;
if (!libc.global_locale.messages_name) {
libc.global_locale.messages_name =
buf + 2 + 3*(LOCALE_NAME_MAX+1);
}
if ((unsigned)cat > LC_ALL) return 0;
......@@ -48,34 +33,31 @@ char *setlocale(int cat, const char *name)
* performs both the serialization and deserialization, depends
* on the format, so it can easily be changed if needed. */
if (cat == LC_ALL) {
int i;
if (name) {
char part[LOCALE_NAME_MAX+1];
if (name[0] && name[1]==';'
&& strlen(name) > 2 + 3*(LOCALE_NAME_MAX+1)) {
part[0] = name[0];
part[1] = 0;
setlocale(LC_CTYPE, part);
part[LOCALE_NAME_MAX] = 0;
for (i=LC_TIME; i<LC_MESSAGES; i++) {
memcpy(part, name + 2 + (i-2)*(LOCALE_NAME_MAX+1), LOCALE_NAME_MAX);
for (j=LOCALE_NAME_MAX-1; j && part[j]==';'; j--)
part[j] = 0;
setlocale_one_unlocked(i, part);
char part[LOCALE_NAME_MAX+1] = "C.UTF-8";
const char *p = name;
for (i=0; i<LC_ALL; i++) {
const char *z = __strchrnul(p, ';');
if (z-p <= LOCALE_NAME_MAX) {
memcpy(part, p, z-p);
part[z-p] = 0;
if (*z) p = z+1;
}
setlocale_one_unlocked(LC_MESSAGES, name
+ 2 + 3*(LOCALE_NAME_MAX+1));
} else {
for (i=0; i<LC_ALL; i++)
setlocale_one_unlocked(i, name);
setlocale_one_unlocked(i, part);
}
}
memset(buf, ';', 2 + 3*(LOCALE_NAME_MAX+1));
buf[0] = libc.global_locale.ctype_utf8 ? 'U' : 'C';
for (i=LC_TIME; i<LC_MESSAGES; i++) {
lm = libc.global_locale.cat[i-2];
if (lm) memcpy(buf + 2 + (i-2)*(LOCALE_NAME_MAX+1),
lm->name, strlen(lm->name));
char *s = buf;
for (i=0; i<LC_ALL; i++) {
const struct __locale_map *lm =
libc.global_locale.cat[i];
const char *part = lm ? lm->name : "C";
size_t l = strlen(part);
memcpy(s, part, l);
s[l] = ';';
s += l+1;
}
*--s = 0;
UNLOCK(lock);
return buf;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册