summaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
Diffstat (limited to 'libc')
-rw-r--r--libc/README31
-rw-r--r--libc/example.c75
-rw-r--r--libc/getaddrinfo-idn.txt117
3 files changed, 223 insertions, 0 deletions
diff --git a/libc/README b/libc/README
new file mode 100644
index 0000000..d57412e
--- /dev/null
+++ b/libc/README
@@ -0,0 +1,31 @@
+Libidn libc/README -- Instructions for building as a GNU Libc add-on.
+Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Simon Josefsson
+See the end for copying conditions.
+
+GNU Libidn is now part of GNU Libc, so the stand-alone patch and
+instructions that were distributed here before have been removed.
+
+See getaddrinfo-idn.txt for the write-up of the API that is used in
+Libc.
+
+Simply build Libc as you would normally, but specify "libidn" as an
+add-on, as in --enable-add-ons=libidn to configure.
+
+Once installed, you may build and run the sample still distributed in
+this directory, perhaps as follows:
+
+$ gcc -o example example.c -L/usr/local/glibc/lib -Wl,-rpath,/usr/local/glibc/lib -nostdinc -I/usr/local/glibc/include -I/usr/include -I/usr/lib/gcc-lib/i386-linux/2.95.4/include
+$ CHARSET=iso-8859-1 ./example
+locale charset `iso-8859-1'
+gettaddrinfo(räksmörgås.josefsson.org):
+address `217.13.230.178'
+canonical name `178.230.13.217.in-addr.dgcsystems.net'
+$
+
+Internally the domain name xn--rksmrgs-5wao1o.josefsson.org is looked
+up in DNS.
+
+----------------------------------------------------------------------
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
diff --git a/libc/example.c b/libc/example.c
new file mode 100644
index 0000000..02c9bcb
--- /dev/null
+++ b/libc/example.c
@@ -0,0 +1,75 @@
+/* example.c --- Example code showing how to use IDN enabled getaddrinfo().
+ * Copyright (C) 2003, 2004 Simon Josefsson
+ *
+ * This file is part of GNU Libidn.
+ *
+ * GNU Libidn is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * GNU Libidn is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with GNU Libidn; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ *
+ */
+
+#define _GNU_SOURCE 1
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <locale.h> /* setlocale() */
+#include <stdio.h> /* printf() */
+#include <string.h> /* memset() */
+
+/*
+ * Compiling against IDN enabled Libc:
+ *
+ * $ gcc -o example example.c -L/usr/local/glibc/lib -Wl,-rpath,/usr/local/glibc/lib -nostdinc -I/usr/local/glibc/include -I/usr/include -I/usr/lib/gcc-lib/i486-linux/3.3.3/include
+ * $ CHARSET=iso-8859-1 ./example
+ * locale charset `iso-8859-1'
+ * gettaddrinfo(räksmörgås.josefsson.org):
+ * address `217.13.230.178'
+ * canonical name `178.230.13.217.in-addr.dgcsystems.net'
+ * $
+ *
+ * Internally the domain name xn--rksmrgs-5wao1o.josefsson.org is
+ * looked up in DNS.
+ */
+
+/* Look up a host name with the IDN-enabled getaddrinfo() and print the
+   address and canonical name of the first result.
+
+   The name is taken from argv[1] when given, otherwise a built-in
+   non-ASCII sample name is used.  It is passed in the locale's
+   character set, so the locale is initialized from the environment
+   before the lookup.
+
+   Returns 0 when a result was printed, 1 when the lookup failed.  */
+int
+main (int argc, char *argv[])
+{
+  const char *in = argc > 1 ? argv[1] : "räksmörgås.josefsson.org";
+  struct addrinfo hints;
+  struct addrinfo *res = NULL;
+  char addrbuf[INET6_ADDRSTRLEN];
+  const char *addrstr = "ERROR";
+  int rc;
+
+  setlocale (LC_ALL, "");
+
+  //printf("locale charset `%s'\n", stringprep_locale_charset());
+
+  /* ai_family is left at 0, so results of any address family may be
+     returned; the printing code below must cope with that.  */
+  memset (&hints, 0, sizeof (hints));
+  hints.ai_flags = AI_CANONNAME | AI_IDN;
+
+  printf ("gettaddrinfo(%s):\n", in);
+  rc = getaddrinfo (in, NULL, &hints, &res);
+  if (rc)
+    {
+      printf ("gai err %d: %s\n", rc, gai_strerror (rc));
+      return 1;
+    }
+  if (!res)
+    {
+      printf ("Bad magic\n");
+      return 1;
+    }
+
+  if (res->ai_addr)
+    {
+      /* Pick the address member that matches the result's family;
+	 reading an IPv6 result as sockaddr_in would print garbage.  */
+      const void *src = NULL;
+
+      if (res->ai_family == AF_INET)
+	src = &((struct sockaddr_in *) res->ai_addr)->sin_addr;
+      else if (res->ai_family == AF_INET6)
+	src = &((struct sockaddr_in6 *) res->ai_addr)->sin6_addr;
+
+      if (src && inet_ntop (res->ai_family, src, addrbuf, sizeof (addrbuf)))
+	addrstr = addrbuf;
+    }
+
+  printf ("address `%s'\ncanonical name `%s'\n",
+	  addrstr,
+	  res->ai_canonname ? res->ai_canonname : "ERROR");
+
+  /* The result list is allocated by getaddrinfo() and owned by us.  */
+  freeaddrinfo (res);
+
+  return 0;
+}
diff --git a/libc/getaddrinfo-idn.txt b/libc/getaddrinfo-idn.txt
new file mode 100644
index 0000000..73705d6
--- /dev/null
+++ b/libc/getaddrinfo-idn.txt
@@ -0,0 +1,117 @@
+Libidn getaddrinfo-idn.txt -- Proposal for IDN support in POSIX getaddrinfo.
+Copyright (C) 2003, 2004 Simon Josefsson
+See the end for copying conditions.
+
+Background
+----------
+
+Libidn is a package for internationalized string handling based on the
+Stringprep, Punycode and Internationalized Domain Names in
+Applications (IDNA) specifications. It can be used by applications
+directly by linking to it, as is done by, e.g., Gnus, KDE, and Mutt.
+
+Having each and every application link with and perform its own IDN
+handling is not a good idea. It bloats the code and makes things
+unnecessarily complex. Only a few applications, such as web browsers
+and mail clients, will need to do this in the future, to provide good
+user interfaces for internationalization.
+
+See http://josefsson.org/libidn/ for more information.
+
+Alternative Approaches
+----------------------
+
+There are implementations that modify gethostbyname() to accept UTF-8
+strings and perform the IDNA ToASCII operation within gethostbyname().
+
+There are even implementations that assume gethostbyname (on the
+client host) performs no validation of the string and will send UTF-8
+strings out to the DNS server, and perform the IDN-conversion on the
+DNS server.
+
+Some doubts can be raised whether this is an approach that is likely
+to be standardized. It also lacks functionality: it only provides
+black-box ToASCII functionality. The application cannot extract the
+output from the ToASCII operation. More important, there is no way to
+perform a ToUnicode operation that applications may want to use for
+display purposes. Furthermore, while the first can support locale
+specific character sets (e.g., ISO-8859-1), the second approach is
+bound to either guess the character set, or always use UTF-8.
+
+See also the thread rooted in <iluel7n6bmu.fsf@latte.josefsson.org>
+posted to libc-alpha@sources.redhat.com on 08 Jan 2003.
+
+What I propose
+--------------
+
+The getaddrinfo() API should have two new flags, AI_IDN and
+AI_CANONIDN. Roughly they correspond to IDNA ToASCII and IDNA
+ToUnicode, but there are several details. Note that strings are still
+'char*', i.e. it does not use the "wide" character type, and that the
+encoding of non-ASCII strings is the current locale's character set
+(i.e., nl_langinfo(CODESET)).
+
+An application that uses AI_IDN signals to the getaddrinfo()
+implementation that the input host name may be non-ASCII and that the
+appropriate IDNA ToASCII steps should be carried out on the input, and
+the output from the ToASCII operation (if any) should be used in the
+lookup using the current resolver processing.
+
+An application that uses AI_CANONIDN signals to the getaddrinfo()
+implementation that the input host name should be put through the IDNA
+ToUnicode steps, and the output of that placed in the 'ai_canonname'
+field of the resulting structure. Normal resolver processing applies
+to the input string, of course.
+
+Consequently, an application that uses AI_IDN|AI_CANONIDN signals to
+the getaddrinfo() implementation that the input host name may be
+non-ASCII and should be put through the IDNA ToASCII steps before being
+run through the resolver, and that the input string should also be run
+through the IDNA ToUnicode steps and the output of that placed in the
+'ai_canonname' field.
+
+The semantics of AI_CANONNAME|AI_CANONIDN is that instead of running
+the ToUnicode IDNA steps on the input string, the canonical host name
+as returned by the resolver for the input string should be used in the
+ToUnicode IDNA step.
+
+Details
+-------
+
+Four new flags have been proposed: AI_IDN_ALLOW_UNASSIGNED and
+AI_IDN_USE_STD3_ASCII_RULES for getaddrinfo, and
+NI_IDN_ALLOW_UNASSIGNED and NI_IDN_USE_STD3_ASCII_RULES for getnameinfo.
+The implementation is simple: if specified, those flags will set the
+appropriate flag in the call to the IDNA functions. See the IDNA RFC
+(RFC 3490) for the meaning of those flags.
+
+Status
+------
+
+The AI_IDN flag has been implemented and shipped as a proof-of-concept
+patch for GNU Libc with GNU Libidn since January 2003. Binary libc
+packages with the patch exist for (at least) two GNU/Linux
+distributions. The AI_CANONIDN flag is not yet implemented.
+
+As of March 2004, Libidn has been integrated as an add-on in the GNU
+Libc CVS repository. The AI_CANONIDN flag has been implemented. The
+AllowUnassigned and UseSTD3ASCIIRules flags were added.
+
+Future
+------
+
+Allow non-ASCII in gethostname (and similar functions), if the
+administrator has supplied, e.g., 'option idn' in /etc/resolv.conf?
+
+Feedback
+--------
+
+This document is a work-in-progress and the details may change.
+Contact me at simon@josefsson.org to discuss changes.
+
+----------------------------------------------------------------------
+Permission is granted to anyone to make or distribute verbatim copies
+of this document, in any medium, provided that the copyright notice
+and permission notice are preserved, and that the distributor grants
+the recipient permission for further redistribution as permitted by
+this notice. Modified versions may not be made.