diff --git a/Makefile b/Makefile index 315ad97..ad73456 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,7 @@ LIB_SRCS := \ fmtcheck.c \ nlist.c \ progname.c \ + radixsort.c \ vis.c unvis.c \ $(LIB_SRCS_GEN) LIB_SRCS_GEN := $(patsubst %,src/%,$(LIB_SRCS_GEN)) @@ -93,6 +94,8 @@ LIB_MANS := \ humanize_number.3 \ fmtcheck.3 \ mergesort.3 \ + radixsort.3 \ + sradixsort.3 \ nlist.3 \ pidfile.3 \ setmode.3 \ diff --git a/Versions b/Versions index cac2efe..51dc9ec 100644 --- a/Versions +++ b/Versions @@ -70,5 +70,7 @@ LIBBSD_0.3 { getpeereid; mergesort; + radixsort; + sradixsort; } LIBBSD_0.2; diff --git a/include/bsd/stdlib.h b/include/bsd/stdlib.h index bfe3027..ea790bc 100644 --- a/include/bsd/stdlib.h +++ b/include/bsd/stdlib.h @@ -54,6 +54,10 @@ void setprogname(const char *); int heapsort (void *, size_t, size_t, int (*)(const void *, const void *)); int mergesort(void *base, size_t nmemb, size_t size, int (*cmp)(const void *, const void *)); +int radixsort(const unsigned char **base, int nmemb, + const unsigned char *table, unsigned endbyte); +int sradixsort(const unsigned char **base, int nmemb, + const unsigned char *table, unsigned endbyte); void *reallocf(void *ptr, size_t size); diff --git a/src/radixsort.3 b/src/radixsort.3 new file mode 100644 index 0000000..dfa65f1 --- /dev/null +++ b/src/radixsort.3 @@ -0,0 +1,160 @@ +.\" Copyright (c) 1990, 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)radixsort.3 8.2 (Berkeley) 1/27/94 +.\" $FreeBSD$ +.\" +.Dd January 27, 1994 +.Dt RADIXSORT 3 +.Os +.Sh NAME +.Nm radixsort , sradixsort +.Nd radix sort +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In limits.h +.In stdlib.h +.Ft int +.Fn radixsort "const unsigned char **base" "int nmemb" "const unsigned char *table" "unsigned endbyte" +.Ft int +.Fn sradixsort "const unsigned char **base" "int nmemb" "const unsigned char *table" "unsigned endbyte" +.Sh DESCRIPTION +The +.Fn radixsort +and +.Fn sradixsort +functions +are implementations of radix sort. +.Pp +These functions sort an array of pointers to byte strings, the initial +member of which is referenced by +.Fa base . +The byte strings may contain any values; the end of each string +is denoted by the user-specified value +.Fa endbyte . +.Pp +Applications may specify a sort order by providing the +.Fa table +argument. +If +.Pf non- Dv NULL , +.Fa table +must reference an array of +.Dv UCHAR_MAX ++ 1 bytes which contains the sort +weight of each possible byte value. +The end-of-string byte must have a sort weight of 0 or 255 +(for sorting in reverse order). +More than one byte may have the same sort weight. +The +.Fa table +argument +is useful for applications which wish to sort different characters +equally, for example, providing a table with the same weights +for A-Z as for a-z will result in a case-insensitive sort. +If +.Fa table +is NULL, the contents of the array are sorted in ascending order +according to the +.Tn ASCII +order of the byte strings they reference and +.Fa endbyte +has a sorting weight of 0. +.Pp +The +.Fn sradixsort +function is stable, that is, if two elements compare as equal, their +order in the sorted array is unchanged. +The +.Fn sradixsort +function uses additional memory sufficient to hold +.Fa nmemb +pointers. +.Pp +The +.Fn radixsort +function is not stable, but uses no additional memory. +.Pp +These functions are variants of most-significant-byte radix sorting; in +particular, see +.An "D.E. Knuth" Ns 's +.%T "Algorithm R" +and section 5.2.5, exercise 10. +They take linear time relative to the number of bytes in the strings. +.Sh RETURN VALUES +.Rv -std radixsort +.Sh ERRORS +.Bl -tag -width Er +.It Bq Er EINVAL +The value of the +.Fa endbyte +element of +.Fa table +is not 0 or 255. +.El +.Pp +Additionally, the +.Fn sradixsort +function +may fail and set +.Va errno +for any of the errors specified for the library routine +.Xr malloc 3 . +.Sh SEE ALSO +.Xr sort 1 , +.Xr qsort 3 +.Pp +.Rs +.%A Knuth, D.E. +.%D 1968 +.%B "The Art of Computer Programming" +.%T "Sorting and Searching" +.%V Vol. 3 +.%P pp. 170-178 +.Re +.Rs +.%A Paige, R. +.%D 1987 +.%T "Three Partition Refinement Algorithms" +.%J "SIAM J. Comput." +.%V Vol. 16 +.%N No. 6 +.Re +.Rs +.%A McIlroy, P. +.%D 1993 +.%B "Engineering Radix Sort" +.%T "Computing Systems" +.%V Vol. 6:1 +.%P pp. 5-27 +.Re +.Sh HISTORY +The +.Fn radixsort +function first appeared in +.Bx 4.4 . diff --git a/src/radixsort.c b/src/radixsort.c new file mode 100644 index 0000000..82ff1bc --- /dev/null +++ b/src/radixsort.c @@ -0,0 +1,327 @@ +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy and by Dan Bernstein at New York University, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95"; +#endif /* LIBC_SCCS and not lint */ +#include +__FBSDID("$FreeBSD$"); + +/* + * Radixsort routines. + * + * Program r_sort_a() is unstable but uses O(logN) extra memory for a stack. + * Use radixsort(a, n, trace, endchar) for this case. + * + * For stable sorting (using N extra pointers) use sradixsort(), which calls + * r_sort_b(). + * + * For a description of this code, see D. McIlroy, P. McIlroy, K. Bostic, + * "Engineering Radix Sort". + */ + +#include +#include +#include +#include + +typedef struct { + const u_char **sa; + int sn, si; +} stack; + +static inline void simplesort +(const u_char **, int, int, const u_char *, u_int); +static void r_sort_a(const u_char **, int, int, const u_char *, u_int); +static void r_sort_b(const u_char **, const u_char **, int, int, + const u_char *, u_int); + +#define THRESHOLD 20 /* Divert to simplesort(). */ +#define SIZE 512 /* Default stack size. */ + +#define SETUP { \ + if (tab == NULL) { \ + tr = tr0; \ + for (c = 0; c < endch; c++) \ + tr0[c] = c + 1; \ + tr0[c] = 0; \ + for (c++; c < 256; c++) \ + tr0[c] = c; \ + endch = 0; \ + } else { \ + endch = tab[endch]; \ + tr = tab; \ + if (endch != 0 && endch != 255) { \ + errno = EINVAL; \ + return (-1); \ + } \ + } \ +} + +int +radixsort(a, n, tab, endch) + const u_char **a, *tab; + int n; + u_int endch; +{ + const u_char *tr; + int c; + u_char tr0[256]; + + SETUP; + r_sort_a(a, n, 0, tr, endch); + return (0); +} + +int +sradixsort(a, n, tab, endch) + const u_char **a, *tab; + int n; + u_int endch; +{ + const u_char *tr, **ta; + int c; + u_char tr0[256]; + + SETUP; + if (n < THRESHOLD) + simplesort(a, n, 0, tr, endch); + else { + if ((ta = malloc(n * sizeof(a))) == NULL) + return (-1); + r_sort_b(a, ta, n, 0, tr, endch); + free(ta); + } + return (0); +} + +#define empty(s) (s >= sp) +#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si +#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i +#define swap(a, b, t) t = a, a = b, b = t + +/* Unstable, in-place sort. */ +static void +r_sort_a(a, n, i, tr, endch) + const u_char **a; + int n, i; + const u_char *tr; + u_int endch; +{ + static int count[256], nc, bmin; + int c; + const u_char **ak, *r; + stack s[SIZE], *sp, *sp0, *sp1, temp; + int *cp, bigc; + const u_char **an, *t, **aj, **top[256]; + + /* Set up stack. */ + sp = s; + push(a, n, i); + while (!empty(s)) { + pop(a, n, i); + if (n < THRESHOLD) { + simplesort(a, n, i, tr, endch); + continue; + } + an = a + n; + + /* Make character histogram. */ + if (nc == 0) { + bmin = 255; /* First occupied bin, excluding eos. */ + for (ak = a; ak < an;) { + c = tr[(*ak++)[i]]; + if (++count[c] == 1 && c != endch) { + if (c < bmin) + bmin = c; + nc++; + } + } + if (sp + nc > s + SIZE) { /* Get more stack. */ + r_sort_a(a, n, i, tr, endch); + continue; + } + } + + /* + * Special case: if all strings have the same + * character at position i, move on to the next + * character. + */ + if (nc == 1 && count[bmin] == n) { + push(a, n, i+1); + nc = count[bmin] = 0; + continue; + } + + /* + * Set top[]; push incompletely sorted bins onto stack. + * top[] = pointers to last out-of-place element in bins. + * count[] = counts of elements in bins. + * Before permuting: top[c-1] + count[c] = top[c]; + * during deal: top[c] counts down to top[c-1]. + */ + sp0 = sp1 = sp; /* Stack position of biggest bin. */ + bigc = 2; /* Size of biggest bin. */ + if (endch == 0) /* Special case: set top[eos]. */ + top[0] = ak = a + count[0]; + else { + ak = a; + top[255] = an; + } + for (cp = count + bmin; nc > 0; cp++) { + while (*cp == 0) /* Find next non-empty pile. */ + cp++; + if (*cp > 1) { + if (*cp > bigc) { + bigc = *cp; + sp1 = sp; + } + push(ak, *cp, i+1); + } + top[cp-count] = ak += *cp; + nc--; + } + swap(*sp0, *sp1, temp); /* Play it safe -- biggest bin last. */ + + /* + * Permute misplacements home. Already home: everything + * before aj, and in bin[c], items from top[c] on. + * Inner loop: + * r = next element to put in place; + * ak = top[r[i]] = location to put the next element. + * aj = bottom of 1st disordered bin. + * Outer loop: + * Once the 1st disordered bin is done, ie. aj >= ak, + * aj<-aj + count[c] connects the bins in a linked list; + * reset count[c]. + */ + for (aj = a; aj < an; *aj = r, aj += count[c], count[c] = 0) + for (r = *aj; aj < (ak = --top[c = tr[r[i]]]);) + swap(*ak, r, t); + } +} + +/* Stable sort, requiring additional memory. */ +static void +r_sort_b(a, ta, n, i, tr, endch) + const u_char **a, **ta; + int n, i; + const u_char *tr; + u_int endch; +{ + static int count[256], nc, bmin; + int c; + const u_char **ak, **ai; + stack s[512], *sp, *sp0, *sp1, temp; + const u_char **top[256]; + int *cp, bigc; + + sp = s; + push(a, n, i); + while (!empty(s)) { + pop(a, n, i); + if (n < THRESHOLD) { + simplesort(a, n, i, tr, endch); + continue; + } + + if (nc == 0) { + bmin = 255; + for (ak = a + n; --ak >= a;) { + c = tr[(*ak)[i]]; + if (++count[c] == 1 && c != endch) { + if (c < bmin) + bmin = c; + nc++; + } + } + if (sp + nc > s + SIZE) { + r_sort_b(a, ta, n, i, tr, endch); + continue; + } + } + + sp0 = sp1 = sp; + bigc = 2; + if (endch == 0) { + top[0] = ak = a + count[0]; + count[0] = 0; + } else { + ak = a; + top[255] = a + n; + count[255] = 0; + } + for (cp = count + bmin; nc > 0; cp++) { + while (*cp == 0) + cp++; + if ((c = *cp) > 1) { + if (c > bigc) { + bigc = c; + sp1 = sp; + } + push(ak, c, i+1); + } + top[cp-count] = ak += c; + *cp = 0; /* Reset count[]. */ + nc--; + } + swap(*sp0, *sp1, temp); + + for (ak = ta + n, ai = a+n; ak > ta;) /* Copy to temp. */ + *--ak = *--ai; + for (ak = ta+n; --ak >= ta;) /* Deal to piles. */ + *--top[tr[(*ak)[i]]] = *ak; + } +} + +static inline void +simplesort(a, n, b, tr, endch) /* insertion sort */ + const u_char **a; + int n, b; + const u_char *tr; + u_int endch; +{ + u_char ch; + const u_char **ak, **ai, *s, *t; + + for (ak = a+1; --n >= 1; ak++) + for (ai = ak; ai > a; ai--) { + for (s = ai[0] + b, t = ai[-1] + b; + (ch = tr[*s]) != endch; s++, t++) + if (ch != tr[*t]) + break; + if (ch >= tr[*t]) + break; + swap(ai[0], ai[-1], s); + } +} diff --git a/src/sradixsort.3 b/src/sradixsort.3 new file mode 100644 index 0000000..86a95a3 --- /dev/null +++ b/src/sradixsort.3 @@ -0,0 +1 @@ +.so man3/radixsort.3