diff --git a/Makefile b/Makefile index 357d679..b44afd7 100644 --- a/Makefile +++ b/Makefile @@ -91,6 +91,8 @@ LIB_MANS := \ setmode.3 \ getmode.3 \ strmode.3 \ + unvis.3 \ + vis.3 \ $(LIB_MANS_GEN) LIB_MANS_GEN := $(patsubst %,src/%,$(LIB_MANS_GEN)) LIB_MANS := $(patsubst %,src/%,$(LIB_MANS)) diff --git a/src/unvis.3 b/src/unvis.3 new file mode 100644 index 0000000..f318ecf --- /dev/null +++ b/src/unvis.3 @@ -0,0 +1,198 @@ +.\" $OpenBSD: unvis.3,v 1.15 2005/07/22 03:16:58 jaredy Exp $ +.\" +.\" Copyright (c) 1989, 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd $Mdocdate: May 31 2007 $ +.Dt UNVIS 3 +.Os +.Sh NAME +.Nm unvis , +.Nm strunvis , +.Nm strnunvis +.Nd decode a visual representation of characters +.Sh LIBRARY +.ds str-Lb-libbsd Utility functions from BSD systems (libbsd, \-lbsd) +.Lb libbsd +.Sh SYNOPSIS +.In vis.h +.Ft int +.Fn unvis "char *cp" "char c" "int *astate" "int flag" +.Ft int +.Fn strunvis "char *dst" "char *src" +.Ft ssize_t +.Fn strnunvis "char *dst" "char *src" "size_t size" +.Sh DESCRIPTION +The +.Fn unvis , +.Fn strunvis +and +.Fn strnunvis +functions are used to decode a visual representation of characters, +as produced by the +.Xr vis 3 +function, back into the original form. +.Fn unvis +is called with successive characters in +.Fa c +until a valid +sequence is recognized, at which time the decoded character is +available at the character pointed to by +.Fa cp . +.Pp +.Fn strunvis +decodes the characters pointed to by +.Fa src +into the buffer pointed to by +.Fa dst . +.Pp +.Fn strnunvis +decodes the characters pointed to by +.Fa src +into the buffer pointed to by +.Fa dst , +writing a maximum of +.Fa size +bytes. +The +.Fn strunvis +function simply copies +.Fa src +to +.Fa dst , +decoding any escape sequences along the way, +and returns the number of characters placed into +.Fa dst , +or \-1 if an +invalid escape sequence was detected. +The size of +.Fa dst +should be +equal to the size of +.Fa src +(that is, no expansion takes place during decoding). 
+.Fn strunvis +terminates the destination string with a trailing NUL byte; +.Fn strnunvis +does so if +.Fa size +is larger than 0. +.Pp +The +.Fn unvis +function implements a state machine that can be used to decode an arbitrary +stream of bytes. +All state associated with the bytes being decoded is stored outside the +.Fn unvis +function (that is, a pointer to the state is passed in), so +calls decoding different streams can be freely intermixed. +To start decoding a stream of bytes, first initialize an integer +to zero. +Call +.Fn unvis +with each successive byte, along with a pointer +to this integer, and a pointer to a destination character. +.Sh RETURN VALUES +The +.Fn unvis +function has several return codes that must be handled properly. +They are: +.Bl -tag -width UNVIS_VALIDPUSH +.It Li \&0 (zero) +Another character is necessary; nothing has been recognized yet. +.It Dv UNVIS_VALID +A valid character has been recognized and is available at the location +pointed to by +.Fa cp . +.It Dv UNVIS_VALIDPUSH +A valid character has been recognized and is available at the location +pointed to by +.Fa cp ; +however, the character currently passed in should be passed in again. +.It Dv UNVIS_NOCHAR +A valid sequence was detected, but no character was produced. +This return code is necessary to indicate a logical break between characters. +.It Dv UNVIS_SYNBAD +An invalid escape sequence was detected, or the decoder is in an +unknown state. +The decoder is placed into the starting state. +.El +.Pp +When all bytes in the stream have been processed, call +.Fn unvis +one more time with +.Fa flag +set to +.Dv UNVIS_END +to extract any remaining character (the character passed in is ignored). +.Pp +The +.Fn strunvis +function returns the number of bytes written (not counting +the trailing NUL byte) or \-1 if an error occurred. 
+.Pp +The +.Fn strnunvis +function returns the number of bytes (not counting the trailing NUL byte) +that would be needed to fully convert the input string, or \-1 if an +error occurred. +.Sh EXAMPLES +The following code fragment illustrates a proper use of +.Fn unvis . +.Bd -literal -offset indent +int ch, state = 0; +char out; + +while ((ch = getchar()) != EOF) { +again: + switch(unvis(&out, ch, &state, 0)) { + case 0: + case UNVIS_NOCHAR: + break; + case UNVIS_VALID: + (void) putchar(out); + break; + case UNVIS_VALIDPUSH: + (void) putchar(out); + goto again; + case UNVIS_SYNBAD: + (void)fprintf(stderr, "bad sequence!\en"); + exit(1); + } +} +if (unvis(&out, (char)0, &state, UNVIS_END) == UNVIS_VALID) + (void) putchar(out); +.Ed +.Sh SEE ALSO +.Xr unvis 1 , +.Xr vis 1 , +.Xr vis 3 +.Sh HISTORY +The +.Fn unvis +function first appeared in +.Bx 4.4 . diff --git a/src/vis.3 b/src/vis.3 new file mode 100644 index 0000000..3da6eb1 --- /dev/null +++ b/src/vis.3 @@ -0,0 +1,321 @@ +.\" $OpenBSD: vis.3,v 1.23 2005/08/28 19:51:27 millert Exp $ +.\" +.\" Copyright (c) 1989, 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd $Mdocdate: May 31 2007 $ +.Dt VIS 3 +.Os +.Sh NAME +.Nm vis , +.Nm strvis , +.Nm strnvis , +.Nm strvisx +.Nd visually encode characters +.Sh LIBRARY +.ds str-Lb-libbsd Utility functions from BSD systems (libbsd, \-lbsd) +.Lb libbsd +.Sh SYNOPSIS +.In stdlib.h +.In vis.h +.Ft char * +.Fn vis "char *dst" "int c" "int flag" "int nextc" +.Ft int +.Fn strvis "char *dst" "const char *src" "int flag" +.Ft int +.Fn strnvis "char *dst" "const char *src" "size_t size" "int flag" +.Ft int +.Fn strvisx "char *dst" "const char *src" "size_t len" "int flag" +.Sh DESCRIPTION +The +.Fn vis +function copies into +.Fa dst +a string which represents the character +.Fa c . +If +.Fa c +needs no encoding, it is copied in unaltered. +The string is NUL terminated and a pointer to the end of the string is +returned. +The maximum length of any encoding is four +characters (not including the trailing NUL); +thus, when +encoding a set of characters into a buffer, the size of the buffer should +be four times the number of characters encoded, plus one for the trailing +NUL. 
+The +.Fa flag +parameter is used for altering the default range of +characters considered for encoding and for altering the visual +representation. +The additional character, +.Fa nextc , +is only used when selecting the +.Dv VIS_CSTYLE +encoding format (explained below). +.Pp +The +.Fn strvis , +.Fn strnvis +and +.Fn strvisx +functions copy into +.Fa dst +a visual representation of +the string +.Fa src . +The +.Fn strvis +function encodes characters from +.Fa src +up to the first NUL. +The +.Fn strnvis +function encodes characters from +.Fa src +up to the first NUL or the end of +.Fa dst , +as indicated by +.Fa size . +The +.Fn strvisx +function encodes exactly +.Fa len +characters from +.Fa src +(this +is useful for encoding a block of data that may contain NULs). +All three forms NUL terminate +.Fa dst , +except for +.Fn strnvis +when +.Fa size +is zero, in which case +.Fa dst +is not touched. +For +.Fn strvis +and +.Fn strvisx , +the size of +.Fa dst +must be four times the number +of characters encoded from +.Fa src +(plus one for the NUL). +.Fn strvis +and +.Fn strvisx +return the number of characters in +.Fa dst +(not including the trailing NUL). +.Fn strnvis +returns the length that +.Fa dst +would become if it were of unlimited size (similar to +.Xr snprintf 3 +or +.Xr strlcpy 3 ) . +This can be used to detect truncation but it also means that +the return value of +.Fn strnvis +must not be used without checking it against +.Fa size . +.Pp +The encoding is a unique, invertible representation composed entirely of +graphic characters; it can be decoded back into the original form using +the +.Xr unvis 3 +or +.Xr strunvis 3 +functions. +.Pp +There are two parameters that can be controlled: the range of +characters that are encoded, and the type +of representation used. +By default, all non-graphic characters +except space, tab, and newline are encoded +(see +.Xr isgraph 3 ) . 
+The following flags +alter this: +.Bl -tag -width VIS_WHITEX +.It Dv VIS_GLOB +Also encode magic characters recognized by +.Xr glob 3 +.Pf ( Ql * , +.Ql \&? , +.Ql \&[ ) +and +.Ql # . +.It Dv VIS_SP +Also encode space. +.It Dv VIS_TAB +Also encode tab. +.It Dv VIS_NL +Also encode newline. +.It Dv VIS_WHITE +Synonym for +.Dv VIS_SP +\&| +.Dv VIS_TAB +\&| +.Dv VIS_NL . +.It Dv VIS_SAFE +Only encode +.Dq unsafe +characters. +These are control characters which may cause common terminals to perform +unexpected functions. +Currently this form allows space, +tab, newline, backspace, bell, and return -- in addition +to all graphic characters -- unencoded. +.El +.Pp +There are three forms of encoding. +All forms use the backslash +.Ql \e +character to introduce a special +sequence; two backslashes are used to represent a real backslash. +These are the visual formats: +.Bl -tag -width VIS_CSTYLE +.It (default) +Use an +.Ql M +to represent meta characters (characters with the 8th +bit set), and use a caret +.Ql ^ +to represent control characters (see +.Xr iscntrl 3 ) . +The following formats are used: +.Bl -tag -width xxxxx +.It Dv \e^C +Represents the control character +.Ql C . +Spans characters +.Ql \e000 +through +.Ql \e037 , +and +.Ql \e177 +(as +.Ql \e^? ) . +.It Dv \eM-C +Represents character +.Ql C +with the 8th bit set. +Spans characters +.Ql \e241 +through +.Ql \e376 . +.It Dv \eM^C +Represents control character +.Ql C +with the 8th bit set. +Spans characters +.Ql \e200 +through +.Ql \e237 , +and +.Ql \e377 +(as +.Ql \eM^? ) . +.It Dv \e040 +Represents +.Tn ASCII +space. +.It Dv \e240 +Represents Meta-space. +.El +.Pp +.It Dv VIS_CSTYLE +Use C-style backslash sequences to represent standard non-printable +characters. 
+The following sequences are used to represent the indicated characters: +.Bd -unfilled -offset indent +.Li \ea Tn - BEL No (007) +.Li \eb Tn - BS No (010) +.Li \ef Tn - NP No (014) +.Li \en Tn - NL No (012) +.Li \er Tn - CR No (015) +.Li \es Tn - SP No (040) +.Li \et Tn - HT No (011) +.Li \ev Tn - VT No (013) +.Li \e0 Tn - NUL No (000) +.Ed +.Pp +When using this format, the +.Fa nextc +parameter is looked at to determine +if a NUL character can be encoded as +.Ql \e0 +instead of +.Ql \e000 . +If +.Fa nextc +is an octal digit, the latter representation is used to +avoid ambiguity. +.It Dv VIS_OCTAL +Use a three digit octal sequence. +The form is +.Ql \eddd +where +.Ar d +represents an octal digit. +.El +.Pp +There is one additional flag, +.Dv VIS_NOSLASH , +which inhibits the +doubling of backslashes and the backslash before the default +format (that is, control characters are represented by +.Ql ^C +and +meta characters as +.Ql M-C ) . +With this flag set, the encoding is +ambiguous and non-invertible. +.Sh SEE ALSO +.Xr unvis 1 , +.Xr vis 1 , +.Xr snprintf 3 , +.Xr strlcpy 3 , +.Xr unvis 3 +.Sh HISTORY +The +.Fn vis , +.Fn strvis +and +.Fn strvisx +functions first appeared in +.Bx 4.4 . +The +.Fn strnvis +function first appeared in +.Ox 2.9 .