1
/* HTFormat: The format manager in the WWW Library
2
MANAGE DIFFERENT DOCUMENT FORMATS
4
Here we describe the functions of the HTFormat module which handles conversion between
5
different data representations. (In MIME parlance, a representation is known as a
6
content-type. In WWW the term "format" is often used as it is shorter).
8
This module is implemented by HTFormat.c. This hypertext document is used to generate
9
the HTFormat.h include file. Part of the WWW library.
25
We use the HTAtom object for holding representations. This allows faster manipulation
26
(comparison and copying) than if we stayed with strings.
29
typedef HTAtom * HTFormat; /* A representation (content-type), held as an atom */
33
These macros (which used to be constants) define some basic internally referenced
34
representations. The www/xxx ones are of course not MIME standard.
36
www/source is an output format which leaves the input untouched. It is useful for
37
diagnostics, and for users who want to see the original, whatever it is.
41
/* #define WWW_SOURCE HTAtom_for("www/source") */ /* Whatever it was originally*/
42
extern HTAtom * WWW_SOURCE; /* Atom for www/source: calculated once, heavily used */
46
www/present represents the user's perception of the document. If you convert to
47
www/present, you present the material to the user.
50
#define WWW_PRESENT HTAtom_for("www/present") /* The user's perception of the document */
52
#define WWW_DEBUG HTAtom_for("www/debug") /* The user's perception of debug information */
55
WWW_DEBUG represents the user's perception of debug information, for example sent as an
56
HTML document in an HTTP redirection message.
62
The message/rfc822 format means a MIME message or a plain text message with no MIME
63
header. This is what is returned by an HTTP server.
66
#define WWW_MIME HTAtom_for("www/mime") /* A MIME message, or plain text with no MIME header */
69
For parsing only the header. - kw
71
#define WWW_MIME_HEAD HTAtom_for("message/x-rfc822-head") /* For parsing only the header */
75
www/print is like www/present except it represents a printed copy.
78
#define WWW_PRINT HTAtom_for("www/print") /* Like www/present, but a printed copy */
82
www/unknown is a really unknown type. Some default action is appropriate.
85
#define WWW_UNKNOWN HTAtom_for("www/unknown") /* Really unknown type; some default action is appropriate */
89
www/dired signals directory edit mode.
91
#define WWW_DIRED HTAtom_for("www/dired") /* Signals directory edit mode */
96
These are regular MIME types. HTML is assumed to be added by the W3 code.
97
application/octet-stream was mistakenly application/binary in earlier libwww versions
101
#define WWW_PLAINTEXT HTAtom_for("text/plain") /* Plain text */
102
#define WWW_POSTSCRIPT HTAtom_for("application/postscript") /* PostScript document */
103
#define WWW_RICHTEXT HTAtom_for("application/rtf") /* Rich Text Format (RTF) document */
104
#define WWW_AUDIO HTAtom_for("audio/basic") /* Basic audio */
105
#define WWW_HTML HTAtom_for("text/html") /* HTML document */
106
#define WWW_BINARY HTAtom_for("application/octet-stream") /* Was application/binary in earlier libwww versions */
110
We must include the following file after defining HTFormat, to which it makes reference.
116
typedef HTAtom* HTEncoding; /* An encoding name, held as an atom */
120
The following are values for the MIME content transfer encodings:
123
#define WWW_ENC_7BIT HTAtom_for("7bit") /* A "dummy" encoding: accepted by IsUnityEnc */
124
#define WWW_ENC_8BIT HTAtom_for("8bit") /* A "dummy" encoding: accepted by IsUnityEnc */
125
#define WWW_ENC_BINARY HTAtom_for("binary") /* A "dummy" encoding: accepted by IsUnityEnc */
132
#define WWW_ENC_COMPRESS HTAtom_for("compress") /* A real encoding: not accepted by IsUnityEnc */
135
Does a string designate a real encoding, or is it just
136
a "dummy" as for example 7bit, 8bit, and binary?
138
/*
 * IsUnityEncStr: does the string senc name a "dummy" encoding?
 *
 * Evaluates to non-zero when senc is NULL, is the empty string, or is
 * exactly one of "identity", "8bit", "binary" or "7bit"; evaluates to
 * zero for every other string.  The comparison is case-sensitive.
 *
 * senc is evaluated more than once, so pass an expression without side
 * effects.  The expansion calls strcmp(), so <string.h> must be in scope
 * wherever the macro is used.
 *
 * The continuation lines must stay contiguous: any line between them that
 * is not part of the expression cuts the macro body short.
 */
#define IsUnityEncStr(senc) \
	((senc) == NULL || *(senc) == '\0' || \
	 strcmp((senc), "identity") == 0 || \
	 strcmp((senc), "8bit") == 0 || \
	 strcmp((senc), "binary") == 0 || \
	 strcmp((senc), "7bit") == 0)
142
/*
 * IsUnityEnc: does the encoding atom enc name a "dummy" encoding?
 *
 * Evaluates to non-zero when enc is NULL or is the same atom as the one
 * for "identity", WWW_ENC_8BIT, WWW_ENC_BINARY or WWW_ENC_7BIT; the atoms
 * are compared by pointer.
 *
 * enc is evaluated more than once, so pass an expression without side
 * effects.
 *
 * The continuation lines must stay contiguous: any line between them that
 * is not part of the expression cuts the macro body short.
 */
#define IsUnityEnc(enc) \
	((enc) == NULL || (enc) == HTAtom_for("identity") || \
	 (enc) == WWW_ENC_8BIT || (enc) == WWW_ENC_BINARY || \
	 (enc) == WWW_ENC_7BIT)
147
#include <HTAnchor.h>
151
The HTPresentation and HTConverter types
153
This HTPresentation structure represents a possible conversion algorithm from one
154
format to another. It includes a pointer to a conversion routine. The conversion
155
routine returns a stream to which data should be fed. See also HTStreamStack which
156
scans the list of registered converters and calls one. See the initialisation module
157
for a list of conversion routines.
160
typedef struct _HTPresentation HTPresentation; /* A possible conversion from one format to another */
162
typedef HTStream * HTConverter PARAMS((
163
HTPresentation * pres,
164
HTParentAnchor * anchor,
167
struct _HTPresentation {
168
HTAtom * rep; /* representation name atomized */
169
HTAtom * rep_out; /* resulting representation */
170
HTConverter * converter; /* routine to gen the stream stack */
171
char * command; /* MIME-format string */
172
float quality; /* Between 0 (bad) and 1 (good) */
176
BOOL get_accept; /* list in "Accept:" for GET */
181
The list of presentations is kept by this module. It is also scanned by modules which
182
want to know the set of formats supported, for example.
185
extern HTList * HTPresentations; /* The list of presentations kept by this module */
189
The default presentation is used when no other is appropriate
192
extern HTPresentation* default_presentation; /* Used when no other presentation is appropriate */
196
HTSetPresentation: Register a system command to present a format
200
rep is the MIME - style format name
202
command is the MAILCAP - style command template
204
quality A degradation factor 0..1.0
206
secs A limit on the time user will wait (0.0 for infinity)
209
maxbytes A limit on the length acceptable as input (0 infinite)
212
extern void HTSetPresentation PARAMS((
213
CONST char * representation,
214
CONST char * command,
217
double secs_per_byte,
224
HTSetConversion: Register a conversion routine
228
rep_in is the content-type input
230
rep_out is the resulting content-type
232
converter is the routine to make the stream to do it
236
extern void HTSetConversion PARAMS((
238
CONST char * rep_out,
239
HTConverter * converter,
249
HTStreamStack: Create a stack of streams
251
This is the routine which actually sets up the conversion. It currently checks only for
252
direct conversions, but multi-stage conversions are foreseen. It takes a stream into
253
which the output should be sent in the final format, builds the conversion stack, and
254
returns a stream into which the data in the input format should be fed. The anchor is
255
passed because hypertext objects load information into the anchor object which
259
extern HTStream * HTStreamStack PARAMS((
262
HTStream* stream_out,
263
HTParentAnchor* anchor));
266
HTReorderPresentation: put presentation near head of list
268
Look up a presentation (exact match only) and, if found, reorder
269
it to the start of the HTPresentations list. - kw
272
extern void HTReorderPresentation PARAMS((
274
HTFormat format_out));
277
* Setup 'get_accept' flag to denote presentations that are not redundant,
278
* and will be listed in "Accept:" header.
280
/* Set the 'get_accept' flag on the presentations that are not redundant. */
extern void HTFilterPresentations NOPARAMS;
284
HTStackValue: Find the cost of a filter stack
286
Must return the cost of the same stack which HTStreamStack would set up.
290
format_in The format of the data to be converted
292
format_out The format required
294
initial_value The intrinsic "value" of the data before conversion on a scale
297
length The number of bytes expected in the input format
300
extern float HTStackValue PARAMS((
306
/*
 * Sentinel "value" returned if no conversion is found.
 *
 * The replacement list is parenthesized so that the leading minus sign
 * always stays attached to the constant wherever the macro is expanded.
 *
 * NOTE(review): this is a double constant.  If a function returning float
 * hands it back, the value is rounded to float, so compare against
 * (float) NO_VALUE_FOUND rather than NO_VALUE_FOUND - confirm at callers.
 */
#define NO_VALUE_FOUND (-1e20) /* returned if none found */
308
/* Display the page while transfer in progress
309
** -------------------------------------------
311
** Repaint the page only when necessary.
312
** This is a traverse call for HText_pageDisplay() - it works!
315
/* Display the page while transfer is in progress, repainting only when necessary. */
extern void HTDisplayPartial NOPARAMS;
317
/* NOTE(review): presumably ends the partial display started by HTDisplayPartial - confirm in HTFormat.c */
extern void HTFinishDisplayPartial NOPARAMS;
321
HTCopy: Copy a socket to a stream
323
This is used by the protocol engines to send data down a stream, typically one which
324
has been generated by HTStreamStack.
327
extern int HTCopy PARAMS((
328
HTParentAnchor * anchor,
336
HTFileCopy: Copy a file to a stream
338
This is used by the protocol engines to send data down a stream, typically one which
339
has been generated by HTStreamStack. It is currently called by HTParseFile
342
extern int HTFileCopy PARAMS((
347
#ifdef USE_SOURCE_CACHE
351
HTMemCopy: Copy a memory chunk to a stream
353
This is used by the protocol engines to send data down a stream, typically one which
354
has been generated by HTStreamStack. It is currently called by HTParseMem
357
extern int HTMemCopy PARAMS((
365
HTCopyNoCR: Copy a socket to a stream, stripping CR characters.
367
It is slower than HTCopy .
371
extern void HTCopyNoCR PARAMS((
372
HTParentAnchor * anchor,
379
Clear input buffer and set file number
381
This routine and the one below provide simple character input from sockets. (They are
382
left over from the older architecture and may not be used very much.) The existence of
383
a common routine and buffer saves memory space in small implementations.
386
/* Clear the common input buffer and set the file number used for character input. */
extern void HTInitInput PARAMS((int file_number));
390
Get next character from buffer
393
/* NOTE(review): presumably set when HTGetCharacter is interrupted - confirm in HTFormat.c */
extern int interrupted_in_htgetcharacter;
394
/* Get the next character from the input buffer. */
extern int HTGetCharacter NOPARAMS;
399
HTParseSocket: Parse a socket given its format
401
This routine is called by protocol modules to load an object. uses HTStreamStack and
402
the copy routines above. Returns HT_LOADED if successful, <0 if not.
405
extern int HTParseSocket PARAMS((
408
HTParentAnchor *anchor,
414
HTParseFile: Parse a File through a file pointer
416
This routine is called by protocols modules to load an object. uses
417
HTStreamStack and HTFileCopy. Returns HT_LOADED if successful, can also
418
return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
421
extern int HTParseFile PARAMS((
424
HTParentAnchor *anchor,
428
#ifdef USE_SOURCE_CACHE
431
HTParseMem: Parse a document in memory
433
This routine is called by protocols modules to load an object. uses
434
HTStreamStack and HTMemCopy. Returns HT_LOADED if successful, can also
435
return <0 for failure.
438
extern int HTParseMem PARAMS((
441
HTParentAnchor *anchor,
449
HTParseGzFile: Parse a gzip'ed File through a file pointer
451
This routine is called by protocols modules to load an object. uses
452
HTStreamStack and HTGzFileCopy. Returns HT_LOADED if successful, can also
453
return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
455
extern int HTParseGzFile PARAMS((
458
HTParentAnchor *anchor,
462
#endif /* USE_ZLIB */
467
HTParseBzFile: Parse a bzip2'ed File through a file pointer
469
This routine is called by protocols modules to load an object. uses
470
HTStreamStack and HTGzFileCopy. Returns HT_LOADED if successful, can also
471
return HT_PARTIAL_CONTENT, HT_NO_DATA, or other <0 for failure.
473
extern int HTParseBzFile PARAMS((
476
HTParentAnchor *anchor,
480
#endif /* USE_BZLIB */
484
HTNetToText: Convert Net ASCII to local representation
486
This is a filter stream suitable for taking text from a socket and passing it into a
487
stream which expects text in the local C representation. It does ASCII and newline
488
conversion. As usual, pass its output stream to it when creating it.
491
/* Create the Net-ASCII-to-local text filter stream; sink is the stream its output is passed to. */
extern HTStream * HTNetToText PARAMS ((HTStream * sink));
495
HTFormatInit: Set up default presentations and conversions
497
These are defined in HTInit.c or HTSInit.c if these have been replaced. If you don't
498
call this routine, and you don't define any presentations, then this routine will
499
automatically be called the first time a conversion is needed. However, if you
500
explicitly add some conversions (eg using HTLoadRules) then you may want also to
501
explicitly call this to get the defaults as well.
504
/* Set up the default presentations and conversions. */
extern void HTFormatInit NOPARAMS;
511
extern BOOL HTOutputSource; /* Flag: shortcut parser. NOTE(review): exact effect is not visible in this header - see HTFormat.c */
513
#endif /* HTFORMAT_H */