/* makeConnection: open a stream socket to the host described by pHttp.
 *
 * Visible steps: resolve pHttp->pHostName with gethostbyname(), copy the
 * resolved address into a struct sockaddr_in, set AF_INET and the port
 * (pHttp->nPort converted with htons()), create an AF_INET/SOCK_STREAM
 * socket stored in pHttp->s, then connect() it.
 *
 * NOTE(review): this listing shows only selected lines (the bare numbers
 * look like original line numbers). The return type, the declaration of
 * pHost and every error-return path are not visible here.
 * NOTE(review): gethostbyname() is IPv4-only and obsolescent; getaddrinfo()
 * is the usual replacement. Confirm before changing.
 */
6
makeConnection ( HTTPInputSource *pHttp )
8
/* NOTE(review): no zero-initialisation of sock is visible in this excerpt;
   confirm sin_zero is cleared in the elided lines. */
struct sockaddr_in sock;
11
/* Get internet address of the host. */
12
/* A NULL result from gethostbyname() takes the failure branch
   (branch body not visible). */
if (!(pHost = gethostbyname ( pHttp->pHostName )))
16
/* Copy the address of the host to socket description. */
17
memcpy (&sock.sin_addr, pHost->h_addr, pHost->h_length);
19
/* Set port and protocol */
20
sock.sin_family = AF_INET;
21
/* htons() takes a 16-bit value; whether nPort is range-checked is not
   visible here. */
sock.sin_port = htons( pHttp->nPort );
23
/* Make an internet socket, stream type. */
24
/* The new descriptor is kept in pHttp->s; -1 is treated as failure. */
if ((pHttp->s = socket (AF_INET, SOCK_STREAM, 0)) == -1)
27
/* Connect the socket to the remote host. */
28
/* Any non-zero result from connect() enters the failure branch. */
if (connect (pHttp->s, (struct sockaddr *) &sock, sizeof( sock )))
30
/* ECONNREFUSED is singled out; what is done with it is not visible. */
if (errno == ECONNREFUSED)
/* parseURL: split url into host name, optional port and resource path.
 *
 * Visible results:
 *   pHttp->pHostName - copy (tmbstrndup) of the characters from index j up
 *                      to the first ':' or '/' or the end of the string.
 *   pHttp->nPort     - decimal value accumulated from the digits that
 *                      follow a ':' after the host name.
 *   pHttp->pResource - copy (tmbstrdup) of what remains after any leading
 *                      '/' characters have been skipped.
 *
 * NOTE(review): the declarations of i, j and pStr, the initial value of
 * nPort, the default-port assignment and the return statements are not
 * visible in this excerpt.
 * NOTE(review): isdigit() is called on plain char values; the C library
 * requires an argument representable as unsigned char (or EOF), so a cast
 * to unsigned char is the usual fix.
 */
39
int parseURL( HTTPInputSource *pHttp, tmbstr url )
44
/* Look for a scheme separator anywhere in the URL. */
pStr = tmbsubstr( url, "://" );
46
/* If protocol is there, but not http, bail out, else assume http. */
49
/* Presumably non-zero means the first 7 characters are not "http://"
   (strncasecmp convention) - confirm against tmbstrncasecmp. */
if (tmbstrncasecmp( url, "http://", 7 ))
55
/* Empty-bodied loop: advance i to the end of the host name. */
for (i = j; url[i] && url[i] != ':' && url[i] != '/'; i++) {}
59
/* Get the hostname. */
60
pHttp->pHostName = tmbstrndup (&url[j], i - j );
64
/* We have a colon delimiting the hostname. It should mean that
65
a port number is following it */
67
/* ++i steps over the ':' before the first port character is tested. */
if (isdigit( url[++i] )) /* A port number */
69
for (; url[i] && url[i] != '/'; i++)
71
if (isdigit( url[i] ))
72
/* NOTE(review): no upper bound on nPort is visible; a long digit run
   could overflow it or exceed 65535. */
pHttp->nPort = 10 * pHttp->nPort + (url[i] - '0');
79
else /* or just a malformed port number */
83
/* Assume default port. */
86
/* skip past the delimiting slash (we'll add it later ) */
87
while (url[i] && url[i] == '/')
89
/* The stored resource has no leading '/'. */
pHttp->pResource = tmbstrdup (url + i );
/* fillBuffer: read the next chunk from the socket into in->buffer.
 *
 * Calls recv() for up to sizeof(in->buffer) bytes and stores the result in
 * in->nBufSize. When fewer bytes than the buffer size arrive, a '\0' is
 * written just past the data; a completely full buffer is left without a
 * terminator.
 *
 * NOTE(review): the return statement and any reset of in->nextBytePos are
 * not visible in this excerpt.
 */
94
int fillBuffer( HTTPInputSource *in )
98
in->nBufSize = recv( in->s, in->buffer, sizeof( in->buffer ), 0);
100
/* NOTE(review): sizeof yields size_t, so if nBufSize is a signed int a
   recv() error (-1) is converted to a huge unsigned value and this test is
   false; the declared type of nBufSize is not visible here - confirm. */
if (in->nBufSize < sizeof( in->buffer ))
101
in->buffer[in->nBufSize] = '\0';
/* openURL: prepare `in` as a byte source backed by an HTTP GET of pUrl.
 *
 * Visible steps: start Winsock, install HTTPGetByte / HTTPUngetByte /
 * HTTPIsEOF as the stream callbacks in in->tis, reset the buffer cursors,
 * parse the URL, connect, send a GET request for in->pResource, then read
 * and skip the response header so that in->nextBytePos is left on the
 * first non-whitespace character after it.
 *
 * NOTE(review): the declarations of rc and wsaData, the loop bodies'
 * braces, the release of getCmd and the return statement are not visible
 * in this excerpt.
 */
109
int openURL( HTTPInputSource *in, tmbstr pUrl )
115
/* 514 == 0x0202, i.e. Winsock version 2.2 encoded as MAKEWORD(2, 2) would
   produce it. */
rc = WSAStartup( 514, &wsaData );
118
in->tis.getByte = (TidyGetByteFunc) HTTPGetByte;
119
in->tis.ungetByte = (TidyUngetByteFunc) HTTPUngetByte;
120
in->tis.eof = (TidyEOFFunc) HTTPIsEOF;
121
/* NOTE(review): a pointer is cast to uint; if uint is narrower than a
   pointer on the target this truncates - confirm the typedef. */
in->tis.sourceData = (uint) in;
122
in->nextBytePos = in->nextUnGotBytePos = in->nBufSize = 0;
123
/* The result of parseURL() is ignored here. */
parseURL( in, pUrl );
124
/* A zero result from makeConnection() is treated as success. */
if (0 == (rc = makeConnection( in )))
126
char ch, lastCh = '\0';
129
/* The fixed part of the request below is 37 bytes plus the terminator, so
   the 48 bytes reserved on top of the resource length are sufficient. */
char *getCmd = MemAlloc( 48 + strlen( in->pResource ));
130
/* The leading '/' that parseURL stripped from the resource is put back
   here. */
sprintf( getCmd, "GET /%s HTTP/1.0\r\nAccept: text/html\r\n\r\n", in->pResource );
131
/* The result of send() is not checked, so a short or failed send goes
   unnoticed. */
send( in->s, getCmd, strlen( getCmd ), 0 );
134
/* skip past the header information */
135
/* Refill whenever the cursor has reached the end of the buffered data;
   stop once fillBuffer() reports nothing more (result <= 0). */
while ( in->nextBytePos >= in->nBufSize
136
&& 0 < (rc = fillBuffer( in )))
140
/* Scan the buffered bytes up to the buffer size or the '\0' written by
   fillBuffer(); the increment clause is not visible here. */
for (; in->nextBytePos < sizeof( in->buffer )
141
&& 0 != in->buffer[ in->nextBytePos ];
144
ch = in->buffer[ in->nextBytePos ];
145
if (ch == '\r' || ch == '\n')
149
/* Two carriage returns or two newlines in a row,
150
that's good enough */
153
if (lastCh == '\r' || lastCh == '\n')
163
/* end of header, scan to first non-white and return */
164
/* NOTE(review): isspace() is given a plain char; a cast to unsigned char
   is the usual safeguard for bytes above 0x7F. */
while ('\0' != ch && isspace( ch ))
165
ch = in->buffer[ ++in->nextBytePos ];
/* closeURL: shut down the HTTP source.
 *
 * Closes the socket held in source->s and clears the sourceData handle in
 * the embedded stream descriptor.
 *
 * NOTE(review): release of pHostName / pResource and any Winsock cleanup
 * are not visible in this excerpt - confirm they happen in the elided
 * lines.
 */
175
void closeURL( HTTPInputSource *source )
178
closesocket( source->s );
180
source->tis.sourceData = 0;
/* HTTPGetByte: return the next byte of the HTTP source.
 *
 * Bytes pushed back with HTTPUngetByte are served first, most recently
 * pushed first. Otherwise the receive buffer is refilled once it has been
 * fully consumed, and the byte at nextBytePos is returned with the cursor
 * advanced.
 *
 * NOTE(review): the value returned when nBufSize is 0 and any reset of
 * nextBytePos after a refill are not visible in this excerpt.
 * NOTE(review): if buffer / unGetBuffer are plain char arrays, bytes above
 * 0x7F may come back as negative ints on signed-char platforms - confirm
 * the element type.
 */
187
int HTTPGetByte( HTTPInputSource *source )
189
if (source->nextUnGotBytePos)
190
return source->unGetBuffer[ --source->nextUnGotBytePos ];
191
/* Refill only if a previous receive delivered data and all of it has been
   consumed. */
if (0 != source->nBufSize && source->nextBytePos >= source->nBufSize)
193
fillBuffer( source );
195
/* A zero nBufSize takes a separate branch (body not visible). */
if (0 == source->nBufSize)
197
return source->buffer[ source->nextBytePos++ ];
/* HTTPUngetByte: push byteValue back so the next HTTPGetByte returns it.
 *
 * The byte is appended to source->unGetBuffer. Once 16 bytes are pending
 * the call does nothing and the byte is silently dropped.
 *
 * NOTE(review): 16 is presumably the declared size of unGetBuffer; the
 * declaration is not visible here - confirm, and consider deriving the
 * limit from the array itself.
 */
200
void HTTPUngetByte( HTTPInputSource *source, uint byteValue )
202
if (source->nextUnGotBytePos < 16 ) /* Only you can prevent buffer overflows */
203
/* The value is narrowed to char before it is stored. */
source->unGetBuffer[ source->nextUnGotBytePos++ ] = (char) byteValue;
/* HTTPIsEOF: report whether the HTTP source has no more bytes to deliver.
 *
 * Checks, in order: pending pushed-back bytes, then (after refilling a
 * fully consumed buffer) unread bytes in the receive buffer. As the
 * existing comments describe, either case means "not done" and the
 * fall-through means "done".
 *
 * NOTE(review): the return statements themselves are not visible in this
 * excerpt. Note that this query can call fillBuffer() and so may receive
 * from the socket.
 */
206
Bool HTTPIsEOF( HTTPInputSource *source )
208
if (source->nextUnGotBytePos)
209
/* pending ungot bytes, not done */
212
if ( 0 != source->nBufSize
213
&& source->nextBytePos >= source->nBufSize)
214
/* We've consumed the existing buffer, get another */
215
fillBuffer( source );
217
if (source->nextBytePos < source->nBufSize)
218
/* we have stuff in the buffer, must not be done. */
221
/* Nothing in the buffer, and the last receive failed, must be done. */