1
/* Copyright (C) 2002-2003 Jean-Marc Valin
4
Redistribution and use in source and binary forms, with or without
5
modification, are permitted provided that the following conditions
8
- Redistributions of source code must retain the above copyright
9
notice, this list of conditions and the following disclaimer.
11
- Redistributions in binary form must reproduce the above copyright
12
notice, this list of conditions and the following disclaimer in the
13
documentation and/or other materials provided with the distribution.
15
- Neither the name of the Xiph.org Foundation nor the names of its
16
contributors may be used to endorse or promote products derived from
17
this software without specific prior written permission.
19
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
23
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
#if !defined WIN32 && !defined _WIN32
44
#include "speex_header.h"
45
#include "speex_stereo.h"
47
#include "speex_preprocess.h"
49
#if defined WIN32 || defined _WIN32
50
#include "getopt_win.h"
51
/* We need the following two to set stdout to binary */
57
void comment_init(char **comments, int* length, char *vendor_string);
58
void comment_add(char **comments, int* length, char *tag, char *val);
61
/*Write an Ogg page to a file pointer*/
62
/* Writes one Ogg page to fp: the page header bytes first, then the page body
   bytes. The two fwrite() results are summed in `written`, so a short write on
   either part shows up as a total smaller than header_len + body_len.
   NOTE(review): the declaration of `written` and the return statement are not
   visible in this view; presumably `written` is what gets returned - confirm. */
int oe_write_page(ogg_page *page, FILE *fp)
65
/* header: page->header_len bytes starting at page->header */
written = fwrite(page->header,1,page->header_len, fp);
66
/* body: page->body_len bytes, added to the header byte count */
written += fwrite(page->body,1,page->body_len, fp);
71
#define MAX_FRAME_SIZE 2000
72
#define MAX_FRAME_BYTES 2000
74
/* Convert input audio bits, endians and channels */
75
/* Reads one frame of PCM audio from fin and stores it in `input` as shorts.
   fin        - stream to read from
   frame_size - samples per channel in one frame
   bits       - bits per sample; bits/8 is used as the byte width of a sample
   channels   - number of interleaved channels
   lsb        - byte-order flag (its use is not visible in this view)
   input      - receives frame_size*channels samples
   buff       - optional bytes already consumed from the stream; the fread at
                offset 12 below is presumably paired with a copy of 12 bytes
                from buff into `in` - confirm, the copy itself is not visible
   size       - remaining byte count, decreased by one frame's worth of bytes
   NOTE(review): `in` holds MAX_FRAME_BYTES*2 bytes and no bound check on
   bits/8*channels*frame_size is visible here - confirm one exists. */
static int read_samples(FILE *fin,int frame_size, int bits, int channels, int lsb, short * input, char *buff, int *size)
77
/* raw byte staging buffer for one frame */
unsigned char in[MAX_FRAME_BYTES*2];
88
/* NOTE(review): callers in this file also pass size==NULL, so this line is
   presumably guarded by a NULL test that is not visible here - confirm */
*size -= bits/8*channels*frame_size;
93
/* first 12 bytes are assumed to be in `in` already; read the rest of the frame */
nb_read = fread(in+12,1,bits/8*channels*frame_size-12, fin) + 12;
97
/* normal case: read a whole frame worth of bytes */
nb_read = fread(in,1,bits/8*channels* frame_size, fin);
99
/* convert the byte count into a count of multi-channel sample frames */
nb_read /= bits/8*channels;
101
/*fprintf (stderr, "%d\n", nb_read);*/
108
/* Convert 8->16 bits */
109
/* iterates from the last sample down to the first */
for(i=frame_size*channels-1;i>=0;i--)
111
/* shift the byte into the high half and flip the top bit (unsigned -> signed) */
s[i]=(in[i]<<8)^0x8000;
115
/* convert to our endian format */
116
for(i=0;i<frame_size*channels;i++)
125
/* FIXME: This is probably redundant now */
126
/* copy to the short input buffer */
127
for (i=0;i<frame_size*channels;i++)
129
input[i]=(short)s[i];
132
/* visits the samples past what was actually read (a short final frame);
   the loop body is not visible in this view */
for (i=nb_read*channels;i<frame_size*channels;i++)
143
printf ("speexenc (Speex encoder) version " VERSION " (compiled " __DATE__ ")\n");
144
printf ("Copyright (C) 2002-2003 Jean-Marc Valin\n");
149
printf ("speexenc version " VERSION "\n");
150
printf ("Copyright (C) 2002-2003 Jean-Marc Valin\n");
155
printf ("Usage: speexenc [options] input_file output_file\n");
157
printf ("Encodes input_file using Speex. It can read the WAV or raw files.\n");
159
printf ("input_file can be:\n");
160
printf (" filename.wav wav file\n");
161
printf (" filename.* Raw PCM file (any extension other than .wav)\n");
162
printf (" - stdin\n");
164
printf ("output_file can be:\n");
165
printf (" filename.spx Speex file\n");
166
printf (" - stdout\n");
168
printf ("Options:\n");
169
printf (" -n, --narrowband Narrowband (8 kHz) input file\n");
170
printf (" -w, --wideband Wideband (16 kHz) input file\n");
171
printf (" -u, --ultra-wideband \"Ultra-wideband\" (32 kHz) input file\n");
172
printf (" --quality n Encoding quality (0-10), default 8\n");
173
printf (" --bitrate n Encoding bit-rate (use bit-rate n or lower)\n");
174
printf (" --vbr Enable variable bit-rate (VBR)\n");
175
printf (" --abr rate Enable average bit-rate (ABR) at rate bps\n");
176
printf (" --vad Enable voice activity detection (VAD)\n");
177
printf (" --dtx Enable file-based discontinuous transmission (DTX)\n");
178
printf (" --comp n Set encoding complexity (0-10), default 3\n");
179
printf (" --nframes n Number of frames per Ogg packet (1-10), default 1\n");
180
printf (" --denoise Denoise the input before encoding\n");
181
printf (" --agc Apply adaptive gain control (AGC) before encoding\n");
182
printf (" --comment Add the given string as an extra comment. This may be\n");
183
printf (" used multiple times\n");
184
printf (" --author Author of this track\n");
185
printf (" --title Title for this track\n");
186
printf (" -h, --help This help\n");
187
printf (" -v, --version Version information\n");
188
printf (" -V Verbose mode (show bit-rate)\n");
189
printf ("Raw input options:\n");
190
printf (" --rate n Sampling rate for raw input\n");
191
printf (" --stereo Consider raw input as stereo\n");
192
printf (" --le Raw input is little-endian\n");
193
printf (" --be Raw input is big-endian\n");
194
printf (" --8bit Raw input is 8-bit unsigned\n");
195
printf (" --16bit Raw input is 16-bit signed\n");
196
printf ("Default raw PCM input is 16-bit, little-endian, mono\n");
198
printf ("More information is available from the Speex site: http://www.speex.org\n");
200
printf ("Please report bugs to the mailing list `speex-dev@xiph.org'.\n");
204
/* Entry point of the speexenc command-line encoder.
   Visible flow: parse options with getopt_long, open the input and output
   streams, pick a Speex mode from the sampling rate, write the Speex header
   packet and the comment packet into an Ogg stream, then encode the input
   frame by frame and write the resulting Ogg pages to the output.
   NOTE(review): many declarations and braces of this function are not visible
   in this view; the comments below only describe the visible statements. */
int main(int argc, char **argv)
207
int option_index = 0;
208
char *inFile, *outFile;
210
/* one frame of 16-bit samples handed to the encoder */
short input[MAX_FRAME_SIZE];
218
/* stays NULL until a band option or the sampling rate selects a mode */
const SpeexMode *mode=NULL;
221
/* output buffer for the packed bits of one Ogg packet */
char cbits[MAX_FRAME_BYTES];
222
/* All long options use flag=NULL and val=0, so they are distinguished below
   by comparing long_options[option_index].name with strcmp. */
struct option long_options[] =
224
{"wideband", no_argument, NULL, 0},
225
{"ultra-wideband", no_argument, NULL, 0},
226
{"narrowband", no_argument, NULL, 0},
227
{"vbr", no_argument, NULL, 0},
228
{"abr", required_argument, NULL, 0},
229
{"vad", no_argument, NULL, 0},
230
{"dtx", no_argument, NULL, 0},
231
{"quality", required_argument, NULL, 0},
232
{"bitrate", required_argument, NULL, 0},
233
{"nframes", required_argument, NULL, 0},
234
{"comp", required_argument, NULL, 0},
235
{"denoise", no_argument, NULL, 0},
236
{"agc", no_argument, NULL, 0},
237
{"help", no_argument, NULL, 0},
238
{"quiet", no_argument, NULL, 0},
239
{"le", no_argument, NULL, 0},
240
{"be", no_argument, NULL, 0},
241
{"8bit", no_argument, NULL, 0},
242
{"16bit", no_argument, NULL, 0},
243
{"stereo", no_argument, NULL, 0},
244
{"rate", required_argument, NULL, 0},
245
{"version", no_argument, NULL, 0},
246
{"version-short", no_argument, NULL, 0},
247
{"comment", required_argument, NULL, 0},
248
{"author", required_argument, NULL, 0},
249
{"title", required_argument, NULL, 0},
257
float vbr_quality=-1;
262
int bytes_written=0, ret, result;
267
char *vendor_string = "Encoded with Speex " VERSION;
270
int close_in=0, close_out=0;
273
/* running totals used for the average bit-rate shown in verbose output */
double cumul_bits=0, enc_frames=0;
274
/* the first 12 bytes of the input, read up front to detect a RIFF header */
char first_bytes[12];
277
/* only created when --denoise or --agc is requested */
SpeexPreprocessState *preprocess = NULL;
278
int denoise_enabled=0, agc_enabled=0;
280
/* start the comment packet with the vendor string and zero user comments */
comment_init(&comments, &comments_length, vendor_string);
282
/*Process command-line options*/
285
c = getopt_long (argc, argv, "nwuhvV",
286
long_options, &option_index);
293
if (strcmp(long_options[option_index].name,"narrowband")==0)
296
} else if (strcmp(long_options[option_index].name,"wideband")==0)
299
} else if (strcmp(long_options[option_index].name,"ultra-wideband")==0)
301
mode=&speex_uwb_mode;
302
} else if (strcmp(long_options[option_index].name,"vbr")==0)
305
} else if (strcmp(long_options[option_index].name,"abr")==0)
307
/* NOTE(review): atoi() cannot report malformed input; strtol() would allow it */
abr_enabled=atoi(optarg);
310
fprintf (stderr, "Invalid ABR value: %d\n", abr_enabled);
313
} else if (strcmp(long_options[option_index].name,"vad")==0)
316
} else if (strcmp(long_options[option_index].name,"dtx")==0)
319
} else if (strcmp(long_options[option_index].name,"quality")==0)
321
/* the same argument sets both the integer quality and the float VBR quality */
quality = atoi (optarg);
322
vbr_quality=atof(optarg);
323
} else if (strcmp(long_options[option_index].name,"bitrate")==0)
325
bitrate = atoi (optarg);
326
} else if (strcmp(long_options[option_index].name,"nframes")==0)
328
nframes = atoi (optarg);
333
} else if (strcmp(long_options[option_index].name,"comp")==0)
335
complexity = atoi (optarg);
336
} else if (strcmp(long_options[option_index].name,"denoise")==0)
339
} else if (strcmp(long_options[option_index].name,"agc")==0)
342
} else if (strcmp(long_options[option_index].name,"help")==0)
346
} else if (strcmp(long_options[option_index].name,"quiet")==0)
349
} else if (strcmp(long_options[option_index].name,"version")==0)
353
} else if (strcmp(long_options[option_index].name,"version-short")==0)
357
} else if (strcmp(long_options[option_index].name,"le")==0)
360
} else if (strcmp(long_options[option_index].name,"be")==0)
363
} else if (strcmp(long_options[option_index].name,"8bit")==0)
366
} else if (strcmp(long_options[option_index].name,"16bit")==0)
369
} else if (strcmp(long_options[option_index].name,"stereo")==0)
372
} else if (strcmp(long_options[option_index].name,"rate")==0)
375
} else if (strcmp(long_options[option_index].name,"comment")==0)
377
/* free-form comment: no tag prefix, the argument is stored as given */
comment_add(&comments, &comments_length, NULL, optarg);
378
} else if (strcmp(long_options[option_index].name,"author")==0)
380
comment_add(&comments, &comments_length, "author=", optarg);
381
} else if (strcmp(long_options[option_index].name,"title")==0)
383
comment_add(&comments, &comments_length, "title=", optarg);
405
mode=&speex_uwb_mode;
419
/* the output file name is the second non-option argument */
outFile=argv[optind+1];
421
/*Initialize Ogg stream struct*/
423
/* rand() supplies the serial number of the Ogg stream */
if (ogg_stream_init(&os, rand())==-1)
425
fprintf(stderr,"Error: stream init failed\n");
429
/* "-" as input name selects stdin */
if (strcmp(inFile, "-")==0)
431
#if defined WIN32 || defined _WIN32
432
_setmode(_fileno(stdin), _O_BINARY);
438
#if defined WIN32 || defined _WIN32
439
fin = fopen(inFile, "rb");
441
fin = fopen(inFile, "r");
452
/* NOTE(review): the fread() result is not checked on this line, so an input
   shorter than 12 bytes leaves first_bytes partly unset - confirm */
fread(first_bytes, 1, 12, fin);
453
/* NOTE(review): both halves of this condition are the same "RIFF" test at
   offset 0; the second was presumably meant to test the "WAVE" tag at
   first_bytes+8 - confirm against the RIFF/WAVE header layout */
if (strncmp(first_bytes,"RIFF",4)==0 && strncmp(first_bytes,"RIFF",4)==0)
455
if (read_wav_header(fin, &rate, &chan, &fmt, &size)==-1)
458
lsb=1; /* CHECK: exists big-endian .wav ?? */
464
/* By default, use narrowband/8 kHz */
467
/* a mode was forced on the command line and a rate is known: only sanity-check
   the pairing and warn when the mode does not match the rate band */
} else if (mode && rate)
471
fprintf (stderr, "Error: sampling rate too high: %d Hz, try down-sampling\n", rate);
473
} else if (rate>25000)
475
if (mode!=&speex_uwb_mode)
477
fprintf (stderr, "Warning: Trying to encode in %s at %d Hz. I'll do it but I suggest you try ultra-wideband instead\n", mode->modeName , rate);
479
} else if (rate>12500)
481
if (mode!=&speex_wb_mode)
483
fprintf (stderr, "Warning: Trying to encode in %s at %d Hz. I'll do it but I suggest you try wideband instead\n", mode->modeName , rate);
485
} else if (rate>=6000)
487
if (mode!=&speex_nb_mode)
489
fprintf (stderr, "Warning: Trying to encode in %s at %d Hz. I'll do it but I suggest you try narrowband instead\n", mode->modeName , rate);
492
fprintf (stderr, "Error: sampling rate too low: %d Hz\n", rate);
499
fprintf (stderr, "Error: sampling rate too high: %d Hz, try down-sampling\n", rate);
501
/* second chain with the same rate thresholds: here the rate selects the mode */
} else if (rate>25000)
503
mode=&speex_uwb_mode;
504
} else if (rate>12500)
507
} else if (rate>=6000)
511
fprintf (stderr, "Error: Sampling rate too low: %d Hz\n", rate);
516
if (mode==&speex_nb_mode)
518
else if (mode==&speex_wb_mode)
520
else if (mode==&speex_uwb_mode)
525
if (rate!=8000 && rate!=16000 && rate!=32000)
526
fprintf (stderr, "Warning: Speex is only optimized for 8, 16 and 32 kHz. It will still work at %d Hz but your mileage may vary\n", rate);
528
/* fill in the stream header, then override the per-run fields below */
speex_init_header(&header, rate, 1, mode);
529
header.frames_per_packet=nframes;
530
header.vbr=vbr_enabled;
531
header.nb_channels = chan;
534
char *st_string="mono";
538
fprintf (stderr, "Encoding %d Hz audio using %s mode (%s)\n",
539
header.rate, mode->modeName, st_string);
541
/*fprintf (stderr, "Encoding %d Hz audio at %d bps using %s mode\n",
542
header.rate, mode->bitrate, mode->modeName);*/
544
/*Initialize Speex encoder*/
545
st = speex_encoder_init(mode);
547
/* "-" as output name selects stdout */
if (strcmp(outFile,"-")==0)
549
#if defined WIN32 || defined _WIN32
550
_setmode(_fileno(stdout), _O_BINARY);
556
#if defined WIN32 || defined _WIN32
557
fout = fopen(outFile, "wb");
559
fout = fopen(outFile, "w");
570
/*Write header (format will change)*/
573
/* NOTE(review): op.bytes is written through an int*; if the field is wider
   than int only part of it is set - confirm the type of ogg_packet.bytes */
op.packet = (unsigned char *)speex_header_to_packet(&header, (int*)&(op.bytes));
578
ogg_stream_packetin(&os, &op);
581
/* second packet: the comment block built by comment_init/comment_add */
op.packet = (unsigned char *)comments;
582
op.bytes = comments_length;
587
ogg_stream_packetin(&os, &op);
589
/* flush so the header packets are written out before any audio data */
while((result = ogg_stream_flush(&os, &og)))
592
ret = oe_write_page(&og, fout);
593
/* anything other than the full page size counts as a write failure */
if(ret != og.header_len + og.body_len)
595
fprintf (stderr,"Error: failed writing header to output stream\n");
599
bytes_written += ret;
605
/* query the frame size of the mode, then push the user settings to the encoder */
speex_encoder_ctl(st, SPEEX_GET_FRAME_SIZE, &frame_size);
606
speex_encoder_ctl(st, SPEEX_SET_COMPLEXITY, &complexity);
607
speex_encoder_ctl(st, SPEEX_SET_SAMPLING_RATE, &rate);
612
speex_encoder_ctl(st, SPEEX_SET_VBR_QUALITY, &vbr_quality);
614
speex_encoder_ctl(st, SPEEX_SET_QUALITY, &quality);
618
/* NOTE(review): the warning text is about --bitrate overriding --quality, but
   the visible condition tests vbr_enabled - confirm the intended condition */
if (quality >= 0 && vbr_enabled)
619
fprintf (stderr, "Warning: --bitrate option is overriding --quality\n");
620
speex_encoder_ctl(st, SPEEX_SET_BITRATE, &bitrate);
625
speex_encoder_ctl(st, SPEEX_SET_VBR, &tmp);
626
} else if (vad_enabled)
629
speex_encoder_ctl(st, SPEEX_SET_VAD, &tmp);
632
speex_encoder_ctl(st, SPEEX_SET_DTX, &tmp);
633
/* warn about option combinations that have no effect or are redundant */
if (dtx_enabled && !(vbr_enabled || abr_enabled || vad_enabled))
635
fprintf (stderr, "Warning: --dtx is useless without --vad, --vbr or --abr\n");
636
} else if ((vbr_enabled || abr_enabled) && (vad_enabled))
638
fprintf (stderr, "Warning: --vad is already implied by --vbr or --abr\n");
643
speex_encoder_ctl(st, SPEEX_SET_ABR, &abr_enabled);
646
/* one preprocessor state serves both denoising and AGC */
if (denoise_enabled || agc_enabled)
648
preprocess = speex_preprocess_state_init(frame_size, rate);
649
speex_preprocess_ctl(preprocess, SPEEX_PREPROCESS_SET_DENOISE, &denoise_enabled);
650
speex_preprocess_ctl(preprocess, SPEEX_PREPROCESS_SET_AGC, &agc_enabled);
653
speex_bits_init(&bits);
657
/* first frame: one call passes first_bytes as buff (the 12 bytes read during
   header detection), the other passes a byte counter instead; the condition
   choosing between them is not visible in this view */
if (read_samples(fin,frame_size,fmt,chan,lsb,input, first_bytes, NULL))
660
if (read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, &size))
663
/*Main encoding loop (one frame per iteration)*/
667
/*Encode current frame*/
669
/* stereo information is written to the bit-stream before the frame itself */
speex_encode_stereo(input, frame_size, &bits);
672
/* optional denoise/AGC pass runs on the samples before encoding */
speex_preprocess(preprocess, input, NULL);
674
speex_encode(st, input, &bits);
679
speex_encoder_ctl(st, SPEEX_GET_BITRATE, &tmp);
685
/* with a varying bit-rate also show the average over the frames so far */
if (vad_enabled || vbr_enabled || abr_enabled)
686
fprintf (stderr, "Bitrate is use: %d bps (average %d bps) ", tmp, (int)(cumul_bits/enc_frames));
688
fprintf (stderr, "Bitrate is use: %d bps ", tmp);
695
/* read the next frame, with or without the remaining-size counter */
if (read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, &size))
701
if (read_samples(fin,frame_size,fmt,chan,lsb,input, NULL, NULL))
708
/* a packet is only emitted once nframes frames have been accumulated */
if ((id+1)%nframes!=0)
710
speex_bits_insert_terminator(&bits);
711
nbBytes = speex_bits_write(&bits, cbits, MAX_FRAME_BYTES);
712
speex_bits_reset(&bits);
713
op.packet = (unsigned char *)cbits;
720
op.granulepos = (id+nframes)*frame_size;
721
/* packet numbers 0 and 1 belong to the header and comment packets */
op.packetno = 2+id/nframes;
722
ogg_stream_packetin(&os, &op);
724
/*Write all new pages (most likely 0 or 1)*/
725
while (ogg_stream_pageout(&os,&og))
727
ret = oe_write_page(&og, fout);
728
if(ret != og.header_len + og.body_len)
730
/* NOTE(review): this loop writes data pages, yet the message says "header" */
fprintf (stderr,"Error: failed writing header to output stream\n");
734
bytes_written += ret;
737
/* the input ended in the middle of a packet: pad it up to nframes frames */
if ((id+1)%nframes!=0)
739
while ((id+1)%nframes!=0)
742
/* packs the value 15 in 5 bits once per missing frame; presumably a
   terminator code - confirm against the Speex bit-stream documentation */
speex_bits_pack(&bits, 15, 5);
744
nbBytes = speex_bits_write(&bits, cbits, MAX_FRAME_BYTES);
745
op.packet = (unsigned char *)cbits;
749
op.granulepos = (id+nframes)*frame_size;
750
op.packetno = 2+id/nframes;
751
ogg_stream_packetin(&os, &op);
753
/*Flush all pages left to be written*/
754
while (ogg_stream_flush(&os, &og))
756
ret = oe_write_page(&og, fout);
757
if(ret != og.header_len + og.body_len)
759
/* NOTE(review): same "header" wording as above although this is the final flush */
fprintf (stderr,"Error: failed writing header to output stream\n");
763
bytes_written += ret;
767
/* release the encoder, the bit buffer and the Ogg stream state.
   NOTE(review): no release of `preprocess` or of the comments buffer is
   visible in this view - confirm they are freed elsewhere */
speex_encoder_destroy(st);
768
speex_bits_destroy(&bits);
769
ogg_stream_clear(&os);
779
Comments will be stored in the Vorbis style.
780
It is described in the "Structure" section of
781
http://www.xiph.org/ogg/vorbis/doc/v-comment.html
783
The comment header is decoded as follows:
784
1) [vendor_length] = read an unsigned integer of 32 bits
785
2) [vendor_string] = read a UTF-8 vector as [vendor_length] octets
786
3) [user_comment_list_length] = read an unsigned integer of 32 bits
787
4) iterate [user_comment_list_length] times {
788
5) [length] = read an unsigned integer of 32 bits
789
6) this iteration's user comment = read a UTF-8 vector as [length] octets
791
7) [framing_bit] = read a single bit as boolean
792
8) if ( [framing_bit] unset or end of packet ) then ERROR
795
If you have troubles, please write to ymnk@jcraft.com.
798
/* readint(buf, base): assembles an integer from the bytes of buf starting at
   index base. The visible terms put buf[base+3] in bits 24-31, buf[base+2] in
   bits 16-23 and buf[base+1] in bits 8-15; each shifted byte is masked to its
   own 8-bit field. NOTE(review): the term for buf[base] is not visible in this
   view - presumably it supplies bits 0-7 (little-endian layout) - confirm. */
#define readint(buf, base) (((buf[base+3]<<24)&0xff000000)| \
799
((buf[base+2]<<16)&0xff0000)| \
800
((buf[base+1]<<8)&0xff00)| \
802
/* writeint(buf, base, val): stores the four low-order bytes of val into
   buf[base..base+3], least significant byte at buf[base] and most significant
   at buf[base+3]. val is evaluated once per stored byte, so it should be free
   of side effects. */
#define writeint(buf, base, val) do{ buf[base+3]=((val)>>24)&0xff; \
803
buf[base+2]=((val)>>16)&0xff; \
804
buf[base+1]=((val)>>8)&0xff; \
805
buf[base]=(val)&0xff; \
808
/* Builds the initial comment packet: a 4-byte vendor length, the vendor string
   bytes (copied without a terminating NUL), and a 4-byte user comment count
   set to zero.
   comments      - presumably receives the allocated buffer (the assignment is
                   not visible in this view)
   length        - presumably receives the buffer size in bytes (likewise)
   vendor_string - NUL-terminated vendor text placed at the start of the packet
   NOTE(review): no check of the malloc() result is visible in this view -
   confirm one exists before the writes below. */
void comment_init(char **comments, int* length, char *vendor_string)
810
int vendor_length=strlen(vendor_string);
811
int user_comment_list_length=0;
812
/* 4 bytes vendor length + vendor text + 4 bytes comment count */
int len=4+vendor_length+4;
813
char *p=(char*)malloc(len);
816
writeint(p, 0, vendor_length);
817
memcpy(p+4, vendor_string, vendor_length);
818
/* the comment count sits right after the vendor text */
writeint(p, 4+vendor_length, user_comment_list_length);
822
/* Appends one user comment to the comment packet and increments the stored
   comment count.
   comments - in/out pointer to the packet buffer (the buffer is reallocated)
   length   - current packet size in bytes; the new entry is written at this
              offset
   tag      - optional prefix such as "title="; may be NULL for a free-form
              comment
   val      - comment text, must not be NULL (strlen is applied to it)
   The new entry is a 4-byte length followed by tag and val back to back,
   without a terminating NUL.
   NOTE(review): the realloc() result is assigned straight back to p, so on
   failure the original pointer held in p is lost; no NULL check is visible in
   this view - confirm. */
void comment_add(char **comments, int* length, char *tag, char *val)
825
/* the vendor length is needed to locate the comment count field */
int vendor_length=readint(p, 0);
826
int user_comment_list_length=readint(p, 4+vendor_length);
827
int tag_len=(tag?strlen(tag):0);
828
int val_len=strlen(val);
829
/* grow by the 4-byte length prefix plus the comment bytes */
int len=(*length)+4+tag_len+val_len;
831
p=(char*)realloc(p, len);
835
writeint(p, *length, tag_len+val_len); /* length of comment */
836
if(tag) memcpy(p+*length+4, tag, tag_len); /* comment */
837
memcpy(p+*length+4+tag_len, val, val_len); /* comment */
838
/* bump the comment count stored right after the vendor string */
writeint(p, 4+vendor_length, user_comment_list_length+1);