diff --git a/README.md b/README.md index a6db722..2c55a4d 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,7 @@ typedef struct xcparams_t { int extract_images_sz; // Size of the array extract_images_ts int video_time_base; // New video encoder time_base (1/video_time_base) + int video_frame_duration_ts; // Frame duration of the output video in time base int debug_frame_level; int connection_timeout; // Connection timeout in sec for RTMP or MPEGTS protocols @@ -143,7 +144,7 @@ typedef struct xcparams_t { - If xc_type=xc_audio_join then avpipe library creates an audio join filter graph and joins the selected input audio streams to produce a joint audio stream. - If xc_type=xc_audio_pan then avpipe library creates an audio pan filter graph to pan multiple channels in one input stream to one output stereo stream. - **Specifying decoder/encoder:** the ecodec/decodec params are used to set video encoder/decoder. Also ecodec2/decodec2 params are used to set audio encoder/decoder. For video the decoder can be one of "h264", "h264_cuvid", "jpeg2000", "hevc" and encoder can be "libx264", "libx265", "h264_nvenc", "h264_videotoolbox", or "mjpeg". For audio the decoder can be “aac” or “ac3” and the encoder can be "aac", "ac3", "mp2" or "mp3". -- **Joining/merging multiple audio:** avpipe library has the capability to join and pan multiple audio input streams by setting xc_type parameter to xc_audio_join and xc_audio_pan respectively (merging multiple audio is not complete yet). +- **Transcoding multiple audio:** avpipe library has the capability to transcode one or multiple audio streams at the same time. The _audio_index_ array includes the audio index of the streams that will be transcoded. The parameter _n_audio_ determines the number of audio indexes in the _audio_index_ array. - **Using GPU:** avpipe library can utilize NVIDIA cards for transcoding. In order to utilize the NVIDIA GPU, the gpu_index must be set (the default is using GPU with index 0). 
To find the existing GPU indexes on a machine, nvidia-smi command can be used. In addition, the decoder and encoder should be set to "h264_cuvid" or "h264_nvenc" respectively. And finally, in order to pick the correct GPU index the following environment variable must be set “CUDA_DEVICE_ORDER=PCI_BUS_ID” before running the program. - **Text watermarking:** this can be done with setting watermark_text, watermark_xloc, watermark_yloc, watermark_relative_sz, and watermark_font_color while transcoding a video (xc_type=xc_video), which makes specified watermark text to appear at specified location. - **Image watermarking:** this can be done with setting watermark_overlay (the buffer containing overlay image), watermark_overlay_len, watermark_xloc, and watermark_yloc while transcoding a video (xc_type=xc_video). @@ -157,7 +158,8 @@ typedef struct xcparams_t { - setting xc_type = xc_audio_join would join 2 or more audio inputs and create a new audio output (for example joining two mono streams and creating one stereo). - setting xc_type = xc_audio_pan would pick different audio channels from input and create a new audio stream (for example picking different channels from a 5.1 channel layout and producing a stereo containing two channels). - setting xc_type = xc_audio_merge would merge different input audio streams and produce a new multi-channel output stream (for example, merging different input mono streams and create a new 5.1) -- **Setting video timebase:** setting video_time_base will set the timebase of generated video to 1/video_time_base (the timebase has to be bigger than 10000). +- **Setting video timebase:** setting _video_time_base_ will set the timebase of generated video to 1/video_time_base (the timebase has to be bigger than 10000). +- **Video frame duration:** the parameter _video_frame_duration_ts_ can be used to set the duration of each video frame with the specified timebase for output video. 
This along with video_time_base can be used to normalize the video frames and their duration. For example, for a stream with 60 fps and _video_frame_duration_ts_ equal to 256, the _video_time_base_ would be 15360. As another example, for a 59.94 fps, the _video_frame_duration_ts_ can be 1001 and _video_time_base_ would be 60000. In this case a segment of 1800 frames would be 1801800 timebase long. - **Debugging with frames:** if the parameter debug_frame_level is on then the logs will also include very low level debug messages to trace reading/writing every piece of data. - **Connection timeout:** This parameter is useful when recording / transcoding RTMP or MPEGTS streams. If avpipe is listening for an RTMP stream, connection_timeout determines the time in sec to listen for an incoming RTMP stream. If avpipe is listening for incoming UDP MPEGTS packets, connection_timeout determines the time in sec to wait for the first incoming UDP packet (if no packet is received during connection_timeout, then timeout would happen and an error would be generated). 
diff --git a/avpipe.c b/avpipe.c index a1d27e8..771612c 100644 --- a/avpipe.c +++ b/avpipe.c @@ -48,6 +48,7 @@ typedef struct udp_thread_params_t { static int out_stat( void *opaque, + int stream_index, avp_stat_t stat_type); int64_t AVPipeOpenInput(char *, int64_t *); @@ -55,7 +56,7 @@ int64_t AVPipeOpenMuxInput(char *, char *, int64_t *); int AVPipeReadInput(int64_t, uint8_t *, int); int64_t AVPipeSeekInput(int64_t, int64_t, int); int AVPipeCloseInput(int64_t); -int AVPipeStatInput(int64_t, avp_stat_t, void *); +int AVPipeStatInput(int64_t, int, avp_stat_t, void *); int64_t AVPipeOpenOutput(int64_t, int, int, int64_t, int); int64_t AVPipeOpenMuxOutput(char *, int); int AVPipeWriteOutput(int64_t, int64_t, uint8_t *, int); @@ -64,8 +65,8 @@ int AVPipeSeekOutput(int64_t, int64_t, int64_t, int); int AVPipeSeekMuxOutput(int64_t, int64_t, int); int AVPipeCloseOutput(int64_t, int64_t); int AVPipeCloseMuxOutput(int64_t); -int AVPipeStatOutput(int64_t, int64_t, avpipe_buftype_t, avp_stat_t, void *); -int AVPipeStatMuxOutput(int64_t, avp_stat_t, void *); +int AVPipeStatOutput(int64_t, int64_t, int, avpipe_buftype_t, avp_stat_t, void *); +int AVPipeStatMuxOutput(int64_t, int, avp_stat_t, void *); int CLog(char *); int CDebug(char *); int CInfo(char *); @@ -87,6 +88,7 @@ static pthread_mutex_t tx_mutex = PTHREAD_MUTEX_INITIALIZER; static int in_stat( void *opaque, + int stream_index, avp_stat_t stat_type); static int @@ -182,7 +184,8 @@ in_read_packet( inctx->read_pos += r; if (inctx->read_bytes - inctx->read_reported > BYTES_READ_REPORT) { - in_stat(opaque, in_stat_bytes_read); + /* Pass stream_index 0 (stream_index has no meaning for in_stat_bytes_read) */ + in_stat(opaque, 0, in_stat_bytes_read); inctx->read_reported = inctx->read_bytes; } } @@ -254,6 +257,7 @@ in_seek( static int in_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { int64_t fd; @@ -268,24 +272,24 @@ in_stat( switch (stat_type) { case in_stat_bytes_read: - rc = AVPipeStatInput(fd, stat_type, 
&c->read_bytes); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->read_bytes); break; case in_stat_decoding_audio_start_pts: case in_stat_decoding_video_start_pts: - rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts); break; case in_stat_audio_frame_read: - rc = AVPipeStatInput(fd, stat_type, &c->audio_frames_read); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->audio_frames_read); break; case in_stat_video_frame_read: - rc = AVPipeStatInput(fd, stat_type, &c->video_frames_read); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->video_frames_read); break; case in_stat_first_keyframe_pts: - rc = AVPipeStatInput(fd, stat_type, &c->first_key_frame_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->first_key_frame_pts); break; default: @@ -514,6 +518,7 @@ udp_in_seek( static int udp_in_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { int64_t fd; @@ -534,32 +539,32 @@ udp_in_stat( case in_stat_decoding_audio_start_pts: if (debug_frame_level) elv_dbg("IN STAT UDP fd=%d, audio start PTS=%"PRId64", url=%s", fd, c->decoding_start_pts, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts); break; case in_stat_decoding_video_start_pts: if (debug_frame_level) elv_dbg("IN STAT UDP fd=%d, video start PTS=%"PRId64", url=%s", fd, c->decoding_start_pts, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts); break; case in_stat_audio_frame_read: if (debug_frame_level) elv_dbg("IN STAT UDP fd=%d, audio frame read=%"PRId64", url=%s", fd, c->audio_frames_read, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->audio_frames_read); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->audio_frames_read); break; case in_stat_video_frame_read: if (debug_frame_level) 
elv_dbg("IN STAT UDP fd=%d, video frame read=%"PRId64", url=%s", fd, c->video_frames_read, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->video_frames_read); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->video_frames_read); break; case in_stat_first_keyframe_pts: if (debug_frame_level) elv_dbg("IN STAT UDP fd=%d, first keyframe PTS=%"PRId64", url=%s", fd, c->first_key_frame_pts, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->first_key_frame_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->first_key_frame_pts); break; case in_stat_data_scte35: if (debug_frame_level) elv_dbg("IN STAT UDP SCTE35 fd=%d, stat_type=%d, url=%s", fd, stat_type, c->url); - rc = AVPipeStatInput(fd, stat_type, c->data); + rc = AVPipeStatInput(fd, stream_index, stat_type, c->data); break; default: elv_err("IN STAT UDP fd=%d, invalid input stat=%d, url=%s", stat_type, c->url); @@ -634,13 +639,13 @@ out_write_packet( outctx->written_bytes - outctx->write_reported > VIDEO_BYTES_WRITE_REPORT) || (outctx->type == avpipe_audio_fmp4_segment && outctx->written_bytes - outctx->write_reported > AUDIO_BYTES_WRITE_REPORT)) { - out_stat(opaque, out_stat_bytes_written); + out_stat(opaque, outctx->stream_index, out_stat_bytes_written); outctx->write_reported = outctx->written_bytes; } if (xcparams && xcparams->debug_frame_level) - elv_dbg("OUT WRITE fd=%"PRId64", size=%d written=%d pos=%d total=%d", - fd, buf_size, bwritten, outctx->write_pos, outctx->written_bytes); + elv_dbg("OUT WRITE stream_index=%d, fd=%"PRId64", size=%d written=%d pos=%d total=%d", + outctx->stream_index, fd, buf_size, bwritten, outctx->write_pos, outctx->written_bytes); return buf_size; } @@ -691,6 +696,7 @@ out_closer( static int out_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { ioctx_t *outctx = (ioctx_t *)opaque; @@ -711,14 +717,14 @@ out_stat( fd = *((int64_t *)(outctx->opaque)); switch (stat_type) { case out_stat_bytes_written: - rc = AVPipeStatOutput(h, fd, buftype, stat_type, 
&outctx->written_bytes); + rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->written_bytes); break; case out_stat_encoding_end_pts: if (buftype == avpipe_audio_segment || buftype == avpipe_audio_fmp4_segment) - rc = AVPipeStatOutput(h, fd, buftype, stat_type, &outctx->encoder_ctx->audio_last_pts_sent_encode); + rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->encoder_ctx->audio_last_pts_sent_encode); else - rc = AVPipeStatOutput(h, fd, buftype, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode); + rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode); break; case out_stat_frame_written: { @@ -726,7 +732,7 @@ out_stat( .total_frames_written = outctx->total_frames_written, .frames_written = outctx->frames_written, }; - rc = AVPipeStatOutput(h, fd, buftype, stat_type, &encoding_frame_stats); + rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &encoding_frame_stats); } break; default: @@ -1216,7 +1222,8 @@ in_mux_read_packet( } if (c->read_bytes - c->read_reported > BYTES_READ_REPORT) { - in_stat(opaque, in_stat_bytes_read); + /* Pass stream_index 0 (stream_index has no meaning for in_stat_bytes_read) */ + in_stat(opaque, 0, in_stat_bytes_read); c->read_reported = c->read_bytes; } @@ -1319,6 +1326,7 @@ out_mux_seek( static int out_mux_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { ioctx_t *outctx = (ioctx_t *)opaque; @@ -1328,10 +1336,10 @@ out_mux_stat( switch (stat_type) { case out_stat_bytes_written: - rc = AVPipeStatMuxOutput(fd, stat_type, &outctx->written_bytes); + rc = AVPipeStatMuxOutput(fd, stream_index, stat_type, &outctx->written_bytes); break; case out_stat_encoding_end_pts: - rc = AVPipeStatMuxOutput(fd, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode); + rc = AVPipeStatMuxOutput(fd, stream_index, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode); break; default: break; diff --git a/avpipe.go 
b/avpipe.go index c1bd68c..e36a22e 100644 --- a/avpipe.go +++ b/avpipe.go @@ -172,7 +172,7 @@ const ( CryptCBCS ) -const MaxAudioMux = C.MAX_AUDIO_MUX +const MaxAudioMux = C.MAX_STREAMS // XcParams should match with txparams_t in avpipe_xc.h type XcParams struct { @@ -228,7 +228,6 @@ type XcParams struct { MaxCLL string `json:"max_cll,omitempty"` MasterDisplay string `json:"master_display,omitempty"` BitDepth int32 `json:"bitdepth,omitempty"` - AudioFillGap bool `json:"audio_fill_gap,omitempty"` SyncAudioToStreamId int `json:"sync_audio_to_stream_id"` ForceEqualFDuration bool `json:"force_equal_frame_duration,omitempty"` MuxingSpec string `json:"muxing_spec,omitempty"` @@ -377,11 +376,11 @@ type IOHandler interface { InReader(buf []byte) (int, error) InSeeker(offset C.int64_t, whence C.int) error InCloser() error - InStat(avp_stat C.avp_stat_t, stat_args *C.void) error + InStat(stream_index C.int, avp_stat C.avp_stat_t, stat_args *C.void) error OutWriter(fd C.int, buf []byte) (int, error) OutSeeker(fd C.int, offset C.int64_t, whence C.int) (int64, error) OutCloser(fd C.int) error - OutStat(avp_stat C.avp_stat_t, stat_args *C.void) error + OutStat(stream_index C.int, avp_stat C.avp_stat_t, stat_args *C.void) error } type InputOpener interface { @@ -405,7 +404,7 @@ type InputHandler interface { Size() int64 // Reports some stats - Stat(statType AVStatType, statArgs interface{}) error + Stat(streamIndex int, statType AVStatType, statArgs interface{}) error } type OutputOpener interface { @@ -430,7 +429,7 @@ type OutputHandler interface { Close() error // Reports some stats - Stat(avType AVType, statType AVStatType, statArgs interface{}) error + Stat(streamIndex int, avType AVType, statType AVStatType, statArgs interface{}) error } // Implement IOHandler @@ -711,7 +710,7 @@ func (h *ioHandler) InCloser() error { } //export AVPipeStatInput -func AVPipeStatInput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { +func AVPipeStatInput(fd C.int64_t, 
stream_index C.int, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { gMutex.Lock() h := gHandlers[int64(fd)] if h == nil { @@ -720,7 +719,7 @@ func AVPipeStatInput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.Point } gMutex.Unlock() - err := h.InStat(avp_stat, stat_args) + err := h.InStat(stream_index, avp_stat, stat_args) if err != nil { return C.int(-1) } @@ -728,31 +727,32 @@ func AVPipeStatInput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.Point return C.int(0) } -func (h *ioHandler) InStat(avp_stat C.avp_stat_t, stat_args unsafe.Pointer) error { +func (h *ioHandler) InStat(stream_index C.int, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) error { var err error + streamIndex := (int)(stream_index) switch avp_stat { case C.in_stat_bytes_read: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_BYTES_READ, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_BYTES_READ, &statArgs) case C.in_stat_decoding_audio_start_pts: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_DECODING_AUDIO_START_PTS, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_DECODING_AUDIO_START_PTS, &statArgs) case C.in_stat_decoding_video_start_pts: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_DECODING_VIDEO_START_PTS, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_DECODING_VIDEO_START_PTS, &statArgs) case C.in_stat_audio_frame_read: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_AUDIO_FRAME_READ, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_AUDIO_FRAME_READ, &statArgs) case C.in_stat_video_frame_read: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_VIDEO_FRAME_READ, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_VIDEO_FRAME_READ, &statArgs) case C.in_stat_first_keyframe_pts: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_FIRST_KEYFRAME_PTS, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_FIRST_KEYFRAME_PTS, 
&statArgs) case C.in_stat_data_scte35: statArgs := C.GoString((*C.char)(stat_args)) - err = h.input.Stat(AV_IN_STAT_DATA_SCTE35, statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_DATA_SCTE35, statArgs) } return err @@ -1048,6 +1048,7 @@ func (h *ioHandler) OutCloser(fd C.int64_t) error { //export AVPipeStatOutput func AVPipeStatOutput(handler C.int64_t, fd C.int64_t, + stream_index C.int, buf_type C.avpipe_buftype_t, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { @@ -1060,7 +1061,7 @@ func AVPipeStatOutput(handler C.int64_t, } gMutex.Unlock() - err := h.OutStat(fd, buf_type, avp_stat, stat_args) + err := h.OutStat(fd, stream_index, buf_type, avp_stat, stat_args) if err != nil { return C.int(-1) } @@ -1069,7 +1070,7 @@ func AVPipeStatOutput(handler C.int64_t, } //export AVPipeStatMuxOutput -func AVPipeStatMuxOutput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { +func AVPipeStatMuxOutput(fd C.int64_t, stream_index C.int, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { gMutex.Lock() outHandler := gMuxHandlers[int64(fd)] if outHandler == nil { @@ -1078,14 +1079,15 @@ func AVPipeStatMuxOutput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.P } gMutex.Unlock() + streamIndex := (int)(stream_index) var err error switch avp_stat { case C.out_stat_bytes_written: statArgs := *(*uint64)(stat_args) - err = outHandler.Stat(MuxSegment, AV_OUT_STAT_BYTES_WRITTEN, &statArgs) + err = outHandler.Stat(streamIndex, MuxSegment, AV_OUT_STAT_BYTES_WRITTEN, &statArgs) case C.out_stat_encoding_end_pts: statArgs := *(*uint64)(stat_args) - err = outHandler.Stat(MuxSegment, AV_OUT_STAT_ENCODING_END_PTS, &statArgs) + err = outHandler.Stat(streamIndex, MuxSegment, AV_OUT_STAT_ENCODING_END_PTS, &statArgs) } if err != nil { @@ -1101,6 +1103,7 @@ type EncodingFrameStats struct { } func (h *ioHandler) OutStat(fd C.int64_t, + stream_index C.int, av_type C.avpipe_buftype_t, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) error { @@ -1111,21 
+1114,22 @@ func (h *ioHandler) OutStat(fd C.int64_t, return fmt.Errorf("OutStat nil handler, fd=%d", int64(fd)) } + streamIndex := (int)(stream_index) avType := getAVType(C.int(av_type)) switch avp_stat { case C.out_stat_bytes_written: statArgs := *(*uint64)(stat_args) - err = outHandler.Stat(avType, AV_OUT_STAT_BYTES_WRITTEN, &statArgs) + err = outHandler.Stat(streamIndex, avType, AV_OUT_STAT_BYTES_WRITTEN, &statArgs) case C.out_stat_encoding_end_pts: statArgs := *(*uint64)(stat_args) - err = outHandler.Stat(avType, AV_OUT_STAT_ENCODING_END_PTS, &statArgs) + err = outHandler.Stat(streamIndex, avType, AV_OUT_STAT_ENCODING_END_PTS, &statArgs) case C.out_stat_frame_written: encodingFramesStats := (*C.encoding_frame_stats_t)(stat_args) statArgs := &EncodingFrameStats{ TotalFramesWritten: int64(encodingFramesStats.total_frames_written), FramesWritten: int64(encodingFramesStats.frames_written), } - err = outHandler.Stat(avType, AV_OUT_STAT_FRAME_WRITTEN, statArgs) + err = outHandler.Stat(streamIndex, avType, AV_OUT_STAT_FRAME_WRITTEN, statArgs) } return err @@ -1261,10 +1265,6 @@ func getCParams(params *XcParams) (*C.xcparams_t, error) { cparams.force_equal_fduration = C.int(1) } - if params.AudioFillGap { - cparams.audio_fill_gap = C.int(1) - } - if params.SkipDecoding { cparams.skip_decoding = C.int(1) } diff --git a/avpipe_test.go b/avpipe_test.go index afd8023..5880c1b 100644 --- a/avpipe_test.go +++ b/avpipe_test.go @@ -28,7 +28,7 @@ const baseOutPath = "test_out" const debugFrameLevel = true const h264Codec = "libx264" const videoBigBuckBunnyPath = "media/bbb_1080p_30fps_60sec.mp4" -const videoRockyPath = "media/rocky.mp4" +const videoBigBuckBunny3AudioPath = "media/BBB_3x_audio_streams_music_2min_48kHz.mp4" type XcTestResult struct { mezFile []string @@ -126,39 +126,39 @@ func (i *fileInput) Size() int64 { return fi.Size() } -func (i *fileInput) Stat(statType avpipe.AVStatType, statArgs interface{}) error { +func (i *fileInput) Stat(streamIndex int, statType 
avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_IN_STAT_BYTES_READ: readOffset := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "STAT read offset", *readOffset) + log.Debug("AVP TEST IN STAT", "STAT read offset", *readOffset, "streamIndex", streamIndex) } case avpipe.AV_IN_STAT_AUDIO_FRAME_READ: audioFramesRead := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "audioFramesRead", *audioFramesRead) + log.Debug("AVP TEST IN STAT", "audioFramesRead", *audioFramesRead, "streamIndex", streamIndex) } statsInfo.audioFramesRead = *audioFramesRead case avpipe.AV_IN_STAT_VIDEO_FRAME_READ: videoFramesRead := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "videoFramesRead", *videoFramesRead) + log.Debug("AVP TEST IN STAT", "videoFramesRead", *videoFramesRead, "streamIndex", streamIndex) } statsInfo.videoFramesRead = *videoFramesRead case avpipe.AV_IN_STAT_DECODING_AUDIO_START_PTS: startPTS := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "audio start PTS", *startPTS) + log.Debug("AVP TEST IN STAT", "audio start PTS", *startPTS, "streamIndex", streamIndex) } case avpipe.AV_IN_STAT_DECODING_VIDEO_START_PTS: startPTS := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "video start PTS", *startPTS) + log.Debug("AVP TEST IN STAT", "video start PTS", *startPTS, "streamIndex", streamIndex) } case avpipe.AV_IN_STAT_FIRST_KEYFRAME_PTS: keyFramePTS := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "video first keyframe PTS", *keyFramePTS) + log.Debug("AVP TEST IN STAT", "video first keyframe PTS", *keyFramePTS, "streamIndex", streamIndex) } statsInfo.firstKeyFramePTS = *keyFramePTS } @@ -200,7 +200,7 @@ func (oo *fileOutputOpener) Open(_, _ int64, streamIndex, segIndex int, case avpipe.FMP4VideoSegment: filename = fmt.Sprintf("./%s/vsegment-%d.mp4", oo.dir, segIndex) case avpipe.FMP4AudioSegment: - filename 
= fmt.Sprintf("./%s/asegment-%d.mp4", oo.dir, segIndex) + filename = fmt.Sprintf("./%s/asegment%d-%d.mp4", oo.dir, streamIndex, segIndex) case avpipe.FrameImage: filename = fmt.Sprintf("./%s/%d.jpeg", oo.dir, pts) } @@ -308,23 +308,23 @@ func (o *fileOutput) Close() error { return err } -func (o fileOutput) Stat(avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { +func (o fileOutput) Stat(streamIndex int, avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_OUT_STAT_BYTES_WRITTEN: writeOffset := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST OUT STAT", "STAT, write offset", *writeOffset) + log.Debug("AVP TEST OUT STAT", "STAT, write offset", *writeOffset, "streamIndex", streamIndex) } case avpipe.AV_OUT_STAT_ENCODING_END_PTS: endPTS := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST OUT STAT", "STAT, endPTS", *endPTS) + log.Debug("AVP TEST OUT STAT", "STAT, endPTS", *endPTS, "streamIndex", streamIndex) } case avpipe.AV_OUT_STAT_FRAME_WRITTEN: encodingStats := statArgs.(*avpipe.EncodingFrameStats) if debugFrameLevel { log.Debug("AVP TEST OUT STAT", "avType", avType, - "encodingStats", encodingStats) + "encodingStats", encodingStats, "streamIndex", streamIndex) } if avType == avpipe.FMP4AudioSegment { statsInfo.encodingAudioFrameStats = *encodingStats @@ -368,7 +368,6 @@ func TestAudioSeg(t *testing.T) { Url: url, DebugFrameLevel: debugFrameLevel, } - setFastEncodeParams(params, true) xcTest(t, outputDir, params, nil, true) } @@ -980,7 +979,7 @@ func TestAudioAAC2AACMezMaker(t *testing.T) { } xcTestResult := &XcTestResult{ - mezFile: []string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, } @@ -1018,7 +1017,7 @@ func TestAudioAC3Ts2AC3MezMaker(t *testing.T) { params.AudioIndex[0] = 2 xcTestResult := &XcTestResult{ - mezFile: 
[]string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, } @@ -1056,7 +1055,7 @@ func TestAudioAC3Ts2AACMezMaker(t *testing.T) { params.AudioIndex[0] = 2 xcTestResult := &XcTestResult{ - mezFile: []string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, } @@ -1095,7 +1094,7 @@ func TestAudioMP3Ts2AACMezMaker(t *testing.T) { params.AudioIndex[0] = 1 xcTestResult := &XcTestResult{ - mezFile: []string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, } @@ -1135,7 +1134,7 @@ func TestAudioDownmix2AACMezMaker(t *testing.T) { params.AudioIndex[0] = 6 xcTestResult := &XcTestResult{ - mezFile: []string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, channelLayoutName: "stereo", @@ -1178,7 +1177,7 @@ func TestAudio2MonoTo1Stereo(t *testing.T) { channelLayoutName: "stereo", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1214,7 +1213,7 @@ func TestAudio5_1To5_1(t *testing.T) { channelLayoutName: "5.1", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1252,7 +1251,7 @@ func TestAudio5_1ToStereo(t *testing.T) { channelLayoutName: "stereo", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = 
append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1291,7 +1290,7 @@ func TestAudioMonoToMono(t *testing.T) { channelLayoutName: "mono", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1330,7 +1329,7 @@ func TestAudioQuadToQuad(t *testing.T) { channelLayoutName: "quad", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1375,7 +1374,7 @@ func TestAudio6MonoTo5_1(t *testing.T) { channelLayoutName: "5.1", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1420,7 +1419,7 @@ func TestAudio6MonoUnequalChannelLayoutsTo5_1(t *testing.T) { channelLayoutName: "5.1", } for i := 1; i < 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1460,7 +1459,7 @@ func TestAudio10Channel_s16To6Channel_5_1(t *testing.T) { channelLayoutName: "5.1", } for i := 1; i <= 1; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + 
xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1501,7 +1500,7 @@ func TestAudio2Channel1Stereo(t *testing.T) { } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1544,7 +1543,7 @@ func TestAudioPan2Channel1Stereo_pcm_60000(t *testing.T) { } for i := 1; i <= 1; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1586,7 +1585,54 @@ func TestAudioMonoToStereo_pcm_60000(t *testing.T) { } for i := 1; i <= 1; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) + } + + xcTest(t, outputDir, params, xcTestResult, true) +} + +func TestMultiAudioXc(t *testing.T) { + url := videoBigBuckBunny3AudioPath + + if fileMissing(url, fn()) { + return + } + + outputDir := path.Join(baseOutPath, fn()) + + params := &avpipe.XcParams{ + BypassTranscoding: false, + Format: "fmp4-segment", + StartTimeTs: 0, + DurationTs: -1, + StartSegmentStr: "1", + VideoSegDurationTs: 460800, + AudioSegDurationTs: 1428480, + Ecodec: h264Codec, + Dcodec: "", + Ecodec2: "aac", + EncHeight: 720, + EncWidth: 1280, + XcType: avpipe.XcAll, + StreamId: -1, + SyncAudioToStreamId: -1, + ForceKeyInt: 60, + Url: url, + DebugFrameLevel: debugFrameLevel, + NumAudio: 3, + } + + params.AudioIndex[0] = 1 + params.AudioIndex[1] = 2 + params.AudioIndex[2] = 3 + + xcTestResult := &XcTestResult{ + timeScale: 
15360, + pixelFmt: "yuv420p", + } + + for i := 1; i <= 4; i++ { + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/vsegment-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -2028,7 +2074,7 @@ func TestABRMuxing(t *testing.T) { videoMezDir := path.Join(baseOutPath, f, "VideoMez4Muxing") audioMezDir := path.Join(baseOutPath, f, "AudioMez4Muxing") videoABRDir := path.Join(baseOutPath, f, "VideoABR4Muxing") - videoABRDir2 := path.Join(baseOutPath, f, "VideoABR4Muxing2") + videoABRDir2 := path.Join(baseOutPath, f, "VideoABR4Muxing2") audioABRDir := path.Join(baseOutPath, f, "AudioABR4Muxing") audioABRDir2 := path.Join(baseOutPath, f, "AudioABR4Muxing2") muxOutDir := path.Join(baseOutPath, f, "MuxingOutput") @@ -2095,7 +2141,7 @@ func TestABRMuxing(t *testing.T) { // Create audio ABR files for the first mez segment setupOutDir(t, audioABRDir) - url = audioMezDir + "/asegment-1.mp4" + url = audioMezDir + "/asegment0-1.mp4" log.Debug("STARTING audio ABR for muxing", "file", url) params.XcType = avpipe.XcAudio params.Format = "dash" @@ -2109,7 +2155,7 @@ func TestABRMuxing(t *testing.T) { // Create audio ABR files for the second mez segment setupOutDir(t, audioABRDir2) - url = audioMezDir + "/asegment-2.mp4" + url = audioMezDir + "/asegment0-2.mp4" log.Debug("STARTING audio ABR for muxing (first segment)", "file", url) params.XcType = avpipe.XcAudio params.Format = "dash" diff --git a/elvxc/cmd/mux.go b/elvxc/cmd/mux.go index 1b92e1c..df1a64b 100644 --- a/elvxc/cmd/mux.go +++ b/elvxc/cmd/mux.go @@ -101,17 +101,17 @@ func (muxInput *elvxcMuxInput) Size() int64 { return fi.Size() } -func (muxInput *elvxcMuxInput) Stat(statType avpipe.AVStatType, statArgs interface{}) error { +func (muxInput *elvxcMuxInput) Stat(streamIndex int, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_IN_STAT_BYTES_READ: readOffset := statArgs.(*uint64) - log.Info("elvxcMuxInput", "stat read offset", 
*readOffset) + log.Info("elvxcMuxInput", "stat read offset", *readOffset, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DECODING_AUDIO_START_PTS: startPTS := statArgs.(*uint64) - log.Info("elvxcMuxInput", "audio start PTS", *startPTS) + log.Info("elvxcMuxInput", "audio start PTS", *startPTS, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DECODING_VIDEO_START_PTS: startPTS := statArgs.(*uint64) - log.Info("elvxcMuxInput", "video start PTS", *startPTS) + log.Info("elvxcMuxInput", "video start PTS", *startPTS, "streamIndex", streamIndex) } return nil @@ -165,14 +165,14 @@ func (muxOutput *elvxcMuxOutput) Close() error { return err } -func (muxOutput *elvxcMuxOutput) Stat(avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { +func (muxOutput *elvxcMuxOutput) Stat(streamIndex int, avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_OUT_STAT_BYTES_WRITTEN: writeOffset := statArgs.(*uint64) - log.Info("elvxcMuxOutput", "STAT, write offset", *writeOffset) + log.Info("elvxcMuxOutput", "STAT, write offset", *writeOffset, "streamIndex", streamIndex) case avpipe.AV_OUT_STAT_ENCODING_END_PTS: endPTS := statArgs.(*uint64) - log.Info("elvxcMuxOutput", "STAT, endPTS", *endPTS) + log.Info("elvxcMuxOutput", "STAT, endPTS", *endPTS, "streamIndex", streamIndex) } diff --git a/elvxc/cmd/transcode.go b/elvxc/cmd/transcode.go index e8869d9..63fb413 100644 --- a/elvxc/cmd/transcode.go +++ b/elvxc/cmd/transcode.go @@ -83,25 +83,25 @@ func (i *elvxcInput) Size() int64 { return fi.Size() } -func (i *elvxcInput) Stat(statType avpipe.AVStatType, statArgs interface{}) error { +func (i *elvxcInput) Stat(streamIndex int, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_IN_STAT_BYTES_READ: readOffset := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "read offset", *readOffset) + log.Info("AVCMD InputHandler.Stat", "read offset", *readOffset, 
"streamIndex", streamIndex) case avpipe.AV_IN_STAT_AUDIO_FRAME_READ: audioFrameRead := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "audioFrameRead", *audioFrameRead) + log.Info("AVCMD InputHandler.Stat", "audioFrameRead", *audioFrameRead, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_VIDEO_FRAME_READ: videoFrameRead := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "videoFrameRead", *videoFrameRead) + log.Info("AVCMD InputHandler.Stat", "videoFrameRead", *videoFrameRead, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DECODING_AUDIO_START_PTS: startPTS := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "audio start PTS", *startPTS) + log.Info("AVCMD InputHandler.Stat", "audio start PTS", *startPTS, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DECODING_VIDEO_START_PTS: startPTS := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "video start PTS", *startPTS) + log.Info("AVCMD InputHandler.Stat", "video start PTS", *startPTS, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DATA_SCTE35: - log.Info("AVCMD InputHandler.Stat", "scte35", statArgs) + log.Info("AVCMD InputHandler.Stat", "scte35", statArgs, "streamIndex", streamIndex) } return nil @@ -197,19 +197,19 @@ func (o *elvxcOutput) Close() error { return err } -func (o *elvxcOutput) Stat(avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { +func (o *elvxcOutput) Stat(streamIndex int, avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_OUT_STAT_BYTES_WRITTEN: writeOffset := statArgs.(*uint64) - log.Info("AVCMD OutputHandler.Stat", "write offset", *writeOffset) + log.Info("AVCMD OutputHandler.Stat", "write offset", *writeOffset, "streamIndex", streamIndex) case avpipe.AV_OUT_STAT_ENCODING_END_PTS: endPTS := statArgs.(*uint64) - log.Info("AVCMD OutputHandler.Stat", "endPTS", *endPTS) + log.Info("AVCMD OutputHandler.Stat", "endPTS", *endPTS, "streamIndex", streamIndex) case 
avpipe.AV_OUT_STAT_FRAME_WRITTEN: encodingStats := statArgs.(*avpipe.EncodingFrameStats) log.Info("AVCMD OutputHandler.Stat", "avType", avType, - "encodingStats", encodingStats) + "encodingStats", encodingStats, "streamIndex", streamIndex) } return nil } @@ -272,7 +272,6 @@ func InitTranscode(cmdRoot *cobra.Command) error { cmdTranscode.PersistentFlags().StringP("audio-index", "", "", "the indexes of audio stream (comma separated).") cmdTranscode.PersistentFlags().StringP("channel-layout", "", "", "audio channel layout.") cmdTranscode.PersistentFlags().Int32P("gpu-index", "", -1, "Use the GPU with specified index for transcoding (export CUDA_DEVICE_ORDER=PCI_BUS_ID would use smi index).") - cmdTranscode.PersistentFlags().BoolP("audio-fill-gap", "", false, "fill audio gap when encoder is aac and decoder is mpegts") cmdTranscode.PersistentFlags().Int32P("sync-audio-to-stream-id", "", -1, "sync audio to video iframe of specific stream-id when input stream is mpegts") cmdTranscode.PersistentFlags().StringP("encoder", "e", "libx264", "encoder codec, default is 'libx264', can be: 'libx264', 'libx265', 'h264_nvenc', 'h264_videotoolbox', or 'mjpeg'.") cmdTranscode.PersistentFlags().StringP("audio-encoder", "", "aac", "audio encoder, default is 'aac', can be: 'aac', 'ac3', 'mp2', 'mp3'.") @@ -387,11 +386,6 @@ func doTranscode(cmd *cobra.Command, args []string) error { return fmt.Errorf("Invalid gpu index flag") } - audioFillGap, err := cmd.Flags().GetBool("audio-fill-gap") - if err != nil { - return fmt.Errorf("Invalid audio-fill-gap flag") - } - syncAudioToStreamId, err := cmd.Flags().GetInt32("sync-audio-to-stream-id") if err != nil { return fmt.Errorf("Invalid sync-audio-to-stream-id flag") @@ -681,7 +675,6 @@ func doTranscode(cmd *cobra.Command, args []string) error { MasterDisplay: masterDisplay, BitDepth: bitDepth, ForceEqualFDuration: forceEqualFrameDuration, - AudioFillGap: audioFillGap, SyncAudioToStreamId: int(syncAudioToStreamId), StreamId: streamId, Listen: 
listen, diff --git a/exc/elv_mux.c b/exc/elv_mux.c index 81a9be0..184796c 100644 --- a/exc/elv_mux.c +++ b/exc/elv_mux.c @@ -300,6 +300,7 @@ out_mux_closer( static int out_mux_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { ioctx_t *outctx = (ioctx_t *)opaque; @@ -316,7 +317,8 @@ out_mux_stat( switch (stat_type) { case out_stat_bytes_written: - elv_log("OUT MUX STAT fd=%d, write offset=%"PRId64, fd, outctx->written_bytes); + elv_log("OUT MUX STAT stream_index=%d, fd=%d, write offset=%"PRId64, + stream_index, fd, outctx->written_bytes); break; #if 0 /* PENDING(RM) set the hooks properly for muxing */ diff --git a/exc/elv_xc.c b/exc/elv_xc.c index f57e29e..20d4f29 100644 --- a/exc/elv_xc.c +++ b/exc/elv_xc.c @@ -39,11 +39,13 @@ udp_thread_func( int in_stat( void *opaque, + int stream_index, avp_stat_t stat_type); int out_stat( void *opaque, + int stream_index, avp_stat_t stat_type); typedef struct udp_thread_params_t { @@ -278,7 +280,8 @@ in_read_packet( } if (r > 0 && c->read_bytes - c->read_reported > BYTES_READ_REPORT) { - in_stat(opaque, in_stat_bytes_read); + /* Pass stream_index 0 (stream_index has no meaning for in_stat_bytes_read) */ + in_stat(opaque, 0, in_stat_bytes_read); c->read_reported = c->read_bytes; } @@ -323,6 +326,7 @@ in_seek( int in_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { int64_t fd; @@ -337,30 +341,30 @@ in_stat( switch (stat_type) { case in_stat_bytes_read: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, read offset=%"PRId64, fd, c->read_bytes); + elv_dbg("IN STAT stream_index=%d, fd=%d, read offset=%"PRId64, stream_index, fd, c->read_bytes); break; case in_stat_decoding_audio_start_pts: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, audio start PTS=%"PRId64, fd, c->decoding_start_pts); + elv_dbg("IN STAT stream_index=%d, fd=%d, audio start PTS=%"PRId64, stream_index, fd, c->decoding_start_pts); break; case in_stat_decoding_video_start_pts: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, video start 
PTS=%"PRId64, fd, c->decoding_start_pts); + elv_dbg("IN STAT stream_index=%d, fd=%d, video start PTS=%"PRId64, stream_index, fd, c->decoding_start_pts); break; case in_stat_audio_frame_read: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, audio frame read=%"PRId64, fd, c->audio_frames_read); + elv_dbg("IN STAT stream_index=%d, fd=%d, audio frame read=%"PRId64, stream_index, fd, c->audio_frames_read); break; case in_stat_video_frame_read: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, video frame read=%"PRId64, fd, c->video_frames_read); + elv_dbg("IN STAT stream_index=%d, fd=%d, video frame read=%"PRId64, stream_index, fd, c->video_frames_read); break; case in_stat_data_scte35: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, data=%s", fd, c->data); + elv_dbg("IN STAT stream_index=%d, fd=%d, data=%s", stream_index, fd, c->data); break; default: - elv_err("IN STAT fd=%d, invalid input stat=%d", fd, stat_type); + elv_err("IN STAT stream_index=%d, fd=%d, invalid input stat=%d", stream_index, fd, stat_type); return 1; } @@ -549,7 +553,7 @@ out_write_packet( outctx->written_bytes - outctx->write_reported > VIDEO_BYTES_WRITE_REPORT) || (outctx->type == avpipe_audio_fmp4_segment && outctx->written_bytes - outctx->write_reported > AUDIO_BYTES_WRITE_REPORT)) { - out_stat(opaque, out_stat_bytes_written); + out_stat(opaque, outctx->stream_index, out_stat_bytes_written); outctx->write_reported = outctx->written_bytes; } @@ -602,6 +606,7 @@ out_closer( int out_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { ioctx_t *outctx = (ioctx_t *)opaque; @@ -625,21 +630,21 @@ out_stat( switch (stat_type) { case out_stat_bytes_written: if (xcparams->debug_frame_level) - elv_dbg("OUT STAT fd=%d, type=%d, write offset=%"PRId64, - fd, outctx->type, outctx->written_bytes); + elv_dbg("OUT STAT stream_index=%d, fd=%d, type=%d, write offset=%"PRId64, + stream_index, fd, outctx->type, outctx->written_bytes); break; case out_stat_encoding_end_pts: if (xcparams->debug_frame_level) 
- elv_dbg("OUT STAT fd=%d, video encoding end PTS=%"PRId64 + elv_dbg("OUT STAT stream_index=%d, fd=%d, video encoding end PTS=%"PRId64 ", audio encoding end PTS=%"PRId64, - fd, outctx->encoder_ctx->video_last_pts_sent_encode, + stream_index, fd, outctx->encoder_ctx->video_last_pts_sent_encode, outctx->encoder_ctx->audio_last_pts_sent_encode); break; case out_stat_frame_written: if (xcparams->debug_frame_level) - elv_dbg("OUT STAT fd=%d, type=%d, total_frames_written=%"PRId64 + elv_dbg("OUT STAT stream_index=%d, fd=%d, type=%d, total_frames_written=%"PRId64 ", frames_written=%"PRId64, - fd, outctx->type, outctx->total_frames_written, + stream_index, fd, outctx->type, outctx->total_frames_written, outctx->frames_written); break; default: @@ -1046,7 +1051,6 @@ usage( "\t-audio-bitrate : (optional) Default: 128000\n" "\t-audio-decoder : (optional) Audio decoder name. For audio default is \"aac\", but for ts files should be set to \"ac3\"\n" "\t-audio-encoder : (optional) Audio encoder name. Default is \"aac\", can be \"ac3\", \"mp2\" or \"mp3\"\n" - "\t-audio-fill-gap : (optional) Default: 0, must be 0 or 1. It only effects if encoder is aac.\n" "\t-audio-index : (optional) Default: the indexes of audio stream (comma separated)\n" "\t-audio-seg-duration-ts : (mandatory If format is not \"segment\" and transcoding audio) audio segment duration time base (positive integer).\n" "\t-bitdepth : (optional) Bitdepth of color space. 
Default is 8, can be 8, 10, or 12.\n" @@ -1156,7 +1160,6 @@ main( .stream_id = -1, .audio_bitrate = 128000, /* Default bitrate */ .n_audio = 0, /* # of audio index */ - .audio_fill_gap = 0, /* Don't fill gap if there is JUMP */ .bitdepth = 8, .crf_str = strdup("23"), /* 1 best -> 23 standard middle -> 52 poor */ .crypt_iv = NULL, @@ -1229,13 +1232,6 @@ main( p.dcodec2 = strdup(argv[i+1]); } else if (!strcmp(argv[i], "-audio-encoder")) { p.ecodec2 = strdup(argv[i+1]); - } else if (!strcmp(argv[i], "-audio-fill-gap")) { - if (sscanf(argv[i+1], "%d", &p.audio_fill_gap) != 1) { - usage(argv[0], argv[i], EXIT_FAILURE); - } - if (p.audio_fill_gap != 0 && p.audio_fill_gap != 1) { - usage(argv[0], argv[i], EXIT_FAILURE); - } } else if (!strcmp(argv[i], "-audio-bitrate")) { if (sscanf(argv[i+1], "%d", &p.audio_bitrate) != 1) { usage(argv[0], argv[i], EXIT_FAILURE); diff --git a/libavpipe/include/avpipe_version.h b/libavpipe/include/avpipe_version.h index 6bd09aa..395e3de 100644 --- a/libavpipe/include/avpipe_version.h +++ b/libavpipe/include/avpipe_version.h @@ -10,5 +10,5 @@ /* Only increase these versions for release purposes */ #define AVPIPE_MAJOR_VERSION 1 -#define AVPIPE_MINOR_VERSION 12 +#define AVPIPE_MINOR_VERSION 13 diff --git a/libavpipe/include/avpipe_xc.h b/libavpipe/include/avpipe_xc.h index b1427d9..a6b4bb9 100644 --- a/libavpipe/include/avpipe_xc.h +++ b/libavpipe/include/avpipe_xc.h @@ -19,8 +19,6 @@ #define MAX_STREAMS 64 #define MAX_MUX_IN_STREAM 4096 -#define MAX_AUDIO_MUX 8 -#define MAX_CAPTION_MUX 8 #define AVIO_OUT_BUF_SIZE (1*1024*1024) // avio output buffer size #define AVIO_IN_BUF_SIZE (1*1024*1024) // avio input buffer size @@ -122,10 +120,10 @@ typedef struct io_mux_ctx_t { mux_input_ctx_t video; int64_t last_video_pts; int last_audio_index; - mux_input_ctx_t audios[MAX_AUDIO_MUX]; + mux_input_ctx_t audios[MAX_STREAMS]; int64_t last_audio_pts; int last_caption_index; - mux_input_ctx_t captions[MAX_CAPTION_MUX]; + mux_input_ctx_t 
captions[MAX_STREAMS]; } io_mux_ctx_t; typedef struct xcparams_t xcparams_t; @@ -231,6 +229,7 @@ typedef int64_t typedef int (*avpipe_stater_f)( void *opaque, + int stream_index, /* The stream_index is not valid for input stat in_stat_bytes_read. */ avp_stat_t stat_type); typedef struct avpipe_io_handler_t { @@ -242,17 +241,20 @@ typedef struct avpipe_io_handler_t { avpipe_stater_f avpipe_stater; } avpipe_io_handler_t; -#define MAX_WRAP_PTS ((int64_t)8589000000) +#define MAX_WRAP_PTS ((int64_t)8589000000) +#define MAX_AVFILENAME_LEN 128 /* Decoder/encoder context, keeps both video and audio stream ffmpeg contexts */ typedef struct coderctx_t { - AVFormatContext *format_context; /* Input format context or video output format context */ - AVFormatContext *format_context2; /* Audio output format context */ + AVFormatContext *format_context; /* Input format context or video output format context */ + AVFormatContext *format_context2[MAX_STREAMS]; /* Audio output format context, indexed by audio index */ + char filename2[MAX_STREAMS][MAX_AVFILENAME_LEN]; /* Audio filename formats */ + int n_audio_output; /* Number of audio output streams, it is set for encoder */ AVCodec *codec[MAX_STREAMS]; AVStream *stream[MAX_STREAMS]; AVCodecParameters *codec_parameters[MAX_STREAMS]; - AVCodecContext *codec_context[MAX_STREAMS]; + AVCodecContext *codec_context[MAX_STREAMS]; /* Audio/video AVCodecContext, indexed by stream_index */ SwrContext *resampler_context; /* resample context for audio */ AVAudioFifo *fifo; /* audio sampling fifo */ @@ -261,27 +263,26 @@ typedef struct coderctx_t { ioctx_t *inctx; /* Input context needed for stat callbacks */ int video_stream_index; - int audio_stream_index[MAX_AUDIO_MUX]; /* Audio input stream indexes */ + int audio_stream_index[MAX_STREAMS]; /* Audio input stream indexes */ int n_audio; /* Number of audio streams that will be decoded */ int data_scte35_stream_index; /* Index of SCTE-35 data stream */ int data_stream_index; /* Index of an 
unrecognized data stream */ - int audio_enc_stream_index; /* Audio output stream index */ int64_t video_last_wrapped_pts; /* Video last wrapped pts */ int64_t video_last_input_pts; /* Video last input pts */ - int64_t audio_last_wrapped_pts; /* Audio last wrapped pts */ - int64_t audio_last_input_pts; /* Audio last input pts */ + int64_t audio_last_wrapped_pts[MAX_STREAMS]; /* Audio last wrapped pts */ + int64_t audio_last_input_pts[MAX_STREAMS]; /* Audio last input pts */ int64_t video_last_dts; - int64_t audio_last_dts; + int64_t audio_last_dts[MAX_STREAMS]; int64_t last_key_frame; /* pts of last key frame */ int64_t forced_keyint_countdown; /* frames until next forced key frame */ int64_t video_last_pts_read; /* Video input last pts read */ - int64_t audio_last_pts_read; /* Audio input last pts reas */ + int64_t audio_last_pts_read[MAX_STREAMS]; /* Audio input last pts read */ int64_t video_last_pts_sent_encode; /* Video last pts to encode if tx_type & tx_video */ - int64_t audio_last_pts_sent_encode; /* Audio last pts to encode if tx_type & tx_audio */ + int64_t audio_last_pts_sent_encode[MAX_STREAMS]; /* Audio last pts to encode if tx_type & tx_audio */ int64_t video_last_pts_encoded; /* Video last input pts encoded if tx_type & tx_video */ - int64_t audio_last_pts_encoded; /* Audio last input pts encoded if tx_type & tx_audio */ + int64_t audio_last_pts_encoded[MAX_STREAMS]; /* Audio last input pts encoded if tx_type & tx_audio */ int64_t audio_output_pts; /* Used to set PTS directly when using audio FIFO */ @@ -291,25 +292,25 @@ typedef struct coderctx_t { AVFilterGraph *video_filter_graph; /* Audio filter */ - AVFilterContext *audio_buffersink_ctx; - AVFilterContext *audio_buffersrc_ctx[MAX_AUDIO_MUX]; - AVFilterGraph *audio_filter_graph; + AVFilterContext *audio_buffersink_ctx[MAX_STREAMS]; + AVFilterContext *audio_buffersrc_ctx[MAX_STREAMS]; + AVFilterGraph *audio_filter_graph[MAX_STREAMS]; + int n_audio_filters; /* Number of initialized audio filters */ 
int64_t video_frames_written; /* Total video frames written so far */ - int64_t audio_frames_written; /* Total audio frames written so far */ + int64_t audio_frames_written[MAX_STREAMS]; /* Total audio frames written so far */ int64_t video_pts; /* Video decoder/encoder pts */ - int64_t audio_pts; /* Audio decoder/encoder pts */ + int64_t audio_pts[MAX_STREAMS]; /* Audio decoder/encoder pts for each track/stream */ int64_t video_input_start_pts; /* In case video input stream starts at PTS > 0 */ int video_input_start_pts_notified; /* Will be set as soon as out_stat_decoding_video_start_pts is fired */ - int64_t audio_input_start_pts; /* In case audio input stream starts at PTS > 0 */ + int64_t audio_input_start_pts[MAX_STREAMS]; /* In case audio input stream starts at PTS > 0 */ int audio_input_start_pts_notified; /* Will be set as soon as out_stat_decoding_audio_start_pts is fired */ int64_t first_decoding_video_pts; /* PTS of first video frame read from the decoder */ - int64_t first_decoding_audio_pts; /* PTS of first audio frame read from the decoder */ + int64_t first_decoding_audio_pts[MAX_STREAMS]; /* PTS of first audio frame read from the decoder */ int64_t first_encoding_video_pts; /* PTS of first video frame sent to the encoder */ - int64_t first_encoding_audio_pts; /* PTS of first audio frame sent to the encoder */ + int64_t first_encoding_audio_pts[MAX_STREAMS]; /* PTS of first audio frame sent to the encoder */ int64_t first_read_frame_pts[MAX_STREAMS]; /* PTS of first frame read - which might not be decodable */ - int64_t audio_input_prev_pts; /* Previous pts for audio input */ int64_t video_encoder_prev_pts; /* Previous pts for video output (encoder) */ int64_t video_duration; /* Duration/pts of original frame */ int64_t audio_duration; /* Audio duration/pts of original frame when tx_type == tx_all */ @@ -420,9 +421,8 @@ typedef struct xcparams_t { char *watermark_timecode; // Watermark timecode string (i.e 00\:00\:00\:00) float 
watermark_timecode_rate; // Watermark timecode frame rate - int audio_index[MAX_AUDIO_MUX]; // Audio index(s) for mez making, may need to become an array of indexes + int audio_index[MAX_STREAMS]; // Audio index(s) for mez making, may need to become an array of indexes int n_audio; // Number of entries in audio_index - int audio_fill_gap; // Audio only, fills the gap if there is a jump in PTS int sync_audio_to_stream_id; // mpegts only, default is 0 int bitdepth; // Can be 8, 10, 12 char *max_cll; // Maximum Content Light Level (HDR only) @@ -500,13 +500,13 @@ typedef struct xctx_t { * Each video/audio/caption input stream can have multiple input files/parts. * Each video/audio/caption input stream has its own coderctx_t and ioctx_t. */ - io_mux_ctx_t *in_mux_ctx; // Input muxer context - coderctx_t in_muxer_ctx[MAX_AUDIO_MUX+MAX_CAPTION_MUX+1]; // Video, audio, captions coder input muxer context (one video, multiple audio/caption) - ioctx_t *inctx_muxer[MAX_AUDIO_MUX+MAX_CAPTION_MUX+1]; // Video, audio, captions io muxer context (one video, multiple audio/caption) - coderctx_t out_muxer_ctx; // Output muxer + io_mux_ctx_t *in_mux_ctx; // Input muxer context + coderctx_t in_muxer_ctx[MAX_STREAMS]; // Video, audio, captions coder input muxer context (one video, multiple audio/caption) + ioctx_t *inctx_muxer[MAX_STREAMS]; // Video, audio, captions io muxer context (one video, multiple audio/caption) + coderctx_t out_muxer_ctx; // Output muxer - AVPacket pkt_array[MAX_AUDIO_MUX+MAX_CAPTION_MUX+1]; - int is_pkt_valid[MAX_AUDIO_MUX+MAX_CAPTION_MUX+1]; + AVPacket pkt_array[MAX_STREAMS]; + int is_pkt_valid[MAX_STREAMS]; elv_channel_t *vc; // Video frame channel elv_channel_t *ac; // Audio frame channel diff --git a/libavpipe/src/avpipe_filters.c b/libavpipe/src/avpipe_filters.c index 44888e3..56a1e64 100644 --- a/libavpipe/src/avpipe_filters.c +++ b/libavpipe/src/avpipe_filters.c @@ -161,8 +161,6 @@ init_audio_filters( return eav_num_streams; } - AVCodecContext 
*dec_codec_ctx = decoder_context->codec_context[decoder_context->audio_stream_index[0]];
-    AVCodecContext *enc_codec_ctx = encoder_context->codec_context[encoder_context->audio_stream_index[0]];
     char args[512];
     int ret = 0;
     AVFilterContext **abuffersrc_ctx = NULL;
@@ -173,92 +171,99 @@
     const AVFilter *aformat = avfilter_get_by_name("aformat");
     AVFilterGraph *filter_graph;
-    if (!dec_codec_ctx) {
-        elv_err("init_audio_filters, audio decoder was not initialized!");
-        ret = AVERROR_UNKNOWN;
-        goto end;
-    }
+    for (int i=0; i<encoder_context->n_audio_output; i++) {
+        int audio_stream_index = decoder_context->audio_stream_index[i];
-    filter_graph = avfilter_graph_alloc();
-    if (!buffersrc || !buffersink || !filter_graph) {
-        elv_err("init_audio_filters, audio filtering source or sink element not found");
-        ret = AVERROR_UNKNOWN;
-        goto end;
-    }
+        AVCodecContext *dec_codec_ctx = decoder_context->codec_context[audio_stream_index];
+        AVCodecContext *enc_codec_ctx = encoder_context->codec_context[audio_stream_index];
-    get_avfilter_args(decoder_context, decoder_context->audio_stream_index[0], args, sizeof(args));
-    elv_dbg("init_audio_filters, audio srcfilter args=%s", args);
+        if (!dec_codec_ctx) {
+            elv_err("init_audio_filters, audio decoder was not initialized!");
+            ret = AVERROR_UNKNOWN;
+            goto end;
+        }
-    /* decoder_context->n_audio is 1 */
-    abuffersrc_ctx = decoder_context->audio_buffersrc_ctx;
+        filter_graph = avfilter_graph_alloc();
+        if (!buffersrc || !buffersink || !filter_graph) {
+            elv_err("init_audio_filters, audio filtering source or sink element not found");
+            ret = AVERROR_UNKNOWN;
+            goto end;
+        }
-    ret = avfilter_graph_create_filter(&abuffersrc_ctx[0], buffersrc, "in", args, NULL, filter_graph);
-    if (ret < 0) {
-        elv_err("init_audio_filters, cannot create audio buffer source");
-        goto end;
-    }
+        get_avfilter_args(decoder_context, audio_stream_index, args, sizeof(args));
+        elv_dbg("init_audio_filters, audio srcfilter args=%s", args);
-    ret = 
avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph); - if (ret < 0) { - elv_err("init_audio_filters, cannot create audio buffer sink"); - goto end; - } + abuffersrc_ctx = decoder_context->audio_buffersrc_ctx; - ret = av_opt_set_bin(buffersink_ctx, "sample_fmts", - (uint8_t*)&enc_codec_ctx->sample_fmt, sizeof(enc_codec_ctx->sample_fmt), - AV_OPT_SEARCH_CHILDREN); - if (ret < 0) { - elv_err("init_audio_filters, cannot set output sample format"); - goto end; - } + ret = avfilter_graph_create_filter(&abuffersrc_ctx[i], buffersrc, "in", args, NULL, filter_graph); + if (ret < 0) { + elv_err("init_audio_filters, cannot create audio buffer source"); + goto end; + } - ret = av_opt_set_bin(buffersink_ctx, "sample_rates", - (uint8_t*)&enc_codec_ctx->sample_rate, sizeof(enc_codec_ctx->sample_rate), - AV_OPT_SEARCH_CHILDREN); - if (ret < 0) { - elv_err("init_audio_filters, cannot set output sample rate"); - goto end; - } + ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph); + if (ret < 0) { + elv_err("init_audio_filters, cannot create audio buffer sink"); + goto end; + } - ret = av_opt_set_bin(buffersink_ctx, "channel_layouts", - (uint8_t*)&enc_codec_ctx->channel_layout, - sizeof(enc_codec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN); - if (ret < 0) { - elv_err("init_audio_filters, cannot set output channel layout"); - goto end; - } + ret = av_opt_set_bin(buffersink_ctx, "sample_fmts", + (uint8_t*)&enc_codec_ctx->sample_fmt, sizeof(enc_codec_ctx->sample_fmt), + AV_OPT_SEARCH_CHILDREN); + if (ret < 0) { + elv_err("init_audio_filters, cannot set output sample format"); + goto end; + } - snprintf(args, sizeof(args), + ret = av_opt_set_bin(buffersink_ctx, "sample_rates", + (uint8_t*)&enc_codec_ctx->sample_rate, sizeof(enc_codec_ctx->sample_rate), + AV_OPT_SEARCH_CHILDREN); + if (ret < 0) { + elv_err("init_audio_filters, cannot set output sample rate"); + goto end; + } + + ret = 
av_opt_set_bin(buffersink_ctx, "channel_layouts", + (uint8_t*)&enc_codec_ctx->channel_layout, + sizeof(enc_codec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN); + if (ret < 0) { + elv_err("init_audio_filters, cannot set output channel layout"); + goto end; + } + + snprintf(args, sizeof(args), "sample_fmts=%s:sample_rates=%d:channel_layouts=0x%"PRIx64, av_get_sample_fmt_name(enc_codec_ctx->sample_fmt), enc_codec_ctx->sample_rate, (uint64_t)enc_codec_ctx->channel_layout); - elv_dbg("init_audio_filters, audio format_filter args=%s", args); + elv_dbg("init_audio_filters, audio format_filter args=%s", args); - ret = avfilter_graph_create_filter(&format_ctx, aformat, "format_out_0_0", args, NULL, filter_graph); - if (ret < 0) { - elv_err("init_audio_filters, cannot create audio format filter"); - goto end; - } + ret = avfilter_graph_create_filter(&format_ctx, aformat, "format_out_0_0", args, NULL, filter_graph); + if (ret < 0) { + elv_err("init_audio_filters, cannot create audio format filter"); + goto end; + } - if ((ret = avfilter_link(abuffersrc_ctx[0], 0, format_ctx, 0)) < 0) { - elv_err("init_audio_filters, failed to link audio src to format, ret=%d", ret); - goto end; - } + if ((ret = avfilter_link(abuffersrc_ctx[i], 0, format_ctx, 0)) < 0) { + elv_err("init_audio_filters, failed to link audio src to format, ret=%d", ret); + goto end; + } - if ((ret = avfilter_link(format_ctx, 0, buffersink_ctx, 0)) < 0) { - elv_err("init_audio_filters, failed to link audio format to sink, ret=%d", ret); - goto end; - } + if ((ret = avfilter_link(format_ctx, 0, buffersink_ctx, 0)) < 0) { + elv_err("init_audio_filters, failed to link audio format to sink, ret=%d", ret); + goto end; + } - av_buffersink_set_frame_size(buffersink_ctx, - encoder_context->codec_context[decoder_context->audio_stream_index[0]]->frame_size); + av_buffersink_set_frame_size(buffersink_ctx, + encoder_context->codec_context[audio_stream_index]->frame_size); - if ((ret = avfilter_graph_config(filter_graph, NULL)) 
< 0) - goto end; + if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) + goto end; - /* Fill FilteringContext */ - decoder_context->audio_filter_graph = filter_graph; - decoder_context->audio_buffersink_ctx = buffersink_ctx; + /* Fill FilteringContext */ + decoder_context->audio_filter_graph[i] = filter_graph; + decoder_context->audio_buffersink_ctx[i] = buffersink_ctx; + decoder_context->n_audio_filters++; + } end: if (ret < 0) @@ -390,14 +395,15 @@ init_audio_pan_filters( } av_buffersink_set_frame_size(buffersink_ctx, - encoder_context->codec_context[decoder_context->audio_stream_index[0]]->frame_size); + encoder_context->codec_context[0]->frame_size); if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) goto end; /* Fill FilteringContext */ - decoder_context->audio_filter_graph = filter_graph; - decoder_context->audio_buffersink_ctx = buffersink_ctx; + decoder_context->audio_filter_graph[0] = filter_graph; + decoder_context->audio_buffersink_ctx[0] = buffersink_ctx; + decoder_context->n_audio_filters++; end: if (ret < 0) @@ -523,15 +529,15 @@ init_audio_merge_pan_filters( goto end; } - av_buffersink_set_frame_size(buffersink_ctx, - encoder_context->codec_context[decoder_context->audio_stream_index[0]]->frame_size); + av_buffersink_set_frame_size(buffersink_ctx, encoder_context->codec_context[0]->frame_size); if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) goto end; /* Fill FilteringContext */ - decoder_context->audio_filter_graph = filter_graph; - decoder_context->audio_buffersink_ctx = buffersink_ctx; + decoder_context->audio_filter_graph[0] = filter_graph; + decoder_context->audio_buffersink_ctx[0] = buffersink_ctx; + decoder_context->n_audio_filters++; end: if (ret < 0) @@ -554,7 +560,7 @@ init_audio_join_filters( xcparams_t *params) { if (decoder_context->n_audio < 0 || - decoder_context->n_audio > MAX_AUDIO_MUX) { + decoder_context->n_audio > MAX_STREAMS) { return eav_num_streams; } @@ -571,8 +577,8 @@ init_audio_join_filters( const 
AVFilter *aformat = avfilter_get_by_name("aformat");
     const AVFilter *join = avfilter_get_by_name("join");
-    decoder_context->audio_filter_graph = avfilter_graph_alloc();
-    if (!buffersrc || !buffersink || !join || !decoder_context->audio_filter_graph) {
+    decoder_context->audio_filter_graph[0] = avfilter_graph_alloc();
+    if (!buffersrc || !buffersink || !join || !decoder_context->audio_filter_graph[0]) {
         elv_err("init_audio_join_filters, audio filtering source/sink/join filter not found");
         ret = AVERROR_UNKNOWN;
         goto end;
@@ -582,7 +588,7 @@ init_audio_join_filters(
     /* Create join filter with n inputs */
     sprintf(args, "inputs=%d", decoder_context->n_audio);
-    ret = avfilter_graph_create_filter(&join_ctx, join, "join", args, NULL, decoder_context->audio_filter_graph);
+    ret = avfilter_graph_create_filter(&join_ctx, join, "join", args, NULL, decoder_context->audio_filter_graph[0]);
     if (ret < 0) {
         elv_err("init_audio_join_filters, cannot create audio join");
         goto end;
@@ -590,7 +596,8 @@ init_audio_join_filters(
     /* For each audio input create an audio source filter and link it to join filter */
     for (int i=0; i<decoder_context->n_audio; i++) {
-        AVCodecContext *dec_codec_ctx = decoder_context->codec_context[decoder_context->audio_stream_index[i]];
+        int audio_stream_index = decoder_context->audio_stream_index[i];
+        AVCodecContext *dec_codec_ctx = decoder_context->codec_context[audio_stream_index];
         char filt_name[32];
         if (!dec_codec_ctx) {
@@ -599,12 +606,12 @@ init_audio_join_filters(
             goto end;
         }
-        get_avfilter_args(decoder_context, decoder_context->audio_stream_index[i], args, sizeof(args));
+        get_avfilter_args(decoder_context, audio_stream_index, args, sizeof(args));
         sprintf(filt_name, "in_%d", i);
         elv_dbg("init_audio_join_filters, audio srcfilter=%s args=%s", filt_name, args);
-        ret = avfilter_graph_create_filter(&abuffersrc_ctx[i], buffersrc, filt_name, args, NULL, decoder_context->audio_filter_graph);
+        ret = avfilter_graph_create_filter(&abuffersrc_ctx[i], buffersrc, filt_name, 
args, NULL, decoder_context->audio_filter_graph[0]); if (ret < 0) { elv_err("init_audio_join_filters, cannot create audio buffer source %d", i); goto end; @@ -617,7 +624,7 @@ init_audio_join_filters( } - ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, decoder_context->audio_filter_graph); + ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, decoder_context->audio_filter_graph[0]); if (ret < 0) { elv_err("init_audio_join_filters, cannot create audio buffer sink"); goto end; @@ -653,7 +660,7 @@ init_audio_join_filters( (uint64_t)enc_codec_ctx->channel_layout); elv_dbg("init_audio_join_filters, audio format_filter args=%s", format_args); - ret = avfilter_graph_create_filter(&format_ctx, aformat, "format_out_0_0", format_args, NULL, decoder_context->audio_filter_graph); + ret = avfilter_graph_create_filter(&format_ctx, aformat, "format_out_0_0", format_args, NULL, decoder_context->audio_filter_graph[0]); if (ret < 0) { elv_err("Cannot create audio format filter"); goto end; @@ -672,11 +679,12 @@ init_audio_join_filters( av_buffersink_set_frame_size(buffersink_ctx, encoder_context->codec_context[encoder_context->audio_stream_index[0]]->frame_size); - if ((ret = avfilter_graph_config(decoder_context->audio_filter_graph, NULL)) < 0) + if ((ret = avfilter_graph_config(decoder_context->audio_filter_graph[0], NULL)) < 0) goto end; /* Save FilteringContext */ - decoder_context->audio_buffersink_ctx = buffersink_ctx; + decoder_context->audio_buffersink_ctx[0] = buffersink_ctx; + decoder_context->n_audio_filters++; end: if (ret < 0) diff --git a/libavpipe/src/avpipe_io.c b/libavpipe/src/avpipe_io.c index 9ef48a4..b975939 100644 --- a/libavpipe/src/avpipe_io.c +++ b/libavpipe/src/avpipe_io.c @@ -75,13 +75,13 @@ elv_io_open( (*pb) = avioctx; out_tracker[outctx->stream_index].last_outctx = outctx; - elv_dbg("OUT elv_io_open stream_index=%d, seg_index=%d avioctx=%p, avioctx->opaque=%p, buf=%p, outctx=%p, 
outtracker[0]->last_outctx=%p, outtracker[1]->last_outctx=%p", - outctx->stream_index, outctx->seg_index, avioctx, avioctx->opaque, avioctx->buffer, outctx, out_tracker[0].last_outctx, out_tracker[1].last_outctx); + elv_dbg("OUT elv_io_open stream_index=%d, seg_index=%d avioctx=%p, avioctx->opaque=%p, buf=%p, outctx=%p, outtracker->last_outctx=%p, outtracker->last_outctx=%p", + outctx->stream_index, outctx->seg_index, avioctx, avioctx->opaque, avioctx->buffer, outctx, out_tracker[outctx->stream_index].last_outctx, out_tracker[outctx->stream_index].last_outctx); } else { ioctx_t *outctx = (ioctx_t *) calloc(1, sizeof(ioctx_t)); outctx->stream_index = 0; - outctx->encoder_ctx = out_tracker->encoder_ctx; - outctx->inctx = out_tracker[0].inctx; + outctx->encoder_ctx = out_tracker[outctx->stream_index].encoder_ctx; + outctx->inctx = out_tracker[outctx->stream_index].inctx; outctx->seg_index = 0; // init segment has stream_index and seg_index = 0 if (!url || url[0] == '\0') { @@ -97,13 +97,19 @@ elv_io_open( } } else { outctx->url = strdup(url); - int i = 0; - while (i < strlen(url) && !isdigit(url[i])) - i++; - if (i < strlen(url)) { - // Assumes a filename like segment%d-%05d.mp4 - outctx->stream_index = url[i] - '0'; + outctx->stream_index = 0; + if (!strstr(url, "m3u8")) { + int i = 0; + while (i < strlen(url) && !isdigit(url[i])) + i++; + if (i < strlen(url)) { + // Assumes a filename like segment%d-%05d.mp4 + outctx->stream_index = url[i] - '0'; + } } + outctx->encoder_ctx = out_tracker[outctx->stream_index].encoder_ctx; + outctx->inctx = out_tracker[outctx->stream_index].inctx; + //elv_dbg("XXX stream_index=%d", outctx->stream_index); if (!strncmp(url + strlen(url) - 3, "mpd", 3)) { outctx->type = avpipe_manifest; outctx->seg_index = -1; // Special index for manifest @@ -170,8 +176,8 @@ elv_io_open( AVIOContext *avioctx = avio_alloc_context(outctx->buf, outctx->bufsz, AVIO_FLAG_WRITE, (void *)outctx, out_handlers->avpipe_reader, out_handlers->avpipe_writer, 
out_handlers->avpipe_seeker); - elv_dbg("OUT elv_io_open url=%s, type=%d, seg_index=%d, last_outctx=%p, buf=%p", - url, outctx->type, outctx->seg_index, out_tracker[outctx->stream_index].last_outctx, avioctx->buffer); + elv_dbg("OUT elv_io_open url=%s, type=%d, stream_index=%d, seg_index=%d, last_outctx=%p, buf=%p", + url, outctx->type, outctx->stream_index, outctx->seg_index, out_tracker[outctx->stream_index].last_outctx, avioctx->buffer); /* libavformat expects seekable streams for mp4 */ if (outctx->type == avpipe_mp4_stream || outctx->type == avpipe_mp4_segment) diff --git a/libavpipe/src/avpipe_mux.c b/libavpipe/src/avpipe_mux.c index a971530..fe52545 100644 --- a/libavpipe/src/avpipe_mux.c +++ b/libavpipe/src/avpipe_mux.c @@ -41,8 +41,11 @@ elv_mux_close( ioctx_t *outctx = (ioctx_t *)pb->opaque; elv_dbg("OUT elv_mux_close avioctx=%p", pb); - if (out_handlers) { - out_handlers->avpipe_stater(outctx, out_stat_encoding_end_pts); + if (out_handlers && outctx) { + if (outctx->type == avpipe_video_fmp4_segment) + out_handlers->avpipe_stater(outctx, 0, out_stat_encoding_end_pts); + else + out_handlers->avpipe_stater(outctx, 1, out_stat_encoding_end_pts); out_handlers->avpipe_closer(outctx); } free(outctx); @@ -163,11 +166,11 @@ init_mux_ctx( elv_err("init_mux_ctx invalid video stream_index=%d", stream_index); return eav_param; } - if (!strcmp(stream_type, "audio") && (stream_index > MAX_AUDIO_MUX || stream_index > in_mux_ctx->last_audio_index+1)) { + if (!strcmp(stream_type, "audio") && (stream_index > MAX_STREAMS || stream_index > in_mux_ctx->last_audio_index+1)) { elv_err("init_mux_ctx invalid audio stream_index=%d", stream_index); return eav_param; } - if (!strcmp(stream_type, "caption") && (stream_index > MAX_CAPTION_MUX || stream_index > in_mux_ctx->last_caption_index+1)) { + if (!strcmp(stream_type, "caption") && (stream_index > MAX_STREAMS || stream_index > in_mux_ctx->last_caption_index+1)) { elv_err("init_mux_ctx invalid caption stream_index=%d", 
stream_index); return eav_param; } diff --git a/libavpipe/src/avpipe_xc.c b/libavpipe/src/avpipe_xc.c index 4890ceb..462a780 100644 --- a/libavpipe/src/avpipe_xc.c +++ b/libavpipe/src/avpipe_xc.c @@ -366,13 +366,13 @@ selected_audio_index( static int selected_decoded_audio( coderctx_t *decoder_context, - int index) + int stream_index) { if (decoder_context->n_audio <= 0) return -1; for (int i=0; in_audio; i++) { - if (decoder_context->audio_stream_index[i] == index) + if (decoder_context->audio_stream_index[i] == stream_index) return i; } @@ -389,7 +389,6 @@ prepare_decoder( { int rc; decoder_context->video_last_dts = AV_NOPTS_VALUE; - decoder_context->audio_last_dts = AV_NOPTS_VALUE; int stream_id_index = -1; int sync_id_index = -1; // Index of the video stream used for audio sync char *url = params ? params->url : ""; @@ -398,8 +397,10 @@ prepare_decoder( decoder_context->inctx = inctx; decoder_context->video_stream_index = -1; decoder_context->data_scte35_stream_index = -1; - for (int j=0; jaudio_stream_index[j] = -1; + decoder_context->audio_last_dts[j] = AV_NOPTS_VALUE; + } decoder_context->format_context = avformat_alloc_context(); if (!decoder_context->format_context) { @@ -455,7 +456,7 @@ prepare_decoder( decoder_context->stream[i] = decoder_context->format_context->streams[i]; /* If no stream ID specified - choose the first video stream encountered */ - if (params && params->stream_id < 0 && decoder_context->video_stream_index < 0) + if (params && (params->xc_type & xc_video) && params->stream_id < 0 && decoder_context->video_stream_index < 0) decoder_context->video_stream_index = i; elv_dbg("VIDEO STREAM %d, codec_id=%s, stream_id=%d, timebase=%d, xc_type=%d, url=%s", i, avcodec_get_name(decoder_context->codec_parameters[i]->codec_id), decoder_context->stream[i]->id, @@ -654,7 +655,7 @@ prepare_decoder( elv_dbg("prepare_decoder xc_type=%d, video_stream_index=%d, audio_stream_index=%d, n_audio=%d, nb_streams=%d, url=%s", params ? 
params->xc_type : 0, decoder_context->video_stream_index, - decoder_context->audio_stream_index[0], + decoder_context->audio_stream_index[decoder_context->n_audio-1], decoder_context->n_audio, decoder_context->format_context->nb_streams, url); @@ -706,6 +707,8 @@ set_encoder_options( int stream_index, int timebase) { + int i; + if (timebase <= 0) { elv_err("Setting encoder options failed, invalid timebase=%d (check encoding params), url=%s", timebase, params->url); @@ -715,14 +718,14 @@ set_encoder_options( if (!strcmp(params->format, "fmp4")) { if (stream_index == decoder_context->video_stream_index) av_opt_set(encoder_context->format_context->priv_data, "movflags", "frag_every_frame", 0); - if (selected_decoded_audio(decoder_context, stream_index) >= 0) - av_opt_set(encoder_context->format_context2->priv_data, "movflags", "frag_every_frame", 0); + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) + av_opt_set(encoder_context->format_context2[i]->priv_data, "movflags", "frag_every_frame", 0); } // Segment duration (in ts) - notice it is set on the format context not codec if (params->audio_seg_duration_ts > 0 && (!strcmp(params->format, "dash") || !strcmp(params->format, "hls"))) { - if (selected_decoded_audio(decoder_context, stream_index) >= 0) - av_opt_set_int(encoder_context->format_context2->priv_data, "seg_duration_ts", params->audio_seg_duration_ts, + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) + av_opt_set_int(encoder_context->format_context2[i]->priv_data, "seg_duration_ts", params->audio_seg_duration_ts, AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_SEARCH_CHILDREN); } @@ -732,20 +735,20 @@ set_encoder_options( AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_SEARCH_CHILDREN); } - if (selected_decoded_audio(decoder_context, stream_index) >= 0) { + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) { if (!(params->xc_type & xc_audio)) { - elv_err("Failed to set encoder options, stream_index=%d, xc_type=%d, 
url=%s", + elv_err("Failed to set audio encoder options, stream_index=%d, xc_type=%d, url=%s", stream_index, params->xc_type, params->url); return eav_param; } - av_opt_set_int(encoder_context->format_context2->priv_data, "start_fragment_index", params->start_fragment_index, + av_opt_set_int(encoder_context->format_context2[i]->priv_data, "start_fragment_index", params->start_fragment_index, AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_SEARCH_CHILDREN); - av_opt_set(encoder_context->format_context2->priv_data, "start_segment", params->start_segment_str, 0); + av_opt_set(encoder_context->format_context2[i]->priv_data, "start_segment", params->start_segment_str, 0); } if (stream_index == decoder_context->video_stream_index) { if (!(params->xc_type & xc_video)) { - elv_err("Failed to set encoder options, stream_index=%d, xc_type=%d, url=%s", + elv_err("Failed to set video encoder options, stream_index=%d, xc_type=%d, url=%s", stream_index, params->xc_type, params->url); return eav_param; } @@ -764,17 +767,17 @@ set_encoder_options( timebase = calc_timebase(params, 1, timebase); seg_duration_ts = seg_duration * timebase; } - if (selected_decoded_audio(decoder_context, stream_index) >= 0) { + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) { if (params->audio_seg_duration_ts > 0) seg_duration_ts = params->audio_seg_duration_ts; - av_opt_set_int(encoder_context->format_context2->priv_data, "segment_duration_ts", seg_duration_ts, 0); + av_opt_set_int(encoder_context->format_context2[i]->priv_data, "segment_duration_ts", seg_duration_ts, 0); /* If audio_seg_duration_ts is not set, set it now */ if (params->audio_seg_duration_ts <= 0) params->audio_seg_duration_ts = seg_duration_ts; elv_dbg("setting \"fmp4-segment\" audio segment_time to %s, seg_duration_ts=%"PRId64", url=%s", params->seg_duration, seg_duration_ts, params->url); - av_opt_set(encoder_context->format_context2->priv_data, "reset_timestamps", "on", 0); - } + 
av_opt_set(encoder_context->format_context2[i]->priv_data, "reset_timestamps", "on", 0); + } if (stream_index == decoder_context->video_stream_index) { if (params->video_seg_duration_ts > 0) seg_duration_ts = params->video_seg_duration_ts; @@ -790,8 +793,8 @@ set_encoder_options( // av_opt_set(encoder_context->format_context->priv_data, "segment_format_options", "movflags=faststart", 0); // So lets use flag_every_frame option instead. if (!strcmp(params->format, "fmp4-segment")) { - if (selected_decoded_audio(decoder_context, stream_index) >= 0) - av_opt_set(encoder_context->format_context2->priv_data, "segment_format_options", "movflags=frag_every_frame", 0); + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) + av_opt_set(encoder_context->format_context2[i]->priv_data, "segment_format_options", "movflags=frag_every_frame", 0); if (stream_index == decoder_context->video_stream_index) av_opt_set(encoder_context->format_context->priv_data, "segment_format_options", "movflags=frag_every_frame", 0); } @@ -1261,182 +1264,217 @@ prepare_audio_encoder( coderctx_t *decoder_context, xcparams_t *params) { - int index = decoder_context->audio_stream_index[0]; + int n_audio = encoder_context->n_audio_output; char *ecodec; AVFormatContext *format_context; int rc; - if (index < 0) { - elv_dbg("No audio stream detected by decoder."); - return eav_stream_index; + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) { + // Only we have one output audio in these cases + n_audio = 1; } - if (!decoder_context->codec_context[index]) { - elv_err("Decoder codec context is NULL! 
index=%d, url=%s", index, params->url); - return eav_codec_context; - } + for (int i=0; iaudio_stream_index[i]; + int output_stream_index = stream_index; - /* If there are more than 1 audio stream do encode, we can't do bypass */ - if (params && params->bypass_transcoding && decoder_context->n_audio > 1) { - elv_err("Can not bypass multiple audio streams, n_audio=%d, url=%s", decoder_context->n_audio, params->url); - return eav_num_streams; - } + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) { + // Only we have one output audio in these cases + output_stream_index = 0; + } - format_context = encoder_context->format_context2; - ecodec = params->ecodec2; - encoder_context->audio_last_dts = AV_NOPTS_VALUE; + if (stream_index < 0) { + elv_dbg("No audio stream detected by decoder."); + return eav_stream_index; + } - encoder_context->audio_stream_index[0] = index; - encoder_context->n_audio = 1; + if (!decoder_context->codec_context[stream_index]) { + elv_err("Decoder codec context is NULL! 
stream_index=%d, url=%s", stream_index, params->url); + return eav_codec_context; + } - encoder_context->stream[index] = avformat_new_stream(format_context, NULL); - if (params->bypass_transcoding) - encoder_context->codec[index] = avcodec_find_encoder(decoder_context->codec_context[index]->codec_id); - else - encoder_context->codec[index] = avcodec_find_encoder_by_name(ecodec); - if (!encoder_context->codec[index]) { - elv_err("Codec not found, codec_id=%s, url=%s", - avcodec_get_name(decoder_context->codec_context[index]->codec_id), params->url); - return eav_codec_context; - } + /* If there are more than 1 audio stream do encode, we can't do bypass */ + if (params && params->bypass_transcoding && decoder_context->n_audio > 1) { + elv_err("Can not bypass multiple audio streams, n_audio=%d, url=%s", decoder_context->n_audio, params->url); + return eav_num_streams; + } - format_context->io_open = elv_io_open; - format_context->io_close = elv_io_close; + format_context = encoder_context->format_context2[i]; + ecodec = params->ecodec2; + encoder_context->audio_last_dts[i] = AV_NOPTS_VALUE; - encoder_context->codec_context[index] = avcodec_alloc_context3(encoder_context->codec[index]); + encoder_context->audio_stream_index[output_stream_index] = output_stream_index; + encoder_context->n_audio = 1; - /* By default use decoder parameters */ - encoder_context->codec_context[index]->sample_rate = decoder_context->codec_context[index]->sample_rate; + encoder_context->stream[output_stream_index] = avformat_new_stream(format_context, NULL); + if (params->bypass_transcoding) + encoder_context->codec[output_stream_index] = avcodec_find_encoder(decoder_context->codec_context[stream_index]->codec_id); + else + encoder_context->codec[output_stream_index] = avcodec_find_encoder_by_name(ecodec); + if (!encoder_context->codec[output_stream_index]) { + elv_err("Codec not found, codec_id=%s, url=%s", + avcodec_get_name(decoder_context->codec_context[stream_index]->codec_id), 
params->url); + return eav_codec_context; + } - /* Set the default time_base based on input sample_rate */ - encoder_context->codec_context[index]->time_base = (AVRational){1, encoder_context->codec_context[index]->sample_rate}; - encoder_context->stream[index]->time_base = encoder_context->codec_context[index]->time_base; + format_context->io_open = elv_io_open; + format_context->io_close = elv_io_close; - if (decoder_context->codec[index] && - decoder_context->codec[index]->sample_fmts && params->bypass_transcoding) - encoder_context->codec_context[index]->sample_fmt = decoder_context->codec[index]->sample_fmts[0]; - else if (encoder_context->codec[index]->sample_fmts && encoder_context->codec[index]->sample_fmts[0]) - encoder_context->codec_context[index]->sample_fmt = encoder_context->codec[index]->sample_fmts[0]; - else - encoder_context->codec_context[index]->sample_fmt = AV_SAMPLE_FMT_FLTP; + encoder_context->codec_context[output_stream_index] = avcodec_alloc_context3(encoder_context->codec[output_stream_index]); - if (params->channel_layout > 0) - encoder_context->codec_context[index]->channel_layout = params->channel_layout; - else - /* If the input stream is stereo the decoder_context->codec_context[index]->channel_layout is AV_CH_LAYOUT_STEREO */ - encoder_context->codec_context[index]->channel_layout = - get_channel_layout_for_encoder(decoder_context->codec_context[index]->channel_layout); - encoder_context->codec_context[index]->channels = av_get_channel_layout_nb_channels(encoder_context->codec_context[index]->channel_layout); + /* By default use decoder parameters */ + encoder_context->codec_context[output_stream_index]->sample_rate = decoder_context->codec_context[stream_index]->sample_rate; - const char *channel_name = avpipe_channel_name( - av_get_channel_layout_nb_channels(encoder_context->codec_context[index]->channel_layout), - decoder_context->codec_context[index]->channel_layout); + /* Set the default time_base based on input sample_rate */ + 
encoder_context->codec_context[output_stream_index]->time_base = (AVRational){1, encoder_context->codec_context[output_stream_index]->sample_rate}; + encoder_context->stream[output_stream_index]->time_base = encoder_context->codec_context[output_stream_index]->time_base; - /* If decoder channel layout is DOWNMIX and params->ecodec == "aac" and channel_layout is not set - * then set the channel layout to STEREO. Preserve the channel layout otherwise. - */ - if (decoder_context->codec_context[index]->channel_layout == AV_CH_LAYOUT_STEREO_DOWNMIX && - !strcmp(ecodec, "aac") && - !params->channel_layout) { - /* This encoder is prepared specifically for AAC, therefore set the channel layout to AV_CH_LAYOUT_STEREO */ - encoder_context->codec_context[index]->channels = av_get_channel_layout_nb_channels(AV_CH_LAYOUT_STEREO); - encoder_context->codec_context[index]->channel_layout = AV_CH_LAYOUT_STEREO; // AV_CH_LAYOUT_STEREO is av_get_default_channel_layout(encoder_context->codec_context[index]->channels) - } + if (decoder_context->codec[stream_index] && + decoder_context->codec[stream_index]->sample_fmts && params->bypass_transcoding) + encoder_context->codec_context[output_stream_index]->sample_fmt = decoder_context->codec[stream_index]->sample_fmts[0]; + else if (encoder_context->codec[output_stream_index]->sample_fmts && encoder_context->codec[output_stream_index]->sample_fmts[0]) + encoder_context->codec_context[output_stream_index]->sample_fmt = encoder_context->codec[output_stream_index]->sample_fmts[0]; + else + encoder_context->codec_context[output_stream_index]->sample_fmt = AV_SAMPLE_FMT_FLTP; - int sample_rate = params->sample_rate; - if (!strcmp(ecodec, "aac") && - !is_valid_aac_sample_rate(encoder_context->codec_context[index]->sample_rate) && - sample_rate <= 0) - sample_rate = DEFAULT_ACC_SAMPLE_RATE; + if (params->channel_layout > 0) + encoder_context->codec_context[output_stream_index]->channel_layout = params->channel_layout; + else + /* If the input 
stream is stereo the decoder_context->codec_context[index]->channel_layout is AV_CH_LAYOUT_STEREO */ + encoder_context->codec_context[output_stream_index]->channel_layout = + get_channel_layout_for_encoder(decoder_context->codec_context[stream_index]->channel_layout); + encoder_context->codec_context[output_stream_index]->channels = av_get_channel_layout_nb_channels(encoder_context->codec_context[output_stream_index]->channel_layout); - /* - * If sample_rate is set and - * - encoder is not "aac" or - * - if encoder is "aac" and encoder sample_rate is not valid and transcoding is pan/merge/join - * then - * - set encoder sample_rate to the specified sample_rate. - */ - if (sample_rate > 0 && - (strcmp(ecodec, "aac") || !is_valid_aac_sample_rate(encoder_context->codec_context[index]->sample_rate))) { - /* - * Audio resampling, which is active for aac encoder, needs more work to adjust sampling properly - * when input sample rate is different from output sample rate. (--RM) + const char *channel_name = avpipe_channel_name( + av_get_channel_layout_nb_channels(encoder_context->codec_context[output_stream_index]->channel_layout), + decoder_context->codec_context[stream_index]->channel_layout); + + /* If decoder channel layout is DOWNMIX and params->ecodec == "aac" and channel_layout is not set + * then set the channel layout to STEREO. Preserve the channel layout otherwise. 
*/ - encoder_context->codec_context[index]->sample_rate = sample_rate; + if (decoder_context->codec_context[stream_index]->channel_layout == AV_CH_LAYOUT_STEREO_DOWNMIX && + !strcmp(ecodec, "aac") && + !params->channel_layout) { + /* This encoder is prepared specifically for AAC, therefore set the channel layout to AV_CH_LAYOUT_STEREO */ + encoder_context->codec_context[output_stream_index]->channels = av_get_channel_layout_nb_channels(AV_CH_LAYOUT_STEREO); + encoder_context->codec_context[output_stream_index]->channel_layout = AV_CH_LAYOUT_STEREO; // AV_CH_LAYOUT_STEREO is av_get_default_channel_layout(encoder_context->codec_context[index]->channels) + } - /* Update timebase for the new sample rate */ - encoder_context->codec_context[index]->time_base = (AVRational){1, sample_rate}; - encoder_context->stream[index]->time_base = (AVRational){1, sample_rate}; - } + int sample_rate = params->sample_rate; + if (!strcmp(ecodec, "aac") && + !is_valid_aac_sample_rate(encoder_context->codec_context[output_stream_index]->sample_rate) && + sample_rate <= 0) + sample_rate = DEFAULT_ACC_SAMPLE_RATE; - elv_dbg("ENCODER channels=%d, channel_layout=%d (%s), sample_fmt=%s, sample_rate=%d", - encoder_context->codec_context[index]->channels, - encoder_context->codec_context[index]->channel_layout, - avpipe_channel_layout_name(encoder_context->codec_context[index]->channel_layout), - av_get_sample_fmt_name(encoder_context->codec_context[index]->sample_fmt), - encoder_context->codec_context[index]->sample_rate); + /* + * If sample_rate is set and + * - encoder is not "aac" or + * - if encoder is "aac" and encoder sample_rate is not valid and transcoding is pan/merge/join + * then + * - set encoder sample_rate to the specified sample_rate. 
+ */ + if (sample_rate > 0 && + (strcmp(ecodec, "aac") || !is_valid_aac_sample_rate(encoder_context->codec_context[output_stream_index]->sample_rate))) { + /* + * Audio resampling, which is active for aac encoder, needs more work to adjust sampling properly + * when input sample rate is different from output sample rate. (--RM) + */ + encoder_context->codec_context[output_stream_index]->sample_rate = sample_rate; + + /* Update timebase for the new sample rate */ + encoder_context->codec_context[output_stream_index]->time_base = (AVRational){1, sample_rate}; + encoder_context->stream[output_stream_index]->time_base = (AVRational){1, sample_rate}; + } - encoder_context->codec_context[index]->bit_rate = params->audio_bitrate; + elv_dbg("ENCODER channels=%d, channel_layout=%d (%s), sample_fmt=%s, sample_rate=%d", + encoder_context->codec_context[output_stream_index]->channels, + encoder_context->codec_context[output_stream_index]->channel_layout, + avpipe_channel_layout_name(encoder_context->codec_context[output_stream_index]->channel_layout), + av_get_sample_fmt_name(encoder_context->codec_context[output_stream_index]->sample_fmt), + encoder_context->codec_context[output_stream_index]->sample_rate); - /* Allow the use of the experimental AAC encoder. */ - encoder_context->codec_context[index]->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; + encoder_context->codec_context[output_stream_index]->bit_rate = params->audio_bitrate; - rc = set_encoder_options(encoder_context, decoder_context, params, encoder_context->audio_stream_index[0], - encoder_context->stream[encoder_context->audio_stream_index[0]]->time_base.den); - if (rc < 0) { - elv_err("Failed to set audio encoder options, url=%s", params->url); - return rc; - } + /* Allow the use of the experimental AAC encoder. 
*/ + encoder_context->codec_context[output_stream_index]->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; - AVCodecContext *encoder_codec_context = encoder_context->codec_context[index]; - /* Some container formats (like MP4) require global headers to be present. - * Mark the encoder so that it behaves accordingly. */ - if (format_context->oformat->flags & AVFMT_GLOBALHEADER) - encoder_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + rc = set_encoder_options(encoder_context, decoder_context, params, decoder_context->audio_stream_index[i], + encoder_context->stream[output_stream_index]->time_base.den); + if (rc < 0) { + elv_err("Failed to set audio encoder options, url=%s", params->url); + return rc; + } - /* Open audio encoder codec */ - if (avcodec_open2(encoder_context->codec_context[index], encoder_context->codec[index], NULL) < 0) { - elv_dbg("Could not open encoder for audio, index=%d", index); - return eav_open_codec; - } + AVCodecContext *encoder_codec_context = encoder_context->codec_context[output_stream_index]; + /* Some container formats (like MP4) require global headers to be present. + * Mark the encoder so that it behaves accordingly. 
*/ + if (format_context->oformat->flags & AVFMT_GLOBALHEADER) + encoder_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; - /* Open audio encoder codec */ - if (avcodec_open2(encoder_context->codec_context[index], encoder_context->codec[index], NULL) < 0) { - elv_dbg("Could not open encoder for audio, index=%d", index); - return eav_open_codec; - } + /* Open audio encoder codec */ + if (avcodec_open2(encoder_context->codec_context[output_stream_index], encoder_context->codec[output_stream_index], NULL) < 0) { + elv_dbg("Could not open encoder for audio, stream_index=%d", stream_index); + return eav_open_codec; + } - elv_dbg("encoder audio stream index=%d, bitrate=%d, sample_fmts=%s, timebase=%d, output frame_size=%d, sample_rate=%d, channel_layout=%s", - index, encoder_context->codec_context[index]->bit_rate, - av_get_sample_fmt_name(encoder_context->codec_context[index]->sample_fmt), - encoder_context->codec_context[index]->time_base.den, encoder_context->codec_context[index]->frame_size, - encoder_context->codec_context[index]->sample_rate, - channel_name); + elv_dbg("encoder audio stream index=%d, bitrate=%d, sample_fmts=%s, timebase=%d, output frame_size=%d, sample_rate=%d, channel_layout=%s", + stream_index, encoder_context->codec_context[output_stream_index]->bit_rate, + av_get_sample_fmt_name(encoder_context->codec_context[output_stream_index]->sample_fmt), + encoder_context->codec_context[output_stream_index]->time_base.den, encoder_context->codec_context[output_stream_index]->frame_size, + encoder_context->codec_context[output_stream_index]->sample_rate, + channel_name); - if (avcodec_parameters_from_context( - encoder_context->stream[index]->codecpar, - encoder_context->codec_context[index]) < 0) { - elv_err("Failed to copy encoder parameters to output stream, url=%s", params->url); - return eav_codec_param; + if (avcodec_parameters_from_context( + encoder_context->stream[output_stream_index]->codecpar, + encoder_context->codec_context[output_stream_index]) < 0) { + elv_err("Failed to copy encoder parameters to output stream, url=%s", params->url); + return eav_codec_param; + + } #ifdef USE_RESAMPLE_AAC - if (!strcmp(ecodec, "aac") && - params->xc_type & xc_audio && - 
params->xc_type != xc_audio_merge && - params->xc_type != xc_audio_join && - params->xc_type != xc_audio_pan) { - init_resampler(decoder_context->codec_context[index], encoder_context->codec_context[index], + if (!strcmp(ecodec, "aac") && + params->xc_type & xc_audio && + params->xc_type != xc_audio_merge && + params->xc_type != xc_audio_join && + params->xc_type != xc_audio_pan) { + init_resampler(decoder_context->codec_context[stream_index], encoder_context->codec_context[output_stream_index], &decoder_context->resampler_context); - /* Create the FIFO buffer based on the specified output sample format. */ - if (!(decoder_context->fifo = av_audio_fifo_alloc(encoder_context->codec_context[index]->sample_fmt, - encoder_context->codec_context[index]->channels, 1))) { - elv_err("Failed to allocate audio FIFO, url=%s", params->url); - return eav_mem_alloc; + /* Create the FIFO buffer based on the specified output sample format. */ + if (!(decoder_context->fifo = av_audio_fifo_alloc(encoder_context->codec_context[output_stream_index]->sample_fmt, + encoder_context->codec_context[output_stream_index]->channels, 1))) { + elv_err("Failed to allocate audio FIFO, url=%s", params->url); + return eav_mem_alloc; + } } - } #endif - encoder_context->audio_enc_stream_index = index; + //encoder_context->audio_enc_stream_index[i] = stream_index; CLEAN + } + return 0; } +static int +num_audio_output( + coderctx_t *decoder_context, + xcparams_t *params) +{ + int n_decoder_auido = decoder_context ? decoder_context->n_audio : 0; + if (!params) + return 0; + + if (params->xc_type == xc_audio_merge || params->xc_type == xc_audio_join || params->xc_type == xc_audio_pan) + return 1; + + return params->n_audio > 0 ? 
params->n_audio : n_decoder_auido; +} + static int prepare_encoder( coderctx_t *encoder_context, @@ -1498,10 +1536,18 @@ prepare_encoder( } } if (params->xc_type & xc_audio) { - avformat_alloc_output_context2(&encoder_context->format_context2, NULL, format, filename2); - if (!encoder_context->format_context2) { - elv_dbg("could not allocate memory for audio output format"); - return eav_codec_context; + encoder_context->n_audio_output = num_audio_output(decoder_context, params); + for (int i=0; in_audio_output; i++) { + if (!strcmp(params->format, "hls") || !strcmp(params->format, "dash")) { + avformat_alloc_output_context2(&encoder_context->format_context2[i], NULL, format, filename2); + } else { + snprintf(encoder_context->filename2[i], MAX_AVFILENAME_LEN, "fsegment-audio%d-%s.mp4", i, "%05d"); + avformat_alloc_output_context2(&encoder_context->format_context2[i], NULL, format, encoder_context->filename2[i]); + } + if (!encoder_context->format_context2[i]) { + elv_dbg("could not allocate memory for audio output format stream_index=%d", params->audio_index[i]); + return eav_codec_context; + } } } @@ -1527,16 +1573,18 @@ prepare_encoder( "hls_enc_key_url", params->crypt_key_url, 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, "hls_enc", "1", 0); - if (params->crypt_iv != NULL) - av_opt_set(encoder_context->format_context2->priv_data, "hls_enc_iv", - params->crypt_iv, 0); - if (params->crypt_key != NULL) - av_opt_set(encoder_context->format_context2->priv_data, - "hls_enc_key", params->crypt_key, 0); - if (params->crypt_key_url != NULL) - av_opt_set(encoder_context->format_context2->priv_data, - "hls_enc_key_url", params->crypt_key_url, 0); + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "hls_enc", "1", 0); + if (params->crypt_iv != NULL) + av_opt_set(encoder_context->format_context2[i]->priv_data, "hls_enc_iv", + params->crypt_iv, 0); + if (params->crypt_key != 
NULL) + av_opt_set(encoder_context->format_context2[i]->priv_data, + "hls_enc_key", params->crypt_key, 0); + if (params->crypt_key_url != NULL) + av_opt_set(encoder_context->format_context2[i]->priv_data, + "hls_enc_key_url", params->crypt_key_url, 0); + } } break; case crypt_cenc: @@ -1548,10 +1596,12 @@ prepare_encoder( "encryption_scheme", "cenc-aes-ctr", 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc", 0); - av_opt_set(encoder_context->format_context2->priv_data, + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc-aes-ctr", 0); + } } break; case crypt_cbc1: @@ -1562,10 +1612,12 @@ prepare_encoder( "encryption_scheme", "cenc-aes-cbc", 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cbc1", 0); - av_opt_set(encoder_context->format_context2->priv_data, + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc-aes-cbc", 0); + } } break; case crypt_cens: @@ -1576,10 +1628,12 @@ prepare_encoder( "encryption_scheme", "cenc-aes-ctr-pattern", 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cens", 0); - av_opt_set(encoder_context->format_context2->priv_data, + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc-aes-ctr-pattern", 0); + } } break; case crypt_cbcs: @@ -1594,14 +1648,16 @@ prepare_encoder( params->crypt_iv, 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, + for (int i=0; in_audio_output; i++) { + 
av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cbcs", 0); - av_opt_set(encoder_context->format_context2->priv_data, + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc-aes-cbc-pattern", 0); - av_opt_set(encoder_context->format_context2->priv_data, "encryption_iv", - params->crypt_iv, 0); - av_opt_set(encoder_context->format_context2->priv_data, "hls_enc_iv", /* To remove */ - params->crypt_iv, 0); + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_iv", + params->crypt_iv, 0); + av_opt_set(encoder_context->format_context2[i]->priv_data, "hls_enc_iv", /* To remove */ + params->crypt_iv, 0); + } } break; case crypt_none: @@ -1622,10 +1678,12 @@ prepare_encoder( params->crypt_key, 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, "encryption_kid", + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_kid", params->crypt_kid, 0); - av_opt_set(encoder_context->format_context2->priv_data, "encryption_key", + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_key", params->crypt_key, 0); + } } default: break; @@ -1639,7 +1697,8 @@ prepare_encoder( } if (params->xc_type & xc_audio) { - encoder_context->audio_enc_stream_index = -1; + //for (int i=0; iaudio_enc_stream_index[i] = -1; if ((rc = prepare_audio_encoder(encoder_context, decoder_context, params)) != eav_success) { elv_err("Failure in preparing audio encoder, rc=%d, url=%s", rc, params->url); return rc; @@ -1647,36 +1706,42 @@ prepare_encoder( } /* - * Allocate an array of 2 out_handler_t: one for video and one for audio output stream. - * TODO: needs to allocate up to number of streams when transcoding multiple streams at the same time (RM) + * Allocate an array of MAX_STREAMS out_handler_t: one for video and one for each audio output stream. 
+ * Needs to allocate up to number of streams when transcoding multiple streams at the same time. */ if (params->xc_type & xc_video) { - out_tracker = (out_tracker_t *) calloc(2, sizeof(out_tracker_t)); - out_tracker[0].out_handlers = out_tracker[1].out_handlers = out_handlers; - out_tracker[0].inctx = out_tracker[1].inctx = inctx; - out_tracker[0].video_stream_index = out_tracker[1].video_stream_index = decoder_context->video_stream_index; - out_tracker[0].audio_stream_index = out_tracker[1].audio_stream_index = decoder_context->audio_stream_index[0]; - out_tracker[0].seg_index = out_tracker[1].seg_index = atoi(params->start_segment_str); - out_tracker[0].encoder_ctx = out_tracker[1].encoder_ctx = encoder_context; - out_tracker[0].xc_type = out_tracker[1].xc_type = xc_video; + out_tracker = (out_tracker_t *) calloc(MAX_STREAMS, sizeof(out_tracker_t)); + out_tracker[0].out_handlers = out_handlers; + out_tracker[0].inctx = inctx; + out_tracker[0].video_stream_index = decoder_context->video_stream_index; + out_tracker[0].audio_stream_index = decoder_context->audio_stream_index[0]; + out_tracker[0].seg_index = atoi(params->start_segment_str); + out_tracker[0].encoder_ctx = encoder_context; + out_tracker[0].xc_type = xc_video; encoder_context->format_context->avpipe_opaque = out_tracker; } if (params->xc_type & xc_audio) { - out_tracker = (out_tracker_t *) calloc(2, sizeof(out_tracker_t)); - out_tracker[0].out_handlers = out_tracker[1].out_handlers = out_handlers; - out_tracker[0].inctx = out_tracker[1].inctx = inctx; - out_tracker[0].video_stream_index = out_tracker[1].video_stream_index = decoder_context->video_stream_index; - out_tracker[0].audio_stream_index = out_tracker[1].audio_stream_index = decoder_context->audio_stream_index[0]; - out_tracker[0].seg_index = out_tracker[1].seg_index = atoi(params->start_segment_str); - out_tracker[0].encoder_ctx = out_tracker[1].encoder_ctx = encoder_context; - out_tracker[0].xc_type = out_tracker[1].xc_type = xc_audio; - 
encoder_context->format_context2->avpipe_opaque = out_tracker; + for (int j=0; jn_audio_output; j++) { + out_tracker = (out_tracker_t *) calloc(MAX_STREAMS, sizeof(out_tracker_t)); + for (int i=0; in_audio_output; i++) { + out_tracker[i].out_handlers = out_handlers; + out_tracker[i].inctx = inctx; + out_tracker[i].video_stream_index = decoder_context->video_stream_index; + out_tracker[i].audio_stream_index = decoder_context->audio_stream_index[i]; + out_tracker[i].seg_index = atoi(params->start_segment_str); + out_tracker[i].encoder_ctx = encoder_context; + out_tracker[i].xc_type = xc_audio; + } + encoder_context->format_context2[j]->avpipe_opaque = out_tracker; + } } dump_encoder(inctx->url, encoder_context->format_context, params); dump_codec_context(encoder_context->codec_context[encoder_context->video_stream_index]); - dump_encoder(inctx->url, encoder_context->format_context2, params); + for (int i=0; in_audio_output; i++) { + dump_encoder(inctx->url, encoder_context->format_context2[i], params); + } dump_codec_context(encoder_context->codec_context[encoder_context->audio_stream_index[0]]); return 0; @@ -1837,12 +1902,12 @@ should_skip_encoding( url = decoder_context->inctx->url; elv_warn("ENCODE SKIP invalid frame, stream_index=%d, url=%s, video_last_pts_read=%"PRId64", audio_last_pts_read=%"PRId64, stream_index, url, - encoder_context->video_last_pts_read, encoder_context->audio_last_pts_read); + encoder_context->video_last_pts_read, encoder_context->audio_last_pts_read[stream_index]); return 1; } if (selected_decoded_audio(decoder_context, stream_index) >= 0) - frame_in_pts_offset = frame->pts - decoder_context->audio_input_start_pts; + frame_in_pts_offset = frame->pts - decoder_context->audio_input_start_pts[stream_index]; else frame_in_pts_offset = frame->pts - decoder_context->video_input_start_pts; @@ -1899,6 +1964,7 @@ encode_frame( int debug_frame_level) { int ret; + int index = stream_index; int rc = eav_success; AVFormatContext *format_context = 
encoder_context->format_context; AVCodecContext *codec_context = encoder_context->codec_context[stream_index]; @@ -1906,8 +1972,22 @@ encode_frame( avpipe_io_handler_t *out_handlers; ioctx_t *outctx; - if (selected_decoded_audio(decoder_context, stream_index) >= 0) - format_context = encoder_context->format_context2; + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) { + index = 0; + } + codec_context = encoder_context->codec_context[index]; + + int i = -1; + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) { + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) { + i = 0; + } + format_context = encoder_context->format_context2[i]; + } int skip = should_skip_encoding(decoder_context, encoder_context, stream_index, params, frame); if (skip) @@ -1924,8 +2004,8 @@ encode_frame( const char *st = stream_type_str(encoder_context, stream_index); - // Adjust PTS if input stream starts at an arbitrary value (MPEG-TS/RTMP) - if ( is_protocol(decoder_context) && (!strcmp(params->format, "fmp4-segment"))) { + // Adjust PTS if input stream starts at an arbitrary value (i.e mostly for MPEG-TS/RTMP) + if (!strcmp(params->format, "fmp4-segment")) { if (stream_index == decoder_context->video_stream_index) { if (encoder_context->first_encoding_video_pts == -1) { /* Remember the first video PTS to use as an offset later */ @@ -1947,22 +2027,23 @@ encode_frame( } #ifndef USE_RESAMPLE_AAC else if (selected_decoded_audio(decoder_context, stream_index) >= 0) { - if (encoder_context->first_encoding_audio_pts == -1) { + if (encoder_context->first_encoding_audio_pts[stream_index] == AV_NOPTS_VALUE) { /* Remember the first video PTS to use as an offset later */ - encoder_context->first_encoding_audio_pts = frame->pts; - elv_log("PTS first_encoding_audio_pts=%"PRId64" dec=%"PRId64" read=%"PRId64" stream=%d:%s", - 
encoder_context->first_encoding_audio_pts, - decoder_context->first_decoding_audio_pts, + encoder_context->first_encoding_audio_pts[stream_index] = frame->pts; + elv_log("PTS stream_index=%d first_encoding_audio_pts=%"PRId64" dec=%"PRId64" read=%"PRId64" stream=%d:%s", + stream_index, + encoder_context->first_encoding_audio_pts[stream_index], + decoder_context->first_decoding_audio_pts[stream_index], encoder_context->first_read_frame_pts[stream_index], params->xc_type, st); } // Adjust audio frame pts such that first frame sent to the encoder has PTS 0 if (frame->pts != AV_NOPTS_VALUE) { - frame->pts -= encoder_context->first_encoding_audio_pts; + frame->pts -= encoder_context->first_encoding_audio_pts[stream_index]; frame->pkt_dts = frame->pts; } if (frame->best_effort_timestamp != AV_NOPTS_VALUE) - frame->best_effort_timestamp -= encoder_context->first_encoding_audio_pts; + frame->best_effort_timestamp -= encoder_context->first_encoding_audio_pts[stream_index]; } #endif } @@ -1996,7 +2077,7 @@ encode_frame( if (frame) { if (params->xc_type & xc_audio && selected_decoded_audio(decoder_context, stream_index) >= 0) - encoder_context->audio_last_pts_sent_encode = frame->pts; + encoder_context->audio_last_pts_sent_encode[stream_index] = frame->pts; else if (params->xc_type & xc_video && stream_index == decoder_context->video_stream_index) encoder_context->video_last_pts_sent_encode = frame->pts; } @@ -2054,10 +2135,10 @@ encode_frame( if (params->xc_type == xc_video) assert(output_packet->duration == 0); /* Only to notice if this ever gets set */ if (selected_decoded_audio(decoder_context, stream_index) >= 0 && params->xc_type == xc_all) { - if (!output_packet->duration && encoder_context->audio_last_dts != AV_NOPTS_VALUE) - output_packet->duration = output_packet->dts - encoder_context->audio_last_dts; - encoder_context->audio_last_dts = output_packet->dts; - encoder_context->audio_last_pts_encoded = output_packet->pts; + if (!output_packet->duration && 
encoder_context->audio_last_dts[stream_index] != AV_NOPTS_VALUE) + output_packet->duration = output_packet->dts - encoder_context->audio_last_dts[stream_index]; + encoder_context->audio_last_dts[stream_index] = output_packet->dts; + encoder_context->audio_last_pts_encoded[stream_index] = output_packet->pts; } else { if (!output_packet->duration && encoder_context->video_last_dts != AV_NOPTS_VALUE) output_packet->duration = output_packet->dts - encoder_context->video_last_dts; @@ -2068,7 +2149,7 @@ encode_frame( output_packet->pts += params->start_pts; output_packet->dts += params->start_pts; - if (decoder_context->is_mpegts && + if ((decoder_context->is_mpegts || decoder_context->is_srt) && encoder_context->video_encoder_prev_pts > 0 && stream_index == decoder_context->video_stream_index && encoder_context->calculated_frame_duration > 0 && @@ -2097,23 +2178,23 @@ encode_frame( params->ecodec2 != NULL && !strcmp(avcodec_get_name(decoder_context->codec_parameters[stream_index]->codec_id), params->ecodec2))) && (decoder_context->stream[stream_index]->time_base.den != - encoder_context->stream[stream_index]->time_base.den || + encoder_context->stream[index]->time_base.den || decoder_context->stream[stream_index]->time_base.num != - encoder_context->stream[stream_index]->time_base.num)) { + encoder_context->stream[index]->time_base.num)) { av_packet_rescale_ts(output_packet, decoder_context->stream[stream_index]->time_base, - encoder_context->stream[stream_index]->time_base + encoder_context->stream[index]->time_base ); } if (selected_decoded_audio(decoder_context, stream_index) >= 0) { /* Set the packet duration if it is not the first audio packet */ - if (encoder_context->audio_pts != AV_NOPTS_VALUE) - output_packet->duration = output_packet->pts - encoder_context->audio_pts; + if (encoder_context->audio_pts[stream_index] != AV_NOPTS_VALUE) + output_packet->duration = output_packet->pts - encoder_context->audio_pts[stream_index]; else output_packet->duration = 0; - 
encoder_context->audio_pts = output_packet->pts; - encoder_context->audio_frames_written++; + encoder_context->audio_pts[stream_index] = output_packet->pts; + encoder_context->audio_frames_written[stream_index]++; } else { if (encoder_context->video_pts != AV_NOPTS_VALUE) output_packet->duration = output_packet->pts - encoder_context->video_pts; @@ -2162,9 +2243,9 @@ encode_frame( if (stream_index == decoder_context->video_stream_index) outctx->total_frames_written = encoder_context->video_frames_written; else - outctx->total_frames_written = encoder_context->audio_frames_written; + outctx->total_frames_written = encoder_context->audio_frames_written[stream_index]; outctx->frames_written++; - out_handlers->avpipe_stater(outctx, out_stat_frame_written); + out_handlers->avpipe_stater(outctx, stream_index, out_stat_frame_written); } /* mux encoded frame */ @@ -2203,9 +2284,10 @@ do_bypass( AVFormatContext *format_context; - if (is_audio) - format_context = encoder_context->format_context2; - else + if (is_audio) { + int i = selected_decoded_audio(decoder_context, packet->stream_index); + format_context = encoder_context->format_context2[i]; + } else format_context = encoder_context->format_context; if (packet->pts == AV_NOPTS_VALUE || @@ -2232,9 +2314,9 @@ do_bypass( if (out_handlers->avpipe_stater && outctx) { if (is_audio) { if (outctx->type != avpipe_audio_init_stream) - encoder_context->audio_frames_written++; - encoder_context->audio_last_pts_sent_encode = packet->pts; - outctx->total_frames_written = encoder_context->audio_frames_written; + encoder_context->audio_frames_written[packet->stream_index]++; + encoder_context->audio_last_pts_sent_encode[packet->stream_index] = packet->pts; + outctx->total_frames_written = encoder_context->audio_frames_written[packet->stream_index]; } else { if (outctx->type != avpipe_video_init_stream) encoder_context->video_frames_written++; @@ -2242,7 +2324,7 @@ do_bypass( outctx->total_frames_written = 
encoder_context->video_frames_written; } outctx->frames_written++; - out_handlers->avpipe_stater(outctx, out_stat_frame_written); + out_handlers->avpipe_stater(outctx, packet->stream_index, out_stat_frame_written); } } @@ -2282,23 +2364,28 @@ transcode_audio( AVFrame *frame, AVFrame *filt_frame, int stream_index, - xcparams_t *p, + xcparams_t *params, int debug_frame_level) { int ret; AVCodecContext *codec_context = decoder_context->codec_context[stream_index]; - int audio_enc_stream_index = encoder_context->audio_enc_stream_index; + int audio_enc_stream_index = stream_index; int response; + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) + audio_enc_stream_index = 0; + if (debug_frame_level) elv_dbg("DECODE stream_index=%d send_packet pts=%"PRId64" dts=%"PRId64 " duration=%d, input frame_size=%d, output frame_size=%d, audio_output_pts=%"PRId64, stream_index, packet->pts, packet->dts, packet->duration, codec_context->frame_size, encoder_context->codec_context[audio_enc_stream_index]->frame_size, decoder_context->audio_output_pts); - if (p->bypass_transcoding) { - return do_bypass(1, decoder_context, encoder_context, packet, p, debug_frame_level); + if (params->bypass_transcoding) { + return do_bypass(1, decoder_context, encoder_context, packet, params, debug_frame_level); } // Send the packet to the decoder @@ -2309,7 +2396,7 @@ transcode_audio( * Ignore the error and continue. 
*/ elv_err("Failure while sending an audio packet to the decoder: err=%d, %s, url=%s", - response, av_err2str(response), p->url); + response, av_err2str(response), params->url); // Ignore the error and continue return eav_success; } @@ -2321,24 +2408,24 @@ transcode_audio( break; } else if (response < 0) { elv_err("Failure while receiving a frame from the decoder: %s, url=%s", - av_err2str(response), p->url); + av_err2str(response), params->url); return eav_receive_frame; } - if (decoder_context->first_decoding_audio_pts == AV_NOPTS_VALUE) { - decoder_context->first_decoding_audio_pts = frame->pts; + if (decoder_context->first_decoding_audio_pts[stream_index] == AV_NOPTS_VALUE) { + decoder_context->first_decoding_audio_pts[stream_index] = frame->pts; avpipe_io_handler_t *in_handlers = decoder_context->in_handlers; - decoder_context->inctx->decoding_start_pts = decoder_context->first_decoding_audio_pts; - elv_log("first_decoding_audio_pts=%"PRId64, - decoder_context->first_decoding_audio_pts); + decoder_context->inctx->decoding_start_pts = decoder_context->first_decoding_audio_pts[stream_index]; + elv_log("stream_index=%d first_decoding_audio_pts=%"PRId64, + stream_index, decoder_context->first_decoding_audio_pts[stream_index]); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_decoding_audio_start_pts); + in_handlers->avpipe_stater(decoder_context->inctx, stream_index, in_stat_decoding_audio_start_pts); } dump_frame(1, "IN ", codec_context->frame_number, frame, debug_frame_level); - ret = check_pts_wrapped(&decoder_context->audio_last_wrapped_pts, - &decoder_context->audio_last_input_pts, + ret = check_pts_wrapped(&decoder_context->audio_last_wrapped_pts[stream_index], + &decoder_context->audio_last_input_pts[stream_index], frame, stream_index); if (ret == eav_pts_wrapped) { @@ -2346,34 +2433,37 @@ transcode_audio( return ret; } - decoder_context->audio_pts = packet->pts; + decoder_context->audio_pts[stream_index] = 
packet->pts; /* push the decoded frame into the filtergraph */ - for (int i=0; in_audio; i++) { - /* push the decoded frame into the filtergraph */ - if (stream_index == decoder_context->audio_stream_index[i]) { - if (av_buffersrc_add_frame_flags(decoder_context->audio_buffersrc_ctx[i], frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) { - elv_err("Failure in feeding into audio filtergraph source %d, url=%s", i, p->url); - break; - } + int i = selected_decoded_audio(decoder_context, stream_index); + if (i >= 0) { + if (av_buffersrc_add_frame_flags(decoder_context->audio_buffersrc_ctx[i], frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) { + elv_err("Failure in feeding into audio filtergraph source %d, url=%s", i, params->url); + break; } } /* pull filtered frames from the filtergraph */ while (1) { - ret = av_buffersink_get_frame(decoder_context->audio_buffersink_ctx, filt_frame); + /* For audio join, merge or pan there is only one buffer sink (0) */ + if (params->xc_type == xc_audio_join || + params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_pan) + i = 0; + ret = av_buffersink_get_frame(decoder_context->audio_buffersink_ctx[i], filt_frame); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { //elv_dbg("av_buffersink_get_frame() ret=EAGAIN"); break; } if (ret < 0) { - elv_err("Failed to execute audio frame filter ret=%d, url=%s", ret, p->url); + elv_err("Failed to execute audio frame filter ret=%d, url=%s", ret, params->url); return eav_receive_filter_frame; } dump_frame(1, "FILT ", codec_context->frame_number, filt_frame, debug_frame_level); - ret = encode_frame(decoder_context, encoder_context, filt_frame, encoder_context->audio_enc_stream_index, p, debug_frame_level); + ret = encode_frame(decoder_context, encoder_context, filt_frame, packet->stream_index, params, debug_frame_level); av_frame_unref(filt_frame); if (ret == eav_write_frame) { av_frame_unref(frame); @@ -2437,20 +2527,20 @@ transcode_audio_aac( return eav_receive_frame; } - if 
(decoder_context->first_decoding_audio_pts == AV_NOPTS_VALUE) { - decoder_context->first_decoding_audio_pts = frame->pts; + if (decoder_context->first_decoding_audio_pts[stream_index] == AV_NOPTS_VALUE) { + decoder_context->first_decoding_audio_pts[stream_index] = frame->pts; avpipe_io_handler_t *in_handlers = decoder_context->in_handlers; - decoder_context->inctx->decoding_start_pts = decoder_context->first_decoding_audio_pts; - elv_log("first_decoding_audio_pts=%"PRId64, - decoder_context->first_decoding_audio_pts); + decoder_context->inctx->decoding_start_pts = decoder_context->first_decoding_audio_pts[stream_index]; + elv_log("stream_index=%d first_decoding_audio_pts=%"PRId64, + stream_index, decoder_context->first_decoding_audio_pts); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_decoding_audio_start_pts); + in_handlers->avpipe_stater(decoder_context->inctx, stream_index, in_stat_decoding_audio_start_pts); } dump_frame(1, "IN ", codec_context->frame_number, frame, debug_frame_level); - ret = check_pts_wrapped(&decoder_context->audio_last_wrapped_pts, - &decoder_context->audio_last_input_pts, + ret = check_pts_wrapped(&decoder_context->audio_last_wrapped_pts[stream_index], + &decoder_context->audio_last_input_pts[stream_index], frame, stream_index); if (ret == eav_pts_wrapped) { @@ -2458,9 +2548,6 @@ transcode_audio_aac( return ret; } - if (decoder_context->audio_input_prev_pts < 0) - decoder_context->audio_input_prev_pts = frame->pts; - decoder_context->audio_pts = packet->pts; /* Temporary storage for the converted input samples. 
*/ uint8_t **converted_input_samples = NULL; @@ -2507,38 +2594,7 @@ transcode_audio_aac( } int64_t d; - // TODO this handles packet loss but not irregular PTS deltas that are not equal to pkt_duration - // If audio frames have irregular PTS the code will produce incorrect results - disabled by default - if (p->audio_fill_gap && - (decoder_context->is_mpegts && frame->pts - decoder_context->audio_input_prev_pts > frame->pkt_duration)) { - /* - * float pkt_ratio = ((float)(encoder_context->codec_context[stream_index]->sample_rate * frame->pkt_duration)) / - * (((float) decoder_context->stream[stream_index]->time_base.den) * filt_frame->nb_samples); - * pkt_ratio shows the transcoding ratio of output frames (packets) to input frames (packets). - * For example, if input timebase is 90000 with pkt_duration = 2880, - * and output sample rate is 48000 with frame duration = 1024 then pkt_ratio = 3/2 that means - * for every 2 input audio frames, there would be 3 output audio frame. - * Now to calculate output packet pts from input packet pts: - * output_pkt_pts = decoder_context->audio_output_pts + d - * where d = ((float) (frame->pts - decoder_context->audio_input_prev_pts) / frame->pkt_duration) * pkt_ratio * filt_frame->nb_samples - * After simplification we will have d as follows: - */ - d = (frame->pts - decoder_context->audio_input_prev_pts) * (encoder_context->codec_context[stream_index]->time_base.den) / - decoder_context->stream[stream_index]->time_base.den; - - /* Round up d to nearest multiple of output frame size */ - d = ((d+output_frame_size-1)/output_frame_size)*output_frame_size; - elv_warn("AUDIO JUMP from=%"PRId64", to=%"PRId64", frame->pts=%"PRId64", audio_input_prev_pts=%"PRId64", pkt_duration=%d", - decoder_context->audio_output_pts, - decoder_context->audio_output_pts + d, - frame->pts, - decoder_context->audio_input_prev_pts, - frame->pkt_duration); - } else { - d = output_frame_size; - } - - decoder_context->audio_input_prev_pts = frame->pts; + d = 
output_frame_size; while (d > 0) { /* When using FIFO frames no longer have PTS */ @@ -2548,7 +2604,7 @@ transcode_audio_aac( decoder_context->audio_duration = filt_frame->pts; int should_skip = 0; - int64_t frame_in_pts_offset = frame->pts - decoder_context->audio_input_start_pts; + int64_t frame_in_pts_offset = frame->pts - decoder_context->audio_input_start_pts[stream_index]; /* If frame PTS < start_time_ts then don't encode audio frame */ if (p->start_time_ts > 0 && frame_in_pts_offset < p->start_time_ts) { elv_dbg("ENCODE SKIP audio frame early pts=%" PRId64 @@ -2571,13 +2627,8 @@ transcode_audio_aac( filt_frame->pts, decoder_context->audio_duration); } - if (p->audio_fill_gap) { - decoder_context->audio_output_pts += output_frame_size; - d -= output_frame_size; - } else { - decoder_context->audio_output_pts += d; - d = 0; - } + decoder_context->audio_output_pts += d; + d = 0; } av_frame_unref(filt_frame); @@ -2675,7 +2726,7 @@ transcode_video( elv_log("first_decoding_video_pts=%"PRId64" pktdts=%"PRId64, decoder_context->first_decoding_video_pts, frame->pkt_dts); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_decoding_video_start_pts); + in_handlers->avpipe_stater(decoder_context->inctx, stream_index, in_stat_decoding_video_start_pts); } /* If force_equal_fduration is set then frame_duration > 0 is true */ @@ -2976,7 +3027,7 @@ flush_decoder( if (!p->bypass_transcoding && (i = selected_decoded_audio(decoder_context, stream_index)) >= 0) { buffersrc_ctx = decoder_context->audio_buffersrc_ctx[i]; - buffersink_ctx = decoder_context->audio_buffersink_ctx; + buffersink_ctx = decoder_context->audio_buffersink_ctx[i]; } while (response >=0) { @@ -3043,15 +3094,16 @@ should_stop_decoding( int frames_allowed_past_duration) { int64_t input_packet_rel_pts = 0; + int stream_index = input_packet->stream_index; if (decoder_context->cancelled) return 1; - if (input_packet->stream_index != decoder_context->video_stream_index && - 
selected_decoded_audio(decoder_context, input_packet->stream_index) < 0) + if (stream_index != decoder_context->video_stream_index && + selected_decoded_audio(decoder_context, stream_index) < 0) return 0; - if (input_packet->stream_index == decoder_context->video_stream_index && + if (stream_index == decoder_context->video_stream_index && (params->xc_type & xc_video)) { if (decoder_context->video_input_start_pts == AV_NOPTS_VALUE) { decoder_context->video_input_start_pts = input_packet->pts; @@ -3060,15 +3112,15 @@ should_stop_decoding( } input_packet_rel_pts = input_packet->pts - decoder_context->video_input_start_pts; - } else if (selected_decoded_audio(decoder_context, input_packet->stream_index) >= 0 && + } else if (selected_decoded_audio(decoder_context, stream_index) >= 0 && params->xc_type & xc_audio) { - if (decoder_context->audio_input_start_pts == AV_NOPTS_VALUE) { - decoder_context->audio_input_start_pts = input_packet->pts; - elv_log("audio_input_start_pts=%"PRId64, - decoder_context->audio_input_start_pts); + if (decoder_context->audio_input_start_pts[stream_index] == AV_NOPTS_VALUE) { + decoder_context->audio_input_start_pts[stream_index] = input_packet->pts; + elv_log("stream_index=%d audio_input_start_pts=%"PRId64, + stream_index, decoder_context->audio_input_start_pts[stream_index]); } - input_packet_rel_pts = input_packet->pts - decoder_context->audio_input_start_pts; + input_packet_rel_pts = input_packet->pts - decoder_context->audio_input_start_pts[stream_index]; } /* PENDING (RM) for some of the live feeds (like RTMP) we need to scale input_packet_rel_pts */ @@ -3095,10 +3147,10 @@ should_stop_decoding( } if (input_packet->pts != AV_NOPTS_VALUE) { - if (selected_decoded_audio(decoder_context, input_packet->stream_index) >= 0 && + if (selected_decoded_audio(decoder_context, stream_index) >= 0 && params->xc_type & xc_audio) - encoder_context->audio_last_pts_read = input_packet->pts; - else if (input_packet->stream_index == 
decoder_context->video_stream_index && + encoder_context->audio_last_pts_read[stream_index] = input_packet->pts; + else if (stream_index == decoder_context->video_stream_index && params->xc_type & xc_video) encoder_context->video_last_pts_read = input_packet->pts; } @@ -3129,13 +3181,14 @@ skip_until_start_time_pts( if (params->xc_type == xc_video) input_start_pts = decoder_context->video_input_start_pts; else - input_start_pts = decoder_context->audio_input_start_pts; + input_start_pts = decoder_context->audio_input_start_pts[input_packet->stream_index]; const int64_t packet_in_pts_offset = input_packet->pts - input_start_pts; /* Drop frames before the desired 'start_time' */ if (packet_in_pts_offset < params->start_time_ts) { - elv_dbg("PREDECODE SKIP frame early pts=%" PRId64 ", start_time_ts=%" PRId64 + elv_dbg("PREDECODE SKIP frame early stream_index=%d, pts=%" PRId64 ", start_time_ts=%" PRId64 ", input_start_pts=%" PRId64 ", packet_in_pts_offset=%" PRId64, + input_packet->stream_index, input_packet->pts, params->start_time_ts, input_start_pts, packet_in_pts_offset); return 1; @@ -3175,7 +3228,7 @@ skip_for_sync( decoder_context->first_key_frame_pts, input_packet->stream_index, input_packet->flags, input_packet->dts); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_first_keyframe_pts); + in_handlers->avpipe_stater(decoder_context->inctx, input_packet->stream_index, in_stat_first_keyframe_pts); dump_packet(0, "SYNC ", input_packet, 1); return 0; @@ -3472,11 +3525,14 @@ avpipe_xc( goto xc_done; } - if (params->xc_type & xc_audio && - avformat_write_header(encoder_context->format_context2, NULL) != eav_success) { - elv_err("Failed to write audio output file header, url=%s", params->url); - rc = eav_write_header; - goto xc_done; + if (params->xc_type & xc_audio) { + for (int i=0; in_audio_output; i++) { + if (avformat_write_header(encoder_context->format_context2[i], NULL) != eav_success) { + elv_err("Failed to write 
audio output file header, url=%s", params->url); + rc = eav_write_header; + goto xc_done; + } + } } int video_stream_index = decoder_context->video_stream_index; @@ -3509,30 +3565,32 @@ avpipe_xc( if (params->start_time_ts != -1) { if (params->xc_type == xc_video) encoder_context->format_context->start_time = params->start_time_ts; - if (params->xc_type & xc_audio) - encoder_context->format_context2->start_time = params->start_time_ts; + if (params->xc_type & xc_audio) { + for (int i=0; in_audio_output; i++) + encoder_context->format_context2[i]->start_time = params->start_time_ts; + } /* PENDING (RM) add new start_time_ts for audio */ } decoder_context->video_input_start_pts = AV_NOPTS_VALUE; - decoder_context->audio_input_start_pts = AV_NOPTS_VALUE; decoder_context->video_duration = -1; encoder_context->audio_duration = -1; - decoder_context->audio_input_prev_pts = -1; encoder_context->video_encoder_prev_pts = -1; decoder_context->first_decoding_video_pts = AV_NOPTS_VALUE; - decoder_context->first_decoding_audio_pts = AV_NOPTS_VALUE; encoder_context->first_encoding_video_pts = -1; - encoder_context->first_encoding_audio_pts = -1; - encoder_context->audio_pts = AV_NOPTS_VALUE; encoder_context->video_pts = AV_NOPTS_VALUE; - for (int j=0; jfirst_decoding_audio_pts[j] = AV_NOPTS_VALUE; + encoder_context->first_encoding_audio_pts[j] = AV_NOPTS_VALUE; + decoder_context->audio_input_start_pts[j] = AV_NOPTS_VALUE; + encoder_context->audio_pts[j] = AV_NOPTS_VALUE; encoder_context->first_read_frame_pts[j] = -1; + encoder_context->audio_last_pts_sent_encode[j] = AV_NOPTS_VALUE; + } decoder_context->first_key_frame_pts = AV_NOPTS_VALUE; decoder_context->is_av_synced = 0; encoder_context->video_last_pts_sent_encode = -1; - encoder_context->audio_last_pts_sent_encode = -1; int64_t video_last_dts = 0; int frames_read_past_duration = 0; @@ -3636,7 +3694,7 @@ avpipe_xc( inctx->video_frames_read++; if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(inctx, 
in_stat_video_frame_read); + in_handlers->avpipe_stater(inctx, input_packet->stream_index, in_stat_video_frame_read); if (decoder_context->first_key_frame_pts == AV_NOPTS_VALUE && input_packet->flags == AV_PKT_FLAG_KEY) { @@ -3645,7 +3703,7 @@ avpipe_xc( elv_log("PTS first_key_frame_pts=%"PRId64" sidx=%d flags=%d dts=%"PRId64, decoder_context->first_key_frame_pts, input_packet->stream_index, input_packet->flags, input_packet->dts); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_first_keyframe_pts); + in_handlers->avpipe_stater(decoder_context->inctx, input_packet->stream_index, in_stat_first_keyframe_pts); } // Assert DTS is growing as expected (accommodate non integer and irregular frame duration) @@ -3664,13 +3722,13 @@ avpipe_xc( } else if (selected_decoded_audio(decoder_context, input_packet->stream_index) >= 0 && params->xc_type & xc_audio) { - encoder_context->audio_last_dts = input_packet->dts; + encoder_context->audio_last_dts[input_packet->stream_index] = input_packet->dts; dump_packet(1, "IN ", input_packet, debug_frame_level); inctx->audio_frames_read++; if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(inctx, in_stat_audio_frame_read); + in_handlers->avpipe_stater(inctx, input_packet->stream_index, in_stat_audio_frame_read); xc_frame_t *xc_frame = (xc_frame_t *) calloc(1, sizeof(xc_frame_t)); xc_frame->packet = input_packet; @@ -3699,7 +3757,7 @@ avpipe_xc( if (in_handlers->avpipe_stater) { inctx->data = (uint8_t *)hex_str; - in_handlers->avpipe_stater(inctx, in_stat_data_scte35); + in_handlers->avpipe_stater(inctx, input_packet->stream_index, in_stat_data_scte35); } break; } @@ -3728,46 +3786,85 @@ avpipe_xc( */ if (params->xc_type & xc_video && xctx->err != eav_write_frame) flush_decoder(decoder_context, encoder_context, encoder_context->video_stream_index, params, debug_frame_level); - if (params->xc_type & xc_audio && xctx->err != eav_write_frame) - flush_decoder(decoder_context, 
encoder_context, encoder_context->audio_stream_index[0], params, debug_frame_level); + if (params->xc_type & xc_audio && xctx->err != eav_write_frame) { + for (int i=0; in_audio; i++) + flush_decoder(decoder_context, encoder_context, encoder_context->audio_stream_index[i], params, debug_frame_level); + } if (params->xc_type & xc_audio_join || params->xc_type & xc_audio_merge) { for (int i=0; in_audio; i++) flush_decoder(decoder_context, encoder_context, decoder_context->audio_stream_index[i], params, debug_frame_level); } if (!params->bypass_transcoding && (params->xc_type & xc_video) && xctx->err != eav_write_frame) - encode_frame(decoder_context, encoder_context, NULL, encoder_context->video_stream_index, params, debug_frame_level); - if (!params->bypass_transcoding && params->xc_type & xc_audio && xctx->err != eav_write_frame) - encode_frame(decoder_context, encoder_context, NULL, encoder_context->audio_stream_index[0], params, debug_frame_level); + encode_frame(decoder_context, encoder_context, NULL, decoder_context->video_stream_index, params, debug_frame_level); + /* Loop through and flush all audio frames */ + if (!params->bypass_transcoding && params->xc_type & xc_audio && xctx->err != eav_write_frame) { + for (int i=0; in_audio; i++) + encode_frame(decoder_context, encoder_context, NULL, decoder_context->audio_stream_index[i], params, debug_frame_level); + } dump_trackers(decoder_context->format_context, encoder_context->format_context); if ((params->xc_type & xc_video) && rc == eav_success) av_write_trailer(encoder_context->format_context); - if ((params->xc_type & xc_audio) && rc == eav_success) - av_write_trailer(encoder_context->format_context2); + if ((params->xc_type & xc_audio) && rc == eav_success) { + for (int i=0; in_audio_output; i++) + av_write_trailer(encoder_context->format_context2[i]); + } + + char audio_last_dts_buf[(MAX_STREAMS + 1) * 20]; + char audio_input_start_pts_buf[(MAX_STREAMS + 1) * 20]; + char 
audio_last_pts_read_buf[(MAX_STREAMS + 1) * 20]; + char audio_last_pts_sent_encode_buf[(MAX_STREAMS + 1) * 20]; + char audio_last_pts_encoded_buf[(MAX_STREAMS + 1) * 20]; + audio_last_dts_buf[0] = '\0'; + audio_input_start_pts_buf[0] = '\0'; + audio_last_pts_read_buf[0] = '\0'; + audio_last_pts_sent_encode_buf[0] = '\0'; + audio_last_pts_encoded_buf[0] = '\0'; + for (int i=0; in_audio; i++) { + char buf[32]; + int audio_index = params->audio_index[i]; + if (i > 0) { + strncat(audio_last_dts_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_last_dts_buf)); + strncat(audio_input_start_pts_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_input_start_pts_buf)); + strncat(audio_last_pts_read_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_read_buf)); + strncat(audio_last_pts_sent_encode_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_sent_encode_buf)); + strncat(audio_last_pts_encoded_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_encoded_buf)); + } + sprintf(buf, "%"PRId64, encoder_context->audio_last_dts[audio_index]); + strncat(audio_last_dts_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_last_dts_buf)); + sprintf(buf, "%"PRId64, encoder_context->audio_input_start_pts[audio_index]); + strncat(audio_input_start_pts_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_input_start_pts_buf)); + sprintf(buf, "%"PRId64, encoder_context->audio_last_pts_read[audio_index]); + strncat(audio_last_pts_read_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_read_buf)); + sprintf(buf, "%"PRId64, encoder_context->audio_last_pts_sent_encode[audio_index]); + strncat(audio_last_pts_sent_encode_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_sent_encode_buf)); + sprintf(buf, "%"PRId64, encoder_context->audio_last_pts_encoded[audio_index]); + strncat(audio_last_pts_encoded_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_encoded_buf)); + } elv_log("avpipe_xc done url=%s, rc=%d, xctx->err=%d, xc-type=%d, " "last video_pts=%"PRId64" 
audio_pts=%"PRId64 - " video_input_start_pts=%"PRId64" audio_input_start_pts=%"PRId64 - " video_last_dts=%"PRId64" audio_last_dts="PRId64 - " last_pts_read=%"PRId64" last_pts_read2=%"PRId64 - " video_pts_sent_encode=%"PRId64" audio_pts_sent_encode=%"PRId64 - " last_pts_encoded=%"PRId64" last_pts_encoded2=%"PRId64, + " video_input_start_pts=%"PRId64" audio_input_start_pts=[%s]" + " video_last_dts=%"PRId64" audio_last_dts=[%s]" + " video_last_pts_read=%"PRId64" audio_last_pts_read=[%s]" + " video_pts_sent_encode=%"PRId64" audio_last_pts_sent_encode=[%s]" + " last_pts_encoded=%"PRId64" audio_last_pts_encoded=[%s]", params->url, rc, xctx->err, params->xc_type, encoder_context->video_pts, encoder_context->audio_pts, encoder_context->video_input_start_pts, - encoder_context->audio_input_start_pts, + audio_input_start_pts_buf, encoder_context->video_last_dts, - encoder_context->audio_last_dts, + audio_last_dts_buf, encoder_context->video_last_pts_read, - encoder_context->audio_last_pts_read, + audio_last_pts_read_buf, encoder_context->video_last_pts_sent_encode, - encoder_context->audio_last_pts_sent_encode, + audio_last_pts_sent_encode_buf, encoder_context->video_last_pts_encoded, - encoder_context->audio_last_pts_encoded); + audio_last_pts_encoded_buf); decoder_context->stopped = 1; encoder_context->stopped = 1; @@ -4226,15 +4323,7 @@ check_params( return eav_param; } - if (params->xc_type != xc_audio_join && - params->xc_type != xc_audio_pan && - params->xc_type != xc_audio_merge && - params->n_audio > 1) { - elv_err("Invalid number of audio streams, n_audio=%d, url=%s", params->n_audio, params->url); - return eav_param; - } - - if (params->n_audio > MAX_AUDIO_MUX) { + if (params->n_audio > MAX_STREAMS) { elv_err("Too many audio indexes, n_audio=%d, url=%s", params->n_audio, params->url); return eav_param; } @@ -4329,7 +4418,7 @@ avpipe_init( char index_str[10]; audio_index_str[0] = '\0'; - for (int i=0; in_audio && in_audio && iaudio_index[i]); strcat(audio_index_str, 
index_str); if (i < params->n_audio-1) @@ -4379,7 +4468,6 @@ avpipe_init( "audio_index=%s " "channel_layout=%d (%s) " "sync_audio_to_stream_id=%d " - "audio_fill_gap=%d " "wm_overlay_type=%d " "wm_overlay_len=%d " "bitdepth=%d " @@ -4406,7 +4494,7 @@ avpipe_init( params->crypt_iv, params->crypt_key, params->crypt_kid, params->crypt_key_url, params->crypt_scheme, params->n_audio, audio_index_str, params->channel_layout, avpipe_channel_layout_name(params->channel_layout), - params->sync_audio_to_stream_id, params->audio_fill_gap, + params->sync_audio_to_stream_id, params->watermark_overlay_type, params->watermark_overlay_len, params->bitdepth, params->listen, params->max_cll ? params->max_cll : "", @@ -4527,18 +4615,22 @@ avpipe_fini( /* Free filter graph resources */ if (decoder_context && decoder_context->video_filter_graph) avfilter_graph_free(&decoder_context->video_filter_graph); - if (decoder_context && decoder_context->audio_filter_graph) - avfilter_graph_free(&decoder_context->audio_filter_graph); + if (decoder_context && decoder_context->n_audio > 0) { + for (int i=0; in_audio; i++) + avfilter_graph_free(&decoder_context->audio_filter_graph[i]); + } if (encoder_context && encoder_context->format_context) { void *avpipe_opaque = encoder_context->format_context->avpipe_opaque; avformat_free_context(encoder_context->format_context); free(avpipe_opaque); } - if (encoder_context && encoder_context->format_context2) { - void *avpipe_opaque = encoder_context->format_context2->avpipe_opaque; - avformat_free_context(encoder_context->format_context2); - free(avpipe_opaque); + if (encoder_context) { + for (int i=0; in_audio_output; i++) { + void *avpipe_opaque = encoder_context->format_context2[i]->avpipe_opaque; + avformat_free_context(encoder_context->format_context2[i]); + free(avpipe_opaque); + } } for (int i=0; i