diff --git a/README.md b/README.md index a6db722..2c55a4d 100644 --- a/README.md +++ b/README.md @@ -127,6 +127,7 @@ typedef struct xcparams_t { int extract_images_sz; // Size of the array extract_images_ts int video_time_base; // New video encoder time_base (1/video_time_base) + int video_frame_duration_ts; // Frame duration of the output video in time base int debug_frame_level; int connection_timeout; // Connection timeout in sec for RTMP or MPEGTS protocols @@ -143,7 +144,7 @@ typedef struct xcparams_t { - If xc_type=xc_audio_join then avpipe library creates an audio join filter graph and joins the selected input audio streams to produce a joint audio stream. - If xc_type=xc_audio_pan then avpipe library creates an audio pan filter graph to pan multiple channels in one input stream to one output stereo stream. - **Specifying decoder/encoder:** the ecodec/decodec params are used to set video encoder/decoder. Also ecodec2/decodec2 params are used to set audio encoder/decoder. For video the decoder can be one of "h264", "h264_cuvid", "jpeg2000", "hevc" and encoder can be "libx264", "libx265", "h264_nvenc", "h264_videotoolbox", or "mjpeg". For audio the decoder can be “aac” or “ac3” and the encoder can be "aac", "ac3", "mp2" or "mp3". -- **Joining/merging multiple audio:** avpipe library has the capability to join and pan multiple audio input streams by setting xc_type parameter to xc_audio_join and xc_audio_pan respectively (merging multiple audio is not complete yet). +- **Transcoding multiple audio:** avpipe library has the capability to transcode one or multiple audio streams at the same time. The _audio_index_ array includes the audio index of the streams that will be transcoded. The parameter _n_audio_ determines the number of audio indexes in the _audio_index_ array. - **Using GPU:** avpipe library can utilize NVIDIA cards for transcoding. In order to utilize the NVIDIA GPU, the gpu_index must be set (the default is using GPU with index 0). 
To find the existing GPU indexes on a machine, nvidia-smi command can be used. In addition, the decoder and encoder should be set to "h264_cuvid" or "h264_nvenc" respectively. And finally, in order to pick the correct GPU index the following environment variable must be set “CUDA_DEVICE_ORDER=PCI_BUS_ID” before running the program. - **Text watermarking:** this can be done with setting watermark_text, watermark_xloc, watermark_yloc, watermark_relative_sz, and watermark_font_color while transcoding a video (xc_type=xc_video), which makes specified watermark text to appear at specified location. - **Image watermarking:** this can be done with setting watermark_overlay (the buffer containing overlay image), watermark_overlay_len, watermark_xloc, and watermark_yloc while transcoding a video (xc_type=xc_video). @@ -157,7 +158,8 @@ typedef struct xcparams_t { - setting xc_type = xc_audio_join would join 2 or more audio inputs and create a new audio output (for example joining two mono streams and creating one stereo). - setting xc_type = xc_audio_pan would pick different audio channels from input and create a new audio stream (for example picking different channels from a 5.1 channel layout and producing a stereo containing two channels). - setting xc_type = xc_audio_merge would merge different input audio streams and produce a new multi-channel output stream (for example, merging different input mono streams and create a new 5.1) -- **Setting video timebase:** setting video_time_base will set the timebase of generated video to 1/video_time_base (the timebase has to be bigger than 10000). +- **Setting video timebase:** setting _video_time_base_ will set the timebase of generated video to 1/video_time_base (the timebase has to be bigger than 10000). +- **Video frame duration:** the parameter _video_frame_duration_ts_ can be used to set the duration of each video frame with the specified timebase for output video. 
This along with video_time_base can be used to normalize the video frames and their duration. For example, for a stream with 60 fps and _video_frame_duration_ts_ equal to 256, the _video_time_base_ would be 15360. As another example, for a 59.94 fps, the _video_frame_duration_ts_ can be 1001 and _video_time_base_ would be 60000. In this case a segment of 1800 frames would be 1801800 timebase long. - **Debugging with frames:** if the parameter debug_frame_level is on then the logs will also include very low level debug messages to trace reading/writing every piece of data. - **Connection timeout:** This parameter is useful when recording / transcoding RTMP or MPEGTS streams. If avpipe is listening for an RTMP stream, connection_timeout determines the time in sec to listen for an incoming RTMP stream. If avpipe is listening for incoming UDP MPEGTS packets, connection_timeout determines the time in sec to wait for the first incoming UDP packet (if no packet is received during connection_timeout, then timeout would happen and an error would be generated). 
diff --git a/avpipe.c b/avpipe.c index a1d27e8..771612c 100644 --- a/avpipe.c +++ b/avpipe.c @@ -48,6 +48,7 @@ typedef struct udp_thread_params_t { static int out_stat( void *opaque, + int stream_index, avp_stat_t stat_type); int64_t AVPipeOpenInput(char *, int64_t *); @@ -55,7 +56,7 @@ int64_t AVPipeOpenMuxInput(char *, char *, int64_t *); int AVPipeReadInput(int64_t, uint8_t *, int); int64_t AVPipeSeekInput(int64_t, int64_t, int); int AVPipeCloseInput(int64_t); -int AVPipeStatInput(int64_t, avp_stat_t, void *); +int AVPipeStatInput(int64_t, int, avp_stat_t, void *); int64_t AVPipeOpenOutput(int64_t, int, int, int64_t, int); int64_t AVPipeOpenMuxOutput(char *, int); int AVPipeWriteOutput(int64_t, int64_t, uint8_t *, int); @@ -64,8 +65,8 @@ int AVPipeSeekOutput(int64_t, int64_t, int64_t, int); int AVPipeSeekMuxOutput(int64_t, int64_t, int); int AVPipeCloseOutput(int64_t, int64_t); int AVPipeCloseMuxOutput(int64_t); -int AVPipeStatOutput(int64_t, int64_t, avpipe_buftype_t, avp_stat_t, void *); -int AVPipeStatMuxOutput(int64_t, avp_stat_t, void *); +int AVPipeStatOutput(int64_t, int64_t, int, avpipe_buftype_t, avp_stat_t, void *); +int AVPipeStatMuxOutput(int64_t, int, avp_stat_t, void *); int CLog(char *); int CDebug(char *); int CInfo(char *); @@ -87,6 +88,7 @@ static pthread_mutex_t tx_mutex = PTHREAD_MUTEX_INITIALIZER; static int in_stat( void *opaque, + int stream_index, avp_stat_t stat_type); static int @@ -182,7 +184,8 @@ in_read_packet( inctx->read_pos += r; if (inctx->read_bytes - inctx->read_reported > BYTES_READ_REPORT) { - in_stat(opaque, in_stat_bytes_read); + /* Pass stream_index 0 (stream_index has no meaning for in_stat_bytes_read) */ + in_stat(opaque, 0, in_stat_bytes_read); inctx->read_reported = inctx->read_bytes; } } @@ -254,6 +257,7 @@ in_seek( static int in_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { int64_t fd; @@ -268,24 +272,24 @@ in_stat( switch (stat_type) { case in_stat_bytes_read: - rc = AVPipeStatInput(fd, stat_type, 
&c->read_bytes); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->read_bytes); break; case in_stat_decoding_audio_start_pts: case in_stat_decoding_video_start_pts: - rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts); break; case in_stat_audio_frame_read: - rc = AVPipeStatInput(fd, stat_type, &c->audio_frames_read); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->audio_frames_read); break; case in_stat_video_frame_read: - rc = AVPipeStatInput(fd, stat_type, &c->video_frames_read); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->video_frames_read); break; case in_stat_first_keyframe_pts: - rc = AVPipeStatInput(fd, stat_type, &c->first_key_frame_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->first_key_frame_pts); break; default: @@ -514,6 +518,7 @@ udp_in_seek( static int udp_in_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { int64_t fd; @@ -534,32 +539,32 @@ udp_in_stat( case in_stat_decoding_audio_start_pts: if (debug_frame_level) elv_dbg("IN STAT UDP fd=%d, audio start PTS=%"PRId64", url=%s", fd, c->decoding_start_pts, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts); break; case in_stat_decoding_video_start_pts: if (debug_frame_level) elv_dbg("IN STAT UDP fd=%d, video start PTS=%"PRId64", url=%s", fd, c->decoding_start_pts, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts); break; case in_stat_audio_frame_read: if (debug_frame_level) elv_dbg("IN STAT UDP fd=%d, audio frame read=%"PRId64", url=%s", fd, c->audio_frames_read, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->audio_frames_read); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->audio_frames_read); break; case in_stat_video_frame_read: if (debug_frame_level) 
elv_dbg("IN STAT UDP fd=%d, video frame read=%"PRId64", url=%s", fd, c->video_frames_read, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->video_frames_read); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->video_frames_read); break; case in_stat_first_keyframe_pts: if (debug_frame_level) elv_dbg("IN STAT UDP fd=%d, first keyframe PTS=%"PRId64", url=%s", fd, c->first_key_frame_pts, c->url); - rc = AVPipeStatInput(fd, stat_type, &c->first_key_frame_pts); + rc = AVPipeStatInput(fd, stream_index, stat_type, &c->first_key_frame_pts); break; case in_stat_data_scte35: if (debug_frame_level) elv_dbg("IN STAT UDP SCTE35 fd=%d, stat_type=%d, url=%s", fd, stat_type, c->url); - rc = AVPipeStatInput(fd, stat_type, c->data); + rc = AVPipeStatInput(fd, stream_index, stat_type, c->data); break; default: elv_err("IN STAT UDP fd=%d, invalid input stat=%d, url=%s", stat_type, c->url); @@ -634,13 +639,13 @@ out_write_packet( outctx->written_bytes - outctx->write_reported > VIDEO_BYTES_WRITE_REPORT) || (outctx->type == avpipe_audio_fmp4_segment && outctx->written_bytes - outctx->write_reported > AUDIO_BYTES_WRITE_REPORT)) { - out_stat(opaque, out_stat_bytes_written); + out_stat(opaque, outctx->stream_index, out_stat_bytes_written); outctx->write_reported = outctx->written_bytes; } if (xcparams && xcparams->debug_frame_level) - elv_dbg("OUT WRITE fd=%"PRId64", size=%d written=%d pos=%d total=%d", - fd, buf_size, bwritten, outctx->write_pos, outctx->written_bytes); + elv_dbg("OUT WRITE stream_index=%d, fd=%"PRId64", size=%d written=%d pos=%d total=%d", + outctx->stream_index, fd, buf_size, bwritten, outctx->write_pos, outctx->written_bytes); return buf_size; } @@ -691,6 +696,7 @@ out_closer( static int out_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { ioctx_t *outctx = (ioctx_t *)opaque; @@ -711,14 +717,14 @@ out_stat( fd = *((int64_t *)(outctx->opaque)); switch (stat_type) { case out_stat_bytes_written: - rc = AVPipeStatOutput(h, fd, buftype, stat_type, 
&outctx->written_bytes); + rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->written_bytes); break; case out_stat_encoding_end_pts: if (buftype == avpipe_audio_segment || buftype == avpipe_audio_fmp4_segment) - rc = AVPipeStatOutput(h, fd, buftype, stat_type, &outctx->encoder_ctx->audio_last_pts_sent_encode); + rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->encoder_ctx->audio_last_pts_sent_encode); else - rc = AVPipeStatOutput(h, fd, buftype, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode); + rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode); break; case out_stat_frame_written: { @@ -726,7 +732,7 @@ out_stat( .total_frames_written = outctx->total_frames_written, .frames_written = outctx->frames_written, }; - rc = AVPipeStatOutput(h, fd, buftype, stat_type, &encoding_frame_stats); + rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &encoding_frame_stats); } break; default: @@ -1216,7 +1222,8 @@ in_mux_read_packet( } if (c->read_bytes - c->read_reported > BYTES_READ_REPORT) { - in_stat(opaque, in_stat_bytes_read); + /* Pass stream_index 0 (stream_index has no meaning for in_stat_bytes_read) */ + in_stat(opaque, 0, in_stat_bytes_read); c->read_reported = c->read_bytes; } @@ -1319,6 +1326,7 @@ out_mux_seek( static int out_mux_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { ioctx_t *outctx = (ioctx_t *)opaque; @@ -1328,10 +1336,10 @@ out_mux_stat( switch (stat_type) { case out_stat_bytes_written: - rc = AVPipeStatMuxOutput(fd, stat_type, &outctx->written_bytes); + rc = AVPipeStatMuxOutput(fd, stream_index, stat_type, &outctx->written_bytes); break; case out_stat_encoding_end_pts: - rc = AVPipeStatMuxOutput(fd, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode); + rc = AVPipeStatMuxOutput(fd, stream_index, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode); break; default: break; diff --git a/avpipe.go 
b/avpipe.go index c1bd68c..e36a22e 100644 --- a/avpipe.go +++ b/avpipe.go @@ -172,7 +172,7 @@ const ( CryptCBCS ) -const MaxAudioMux = C.MAX_AUDIO_MUX +const MaxAudioMux = C.MAX_STREAMS // XcParams should match with txparams_t in avpipe_xc.h type XcParams struct { @@ -228,7 +228,6 @@ type XcParams struct { MaxCLL string `json:"max_cll,omitempty"` MasterDisplay string `json:"master_display,omitempty"` BitDepth int32 `json:"bitdepth,omitempty"` - AudioFillGap bool `json:"audio_fill_gap,omitempty"` SyncAudioToStreamId int `json:"sync_audio_to_stream_id"` ForceEqualFDuration bool `json:"force_equal_frame_duration,omitempty"` MuxingSpec string `json:"muxing_spec,omitempty"` @@ -377,11 +376,11 @@ type IOHandler interface { InReader(buf []byte) (int, error) InSeeker(offset C.int64_t, whence C.int) error InCloser() error - InStat(avp_stat C.avp_stat_t, stat_args *C.void) error + InStat(stream_index C.int, avp_stat C.avp_stat_t, stat_args *C.void) error OutWriter(fd C.int, buf []byte) (int, error) OutSeeker(fd C.int, offset C.int64_t, whence C.int) (int64, error) OutCloser(fd C.int) error - OutStat(avp_stat C.avp_stat_t, stat_args *C.void) error + OutStat(stream_index C.int, avp_stat C.avp_stat_t, stat_args *C.void) error } type InputOpener interface { @@ -405,7 +404,7 @@ type InputHandler interface { Size() int64 // Reports some stats - Stat(statType AVStatType, statArgs interface{}) error + Stat(streamIndex int, statType AVStatType, statArgs interface{}) error } type OutputOpener interface { @@ -430,7 +429,7 @@ type OutputHandler interface { Close() error // Reports some stats - Stat(avType AVType, statType AVStatType, statArgs interface{}) error + Stat(streamIndex int, avType AVType, statType AVStatType, statArgs interface{}) error } // Implement IOHandler @@ -711,7 +710,7 @@ func (h *ioHandler) InCloser() error { } //export AVPipeStatInput -func AVPipeStatInput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { +func AVPipeStatInput(fd C.int64_t, 
stream_index C.int, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { gMutex.Lock() h := gHandlers[int64(fd)] if h == nil { @@ -720,7 +719,7 @@ func AVPipeStatInput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.Point } gMutex.Unlock() - err := h.InStat(avp_stat, stat_args) + err := h.InStat(stream_index, avp_stat, stat_args) if err != nil { return C.int(-1) } @@ -728,31 +727,32 @@ func AVPipeStatInput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.Point return C.int(0) } -func (h *ioHandler) InStat(avp_stat C.avp_stat_t, stat_args unsafe.Pointer) error { +func (h *ioHandler) InStat(stream_index C.int, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) error { var err error + streamIndex := (int)(stream_index) switch avp_stat { case C.in_stat_bytes_read: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_BYTES_READ, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_BYTES_READ, &statArgs) case C.in_stat_decoding_audio_start_pts: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_DECODING_AUDIO_START_PTS, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_DECODING_AUDIO_START_PTS, &statArgs) case C.in_stat_decoding_video_start_pts: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_DECODING_VIDEO_START_PTS, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_DECODING_VIDEO_START_PTS, &statArgs) case C.in_stat_audio_frame_read: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_AUDIO_FRAME_READ, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_AUDIO_FRAME_READ, &statArgs) case C.in_stat_video_frame_read: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_VIDEO_FRAME_READ, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_VIDEO_FRAME_READ, &statArgs) case C.in_stat_first_keyframe_pts: statArgs := *(*uint64)(stat_args) - err = h.input.Stat(AV_IN_STAT_FIRST_KEYFRAME_PTS, &statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_FIRST_KEYFRAME_PTS, 
&statArgs) case C.in_stat_data_scte35: statArgs := C.GoString((*C.char)(stat_args)) - err = h.input.Stat(AV_IN_STAT_DATA_SCTE35, statArgs) + err = h.input.Stat(streamIndex, AV_IN_STAT_DATA_SCTE35, statArgs) } return err @@ -1048,6 +1048,7 @@ func (h *ioHandler) OutCloser(fd C.int64_t) error { //export AVPipeStatOutput func AVPipeStatOutput(handler C.int64_t, fd C.int64_t, + stream_index C.int, buf_type C.avpipe_buftype_t, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { @@ -1060,7 +1061,7 @@ func AVPipeStatOutput(handler C.int64_t, } gMutex.Unlock() - err := h.OutStat(fd, buf_type, avp_stat, stat_args) + err := h.OutStat(fd, stream_index, buf_type, avp_stat, stat_args) if err != nil { return C.int(-1) } @@ -1069,7 +1070,7 @@ func AVPipeStatOutput(handler C.int64_t, } //export AVPipeStatMuxOutput -func AVPipeStatMuxOutput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { +func AVPipeStatMuxOutput(fd C.int64_t, stream_index C.int, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) C.int { gMutex.Lock() outHandler := gMuxHandlers[int64(fd)] if outHandler == nil { @@ -1078,14 +1079,15 @@ func AVPipeStatMuxOutput(fd C.int64_t, avp_stat C.avp_stat_t, stat_args unsafe.P } gMutex.Unlock() + streamIndex := (int)(stream_index) var err error switch avp_stat { case C.out_stat_bytes_written: statArgs := *(*uint64)(stat_args) - err = outHandler.Stat(MuxSegment, AV_OUT_STAT_BYTES_WRITTEN, &statArgs) + err = outHandler.Stat(streamIndex, MuxSegment, AV_OUT_STAT_BYTES_WRITTEN, &statArgs) case C.out_stat_encoding_end_pts: statArgs := *(*uint64)(stat_args) - err = outHandler.Stat(MuxSegment, AV_OUT_STAT_ENCODING_END_PTS, &statArgs) + err = outHandler.Stat(streamIndex, MuxSegment, AV_OUT_STAT_ENCODING_END_PTS, &statArgs) } if err != nil { @@ -1101,6 +1103,7 @@ type EncodingFrameStats struct { } func (h *ioHandler) OutStat(fd C.int64_t, + stream_index C.int, av_type C.avpipe_buftype_t, avp_stat C.avp_stat_t, stat_args unsafe.Pointer) error { @@ -1111,21 
+1114,22 @@ func (h *ioHandler) OutStat(fd C.int64_t, return fmt.Errorf("OutStat nil handler, fd=%d", int64(fd)) } + streamIndex := (int)(stream_index) avType := getAVType(C.int(av_type)) switch avp_stat { case C.out_stat_bytes_written: statArgs := *(*uint64)(stat_args) - err = outHandler.Stat(avType, AV_OUT_STAT_BYTES_WRITTEN, &statArgs) + err = outHandler.Stat(streamIndex, avType, AV_OUT_STAT_BYTES_WRITTEN, &statArgs) case C.out_stat_encoding_end_pts: statArgs := *(*uint64)(stat_args) - err = outHandler.Stat(avType, AV_OUT_STAT_ENCODING_END_PTS, &statArgs) + err = outHandler.Stat(streamIndex, avType, AV_OUT_STAT_ENCODING_END_PTS, &statArgs) case C.out_stat_frame_written: encodingFramesStats := (*C.encoding_frame_stats_t)(stat_args) statArgs := &EncodingFrameStats{ TotalFramesWritten: int64(encodingFramesStats.total_frames_written), FramesWritten: int64(encodingFramesStats.frames_written), } - err = outHandler.Stat(avType, AV_OUT_STAT_FRAME_WRITTEN, statArgs) + err = outHandler.Stat(streamIndex, avType, AV_OUT_STAT_FRAME_WRITTEN, statArgs) } return err @@ -1261,10 +1265,6 @@ func getCParams(params *XcParams) (*C.xcparams_t, error) { cparams.force_equal_fduration = C.int(1) } - if params.AudioFillGap { - cparams.audio_fill_gap = C.int(1) - } - if params.SkipDecoding { cparams.skip_decoding = C.int(1) } diff --git a/avpipe_test.go b/avpipe_test.go index afd8023..5880c1b 100644 --- a/avpipe_test.go +++ b/avpipe_test.go @@ -28,7 +28,7 @@ const baseOutPath = "test_out" const debugFrameLevel = true const h264Codec = "libx264" const videoBigBuckBunnyPath = "media/bbb_1080p_30fps_60sec.mp4" -const videoRockyPath = "media/rocky.mp4" +const videoBigBuckBunny3AudioPath = "media/BBB_3x_audio_streams_music_2min_48kHz.mp4" type XcTestResult struct { mezFile []string @@ -126,39 +126,39 @@ func (i *fileInput) Size() int64 { return fi.Size() } -func (i *fileInput) Stat(statType avpipe.AVStatType, statArgs interface{}) error { +func (i *fileInput) Stat(streamIndex int, statType 
avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_IN_STAT_BYTES_READ: readOffset := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "STAT read offset", *readOffset) + log.Debug("AVP TEST IN STAT", "STAT read offset", *readOffset, "streamIndex", streamIndex) } case avpipe.AV_IN_STAT_AUDIO_FRAME_READ: audioFramesRead := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "audioFramesRead", *audioFramesRead) + log.Debug("AVP TEST IN STAT", "audioFramesRead", *audioFramesRead, "streamIndex", streamIndex) } statsInfo.audioFramesRead = *audioFramesRead case avpipe.AV_IN_STAT_VIDEO_FRAME_READ: videoFramesRead := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "videoFramesRead", *videoFramesRead) + log.Debug("AVP TEST IN STAT", "videoFramesRead", *videoFramesRead, "streamIndex", streamIndex) } statsInfo.videoFramesRead = *videoFramesRead case avpipe.AV_IN_STAT_DECODING_AUDIO_START_PTS: startPTS := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "audio start PTS", *startPTS) + log.Debug("AVP TEST IN STAT", "audio start PTS", *startPTS, "streamIndex", streamIndex) } case avpipe.AV_IN_STAT_DECODING_VIDEO_START_PTS: startPTS := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "video start PTS", *startPTS) + log.Debug("AVP TEST IN STAT", "video start PTS", *startPTS, "streamIndex", streamIndex) } case avpipe.AV_IN_STAT_FIRST_KEYFRAME_PTS: keyFramePTS := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST IN STAT", "video first keyframe PTS", *keyFramePTS) + log.Debug("AVP TEST IN STAT", "video first keyframe PTS", *keyFramePTS, "streamIndex", streamIndex) } statsInfo.firstKeyFramePTS = *keyFramePTS } @@ -200,7 +200,7 @@ func (oo *fileOutputOpener) Open(_, _ int64, streamIndex, segIndex int, case avpipe.FMP4VideoSegment: filename = fmt.Sprintf("./%s/vsegment-%d.mp4", oo.dir, segIndex) case avpipe.FMP4AudioSegment: - filename 
= fmt.Sprintf("./%s/asegment-%d.mp4", oo.dir, segIndex) + filename = fmt.Sprintf("./%s/asegment%d-%d.mp4", oo.dir, streamIndex, segIndex) case avpipe.FrameImage: filename = fmt.Sprintf("./%s/%d.jpeg", oo.dir, pts) } @@ -308,23 +308,23 @@ func (o *fileOutput) Close() error { return err } -func (o fileOutput) Stat(avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { +func (o fileOutput) Stat(streamIndex int, avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_OUT_STAT_BYTES_WRITTEN: writeOffset := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST OUT STAT", "STAT, write offset", *writeOffset) + log.Debug("AVP TEST OUT STAT", "STAT, write offset", *writeOffset, "streamIndex", streamIndex) } case avpipe.AV_OUT_STAT_ENCODING_END_PTS: endPTS := statArgs.(*uint64) if debugFrameLevel { - log.Debug("AVP TEST OUT STAT", "STAT, endPTS", *endPTS) + log.Debug("AVP TEST OUT STAT", "STAT, endPTS", *endPTS, "streamIndex", streamIndex) } case avpipe.AV_OUT_STAT_FRAME_WRITTEN: encodingStats := statArgs.(*avpipe.EncodingFrameStats) if debugFrameLevel { log.Debug("AVP TEST OUT STAT", "avType", avType, - "encodingStats", encodingStats) + "encodingStats", encodingStats, "streamIndex", streamIndex) } if avType == avpipe.FMP4AudioSegment { statsInfo.encodingAudioFrameStats = *encodingStats @@ -368,7 +368,6 @@ func TestAudioSeg(t *testing.T) { Url: url, DebugFrameLevel: debugFrameLevel, } - setFastEncodeParams(params, true) xcTest(t, outputDir, params, nil, true) } @@ -980,7 +979,7 @@ func TestAudioAAC2AACMezMaker(t *testing.T) { } xcTestResult := &XcTestResult{ - mezFile: []string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, } @@ -1018,7 +1017,7 @@ func TestAudioAC3Ts2AC3MezMaker(t *testing.T) { params.AudioIndex[0] = 2 xcTestResult := &XcTestResult{ - mezFile: 
[]string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, } @@ -1056,7 +1055,7 @@ func TestAudioAC3Ts2AACMezMaker(t *testing.T) { params.AudioIndex[0] = 2 xcTestResult := &XcTestResult{ - mezFile: []string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, } @@ -1095,7 +1094,7 @@ func TestAudioMP3Ts2AACMezMaker(t *testing.T) { params.AudioIndex[0] = 1 xcTestResult := &XcTestResult{ - mezFile: []string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, } @@ -1135,7 +1134,7 @@ func TestAudioDownmix2AACMezMaker(t *testing.T) { params.AudioIndex[0] = 6 xcTestResult := &XcTestResult{ - mezFile: []string{fmt.Sprintf("%s/asegment-1.mp4", outputDir)}, + mezFile: []string{fmt.Sprintf("%s/asegment0-1.mp4", outputDir)}, timeScale: 48000, sampleRate: 48000, channelLayoutName: "stereo", @@ -1178,7 +1177,7 @@ func TestAudio2MonoTo1Stereo(t *testing.T) { channelLayoutName: "stereo", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1214,7 +1213,7 @@ func TestAudio5_1To5_1(t *testing.T) { channelLayoutName: "5.1", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1252,7 +1251,7 @@ func TestAudio5_1ToStereo(t *testing.T) { channelLayoutName: "stereo", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = 
append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1291,7 +1290,7 @@ func TestAudioMonoToMono(t *testing.T) { channelLayoutName: "mono", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1330,7 +1329,7 @@ func TestAudioQuadToQuad(t *testing.T) { channelLayoutName: "quad", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1375,7 +1374,7 @@ func TestAudio6MonoTo5_1(t *testing.T) { channelLayoutName: "5.1", } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1420,7 +1419,7 @@ func TestAudio6MonoUnequalChannelLayoutsTo5_1(t *testing.T) { channelLayoutName: "5.1", } for i := 1; i < 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1460,7 +1459,7 @@ func TestAudio10Channel_s16To6Channel_5_1(t *testing.T) { channelLayoutName: "5.1", } for i := 1; i <= 1; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + 
xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1501,7 +1500,7 @@ func TestAudio2Channel1Stereo(t *testing.T) { } for i := 1; i <= 2; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1544,7 +1543,7 @@ func TestAudioPan2Channel1Stereo_pcm_60000(t *testing.T) { } for i := 1; i <= 1; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -1586,7 +1585,54 @@ func TestAudioMonoToStereo_pcm_60000(t *testing.T) { } for i := 1; i <= 1; i++ { - xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment-%d.mp4", outputDir, i)) + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/asegment0-%d.mp4", outputDir, i)) + } + + xcTest(t, outputDir, params, xcTestResult, true) +} + +func TestMultiAudioXc(t *testing.T) { + url := videoBigBuckBunny3AudioPath + + if fileMissing(url, fn()) { + return + } + + outputDir := path.Join(baseOutPath, fn()) + + params := &avpipe.XcParams{ + BypassTranscoding: false, + Format: "fmp4-segment", + StartTimeTs: 0, + DurationTs: -1, + StartSegmentStr: "1", + VideoSegDurationTs: 460800, + AudioSegDurationTs: 1428480, + Ecodec: h264Codec, + Dcodec: "", + Ecodec2: "aac", + EncHeight: 720, + EncWidth: 1280, + XcType: avpipe.XcAll, + StreamId: -1, + SyncAudioToStreamId: -1, + ForceKeyInt: 60, + Url: url, + DebugFrameLevel: debugFrameLevel, + NumAudio: 3, + } + + params.AudioIndex[0] = 1 + params.AudioIndex[1] = 2 + params.AudioIndex[2] = 3 + + xcTestResult := &XcTestResult{ + timeScale: 
15360, + pixelFmt: "yuv420p", + } + + for i := 1; i <= 4; i++ { + xcTestResult.mezFile = append(xcTestResult.mezFile, fmt.Sprintf("%s/vsegment-%d.mp4", outputDir, i)) } xcTest(t, outputDir, params, xcTestResult, true) @@ -2028,7 +2074,7 @@ func TestABRMuxing(t *testing.T) { videoMezDir := path.Join(baseOutPath, f, "VideoMez4Muxing") audioMezDir := path.Join(baseOutPath, f, "AudioMez4Muxing") videoABRDir := path.Join(baseOutPath, f, "VideoABR4Muxing") - videoABRDir2 := path.Join(baseOutPath, f, "VideoABR4Muxing2") + videoABRDir2 := path.Join(baseOutPath, f, "VideoABR4Muxing2") audioABRDir := path.Join(baseOutPath, f, "AudioABR4Muxing") audioABRDir2 := path.Join(baseOutPath, f, "AudioABR4Muxing2") muxOutDir := path.Join(baseOutPath, f, "MuxingOutput") @@ -2095,7 +2141,7 @@ func TestABRMuxing(t *testing.T) { // Create audio ABR files for the first mez segment setupOutDir(t, audioABRDir) - url = audioMezDir + "/asegment-1.mp4" + url = audioMezDir + "/asegment0-1.mp4" log.Debug("STARTING audio ABR for muxing", "file", url) params.XcType = avpipe.XcAudio params.Format = "dash" @@ -2109,7 +2155,7 @@ func TestABRMuxing(t *testing.T) { // Create audio ABR files for the second mez segment setupOutDir(t, audioABRDir2) - url = audioMezDir + "/asegment-2.mp4" + url = audioMezDir + "/asegment0-2.mp4" log.Debug("STARTING audio ABR for muxing (first segment)", "file", url) params.XcType = avpipe.XcAudio params.Format = "dash" diff --git a/elvxc/cmd/mux.go b/elvxc/cmd/mux.go index 1b92e1c..df1a64b 100644 --- a/elvxc/cmd/mux.go +++ b/elvxc/cmd/mux.go @@ -101,17 +101,17 @@ func (muxInput *elvxcMuxInput) Size() int64 { return fi.Size() } -func (muxInput *elvxcMuxInput) Stat(statType avpipe.AVStatType, statArgs interface{}) error { +func (muxInput *elvxcMuxInput) Stat(streamIndex int, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_IN_STAT_BYTES_READ: readOffset := statArgs.(*uint64) - log.Info("elvxcMuxInput", "stat read offset", 
*readOffset) + log.Info("elvxcMuxInput", "stat read offset", *readOffset, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DECODING_AUDIO_START_PTS: startPTS := statArgs.(*uint64) - log.Info("elvxcMuxInput", "audio start PTS", *startPTS) + log.Info("elvxcMuxInput", "audio start PTS", *startPTS, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DECODING_VIDEO_START_PTS: startPTS := statArgs.(*uint64) - log.Info("elvxcMuxInput", "video start PTS", *startPTS) + log.Info("elvxcMuxInput", "video start PTS", *startPTS, "streamIndex", streamIndex) } return nil @@ -165,14 +165,14 @@ func (muxOutput *elvxcMuxOutput) Close() error { return err } -func (muxOutput *elvxcMuxOutput) Stat(avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { +func (muxOutput *elvxcMuxOutput) Stat(streamIndex int, avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_OUT_STAT_BYTES_WRITTEN: writeOffset := statArgs.(*uint64) - log.Info("elvxcMuxOutput", "STAT, write offset", *writeOffset) + log.Info("elvxcMuxOutput", "STAT, write offset", *writeOffset, "streamIndex", streamIndex) case avpipe.AV_OUT_STAT_ENCODING_END_PTS: endPTS := statArgs.(*uint64) - log.Info("elvxcMuxOutput", "STAT, endPTS", *endPTS) + log.Info("elvxcMuxOutput", "STAT, endPTS", *endPTS, "streamIndex", streamIndex) } diff --git a/elvxc/cmd/transcode.go b/elvxc/cmd/transcode.go index e8869d9..63fb413 100644 --- a/elvxc/cmd/transcode.go +++ b/elvxc/cmd/transcode.go @@ -83,25 +83,25 @@ func (i *elvxcInput) Size() int64 { return fi.Size() } -func (i *elvxcInput) Stat(statType avpipe.AVStatType, statArgs interface{}) error { +func (i *elvxcInput) Stat(streamIndex int, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_IN_STAT_BYTES_READ: readOffset := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "read offset", *readOffset) + log.Info("AVCMD InputHandler.Stat", "read offset", *readOffset, 
"streamIndex", streamIndex) case avpipe.AV_IN_STAT_AUDIO_FRAME_READ: audioFrameRead := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "audioFrameRead", *audioFrameRead) + log.Info("AVCMD InputHandler.Stat", "audioFrameRead", *audioFrameRead, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_VIDEO_FRAME_READ: videoFrameRead := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "videoFrameRead", *videoFrameRead) + log.Info("AVCMD InputHandler.Stat", "videoFrameRead", *videoFrameRead, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DECODING_AUDIO_START_PTS: startPTS := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "audio start PTS", *startPTS) + log.Info("AVCMD InputHandler.Stat", "audio start PTS", *startPTS, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DECODING_VIDEO_START_PTS: startPTS := statArgs.(*uint64) - log.Info("AVCMD InputHandler.Stat", "video start PTS", *startPTS) + log.Info("AVCMD InputHandler.Stat", "video start PTS", *startPTS, "streamIndex", streamIndex) case avpipe.AV_IN_STAT_DATA_SCTE35: - log.Info("AVCMD InputHandler.Stat", "scte35", statArgs) + log.Info("AVCMD InputHandler.Stat", "scte35", statArgs, "streamIndex", streamIndex) } return nil @@ -197,19 +197,19 @@ func (o *elvxcOutput) Close() error { return err } -func (o *elvxcOutput) Stat(avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { +func (o *elvxcOutput) Stat(streamIndex int, avType avpipe.AVType, statType avpipe.AVStatType, statArgs interface{}) error { switch statType { case avpipe.AV_OUT_STAT_BYTES_WRITTEN: writeOffset := statArgs.(*uint64) - log.Info("AVCMD OutputHandler.Stat", "write offset", *writeOffset) + log.Info("AVCMD OutputHandler.Stat", "write offset", *writeOffset, "streamIndex", streamIndex) case avpipe.AV_OUT_STAT_ENCODING_END_PTS: endPTS := statArgs.(*uint64) - log.Info("AVCMD OutputHandler.Stat", "endPTS", *endPTS) + log.Info("AVCMD OutputHandler.Stat", "endPTS", *endPTS, "streamIndex", streamIndex) case 
avpipe.AV_OUT_STAT_FRAME_WRITTEN: encodingStats := statArgs.(*avpipe.EncodingFrameStats) log.Info("AVCMD OutputHandler.Stat", "avType", avType, - "encodingStats", encodingStats) + "encodingStats", encodingStats, "streamIndex", streamIndex) } return nil } @@ -272,7 +272,6 @@ func InitTranscode(cmdRoot *cobra.Command) error { cmdTranscode.PersistentFlags().StringP("audio-index", "", "", "the indexes of audio stream (comma separated).") cmdTranscode.PersistentFlags().StringP("channel-layout", "", "", "audio channel layout.") cmdTranscode.PersistentFlags().Int32P("gpu-index", "", -1, "Use the GPU with specified index for transcoding (export CUDA_DEVICE_ORDER=PCI_BUS_ID would use smi index).") - cmdTranscode.PersistentFlags().BoolP("audio-fill-gap", "", false, "fill audio gap when encoder is aac and decoder is mpegts") cmdTranscode.PersistentFlags().Int32P("sync-audio-to-stream-id", "", -1, "sync audio to video iframe of specific stream-id when input stream is mpegts") cmdTranscode.PersistentFlags().StringP("encoder", "e", "libx264", "encoder codec, default is 'libx264', can be: 'libx264', 'libx265', 'h264_nvenc', 'h264_videotoolbox', or 'mjpeg'.") cmdTranscode.PersistentFlags().StringP("audio-encoder", "", "aac", "audio encoder, default is 'aac', can be: 'aac', 'ac3', 'mp2', 'mp3'.") @@ -387,11 +386,6 @@ func doTranscode(cmd *cobra.Command, args []string) error { return fmt.Errorf("Invalid gpu index flag") } - audioFillGap, err := cmd.Flags().GetBool("audio-fill-gap") - if err != nil { - return fmt.Errorf("Invalid audio-fill-gap flag") - } - syncAudioToStreamId, err := cmd.Flags().GetInt32("sync-audio-to-stream-id") if err != nil { return fmt.Errorf("Invalid sync-audio-to-stream-id flag") @@ -681,7 +675,6 @@ func doTranscode(cmd *cobra.Command, args []string) error { MasterDisplay: masterDisplay, BitDepth: bitDepth, ForceEqualFDuration: forceEqualFrameDuration, - AudioFillGap: audioFillGap, SyncAudioToStreamId: int(syncAudioToStreamId), StreamId: streamId, Listen: 
listen, diff --git a/exc/elv_mux.c b/exc/elv_mux.c index 81a9be0..184796c 100644 --- a/exc/elv_mux.c +++ b/exc/elv_mux.c @@ -300,6 +300,7 @@ out_mux_closer( static int out_mux_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { ioctx_t *outctx = (ioctx_t *)opaque; @@ -316,7 +317,8 @@ out_mux_stat( switch (stat_type) { case out_stat_bytes_written: - elv_log("OUT MUX STAT fd=%d, write offset=%"PRId64, fd, outctx->written_bytes); + elv_log("OUT MUX STAT stream_index=%d, fd=%d, write offset=%"PRId64, + stream_index, fd, outctx->written_bytes); break; #if 0 /* PENDING(RM) set the hooks properly for muxing */ diff --git a/exc/elv_xc.c b/exc/elv_xc.c index f57e29e..20d4f29 100644 --- a/exc/elv_xc.c +++ b/exc/elv_xc.c @@ -39,11 +39,13 @@ udp_thread_func( int in_stat( void *opaque, + int stream_index, avp_stat_t stat_type); int out_stat( void *opaque, + int stream_index, avp_stat_t stat_type); typedef struct udp_thread_params_t { @@ -278,7 +280,8 @@ in_read_packet( } if (r > 0 && c->read_bytes - c->read_reported > BYTES_READ_REPORT) { - in_stat(opaque, in_stat_bytes_read); + /* Pass stream_index 0 (stream_index has no meaning for in_stat_bytes_read) */ + in_stat(opaque, 0, in_stat_bytes_read); c->read_reported = c->read_bytes; } @@ -323,6 +326,7 @@ in_seek( int in_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { int64_t fd; @@ -337,30 +341,30 @@ in_stat( switch (stat_type) { case in_stat_bytes_read: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, read offset=%"PRId64, fd, c->read_bytes); + elv_dbg("IN STAT stream_index=%d, fd=%d, read offset=%"PRId64, stream_index, fd, c->read_bytes); break; case in_stat_decoding_audio_start_pts: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, audio start PTS=%"PRId64, fd, c->decoding_start_pts); + elv_dbg("IN STAT stream_index=%d, fd=%d, audio start PTS=%"PRId64, stream_index, fd, c->decoding_start_pts); break; case in_stat_decoding_video_start_pts: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, video start 
PTS=%"PRId64, fd, c->decoding_start_pts); + elv_dbg("IN STAT stream_index=%d, fd=%d, video start PTS=%"PRId64, stream_index, fd, c->decoding_start_pts); break; case in_stat_audio_frame_read: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, audio frame read=%"PRId64, fd, c->audio_frames_read); + elv_dbg("IN STAT stream_index=%d, fd=%d, audio frame read=%"PRId64, stream_index, fd, c->audio_frames_read); break; case in_stat_video_frame_read: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, video frame read=%"PRId64, fd, c->video_frames_read); + elv_dbg("IN STAT stream_index=%d, fd=%d, video frame read=%"PRId64, stream_index, fd, c->video_frames_read); break; case in_stat_data_scte35: if (debug_frame_level) - elv_dbg("IN STAT fd=%d, data=%s", fd, c->data); + elv_dbg("IN STAT stream_index=%d, fd=%d, data=%s", stream_index, fd, c->data); break; default: - elv_err("IN STAT fd=%d, invalid input stat=%d", fd, stat_type); + elv_err("IN STAT stream_index=%d, fd=%d, invalid input stat=%d", stream_index, fd, stat_type); return 1; } @@ -549,7 +553,7 @@ out_write_packet( outctx->written_bytes - outctx->write_reported > VIDEO_BYTES_WRITE_REPORT) || (outctx->type == avpipe_audio_fmp4_segment && outctx->written_bytes - outctx->write_reported > AUDIO_BYTES_WRITE_REPORT)) { - out_stat(opaque, out_stat_bytes_written); + out_stat(opaque, outctx->stream_index, out_stat_bytes_written); outctx->write_reported = outctx->written_bytes; } @@ -602,6 +606,7 @@ out_closer( int out_stat( void *opaque, + int stream_index, avp_stat_t stat_type) { ioctx_t *outctx = (ioctx_t *)opaque; @@ -625,21 +630,21 @@ out_stat( switch (stat_type) { case out_stat_bytes_written: if (xcparams->debug_frame_level) - elv_dbg("OUT STAT fd=%d, type=%d, write offset=%"PRId64, - fd, outctx->type, outctx->written_bytes); + elv_dbg("OUT STAT stream_index=%d, fd=%d, type=%d, write offset=%"PRId64, + stream_index, fd, outctx->type, outctx->written_bytes); break; case out_stat_encoding_end_pts: if (xcparams->debug_frame_level) 
- elv_dbg("OUT STAT fd=%d, video encoding end PTS=%"PRId64 + elv_dbg("OUT STAT stream_index=%d, fd=%d, video encoding end PTS=%"PRId64 ", audio encoding end PTS=%"PRId64, - fd, outctx->encoder_ctx->video_last_pts_sent_encode, + stream_index, fd, outctx->encoder_ctx->video_last_pts_sent_encode, outctx->encoder_ctx->audio_last_pts_sent_encode); break; case out_stat_frame_written: if (xcparams->debug_frame_level) - elv_dbg("OUT STAT fd=%d, type=%d, total_frames_written=%"PRId64 + elv_dbg("OUT STAT stream_index=%d, fd=%d, type=%d, total_frames_written=%"PRId64 ", frames_written=%"PRId64, - fd, outctx->type, outctx->total_frames_written, + stream_index, fd, outctx->type, outctx->total_frames_written, outctx->frames_written); break; default: @@ -1046,7 +1051,6 @@ usage( "\t-audio-bitrate : (optional) Default: 128000\n" "\t-audio-decoder : (optional) Audio decoder name. For audio default is \"aac\", but for ts files should be set to \"ac3\"\n" "\t-audio-encoder : (optional) Audio encoder name. Default is \"aac\", can be \"ac3\", \"mp2\" or \"mp3\"\n" - "\t-audio-fill-gap : (optional) Default: 0, must be 0 or 1. It only effects if encoder is aac.\n" "\t-audio-index : (optional) Default: the indexes of audio stream (comma separated)\n" "\t-audio-seg-duration-ts : (mandatory If format is not \"segment\" and transcoding audio) audio segment duration time base (positive integer).\n" "\t-bitdepth : (optional) Bitdepth of color space. 
Default is 8, can be 8, 10, or 12.\n" @@ -1156,7 +1160,6 @@ main( .stream_id = -1, .audio_bitrate = 128000, /* Default bitrate */ .n_audio = 0, /* # of audio index */ - .audio_fill_gap = 0, /* Don't fill gap if there is JUMP */ .bitdepth = 8, .crf_str = strdup("23"), /* 1 best -> 23 standard middle -> 52 poor */ .crypt_iv = NULL, @@ -1229,13 +1232,6 @@ main( p.dcodec2 = strdup(argv[i+1]); } else if (!strcmp(argv[i], "-audio-encoder")) { p.ecodec2 = strdup(argv[i+1]); - } else if (!strcmp(argv[i], "-audio-fill-gap")) { - if (sscanf(argv[i+1], "%d", &p.audio_fill_gap) != 1) { - usage(argv[0], argv[i], EXIT_FAILURE); - } - if (p.audio_fill_gap != 0 && p.audio_fill_gap != 1) { - usage(argv[0], argv[i], EXIT_FAILURE); - } } else if (!strcmp(argv[i], "-audio-bitrate")) { if (sscanf(argv[i+1], "%d", &p.audio_bitrate) != 1) { usage(argv[0], argv[i], EXIT_FAILURE); diff --git a/libavpipe/include/avpipe_version.h b/libavpipe/include/avpipe_version.h index 6bd09aa..395e3de 100644 --- a/libavpipe/include/avpipe_version.h +++ b/libavpipe/include/avpipe_version.h @@ -10,5 +10,5 @@ /* Only increase these versions for release purposes */ #define AVPIPE_MAJOR_VERSION 1 -#define AVPIPE_MINOR_VERSION 12 +#define AVPIPE_MINOR_VERSION 13 diff --git a/libavpipe/include/avpipe_xc.h b/libavpipe/include/avpipe_xc.h index b1427d9..a6b4bb9 100644 --- a/libavpipe/include/avpipe_xc.h +++ b/libavpipe/include/avpipe_xc.h @@ -19,8 +19,6 @@ #define MAX_STREAMS 64 #define MAX_MUX_IN_STREAM 4096 -#define MAX_AUDIO_MUX 8 -#define MAX_CAPTION_MUX 8 #define AVIO_OUT_BUF_SIZE (1*1024*1024) // avio output buffer size #define AVIO_IN_BUF_SIZE (1*1024*1024) // avio input buffer size @@ -122,10 +120,10 @@ typedef struct io_mux_ctx_t { mux_input_ctx_t video; int64_t last_video_pts; int last_audio_index; - mux_input_ctx_t audios[MAX_AUDIO_MUX]; + mux_input_ctx_t audios[MAX_STREAMS]; int64_t last_audio_pts; int last_caption_index; - mux_input_ctx_t captions[MAX_CAPTION_MUX]; + mux_input_ctx_t 
captions[MAX_STREAMS]; } io_mux_ctx_t; typedef struct xcparams_t xcparams_t; @@ -231,6 +229,7 @@ typedef int64_t typedef int (*avpipe_stater_f)( void *opaque, + int stream_index, /* The stream_index is not valid for input stat in_stat_bytes_read. */ avp_stat_t stat_type); typedef struct avpipe_io_handler_t { @@ -242,17 +241,20 @@ typedef struct avpipe_io_handler_t { avpipe_stater_f avpipe_stater; } avpipe_io_handler_t; -#define MAX_WRAP_PTS ((int64_t)8589000000) +#define MAX_WRAP_PTS ((int64_t)8589000000) +#define MAX_AVFILENAME_LEN 128 /* Decoder/encoder context, keeps both video and audio stream ffmpeg contexts */ typedef struct coderctx_t { - AVFormatContext *format_context; /* Input format context or video output format context */ - AVFormatContext *format_context2; /* Audio output format context */ + AVFormatContext *format_context; /* Input format context or video output format context */ + AVFormatContext *format_context2[MAX_STREAMS]; /* Audio output format context, indexed by audio index */ + char filename2[MAX_STREAMS][MAX_AVFILENAME_LEN]; /* Audio filename formats */ + int n_audio_output; /* Number of audio output streams, it is set for encoder */ AVCodec *codec[MAX_STREAMS]; AVStream *stream[MAX_STREAMS]; AVCodecParameters *codec_parameters[MAX_STREAMS]; - AVCodecContext *codec_context[MAX_STREAMS]; + AVCodecContext *codec_context[MAX_STREAMS]; /* Audio/video AVCodecContext, indexed by stream_index */ SwrContext *resampler_context; /* resample context for audio */ AVAudioFifo *fifo; /* audio sampling fifo */ @@ -261,27 +263,26 @@ typedef struct coderctx_t { ioctx_t *inctx; /* Input context needed for stat callbacks */ int video_stream_index; - int audio_stream_index[MAX_AUDIO_MUX]; /* Audio input stream indexes */ + int audio_stream_index[MAX_STREAMS]; /* Audio input stream indexes */ int n_audio; /* Number of audio streams that will be decoded */ int data_scte35_stream_index; /* Index of SCTE-35 data stream */ int data_stream_index; /* Index of an 
unrecognized data stream */ - int audio_enc_stream_index; /* Audio output stream index */ int64_t video_last_wrapped_pts; /* Video last wrapped pts */ int64_t video_last_input_pts; /* Video last input pts */ - int64_t audio_last_wrapped_pts; /* Audio last wrapped pts */ - int64_t audio_last_input_pts; /* Audio last input pts */ + int64_t audio_last_wrapped_pts[MAX_STREAMS]; /* Audio last wrapped pts */ + int64_t audio_last_input_pts[MAX_STREAMS]; /* Audio last input pts */ int64_t video_last_dts; - int64_t audio_last_dts; + int64_t audio_last_dts[MAX_STREAMS]; int64_t last_key_frame; /* pts of last key frame */ int64_t forced_keyint_countdown; /* frames until next forced key frame */ int64_t video_last_pts_read; /* Video input last pts read */ - int64_t audio_last_pts_read; /* Audio input last pts reas */ + int64_t audio_last_pts_read[MAX_STREAMS]; /* Audio input last pts read */ int64_t video_last_pts_sent_encode; /* Video last pts to encode if tx_type & tx_video */ - int64_t audio_last_pts_sent_encode; /* Audio last pts to encode if tx_type & tx_audio */ + int64_t audio_last_pts_sent_encode[MAX_STREAMS]; /* Audio last pts to encode if tx_type & tx_audio */ int64_t video_last_pts_encoded; /* Video last input pts encoded if tx_type & tx_video */ - int64_t audio_last_pts_encoded; /* Audio last input pts encoded if tx_type & tx_audio */ + int64_t audio_last_pts_encoded[MAX_STREAMS]; /* Audio last input pts encoded if tx_type & tx_audio */ int64_t audio_output_pts; /* Used to set PTS directly when using audio FIFO */ @@ -291,25 +292,25 @@ typedef struct coderctx_t { AVFilterGraph *video_filter_graph; /* Audio filter */ - AVFilterContext *audio_buffersink_ctx; - AVFilterContext *audio_buffersrc_ctx[MAX_AUDIO_MUX]; - AVFilterGraph *audio_filter_graph; + AVFilterContext *audio_buffersink_ctx[MAX_STREAMS]; + AVFilterContext *audio_buffersrc_ctx[MAX_STREAMS]; + AVFilterGraph *audio_filter_graph[MAX_STREAMS]; + int n_audio_filters; /* Number of initialized audio filters */ 
int64_t video_frames_written; /* Total video frames written so far */ - int64_t audio_frames_written; /* Total audio frames written so far */ + int64_t audio_frames_written[MAX_STREAMS]; /* Total audio frames written so far */ int64_t video_pts; /* Video decoder/encoder pts */ - int64_t audio_pts; /* Audio decoder/encoder pts */ + int64_t audio_pts[MAX_STREAMS]; /* Audio decoder/encoder pts for each track/stream */ int64_t video_input_start_pts; /* In case video input stream starts at PTS > 0 */ int video_input_start_pts_notified; /* Will be set as soon as out_stat_decoding_video_start_pts is fired */ - int64_t audio_input_start_pts; /* In case audio input stream starts at PTS > 0 */ + int64_t audio_input_start_pts[MAX_STREAMS]; /* In case audio input stream starts at PTS > 0 */ int audio_input_start_pts_notified; /* Will be set as soon as out_stat_decoding_audio_start_pts is fired */ int64_t first_decoding_video_pts; /* PTS of first video frame read from the decoder */ - int64_t first_decoding_audio_pts; /* PTS of first audio frame read from the decoder */ + int64_t first_decoding_audio_pts[MAX_STREAMS]; /* PTS of first audio frame read from the decoder */ int64_t first_encoding_video_pts; /* PTS of first video frame sent to the encoder */ - int64_t first_encoding_audio_pts; /* PTS of first audio frame sent to the encoder */ + int64_t first_encoding_audio_pts[MAX_STREAMS]; /* PTS of first audio frame sent to the encoder */ int64_t first_read_frame_pts[MAX_STREAMS]; /* PTS of first frame read - which might not be decodable */ - int64_t audio_input_prev_pts; /* Previous pts for audio input */ int64_t video_encoder_prev_pts; /* Previous pts for video output (encoder) */ int64_t video_duration; /* Duration/pts of original frame */ int64_t audio_duration; /* Audio duration/pts of original frame when tx_type == tx_all */ @@ -420,9 +421,8 @@ typedef struct xcparams_t { char *watermark_timecode; // Watermark timecode string (i.e 00\:00\:00\:00) float 
watermark_timecode_rate; // Watermark timecode frame rate - int audio_index[MAX_AUDIO_MUX]; // Audio index(s) for mez making, may need to become an array of indexes + int audio_index[MAX_STREAMS]; // Audio index(s) for mez making, may need to become an array of indexes int n_audio; // Number of entries in audio_index - int audio_fill_gap; // Audio only, fills the gap if there is a jump in PTS int sync_audio_to_stream_id; // mpegts only, default is 0 int bitdepth; // Can be 8, 10, 12 char *max_cll; // Maximum Content Light Level (HDR only) @@ -500,13 +500,13 @@ typedef struct xctx_t { * Each video/audio/caption input stream can have multiple input files/parts. * Each video/audio/caption input stream has its own coderctx_t and ioctx_t. */ - io_mux_ctx_t *in_mux_ctx; // Input muxer context - coderctx_t in_muxer_ctx[MAX_AUDIO_MUX+MAX_CAPTION_MUX+1]; // Video, audio, captions coder input muxer context (one video, multiple audio/caption) - ioctx_t *inctx_muxer[MAX_AUDIO_MUX+MAX_CAPTION_MUX+1]; // Video, audio, captions io muxer context (one video, multiple audio/caption) - coderctx_t out_muxer_ctx; // Output muxer + io_mux_ctx_t *in_mux_ctx; // Input muxer context + coderctx_t in_muxer_ctx[MAX_STREAMS]; // Video, audio, captions coder input muxer context (one video, multiple audio/caption) + ioctx_t *inctx_muxer[MAX_STREAMS]; // Video, audio, captions io muxer context (one video, multiple audio/caption) + coderctx_t out_muxer_ctx; // Output muxer - AVPacket pkt_array[MAX_AUDIO_MUX+MAX_CAPTION_MUX+1]; - int is_pkt_valid[MAX_AUDIO_MUX+MAX_CAPTION_MUX+1]; + AVPacket pkt_array[MAX_STREAMS]; + int is_pkt_valid[MAX_STREAMS]; elv_channel_t *vc; // Video frame channel elv_channel_t *ac; // Audio frame channel diff --git a/libavpipe/src/avpipe_filters.c b/libavpipe/src/avpipe_filters.c index 44888e3..56a1e64 100644 --- a/libavpipe/src/avpipe_filters.c +++ b/libavpipe/src/avpipe_filters.c @@ -161,8 +161,6 @@ init_audio_filters( return eav_num_streams; } - AVCodecContext 
*dec_codec_ctx = decoder_context->codec_context[decoder_context->audio_stream_index[0]];
-    AVCodecContext *enc_codec_ctx = encoder_context->codec_context[encoder_context->audio_stream_index[0]];
     char args[512];
     int ret = 0;
     AVFilterContext **abuffersrc_ctx = NULL;
@@ -173,92 +171,99 @@
     const AVFilter *aformat = avfilter_get_by_name("aformat");
     AVFilterGraph *filter_graph;
-    if (!dec_codec_ctx) {
-        elv_err("init_audio_filters, audio decoder was not initialized!");
-        ret = AVERROR_UNKNOWN;
-        goto end;
-    }
+    for (int i=0; i<encoder_context->n_audio_output; i++) {
+        int audio_stream_index = decoder_context->audio_stream_index[i];
-    filter_graph = avfilter_graph_alloc();
-    if (!buffersrc || !buffersink || !filter_graph) {
-        elv_err("init_audio_filters, audio filtering source or sink element not found");
-        ret = AVERROR_UNKNOWN;
-        goto end;
-    }
+        AVCodecContext *dec_codec_ctx = decoder_context->codec_context[audio_stream_index];
+        AVCodecContext *enc_codec_ctx = encoder_context->codec_context[audio_stream_index];
-    get_avfilter_args(decoder_context, decoder_context->audio_stream_index[0], args, sizeof(args));
-    elv_dbg("init_audio_filters, audio srcfilter args=%s", args);
+        if (!dec_codec_ctx) {
+            elv_err("init_audio_filters, audio decoder was not initialized!");
+            ret = AVERROR_UNKNOWN;
+            goto end;
+        }
-    /* decoder_context->n_audio is 1 */
-    abuffersrc_ctx = decoder_context->audio_buffersrc_ctx;
+        filter_graph = avfilter_graph_alloc();
+        if (!buffersrc || !buffersink || !filter_graph) {
+            elv_err("init_audio_filters, audio filtering source or sink element not found");
+            ret = AVERROR_UNKNOWN;
+            goto end;
+        }
-    ret = avfilter_graph_create_filter(&abuffersrc_ctx[0], buffersrc, "in", args, NULL, filter_graph);
-    if (ret < 0) {
-        elv_err("init_audio_filters, cannot create audio buffer source");
-        goto end;
-    }
+        get_avfilter_args(decoder_context, audio_stream_index, args, sizeof(args));
+        elv_dbg("init_audio_filters, audio srcfilter args=%s", args);
-    ret = 
avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph); - if (ret < 0) { - elv_err("init_audio_filters, cannot create audio buffer sink"); - goto end; - } + abuffersrc_ctx = decoder_context->audio_buffersrc_ctx; - ret = av_opt_set_bin(buffersink_ctx, "sample_fmts", - (uint8_t*)&enc_codec_ctx->sample_fmt, sizeof(enc_codec_ctx->sample_fmt), - AV_OPT_SEARCH_CHILDREN); - if (ret < 0) { - elv_err("init_audio_filters, cannot set output sample format"); - goto end; - } + ret = avfilter_graph_create_filter(&abuffersrc_ctx[i], buffersrc, "in", args, NULL, filter_graph); + if (ret < 0) { + elv_err("init_audio_filters, cannot create audio buffer source"); + goto end; + } - ret = av_opt_set_bin(buffersink_ctx, "sample_rates", - (uint8_t*)&enc_codec_ctx->sample_rate, sizeof(enc_codec_ctx->sample_rate), - AV_OPT_SEARCH_CHILDREN); - if (ret < 0) { - elv_err("init_audio_filters, cannot set output sample rate"); - goto end; - } + ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph); + if (ret < 0) { + elv_err("init_audio_filters, cannot create audio buffer sink"); + goto end; + } - ret = av_opt_set_bin(buffersink_ctx, "channel_layouts", - (uint8_t*)&enc_codec_ctx->channel_layout, - sizeof(enc_codec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN); - if (ret < 0) { - elv_err("init_audio_filters, cannot set output channel layout"); - goto end; - } + ret = av_opt_set_bin(buffersink_ctx, "sample_fmts", + (uint8_t*)&enc_codec_ctx->sample_fmt, sizeof(enc_codec_ctx->sample_fmt), + AV_OPT_SEARCH_CHILDREN); + if (ret < 0) { + elv_err("init_audio_filters, cannot set output sample format"); + goto end; + } - snprintf(args, sizeof(args), + ret = av_opt_set_bin(buffersink_ctx, "sample_rates", + (uint8_t*)&enc_codec_ctx->sample_rate, sizeof(enc_codec_ctx->sample_rate), + AV_OPT_SEARCH_CHILDREN); + if (ret < 0) { + elv_err("init_audio_filters, cannot set output sample rate"); + goto end; + } + + ret = 
av_opt_set_bin(buffersink_ctx, "channel_layouts", + (uint8_t*)&enc_codec_ctx->channel_layout, + sizeof(enc_codec_ctx->channel_layout), AV_OPT_SEARCH_CHILDREN); + if (ret < 0) { + elv_err("init_audio_filters, cannot set output channel layout"); + goto end; + } + + snprintf(args, sizeof(args), "sample_fmts=%s:sample_rates=%d:channel_layouts=0x%"PRIx64, av_get_sample_fmt_name(enc_codec_ctx->sample_fmt), enc_codec_ctx->sample_rate, (uint64_t)enc_codec_ctx->channel_layout); - elv_dbg("init_audio_filters, audio format_filter args=%s", args); + elv_dbg("init_audio_filters, audio format_filter args=%s", args); - ret = avfilter_graph_create_filter(&format_ctx, aformat, "format_out_0_0", args, NULL, filter_graph); - if (ret < 0) { - elv_err("init_audio_filters, cannot create audio format filter"); - goto end; - } + ret = avfilter_graph_create_filter(&format_ctx, aformat, "format_out_0_0", args, NULL, filter_graph); + if (ret < 0) { + elv_err("init_audio_filters, cannot create audio format filter"); + goto end; + } - if ((ret = avfilter_link(abuffersrc_ctx[0], 0, format_ctx, 0)) < 0) { - elv_err("init_audio_filters, failed to link audio src to format, ret=%d", ret); - goto end; - } + if ((ret = avfilter_link(abuffersrc_ctx[i], 0, format_ctx, 0)) < 0) { + elv_err("init_audio_filters, failed to link audio src to format, ret=%d", ret); + goto end; + } - if ((ret = avfilter_link(format_ctx, 0, buffersink_ctx, 0)) < 0) { - elv_err("init_audio_filters, failed to link audio format to sink, ret=%d", ret); - goto end; - } + if ((ret = avfilter_link(format_ctx, 0, buffersink_ctx, 0)) < 0) { + elv_err("init_audio_filters, failed to link audio format to sink, ret=%d", ret); + goto end; + } - av_buffersink_set_frame_size(buffersink_ctx, - encoder_context->codec_context[decoder_context->audio_stream_index[0]]->frame_size); + av_buffersink_set_frame_size(buffersink_ctx, + encoder_context->codec_context[audio_stream_index]->frame_size); - if ((ret = avfilter_graph_config(filter_graph, NULL)) 
< 0) - goto end; + if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) + goto end; - /* Fill FilteringContext */ - decoder_context->audio_filter_graph = filter_graph; - decoder_context->audio_buffersink_ctx = buffersink_ctx; + /* Fill FilteringContext */ + decoder_context->audio_filter_graph[i] = filter_graph; + decoder_context->audio_buffersink_ctx[i] = buffersink_ctx; + decoder_context->n_audio_filters++; + } end: if (ret < 0) @@ -390,14 +395,15 @@ init_audio_pan_filters( } av_buffersink_set_frame_size(buffersink_ctx, - encoder_context->codec_context[decoder_context->audio_stream_index[0]]->frame_size); + encoder_context->codec_context[0]->frame_size); if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) goto end; /* Fill FilteringContext */ - decoder_context->audio_filter_graph = filter_graph; - decoder_context->audio_buffersink_ctx = buffersink_ctx; + decoder_context->audio_filter_graph[0] = filter_graph; + decoder_context->audio_buffersink_ctx[0] = buffersink_ctx; + decoder_context->n_audio_filters++; end: if (ret < 0) @@ -523,15 +529,15 @@ init_audio_merge_pan_filters( goto end; } - av_buffersink_set_frame_size(buffersink_ctx, - encoder_context->codec_context[decoder_context->audio_stream_index[0]]->frame_size); + av_buffersink_set_frame_size(buffersink_ctx, encoder_context->codec_context[0]->frame_size); if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) goto end; /* Fill FilteringContext */ - decoder_context->audio_filter_graph = filter_graph; - decoder_context->audio_buffersink_ctx = buffersink_ctx; + decoder_context->audio_filter_graph[0] = filter_graph; + decoder_context->audio_buffersink_ctx[0] = buffersink_ctx; + decoder_context->n_audio_filters++; end: if (ret < 0) @@ -554,7 +560,7 @@ init_audio_join_filters( xcparams_t *params) { if (decoder_context->n_audio < 0 || - decoder_context->n_audio > MAX_AUDIO_MUX) { + decoder_context->n_audio > MAX_STREAMS) { return eav_num_streams; } @@ -571,8 +577,8 @@ init_audio_join_filters( const 
AVFilter *aformat = avfilter_get_by_name("aformat");
     const AVFilter *join = avfilter_get_by_name("join");
-    decoder_context->audio_filter_graph = avfilter_graph_alloc();
-    if (!buffersrc || !buffersink || !join || !decoder_context->audio_filter_graph) {
+    decoder_context->audio_filter_graph[0] = avfilter_graph_alloc();
+    if (!buffersrc || !buffersink || !join || !decoder_context->audio_filter_graph[0]) {
         elv_err("init_audio_join_filters, audio filtering source/sink/join filter not found");
         ret = AVERROR_UNKNOWN;
         goto end;
@@ -582,7 +588,7 @@ init_audio_join_filters(
     /* Create join filter with n inputs */
     sprintf(args, "inputs=%d", decoder_context->n_audio);
-    ret = avfilter_graph_create_filter(&join_ctx, join, "join", args, NULL, decoder_context->audio_filter_graph);
+    ret = avfilter_graph_create_filter(&join_ctx, join, "join", args, NULL, decoder_context->audio_filter_graph[0]);
     if (ret < 0) {
         elv_err("init_audio_join_filters, cannot create audio join");
         goto end;
@@ -590,7 +596,8 @@ init_audio_join_filters(
     /* For each audio input create an audio source filter and link it to join filter */
     for (int i=0; i<decoder_context->n_audio; i++) {
-        AVCodecContext *dec_codec_ctx = decoder_context->codec_context[decoder_context->audio_stream_index[i]];
+        int audio_stream_index = decoder_context->audio_stream_index[i];
+        AVCodecContext *dec_codec_ctx = decoder_context->codec_context[audio_stream_index];
         char filt_name[32];
         if (!dec_codec_ctx) {
@@ -599,12 +606,12 @@ init_audio_join_filters(
             goto end;
         }
-        get_avfilter_args(decoder_context, decoder_context->audio_stream_index[i], args, sizeof(args));
+        get_avfilter_args(decoder_context, audio_stream_index, args, sizeof(args));
         sprintf(filt_name, "in_%d", i);
         elv_dbg("init_audio_join_filters, audio srcfilter=%s args=%s", filt_name, args);
-        ret = avfilter_graph_create_filter(&abuffersrc_ctx[i], buffersrc, filt_name, args, NULL, decoder_context->audio_filter_graph);
+        ret = avfilter_graph_create_filter(&abuffersrc_ctx[i], buffersrc, filt_name, 
args, NULL, decoder_context->audio_filter_graph[0]); if (ret < 0) { elv_err("init_audio_join_filters, cannot create audio buffer source %d", i); goto end; @@ -617,7 +624,7 @@ init_audio_join_filters( } - ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, decoder_context->audio_filter_graph); + ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, decoder_context->audio_filter_graph[0]); if (ret < 0) { elv_err("init_audio_join_filters, cannot create audio buffer sink"); goto end; @@ -653,7 +660,7 @@ init_audio_join_filters( (uint64_t)enc_codec_ctx->channel_layout); elv_dbg("init_audio_join_filters, audio format_filter args=%s", format_args); - ret = avfilter_graph_create_filter(&format_ctx, aformat, "format_out_0_0", format_args, NULL, decoder_context->audio_filter_graph); + ret = avfilter_graph_create_filter(&format_ctx, aformat, "format_out_0_0", format_args, NULL, decoder_context->audio_filter_graph[0]); if (ret < 0) { elv_err("Cannot create audio format filter"); goto end; @@ -672,11 +679,12 @@ init_audio_join_filters( av_buffersink_set_frame_size(buffersink_ctx, encoder_context->codec_context[encoder_context->audio_stream_index[0]]->frame_size); - if ((ret = avfilter_graph_config(decoder_context->audio_filter_graph, NULL)) < 0) + if ((ret = avfilter_graph_config(decoder_context->audio_filter_graph[0], NULL)) < 0) goto end; /* Save FilteringContext */ - decoder_context->audio_buffersink_ctx = buffersink_ctx; + decoder_context->audio_buffersink_ctx[0] = buffersink_ctx; + decoder_context->n_audio_filters++; end: if (ret < 0) diff --git a/libavpipe/src/avpipe_io.c b/libavpipe/src/avpipe_io.c index 9ef48a4..b975939 100644 --- a/libavpipe/src/avpipe_io.c +++ b/libavpipe/src/avpipe_io.c @@ -75,13 +75,13 @@ elv_io_open( (*pb) = avioctx; out_tracker[outctx->stream_index].last_outctx = outctx; - elv_dbg("OUT elv_io_open stream_index=%d, seg_index=%d avioctx=%p, avioctx->opaque=%p, buf=%p, outctx=%p, 
outtracker[0]->last_outctx=%p, outtracker[1]->last_outctx=%p", - outctx->stream_index, outctx->seg_index, avioctx, avioctx->opaque, avioctx->buffer, outctx, out_tracker[0].last_outctx, out_tracker[1].last_outctx); + elv_dbg("OUT elv_io_open stream_index=%d, seg_index=%d avioctx=%p, avioctx->opaque=%p, buf=%p, outctx=%p, outtracker->last_outctx=%p, outtracker->last_outctx=%p", + outctx->stream_index, outctx->seg_index, avioctx, avioctx->opaque, avioctx->buffer, outctx, out_tracker[outctx->stream_index].last_outctx, out_tracker[outctx->stream_index].last_outctx); } else { ioctx_t *outctx = (ioctx_t *) calloc(1, sizeof(ioctx_t)); outctx->stream_index = 0; - outctx->encoder_ctx = out_tracker->encoder_ctx; - outctx->inctx = out_tracker[0].inctx; + outctx->encoder_ctx = out_tracker[outctx->stream_index].encoder_ctx; + outctx->inctx = out_tracker[outctx->stream_index].inctx; outctx->seg_index = 0; // init segment has stream_index and seg_index = 0 if (!url || url[0] == '\0') { @@ -97,13 +97,19 @@ elv_io_open( } } else { outctx->url = strdup(url); - int i = 0; - while (i < strlen(url) && !isdigit(url[i])) - i++; - if (i < strlen(url)) { - // Assumes a filename like segment%d-%05d.mp4 - outctx->stream_index = url[i] - '0'; + outctx->stream_index = 0; + if (!strstr(url, "m3u8")) { + int i = 0; + while (i < strlen(url) && !isdigit(url[i])) + i++; + if (i < strlen(url)) { + // Assumes a filename like segment%d-%05d.mp4 + outctx->stream_index = url[i] - '0'; + } } + outctx->encoder_ctx = out_tracker[outctx->stream_index].encoder_ctx; + outctx->inctx = out_tracker[outctx->stream_index].inctx; + //elv_dbg("XXX stream_index=%d", outctx->stream_index); if (!strncmp(url + strlen(url) - 3, "mpd", 3)) { outctx->type = avpipe_manifest; outctx->seg_index = -1; // Special index for manifest @@ -170,8 +176,8 @@ elv_io_open( AVIOContext *avioctx = avio_alloc_context(outctx->buf, outctx->bufsz, AVIO_FLAG_WRITE, (void *)outctx, out_handlers->avpipe_reader, out_handlers->avpipe_writer, 
out_handlers->avpipe_seeker); - elv_dbg("OUT elv_io_open url=%s, type=%d, seg_index=%d, last_outctx=%p, buf=%p", - url, outctx->type, outctx->seg_index, out_tracker[outctx->stream_index].last_outctx, avioctx->buffer); + elv_dbg("OUT elv_io_open url=%s, type=%d, stream_index=%d, seg_index=%d, last_outctx=%p, buf=%p", + url, outctx->type, outctx->stream_index, outctx->seg_index, out_tracker[outctx->stream_index].last_outctx, avioctx->buffer); /* libavformat expects seekable streams for mp4 */ if (outctx->type == avpipe_mp4_stream || outctx->type == avpipe_mp4_segment) diff --git a/libavpipe/src/avpipe_mux.c b/libavpipe/src/avpipe_mux.c index a971530..fe52545 100644 --- a/libavpipe/src/avpipe_mux.c +++ b/libavpipe/src/avpipe_mux.c @@ -41,8 +41,11 @@ elv_mux_close( ioctx_t *outctx = (ioctx_t *)pb->opaque; elv_dbg("OUT elv_mux_close avioctx=%p", pb); - if (out_handlers) { - out_handlers->avpipe_stater(outctx, out_stat_encoding_end_pts); + if (out_handlers && outctx) { + if (outctx->type == avpipe_video_fmp4_segment) + out_handlers->avpipe_stater(outctx, 0, out_stat_encoding_end_pts); + else + out_handlers->avpipe_stater(outctx, 1, out_stat_encoding_end_pts); out_handlers->avpipe_closer(outctx); } free(outctx); @@ -163,11 +166,11 @@ init_mux_ctx( elv_err("init_mux_ctx invalid video stream_index=%d", stream_index); return eav_param; } - if (!strcmp(stream_type, "audio") && (stream_index > MAX_AUDIO_MUX || stream_index > in_mux_ctx->last_audio_index+1)) { + if (!strcmp(stream_type, "audio") && (stream_index > MAX_STREAMS || stream_index > in_mux_ctx->last_audio_index+1)) { elv_err("init_mux_ctx invalid audio stream_index=%d", stream_index); return eav_param; } - if (!strcmp(stream_type, "caption") && (stream_index > MAX_CAPTION_MUX || stream_index > in_mux_ctx->last_caption_index+1)) { + if (!strcmp(stream_type, "caption") && (stream_index > MAX_STREAMS || stream_index > in_mux_ctx->last_caption_index+1)) { elv_err("init_mux_ctx invalid caption stream_index=%d", 
stream_index); return eav_param; } diff --git a/libavpipe/src/avpipe_xc.c b/libavpipe/src/avpipe_xc.c index 4890ceb..462a780 100644 --- a/libavpipe/src/avpipe_xc.c +++ b/libavpipe/src/avpipe_xc.c @@ -366,13 +366,13 @@ selected_audio_index( static int selected_decoded_audio( coderctx_t *decoder_context, - int index) + int stream_index) { if (decoder_context->n_audio <= 0) return -1; for (int i=0; in_audio; i++) { - if (decoder_context->audio_stream_index[i] == index) + if (decoder_context->audio_stream_index[i] == stream_index) return i; } @@ -389,7 +389,6 @@ prepare_decoder( { int rc; decoder_context->video_last_dts = AV_NOPTS_VALUE; - decoder_context->audio_last_dts = AV_NOPTS_VALUE; int stream_id_index = -1; int sync_id_index = -1; // Index of the video stream used for audio sync char *url = params ? params->url : ""; @@ -398,8 +397,10 @@ prepare_decoder( decoder_context->inctx = inctx; decoder_context->video_stream_index = -1; decoder_context->data_scte35_stream_index = -1; - for (int j=0; jaudio_stream_index[j] = -1; + decoder_context->audio_last_dts[j] = AV_NOPTS_VALUE; + } decoder_context->format_context = avformat_alloc_context(); if (!decoder_context->format_context) { @@ -455,7 +456,7 @@ prepare_decoder( decoder_context->stream[i] = decoder_context->format_context->streams[i]; /* If no stream ID specified - choose the first video stream encountered */ - if (params && params->stream_id < 0 && decoder_context->video_stream_index < 0) + if (params && (params->xc_type & xc_video) && params->stream_id < 0 && decoder_context->video_stream_index < 0) decoder_context->video_stream_index = i; elv_dbg("VIDEO STREAM %d, codec_id=%s, stream_id=%d, timebase=%d, xc_type=%d, url=%s", i, avcodec_get_name(decoder_context->codec_parameters[i]->codec_id), decoder_context->stream[i]->id, @@ -654,7 +655,7 @@ prepare_decoder( elv_dbg("prepare_decoder xc_type=%d, video_stream_index=%d, audio_stream_index=%d, n_audio=%d, nb_streams=%d, url=%s", params ? 
params->xc_type : 0, decoder_context->video_stream_index, - decoder_context->audio_stream_index[0], + decoder_context->audio_stream_index[decoder_context->n_audio-1], decoder_context->n_audio, decoder_context->format_context->nb_streams, url); @@ -706,6 +707,8 @@ set_encoder_options( int stream_index, int timebase) { + int i; + if (timebase <= 0) { elv_err("Setting encoder options failed, invalid timebase=%d (check encoding params), url=%s", timebase, params->url); @@ -715,14 +718,14 @@ set_encoder_options( if (!strcmp(params->format, "fmp4")) { if (stream_index == decoder_context->video_stream_index) av_opt_set(encoder_context->format_context->priv_data, "movflags", "frag_every_frame", 0); - if (selected_decoded_audio(decoder_context, stream_index) >= 0) - av_opt_set(encoder_context->format_context2->priv_data, "movflags", "frag_every_frame", 0); + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) + av_opt_set(encoder_context->format_context2[i]->priv_data, "movflags", "frag_every_frame", 0); } // Segment duration (in ts) - notice it is set on the format context not codec if (params->audio_seg_duration_ts > 0 && (!strcmp(params->format, "dash") || !strcmp(params->format, "hls"))) { - if (selected_decoded_audio(decoder_context, stream_index) >= 0) - av_opt_set_int(encoder_context->format_context2->priv_data, "seg_duration_ts", params->audio_seg_duration_ts, + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) + av_opt_set_int(encoder_context->format_context2[i]->priv_data, "seg_duration_ts", params->audio_seg_duration_ts, AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_SEARCH_CHILDREN); } @@ -732,20 +735,20 @@ set_encoder_options( AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_SEARCH_CHILDREN); } - if (selected_decoded_audio(decoder_context, stream_index) >= 0) { + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) { if (!(params->xc_type & xc_audio)) { - elv_err("Failed to set encoder options, stream_index=%d, xc_type=%d, 
url=%s", + elv_err("Failed to set audio encoder options, stream_index=%d, xc_type=%d, url=%s", stream_index, params->xc_type, params->url); return eav_param; } - av_opt_set_int(encoder_context->format_context2->priv_data, "start_fragment_index", params->start_fragment_index, + av_opt_set_int(encoder_context->format_context2[i]->priv_data, "start_fragment_index", params->start_fragment_index, AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_SEARCH_CHILDREN); - av_opt_set(encoder_context->format_context2->priv_data, "start_segment", params->start_segment_str, 0); + av_opt_set(encoder_context->format_context2[i]->priv_data, "start_segment", params->start_segment_str, 0); } if (stream_index == decoder_context->video_stream_index) { if (!(params->xc_type & xc_video)) { - elv_err("Failed to set encoder options, stream_index=%d, xc_type=%d, url=%s", + elv_err("Failed to set video encoder options, stream_index=%d, xc_type=%d, url=%s", stream_index, params->xc_type, params->url); return eav_param; } @@ -764,17 +767,17 @@ set_encoder_options( timebase = calc_timebase(params, 1, timebase); seg_duration_ts = seg_duration * timebase; } - if (selected_decoded_audio(decoder_context, stream_index) >= 0) { + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) { if (params->audio_seg_duration_ts > 0) seg_duration_ts = params->audio_seg_duration_ts; - av_opt_set_int(encoder_context->format_context2->priv_data, "segment_duration_ts", seg_duration_ts, 0); + av_opt_set_int(encoder_context->format_context2[i]->priv_data, "segment_duration_ts", seg_duration_ts, 0); /* If audio_seg_duration_ts is not set, set it now */ if (params->audio_seg_duration_ts <= 0) params->audio_seg_duration_ts = seg_duration_ts; elv_dbg("setting \"fmp4-segment\" audio segment_time to %s, seg_duration_ts=%"PRId64", url=%s", params->seg_duration, seg_duration_ts, params->url); - av_opt_set(encoder_context->format_context2->priv_data, "reset_timestamps", "on", 0); - } + 
av_opt_set(encoder_context->format_context2[i]->priv_data, "reset_timestamps", "on", 0); + } if (stream_index == decoder_context->video_stream_index) { if (params->video_seg_duration_ts > 0) seg_duration_ts = params->video_seg_duration_ts; @@ -790,8 +793,8 @@ set_encoder_options( // av_opt_set(encoder_context->format_context->priv_data, "segment_format_options", "movflags=faststart", 0); // So lets use flag_every_frame option instead. if (!strcmp(params->format, "fmp4-segment")) { - if (selected_decoded_audio(decoder_context, stream_index) >= 0) - av_opt_set(encoder_context->format_context2->priv_data, "segment_format_options", "movflags=frag_every_frame", 0); + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) + av_opt_set(encoder_context->format_context2[i]->priv_data, "segment_format_options", "movflags=frag_every_frame", 0); if (stream_index == decoder_context->video_stream_index) av_opt_set(encoder_context->format_context->priv_data, "segment_format_options", "movflags=frag_every_frame", 0); } @@ -1261,182 +1264,217 @@ prepare_audio_encoder( coderctx_t *decoder_context, xcparams_t *params) { - int index = decoder_context->audio_stream_index[0]; + int n_audio = encoder_context->n_audio_output; char *ecodec; AVFormatContext *format_context; int rc; - if (index < 0) { - elv_dbg("No audio stream detected by decoder."); - return eav_stream_index; + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) { + // Only we have one output audio in these cases + n_audio = 1; } - if (!decoder_context->codec_context[index]) { - elv_err("Decoder codec context is NULL! 
index=%d, url=%s", index, params->url); - return eav_codec_context; - } + for (int i=0; iaudio_stream_index[i]; + int output_stream_index = stream_index; - /* If there are more than 1 audio stream do encode, we can't do bypass */ - if (params && params->bypass_transcoding && decoder_context->n_audio > 1) { - elv_err("Can not bypass multiple audio streams, n_audio=%d, url=%s", decoder_context->n_audio, params->url); - return eav_num_streams; - } + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) { + // Only we have one output audio in these cases + output_stream_index = 0; + } - format_context = encoder_context->format_context2; - ecodec = params->ecodec2; - encoder_context->audio_last_dts = AV_NOPTS_VALUE; + if (stream_index < 0) { + elv_dbg("No audio stream detected by decoder."); + return eav_stream_index; + } - encoder_context->audio_stream_index[0] = index; - encoder_context->n_audio = 1; + if (!decoder_context->codec_context[stream_index]) { + elv_err("Decoder codec context is NULL! 
stream_index=%d, url=%s", stream_index, params->url); + return eav_codec_context; + } - encoder_context->stream[index] = avformat_new_stream(format_context, NULL); - if (params->bypass_transcoding) - encoder_context->codec[index] = avcodec_find_encoder(decoder_context->codec_context[index]->codec_id); - else - encoder_context->codec[index] = avcodec_find_encoder_by_name(ecodec); - if (!encoder_context->codec[index]) { - elv_err("Codec not found, codec_id=%s, url=%s", - avcodec_get_name(decoder_context->codec_context[index]->codec_id), params->url); - return eav_codec_context; - } + /* If there are more than 1 audio stream do encode, we can't do bypass */ + if (params && params->bypass_transcoding && decoder_context->n_audio > 1) { + elv_err("Can not bypass multiple audio streams, n_audio=%d, url=%s", decoder_context->n_audio, params->url); + return eav_num_streams; + } - format_context->io_open = elv_io_open; - format_context->io_close = elv_io_close; + format_context = encoder_context->format_context2[i]; + ecodec = params->ecodec2; + encoder_context->audio_last_dts[i] = AV_NOPTS_VALUE; - encoder_context->codec_context[index] = avcodec_alloc_context3(encoder_context->codec[index]); + encoder_context->audio_stream_index[output_stream_index] = output_stream_index; + encoder_context->n_audio = 1; - /* By default use decoder parameters */ - encoder_context->codec_context[index]->sample_rate = decoder_context->codec_context[index]->sample_rate; + encoder_context->stream[output_stream_index] = avformat_new_stream(format_context, NULL); + if (params->bypass_transcoding) + encoder_context->codec[output_stream_index] = avcodec_find_encoder(decoder_context->codec_context[stream_index]->codec_id); + else + encoder_context->codec[output_stream_index] = avcodec_find_encoder_by_name(ecodec); + if (!encoder_context->codec[output_stream_index]) { + elv_err("Codec not found, codec_id=%s, url=%s", + avcodec_get_name(decoder_context->codec_context[stream_index]->codec_id), 
params->url); + return eav_codec_context; + } - /* Set the default time_base based on input sample_rate */ - encoder_context->codec_context[index]->time_base = (AVRational){1, encoder_context->codec_context[index]->sample_rate}; - encoder_context->stream[index]->time_base = encoder_context->codec_context[index]->time_base; + format_context->io_open = elv_io_open; + format_context->io_close = elv_io_close; - if (decoder_context->codec[index] && - decoder_context->codec[index]->sample_fmts && params->bypass_transcoding) - encoder_context->codec_context[index]->sample_fmt = decoder_context->codec[index]->sample_fmts[0]; - else if (encoder_context->codec[index]->sample_fmts && encoder_context->codec[index]->sample_fmts[0]) - encoder_context->codec_context[index]->sample_fmt = encoder_context->codec[index]->sample_fmts[0]; - else - encoder_context->codec_context[index]->sample_fmt = AV_SAMPLE_FMT_FLTP; + encoder_context->codec_context[output_stream_index] = avcodec_alloc_context3(encoder_context->codec[output_stream_index]); - if (params->channel_layout > 0) - encoder_context->codec_context[index]->channel_layout = params->channel_layout; - else - /* If the input stream is stereo the decoder_context->codec_context[index]->channel_layout is AV_CH_LAYOUT_STEREO */ - encoder_context->codec_context[index]->channel_layout = - get_channel_layout_for_encoder(decoder_context->codec_context[index]->channel_layout); - encoder_context->codec_context[index]->channels = av_get_channel_layout_nb_channels(encoder_context->codec_context[index]->channel_layout); + /* By default use decoder parameters */ + encoder_context->codec_context[output_stream_index]->sample_rate = decoder_context->codec_context[stream_index]->sample_rate; - const char *channel_name = avpipe_channel_name( - av_get_channel_layout_nb_channels(encoder_context->codec_context[index]->channel_layout), - decoder_context->codec_context[index]->channel_layout); + /* Set the default time_base based on input sample_rate */ + 
encoder_context->codec_context[output_stream_index]->time_base = (AVRational){1, encoder_context->codec_context[output_stream_index]->sample_rate}; + encoder_context->stream[output_stream_index]->time_base = encoder_context->codec_context[output_stream_index]->time_base; - /* If decoder channel layout is DOWNMIX and params->ecodec == "aac" and channel_layout is not set - * then set the channel layout to STEREO. Preserve the channel layout otherwise. - */ - if (decoder_context->codec_context[index]->channel_layout == AV_CH_LAYOUT_STEREO_DOWNMIX && - !strcmp(ecodec, "aac") && - !params->channel_layout) { - /* This encoder is prepared specifically for AAC, therefore set the channel layout to AV_CH_LAYOUT_STEREO */ - encoder_context->codec_context[index]->channels = av_get_channel_layout_nb_channels(AV_CH_LAYOUT_STEREO); - encoder_context->codec_context[index]->channel_layout = AV_CH_LAYOUT_STEREO; // AV_CH_LAYOUT_STEREO is av_get_default_channel_layout(encoder_context->codec_context[index]->channels) - } + if (decoder_context->codec[stream_index] && + decoder_context->codec[stream_index]->sample_fmts && params->bypass_transcoding) + encoder_context->codec_context[output_stream_index]->sample_fmt = decoder_context->codec[stream_index]->sample_fmts[0]; + else if (encoder_context->codec[output_stream_index]->sample_fmts && encoder_context->codec[output_stream_index]->sample_fmts[0]) + encoder_context->codec_context[output_stream_index]->sample_fmt = encoder_context->codec[output_stream_index]->sample_fmts[0]; + else + encoder_context->codec_context[output_stream_index]->sample_fmt = AV_SAMPLE_FMT_FLTP; - int sample_rate = params->sample_rate; - if (!strcmp(ecodec, "aac") && - !is_valid_aac_sample_rate(encoder_context->codec_context[index]->sample_rate) && - sample_rate <= 0) - sample_rate = DEFAULT_ACC_SAMPLE_RATE; + if (params->channel_layout > 0) + encoder_context->codec_context[output_stream_index]->channel_layout = params->channel_layout; + else + /* If the input 
stream is stereo the decoder_context->codec_context[index]->channel_layout is AV_CH_LAYOUT_STEREO */ + encoder_context->codec_context[output_stream_index]->channel_layout = + get_channel_layout_for_encoder(decoder_context->codec_context[stream_index]->channel_layout); + encoder_context->codec_context[output_stream_index]->channels = av_get_channel_layout_nb_channels(encoder_context->codec_context[output_stream_index]->channel_layout); - /* - * If sample_rate is set and - * - encoder is not "aac" or - * - if encoder is "aac" and encoder sample_rate is not valid and transcoding is pan/merge/join - * then - * - set encoder sample_rate to the specified sample_rate. - */ - if (sample_rate > 0 && - (strcmp(ecodec, "aac") || !is_valid_aac_sample_rate(encoder_context->codec_context[index]->sample_rate))) { - /* - * Audio resampling, which is active for aac encoder, needs more work to adjust sampling properly - * when input sample rate is different from output sample rate. (--RM) + const char *channel_name = avpipe_channel_name( + av_get_channel_layout_nb_channels(encoder_context->codec_context[output_stream_index]->channel_layout), + decoder_context->codec_context[stream_index]->channel_layout); + + /* If decoder channel layout is DOWNMIX and params->ecodec == "aac" and channel_layout is not set + * then set the channel layout to STEREO. Preserve the channel layout otherwise. 
*/ - encoder_context->codec_context[index]->sample_rate = sample_rate; + if (decoder_context->codec_context[stream_index]->channel_layout == AV_CH_LAYOUT_STEREO_DOWNMIX && + !strcmp(ecodec, "aac") && + !params->channel_layout) { + /* This encoder is prepared specifically for AAC, therefore set the channel layout to AV_CH_LAYOUT_STEREO */ + encoder_context->codec_context[output_stream_index]->channels = av_get_channel_layout_nb_channels(AV_CH_LAYOUT_STEREO); + encoder_context->codec_context[output_stream_index]->channel_layout = AV_CH_LAYOUT_STEREO; // AV_CH_LAYOUT_STEREO is av_get_default_channel_layout(encoder_context->codec_context[index]->channels) + } - /* Update timebase for the new sample rate */ - encoder_context->codec_context[index]->time_base = (AVRational){1, sample_rate}; - encoder_context->stream[index]->time_base = (AVRational){1, sample_rate}; - } + int sample_rate = params->sample_rate; + if (!strcmp(ecodec, "aac") && + !is_valid_aac_sample_rate(encoder_context->codec_context[output_stream_index]->sample_rate) && + sample_rate <= 0) + sample_rate = DEFAULT_ACC_SAMPLE_RATE; - elv_dbg("ENCODER channels=%d, channel_layout=%d (%s), sample_fmt=%s, sample_rate=%d", - encoder_context->codec_context[index]->channels, - encoder_context->codec_context[index]->channel_layout, - avpipe_channel_layout_name(encoder_context->codec_context[index]->channel_layout), - av_get_sample_fmt_name(encoder_context->codec_context[index]->sample_fmt), - encoder_context->codec_context[index]->sample_rate); + /* + * If sample_rate is set and + * - encoder is not "aac" or + * - if encoder is "aac" and encoder sample_rate is not valid and transcoding is pan/merge/join + * then + * - set encoder sample_rate to the specified sample_rate. 
+ */ + if (sample_rate > 0 && + (strcmp(ecodec, "aac") || !is_valid_aac_sample_rate(encoder_context->codec_context[output_stream_index]->sample_rate))) { + /* + * Audio resampling, which is active for aac encoder, needs more work to adjust sampling properly + * when input sample rate is different from output sample rate. (--RM) + */ + encoder_context->codec_context[output_stream_index]->sample_rate = sample_rate; + + /* Update timebase for the new sample rate */ + encoder_context->codec_context[output_stream_index]->time_base = (AVRational){1, sample_rate}; + encoder_context->stream[output_stream_index]->time_base = (AVRational){1, sample_rate}; + } - encoder_context->codec_context[index]->bit_rate = params->audio_bitrate; + elv_dbg("ENCODER channels=%d, channel_layout=%d (%s), sample_fmt=%s, sample_rate=%d", + encoder_context->codec_context[output_stream_index]->channels, + encoder_context->codec_context[output_stream_index]->channel_layout, + avpipe_channel_layout_name(encoder_context->codec_context[output_stream_index]->channel_layout), + av_get_sample_fmt_name(encoder_context->codec_context[output_stream_index]->sample_fmt), + encoder_context->codec_context[output_stream_index]->sample_rate); - /* Allow the use of the experimental AAC encoder. */ - encoder_context->codec_context[index]->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; + encoder_context->codec_context[output_stream_index]->bit_rate = params->audio_bitrate; - rc = set_encoder_options(encoder_context, decoder_context, params, encoder_context->audio_stream_index[0], - encoder_context->stream[encoder_context->audio_stream_index[0]]->time_base.den); - if (rc < 0) { - elv_err("Failed to set audio encoder options, url=%s", params->url); - return rc; - } + /* Allow the use of the experimental AAC encoder. 
*/ + encoder_context->codec_context[output_stream_index]->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL; - AVCodecContext *encoder_codec_context = encoder_context->codec_context[index]; - /* Some container formats (like MP4) require global headers to be present. - * Mark the encoder so that it behaves accordingly. */ - if (format_context->oformat->flags & AVFMT_GLOBALHEADER) - encoder_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + rc = set_encoder_options(encoder_context, decoder_context, params, decoder_context->audio_stream_index[i], + encoder_context->stream[output_stream_index]->time_base.den); + if (rc < 0) { + elv_err("Failed to set audio encoder options, url=%s", params->url); + return rc; + } - /* Open audio encoder codec */ - if (avcodec_open2(encoder_context->codec_context[index], encoder_context->codec[index], NULL) < 0) { - elv_dbg("Could not open encoder for audio, index=%d", index); - return eav_open_codec; - } + AVCodecContext *encoder_codec_context = encoder_context->codec_context[output_stream_index]; + /* Some container formats (like MP4) require global headers to be present. + * Mark the encoder so that it behaves accordingly. 
*/ + if (format_context->oformat->flags & AVFMT_GLOBALHEADER) + encoder_codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; - /* Open audio encoder codec */ - if (avcodec_open2(encoder_context->codec_context[index], encoder_context->codec[index], NULL) < 0) { - elv_dbg("Could not open encoder for audio, index=%d", index); - return eav_open_codec; - } + /* Open audio encoder codec */ + if (avcodec_open2(encoder_context->codec_context[output_stream_index], encoder_context->codec[output_stream_index], NULL) < 0) { + elv_dbg("Could not open encoder for audio, stream_index=%d", stream_index); + return eav_open_codec; + } - elv_dbg("encoder audio stream index=%d, bitrate=%d, sample_fmts=%s, timebase=%d, output frame_size=%d, sample_rate=%d, channel_layout=%s", - index, encoder_context->codec_context[index]->bit_rate, - av_get_sample_fmt_name(encoder_context->codec_context[index]->sample_fmt), - encoder_context->codec_context[index]->time_base.den, encoder_context->codec_context[index]->frame_size, - encoder_context->codec_context[index]->sample_rate, - channel_name); + elv_dbg("encoder audio stream index=%d, bitrate=%d, sample_fmts=%s, timebase=%d, output frame_size=%d, sample_rate=%d, channel_layout=%s", + stream_index, encoder_context->codec_context[output_stream_index]->bit_rate, + av_get_sample_fmt_name(encoder_context->codec_context[output_stream_index]->sample_fmt), + encoder_context->codec_context[output_stream_index]->time_base.den, encoder_context->codec_context[output_stream_index]->frame_size, + encoder_context->codec_context[output_stream_index]->sample_rate, + channel_name); - if (avcodec_parameters_from_context( - encoder_context->stream[index]->codecpar, - encoder_context->codec_context[index]) < 0) { - elv_err("Failed to copy encoder parameters to output stream, url=%s", params->url); - return eav_codec_param; + if (avcodec_parameters_from_context( + encoder_context->stream[output_stream_index]->codecpar, + encoder_context->codec_context[output_stream_index]) < 0) { + elv_err("Failed to copy encoder parameters to output stream, url=%s", params->url); + return eav_codec_param; + + } #ifdef USE_RESAMPLE_AAC - if (!strcmp(ecodec, "aac") && - params->xc_type & xc_audio && - 
params->xc_type != xc_audio_merge && - params->xc_type != xc_audio_join && - params->xc_type != xc_audio_pan) { - init_resampler(decoder_context->codec_context[index], encoder_context->codec_context[index], + if (!strcmp(ecodec, "aac") && + params->xc_type & xc_audio && + params->xc_type != xc_audio_merge && + params->xc_type != xc_audio_join && + params->xc_type != xc_audio_pan) { + init_resampler(decoder_context->codec_context[stream_index], encoder_context->codec_context[output_stream_index], &decoder_context->resampler_context); - /* Create the FIFO buffer based on the specified output sample format. */ - if (!(decoder_context->fifo = av_audio_fifo_alloc(encoder_context->codec_context[index]->sample_fmt, - encoder_context->codec_context[index]->channels, 1))) { - elv_err("Failed to allocate audio FIFO, url=%s", params->url); - return eav_mem_alloc; + /* Create the FIFO buffer based on the specified output sample format. */ + if (!(decoder_context->fifo = av_audio_fifo_alloc(encoder_context->codec_context[output_stream_index]->sample_fmt, + encoder_context->codec_context[output_stream_index]->channels, 1))) { + elv_err("Failed to allocate audio FIFO, url=%s", params->url); + return eav_mem_alloc; + } } - } #endif - encoder_context->audio_enc_stream_index = index; + //encoder_context->audio_enc_stream_index[i] = stream_index; CLEAN + } + return 0; } +static int +num_audio_output( + coderctx_t *decoder_context, + xcparams_t *params) +{ + int n_decoder_auido = decoder_context ? decoder_context->n_audio : 0; + if (!params) + return 0; + + if (params->xc_type == xc_audio_merge || params->xc_type == xc_audio_join || params->xc_type == xc_audio_pan) + return 1; + + return params->n_audio > 0 ? 
params->n_audio : n_decoder_auido; +} + static int prepare_encoder( coderctx_t *encoder_context, @@ -1498,10 +1536,18 @@ prepare_encoder( } } if (params->xc_type & xc_audio) { - avformat_alloc_output_context2(&encoder_context->format_context2, NULL, format, filename2); - if (!encoder_context->format_context2) { - elv_dbg("could not allocate memory for audio output format"); - return eav_codec_context; + encoder_context->n_audio_output = num_audio_output(decoder_context, params); + for (int i=0; in_audio_output; i++) { + if (!strcmp(params->format, "hls") || !strcmp(params->format, "dash")) { + avformat_alloc_output_context2(&encoder_context->format_context2[i], NULL, format, filename2); + } else { + snprintf(encoder_context->filename2[i], MAX_AVFILENAME_LEN, "fsegment-audio%d-%s.mp4", i, "%05d"); + avformat_alloc_output_context2(&encoder_context->format_context2[i], NULL, format, encoder_context->filename2[i]); + } + if (!encoder_context->format_context2[i]) { + elv_dbg("could not allocate memory for audio output format stream_index=%d", params->audio_index[i]); + return eav_codec_context; + } } } @@ -1527,16 +1573,18 @@ prepare_encoder( "hls_enc_key_url", params->crypt_key_url, 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, "hls_enc", "1", 0); - if (params->crypt_iv != NULL) - av_opt_set(encoder_context->format_context2->priv_data, "hls_enc_iv", - params->crypt_iv, 0); - if (params->crypt_key != NULL) - av_opt_set(encoder_context->format_context2->priv_data, - "hls_enc_key", params->crypt_key, 0); - if (params->crypt_key_url != NULL) - av_opt_set(encoder_context->format_context2->priv_data, - "hls_enc_key_url", params->crypt_key_url, 0); + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "hls_enc", "1", 0); + if (params->crypt_iv != NULL) + av_opt_set(encoder_context->format_context2[i]->priv_data, "hls_enc_iv", + params->crypt_iv, 0); + if (params->crypt_key != 
NULL) + av_opt_set(encoder_context->format_context2[i]->priv_data, + "hls_enc_key", params->crypt_key, 0); + if (params->crypt_key_url != NULL) + av_opt_set(encoder_context->format_context2[i]->priv_data, + "hls_enc_key_url", params->crypt_key_url, 0); + } } break; case crypt_cenc: @@ -1548,10 +1596,12 @@ prepare_encoder( "encryption_scheme", "cenc-aes-ctr", 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc", 0); - av_opt_set(encoder_context->format_context2->priv_data, + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc-aes-ctr", 0); + } } break; case crypt_cbc1: @@ -1562,10 +1612,12 @@ prepare_encoder( "encryption_scheme", "cenc-aes-cbc", 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cbc1", 0); - av_opt_set(encoder_context->format_context2->priv_data, + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc-aes-cbc", 0); + } } break; case crypt_cens: @@ -1576,10 +1628,12 @@ prepare_encoder( "encryption_scheme", "cenc-aes-ctr-pattern", 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cens", 0); - av_opt_set(encoder_context->format_context2->priv_data, + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc-aes-ctr-pattern", 0); + } } break; case crypt_cbcs: @@ -1594,14 +1648,16 @@ prepare_encoder( params->crypt_iv, 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, + for (int i=0; in_audio_output; i++) { + 
av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cbcs", 0); - av_opt_set(encoder_context->format_context2->priv_data, + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_scheme", "cenc-aes-cbc-pattern", 0); - av_opt_set(encoder_context->format_context2->priv_data, "encryption_iv", - params->crypt_iv, 0); - av_opt_set(encoder_context->format_context2->priv_data, "hls_enc_iv", /* To remove */ - params->crypt_iv, 0); + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_iv", + params->crypt_iv, 0); + av_opt_set(encoder_context->format_context2[i]->priv_data, "hls_enc_iv", /* To remove */ + params->crypt_iv, 0); + } } break; case crypt_none: @@ -1622,10 +1678,12 @@ prepare_encoder( params->crypt_key, 0); } if (params->xc_type & xc_audio) { - av_opt_set(encoder_context->format_context2->priv_data, "encryption_kid", + for (int i=0; in_audio_output; i++) { + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_kid", params->crypt_kid, 0); - av_opt_set(encoder_context->format_context2->priv_data, "encryption_key", + av_opt_set(encoder_context->format_context2[i]->priv_data, "encryption_key", params->crypt_key, 0); + } } default: break; @@ -1639,7 +1697,8 @@ prepare_encoder( } if (params->xc_type & xc_audio) { - encoder_context->audio_enc_stream_index = -1; + //for (int i=0; iaudio_enc_stream_index[i] = -1; if ((rc = prepare_audio_encoder(encoder_context, decoder_context, params)) != eav_success) { elv_err("Failure in preparing audio encoder, rc=%d, url=%s", rc, params->url); return rc; @@ -1647,36 +1706,42 @@ prepare_encoder( } /* - * Allocate an array of 2 out_handler_t: one for video and one for audio output stream. - * TODO: needs to allocate up to number of streams when transcoding multiple streams at the same time (RM) + * Allocate an array of MAX_STREAMS out_handler_t: one for video and one for each audio output stream. 
+ * Needs to allocate up to number of streams when transcoding multiple streams at the same time. */ if (params->xc_type & xc_video) { - out_tracker = (out_tracker_t *) calloc(2, sizeof(out_tracker_t)); - out_tracker[0].out_handlers = out_tracker[1].out_handlers = out_handlers; - out_tracker[0].inctx = out_tracker[1].inctx = inctx; - out_tracker[0].video_stream_index = out_tracker[1].video_stream_index = decoder_context->video_stream_index; - out_tracker[0].audio_stream_index = out_tracker[1].audio_stream_index = decoder_context->audio_stream_index[0]; - out_tracker[0].seg_index = out_tracker[1].seg_index = atoi(params->start_segment_str); - out_tracker[0].encoder_ctx = out_tracker[1].encoder_ctx = encoder_context; - out_tracker[0].xc_type = out_tracker[1].xc_type = xc_video; + out_tracker = (out_tracker_t *) calloc(MAX_STREAMS, sizeof(out_tracker_t)); + out_tracker[0].out_handlers = out_handlers; + out_tracker[0].inctx = inctx; + out_tracker[0].video_stream_index = decoder_context->video_stream_index; + out_tracker[0].audio_stream_index = decoder_context->audio_stream_index[0]; + out_tracker[0].seg_index = atoi(params->start_segment_str); + out_tracker[0].encoder_ctx = encoder_context; + out_tracker[0].xc_type = xc_video; encoder_context->format_context->avpipe_opaque = out_tracker; } if (params->xc_type & xc_audio) { - out_tracker = (out_tracker_t *) calloc(2, sizeof(out_tracker_t)); - out_tracker[0].out_handlers = out_tracker[1].out_handlers = out_handlers; - out_tracker[0].inctx = out_tracker[1].inctx = inctx; - out_tracker[0].video_stream_index = out_tracker[1].video_stream_index = decoder_context->video_stream_index; - out_tracker[0].audio_stream_index = out_tracker[1].audio_stream_index = decoder_context->audio_stream_index[0]; - out_tracker[0].seg_index = out_tracker[1].seg_index = atoi(params->start_segment_str); - out_tracker[0].encoder_ctx = out_tracker[1].encoder_ctx = encoder_context; - out_tracker[0].xc_type = out_tracker[1].xc_type = xc_audio; - 
encoder_context->format_context2->avpipe_opaque = out_tracker; + for (int j=0; jn_audio_output; j++) { + out_tracker = (out_tracker_t *) calloc(MAX_STREAMS, sizeof(out_tracker_t)); + for (int i=0; in_audio_output; i++) { + out_tracker[i].out_handlers = out_handlers; + out_tracker[i].inctx = inctx; + out_tracker[i].video_stream_index = decoder_context->video_stream_index; + out_tracker[i].audio_stream_index = decoder_context->audio_stream_index[i]; + out_tracker[i].seg_index = atoi(params->start_segment_str); + out_tracker[i].encoder_ctx = encoder_context; + out_tracker[i].xc_type = xc_audio; + } + encoder_context->format_context2[j]->avpipe_opaque = out_tracker; + } } dump_encoder(inctx->url, encoder_context->format_context, params); dump_codec_context(encoder_context->codec_context[encoder_context->video_stream_index]); - dump_encoder(inctx->url, encoder_context->format_context2, params); + for (int i=0; in_audio_output; i++) { + dump_encoder(inctx->url, encoder_context->format_context2[i], params); + } dump_codec_context(encoder_context->codec_context[encoder_context->audio_stream_index[0]]); return 0; @@ -1837,12 +1902,12 @@ should_skip_encoding( url = decoder_context->inctx->url; elv_warn("ENCODE SKIP invalid frame, stream_index=%d, url=%s, video_last_pts_read=%"PRId64", audio_last_pts_read=%"PRId64, stream_index, url, - encoder_context->video_last_pts_read, encoder_context->audio_last_pts_read); + encoder_context->video_last_pts_read, encoder_context->audio_last_pts_read[stream_index]); return 1; } if (selected_decoded_audio(decoder_context, stream_index) >= 0) - frame_in_pts_offset = frame->pts - decoder_context->audio_input_start_pts; + frame_in_pts_offset = frame->pts - decoder_context->audio_input_start_pts[stream_index]; else frame_in_pts_offset = frame->pts - decoder_context->video_input_start_pts; @@ -1899,6 +1964,7 @@ encode_frame( int debug_frame_level) { int ret; + int index = stream_index; int rc = eav_success; AVFormatContext *format_context = 
encoder_context->format_context; AVCodecContext *codec_context = encoder_context->codec_context[stream_index]; @@ -1906,8 +1972,22 @@ encode_frame( avpipe_io_handler_t *out_handlers; ioctx_t *outctx; - if (selected_decoded_audio(decoder_context, stream_index) >= 0) - format_context = encoder_context->format_context2; + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) { + index = 0; + } + codec_context = encoder_context->codec_context[index]; + + int i = -1; + if ((i = selected_decoded_audio(decoder_context, stream_index)) >= 0) { + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) { + i = 0; + } + format_context = encoder_context->format_context2[i]; + } int skip = should_skip_encoding(decoder_context, encoder_context, stream_index, params, frame); if (skip) @@ -1924,8 +2004,8 @@ encode_frame( const char *st = stream_type_str(encoder_context, stream_index); - // Adjust PTS if input stream starts at an arbitrary value (MPEG-TS/RTMP) - if ( is_protocol(decoder_context) && (!strcmp(params->format, "fmp4-segment"))) { + // Adjust PTS if input stream starts at an arbitrary value (i.e mostly for MPEG-TS/RTMP) + if (!strcmp(params->format, "fmp4-segment")) { if (stream_index == decoder_context->video_stream_index) { if (encoder_context->first_encoding_video_pts == -1) { /* Remember the first video PTS to use as an offset later */ @@ -1947,22 +2027,23 @@ encode_frame( } #ifndef USE_RESAMPLE_AAC else if (selected_decoded_audio(decoder_context, stream_index) >= 0) { - if (encoder_context->first_encoding_audio_pts == -1) { + if (encoder_context->first_encoding_audio_pts[stream_index] == AV_NOPTS_VALUE) { /* Remember the first video PTS to use as an offset later */ - encoder_context->first_encoding_audio_pts = frame->pts; - elv_log("PTS first_encoding_audio_pts=%"PRId64" dec=%"PRId64" read=%"PRId64" stream=%d:%s", - 
encoder_context->first_encoding_audio_pts, - decoder_context->first_decoding_audio_pts, + encoder_context->first_encoding_audio_pts[stream_index] = frame->pts; + elv_log("PTS stream_index=%d first_encoding_audio_pts=%"PRId64" dec=%"PRId64" read=%"PRId64" stream=%d:%s", + stream_index, + encoder_context->first_encoding_audio_pts[stream_index], + decoder_context->first_decoding_audio_pts[stream_index], encoder_context->first_read_frame_pts[stream_index], params->xc_type, st); } // Adjust audio frame pts such that first frame sent to the encoder has PTS 0 if (frame->pts != AV_NOPTS_VALUE) { - frame->pts -= encoder_context->first_encoding_audio_pts; + frame->pts -= encoder_context->first_encoding_audio_pts[stream_index]; frame->pkt_dts = frame->pts; } if (frame->best_effort_timestamp != AV_NOPTS_VALUE) - frame->best_effort_timestamp -= encoder_context->first_encoding_audio_pts; + frame->best_effort_timestamp -= encoder_context->first_encoding_audio_pts[stream_index]; } #endif } @@ -1996,7 +2077,7 @@ encode_frame( if (frame) { if (params->xc_type & xc_audio && selected_decoded_audio(decoder_context, stream_index) >= 0) - encoder_context->audio_last_pts_sent_encode = frame->pts; + encoder_context->audio_last_pts_sent_encode[stream_index] = frame->pts; else if (params->xc_type & xc_video && stream_index == decoder_context->video_stream_index) encoder_context->video_last_pts_sent_encode = frame->pts; } @@ -2054,10 +2135,10 @@ encode_frame( if (params->xc_type == xc_video) assert(output_packet->duration == 0); /* Only to notice if this ever gets set */ if (selected_decoded_audio(decoder_context, stream_index) >= 0 && params->xc_type == xc_all) { - if (!output_packet->duration && encoder_context->audio_last_dts != AV_NOPTS_VALUE) - output_packet->duration = output_packet->dts - encoder_context->audio_last_dts; - encoder_context->audio_last_dts = output_packet->dts; - encoder_context->audio_last_pts_encoded = output_packet->pts; + if (!output_packet->duration && 
encoder_context->audio_last_dts[stream_index] != AV_NOPTS_VALUE) + output_packet->duration = output_packet->dts - encoder_context->audio_last_dts[stream_index]; + encoder_context->audio_last_dts[stream_index] = output_packet->dts; + encoder_context->audio_last_pts_encoded[stream_index] = output_packet->pts; } else { if (!output_packet->duration && encoder_context->video_last_dts != AV_NOPTS_VALUE) output_packet->duration = output_packet->dts - encoder_context->video_last_dts; @@ -2068,7 +2149,7 @@ encode_frame( output_packet->pts += params->start_pts; output_packet->dts += params->start_pts; - if (decoder_context->is_mpegts && + if ((decoder_context->is_mpegts || decoder_context->is_srt) && encoder_context->video_encoder_prev_pts > 0 && stream_index == decoder_context->video_stream_index && encoder_context->calculated_frame_duration > 0 && @@ -2097,23 +2178,23 @@ encode_frame( params->ecodec2 != NULL && !strcmp(avcodec_get_name(decoder_context->codec_parameters[stream_index]->codec_id), params->ecodec2))) && (decoder_context->stream[stream_index]->time_base.den != - encoder_context->stream[stream_index]->time_base.den || + encoder_context->stream[index]->time_base.den || decoder_context->stream[stream_index]->time_base.num != - encoder_context->stream[stream_index]->time_base.num)) { + encoder_context->stream[index]->time_base.num)) { av_packet_rescale_ts(output_packet, decoder_context->stream[stream_index]->time_base, - encoder_context->stream[stream_index]->time_base + encoder_context->stream[index]->time_base ); } if (selected_decoded_audio(decoder_context, stream_index) >= 0) { /* Set the packet duration if it is not the first audio packet */ - if (encoder_context->audio_pts != AV_NOPTS_VALUE) - output_packet->duration = output_packet->pts - encoder_context->audio_pts; + if (encoder_context->audio_pts[stream_index] != AV_NOPTS_VALUE) + output_packet->duration = output_packet->pts - encoder_context->audio_pts[stream_index]; else output_packet->duration = 0; - 
encoder_context->audio_pts = output_packet->pts; - encoder_context->audio_frames_written++; + encoder_context->audio_pts[stream_index] = output_packet->pts; + encoder_context->audio_frames_written[stream_index]++; } else { if (encoder_context->video_pts != AV_NOPTS_VALUE) output_packet->duration = output_packet->pts - encoder_context->video_pts; @@ -2162,9 +2243,9 @@ encode_frame( if (stream_index == decoder_context->video_stream_index) outctx->total_frames_written = encoder_context->video_frames_written; else - outctx->total_frames_written = encoder_context->audio_frames_written; + outctx->total_frames_written = encoder_context->audio_frames_written[stream_index]; outctx->frames_written++; - out_handlers->avpipe_stater(outctx, out_stat_frame_written); + out_handlers->avpipe_stater(outctx, stream_index, out_stat_frame_written); } /* mux encoded frame */ @@ -2203,9 +2284,10 @@ do_bypass( AVFormatContext *format_context; - if (is_audio) - format_context = encoder_context->format_context2; - else + if (is_audio) { + int i = selected_decoded_audio(decoder_context, packet->stream_index); + format_context = encoder_context->format_context2[i]; + } else format_context = encoder_context->format_context; if (packet->pts == AV_NOPTS_VALUE || @@ -2232,9 +2314,9 @@ do_bypass( if (out_handlers->avpipe_stater && outctx) { if (is_audio) { if (outctx->type != avpipe_audio_init_stream) - encoder_context->audio_frames_written++; - encoder_context->audio_last_pts_sent_encode = packet->pts; - outctx->total_frames_written = encoder_context->audio_frames_written; + encoder_context->audio_frames_written[packet->stream_index]++; + encoder_context->audio_last_pts_sent_encode[packet->stream_index] = packet->pts; + outctx->total_frames_written = encoder_context->audio_frames_written[packet->stream_index]; } else { if (outctx->type != avpipe_video_init_stream) encoder_context->video_frames_written++; @@ -2242,7 +2324,7 @@ do_bypass( outctx->total_frames_written = 
encoder_context->video_frames_written; } outctx->frames_written++; - out_handlers->avpipe_stater(outctx, out_stat_frame_written); + out_handlers->avpipe_stater(outctx, packet->stream_index, out_stat_frame_written); } } @@ -2282,23 +2364,28 @@ transcode_audio( AVFrame *frame, AVFrame *filt_frame, int stream_index, - xcparams_t *p, + xcparams_t *params, int debug_frame_level) { int ret; AVCodecContext *codec_context = decoder_context->codec_context[stream_index]; - int audio_enc_stream_index = encoder_context->audio_enc_stream_index; + int audio_enc_stream_index = stream_index; int response; + if (params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_join || + params->xc_type == xc_audio_pan) + audio_enc_stream_index = 0; + if (debug_frame_level) elv_dbg("DECODE stream_index=%d send_packet pts=%"PRId64" dts=%"PRId64 " duration=%d, input frame_size=%d, output frame_size=%d, audio_output_pts=%"PRId64, stream_index, packet->pts, packet->dts, packet->duration, codec_context->frame_size, encoder_context->codec_context[audio_enc_stream_index]->frame_size, decoder_context->audio_output_pts); - if (p->bypass_transcoding) { - return do_bypass(1, decoder_context, encoder_context, packet, p, debug_frame_level); + if (params->bypass_transcoding) { + return do_bypass(1, decoder_context, encoder_context, packet, params, debug_frame_level); } // Send the packet to the decoder @@ -2309,7 +2396,7 @@ transcode_audio( * Ignore the error and continue. 
*/ elv_err("Failure while sending an audio packet to the decoder: err=%d, %s, url=%s", - response, av_err2str(response), p->url); + response, av_err2str(response), params->url); // Ignore the error and continue return eav_success; } @@ -2321,24 +2408,24 @@ transcode_audio( break; } else if (response < 0) { elv_err("Failure while receiving a frame from the decoder: %s, url=%s", - av_err2str(response), p->url); + av_err2str(response), params->url); return eav_receive_frame; } - if (decoder_context->first_decoding_audio_pts == AV_NOPTS_VALUE) { - decoder_context->first_decoding_audio_pts = frame->pts; + if (decoder_context->first_decoding_audio_pts[stream_index] == AV_NOPTS_VALUE) { + decoder_context->first_decoding_audio_pts[stream_index] = frame->pts; avpipe_io_handler_t *in_handlers = decoder_context->in_handlers; - decoder_context->inctx->decoding_start_pts = decoder_context->first_decoding_audio_pts; - elv_log("first_decoding_audio_pts=%"PRId64, - decoder_context->first_decoding_audio_pts); + decoder_context->inctx->decoding_start_pts = decoder_context->first_decoding_audio_pts[stream_index]; + elv_log("stream_index=%d first_decoding_audio_pts=%"PRId64, + stream_index, decoder_context->first_decoding_audio_pts[stream_index]); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_decoding_audio_start_pts); + in_handlers->avpipe_stater(decoder_context->inctx, stream_index, in_stat_decoding_audio_start_pts); } dump_frame(1, "IN ", codec_context->frame_number, frame, debug_frame_level); - ret = check_pts_wrapped(&decoder_context->audio_last_wrapped_pts, - &decoder_context->audio_last_input_pts, + ret = check_pts_wrapped(&decoder_context->audio_last_wrapped_pts[stream_index], + &decoder_context->audio_last_input_pts[stream_index], frame, stream_index); if (ret == eav_pts_wrapped) { @@ -2346,34 +2433,37 @@ transcode_audio( return ret; } - decoder_context->audio_pts = packet->pts; + decoder_context->audio_pts[stream_index] = 
packet->pts; /* push the decoded frame into the filtergraph */ - for (int i=0; in_audio; i++) { - /* push the decoded frame into the filtergraph */ - if (stream_index == decoder_context->audio_stream_index[i]) { - if (av_buffersrc_add_frame_flags(decoder_context->audio_buffersrc_ctx[i], frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) { - elv_err("Failure in feeding into audio filtergraph source %d, url=%s", i, p->url); - break; - } + int i = selected_decoded_audio(decoder_context, stream_index); + if (i >= 0) { + if (av_buffersrc_add_frame_flags(decoder_context->audio_buffersrc_ctx[i], frame, AV_BUFFERSRC_FLAG_KEEP_REF) < 0) { + elv_err("Failure in feeding into audio filtergraph source %d, url=%s", i, params->url); + break; } } /* pull filtered frames from the filtergraph */ while (1) { - ret = av_buffersink_get_frame(decoder_context->audio_buffersink_ctx, filt_frame); + /* For audio join, merge or pan there is only one buffer sink (0) */ + if (params->xc_type == xc_audio_join || + params->xc_type == xc_audio_merge || + params->xc_type == xc_audio_pan) + i = 0; + ret = av_buffersink_get_frame(decoder_context->audio_buffersink_ctx[i], filt_frame); if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { //elv_dbg("av_buffersink_get_frame() ret=EAGAIN"); break; } if (ret < 0) { - elv_err("Failed to execute audio frame filter ret=%d, url=%s", ret, p->url); + elv_err("Failed to execute audio frame filter ret=%d, url=%s", ret, params->url); return eav_receive_filter_frame; } dump_frame(1, "FILT ", codec_context->frame_number, filt_frame, debug_frame_level); - ret = encode_frame(decoder_context, encoder_context, filt_frame, encoder_context->audio_enc_stream_index, p, debug_frame_level); + ret = encode_frame(decoder_context, encoder_context, filt_frame, packet->stream_index, params, debug_frame_level); av_frame_unref(filt_frame); if (ret == eav_write_frame) { av_frame_unref(frame); @@ -2437,20 +2527,20 @@ transcode_audio_aac( return eav_receive_frame; } - if 
(decoder_context->first_decoding_audio_pts == AV_NOPTS_VALUE) { - decoder_context->first_decoding_audio_pts = frame->pts; + if (decoder_context->first_decoding_audio_pts[stream_index] == AV_NOPTS_VALUE) { + decoder_context->first_decoding_audio_pts[stream_index] = frame->pts; avpipe_io_handler_t *in_handlers = decoder_context->in_handlers; - decoder_context->inctx->decoding_start_pts = decoder_context->first_decoding_audio_pts; - elv_log("first_decoding_audio_pts=%"PRId64, - decoder_context->first_decoding_audio_pts); + decoder_context->inctx->decoding_start_pts = decoder_context->first_decoding_audio_pts[stream_index]; + elv_log("stream_index=%d first_decoding_audio_pts=%"PRId64, + stream_index, decoder_context->first_decoding_audio_pts); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_decoding_audio_start_pts); + in_handlers->avpipe_stater(decoder_context->inctx, stream_index, in_stat_decoding_audio_start_pts); } dump_frame(1, "IN ", codec_context->frame_number, frame, debug_frame_level); - ret = check_pts_wrapped(&decoder_context->audio_last_wrapped_pts, - &decoder_context->audio_last_input_pts, + ret = check_pts_wrapped(&decoder_context->audio_last_wrapped_pts[stream_index], + &decoder_context->audio_last_input_pts[stream_index], frame, stream_index); if (ret == eav_pts_wrapped) { @@ -2458,9 +2548,6 @@ transcode_audio_aac( return ret; } - if (decoder_context->audio_input_prev_pts < 0) - decoder_context->audio_input_prev_pts = frame->pts; - decoder_context->audio_pts = packet->pts; /* Temporary storage for the converted input samples. 
*/ uint8_t **converted_input_samples = NULL; @@ -2507,38 +2594,7 @@ transcode_audio_aac( } int64_t d; - // TODO this handles packet loss but not irregular PTS deltas that are not equal to pkt_duration - // If audio frames have irregular PTS the code will produce incorrect results - disabled by default - if (p->audio_fill_gap && - (decoder_context->is_mpegts && frame->pts - decoder_context->audio_input_prev_pts > frame->pkt_duration)) { - /* - * float pkt_ratio = ((float)(encoder_context->codec_context[stream_index]->sample_rate * frame->pkt_duration)) / - * (((float) decoder_context->stream[stream_index]->time_base.den) * filt_frame->nb_samples); - * pkt_ratio shows the transcoding ratio of output frames (packets) to input frames (packets). - * For example, if input timebase is 90000 with pkt_duration = 2880, - * and output sample rate is 48000 with frame duration = 1024 then pkt_ratio = 3/2 that means - * for every 2 input audio frames, there would be 3 output audio frame. - * Now to calculate output packet pts from input packet pts: - * output_pkt_pts = decoder_context->audio_output_pts + d - * where d = ((float) (frame->pts - decoder_context->audio_input_prev_pts) / frame->pkt_duration) * pkt_ratio * filt_frame->nb_samples - * After simplification we will have d as follows: - */ - d = (frame->pts - decoder_context->audio_input_prev_pts) * (encoder_context->codec_context[stream_index]->time_base.den) / - decoder_context->stream[stream_index]->time_base.den; - - /* Round up d to nearest multiple of output frame size */ - d = ((d+output_frame_size-1)/output_frame_size)*output_frame_size; - elv_warn("AUDIO JUMP from=%"PRId64", to=%"PRId64", frame->pts=%"PRId64", audio_input_prev_pts=%"PRId64", pkt_duration=%d", - decoder_context->audio_output_pts, - decoder_context->audio_output_pts + d, - frame->pts, - decoder_context->audio_input_prev_pts, - frame->pkt_duration); - } else { - d = output_frame_size; - } - - decoder_context->audio_input_prev_pts = frame->pts; + d = 
output_frame_size; while (d > 0) { /* When using FIFO frames no longer have PTS */ @@ -2548,7 +2604,7 @@ transcode_audio_aac( decoder_context->audio_duration = filt_frame->pts; int should_skip = 0; - int64_t frame_in_pts_offset = frame->pts - decoder_context->audio_input_start_pts; + int64_t frame_in_pts_offset = frame->pts - decoder_context->audio_input_start_pts[stream_index]; /* If frame PTS < start_time_ts then don't encode audio frame */ if (p->start_time_ts > 0 && frame_in_pts_offset < p->start_time_ts) { elv_dbg("ENCODE SKIP audio frame early pts=%" PRId64 @@ -2571,13 +2627,8 @@ transcode_audio_aac( filt_frame->pts, decoder_context->audio_duration); } - if (p->audio_fill_gap) { - decoder_context->audio_output_pts += output_frame_size; - d -= output_frame_size; - } else { - decoder_context->audio_output_pts += d; - d = 0; - } + decoder_context->audio_output_pts += d; + d = 0; } av_frame_unref(filt_frame); @@ -2675,7 +2726,7 @@ transcode_video( elv_log("first_decoding_video_pts=%"PRId64" pktdts=%"PRId64, decoder_context->first_decoding_video_pts, frame->pkt_dts); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_decoding_video_start_pts); + in_handlers->avpipe_stater(decoder_context->inctx, stream_index, in_stat_decoding_video_start_pts); } /* If force_equal_fduration is set then frame_duration > 0 is true */ @@ -2976,7 +3027,7 @@ flush_decoder( if (!p->bypass_transcoding && (i = selected_decoded_audio(decoder_context, stream_index)) >= 0) { buffersrc_ctx = decoder_context->audio_buffersrc_ctx[i]; - buffersink_ctx = decoder_context->audio_buffersink_ctx; + buffersink_ctx = decoder_context->audio_buffersink_ctx[i]; } while (response >=0) { @@ -3043,15 +3094,16 @@ should_stop_decoding( int frames_allowed_past_duration) { int64_t input_packet_rel_pts = 0; + int stream_index = input_packet->stream_index; if (decoder_context->cancelled) return 1; - if (input_packet->stream_index != decoder_context->video_stream_index && - 
selected_decoded_audio(decoder_context, input_packet->stream_index) < 0) + if (stream_index != decoder_context->video_stream_index && + selected_decoded_audio(decoder_context, stream_index) < 0) return 0; - if (input_packet->stream_index == decoder_context->video_stream_index && + if (stream_index == decoder_context->video_stream_index && (params->xc_type & xc_video)) { if (decoder_context->video_input_start_pts == AV_NOPTS_VALUE) { decoder_context->video_input_start_pts = input_packet->pts; @@ -3060,15 +3112,15 @@ should_stop_decoding( } input_packet_rel_pts = input_packet->pts - decoder_context->video_input_start_pts; - } else if (selected_decoded_audio(decoder_context, input_packet->stream_index) >= 0 && + } else if (selected_decoded_audio(decoder_context, stream_index) >= 0 && params->xc_type & xc_audio) { - if (decoder_context->audio_input_start_pts == AV_NOPTS_VALUE) { - decoder_context->audio_input_start_pts = input_packet->pts; - elv_log("audio_input_start_pts=%"PRId64, - decoder_context->audio_input_start_pts); + if (decoder_context->audio_input_start_pts[stream_index] == AV_NOPTS_VALUE) { + decoder_context->audio_input_start_pts[stream_index] = input_packet->pts; + elv_log("stream_index=%d audio_input_start_pts=%"PRId64, + stream_index, decoder_context->audio_input_start_pts[stream_index]); } - input_packet_rel_pts = input_packet->pts - decoder_context->audio_input_start_pts; + input_packet_rel_pts = input_packet->pts - decoder_context->audio_input_start_pts[stream_index]; } /* PENDING (RM) for some of the live feeds (like RTMP) we need to scale input_packet_rel_pts */ @@ -3095,10 +3147,10 @@ should_stop_decoding( } if (input_packet->pts != AV_NOPTS_VALUE) { - if (selected_decoded_audio(decoder_context, input_packet->stream_index) >= 0 && + if (selected_decoded_audio(decoder_context, stream_index) >= 0 && params->xc_type & xc_audio) - encoder_context->audio_last_pts_read = input_packet->pts; - else if (input_packet->stream_index == 
decoder_context->video_stream_index && + encoder_context->audio_last_pts_read[stream_index] = input_packet->pts; + else if (stream_index == decoder_context->video_stream_index && params->xc_type & xc_video) encoder_context->video_last_pts_read = input_packet->pts; } @@ -3129,13 +3181,14 @@ skip_until_start_time_pts( if (params->xc_type == xc_video) input_start_pts = decoder_context->video_input_start_pts; else - input_start_pts = decoder_context->audio_input_start_pts; + input_start_pts = decoder_context->audio_input_start_pts[input_packet->stream_index]; const int64_t packet_in_pts_offset = input_packet->pts - input_start_pts; /* Drop frames before the desired 'start_time' */ if (packet_in_pts_offset < params->start_time_ts) { - elv_dbg("PREDECODE SKIP frame early pts=%" PRId64 ", start_time_ts=%" PRId64 + elv_dbg("PREDECODE SKIP frame early stream_index=%d, pts=%" PRId64 ", start_time_ts=%" PRId64 ", input_start_pts=%" PRId64 ", packet_in_pts_offset=%" PRId64, + input_packet->stream_index, input_packet->pts, params->start_time_ts, input_start_pts, packet_in_pts_offset); return 1; @@ -3175,7 +3228,7 @@ skip_for_sync( decoder_context->first_key_frame_pts, input_packet->stream_index, input_packet->flags, input_packet->dts); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_first_keyframe_pts); + in_handlers->avpipe_stater(decoder_context->inctx, input_packet->stream_index, in_stat_first_keyframe_pts); dump_packet(0, "SYNC ", input_packet, 1); return 0; @@ -3472,11 +3525,14 @@ avpipe_xc( goto xc_done; } - if (params->xc_type & xc_audio && - avformat_write_header(encoder_context->format_context2, NULL) != eav_success) { - elv_err("Failed to write audio output file header, url=%s", params->url); - rc = eav_write_header; - goto xc_done; + if (params->xc_type & xc_audio) { + for (int i=0; in_audio_output; i++) { + if (avformat_write_header(encoder_context->format_context2[i], NULL) != eav_success) { + elv_err("Failed to write 
audio output file header, url=%s", params->url); + rc = eav_write_header; + goto xc_done; + } + } } int video_stream_index = decoder_context->video_stream_index; @@ -3509,30 +3565,32 @@ avpipe_xc( if (params->start_time_ts != -1) { if (params->xc_type == xc_video) encoder_context->format_context->start_time = params->start_time_ts; - if (params->xc_type & xc_audio) - encoder_context->format_context2->start_time = params->start_time_ts; + if (params->xc_type & xc_audio) { + for (int i=0; in_audio_output; i++) + encoder_context->format_context2[i]->start_time = params->start_time_ts; + } /* PENDING (RM) add new start_time_ts for audio */ } decoder_context->video_input_start_pts = AV_NOPTS_VALUE; - decoder_context->audio_input_start_pts = AV_NOPTS_VALUE; decoder_context->video_duration = -1; encoder_context->audio_duration = -1; - decoder_context->audio_input_prev_pts = -1; encoder_context->video_encoder_prev_pts = -1; decoder_context->first_decoding_video_pts = AV_NOPTS_VALUE; - decoder_context->first_decoding_audio_pts = AV_NOPTS_VALUE; encoder_context->first_encoding_video_pts = -1; - encoder_context->first_encoding_audio_pts = -1; - encoder_context->audio_pts = AV_NOPTS_VALUE; encoder_context->video_pts = AV_NOPTS_VALUE; - for (int j=0; jfirst_decoding_audio_pts[j] = AV_NOPTS_VALUE; + encoder_context->first_encoding_audio_pts[j] = AV_NOPTS_VALUE; + decoder_context->audio_input_start_pts[j] = AV_NOPTS_VALUE; + encoder_context->audio_pts[j] = AV_NOPTS_VALUE; encoder_context->first_read_frame_pts[j] = -1; + encoder_context->audio_last_pts_sent_encode[j] = AV_NOPTS_VALUE; + } decoder_context->first_key_frame_pts = AV_NOPTS_VALUE; decoder_context->is_av_synced = 0; encoder_context->video_last_pts_sent_encode = -1; - encoder_context->audio_last_pts_sent_encode = -1; int64_t video_last_dts = 0; int frames_read_past_duration = 0; @@ -3636,7 +3694,7 @@ avpipe_xc( inctx->video_frames_read++; if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(inctx, 
in_stat_video_frame_read); + in_handlers->avpipe_stater(inctx, input_packet->stream_index, in_stat_video_frame_read); if (decoder_context->first_key_frame_pts == AV_NOPTS_VALUE && input_packet->flags == AV_PKT_FLAG_KEY) { @@ -3645,7 +3703,7 @@ avpipe_xc( elv_log("PTS first_key_frame_pts=%"PRId64" sidx=%d flags=%d dts=%"PRId64, decoder_context->first_key_frame_pts, input_packet->stream_index, input_packet->flags, input_packet->dts); if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(decoder_context->inctx, in_stat_first_keyframe_pts); + in_handlers->avpipe_stater(decoder_context->inctx, input_packet->stream_index, in_stat_first_keyframe_pts); } // Assert DTS is growing as expected (accommodate non integer and irregular frame duration) @@ -3664,13 +3722,13 @@ avpipe_xc( } else if (selected_decoded_audio(decoder_context, input_packet->stream_index) >= 0 && params->xc_type & xc_audio) { - encoder_context->audio_last_dts = input_packet->dts; + encoder_context->audio_last_dts[input_packet->stream_index] = input_packet->dts; dump_packet(1, "IN ", input_packet, debug_frame_level); inctx->audio_frames_read++; if (in_handlers->avpipe_stater) - in_handlers->avpipe_stater(inctx, in_stat_audio_frame_read); + in_handlers->avpipe_stater(inctx, input_packet->stream_index, in_stat_audio_frame_read); xc_frame_t *xc_frame = (xc_frame_t *) calloc(1, sizeof(xc_frame_t)); xc_frame->packet = input_packet; @@ -3699,7 +3757,7 @@ avpipe_xc( if (in_handlers->avpipe_stater) { inctx->data = (uint8_t *)hex_str; - in_handlers->avpipe_stater(inctx, in_stat_data_scte35); + in_handlers->avpipe_stater(inctx, input_packet->stream_index, in_stat_data_scte35); } break; } @@ -3728,46 +3786,85 @@ avpipe_xc( */ if (params->xc_type & xc_video && xctx->err != eav_write_frame) flush_decoder(decoder_context, encoder_context, encoder_context->video_stream_index, params, debug_frame_level); - if (params->xc_type & xc_audio && xctx->err != eav_write_frame) - flush_decoder(decoder_context, 
encoder_context, encoder_context->audio_stream_index[0], params, debug_frame_level); + if (params->xc_type & xc_audio && xctx->err != eav_write_frame) { + for (int i=0; in_audio; i++) + flush_decoder(decoder_context, encoder_context, encoder_context->audio_stream_index[i], params, debug_frame_level); + } if (params->xc_type & xc_audio_join || params->xc_type & xc_audio_merge) { for (int i=0; in_audio; i++) flush_decoder(decoder_context, encoder_context, decoder_context->audio_stream_index[i], params, debug_frame_level); } if (!params->bypass_transcoding && (params->xc_type & xc_video) && xctx->err != eav_write_frame) - encode_frame(decoder_context, encoder_context, NULL, encoder_context->video_stream_index, params, debug_frame_level); - if (!params->bypass_transcoding && params->xc_type & xc_audio && xctx->err != eav_write_frame) - encode_frame(decoder_context, encoder_context, NULL, encoder_context->audio_stream_index[0], params, debug_frame_level); + encode_frame(decoder_context, encoder_context, NULL, decoder_context->video_stream_index, params, debug_frame_level); + /* Loop through and flush all audio frames */ + if (!params->bypass_transcoding && params->xc_type & xc_audio && xctx->err != eav_write_frame) { + for (int i=0; in_audio; i++) + encode_frame(decoder_context, encoder_context, NULL, decoder_context->audio_stream_index[i], params, debug_frame_level); + } dump_trackers(decoder_context->format_context, encoder_context->format_context); if ((params->xc_type & xc_video) && rc == eav_success) av_write_trailer(encoder_context->format_context); - if ((params->xc_type & xc_audio) && rc == eav_success) - av_write_trailer(encoder_context->format_context2); + if ((params->xc_type & xc_audio) && rc == eav_success) { + for (int i=0; in_audio_output; i++) + av_write_trailer(encoder_context->format_context2[i]); + } + + char audio_last_dts_buf[(MAX_STREAMS + 1) * 20]; + char audio_input_start_pts_buf[(MAX_STREAMS + 1) * 20]; + char 
audio_last_pts_read_buf[(MAX_STREAMS + 1) * 20]; + char audio_last_pts_sent_encode_buf[(MAX_STREAMS + 1) * 20]; + char audio_last_pts_encoded_buf[(MAX_STREAMS + 1) * 20]; + audio_last_dts_buf[0] = '\0'; + audio_input_start_pts_buf[0] = '\0'; + audio_last_pts_read_buf[0] = '\0'; + audio_last_pts_sent_encode_buf[0] = '\0'; + audio_last_pts_encoded_buf[0] = '\0'; + for (int i=0; in_audio; i++) { + char buf[32]; + int audio_index = params->audio_index[i]; + if (i > 0) { + strncat(audio_last_dts_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_last_dts_buf)); + strncat(audio_input_start_pts_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_input_start_pts_buf)); + strncat(audio_last_pts_read_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_read_buf)); + strncat(audio_last_pts_sent_encode_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_sent_encode_buf)); + strncat(audio_last_pts_encoded_buf, ",", (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_encoded_buf)); + } + sprintf(buf, "%"PRId64, encoder_context->audio_last_dts[audio_index]); + strncat(audio_last_dts_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_last_dts_buf)); + sprintf(buf, "%"PRId64, encoder_context->audio_input_start_pts[audio_index]); + strncat(audio_input_start_pts_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_input_start_pts_buf)); + sprintf(buf, "%"PRId64, encoder_context->audio_last_pts_read[audio_index]); + strncat(audio_last_pts_read_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_read_buf)); + sprintf(buf, "%"PRId64, encoder_context->audio_last_pts_sent_encode[audio_index]); + strncat(audio_last_pts_sent_encode_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_sent_encode_buf)); + sprintf(buf, "%"PRId64, encoder_context->audio_last_pts_encoded[audio_index]); + strncat(audio_last_pts_encoded_buf, buf, (MAX_STREAMS + 1) * 20 - strlen(audio_last_pts_encoded_buf)); + } elv_log("avpipe_xc done url=%s, rc=%d, xctx->err=%d, xc-type=%d, " "last video_pts=%"PRId64" 
audio_pts=%"PRId64 - " video_input_start_pts=%"PRId64" audio_input_start_pts=%"PRId64 - " video_last_dts=%"PRId64" audio_last_dts="PRId64 - " last_pts_read=%"PRId64" last_pts_read2=%"PRId64 - " video_pts_sent_encode=%"PRId64" audio_pts_sent_encode=%"PRId64 - " last_pts_encoded=%"PRId64" last_pts_encoded2=%"PRId64, + " video_input_start_pts=%"PRId64" audio_input_start_pts=[%s]" + " video_last_dts=%"PRId64" audio_last_dts=[%s]" + " video_last_pts_read=%"PRId64" audio_last_pts_read=[%s]" + " video_pts_sent_encode=%"PRId64" audio_last_pts_sent_encode=[%s]" + " last_pts_encoded=%"PRId64" audio_last_pts_encoded=[%s]", params->url, rc, xctx->err, params->xc_type, encoder_context->video_pts, encoder_context->audio_pts, encoder_context->video_input_start_pts, - encoder_context->audio_input_start_pts, + audio_input_start_pts_buf, encoder_context->video_last_dts, - encoder_context->audio_last_dts, + audio_last_dts_buf, encoder_context->video_last_pts_read, - encoder_context->audio_last_pts_read, + audio_last_pts_read_buf, encoder_context->video_last_pts_sent_encode, - encoder_context->audio_last_pts_sent_encode, + audio_last_pts_sent_encode_buf, encoder_context->video_last_pts_encoded, - encoder_context->audio_last_pts_encoded); + audio_last_pts_encoded_buf); decoder_context->stopped = 1; encoder_context->stopped = 1; @@ -4226,15 +4323,7 @@ check_params( return eav_param; } - if (params->xc_type != xc_audio_join && - params->xc_type != xc_audio_pan && - params->xc_type != xc_audio_merge && - params->n_audio > 1) { - elv_err("Invalid number of audio streams, n_audio=%d, url=%s", params->n_audio, params->url); - return eav_param; - } - - if (params->n_audio > MAX_AUDIO_MUX) { + if (params->n_audio > MAX_STREAMS) { elv_err("Too many audio indexes, n_audio=%d, url=%s", params->n_audio, params->url); return eav_param; } @@ -4329,7 +4418,7 @@ avpipe_init( char index_str[10]; audio_index_str[0] = '\0'; - for (int i=0; in_audio && in_audio && iaudio_index[i]); strcat(audio_index_str, 
index_str); if (i < params->n_audio-1) @@ -4379,7 +4468,6 @@ avpipe_init( "audio_index=%s " "channel_layout=%d (%s) " "sync_audio_to_stream_id=%d " - "audio_fill_gap=%d " "wm_overlay_type=%d " "wm_overlay_len=%d " "bitdepth=%d " @@ -4406,7 +4494,7 @@ avpipe_init( params->crypt_iv, params->crypt_key, params->crypt_kid, params->crypt_key_url, params->crypt_scheme, params->n_audio, audio_index_str, params->channel_layout, avpipe_channel_layout_name(params->channel_layout), - params->sync_audio_to_stream_id, params->audio_fill_gap, + params->sync_audio_to_stream_id, params->watermark_overlay_type, params->watermark_overlay_len, params->bitdepth, params->listen, params->max_cll ? params->max_cll : "", @@ -4527,18 +4615,22 @@ avpipe_fini( /* Free filter graph resources */ if (decoder_context && decoder_context->video_filter_graph) avfilter_graph_free(&decoder_context->video_filter_graph); - if (decoder_context && decoder_context->audio_filter_graph) - avfilter_graph_free(&decoder_context->audio_filter_graph); + if (decoder_context && decoder_context->n_audio > 0) { + for (int i=0; in_audio; i++) + avfilter_graph_free(&decoder_context->audio_filter_graph[i]); + } if (encoder_context && encoder_context->format_context) { void *avpipe_opaque = encoder_context->format_context->avpipe_opaque; avformat_free_context(encoder_context->format_context); free(avpipe_opaque); } - if (encoder_context && encoder_context->format_context2) { - void *avpipe_opaque = encoder_context->format_context2->avpipe_opaque; - avformat_free_context(encoder_context->format_context2); - free(avpipe_opaque); + if (encoder_context) { + for (int i=0; in_audio_output; i++) { + void *avpipe_opaque = encoder_context->format_context2[i]->avpipe_opaque; + avformat_free_context(encoder_context->format_context2[i]); + free(avpipe_opaque); + } } for (int i=0; i