Multi audio #38

Merged · 24 commits · May 2, 2024

Commits
2dc0aef
First version to support multi audio
elv-reza Mar 5, 2024
28f56fe
Fix a crash transcoding dash/hls segments
elv-reza Mar 11, 2024
477cdf2
Fix a crash when don't specify the audio indexes
elv-reza Mar 13, 2024
dc01f1f
Fix audio merge, join, and pan (unit tests are working now)
elv-reza Mar 18, 2024
f27874b
Simplify and clean up
elv-reza Mar 18, 2024
c16aae5
Fix an issue when setting duration in audio tracks
elv-reza Mar 20, 2024
bc2c46b
Add unit test for multi audio
elv-reza Mar 20, 2024
3c6c7d4
More development and fix a crash
elv-reza Mar 21, 2024
603ad18
Fix a bug in dash audio transcoding, fix live unit tests, add unit te…
elv-reza Mar 26, 2024
dd33a50
Add live source for multi audio
elv-reza Mar 26, 2024
3902fa6
Fix audio_input_start_pts for each audio stream
elv-reza Mar 27, 2024
2c6508e
Fix first_encoding_audio_pts for multi audio
elv-reza Mar 27, 2024
e9a95f9
More development for audio fields
elv-reza Mar 27, 2024
04cad61
More audio field correction
elv-reza Mar 27, 2024
7cdf3f4
More development for stat callbacks, I am almost there ...
elv-reza Mar 28, 2024
f65317e
Complete stat callback changes with stream_index
elv-reza Mar 28, 2024
3501c46
Merge with master
elv-reza Apr 2, 2024
c51d561
Simplify some defined constants
elv-reza Apr 2, 2024
dd11009
Some clean up and flushing all audio frames for multi audio
elv-reza Apr 26, 2024
bc6b998
Update README.md
elv-reza Apr 29, 2024
aad8ab7
One more update in README.md
elv-reza Apr 29, 2024
dc49361
Detect packet loss in srt
elv-reza Apr 29, 2024
d7b42e8
Adjust pts of the input stream in all the cases, and not only for MPE…
elv-reza Apr 30, 2024
5d73100
Increase avpipe version
elv-reza May 2, 2024
6 changes: 4 additions & 2 deletions README.md
@@ -127,6 +127,7 @@ typedef struct xcparams_t {
int extract_images_sz; // Size of the array extract_images_ts

int video_time_base; // New video encoder time_base (1/video_time_base)
int video_frame_duration_ts; // Frame duration of the output video in time base

int debug_frame_level;
int connection_timeout; // Connection timeout in sec for RTMP or MPEGTS protocols
@@ -143,7 +144,7 @@ typedef struct xcparams_t {
- If xc_type=xc_audio_join then the avpipe library creates an audio join filter graph and joins the selected input audio streams to produce a single joined audio stream.
- If xc_type=xc_audio_pan then the avpipe library creates an audio pan filter graph to pan multiple channels in one input stream to one output stereo stream.
- **Specifying decoder/encoder:** the ecodec/decodec params set the video encoder/decoder, and the ecodec2/decodec2 params set the audio encoder/decoder. For video the decoder can be one of "h264", "h264_cuvid", "jpeg2000", "hevc" and the encoder can be "libx264", "libx265", "h264_nvenc", "h264_videotoolbox", or "mjpeg". For audio the decoder can be "aac" or "ac3" and the encoder can be "aac", "ac3", "mp2" or "mp3".
- **Joining/merging multiple audio:** avpipe library has the capability to join and pan multiple audio input streams by setting xc_type parameter to xc_audio_join and xc_audio_pan respectively (merging multiple audio is not complete yet).
- **Transcoding multiple audio:** the avpipe library can transcode one or more audio streams at the same time. The _audio_index_ array holds the indexes of the audio streams to be transcoded, and the parameter _n_audio_ determines the number of entries in the _audio_index_ array (see the sketch after this list).
- **Using GPU:** the avpipe library can utilize NVIDIA cards for transcoding. To utilize an NVIDIA GPU, gpu_index must be set (the default is the GPU with index 0). The nvidia-smi command can be used to find the available GPU indexes on a machine. In addition, the decoder and encoder should be set to "h264_cuvid" and "h264_nvenc" respectively. Finally, in order to pick the correct GPU index, the environment variable CUDA_DEVICE_ORDER=PCI_BUS_ID must be set before running the program.
- **Text watermarking:** this can be done by setting watermark_text, watermark_xloc, watermark_yloc, watermark_relative_sz, and watermark_font_color while transcoding a video (xc_type=xc_video), which makes the specified watermark text appear at the specified location.
- **Image watermarking:** this can be done by setting watermark_overlay (the buffer containing the overlay image), watermark_overlay_len, watermark_xloc, and watermark_yloc while transcoding a video (xc_type=xc_video).
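
A minimal sketch of the multi-audio setup described above. The field names (audio_index, n_audio, xc_type, ecodec2) follow this README; the exact field types, the url field, and the xc_audio constant are assumptions about the avpipe headers, not code from this PR.

xcparams_t params = {0};

params.url = "input.mp4";      /* hypothetical input */
params.xc_type = xc_audio;     /* audio-only transcode (assumed constant) */
params.ecodec2 = "aac";        /* audio encoder */
params.n_audio = 2;            /* number of entries in audio_index */
params.audio_index[0] = 1;     /* transcode input audio stream 1 */
params.audio_index[1] = 2;     /* ... and input audio stream 2 */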
@@ -157,7 +158,8 @@ typedef struct xcparams_t {
- setting xc_type = xc_audio_join would join two or more audio inputs and create a new audio output (for example, joining two mono streams and creating one stereo stream).
- setting xc_type = xc_audio_pan would pick different audio channels from the input and create a new audio stream (for example, picking different channels from a 5.1 channel layout and producing a stereo stream containing two channels).
- setting xc_type = xc_audio_merge would merge different input audio streams and produce a new multi-channel output stream (for example, merging different input mono streams and creating a new 5.1 stream).
- **Setting video timebase:** setting video_time_base will set the timebase of generated video to 1/video_time_base (the timebase has to be bigger than 10000).
- **Setting video timebase:** setting _video_time_base_ will set the timebase of the generated video to 1/video_time_base (video_time_base has to be greater than 10000).
- **Video frame duration:** the parameter _video_frame_duration_ts_ sets the duration of each output video frame in the output timebase. Together with _video_time_base_ it can be used to normalize video frames and their durations. For example, for a 60 fps stream with _video_frame_duration_ts_ equal to 256, _video_time_base_ would be 15360. As another example, for a 59.94 fps stream, _video_frame_duration_ts_ can be 1001 with a _video_time_base_ of 60000; in that case a segment of 1800 frames is 1801800 timebase units long (see the sketch after this list).
- **Debugging with frames:** if the parameter debug_frame_level is on, the logs will also include very low-level debug messages that trace the reading and writing of every piece of data.
- **Connection timeout:** this parameter is useful when recording or transcoding RTMP or MPEGTS streams. If avpipe is listening for an RTMP stream, connection_timeout determines the time in seconds to listen for an incoming RTMP stream. If avpipe is listening for incoming UDP MPEGTS packets, connection_timeout determines the time in seconds to wait for the first incoming UDP packet (if no packet is received within connection_timeout, a timeout occurs and an error is generated).
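
A quick sanity check of the timebase arithmetic above, as a standalone sketch (nothing in it is avpipe API):

#include <stdio.h>

/* video_time_base = fps * video_frame_duration_ts, so each frame advances
   the PTS by exactly one frame duration. */
int main(void)
{
    printf("60 fps at 256 ticks/frame -> time base 1/%d\n", 60 * 256);  /* 15360 */
    /* 59.94 fps is the rational 60000/1001, so 1001 ticks/frame gives a
       time base of (60000/1001) * 1001 = 60000 */
    printf("1800 frames at 1001 ticks -> %d ticks\n", 1800 * 1001);     /* 1801800 */
    return 0;
}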

58 changes: 33 additions & 25 deletions avpipe.c
@@ -48,14 +48,15 @@ typedef struct udp_thread_params_t {
static int
out_stat(
void *opaque,
int stream_index,
avp_stat_t stat_type);

int64_t AVPipeOpenInput(char *, int64_t *);
int64_t AVPipeOpenMuxInput(char *, char *, int64_t *);
int AVPipeReadInput(int64_t, uint8_t *, int);
int64_t AVPipeSeekInput(int64_t, int64_t, int);
int AVPipeCloseInput(int64_t);
int AVPipeStatInput(int64_t, avp_stat_t, void *);
int AVPipeStatInput(int64_t, int, avp_stat_t, void *);
int64_t AVPipeOpenOutput(int64_t, int, int, int64_t, int);
int64_t AVPipeOpenMuxOutput(char *, int);
int AVPipeWriteOutput(int64_t, int64_t, uint8_t *, int);
@@ -64,8 +65,8 @@ int AVPipeSeekOutput(int64_t, int64_t, int64_t, int);
int AVPipeSeekMuxOutput(int64_t, int64_t, int);
int AVPipeCloseOutput(int64_t, int64_t);
int AVPipeCloseMuxOutput(int64_t);
int AVPipeStatOutput(int64_t, int64_t, avpipe_buftype_t, avp_stat_t, void *);
int AVPipeStatMuxOutput(int64_t, avp_stat_t, void *);
int AVPipeStatOutput(int64_t, int64_t, int, avpipe_buftype_t, avp_stat_t, void *);
int AVPipeStatMuxOutput(int64_t, int, avp_stat_t, void *);
int CLog(char *);
int CDebug(char *);
int CInfo(char *);
@@ -87,6 +88,7 @@ static pthread_mutex_t tx_mutex = PTHREAD_MUTEX_INITIALIZER;
static int
in_stat(
void *opaque,
int stream_index,
avp_stat_t stat_type);

static int
@@ -182,7 +184,8 @@ in_read_packet(
inctx->read_pos += r;

if (inctx->read_bytes - inctx->read_reported > BYTES_READ_REPORT) {
in_stat(opaque, in_stat_bytes_read);
/* Pass stream_index 0 (stream_index has no meaning for in_stat_bytes_read) */
in_stat(opaque, 0, in_stat_bytes_read);
Contributor comment:
Should we use -1 for "no stream"? So future code doesn't think it is stream 0.
Or #define a "No stream" value.
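/* A hypothetical sketch of the suggestion above; the name is illustrative
 * and not part of this PR:
 *
 *     #define AVPIPE_NO_STREAM (-1)
 *     in_stat(opaque, AVPIPE_NO_STREAM, in_stat_bytes_read);
 */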

inctx->read_reported = inctx->read_bytes;
}
}
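
The hunks above thread a new stream_index argument through every stat callback. As an illustration of the new signature (a sketch, not code from this PR; ioctx_t, avp_stat_t, and the in_stat_* values come from the avpipe headers, and elv_log is assumed available):

static int
my_in_stat(
    void *opaque,
    int stream_index,
    avp_stat_t stat_type)
{
    ioctx_t *c = (ioctx_t *)opaque;

    switch (stat_type) {
    case in_stat_bytes_read:
        /* stream_index carries no meaning for byte counts (see the review
         * comment above) */
        elv_log("bytes_read=%"PRId64, c->read_bytes);
        break;
    case in_stat_audio_frame_read:
        /* with multi audio, per-stream counters are keyed by stream_index */
        elv_log("stream_index=%d audio_frames_read=%"PRId64,
            stream_index, c->audio_frames_read);
        break;
    default:
        break;
    }
    return 0;
}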
@@ -254,6 +257,7 @@ in_seek(
static int
in_stat(
void *opaque,
int stream_index,
avp_stat_t stat_type)
{
int64_t fd;
@@ -268,24 +272,24 @@ in_stat(

switch (stat_type) {
case in_stat_bytes_read:
rc = AVPipeStatInput(fd, stat_type, &c->read_bytes);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->read_bytes);
break;

case in_stat_decoding_audio_start_pts:
case in_stat_decoding_video_start_pts:
rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts);
break;

case in_stat_audio_frame_read:
rc = AVPipeStatInput(fd, stat_type, &c->audio_frames_read);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->audio_frames_read);
break;

case in_stat_video_frame_read:
rc = AVPipeStatInput(fd, stat_type, &c->video_frames_read);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->video_frames_read);
break;

case in_stat_first_keyframe_pts:
rc = AVPipeStatInput(fd, stat_type, &c->first_key_frame_pts);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->first_key_frame_pts);
break;

default:
@@ -514,6 +518,7 @@ udp_in_seek(
static int
udp_in_stat(
void *opaque,
int stream_index,
avp_stat_t stat_type)
{
int64_t fd;
@@ -534,32 +539,32 @@
case in_stat_decoding_audio_start_pts:
if (debug_frame_level)
elv_dbg("IN STAT UDP fd=%d, audio start PTS=%"PRId64", url=%s", fd, c->decoding_start_pts, c->url);
rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts);
break;
case in_stat_decoding_video_start_pts:
if (debug_frame_level)
elv_dbg("IN STAT UDP fd=%d, video start PTS=%"PRId64", url=%s", fd, c->decoding_start_pts, c->url);
rc = AVPipeStatInput(fd, stat_type, &c->decoding_start_pts);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->decoding_start_pts);
break;
case in_stat_audio_frame_read:
if (debug_frame_level)
elv_dbg("IN STAT UDP fd=%d, audio frame read=%"PRId64", url=%s", fd, c->audio_frames_read, c->url);
rc = AVPipeStatInput(fd, stat_type, &c->audio_frames_read);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->audio_frames_read);
break;
case in_stat_video_frame_read:
if (debug_frame_level)
elv_dbg("IN STAT UDP fd=%d, video frame read=%"PRId64", url=%s", fd, c->video_frames_read, c->url);
rc = AVPipeStatInput(fd, stat_type, &c->video_frames_read);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->video_frames_read);
break;
case in_stat_first_keyframe_pts:
if (debug_frame_level)
elv_dbg("IN STAT UDP fd=%d, first keyframe PTS=%"PRId64", url=%s", fd, c->first_key_frame_pts, c->url);
rc = AVPipeStatInput(fd, stat_type, &c->first_key_frame_pts);
rc = AVPipeStatInput(fd, stream_index, stat_type, &c->first_key_frame_pts);
break;
case in_stat_data_scte35:
if (debug_frame_level)
elv_dbg("IN STAT UDP SCTE35 fd=%d, stat_type=%d, url=%s", fd, stat_type, c->url);
rc = AVPipeStatInput(fd, stat_type, c->data);
rc = AVPipeStatInput(fd, stream_index, stat_type, c->data);
break;
default:
elv_err("IN STAT UDP fd=%d, invalid input stat=%d, url=%s", fd, stat_type, c->url);
@@ -634,13 +639,13 @@
outctx->written_bytes - outctx->write_reported > VIDEO_BYTES_WRITE_REPORT) ||
(outctx->type == avpipe_audio_fmp4_segment &&
outctx->written_bytes - outctx->write_reported > AUDIO_BYTES_WRITE_REPORT)) {
out_stat(opaque, out_stat_bytes_written);
out_stat(opaque, outctx->stream_index, out_stat_bytes_written);
outctx->write_reported = outctx->written_bytes;
}

if (xcparams && xcparams->debug_frame_level)
elv_dbg("OUT WRITE fd=%"PRId64", size=%d written=%d pos=%d total=%d",
fd, buf_size, bwritten, outctx->write_pos, outctx->written_bytes);
elv_dbg("OUT WRITE stream_index=%d, fd=%"PRId64", size=%d written=%d pos=%d total=%d",
outctx->stream_index, fd, buf_size, bwritten, outctx->write_pos, outctx->written_bytes);

return buf_size;
}
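
Since AVPipeStatOutput now receives the stream index, the embedding application can keep per-stream statistics. A minimal host-side sketch (illustrative only; MAX_STREAMS and the counter array are assumptions, and the real handler is provided by the application that links avpipe):

#define MAX_STREAMS 16
static int64_t bytes_written_per_stream[MAX_STREAMS];

int
AVPipeStatOutput(
    int64_t h,
    int64_t fd,
    int stream_index,
    avpipe_buftype_t buftype,
    avp_stat_t stat_type,
    void *value)
{
    /* track bytes written separately for each output stream */
    if (stat_type == out_stat_bytes_written &&
        stream_index >= 0 && stream_index < MAX_STREAMS)
        bytes_written_per_stream[stream_index] = *(int64_t *)value;
    return 0;
}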
@@ -691,6 +696,7 @@ out_closer(
static int
out_stat(
void *opaque,
int stream_index,
avp_stat_t stat_type)
{
ioctx_t *outctx = (ioctx_t *)opaque;
@@ -711,22 +717,22 @@
fd = *((int64_t *)(outctx->opaque));
switch (stat_type) {
case out_stat_bytes_written:
rc = AVPipeStatOutput(h, fd, buftype, stat_type, &outctx->written_bytes);
rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->written_bytes);
break;
case out_stat_encoding_end_pts:
if (buftype == avpipe_audio_segment ||
buftype == avpipe_audio_fmp4_segment)
rc = AVPipeStatOutput(h, fd, buftype, stat_type, &outctx->encoder_ctx->audio_last_pts_sent_encode);
rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->encoder_ctx->audio_last_pts_sent_encode);
else
rc = AVPipeStatOutput(h, fd, buftype, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode);
rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode);
break;
case out_stat_frame_written:
{
encoding_frame_stats_t encoding_frame_stats = {
.total_frames_written = outctx->total_frames_written,
.frames_written = outctx->frames_written,
};
rc = AVPipeStatOutput(h, fd, buftype, stat_type, &encoding_frame_stats);
rc = AVPipeStatOutput(h, fd, stream_index, buftype, stat_type, &encoding_frame_stats);
}
break;
default:
@@ -1216,7 +1222,8 @@ in_mux_read_packet(
}

if (c->read_bytes - c->read_reported > BYTES_READ_REPORT) {
in_stat(opaque, in_stat_bytes_read);
/* Pass stream_index 0 (stream_index has no meaning for in_stat_bytes_read) */
in_stat(opaque, 0, in_stat_bytes_read);
c->read_reported = c->read_bytes;
}

@@ -1319,6 +1326,7 @@ out_mux_seek(
static int
out_mux_stat(
void *opaque,
int stream_index,
avp_stat_t stat_type)
{
ioctx_t *outctx = (ioctx_t *)opaque;
@@ -1328,10 +1336,10 @@

switch (stat_type) {
case out_stat_bytes_written:
rc = AVPipeStatMuxOutput(fd, stat_type, &outctx->written_bytes);
rc = AVPipeStatMuxOutput(fd, stream_index, stat_type, &outctx->written_bytes);
break;
case out_stat_encoding_end_pts:
rc = AVPipeStatMuxOutput(fd, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode);
rc = AVPipeStatMuxOutput(fd, stream_index, stat_type, &outctx->encoder_ctx->video_last_pts_sent_encode);
break;
default:
break;