Thread | Android StackOverflow

VP9 Video Corrupt/Black Screen After Merging (Stream Copy) in some Android devices

Is there a specific Bitstream Filter required for VP9 in an MP4 container that I am missing, or is AVFMT_FLAG_AUTO_BSF actually required for VP9 but failing for another reason? Why would this stream copy work for H.264 but produce a black screen for VP9 on Android? Where could be the real problem, or is the code right but the problem is elsewhere?

I am using FFmpeg libraries (libavformat/libavcodec) so files via JNI in an Android app to merge separate video and audio tracks (remuxing/stream copying).

While H.264 and H.265 files merge perfectly, VP9 videos (specifically from Instagram/YouTube sources) result in a black screen or playback errors on some Android players after merging.

The Symptoms:

H.264/AAC merges and plays fine.
VP9/Opus or VP9/AAC results in a file that has the correct duration but no visible video or fails to initialize the decoder in some android devices.
I suspect the issue relates to Bitstream Filters (BSF) or timestamp handling.

What I've tried: I updated my code to include three specific fixes:

Skipping AVFMT_FLAG_AUTO_BSF for VP9: I noticed auto-bsf might be corrupting VP9 headers.
Setting avoid_negative_ts: To handle sources with negative start times.
Filtering out ATTACHED_PIC: To ensure I don't accidentally mux a thumbnail as the main video stream.

My JNI Implementation:

#include &lt;jni.h&gt;
#include &lt;libavformat/avformat.h&gt;
#include &lt;android/log.h&gt;


#ifdef NDEBUG  // NDEBUG is automatically defined in release builds
// Release build - disable debug logs
    #define LOGD(...)
    #define LOGE(...)


#else
// Debug build - enable debug logs
#define TAG &quot;Downloader&quot;
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
#endif

#include &lt;libavutil/cpu.h&gt;


JNIEXPORT jint JNICALL
Java_com_harrshbermann_SocialMate_google_logEvent(
        JNIEnv *env, jobject thiz, jstring jVideoPath, jstring jAudioPath, jstring jOutPath) {

/*
// Inside your JNI function:
    int cpu_flags = av_get_cpu_flags();

    if (cpu_flags &amp; AV_CPU_FLAG_NEON) {
        LOGD(&quot;FFMpeg Check: NEON is ENABLED and active!&quot;);
    } else {
        LOGE(&quot;FFMpeg Check: NEON is NOT detected!&quot;);
    }

    if (cpu_flags &amp; AV_CPU_FLAG_ARMV8) {
        LOGD(&quot;FFMpeg Check: ARMv8 optimizations are active!&quot;);
    }

    */

    // ── Declare ALL variables at top (C89 compliance for NDK) ────────────────
    AVFormatContext *ifmt_ctx_v = NULL, *ifmt_ctx_a = NULL, *ofmt_ctx = NULL;
    AVStream *in_v, *in_a, *out_v, *out_a;
    AVPacket *pkt = NULL;
    int ret = 0;
    int v_idx, a_idx;
    int v_done = 0, a_done = 0;
    int64_t v_dts = 0, a_dts = 0;
    int file_opened = 0; // track if avio_open succeeded

    const char *vPath = (*env)-&gt;GetStringUTFChars(env, jVideoPath, 0);
    const char *aPath = (*env)-&gt;GetStringUTFChars(env, jAudioPath, 0);
    const char *oPath = (*env)-&gt;GetStringUTFChars(env, jOutPath, 0);

    // ── Allocate packet ───────────────────────────────────────────────────────
    pkt = av_packet_alloc();
    if (!pkt) { ret = -1; goto cleanup; }

    // ── Open inputs ───────────────────────────────────────────────────────────
    if (avformat_open_input(&amp;ifmt_ctx_v, vPath, NULL, NULL) &lt; 0) {
        LOGE(&quot;Failed to open video&quot;);
        ret = -2; goto cleanup;
    }
    if (avformat_open_input(&amp;ifmt_ctx_a, aPath, NULL, NULL) &lt; 0) {
        LOGE(&quot;Failed to open audio&quot;);
        ret = -3; goto cleanup;
    }

    if (avformat_find_stream_info(ifmt_ctx_v, NULL) &lt; 0 ||
        avformat_find_stream_info(ifmt_ctx_a, NULL) &lt; 0) {
        LOGE(&quot;Failed to find stream info&quot;);
        ret = -4; goto cleanup;
    }

    // ── Find best streams ─────────────────────────────────────────────────────
    v_idx = av_find_best_stream(ifmt_ctx_v, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
    a_idx = av_find_best_stream(ifmt_ctx_a, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
    if (v_idx &lt; 0 || a_idx &lt; 0) {
        LOGE(&quot;Could not find video/audio stream (v=%d, a=%d)&quot;, v_idx, a_idx);
        ret = -5; goto cleanup;
    }

    in_v = ifmt_ctx_v-&gt;streams[v_idx];
    in_a = ifmt_ctx_a-&gt;streams[a_idx];

    // ── Allocate output context ───────────────────────────────────────────────
    if (avformat_alloc_output_context2(&amp;ofmt_ctx, NULL, NULL, oPath) &lt; 0 || !ofmt_ctx) {
        LOGE(&quot;Failed to alloc output context&quot;);
        ret = -6; goto cleanup;
    }

    // ── Create output streams ─────────────────────────────────────────────────
    out_v = avformat_new_stream(ofmt_ctx, NULL);
    out_a = avformat_new_stream(ofmt_ctx, NULL);
    if (!out_v || !out_a) {
        LOGE(&quot;Failed to create output streams&quot;);
        ret = -7; goto cleanup;
    }

    // avcodec_parameters_copy handles extradata internally — no manual malloc needed
    avcodec_parameters_copy(out_v-&gt;codecpar, in_v-&gt;codecpar);
    avcodec_parameters_copy(out_a-&gt;codecpar, in_a-&gt;codecpar);
    out_v-&gt;codecpar-&gt;codec_tag = 0; // reset for container compatibility
    out_a-&gt;codecpar-&gt;codec_tag = 0;

    // ── Open output file ──────────────────────────────────────────────────────
    if (!(ofmt_ctx-&gt;oformat-&gt;flags &amp; AVFMT_NOFILE)) {
        if (avio_open(&amp;ofmt_ctx-&gt;pb, oPath, AVIO_FLAG_WRITE) &lt; 0) {
            LOGE(&quot;Failed to open output file&quot;);
            ret = -8; goto cleanup;
        }
        file_opened = 1;
    }

    // Auto BSF fixes Annex-B vs. AVCC format mismatch for H.264/H.265 in MP4
    ofmt_ctx-&gt;flags |= AVFMT_FLAG_AUTO_BSF;

    if (avformat_write_header(ofmt_ctx, NULL) &lt; 0) {
        LOGE(&quot;Failed to write header&quot;);
        ret = -9; goto cleanup;
    }

    // ── Interleaved muxing loop ───────────────────────────────────────────────
    while (!v_done || !a_done) {
        AVFormatContext *src;
        AVStream *in_st, *out_st;
        int target_idx, pick_video, got;

        pick_video = !v_done &amp;&amp; (a_done ||
                                 av_compare_ts(v_dts, in_v-&gt;time_base, a_dts, in_a-&gt;time_base) &lt;= 0);

        src        = pick_video ? ifmt_ctx_v : ifmt_ctx_a;
        in_st      = pick_video ? in_v       : in_a;
        out_st     = pick_video ? out_v      : out_a;
        target_idx = pick_video ? v_idx      : a_idx;

        // Skip non-target packets (e.g. subtitles in same container)
        got = 0;
        while (av_read_frame(src, pkt) &gt;= 0) {
            if (pkt-&gt;stream_index == target_idx) { got = 1; break; }
            av_packet_unref(pkt);
        }

        if (!got) {
            if (pick_video) v_done = 1;
            else            a_done = 1;
            continue;
        }

        // Update DTS tracker for next interleaving decision
        if (pick_video) v_dts = pkt-&gt;dts;
        else            a_dts = pkt-&gt;dts;

        // Rescale timestamps with proper rounding flags
        pkt-&gt;pts = av_rescale_q_rnd(pkt-&gt;pts, in_st-&gt;time_base, out_st-&gt;time_base,
                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
        pkt-&gt;dts = av_rescale_q_rnd(pkt-&gt;dts, in_st-&gt;time_base, out_st-&gt;time_base,
                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
        pkt-&gt;duration = av_rescale_q(pkt-&gt;duration, in_st-&gt;time_base, out_st-&gt;time_base);
        pkt-&gt;pos = -1;
        pkt-&gt;stream_index = out_st-&gt;index; // use actual index, not hardcoded 0/1

        if (av_interleaved_write_frame(ofmt_ctx, pkt) &lt; 0) {
            LOGE(&quot;Error writing %s packet&quot;, pick_video ? &quot;video&quot; : &quot;audio&quot;);
        }
        av_packet_unref(pkt);
    }

    av_write_trailer(ofmt_ctx);
    LOGD(&quot;mergeAV finished successfully&quot;);

    cleanup:
    if (pkt)        av_packet_free(&amp;pkt);
    if (ifmt_ctx_v) avformat_close_input(&amp;ifmt_ctx_v);
    if (ifmt_ctx_a) avformat_close_input(&amp;ifmt_ctx_a);
    if (ofmt_ctx) {
        if (file_opened) avio_closep(&amp;ofmt_ctx-&gt;pb);
        avformat_free_context(ofmt_ctx);
    }

    (*env)-&gt;ReleaseStringUTFChars(env, jVideoPath, vPath);
    (*env)-&gt;ReleaseStringUTFChars(env, jAudioPath, aPath);
    (*env)-&gt;ReleaseStringUTFChars(env, jOutPath, oPath);

    return ret;
}

Changes I made:

JNIEXPORT jint JNICALL
Java_com_example_app_NativeMuxer_mergeAV(JNIEnv *env, jobject thiz, jstring jVideoPath, jstring jAudioPath, jstring jOutPath) {
    AVFormatContext *ifmt_ctx_v = NULL, *ifmt_ctx_a = NULL, *ofmt_ctx = NULL;
    AVPacket *pkt = av_packet_alloc();
    int v_idx = -1, a_idx = -1, ret = 0;
    
    // ... [Opening inputs and finding stream info] ...

    // Fix 1: Explicitly skip thumbnail streams
    for (int i = 0; i &lt; (int)ifmt_ctx_v-&gt;nb_streams; i++) {
        if (ifmt_ctx_v-&gt;streams[i]-&gt;disposition &amp; AV_DISPOSITION_ATTACHED_PIC) continue;
        if (ifmt_ctx_v-&gt;streams[i]-&gt;codecpar-&gt;codec_type == AVMEDIA_TYPE_VIDEO) {
            v_idx = i; break;
        }
    }

    // Detect VP9
    int is_vp = (ifmt_ctx_v-&gt;streams[v_idx]-&gt;codecpar-&gt;codec_id == AV_CODEC_ID_VP9);

    avformat_alloc_output_context2(&amp;ofmt_ctx, NULL, NULL, oPath);

    // Create streams and copy parameters
    // ... [avformat_new_stream &amp; avcodec_parameters_copy] ...
    out_v-&gt;codecpar-&gt;codec_tag = 0;
    out_a-&gt;codecpar-&gt;codec_tag = 0;

    // Fix 2: Skip AUTO_BSF for VP9 to avoid bitstream corruption
    if (!is_vp) {
        ofmt_ctx-&gt;flags |= AVFMT_FLAG_AUTO_BSF;
    }

    // Fix 3: Handle negative timestamps
    ofmt_ctx-&gt;avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE;

    avformat_write_header(ofmt_ctx, NULL);

    // ... [Interleaved muxing loop with av_rescale_q_rnd] ...

    av_write_trailer(ofmt_ctx);
    // ... [Cleanup] ...
    return ret;
}

Apr 23 at 6:06 AM

HARRSH BERMANN

#java #android #ffmpeg #java-native-interface #android-ffmpeg

No answer found for this question yet.

#include <jni.h> #include <libavformat/avformat.h> #include <android/log.h> #ifdef NDEBUG // NDEBUG is automatically defined in release builds // Release build - disable debug logs #define LOGD(...) #define LOGE(...) #else // Debug build - enable debug logs #define TAG "Downloader" #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__) #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__) #endif #include <libavutil/cpu.h> JNIEXPORT jint JNICALL Java_com_harrshbermann_SocialMate_google_logEvent( JNIEnv *env, jobject thiz, jstring jVideoPath, jstring jAudioPath, jstring jOutPath) { /* // Inside your JNI function: int cpu_flags = av_get_cpu_flags(); if (cpu_flags & AV_CPU_FLAG_NEON) { LOGD("FFMpeg Check: NEON is ENABLED and active!"); } else { LOGE("FFMpeg Check: NEON is NOT detected!"); } if (cpu_flags & AV_CPU_FLAG_ARMV8) { LOGD("FFMpeg Check: ARMv8 optimizations are active!"); } */ // ── Declare ALL variables at top (C89 compliance for NDK) ──────────────── AVFormatContext *ifmt_ctx_v = NULL, *ifmt_ctx_a = NULL, *ofmt_ctx = NULL; AVStream *in_v, *in_a, *out_v, *out_a; AVPacket *pkt = NULL; int ret = 0; int v_idx, a_idx; int v_done = 0, a_done = 0; int64_t v_dts = 0, a_dts = 0; int file_opened = 0; // track if avio_open succeeded const char *vPath = (*env)->GetStringUTFChars(env, jVideoPath, 0); const char *aPath = (*env)->GetStringUTFChars(env, jAudioPath, 0); const char *oPath = (*env)->GetStringUTFChars(env, jOutPath, 0); // ── Allocate packet ─────────────────────────────────────────────────────── pkt = av_packet_alloc(); if (!pkt) { ret = -1; goto cleanup; } // ── Open inputs ─────────────────────────────────────────────────────────── if (avformat_open_input(&ifmt_ctx_v, vPath, NULL, NULL) < 0) { LOGE("Failed to open video"); ret = -2; goto cleanup; } if (avformat_open_input(&ifmt_ctx_a, aPath, NULL, NULL) < 0) { LOGE("Failed to open audio"); ret = -3; goto cleanup; } if (avformat_find_stream_info(ifmt_ctx_v, NULL) < 0 || avformat_find_stream_info(ifmt_ctx_a, NULL) < 0) { LOGE("Failed to find stream info"); ret = -4; goto cleanup; } // ── Find best streams ───────────────────────────────────────────────────── v_idx = av_find_best_stream(ifmt_ctx_v, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); a_idx = av_find_best_stream(ifmt_ctx_a, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0); if (v_idx < 0 || a_idx < 0) { LOGE("Could not find video/audio stream (v=%d, a=%d)", v_idx, a_idx); ret = -5; goto cleanup; } in_v = ifmt_ctx_v->streams[v_idx]; in_a = ifmt_ctx_a->streams[a_idx]; // ── Allocate output context ─────────────────────────────────────────────── if (avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, oPath) < 0 || !ofmt_ctx) { LOGE("Failed to alloc output context"); ret = -6; goto cleanup; } // ── Create output streams ───────────────────────────────────────────────── out_v = avformat_new_stream(ofmt_ctx, NULL); out_a = avformat_new_stream(ofmt_ctx, NULL); if (!out_v || !out_a) { LOGE("Failed to create output streams"); ret = -7; goto cleanup; } // avcodec_parameters_copy handles extradata internally — no manual malloc needed avcodec_parameters_copy(out_v->codecpar, in_v->codecpar); avcodec_parameters_copy(out_a->codecpar, in_a->codecpar); out_v->codecpar->codec_tag = 0; // reset for container compatibility out_a->codecpar->codec_tag = 0; // ── Open output file ────────────────────────────────────────────────────── if (!(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) { if (avio_open(&ofmt_ctx->pb, oPath, AVIO_FLAG_WRITE) < 0) { LOGE("Failed to open output file"); ret = -8; goto cleanup; } file_opened = 1; } // Auto BSF fixes Annex-B vs. AVCC format mismatch for H.264/H.265 in MP4 ofmt_ctx->flags |= AVFMT_FLAG_AUTO_BSF; if (avformat_write_header(ofmt_ctx, NULL) < 0) { LOGE("Failed to write header"); ret = -9; goto cleanup; } // ── Interleaved muxing loop ─────────────────────────────────────────────── while (!v_done || !a_done) { AVFormatContext *src; AVStream *in_st, *out_st; int target_idx, pick_video, got; pick_video = !v_done && (a_done || av_compare_ts(v_dts, in_v->time_base, a_dts, in_a->time_base) <= 0); src = pick_video ? ifmt_ctx_v : ifmt_ctx_a; in_st = pick_video ? in_v : in_a; out_st = pick_video ? out_v : out_a; target_idx = pick_video ? v_idx : a_idx; // Skip non-target packets (e.g. subtitles in same container) got = 0; while (av_read_frame(src, pkt) >= 0) { if (pkt->stream_index == target_idx) { got = 1; break; } av_packet_unref(pkt); } if (!got) { if (pick_video) v_done = 1; else a_done = 1; continue; } // Update DTS tracker for next interleaving decision if (pick_video) v_dts = pkt->dts; else a_dts = pkt->dts; // Rescale timestamps with proper rounding flags pkt->pts = av_rescale_q_rnd(pkt->pts, in_st->time_base, out_st->time_base, AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX); pkt->dts = av_rescale_q_rnd(pkt->dts, in_st->time_base, out_st->time_base, AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX); pkt->duration = av_rescale_q(pkt->duration, in_st->time_base, out_st->time_base); pkt->pos = -1; pkt->stream_index = out_st->index; // use actual index, not hardcoded 0/1 if (av_interleaved_write_frame(ofmt_ctx, pkt) < 0) { LOGE("Error writing %s packet", pick_video ? "video" : "audio"); } av_packet_unref(pkt); } av_write_trailer(ofmt_ctx); LOGD("mergeAV finished successfully"); cleanup: if (pkt) av_packet_free(&pkt); if (ifmt_ctx_v) avformat_close_input(&ifmt_ctx_v); if (ifmt_ctx_a) avformat_close_input(&ifmt_ctx_a); if (ofmt_ctx) { if (file_opened) avio_closep(&ofmt_ctx->pb); avformat_free_context(ofmt_ctx); } (*env)->ReleaseStringUTFChars(env, jVideoPath, vPath); (*env)->ReleaseStringUTFChars(env, jAudioPath, aPath); (*env)->ReleaseStringUTFChars(env, jOutPath, oPath); return ret; }

JNIEXPORT jint JNICALL Java_com_example_app_NativeMuxer_mergeAV(JNIEnv *env, jobject thiz, jstring jVideoPath, jstring jAudioPath, jstring jOutPath) { AVFormatContext *ifmt_ctx_v = NULL, *ifmt_ctx_a = NULL, *ofmt_ctx = NULL; AVPacket *pkt = av_packet_alloc(); int v_idx = -1, a_idx = -1, ret = 0; // ... [Opening inputs and finding stream info] ... // Fix 1: Explicitly skip thumbnail streams for (int i = 0; i < (int)ifmt_ctx_v->nb_streams; i++) { if (ifmt_ctx_v->streams[i]->disposition & AV_DISPOSITION_ATTACHED_PIC) continue; if (ifmt_ctx_v->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) { v_idx = i; break; } } // Detect VP9 int is_vp = (ifmt_ctx_v->streams[v_idx]->codecpar->codec_id == AV_CODEC_ID_VP9); avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, oPath); // Create streams and copy parameters // ... [avformat_new_stream & avcodec_parameters_copy] ... out_v->codecpar->codec_tag = 0; out_a->codecpar->codec_tag = 0; // Fix 2: Skip AUTO_BSF for VP9 to avoid bitstream corruption if (!is_vp) { ofmt_ctx->flags |= AVFMT_FLAG_AUTO_BSF; } // Fix 3: Handle negative timestamps ofmt_ctx->avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE; avformat_write_header(ofmt_ctx, NULL); // ... [Interleaved muxing loop with av_rescale_q_rnd] ... av_write_trailer(ofmt_ctx); // ... [Cleanup] ... return ret; }