VP9 Video Corrupt/Black Screen After Merging (Stream Copy) in some Android devices


Is there a specific bitstream filter required for VP9 in an MP4 container that I am missing, or is AVFMT_FLAG_AUTO_BSF actually required for VP9 but failing for another reason? Why would this stream copy work for H.264 but produce a black screen for VP9 on Android? Where could the real problem be — or is the code correct and the problem elsewhere?

I am using the FFmpeg libraries (libavformat/libavcodec), built as .so shared libraries and called via JNI, in an Android app to merge separate video and audio tracks (remuxing/stream copying).

While H.264 and H.265 files merge perfectly, VP9 videos (specifically from Instagram/YouTube sources) result in a black screen or playback errors on some Android players after merging.

The Symptoms:

  • H.264/AAC merges and plays fine.

  • VP9/Opus or VP9/AAC results in a file that has the correct duration but shows no video, or fails to initialize the decoder, on some Android devices.

  • I suspect the issue relates to Bitstream Filters (BSF) or timestamp handling.

What I've tried: I updated my code to include three specific fixes:

  1. Skipping AVFMT_FLAG_AUTO_BSF for VP9: I noticed auto-bsf might be corrupting VP9 headers.

  2. Setting avoid_negative_ts: To handle sources with negative start times.

  3. Filtering out ATTACHED_PIC: To ensure I don't accidentally mux a thumbnail as the main video stream.

My JNI Implementation:

#include <jni.h>
#include <libavformat/avformat.h>
#include <android/log.h>


#ifdef NDEBUG  // NDEBUG is automatically defined in release builds
// Release build - disable debug logs
    #define LOGD(...)
    #define LOGE(...)


#else
// Debug build - enable debug logs
#define TAG "Downloader"
#define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
#endif

#include <libavutil/cpu.h>


/*
 * Remux (stream-copy) one video track and one audio track into a single
 * output container — no re-encoding is performed.
 *
 * Parameters (JNI): paths to the video input, audio input, and output file.
 * Returns: 0 on success, a negative step-specific code on failure.
 *
 * Note on VP9-in-MP4: VP9 packets originating from WebM/DASH sources are
 * often "superframes" that must be merged before MP4 muxing. libavformat
 * does this via the auto-inserted vp9_superframe bitstream filter, which
 * requires AVFMT_FLAG_AUTO_BSF (on by default). Do NOT disable AUTO_BSF
 * for VP9 — that is a common cause of black-screen output on strict
 * Android decoders.
 */
JNIEXPORT jint JNICALL
Java_com_harrshbermann_SocialMate_google_logEvent(
        JNIEnv *env, jobject thiz, jstring jVideoPath, jstring jAudioPath, jstring jOutPath) {

    // ── Declare ALL variables at top (C89 compliance for NDK) ────────────────
    AVFormatContext *ifmt_ctx_v = NULL, *ifmt_ctx_a = NULL, *ofmt_ctx = NULL;
    AVStream *in_v, *in_a, *out_v, *out_a;
    AVPacket *pkt = NULL;
    int ret = 0;
    int v_idx, a_idx;
    int i;
    int v_done = 0, a_done = 0;
    int64_t v_dts = 0, a_dts = 0;   // last-seen timestamp per track, drives interleaving order
    int file_opened = 0;            // avio_open succeeded -> must avio_closep in cleanup
    const char *vPath = NULL, *aPath = NULL, *oPath = NULL;

    // GetStringUTFChars can return NULL on allocation failure; passing NULL
    // to ReleaseStringUTFChars is undefined, so initialize to NULL and guard.
    vPath = (*env)->GetStringUTFChars(env, jVideoPath, 0);
    aPath = (*env)->GetStringUTFChars(env, jAudioPath, 0);
    oPath = (*env)->GetStringUTFChars(env, jOutPath, 0);
    if (!vPath || !aPath || !oPath) {
        LOGE("GetStringUTFChars failed");
        ret = -10; goto cleanup;
    }

    // ── Allocate packet ───────────────────────────────────────────────────────
    pkt = av_packet_alloc();
    if (!pkt) { ret = -1; goto cleanup; }

    // ── Open inputs ───────────────────────────────────────────────────────────
    if (avformat_open_input(&ifmt_ctx_v, vPath, NULL, NULL) < 0) {
        LOGE("Failed to open video");
        ret = -2; goto cleanup;
    }
    if (avformat_open_input(&ifmt_ctx_a, aPath, NULL, NULL) < 0) {
        LOGE("Failed to open audio");
        ret = -3; goto cleanup;
    }

    if (avformat_find_stream_info(ifmt_ctx_v, NULL) < 0 ||
        avformat_find_stream_info(ifmt_ctx_a, NULL) < 0) {
        LOGE("Failed to find stream info");
        ret = -4; goto cleanup;
    }

    // ── Find best streams ─────────────────────────────────────────────────────
    v_idx = av_find_best_stream(ifmt_ctx_v, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
    a_idx = av_find_best_stream(ifmt_ctx_a, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);

    // Cover art (thumbnails) is exposed as a video stream with the
    // ATTACHED_PIC disposition; never mux that as the main video track.
    if (v_idx >= 0 &&
        (ifmt_ctx_v->streams[v_idx]->disposition & AV_DISPOSITION_ATTACHED_PIC)) {
        v_idx = -1;
        for (i = 0; i < (int)ifmt_ctx_v->nb_streams; i++) {
            AVStream *st = ifmt_ctx_v->streams[i];
            if (st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO &&
                !(st->disposition & AV_DISPOSITION_ATTACHED_PIC)) {
                v_idx = i;
                break;
            }
        }
    }

    if (v_idx < 0 || a_idx < 0) {
        LOGE("Could not find video/audio stream (v=%d, a=%d)", v_idx, a_idx);
        ret = -5; goto cleanup;
    }

    in_v = ifmt_ctx_v->streams[v_idx];
    in_a = ifmt_ctx_a->streams[a_idx];

    // ── Allocate output context ───────────────────────────────────────────────
    if (avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, oPath) < 0 || !ofmt_ctx) {
        LOGE("Failed to alloc output context");
        ret = -6; goto cleanup;
    }

    // ── Create output streams ─────────────────────────────────────────────────
    out_v = avformat_new_stream(ofmt_ctx, NULL);
    out_a = avformat_new_stream(ofmt_ctx, NULL);
    if (!out_v || !out_a) {
        LOGE("Failed to create output streams");
        ret = -7; goto cleanup;
    }

    // avcodec_parameters_copy handles extradata internally (no manual malloc),
    // but it can still fail on OOM — check it.
    if (avcodec_parameters_copy(out_v->codecpar, in_v->codecpar) < 0 ||
        avcodec_parameters_copy(out_a->codecpar, in_a->codecpar) < 0) {
        LOGE("Failed to copy codec parameters");
        ret = -7; goto cleanup;
    }
    out_v->codecpar->codec_tag = 0; // let the muxer pick the right tag (e.g. vp09 for VP9-in-MP4)
    out_a->codecpar->codec_tag = 0;

    // Hint the input time bases to the muxer; avformat_write_header may still
    // adjust them, and the rescale below uses the final values.
    out_v->time_base = in_v->time_base;
    out_a->time_base = in_a->time_base;

    // ── Open output file ──────────────────────────────────────────────────────
    if (!(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        if (avio_open(&ofmt_ctx->pb, oPath, AVIO_FLAG_WRITE) < 0) {
            LOGE("Failed to open output file");
            ret = -8; goto cleanup;
        }
        file_opened = 1;
    }

    // AUTO_BSF inserts the filters the target container requires:
    // h264_mp4toannexb / hevc_mp4toannexb for Annex-B sources, and crucially
    // vp9_superframe for VP9. Keep it enabled for ALL codecs.
    ofmt_ctx->flags |= AVFMT_FLAG_AUTO_BSF;

    // Sources cut from DASH/HLS segments (Instagram/YouTube) often start at a
    // negative or very large timestamp; shift so the file starts at zero,
    // which strict Android players handle best.
    ofmt_ctx->avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_ZERO;

    if (avformat_write_header(ofmt_ctx, NULL) < 0) {
        LOGE("Failed to write header");
        ret = -9; goto cleanup;
    }

    // ── Interleaved muxing loop ───────────────────────────────────────────────
    while (!v_done || !a_done) {
        AVFormatContext *src;
        AVStream *in_st, *out_st;
        int target_idx, pick_video, got;
        int64_t track_ts;

        // Pick whichever track is furthest behind (classic remux interleave).
        pick_video = !v_done && (a_done ||
                                 av_compare_ts(v_dts, in_v->time_base, a_dts, in_a->time_base) <= 0);

        src        = pick_video ? ifmt_ctx_v : ifmt_ctx_a;
        in_st      = pick_video ? in_v       : in_a;
        out_st     = pick_video ? out_v      : out_a;
        target_idx = pick_video ? v_idx      : a_idx;

        // Skip non-target packets (e.g. subtitles in same container)
        got = 0;
        while (av_read_frame(src, pkt) >= 0) {
            if (pkt->stream_index == target_idx) { got = 1; break; }
            av_packet_unref(pkt);
        }

        if (!got) {
            if (pick_video) v_done = 1;
            else            a_done = 1;
            continue;
        }

        // Update the interleave tracker; some packets carry AV_NOPTS_VALUE,
        // which must not poison the av_compare_ts decision above.
        track_ts = (pkt->dts != AV_NOPTS_VALUE) ? pkt->dts : pkt->pts;
        if (track_ts != AV_NOPTS_VALUE) {
            if (pick_video) v_dts = track_ts;
            else            a_dts = track_ts;
        }

        // Rescale timestamps; PASS_MINMAX preserves AV_NOPTS_VALUE (INT64_MIN).
        pkt->pts = av_rescale_q_rnd(pkt->pts, in_st->time_base, out_st->time_base,
                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
        pkt->dts = av_rescale_q_rnd(pkt->dts, in_st->time_base, out_st->time_base,
                                    AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX);
        pkt->duration = av_rescale_q(pkt->duration, in_st->time_base, out_st->time_base);
        pkt->pos = -1;
        pkt->stream_index = out_st->index; // use actual index, not hardcoded 0/1

        if (av_interleaved_write_frame(ofmt_ctx, pkt) < 0) {
            LOGE("Error writing %s packet", pick_video ? "video" : "audio");
        }
        av_packet_unref(pkt);
    }

    // The trailer finalizes the index (moov for MP4); a failure here means a
    // broken file, so surface it instead of ignoring the return value.
    if (av_write_trailer(ofmt_ctx) < 0) {
        LOGE("Failed to write trailer");
        ret = -11;
    } else {
        LOGD("mergeAV finished successfully");
    }

    cleanup:
    if (pkt)        av_packet_free(&pkt);
    if (ifmt_ctx_v) avformat_close_input(&ifmt_ctx_v);
    if (ifmt_ctx_a) avformat_close_input(&ifmt_ctx_a);
    if (ofmt_ctx) {
        if (file_opened) avio_closep(&ofmt_ctx->pb);
        avformat_free_context(ofmt_ctx);
    }

    // Guarded: releasing a NULL chars pointer is undefined behavior.
    if (vPath) (*env)->ReleaseStringUTFChars(env, jVideoPath, vPath);
    if (aPath) (*env)->ReleaseStringUTFChars(env, jAudioPath, aPath);
    if (oPath) (*env)->ReleaseStringUTFChars(env, jOutPath, oPath);

    return ret;
}

Changes I made:

/*
 * Abridged remux variant posted as "changes I made" — elided sections are
 * marked "...". Not compilable as-is (oPath, out_v, out_a come from the
 * elided parts).
 */
JNIEXPORT jint JNICALL
Java_com_example_app_NativeMuxer_mergeAV(JNIEnv *env, jobject thiz, jstring jVideoPath, jstring jAudioPath, jstring jOutPath) {
    AVFormatContext *ifmt_ctx_v = NULL, *ifmt_ctx_a = NULL, *ofmt_ctx = NULL;
    AVPacket *pkt = av_packet_alloc();
    int v_idx = -1, a_idx = -1, ret = 0;
    
    // ... [Opening inputs and finding stream info] ...

    // Fix 1: Explicitly skip thumbnail streams
    // Picks the first video stream that is NOT cover art (ATTACHED_PIC).
    for (int i = 0; i < (int)ifmt_ctx_v->nb_streams; i++) {
        if (ifmt_ctx_v->streams[i]->disposition & AV_DISPOSITION_ATTACHED_PIC) continue;
        if (ifmt_ctx_v->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
            v_idx = i; break;
        }
    }

    // Detect VP9
    // NOTE(review): v_idx may still be -1 here if no suitable stream was
    // found — indexing streams[v_idx] would then be out of bounds; guard first.
    int is_vp = (ifmt_ctx_v->streams[v_idx]->codecpar->codec_id == AV_CODEC_ID_VP9);

    // NOTE(review): return value unchecked — ofmt_ctx may be NULL below.
    avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, oPath);

    // Create streams and copy parameters
    // ... [avformat_new_stream & avcodec_parameters_copy] ...
    out_v->codecpar->codec_tag = 0;
    out_a->codecpar->codec_tag = 0;

    // Fix 2: Skip AUTO_BSF for VP9 to avoid bitstream corruption
    // NOTE(review): likely counterproductive — AUTO_BSF appears to be on by
    // default in libavformat (confirm against your FFmpeg version), and for
    // VP9-in-MP4 it is what inserts the vp9_superframe filter; skipping it
    // may itself cause the black screen being debugged.
    if (!is_vp) {
        ofmt_ctx->flags |= AVFMT_FLAG_AUTO_BSF;
    }

    // Fix 3: Handle negative timestamps
    // Shifts timestamps so none are negative at mux time.
    ofmt_ctx->avoid_negative_ts = AVFMT_AVOID_NEG_TS_MAKE_NON_NEGATIVE;

    // NOTE(review): return value unchecked — header failure goes unnoticed.
    avformat_write_header(ofmt_ctx, NULL);

    // ... [Interleaved muxing loop with av_rescale_q_rnd] ...

    av_write_trailer(ofmt_ctx);
    // ... [Cleanup] ...
    return ret;
}
0
Apr 23 at 6:06 AM
User AvatarHARRSH BERMANN
#java#android#ffmpeg#java-native-interface#android-ffmpeg

No answer found for this question yet.