Concatenating MP4 video and audio streams using Android media classes

I need to merge multiple MP4 files into a single file. The source files are all encoded with exactly the same video and audio codecs and parameters, and I would like to avoid decoding and re-encoding the streams in order to merge them. As a test, I tried simply concatenating the raw encoded frames from the source videos using MediaMuxer, adjusting the presentation times as necessary. It seems to work: the resulting MP4 file plays in every video player I have tested, and FFmpeg seems happy with the merged file. Since I am not an expert on the MP4 file structure, I am wondering whether there is an issue I am not anticipating. Here is the code I am using:

/**
 * Plain holder describing one source file that takes part in a merge: where
 * the file lives, how long it plays, and which of its tracks carry the video
 * and the audio.
 */
private static class VideoSegment {

    /** Source file this segment is read from. */
    File file;

    /** Playback length of the segment; mergeVideoFiles stores the video track's KEY_DURATION here. */
    long duration;

    /** Index of the video track inside {@link #file}. */
    int videoTrackIndex;

    /** Index of the audio track inside {@link #file}. */
    int audioTrackIndex;

    /**
     * Creates a segment description.
     *
     * @param file            source file of the segment
     * @param duration        playback length of the segment
     * @param videoTrackIndex index of the video track in the file
     * @param audioTrackIndex index of the audio track in the file
     */
    public VideoSegment(File file, long duration, int videoTrackIndex, int audioTrackIndex) {
        this.audioTrackIndex = audioTrackIndex;
        this.videoTrackIndex = videoTrackIndex;
        this.duration = duration;
        this.file = file;
    }
}


/**
 * Concatenates the given MP4 files into a single "merged.mp4" without
 * re-encoding: the encoded samples of each file's video and audio track are
 * copied into one muxer, with presentation times shifted by the accumulated
 * duration of the preceding files.
 *
 * <p>The output formats are copied from the first file, so all inputs are
 * assumed to share the same codecs and parameters (not verified here).
 *
 * @param files source files, in playback order; must contain at least one file
 * @return the merged file, located in the application's external files directory
 * @throws IOException              if a source cannot be opened or the muxer cannot be created
 * @throws IllegalArgumentException if {@code files} is null or empty, or a video
 *                                  track does not report its duration
 */
private File mergeVideoFiles(List<File> files) throws IOException, IllegalArgumentException {

    if (files == null || files.isEmpty()) {
        throw new IllegalArgumentException("At least one source file is required for merging");
    }

    final List<VideoSegment> segments = new ArrayList<>(files.size());
    int videoRotationHint = 0;
    long totalDuration = 0;
    MediaFormat sourceVideoFormat = null, sourceAudioFormat = null;

    // First pass: collect track indices and durations, remember the formats of the first file.
    for (final File segmentFile : files) {

        final MediaExtractor extractor = new MediaExtractor();
        try {
            extractor.setDataSource(segmentFile.getAbsolutePath());

            final int videoTrackIndex = this.getVideoTrackIndex(extractor);
            final MediaFormat videoFormat = extractor.getTrackFormat(videoTrackIndex);
            if (!videoFormat.containsKey(MediaFormat.KEY_DURATION)) {
                // Without a duration the time offset of the following segments cannot be computed.
                throw new IllegalArgumentException("Video track reports no duration: " + segmentFile);
            }
            if (sourceVideoFormat == null) {
                sourceVideoFormat = videoFormat;
                // The rotation key is optional; reading a missing key fails, so default to 0 degrees.
                if (videoFormat.containsKey(MediaFormat.KEY_ROTATION)) {
                    videoRotationHint = videoFormat.getInteger(MediaFormat.KEY_ROTATION);
                }
            }
            final long segmentDuration = videoFormat.getLong(MediaFormat.KEY_DURATION);
            totalDuration += segmentDuration;

            final int audioTrackIndex = this.getAudioTrackIndex(extractor);
            if (sourceAudioFormat == null) {
                sourceAudioFormat = extractor.getTrackFormat(audioTrackIndex);
            }

            segments.add(new VideoSegment(segmentFile, segmentDuration, videoTrackIndex, audioTrackIndex));
        } finally {
            // Release the native extractor even when probing the file fails.
            extractor.release();
        }
    }

    // simply create a copy of the video and audio formats, then adjust the duration
    final MediaFormat outputVideoFormat = new MediaFormat(sourceVideoFormat);
    final MediaFormat outputAudioFormat = new MediaFormat(sourceAudioFormat);

    outputVideoFormat.setLong(MediaFormat.KEY_DURATION, totalDuration);
    outputAudioFormat.setLong(MediaFormat.KEY_DURATION, totalDuration);

    final File mergedFile = new File(this.reactContext.getApplicationContext().getExternalFilesDir(null), "merged.mp4");
    final MediaMuxer muxer = new MediaMuxer(mergedFile.getPath(), MediaMuxer.OutputFormat.MUXER_OUTPUT_MPEG_4);
    boolean completed = false;
    try {
        final int outputVideoTrackIndex = muxer.addTrack(outputVideoFormat);
        final int outputAudioTrackIndex = muxer.addTrack(outputAudioFormat);

        muxer.setOrientationHint(videoRotationHint);
        muxer.start();

        // Second pass: copy the samples of every segment, shifted by the duration of its predecessors.
        long segmentOffset = 0;
        for (final VideoSegment segment : segments) {

            final MediaExtractor extractor = new MediaExtractor();
            try {
                extractor.setDataSource(segment.file.getAbsolutePath());

                extractor.selectTrack(segment.videoTrackIndex);
                this.copyTrackData(extractor, muxer, segmentOffset, outputVideoTrackIndex);

                // Deselect the exhausted video track and rewind, so the audio pass reads
                // only audio samples and starts from the beginning of the track.
                extractor.unselectTrack(segment.videoTrackIndex);
                extractor.selectTrack(segment.audioTrackIndex);
                extractor.seekTo(0, MediaExtractor.SEEK_TO_CLOSEST_SYNC);
                this.copyTrackData(extractor, muxer, segmentOffset, outputAudioTrackIndex);
            } finally {
                extractor.release();
            }

            segmentOffset += segment.duration;
        }

        muxer.stop();
        completed = true;
    } finally {
        try {
            muxer.release();
        } catch (IllegalStateException releaseFailure) {
            // When the merge already failed, let that original exception propagate
            // instead of replacing it with a secondary failure from the cleanup.
            if (completed) {
                throw releaseFailure;
            }
        }
    }

    return mergedFile;
}

/**
 * Copies all remaining samples of the extractor's currently selected track
 * into one track of the muxer, without decoding them.
 *
 * @param extractor        source positioned at the first sample to copy, with the track already selected
 * @param muxer            started muxer that receives the samples
 * @param startOffset      value added to every sample time to obtain the output presentation time
 * @param outputTrackIndex muxer track the samples are written to
 * @throws IllegalArgumentException if the extractor or muxer rejects a sample or buffer
 */
private void copyTrackData(final MediaExtractor extractor, final MediaMuxer muxer, final long startOffset, final int outputTrackIndex) throws IllegalArgumentException {

    final MediaCodec.BufferInfo bufferInfo = new MediaCodec.BufferInfo();
    // NOTE(review): fixed 1 MiB buffer; presumably large enough for the expected samples -
    // confirm against the largest frame size of the source material.
    final ByteBuffer inputBuffer = ByteBuffer.allocate(1024 * 1024);

    while (true) {
        final int bytesRead = extractor.readSampleData(inputBuffer, 0);
        if (bytesRead < 0) {
            // A negative size signals that no more samples are available.
            break;
        }

        if (bytesRead > 0) {
            // Extractor sample flags and codec buffer flags use different bit values
            // (e.g. the extractor's partial-frame bit equals the buffer's end-of-stream bit),
            // so translate instead of copying the raw value. Only the sync marker is carried over.
            final int sampleFlags = extractor.getSampleFlags();
            final int bufferFlags = (sampleFlags & MediaExtractor.SAMPLE_FLAG_SYNC) != 0
                    ? MediaCodec.BUFFER_FLAG_KEY_FRAME
                    : 0;

            bufferInfo.set(0, bytesRead, startOffset + extractor.getSampleTime(), bufferFlags);
            muxer.writeSampleData(outputTrackIndex, inputBuffer, bufferInfo);
        }
        // An empty sample is skipped rather than treated as the end of the track.

        if (!extractor.advance()) {
            break;
        }
    }

}


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source