/* encca_aac.c Copyright (c) 2003-2025 HandBrake Team This file is part of the HandBrake source code Homepage: . It may be used under the terms of the GNU General Public License v2. For full terms see the file COPYING file or visit http://www.gnu.org/licenses/gpl-2.0.html */ #include "handbrake/handbrake.h" #include "handbrake/audio_remap.h" #include "handbrake/hbffmpeg.h" #include "handbrake/extradata.h" #include #include enum AAC_MODE { AAC_MODE_LC, AAC_MODE_HE }; int encCoreAudioInitLC(hb_work_object_t*, hb_job_t*); int encCoreAudioInitHE(hb_work_object_t*, hb_job_t*); int encCoreAudioInit(hb_work_object_t*, hb_job_t*, enum AAC_MODE mode); int encCoreAudioWork(hb_work_object_t*, hb_buffer_t**, hb_buffer_t**); void encCoreAudioClose(hb_work_object_t*); hb_work_object_t hb_encca_aac = { WORK_ENC_CA_AAC, "AAC encoder (Apple)", encCoreAudioInitLC, encCoreAudioWork, encCoreAudioClose }; hb_work_object_t hb_encca_haac = { WORK_ENC_CA_HAAC, "HE-AAC encoder (Apple)", encCoreAudioInitHE, encCoreAudioWork, encCoreAudioClose }; struct hb_work_private_s { uint8_t *buf; hb_job_t *job; hb_list_t *list; AudioConverterRef converter; unsigned long isamples, isamplesiz, omaxpacket, nchannels; int64_t first_pts; int64_t delay; uint64_t samples, ibytes; Float64 osamplerate; int input_done; hb_audio_remap_t *remap; }; #define MP4ESDescrTag 0x03 #define MP4DecConfigDescrTag 0x04 #define MP4DecSpecificDescrTag 0x05 // based off of mov_mp4_read_descr_len from mov.c in ffmpeg's libavformat static int readDescrLen(UInt8 **buffer) { int len = 0; int count = 4; while (count--) { int c = *(*buffer)++; len = (len << 7) | (c & 0x7f); if (!(c & 0x80)) break; } return len; } // based off of mov_mp4_read_descr from mov.c in ffmpeg's libavformat static int readDescr(UInt8 **buffer, int *tag) { *tag = *(*buffer)++; return readDescrLen(buffer); } // based off of mov_read_esds from mov.c in ffmpeg's libavformat static long ReadESDSDescExt(void* descExt, UInt8 **buffer, UInt32 *size, int versionFlags) { UInt8 *esds = (UInt8*)descExt; int tag, len; *size = 0; if (versionFlags) esds += 4; // version + flags readDescr(&esds, &tag); esds += 2; // ID if (tag == MP4ESDescrTag) esds++; // priority readDescr(&esds, &tag); if (tag == MP4DecConfigDescrTag) { esds++; // object type id esds++; // stream type esds += 3; // buffer size db esds += 4; // max bitrate esds += 4; // average bitrate len = readDescr(&esds, &tag); if (tag == MP4DecSpecificDescrTag) { *buffer = calloc(1, len + 8); if (*buffer) { memcpy(*buffer, esds, len); *size = len; } } } return noErr; } /*********************************************************************** * hb_work_encCoreAudio_init switches *********************************************************************** * **********************************************************************/ int encCoreAudioInitLC(hb_work_object_t *w, hb_job_t *job) { return encCoreAudioInit(w, job, AAC_MODE_LC); } int encCoreAudioInitHE(hb_work_object_t *w, hb_job_t *job) { return encCoreAudioInit(w, job, AAC_MODE_HE); } /*********************************************************************** * hb_work_encCoreAudio_init *********************************************************************** * **********************************************************************/ int encCoreAudioInit(hb_work_object_t *w, hb_job_t *job, enum AAC_MODE mode) { hb_work_private_t *pv = calloc(1, sizeof(hb_work_private_t)); hb_audio_t *audio = w->audio; AudioStreamBasicDescription input, output; UInt32 tmp, tmpsiz = sizeof(tmp); OSStatus err; w->private_data = pv; pv->job = job; pv->first_pts = AV_NOPTS_VALUE; // pass the number of channels used into the private work data pv->nchannels = hb_mixdown_get_discrete_channel_count(audio->config.out.mixdown); bzero(&input, sizeof(AudioStreamBasicDescription)); input.mSampleRate = (Float64)audio->config.out.samplerate; input.mFormatID = kAudioFormatLinearPCM; input.mFormatFlags = (kLinearPCMFormatFlagIsFloat|kAudioFormatFlagsNativeEndian); input.mBytesPerPacket = 4 * pv->nchannels; input.mFramesPerPacket = 1; input.mBytesPerFrame = input.mBytesPerPacket * input.mFramesPerPacket; input.mChannelsPerFrame = pv->nchannels; input.mBitsPerChannel = 32; bzero(&output, sizeof(AudioStreamBasicDescription)); switch (mode) { case AAC_MODE_HE: output.mFormatID = kAudioFormatMPEG4AAC_HE; break; case AAC_MODE_LC: default: output.mFormatID = kAudioFormatMPEG4AAC; break; } output.mSampleRate = (Float64)audio->config.out.samplerate; output.mChannelsPerFrame = pv->nchannels; // let CoreAudio decide the rest // initialise encoder err = AudioConverterNew(&input, &output, &pv->converter); if (err != noErr) { // Retry without the samplerate bzero(&output, sizeof(AudioStreamBasicDescription)); switch (mode) { case AAC_MODE_HE: output.mFormatID = kAudioFormatMPEG4AAC_HE; break; case AAC_MODE_LC: default: output.mFormatID = kAudioFormatMPEG4AAC; break; } output.mChannelsPerFrame = pv->nchannels; err = AudioConverterNew(&input, &output, &pv->converter); if (err != noErr) { hb_log("Error creating an AudioConverter err=%"PRId64" output.mBytesPerFrame=%"PRIu64"", (int64_t)err, (uint64_t)output.mBytesPerFrame); *job->done_error = HB_ERROR_UNKNOWN; *job->die = 1; return -1; } } // set encoder quality to maximum tmp = kAudioConverterQuality_Max; AudioConverterSetProperty(pv->converter, kAudioConverterCodecQuality, sizeof(tmp), &tmp); if (audio->config.out.bitrate > 0) { // set encoder bitrate control mode to constrained variable tmp = kAudioCodecBitRateControlMode_VariableConstrained; AudioConverterSetProperty(pv->converter, kAudioCodecPropertyBitRateControlMode, sizeof(tmp), &tmp); // get available bitrates AudioValueRange *bitrates; ssize_t bitrateCounts; AudioConverterGetPropertyInfo(pv->converter, kAudioConverterApplicableEncodeBitRates, &tmpsiz, NULL); bitrates = malloc(tmpsiz); AudioConverterGetProperty(pv->converter, kAudioConverterApplicableEncodeBitRates, &tmpsiz, bitrates); bitrateCounts = tmpsiz / sizeof(AudioValueRange); // set bitrate tmp = audio->config.out.bitrate * 1000; if (tmp < bitrates[0].mMinimum) tmp = bitrates[0].mMinimum; if (tmp > bitrates[bitrateCounts-1].mMinimum) tmp = bitrates[bitrateCounts-1].mMinimum; free(bitrates); if (tmp != audio->config.out.bitrate * 1000) { hb_log("encCoreAudioInit: sanitizing track %d audio bitrate %d to %"PRIu32"", audio->config.out.track, audio->config.out.bitrate, tmp / 1000); } AudioConverterSetProperty(pv->converter, kAudioConverterEncodeBitRate, sizeof(tmp), &tmp); } else if (audio->config.out.quality >= 0) { if (mode != AAC_MODE_LC) { hb_error("encCoreAudioInit: internal error, VBR set but not applicable"); return 1; } // set encoder bitrate control mode to variable tmp = kAudioCodecBitRateControlMode_Variable; AudioConverterSetProperty(pv->converter, kAudioCodecPropertyBitRateControlMode, sizeof(tmp), &tmp); // set quality tmp = audio->config.out.quality; AudioConverterSetProperty(pv->converter, kAudioCodecPropertySoundQualityForVBR, sizeof(tmp), &tmp); } else { hb_error("encCoreAudioInit: internal error, bitrate/quality not set"); return 1; } // get real input tmpsiz = sizeof(input); AudioConverterGetProperty(pv->converter, kAudioConverterCurrentInputStreamDescription, &tmpsiz, &input); // get real output tmpsiz = sizeof(output); AudioConverterGetProperty(pv->converter, kAudioConverterCurrentOutputStreamDescription, &tmpsiz, &output); // set sizes pv->isamplesiz = input.mBytesPerPacket; pv->isamples = output.mFramesPerPacket; pv->osamplerate = output.mSampleRate; audio->config.out.samples_per_frame = pv->isamples; // channel remapping AVChannelLayout out_layout, mixdown_layout; hb_ff_mixdown_ch_xlat(&mixdown_layout, audio->config.out.mixdown, NULL); hb_audio_remap_map_channel_layout(&hb_aac_chan_map, &out_layout, &mixdown_layout); pv->remap = hb_audio_remap_init(AV_SAMPLE_FMT_FLT, &out_layout, &mixdown_layout); av_channel_layout_uninit(&out_layout); av_channel_layout_uninit(&mixdown_layout); if (pv->remap == NULL) { hb_error("encCoreAudioInit: hb_audio_remap_init() failed"); } // get maximum output size AudioConverterGetProperty(pv->converter, kAudioConverterPropertyMaximumOutputPacketSize, &tmpsiz, &tmp); pv->omaxpacket = tmp; // get magic cookie (elementary stream descriptor) AudioConverterGetPropertyInfo(pv->converter, kAudioConverterCompressionMagicCookie, &tmpsiz, NULL); UInt8 *magicCookie = malloc(tmpsiz); AudioConverterGetProperty(pv->converter, kAudioConverterCompressionMagicCookie, &tmpsiz, magicCookie); // CoreAudio returns a complete ESDS, but we only need // the DecoderSpecific info. UInt8 *buffer = NULL; ReadESDSDescExt(magicCookie, &buffer, &tmpsiz, 0); hb_set_extradata(w->extradata, buffer, tmpsiz); free(buffer); free(magicCookie); AudioConverterPrimeInfo primeInfo; UInt32 piSize = sizeof(primeInfo); bzero(&primeInfo, piSize); AudioConverterGetProperty(pv->converter, kAudioConverterPrimeInfo, &piSize, &primeInfo); pv->delay = primeInfo.leadingFrames * 90000LL / pv->osamplerate; *w->init_delay = pv->delay; pv->list = hb_list_init(); pv->buf = NULL; return 0; } /*********************************************************************** * Close *********************************************************************** * **********************************************************************/ void encCoreAudioClose(hb_work_object_t *w) { hb_work_private_t *pv = w->private_data; if (pv != NULL) { if (pv->converter) { AudioConverterDispose(pv->converter); } if (pv->buf != NULL) { free(pv->buf); } if (pv->remap != NULL) { hb_audio_remap_free(pv->remap); } hb_list_empty(&pv->list); free(pv); w->private_data = NULL; } } /* Called whenever necessary by AudioConverterFillComplexBuffer */ static OSStatus inInputDataProc(AudioConverterRef converter, UInt32 *npackets, AudioBufferList *buffers, AudioStreamPacketDescription **ignored, void *userdata) { hb_work_private_t *pv = userdata; if (!pv->ibytes) { *npackets = 0; if (pv->input_done) { // EOF on input buffers->mBuffers[0].mDataByteSize = 0; return noErr; } else { // Not enough data available return 1; } } if (pv->buf != NULL) { free(pv->buf); } buffers->mBuffers[0].mDataByteSize = MIN(pv->ibytes, pv->isamplesiz * *npackets); pv->buf = calloc(1, buffers->mBuffers[0].mDataByteSize); buffers->mBuffers[0].mData = pv->buf; if (hb_list_bytes(pv->list) >= buffers->mBuffers[0].mDataByteSize) { hb_list_getbytes(pv->list, buffers->mBuffers[0].mData, buffers->mBuffers[0].mDataByteSize, NULL, NULL); } else { *npackets = 0; return 1; } *npackets = buffers->mBuffers[0].mDataByteSize / pv->isamplesiz; pv->ibytes -= buffers->mBuffers[0].mDataByteSize; hb_audio_remap(pv->remap, (uint8_t**)(&buffers->mBuffers[0].mData), (int)(*npackets)); return noErr; } /*********************************************************************** * Encode *********************************************************************** * **********************************************************************/ static hb_buffer_t* Encode(hb_work_object_t *w) { hb_work_private_t *pv = w->private_data; UInt32 npackets = 1; // Check if we need more data or we have already got to EOF // if so, we need to call the audio converter again even // without data to get out the remaining packets. if (pv->input_done != 1 && (pv->ibytes = hb_list_bytes(pv->list)) < pv->isamples * pv->isamplesiz) { return NULL; } hb_buffer_t *obuf; AudioStreamPacketDescription odesc = { 0 }; AudioBufferList obuflist = { .mNumberBuffers = 1, .mBuffers = { { .mNumberChannels = pv->nchannels } }, }; obuf = hb_buffer_init(pv->omaxpacket); obuflist.mBuffers[0].mDataByteSize = obuf->size; obuflist.mBuffers[0].mData = obuf->data; OSStatus err = AudioConverterFillComplexBuffer(pv->converter, inInputDataProc, pv, &npackets, &obuflist, &odesc); if (err != noErr && err != 1) { hb_log("encCoreAudio: unexpected error in AudioConverterFillComplexBuffer()"); } // only drop the output buffer if it's actually empty if (!npackets || odesc.mDataByteSize <= 0) { hb_buffer_close(&obuf); return NULL; } obuf->size = odesc.mDataByteSize; obuf->s.start = pv->first_pts - pv->delay + 90000LL * pv->samples / pv->osamplerate; pv->samples += pv->isamples; obuf->s.stop = pv->first_pts - pv->delay + 90000LL * pv->samples / pv->osamplerate; obuf->s.duration = (double)90000 * pv->isamples / pv->osamplerate; obuf->s.type = AUDIO_BUF; obuf->s.frametype = HB_FRAME_AUDIO; return obuf; } static void Flush(hb_work_object_t *w, hb_buffer_list_t * list) { // Nothing to flush if all we got was a EOF if (w->private_data->first_pts == AV_NOPTS_VALUE) { return; } hb_buffer_t *buf; while ((buf = Encode(w))) { hb_buffer_list_append(list, buf); } } /*********************************************************************** * Work *********************************************************************** * **********************************************************************/ int encCoreAudioWork(hb_work_object_t *w, hb_buffer_t **buf_in, hb_buffer_t **buf_out) { hb_work_private_t *pv = w->private_data; hb_buffer_t * in = *buf_in; hb_buffer_t * buf; hb_buffer_list_t list; hb_buffer_list_clear(&list); if (in->s.flags & HB_BUF_FLAG_EOF) { /* EOF on input - send it downstream & say we're done */ pv->input_done = 1; Flush(w, &list); hb_buffer_list_append(&list, hb_buffer_eof_init()); *buf_out = hb_buffer_list_clear(&list); return HB_WORK_DONE; } if (pv->first_pts == AV_NOPTS_VALUE) { pv->first_pts = in->s.start; } hb_list_add(pv->list, in); *buf_in = NULL; while ((buf = Encode(w))) { hb_buffer_list_append(&list, buf); } *buf_out = hb_buffer_list_clear(&list); return HB_WORK_OK; }