Merge commit '7621e2f8dec938cf48181c8b10afc9b01f444e68' into beta

This commit is contained in:
Ilya Laktyushin
2025-12-06 02:17:48 +04:00
commit 8344b97e03
28070 changed files with 7995182 additions and 0 deletions
+20
View File
@@ -0,0 +1,20 @@
load("@build_bazel_rules_swift//swift:swift.bzl", "swift_library")
# Bazel target that builds the ConvertOpusToAAC Swift module.
swift_library(
name = "ConvertOpusToAAC",
module_name = "ConvertOpusToAAC",
# Every Swift file below Sources/ is part of the module.
srcs = glob([
"Sources/**/*.swift",
]),
# Compiler warnings fail the build.
copts = [
"-warnings-as-errors",
],
# Modules this target is built against.
deps = [
"//submodules/SSignalKit/SwiftSignalKit:SwiftSignalKit",
"//submodules/FFMpegBinding:FFMpegBinding",
"//submodules/MediaPlayer:UniversalMediaPlayer",
],
# Any package in the workspace may depend on this target.
visibility = [
"//visibility:public",
],
)
@@ -0,0 +1,69 @@
import Foundation
import UniversalMediaPlayer
import AVFoundation
import SwiftSignalKit
/// Re-encodes the audio file at `sourcePath` into a mono AAC `.m4a` file.
///
/// The work runs on a private `Queue`. Sample buffers are pulled from a
/// `SoftwareAudioSource` and appended to an `AVAssetWriter` configured for
/// 48 kHz, 32 kbit/s, single-channel MPEG-4 AAC.
///
/// - Parameters:
///   - sourcePath: Path of the audio file to read.
///   - allocateTempFile: Called once, on the worker queue, to obtain the path
///     the `.m4a` output is written to.
/// - Returns: A signal that emits the output path when the writer finished
///   successfully, or `nil` when the writer could not be created, could not
///   start, or did not end in the `.completed` state; it then completes.
///   Disposing the signal stops feeding samples and finalizes what was written.
public func convertOpusToAAC(sourcePath: String, allocateTempFile: @escaping () -> String) -> Signal<String?, NoError> {
    return Signal { subscriber in
        // NOTE(review): set from the disposable and read on `queue` without
        // synchronization — confirm this is acceptable for the callers.
        var isCancelled = false
        let queue = Queue()

        queue.async {
            do {
                let audioSource = SoftwareAudioSource(path: sourcePath)
                let outputPath = allocateTempFile()

                let assetWriter = try AVAssetWriter(outputURL: URL(fileURLWithPath: outputPath), fileType: .m4a)

                // Zero-initialised layout tagged as mono, passed to the encoder as raw bytes.
                var channelLayout = AudioChannelLayout()
                memset(&channelLayout, 0, MemoryLayout<AudioChannelLayout>.size)
                channelLayout.mChannelLayoutTag = kAudioChannelLayoutTag_Mono

                let outputSettings: [String: Any] = [
                    AVFormatIDKey: Int(kAudioFormatMPEG4AAC),
                    AVSampleRateKey: 48000,
                    AVEncoderBitRateKey: 32000,
                    AVNumberOfChannelsKey: 1,
                    AVChannelLayoutKey: NSData(bytes: &channelLayout, length: MemoryLayout<AudioChannelLayout>.size)
                ]

                let audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: outputSettings)
                assetWriter.add(audioInput)

                // Starting a session on a writer that failed to start is a programming
                // error in AVFoundation, so bail out with `nil` instead.
                guard assetWriter.startWriting() else {
                    print("Error: \(String(describing: assetWriter.error))")
                    subscriber.putNext(nil)
                    subscriber.putCompletion()
                    return
                }
                assetWriter.startSession(atSourceTime: .zero)

                let finishWriting: () -> Void = {
                    assetWriter.finishWriting(completionHandler: {
                        // Only hand out the path when the file was actually finalized.
                        if assetWriter.status == .completed {
                            subscriber.putNext(outputPath)
                        } else {
                            print("Error: \(String(describing: assetWriter.error))")
                            subscriber.putNext(nil)
                        }
                        subscriber.putCompletion()
                    })
                }

                audioInput.requestMediaDataWhenReady(on: queue.queue, using: {
                    guard audioInput.isReadyForMoreMediaData else {
                        return
                    }

                    if !isCancelled, let sampleBuffer = audioSource.readSampleBuffer() {
                        // A rejected buffer ends the conversion; the writer status decides the result.
                        if !audioInput.append(sampleBuffer) {
                            audioInput.markAsFinished()
                            finishWriting()
                        }
                    } else {
                        // Source exhausted or the subscriber went away.
                        audioInput.markAsFinished()
                        finishWriting()
                    }
                })
            } catch let e {
                print("Error: \(e)")
                subscriber.putNext(nil)
                subscriber.putCompletion()
            }
        }

        return ActionDisposable {
            isCancelled = true
        }
    }
}
@@ -0,0 +1,18 @@
load("@build_bazel_rules_swift//swift:swift.bzl", "swift_library")
# Bazel target that builds the LocalAudioTranscription Swift module.
swift_library(
name = "LocalAudioTranscription",
module_name = "LocalAudioTranscription",
# Every Swift file below Sources/ is part of the module.
srcs = glob([
"Sources/**/*.swift",
]),
# Compiler warnings fail the build.
copts = [
"-warnings-as-errors",
],
# Modules this target is built against.
deps = [
"//submodules/SSignalKit/SwiftSignalKit:SwiftSignalKit",
],
# Any package in the workspace may depend on this target.
visibility = [
"//visibility:public",
],
)
@@ -0,0 +1,137 @@
import Foundation
import SwiftSignalKit
import Speech
/// Speech recognizers that were already created, keyed by locale identifier.
/// Values are stored as `NSObject` and cast back to `SFSpeechRecognizer` on lookup
/// (presumably so the declaration does not need an availability annotation — confirm).
/// Read and written from the main-queue block inside `transcribeAudio(path:locale:)`.
private var sharedRecognizers: [String: NSObject] = [:]
/// Outcome of one recognition callback for a single locale.
private struct TranscriptionResult {
/// Formatted string of the recognizer's best transcription.
var text: String
/// Mean of the per-segment confidences of the best transcription.
var confidence: Float
/// Whether the recognizer reported this result as final.
var isFinal: Bool
}
/// Runs speech recognition on the audio file at `path` using the recognizer for `locale`.
///
/// Authorization is requested first. Recognizers are cached per locale in
/// `sharedRecognizers`, and a newly created recognizer is forced into on-device mode.
/// The audio is copied to a temporary `.m4a` file for the request (presumably so the
/// URL carries an extension the recognizer accepts — confirm); that copy is removed
/// once recognition ends or the signal is disposed.
///
/// - Parameters:
///   - path: Path of the audio file to transcribe.
///   - locale: Locale identifier used to create or look up the recognizer.
/// - Returns: A signal, started on the main queue, that emits a `TranscriptionResult`
///   for every recognizer callback carrying a result and completes on the final one.
///   It emits `nil` and completes when authorization is not granted, the recognizer
///   is unavailable, the file cannot be copied, recognition fails, or the OS is older
///   than iOS 13.
private func transcribeAudio(path: String, locale: String) -> Signal<TranscriptionResult?, NoError> {
    return Signal { subscriber in
        let disposable = MetaDisposable()

        if #available(iOS 13.0, *) {
            SFSpeechRecognizer.requestAuthorization { status in
                Queue.mainQueue().async {
                    switch status {
                    case .notDetermined, .restricted, .denied:
                        subscriber.putNext(nil)
                        subscriber.putCompletion()
                    case .authorized:
                        let speechRecognizer: SFSpeechRecognizer
                        if let sharedRecognizer = sharedRecognizers[locale] as? SFSpeechRecognizer {
                            speechRecognizer = sharedRecognizer
                        } else {
                            guard let speechRecognizerValue = SFSpeechRecognizer(locale: Locale(identifier: locale)), speechRecognizerValue.isAvailable else {
                                subscriber.putNext(nil)
                                subscriber.putCompletion()
                                return
                            }
                            speechRecognizerValue.defaultTaskHint = .dictation
                            // On-device recognition is always requested. The former per-locale
                            // branch was immediately overwritten by this assignment.
                            speechRecognizerValue.supportsOnDeviceRecognition = true
                            sharedRecognizers[locale] = speechRecognizerValue
                            speechRecognizer = speechRecognizerValue
                        }

                        let tempFilePath = NSTemporaryDirectory() + "/\(UInt64.random(in: 0 ... UInt64.max)).m4a"
                        do {
                            try FileManager.default.copyItem(atPath: path, toPath: tempFilePath)
                        } catch let e {
                            // Without the copy there is nothing for the recognizer to read.
                            print("transcribeAudio: locale: \(locale), error: \(e)")
                            subscriber.putNext(nil)
                            subscriber.putCompletion()
                            return
                        }

                        // Best-effort cleanup; safe to call more than once.
                        let removeTempFile: () -> Void = {
                            let _ = try? FileManager.default.removeItem(atPath: tempFilePath)
                        }

                        let request = SFSpeechURLRecognitionRequest(url: URL(fileURLWithPath: tempFilePath))
                        if #available(iOS 16.0, *) {
                            request.addsPunctuation = true
                        }
                        request.requiresOnDeviceRecognition = speechRecognizer.supportsOnDeviceRecognition
                        request.shouldReportPartialResults = false

                        let task = speechRecognizer.recognitionTask(with: request, resultHandler: { result, error in
                            if let result = result {
                                let segments = result.bestTranscription.segments
                                // Average the segment confidences; an empty transcription scores 0
                                // instead of dividing by zero.
                                let confidence: Float
                                if segments.isEmpty {
                                    confidence = 0.0
                                } else {
                                    confidence = segments.reduce(Float(0.0)) { $0 + $1.confidence } / Float(segments.count)
                                }

                                subscriber.putNext(TranscriptionResult(text: result.bestTranscription.formattedString, confidence: confidence, isFinal: result.isFinal))

                                if result.isFinal {
                                    removeTempFile()
                                    subscriber.putCompletion()
                                }
                            } else {
                                print("transcribeAudio: locale: \(locale), error: \(String(describing: error))")

                                removeTempFile()
                                subscriber.putNext(nil)
                                subscriber.putCompletion()
                            }
                        })

                        disposable.set(ActionDisposable {
                            task.cancel()
                            removeTempFile()
                        })
                    @unknown default:
                        subscriber.putNext(nil)
                        subscriber.putCompletion()
                    }
                }
            }
        } else {
            subscriber.putNext(nil)
            subscriber.putCompletion()
        }

        return disposable
    }
    |> runOn(.mainQueue())
}
/// Transcription produced by `transcribeAudio(path:appLocale:)`.
public struct LocallyTranscribedAudio {
/// Transcribed text of the selected recognition result.
public var text: String
/// Whether the selected recognition result was reported as final.
public var isFinal: Bool
}
/// Transcribes the audio file at `path` with the device's current locale.
///
/// One recognition signal is built per candidate locale; the signals are chained so
/// each starts after the previous one has emitted, and their results are gathered
/// into an array. The result with the highest confidence is returned.
///
/// - Parameters:
///   - path: Path of the audio file to transcribe.
///   - appLocale: Currently unused; recognition always runs with
///     `Locale.current.identifier`. Kept so existing call sites keep compiling.
/// - Returns: A signal emitting the most confident transcription, or `nil` when no
///   locale produced a result.
public func transcribeAudio(path: String, appLocale: String) -> Signal<LocallyTranscribedAudio?, NoError> {
    // The candidate list only ever contained the current locale: the former
    // duplicate check ran against an empty array and the "en-US" fallback was
    // unreachable.
    let locales: [String] = [Locale.current.identifier]

    let initial: Signal<[TranscriptionResult?], NoError> = .single([])
    let collected = locales.reduce(initial) { accumulated, locale -> Signal<[TranscriptionResult?], NoError> in
        let recognition = transcribeAudio(path: path, locale: locale)
        return accumulated |> mapToSignal { partial -> Signal<[TranscriptionResult?], NoError> in
            return recognition |> map { next -> [TranscriptionResult?] in
                return partial + [next]
            }
        }
    }

    return collected
    |> map { results -> LocallyTranscribedAudio? in
        // Drop failed locales and pick the most confident remaining result.
        let ranked = results.compactMap({ $0 }).sorted(by: { lhs, rhs in
            return lhs.confidence > rhs.confidence
        })
        guard let best = ranked.first else {
            return nil
        }
        return LocallyTranscribedAudio(text: best.text, isFinal: best.isFinal)
    }
}