Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ruby : Make context accept initial parameters, API to retrieve a segment and more #2749

Open
wants to merge 30 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
c1ebdce
Fix type signature for Whisper.log_set
KitaitiMakoto Dec 30, 2024
5a1745e
Use cache file for model when offline
KitaitiMakoto Jan 2, 2025
af27378
Extract ruby_whisper_transcribe() into a file
KitaitiMakoto Jan 2, 2025
d9b09fa
Extract Whisper::Error
KitaitiMakoto Jan 2, 2025
61c6cf3
Use FileList for ext/*.{c,cpp,h}
KitaitiMakoto Jan 2, 2025
f3c71da
Extract Whisper::Segment
KitaitiMakoto Jan 2, 2025
7d2dc07
Extract Whisper::Model
KitaitiMakoto Jan 2, 2025
e3a8935
Extract Whisper::Params
KitaitiMakoto Jan 2, 2025
ab1353a
Extract Whisper::Context
KitaitiMakoto Jan 2, 2025
e11f598
Extract log_callback function
KitaitiMakoto Jan 2, 2025
88e65e2
Write base code in C rather than C++
KitaitiMakoto Jan 2, 2025
c436804
Use chdir instead of Dir.chdir in Rakefile
KitaitiMakoto Jan 5, 2025
5ab326d
Define alloc func for Whisper::Model
KitaitiMakoto Jan 6, 2025
efa18bb
Define Whisper::Params' callback and user data reader
KitaitiMakoto Jan 7, 2025
2b2dd5e
Add test for Whisper::Params.new with keyword arguments
KitaitiMakoto Jan 7, 2025
744b64e
Make Whisper::Params.new accept keyword arguments
KitaitiMakoto Jan 7, 2025
30f3c9d
Update type signatures
KitaitiMakoto Jan 7, 2025
9061843
Update README
KitaitiMakoto Jan 7, 2025
6eb114f
Update CLEAN targets
KitaitiMakoto Jan 9, 2025
31a33a8
Fix document comment for Whisper::Params#new_segment_callback=
KitaitiMakoto Jan 11, 2025
41eaec7
Use macro to define params
KitaitiMakoto Jan 11, 2025
86604aa
Fix dependency of build task
KitaitiMakoto Jan 18, 2025
3f85151
Set Whisper.finalize_log_callback visibility to private
KitaitiMakoto Jan 18, 2025
0a52548
Make Whisper::Context#full and full_parallel return self
KitaitiMakoto Jan 18, 2025
324b8d4
Add test for Whisper::Context#full_get_segment
KitaitiMakoto Jan 18, 2025
418d519
Add Whisper::Context#full_get_segment
KitaitiMakoto Jan 18, 2025
3e0b52a
Update signatures
KitaitiMakoto Jan 18, 2025
510ef4c
Update README
KitaitiMakoto Jan 18, 2025
9c9bcfc
Fix signature
KitaitiMakoto Jan 18, 2025
7ee9983
Replace #initialize with .new in signature file [skip ci]
KitaitiMakoto Jan 18, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 26 additions & 24 deletions bindings/ruby/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,15 @@ require "whisper"

whisper = Whisper::Context.new("base")

params = Whisper::Params.new
params.language = "en"
params.offset = 10_000
params.duration = 60_000
params.max_text_tokens = 300
params.translate = true
params.print_timestamps = false
params.initial_prompt = "Initial prompt here."
params = Whisper::Params.new(
language: "en",
offset: 10_000,
duration: 60_000,
max_text_tokens: 300,
translate: true,
print_timestamps: false,
initial_prompt: "Initial prompt here."
)

whisper.transcribe("path/to/audio.wav", params) do |whole_text|
puts whole_text
Expand Down Expand Up @@ -113,18 +114,18 @@ def format_time(time_ms)
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
end

whisper.transcribe("path/to/audio.wav", params)

whisper.each_segment.with_index do |segment, index|
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
nth: index + 1,
st: format_time(segment.start_time),
ed: format_time(segment.end_time),
text: segment.text
}
line << " (speaker turned)" if segment.speaker_next_turn?
puts line
end
whisper
.transcribe("path/to/audio.wav", params)
.each_segment.with_index do |segment, index|
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
nth: index + 1,
st: format_time(segment.start_time),
ed: format_time(segment.end_time),
text: segment.text
}
line << " (speaker turned)" if segment.speaker_next_turn?
puts line
end

```

Expand Down Expand Up @@ -215,10 +216,11 @@ reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :
samples = reader.enum_for(:each_buffer).map(&:samples).flatten

whisper = Whisper::Context.new("base")
whisper.full(Whisper::Params.new, samples)
whisper.each_segment do |segment|
puts segment.text
end
whisper
.full(Whisper::Params.new, samples)
.each_segment do |segment|
puts segment.text
end
```

The second argument `samples` may be an array, an object with `length` and `each` methods, or a MemoryView. If you can prepare audio data as a C array and export it as a MemoryView, whispercpp accepts it and works with it with zero copy.
Expand Down
14 changes: 8 additions & 6 deletions bindings/ruby/Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ EXTSOURCES.each do |src|
end

CLEAN.include SOURCES
CLEAN.include FileList["ext/*.o", "ext/*.metal", "ext/whisper.{so,bundle,dll}"]
CLEAN.include FileList["ext/**/*.o", "ext/**/*.metal", "ext/**/*.tmp", "ext/whisper.{so,bundle,dll}"]

task build: ["ext/Makefile", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp", "whispercpp.gemspec"]
SRC = FileList["ext/*.{c,cpp,h}"]

task build: SOURCES

directory "pkg"
CLOBBER.include "pkg"
Expand All @@ -29,14 +31,14 @@ LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
SO_FILE = File.join("ext", LIB_NAME)
LIB_FILE = File.join("lib", LIB_NAME)

file "ext/Makefile" => ["ext/extconf.rb", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp"] + SOURCES do |t|
Dir.chdir "ext" do
file "ext/Makefile" => SRC + ["ext/extconf.rb"] + SOURCES do |t|
chdir "ext" do
ruby "extconf.rb"
end
end

file SO_FILE => "ext/Makefile" do |t|
Dir.chdir "ext" do
chdir "ext" do
sh "make"
end
end
Expand All @@ -54,7 +56,7 @@ end

TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
Dir.chdir "tests/jfk_reader" do
chdir "tests/jfk_reader" do
ruby "extconf.rb"
sh "make"
end
Expand Down
12 changes: 5 additions & 7 deletions bindings/ruby/ext/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@ whisper.bundle
whisper.dll
scripts/get-flags.mk
*.o
*.c
*.cpp
*.h
*.m
*.metal
!ruby_whisper.cpp
!ruby_whisper.h
/*/**/*.c
/*/**/*.cpp
/*/**/*.h
/*/**/*.m
/*/**/*.metal
9 changes: 8 additions & 1 deletion bindings/ruby/ext/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,14 @@
'src/whisper.o'

$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
$objs << "ruby_whisper.o"
$objs <<
"ruby_whisper.o" <<
"ruby_whisper_context.o" <<
"ruby_whisper_transcribe.o" <<
"ruby_whisper_params.o" <<
"ruby_whisper_error.o" <<
"ruby_whisper_segment.o" <<
"ruby_whisper_model.o"

$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"
Expand Down
164 changes: 164 additions & 0 deletions bindings/ruby/ext/ruby_whisper.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
#include <ruby.h>
#include <ruby/memory_view.h>
#include "ruby_whisper.h"

// Module / class / exception handles with external linkage.
// mWhisper is assigned in Init_whisper(); the others are not assigned in this
// file -- presumably by the init_ruby_whisper_*() functions declared below
// (TODO confirm against their definitions).
VALUE mWhisper;
VALUE cContext;
VALUE cParams;
VALUE eError;

VALUE cSegment;
VALUE cModel;

// Symbol IDs, interned once in Init_whisper().
ID id_to_s;
ID id_call;
ID id___method__;
ID id_to_enum;
ID id_length;
ID id_next;
ID id_new;
ID id_to_path;
ID id_URI;
ID id_pre_converted_models;

// Set to true by ruby_whisper_s_finalize_log_callback(); once set,
// ruby_whisper_log_callback() returns without calling into Ruby.
static bool is_log_callback_finalized = false;

// High level API
// Not referenced in this file beyond this declaration.
extern VALUE ruby_whisper_segment_allocate(VALUE klass);

// Per-class initializers, declared here and called from Init_whisper().
extern void init_ruby_whisper_context(VALUE *mWhisper);
extern void init_ruby_whisper_params(VALUE *mWhisper);
extern void init_ruby_whisper_error(VALUE *mWhisper);
extern void init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cSegment);
extern void init_ruby_whisper_model(VALUE *mWhisper);
// Not referenced in this file beyond this declaration.
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);

/*
 *  Returns the value reported by whisper_lang_max_id() as an Integer.
 *
 *  call-seq:
 *    lang_max_id -> Integer
 */
static VALUE ruby_whisper_s_lang_max_id(VALUE self) {
  const int max_id = whisper_lang_max_id();
  return INT2NUM(max_id);
}

/*
 *  Looks up the id of a language by name via whisper_lang_id().
 *  Raises ArgumentError when whisper_lang_id() reports -1 (name not found).
 *
 *  call-seq:
 *    lang_id(lang_name) -> Integer
 */
static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) {
  const char * name = StringValueCStr(lang);
  const int lang_id = whisper_lang_id(name);
  if (lang_id != -1) {
    return INT2NUM(lang_id);
  }
  // rb_raise() does not return.
  rb_raise(rb_eArgError, "language not found: %s", name);
}

/*
 *  Returns the string whisper_lang_str() yields for the given language id.
 *  Raises IndexError when whisper_lang_str() returns NULL for the id.
 *
 *  call-seq:
 *    lang_str(lang_id) -> String
 */
static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) {
  const int lang_id = NUM2INT(id);
  const char * name = whisper_lang_str(lang_id);
  if (name != NULL) {
    return rb_str_new2(name);
  }
  // rb_raise() does not return.
  rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
}

/*
 *  Returns the string whisper_lang_str_full() yields for the given language id.
 *  Raises IndexError when whisper_lang_str_full() returns NULL for the id.
 *
 *  call-seq:
 *    lang_str_full(lang_id) -> String
 */
static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
  const int lang_id = NUM2INT(id);
  const char * full_name = whisper_lang_str_full(lang_id);
  if (full_name != NULL) {
    return rb_str_new2(full_name);
  }
  // rb_raise() does not return.
  rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
}

/*
 * Finalizer target for the Ruby log callback object: ruby_whisper_s_log_set()
 * registers this method (looked up by name on the Whisper module) with
 * rb_define_finalizer(). It only raises the is_log_callback_finalized flag so
 * that ruby_whisper_log_callback() stops calling into Ruby afterwards.
 * The `id` argument is accepted but not used. Always returns nil.
 */
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
is_log_callback_finalized = true;
return Qnil;
}

/*
 * Log handler passed to whisper_log_set(). Forwards each message to the Ruby
 * callable stored on the Whisper module as "log_callback", invoking its `call`
 * method with (level, message, user_data), where user_data is the Ruby object
 * stored as "user_data" on the module. The C-level `user_data` pointer is not
 * used. Does nothing once the callback has been flagged as finalized.
 */
static void
ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * user_data) {
  if (!is_log_callback_finalized) {
    VALUE callback = rb_iv_get(mWhisper, "log_callback");
    VALUE data = rb_iv_get(mWhisper, "user_data");
    VALUE message = rb_str_new2(buffer);
    rb_funcall(callback, id_call, 3, INT2NUM(level), message, data);
  }
}

/*
 *  Installs +log_callback+ as the log handler and remembers +user_data+ to be
 *  passed back to it on every call. Any finalizer attached to a previously
 *  installed callback is removed, and a finalizer pointing at
 *  Whisper.finalize_log_callback is attached to the new one.
 *
 *  call-seq:
 *    log_set ->(level, buffer, user_data) { ... }, user_data -> nil
 */
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
  VALUE previous = rb_iv_get(self, "log_callback");
  if (!NIL_P(previous)) {
    rb_undefine_finalizer(previous);
  }

  // Stored on the module so ruby_whisper_log_callback() can read them back.
  rb_iv_set(self, "log_callback", log_callback);
  rb_iv_set(self, "user_data", user_data);

  VALUE finalizer_name = rb_str_new2("finalize_log_callback");
  VALUE finalizer = rb_funcall(mWhisper, rb_intern("method"), 1, finalizer_name);
  rb_define_finalizer(log_callback, finalizer);

  whisper_log_set(ruby_whisper_log_callback, NULL);
  return Qnil;
}

/*
 * GC mark function for wrapped ruby_whisper_model structs: marks the Ruby
 * object held in the struct's `context` field.
 */
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
  VALUE context = rwm->context;
  rb_gc_mark(context);
}

/*
 * Allocator for objects wrapping a ruby_whisper_model: allocates the struct
 * and wraps it with rb_whisper_model_mark as the mark function and
 * RUBY_DEFAULT_FREE as the free function.
 */
static VALUE ruby_whisper_model_allocate(VALUE klass) {
  ruby_whisper_model *rwm;
  rwm = ALLOC(ruby_whisper_model);
  // ALLOC() does not initialize the memory, and rb_whisper_model_mark() passes
  // rwm->context to rb_gc_mark(). Start from nil so that a GC run before the
  // context is assigned never marks a garbage VALUE.
  rwm->context = Qnil;
  return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
}

/*
 * Extension entry point. Interns the shared IDs, defines the Whisper module
 * with its LOG_LEVEL_* constants and singleton methods, runs the per-class
 * initializers, and finally requires the Ruby file "whisper/model/uri".
 */
void Init_whisper() {
// Intern the IDs declared at file scope.
id_to_s = rb_intern("to_s");
id_call = rb_intern("call");
id___method__ = rb_intern("__method__");
id_to_enum = rb_intern("to_enum");
id_length = rb_intern("length");
id_next = rb_intern("next");
id_new = rb_intern("new");
id_to_path = rb_intern("to_path");
id_URI = rb_intern("URI");
id_pre_converted_models = rb_intern("pre_converted_models");

mWhisper = rb_define_module("Whisper");

// Expose the ggml log levels as Integer constants on the module.
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));

rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
// Private singleton method; ruby_whisper_s_log_set() looks it up by name to use as a finalizer.
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);

init_ruby_whisper_context(&mWhisper);
init_ruby_whisper_params(&mWhisper);
init_ruby_whisper_error(&mWhisper);
// NOTE(review): &cContext is passed here, while the extern declaration in this
// file names the second parameter cSegment -- confirm against the definition
// which of the two is intended.
init_ruby_whisper_segment(&mWhisper, &cContext);
init_ruby_whisper_model(&mWhisper);

// Loaded last, after the native module and classes have been set up.
rb_require("whisper/model/uri");
}
Loading