module Memo::Storage

Overview

Low-level storage operations for embeddings and chunks

Extended Modules

Defined in:

memo/storage.cr

Instance Method Summary

Instance Method Detail

def compute_hash(text : String) : Bytes #

Compute SHA256 hash for text content


[View source]
def create_chunk(db : DB::Database, hash : Bytes, source_type : String, source_id : Int64, offset : Int32 | Nil, size : Int32, pair_id : Int64 | Nil = nil, parent_id : Int64 | Nil = nil) : Int64 #

Create chunk reference (or ignore if already exists)

Returns the chunk id if inserted, or 0 if the chunk already existed (the insert was ignored)


[View source]
def deserialize_embedding(blob : Bytes) : Array(Float64) #

Deserialize embedding from binary blob


[View source]
def get_rowid(db : DB::Database, hash : Bytes, service_id : Int64) : Int64 | Nil #

Get the rowid of an embedding by hash and service_id.


[View source]
def get_service_by_format_model(db : DB::Database, format : String, model : String) : Tuple(Int64, String, String | Nil, String, Int32, Int32, Float64) | Nil #

Returns service record by format and model, or nil if not found


[View source]
def get_service_by_name(db : DB::Database, name : String) : Tuple(Int64, String, String | Nil, String, Int32, Int32, Float64) | Nil #

Returns service record by name, or nil if not found


[View source]
def increment_match_count(db : DB::Database, chunk_ids : Array(Int64)) #

Increment match_count for chunks


[View source]
def increment_read_count(db : DB::Database, chunk_ids : Array(Int64)) #

Increment read_count for chunks


[View source]
def register_service(db : DB::Database, name : String | Nil, format : String, base_url : String | Nil, model : String, dimensions : Int32, max_tokens : Int32) : Int64 #

Register or get existing service by name


[View source]
def serialize_embedding(embedding : Array(Float64)) : Bytes #

Serialize embedding to binary blob (Int16 for 50% storage reduction)


[View source]
def store_embedding(db : DB::Database, hash : Bytes, token_count : Int32, service_id : Int64) : Tuple(Bool, Int64) #

Register embedding hash in database (deduplicated by hash + service_id)

Returns {inserted, rowid}, where inserted is true if the hash was newly inserted and rowid is the USearch key.


[View source]
def update_tokens_per_byte(db : DB::Database, service_id : Int64, observed_ratio : Float64) #

Update tokens_per_byte ratio using exponential moving average


[View source]