module Memo::Storage

Overview

Low-level storage operations for embeddings and chunks

Extended Modules

Defined in:

memo/storage.cr

Instance Method Summary

Instance Method Detail

def compute_hash(text : String) : Bytes #

Compute SHA256 hash for text content


[View source]
def create_chunk(db : DB::Database, hash : Bytes, source_type : String, source_id : Int64, offset : Int32 | Nil, size : Int32, pair_id : Int64 | Nil = nil, parent_id : Int64 | Nil = nil) : Int64 #

Create chunk reference (or ignore if already exists)

Links a hash to a source with optional relationships. Uses INSERT OR IGNORE to safely handle re-indexing with different services.

All IDs (source_id, pair_id, parent_id) are internal IDs (FK to sources table). source_type is denormalized for fast filtering.

Returns the chunk id if a new row was inserted, or 0 if the chunk already existed (the insert was ignored).


[View source]
def deserialize_embedding(blob : Bytes) : Array(Float64) #

Deserialize embedding from binary blob


[View source]
def get_embedding(db : DB::Database, hash : Bytes, service_id : Int64) : Array(Float64) | Nil #

Get embedding by hash and service_id

Returns nil if not found


[View source]
def get_service_by_format_model(db : DB::Database, format : String, model : String) : Tuple(Int64, String, String | Nil, String, Int32, Int32, Float64) | Nil #

Returns service record by format and model, or nil if not found


[View source]
def get_service_by_name(db : DB::Database, name : String) : Tuple(Int64, String, String | Nil, String, Int32, Int32, Float64) | Nil #

Get service by name

Returns service record or nil if not found


[View source]
def increment_match_count(db : DB::Database, chunk_ids : Array(Int64)) #

Increment match_count for chunks


[View source]
def increment_read_count(db : DB::Database, chunk_ids : Array(Int64)) #

Increment read_count for chunks


[View source]
def register_service(db : DB::Database, name : String | Nil, format : String, base_url : String | Nil, model : String, dimensions : Int32, max_tokens : Int32) : Int64 #

Register or get existing service by name

Returns the service_id for the named service configuration. If name is nil, the name is auto-generated from the format and model as "format/model".


[View source]
def serialize_embedding(embedding : Array(Float64)) : Bytes #

Serialize embedding to binary blob (Int16 for 50% storage reduction)

Maps normalized float range [-1, 1] to Int16 range [-32768, 32767]. Precision loss is ~0.003% for normalized vectors.


[View source]
def store_embedding(db : DB::Database, hash : Bytes, embedding : Array(Float64), token_count : Int32, service_id : Int64) : Bool #

Store embedding in database (deduplicated by hash + service_id)

Returns true if inserted, false if already exists for this service


[View source]
def update_tokens_per_byte(db : DB::Database, service_id : Int64, observed_ratio : Float64) #

Update tokens_per_byte ratio for a service using exponential moving average

Blends new observation with existing ratio: new = old * 0.9 + observed * 0.1


[View source]