module Similarity

Defined in:

similarity.cr

Class Method Summary

Class Method Detail

def self.calculate_signature(text : String) : Array(Int32) #

Calculate MinHash signature for a given text

The signature is an array of minimum hash values across all n-grams in the document, one for each hash function.


[View source]
def self.clear_cache : Nil #

Clear the signatures cache (useful for testing or rebuilds)


[View source]
def self.create_signature(post : Markdown::File, lang : String) : Signature #

Create MinHash signature for a post


[View source]
def self.create_tasks(posts : Array(Markdown::File)) : Nil #

Create tasks to calculate and store signatures for all posts


[View source]
def self.enable(is_enabled : Bool, posts : Array(Markdown::File)) #

Enable similarity feature (actual work is done in Posts.create_tasks)


[View source]
def self.find_related(post : Markdown::File, lang : String, limit : Int32 = 5) : Array(RelatedPost) #

Find related posts for a given post

Returns an array of RelatedPost objects sorted by similarity score in descending order. Filters out duplicates (same post in different languages), preferring the target language. Results are cached for performance.


[View source]
def self.get_all_signatures(lang : String) : Array(Signature) #

Get all signatures from the kv store with caching


[View source]
def self.get_signature(post_link : String, lang : String) : Signature | Nil #

Retrieve a post's signature from the kv store


[View source]
def self.jaccard_similarity(sig1 : Signature, sig2 : Signature) : Float64 #

Calculate Jaccard similarity between two MinHash signatures

Returns a value between 0.0 (no similarity) and 1.0 (identical)


[View source]
def self.ngram_size : Int32 #

[View source]
def self.ngram_size=(ngram_size : Int32) #

[View source]
def self.num_permutations : Int32 #

Configuration for MinHash generation


[View source]
def self.num_permutations=(num_permutations : Int32) #

Configuration for MinHash generation


[View source]
def self.store_index(post_links : Array(String), lang : String) : Nil #

Store the index of all post links


[View source]
def self.store_signature(post : Markdown::File, lang : String) : Nil #

Store a post's signature in the kv store


[View source]