supabase/db/migrations/core/67-vectors.sql
2025-11-14 14:46:49 +00:00

80 lines
3.7 KiB
PL/PgSQL

-- Vectors: file_vectors table and similarity search function
-- 1) Ensure pgvector extension is available
create extension if not exists vector;
-- 2) File vectors table
create table if not exists public.file_vectors (
id bigint generated by default as identity primary key,
created_at timestamp with time zone default now() not null,
embedding public.vector,
metadata jsonb,
content text
);
-- 3) ANN index (skipped until embedding dimension is fixed)
-- To enable: set column type to public.vector(<dim>) and uncomment:
-- create index if not exists file_vectors_embedding_idx
-- on public.file_vectors using ivfflat (embedding public.vector_cosine_ops)
-- with (lists='100');
-- 3b) Enable RLS and set policies (moved here to avoid ordering issues)
alter table if exists public.file_vectors enable row level security;
drop policy if exists "vectors_read_by_owner" on public.file_vectors;
create policy "vectors_read_by_owner" on public.file_vectors for select to authenticated
using (coalesce((metadata->>'file_id')::uuid, null) is null or exists (
select 1 from public.files f join public.file_cabinets c on f.cabinet_id = c.id
where f.id = (metadata->>'file_id')::uuid and c.user_id = auth.uid()
));
drop policy if exists "vectors_rw_service" on public.file_vectors;
create policy "vectors_rw_service" on public.file_vectors to service_role
using (true) with check (true);
-- 4) Match function mirrored from neoFS (generic metadata mapping)
create or replace function public.match_file_vectors(
filter jsonb,
match_count integer,
query_embedding public.vector
)
returns table (
id bigint,
file_id uuid,
cabinet_id uuid,
artefact_type text,
artefact_is text,
original_path_prefix text,
original_filename text,
content text,
metadata jsonb,
similarity double precision
)
language sql stable as $$
select
fv.id,
nullif(fv.metadata->>'file_id','')::uuid as file_id,
nullif(fv.metadata->>'cabinet_id','')::uuid as cabinet_id,
nullif(fv.metadata->>'artefact_type','') as artefact_type,
nullif(fv.metadata->>'artefact_is','') as artefact_is,
nullif(fv.metadata->>'original_path_prefix','') as original_path_prefix,
nullif(fv.metadata->>'original_filename','') as original_filename,
fv.content,
fv.metadata,
1 - (fv.embedding <=> query_embedding) as similarity
from public.file_vectors fv
where
(coalesce(filter ? 'file_id', false) = false or (fv.metadata->>'file_id')::uuid = (filter->>'file_id')::uuid)
and (coalesce(filter ? 'cabinet_id', false) = false or (fv.metadata->>'cabinet_id')::uuid = (filter->>'cabinet_id')::uuid)
and (coalesce(filter ? 'artefact_type', false) = false or (fv.metadata->>'artefact_type') = (filter->>'artefact_type'))
and (coalesce(filter ? 'artefact_id', false) = false or (fv.metadata->>'artefact_id') = (filter->>'artefact_id'))
and (coalesce(filter ? 'original_path_prefix', false) = false or (fv.metadata->>'original_path_prefix') like (filter->>'original_path_prefix') || '%')
and (coalesce(filter ? 'original_path_prefix_ilike', false)= false or (fv.metadata->>'original_path_prefix') ilike (filter->>'original_path_prefix_ilike') || '%')
and (coalesce(filter ? 'original_filename', false) = false or (fv.metadata->>'original_filename') = (filter->>'original_filename'))
and (coalesce(filter ? 'original_filename_ilike', false)= false or (fv.metadata->>'original_filename') ilike (filter->>'original_filename_ilike'))
order by fv.embedding <=> query_embedding
limit greatest(coalesce(match_count, 10), 1)
$$;