-- Vectors: file_vectors table and similarity search function -- 1) Ensure pgvector extension is available create extension if not exists vector; -- 2) File vectors table create table if not exists public.file_vectors ( id bigint generated by default as identity primary key, created_at timestamp with time zone default now() not null, embedding public.vector, metadata jsonb, content text ); -- 3) ANN index (skipped until embedding dimension is fixed) -- To enable: set column type to public.vector() and uncomment: -- create index if not exists file_vectors_embedding_idx -- on public.file_vectors using ivfflat (embedding public.vector_cosine_ops) -- with (lists='100'); -- 3b) Enable RLS and set policies (moved here to avoid ordering issues) alter table if exists public.file_vectors enable row level security; drop policy if exists "vectors_read_by_owner" on public.file_vectors; create policy "vectors_read_by_owner" on public.file_vectors for select to authenticated using (coalesce((metadata->>'file_id')::uuid, null) is null or exists ( select 1 from public.files f join public.file_cabinets c on f.cabinet_id = c.id where f.id = (metadata->>'file_id')::uuid and c.user_id = auth.uid() )); drop policy if exists "vectors_rw_service" on public.file_vectors; create policy "vectors_rw_service" on public.file_vectors to service_role using (true) with check (true); -- 4) Match function mirrored from neoFS (generic metadata mapping) create or replace function public.match_file_vectors( filter jsonb, match_count integer, query_embedding public.vector ) returns table ( id bigint, file_id uuid, cabinet_id uuid, artefact_type text, artefact_is text, original_path_prefix text, original_filename text, content text, metadata jsonb, similarity double precision ) language sql stable as $$ select fv.id, nullif(fv.metadata->>'file_id','')::uuid as file_id, nullif(fv.metadata->>'cabinet_id','')::uuid as cabinet_id, nullif(fv.metadata->>'artefact_type','') as artefact_type, nullif(fv.metadata->>'artefact_is','') as artefact_is, nullif(fv.metadata->>'original_path_prefix','') as original_path_prefix, nullif(fv.metadata->>'original_filename','') as original_filename, fv.content, fv.metadata, 1 - (fv.embedding <=> query_embedding) as similarity from public.file_vectors fv where (coalesce(filter ? 'file_id', false) = false or (fv.metadata->>'file_id')::uuid = (filter->>'file_id')::uuid) and (coalesce(filter ? 'cabinet_id', false) = false or (fv.metadata->>'cabinet_id')::uuid = (filter->>'cabinet_id')::uuid) and (coalesce(filter ? 'artefact_type', false) = false or (fv.metadata->>'artefact_type') = (filter->>'artefact_type')) and (coalesce(filter ? 'artefact_id', false) = false or (fv.metadata->>'artefact_id') = (filter->>'artefact_id')) and (coalesce(filter ? 'original_path_prefix', false) = false or (fv.metadata->>'original_path_prefix') like (filter->>'original_path_prefix') || '%') and (coalesce(filter ? 'original_path_prefix_ilike', false)= false or (fv.metadata->>'original_path_prefix') ilike (filter->>'original_path_prefix_ilike') || '%') and (coalesce(filter ? 'original_filename', false) = false or (fv.metadata->>'original_filename') = (filter->>'original_filename')) and (coalesce(filter ? 'original_filename_ilike', false)= false or (fv.metadata->>'original_filename') ilike (filter->>'original_filename_ilike')) order by fv.embedding <=> query_embedding limit greatest(coalesce(match_count, 10), 1) $$;