80 lines
3.7 KiB
PL/PgSQL
80 lines
3.7 KiB
PL/PgSQL
-- Vectors: file_vectors table and similarity search function
|
|
|
|
-- 1) Ensure pgvector extension is available
|
|
create extension if not exists vector;
|
|
|
|
-- 2) File vectors table
|
|
create table if not exists public.file_vectors (
|
|
id bigint generated by default as identity primary key,
|
|
created_at timestamp with time zone default now() not null,
|
|
embedding public.vector,
|
|
metadata jsonb,
|
|
content text
|
|
);
|
|
|
|
-- 3) ANN index (skipped until embedding dimension is fixed)
|
|
-- To enable: set column type to public.vector(<dim>) and uncomment:
|
|
-- create index if not exists file_vectors_embedding_idx
|
|
-- on public.file_vectors using ivfflat (embedding public.vector_cosine_ops)
|
|
-- with (lists='100');
|
|
|
|
-- 3b) Enable RLS and set policies (moved here to avoid ordering issues)
|
|
alter table if exists public.file_vectors enable row level security;
|
|
|
|
drop policy if exists "vectors_read_by_owner" on public.file_vectors;
|
|
create policy "vectors_read_by_owner" on public.file_vectors for select to authenticated
|
|
using (coalesce((metadata->>'file_id')::uuid, null) is null or exists (
|
|
select 1 from public.files f join public.file_cabinets c on f.cabinet_id = c.id
|
|
where f.id = (metadata->>'file_id')::uuid and c.user_id = auth.uid()
|
|
));
|
|
|
|
drop policy if exists "vectors_rw_service" on public.file_vectors;
|
|
create policy "vectors_rw_service" on public.file_vectors to service_role
|
|
using (true) with check (true);
|
|
|
|
-- 4) Match function mirrored from neoFS (generic metadata mapping)
|
|
create or replace function public.match_file_vectors(
|
|
filter jsonb,
|
|
match_count integer,
|
|
query_embedding public.vector
|
|
)
|
|
returns table (
|
|
id bigint,
|
|
file_id uuid,
|
|
cabinet_id uuid,
|
|
artefact_type text,
|
|
artefact_is text,
|
|
original_path_prefix text,
|
|
original_filename text,
|
|
content text,
|
|
metadata jsonb,
|
|
similarity double precision
|
|
)
|
|
language sql stable as $$
|
|
select
|
|
fv.id,
|
|
nullif(fv.metadata->>'file_id','')::uuid as file_id,
|
|
nullif(fv.metadata->>'cabinet_id','')::uuid as cabinet_id,
|
|
nullif(fv.metadata->>'artefact_type','') as artefact_type,
|
|
nullif(fv.metadata->>'artefact_is','') as artefact_is,
|
|
nullif(fv.metadata->>'original_path_prefix','') as original_path_prefix,
|
|
nullif(fv.metadata->>'original_filename','') as original_filename,
|
|
fv.content,
|
|
fv.metadata,
|
|
1 - (fv.embedding <=> query_embedding) as similarity
|
|
from public.file_vectors fv
|
|
where
|
|
(coalesce(filter ? 'file_id', false) = false or (fv.metadata->>'file_id')::uuid = (filter->>'file_id')::uuid)
|
|
and (coalesce(filter ? 'cabinet_id', false) = false or (fv.metadata->>'cabinet_id')::uuid = (filter->>'cabinet_id')::uuid)
|
|
and (coalesce(filter ? 'artefact_type', false) = false or (fv.metadata->>'artefact_type') = (filter->>'artefact_type'))
|
|
and (coalesce(filter ? 'artefact_id', false) = false or (fv.metadata->>'artefact_id') = (filter->>'artefact_id'))
|
|
and (coalesce(filter ? 'original_path_prefix', false) = false or (fv.metadata->>'original_path_prefix') like (filter->>'original_path_prefix') || '%')
|
|
and (coalesce(filter ? 'original_path_prefix_ilike', false)= false or (fv.metadata->>'original_path_prefix') ilike (filter->>'original_path_prefix_ilike') || '%')
|
|
and (coalesce(filter ? 'original_filename', false) = false or (fv.metadata->>'original_filename') = (filter->>'original_filename'))
|
|
and (coalesce(filter ? 'original_filename_ilike', false)= false or (fv.metadata->>'original_filename') ilike (filter->>'original_filename_ilike'))
|
|
order by fv.embedding <=> query_embedding
|
|
limit greatest(coalesce(match_count, 10), 1)
|
|
$$;
|
|
|
|
|