import { serve } from 'https://deno.land/std@0.131.0/http/server.ts'
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'

const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
}

/** SearXNG instance used when the SEARXNG_URL environment variable is unset or empty. */
const DEFAULT_SEARXNG_URL = 'https://search.kevlarai.com'

/** Number of institutes fetched per invocation when the request gives no usable `limit`. */
const DEFAULT_BATCH_LIMIT = 10

/** Optional JSON body accepted by this function. */
interface BatchGeocodingRequest {
  limit?: number
  force_refresh?: boolean
  institute_ids?: string[]
}

/**
 * Address fields read from `institutes.address`.
 * NOTE(review): this shape is inferred from the properties the code reads; confirm against the table schema.
 */
interface InstituteAddress {
  street?: string
  town?: string
  county?: string
  postcode?: string
  country?: string
}

/** Location data taken from the first SearXNG result. */
interface GeoCoordinates {
  latitude: number
  longitude: number
  boundingbox: string[]
  geojson?: unknown
  osm?: unknown
}

/** Outcome of a single geocoding attempt (or of the whole fallback chain). */
interface GeocodeOutcome {
  success: boolean
  coordinates?: GeoCoordinates
  error?: string
}

/** Per-institute entry of the `results` array returned to the caller. */
interface GeocodingResult {
  institute_id: string
  success: boolean
  message: string
  coordinates?: GeoCoordinates
  error?: string
}

/** Extracts a printable message from anything that can be thrown. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}

/** Builds a JSON response that carries the CORS headers. */
function jsonResponse(payload: unknown, status: number): Response {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { ...corsHeaders, 'Content-Type': 'application/json' },
  })
}

/** Resolves after `ms` milliseconds. */
function pause(ms: number): Promise<void> {
  return new Promise((resolve) => setTimeout(resolve, ms))
}

/**
 * Turns the raw `limit` from the request into a positive integer.
 * Missing, zero, negative and non-numeric values fall back to DEFAULT_BATCH_LIMIT.
 */
function resolveLimit(raw: unknown): number {
  const parsed = Number(raw)
  return Number.isFinite(parsed) && parsed >= 1 ? Math.floor(parsed) : DEFAULT_BATCH_LIMIT
}

/**
 * Parses a latitude/longitude value as a finite number.
 * Returns `undefined` for missing or unparseable values; a literal 0 is accepted.
 */
function parseCoordinate(value: unknown): number | undefined {
  if (value === null || value === undefined) {
    return undefined
  }
  const parsed = Number.parseFloat(String(value))
  return Number.isFinite(parsed) ? parsed : undefined
}

/**
 * HTTP entry point: geocodes a batch of institutes and stores the coordinates.
 *
 * Request body (JSON, all fields optional): `limit`, `force_refresh`, `institute_ids`.
 * Responses:
 *  - 200 with `{ success, message, summary, results }` (or `processed: 0` when nothing matched)
 *  - 400 when the body is not valid JSON
 *  - 500 for any other failure (missing environment variables, failed institute query, ...)
 */
serve(async (req: Request) => {
  // Handle CORS preflight requests
  if (req.method === 'OPTIONS') {
    return new Response('ok', { headers: corsHeaders })
  }

  try {
    // Get environment variables
    const supabaseUrl = Deno.env.get('SUPABASE_URL')
    const supabaseServiceKey = Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')
    const searxngUrl = Deno.env.get('SEARXNG_URL') || DEFAULT_SEARXNG_URL

    if (!supabaseUrl || !supabaseServiceKey) {
      throw new Error('Missing required environment variables')
    }

    // Create Supabase client
    const supabase = createClient(supabaseUrl, supabaseServiceKey)

    // Parse request body; a malformed body is a client error, not a server error
    let body: BatchGeocodingRequest
    try {
      const parsed: unknown = await req.json()
      // A JSON `null` (or any non-object value) is treated as "no options"
      body = typeof parsed === 'object' && parsed !== null ? (parsed as BatchGeocodingRequest) : {}
    } catch (error: unknown) {
      return jsonResponse({ error: 'Invalid JSON body', details: errorMessage(error) }, 400)
    }

    const limit = resolveLimit(body.limit)
    const forceRefresh = Boolean(body.force_refresh)
    const instituteIds = Array.isArray(body.institute_ids) ? body.institute_ids : []

    // Best-effort audit log: a failed insert is reported but never fails the batch
    const writeLog = async (step: string, message: string, data: Record<string, unknown>): Promise<void> => {
      try {
        const { error: logError } = await supabase
          .from('function_logs')
          .insert({ file_id: null, step, message, data })
        if (logError) {
          console.warn(`Failed to write function log (${step}): ${logError.message}`)
        }
      } catch (error: unknown) {
        console.warn(`Failed to write function log (${step}): ${errorMessage(error)}`)
      }
    }

    // Get institutes that need geocoding
    let query = supabase
      .from('institutes')
      .select('id, name, address, geo_coordinates')
      .not('import_id', 'is', null)

    if (!forceRefresh) {
      // Only get institutes without coordinates or with empty coordinates
      query = query.or('geo_coordinates.is.null,geo_coordinates.eq.{}')
    }

    if (instituteIds.length > 0) {
      query = query.in('id', instituteIds)
    }

    const { data: institutes, error: fetchError } = await query.limit(limit)

    if (fetchError) {
      throw new Error(`Failed to fetch institutes: ${fetchError.message}`)
    }

    if (!institutes || institutes.length === 0) {
      return jsonResponse(
        { success: true, message: 'No institutes found that need geocoding', processed: 0 },
        200,
      )
    }

    console.log(`Processing ${institutes.length} institutes for geocoding`)

    const results: GeocodingResult[] = []
    let successCount = 0
    let errorCount = 0

    // Shorter pause for large batches so they finish sooner
    const pauseMs = institutes.length > 200 ? 50 : 100
    const lastIndex = institutes.length - 1

    // Process institutes sequentially to avoid overwhelming the SearXNG service
    for (const [index, institute] of institutes.entries()) {
      // Set once the geocoding step is reached; institutes skipped earlier need no pause
      let attemptedGeocoding = false

      try {
        const address = institute.address as InstituteAddress | null

        if (!address) {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'No address information available',
            error: 'Missing address data',
          })
          errorCount++
          continue
        }

        // Build search query from address components
        const addressParts = [
          address.street,
          address.town,
          address.county,
          address.postcode,
          address.country,
        ].filter(Boolean)

        if (addressParts.length === 0) {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'No valid address components found',
            error: 'Empty address parts',
          })
          errorCount++
          continue
        }

        const searchQuery = addressParts.join(', ')
        console.log(`Geocoding institute ${institute.id}: ${searchQuery}`)

        // Query SearXNG for geocoding with fallback strategy
        attemptedGeocoding = true
        const geocodingResult = await geocodeAddressWithFallback(address, searxngUrl)

        if (geocodingResult.success && geocodingResult.coordinates) {
          const coordinates = geocodingResult.coordinates

          // Update institute with geospatial coordinates
          const { error: updateError } = await supabase
            .from('institutes')
            .update({
              geo_coordinates: {
                latitude: coordinates.latitude,
                longitude: coordinates.longitude,
                boundingbox: coordinates.boundingbox,
                geojson: coordinates.geojson,
                osm: coordinates.osm,
                search_query: searchQuery,
                geocoded_at: new Date().toISOString(),
              },
            })
            .eq('id', institute.id)

          if (updateError) {
            throw new Error(`Failed to update institute: ${updateError.message}`)
          }

          results.push({
            institute_id: institute.id,
            success: true,
            message: 'Successfully geocoded',
            coordinates,
          })
          successCount++

          // Log the successful geocoding
          await writeLog('batch_geocoding', 'Successfully geocoded institute address in batch', {
            institute_id: institute.id,
            search_query: searchQuery,
            coordinates,
          })
        } else {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'Geocoding failed',
            error: geocodingResult.error || 'Unknown error',
          })
          errorCount++
        }
      } catch (error: unknown) {
        console.error(`Error processing institute ${institute.id}:`, error)
        results.push({
          institute_id: institute.id,
          success: false,
          message: 'Processing error',
          error: errorMessage(error),
        })
        errorCount++
      }

      // Add a small delay between requests to be respectful to the SearXNG service;
      // no delay after the last institute
      if (attemptedGeocoding && index < lastIndex) {
        await pause(pauseMs)
      }
    }

    // Log the batch operation
    await writeLog('batch_geocoding_complete', 'Batch geocoding operation completed', {
      total_processed: institutes.length,
      successful: successCount,
      failed: errorCount,
      results: results,
    })

    return jsonResponse(
      {
        success: true,
        message: 'Batch geocoding completed',
        summary: {
          total_processed: institutes.length,
          successful: successCount,
          failed: errorCount,
        },
        results: results,
      },
      200,
    )
  } catch (error: unknown) {
    console.error('Error in batch institute geocoder:', error)
    return jsonResponse({ error: 'Internal server error', details: errorMessage(error) }, 500)
  }
})

/**
 * Runs one `!osm` search against SearXNG and reads coordinates from the first result.
 *
 * Never throws: request failures, empty result lists and unusable coordinates are all
 * reported as `{ success: false, error }`.
 *
 * @param searchQuery Free-text address to look up.
 * @param searxngUrl Base URL of the SearXNG instance (a trailing slash is tolerated).
 */
async function geocodeAddress(searchQuery: string, searxngUrl: string): Promise<GeocodeOutcome> {
  try {
    // Format search query for OSM
    const osmQuery = `!osm ${searchQuery}`
    const baseUrl = searxngUrl.replace(/\/+$/, '')
    const searchUrl = `${baseUrl}/search?q=${encodeURIComponent(osmQuery)}&format=json`

    const response = await fetch(searchUrl, {
      method: 'GET',
      headers: {
        'Accept': 'application/json',
        'User-Agent': 'ClassroomCopilot-BatchGeocoder/1.0',
      },
    })

    if (!response.ok) {
      // Discard the unread body before reporting the failure
      await response.body?.cancel()
      throw new Error(`SearXNG request failed: ${response.status} ${response.statusText}`)
    }

    const data = await response.json()

    // Check if we have results - the number_of_results field might be unreliable
    // so we check the results array directly
    const candidates = Array.isArray(data?.results) ? data.results : []
    if (candidates.length === 0) {
      return { success: false, error: 'No results returned from SearXNG' }
    }

    const best = candidates[0]
    const latitude = parseCoordinate(best?.latitude)
    const longitude = parseCoordinate(best?.longitude)

    // Compare against undefined rather than truthiness so that 0 stays a valid coordinate
    if (latitude === undefined || longitude === undefined) {
      return { success: false, error: 'Missing latitude or longitude in SearXNG response' }
    }

    return {
      success: true,
      coordinates: {
        latitude,
        longitude,
        boundingbox: best.boundingbox || [],
        geojson: best.geojson,
        osm: best.osm,
      },
    }
  } catch (error: unknown) {
    console.error('Geocoding error:', error)
    return { success: false, error: errorMessage(error) }
  }
}

/**
 * Tries a series of queries built from the address, returning the first that geocodes.
 *
 * Order of attempts (each is skipped when one of its fields is missing):
 *  1. street + town + county + postcode
 *  2. town + county + postcode
 *  3. postcode only
 *  4. town + postcode
 *
 * Every strategy needs a postcode, so an address without one yields a failure
 * without any request being made.
 *
 * @param address Address fields of the institute.
 * @param searxngUrl Base URL of the SearXNG instance.
 */
async function geocodeAddressWithFallback(
  address: InstituteAddress,
  searxngUrl: string,
): Promise<GeocodeOutcome> {
  const { street, town, county, postcode } = address

  const strategies = [
    {
      applicable: Boolean(street && town && county && postcode),
      query: `${street}, ${town}, ${county}, ${postcode}`,
      attemptLabel: 'Trying full address',
      successMessage: 'Full address geocoding successful',
    },
    {
      applicable: Boolean(town && county && postcode),
      query: `${town}, ${county}, ${postcode}`,
      attemptLabel: 'Trying medium address',
      successMessage: 'Medium address geocoding successful',
    },
    {
      applicable: Boolean(postcode),
      query: `${postcode}`,
      attemptLabel: 'Trying postcode only',
      successMessage: 'Postcode geocoding successful',
    },
    {
      applicable: Boolean(town && postcode),
      query: `${town}, ${postcode}`,
      attemptLabel: 'Trying simple address',
      successMessage: 'Simple address geocoding successful',
    },
  ]

  for (const strategy of strategies) {
    if (!strategy.applicable) {
      continue
    }

    console.log(`${strategy.attemptLabel}: ${strategy.query}`)
    const outcome = await geocodeAddress(strategy.query, searxngUrl)

    if (outcome.success && outcome.coordinates) {
      console.log(strategy.successMessage)
      return outcome
    }
  }

  // All strategies failed
  return {
    success: false,
    error: 'No coordinates found with any address combination',
  }
}