2025-11-14 14:46:49 +00:00

392 lines
11 KiB
TypeScript

import { serve } from 'https://deno.land/std@0.131.0/http/server.ts'
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'
/**
 * CORS headers attached to every response this function sends, including the
 * reply to the preflight `OPTIONS` request. Any origin is allowed.
 */
const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  // Request headers a browser client is permitted to send
  'Access-Control-Allow-Headers': [
    'authorization',
    'x-client-info',
    'apikey',
    'content-type',
  ].join(', '),
}
/**
 * JSON body accepted by the batch geocoding endpoint. Every field is optional.
 */
interface BatchGeocodingRequest {
  /** Maximum number of institutes to fetch for this run; the handler falls back to 10. */
  limit?: number

  /**
   * When falsy, only institutes whose `geo_coordinates` is null or `{}` are
   * selected; when truthy, that filter is not applied.
   */
  force_refresh?: boolean

  /** When non-empty, restricts the run to institutes whose `id` is in this list. */
  institute_ids?: string[]
}
/**
 * Outcome of attempting to geocode one institute, as reported in the
 * endpoint's `results` array.
 */
interface GeocodingResult {
  /** `id` of the institute row this outcome belongs to. */
  institute_id: string

  /** True only when coordinates were found and the institute row was updated. */
  success: boolean

  /** Short human-readable status for the outcome. */
  message: string

  /** Location data taken from the geocoder; present only on success. */
  coordinates?: {
    latitude: number
    longitude: number
    /** Bounding box as supplied by the geocoder result (empty when absent). */
    boundingbox: string[]
    /** Passed through unchanged from the geocoder result. */
    geojson?: any
    /** Passed through unchanged from the geocoder result. */
    osm?: any
  }

  /** Failure detail; present only when `success` is false. */
  error?: string
}
/**
 * HTTP entry point for the batch institute geocoder.
 *
 * Request: a JSON object shaped like `BatchGeocodingRequest` (all fields optional).
 *
 * Behaviour:
 *  - `OPTIONS` requests get the CORS preflight reply.
 *  - Selects institutes with a non-null `import_id` (and, unless `force_refresh`
 *    is set, with null/empty `geo_coordinates`), optionally restricted to
 *    `institute_ids`, capped at `limit` rows (default 10).
 *  - Geocodes each institute sequentially via `geocodeAddressWithFallback` and
 *    stores the coordinates on the institute row.
 *  - Writes best-effort entries to `function_logs`; a failed log write never
 *    changes the outcome recorded for an institute or the HTTP response.
 *
 * Responses (all JSON, all carrying the CORS headers):
 *  - 200 with `{ success, message, summary, results }`, or
 *    `{ success, message, processed: 0 }` when nothing needs geocoding.
 *  - 400 with `{ error, details }` when the body is not a JSON object or
 *    `institute_ids` is not an array.
 *  - 500 with `{ error, details }` for configuration or database failures.
 */
serve(async (req: Request) => {
  // Handle CORS preflight requests
  if (req.method === 'OPTIONS') {
    return new Response('ok', { headers: corsHeaders })
  }

  // Serialise a payload as a JSON response carrying the shared CORS headers.
  const respond = (payload: unknown, status: number): Response =>
    new Response(JSON.stringify(payload), {
      status,
      headers: { ...corsHeaders, 'Content-Type': 'application/json' }
    })

  // Thrown values are not guaranteed to be Error instances.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error)

  try {
    // Get environment variables
    const supabaseUrl = Deno.env.get('SUPABASE_URL')
    const supabaseServiceKey = Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')
    const searxngUrl = Deno.env.get('SEARXNG_URL') || 'https://search.kevlarai.com'
    if (!supabaseUrl || !supabaseServiceKey) {
      throw new Error('Missing required environment variables')
    }

    // Create Supabase client
    const supabase = createClient(supabaseUrl, supabaseServiceKey)

    // Best-effort audit logging: a failed log write is reported on the console
    // but must never alter geocoding results or the HTTP response.
    const logStep = async (
      step: string,
      message: string,
      data: Record<string, unknown>
    ): Promise<void> => {
      try {
        const { error: logError } = await supabase
          .from('function_logs')
          .insert({ file_id: null, step, message, data })
        if (logError) {
          console.error(`Failed to write function log "${step}": ${logError.message}`)
        }
      } catch (logFailure) {
        console.error(`Failed to write function log "${step}":`, logFailure)
      }
    }

    // Parse request body; malformed input is a client error, not a server error
    let body: BatchGeocodingRequest
    try {
      const parsed: unknown = await req.json()
      if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new Error('Request body must be a JSON object')
      }
      body = parsed as BatchGeocodingRequest
    } catch (parseError) {
      return respond({ error: 'Invalid request body', details: describeError(parseError) }, 400)
    }

    const instituteIds = body.institute_ids
    if (instituteIds && !Array.isArray(instituteIds)) {
      return respond(
        { error: 'Invalid request body', details: 'institute_ids must be an array of institute IDs' },
        400
      )
    }

    // Anything that is not a positive number falls back to the default of 10;
    // fractional values are truncated so the query always gets an integer.
    const requestedLimit = Number(body.limit)
    const limit =
      Number.isFinite(requestedLimit) && requestedLimit >= 1 ? Math.floor(requestedLimit) : 10
    const forceRefresh = Boolean(body.force_refresh)

    // Get institutes that need geocoding
    let query = supabase
      .from('institutes')
      .select('id, name, address, geo_coordinates')
      .not('import_id', 'is', null)
    if (!forceRefresh) {
      // Only get institutes without coordinates or with empty coordinates
      query = query.or('geo_coordinates.is.null,geo_coordinates.eq.{}')
    }
    if (instituteIds && instituteIds.length > 0) {
      query = query.in('id', instituteIds)
    }

    const { data: institutes, error: fetchError } = await query.limit(limit)
    if (fetchError) {
      throw new Error(`Failed to fetch institutes: ${fetchError.message}`)
    }
    if (!institutes || institutes.length === 0) {
      return respond(
        {
          success: true,
          message: 'No institutes found that need geocoding',
          processed: 0
        },
        200
      )
    }

    console.log(`Processing ${institutes.length} institutes for geocoding`)
    const results: GeocodingResult[] = []
    let successCount = 0
    let errorCount = 0

    // Pause between geocoding requests to be respectful to the SearXNG service;
    // large batches use a shorter pause to keep total run time down.
    const delayMs = institutes.length > 200 ? 50 : 100
    const lastIndex = institutes.length - 1

    // Process institutes sequentially to avoid overwhelming the SearXNG service
    for (const [index, institute] of institutes.entries()) {
      // Tracks whether SearXNG was actually queried, so the pause is applied
      // after every real request (including ones whose follow-up work failed)
      // and skipped for institutes rejected before any request was made.
      let contactedSearxng = false
      try {
        const address = institute.address as any
        if (!address) {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'No address information available',
            error: 'Missing address data'
          })
          errorCount++
          continue
        }

        // Build search query from address components
        const addressParts = [
          address.street,
          address.town,
          address.county,
          address.postcode,
          address.country
        ].filter(Boolean)
        if (addressParts.length === 0) {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'No valid address components found',
            error: 'Empty address parts'
          })
          errorCount++
          continue
        }
        const searchQuery = addressParts.join(', ')
        console.log(`Geocoding institute ${institute.id}: ${searchQuery}`)

        // Query SearXNG for geocoding with fallback strategy
        contactedSearxng = true
        const geocodingResult = await geocodeAddressWithFallback(address, searxngUrl)

        if (geocodingResult.success && geocodingResult.coordinates) {
          // Update institute with geospatial coordinates
          const { error: updateError } = await supabase
            .from('institutes')
            .update({
              geo_coordinates: {
                latitude: geocodingResult.coordinates.latitude,
                longitude: geocodingResult.coordinates.longitude,
                boundingbox: geocodingResult.coordinates.boundingbox,
                geojson: geocodingResult.coordinates.geojson,
                osm: geocodingResult.coordinates.osm,
                search_query: searchQuery,
                geocoded_at: new Date().toISOString()
              }
            })
            .eq('id', institute.id)
          if (updateError) {
            throw new Error(`Failed to update institute: ${updateError.message}`)
          }

          results.push({
            institute_id: institute.id,
            success: true,
            message: 'Successfully geocoded',
            coordinates: geocodingResult.coordinates
          })
          successCount++

          // Log the successful geocoding (never throws, so the success
          // recorded above cannot be re-reported as a failure)
          await logStep('batch_geocoding', 'Successfully geocoded institute address in batch', {
            institute_id: institute.id,
            search_query: searchQuery,
            coordinates: geocodingResult.coordinates
          })
        } else {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'Geocoding failed',
            error: geocodingResult.error || 'Unknown error'
          })
          errorCount++
        }
      } catch (error) {
        console.error(`Error processing institute ${institute.id}:`, error)
        results.push({
          institute_id: institute.id,
          success: false,
          message: 'Processing error',
          error: describeError(error)
        })
        errorCount++
      }

      // Don't delay after the last institute
      if (contactedSearxng && index < lastIndex) {
        await new Promise(resolve => setTimeout(resolve, delayMs))
      }
    }

    // Log the batch operation
    await logStep('batch_geocoding_complete', 'Batch geocoding operation completed', {
      total_processed: institutes.length,
      successful: successCount,
      failed: errorCount,
      results: results
    })

    return respond(
      {
        success: true,
        message: 'Batch geocoding completed',
        summary: {
          total_processed: institutes.length,
          successful: successCount,
          failed: errorCount
        },
        results: results
      },
      200
    )
  } catch (error) {
    console.error('Error in batch institute geocoder:', error)
    return respond(
      {
        error: 'Internal server error',
        details: describeError(error)
      },
      500
    )
  }
})
/**
 * Geocode a free-text address by sending an `!osm`-prefixed query to a SearXNG
 * instance and reading the coordinates of the first result.
 *
 * This function never rejects: every failure (network error, timeout, non-2xx
 * status, no results, unusable coordinates) is reported as
 * `{ success: false, error }`.
 *
 * @param searchQuery Address text to look up.
 * @param searxngUrl  Base URL of the SearXNG instance; trailing slashes are ignored.
 * @param timeoutMs   Abort the request after this many milliseconds (default 10000),
 *                    so one stalled request cannot hang a whole batch.
 * @returns `success: true` with parsed coordinates, or `success: false` with a reason.
 */
async function geocodeAddress(
  searchQuery: string,
  searxngUrl: string,
  timeoutMs: number = 10000
): Promise<{
  success: boolean
  coordinates?: {
    latitude: number
    longitude: number
    boundingbox: string[]
    geojson?: any
    osm?: any
  }
  error?: string
}> {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), timeoutMs)

  try {
    // Format search query for OSM
    const osmQuery = `!osm ${searchQuery}`
    // Strip trailing slashes so the path never becomes "//search"
    const baseUrl = searxngUrl.replace(/\/+$/, '')
    const searchUrl = `${baseUrl}/search?q=${encodeURIComponent(osmQuery)}&format=json`

    const response = await fetch(searchUrl, {
      method: 'GET',
      headers: {
        'Accept': 'application/json',
        'User-Agent': 'ClassroomCopilot-BatchGeocoder/1.0'
      },
      signal: controller.signal
    })
    if (!response.ok) {
      throw new Error(`SearXNG request failed: ${response.status} ${response.statusText}`)
    }

    const data = await response.json()

    // Check if we have results - the number_of_results field might be unreliable
    // so we check the results array directly
    const results = data?.results
    if (!Array.isArray(results) || results.length === 0) {
      return {
        success: false,
        error: 'No results returned from SearXNG'
      }
    }

    const result = results[0]
    const rawLatitude = result?.latitude
    const rawLongitude = result?.longitude

    // Explicit presence check: a numeric 0 is a valid coordinate, so plain
    // truthiness must not be used here.
    const isMissing = (value: unknown): boolean =>
      value === undefined || value === null || value === ''
    if (isMissing(rawLatitude) || isMissing(rawLongitude)) {
      return {
        success: false,
        error: 'Missing latitude or longitude in SearXNG response'
      }
    }

    const toNumber = (value: unknown): number =>
      typeof value === 'number' ? value : parseFloat(String(value))
    const latitude = toNumber(rawLatitude)
    const longitude = toNumber(rawLongitude)

    // Reject NaN/Infinity and values outside the valid geographic ranges
    // rather than returning them as a successful result.
    if (
      !Number.isFinite(latitude) ||
      !Number.isFinite(longitude) ||
      Math.abs(latitude) > 90 ||
      Math.abs(longitude) > 180
    ) {
      return {
        success: false,
        error: 'Invalid latitude or longitude in SearXNG response'
      }
    }

    return {
      success: true,
      coordinates: {
        latitude,
        longitude,
        boundingbox: Array.isArray(result.boundingbox) ? result.boundingbox : [],
        geojson: result.geojson,
        osm: result.osm
      }
    }
  } catch (error) {
    console.error('Geocoding error:', error)
    // An aborted signal means our own timer fired; report that explicitly
    // instead of the runtime's generic abort message.
    const reason = controller.signal.aborted
      ? `SearXNG request timed out after ${timeoutMs}ms`
      : error instanceof Error
        ? error.message
        : String(error)
    return {
      success: false,
      error: reason
    }
  } finally {
    clearTimeout(timer)
  }
}
/**
 * Geocode a structured address by trying a fixed sequence of queries and
 * returning the first one that yields coordinates.
 *
 * Attempts, in order (each is skipped unless all the fields it uses are truthy):
 *  1. street + town + county + postcode
 *  2. town + county + postcode
 *  3. postcode only
 *  4. town + postcode
 *
 * @param address    Object whose `street`, `town`, `county` and `postcode`
 *                   properties are read to build the queries.
 * @param searxngUrl Base URL of the SearXNG instance, forwarded to `geocodeAddress`.
 * @returns The first successful `geocodeAddress` result, or a failure object
 *          when no attempt produced coordinates.
 */
async function geocodeAddressWithFallback(address: any, searxngUrl: string): Promise<{
  success: boolean
  coordinates?: {
    latitude: number
    longitude: number
    boundingbox: string[]
    geojson?: any
    osm?: any
  }
  error?: string
}> {
  // Each entry builds its query lazily and yields null when a field it needs
  // is missing, which tells the loop below to skip that attempt.
  const attempts = [
    {
      announce: 'Trying full address: ',
      onSuccess: 'Full address geocoding successful',
      buildQuery: () =>
        address.street && address.town && address.county && address.postcode
          ? `${address.street}, ${address.town}, ${address.county}, ${address.postcode}`
          : null
    },
    {
      announce: 'Trying medium address: ',
      onSuccess: 'Medium address geocoding successful',
      buildQuery: () =>
        address.town && address.county && address.postcode
          ? `${address.town}, ${address.county}, ${address.postcode}`
          : null
    },
    {
      announce: 'Trying postcode only: ',
      onSuccess: 'Postcode geocoding successful',
      buildQuery: () => (address.postcode ? address.postcode : null)
    },
    {
      announce: 'Trying simple address: ',
      onSuccess: 'Simple address geocoding successful',
      buildQuery: () =>
        address.town && address.postcode
          ? `${address.town}, ${address.postcode}`
          : null
    }
  ]

  for (const attempt of attempts) {
    const candidate = attempt.buildQuery()
    if (candidate === null) {
      continue
    }
    console.log(`${attempt.announce}${candidate}`)
    const outcome = await geocodeAddress(candidate, searxngUrl)
    if (outcome.success && outcome.coordinates) {
      console.log(attempt.onSuccess)
      return outcome
    }
  }

  // All strategies failed
  return {
    success: false,
    error: 'No coordinates found with any address combination'
  }
}