// File-viewer residue from the original paste: 392 lines, 11 KiB, TypeScript
import { serve } from 'https://deno.land/std@0.131.0/http/server.ts'
|
|
import { createClient } from 'https://esm.sh/@supabase/supabase-js@2'
|
|
|
|
/**
 * CORS headers sent with every response produced by this function,
 * including the reply to the `OPTIONS` preflight request.
 */
const corsHeaders = {
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Headers': 'authorization, x-client-info, apikey, content-type',
}
|
|
|
|
/** JSON body accepted by the batch geocoding endpoint. Every field is optional. */
interface BatchGeocodingRequest {
  /** Maximum number of institutes to fetch in one invocation (the handler falls back to 10). */
  limit?: number
  /** When true, institutes that already have `geo_coordinates` are selected again. */
  force_refresh?: boolean
  /** When non-empty, restricts the selection to institutes with these ids. */
  institute_ids?: string[]
}
|
|
|
|
/** Coordinates and related OSM data copied from the first SearXNG result. */
interface GeocodedCoordinates {
  latitude: number
  longitude: number
  boundingbox: string[]
  /** Passed through from the search result unchanged; never inspected in this file. */
  geojson?: unknown
  /** Passed through from the search result unchanged; never inspected in this file. */
  osm?: unknown
}

/** Per-institute outcome reported in the batch response and in the completion log entry. */
interface GeocodingResult {
  institute_id: string
  success: boolean
  /** Short human-readable status, e.g. 'Successfully geocoded' or 'Geocoding failed'. */
  message: string
  /** Present only when `success` is true. */
  coordinates?: GeocodedCoordinates
  /** Failure detail; present only when `success` is false. */
  error?: string
}
|
|
|
|
/** Batch size used when the request omits `limit` or supplies an unusable value. */
const DEFAULT_BATCH_LIMIT = 10

/** Extracts a printable message from a caught value without assuming it is an `Error`. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}

/** Serialises `payload` as JSON and attaches the CORS headers. */
function jsonResponse(payload: unknown, status: number): Response {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { ...corsHeaders, 'Content-Type': 'application/json' },
  })
}

/**
 * Reads the request body as a `BatchGeocodingRequest`.
 * An empty body is treated as `{}`; anything that is not a JSON object throws.
 */
async function readBatchRequest(req: Request): Promise<BatchGeocodingRequest> {
  const raw = await req.text()
  if (raw.trim() === '') {
    return {}
  }
  const parsed: unknown = JSON.parse(raw)
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new SyntaxError('Request body must be a JSON object')
  }
  return parsed as BatchGeocodingRequest
}

/**
 * Inserts a row into `function_logs` on a best-effort basis.
 * Failures are logged to the console and never propagate, so a logging
 * problem cannot change the outcome recorded for an institute.
 */
async function writeFunctionLog(
  supabase: ReturnType<typeof createClient>,
  step: string,
  message: string,
  data: Record<string, unknown>
): Promise<void> {
  try {
    const { error } = await supabase
      .from('function_logs')
      .insert({ file_id: null, step, message, data })
    if (error) {
      console.warn(`Failed to write function log (${step}): ${error.message}`)
    }
  } catch (error) {
    console.warn(`Failed to write function log (${step}): ${describeError(error)}`)
  }
}

/** Resolves after `ms` milliseconds. */
const pause = (ms: number): Promise<void> =>
  new Promise<void>((resolve) => setTimeout(resolve, ms))

/**
 * HTTP entry point for batch geocoding.
 *
 * Selects institutes that have an `import_id` (and, unless `force_refresh` is
 * set, no stored coordinates), geocodes each address through SearXNG, writes
 * the coordinates back to `institutes.geo_coordinates`, and returns a
 * per-institute result list with a summary.
 *
 * Responses: 200 with the summary, 400 for an unreadable request body,
 * 500 for configuration or database failures.
 */
serve(async (req: Request) => {
  // Handle CORS preflight requests
  if (req.method === 'OPTIONS') {
    return new Response('ok', { headers: corsHeaders })
  }

  try {
    // Get environment variables
    const supabaseUrl = Deno.env.get('SUPABASE_URL')
    const supabaseServiceKey = Deno.env.get('SUPABASE_SERVICE_ROLE_KEY')
    const searxngUrl = Deno.env.get('SEARXNG_URL') || 'https://search.kevlarai.com'

    if (!supabaseUrl || !supabaseServiceKey) {
      throw new Error('Missing required environment variables')
    }

    const supabase = createClient(supabaseUrl, supabaseServiceKey)

    // A malformed body is the caller's mistake, so report it as 400 rather than 500.
    let body: BatchGeocodingRequest
    try {
      body = await readBatchRequest(req)
    } catch (error) {
      return jsonResponse(
        { error: 'Invalid request body', details: describeError(error) },
        400
      )
    }

    // Only a positive finite number is usable as a row limit; anything else uses the default.
    const limit =
      typeof body.limit === 'number' && Number.isFinite(body.limit) && body.limit >= 1
        ? Math.floor(body.limit)
        : DEFAULT_BATCH_LIMIT
    const forceRefresh = Boolean(body.force_refresh)

    // Get institutes that need geocoding
    let query = supabase
      .from('institutes')
      .select('id, name, address, geo_coordinates')
      .not('import_id', 'is', null)

    if (!forceRefresh) {
      // Only get institutes without coordinates or with empty coordinates
      query = query.or('geo_coordinates.is.null,geo_coordinates.eq.{}')
    }

    if (Array.isArray(body.institute_ids) && body.institute_ids.length > 0) {
      query = query.in('id', body.institute_ids)
    }

    const { data: institutes, error: fetchError } = await query.limit(limit)

    if (fetchError) {
      throw new Error(`Failed to fetch institutes: ${fetchError.message}`)
    }

    if (!institutes || institutes.length === 0) {
      return jsonResponse(
        {
          success: true,
          message: 'No institutes found that need geocoding',
          processed: 0,
        },
        200
      )
    }

    console.log(`Processing ${institutes.length} institutes for geocoding`)

    const results: GeocodingResult[] = []
    let successCount = 0
    let errorCount = 0

    // Shorter pause for large batches; institutes are still processed one at a time
    // to avoid overwhelming the SearXNG service.
    const delayMs = institutes.length > 200 ? 50 : 100
    let geocodeAttempts = 0

    for (const institute of institutes) {
      try {
        const address = institute.address as Record<string, unknown> | null | undefined
        if (!address) {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'No address information available',
            error: 'Missing address data',
          })
          errorCount++
          continue
        }

        // Build search query from address components
        const addressParts = [
          address.street,
          address.town,
          address.county,
          address.postcode,
          address.country,
        ].filter(Boolean)

        if (addressParts.length === 0) {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'No valid address components found',
            error: 'Empty address parts',
          })
          errorCount++
          continue
        }

        // NOTE: this is the full joined address; the fallback helper may have
        // matched on a shorter variant, which is not recorded here.
        const searchQuery = addressParts.join(', ')
        console.log(`Geocoding institute ${institute.id}: ${searchQuery}`)

        // Pause before every lookup except the first. Doing it here (instead of
        // at the end of the iteration) keeps the throttle in place even when the
        // previous iteration ended in the catch block below.
        if (geocodeAttempts > 0) {
          await pause(delayMs)
        }
        geocodeAttempts++

        // Query SearXNG for geocoding with fallback strategy
        const geocodingResult = await geocodeAddressWithFallback(address, searxngUrl)

        if (!geocodingResult.success || !geocodingResult.coordinates) {
          results.push({
            institute_id: institute.id,
            success: false,
            message: 'Geocoding failed',
            error: geocodingResult.error || 'Unknown error',
          })
          errorCount++
          continue
        }

        const coordinates = geocodingResult.coordinates

        // Update institute with geospatial coordinates
        const { error: updateError } = await supabase
          .from('institutes')
          .update({
            geo_coordinates: {
              latitude: coordinates.latitude,
              longitude: coordinates.longitude,
              boundingbox: coordinates.boundingbox,
              geojson: coordinates.geojson,
              osm: coordinates.osm,
              search_query: searchQuery,
              geocoded_at: new Date().toISOString(),
            },
          })
          .eq('id', institute.id)

        if (updateError) {
          throw new Error(`Failed to update institute: ${updateError.message}`)
        }

        results.push({
          institute_id: institute.id,
          success: true,
          message: 'Successfully geocoded',
          coordinates,
        })
        successCount++

        // Log the successful geocoding (best effort; cannot undo the success above)
        await writeFunctionLog(
          supabase,
          'batch_geocoding',
          'Successfully geocoded institute address in batch',
          {
            institute_id: institute.id,
            search_query: searchQuery,
            coordinates,
          }
        )
      } catch (error) {
        console.error(`Error processing institute ${institute.id}:`, error)
        results.push({
          institute_id: institute.id,
          success: false,
          message: 'Processing error',
          error: describeError(error),
        })
        errorCount++
      }
    }

    // Log the batch operation
    await writeFunctionLog(
      supabase,
      'batch_geocoding_complete',
      'Batch geocoding operation completed',
      {
        total_processed: institutes.length,
        successful: successCount,
        failed: errorCount,
        results: results,
      }
    )

    return jsonResponse(
      {
        success: true,
        message: 'Batch geocoding completed',
        summary: {
          total_processed: institutes.length,
          successful: successCount,
          failed: errorCount,
        },
        results: results,
      },
      200
    )
  } catch (error) {
    console.error('Error in batch institute geocoder:', error)

    return jsonResponse(
      {
        error: 'Internal server error',
        details: describeError(error),
      },
      500
    )
  }
})
|
|
|
|
/**
 * Geocodes a free-text address through a SearXNG instance using its `!osm` bang.
 *
 * Only the first search result is considered. The function never throws:
 * network errors, non-2xx responses and unusable payloads are all reported
 * through `{ success: false, error }`.
 *
 * @param searchQuery Address text to look up (sent as `!osm <searchQuery>`).
 * @param searxngUrl  Base URL of the SearXNG instance; trailing slashes are ignored.
 * @returns `success: true` with parsed coordinates, or `success: false` with an error message.
 */
async function geocodeAddress(searchQuery: string, searxngUrl: string): Promise<{
  success: boolean
  coordinates?: {
    latitude: number
    longitude: number
    boundingbox: string[]
    geojson?: any
    osm?: any
  }
  error?: string
}> {
  try {
    // Format search query for OSM
    const osmQuery = `!osm ${searchQuery}`
    // Strip trailing slashes so a base URL like "https://host/" does not yield "//search".
    const baseUrl = searxngUrl.replace(/\/+$/, '')
    const searchUrl = `${baseUrl}/search?q=${encodeURIComponent(osmQuery)}&format=json`

    const response = await fetch(searchUrl, {
      method: 'GET',
      headers: {
        'Accept': 'application/json',
        'User-Agent': 'ClassroomCopilot-BatchGeocoder/1.0',
      },
    })

    if (!response.ok) {
      throw new Error(`SearXNG request failed: ${response.status} ${response.statusText}`)
    }

    const data = await response.json()

    // Check the results array directly - the number_of_results field might be unreliable
    const results = Array.isArray(data?.results) ? data.results : []
    if (results.length === 0) {
      return {
        success: false,
        error: 'No results returned from SearXNG',
      }
    }

    const result = results[0]

    // Test for absence explicitly: a plain truthiness check would reject a
    // numeric 0, which is a valid latitude (equator) or longitude (prime meridian).
    const isMissing = (value: unknown): boolean => value == null || value === ''
    if (isMissing(result?.latitude) || isMissing(result?.longitude)) {
      return {
        success: false,
        error: 'Missing latitude or longitude in SearXNG response',
      }
    }

    const latitude = parseFloat(String(result.latitude))
    const longitude = parseFloat(String(result.longitude))

    // Refuse to hand NaN/Infinity to the caller, which would persist it.
    if (!Number.isFinite(latitude) || !Number.isFinite(longitude)) {
      return {
        success: false,
        error: 'Invalid latitude or longitude in SearXNG response',
      }
    }

    return {
      success: true,
      coordinates: {
        latitude,
        longitude,
        boundingbox: Array.isArray(result.boundingbox) ? result.boundingbox : [],
        geojson: result.geojson,
        osm: result.osm,
      },
    }
  } catch (error) {
    console.error('Geocoding error:', error)
    return {
      success: false,
      // The caught value is not guaranteed to be an Error instance.
      error: error instanceof Error ? error.message : String(error),
    }
  }
}
|
|
|
|
/**
 * Geocodes a structured address by trying progressively different queries
 * until one returns coordinates.
 *
 * Queries are attempted in this order, each only when all of its fields are
 * present on `address`:
 *   1. street, town, county, postcode
 *   2. town, county, postcode
 *   3. postcode
 *   4. town, postcode
 *
 * NOTE(review): step 4 is more specific than step 3 yet runs after it; the
 * order is kept as found - confirm whether that is intentional.
 *
 * @param address    Object with optional `street`, `town`, `county`, `postcode` fields.
 * @param searxngUrl Base URL of the SearXNG instance, forwarded to `geocodeAddress`.
 * @returns The first successful lookup, or `success: false` with an error that
 *          includes the last underlying failure when one was reported.
 */
async function geocodeAddressWithFallback(address: any, searxngUrl: string): Promise<{
  success: boolean
  coordinates?: {
    latitude: number
    longitude: number
    boundingbox: string[]
    geojson?: any
    osm?: any
  }
  error?: string
}> {
  const exhaustedMessage = 'No coordinates found with any address combination'

  // Guard against a null/undefined address so property access below cannot throw.
  if (address == null) {
    return { success: false, error: exhaustedMessage }
  }

  const strategies: ReadonlyArray<{
    attemptLabel: string
    successLabel: string
    fields: ReadonlyArray<string>
  }> = [
    {
      attemptLabel: 'full address',
      successLabel: 'Full address',
      fields: ['street', 'town', 'county', 'postcode'],
    },
    {
      attemptLabel: 'medium address',
      successLabel: 'Medium address',
      fields: ['town', 'county', 'postcode'],
    },
    {
      attemptLabel: 'postcode only',
      successLabel: 'Postcode',
      fields: ['postcode'],
    },
    {
      attemptLabel: 'simple address',
      successLabel: 'Simple address',
      fields: ['town', 'postcode'],
    },
  ]

  // Remember why the most recent attempt failed so an outage is distinguishable
  // from a genuine "no match" in the returned error.
  let lastError: string | undefined

  for (const strategy of strategies) {
    const parts = strategy.fields.map((field) => address[field])
    if (!parts.every(Boolean)) {
      continue
    }

    const query = parts.join(', ')
    console.log(`Trying ${strategy.attemptLabel}: ${query}`)

    const result = await geocodeAddress(query, searxngUrl)
    if (result.success && result.coordinates) {
      console.log(`${strategy.successLabel} geocoding successful`)
      return result
    }
    lastError = result.error ?? lastError
  }

  // All strategies failed (or none was applicable)
  return {
    success: false,
    error: lastError ? `${exhaustedMessage} (last error: ${lastError})` : exhaustedMessage,
  }
}
|