AirLibrary/Indexing/
mod.rs

1//! # File Indexing and Search Service
2//!
3//! ## File: Indexing/mod.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides comprehensive file indexing, search, and content analysis
8//! capabilities for the Land ecosystem, inspired by and compatible with
9//! Visual Studio Code's search service.
10//!
11//! ## Primary Responsibility
12//!
13//! Facade module for the Indexing service, exposing the public API for
14//! file indexing, search, and symbol extraction operations.
15//!
16//! ## Secondary Responsibilities
17//!
18//! - Re-export public types from submodules
19//! - Provide unified FileIndexer API
20//! - Coordinate between indexing subsystems
21//!
22//! ## Dependencies
23//!
24//! **External Crates:**
25//! - `regex` - Regular expression search patterns
26//! - `serde` - Serialization for index storage
27//! - `tokio` - Async runtime for all operations
28//! - `notify` - File system watching
29//! - `chrono` - Timestamp management
30//!
31//! **Internal Modules:**
32//! - `crate::Result` - Error handling type
33//! - `crate::AirError` - Error types
34//! - `crate::ApplicationState::ApplicationState` - Application state
35//! - `crate::Configuration::ConfigurationManager` - Configuration management
36//!
37//! ## Dependents
38//!
39//! - `Indexing::FileIndexer` - Main indexer implementation
40//! - `Vine::Server::AirVinegRPCService` - gRPC integration
41//!
42//! ## VSCode Integration
43//!
44//! This service integrates with VSCode's search and file service architecture:
45//!
46//! - References: vs/workbench/services/search
47//! - File Service: vs/workbench/services/files
48//!
49//! The indexing system supports VSCode features:
50//! - **Outline View**: Symbol extraction for class/function navigation
51//! - **Go to Symbol**: Cross-file symbol search and lookup
52//! - **Search Integration**: File content and name search with regex support
53//! - **Workspace Search**: Multi-workspace index sharing
54//!
55//! ## TODO
56//!
57//! - [ ] Implement full ripgrep integration for ultra-fast text search
58//! - [ ] Add project-level search with workspace awareness
59//! - [ ] Implement search query caching
60//! - [ ] Add fuzzy search with typo tolerance
61//! - [ ] Implement search history and recent queries
62//! - [ ] Add search result preview with context
63//! - [ ] Implement parallel indexing for large directories
64
65// Modules - file-based (no inline definitions)
66pub mod State;
67pub mod Scan;
68pub mod Process;
69pub mod Language;
70pub mod Store;
71pub mod Watch;
72pub mod Background;
73
74// Import types and functions needed for the FileIndexer implementation
75use std::{collections::HashMap, path::PathBuf, sync::Arc};
76
77use tokio::sync::{Mutex, RwLock};
78
79use crate::{
80	AirError,
81	ApplicationState::ApplicationState,
82	Configuration::ConfigurationManager,
83	Indexing::{
84		Process::ExtractSymbols::{ExtractSymbols, GroupSymbolsByKind, SymbolStatistics},
85		Scan::{
86			ScanDirectory::{ScanAndRemoveDeleted, ScanDirectoriesParallel},
87			ScanFile::IndexFileInternal,
88		},
89		State::UpdateState::{UpdateIndexMetadata, ValidateIndexConsistency},
90		Store::{
91			QueryIndex::{PaginatedSearchResults, QueryIndexSearch, SearchQuery},
92			StoreEntry::{BackupCorruptedIndex, EnsureIndexDirectory, LoadOrCreateIndex, SaveIndex},
93			UpdateIndex::UpdateFileContent,
94		},
95	},
96	Result,
97};
98// Import types from submodules with explicit full paths
99use crate::Indexing::State::CreateState::{CreateNewIndex, FileIndex, FileMetadata, SymbolInfo, SymbolLocation};
100
/// Upper bound on concurrently running indexing operations.
///
/// Used as the permit count of the `FileIndexer` semaphore and passed to
/// `ScanDirectoriesParallel` when a directory is scanned.
const MAX_PARALLEL_INDEXING:usize = 10;
103
/// Summary statistics produced by one directory indexing run.
#[derive(Clone, Debug)]
pub struct IndexResult {
	/// Count of files that were indexed without error.
	pub files_indexed:u32,
	/// Combined size, in bytes, of the successfully indexed files.
	pub total_size:u64,
	/// Elapsed wall-clock time of the run, in seconds.
	pub duration_seconds:f64,
	/// Total number of symbols extracted from the indexed files.
	pub symbols_extracted:u32,
	/// Count of files whose indexing failed.
	pub files_with_errors:u32,
}
118
119/// Index statistics
120#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
121pub struct IndexStatistics {
122	pub file_count:u32,
123	pub total_size:u64,
124	pub total_symbols:u32,
125	pub language_counts:HashMap<String, u32>,
126	pub last_updated:chrono::DateTime<chrono::Utc>,
127	pub index_version:String,
128}
129
130/// File indexer implementation with comprehensive search capabilities
131///
132/// This indexer provides:
133/// - Incremental file watching with real-time updates
134/// - Multi-mode search (literal, regex, fuzzy)
135/// - Symbol extraction for VSCode Outline View
136/// - Language detection for syntax highlighting
137/// - Index corruption detection and recovery
138/// - Parallel indexing with resource limits
139pub struct FileIndexer {
140	/// Application state
141	AppState:Arc<ApplicationState>,
142
143	/// File index with metadata and symbols
144	file_index:Arc<RwLock<FileIndex>>,
145
146	/// Index storage directory
147	index_directory:PathBuf,
148
149	/// File watcher for incremental updates
150	file_watcher:Arc<Mutex<Option<notify::RecommendedWatcher>>>,
151
152	/// Semaphore for limiting parallel indexing operations
153	indexing_semaphore:Arc<tokio::sync::Semaphore>,
154
155	/// Index corruption detection state
156	corruption_detected:Arc<Mutex<bool>>,
157}
158
159impl FileIndexer {
160	/// Create a new file indexer with comprehensive setup
161	///
162	/// Initializes the indexer with:
163	/// - Index directory creation
164	/// - Existing index loading or fresh creation
165	/// - Index corruption detection
166	/// - Service status initialization
167	pub async fn new(AppState:Arc<ApplicationState>) -> Result<Self> {
168		let config = &AppState.Configuration.Indexing;
169
170		// Expand index directory path with validation
171		let index_directory = Self::ValidateAndExpandPath(&config.IndexDirectory)?;
172
173		// Create index directory if it doesn't exist with error handling
174		EnsureIndexDirectory(&index_directory).await?;
175
176		// Load or create index with corruption detection
177		let file_index = LoadOrCreateIndex(&index_directory).await?;
178
179		let indexer = Self {
180			AppState:AppState.clone(),
181			file_index:Arc::new(RwLock::new(file_index)),
182			index_directory:index_directory.clone(),
183			file_watcher:Arc::new(Mutex::new(None)),
184			indexing_semaphore:Arc::new(tokio::sync::Semaphore::new(MAX_PARALLEL_INDEXING)),
185			corruption_detected:Arc::new(Mutex::new(false)),
186		};
187
188		// Verify index integrity
189		indexer.VerifyIndexIntegrity().await?;
190
191		// Initialize service status
192		indexer
193			.AppState
194			.UpdateServiceStatus("indexing", crate::ApplicationState::ServiceStatus::Running)
195			.await
196			.map_err(|e| AirError::Internal(e.to_string()))?;
197
198		log::info!("[FileIndexer] Initialized with index directory: {}", index_directory.display());
199
200		Ok(indexer)
201	}
202
203	/// Validate and expand path with traversal protection
204	fn ValidateAndExpandPath(path:&str) -> Result<PathBuf> {
205		let expanded = ConfigurationManager::ExpandPath(path)?;
206
207		// Prevent path traversal attacks
208		let path_str = expanded.to_string_lossy();
209		if path_str.contains("..") {
210			return Err(AirError::FileSystem("Path contains invalid traversal sequence".to_string()));
211		}
212
213		Ok(expanded)
214	}
215
216	/// Verify index integrity and detect corruption
217	async fn VerifyIndexIntegrity(&self) -> Result<()> {
218		let index = self.file_index.read().await;
219
220		// Check consistency
221		ValidateIndexConsistency(&index)?;
222
223		// Verify all indexed files exist
224		let mut missing_files = 0;
225		for file_path in index.files.keys() {
226			if !file_path.exists() {
227				missing_files += 1;
228			}
229		}
230
231		if missing_files > 0 {
232			log::warn!("[FileIndexer] Found {} missing files in index", missing_files);
233		}
234
235		log::info!("[FileIndexer] Index integrity verified successfully");
236
237		Ok(())
238	}
239
240	/// Index a directory with comprehensive validation and parallel processing
241	pub async fn IndexDirectory(&self, path:String, patterns:Vec<String>) -> Result<IndexResult> {
242		let start_time = std::time::Instant::now();
243
244		log::info!("[FileIndexer] Starting directory index: {}", path);
245
246		let config = &self.AppState.Configuration.Indexing;
247
248		// Scan directory
249		let (files_to_index, scan_result) =
250			ScanDirectoriesParallel(vec![path.clone()], patterns.clone(), config, MAX_PARALLEL_INDEXING).await?;
251
252		// Index files in parallel
253		let index_arc = self.file_index.clone();
254		let semaphore = self.indexing_semaphore.clone();
255		let config_clone = config.clone();
256		let mut index_tasks = Vec::new();
257
258		for file_path in files_to_index {
259			let permit = semaphore.clone().acquire_owned().await.unwrap();
260			let index_ref = index_arc.clone();
261			let config_for_task = config_clone.clone();
262
263			let task = tokio::spawn(async move {
264				let _permit = permit;
265				IndexFileInternal(&file_path, &config_for_task, &index_ref, &[]).await
266			});
267
268			index_tasks.push(task);
269		}
270
271		// Collect results
272		let mut index = self.file_index.write().await;
273		let mut indexed_paths = std::collections::HashSet::new();
274		let mut files_indexed = 0u32;
275		let mut total_size = 0u64;
276		let mut symbols_extracted = 0u32;
277		let mut files_with_errors = 0u32;
278
279		for task in index_tasks {
280			match task.await {
281				Ok(Ok((metadata, symbols))) => {
282					let file_path = metadata.path.clone();
283
284					index.files.insert(file_path.clone(), metadata.clone());
285					indexed_paths.insert(file_path.clone());
286
287					// Index content for search
288					if let Err(e) = UpdateFileContent(&mut index, &file_path, &metadata).await {
289						log::warn!("[FileIndexer] Failed to index content for {}: {}", file_path.display(), e);
290					}
291
292					// Index symbols
293					index.file_symbols.insert(file_path.clone(), symbols.clone());
294					symbols_extracted += symbols.len() as u32;
295
296					// Update symbol index
297					for symbol in symbols {
298						index
299							.symbol_index
300							.entry(symbol.name.clone())
301							.or_insert_with(Vec::new)
302							.push(SymbolLocation { file_path:file_path.clone(), line:symbol.line, symbol });
303					}
304
305					files_indexed += 1;
306					total_size += metadata.size;
307				},
308				Ok(Err(_)) => {
309					files_with_errors += 1;
310				},
311				Err(e) => {
312					log::error!("[FileIndexer] Indexing task failed: {}", e);
313					files_with_errors += 1;
314				},
315			}
316		}
317
318		// Remove files that were indexed before but no longer exist
319		ScanAndRemoveDeleted(&mut index, &Self::ValidateAndExpandPath(&path)?).await?;
320
321		// Update index metadata
322		UpdateIndexMetadata(&mut index)?;
323
324		// Save index to disk
325		SaveIndex(&self.index_directory, &index).await?;
326
327		let duration = start_time.elapsed().as_secs_f64();
328
329		log::info!(
330			"[FileIndexer] Indexing completed: {} files, {} bytes, {} symbols, {} errors in {:.2}s",
331			files_indexed,
332			total_size,
333			symbols_extracted,
334			files_with_errors,
335			duration
336		);
337
338		Ok(IndexResult {
339			files_indexed,
340			total_size,
341			duration_seconds:duration,
342			symbols_extracted,
343			files_with_errors,
344		})
345	}
346
347	/// Search files with multiple modes
348	pub async fn SearchFiles(
349		&self,
350		query:SearchQuery,
351		path:Option<String>,
352		language:Option<String>,
353	) -> Result<PaginatedSearchResults> {
354		let index = self.file_index.read().await;
355		QueryIndexSearch(&index, query, path, language).await
356	}
357
358	/// Search symbols across all files (for VSCode Go to Symbol)
359	pub async fn SearchSymbols(&self, query:&str, max_results:u32) -> Result<Vec<SymbolInfo>> {
360		let index = self.file_index.read().await;
361		let query_lower = query.to_lowercase();
362		let mut results = Vec::new();
363
364		for (symbol_name, locations) in &index.symbol_index {
365			if symbol_name.to_lowercase().contains(&query_lower) {
366				for loc in locations.iter().take(max_results as usize) {
367					results.push(loc.symbol.clone());
368					if results.len() >= max_results as usize {
369						break;
370					}
371				}
372			}
373		}
374
375		Ok(results)
376	}
377
378	/// Get symbols for a specific file (for VSCode Outline View)
379	pub async fn GetFileSymbols(&self, file_path:&PathBuf) -> Result<Vec<SymbolInfo>> {
380		let index = self.file_index.read().await;
381		Ok(index.file_symbols.get(file_path).cloned().unwrap_or_default())
382	}
383
384	/// Get file information
385	pub async fn GetFileInfo(&self, path:String) -> Result<Option<FileMetadata>> {
386		let file_path = Self::ValidateAndExpandPath(&path)?;
387		let index = self.file_index.read().await;
388
389		Ok(index.files.get(&file_path).cloned())
390	}
391
392	/// Get index statistics
393	pub async fn GetIndexStatistics(&self) -> Result<IndexStatistics> {
394		let index = self.file_index.read().await;
395
396		let mut language_counts:HashMap<String, u32> = HashMap::new();
397		let total_size = index.files.values().map(|m| m.size).sum();
398		let total_symbols = index.files.values().map(|m| m.symbol_count).sum();
399
400		for metadata in index.files.values() {
401			if let Some(lang) = &metadata.language {
402				*language_counts.entry(lang.clone()).or_insert(0) += 1;
403			}
404		}
405
406		Ok(IndexStatistics {
407			file_count:index.files.len() as u32,
408			total_size,
409			total_symbols,
410			language_counts,
411			last_updated:index.last_updated,
412			index_version:index.index_version.clone(),
413		})
414	}
415
416	/// Recover corrupted index
417	pub async fn recover_from_corruption(&self) -> Result<()> {
418		log::info!("[FileIndexer] Recovering from corrupted index...");
419
420		// Backup corrupted index
421		BackupCorruptedIndex(&self.index_directory).await?;
422
423		// Create new index
424		let new_index = CreateNewIndex();
425		*self.file_index.write().await = new_index;
426
427		// Clear corruption flag
428		*self.corruption_detected.lock().await = false;
429
430		log::info!("[FileIndexer] Index recovery completed");
431
432		Ok(())
433	}
434}
435
436impl Clone for FileIndexer {
437	fn clone(&self) -> Self {
438		Self {
439			AppState:self.AppState.clone(),
440			file_index:self.file_index.clone(),
441			index_directory:self.index_directory.clone(),
442			file_watcher:self.file_watcher.clone(),
443			indexing_semaphore:self.indexing_semaphore.clone(),
444			corruption_detected:self.corruption_detected.clone(),
445		}
446	}
447}