AirLibrary/Indexing/Watch/
WatchFile.rs

1//! # WatchFile
2//!
3//! ## File: Indexing/Watch/WatchFile.rs
4//!
5//! ## Role in Air Architecture
6//!
7//! Provides file watching functionality for the File Indexer service,
8//! handling file system events for incremental index updates.
9//!
10//! ## Primary Responsibility
11//!
12//! Handle file system change events and trigger index updates for
13//! created, modified, and deleted files.
14//!
15//! ## Secondary Responsibilities
16//!
17//! - File creation event handling
18//! - File modification event handling
19//! - File deletion event handling
20//! - Directory change event handling
21//! - Event debouncing for rapid changes
22//!
23//! ## Dependencies
24//!
25//! **External Crates:**
26//! - `notify` - File system watching
27//! - `tokio` - Async runtime for event handling
28//!
29//! **Internal Modules:**
30//! - `crate::Result` - Error handling type
31//! - `crate::AirError` - Error types
32//! - `super::super::FileIndex` - Index structure definitions
33//! - `super::super::Store::UpdateIndex` - Index update operations
34//!
35//! ## Dependents
36//!
37//! - `Indexing::Background::StartWatcher` - Watcher setup and management
38//! - `Indexing::mod::FileIndexer` - Main file indexer implementation
39//!
40//! ## VSCode Pattern Reference
41//!
42//! Inspired by VSCode's file watching in
43//! `src/vs/base/node/watcher/`
44//!
45//! ## Security Considerations
46//!
47//! - Path validation before watching
48//! - Symbolic link following disabled
49//! - Permission checking on watch paths
50//!
51//! ## Performance Considerations
52//!
53//! - Event debouncing prevents excessive updates
54//! - Batch processing of multiple events
55//! - Efficient event filtering
56//!
57//! ## Error Handling Strategy
58//!
59//! Event operations log warnings for individual errors and continue,
60//! ensuring a single event failure doesn't stop the watcher.
61//!
62//! ## Thread Safety
63//!
64//! Event handlers acquire write locks on shared state and process
65//! events asynchronously to avoid blocking the watcher loop.
66
67use std::path::PathBuf;
68
69use tokio::sync::{Mutex, RwLock};
70
71use crate::{
72	AirError,
73	Configuration::IndexingConfig,
74	Indexing::State::CreateState::{FileIndex, FileMetadata, SymbolInfo, SymbolLocation},
75	Result,
76};
77
78/// Handle file watcher event for incremental indexing
79///
80/// This function processes file system events and updates the index
81/// accordingly:
82/// - File Created: Index the new file
83/// - File Modified: Re-index the modified file
84/// - File Removed: Remove from index
85pub async fn HandleFileEvent(event:notify::Event, index_arc:&RwLock<FileIndex>, config:&IndexingConfig) -> Result<()> {
86	match event.kind {
87		notify::EventKind::Create(notify::event::CreateKind::File) => {
88			for path in event.paths {
89				log::debug!("[WatchFile] File created: {}", path.display());
90				let mut index = index_arc.write().await;
91				if let Err(e) = crate::Indexing::Store::UpdateIndex::UpdateSingleFile(&mut index, &path, config).await {
92					log::warn!("[WatchFile] Failed to index new file {}: {}", path.display(), e);
93				}
94			}
95		},
96		notify::EventKind::Modify(notify::event::ModifyKind::Data(_))
97		| notify::EventKind::Modify(notify::event::ModifyKind::Name(notify::event::RenameMode::Both)) => {
98			for path in event.paths {
99				log::debug!("[WatchFile] File modified: {}", path.display());
100				let mut index = index_arc.write().await;
101				if let Err(e) = crate::Indexing::Store::UpdateIndex::UpdateSingleFile(&mut index, &path, config).await {
102					log::warn!("[WatchFile] Failed to re-index modified file {}: {}", path.display(), e);
103				}
104			}
105		},
106		notify::EventKind::Remove(notify::event::RemoveKind::File) => {
107			for path in event.paths {
108				log::debug!("[WatchFile] File removed: {}", path.display());
109				let mut index = index_arc.write().await;
110				if let Err(e) = crate::Indexing::State::UpdateState::RemoveFileFromIndex(&mut index, &path) {
111					log::warn!("[WatchFile] Failed to remove file from index {}: {}", path.display(), e);
112				}
113			}
114		},
115		notify::EventKind::Create(notify::event::CreateKind::Folder) => {
116			for path in event.paths {
117				log::debug!("[WatchFile] Directory created: {}", path.display());
118				// Directories themselves don't need indexing, just their
119				// contents
120			}
121		},
122		notify::EventKind::Remove(notify::event::RemoveKind::Folder) => {
123			for path in event.paths {
124				log::debug!("[WatchFile] Directory removed: {}", path.display());
125				// Remove all files from this directory
126				let mut index = index_arc.write().await;
127				let mut paths_to_remove = Vec::new();
128				for indexed_path in index.files.keys() {
129					if indexed_path.starts_with(&path) {
130						paths_to_remove.push(indexed_path.clone());
131					}
132				}
133				for indexed_path in paths_to_remove {
134					if let Err(e) = crate::Indexing::State::UpdateState::RemoveFileFromIndex(&mut index, &indexed_path)
135					{
136						log::warn!("[WatchFile] Failed to remove file {}: {}", indexed_path.display(), e);
137					}
138				}
139			}
140		},
141		_ => {
142			// Ignore other event types
143			log::trace!("[WatchFile] Ignored event kind: {:?}", event.kind);
144		},
145	}
146
147	Ok(())
148}
149
150/// Debounced file change handler
151///
152/// Prevents rapid successive changes from causing excessive re-indexing
153pub struct DebouncedEventHandler {
154	pending_changes:Mutex<std::collections::HashMap<PathBuf, FileChangeInfo>>,
155}
156
157impl DebouncedEventHandler {
158	pub fn new() -> Self { Self { pending_changes:Mutex::new(std::collections::HashMap::new()) } }
159
160	/// Add a file change event
161	pub async fn AddChange(&self, path:PathBuf, change_type:FileChangeType) {
162		let mut pending = self.pending_changes.lock().await;
163
164		let now = std::time::Instant::now();
165
166		match pending.get_mut(&path) {
167			Some(change_info) => {
168				change_info.last_seen = now;
169				change_info.change_type = change_type.max(change_info.change_type);
170				change_info.suppressed_count += 1;
171			},
172			None => {
173				pending.insert(
174					path.clone(),
175					FileChangeInfo { path:path.clone(), change_type, last_seen:now, suppressed_count:0 },
176				);
177			},
178		}
179	}
180
181	/// Process pending changes older than the specified duration
182	pub async fn ProcessPendingChanges(
183		&self,
184		age_cutoff:std::time::Duration,
185		index_arc:&RwLock<FileIndex>,
186		config:&IndexingConfig,
187	) -> Result<Vec<ProcessedChange>> {
188		let mut processed = Vec::new();
189		let expired_paths = {
190			let mut pending = self.pending_changes.lock().await;
191			let mut expired = Vec::new();
192
193			for (path, change_info) in pending.iter() {
194				if change_info.last_seen.elapsed() >= age_cutoff {
195					expired.push((path.clone(), change_info.clone()));
196				}
197			}
198
199			// Remove expired entries
200			for (path, _) in &expired {
201				pending.remove(path);
202			}
203
204			expired
205		};
206
207		for (path, change_info) in expired_paths {
208			log::debug!(
209				"[WatchFile] Processing debounced change for {} (suppressed: {})",
210				path.display(),
211				change_info.suppressed_count
212			);
213
214			let result = match change_info.change_type {
215				FileChangeType::Created => {
216					let mut index = index_arc.write().await;
217					crate::Indexing::Store::UpdateIndex::UpdateSingleFile(&mut index, &path, config)
218						.await
219						.map(|_| ProcessedChangeResult::Success)
220						.unwrap_or(ProcessedChangeResult::Failed)
221				},
222				FileChangeType::Modified => {
223					let mut index = index_arc.write().await;
224					super::super::Store::UpdateIndex::UpdateSingleFile(&mut index, &path, config)
225						.await
226						.map(|_| ProcessedChangeResult::Success)
227						.unwrap_or(ProcessedChangeResult::Failed)
228				},
229				FileChangeType::Removed => {
230					let mut index = index_arc.write().await;
231					crate::Indexing::State::UpdateState::RemoveFileFromIndex(&mut index, &path)
232						.map(|_| ProcessedChangeResult::Success)
233						.unwrap_or(ProcessedChangeResult::Failed)
234				},
235			};
236
237			processed.push(ProcessedChange {
238				path,
239				change_type:change_info.change_type,
240				suppressed_count:change_info.suppressed_count,
241				result,
242			});
243		}
244
245		Ok(processed)
246	}
247
248	/// Clear all pending changes
249	pub async fn ClearPending(&self) -> usize {
250		let mut pending = self.pending_changes.lock().await;
251		let count = pending.len();
252		pending.clear();
253		count
254	}
255
256	/// Get the number of pending changes
257	pub async fn PendingCount(&self) -> usize {
258		let pending = self.pending_changes.lock().await;
259		pending.len()
260	}
261}
262
263impl Default for DebouncedEventHandler {
264	fn default() -> Self { Self::new() }
265}
266
/// File change type for debouncing
///
/// The variants are declared in ascending precedence order, and the derived
/// `Ord` follows that declaration order: `Created < Modified < Removed`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum FileChangeType {
	Created,
	Modified,
	Removed,
}

impl FileChangeType {
	/// Return whichever of the two change types has the higher precedence.
	///
	/// Removed takes precedence over Modified, which takes precedence over
	/// Created. This is exactly the derived ordering, so the comparison is
	/// delegated to `Ord::max`.
	pub fn max(self, other:Self) -> Self { Ord::max(self, other) }
}
285
286/// File change information for debouncing
287#[derive(Debug, Clone)]
288struct FileChangeInfo {
289	path:PathBuf,
290	change_type:FileChangeType,
291	last_seen:std::time::Instant,
292	suppressed_count:usize,
293}
294
/// Result of processing a debounced change
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare outcomes
/// directly with `==` or `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ProcessedChangeResult {
	/// The change was applied to the index.
	Success,
	/// Applying the change to the index returned an error.
	Failed,
}
301
302/// Describes a processed file change
303#[derive(Debug, Clone)]
304pub struct ProcessedChange {
305	pub path:PathBuf,
306	pub change_type:FileChangeType,
307	pub suppressed_count:usize,
308	pub result:ProcessedChangeResult,
309}
310
311/// Convert notify event kind to FileChangeType
312pub fn EventKindToChangeType(kind:notify::EventKind) -> Option<FileChangeType> {
313	match kind {
314		notify::EventKind::Create(_) => Some(FileChangeType::Created),
315		notify::EventKind::Modify(_) => Some(FileChangeType::Modified),
316		notify::EventKind::Remove(_) => Some(FileChangeType::Removed),
317		_ => None,
318	}
319}
320
/// Check if a path should be watched (not in ignored paths)
///
/// Returns `false` as soon as one pattern matches `path`. Each pattern is
/// interpreted as follows:
/// - An empty pattern never matches (it would otherwise match every path).
/// - A pattern containing a path separator (`/` or `\`) is matched as a plain
///   substring of the lossily-decoded path.
/// - Any other pattern is compared against each path component on its own.
///   Without `*` the component must equal the pattern, so `env` ignores an
///   `env` directory but not `environment.rs`. The first `*` in a pattern
///   matches any run of characters within one component, so `*.pyc` ignores
///   `cache.pyc`; any further `*` is taken literally.
pub fn ShouldWatchPath(path:&PathBuf, ignored_patterns:&[String]) -> bool {
	let is_ignored_by = |pattern:&str| -> bool {
		if pattern.is_empty() {
			return false;
		}

		if pattern.contains('/') || pattern.contains('\\') {
			return path.to_string_lossy().contains(pattern);
		}

		path.iter()
			.any(|component| ComponentMatchesPattern(&component.to_string_lossy(), pattern))
	};

	!ignored_patterns.iter().any(|pattern| is_ignored_by(pattern))
}

/// Match one path component against a separator-free ignore pattern.
fn ComponentMatchesPattern(component:&str, pattern:&str) -> bool {
	match pattern.split_once('*') {
		// The length check keeps prefix and suffix from overlapping, e.g.
		// pattern `ab*ba` must not match the component `aba`.
		Some((prefix, suffix)) => {
			component.len() >= prefix.len() + suffix.len()
				&& component.starts_with(prefix)
				&& component.ends_with(suffix)
		},
		None => component == pattern,
	}
}
334
/// Build the default list of ignore patterns used for file watching.
///
/// The list covers dependency and build output directories, version-control
/// metadata, Python caches and virtual environments, editor settings, and
/// OS or editor scratch files. A fresh `Vec` is returned on every call.
pub fn GetDefaultIgnoredPatterns() -> Vec<String> {
	const DEFAULT_IGNORED:&[&str] = &[
		"node_modules",
		"target",
		".git",
		".svn",
		".hg",
		".bzr",
		"dist",
		"build",
		".next",
		".nuxt",
		"__pycache__",
		"*.pyc",
		".venv",
		"venv",
		"env",
		".env",
		".idea",
		".vscode",
		".DS_Store",
		"Thumbs.db",
		"*.swp",
		"*.tmp",
	];

	DEFAULT_IGNORED.iter().map(|pattern| String::from(*pattern)).collect()
}
362
363/// Validate that a watch path exists and is accessible
364pub fn ValidateWatchPath(path:&PathBuf) -> Result<()> {
365	if !path.exists() {
366		return Err(AirError::FileSystem(format!("Watch path does not exist: {}", path.display())));
367	}
368
369	if !path.is_dir() {
370		return Err(AirError::FileSystem(format!(
371			"Watch path is not a directory: {}",
372			path.display()
373		)));
374	}
375
376	// Check read access
377	std::fs::read_dir(path)
378		.map_err(|e| AirError::FileSystem(format!("Cannot access watch path {}: {}", path.display(), e)))?;
379
380	Ok(())
381}