AirLibrary/Indexing/Scan/
ScanDirectory.rs1use std::{collections::HashSet, path::Path, sync::Arc};
68
69use tokio::sync::{RwLock, Semaphore};
70
71use crate::{
72 AirError,
73 Configuration::IndexingConfig,
74 Indexing::{
75 Scan::ScanFile::{IndexFileInternal, ValidateFileAccess},
76 State::CreateState::{FileIndex, FileMetadata, SymbolInfo, SymbolLocation},
77 },
78 Result,
79};
80
/// Counters describing the outcome of a directory scan.
///
/// `Default` produces an all-zero value, which is the natural starting point
/// when the results of several scans are accumulated into one.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ScanDirectoryResult {
	/// Number of files accepted for indexing.
	pub files_found:u32,
	/// Number of entries that were deliberately left out of the scan.
	pub files_skipped:u32,
	/// Number of entries that could not be processed.
	pub errors:u32,
	/// Combined size, in bytes, of the accepted files.
	pub total_size:u64,
}
93
94pub async fn ScanDirectory(
104 path:&str,
105 patterns:Vec<String>,
106 config:&IndexingConfig,
107 max_parallel:usize,
108) -> Result<(Vec<std::path::PathBuf>, ScanDirectoryResult)> {
109 let directory_path = crate::Configuration::ConfigurationManager::ExpandPath(path)?;
110
111 if !directory_path.exists() {
113 return Err(AirError::FileSystem(format!("Directory does not exist: {}", path)));
114 }
115
116 if !directory_path.is_dir() {
117 return Err(AirError::FileSystem(format!("Path is not a directory: {}", path)));
118 }
119
120 CheckDirectoryPermissions(&directory_path).await?;
122
123 let include_patterns = if patterns.is_empty() { config.FileTypes.clone() } else { patterns };
125
126 let walker = ignore::WalkBuilder::new(&directory_path)
128 .max_depth(Some(10)) .hidden(false)
130 .follow_links(false) .build();
132
133 let mut files_to_scan:Vec<std::path::PathBuf> = Vec::new();
134 let mut files_found = 0u32;
135 let mut files_skipped = 0u32;
136 let mut errors = 0u32;
137 let mut total_size = 0u64;
138
139 for result in walker {
141 match result {
142 Ok(entry) => {
143 if entry.file_type().map(|ft| ft.is_file()).unwrap_or(false) {
145 let file_path = entry.path().to_path_buf();
146
147 if entry.path_is_symlink() {
149 log::debug!("[ScanDirectory] Skipping symlink: {}", file_path.display());
150 files_skipped += 1;
151 continue;
152 }
153
154 if let Ok(metadata) = entry.metadata() {
156 let file_size = metadata.len();
157
158 if file_size > config.MaxFileSizeMb as u64 * 1024 * 1024 {
159 log::warn!(
160 "[ScanDirectory] Skipping oversized file: {} ({} bytes)",
161 file_path.display(),
162 file_size
163 );
164 files_skipped += 1;
165 continue;
166 }
167
168 if MatchesPatterns(&file_path, &include_patterns) {
170 if ValidateFileAccess(&file_path).await {
172 files_to_scan.push(file_path);
173 files_found += 1;
174 total_size += file_size;
175 } else {
176 log::warn!(
177 "[ScanDirectory] Cannot access file (permission denied): {}",
178 file_path.display()
179 );
180 errors += 1;
181 }
182 } else {
183 files_skipped += 1;
184 }
185 } else {
186 errors += 1;
187 }
188 }
189 },
190 Err(e) => {
191 log::warn!("[ScanDirectory] Error walking directory: {}", e);
192 errors += 1;
193 },
194 }
195 }
196
197 log::info!(
198 "[ScanDirectory] Directory scan completed: {} files, {} skipped, {} errors, {} bytes",
199 files_found,
200 files_skipped,
201 errors,
202 total_size
203 );
204
205 Ok((
206 files_to_scan,
207 ScanDirectoryResult { files_found, files_skipped, errors, total_size },
208 ))
209}
210
211pub async fn ScanAndRemoveDeleted(index:&mut FileIndex, directory_path:&Path) -> Result<u32> {
213 let mut paths_to_remove = Vec::new();
214 let all_paths:Vec<_> = index.files.keys().cloned().collect();
215
216 for path in all_paths {
217 if !path.exists() && path.starts_with(directory_path) {
218 paths_to_remove.push(path.clone());
219 }
220 }
221
222 let removed_count = paths_to_remove.len();
223 for path in paths_to_remove {
224 index.files.remove(&path);
225 index.file_symbols.remove(&path);
226
227 for (_, locations) in index.symbol_index.iter_mut() {
229 locations.retain(|loc| loc.file_path != path);
230 }
231
232 for (_, files) in index.content_index.iter_mut() {
234 files.retain(|p| p != &path);
235 }
236 }
237
238 Ok(removed_count as u32)
239}
240
241async fn CheckDirectoryPermissions(path:&Path) -> Result<()> {
243 tokio::task::spawn_blocking({
244 let path = path.to_path_buf();
245 move || {
246 std::fs::read_dir(&path)
247 .map_err(|e| AirError::FileSystem(format!("Cannot read directory {}: {}", path.display(), e)))?;
248 Ok(())
249 }
250 })
251 .await?
252}
253
254pub fn MatchesPatterns(file_path:&std::path::Path, patterns:&[String]) -> bool {
256 if patterns.is_empty() {
257 return true;
258 }
259
260 let file_name = file_path.file_name().unwrap_or_default().to_string_lossy().to_string();
261
262 for pattern in patterns {
263 if MatchesPattern(&file_name, pattern) {
264 return true;
265 }
266 }
267
268 false
269}
270
/// Tests a file name against a single pattern.
///
/// Two pattern forms are understood:
///
/// * `*.ext` — matches any name that ends with `.ext`. The dot is part of the
///   required suffix, so `*.rs` matches `main.rs` but not `cars`.
/// * anything else — must equal the file name exactly.
pub fn MatchesPattern(filename:&str, pattern:&str) -> bool {
	match pattern.strip_prefix('*') {
		// Keep the leading '.' in the suffix; comparing only the bare extension
		// would accept names that merely end with the same letters.
		Some(suffix) if suffix.starts_with('.') => filename.ends_with(suffix),
		_ => filename == pattern,
	}
}
280
/// Returns the built-in list of exclude patterns as owned strings, in a fixed
/// order.
pub fn GetDefaultExcludePatterns() -> Vec<String> {
	const DEFAULT_EXCLUDES:[&str; 20] = [
		"node_modules",
		"target",
		".git",
		".svn",
		".hg",
		".bzr",
		"dist",
		"build",
		".next",
		".nuxt",
		"__pycache__",
		"*.pyc",
		".venv",
		"venv",
		"env",
		".env",
		".idea",
		".vscode",
		".DS_Store",
		"Thumbs.db",
	];

	DEFAULT_EXCLUDES.iter().map(|entry| (*entry).to_owned()).collect()
}
306
307pub async fn ScanDirectoriesParallel(
309 directories:Vec<String>,
310 patterns:Vec<String>,
311 config:&IndexingConfig,
312 max_parallel:usize,
313) -> Result<(Vec<std::path::PathBuf>, ScanDirectoryResult)> {
314 let semaphore = Arc::new(Semaphore::new(max_parallel));
315 let mut all_files = Vec::new();
316 let mut total_result = ScanDirectoryResult { files_found:0, files_skipped:0, errors:0, total_size:0 };
317
318 let mut scan_tasks = Vec::new();
319
320 for directory in directories {
321 let permit = semaphore.clone().acquire_owned().await.unwrap();
322 let config_clone = config.clone();
323 let patterns_clone = patterns.clone();
324
325 let task = tokio::spawn(async move {
326 let _permit = permit;
327 ScanDirectory(&directory, patterns_clone, &config_clone, max_parallel).await
328 });
329
330 scan_tasks.push(task);
331 }
332
333 for task in scan_tasks {
335 match task.await {
336 Ok(Ok((files, result))) => {
337 all_files.extend(files);
338 total_result.files_found += result.files_found;
339 total_result.files_skipped += result.files_skipped;
340 total_result.errors += result.errors;
341 total_result.total_size += result.total_size;
342 },
343 Ok(Err(e)) => {
344 log::error!("[ScanDirectory] Parallel scan failed: {}", e);
345 total_result.errors += 1;
346 },
347 Err(e) => {
348 log::error!("[ScanDirectory] Parallel task panicked: {}", e);
349 total_result.errors += 1;
350 },
351 }
352 }
353
354 Ok((all_files, total_result))
355}
356
357pub async fn GetDirectoryStatistics(path:&str, max_depth:Option<usize>) -> Result<DirectoryStatistics> {
359 let directory_path = crate::Configuration::ConfigurationManager::ExpandPath(path)?;
360
361 if !directory_path.exists() || !directory_path.is_dir() {
362 return Err(AirError::FileSystem(format!("Invalid directory: {}", path)));
363 }
364
365 let mut file_count = 0u64;
366 let mut total_size = 0u64;
367 let mut directory_count = 0u64;
368 let mut hidden_count = 0u64;
369
370 let walker = ignore::WalkBuilder::new(&directory_path)
371 .max_depth(max_depth)
372 .hidden(true)
373 .follow_links(false)
374 .build();
375
376 for entry in walker.flatten() {
377 let file_type = entry.file_type().expect("Failed to get file type");
378
379 if file_type.is_file() {
380 file_count += 1;
381 if let Ok(metadata) = entry.metadata() {
382 total_size += metadata.len();
383 }
384 } else if file_type.is_dir() {
385 directory_count += 1;
386 }
387
388 if entry.depth() > 0
389 && entry
390 .path()
391 .components()
392 .any(|c| c.as_os_str().to_string_lossy().starts_with('.'))
393 {
394 hidden_count += 1;
395 }
396 }
397
398 Ok(DirectoryStatistics { file_count, directory_count, hidden_count, total_size })
399}
400
/// Summary counts for a directory tree.
///
/// `Default` produces an all-zero value.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct DirectoryStatistics {
	/// Number of regular files encountered.
	pub file_count:u64,
	/// Number of directories encountered.
	pub directory_count:u64,
	/// Number of entries considered hidden (a path component starts with `.`).
	pub hidden_count:u64,
	/// Combined size of the counted files, in bytes.
	pub total_size:u64,
}