# myapp/urls/urls_routes.py
"""Flask routes for creating, listing, reading, updating, deleting, processing
and searching URL entries that belong to projects.

Every view is wrapped in ``token_required`` and checks that the caller is the
owner or a collaborator of the project that contains the URL.
"""

import datetime
import logging
from flask import request, jsonify, current_app, has_app_context  # Flask utilities
from bson.objectid import ObjectId, InvalidId  # For MongoDB ObjectIds
from collections import defaultdict  # Potentially useful for keyword aggregation etc.
from functools import wraps  # For creating dummy decorators
import re  # For escaping regex characters in search

# --- Local Blueprint Import ---
from . import bp  # Import the 'bp' instance defined in the local __init__.py

# --- Shared Extensions and Utilities Imports ---
try:
    from ..extensions import mongo  # Import the initialized PyMongo instance
    from ..utils import token_required  # Import the authentication decorator
except ImportError:
    # Fallback so the module still imports; every route then answers with 500.
    print("Warning: Could not import mongo or token_required in urls/urls_routes.py.")
    mongo = None

    def token_required(f):
        """Stand-in decorator that rejects every request with a 500 response."""
        @wraps(f)
        def wrapper(*args, **kwargs):
            print("ERROR: token_required decorator is not available!")
            return jsonify({"message": "Server configuration error: Missing authentication utility."}), 500
        return wrapper

# --- Schema Imports ---
try:
    # Import the relevant schemas defined in schemas.py
    from ..schemas import (
        URLCreateSchema, URLUpdateSchema, URLSchema,
        URLListSchema, URLSearchResultSchema
    )
    from marshmallow import ValidationError
except ImportError:
    print("Warning: Could not import URL schemas or ValidationError in urls/urls_routes.py.")
    # The routes check these names for None before using them.
    URLCreateSchema = None
    URLUpdateSchema = None
    URLSchema = None
    URLListSchema = None
    URLSearchResultSchema = None
    ValidationError = None

# --- Celery Task Imports ---
# IMPORTANT: Assumes the project root directory ('your_fullstack_project/') is in PYTHONPATH
try:
    from backend_flask.celery_worker.celery_app import (
        async_extract_title_and_keywords,
        async_summarize_url,
        async_recalc_project_keywords,
    )
except ImportError:
    # ImportError (the parent of ModuleNotFoundError) also covers the case where
    # the module exists but one of the task names is missing from it.
    print("Warning: Could not import Celery tasks from 'celery_worker'. Ensure project root is in PYTHONPATH.")

    def _dummy_celery_task(*args, **kwargs):
        """Build a placeholder task object so later ``.delay()`` calls do not raise NameError."""
        task_name = args[0] if args else 'dummy_task'
        print(f"ERROR: Celery task {task_name} not available!")

        class DummyTask:
            """Placeholder exposing a ``delay`` method that only prints an error."""

            def __init__(self, name):
                self.__name__ = name

            def delay(self, *a, **kw):
                # NOTE(review): this returns None without raising, so callers
                # cannot tell that nothing was queued and still report success.
                print(f"ERROR: Tried to call delay() on dummy task {self.__name__}")

        return DummyTask(task_name)

    async_extract_title_and_keywords = _dummy_celery_task('async_extract_title_and_keywords')
    async_summarize_url = _dummy_celery_task('async_summarize_url')
    async_recalc_project_keywords = _dummy_celery_task('async_recalc_project_keywords')


# --- Helper to get logger safely ---
def _get_logger():
    """Return the Flask app logger inside an app context, else a module logger."""
    if has_app_context():
        return current_app.logger
    return logging.getLogger(__name__)


def _clean_text(value):
    """Return ``value`` stripped of surrounding whitespace, or "" when it is not a string.

    Guards optional payload fields against JSON ``null`` or non-string values,
    which would otherwise raise AttributeError on ``.strip()``.
    """
    return value.strip() if isinstance(value, str) else ""


# Note: Routes use paths relative to the '/api' prefix defined in __init__.py.

# --------------------------
# Create URL Endpoint
# Path: POST /api/projects/<project_id>/urls
# --------------------------
@bp.route('/projects/<project_id>/urls', methods=['POST'])
@token_required
def create_url(current_user, project_id):
    """
    Create a new URL entry within a specific project.

    Validates the JSON payload with URLCreateSchema, checks that the caller is
    the project's owner or a collaborator, inserts the document with
    processingStatus "pending", then queues the title/keyword extraction and
    summarization tasks.

    Returns 201 with the new ``url_id`` (the message says whether queueing
    succeeded), 400 for malformed IDs, 403/404 for access or lookup failures,
    422 for schema validation errors and 500 for server-side problems.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        logger.error("Invalid current_user object received in create_url")
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id)  # Keep string version for Celery tasks
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in create_url: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check dependencies
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLCreateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    logger.debug(f"create_url called: project_id={project_id}, data={json_data}, user_id={user_id_str}")
    schema = URLCreateSchema()
    try:
        validated_input = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Create URL validation failed: {err.messages}")
        return jsonify(err.messages), 422

    user_url = validated_input['url']  # URL is guaranteed by schema

    try:
        # Validate project ID format from URL path
        try:
            project_obj_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400

        # --- Verify Project Access ---
        db = mongo.db
        project = db.projects.find_one({"_id": project_obj_id}, {"ownerId": 1, "collaborators": 1})
        if not project:
            return jsonify({"message": "Project not found."}), 404

        owner_id = project.get("ownerId")
        # `or []` also covers a collaborators field stored as null.
        collaborators = project.get("collaborators") or []
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId field.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to add URLs to this project."}), 403

        # --- Prepare URL Document ---
        # Optional fields are read from the raw payload, not from the schema output.
        keywords_data = json_data.get("keywords", [])
        keywords_converted = []
        if isinstance(keywords_data, list):
            for kw in keywords_data:
                if not isinstance(kw, dict):
                    logger.warning("Non-dict item found in keywords during URL creation.")
                    continue
                word = _clean_text(kw.get("word"))
                if not word:
                    continue  # Entries without a usable word are dropped silently
                try:
                    percentage = float(kw.get("percentage", 0.0))
                except (ValueError, TypeError):
                    logger.warning(
                        f"Could not convert keyword percentage for word '{word}' during URL creation.")
                else:
                    keywords_converted.append({"word": word, "percentage": percentage})

        now = datetime.datetime.now(datetime.timezone.utc)
        url_doc = {
            "projectId": project_obj_id,
            "url": user_url,  # Use validated URL
            "title": _clean_text(json_data.get("title")),
            "favicon": json_data.get("favicon", ""),
            "starred": bool(json_data.get("starred", False)),
            "note": _clean_text(json_data.get("note")),
            "keywords": keywords_converted,
            "summary": _clean_text(json_data.get("summary")),
            "processingStatus": "pending",
            "createdAt": now,
            "updatedAt": now,
        }

        # Insert the new URL document
        result = db.urls.insert_one(url_doc)
        new_url_id_str = str(result.inserted_id)
        logger.info(f"Successfully inserted URL {new_url_id_str} for project {project_id}")

        # --- Trigger Background Tasks ---
        # A queueing failure must not undo the insert; it only changes the message.
        tasks_queued = True
        try:
            async_extract_title_and_keywords.delay(new_url_id_str, user_id_str)

            api_doc = db.api_list.find_one({"uid": user_id, "selected": True, "name": "Gemini"})
            use_gemini = bool(api_doc and api_doc.get("key"))
            async_summarize_url.delay(new_url_id_str, user_id_str, use_gemini)

            logger.info(f"Queued Celery tasks for URL {new_url_id_str} (use_gemini={use_gemini})")
        except NameError as ne:
            logger.error(f"Celery tasks not available for URL {new_url_id_str}: {ne}. Processing cannot be initiated.")
            tasks_queued = False
        except Exception as celery_err:
            logger.error(f"Failed to queue Celery tasks for URL {new_url_id_str}: {celery_err}", exc_info=True)
            tasks_queued = False

        response_message = "URL created successfully and processing initiated." if tasks_queued else "URL created, but failed to initiate background processing."
        return jsonify({"message": response_message, "url_id": new_url_id_str}), 201

    except Exception as e:
        logger.error(f"Error creating URL for project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred while creating the URL."}), 500


# --------------------------
# List URLs for Project (Simplified)
# Path: GET /api/projects/<project_id>/urls
# --------------------------
@bp.route('/projects/<project_id>/urls', methods=['GET'])
@token_required
def list_urls_for_project(current_user, project_id):
    """
    Retrieve a simplified list (id, title, url) of all URLs within a project.

    Checks that the caller is the project's owner or a collaborator, then
    returns the project's URLs sorted by ``updatedAt`` descending and
    serialized with URLListSchema under the ``urls`` key.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in list_urls_for_project: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check dependencies
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLListSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    try:
        # Validate project ID format from URL path
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400

        db = mongo.db

        # --- Verify Project Access ---
        project = db.projects.find_one(
            {"_id": obj_project_id},
            {"ownerId": 1, "collaborators": 1}  # Projection for access check
        )
        if not project:
            return jsonify({"message": "Project not found."}), 404

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators") or []
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to this project's URLs."}), 403

        # --- Fetch and Serialize URLs ---
        cursor = db.urls.find(
            {"projectId": obj_project_id},
            {"_id": 1, "title": 1, "url": 1}  # Projection matching URLListSchema
        ).sort("updatedAt", -1)  # Most recently updated first
        url_docs = list(cursor)

        output_schema = URLListSchema(many=True)
        serialized_result = output_schema.dump(url_docs)

        return jsonify({"urls": serialized_result}), 200

    except Exception as e:
        logger.error(f"Error listing URLs for project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while listing URLs."}), 500


# --------------------------
# Get URL Detail
# Path: GET /api/urls/<url_id>
# --------------------------
@bp.route('/urls/<url_id>', methods=['GET'])
@token_required
def get_url_detail(current_user, url_id):
    """
    Retrieve the full details for a specific URL entry by its ID.

    Loads the URL document, checks that the caller is the owner or a
    collaborator of the project it links to, and returns the document
    serialized with URLSchema.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in get_url_detail: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check dependencies
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    try:
        # Validate URL ID format from URL path
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        db = mongo.db

        # Find the URL document
        url_doc = db.urls.find_one({"_id": obj_url_id})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404

        # --- Verify Project Access ---
        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1}  # Projection for access check
        )
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404  # Or 500

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators") or []
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId in get_url_detail.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to the project containing this URL."}), 403

        # --- Serialize and Return URL Details ---
        output_schema = URLSchema()
        serialized_result = output_schema.dump(url_doc)

        return jsonify(serialized_result), 200

    except Exception as e:
        logger.error(f"Error retrieving URL detail for {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while retrieving the URL details."}), 500


# --------------------------
# Update URL
# Path: PUT /api/urls/<url_id>
# --------------------------
@bp.route('/urls/<url_id>', methods=['PUT'])
@token_required
def update_url(current_user, url_id):
    """
    Update fields of a URL entry using the output of URLUpdateSchema.

    Checks access via the associated project, applies the validated fields
    with ``$set`` together with a fresh ``updatedAt``, and queues a project
    keyword recalculation when ``keywords`` was part of the update. Responds
    with the updated entry serialized by URLListSchema.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in update_url: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check dependencies
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLUpdateSchema or not URLListSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    update_schema = URLUpdateSchema()
    try:
        validated_data = update_schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Update URL validation failed: {err.messages}")
        return jsonify(err.messages), 422

    # Validation passed but produced nothing to update
    if not validated_data:
        return jsonify({"message": "No valid fields provided for update."}), 400

    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        db = mongo.db

        # --- Find URL and Verify Access ---
        # Only projectId is needed for the access check
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404

        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during update.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1}  # Projection for access check
        )
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during update.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404  # Or 500

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators") or []
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during URL update.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to update URLs in this project."}), 403

        # --- Prepare Update Fields from validated data ---
        update_fields = validated_data
        # Record this before 'updatedAt' is added to the same dict
        keywords_changed = "keywords" in update_fields
        update_fields["updatedAt"] = datetime.datetime.now(datetime.timezone.utc)

        # --- Perform Update ---
        result = db.urls.update_one({"_id": obj_url_id}, {"$set": update_fields})

        # --- Return Response ---
        if result.matched_count != 1:
            return jsonify({"message": "URL update failed (document not found)."}), 404

        # Retrieve the updated URL doc to return simplified info
        updated_url_doc = db.urls.find_one(
            {"_id": obj_url_id},
            {"_id": 1, "title": 1, "url": 1}  # Projection for list schema
        )
        if not updated_url_doc:
            logger.warning(f"URL {url_id} updated but could not be retrieved.")
            return jsonify({"message": "URL updated successfully, but failed to retrieve updated data."}), 200

        output_schema = URLListSchema()
        serialized_url = output_schema.dump(updated_url_doc)

        # Queue keyword recalc for the project; a queueing failure is only logged
        if keywords_changed:
            try:
                async_recalc_project_keywords.delay(str(project_obj_id), str(user_id))
                logger.info(f"Queued keyword recalc task for project {project_obj_id} after URL {url_id} update.")
            except NameError:
                logger.error("Celery task 'async_recalc_project_keywords' not available during URL update.")
            except Exception as celery_err:
                logger.error(f"Failed to queue Celery recalc task for project {project_obj_id} after URL update: {celery_err}", exc_info=True)

        return jsonify({"message": "URL updated successfully.", "url": serialized_url}), 200

    except Exception as e:
        logger.error(f"Error updating URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while updating the URL."}), 500


# --------------------------
# Delete URL
# Path: DELETE /api/urls/<url_id>
# --------------------------
@bp.route('/urls/<url_id>', methods=['DELETE'])
@token_required
def delete_url(current_user, url_id):
    """
    Delete a specific URL entry by its ID.

    Checks access via the associated project (a missing project denies the
    deletion with 403), deletes the document, then queues a project keyword
    recalculation. A queueing failure is logged and does not change the
    success response.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in delete_url: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check DB connection
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500

    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        db = mongo.db

        # --- Find URL and Verify Access ---
        # projectId is needed for the access check and the recalc trigger
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404

        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during delete.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1}  # Projection for access check
        )
        # Without the project, access cannot be verified, so deletion is denied.
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during delete.")
            return jsonify({"message": "Cannot verify access; associated project missing."}), 403  # Deny access

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators") or []
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during URL delete.")
            return jsonify({"message": "Project data integrity issue."}), 500
        # Check if user has access rights (owner or collaborator)
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have permission to delete URLs in this project."}), 403

        # --- Perform Deletion ---
        delete_result = db.urls.delete_one({"_id": obj_url_id})

        # --- Return Response ---
        if delete_result.deleted_count != 1:
            # Document existed (find_one succeeded) but nothing was deleted
            logger.error(f"URL {obj_url_id} found but delete_one failed (deleted_count=0).")
            return jsonify({"message": "Failed to delete URL (already deleted?)."}), 404  # Or 500

        try:
            async_recalc_project_keywords.delay(str(project_obj_id), str(user_id))
            logger.info(f"Queued keyword recalc task for project {project_obj_id} after URL {url_id} deletion.")
        except NameError:
            logger.error("Celery task 'async_recalc_project_keywords' not available during URL deletion.")
        except Exception as celery_err:
            logger.error(f"Failed to queue Celery recalc task for project {project_obj_id} after URL deletion: {celery_err}", exc_info=True)

        # Still return success for the deletion itself
        return jsonify({"message": "URL deleted successfully."}), 200  # 200 OK or 204 No Content

    except Exception as e:
        logger.error(f"Error deleting URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while deleting the URL."}), 500


# --------------------------
# Celery Task Trigger Endpoints
# Path: PUT /api/urls/<url_id>/extract_title_and_keywords
# Path: PUT /api/urls/<url_id>/summarize
# --------------------------
@bp.route('/urls/<url_id>/extract_title_and_keywords', methods=['PUT'])
@token_required
def trigger_extract_title_and_keywords(current_user, url_id):
    """
    Manually queue the title/keyword extraction task for a URL.

    Checks access via the associated project, sets processingStatus to
    'pending', then queues the task. Returns 202 once queued. If queueing
    fails the status is left as 'pending' and 500 is returned.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id)  # Keep string version for Celery task
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in trigger_extract: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check DB connection
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500

    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        db = mongo.db

        # --- Find URL and Verify Access ---
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404

        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during trigger_extract.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1}  # Projection for access check
        )
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during trigger_extract.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404  # Or 500

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators") or []
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during trigger_extract.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to trigger processing for this URL."}), 403

        # --- Update Status and Queue Task ---
        # Set status to pending before queueing
        db.urls.update_one(
            {"_id": obj_url_id},
            {"$set": {"processingStatus": "pending",
                      "updatedAt": datetime.datetime.now(datetime.timezone.utc)}}
        )

        try:
            async_extract_title_and_keywords.delay(url_id, user_id_str)
            logger.info(f"Queued title/keyword extraction task for URL {url_id}")
            return jsonify({"message": "Title and keyword extraction task queued successfully."}), 202  # 202 Accepted
        except NameError:
            logger.error("Celery task 'async_extract_title_and_keywords' is not defined or imported correctly.")
            # Status is deliberately left as 'pending'.
            return jsonify({"message": "Server configuration error: Extraction feature unavailable."}), 500
        except Exception as e:
            logger.error(f"Error queueing extraction task for URL {url_id}: {e}", exc_info=True)
            # Status is deliberately left as 'pending'.
            return jsonify({"message": "An error occurred while queueing the extraction task."}), 500

    except Exception as e:
        logger.error(f"Error in trigger_extract_title_and_keywords endpoint for URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred before queueing the task."}), 500


@bp.route('/urls/<url_id>/summarize', methods=['PUT'])
@token_required
def trigger_summarize_url(current_user, url_id):
    """
    Manually queue the summarization task for a URL.

    Checks access via the associated project, sets processingStatus to
    'pending', and passes ``use_gemini=True`` to the task when the caller has
    a selected "Gemini" entry with a key in ``api_list``. Returns 202 once
    queued. If queueing fails the status is left as 'pending' and 500 is
    returned.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id)  # Keep string version for Celery task
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in trigger_summarize: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check DB connection
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500

    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        db = mongo.db

        # --- Find URL and Verify Access ---
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404

        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during trigger_summarize.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1}  # Projection for access check
        )
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during trigger_summarize.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404  # Or 500

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators") or []
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during trigger_summarize.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to trigger processing for this URL."}), 403

        # --- Update Status, Check API Key, and Queue Task ---
        # Set status to pending before queueing
        db.urls.update_one(
            {"_id": obj_url_id},
            {"$set": {"processingStatus": "pending",
                      "updatedAt": datetime.datetime.now(datetime.timezone.utc)}}
        )

        # Check for user's selected Gemini API key
        api_doc = db.api_list.find_one({"uid": user_id, "selected": True, "name": "Gemini"})
        use_gemini = bool(api_doc and api_doc.get("key"))  # True if Gemini selected and key exists

        try:
            async_summarize_url.delay(url_id, user_id_str, use_gemini)
            logger.info(f"Queued summarization task for URL {url_id} (use_gemini={use_gemini})")
            return jsonify({"message": "Summarization task queued successfully."}), 202  # 202 Accepted
        except NameError:
            logger.error("Celery task 'async_summarize_url' is not defined or imported correctly.")
            # Status is deliberately left as 'pending'.
            return jsonify({"message": "Server configuration error: Summarization feature unavailable."}), 500
        except Exception as e:
            logger.error(f"Error queueing summarization task for URL {url_id}: {e}", exc_info=True)
            # Status is deliberately left as 'pending'.
            return jsonify({"message": "An error occurred while queueing the summarization task."}), 500

    except Exception as e:
        logger.error(f"Error in trigger_summarize_url endpoint for URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred before queueing the task."}), 500


# --------------------------
# Search URLs within Project
# Path: GET /api/projects/<project_id>/search?q=...
# --------------------------
@bp.route('/projects/<project_id>/search', methods=['GET'])
@token_required
def search_urls(current_user, project_id):
    """
    Search for URLs within a specific project based on the ``q`` query parameter.

    Matches the regex-escaped query case-insensitively against 'title',
    'note', 'keywords.word' and 'summary'. Results are sorted by ``updatedAt``
    descending and serialized with URLSearchResultSchema under ``results``.
    An empty query returns an empty result list before the project is looked
    up.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in search_urls: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check dependencies
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLSearchResultSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    # Bound before the try block so the error handler below can always log it.
    query_str = request.args.get("q", "").strip()

    try:
        # If query string is empty, return empty results immediately
        if not query_str:
            return jsonify({"results": []}), 200

        # Validate project ID format from URL path
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400

        db = mongo.db

        # --- Verify Project Access ---
        project = db.projects.find_one(
            {"_id": obj_project_id},
            {"ownerId": 1, "collaborators": 1}  # Projection for access check
        )
        if not project:
            return jsonify({"message": "Project not found."}), 404

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators") or []
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "Access denied to search URLs in this project."}), 403

        # --- Perform Search using Aggregation Pipeline ---
        # Escape regex special characters so the query is matched literally
        escaped_query = re.escape(query_str)
        search_pipeline = [
            {"$match": {"projectId": obj_project_id}},
            {"$match": {
                "$or": [
                    {"title": {"$regex": escaped_query, "$options": "i"}},
                    {"note": {"$regex": escaped_query, "$options": "i"}},
                    {"keywords.word": {"$regex": escaped_query, "$options": "i"}},
                    {"summary": {"$regex": escaped_query, "$options": "i"}}
                ]
            }},
            # updatedAt is kept in the projection because the sort stage needs it
            {"$project": {"_id": 1, "title": 1, "url": 1, "updatedAt": 1}},
            {"$sort": {"updatedAt": -1}}  # Sort by update time
            # Add $limit stage if needed
        ]

        # Execute the aggregation pipeline
        results_cursor = db.urls.aggregate(search_pipeline)
        search_result_docs = list(results_cursor)

        # --- Serialize results using the schema ---
        output_schema = URLSearchResultSchema(many=True)
        serialized_result = output_schema.dump(search_result_docs)

        return jsonify({"results": serialized_result}), 200

    except Exception as e:
        logger.error(f"Error searching URLs in project {project_id} with query '{query_str}': {e}", exc_info=True)
        return jsonify({"message": "An error occurred during URL search."}), 500