Initial Commit
This commit is contained in:
14
backend_flask/myapp/urls/__init__.py
Normal file
14
backend_flask/myapp/urls/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
# myapp/urls/__init__.py
|
||||
|
||||
from flask import Blueprint

# Blueprint for the URL-management endpoints.
#   * 'urls' is the blueprint's registered name.
#   * Every route attached to it is served under the '/api' prefix, e.g.
#     '/api/projects/<id>/urls' or '/api/urls/<id>'; the individual paths are
#     declared in urls_routes.py.
bp = Blueprint(name='urls', import_name=__name__, url_prefix='/api')

# Importing the routes module registers its view functions on 'bp'.
# It has to stay BELOW the 'bp' assignment: urls_routes does
# 'from . import bp' at import time, so 'bp' must already exist.
from . import urls_routes
|
||||
817
backend_flask/myapp/urls/urls_routes.py
Normal file
817
backend_flask/myapp/urls/urls_routes.py
Normal file
@@ -0,0 +1,817 @@
|
||||
# myapp/urls/urls_routes.py
|
||||
|
||||
import datetime
|
||||
import logging
|
||||
from flask import request, jsonify, current_app, has_app_context # Flask utilities
|
||||
from bson.objectid import ObjectId, InvalidId # For MongoDB ObjectIds
|
||||
from collections import defaultdict # Potentially useful for keyword aggregation etc.
|
||||
from functools import wraps # For creating dummy decorators
|
||||
import re # For escaping regex characters in search
|
||||
|
||||
# --- Local Blueprint Import ---
|
||||
from . import bp # Import the 'bp' instance defined in the local __init__.py
|
||||
|
||||
# --- Shared Extensions and Utilities Imports ---
|
||||
try:
    from ..extensions import mongo  # Initialized PyMongo instance
    from ..utils import token_required  # Authentication decorator
except ImportError:
    # Keep the module importable when the shared helpers are missing; the
    # routes then answer with explicit 500 responses instead of crashing.
    print("Warning: Could not import mongo or token_required in urls/urls_routes.py.")
    mongo = None

    def token_required(f):
        """Stand-in for the real auth decorator.

        The returned view never calls ``f``; it always replies with a 500
        "configuration error" response, while keeping ``f``'s metadata.
        """

        def _auth_unavailable(*args, **kwargs):
            print("ERROR: token_required decorator is not available!")
            body = jsonify({"message": "Server configuration error: Missing authentication utility."})
            return body, 500

        # Same effect as decorating the inner function with @wraps(f).
        return wraps(f)(_auth_unavailable)
|
||||
|
||||
# --- Schema Imports ---
|
||||
try:
    # Marshmallow schemas used to validate input and serialize output below.
    from ..schemas import (
        URLCreateSchema,
        URLUpdateSchema,
        URLSchema,
        URLListSchema,
        URLSearchResultSchema,
    )
    from marshmallow import ValidationError
except ImportError:
    print("Warning: Could not import URL schemas or ValidationError in urls/urls_routes.py.")
    # Bind every name to None so the route handlers can detect the missing
    # dependency with a simple truthiness check and answer with a 500.
    URLCreateSchema = URLUpdateSchema = URLSchema = None
    URLListSchema = URLSearchResultSchema = None
    ValidationError = None
|
||||
|
||||
# --- Celery Task Imports ---
|
||||
# IMPORTANT: Assumes the project root directory ('your_fullstack_project/') is in PYTHONPATH
|
||||
try:
    from backend_flask.celery_worker.celery_app import (
        async_extract_title_and_keywords,
        async_summarize_url,
        async_recalc_project_keywords,
    )
except ModuleNotFoundError:
    print("Warning: Could not import Celery tasks from 'celery_worker'. Ensure project root is in PYTHONPATH.")

    def _dummy_celery_task(*args, **kwargs):
        """Build a placeholder object exposing a no-op ``delay()``.

        The first positional argument is used as the task name (default
        ``'dummy_task'``). An error line is printed when the placeholder is
        created and again on every ``delay()`` call.

        NOTE(review): ``delay()`` returns None without raising, so callers
        cannot tell from the call alone that nothing was queued.
        """
        task_name = 'dummy_task'
        if args:
            task_name = args[0]
        print(f"ERROR: Celery task {task_name} not available!")

        class DummyTask:
            def __init__(self, name):
                self.__name__ = name

            def delay(self, *a, **kw):
                print(f"ERROR: Tried to call delay() on dummy task {self.__name__}")

        return DummyTask(task_name)

    # Bind the three task names the routes use, so they never hit NameError.
    (
        async_extract_title_and_keywords,
        async_summarize_url,
        async_recalc_project_keywords,
    ) = (
        _dummy_celery_task(placeholder_name)
        for placeholder_name in (
            'async_extract_title_and_keywords',
            'async_summarize_url',
            'async_recalc_project_keywords',
        )
    )
|
||||
|
||||
|
||||
# --- Helper to get logger safely ---
|
||||
def _get_logger():
    """Return the Flask app logger when an app context is active.

    Outside an application context, fall back to the standard-library
    logger named after this module.
    """
    return current_app.logger if has_app_context() else logging.getLogger(__name__)
|
||||
|
||||
# Note: Routes use paths relative to the '/api' prefix defined in __init__.py.
|
||||
|
||||
# --------------------------
|
||||
# Create URL Endpoint
|
||||
# Path: POST /api/projects/<project_id>/urls
|
||||
# --------------------------
|
||||
def _stripped_text(value):
    """Return ``value`` stripped of surrounding whitespace, or "" if it is not a string."""
    return value.strip() if isinstance(value, str) else ""


def _coerce_url_keywords(raw_keywords, logger):
    """Convert client-supplied keywords into ``{"word", "percentage"}`` dicts.

    Anything that is not a list yields an empty result. Entries that are not
    dicts, have a non-string or blank ``word``, or carry a ``percentage``
    that cannot be converted to ``float`` are skipped (with a warning, except
    for blank words, which are dropped silently).
    """
    if not isinstance(raw_keywords, list):
        return []

    converted = []
    for kw in raw_keywords:
        if not isinstance(kw, dict):
            logger.warning("Non-dict item found in keywords during URL creation.")
            continue
        word = kw.get("word", "")
        if not isinstance(word, str):
            logger.warning("Non-string keyword word found during URL creation.")
            continue
        word = word.strip()
        if not word:
            continue
        try:
            percentage = float(kw.get("percentage", 0.0))
        except (ValueError, TypeError):
            logger.warning(f"Could not convert keyword percentage for word '{word}' during URL creation.")
            continue
        converted.append({"word": word, "percentage": percentage})
    return converted


@bp.route('/projects/<string:project_id>/urls', methods=['POST'])
@token_required
def create_url(current_user, project_id):
    """Create a new URL entry inside a project and queue its processing.

    The JSON payload is validated with ``URLCreateSchema`` (``url`` is
    required). Optional fields (``title``, ``favicon``, ``starred``,
    ``note``, ``keywords``, ``summary``) are read from the raw payload.

    Args:
        current_user: User mapping supplied by ``token_required``; must
            contain an ``_id`` convertible to ``ObjectId``.
        project_id: Project id taken from the URL path.

    Returns:
        ``(response, status)`` tuple:
        201 with ``message`` and ``url_id`` on success (the message says
        whether the background tasks could be queued);
        400 for a malformed user or project id; 403 when the user is neither
        owner nor collaborator; 404 when the project does not exist;
        422 with the schema error messages on validation failure;
        500 for missing dependencies or unexpected errors.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        logger.error("Invalid current_user object received in create_url")
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id)  # Celery arguments must be plain strings
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in create_url: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check dependencies
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLCreateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    logger.debug(f"create_url called: project_id={project_id}, data={json_data}, user_id={user_id_str}")
    schema = URLCreateSchema()
    try:
        validated_input = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Create URL validation failed: {err.messages}")
        return jsonify(err.messages), 422

    user_url = validated_input['url']  # Presence is guaranteed by the schema

    try:
        # Validate project ID format from URL path
        try:
            project_obj_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400

        # --- Verify Project Access ---
        db = mongo.db
        project = db.projects.find_one({"_id": project_obj_id}, {"ownerId": 1, "collaborators": 1})
        if not project:
            return jsonify({"message": "Project not found."}), 404

        owner_id = project.get("ownerId")
        # `or []` also covers a collaborators field stored as null.
        collaborators = project.get("collaborators") or []
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId field.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to add URLs to this project."}), 403

        # --- Prepare URL Document ---
        # Optional fields come from the raw payload (`json_data`). The
        # previous code read them from an undefined name `data`, which
        # raised NameError and turned every request into a 500.
        keywords_converted = _coerce_url_keywords(json_data.get("keywords", []), logger)

        now = datetime.datetime.now(datetime.timezone.utc)
        url_doc = {
            "projectId": project_obj_id,
            "url": user_url,  # Use validated URL
            "title": _stripped_text(json_data.get("title", "")),
            "favicon": json_data.get("favicon", ""),
            "starred": bool(json_data.get("starred", False)),
            "note": _stripped_text(json_data.get("note", "")),
            "keywords": keywords_converted,
            "summary": _stripped_text(json_data.get("summary", "")),
            "processingStatus": "pending",
            "createdAt": now,
            "updatedAt": now,
        }

        # Insert the new URL document
        result = db.urls.insert_one(url_doc)
        new_url_id_str = str(result.inserted_id)
        logger.info(f"Successfully inserted URL {new_url_id_str} for project {project_id}")

        # --- Trigger Background Tasks ---
        # A queueing failure must not undo the insert; it is only reported
        # through the response message.
        tasks_queued = True
        try:
            async_extract_title_and_keywords.delay(new_url_id_str, user_id_str)
            # Summarize with Gemini only when the user has a selected Gemini
            # entry with a non-empty key.
            api_doc = db.api_list.find_one({"uid": user_id, "selected": True, "name": "Gemini"})
            use_gemini = bool(api_doc and api_doc.get("key"))
            async_summarize_url.delay(new_url_id_str, user_id_str, use_gemini)
            logger.info(f"Queued Celery tasks for URL {new_url_id_str} (use_gemini={use_gemini})")
        except NameError as ne:
            # Defensive: kept in case the task names are ever left undefined.
            logger.error(f"Celery tasks not available for URL {new_url_id_str}: {ne}. Processing cannot be initiated.")
            tasks_queued = False
        except Exception as celery_err:
            logger.error(f"Failed to queue Celery tasks for URL {new_url_id_str}: {celery_err}", exc_info=True)
            tasks_queued = False

        response_message = (
            "URL created successfully and processing initiated."
            if tasks_queued
            else "URL created, but failed to initiate background processing."
        )
        return jsonify({"message": response_message, "url_id": new_url_id_str}), 201

    except Exception as e:
        logger.error(f"Error creating URL for project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred while creating the URL."}), 500
|
||||
|
||||
|
||||
# --------------------------
|
||||
# List URLs for Project (Simplified)
|
||||
# Path: GET /api/projects/<project_id>/urls
|
||||
# --------------------------
|
||||
@bp.route('/projects/<string:project_id>/urls', methods=['GET'])
@token_required
def list_urls_for_project(current_user, project_id):
    """List the URLs of one project as id/title/url summaries.

    The caller must be the project's owner or one of its collaborators.
    Results are ordered by ``updatedAt`` descending and serialized with
    ``URLListSchema``.

    Returns:
        ``(response, status)``: 200 with ``{"urls": [...]}``; 400 for a
        malformed user or project id; 403 without access; 404 for an unknown
        project; 500 for missing dependencies or unexpected errors.
    """
    log = _get_logger()

    # The auth decorator must supply a user mapping carrying an "_id".
    if not (current_user and current_user.get("_id")):
        return jsonify({"message": "Internal authorization error."}), 500

    try:
        requester_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as exc:
        log.error(f"User ID conversion error in list_urls_for_project: {exc}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Dependencies that may have failed to import at module load.
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLListSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    try:
        try:
            project_oid = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400

        database = mongo.db

        # Only the fields needed for the access decision are fetched.
        access_fields = {"ownerId": 1, "collaborators": 1}
        project_doc = database.projects.find_one({"_id": project_oid}, access_fields)
        if not project_doc:
            return jsonify({"message": "Project not found."}), 404

        may_read = (
            project_doc.get("ownerId") == requester_id
            or requester_id in project_doc.get("collaborators", [])
        )
        if not may_read:
            return jsonify({"message": "You do not have access to this project's URLs."}), 403

        # Projection mirrors the fields used by URLListSchema; newest first.
        summary_fields = {"_id": 1, "title": 1, "url": 1}
        matches = database.urls.find({"projectId": project_oid}, summary_fields)
        documents = list(matches.sort("updatedAt", -1))

        serialized = URLListSchema(many=True).dump(documents)
        return jsonify({"urls": serialized}), 200

    except Exception as exc:
        log.error(f"Error listing URLs for project {project_id}: {exc}", exc_info=True)
        return jsonify({"message": "An error occurred while listing URLs."}), 500
|
||||
|
||||
|
||||
# --------------------------
|
||||
# Get URL Detail
|
||||
# Path: GET /api/urls/<url_id>
|
||||
# --------------------------
|
||||
@bp.route('/urls/<string:url_id>', methods=['GET'])
@token_required
def get_url_detail(current_user, url_id):
    """Return the full stored document for a single URL entry.

    Access is decided through the project the URL belongs to: the caller
    must be that project's owner or a collaborator. The document is
    serialized with ``URLSchema``.

    Returns:
        ``(response, status)``: 200 with the serialized URL; 400 for a
        malformed user or URL id; 403 without access; 404 when the URL or its
        project is missing; 500 for data-integrity problems, missing
        dependencies, or unexpected errors.
    """
    log = _get_logger()

    # The auth decorator must supply a user mapping carrying an "_id".
    if not (current_user and current_user.get("_id")):
        return jsonify({"message": "Internal authorization error."}), 500

    try:
        requester_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as exc:
        log.error(f"User ID conversion error in get_url_detail: {exc}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Dependencies that may have failed to import at module load.
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    try:
        try:
            url_oid = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        database = mongo.db

        stored_url = database.urls.find_one({"_id": url_oid})
        if not stored_url:
            return jsonify({"message": "URL not found."}), 404

        # The URL must point at its parent project through an ObjectId.
        parent_id = stored_url.get("projectId")
        if not (parent_id and isinstance(parent_id, ObjectId)):
            log.error(f"URL {url_id} has missing or invalid projectId.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        access_fields = {"ownerId": 1, "collaborators": 1}
        parent = database.projects.find_one({"_id": parent_id}, access_fields)
        if not parent:
            log.error(f"Project {parent_id} associated with URL {url_id} not found.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404

        project_owner = parent.get("ownerId")
        if not project_owner:
            log.error(f"Project {parent_id} is missing ownerId in get_url_detail.")
            return jsonify({"message": "Project data integrity issue."}), 500

        may_read = project_owner == requester_id or requester_id in parent.get("collaborators", [])
        if not may_read:
            return jsonify({"message": "You do not have access to the project containing this URL."}), 403

        # Serialization is delegated entirely to URLSchema.
        return jsonify(URLSchema().dump(stored_url)), 200

    except Exception as exc:
        log.error(f"Error retrieving URL detail for {url_id}: {exc}", exc_info=True)
        return jsonify({"message": "An error occurred while retrieving the URL details."}), 500
|
||||
|
||||
|
||||
# --------------------------
|
||||
# Update URL
|
||||
# Path: PUT /api/urls/<url_id>
|
||||
# --------------------------
|
||||
@bp.route('/urls/<string:url_id>', methods=['PUT'])
@token_required
def update_url(current_user, url_id):
    """Apply a validated partial update to a URL entry.

    The payload is validated with ``URLUpdateSchema``; the validated fields
    plus a fresh ``updatedAt`` timestamp are written with ``$set``. The
    caller must own or collaborate on the URL's project. When ``keywords``
    is part of the update, a project keyword recalculation task is queued.

    Returns:
        ``(response, status)``: 200 with a message (and the URL serialized
        via ``URLListSchema`` when it could be re-read); 400 for malformed
        ids or an empty update; 403 without access; 404 when the URL or its
        project is missing; 422 with schema errors; 500 otherwise.
    """
    log = _get_logger()

    # The auth decorator must supply a user mapping carrying an "_id".
    if not (current_user and current_user.get("_id")):
        return jsonify({"message": "Internal authorization error."}), 500

    try:
        requester_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as exc:
        log.error(f"User ID conversion error in update_url: {exc}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Dependencies that may have failed to import at module load.
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not (URLUpdateSchema and URLListSchema and ValidationError):
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    # Validate the request body.
    payload = request.get_json() or {}
    input_schema = URLUpdateSchema()
    try:
        changes = input_schema.load(payload)
    except ValidationError as err:
        log.warning(f"Update URL validation failed: {err.messages}")
        return jsonify(err.messages), 422

    # Validation passed, but nothing usable was supplied.
    if not changes:
        return jsonify({"message": "No valid fields provided for update."}), 400

    try:
        try:
            url_oid = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        database = mongo.db

        # Only the project link is needed for the access check.
        stored_url = database.urls.find_one({"_id": url_oid}, {"projectId": 1})
        if not stored_url:
            return jsonify({"message": "URL not found."}), 404

        parent_id = stored_url.get("projectId")
        if not (parent_id and isinstance(parent_id, ObjectId)):
            log.error(f"URL {url_id} has missing or invalid projectId during update.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        access_fields = {"ownerId": 1, "collaborators": 1}
        parent = database.projects.find_one({"_id": parent_id}, access_fields)
        if not parent:
            log.error(f"Project {parent_id} associated with URL {url_id} not found during update.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404

        project_owner = parent.get("ownerId")
        if not project_owner:
            log.error(f"Project {parent_id} is missing ownerId during URL update.")
            return jsonify({"message": "Project data integrity issue."}), 500

        may_write = project_owner == requester_id or requester_id in parent.get("collaborators", [])
        if not may_write:
            return jsonify({"message": "You do not have access to update URLs in this project."}), 403

        # Record whether keywords are touched before adding the timestamp.
        touches_keywords = "keywords" in changes
        changes["updatedAt"] = datetime.datetime.now(tz=datetime.timezone.utc)

        outcome = database.urls.update_one({"_id": url_oid}, {"$set": changes})
        if outcome.matched_count != 1:
            # Nothing matched the filter.
            return jsonify({"message": "URL update failed (document not found)."}), 404

        # Re-read the summary fields to report the stored state.
        refreshed = database.urls.find_one({"_id": url_oid}, {"_id": 1, "title": 1, "url": 1})
        if not refreshed:
            log.warning(f"URL {url_id} updated but could not be retrieved.")
            return jsonify({"message": "URL updated successfully, but failed to retrieve updated data."}), 200

        summary = URLListSchema().dump(refreshed)

        if touches_keywords:
            # Queueing problems are logged only; the update itself succeeded.
            try:
                async_recalc_project_keywords.delay(str(parent_id), str(requester_id))
                log.info(f"Queued keyword recalc task for project {parent_id} after URL {url_id} update.")
            except NameError:
                log.error("Celery task 'async_recalc_project_keywords' not available during URL update.")
            except Exception as celery_err:
                log.error(f"Failed to queue Celery recalc task for project {parent_id} after URL update: {celery_err}", exc_info=True)

        return jsonify({"message": "URL updated successfully.", "url": summary}), 200

    except Exception as exc:
        log.error(f"Error updating URL {url_id}: {exc}", exc_info=True)
        return jsonify({"message": "An error occurred while updating the URL."}), 500
|
||||
|
||||
|
||||
# --------------------------
|
||||
# Delete URL
|
||||
# Path: DELETE /api/urls/<url_id>
|
||||
# --------------------------
|
||||
@bp.route('/urls/<string:url_id>', methods=['DELETE'])
@token_required
def delete_url(current_user, url_id):
    """Delete one URL entry and queue a keyword recalculation for its project.

    The caller must own or collaborate on the URL's project. Unlike the
    other URL endpoints, a missing parent project is answered with 403,
    because access cannot be verified.

    Returns:
        ``(response, status)``: 200 on success; 400 for malformed ids;
        403 without access or when the project is missing; 404 when the URL
        does not exist or nothing was deleted; 500 for data-integrity
        problems, a missing database, or unexpected errors.
    """
    log = _get_logger()

    # The auth decorator must supply a user mapping carrying an "_id".
    if not (current_user and current_user.get("_id")):
        return jsonify({"message": "Internal authorization error."}), 500

    try:
        requester_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as exc:
        log.error(f"User ID conversion error in delete_url: {exc}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500

    try:
        try:
            url_oid = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        database = mongo.db

        # The project link drives both the access check and the recalc task.
        stored_url = database.urls.find_one({"_id": url_oid}, {"projectId": 1})
        if not stored_url:
            return jsonify({"message": "URL not found."}), 404

        parent_id = stored_url.get("projectId")
        if not (parent_id and isinstance(parent_id, ObjectId)):
            log.error(f"URL {url_id} has missing or invalid projectId during delete.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        access_fields = {"ownerId": 1, "collaborators": 1}
        parent = database.projects.find_one({"_id": parent_id}, access_fields)
        if not parent:
            # Without the project, access cannot be verified: refuse.
            log.error(f"Project {parent_id} associated with URL {url_id} not found during delete.")
            return jsonify({"message": "Cannot verify access; associated project missing."}), 403

        project_owner = parent.get("ownerId")
        if not project_owner:
            log.error(f"Project {parent_id} is missing ownerId during URL delete.")
            return jsonify({"message": "Project data integrity issue."}), 500

        may_delete = project_owner == requester_id or requester_id in parent.get("collaborators", [])
        if not may_delete:
            return jsonify({"message": "You do not have permission to delete URLs in this project."}), 403

        outcome = database.urls.delete_one({"_id": url_oid})
        if outcome.deleted_count != 1:
            # The document was found a moment ago, yet nothing was removed.
            log.error(f"URL {url_oid} found but delete_one failed (deleted_count=0).")
            return jsonify({"message": "Failed to delete URL (already deleted?)."}), 404

        # Queueing problems are logged only; the deletion already happened.
        try:
            async_recalc_project_keywords.delay(str(parent_id), str(requester_id))
            log.info(f"Queued keyword recalc task for project {parent_id} after URL {url_id} deletion.")
        except NameError:
            log.error("Celery task 'async_recalc_project_keywords' not available during URL deletion.")
        except Exception as celery_err:
            log.error(f"Failed to queue Celery recalc task for project {parent_id} after URL deletion: {celery_err}", exc_info=True)

        return jsonify({"message": "URL deleted successfully."}), 200

    except Exception as exc:
        log.error(f"Error deleting URL {url_id}: {exc}", exc_info=True)
        return jsonify({"message": "An error occurred while deleting the URL."}), 500
|
||||
|
||||
|
||||
# --------------------------
|
||||
# Celery Task Trigger Endpoints
|
||||
# Path: PUT /api/urls/<url_id>/extract_title_and_keywords
|
||||
# Path: PUT /api/urls/<url_id>/summarize
|
||||
# --------------------------
|
||||
@bp.route('/urls/<string:url_id>/extract_title_and_keywords', methods=['PUT'])
@token_required
def trigger_extract_title_and_keywords(current_user, url_id):
    """Queue the title/keyword extraction task for one URL on demand.

    The caller must own or collaborate on the URL's project. Before the
    task is queued, the URL's ``processingStatus`` is set to ``"pending"``
    and ``updatedAt`` is refreshed; the status is left as pending even if
    queueing then fails.

    Returns:
        ``(response, status)``: 202 once the task is queued; 400 for
        malformed ids; 403 without access; 404 when the URL or its project
        is missing; 500 for data-integrity problems, a missing database, or
        queueing/unexpected errors.
    """
    log = _get_logger()

    # The auth decorator must supply a user mapping carrying an "_id".
    if not (current_user and current_user.get("_id")):
        return jsonify({"message": "Internal authorization error."}), 500

    try:
        requester_id = ObjectId(current_user["_id"])
        requester_id_text = str(requester_id)  # String form for the Celery task
    except (InvalidId, TypeError) as exc:
        log.error(f"User ID conversion error in trigger_extract: {exc}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500

    try:
        try:
            url_oid = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        database = mongo.db

        stored_url = database.urls.find_one({"_id": url_oid}, {"projectId": 1})
        if not stored_url:
            return jsonify({"message": "URL not found."}), 404

        parent_id = stored_url.get("projectId")
        if not (parent_id and isinstance(parent_id, ObjectId)):
            log.error(f"URL {url_id} has missing or invalid projectId during trigger_extract.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        access_fields = {"ownerId": 1, "collaborators": 1}
        parent = database.projects.find_one({"_id": parent_id}, access_fields)
        if not parent:
            log.error(f"Project {parent_id} associated with URL {url_id} not found during trigger_extract.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404

        project_owner = parent.get("ownerId")
        if not project_owner:
            log.error(f"Project {parent_id} is missing ownerId during trigger_extract.")
            return jsonify({"message": "Project data integrity issue."}), 500

        may_trigger = project_owner == requester_id or requester_id in parent.get("collaborators", [])
        if not may_trigger:
            return jsonify({"message": "You do not have access to trigger processing for this URL."}), 403

        # Mark the URL as pending before handing the work to Celery.
        pending_marker = {
            "$set": {
                "processingStatus": "pending",
                "updatedAt": datetime.datetime.now(tz=datetime.timezone.utc),
            }
        }
        database.urls.update_one({"_id": url_oid}, pending_marker)

        try:
            # The task receives the id exactly as it appeared in the path.
            async_extract_title_and_keywords.delay(url_id, requester_id_text)
            log.info(f"Queued title/keyword extraction task for URL {url_id}")
            return jsonify({"message": "Title and keyword extraction task queued successfully."}), 202
        except NameError:
            # Status is intentionally left as "pending" on failure.
            log.error("Celery task 'async_extract_title_and_keywords' is not defined or imported correctly.")
            return jsonify({"message": "Server configuration error: Extraction feature unavailable."}), 500
        except Exception as queue_err:
            # Status is intentionally left as "pending" on failure.
            log.error(f"Error queueing extraction task for URL {url_id}: {queue_err}", exc_info=True)
            return jsonify({"message": "An error occurred while queueing the extraction task."}), 500

    except Exception as exc:
        log.error(f"Error in trigger_extract_title_and_keywords endpoint for URL {url_id}: {exc}", exc_info=True)
        return jsonify({"message": "An internal error occurred before queueing the task."}), 500
|
||||
|
||||
|
||||
@bp.route('/urls/<string:url_id>/summarize', methods=['PUT'])
@token_required
def trigger_summarize_url(current_user, url_id):
    """
    Manually trigger the background summarization task for a URL.

    Access is verified through the project the URL document points to: the
    caller must be the project's ``ownerId`` or be listed in its
    ``collaborators``. The ``use_gemini`` flag passed to the task is True only
    when the caller has an ``api_list`` entry named "Gemini" that is selected
    and carries a non-empty ``key``. The URL's ``processingStatus`` is set to
    'pending' immediately before the task is queued.
    (No schema needed for input/output here)

    Args:
        current_user: User mapping supplied to the view; must contain "_id".
        url_id: String form of the URL document's ObjectId (from the path).

    Returns:
        A ``(response, status)`` tuple:
        202 when the task was queued; 400 for a malformed user or URL id;
        403 when the caller is neither owner nor collaborator; 404 when the
        URL or its project does not exist; 500 for configuration, data
        integrity, or unexpected errors.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id)  # Keep string version for Celery task
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in trigger_summarize: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check DB connection
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500

    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400

        db = mongo.db

        # --- Find URL and Verify Access ---
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404

        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during trigger_summarize.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500

        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1},  # Projection for access check
        )
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during trigger_summarize.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404  # Or 500

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during trigger_summarize.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to trigger processing for this URL."}), 403

        # --- Check API Key, Update Status, and Queue Task ---
        # Resolve the Gemini flag BEFORE mutating the URL document: this is a
        # read-only lookup, and if it fails we must not leave the URL marked
        # 'pending' when no task was ever attempted.
        api_doc = db.api_list.find_one({"uid": user_id, "selected": True, "name": "Gemini"})
        use_gemini = bool(api_doc and api_doc.get("key"))  # True if Gemini selected and key exists

        # Set status to pending before queueing
        db.urls.update_one(
            {"_id": obj_url_id},
            {"$set": {"processingStatus": "pending", "updatedAt": datetime.datetime.now(datetime.timezone.utc)}},
        )

        try:
            # Queue the Celery task, passing the use_gemini flag
            async_summarize_url.delay(url_id, user_id_str, use_gemini)
            logger.info(f"Queued summarization task for URL {url_id} (use_gemini={use_gemini})")
            return jsonify({"message": "Summarization task queued successfully."}), 202  # 202 Accepted
        except NameError:
            # Raised when the task name was never bound in this module.
            logger.error("Celery task 'async_summarize_url' is not defined or imported correctly.")
            # Status is deliberately left as 'pending' on queueing failure.
            return jsonify({"message": "Server configuration error: Summarization feature unavailable."}), 500
        except Exception as e:
            logger.error(f"Error queueing summarization task for URL {url_id}: {e}", exc_info=True)
            # Status is deliberately left as 'pending' on queueing failure.
            return jsonify({"message": "An error occurred while queueing the summarization task."}), 500

    except Exception as e:
        logger.error(f"Error in trigger_summarize_url endpoint for URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred before queueing the task."}), 500
|
||||
|
||||
|
||||
# --------------------------
|
||||
# Search URLs within Project
|
||||
# Path: GET /api/projects/<project_id>/search?q=...
|
||||
# --------------------------
|
||||
@bp.route('/projects/<string:project_id>/search', methods=['GET'])
@token_required
def search_urls(current_user, project_id):
    """
    Search for URLs within a specific project based on a query string.

    The ``q`` query parameter is regex-escaped and matched case-insensitively
    against the 'title', 'note', 'keywords.word', and 'summary' fields of the
    project's URL documents. Matches are sorted by ``updatedAt`` descending
    and serialized with URLSearchResultSchema (the pipeline projects only
    ``_id``, ``title``, ``url`` and ``updatedAt``).

    Args:
        current_user: User mapping supplied to the view; must contain "_id".
        project_id: String form of the project's ObjectId (from the path).

    Returns:
        A ``(response, status)`` tuple:
        200 with ``{"results": [...]}`` (an empty list when ``q`` is blank);
        400 for a malformed user or project id; 403 when the caller is
        neither owner nor collaborator; 404 when the project does not exist;
        500 for configuration or unexpected errors.
    """
    logger = _get_logger()

    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in search_urls: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400

    # Check dependencies
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLSearchResultSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500

    # Bound up front so the outer error handler can always interpolate it,
    # even if reading the request arguments is what failed.
    query_str = ""
    try:
        # Get search query string from query parameters
        query_str = request.args.get("q", "").strip()
        # If query string is empty, return empty results immediately
        if not query_str:
            return jsonify({"results": []}), 200

        # Validate project ID format from URL path
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400

        db = mongo.db

        # --- Verify Project Access ---
        project = db.projects.find_one(
            {"_id": obj_project_id},
            {"ownerId": 1, "collaborators": 1},  # Projection for access check
        )
        if not project:
            return jsonify({"message": "Project not found."}), 404

        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "Access denied to search URLs in this project."}), 403

        # --- Perform Search using Aggregation Pipeline ---
        # Escape regex special characters so the user's text is matched
        # literally rather than interpreted as a pattern.
        escaped_query = re.escape(query_str)
        searchable_fields = ("title", "note", "keywords.word", "summary")
        field_clauses = [
            {field: {"$regex": escaped_query, "$options": "i"}}
            for field in searchable_fields
        ]
        search_pipeline = [
            {"$match": {"projectId": obj_project_id}},
            {"$match": {"$or": field_clauses}},
            # Project only fields needed by the output schema
            {"$project": {"_id": 1, "title": 1, "url": 1, "updatedAt": 1}},
            {"$sort": {"updatedAt": -1}},  # Sort by update time
            # Add $limit stage if needed
        ]

        # Execute the aggregation pipeline and materialize the cursor
        search_result_docs = list(db.urls.aggregate(search_pipeline))

        # --- Serialize results using the schema ---
        output_schema = URLSearchResultSchema(many=True)
        serialized_result = output_schema.dump(search_result_docs)

        # Return the search results
        return jsonify({"results": serialized_result}), 200

    except Exception as e:
        logger.error(f"Error searching URLs in project {project_id} with query '{query_str}': {e}", exc_info=True)
        return jsonify({"message": "An error occurred during URL search."}), 500
|
||||
|
||||
Reference in New Issue
Block a user