Initial Commit

This commit is contained in:
ldy
2025-06-09 17:53:19 +08:00
parent de861d4815
commit 2be3d00ac4
96 changed files with 13327 additions and 2 deletions

View File

@@ -0,0 +1,123 @@
# backend/myapp/__init__.py
import sys
import os
from flask import Flask, jsonify

# --- 1. Import Extensions ---
# Import specific extension instances defined in extensions.py
# (avoid 'import *' so the app's dependencies stay explicit).
try:
    from .extensions import mongo, cors  # Add other extensions like jwt, ma if used
except ImportError as e:
    print(f"Error importing extensions: {e}. Make sure extensions.py exists and defines instances.")
    # Fall back to None so create_app() can still run; extension
    # initialization below is skipped when these are None.
    mongo = None
    cors = None

# --- 2. Import Default Config ---
# Assumes config.py is in the parent 'backend' directory. Adjust if moved.
try:
    # This relative import works if 'backend' is treated as a package or is in sys.path
    from .config import Config, config as config_options  # 'config' maps names -> config classes
except ImportError:
    print("Warning: Could not import default config from parent directory.")
    # Minimal fallback so the app can still boot.
    class Config:
        # NOTE(review): the hard-coded fallback secret is not safe for
        # production — set SECRET_KEY in the environment.
        SECRET_KEY = os.environ.get('SECRET_KEY') or 'a-default-fallback-secret-key'
        DEBUG = False
    config_options = {'default': Config}
def create_app(config_name='default') -> Flask:
    """
    Create and configure the Flask application instance.

    Uses the Application Factory pattern.

    Args:
        config_name: Key into ``config_options`` selecting the config
            class to load; unknown names fall back to ``Config``.

    Returns:
        The configured :class:`flask.Flask` application.
    """
    # === Step 1: Create Flask App ===
    # instance_relative_config=True enables loading overrides from the
    # instance/ folder relative to the 'backend' directory.
    app = Flask(__name__, instance_relative_config=True)

    # === Step 2: Load Configuration ===
    # Load default config based on config_name (if using different configs)
    selected_config = config_options.get(config_name, Config)
    app.config.from_object(selected_config)
    # Load instance config (/instance/config.py) - overrides defaults.
    # silent=True prevents errors if the file doesn't exist.
    app.config.from_pyfile('config.py', silent=True)

    # === Step 3: Initialize Extensions ===
    if mongo:
        try:
            mongo.init_app(app)
            print("PyMongo initialized successfully.")
        except Exception as e:
            print(f"Error initializing PyMongo: {e}")
    if cors:
        try:
            # Configure CORS from app.config; '*' (any origin) is the
            # fallback when FRONTEND_ORIGIN is not configured.
            # (Fixed: removed an unused local 'frontend_origin' that was
            # defined here but never read.)
            cors.init_app(
                app,
                resources={r"/api/*": {"origins": app.config.get('FRONTEND_ORIGIN', '*')}},
                supports_credentials=True,
            )
            print("CORS initialized successfully.")
        except Exception as e:
            print(f"Error initializing CORS: {e}")
    # Add init_app calls for other extensions (jwt, ma, migrate, etc.) here.

    # === Step 4: Register Blueprints ===
    try:
        # Each blueprint's __init__.py is expected to define an object named 'bp'.
        from .auth import bp as auth_bp  # checked
        from .ai_services import bp as ai_services_bp
        from .activity import bp as activity_bp
        from .dialog import bp as dialog_bp
        from .projects import bp as projects_bp  # checked
        from .urls import bp as urls_bp
        # Prefixes given here override any default url_prefix on the blueprints.
        app.register_blueprint(auth_bp, url_prefix='/api/auth')
        app.register_blueprint(ai_services_bp, url_prefix="/api/ai")  # Changed prefix
        app.register_blueprint(activity_bp, url_prefix='/api/activity')
        app.register_blueprint(projects_bp, url_prefix='/api/projects')
        app.register_blueprint(dialog_bp, url_prefix="/api/dialog")
        app.register_blueprint(urls_bp, url_prefix="/api/urls")
        print("Blueprints registered successfully.")
    except (ModuleNotFoundError, ImportError) as e:
        print(f"Error importing or registering blueprints: {e}. Check blueprint structure and 'bp' variable names.")
    except Exception as e:
        print(f"An unexpected error occurred during blueprint registration: {e}")

    # === Step 5: Add Root Route (Optional) ===
    @app.route("/")
    def index():
        # Cheap health probe: report whether the MongoDB connection works.
        db_status = "disconnected"
        if mongo:
            try:
                # The ismaster command is cheap and does not require auth.
                mongo.cx.admin.command('ismaster')
                db_status = "connected"
            except Exception:
                db_status = "connection error"
        return jsonify({"message": "Backend service is running!", "database_status": db_status})

    # App-wide error handlers.
    @app.errorhandler(404)
    def page_not_found(e):
        return jsonify(error=str(e)), 404

    print(f"App created with config: {config_name}")
    print(f"Instance path: {app.instance_path}")  # Check instance path
    return app

View File

@@ -0,0 +1,11 @@
# myapp/activity/__init__.py
from flask import Blueprint

# Blueprint for the project activity module. The url_prefix here is a
# default; the application factory may override it at registration time.
bp = Blueprint('activity', __name__, url_prefix='/api/activity')

# Import the routes module so its @bp.route handlers attach to 'bp'.
# Routes live in 'activity_routes.py'.
# This import MUST come AFTER 'bp' is defined (the routes module imports
# 'bp' back from this package, so the order avoids a circular import).
from . import activity_routes

View File

@@ -0,0 +1,297 @@
# myapp/activity/activity_routes.py
import datetime
import logging
from flask import request, jsonify, current_app, has_app_context  # Flask utilities
from bson.objectid import ObjectId, InvalidId  # For MongoDB ObjectIds
from functools import wraps  # Preserves metadata on the fallback decorator

# --- Local Blueprint Import ---
from . import bp  # The 'bp' instance defined in the local __init__.py

# --- Shared Extensions and Utilities Imports ---
try:
    from ..extensions import mongo  # The initialized PyMongo instance
    from ..utils import token_required  # The authentication decorator
except ImportError:
    # Degrade gracefully: each route checks 'mongo' and returns a 500.
    print("Warning: Could not import mongo or token_required in activity/activity_routes.py.")
    mongo = None
    # Dummy decorator so the route definitions below don't raise
    # NameError; it rejects every request with a configuration error.
    def token_required(f):
        @wraps(f)  # Use wraps for better introspection
        def wrapper(*args, **kwargs):
            print("ERROR: token_required decorator is not available!")
            return jsonify({"message": "Server configuration error: Missing authentication utility."}), 500
        return wrapper

# --- Schema Imports ---
try:
    # Marshmallow schemas for request validation / response serialization.
    from ..schemas import ActivityCreateSchema, ActivitySchema
    from marshmallow import ValidationError
except ImportError:
    print("Warning: Could not import Activity schemas or ValidationError in activity/activity_routes.py.")
    ActivityCreateSchema = None
    ActivitySchema = None
    ValidationError = None  # Routes check for None before using these
# --- Helper to get logger safely ---
def _get_logger():
    """Return the current app's logger inside an app context, else a module-level logger."""
    return current_app.logger if has_app_context() else logging.getLogger(__name__)
# Note: Routes use paths relative to the '/api/activity' prefix defined in __init__.py.
@bp.route('/', methods=['POST'])  # Path relative to blueprint prefix
@token_required
def create_activity(current_user):
    """
    Create a new project activity log entry.

    Validates the JSON payload with ActivityCreateSchema (expects
    'projectId', 'activityType' and an optional 'message'), verifies the
    authenticated user owns or collaborates on the project, then inserts
    a document into the 'project_activity' collection.

    Returns:
        201 with the new log id on success; JSON error with 4xx/5xx otherwise.
    """
    logger = _get_logger()
    # Validate user object injected by token_required
    if not current_user or not current_user.get("_id"):
        logger.error("Invalid current_user object received in create_activity")
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in create_activity: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Dependencies may be None if startup imports failed
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not ActivityCreateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    schema = ActivityCreateSchema()
    try:
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Create activity validation failed: {err.messages}")
        # 422 Unprocessable Entity with Marshmallow's error details
        return jsonify(err.messages), 422
    # Extract validated data
    project_id_str = validated_data['projectId']  # Validated by schema
    activity_type = validated_data['activityType']
    message = validated_data.get('message', "")  # Optional free-text message
    try:
        # Convert projectId string to ObjectId (schema validator should ensure format)
        try:
            project_obj_id = ObjectId(project_id_str)
        except InvalidId:
            # Should be caught by schema validation; log loudly if it slips through.
            logger.error(f"Schema validation passed but ObjectId conversion failed for: {project_id_str}")
            return jsonify({"message": "Invalid projectId format despite schema validation."}), 400
        # --- Verify Project Access ---
        db = mongo.db
        project = db.projects.find_one({"_id": project_obj_id}, {"ownerId": 1, "collaborators": 1})
        if not project:
            return jsonify({"message": "Project not found."}), 404
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if owner_id != user_id and user_id not in collaborators:
            # 403 Forbidden - authenticated but not authorized for this project
            return jsonify({"message": "You do not have access to this project."}), 403
        # --- Prepare and Insert Activity Log ---
        now = datetime.datetime.now(datetime.timezone.utc)  # Timezone-aware UTC
        doc = {
            "projectId": project_obj_id,
            "userId": user_id,  # User who performed the activity
            "activityType": activity_type,
            "message": message,
            "createdAt": now
            # No updatedAt: activity logs are append-only
        }
        result = db.project_activity.insert_one(doc)
        return jsonify({
            "message": "Activity log created successfully.",
            "activity_id": str(result.inserted_id)  # ObjectId -> string
        }), 201
    except KeyError:  # Should be caught by token_required or initial check
        # (Fixed: removed pointless f-prefix on constant log string.)
        logger.error("User ID (_id) not found in token payload for create_activity.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error creating activity for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while creating the activity log."}), 500
@bp.route('/', methods=['GET'])  # Path relative to blueprint prefix
@token_required
def list_activity_logs(current_user):
    """
    List activity logs for a specific project.

    Requires 'projectId' as a query parameter; supports 'limit'
    (default 20) and 'offset' (default 0) for pagination. Verifies the
    user owns or collaborates on the project, then serializes results
    with ActivitySchema, newest first.

    Returns:
        200 with {"activity_logs": [...]} on success; JSON error otherwise.
    """
    logger = _get_logger()
    # Validate user object injected by token_required
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in list_activity_logs: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Dependencies may be None if startup imports failed
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not ActivitySchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        # Query parameters
        project_id_str = request.args.get("projectId", "")
        limit_str = request.args.get("limit", "20")  # Default limit 20
        offset_str = request.args.get("offset", "0")  # Default offset 0
        # Parse pagination parameters, falling back to defaults on bad input
        try:
            limit = max(int(limit_str), 1)  # At least 1
        except ValueError:
            limit = 20
        try:
            offset = max(int(offset_str), 0)  # Non-negative
        except ValueError:
            offset = 0
        # Project ID is required for listing logs
        if not project_id_str:
            return jsonify({"message": "Query parameter 'projectId' is required to list logs."}), 400
        try:
            project_obj_id = ObjectId(project_id_str)
        except InvalidId:
            return jsonify({"message": "Invalid projectId format in query parameter."}), 400
        # --- Verify Project Access ---
        db = mongo.db
        project = db.projects.find_one({"_id": project_obj_id}, {"ownerId": 1, "collaborators": 1})
        if not project:
            return jsonify({"message": "Project not found."}), 404
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to this project's activity logs."}), 403
        # --- Fetch Activity Logs (newest first) ---
        cursor = db.project_activity.find(
            {"projectId": project_obj_id}
        ).sort("createdAt", -1).skip(offset).limit(limit)
        activity_docs = list(cursor)
        # --- Serialize with the schema (handles ObjectId/datetime) ---
        output_schema = ActivitySchema(many=True)
        serialized_result = output_schema.dump(activity_docs)
        return jsonify({"activity_logs": serialized_result}), 200
    except KeyError:  # Should be caught by token_required or initial check
        # (Fixed: removed pointless f-prefix on constant log string.)
        logger.error("User ID (_id) not found in token payload for list_activity_logs.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error listing activity logs for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while listing activity logs."}), 500
@bp.route('/<string:activity_id>', methods=['DELETE'])  # Path relative to blueprint prefix
@token_required
def delete_activity_log(current_user, activity_id):
    """
    Delete a specific activity log entry by its ID.

    Only the owner of the project the log belongs to may delete it.
    No schema is needed for input/output here.

    Args:
        current_user: Decoded token payload injected by token_required.
        activity_id: String form of the log's ObjectId (from the URL).

    Returns:
        200 on success; JSON error with 4xx/5xx otherwise.
    """
    logger = _get_logger()
    # Validate user object injected by token_required
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in delete_activity_log: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    try:
        # Validate activity_id format
        try:
            obj_activity_id = ObjectId(activity_id)
        except InvalidId:
            return jsonify({"message": "Invalid activity log ID format."}), 400
        db = mongo.db
        # --- Find Log and Verify Ownership via Project ---
        # Fetch only projectId; that's all we need for the ownership check.
        activity_doc = db.project_activity.find_one({"_id": obj_activity_id}, {"projectId": 1})
        if not activity_doc:
            return jsonify({"message": "Activity log not found."}), 404
        project_id = activity_doc.get("projectId")
        if not project_id or not isinstance(project_id, ObjectId):
            logger.error(f"Activity log {activity_id} is missing valid projectId.")
            return jsonify({"message": "Cannot verify ownership due to missing project reference."}), 500
        project = db.projects.find_one({"_id": project_id}, {"ownerId": 1})
        if not project:
            logger.warning(f"Project {project_id} associated with activity log {activity_id} not found.")
            # Orphan logs are NOT deletable: deny for safety.
            return jsonify({"message": "Associated project not found."}), 404
        # Only the project owner can delete logs in this implementation.
        owner_id = project.get("ownerId")
        if owner_id != user_id:
            return jsonify({"message": "You do not have permission to delete this activity log (must be project owner)."}), 403
        # --- Perform Deletion ---
        result = db.project_activity.delete_one({"_id": obj_activity_id})
        # --- Return Response ---
        if result.deleted_count == 1:
            return jsonify({"message": "Activity log deleted successfully."}), 200
        else:
            # Found earlier but delete removed nothing (likely raced a concurrent delete).
            logger.warning(f"Activity log {activity_id} found but delete_one removed 0 documents.")
            return jsonify({"message": "Failed to delete activity log (already deleted?)."}), 404
    except KeyError:  # Should be caught by token_required or initial check
        # (Fixed: removed pointless f-prefix on constant log string.)
        logger.error("User ID (_id) not found in token payload for delete_activity_log.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error deleting activity log {activity_id} for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while deleting the activity log."}), 500

View File

@@ -0,0 +1,13 @@
# myapp/api_keys/__init__.py
from flask import Blueprint

# Blueprint for the API key management module.
# 'api_keys' is the unique name for this blueprint.
# url_prefix='/api/keys' will be prepended to all routes defined in this
# blueprint (unless overridden when registered by the app factory).
bp = Blueprint('api_keys', __name__, url_prefix='/api/keys')

# Import the routes module to attach its handlers to 'bp'.
# NOTE(review): routes live in 'ai_routes.py' even though this is the
# api_keys package — confirm the module name is intentional.
# This import MUST come AFTER the Blueprint 'bp' is defined.
from . import ai_routes

View File

@@ -0,0 +1,337 @@
# myapp/ai_services/ai_routes.py
# This file handles API Key management logic.
import datetime
import logging
from flask import request, jsonify, current_app, has_app_context  # Flask utilities
from bson.objectid import ObjectId, InvalidId  # For MongoDB ObjectIds
from functools import wraps  # Preserves metadata on the fallback decorator

# --- Local Blueprint Import ---
from . import bp  # The 'bp' instance defined in the local __init__.py

# --- Shared Extensions and Utilities Imports ---
try:
    from ..extensions import mongo  # The initialized PyMongo instance
    from ..utils import token_required  # The authentication decorator
except ImportError:
    # Degrade gracefully: each route checks 'mongo' and returns a 500.
    print("Warning: Could not import mongo or token_required in ai_services/ai_routes.py.")
    mongo = None
    # Dummy decorator so the route definitions below don't raise
    # NameError; it rejects every request with a configuration error.
    def token_required(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            print("ERROR: token_required decorator is not available!")
            return jsonify({"message": "Server configuration error: Missing authentication utility."}), 500
        return wrapper

# --- Schema Imports ---
try:
    # Marshmallow schemas for request validation / response serialization.
    from ..schemas import APIKeyCreateSchema, APIKeyUpdateSchema, APIKeySchema
    from marshmallow import ValidationError
except ImportError:
    print("Warning: Could not import APIKey schemas or ValidationError in ai_services/ai_routes.py.")
    APIKeyCreateSchema = None
    APIKeyUpdateSchema = None
    APIKeySchema = None
    ValidationError = None  # Routes check for None before using these
# --- Helper to get logger safely ---
def _get_logger():
    """Return the current app's logger inside an app context, else a module-level logger."""
    return current_app.logger if has_app_context() else logging.getLogger(__name__)
# Note: Routes use paths relative to the '/api/ai' prefix.
# Original '/api_list' becomes '/keys'
# Original '/api_list/<api_id>' becomes '/keys/<api_id>'
@bp.route('/keys', methods=['GET'])  # Path relative to blueprint prefix
@token_required
def list_api_keys(current_user):
    """
    List all API keys belonging to the authenticated user.

    Fetches documents from the 'api_list' collection for the user's ID,
    sorted by 'updatedAt' descending, and serializes them with APIKeySchema.

    Returns:
        200 with {"api_keys": [...]} on success; JSON error otherwise.
    """
    logger = _get_logger()
    # Validate user object injected by token_required
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in list_api_keys: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Dependencies may be None if startup imports failed
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not APIKeySchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        # All API key documents for this user, most recently updated first
        db = mongo.db
        cursor = db.api_list.find({"uid": user_id}).sort("updatedAt", -1)
        api_key_docs = list(cursor)
        # --- Serialize results using the schema ---
        output_schema = APIKeySchema(many=True)
        # NOTE(review): APIKeySchema currently dumps the full key value.
        # Consider masking it in the schema if keys must not round-trip.
        serialized_result = output_schema.dump(api_key_docs)
        return jsonify({"api_keys": serialized_result}), 200
    except KeyError:  # Should be caught by initial user_id check
        # (Fixed: removed pointless f-prefix on constant log string.)
        logger.error("User ID (_id) not found in token payload for list_api_keys.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error listing API keys for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while listing API keys."}), 500
@bp.route('/keys', methods=['POST'])  # Path relative to blueprint prefix
@token_required
def create_api_key(current_user):
    """
    Create a new API key entry for the authenticated user.

    Validates the JSON payload with APIKeyCreateSchema (expects 'name',
    'key', optional 'selected'); rejects duplicate names per user with
    409 Conflict.

    Returns:
        201 with the new key id on success; JSON error otherwise.
    """
    logger = _get_logger()
    # Validate user object injected by token_required
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in create_api_key: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Dependencies may be None if startup imports failed
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not APIKeyCreateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    schema = APIKeyCreateSchema()
    try:
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Create API key validation failed: {err.messages}")
        return jsonify(err.messages), 422  # Validation error details
    # Extract validated data
    name = validated_data['name']
    api_key = validated_data['key']
    selected = validated_data['selected']  # Schema provides default if missing
    try:
        # Reject duplicates: one key per (user, name) pair
        db = mongo.db
        existing = db.api_list.find_one({"uid": user_id, "name": name})
        if existing:
            # 409 Conflict for duplicates
            return jsonify({"message": f"User already has an API key for {name}."}), 409
        # --- Prepare and Insert Document ---
        now = datetime.datetime.now(datetime.timezone.utc)  # Timezone-aware UTC
        doc = {
            "uid": user_id,  # Owner's ObjectId
            "name": name,
            "key": api_key,  # The provided key value
            "selected": selected,  # Validated boolean
            "createdAt": now,
            "updatedAt": now
        }
        result = db.api_list.insert_one(doc)
        return jsonify({
            "message": "API key created successfully.",
            "api_id": str(result.inserted_id)  # ObjectId -> string
        }), 201
    except KeyError:  # Should be caught by initial user_id check
        # (Fixed: removed pointless f-prefix on constant log string.)
        logger.error("User ID (_id) not found in token payload for create_api_key.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error creating API key for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while creating API key."}), 500
@bp.route('/keys/<string:api_id>', methods=['PUT'])  # Path relative to blueprint prefix
@token_required
def update_api_key(current_user, api_id):
    """
    Update an existing API key identified by its ID.

    Validates the JSON payload with APIKeyUpdateSchema; allows updating
    'name', 'key' and 'selected'. Verifies ownership, and rejects a new
    name that collides with another of the user's keys (409).

    Args:
        current_user: Decoded token payload injected by token_required.
        api_id: String form of the key's ObjectId (from the URL).

    Returns:
        200 on success; JSON error with 4xx/5xx otherwise.
    """
    logger = _get_logger()
    # Validate user object injected by token_required
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in update_api_key: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Dependencies may be None if startup imports failed
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not APIKeyUpdateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema.
    # The update schema has no required fields, so load() succeeds on an
    # empty body; emptiness is checked explicitly below.
    json_data = request.get_json() or {}
    schema = APIKeyUpdateSchema()
    try:
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Update API key validation failed: {err.messages}")
        return jsonify(err.messages), 422
    # Validation passed but no usable fields were provided
    if not validated_data:
        return jsonify({"message": "No valid fields provided for update."}), 400
    try:
        # Validate api_id format from URL path
        try:
            object_id = ObjectId(api_id)
        except InvalidId:
            return jsonify({"message": "Invalid API key ID format."}), 400
        # Find the document to update
        db = mongo.db
        doc = db.api_list.find_one({"_id": object_id})
        if not doc:
            return jsonify({"message": "API key not found."}), 404
        # Verify that the authenticated user owns this API key
        doc_uid = doc.get("uid")
        if not doc_uid or doc_uid != user_id:
            # 403 Forbidden - authenticated but not authorized for this resource
            return jsonify({"message": "You do not have permission to update this API key."}), 403
        # --- Prepare Update Fields based on validated data ---
        update_fields = {}
        if "name" in validated_data:
            new_name = validated_data["name"]
            # Check for a name conflict only when the name actually changes
            if new_name != doc.get("name") and db.api_list.find_one({"uid": user_id, "name": new_name, "_id": {"$ne": object_id}}):
                return jsonify({"message": f"User already has another API key named {new_name}."}), 409  # Conflict
            update_fields["name"] = new_name
        if "key" in validated_data:
            update_fields["key"] = validated_data["key"]
        if "selected" in validated_data:
            update_fields["selected"] = validated_data["selected"]  # Already boolean from schema
        # Nothing usable survived processing
        if not update_fields:
            return jsonify({"message": "No valid changes detected in the provided data."}), 400
        # Always bump the 'updatedAt' timestamp
        update_fields["updatedAt"] = datetime.datetime.now(datetime.timezone.utc)
        # Perform the update operation in the database
        result = db.api_list.update_one(
            {"_id": object_id},  # Filter by ID
            {"$set": update_fields}  # Set the new values
        )
        if result.matched_count == 1:
            return jsonify({"message": "API key updated successfully."}), 200
        else:
            # Shouldn't happen after the find_one above, but guard anyway
            logger.warning(f"Update matched count was {result.matched_count} for api_id {api_id}")
            return jsonify({"message": "API key update failed (key not found after initial check)."}), 404
    except KeyError:  # Should be caught by initial user_id check
        # (Fixed: removed pointless f-prefix on constant log string.)
        logger.error("User ID (_id) not found in token payload for update_api_key.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error updating API key {api_id} for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while updating the API key."}), 500
@bp.route('/keys/<string:api_id>', methods=['DELETE'])  # Path relative to blueprint prefix
@token_required
def delete_api_key(current_user, api_id):
    """
    Delete an API key identified by its ID.

    Verifies ownership before deleting. No schema is needed for
    input/output here.

    Args:
        current_user: Decoded token payload injected by token_required.
        api_id: String form of the key's ObjectId (from the URL).

    Returns:
        200 on success; JSON error with 4xx/5xx otherwise.
    """
    logger = _get_logger()
    # Validate user object injected by token_required
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in delete_api_key: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    try:
        # Validate api_id format from URL path
        try:
            object_id = ObjectId(api_id)
        except InvalidId:
            return jsonify({"message": "Invalid API key ID format."}), 400
        # Find the document; fetch only uid, which is all the check needs
        db = mongo.db
        doc = db.api_list.find_one({"_id": object_id}, {"uid": 1})
        if not doc:
            return jsonify({"message": "API key not found."}), 404
        # Verify that the authenticated user owns this API key
        doc_uid = doc.get("uid")
        if not doc_uid or doc_uid != user_id:
            # 403 Forbidden
            return jsonify({"message": "You do not have permission to delete this API key."}), 403
        # Perform the delete operation
        result = db.api_list.delete_one({"_id": object_id})
        if result.deleted_count == 1:
            return jsonify({"message": "API key deleted successfully."}), 200
        else:
            # Found earlier but delete removed nothing (likely raced a concurrent delete)
            logger.error(f"Failed to delete API key {api_id} despite finding it initially.")
            return jsonify({"message": "Failed to delete API key (already deleted?)."}), 404
    except KeyError:  # Should be caught by initial user_id check
        # (Fixed: removed pointless f-prefix on constant log string.)
        logger.error("User ID (_id) not found in token payload for delete_api_key.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error deleting API key {api_id} for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while deleting the API key."}), 500

View File

@@ -0,0 +1,13 @@
# myapp/auth/__init__.py
from flask import Blueprint

# Blueprint for the authentication module.
# 'auth' is the unique name for this blueprint.
# url_prefix='/api/auth' will be prepended to all routes defined in this
# blueprint (unless overridden when registered by the app factory).
bp = Blueprint('auth', __name__, url_prefix='/api/auth')

# Import the routes module to attach its handlers to 'bp'.
# This import MUST come AFTER the Blueprint 'bp' is defined to avoid
# circular imports (auth_routes imports 'bp' back from this package).
from . import auth_routes

View File

@@ -0,0 +1,444 @@
# myapp/auth/auth_routes.py
import datetime
import jwt  # For encoding JWT tokens
import logging
from flask import request, jsonify, current_app, has_app_context  # Flask utilities
from werkzeug.security import generate_password_hash, check_password_hash  # Password hashing/checking
from bson.objectid import ObjectId, InvalidId  # String IDs -> MongoDB ObjectId
from functools import wraps  # Preserves metadata on the fallback decorator

# --- Local Blueprint Import (Moved to Top) ---
# Import the 'bp' instance defined in the local __init__.py FIRST.
# This often helps resolve circular import issues involving blueprints
# and utilities/models.
from . import bp

# --- Shared Utilities Import ---
# Import the token_required decorator from the utils module
try:
    # Assumes utils.py is in the parent 'myapp' package
    from ..utils import token_required
except ImportError as e:
    # Degrade gracefully, but protected routes will reject all requests.
    print("Warning: token_required decorator not found in auth/auth_routes.py. Protected routes will fail.")
    print(e)
    # Dummy decorator to prevent NameError; it does NOT protect routes.
    def token_required(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            print("ERROR: token_required decorator is not available!")
            return jsonify({"message": "Server configuration error: Missing authentication utility."}), 500
        return wrapper

# --- Schema Imports ---
try:
    # Marshmallow schemas for request validation / response serialization.
    from ..schemas import UserRegistrationSchema, UserLoginSchema, UserSchema, UserUpdateSchema
    from marshmallow import ValidationError
except ImportError:
    print("Warning: Could not import User schemas or ValidationError in auth/auth_routes.py.")
    UserRegistrationSchema = None
    UserLoginSchema = None
    UserSchema = None
    UserUpdateSchema = None
    ValidationError = None

# --- Shared Extensions Import ---
# Import mongo for direct use (alternative to current_app.mongo)
try:
    from ..extensions import mongo
except ImportError:
    print("Warning: Could not import mongo extension in auth/auth_routes.py.")
    mongo = None
# --- Helper to get logger safely ---
def _get_logger():
    """Return the Flask app logger inside an app context, else a module-level logger."""
    return current_app.logger if has_app_context() else logging.getLogger(__name__)
# Note: Routes use paths relative to the '/api/auth' prefix defined in __init__.py.
@bp.route('/register', methods=['POST'])
def register():
    """
    Register a new user account.

    Request JSON (validated by UserRegistrationSchema):
        username (str), email (str), password (str, load-only).

    Flow:
        1. Validate the payload (422 with field errors on failure).
        2. Reject duplicate username or email (409 Conflict).
        3. Hash the password and insert the user document.
        4. Issue a JWT and return it with the serialized user (201 Created).

    Returns 500 when dependencies are missing, on database errors, or when
    JWT configuration (SECRET_KEY) is absent.
    """
    logger = _get_logger()
    # Fail fast if the app started without its DB or schema dependencies.
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not UserRegistrationSchema or not UserSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    schema = UserRegistrationSchema()
    try:
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Registration validation failed: {err.messages}")
        return jsonify(err.messages), 422  # Return field-level validation errors
    # Extract validated data
    username = validated_data['username']
    email = validated_data['email']
    password = validated_data['password']  # Raw password (schema marks it load_only)
    try:
        db = mongo.db  # Use imported mongo instance's db attribute
        # Uniqueness checks: reject if username or email is already registered.
        if db.users.find_one({"username": username}):
            return jsonify({"message": "Username already exists."}), 409  # 409 Conflict
        if db.users.find_one({"email": email}):
            return jsonify({"message": "Email already registered."}), 409  # 409 Conflict
    except AttributeError:
        logger.error("PyMongo extension not initialized or db attribute missing.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Database error checking existing user: {e}", exc_info=True)
        return jsonify({"message": "Database error during registration check."}), 500
    # Hash the password before storing; the raw password is never persisted.
    hashed_pw = generate_password_hash(password)
    # Create the new user document
    now = datetime.datetime.now(datetime.timezone.utc)  # Timezone-aware UTC timestamp
    new_user_doc = {
        "username": username,
        "email": email,
        "password": hashed_pw,  # Store the hashed password only
        "createdAt": now,
        "updatedAt": now
    }
    # Insert the new user into the database
    try:
        result = db.users.insert_one(new_user_doc)
        user_id = result.inserted_id  # This is an ObjectId
        # Fetch the created user document back so it can be serialized.
        created_user = db.users.find_one({"_id": user_id})
        if not created_user:  # Should not happen, but check
            logger.error(f"Failed to retrieve user immediately after insertion: {user_id}")
            # Don't fail the whole registration; construct a minimal user dict instead.
            created_user = {"_id": user_id, "username": username, "email": email}
    except Exception as e:
        logger.error(f"Error inserting new user: {e}", exc_info=True)
        return jsonify({"message": "An error occurred during registration."}), 500
    # Generate JWT token using settings from app config.
    try:
        secret_key = current_app.config['SECRET_KEY']
        algo = current_app.config.get('JWT_ALGORITHM', 'HS256')
        exp_hours = current_app.config.get('JWT_EXP_DELTA_HOURS', 24)
        token_payload = {
            "user_id": str(user_id),  # Convert ObjectId to string for JWT payload
            "exp": datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(hours=exp_hours)
        }
        token = jwt.encode(token_payload, secret_key, algorithm=algo)
    except KeyError:
        logger.error("SECRET_KEY not configured in Flask app for JWT.")
        return jsonify({"message": "Server configuration error: JWT secret missing."}), 500
    except Exception as e:
        logger.error(f"Error encoding JWT during registration: {e}", exc_info=True)
        return jsonify({"message": "Could not generate authentication token."}), 500
    # Serialize the created user with UserSchema (password is excluded there).
    output_schema = UserSchema()
    serialized_user = output_schema.dump(created_user)
    # Return success response with token and serialized user info
    return jsonify({
        "message": "User registered successfully.",
        "token": token,
        "user": serialized_user  # Full user object, not just the id
    }), 201  # 201 Created
@bp.route('/login', methods=['POST'])
def login():
    """
    Log in an existing user.

    Request JSON (validated by UserLoginSchema): username (str), password (str).

    Verifies the password hash against the stored user document and, on
    success, returns a JWT plus the serialized user (200). A deliberately
    generic 401 "Invalid credentials." is returned whether the username is
    unknown or the password is wrong.
    """
    logger = _get_logger()
    # Fail fast if the app started without its DB or schema dependencies.
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not UserLoginSchema or not UserSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    schema = UserLoginSchema()
    try:
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Login validation failed: {err.messages}")
        return jsonify(err.messages), 422
    username = validated_data['username']
    password = validated_data['password']  # Raw password (schema marks it load_only)
    # Access the database
    try:
        db = mongo.db
        if db is None: raise AttributeError("db attribute is None")
        # Find user by username
        user_doc = db.users.find_one({"username": username})
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly during login.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Database error during login for user {username}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred during login."}), 500
    # Check user existence and password hash in one expression so unknown-user
    # and wrong-password cases produce the same response.
    if not user_doc or 'password' not in user_doc or not check_password_hash(user_doc["password"], password):
        return jsonify({"message": "Invalid credentials."}), 401  # 401 for authentication failure
    # Generate JWT token using settings from app config.
    try:
        user_id = user_doc["_id"]  # ObjectId of the authenticated user
        secret_key = current_app.config['SECRET_KEY']
        algo = current_app.config.get('JWT_ALGORITHM', 'HS256')
        exp_hours = current_app.config.get('JWT_EXP_DELTA_HOURS', 24)
        token_payload = {
            "user_id": str(user_id),  # Convert ObjectId to string
            "exp": datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(hours=exp_hours)
        }
        token = jwt.encode(token_payload, secret_key, algorithm=algo)
    except KeyError:
        logger.error("SECRET_KEY not configured in Flask app for JWT.")
        return jsonify({"message": "Server configuration error: JWT secret missing."}), 500
    except Exception as e:
        logger.error(f"Error encoding JWT for user {username}: {e}", exc_info=True)
        return jsonify({"message": "Could not generate authentication token."}), 500
    # Serialize the user with UserSchema (password is excluded there).
    output_schema = UserSchema()
    serialized_user = output_schema.dump(user_doc)
    # Return success response with token and serialized user info
    return jsonify({
        "message": "Login successful.",
        "token": token,
        "user": serialized_user  # Full user object, not just the id
    }), 200
@bp.route('/delete_account', methods=['DELETE'])
@token_required  # Protect the route; injects the authenticated 'current_user'
def delete_account(current_user):
    """
    Delete the authenticated user's account and handle associated data.

    Cascade policy:
      - Remove the user from collaborator lists of projects they don't own.
      - Projects the user owns: transfer ownership to the first remaining
        collaborator when one exists; otherwise delete the project along with
        its URLs, activity logs, and dialog sessions.

    Requires a valid JWT; returns 200 on success, 404 if the user document
    no longer exists.
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        # NOTE(review): prefers 'user_id' over '_id' here, while update_account
        # prefers '_id' first — works either way, but worth unifying.
        user_id_str = str(current_user.get("user_id") or current_user.get("_id"))
        if not user_id_str:
            return jsonify({"message": "Invalid token or user information not found in token."}), 401
        user_id = ObjectId(user_id_str)  # Convert string ID back to ObjectId
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in delete_account from token data {current_user}: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    try:
        db = mongo.db
        # --- Cascading cleanup of the user's data ---
        # 1. Delete the user document itself
        user_result = db.users.delete_one({"_id": user_id})
        # 2. Remove user from collaborator lists in projects they didn't own
        db.projects.update_many(
            {"ownerId": {"$ne": user_id}, "collaborators": user_id},
            {"$pull": {"collaborators": user_id}}
        )
        # 3. Handle projects owned by the user: reassign or delete.
        owned_projects_cursor = db.projects.find({"ownerId": user_id}, {"_id": 1, "collaborators": 1})
        project_ids_to_delete = []
        projects_to_reassign = []
        for project in owned_projects_cursor:
            project_id = project["_id"]
            # Collaborators other than the user being deleted.
            collaborators = [collab_id for collab_id in project.get("collaborators", []) if collab_id != user_id]
            if collaborators:
                # Promote the first remaining collaborator to owner.
                new_owner = collaborators[0]
                projects_to_reassign.append({
                    "filter": {"_id": project_id},
                    "update": {
                        "$set": {"ownerId": new_owner, "lastActivityBy": new_owner},
                        # New owner should no longer appear as a collaborator.
                        "$pull": {"collaborators": new_owner}
                    }
                })
            else:
                # No one left to own the project; mark it for deletion.
                project_ids_to_delete.append(project_id)
        if projects_to_reassign:
            for reassignment in projects_to_reassign:
                db.projects.update_one(reassignment["filter"], reassignment["update"])
            logger.info(f"Reassigned ownership for {len(projects_to_reassign)} projects previously owned by {user_id_str}")
        if project_ids_to_delete:
            delete_owned_projects_result = db.projects.delete_many({"_id": {"$in": project_ids_to_delete}})
            logger.info(f"Deleted {delete_owned_projects_result.deleted_count} projects owned by {user_id_str} with no remaining collaborators.")
            # Cascade deletes for data tied to the removed projects.
            delete_urls_result = db.urls.delete_many({"projectId": {"$in": project_ids_to_delete}})
            logger.info(f"Deleted {delete_urls_result.deleted_count} URLs for deleted projects of user {user_id_str}")
            delete_activity_result = db.project_activity.delete_many({"projectId": {"$in": project_ids_to_delete}})
            logger.info(f"Deleted {delete_activity_result.deleted_count} activity logs for deleted projects of user {user_id_str}")
            delete_dialog_result = db.dialog_activity.delete_many({"projectId": {"$in": project_ids_to_delete}})
            logger.info(f"Deleted {delete_dialog_result.deleted_count} dialog sessions for deleted projects of user {user_id_str}")
        # --- End cascading cleanup ---
        if user_result.deleted_count == 1:
            return jsonify({"message": "Account and associated data handled successfully."}), 200
        elif user_result.deleted_count == 0:
            return jsonify({"message": "User not found or already deleted."}), 404
        else:
            logger.warning(f"Unexpected deleted_count ({user_result.deleted_count}) for user {user_id}")
            return jsonify({"message": "An issue occurred during account deletion."}), 500
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error during account deletion for user {user_id_str}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred during account deletion."}), 500
@bp.route('/logout', methods=['POST'])
@token_required  # Only authenticated callers may hit this, even though JWT logout is stateless
def logout(current_user):
    """
    Stateless logout for JWT auth: the server keeps no session, so the client
    is responsible for discarding its token after this call.
    """
    payload = {"message": "Logout successful. Please discard your token."}
    return jsonify(payload), 200
@bp.route('/account', methods=['PUT'])
@token_required  # Protect the route; injects the authenticated 'current_user'
def update_account(current_user):
    """
    Update the authenticated user's username, email, and/or password.

    Request JSON (validated by UserUpdateSchema): any subset of 'username',
    'email', 'password'.

    Responses:
        200 on success (even if values matched existing data),
        400 when no valid fields were supplied,
        409 when a requested username/email is taken by another user,
        422 on schema validation failure,
        404 when the token's user no longer exists.
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id_str = str(current_user.get("_id") or current_user.get("user_id"))
        if not user_id_str:
            return jsonify({"message": "User ID not found in token."}), 401
        user_id = ObjectId(user_id_str)
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error from token ({current_user}) in update_account: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Fail fast if the app started without its DB or schema dependencies.
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not UserUpdateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    schema = UserUpdateSchema()
    try:
        # Load validates optional fields based on schema rules
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Update account validation failed: {err.messages}")
        return jsonify(err.messages), 422
    # If validation passed but no valid fields were provided
    if not validated_data:
        return jsonify({"message": "No valid update fields provided (username, email, or password)."}), 400
    db = mongo.db
    update_fields = {}  # Fields that passed all checks and will be $set
    db_validation_errors = {}  # DB-level validation failures (uniqueness)
    # --- Validate uniqueness and prepare updates based on validated_data ---
    try:
        # Check username uniqueness (excluding this user's own document).
        if "username" in validated_data:
            new_username = validated_data["username"]
            if db.users.find_one({"username": new_username, "_id": {"$ne": user_id}}):
                db_validation_errors["username"] = "Username is already taken."
            else:
                update_fields["username"] = new_username
        # Check email uniqueness (excluding this user's own document).
        if "email" in validated_data:
            new_email = validated_data["email"]
            if db.users.find_one({"email": new_email, "_id": {"$ne": user_id}}):
                db_validation_errors["email"] = "Email is already registered by another user."
            else:
                update_fields["email"] = new_email
        # Hash the new password before storing; raw value is never persisted.
        if "password" in validated_data:
            update_fields["password"] = generate_password_hash(validated_data["password"])
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly during validation.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error during database validation for user {user_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred during data validation."}), 500
    # If database validation errors occurred (e.g., uniqueness checks)
    if db_validation_errors:
        return jsonify({"message": "Validation errors occurred.", "errors": db_validation_errors}), 409  # 409 Conflict
    # If there are fields to update, add the timestamp and perform the update
    if update_fields:
        update_fields["updatedAt"] = datetime.datetime.now(datetime.timezone.utc)
        try:
            result = db.users.update_one({"_id": user_id}, {"$set": update_fields})
            if result.matched_count == 0:
                # The user_id from the token no longer exists in the DB.
                return jsonify({"message": "User not found."}), 404
            # modified_count may be 0 if the submitted data equals the stored
            # data; that still counts as success here.
            return jsonify({"message": "Account updated successfully."}), 200
        except AttributeError:
            logger.error("PyMongo extension not initialized or attached correctly during update.")
            return jsonify({"message": "Database configuration error."}), 500
        except Exception as e:
            logger.error(f"Error updating account for user {user_id}: {e}", exc_info=True)
            return jsonify({"message": "An error occurred while updating the account."}), 500
    else:
        # Defensive: only reachable if validation passed yet every field was
        # rejected by the uniqueness checks above without recording an error.
        return jsonify({"message": "No changes were requested or fields were invalid."}), 400

View File

@@ -0,0 +1,31 @@
import os
import secrets
class Config:
    """Default application configuration; values can be overridden via environment variables."""
    # MongoDB Atlas connection string: set MONGO_URI in the environment.
    # NOTE(security): the fallback literal below embeds real credentials in
    # source control — rotate this password and drop the default; confirm no
    # deployment relies on it before removing.
    MONGO_URI: str = os.environ.get(
        "MONGO_URI",
        "mongodb+srv://surfsmart_server:IVV0mzUcwoEqHjNV@projectdatacluster.ki0t3z8.mongodb.net/surfsmart?retryWrites=true&w=majority&appName=ProjectDataCluster"
    )
    # Flask secret key for sessions and JWT signing.
    # NOTE(review): the secrets.token_hex(32) fallback generates a NEW key on
    # every process start, so previously issued JWTs become invalid after a
    # restart and across multiple workers — set SECRET_KEY explicitly in
    # production.
    SECRET_KEY: str = os.environ.get("SECRET_KEY", secrets.token_hex(32))
    # JWT configuration
    JWT_ALGORITHM: str = "HS256"  # signing algorithm passed to PyJWT
    JWT_EXP_DELTA_HOURS: int = 2  # token lifetime in hours
    # TODO make this name selectable
    GEMINI_MODEL_NAME: str = 'gemini-1.5-pro-latest'
    # Celery broker/result backend (default to a local Redis instance).
    CELERY_BROKER_URL: str = os.environ.get("CELERY_BROKER_URL", "redis://localhost:6379/0")
    CELERY_RESULT_BACKEND: str = os.environ.get("CELERY_RESULT_BACKEND", "redis://localhost:6379/0")
# Mapping of config-name -> config instance, consumed by create_app(config_name).
config = {
    "default": Config()
}

View File

@@ -0,0 +1,13 @@
# myapp/dialog/__init__.py
from flask import Blueprint
# Blueprint for the dialog module.
# 'dialog' is the unique blueprint name Flask uses for endpoint namespacing.
# url_prefix='/api/dialog' is prepended to every route registered on this blueprint.
bp = Blueprint('dialog', __name__, url_prefix='/api/dialog')
# Import the routes module so its @bp.route handlers (and helper functions)
# register on 'bp'. This import MUST come AFTER the Blueprint 'bp' is defined:
# dialog_routes imports 'bp' from this package, so importing it earlier would
# create a circular import.
from . import dialog_routes

View File

@@ -0,0 +1,787 @@
# myapp/dialog/dialog_routes.py
import datetime
import os
import logging
from flask import request, jsonify, current_app, has_app_context # Flask utilities
from bson.objectid import ObjectId, InvalidId # For MongoDB ObjectIds
from functools import wraps # Import wraps for dummy decorator
# --- Local Blueprint Import ---
from . import bp # Import the 'bp' instance defined in the local __init__.py
# --- Shared Extensions and Utilities Imports ---
try:
from ..extensions import mongo # Import the initialized PyMongo instance
from ..utils import token_required # Import the authentication decorator
except ImportError:
# Fallback or error handling if imports fail
print("Warning: Could not import mongo or token_required in dialog/dialog_routes.py.")
mongo = None
# Define a dummy decorator if token_required is missing
def token_required(f):
@wraps(f)
def wrapper(*args, **kwargs):
print("ERROR: token_required decorator is not available!")
return jsonify({"message": "Server configuration error: Missing authentication utility."}), 500
return wrapper
# --- Schema Imports ---
try:
# Import the relevant schemas defined in schemas.py
from ..schemas import (
DialogCreateSchema, DialogSendMessageSchema,
DialogSchema, DialogSummarySchema
)
from marshmallow import ValidationError
except ImportError:
print("Warning: Could not import Dialog schemas or ValidationError in dialog/dialog_routes.py.")
DialogCreateSchema = None
DialogSendMessageSchema = None
DialogSchema = None
DialogSummarySchema = None
ValidationError = None # Define ValidationError as None if import fails
# --- External API and Langchain Imports ---
# Keep these imports conditional to avoid errors if libraries are not installed
try:
import google.generativeai as genai
from google.api_core import exceptions as google_exceptions
except ImportError:
print("Warning: google.generativeai not installed. Gemini functionality will fail.")
genai = None
google_exceptions = None
try:
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
except ImportError:
print("Warning: Langchain components not installed. Vector store functionality will fail.")
OpenAIEmbeddings = None
FAISS = None
Document = None
# --- Constants ---
# Adjust model name if needed, potentially move to config
MAX_HISTORY_MESSAGES = 20 # Max messages to keep in history for context (if applicable, not used in current Gemini call)
# --- Helper to get logger safely ---
def _get_logger():
    """Return the Flask app logger inside an app context, else a module-level logger."""
    return current_app.logger if has_app_context() else logging.getLogger(__name__)
##################################################
# Helper Functions (kept within this module)
##################################################
# --- Knowledge Base Helpers ---
# (generate_knowledge_base_message, update_project_knowledge, process_api_response_and_update_knowledge - unchanged from previous version)
def generate_knowledge_base_message(project_id):
    """
    Build a single context string from all URL documents of a project.

    Each URL document contributes a formatted entry (title, URL, note,
    keywords with percentages, summary); entries are joined with separators
    and truncated to the configured maximum length.

    Args:
        project_id (ObjectId | str): Project identifier; strings are converted.

    Returns:
        str: The combined knowledge text, "No external knowledge available
        for this project." when no URLs exist, an error string for invalid
        IDs / DB misconfiguration, or "" on unexpected failure.
    """
    logger = _get_logger()
    if not mongo:
        logger.error("Mongo extension not available in generate_knowledge_base_message.")
        return ""  # No DB connection; nothing we can do
    try:
        # Normalize the identifier to an ObjectId.
        if not isinstance(project_id, ObjectId):
            project_id = ObjectId(project_id)
        knowledge_entries = []
        # One formatted entry per stored URL document.
        for doc in mongo.db.urls.find({"projectId": project_id}):
            keyword_parts = [
                f"{kw.get('word', '')}({kw.get('percentage', 'N/A')}%)"
                for kw in doc.get("keywords", [])
            ]
            knowledge_entries.append(
                f"Title: {doc.get('title', 'N/A')}\n"
                f"URL: {doc.get('url', 'N/A')}\n"
                f"Note: {doc.get('note', 'N/A')}\n"
                f"Keywords: {', '.join(keyword_parts) if keyword_parts else 'N/A'}\n"
                f"Summary: {doc.get('summary', 'N/A')}"
            )
        if not knowledge_entries:
            return "No external knowledge available for this project."
        combined = "\n\n---\n\n".join(knowledge_entries)
        # Respect the configured length cap (default 4000 chars).
        max_length = current_app.config.get('KNOWLEDGE_BASE_MAX_LENGTH', 4000) if has_app_context() else 4000
        if len(combined) > max_length:
            combined = combined[:max_length] + " ... [truncated]"
        return combined
    except InvalidId:
        logger.error(f"Invalid project_id format passed to generate_knowledge_base_message: {project_id}")
        return "Error: Invalid project identifier."
    except AttributeError:
        logger.error("PyMongo extension not initialized or available.")
        return "Error: Database configuration issue."
    except Exception as e:
        logger.error(f"Error generating knowledge base message for project {project_id}: {e}", exc_info=True)
        return ""  # Fall back to empty context on any other failure
def update_project_knowledge(project_id):
    """
    Cache a condensed snapshot of the project's knowledge base.

    Writes the first 1000 characters of the aggregated knowledge text into
    the project document's 'summary' field and bumps 'updatedAt'. Errors are
    logged and swallowed; the function returns None either way.

    Args:
        project_id (ObjectId | str): Project identifier; strings are converted.
    """
    logger = _get_logger()
    if not mongo:
        logger.error("Mongo extension not available in update_project_knowledge.")
        return
    try:
        # Normalize the identifier to an ObjectId.
        if not isinstance(project_id, ObjectId):
            project_id = ObjectId(project_id)
        # Slicing is a no-op when the text is already <= 1000 chars.
        condensed = generate_knowledge_base_message(project_id)[:1000]
        mongo.db.projects.update_one(
            {"_id": project_id},
            {"$set": {
                "summary": condensed,
                "updatedAt": datetime.datetime.now(datetime.timezone.utc),
            }},
        )
    except InvalidId:
        logger.error(f"Invalid project_id format passed to update_project_knowledge: {project_id}")
    except AttributeError:
        logger.error("PyMongo extension not initialized or available.")
    except Exception as e:
        logger.error(f"Error updating project knowledge cache for {project_id}: {e}", exc_info=True)
def process_api_response_and_update_knowledge(api_response, project_id):
    """
    Hook for post-processing an LLM response.

    Currently it only refreshes the project's cached knowledge summary;
    richer response analysis can be layered in here later.
    """
    # 'api_response' is intentionally unused for now; the parameter is kept
    # so future processing can be added without changing call sites.
    update_project_knowledge(project_id)
# --- Vector Store Helpers ---
def build_vector_knowledge_base(project_id, query, k=3):
    """
    Build an in-memory FAISS index from project URL content and retrieve the
    top-k documents most similar to the query.

    The index is rebuilt from MongoDB on every call (no persistence), using
    OpenAI embeddings; requires OPENAI_API_KEY in the environment or Flask
    config.

    Args:
        project_id (ObjectId | str): Project identifier; strings are converted.
        query (str): The user query for similarity search.
        k (int): Number of top similar documents to retrieve.

    Returns:
        List[Document]: LangChain Document objects, or an empty list on
        error / missing dependencies / no data.
    """
    logger = _get_logger()
    # Check if necessary components are available
    if not mongo or not OpenAIEmbeddings or not FAISS or not Document:
        logger.error("Missing dependencies (Mongo, Langchain) for build_vector_knowledge_base.")
        return []
    try:
        # Normalize the identifier to an ObjectId.
        if not isinstance(project_id, ObjectId):
            project_id = ObjectId(project_id)
        # Fetch URL documents from MongoDB
        urls_cursor = mongo.db.urls.find({"projectId": project_id})
        texts = []
        metadatas = []
        for doc in urls_cursor:
            # Aggregate text content for embedding (same fields as the
            # plain-text knowledge base builder).
            keywords_list = [f"{kw.get('word', '')}({kw.get('percentage', 'N/A')}%)" for kw in doc.get("keywords", [])]
            aggregated = (
                f"Title: {doc.get('title', 'N/A')}\n"
                f"URL: {doc.get('url', 'N/A')}\n"
                # f"Starred: {doc.get('starred', False)}\n" # Optionally include more fields
                f"Note: {doc.get('note', 'N/A')}\n"
                f"Keywords: {', '.join(keywords_list) if keywords_list else 'N/A'}\n"
                f"Summary: {doc.get('summary', 'N/A')}"
            )
            texts.append(aggregated)
            # Store relevant metadata alongside the text
            metadatas.append({"url": doc.get("url", ""), "title": doc.get("title", ""), "doc_id": str(doc["_id"])})
        # If no text content found, return empty list
        if not texts:
            logger.info(f"No URL text content found for project {project_id} to build vector base.")
            return []
        # Initialize the embeddings model; prefer env var, fall back to config.
        try:
            openai_api_key = os.environ.get("OPENAI_API_KEY") or (current_app.config.get("OPENAI_API_KEY") if has_app_context() else None)
            if not openai_api_key:
                raise ValueError("OPENAI_API_KEY environment variable or Flask config not set.")
            embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
        except Exception as e:
            # exc_info=False on purpose: avoid any chance of logging key material.
            logger.error(f"Failed to initialize OpenAIEmbeddings: {e}. Check API key.", exc_info=False)
            return []
        # Build FAISS vector store from the texts and metadata
        vectorstore = FAISS.from_texts(texts=texts, embedding=embeddings, metadatas=metadatas)
        # Perform similarity search
        docs = vectorstore.similarity_search(query, k=k)
        return docs
    except InvalidId:
        logger.error(f"Invalid project_id format passed to build_vector_knowledge_base: {project_id}")
        return []
    except AttributeError:
        logger.error("PyMongo or Langchain components not initialized or available.")
        return []
    except Exception as e:
        logger.error(f"Error building vector knowledge base for project {project_id}: {e}", exc_info=True)
        return []  # Return empty list on error
def build_vector_based_prompt_with_knowledge(user_message, project_id):
    """
    Compose an LLM prompt that embeds vector-search context for a project.

    Retrieves the top-3 documents most similar to the user's message and
    wraps them, together with the query, in instructions that restrict the
    model to the provided knowledge.

    Args:
        user_message (str): The user's latest message/query.
        project_id (ObjectId): The project whose URL content is searched.

    Returns:
        str: The fully formatted prompt string.
    """
    docs = build_vector_knowledge_base(project_id, user_message, k=3)
    if docs:
        # Each retrieved LangChain Document becomes a sourced section.
        sections = [
            f"Source URL: {d.metadata.get('url', 'N/A')}\n{d.page_content}"
            for d in docs
        ]
        knowledge_text = "\n\n---\n\n".join(sections)
    else:
        knowledge_text = "No relevant external knowledge found via vector search for this query."
    # Instructions deliberately confine the model to the retrieved knowledge.
    return (
        "You are an expert research assistant. Analyze the following retrieved documents, which contain information "
        "(titles, URLs, notes, keywords, summaries) from websites related to the current research project. "
        "Base your response *only* on this provided information and the user's query.\n\n"
        "Common user questions might involve:\n"
        "- Summarizing key topics from the retrieved documents.\n"
        "- Suggesting research directions based *only* on the retrieved documents.\n"
        "- Recommending specific URLs *from the retrieved documents* that are most relevant.\n"
        "- Identifying potentially redundant information *within the retrieved documents*.\n\n"
        "--- Relevant Retrieved Knowledge ---\n"
        f"{knowledge_text}\n"
        "--- End Retrieved Knowledge ---\n\n"
        "User Query:\n"
        f"{user_message}\n\n"
        "Based strictly on the retrieved knowledge and the user query, provide your analysis and recommendations:"
    )
# --- Gemini Message Formatting Helper (Not currently used by send_dialog_message) ---
def format_messages_for_gemini(db_messages, max_history=MAX_HISTORY_MESSAGES):
    """
    Convert dialog history from DB format to the Gemini API content format.

    Maps the stored 'system' role to Gemini's 'model' role (everything else
    becomes 'user'), keeps only the most recent `max_history` messages, and
    drops consecutive messages with the same role so roles alternate.

    Returns:
        list[dict]: Entries of the form {"role": ..., "parts": [{"text": ...}]}.
    """
    logger = _get_logger()
    formatted = []
    previous_role = None
    # Only the tail of the history is sent as context.
    for entry in db_messages[-max_history:]:
        role = "model" if entry.get("role") == "system" else "user"
        if role == previous_role:
            # Gemini requires alternating roles; drop the duplicate.
            logger.warning(f"Skipping consecutive message of role '{role}' in formatting.")
            continue
        formatted.append({
            "role": role,
            "parts": [{"text": entry.get("content", "")}]
        })
        previous_role = role
    # Gemini expects the conversation to end on a user turn.
    if formatted and formatted[-1]["role"] != "user":
        logger.warning("Formatted history for Gemini does not end with a 'user' message.")
    return formatted
##################################################
# Dialog API Endpoints
##################################################
# Note: Routes use paths relative to the '/api/dialog' prefix.
@bp.route('/', methods=['POST']) # Path relative to prefix
@token_required
def create_dialog_session(current_user):
    """
    Creates a new dialog session associated with a project.

    Uses DialogCreateSchema for input validation.
    Expects JSON: { "projectId": "<ObjectId_string>", "sessionId": "<optional_string>", "startMessage": "<optional_string>" }
    Determines the LLM provider based on the user's selected API key.

    Responses:
        201 -- {"message", "dialog_id"} on success.
        400 -- bad user id, no/unsupported provider, missing key value.
        403 -- caller is neither owner nor collaborator on the project.
        404 -- project not found.
        422 -- schema validation errors (marshmallow messages dict).
        500 -- missing dependencies or unexpected failure.
    """
    logger = _get_logger()
    # Validate user object injected by @token_required.
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in create_dialog_session: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check dependencies (extensions/schemas may be None if imports failed at startup).
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not DialogCreateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema.
    json_data = request.get_json() or {}
    schema = DialogCreateSchema()
    try:
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Create dialog session validation failed: {err.messages}")
        return jsonify(err.messages), 422 # Return validation errors
    # Extract validated data.
    project_id_str = validated_data['projectId'] # Already validated as ObjectId string by schema
    session_id = validated_data.get("sessionId", "") # Optional
    start_message = validated_data.get("startMessage", "").strip() # Optional
    try:
        # Convert project ID.
        project_obj_id = ObjectId(project_id_str) # Conversion should succeed due to schema validation
        # Find the user's selected API key; it decides the provider for the whole session.
        db = mongo.db
        selected_api = db.api_list.find_one({"uid": user_id, "selected": True})
        if not selected_api:
            return jsonify({"message": "User has no selected API provider. Please select one in API Keys."}), 400
        provider = selected_api.get("name")
        api_key_exists = bool(selected_api.get("key")) # Check if key value exists
        # Validate provider and key presence.
        allowed_providers = ["Gemini", "Deepseek", "Chatgpt"] # Consider from config
        if provider not in allowed_providers:
            return jsonify({"message": f"Selected provider '{provider}' is not supported."}), 400
        if not api_key_exists:
            return jsonify({"message": f"API key value missing for selected provider '{provider}'."}), 400
        # Verify project exists and user has access (owner or listed collaborator).
        project = db.projects.find_one({"_id": project_obj_id}, {"ownerId": 1, "collaborators": 1})
        if not project:
            return jsonify({"message": "Project not found."}), 404
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "Access denied to the specified project."}), 403
        # Prepare initial messages if startMessage exists.
        now = datetime.datetime.now(datetime.timezone.utc)
        messages_array = []
        if start_message:
            messages_array.append({
                "role": "user",
                "content": start_message,
                "timestamp": now # Store timestamp for messages
            })
        # Prepare the new dialog document.
        dialog_doc = {
            "uid": user_id,
            "projectId": project_obj_id,
            "provider": provider, # Store the provider used for this session
            "sessionStartedAt": now,
            "sessionEndedAt": None, # Mark as null initially; set by the /end endpoint
            "messages": messages_array
        }
        # sessionId is only stored when the client supplied a non-empty one.
        if session_id: dialog_doc["sessionId"] = session_id
        # Insert the new dialog session.
        result = db.dialog_activity.insert_one(dialog_doc)
        # Return success response with the new dialog ID.
        return jsonify({
            "message": "Dialog session created successfully.",
            "dialog_id": str(result.inserted_id)
        }), 201
    except KeyError: # Should be caught by initial user_id check
        logger.error(f"User ID (_id) not found in token payload for create_dialog_session.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        # Typically mongo.db being unavailable (extension not initialized).
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error creating dialog session for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "Internal server error creating dialog session."}), 500
@bp.route('/<string:dialog_id>/send', methods=['POST']) # Path relative to prefix
@token_required
def send_dialog_message(current_user, dialog_id):
    """
    Sends a user message within a specific dialog session.

    Uses DialogSendMessageSchema for input validation.
    Expects JSON: { "content": "User's message text" }
    Retrieves context using vector search, builds a prompt, calls the LLM (Gemini),
    and stores the conversation turn in the dialog history.

    Notes:
        - Only sessions whose stored provider is 'Gemini' are accepted here.
        - A 409 is returned for sessions that were already ended.
        - The user message and LLM reply are pushed to the history in one
          update so the turn is stored as a pair.
    """
    logger = _get_logger()
    # Check dependencies (None if imports failed at startup).
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not genai or not google_exceptions: return jsonify({"message": "Gemini API library not available."}), 500
    if not DialogSendMessageSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        # Validate IDs (user from token, dialog from the URL path).
        user_id_str = str(current_user.get("_id"))
        if not user_id_str: return jsonify({"message": "Missing user ID in token."}), 400
        try:
            user_id = ObjectId(user_id_str)
            dialog_obj_id = ObjectId(dialog_id)
        except InvalidId:
            return jsonify({"message": "Invalid user or dialog ID format."}), 400
        # Get and validate user message content using schema.
        json_data = request.get_json() or {}
        schema = DialogSendMessageSchema()
        try:
            validated_data = schema.load(json_data)
        except ValidationError as err:
            logger.warning(f"Send dialog message validation failed: {err.messages}")
            return jsonify(err.messages), 422
        content = validated_data['content'] # Use validated content
        # --- Retrieve Dialog and API Key ---
        # Ownership is enforced by querying on both _id and uid.
        db = mongo.db
        dialog_doc = db.dialog_activity.find_one({"_id": dialog_obj_id, "uid": user_id})
        if not dialog_doc: return jsonify({"message": "Dialog session not found or access denied."}), 404
        if dialog_doc.get("sessionEndedAt"): return jsonify({"message": "This dialog session has ended."}), 409 # 409 Conflict
        provider = dialog_doc.get("provider")
        if provider != "Gemini": # This endpoint currently only supports Gemini
            return jsonify({"message": f"This endpoint only supports 'Gemini', but session provider is '{provider}'."}), 400
        # Find the active Gemini API key for the user.
        api_doc = db.api_list.find_one({"uid": user_id, "name": "Gemini", "selected": True})
        if not (api_doc and api_doc.get("key")):
            logger.error(f"No valid Gemini key found or selected for user {user_id} during send message.")
            return jsonify({"message": "Gemini API key not configured or selected."}), 400
        gemini_key = api_doc["key"]
        # --- Build Prompt with Vector Knowledge ---
        project_id = dialog_doc.get("projectId")
        if not project_id or not isinstance(project_id, ObjectId):
            logger.error(f"Dialog {dialog_id} is missing valid projectId.")
            return jsonify({"message": "Internal error: Project reference missing."}), 500
        # This builds the prompt incorporating vector search results.
        detailed_prompt = build_vector_based_prompt_with_knowledge(content, project_id)
        # Prepare history for Gemini (currently just the detailed prompt as a single user turn).
        gemini_history = [{"role": "user", "parts": [{"text": detailed_prompt}]}]
        # --- Call Gemini API ---
        llm_response_text = "[LLM Call Skipped/Failed]" # Default response text
        try:
            genai.configure(api_key=gemini_key)
            model = genai.GenerativeModel(current_app.config["GEMINI_MODEL_NAME"])
            # Consider adding generation_config and safety_settings from Flask config.
            llm_response = model.generate_content(gemini_history)
            # Extract text, handling potential blocks or empty responses.
            # (.text raises ValueError when the response has no text parts.)
            try:
                llm_response_text = llm_response.text
            except ValueError:
                logger.warning(f"Gemini response for dialog {dialog_id} may have been blocked or empty. Feedback: {llm_response.prompt_feedback}")
                llm_response_text = "[Response blocked by safety filters or returned no text content]"
        except google_exceptions.PermissionDenied as ex:
            logger.warning(f"Gemini Permission Denied for user {user_id}: {ex}")
            return jsonify({"message": "Gemini API Error: Invalid API key or insufficient permissions."}), 403
        except google_exceptions.ResourceExhausted as ex:
            logger.warning(f"Gemini Resource Exhausted for user {user_id}: {ex}")
            return jsonify({"message": "Gemini API Error: Rate limit or quota exceeded."}), 429
        except google_exceptions.GoogleAPIError as ex: # Catch other Google API errors
            logger.error(f"Gemini API communication error for user {user_id}: {ex}", exc_info=True)
            return jsonify({"message": "An error occurred while communicating with the Gemini API."}), 503 # 503 Service Unavailable
        except Exception as e: # Catch potential genai configuration errors etc.
            logger.error(f"Unexpected error during Gemini call setup or execution for user {user_id}: {e}", exc_info=True)
            return jsonify({"message": "Internal server error during LLM communication."}), 500
        # --- Process Response and Update DB ---
        now = datetime.datetime.now(datetime.timezone.utc)
        user_msg_entry = {"role": "user", "content": content, "timestamp": now}
        system_msg_entry = {"role": "system", "content": llm_response_text, "timestamp": now} # Use same timestamp for pair
        # Add both messages to the dialog history in MongoDB atomically.
        update_res = db.dialog_activity.update_one(
            {"_id": dialog_obj_id},
            {"$push": {"messages": {"$each": [user_msg_entry, system_msg_entry]}}}
        )
        if update_res.modified_count != 1:
            # The LLM reply is still returned to the client even if persisting failed.
            logger.warning(f"Dialog {dialog_id} DB update failed after LLM call (modified_count={update_res.modified_count}).")
            # Decide if this should be an error response to the user
        # Process the response (e.g., update cached knowledge).
        process_api_response_and_update_knowledge(llm_response_text, project_id)
        # Return the LLM's response text to the client.
        return jsonify({"message": "LLM response received.", "llmResponse": llm_response_text}), 200
    except KeyError: # Should be caught by initial user_id check
        logger.error(f"User ID (_id) not found in token payload for send_dialog_message.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo or other extension not initialized correctly.")
        return jsonify({"message": "Server configuration error."}), 500
    except Exception as e:
        logger.error(f"Unexpected error in send_dialog_message for dialog {dialog_id}: {e}", exc_info=True)
        return jsonify({"message": "Internal server error processing message."}), 500
@bp.route('/', methods=['GET']) # Path relative to prefix
@token_required
def list_dialog_sessions(current_user):
    """
    Lists dialog sessions for the authenticated user.

    Uses DialogSummarySchema for output serialization.
    Supports filtering by the optional 'projectId' query parameter.
    Excludes the 'messages' array for brevity; sorted newest session first.

    Responses:
        200 -- {"dialogs": [...]} serialized summaries.
        400 -- missing/invalid user id or invalid projectId filter.
        500 -- missing dependencies or unexpected failure.
    """
    logger = _get_logger()
    # Check dependencies (None if imports failed at startup).
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not DialogSummarySchema: return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        # BUGFIX: the previous str(current_user.get("_id")) turned a missing
        # _id into the truthy string "None", making this branch unreachable.
        # Check the raw value before converting.
        user_id_raw = current_user.get("_id")
        if not user_id_raw: return jsonify({"message": "Missing user ID in token."}), 400
        try:
            user_id = ObjectId(str(user_id_raw))
        except InvalidId:
            return jsonify({"message": "Invalid user ID format in token."}), 400
        # Base query: only the caller's own dialogs.
        query = {"uid": user_id}
        # Add projectId filter if provided in query parameters.
        project_id_str = request.args.get("projectId")
        if project_id_str:
            try:
                query["projectId"] = ObjectId(project_id_str)
            except InvalidId:
                return jsonify({"message": "Invalid projectId format in query parameter."}), 400
        # Fetch dialogs without their (potentially large) message arrays,
        # most recently started first.
        db = mongo.db
        cursor = db.dialog_activity.find(
            query,
            {"messages": 0} # Projection to exclude messages
        ).sort("sessionStartedAt", -1)
        dialog_docs = list(cursor)
        # Serialize via schema (handles ObjectId/datetime conversion and field exclusion).
        output_schema = DialogSummarySchema(many=True)
        serialized_result = output_schema.dump(dialog_docs)
        return jsonify({"dialogs": serialized_result}), 200
    except KeyError: # Should be caught by initial user_id check
        logger.error(f"User ID (_id) not found in token payload for list_dialog_sessions.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error listing dialogs for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "Internal server error listing dialog sessions."}), 500
@bp.route('/<string:dialog_id>', methods=['GET']) # Path relative to prefix
@token_required
def get_dialog_session(current_user, dialog_id):
    """
    Fetch one dialog session, messages included, for the authenticated user.

    Output is serialized through DialogSchema. Ownership is enforced by
    querying on both the dialog id and the caller's user id.
    """
    logger = _get_logger()
    # Fail fast when required services/schemas are unavailable.
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not DialogSchema: return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        uid_text = str(current_user.get("_id"))
        if not uid_text: return jsonify({"message": "Missing user ID in token."}), 400
        try:
            owner_oid = ObjectId(uid_text)
            dialog_oid = ObjectId(dialog_id)
        except InvalidId:
            return jsonify({"message": "Invalid user or dialog ID format."}), 400
        # Ownership check is built into the query filter.
        record = mongo.db.dialog_activity.find_one({"_id": dialog_oid, "uid": owner_oid})
        if not record:
            return jsonify({"message": "Dialog session not found or access denied."}), 404
        # DialogSchema converts ObjectIds, datetimes and nested messages.
        return jsonify(DialogSchema().dump(record)), 200
    except KeyError: # Should be caught by initial user_id check
        logger.error(f"User ID (_id) not found in token payload for get_dialog_session.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error retrieving dialog {dialog_id} for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "Internal server error retrieving dialog session."}), 500
@bp.route('/<string:dialog_id>/end', methods=['PUT']) # Path relative to prefix
@token_required
def end_dialog_session(current_user, dialog_id):
    """
    Marks a dialog session as ended by setting the 'sessionEndedAt' timestamp.

    Prevents ending an already ended session (409). Verifies ownership.
    The update filter includes "sessionEndedAt": None so the transition is
    atomic even if two requests race past the initial read.
    (No schema needed for input/output here.)
    """
    logger = _get_logger()
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    try:
        # Validate IDs (user from token, dialog from the URL path).
        user_id_str = str(current_user.get("_id"))
        if not user_id_str: return jsonify({"message": "Missing user ID in token."}), 400
        try:
            user_id = ObjectId(user_id_str)
            dial_obj_id = ObjectId(dialog_id)
        except InvalidId:
            return jsonify({"message": "Invalid user or dialog ID format."}), 400
        # Check if session exists, belongs to user, and is not already ended.
        db = mongo.db
        existing_doc = db.dialog_activity.find_one({"_id": dial_obj_id, "uid": user_id}, {"sessionEndedAt": 1})
        if not existing_doc:
            return jsonify({"message": "Dialog session not found or access denied."}), 404
        if existing_doc.get("sessionEndedAt") is not None:
            # 409 Conflict - the session is already in the 'ended' state.
            return jsonify({"message": "Dialog session has already been ended."}), 409
        # Update the document to set the end time.
        now = datetime.datetime.now(datetime.timezone.utc)
        result = db.dialog_activity.update_one(
            {"_id": dial_obj_id, "uid": user_id, "sessionEndedAt": None}, # Ensure it's not already ended atomically
            {"$set": {"sessionEndedAt": now}}
        )
        # Check if the update was successful; three outcomes are possible.
        if result.modified_count == 1:
            return jsonify({"message": "Dialog session marked as ended."}), 200
        elif result.matched_count == 1 and result.modified_count == 0:
            # This could happen if the session was ended between find_one and update_one (race condition).
            logger.warning(f"Dialog {dialog_id} was already ended before update (race condition?).")
            return jsonify({"message": "Dialog session was already ended."}), 409
        else: # matched_count == 0 (shouldn't happen if find_one worked unless deleted/ended concurrently)
            logger.warning(f"Dialog {dialog_id} matched 0 for ending update.")
            return jsonify({"message": "Dialog session not found or already ended."}), 404
    except KeyError: # Should be caught by initial user_id check
        logger.error(f"User ID (_id) not found in token payload for end_dialog_session.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error ending dialog {dialog_id} for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "Internal server error ending dialog session."}), 500
@bp.route('/<string:dialog_id>', methods=['DELETE']) # Path relative to prefix
@token_required
def delete_dialog_session(current_user, dialog_id):
    """
    Deletes an entire dialog session document.

    Ownership is enforced by filtering the delete on both the dialog id and
    the caller's user id. (No schema needed for input/output here.)
    """
    logger = _get_logger()
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    try:
        # BUGFIX: str(None) == "None" is truthy, so the previous
        # str(...) -> truthiness pattern could never report a missing _id.
        user_id_raw = current_user.get("_id")
        if not user_id_raw: return jsonify({"message": "Missing user ID in token."}), 400
        try:
            user_id = ObjectId(str(user_id_raw))
            dial_obj_id = ObjectId(dialog_id)
        except InvalidId:
            return jsonify({"message": "Invalid user or dialog ID format."}), 400
        # Perform deletion, ensuring the user owns the dialog.
        db = mongo.db
        result = db.dialog_activity.delete_one({"_id": dial_obj_id, "uid": user_id})
        if result.deleted_count == 1:
            return jsonify({"message": "Dialog session deleted successfully."}), 200 # 200 OK or 204 No Content
        # Either the document never existed or it belongs to another user;
        # both cases get the same response to avoid leaking existence.
        return jsonify({"message": "Dialog session not found or access denied."}), 404
    except KeyError: # Should be caught by initial user_id check
        logger.error(f"User ID (_id) not found in token payload for delete_dialog_session.")
        return jsonify({"message": "Authentication token is invalid or missing user ID."}), 401
    except AttributeError:
        logger.error("PyMongo extension not initialized or attached correctly.")
        return jsonify({"message": "Database configuration error."}), 500
    except Exception as e:
        logger.error(f"Error deleting dialog {dialog_id} for user {current_user.get('_id', 'UNKNOWN')}: {e}", exc_info=True)
        return jsonify({"message": "Internal server error deleting dialog session."}), 500

View File

@@ -0,0 +1,24 @@
# backend/myapp/extensions.py
"""
Central place to instantiate Flask extension objects.
These objects are initialized with the app instance later in the application factory.
"""
from flask_pymongo import PyMongo
from flask_cors import CORS
# from flask_jwt_extended import JWTManager
from flask_marshmallow import Marshmallow
# Add other necessary extension imports (e.g., Migrate if using SQL + Alembic)
# Instantiate extensions without the app object
mongo = PyMongo()
cors = CORS()
# jwt = JWTManager()
ma = Marshmallow()
# migrate = Migrate() # Example if using Flask-Migrate
# You can add other globally used utilities here if needed,
# but primarily focus on Flask extensions.

View File

@@ -0,0 +1,4 @@
from flask_pymongo import PyMongo
# Create a global PyMongo instance.
mongo = PyMongo()

View File

@@ -0,0 +1,13 @@
# myapp/projects/__init__.py
from flask import Blueprint
# Blueprint for the projects module.
# 'projects' is the unique blueprint name; url_prefix='/api/projects' is
# prepended to every route registered on this blueprint.
bp = Blueprint('projects', __name__, url_prefix='/api/projects')
# Import the routes module so its @bp.route handlers are registered.
# This import MUST come AFTER 'bp' is defined, because projects_routes
# itself does `from . import bp` (a circular-import convention in Flask apps).
from . import projects_routes

View File

@@ -0,0 +1,715 @@
# myapp/projects/projects_routes.py
import datetime
import os # Needed for checking environment variables (e.g., for OpenAI key)
import logging
from flask import request, jsonify, current_app, has_app_context # Flask utilities
from bson.objectid import ObjectId, InvalidId # For MongoDB ObjectIds
from collections import defaultdict # May be used in helper logic
from functools import wraps # Import wraps for dummy decorator
# --- Local Blueprint Import ---
from . import bp # Import the 'bp' instance defined in the local __init__.py
# --- Shared Extensions and Utilities Imports ---
try:
from ..extensions import mongo # Import the initialized PyMongo instance
# Import utilities from the parent 'myapp/utils.py'
from ..utils import token_required, generate_passkey
except ImportError:
# Fallback or error handling if imports fail
print("Warning: Could not import mongo, token_required, or generate_passkey in projects/projects_routes.py.")
mongo = None
generate_passkey = lambda: "error_generating_passkey" # Dummy function
# Define a dummy decorator if token_required is missing
def token_required(f):
@wraps(f)
def wrapper(*args, **kwargs):
print("ERROR: token_required decorator is not available!")
return jsonify({"message": "Server configuration error: Missing authentication utility."}), 500
return wrapper
# --- Schema Imports ---
try:
# Import the relevant schemas defined in schemas.py
from ..schemas import (
ProjectCreateSchema, ProjectUpdateSchema, ProjectSchema,
ProjectListSchema # Use ProjectListSchema for the list endpoint
)
from marshmallow import ValidationError
except ImportError:
print("Warning: Could not import Project schemas or ValidationError in projects/projects_routes.py.")
ProjectCreateSchema = None
ProjectUpdateSchema = None
ProjectSchema = None
ProjectListSchema = None
ValidationError = None
# --- Celery Task Import ---
# IMPORTANT: Assumes the project root directory ('your_fullstack_project/') is in PYTHONPATH
try:
from backend_flask.celery_worker.celery_app import async_recalc_project_keywords
except ModuleNotFoundError:
print("Warning: Could not import 'async_recalc_project_keywords' from 'celery_worker'. Ensure project root is in PYTHONPATH.")
# Define a dummy task function to prevent NameError if Celery isn't set up
def _dummy_celery_task(*args, **kwargs):
task_name = args[0] if args else 'dummy_task'
print(f"ERROR: Celery task {task_name} not available!")
class DummyTask:
def __init__(self, name):
self.__name__ = name
def delay(self, *a, **kw):
print(f"ERROR: Tried to call delay() on dummy task {self.__name__}")
pass
return DummyTask(task_name)
async_recalc_project_keywords = _dummy_celery_task('async_recalc_project_keywords')
# --- Dialog Helper Import ---
# Import the helper function from the sibling 'dialog' blueprint's routes module
try:
# Assumes the function is defined in myapp/dialog/dialog_routes.py
from ..dialog.dialog_routes import generate_knowledge_base_message
except ImportError:
print("Warning: Could not import 'generate_knowledge_base_message' from dialog blueprint.")
# Define a dummy function
generate_knowledge_base_message = lambda pid: "Error: Knowledge base function not available."
# --- External Lib Imports (for summarize_project) ---
# Import conditionally to avoid errors if not installed
try:
import google.generativeai as genai
from google.api_core import exceptions as google_exceptions
except ImportError:
print("Warning: google.generativeai not installed. Project summarization will fail.")
genai = None
google_exceptions = None
# --- Helper to get logger safely ---
def _get_logger():
    """Return the Flask app logger when inside an app context, else a module-level logger."""
    return current_app.logger if has_app_context() else logging.getLogger(__name__)
# Note: Routes use paths relative to the '/api/projects' prefix defined in __init__.py.
@bp.route('/', methods=['POST']) # Path relative to prefix
@token_required
def create_project(current_user):
    """
    Create a new project for the authenticated user.

    Uses ProjectCreateSchema for input validation.
    Expects 'name' and optional 'topic', 'description' in JSON payload.
    Generates a unique passkey for the project (used for joining/sharing).

    Responses:
        201 -- {"message", "project_id", "passkey"} on success.
        400 -- invalid user id in token.
        422 -- schema validation errors (marshmallow messages dict).
        500 -- missing dependencies or unexpected failure.
    """
    logger = _get_logger()
    # Validate user object from token.
    if not current_user or not current_user.get("_id"):
        logger.error("Invalid current_user object received in create_project")
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in create_project: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check dependencies (None if imports failed at startup).
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not ProjectCreateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema.
    json_data = request.get_json() or {}
    schema = ProjectCreateSchema()
    try:
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Create project validation failed: {err.messages}")
        return jsonify(err.messages), 422 # Return validation errors
    # Extract validated data.
    name = validated_data['name'] # Required field
    topic = validated_data.get('topic', "") # Optional field from schema
    description = validated_data.get('description', "") # Optional field from schema
    try:
        # Generate a passkey for potential sharing/joining later.
        passkey = generate_passkey()
        db = mongo.db # Use imported mongo instance
        # Prepare project document data.
        now = datetime.datetime.now(datetime.timezone.utc)
        project_data = {
            "ownerId": user_id,
            "collaborators": [], # Initially empty collaborator list
            "passkey": passkey, # Store the generated passkey
            "name": name.strip(), # Use validated and trimmed name
            "topic": topic,
            "description": description,
            "summary": "", # Initial empty summary (filled by summarization later)
            "keywords": [], # Initial empty keywords (recalculated asynchronously)
            "lastActivityBy": user_id, # Owner is the last active initially
            "createdAt": now,
            "updatedAt": now
        }
        # Insert the new project document.
        result = db.projects.insert_one(project_data)
        project_id = str(result.inserted_id)
        # Return success response with project ID and passkey.
        return jsonify({
            "message": "Project created successfully.",
            "project_id": project_id,
            "passkey": passkey # Return passkey so owner knows it
        }), 201 # 201 Created status code
    except Exception as e:
        # Log the detailed error for debugging.
        logger.error(f"Error creating project for user {user_id}: {e}", exc_info=True)
        # Return a generic error message to the client.
        return jsonify({"message": "An error occurred while creating the project."}), 500
@bp.route('/', methods=['GET']) # Path relative to prefix
@token_required
def get_projects(current_user):
    """
    Return a summary list (id, name, updatedAt) of every project the
    authenticated user owns or collaborates on, most recently updated first.

    Serialized through ProjectListSchema.
    """
    logger = _get_logger()
    # Validate user object from token.
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in get_projects: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Fail fast when required services/schemas are unavailable.
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not ProjectListSchema: return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        # Match projects where the user is owner OR appears in the
        # collaborators array; fetch only the fields the list schema needs.
        membership_filter = {
            "$or": [
                {"ownerId": user_id},
                {"collaborators": user_id}
            ]
        }
        summary_fields = {"name": 1, "updatedAt": 1, "_id": 1}
        project_docs = list(
            mongo.db.projects.find(membership_filter, summary_fields).sort("updatedAt", -1)
        )
        # Schema handles ObjectId and datetime conversion.
        payload = ProjectListSchema(many=True).dump(project_docs)
        return jsonify({"projects": payload}), 200
    except Exception as e:
        logger.error(f"Error fetching projects for user {user_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while fetching projects."}), 500
@bp.route('/<string:project_id>', methods=['GET']) # Path relative to prefix
@token_required
def get_project_detail(current_user, project_id):
    """
    Retrieve detailed information for a specific project by its ID.

    Uses ProjectSchema for output serialization.
    Verifies user access (owner or collaborator).

    Responses:
        200 -- serialized project document.
        400 -- invalid user or project id format.
        403 -- caller is neither owner nor collaborator.
        404 -- project not found.
        500 -- missing dependencies, data-integrity issue, or unexpected failure.
    """
    logger = _get_logger()
    # Validate user object from token.
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in get_project_detail: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check dependencies (None if imports failed at startup).
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not ProjectSchema: return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        db = mongo.db
        # Validate the provided project ID format.
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format."}), 400
        # Find the project by ID (full document; schema trims fields on dump).
        project_doc = db.projects.find_one({"_id": obj_project_id})
        if not project_doc:
            return jsonify({"message": "Project not found."}), 404 # 404 Not Found
        # Verify ownership or collaboration status for access control.
        owner_id = project_doc.get("ownerId")
        collaborators = project_doc.get("collaborators", [])
        if not owner_id: # Check for data integrity
            logger.error(f"Project {project_id} is missing ownerId.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "Access denied to this project."}), 403 # 403 Forbidden
        # Serialize results using the schema
        # (handles ObjectId, datetime, nested keywords, and field selection).
        output_schema = ProjectSchema()
        serialized_result = output_schema.dump(project_doc)
        return jsonify(serialized_result), 200
    except Exception as e:
        logger.error(f"Error fetching project detail for {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while fetching project details."}), 500
@bp.route('/<string:project_id>', methods=['PUT']) # Path relative to prefix
@token_required
def update_project(current_user, project_id):
    """
    Update details of an existing project.

    Input is validated with ProjectUpdateSchema; only these fields may be
    changed: name, collaborators, topic, description, keywords.
    Only the project owner may update (collaborators cannot). On success,
    the updated project is returned serialized with ProjectSchema.

    Responses:
        200 - updated (body contains the updated project)
        400 - invalid user/collaborator/project ID, or no updatable fields supplied
        403 - requester is not the project owner
        404 - project does not exist (or vanished between find and update)
        422 - schema validation failure (body contains per-field errors)
        500 - missing dependencies, data-integrity issue, or unexpected error
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in update_project: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check dependencies (these names may be None if module imports failed at startup)
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not ProjectUpdateSchema or not ProjectSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    schema = ProjectUpdateSchema()
    try:
        # Load validates allowed fields and their types (like collaborators list of strings)
        validated_data = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Update project validation failed: {err.messages}")
        return jsonify(err.messages), 422
    # If validation passed but no valid fields were provided
    if not validated_data:
        return jsonify({"message": "No valid fields provided for update."}), 400
    try:
        db = mongo.db
        # Validate project ID format
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format."}), 400
        # Find the project
        project = db.projects.find_one({"_id": obj_project_id}, {"ownerId": 1}) # Fetch ownerId for check
        if not project:
            return jsonify({"message": "Project not found."}), 404
        # Verify ownership for update permission
        owner_id = project.get("ownerId")
        if not owner_id:
            logger.error(f"Project {project_id} is missing ownerId during update.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id:
            return jsonify({"message": "Only the project owner can update this project."}), 403
        # --- Prepare Update Fields based on validated data ---
        update_fields = {}
        # Convert collaborator strings back to ObjectIds if present
        if "collaborators" in validated_data:
            try:
                update_fields["collaborators"] = [ObjectId(cid) for cid in validated_data["collaborators"]]
                # Optional: Add check here to ensure collaborator IDs exist and are not the owner
            except (InvalidId, TypeError):
                # This should ideally be caught by schema validation if using _validate_object_id
                return jsonify({"message": "Invalid collaborator ID format received."}), 400
        # Copy other validated fields directly
        for field in ["name", "topic", "description", "keywords"]:
            if field in validated_data:
                update_fields[field] = validated_data[field]
        # Always update the 'updatedAt' timestamp (timezone-aware UTC)
        update_fields["updatedAt"] = datetime.datetime.now(datetime.timezone.utc)
        # Note: lastActivityBy is NOT updated here.
        # Perform the update operation
        result = db.projects.update_one({"_id": obj_project_id}, {"$set": update_fields})
        # Check if the update was successful. matched_count (not modified_count)
        # is used deliberately: a no-op update that matches still succeeds.
        if result.matched_count == 1:
            # Retrieve the updated project document to return it
            updated_project_doc = db.projects.find_one({"_id": obj_project_id})
            if updated_project_doc:
                # Serialize the updated document using the detail schema
                output_schema = ProjectSchema()
                serialized_project = output_schema.dump(updated_project_doc)
                return jsonify({"message": "Project updated successfully.", "project": serialized_project}), 200
            else:
                # Unlikely: document deleted between update and re-fetch
                logger.warning(f"Project {project_id} updated but could not be retrieved.")
                return jsonify({"message": "Project updated successfully, but failed to retrieve updated data."}), 200
        else:
            # Matched count was 0 (project deleted between ownership check and update)
            return jsonify({"message": "Project update failed (document not found)."}), 404
    except Exception as e:
        logger.error(f"Error updating project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while updating the project."}), 500
@bp.route('/<string:project_id>', methods=['DELETE']) # Path relative to prefix
@token_required
def delete_project(current_user, project_id):
    """
    Delete a project and cascade-delete its associated URLs, activity
    logs, and dialog sessions.

    Only the project owner may delete the project; collaborators cannot.
    (No schema needed for input/output here)
    """
    logger = _get_logger()
    # The auth decorator must have supplied a usable user document.
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        requester_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in delete_project: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    try:
        db = mongo.db
        # Reject malformed project ids up front.
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format."}), 400
        # Only ownerId is needed for the permission check.
        doc = db.projects.find_one({"_id": obj_project_id}, {"ownerId": 1})
        if not doc:
            return jsonify({"message": "Project not found."}), 404
        owner = doc.get("ownerId")
        if not owner:
            logger.error(f"Project {project_id} is missing ownerId during delete.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner != requester_id:
            return jsonify({"message": "Only the project owner can delete this project."}), 403
        # --- Perform Deletions (Consider Transactions if available/needed) ---
        # 1. Remove the project document itself.
        if db.projects.delete_one({"_id": obj_project_id}).deleted_count == 0:
            logger.warning(f"Project {project_id} found but delete_one removed 0 documents.")
            return jsonify({"message": "Project deletion failed (already deleted?)."}), 404
        # 2-4. Cascade-delete the dependent collections, logging counts.
        urls_removed = db.urls.delete_many({"projectId": obj_project_id}).deleted_count
        logger.info(f"Deleted {urls_removed} URLs for project {project_id}")
        activity_removed = db.project_activity.delete_many({"projectId": obj_project_id}).deleted_count
        logger.info(f"Deleted {activity_removed} activity logs for project {project_id}")
        dialogs_removed = db.dialog_activity.delete_many({"projectId": obj_project_id}).deleted_count
        logger.info(f"Deleted {dialogs_removed} dialog sessions for project {project_id}")
        # --- End Deletions ---
        return jsonify({"message": "Project and associated data deleted successfully."}), 200 # 200 OK or 204 No Content
    except Exception as e:
        logger.error(f"Error deleting project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while deleting the project."}), 500
@bp.route('/<string:project_id>/info', methods=['GET']) # Path relative to prefix
@token_required
def get_project_info(current_user, project_id):
    """
    Return the informational fields of a single project.

    Serialization goes through ProjectSchema, which controls the exact
    field selection. Access is granted to the owner and to collaborators.
    """
    logger = _get_logger()
    # The auth decorator must have supplied a usable user document.
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        requester_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in get_project_info: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Bail out early when shared dependencies failed to initialize.
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not ProjectSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        db = mongo.db
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format."}), 400
        # Fetch the full document; ProjectSchema decides what gets exposed.
        doc = db.projects.find_one({"_id": obj_project_id})
        if not doc:
            return jsonify({"message": "Project not found."}), 404
        owner = doc.get("ownerId")
        if not owner:
            logger.error(f"Project {project_id} is missing ownerId in get_project_info.")
            return jsonify({"message": "Project data integrity issue."}), 500
        # Owner or collaborator may read the project info.
        if owner != requester_id and requester_id not in doc.get("collaborators", []):
            return jsonify({"message": "Access denied to this project's info."}), 403
        # ProjectSchema currently includes more than the bare 'info' fields;
        # introduce a dedicated ProjectInfoSchema if a narrower payload is desired.
        return jsonify(ProjectSchema().dump(doc)), 200
    except Exception as e:
        logger.error(f"Error getting project info for {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while retrieving project info."}), 500
@bp.route('/<string:project_id>/recalc_keywords', methods=['PUT']) # Path relative to prefix
@token_required
def recalc_project_keywords(current_user, project_id):
    """
    Queue an asynchronous Celery task to recalculate project keywords.

    Access is allowed for the project owner or any collaborator. On success
    the endpoint returns 202 Accepted immediately; the recalculation itself
    runs in the Celery worker.
    (No schema needed for input/output here)
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id) # Keep string version for Celery task if needed
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in recalc_project_keywords: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    try:
        db = mongo.db
        # Validate project ID format
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format."}), 400
        # Verify project exists and user has access before queueing task
        project = db.projects.find_one(
            {"_id": obj_project_id},
            {"ownerId": 1, "collaborators": 1} # Only fetch fields needed for access check
        )
        if not project:
            return jsonify({"message": "Project not found."}), 404
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if not owner_id:
            logger.error(f"Project {project_id} is missing ownerId in recalc_keywords.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "Access denied to trigger keyword recalculation for this project."}), 403
        # --- Queue the Celery Task ---
        try:
            # Call the .delay() method on the imported Celery task.
            # Note the string project_id (not the ObjectId) is passed.
            task_result = async_recalc_project_keywords.delay(project_id, user_id_str)
            logger.info(f"Queued keyword recalc task {task_result.id} for project {project_id}")
            # Return 202 Accepted status code to indicate task was queued
            return jsonify({"message": "Project keywords recalculation task queued successfully."}), 202
        except NameError:
            # Raised when the Celery task symbol was never imported successfully
            logger.error("Celery task 'async_recalc_project_keywords' is not defined or imported correctly.")
            return jsonify({"message": "Server configuration error: Keyword recalculation feature unavailable."}), 500
        except Exception as e:
            # Catch errors related to Celery connection or queueing
            logger.error(f"Error queueing recalc keywords task for project {project_id}: {e}", exc_info=True)
            return jsonify({"message": "An error occurred while queueing the keywords recalculation task."}), 500
    except Exception as e:
        # Catch general errors before task queueing
        logger.error(f"Error in recalc_project_keywords endpoint for project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred before queueing the task."}), 500
@bp.route('/<string:project_id>/summarize', methods=['PUT']) # Path relative to prefix
@token_required
def summarize_project(current_user, project_id):
    """
    Generate and persist a project summary using the Gemini LLM.

    Builds a prompt from the project's URL knowledge base (via the dialog
    blueprint helper) and sends it to Gemini. The resulting summary is
    stored in the project's 'summary' field and echoed in the response.
    Requires the user to have a selected Gemini API key configured.
    Verifies user access (owner or collaborator).
    (No schema needed for input; output is a summary string)
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in summarize_project: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check dependencies (mongo / google-generativeai may be None if imports failed)
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not genai or not google_exceptions: return jsonify({"message": "Gemini API library not available."}), 500
    try:
        db = mongo.db
        # Validate project ID format
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format."}), 400
        # Verify project exists and user has access
        project = db.projects.find_one(
            {"_id": obj_project_id},
            {"ownerId": 1, "collaborators": 1} # Only fetch fields needed for access check
        )
        if not project:
            return jsonify({"message": "Project not found."}), 404
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if not owner_id:
            logger.error(f"Project {project_id} is missing ownerId in summarize_project.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "Access denied to summarize this project."}), 403
        # --- Check for User's Gemini API Key ---
        api_doc = db.api_list.find_one({"uid": user_id, "selected": True, "name": "Gemini"})
        if not (api_doc and api_doc.get("key")):
            return jsonify({"message": "Summarization requires a selected Gemini API key. Please configure it in API Keys."}), 400 # 400 Bad Request - missing prereq
        gemini_key = api_doc.get("key")
        # --- Generate Knowledge Base and Prompt ---
        # Use the imported helper function from the dialog blueprint
        kb_message = generate_knowledge_base_message(obj_project_id) # Pass ObjectId
        if not kb_message or kb_message.startswith("Error:") : # Handle error from helper
            logger.warning(f"Knowledge base generation failed or was empty for project {project_id}. KB: {kb_message}")
            kb_message = "No external knowledge base content available for this project." # Fallback
        # Construct the prompt for Gemini
        prompt = (
            f"You are an expert research assistant tasked with summarizing a project. "
            f"Below is the external knowledge base compiled from websites associated with this project.\n\n"
            f"--- External Knowledge Base ---\n{kb_message}\n--- End Knowledge Base ---\n\n"
            f"Based ONLY on the provided knowledge base (do not use external information), please generate a concise and comprehensive summary "
            f"of the project's main focus, key topics, and potential research directions. Aim for approximately 300 words, maximum 400 words."
        )
        # --- Call Gemini API ---
        summary_text = "[Summary generation failed]" # Default
        try:
            # NOTE(review): genai.configure sets a process-global API key --
            # confirm this is acceptable with concurrent users of different keys.
            genai.configure(api_key=gemini_key)
            # Model name comes from app config (GEMINI_MODEL_NAME)
            model = genai.GenerativeModel(current_app.config["GEMINI_MODEL_NAME"])
            gemini_input = [{"role": "user", "parts": [{"text": prompt}]}]
            # Consider adding safety settings if needed
            llm_response = model.generate_content(gemini_input)
            # Extract text, handling potential blocks
            try:
                summary_text = llm_response.text
            except ValueError:
                # .text raises ValueError when the response was blocked/empty
                logger.warning(f"Gemini response for project {project_id} summary may have been blocked. Feedback: {llm_response.prompt_feedback}")
                summary_text = "[Summary generation blocked or failed]"
        except google_exceptions.PermissionDenied as ex:
            logger.warning(f"Gemini Permission Denied for user {user_id} during summarization: {ex}")
            return jsonify({"message": "Gemini API Error: Invalid API key or insufficient permissions."}), 403
        except google_exceptions.ResourceExhausted as ex:
            logger.warning(f"Gemini Resource Exhausted for user {user_id} during summarization: {ex}")
            return jsonify({"message": "Gemini API Error: Rate limit or quota exceeded."}), 429
        except google_exceptions.GoogleAPIError as ex:
            logger.error(f"Gemini API communication error during summarization for project {project_id}: {ex}", exc_info=True)
            return jsonify({"message": "An error occurred while communicating with the Gemini API."}), 503
        except Exception as e:
            logger.error(f"Unexpected error during Gemini call setup/execution for project {project_id} summary: {e}", exc_info=True)
            return jsonify({"message": "Internal server error during LLM communication."}), 500
        # Check if the summary is empty after potential blocking
        if not summary_text or summary_text == "[Summary generation blocked or failed]":
            return jsonify({"message": "Failed to generate summary (LLM returned empty or blocked response)."}), 500
        # --- Update Project Summary in DB ---
        try:
            update_result = db.projects.update_one(
                {"_id": obj_project_id},
                {"$set": {"summary": summary_text, "updatedAt": datetime.datetime.now(datetime.timezone.utc)}}
            )
            if update_result.matched_count == 0:
                # Project deleted between find and update?
                logger.warning(f"Project {project_id} not found during summary update.")
                return jsonify({"message": "Project not found while saving summary."}), 404
            # Return success response with the generated summary
            return jsonify({"message": "Project summary generated and saved successfully.", "summary": summary_text}), 200
        except Exception as e:
            logger.error(f"Error updating project summary in DB for {project_id}: {e}", exc_info=True)
            # Inform user summary was generated but not saved
            return jsonify({"message": "Summary generated but failed to save to project.", "summary": summary_text}), 500
    except Exception as e:
        # Catch-all for errors before API call or DB update
        logger.error(f"Error in summarize_project endpoint for project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred during project summarization."}), 500

View File

@@ -0,0 +1,227 @@
# myapp/schemas.py
from marshmallow import fields, validate, ValidationError, Schema, validates_schema
from marshmallow.validate import OneOf
from bson.objectid import ObjectId, InvalidId
# Import Marshmallow instance from extensions
# Assumes 'ma = Marshmallow()' is defined in myapp/extensions.py
# and initialized in myapp/__init__.py's create_app()
try:
from .extensions import ma
except ImportError:
# Basic fallback if extensions.py or 'ma' instance is missing
print("WARNING: Flask-Marshmallow instance 'ma' not found in extensions. Falling back.")
from flask_marshmallow import Marshmallow
ma = Marshmallow()
# --- Custom Validators (Optional but useful) ---
def _validate_object_id(value):
    """Validator function to ensure a string is a valid BSON ObjectId.

    Raises marshmallow ValidationError on any parse failure so schema
    loading reports it as a field error rather than a server crash.
    """
    try:
        ObjectId(value)
    except (InvalidId, TypeError, ValueError): # Catch potential errors
        raise ValidationError("Invalid ObjectId format.")
def _is_alphabetic_or_empty(value):
"""Validator for keywords: allows empty string or purely alphabetic."""
if value is not None and value != "" and not value.isalpha():
raise ValidationError("Keyword must be alphabetic if not empty.")
return True # Pass validation if empty or alphabetic
# --- Base Schema for common fields ---
class BaseSchema(ma.Schema):
    """Base schema with common fields shared by persisted documents.

    Provides:
      - id: Mongo "_id" rendered as a string (dump-only).
      - createdAt / updatedAt: ISO-8601 timestamps (dump-only).
    """
    # Dump ObjectId as string, read-only. NOTE(review): a missing "_id"
    # yields the literal string "None" -- confirm upstream always sets it.
    id = fields.Function(lambda obj: str(obj.get("_id")), dump_only=True)
    # Dump datetime as ISO 8601 string, read-only
    createdAt = fields.DateTime(format='iso', dump_only=True)
    updatedAt = fields.DateTime(format='iso', dump_only=True)
# --- User Schemas (for auth blueprint) ---
class UserRegistrationSchema(ma.Schema):
    """Schema for validating user registration input."""
    username = fields.String(required=True, validate=validate.Length(min=3, max=64, error="Username must be between 3 and 64 characters."))
    # BUGFIX: marshmallow Field constructors have no 'error' keyword -- the
    # unknown kwarg is swallowed into field metadata, so the custom message
    # was never shown. Custom field messages must go through error_messages,
    # keyed by the error code ("invalid" for Email).
    email = fields.Email(required=True, error_messages={"invalid": "Invalid email format."})
    password = fields.String(required=True, validate=validate.Length(min=8, error="Password must be at least 8 characters."), load_only=True) # load_only: Input only, never dumped
class UserLoginSchema(ma.Schema):
    """Schema for validating user login input.

    Both fields are required; password is load-only so it can never be
    echoed back in serialized output.
    """
    username = fields.String(required=True)
    password = fields.String(required=True, load_only=True) # Input only
class UserSchema(BaseSchema):
    """Schema for serializing user data for output (never includes the password).

    Inherits id, createdAt, updatedAt from BaseSchema.
    """
    username = fields.String(dump_only=True)
    email = fields.Email(dump_only=True)
    # BUGFIX: the previous Meta.exclude = ("password",) crashed every
    # UserSchema() instantiation -- marshmallow raises ValueError when
    # Meta.exclude names a field that is not declared on the schema.
    # Because marshmallow only serializes declared fields, the (hashed)
    # password can never leak into dumped output anyway, so no exclusion
    # is needed.
class UserUpdateSchema(ma.Schema):
    """Schema for validating user account update input.

    All fields are optional; only the supplied ones are validated.
    """
    username = fields.String(validate=validate.Length(min=3, max=64)) # Optional update
    email = fields.Email() # Optional update
    password = fields.String(validate=validate.Length(min=8), load_only=True) # Optional update, input only
# --- API Key Schemas (for api_keys blueprint) ---
# Closed set of LLM providers the API-key endpoints accept.
ALLOWED_API_PROVIDERS = ["Gemini", "Deepseek", "Chatgpt"]
class APIKeyCreateSchema(ma.Schema):
    """Schema for validating new API key creation input."""
    name = fields.String(required=True, validate=OneOf(ALLOWED_API_PROVIDERS, error=f"Provider name must be one of: {ALLOWED_API_PROVIDERS}"))
    key = fields.String(required=True, validate=validate.Length(min=5, error="API Key seems too short.")) # Basic length check
    selected = fields.Boolean(load_default=False) # Default to False if not provided on load
class APIKeyUpdateSchema(ma.Schema):
    """Schema for validating API key update input."""
    # All fields are optional for update
    name = fields.String(validate=OneOf(ALLOWED_API_PROVIDERS, error=f"Provider name must be one of: {ALLOWED_API_PROVIDERS}"))
    key = fields.String(validate=validate.Length(min=5))
    selected = fields.Boolean()
class APIKeySchema(BaseSchema):
    """Schema for serializing API key data for output."""
    # Inherits id, createdAt, updatedAt
    uid = fields.Function(lambda obj: str(obj.get("uid")), dump_only=True) # User ID as string
    name = fields.String(dump_only=True)
    # NOTE(review): the full key is dumped to clients here. Consider masking
    # part of the key for security: fields.Function(lambda obj: f"{obj.get('key', '')[:4]}...{obj.get('key', '')[-4:]}" if obj.get('key') else None, dump_only=True)
    key = fields.String(dump_only=True) # Consider masking part of the key for security: fields.Function(lambda obj: f"{obj.get('key', '')[:4]}...{obj.get('key', '')[-4:]}" if obj.get('key') else None, dump_only=True)
    selected = fields.Boolean(dump_only=True)
# --- Project Schemas (for projects blueprint) ---
class KeywordSchema(ma.Schema):
    """Schema for individual keywords within a project or URL."""
    word = fields.String(required=True, validate=_is_alphabetic_or_empty) # Allow empty string or alphabetic
    percentage = fields.Float(required=True, validate=validate.Range(min=0, max=100))
class ProjectCreateSchema(ma.Schema):
    """Schema for validating new project creation input."""
    name = fields.String(required=True, validate=validate.Length(min=1, max=100, error="Project name must be between 1 and 100 characters."))
    topic = fields.String(validate=validate.Length(max=200)) # Optional topic
    description = fields.String(validate=validate.Length(max=1000)) # Optional description
class ProjectUpdateSchema(ma.Schema):
    """Schema for validating project update input.

    Every field is optional; only the whitelisted project attributes below
    can be updated through this schema.
    """
    name = fields.String(validate=validate.Length(min=1, max=100))
    topic = fields.String(validate=validate.Length(max=200))
    description = fields.String(validate=validate.Length(max=1000))
    collaborators = fields.List(fields.String(validate=_validate_object_id)) # List of user ID strings
    keywords = fields.List(fields.Nested(KeywordSchema)) # List of keyword objects
class ProjectSchema(BaseSchema):
    """Schema for serializing detailed project data for output.

    Inherits id, createdAt, updatedAt from BaseSchema. All fields are
    dump-only: this schema is for responses, never for input parsing.
    """
    ownerId = fields.Function(lambda obj: str(obj.get("ownerId")), dump_only=True)
    # BUGFIX: fields.Function ignores the per-item value and is always called
    # with the whole object being serialized, so wrapping it in fields.List
    # stringified the entire project document once per collaborator.
    # Serialize the collaborator ObjectIds with a single Function instead.
    collaborators = fields.Function(
        lambda obj: [str(cid) for cid in obj.get("collaborators") or []],
        dump_only=True,
    )  # List of string IDs
    passkey = fields.String(dump_only=True) # Only dump passkey if absolutely necessary, usually not needed in GET responses
    name = fields.String(dump_only=True)
    topic = fields.String(dump_only=True)
    description = fields.String(dump_only=True)
    summary = fields.String(dump_only=True)
    keywords = fields.List(fields.Nested(KeywordSchema), dump_only=True)
    # Only expose lastActivityBy when it is a real ObjectId; otherwise null.
    lastActivityBy = fields.Function(lambda obj: str(obj.get("lastActivityBy")) if isinstance(obj.get("lastActivityBy"), ObjectId) else None, dump_only=True)
class ProjectListSchema(ma.Schema):
    """Schema for serializing the summary list of projects.

    Deliberately minimal (id, name, updatedAt) to keep list payloads small.
    """
    id = fields.Function(lambda obj: str(obj.get("_id")), dump_only=True)
    name = fields.String(dump_only=True)
    updatedAt = fields.DateTime(format='iso', dump_only=True)
# --- URL Schemas (for urls blueprint) ---
class URLCreateSchema(ma.Schema):
    """Schema for validating new URL creation input."""
    # BUGFIX: fields.URL has no 'error' keyword (unknown kwargs are swallowed
    # into metadata), so the custom message was never shown. Custom messages
    # must go through error_messages, keyed by the error code ("invalid").
    url = fields.URL(
        required=True,
        schemes={'http', 'https'},
        error_messages={"invalid": "Invalid URL format."},
    )
class URLUpdateSchema(ma.Schema):
    """Schema for validating URL update input (only specific fields)."""
    title = fields.String(validate=validate.Length(max=500)) # Optional update
    starred = fields.Boolean() # Optional update
    note = fields.String() # Optional update
    keywords = fields.List(fields.Nested(KeywordSchema)) # Optional update, validate nested structure
class URLSchema(BaseSchema):
    """Schema for serializing detailed URL data for output."""
    # Inherits id, createdAt, updatedAt
    projectId = fields.Function(lambda obj: str(obj.get("projectId")), dump_only=True)
    url = fields.URL(dump_only=True)
    title = fields.String(dump_only=True)
    favicon = fields.String(dump_only=True, allow_none=True)
    starred = fields.Boolean(dump_only=True)
    note = fields.String(dump_only=True)
    keywords = fields.List(fields.Nested(KeywordSchema), dump_only=True)
    summary = fields.String(dump_only=True)
    # NOTE(review): validators only run on load(); on a dump_only field this
    # OneOf never executes -- confirm whether status validation is wanted.
    processingStatus = fields.String(dump_only=True, validate=OneOf(["pending", "processing", "completed", "failed"])) # Optional: validate status
class URLListSchema(ma.Schema):
    """Schema for serializing the simplified list of URLs."""
    id = fields.Function(lambda obj: str(obj.get("_id")), dump_only=True)
    title = fields.String(dump_only=True)
    url = fields.URL(dump_only=True)
class URLSearchResultSchema(URLListSchema):
    """Schema for search results (same as list for now)."""
    pass # Inherits fields from URLListSchema
# --- Activity Schemas (for activity blueprint) ---
class ActivityCreateSchema(ma.Schema):
    """Schema for validating new activity log creation."""
    projectId = fields.String(required=True, validate=_validate_object_id) # Validate as ObjectId string
    activityType = fields.String(required=True, validate=validate.Length(min=1))
    message = fields.String(load_default="") # Optional message
class ActivitySchema(BaseSchema):
    """Schema for serializing activity log data."""
    # Inherits id, createdAt from BaseSchema.
    # NOTE(review): updatedAt is also inherited even though logs are meant
    # to be immutable -- confirm whether it should be excluded here.
    projectId = fields.Function(lambda obj: str(obj.get("projectId")), dump_only=True)
    userId = fields.Function(lambda obj: str(obj.get("userId")), dump_only=True)
    activityType = fields.String(dump_only=True)
    message = fields.String(dump_only=True)
# --- Dialog Schemas (for dialog blueprint) ---
class MessageSchema(ma.Schema):
    """Schema for individual messages within a dialog."""
    role = fields.String(required=True, validate=OneOf(["user", "system"], error="Role must be 'user' or 'system'."))
    content = fields.String(required=True)
    timestamp = fields.DateTime(format='iso', dump_only=True) # Only dump timestamp
class DialogCreateSchema(ma.Schema):
    """Schema for validating new dialog session creation."""
    projectId = fields.String(required=True, validate=_validate_object_id)
    sessionId = fields.String() # Optional custom session ID
    startMessage = fields.String() # Optional initial message
class DialogSendMessageSchema(ma.Schema):
    """Schema for validating user message input when sending to dialog."""
    content = fields.String(required=True, validate=validate.Length(min=1, error="Message content cannot be empty."))
class DialogSchema(BaseSchema):
    """Schema for serializing detailed dialog session data (including messages)."""
    # Inherits id (plus createdAt/updatedAt) from BaseSchema
    uid = fields.Function(lambda obj: str(obj.get("uid")), dump_only=True)
    projectId = fields.Function(lambda obj: str(obj.get("projectId")), dump_only=True)
    provider = fields.String(dump_only=True)
    sessionId = fields.String(dump_only=True) # Dump custom session ID if present
    sessionStartedAt = fields.DateTime(format='iso', dump_only=True)
    sessionEndedAt = fields.DateTime(format='iso', dump_only=True, allow_none=True) # Can be null
    messages = fields.List(fields.Nested(MessageSchema), dump_only=True) # Nested list of messages
class DialogSummarySchema(BaseSchema):
    """Schema for serializing the dialog session list (without messages)."""
    # Inherits id, createdAt, updatedAt from BaseSchema
    uid = fields.Function(lambda obj: str(obj.get("uid")), dump_only=True)
    projectId = fields.Function(lambda obj: str(obj.get("projectId")), dump_only=True)
    provider = fields.String(dump_only=True)
    sessionId = fields.String(dump_only=True)
    sessionStartedAt = fields.DateTime(format='iso', dump_only=True)
    sessionEndedAt = fields.DateTime(format='iso', dump_only=True, allow_none=True)
    # BUGFIX: the previous Meta.exclude = ("messages",) crashed every
    # DialogSummarySchema() instantiation -- marshmallow raises ValueError
    # when Meta.exclude names a field not declared on the schema. Since
    # marshmallow only dumps declared fields, the (potentially large)
    # messages array is already omitted from list views without it.

View File

@@ -0,0 +1,14 @@
# myapp/urls/__init__.py
from flask import Blueprint
# Define the Blueprint instance for the URL management module.
# 'urls' is the unique name for this blueprint.
# url_prefix='/api' will be prepended to all routes defined in this blueprint.
# Specific paths like '/projects/<id>/urls' or '/urls/<id>' are defined in urls_routes.py.
bp = Blueprint('urls', __name__, url_prefix='/api')
# Import the routes module (urls_routes.py).
# This registers the routes defined in urls_routes.py on the 'bp' instance.
# This import MUST come AFTER the Blueprint 'bp' is defined to avoid a
# circular import (urls_routes imports 'bp' back from this package).
from . import urls_routes

View File

@@ -0,0 +1,817 @@
# myapp/urls/urls_routes.py
import datetime
import logging
from flask import request, jsonify, current_app, has_app_context # Flask utilities
from bson.objectid import ObjectId, InvalidId # For MongoDB ObjectIds
from collections import defaultdict # Potentially useful for keyword aggregation etc.
from functools import wraps # For creating dummy decorators
import re # For escaping regex characters in search
# --- Local Blueprint Import ---
from . import bp # Import the 'bp' instance defined in the local __init__.py
# --- Shared Extensions and Utilities Imports ---
try:
from ..extensions import mongo # Import the initialized PyMongo instance
from ..utils import token_required # Import the authentication decorator
except ImportError:
# Fallback or error handling if imports fail
print("Warning: Could not import mongo or token_required in urls/urls_routes.py.")
mongo = None
# Define a dummy decorator if token_required is missing
def token_required(f):
@wraps(f)
def wrapper(*args, **kwargs):
print("ERROR: token_required decorator is not available!")
return jsonify({"message": "Server configuration error: Missing authentication utility."}), 500
return wrapper
# --- Schema Imports ---
try:
# Import the relevant schemas defined in schemas.py
from ..schemas import (
URLCreateSchema, URLUpdateSchema, URLSchema,
URLListSchema, URLSearchResultSchema
)
from marshmallow import ValidationError
except ImportError:
print("Warning: Could not import URL schemas or ValidationError in urls/urls_routes.py.")
URLCreateSchema = None
URLUpdateSchema = None
URLSchema = None
URLListSchema = None
URLSearchResultSchema = None
ValidationError = None
# --- Celery Task Imports ---
# IMPORTANT: Assumes the project root directory ('your_fullstack_project/') is in PYTHONPATH
try:
from backend_flask.celery_worker.celery_app import async_extract_title_and_keywords, async_summarize_url, async_recalc_project_keywords
except ModuleNotFoundError:
print("Warning: Could not import Celery tasks from 'celery_worker'. Ensure project root is in PYTHONPATH.")
# Define dummy task functions to prevent NameError if Celery isn't set up
def _dummy_celery_task(*args, **kwargs):
task_name = args[0] if args else 'dummy_task'
print(f"ERROR: Celery task {task_name} not available!")
class DummyTask:
def __init__(self, name):
self.__name__ = name
def delay(self, *a, **kw):
print(f"ERROR: Tried to call delay() on dummy task {self.__name__}")
pass
return DummyTask(task_name)
async_extract_title_and_keywords = _dummy_celery_task('async_extract_title_and_keywords')
async_summarize_url = _dummy_celery_task('async_summarize_url')
async_recalc_project_keywords = _dummy_celery_task('async_recalc_project_keywords')
# --- Helper to get logger safely ---
def _get_logger():
    """Return the Flask app logger when inside an app context, else a module logger."""
    return current_app.logger if has_app_context() else logging.getLogger(__name__)
# Note: Routes use paths relative to the '/api' prefix defined in __init__.py.
# --------------------------
# Create URL Endpoint
# Path: POST /api/projects/<project_id>/urls
# --------------------------
@bp.route('/projects/<string:project_id>/urls', methods=['POST'])
@token_required
def create_url(current_user, project_id):
    """
    Create a new URL entry within a specific project.

    Uses URLCreateSchema to validate the required 'url' field; optional
    fields (title, favicon, starred, note, keywords, summary) are read
    directly from the raw JSON payload.
    Verifies the authenticated user owns or collaborates on the project.
    Triggers background Celery tasks for title/keyword extraction and
    summarization after insertion.

    Returns:
        201 with the new URL id on success; 400/403/404/422 on input or
        access errors; 500 on internal failures.
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        logger.error("Invalid current_user object received in create_url")
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id)  # Keep string version for Celery tasks
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in create_url: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check dependencies
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not URLCreateSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    logger.debug(f"create_url called: project_id={project_id}, data={json_data}, user_id={user_id_str}")
    schema = URLCreateSchema()
    try:
        # Validate only the required 'url' field initially
        validated_input = schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Create URL validation failed: {err.messages}")
        return jsonify(err.messages), 422
    user_url = validated_input['url']  # URL is guaranteed by schema
    try:
        # Validate project ID format from URL path
        try:
            project_obj_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400
        # --- Verify Project Access ---
        db = mongo.db
        project = db.projects.find_one({"_id": project_obj_id}, {"ownerId": 1, "collaborators": 1})
        if not project:
            return jsonify({"message": "Project not found."}), 404
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId field.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to add URLs to this project."}), 403
        # --- Prepare URL Document ---
        # BUG FIX: optional fields were previously read from an undefined
        # name 'data' (NameError at runtime -> 500 on every create); they
        # come from the raw JSON payload 'json_data'.
        keywords_data = json_data.get("keywords", [])
        keywords_converted = []
        if isinstance(keywords_data, list):
            for kw in keywords_data:
                if isinstance(kw, dict):
                    word = kw.get("word", "").strip()
                    if word:
                        try:
                            percentage = float(kw.get("percentage", 0.0))
                            keywords_converted.append({"word": word, "percentage": percentage})
                        except (ValueError, TypeError):
                            logger.warning(f"Could not convert keyword percentage for word '{word}' during URL creation.")
                else:
                    logger.warning("Non-dict item found in keywords during URL creation.")
        now = datetime.datetime.now(datetime.timezone.utc)
        url_doc = {
            "projectId": project_obj_id,
            "url": user_url,  # Use validated URL
            "title": json_data.get("title", "").strip(),
            "favicon": json_data.get("favicon", ""),
            "starred": bool(json_data.get("starred", False)),
            "note": json_data.get("note", "").strip(),
            "keywords": keywords_converted,
            "summary": json_data.get("summary", "").strip(),
            "processingStatus": "pending",
            "createdAt": now,
            "updatedAt": now
        }
        # Insert the new URL document
        result = db.urls.insert_one(url_doc)
        new_url_id_str = str(result.inserted_id)
        logger.info(f"Successfully inserted URL {new_url_id_str} for project {project_id}")
        # --- Trigger Background Tasks ---
        # Queueing failures are reported in the response message but do not
        # roll back the insert.
        tasks_queued = True
        try:
            async_extract_title_and_keywords.delay(new_url_id_str, user_id_str)
            api_doc = db.api_list.find_one({"uid": user_id, "selected": True, "name": "Gemini"})
            use_gemini = bool(api_doc and api_doc.get("key"))
            async_summarize_url.delay(new_url_id_str, user_id_str, use_gemini)
            logger.info(f"Queued Celery tasks for URL {new_url_id_str} (use_gemini={use_gemini})")
        except NameError as ne:
            logger.error(f"Celery tasks not available for URL {new_url_id_str}: {ne}. Processing cannot be initiated.")
            tasks_queued = False
        except Exception as celery_err:
            logger.error(f"Failed to queue Celery tasks for URL {new_url_id_str}: {celery_err}", exc_info=True)
            tasks_queued = False
        response_message = "URL created successfully and processing initiated." if tasks_queued else "URL created, but failed to initiate background processing."
        return jsonify({"message": response_message, "url_id": new_url_id_str}), 201
    except Exception as e:
        logger.error(f"Error creating URL for project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred while creating the URL."}), 500
# --------------------------
# List URLs for Project (Simplified)
# Path: GET /api/projects/<project_id>/urls
# --------------------------
@bp.route('/projects/<string:project_id>/urls', methods=['GET'])
@token_required
def list_urls_for_project(current_user, project_id):
    """
    Return a simplified list (id, title, url) of every URL in a project.

    Output is serialized with URLListSchema; the caller must be the project
    owner or a collaborator.
    """
    logger = _get_logger()
    # Guard: token payload must carry a usable user id.
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in list_urls_for_project: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Guard: required runtime dependencies.
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLListSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400
        db = mongo.db
        # Access check: fetch only the ownership fields of the project.
        project = db.projects.find_one(
            {"_id": obj_project_id},
            {"ownerId": 1, "collaborators": 1}
        )
        if project is None:
            return jsonify({"message": "Project not found."}), 404
        is_owner = project.get("ownerId") == user_id
        is_collaborator = user_id in project.get("collaborators", [])
        if not (is_owner or is_collaborator):
            return jsonify({"message": "You do not have access to this project's URLs."}), 403
        # Fetch only the fields URLListSchema emits, newest-updated first.
        url_docs = list(
            db.urls.find(
                {"projectId": obj_project_id},
                {"_id": 1, "title": 1, "url": 1}
            ).sort("updatedAt", -1)
        )
        return jsonify({"urls": URLListSchema(many=True).dump(url_docs)}), 200
    except Exception as e:
        logger.error(f"Error listing URLs for project {project_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while listing URLs."}), 500
# --------------------------
# Get URL Detail
# Path: GET /api/urls/<url_id>
# --------------------------
@bp.route('/urls/<string:url_id>', methods=['GET'])
@token_required
def get_url_detail(current_user, url_id):
    """
    Return the full details of a single URL entry.

    Output is serialized with URLSchema; access is verified through the
    URL's parent project (owner or collaborator).
    """
    logger = _get_logger()
    # Guard: token payload must carry a usable user id.
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in get_url_detail: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Guard: required runtime dependencies.
    if not mongo:
        return jsonify({"message": "Database connection not available."}), 500
    if not URLSchema:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400
        db = mongo.db
        url_doc = db.urls.find_one({"_id": obj_url_id})
        if url_doc is None:
            return jsonify({"message": "URL not found."}), 404
        # The URL must point at a valid parent project to verify access.
        project_obj_id = url_doc.get("projectId")
        if not (project_obj_id and isinstance(project_obj_id, ObjectId)):
            logger.error(f"URL {url_id} has missing or invalid projectId.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500
        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1}
        )
        if project is None:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404
        owner_id = project.get("ownerId")
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId in get_url_detail.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in project.get("collaborators", []):
            return jsonify({"message": "You do not have access to the project containing this URL."}), 403
        # Schema handles ObjectId/datetime conversion and field selection.
        return jsonify(URLSchema().dump(url_doc)), 200
    except Exception as e:
        logger.error(f"Error retrieving URL detail for {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while retrieving the URL details."}), 500
# --------------------------
# Update URL
# Path: PUT /api/urls/<url_id>
# --------------------------
@bp.route('/urls/<string:url_id>', methods=['PUT'])
@token_required
def update_url(current_user, url_id):
    """
    Update specific fields of a URL entry (title, starred, note, keywords).

    Uses URLUpdateSchema for input validation (only whitelisted fields pass).
    Verifies user access via the associated project.
    Triggers project keyword recalculation if keywords are changed.
    Returns simplified updated URL info using URLListSchema.

    Returns:
        200 with the updated simplified URL on success; 400/404/422 on
        input errors; 403 on access denial; 500 on internal failures.
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in update_url: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check dependencies
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not URLUpdateSchema or not URLListSchema or not ValidationError:
        return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    # Get and validate JSON data using the schema
    json_data = request.get_json() or {}
    update_schema = URLUpdateSchema()
    try:
        # Load validates only the allowed fields (title, starred, note, keywords)
        validated_data = update_schema.load(json_data)
    except ValidationError as err:
        logger.warning(f"Update URL validation failed: {err.messages}")
        return jsonify(err.messages), 422
    # If validation passed but no valid fields were provided
    if not validated_data:
        return jsonify({"message": "No valid fields provided for update."}), 400
    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400
        db = mongo.db
        # --- Find URL and Verify Access ---
        # Fetch projectId needed for access check
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404
        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during update.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500
        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1} # Projection for access check
        )
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during update.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404 # Or 500
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during URL update.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to update URLs in this project."}), 403
        # --- Prepare Update Fields from validated data ---
        # NOTE: this aliases (does not copy) the validated dict; the
        # timestamp below is written into the same object.
        update_fields = validated_data # Use the validated dictionary directly
        keywords_changed = "keywords" in update_fields # Flag for the background recalc trigger
        # Always update the 'updatedAt' timestamp
        update_fields["updatedAt"] = datetime.datetime.now(datetime.timezone.utc)
        # --- Perform Update ---
        result = db.urls.update_one({"_id": obj_url_id}, {"$set": update_fields})
        # --- Return Response ---
        if result.matched_count == 1:
            # Retrieve the updated URL doc to return simplified info
            updated_url_doc = db.urls.find_one(
                {"_id": obj_url_id},
                {"_id": 1, "title": 1, "url": 1} # Projection for list schema
            )
            if updated_url_doc:
                # Serialize using the list schema for consistency
                output_schema = URLListSchema()
                serialized_url = output_schema.dump(updated_url_doc)
                # Trigger keyword recalc for the project in background if keywords changed.
                # Queueing failures are logged but never fail the update response.
                if keywords_changed:
                    try:
                        async_recalc_project_keywords.delay(str(project_obj_id), str(user_id))
                        logger.info(f"Queued keyword recalc task for project {project_obj_id} after URL {url_id} update.")
                    except NameError:
                        logger.error("Celery task 'async_recalc_project_keywords' not available during URL update.")
                    except Exception as celery_err:
                        logger.error(f"Failed to queue Celery recalc task for project {project_obj_id} after URL update: {celery_err}", exc_info=True)
                return jsonify({"message": "URL updated successfully.", "url": serialized_url}), 200
            else:
                logger.warning(f"URL {url_id} updated but could not be retrieved.")
                return jsonify({"message": "URL updated successfully, but failed to retrieve updated data."}), 200
        else:
            # Matched count was 0: document disappeared between find and update
            return jsonify({"message": "URL update failed (document not found)."}), 404
    except Exception as e:
        logger.error(f"Error updating URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while updating the URL."}), 500
# --------------------------
# Delete URL
# Path: DELETE /api/urls/<url_id>
# --------------------------
@bp.route('/urls/<string:url_id>', methods=['DELETE'])
@token_required
def delete_url(current_user, url_id):
    """
    Delete a specific URL entry by its ID.

    Verifies user access via the associated project (owner or collaborator).
    Triggers project keyword recalculation after a successful deletion.
    (No schema needed for input/output here.)

    Returns:
        200 on success; 400/404 on bad input or missing URL; 403 on access
        denial; 500 on internal failures.
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in delete_url: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check DB connection
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400
        db = mongo.db
        # --- Find URL and Verify Access ---
        # Fetch projectId needed for access check and recalc trigger
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404
        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during delete.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500
        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1} # Projection for access check
        )
        # If associated project is missing, we cannot verify access, deny deletion.
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during delete.")
            return jsonify({"message": "Cannot verify access; associated project missing."}), 403 # Deny access
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during URL delete.")
            return jsonify({"message": "Project data integrity issue."}), 500
        # Check if user has access rights (owner or collaborator)
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have permission to delete URLs in this project."}), 403
        # --- Perform Deletion ---
        delete_result = db.urls.delete_one({"_id": obj_url_id})
        # --- Return Response ---
        if delete_result.deleted_count == 1:
            # Trigger keyword recalc for the project in background after successful
            # URL deletion. Queue failures are logged but never fail the response.
            try:
                async_recalc_project_keywords.delay(str(project_obj_id), str(user_id))
                logger.info(f"Queued keyword recalc task for project {project_obj_id} after URL {url_id} deletion.")
            except NameError:
                logger.error("Celery task 'async_recalc_project_keywords' not available during URL deletion.")
            except Exception as celery_err:
                logger.error(f"Failed to queue Celery recalc task for project {project_obj_id} after URL deletion: {celery_err}", exc_info=True)
            # Still return success for the deletion itself
            return jsonify({"message": "URL deleted successfully."}), 200 # 200 OK or 204 No Content
        else:
            # Document existed (find_one succeeded) but delete failed — likely a
            # concurrent delete between the two operations.
            logger.error(f"URL {obj_url_id} found but delete_one failed (deleted_count=0).")
            return jsonify({"message": "Failed to delete URL (already deleted?)."}), 404 # Or 500
    except Exception as e:
        logger.error(f"Error deleting URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An error occurred while deleting the URL."}), 500
# --------------------------
# Celery Task Trigger Endpoints
# Path: PUT /api/urls/<url_id>/extract_title_and_keywords
# Path: PUT /api/urls/<url_id>/summarize
# --------------------------
@bp.route('/urls/<string:url_id>/extract_title_and_keywords', methods=['PUT'])
@token_required
def trigger_extract_title_and_keywords(current_user, url_id):
    """
    Manually trigger the background task extracting title and keywords for a URL.

    Verifies user access via the associated project, sets processingStatus
    to 'pending', then queues the Celery task.
    (No schema needed for input/output here.)

    Returns:
        202 when the task is queued; 400/404 on bad input; 403 on access
        denial; 500 on configuration or internal failures.
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id) # Keep string version for Celery task
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in trigger_extract: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check DB connection
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400
        db = mongo.db
        # --- Find URL and Verify Access ---
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404
        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during trigger_extract.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500
        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1} # Projection for access check
        )
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during trigger_extract.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404 # Or 500
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during trigger_extract.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to trigger processing for this URL."}), 403
        # --- Update Status and Queue Task ---
        # Set status to pending before queueing so clients can poll progress.
        db.urls.update_one({"_id": obj_url_id},
            {"$set": {"processingStatus": "pending", "updatedAt": datetime.datetime.now(datetime.timezone.utc)}})
        try:
            # Queue the Celery task
            async_extract_title_and_keywords.delay(url_id, user_id_str)
            logger.info(f"Queued title/keyword extraction task for URL {url_id}")
            return jsonify({"message": "Title and keyword extraction task queued successfully."}), 202 # 202 Accepted
        except NameError:
            logger.error("Celery task 'async_extract_title_and_keywords' is not defined or imported correctly.")
            # Status is deliberately left as 'pending' even though queueing failed.
            return jsonify({"message": "Server configuration error: Extraction feature unavailable."}), 500
        except Exception as e:
            logger.error(f"Error queueing extraction task for URL {url_id}: {e}", exc_info=True)
            # Status is deliberately left as 'pending' even though queueing failed.
            return jsonify({"message": "An error occurred while queueing the extraction task."}), 500
    except Exception as e:
        logger.error(f"Error in trigger_extract_title_and_keywords endpoint for URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred before queueing the task."}), 500
@bp.route('/urls/<string:url_id>/summarize', methods=['PUT'])
@token_required
def trigger_summarize_url(current_user, url_id):
    """
    Manually trigger the background task for summarizing a URL.

    Verifies user access via the associated project, determines whether to
    use Gemini based on the user's selected API key, sets processingStatus
    to 'pending', then queues the Celery task.
    (No schema needed for input/output here.)

    Returns:
        202 when the task is queued; 400/404 on bad input; 403 on access
        denial; 500 on configuration or internal failures.
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
        user_id_str = str(user_id) # Keep string version for Celery task
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in trigger_summarize: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check DB connection
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    try:
        # Validate URL ID format
        try:
            obj_url_id = ObjectId(url_id)
        except InvalidId:
            return jsonify({"message": "Invalid URL ID format."}), 400
        db = mongo.db
        # --- Find URL and Verify Access ---
        url_doc = db.urls.find_one({"_id": obj_url_id}, {"projectId": 1})
        if not url_doc:
            return jsonify({"message": "URL not found."}), 404
        project_obj_id = url_doc.get("projectId")
        if not project_obj_id or not isinstance(project_obj_id, ObjectId):
            logger.error(f"URL {url_id} has missing or invalid projectId during trigger_summarize.")
            return jsonify({"message": "URL data integrity issue (missing project link)."}), 500
        project = db.projects.find_one(
            {"_id": project_obj_id},
            {"ownerId": 1, "collaborators": 1} # Projection for access check
        )
        if not project:
            logger.error(f"Project {project_obj_id} associated with URL {url_id} not found during trigger_summarize.")
            return jsonify({"message": "Associated project not found; cannot verify access."}), 404 # Or 500
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if not owner_id:
            logger.error(f"Project {project_obj_id} is missing ownerId during trigger_summarize.")
            return jsonify({"message": "Project data integrity issue."}), 500
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "You do not have access to trigger processing for this URL."}), 403
        # --- Update Status, Check API Key, and Queue Task ---
        # Set status to pending before queueing so clients can poll progress.
        db.urls.update_one({"_id": obj_url_id},
            {"$set": {"processingStatus": "pending", "updatedAt": datetime.datetime.now(datetime.timezone.utc)}})
        # Check for user's selected Gemini API key
        api_doc = db.api_list.find_one({"uid": user_id, "selected": True, "name": "Gemini"})
        use_gemini = bool(api_doc and api_doc.get("key")) # True if Gemini selected and key exists
        try:
            # Queue the Celery task, passing the use_gemini flag
            async_summarize_url.delay(url_id, user_id_str, use_gemini)
            logger.info(f"Queued summarization task for URL {url_id} (use_gemini={use_gemini})")
            return jsonify({"message": "Summarization task queued successfully."}), 202 # 202 Accepted
        except NameError:
            logger.error("Celery task 'async_summarize_url' is not defined or imported correctly.")
            # Status is deliberately left as 'pending' even though queueing failed.
            return jsonify({"message": "Server configuration error: Summarization feature unavailable."}), 500
        except Exception as e:
            logger.error(f"Error queueing summarization task for URL {url_id}: {e}", exc_info=True)
            # Status is deliberately left as 'pending' even though queueing failed.
            return jsonify({"message": "An error occurred while queueing the summarization task."}), 500
    except Exception as e:
        logger.error(f"Error in trigger_summarize_url endpoint for URL {url_id}: {e}", exc_info=True)
        return jsonify({"message": "An internal error occurred before queueing the task."}), 500
# --------------------------
# Search URLs within Project
# Path: GET /api/projects/<project_id>/search?q=...
# --------------------------
@bp.route('/projects/<string:project_id>/search', methods=['GET'])
@token_required
def search_urls(current_user, project_id):
    """
    Search for URLs within a specific project based on a query string.

    Uses URLSearchResultSchema for output serialization.
    Searches 'title', 'note', 'keywords.word', and 'summary' fields using a
    case-insensitive regex (the query is escaped, so it is matched literally).
    Returns a simplified list (id, title, url) of matching URLs.
    Verifies user access to the project.

    Returns:
        200 with {"results": [...]} (empty for a blank query); 400/404 on
        bad input; 403 on access denial; 500 on internal failures.
    """
    logger = _get_logger()
    # Validate user object from token
    if not current_user or not current_user.get("_id"):
        return jsonify({"message": "Internal authorization error."}), 500
    try:
        user_id = ObjectId(current_user["_id"])
    except (InvalidId, TypeError) as e:
        logger.error(f"User ID conversion error in search_urls: {e}")
        return jsonify({"message": "Invalid user ID format in token."}), 400
    # Check dependencies
    if not mongo: return jsonify({"message": "Database connection not available."}), 500
    if not URLSearchResultSchema: return jsonify({"message": "Server configuration error: Schema unavailable."}), 500
    try:
        # Get search query string from query parameters
        query_str = request.args.get("q", "").strip()
        # If query string is empty, return empty results immediately
        if not query_str:
            return jsonify({"results": []}), 200
        # Validate project ID format from URL path
        try:
            obj_project_id = ObjectId(project_id)
        except InvalidId:
            return jsonify({"message": "Invalid project ID format in URL path."}), 400
        db = mongo.db
        # --- Verify Project Access ---
        project = db.projects.find_one(
            {"_id": obj_project_id},
            {"ownerId": 1, "collaborators": 1} # Projection for access check
        )
        if not project:
            return jsonify({"message": "Project not found."}), 404
        owner_id = project.get("ownerId")
        collaborators = project.get("collaborators", [])
        if owner_id != user_id and user_id not in collaborators:
            return jsonify({"message": "Access denied to search URLs in this project."}), 403
        # --- Perform Search using Aggregation Pipeline ---
        # Escape regex special characters in the query string for safety
        escaped_query = re.escape(query_str)
        search_pipeline = [
            {"$match": {"projectId": obj_project_id}},
            {"$match": {
                "$or": [
                    {"title": {"$regex": escaped_query, "$options": "i"}},
                    {"note": {"$regex": escaped_query, "$options": "i"}},
                    {"keywords.word": {"$regex": escaped_query, "$options": "i"}},
                    {"summary": {"$regex": escaped_query, "$options": "i"}}
                ]
            }},
            # Project only fields needed by the output schema
            {"$project": {"_id": 1, "title": 1, "url": 1, "updatedAt": 1}},
            {"$sort": {"updatedAt": -1}} # Sort by update time
            # Add $limit stage if needed
        ]
        # Execute the aggregation pipeline
        results_cursor = db.urls.aggregate(search_pipeline)
        search_result_docs = list(results_cursor) # Convert cursor to list
        # --- Serialize results using the schema ---
        output_schema = URLSearchResultSchema(many=True)
        # Schema handles ObjectId conversion and field selection
        serialized_result = output_schema.dump(search_result_docs)
        # Return the search results
        return jsonify({"results": serialized_result}), 200
    except Exception as e:
        # NOTE(review): 'query_str' is referenced here but only assigned inside
        # the try block; if the very first statement raised, this log line
        # itself would NameError. Appears unreachable in practice — confirm.
        logger.error(f"Error searching URLs in project {project_id} with query '{query_str}': {e}", exc_info=True)
        return jsonify({"message": "An error occurred during URL search."}), 500

View File

@@ -0,0 +1,93 @@
# utils/auth.py (or wherever token_required is defined)
from functools import wraps
import secrets
import jwt
from flask import request, jsonify, current_app # <-- Import current_app
# Config might still be needed for default algorithm if not in app.config
# from backend.config import Config # Keep if needed for defaults, but prefer current_app.config
# TODO Flask cannot find config inside the utils
from .config import Config # Example if config.py is in the same dir
from bson.objectid import ObjectId
# Remove direct import of mongo
def token_required(f):
    """Decorator that validates the JWT bearer token on the incoming request.

    On success the matching user document is fetched from MongoDB and passed
    as the first positional argument to the wrapped view. On any failure a
    401 JSON response is returned and the view is never invoked.
    """

    def _extract_token(header_value):
        # Accept either "Bearer <token>" or a bare token string.
        if not header_value:
            return None
        pieces = header_value.split()
        if len(pieces) == 2 and pieces[0].lower() == "bearer":
            return pieces[1]
        if len(pieces) == 1:
            return header_value
        return None

    @wraps(f)
    def decorated(*args, **kwargs):
        bearer_token = _extract_token(request.headers.get("Authorization"))
        if not bearer_token:
            return jsonify({"message": "Token is missing."}), 401
        try:
            # Signing settings come from the active application's config.
            signing_key = current_app.config['SECRET_KEY']
            algo = current_app.config.get('JWT_ALGORITHM', Config.JWT_ALGORITHM or 'HS256')
            payload = jwt.decode(bearer_token, signing_key, algorithms=[algo])
            uid = payload.get("user_id")
            if not uid:
                return jsonify({"message": "Token payload missing user_id."}), 401
            # Look the user up via the mongo instance attached to the app,
            # avoiding a direct import of the extension.
            user_doc = current_app.mongo.db.users.find_one({"_id": ObjectId(uid)})
            if not user_doc:
                # A syntactically valid token may reference a deleted account.
                return jsonify({"message": "User associated with token not found."}), 401
        except jwt.ExpiredSignatureError:
            # Expiry gets its own message so clients can prompt a re-login.
            return jsonify({"message": "Token has expired."}), 401
        except jwt.InvalidTokenError as e:
            current_app.logger.warning(f"Invalid token encountered: {e}")
            return jsonify({"message": "Token is invalid."}), 401
        except Exception as e:
            # Covers ObjectId conversion failures and database errors.
            current_app.logger.error(f"Error during token verification: {e}", exc_info=True)
            return jsonify({"message": "Token verification failed."}), 401
        # Hand the fetched user document to the wrapped view.
        return f(user_doc, *args, **kwargs)
    return decorated
# This is a placeholder for background task functions.
# For example, you could use Celery to process URLs asynchronously.
def process_url(url_id):
    """Placeholder for asynchronous URL processing.

    Intended to run in a background worker (e.g. a Celery task): retrieve
    the URL document identified by ``url_id``, perform scraping and
    summarization, and update its ``processingStatus``. Currently a no-op.

    Args:
        url_id: Identifier of the URL document to process.
    """
    # Retrieve URL document by url_id, perform scraping, summarization, and update processingStatus.
    # This function should be called by a background worker.
    pass
# This function will generate a pass key for frontend-backend communication
def generate_passkey(num_bytes: int = 16) -> str:
    """Generate a cryptographically secure hex pass key.

    Used for frontend-backend communication. Generalized so callers can
    request longer or shorter keys; the default preserves the original
    16-byte (32 hex character) behavior.

    Args:
        num_bytes: Number of random bytes to draw; the returned string
            contains twice this many hex characters. Defaults to 16.

    Returns:
        A random lowercase hexadecimal string of length ``2 * num_bytes``.
    """
    return secrets.token_hex(num_bytes)