1def read_mongodb_query(user_input: str, table_schema: str, schema_description: str, database: str, collectionName: str):
2 """Generates a MongoDB raw aggregation pipeline based on user input and retrieves data from a MongoDB collection.
3
4 :param user_input: User's question or input that guides the query generation.
5 :type user_input: str
6 :param table_schema: A string representation of the table schema.
7 :type table_schema: str
8 :param schema_description: A string that describes the schema of the data.
9 :type schema_description: str
10 :param database: The name of the MongoDB database to query.
11 :type database: str
12 :param collectionName: The name of the MongoDB collection to query.
13 :type collectionName: str
14 :return: A list of dictionaries containing data retrieved from the MongoDB collection, or an error message.
15 :rtype: list[dict] or str
16 """
17 max_retries=3
18 retries = 0
19 user_message=user_input
20
21 system_temp = f"""
22 You are a MongoDb expert. Create a syntactically correct raw aggregation query for the user question:
23
24 This is the table schema : "{table_schema}"
25 The following is an the example of one mongodb document: {schema_description}
26 Return the raw aggregration pipeline in JSON.
27 output should be like "pipeline": "json_object"
28 Unless the user specifies in the question a specific number of examples to obtain, limit your query to at most 10 results using the $limit stage in an aggregation pipeline.
29 Always use the field names as they appear in the collection. Be careful not to query for fields that do not exist.
30 When dealing with dates, and if the question involves "today", use MongoDB's $currentDate, $dateFromString, or similar operators to work with the current date.
31 DO NOT USE $TRIM etc to convert the price in raw aggregation, data is already in double.
32 DO NOT to use $lookup
33 ###{user_message}###
34 """
35 while retries < max_retries:
36 try:
37
38
39 client = OpenAI(
40 api_key=os.environ.get("OPENAI_API_KEY"),
41 )
42 response = client.chat.completions.create(
43 model="gpt-3.5-turbo-1106",
44 response_format={"type": "json_object"},
45 messages=[
46 {"role": "system", "content": system_temp}
47 ]
48 )
49
50 output_str = response.choices[0].message.content
51
52 # Attempt to parse the output string as JSON and check for the 'pipeline' key
53 output_json = json.loads(output_str)
54 if 'pipeline' not in output_json:
55 raise ValueError("Invalid format: 'pipeline' key not found")
56
57 # MongoDB connection parameters
58 host = "localhost"
59 port = 27017
60 database_name = database
61
62 # Connect to MongoDB
63 mongo_client = pymongo.MongoClient(host, port)
64 db = mongo_client[database_name]
65 collection = db[collectionName]
66
67 # Perform the query and retrieve data
68 result = collection.aggregate(output_json['pipeline'])
69 print(f"(TEST QUERY:{output_json['pipeline']})")
70 # Convert the result to a list of dictionaries
71 data = [item for item in result]
72
73 # Close the MongoDB connection
74 mongo_client.close()
75
76 return data
77 except (json.JSONDecodeError, ValueError) as e:
78 print(f"Error encountered: {e}. Retrying...")
79 retries += 1
80 if retries == max_retries:
81 # Close the MongoDB connection if open and retries are exhausted
82 mongo_client.close()
83 return "Error: Could not figure the query out after maximum retries."