```python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.postprocessor import LLMRerank
from llama_index.llms.openai import OpenAI

# Load documents and create index
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
index = VectorStoreIndex.from_documents(documents=documents)

# Create reranker
reranker = LLMRerank(
    top_n=2,
    choice_batch_size=5
)

# Create query engine with reranker as node postprocessor
query_engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[reranker],
    response_mode="tree_summarize"
)

# Use the query engine
response = query_engine.query("Your question here")
```
The `LLMRerank` postprocessor will rerank the top 10 retrieved nodes and select the top 2 most relevant ones. The `tree_summarize` response mode is used to generate the final answer.

If you need full control over the query flow, you can also define your own query engine by subclassing `CustomQueryEngine`:

```python
from llama_index.core.query_engine import CustomQueryEngine
from llama_index.core.retrievers import BaseRetriever
from llama_index.core import get_response_synthesizer
from llama_index.core.response_synthesizers import BaseSynthesizer


class RAGQueryEngine(CustomQueryEngine):
    """RAG Query Engine."""

    retriever: BaseRetriever
    response_synthesizer: BaseSynthesizer

    def custom_query(self, query_str: str):
        nodes = self.retriever.retrieve(query_str)
        response_obj = self.response_synthesizer.synthesize(query_str, nodes)
        return response_obj
```
The `RAGQueryEngine` class inherits from `CustomQueryEngine`. It declares a `retriever` and a `response_synthesizer` as attributes, and the `custom_query` method is implemented to define the query processing logic:

```python
retriever = index.as_retriever()
synthesizer = get_response_synthesizer(response_mode="compact")

query_engine = RAGQueryEngine(
    retriever=retriever,
    response_synthesizer=synthesizer
)

response = query_engine.query("Your question here")
```
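`CustomQueryEngine` also supports an asynchronous path via `acustom_query`, which `aquery` will call. A minimal sketch, reusing the `RAGQueryEngine` defined above (the class name `AsyncRAGQueryEngine` is just for illustration):

```python
class AsyncRAGQueryEngine(RAGQueryEngine):
    """Adds a non-blocking query path to the RAGQueryEngine above."""

    async def acustom_query(self, query_str: str):
        # Retrieve and synthesize without blocking the event loop
        nodes = await self.retriever.aretrieve(query_str)
        return await self.response_synthesizer.asynthesize(query_str, nodes)


# Inside an async function: response = await query_engine.aquery("Your question here")
```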
To keep specific metadata out of the text the LLM sees, set the `excluded_llm_metadata_keys` attribute of a document or node. For example:

```python
document.excluded_llm_metadata_keys = ["file_name"]
```

This will exclude the `file_name` metadata from being included in the LLM context. To check what the LLM will actually see, use the `get_content()` function with `MetadataMode.LLM`:

```python
from llama_index.core.schema import MetadataMode

print(document.get_content(metadata_mode=MetadataMode.LLM))
```
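The same pattern applies to embeddings: `excluded_embed_metadata_keys` controls what the embedding model sees, and `MetadataMode.EMBED` lets you inspect it. A minimal sketch:

```python
from llama_index.core.schema import MetadataMode

# Keep file_name out of the text sent to the embedding model as well
document.excluded_embed_metadata_keys = ["file_name"]

print(document.get_content(metadata_mode=MetadataMode.EMBED))
```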
How the remaining metadata is formatted is controlled by three attributes:

- `Document.metadata_separator` (default: `"\n"`)
- `Document.metadata_template` (default: `"{key}: {value}"`)
- `Document.text_template` (default: `"{metadata_str}\n\n{content}"`)
A query engine can also be handed to an agent. Wrap it in a tool or an async function and pass it to an `AgentWorkflow`:

```python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.core.tools import QueryEngineTool
from llama_index.llms.openai import OpenAI
import asyncio

# Assuming you already have a query_engine
# Create a QueryEngineTool from your query engine
query_engine_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name="document_search",
    description="Useful for answering questions about the document."
)

# Create an async function to load context and query
async def search_documents(query: str) -> str:
    """Useful for answering natural language questions about the document."""
    response = await query_engine.aquery(query)
    return str(response)

# Create an agent workflow with the async function as a tool
agent = AgentWorkflow.from_tools_or_functions(
    [search_documents],
    llm=OpenAI(model="gpt-4"),
    verbose=True
)

# Now you can use the agent asynchronously
async def main():
    response = await agent.run(
        "Your question about the document here"
    )
    print(response)

# Run the agent
if __name__ == "__main__":
    asyncio.run(main())
```
In this code:

1. We create a `QueryEngineTool` from your existing query engine.
2. We define an async function, `search_documents`, that uses the query engine's `aquery` method for asynchronous querying.
3. We create an `AgentWorkflow` using the `from_tools_or_functions` method, passing in our async `search_documents` function.
4. We define a `main` async function that runs the agent with a question.
5. We use `asyncio.run(main())` to run the async function.

Make sure to replace `"Your question about the document here"` with the actual question you want to ask about your document(s).
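Note that the `query_engine_tool` built in step 1 is not actually passed to the agent above; only the plain `search_documents` function is. If you prefer to hand the agent the tool object itself, `from_tools_or_functions` also accepts tool instances. A minimal sketch, assuming the same `query_engine_tool` and LLM:

```python
# Pass the QueryEngineTool to the agent instead of the bare function
agent = AgentWorkflow.from_tools_or_functions(
    [query_engine_tool],
    llm=OpenAI(model="gpt-4"),
    verbose=True
)
```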
If you want lower-level control, you can run retrieval and node postprocessing yourself instead of going through a query engine:

```python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.schema import NodeWithScore, QueryBundle

# Assuming you have already created an index
index = VectorStoreIndex.from_documents(documents)

# Configure retriever
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=10,
)

# Configure node postprocessors
node_postprocessors = [
    SimilarityPostprocessor(similarity_cutoff=0.7)
]

# Create a query
query = "Your query here"
query_bundle = QueryBundle(query)

# Retrieve nodes
retrieved_nodes = retriever.retrieve(query_bundle)

# Apply node postprocessors
for postprocessor in node_postprocessors:
    retrieved_nodes = postprocessor.postprocess_nodes(
        retrieved_nodes, query_bundle=query_bundle
    )

# Process the retrieved and postprocessed nodes as needed
for node in retrieved_nodes:
    print(f"Node content: {node.node.get_content()}")
    print(f"Node score: {node.score}")
    print(f"Node metadata: {node.node.metadata}")
    print("---")
```
In this example:

- We create a `VectorIndexRetriever` from the index, specifying the number of top results to retrieve.
- We configure node postprocessors (in this case, a `SimilarityPostprocessor` that filters nodes based on a similarity score cutoff).
- We wrap the query string in a `QueryBundle`, retrieve nodes, and then run each postprocessor over the retrieved nodes.

You can swap in other retrievers (`BM25Retriever`, `TFIDFRetriever`, etc.) or other node postprocessors (`KeywordNodePostprocessor`, `PrevNextNodePostprocessor`, etc.); a keyword-filtering sketch follows the custom retriever example below. You can also create a custom retriever by extending the `BaseRetriever` class:

```python
from typing import List

from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, QueryBundle


class CustomRetriever(BaseRetriever):
    def __init__(self, index):
        self.index = index
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        # Implement your custom retrieval logic here
        # ...
        return retrieved_nodes
```
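As an example of swapping in a different postprocessor, `KeywordNodePostprocessor` keeps or drops nodes based on keywords (it may require the optional spaCy dependency). A minimal sketch, reusing the `retrieved_nodes` and `query_bundle` from above with hypothetical keywords:

```python
from llama_index.core.postprocessor import KeywordNodePostprocessor

# Keep only nodes that mention "startup" and drop any that mention "finance"
keyword_postprocessor = KeywordNodePostprocessor(
    required_keywords=["startup"],
    exclude_keywords=["finance"],
)

filtered_nodes = keyword_postprocessor.postprocess_nodes(
    retrieved_nodes, query_bundle=query_bundle
)
```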
There are several ways to tune answer quality. To retrieve more context, increase `similarity_top_k` on the retriever:

```python
retriever = index.as_retriever(similarity_top_k=10)  # Increase this number as needed
```
To change how the final answer is composed, pick a different response mode. Since the retriever is created separately here, a `RetrieverQueryEngine` is the direct way to combine the two:

```python
from llama_index.core.query_engine import RetrieverQueryEngine

query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    response_mode="tree_summarize"  # or "refine"
)
```
To get more detailed summaries, customize the summary prompt used by the synthesizer (note that `summary_template` expects a `PromptTemplate`, not a plain string):

```python
from llama_index.core import PromptTemplate
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import TreeSummarize

summarizer = TreeSummarize(
    verbose=True,
    summary_template=PromptTemplate(
        "Provide a detailed summary of the following context:\n"
        "---------------------\n"
        "{context_str}\n"
        "---------------------\n"
        "Detailed Summary: "
    )
)

query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    response_synthesizer=summarizer
)
```
To improve answer quality, use a more capable LLM:

```python
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-4", temperature=0.2)

query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    llm=llm
)
```
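Alternatively, the model can be configured once for everything; a sketch using the global `Settings` object (the model choice is just an example):

```python
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

# Every index and query engine will use this LLM by default
Settings.llm = OpenAI(model="gpt-4", temperature=0.2)
```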
Here's an example that combines these approaches: