I am trying to build an application which can be used to chat with multiple types of data using the different langchain and use streamlit to build the application.
I am unable to load the files properly with the langchain document loaders-
Here is the loader mapping dict-
FILE_LOADER_MAPPING = {
".csv": (CSVLoader, {"encoding": "utf-8"}),
".doc": (UnstructuredWordDocumentLoader, {}),
".docx": (UnstructuredWordDocumentLoader, {}),
".epub": (UnstructuredEPubLoader, {}),
".html": (UnstructuredHTMLLoader, {}),
".md": (UnstructuredMarkdownLoader, {}),
".odt": (UnstructuredODTLoader, {}),
".pdf": (PyPDFLoader, {}),
".ppt": (UnstructuredPowerPointLoader, {}),
".pptx": (UnstructuredPowerPointLoader, {}),
".txt": (TextLoader, {"encoding": "utf8"}),
".ipynb": (NotebookLoader, {}),
".py": (PythonLoader, {}),
}
Here is the main function-
def main():
st.title("Docuverse")
# Upload files
uploaded_files = st.file_uploader("Upload your documents", type=["pdf", "md", "txt", "csv", "py", "epub", "html", "ppt", "pptx", "doc", "docx", "odt", "ipynb"], accept_multiple_files=True)
loaded_documents = []
if uploaded_files:
# Process uploaded files
for uploaded_file in uploaded_files:
st.write(f"Uploaded: {uploaded_file.name}")
st.write(f"Uploaded: {type(uploaded_file)}")
ext = os.path.splitext(uploaded_file.name)[-1][1:].lower()
if ext in FILE_LOADER_MAPPING:
loader_class, loader_args = FILE_LOADER_MAPPING[ext]
loader = loader_class(uploaded_file, **loader_args)
else:
loader = UnstructuredFileLoader(uploaded_file)
loaded_documents.extend(loader.load())
st.write("Chat with the Document:")
query = st.text_input("Ask a question:")
if st.button("Get Answer"):
if query:
# Load model, set prompts, create vector database, and retrieve answer
try:
llm = load_model()
prompt = set_custom_prompt()
CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()
db = create_vector_database(loaded_documents)
response = retrieve_bot_answer(query)
# Display bot response
st.write("Bot Response:")
st.write(response)
except Exception as e:
st.error(f"An error occurred: {str(e)}")
else:
st.warning("Please enter a question.")
if __name__ == "__main__":
main()
I am uploading a pdf named protector.pdf
the error I get is
TypeError: expected str, bytes or os.PathLike object, not UploadedFile
File "/home/user/.local/lib/python3.10/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 552, in _run_script
exec(code, module.__dict__)
File "/home/user/app/app.py", line 395, in <module>
main()
File "/home/user/app/app.py", line 371, in main
loaded_documents.extend(loader.load())
File "/home/user/.local/lib/python3.10/site-packages/langchain/document_loaders/unstructured.py", line 86, in load
elements = self._get_elements()
File "/home/user/.local/lib/python3.10/site-packages/langchain/document_loaders/unstructured.py", line 172, in _get_elements
return partition(filename=self.file_path, **self.unstructured_kwargs)
File "/home/user/.local/lib/python3.10/site-packages/unstructured/partition/auto.py", line 212, in partition
filetype = detect_filetype(
File "/home/user/.local/lib/python3.10/site-packages/unstructured/file_utils/filetype.py", line 244, in detect_filetype
_, extension = os.path.splitext(_filename)
File "/usr/local/lib/python3.10/posixpath.py", line 118, in splitext
p = os.fspath(p)
Here is the full code - link
I am not sure If I am correctly handling the uploaded files.
How can I resolve this?