You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py",
line 204, in retry_with_backoff_async\n return await func()\n ^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py",
line 149, in do_request\n raise PermanentError(exception) from exception\nunstructured_client.utils.retries.PermanentError: unknown async library, or not in async context\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 312, in call_api_partial\n response = await request_utils.call_api_async(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/request_utils.py", line 96, in call_api_async\n response = await retry_async(\n ^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 153, in retry_async\n return await retry_with_backoff_async(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 206, in retry_with_backoff_async\n raise exception.inner\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 121, in do_request\n res = await func()\n ^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/request_utils.py", line 93, in do_request\n return await client.send(new_request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1674, in send\n response = await self._send_handling_auth(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1702, in _send_handling_auth\n response = await self._send_handling_redirects(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1739, in _send_handling_redirects\n response = await self._send_single_request(request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1776, in _send_single_request\n response = await transport.handle_async_request(request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_transports/default.py", line 377, in handle_async_request\n resp = await self._pool.handle_async_request(req)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 215, in handle_async_request\n await self._close_connections(closing)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 303, in _close_connections\n with AsyncShieldCancellation():\n ^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 202, in init\n self._backend = current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 29, in current_async_library\n environment = sniffio.current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/sniffio/_impl.py", line 93, in current_async_library\n raise AsyncLibraryNotFoundError(\nsniffio._impl.AsyncLibraryNotFoundError: unknown async library, or not in async context\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n
File "/home/cdw/src/service/document_worker_service.py", line 24, in document_pipeline\n contents = self._process_stage(metadata=metadata)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/service/document_worker_service.py", line 88, in _process_stage\n return loader.load(metadata)\n ^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 72, in load\n raise exc\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 48, in load\n file_elements = self._fetch_file_partition(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 85, in _fetch_file_partition\n response = self._client.general.partition(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/general.py", line 77, in partition\n http_res = self.do_request(\n ^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/basesdk.py", line 265, in do_request\n http_res = self.sdk_configuration.get_hooks().after_success(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/sdkhooks.py", line 59, in after_success\n out = hook.after_success(hook_ctx, response)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 423, in after_success\n elements = self._await_elements(operation_id)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 371, in _await_elements\n task_responses: list[tuple[int, httpx.Response]] = ioloop.run_until_complete(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/nest_asyncio.py", line 98, in run_until_complete\n return f.result()\n ^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/futures.py", line 203, in result\n raise self._exception.with_traceback(self._exception_tb)\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 277, in __step\n result = coro.send(None)\n ^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 63, in run_tasks\n index, response = await future\n ^^^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 615, in _wait_for_one\n return f.result() # May raise f.exception().\n ^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/futures.py", line 203, in result\n raise self._exception.with_traceback(self._exception_tb)\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 277, in __step\n result = coro.send(None)\n ^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 50, in _order_keeper\n response = await coro\n ^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 311, in call_api_partial\n async with httpx.AsyncClient(timeout=client_timeout) as client:\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 2062, in __aexit__\n await self._transport.__aexit__(exc_type, exc_value, traceback)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_transports/default.py", line 356, in __aexit__\n await self._pool.__aexit__(exc_type, exc_value, traceback)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 324, in __aexit__\n await self.aclose()\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 313, in aclose\n await self._close_connections(closing_connections)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 303, in _close_connections\n with AsyncShieldCancellation():\n ^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 202, in __init__\n self._backend = current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 29, in current_async_library\n environment = sniffio.current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/sniffio/_impl.py", line 93, in current_async_library\n raise AsyncLibraryNotFoundError(\nsniffio._impl.AsyncLibraryNotFoundError: unknown async library, or not in async context", "component_type": "cdw", "app_name": "cdw"}`
To Reproduce
Partition a very large PDF file with the Python client twice within a short interval.
Expected behavior
Partition is successful.
Environment Info
Self-hosted unstructured, Python client version is 0.26.0b3. Python version is 3.11.
Is this going to be fixed soon? Basically async doesn't work with splitting PDFs.
Similarly, there is no way to pass verify=ssl_context to the requests made by the sync client either.
Describe the bug
When I call the partition function on a 100-page PDF, it raises an AsyncLibraryNotFoundError. The issue is intermittent and does not reproduce on every call.
my parameters:
files=files, pdf_infer_table_structure=True, extract_image_block_types=["Image"], strategy=shared.Strategy.HI_RES, output_format=shared.OutputFormat.APPLICATION_JSON, unique_element_ids=True, encoding="utf-8", coordinates=True,
relevant logs:
`{"asctime": "2024-10-17 09:02:16,380", "levelname": "ERROR", "module": "document_worker_service", "funcName": "document_pipeline", "lineno": 27, "thread": 140252027388288, "message": "Failure in document pipeline processing, datasource_id: 019299ae-5f80-7b37-a54a-11608642bec1, sub_datasource_id: 019299ae-5f80-7b37-a54a-11608642bec1",
"exc_info":
"Traceback (most recent call last):\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py",
line 204, in retry_with_backoff_async\n return await func()\n ^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py",
line 149, in do_request\n raise PermanentError(exception) from exception\nunstructured_client.utils.retries.PermanentError: unknown async library, or not in async context\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 312, in call_api_partial\n response = await request_utils.call_api_async(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/request_utils.py", line 96, in call_api_async\n response = await retry_async(\n ^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 153, in retry_async\n return await retry_with_backoff_async(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 206, in retry_with_backoff_async\n raise exception.inner\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/utils/retries.py", line 121, in do_request\n res = await func()\n ^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/request_utils.py", line 93, in do_request\n return await client.send(new_request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1674, in send\n response = await self._send_handling_auth(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1702, in _send_handling_auth\n response = await self._send_handling_redirects(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1739, in _send_handling_redirects\n response = await self._send_single_request(request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 1776, in _send_single_request\n response = await transport.handle_async_request(request)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_transports/default.py", line 377, in handle_async_request\n resp = await self._pool.handle_async_request(req)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 215, in handle_async_request\n await self._close_connections(closing)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 303, in _close_connections\n with AsyncShieldCancellation():\n ^^^^^^^^^^^^^^^^^^^^^^^^^\n File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 202, in init\n self._backend = current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 29, in current_async_library\n environment = sniffio.current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/sniffio/_impl.py", line 93, in current_async_library\n raise AsyncLibraryNotFoundError(\nsniffio._impl.AsyncLibraryNotFoundError: unknown async library, or not in async context\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n
File "/home/cdw/src/service/document_worker_service.py", line 24, in document_pipeline\n contents = self._process_stage(metadata=metadata)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/service/document_worker_service.py", line 88, in _process_stage\n return loader.load(metadata)\n ^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 72, in load\n raise exc\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 48, in load\n file_elements = self._fetch_file_partition(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/src/loader/impl/unstructured_loader.py", line 85, in _fetch_file_partition\n response = self._client.general.partition(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/general.py", line 77, in partition\n http_res = self.do_request(\n ^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/basesdk.py", line 265, in do_request\n http_res = self.sdk_configuration.get_hooks().after_success(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/sdkhooks.py", line 59, in after_success\n out = hook.after_success(hook_ctx, response)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 423, in after_success\n elements = self._await_elements(operation_id)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 371, in _await_elements\n task_responses: list[tuple[int, httpx.Response]] = ioloop.run_until_complete(\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/nest_asyncio.py", line 98, in run_until_complete\n return f.result()\n ^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/futures.py", line 203, in result\n raise self._exception.with_traceback(self._exception_tb)\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 277, in __step\n result = coro.send(None)\n ^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 63, in run_tasks\n index, response = await future\n ^^^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 615, in _wait_for_one\n return f.result() # May raise f.exception().\n ^^^^^^^^^^\n
File "/usr/local/lib/python3.11/asyncio/futures.py", line 203, in result\n raise self._exception.with_traceback(self._exception_tb)\n
File "/usr/local/lib/python3.11/asyncio/tasks.py", line 277, in __step\n result = coro.send(None)\n ^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 50, in _order_keeper\n response = await coro\n ^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/unstructured_client/_hooks/custom/split_pdf_hook.py", line 311, in call_api_partial\n async with httpx.AsyncClient(timeout=client_timeout) as client:\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_client.py", line 2062, in __aexit__\n await self._transport.__aexit__(exc_type, exc_value, traceback)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpx/_transports/default.py", line 356, in __aexit__\n await self._pool.__aexit__(exc_type, exc_value, traceback)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 324, in __aexit__\n await self.aclose()\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 313, in aclose\n await self._close_connections(closing_connections)\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_async/connection_pool.py", line 303, in _close_connections\n with AsyncShieldCancellation():\n ^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 202, in __init__\n self._backend = current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/httpcore/_synchronization.py", line 29, in current_async_library\n environment = sniffio.current_async_library()\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n
File "/home/cdw/.local/lib/python3.11/site-packages/sniffio/_impl.py", line 93, in current_async_library\n raise AsyncLibraryNotFoundError(\nsniffio._impl.AsyncLibraryNotFoundError: unknown async library, or not in async context", "component_type": "cdw", "app_name": "cdw"}`
To Reproduce
Partition a very large PDF file with the Python client twice within a short interval.
Expected behavior
Partition is successful.
Environment Info
Self-hosted unstructured, Python client version is 0.26.0b3. Python version is 3.11.
Additional context
I tried using gather and adding async/await; so far the error has not reproduced, but I am not sure whether this actually fixes it.
https://github.com/jimmyxu1985/unstructured-python-client/pull/1/files
The text was updated successfully, but these errors were encountered: