'Worker timeout during inference on azure AksWebservice

Issue Description

Hi, I'm trying to deploy a working pipeline using AksWebservice. The pipeline uses multiple steps, such as preprocessing and feature engineering with text data. In one of the steps my code uses a trained ClassificationModel from the simpletransformers.classification package, and on this step I received the following error:

raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

On the other hand, looking into the deployment logs, I got the following error on the web-server side:

WORKER TIMEOUT (pid:34)
  File "/azureml-envs/azureml_c00fef16c70ff38ed2e8ddd833d5b8d0/lib/python3.8/site-packages/gunicorn/app/wsgiapp.py", line 67 in run
  File "/azureml-envs/azureml_c00fef16c70ff38ed2e8ddd833d5b8d0/bin/gunicorn", line 8 in <module>
worker timed out, killing gunicorn
Worker exiting (pid: 34)
Worker with pid 34 was terminated due to signal 9
worker timeout is set to 300
Booting worker with pid: 160

Below is an example of the code used for deployment:

def get_compute_target_kuber_deploy(ws, cloud_compute_name: str = "deployed-example"):
    """Return an existing AKS compute target from the workspace, or provision one.

    Args:
        ws: An azureml ``Workspace`` whose ``compute_targets`` are searched.
        cloud_compute_name: Fallback compute name, used when the
            ``AML_COMPUTE_CLUSTER_NAME`` environment variable is not set.

    Returns:
        The (possibly newly provisioned) AKS ``ComputeTarget``.
    """
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", cloud_compute_name)
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")
    # NOTE(review): removed unused reads of AML_COMPUTE_CLUSTER_MIN_NODES /
    # AML_COMPUTE_CLUSTER_MAX_NODES — AksCompute provisioning below never
    # consumed them (they are AmlCompute, not AksCompute, settings).

    if compute_name in ws.compute_targets:
        aks_target = ws.compute_targets[compute_name]
    else:
        prov_config = AksCompute.provisioning_configuration(vm_size=vm_size,
                                                            agent_count=3,
                                                            location="eastus")
        # BUG FIX: create under the resolved ``compute_name``, not
        # ``cloud_compute_name``. The original looked up ``compute_name``
        # (env-overridable) but created ``cloud_compute_name``, so with the
        # env var set the lookup above would never find the cluster created
        # here and every run would try to provision a new one.
        aks_target = ComputeTarget.create(workspace=ws,
                                          name=compute_name,
                                          provisioning_configuration=prov_config)
        aks_target.wait_for_completion(
            show_output=True,
        )
    return aks_target


def deploy_config_aks(
        ws,
        env,
        aks_target,
        name="aks-service-example",
        linreg_name="linreg_name",
        transformer_name="transformer_name"):
    """Deploy the registered models to AKS as a web service and print its endpoint.

    Args:
        ws: The azureml ``Workspace`` holding the registered models.
        env: The azureml ``Environment`` to run inference in.
        aks_target: The AKS compute target to deploy onto.
        name: Name for the deployed web service.
        linreg_name: Registered model name of the regression model.
        transformer_name: Registered model name of the transformer model.
    """
    inference_config = InferenceConfig(
                        source_directory="./src",
                        entry_script="deploy_src/score.py",
                        environment=env,)

    # https://docs.microsoft.com/ru-ru/python/api/azureml-core/azureml.core.webservice.akswebservice?view=azure-ml-py#azureml-core-webservice-akswebservice-deploy-configuration
    aks_config = AksWebservice.deploy_configuration(
                    cpu_cores=1,
                    memory_gb=1,
                    tags={"data": "site url", "method": "pipeline"},
                    # BUG FIX: the original literal was unterminated
                    # ('Short description,) — a SyntaxError.
                    description='Short description',
                    cpu_cores_limit=2,
                    memory_gb_limit=4,
                    scoring_timeout_ms=300000,
                    # NOTE(review): 1_200_00 == 120000 ms (2 min); the
                    # underscore grouping is unusual — confirm 120_000
                    # was intended rather than 1_200_000.
                    max_request_wait_time=1_200_00,
                    timeout_seconds=3600,
                    token_auth_enabled=False,
                    initial_delay_seconds=720,
                    # period_seconds=1800,
                    )


    linreg_model = Model(ws, linreg_name)
    transformer_model = Model(ws, transformer_name)

    aks_service = Model.deploy(workspace=ws,
                            name=name,
                            models=[linreg_model, transformer_model],
                            inference_config=inference_config,
                            deployment_config=aks_config,
                            deployment_target=aks_target)

    aks_service.wait_for_deployment(show_output=True)
    print(aks_service.state)
    print(aks_service.scoring_uri)

    primary_key, _ = aks_service.get_keys()
    print(f"Primary key: {primary_key}")
    
# Assume `ws` (Workspace) and `env` (Environment) were obtained earlier in the script.
# Provision (or fetch) the AKS compute target, then deploy the service onto it.
# NOTE(review): depends on `ws` and `env` being defined earlier in the script.
aks_target = get_compute_target_kuber_deploy(ws)
deploy_config_aks(ws, env, aks_target,)

So, my questions are: how can we increase the response time allowed for workers? And how can we increase the cpu_cores and memory_gb parameters?

Because the latter gives the following error:

"error": {
        "message": "Deployment request failed due to insufficient compute resource. For the specified compute target, 1 replica cannot be created per specified CPU/Memory configuration(4 CPU Cores, 8GB Memory). You can address this problem by adjusting number of replicas, using a different CPU/memory configuration, or using a different compute target."


Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source