How to fetch files from a Windows shared network drive and upload them to an Azure Data Lake Storage location using Python?

I have a requirement to fetch files with the .xml extension from a Windows shared network drive location and upload them to ADLS (Azure Data Lake Storage) using a Python script (run from PyCharm).

I tried using the below code -

import os
import subprocess

# os.listdir returns a Python list of entry names found in the shared folder.
file_src = os.listdir('\\\\<Shared Dir Server>\\<Directory>')
local_directory="F:\\Files\\*"
sasToken="<SAS Token>"

endpoint="https://<storageAccount>.blob.core.windows.net/<container>/<target directory>"
# NOTE(review): str(file_src) is the textual form of the whole list
# ("['temp1.xml', ...]"), so the command line starts with that text instead of
# the name of a program -- presumably the AzCopy executable was intended here;
# confirm.
# NOTE(review): there is no space between the closing quote of local_directory
# and the opening quote of the endpoint, so the two arguments run together.
# NOTE(review): endpoint and sasToken are joined with no separator; whether a
# "?" is needed depends on how the token is written -- confirm.
copyscript= str(file_src) + " copy " + "\""+ local_directory + "\"" + "\""+endpoint+sasToken + "\"" + " --recursive"

print(copyscript)
# The string is handed to the OS as the command to start; because its first
# token is not an existing executable, process creation fails with
# FileNotFoundError ([WinError 2]).
subprocess.call(copyscript)

But it fails with the following output:

['temp1.xml', 'temp2.xml', 'abc1.xml', 'desf2.xml', 'file.txt'] copy "F:\Files\*""https://<storageAccount>.blob.core.windows.net/<container>/<Target Directory>/sasToken" --recursive
Traceback (most recent call last):
  File "C:\Program Files\PycharmProjects\pythonProject\venv\Upload_SharedDrive_Files.py", line 17, in <module>
    subprocess.call(myscript)
  File "C:\Program Files\Python39\lib\subprocess.py", line 349, in call
    with Popen(*popenargs, **kwargs) as p:
  File "C:\Program Files\Python39\lib\subprocess.py", line 951, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Program Files\Python39\lib\subprocess.py", line 1420, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
FileNotFoundError: [WinError 2] The system cannot find the file specified

Process finished with exit code 1


Solution 1:[1]

I was able to do this using the code below (I am not sure whether this is the best way to do it):

from azure.storage.filedatalake import DataLakeFileClient
from azure.storage.blob import BlobServiceClient
from azure.storage.filedatalake import DataLakeServiceClient
import os
import io
import shutil
import sys

# Connection and location settings; replace every <...> placeholder.
# NOTE(review): the connection string embeds the storage account key --
# consider loading it from outside the source file before sharing this script.
connect_str = (
    "DefaultEndpointsProtocol=https;"
    "AccountName=<storageAccount>;"
    "AccountKey=<storageAccountKey>;"
    "EndpointSuffix=core.windows.net"
)

# ADLS file system (container) and the directory inside it that receives files.
myfilesystem = "<adlsContainer>"
trgt_dir = "<adlsTargetDirectory>"

# Local folder used to stage files before they are uploaded.
myfolder = "F:\\Files"

# Service client shared by the upload helper below.
datalake_service_client = DataLakeServiceClient.from_connection_string(connect_str)

def upload_file_to_directory(trgt, src, filename, filesystem):
    """Upload one local file into a directory of an ADLS file system.

    Uses the module-level ``datalake_service_client`` to reach the storage
    account.

    Args:
        trgt: Path of the target directory inside the file system.
        src: Local directory that contains the file.
        filename: Name of the file; used for both the local file and the
            remote file created under ``trgt``.
        filesystem: Name of the ADLS file system (container).

    Raises:
        OSError: If the local file cannot be opened or read.
    """
    # Read the raw bytes so the uploaded content is identical to the source.
    # Text mode with errors="ignore" would silently drop undecodable bytes and
    # re-encode the rest. The ``with`` block also guarantees the handle is
    # closed.
    with open(os.path.join(src, filename), 'rb') as local_file:
        file_contents = local_file.read()

    # Create the remote file only after the local read succeeded, so a missing
    # or unreadable local file does not leave an empty file behind in ADLS.
    file_system_client = datalake_service_client.get_file_system_client(file_system=filesystem)
    directory_client = file_system_client.get_directory_client(trgt)
    file_client = directory_client.create_file(filename)

    file_client.upload_data(file_contents, overwrite=True)

# Source share and local staging folder. The staging folder is the same
# location the upload loop reads from, so it is taken from ``myfolder`` rather
# than repeated as a second literal.
src = '\\\\<hostServer>\\<sourceDirectory>'
files = os.listdir(src)
dst = myfolder

# Without an existing staging folder, shutil.copy2 would treat ``dst`` as the
# destination file name and write every source file over that single file.
os.makedirs(dst, exist_ok=True)

# Step 1: copy the XML files from the network share into the staging folder.
for file in files:
    # Windows file names are case-insensitive, so accept ".XML" as well.
    if file.lower().endswith('.xml'):
        print(os.path.join(src, file))
        shutil.copy2(os.path.join(src, file), dst)

# Step 2: upload each staged file to ADLS, then delete the local copy.
for fsrc in os.listdir(myfolder):
    # Sub-directories cannot be uploaded as a file or removed with os.remove.
    if not os.path.isfile(os.path.join(myfolder, fsrc)):
        continue
    print(f"Now uploading {fsrc}")
    upload_file_to_directory(trgt_dir,myfolder, fsrc, myfilesystem)
    print(f"Now removing {fsrc}")
    os.remove(os.path.join(myfolder, fsrc))

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Dharman