# Databricks notebook source
import sys, os
import pandas as pd

app_name = "Databricks"
if "BUNDLE_ROOT" in os.environ:
    root_dir = f"{os.environ['BUNDLE_ROOT']}/{app_name}"
else:
    # Derive the repo root by truncating the current working directory at the app name
    root_dir = f"{os.getcwd().split(app_name)[0][:-1]}/{app_name}"
file_root_dir = f"file://{root_dir}"
print(f"Root Dir: {root_dir}")
print(f"File Root Dir: {file_root_dir}")

append_path = f"{root_dir}/src/functions"
if append_path not in sys.path:
    sys.path.append(append_path)

# Append all subfolders under functions/ to sys.path
for root, dirs, files in os.walk(append_path):
    for d in dirs:
        subdir = os.path.join(root, d)
        if subdir not in sys.path:
            sys.path.append(subdir)

# COMMAND ----------

from utils import get_dbutils, get_spark

spark = get_spark()
dbutils = get_dbutils(spark)


def mount_blob_source_managed_identity(storage_account, container, folder):
    spark = get_spark()
    dbutils = get_dbutils(spark)
    # ADLS Gen2 mounts must use the dfs endpoint, not blob.core.windows.net
    source = f"abfss://{container}@{storage_account}.dfs.core.windows.net/{folder}"
    mount_point = f"/mnt/{storage_account}/{container}/{folder}_1"
    print(f"Mounting on {mount_point}")
    # Authenticate with the cluster's managed identity via the Azure IMDS endpoint.
    # The hadoop-azure ABFS driver expects auth type "OAuth" with the MsiTokenProvider;
    # "ManagedIdentity" is not a valid value for fs.azure.account.auth.type.
    configs = {
        "fs.azure.account.auth.type": "OAuth",
        "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider",
        "fs.azure.account.oauth2.msi.endpoint": "http://169.254.169.254/metadata/identity/oauth2/token",
    }
    # Mount only if the directory is not already mounted
    if not any(mount.mountPoint == mount_point for mount in dbutils.fs.mounts()):
        dbutils.fs.mount(source=source, mount_point=mount_point, extra_configs=configs)
    else:
        print("MountPoint already exists")


for i in dbutils.fs.mounts():
    if "/mnt/mscsta" in i.mountPoint:
        print(i)

# COMMAND ----------

df = spark.read.option("header", "true").csv(
    f"{file_root_dir}/assets/mdf_mount_points.csv"
)

# COMMAND ----------

display(df)

# COMMAND ----------

from azure.identity import ClientSecretCredential
from azure.storage.blob import BlobServiceClient

# COMMAND ----------

def get_azure_access_token(dbutils, scope):
    tenant_id = dbutils.secrets.get(scope=scope, key="MSC-APP-MMDF-DBR-tenant-id")
    client_id = dbutils.secrets.get(scope=scope, key="MSC-APP-MMDF-DBR-client-id")
    client_secret = dbutils.secrets.get(scope=scope, key="MSC-APP-MMDF-DBR")
    csc = ClientSecretCredential(tenant_id, client_id, client_secret)
    # 2ff814a6-... is the first-party application ID of the Azure Databricks resource
    token = csc.get_token("2ff814a6-3304-4ab8-85cb-cd0e6f879c1d/.default")
    print(f"Authenticating as Service Principal with client_id: {client_id}")
    return token.token

# COMMAND ----------

# Key Vault scope
KV_SCOPE = "MSC-KVA-MMDF"
# Service principal Key Vault keys
TENANT_ID_KEY = "MSC-APP-MMDF-DBR-tenant-id"
CLIENT_ID_KEY = "MSC-APP-MMDF-DBR-client-id"
CLIENT_SECRET_KEY = "MSC-APP-MMDF-DBR"
STORAGE_ACCOUNT_ACCESS_KEY = "STA-MDF-LOG-AK"

# Mount every row in the DataFrame
for row in df.collect():
    ENDPOINT = row["Endpoint"]
    STORAGE_ACCOUNT = dbutils.secrets.get(
        scope=KV_SCOPE, key=f"STA-{row['StorageAccount']}-NAME"
    )
    CONTAINER = row["Container"]
    FOLDER = row["Folder"]
    if FOLDER is None:
        FOLDER = ""
    print(f"----Mounting for {row['MountPoint']}----")
    mount_blob_source_managed_identity(
        STORAGE_ACCOUNT,
        CONTAINER,
        FOLDER,
    )
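# COMMAND ----------

# Optional sanity check: a minimal sketch, not part of the original flow. It lists
# the mount points created by the loop above (the "_1" suffix matches the pattern
# used in mount_blob_source_managed_identity) and probes the first one with
# dbutils.fs.ls, since a mount can register successfully while the managed identity
# still lacks data-plane access. The prefix/suffix filters are assumptions; adjust
# them to match your naming.
created_mounts = [
    m.mountPoint
    for m in dbutils.fs.mounts()
    if m.mountPoint.startswith("/mnt/") and m.mountPoint.endswith("_1")
]
print(f"Found {len(created_mounts)} mount point(s):")
for mp in created_mounts:
    print(f"  {mp}")

if created_mounts:
    try:
        entries = dbutils.fs.ls(created_mounts[0])
        print(f"{created_mounts[0]}: {len(entries)} entries visible")
    except Exception as e:
        print(f"Could not list {created_mounts[0]}: {e}")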