You are on page 1 of 4

# (log label, DATAFRAME_ID value, global temp view name) for every slice of
# the incoming micro-batch.  The labels are the variable names the original
# code printed in its per-slice count messages.
_MICRO_BATCH_SLICES = (
    ("microBatchDF_bus_summary", "1", "BusinessSummary_V"),
    ("microBatchDF_add_summary", "2", "AddressSummary_V"),
    ("microBatchDF_customer_account", "3", "CustomerAccount_V"),
    ("microBatchDF_ebi_model", "4", "Model_Classification_V"),
    ("microBatchDF_business_xref", "5", "BusinessXref_V"),
    ("microBatchDF_building_summary", "6", "BuildingSummary_V"),
)

# Collapses the customer-account rows to one row per ATHENA_BUSINESS_ID by
# taking max() of every product flag / account attribute.  The key is renamed
# to ATHENA_BUSINESS_ID_CA so it does not clash with BSV.ATHENA_BUSINESS_ID
# in the join below.
_CUSTOMER_ACCOUNT_AGG_SQL = """
SELECT
    ATHENA_BUSINESS_ID as ATHENA_BUSINESS_ID_CA,
    max(CMCST_HAS_BCI) as CMCST_HAS_BCI,
    max(CMCST_HAS_BCTV) as CMCST_HAS_BCTV,
    max(CMCST_HAS_BCV) as CMCST_HAS_BCV,
    max(CMCST_HAS_VES) as CMCST_HAS_VES,
    max(CMCST_HAS_METRO_E) as CMCST_HAS_METRO_E,
    max(CMCST_HAS_ADV_VOICE) as CMCST_HAS_ADV_VOICE,
    max(CMCST_HAS_BVE) as CMCST_HAS_BVE,
    max(CMCST_HAS_PRI) as CMCST_HAS_PRI,
    max(CMCST_HAS_SIP_TRUNK) as CMCST_HAS_SIP_TRUNK,
    max(CMCST_HAS_TELEWORKER) as CMCST_HAS_TELEWORKER,
    max(CMCST_HAS_SMART_OFFICE) as CMCST_HAS_SMART_OFFICE,
    max(CMCST_HAS_WIFI_PRO) as CMCST_HAS_WIFI_PRO,
    max(CMCST_HAS_PRIVATE_VIEW) as CMCST_HAS_PRIVATE_VIEW,
    max(CMCST_HAS_PUBLIC_VIEW) as CMCST_HAS_PUBLIC_VIEW,
    max(CMCST_HAS_DATA) as CMCST_HAS_DATA,
    max(CMCST_HAS_VIDEO) as CMCST_HAS_VIDEO,
    max(CMCST_HAS_VOICE) as CMCST_HAS_VOICE,
    max(CMCST_HAS_CONNECTION_PRO) as CMCST_HAS_CONNECTION_PRO,
    max(CMCST_IS_MS_O365) as CMCST_IS_MS_O365,
    max(CMCST_ACCOUNT_STATUS) as CMCST_ACCOUNT_STATUS,
    max(CMCST_ACCOUNT_NUMBER) as CMCST_ACCOUNT_NUMBER
FROM global_temp.CustomerAccount_V
GROUP BY ATHENA_BUSINESS_ID
"""

# Denormalised "initial load" projection: one row per business-summary row,
# enriched from the other slices, with NULLs replaced by the literal string
# 'NULL' (or '9999-12-31' for the two date columns that used that sentinel).
#
# NOTE(review): the original statement was incomplete in several places and
# could not run as written.  The reconstruction below makes these choices;
# please confirm each one against the intended schema:
#   1. Aliases BXREF and BLD were selected from but never joined.  LEFT JOINs
#      on BusinessXref_V / BuildingSummary_V were added.  The join keys
#      (ATHENA_BUSINESS_ID and NAX_BUILDING_ID) are ASSUMPTIONS - TODO confirm,
#      and confirm the xref join cannot multiply rows.
#   2. The CASE comparing ATHENA_BUSINESS_ID with ATHENA_BUSINESS_HQ_ULTIMATE
#      had no END and no alias.  ATHENA_BUSINESS_IS_HQ is a placeholder name.
#   3. ATHENA_ADDRESS_IS_DO_NOT_MAIL is emitted twice (from BSV and from ADV),
#      exactly as in the original.  The duplicate column name is left
#      untouched here; decide which source should win.
#   4. GEO_LOCATION: CONCAT_WS takes the separator as its FIRST argument; the
#      original passed ',' last, which would have used the latitude as the
#      separator.
#   5. NAX_LAT / NAX_LONG read BLD.ATHENA_BUILDING_LAT / _LONG while
#      GEO_LOCATION reads BLD.NAX_BUILDING_LAT / _LONG.  Kept as in the
#      original; verify both column pairs exist.
_INITIAL_LOAD_SQL = """
SELECT
    COALESCE(BSV.ATHENA_ADDRESS_ID,'NULL') as ATHENA_ADDRESS_ID,
    COALESCE(BSV.ATHENA_BUSINESS_ID,'NULL') as ATHENA_BUSINESS_ID,
    COALESCE(BSV.NAX_BUILDING_ID,'NULL') as ATHENA_BUILDING_ID,
    COALESCE(BSV.NAX_ADDRESS_ID,'NULL') as NAX_ADDRESS_ID,
    COALESCE(ADV.ATHENA_ADDRESS,'NULL') as ATHENA_ADDRESS,
    COALESCE(ADV.ATHENA_ADDRESS_2,'NULL') as ATHENA_ADDRESS_2,
    COALESCE(ADV.ATHENA_CITY,'NULL') as ATHENA_CITY,
    COALESCE(ADV.ATHENA_STATE,'NULL') as ATHENA_STATE,
    COALESCE(ADV.ATHENA_ZIP_5,'NULL') as ATHENA_ZIP_5,
    COALESCE(ADV.MDT_SELLABILITY_COLOR_COAX,'NULL') as MDT_SELLABILITY_COLOR_COAX,
    COALESCE(ADV.MDT_SELLABILITY_COLOR_FIBER,'NULL') as MDT_SELLABILITY_COLOR_FIBER,
    COALESCE(BLD.ATHENA_BUILDING_LAT,'NULL') as NAX_LAT,
    COALESCE(BLD.ATHENA_BUILDING_LONG,'NULL') as NAX_LONG,
    COALESCE(BSV.ATHENA_BUSINESS_NAME,'NULL') as ATHENA_BUSINESS_NAME,
    COALESCE(BSV.ATHENA_BUSINESS_LEGAL_NAME,'NULL') as ATHENA_BUSINESS_LEGAL_NAME,
    COALESCE(BSV.BUSINESS_COUNTRY,'NULL') as BUSINESS_COUNTRY,
    COALESCE(BSV.ATHENA_SITE_EST_TOTAL_SPEND,'NULL') as ATHENA_SITE_EST_TOTAL_SPEND,
    COALESCE(BSV.ATHENA_ADDRESS_IS_HYPERBUILD,'NULL') as ATHENA_ADDRESS_IS_HYPERBUILD,
    COALESCE(BSV.CMCST_STATUS,'NULL') as CMCST_STATUS,
    COALESCE(BSV.ATHENA_SITE_EST_HEADCOUNT,'NULL') as ATHENA_SITE_EST_HEADCOUNT,
    COALESCE(BSV.ATHENA_MKTG_SEGMENT,'NULL') as ATHENA_MKTG_SEGMENT,
    COALESCE(BSV.ATHENA_MKTG_SUBSEGMENT,'NULL') as ATHENA_MKTG_SUBSEGMENT,
    COALESCE(BSV.ATHENA_MKTG_INDUSTRY,'NULL') as ATHENA_MKTG_INDUSTRY,
    COALESCE(BSV.ATHENA_MKTG_SUBINDUSTRY,'NULL') as ATHENA_MKTG_SUBINDUSTRY,
    COALESCE(BSV.ATHENA_PROVIDER_DATA,'NULL') as ATHENA_PROVIDER_DATA,
    COALESCE(BSV.ATHENA_PROVIDER_VOICE,'NULL') as ATHENA_PROVIDER_VOICE,
    COALESCE(BSV.ATHENA_ADDRESS_IS_DO_NOT_MAIL,'NULL') as ATHENA_ADDRESS_IS_DO_NOT_MAIL,
    COALESCE(BSV.SDWAN_LIKELY,'NULL') as SDWAN_LIKELY,
    COALESCE(BSV.BVE_LIKELY,'NULL') as BVE_LIKELY,
    COALESCE(BSV.WBSTS_ADDR,'NULL') as WBSTS_ADDR,
    COALESCE(BSV.DONOTKNOCK_IND,'NULL') as DONOTKNOCK_IND,
    COALESCE(BSV.MDU,'NULL') as MDU,
    COALESCE(CAV.CMCST_ACCOUNT_STATUS,'NULL') as CMCST_ACCOUNT_STATUS,
    COALESCE(CONCAT_WS(',',BLD.NAX_BUILDING_LAT,BLD.NAX_BUILDING_LONG),'NULL') as GEO_LOCATION,
    COALESCE(BSV.BLDGBB_CARRIER1,'NULL') as BLDGBB_CARRIER1,
    COALESCE(BSV.BLDGBB_CARRIER2,'NULL') as BLDGBB_CARRIER2,
    COALESCE(BSV.BLDGBB_CARRIER3,'NULL') as BLDGBB_CARRIER3,
    COALESCE(BSV.CMCST_FOOTPRINT,'NULL') as CMCST_FOOTPRINT,
    COALESCE(BSV.CMCST_DIVISION,'NULL') as CMCST_DIVISION,
    COALESCE(BSV.CMCST_REGION,'NULL') as CMCST_REGION,
    COALESCE(BSV.CMCST_CORPSYSPRIN,'NULL') as CMCST_CORPSYSPRIN,
    COALESCE(BSV.ATHENA_ADDRESS_IS_USPS_RESI,'NULL') as ATHENA_ADDRESS_IS_USPS_RESI,
    COALESCE(ADV.ATHENA_ADDRESS_DO_NOT_MAIL_DT,'NULL') as ATHENA_ADDRESS_DO_NOT_MAIL_DT,
    COALESCE(BSV.ATHENA_BUSINESS_IS_DO_NOT_MAIL,'NULL') as ATHENA_BUSINESS_IS_DO_NOT_MAIL,
    COALESCE(BSV.ATHENA_BUSINESS_DO_NOT_MAIL_DT,'9999-12-31') as ATHENA_BUSINESS_DO_NOT_MAIL_DT,
    COALESCE(BSV.ATHENA_BUSINESS_HQ_ULTIMATE,'NULL') as ATHENA_BUSINESS_HQ_ID,
    COALESCE(BSV.ATHENA_BUSINESS_RANK,'NULL') as ATHENA_BUSINESS_RANK,
    COALESCE(BSV.ATHENA_SITE_FIRM_REVENUE,'NULL') as ATHENA_SITE_FIRM_REVENUE,
    COALESCE(BSV.ATHENA_HQ_FIRM_REVENUE,'NULL') as ATHENA_HQ_FIRM_REVENUE,
    COALESCE(BSV.ATHENA_SITE_EST_DS0_DATA,'NULL') as ATHENA_SITE_EST_DS0_DATA,
    COALESCE(BSV.ATHENA_SITE_EST_DS0_VOICE,'NULL') as ATHENA_SITE_EST_DS0_VOICE,
    COALESCE(BSV.ATHENA_HQ_EST_HEADCOUNT,'NULL') as ATHENA_HQ_EST_HEADCOUNT,
    COALESCE(BSV.ATHENA_SITE_EST_DATA_SPEND,'NULL') as ATHENA_SITE_EST_DATA_SPEND,
    COALESCE(BSV.ATHENA_SITE_EST_VOICE_SPEND,'NULL') as ATHENA_SITE_EST_VOICE_SPEND,
    COALESCE(BSV.ATHENA_HQ_EST_TOTAL_SPEND,'NULL') as ATHENA_HQ_EST_TOTAL_SPEND,
    COALESCE(BSV.ATHENA_HQ_EST_DATA_SPEND,'NULL') as ATHENA_HQ_EST_DATA_SPEND,
    COALESCE(BSV.ATHENA_HQ_EST_VOICE_SPEND,'NULL') as ATHENA_HQ_EST_VOICE_SPEND,
    COALESCE(BSV.ATHENA_YEARS_IN_BUSINESS,'NULL') as ATHENA_YEARS_IN_BUSINESS,
    COALESCE(BSV.CMCST_CUSTOMER_DISCONNECT_DATE,'9999-12-31') as CMCST_CUSTOMER_DISCONNECT_DATE,
    COALESCE(BSV.ATHENA_MKTG_SIC4,'NULL') as ATHENA_MKTG_SIC4,
    COALESCE(BSV.SFDC_IS_LEAD,'NULL') as SFDC_IS_LEAD,
    COALESCE(BSV.HYPERBUILD_ACTIVATION_DATE,'NULL') as HYPERBUILD_ACTIVATION_DATE,
    COALESCE(BSV.ATHENA_BUSINESS_IS_INDIRECT,'NULL') as ATHENA_BUSINESS_IS_INDIRECT,
    COALESCE(BSV.ATHENA_MKTG_SIC4_DESC,'NULL') as ATHENA_MKTG_SIC4_DESC,
    COALESCE(BSV.CMCST_STATUS_CB,'NULL') as CMCST_STATUS_CB,
    COALESCE(BXREF.SOURCE_SITE_ID,'NULL') as SF_ACCOUNT_SITEID,
    COALESCE(BXREF.SOURCE_ID,'NULL') as SF_LEADID,
    COALESCE(BSV.ATHENA_SALES_CHANNEL,'NULL') as ATHENA_SALES_CHANNEL,
    COALESCE(CAV.CMCST_HAS_BCI,'NULL') as CMCST_HAS_BCI,
    COALESCE(CAV.CMCST_HAS_BCTV,'NULL') as CMCST_HAS_BCTV,
    COALESCE(CAV.CMCST_HAS_BCV,'NULL') as CMCST_HAS_BCV,
    COALESCE(CAV.CMCST_HAS_VES,'NULL') as CMCST_HAS_VES,
    COALESCE(CAV.CMCST_HAS_METRO_E,'NULL') as CMCST_HAS_METRO_E,
    COALESCE(CAV.CMCST_HAS_ADV_VOICE,'NULL') as CMCST_HAS_ADV_VOICE,
    COALESCE(CAV.CMCST_HAS_BVE,'NULL') as CMCST_HAS_BVE,
    COALESCE(CAV.CMCST_HAS_PRI,'NULL') as CMCST_HAS_PRI,
    COALESCE(CAV.CMCST_HAS_SIP_TRUNK,'NULL') as CMCST_HAS_SIP_TRUNK,
    COALESCE(CAV.CMCST_HAS_TELEWORKER,'NULL') as CMCST_HAS_TELEWORKER,
    COALESCE(CAV.CMCST_HAS_SMART_OFFICE,'NULL') as CMCST_HAS_SMART_OFFICE,
    COALESCE(CAV.CMCST_HAS_WIFI_PRO,'NULL') as CMCST_HAS_WIFI_PRO,
    COALESCE(CAV.CMCST_HAS_PRIVATE_VIEW,'NULL') as CMCST_HAS_PRIVATE_VIEW,
    COALESCE(CAV.CMCST_HAS_PUBLIC_VIEW,'NULL') as CMCST_HAS_PUBLIC_VIEW,
    COALESCE(CAV.CMCST_HAS_DATA,'NULL') as CMCST_HAS_DATA,
    COALESCE(CAV.CMCST_HAS_VIDEO,'NULL') as CMCST_HAS_VIDEO,
    COALESCE(CAV.CMCST_HAS_VOICE,'NULL') as CMCST_HAS_VOICE,
    COALESCE(CAV.CMCST_HAS_CONNECTION_PRO,'NULL') as CMCST_HAS_CONNECTION_PRO,
    COALESCE(CAV.CMCST_IS_MS_O365,'NULL') as CMCST_IS_MS_O365,
    COALESCE(BSV.STD_BUILDING_ID,'NULL') as STD_BUILDING_ID,
    COALESCE(ADV.ATHENA_ADDRESS_IS_DO_NOT_MAIL,'NULL') as ATHENA_ADDRESS_IS_DO_NOT_MAIL,
    COALESCE(CASE WHEN EBI.EBI_MODEL_CLASSIFICATION = 'Gold' THEN 'Y' ELSE 'N' END,'NULL') as HIGH_VALUE_IND,
    COALESCE(CASE WHEN BSV.ATHENA_BUSINESS_ID = BSV.ATHENA_BUSINESS_HQ_ULTIMATE THEN 'Y' ELSE 'N' END,'NULL') as ATHENA_BUSINESS_IS_HQ,
    COALESCE(CAV.CMCST_ACCOUNT_NUMBER,'NULL') as CMCST_ACCOUNT_NUMBER,
    COALESCE(EBI.SOURCE_ID,'NULL') as SOURCE_ID,
    COALESCE(EBI.SOURCE_SITE_ID,'NULL') as SOURCE_SITE_ID,
    null as SFDC_CLOSED_LOST_REASON_DESC
FROM global_temp.BusinessSummary_V BSV
LEFT JOIN global_temp.AddressSummary_V ADV
    ON BSV.NAX_ADDRESS_ID = ADV.NAX_ADDRESS_ID
LEFT JOIN global_temp.CustomerAccount_Agg_V CAV
    ON BSV.ATHENA_BUSINESS_ID = CAV.ATHENA_BUSINESS_ID_CA
LEFT JOIN global_temp.Model_Classification_V EBI
    ON BSV.ATHENA_BUSINESS_ID = EBI.ATHENA_BUSINESS_ID
LEFT JOIN global_temp.BusinessXref_V BXREF
    ON BSV.ATHENA_BUSINESS_ID = BXREF.ATHENA_BUSINESS_ID
LEFT JOIN global_temp.BuildingSummary_V BLD
    ON BSV.NAX_BUILDING_ID = BLD.NAX_BUILDING_ID
"""


def microBatchOpn_AMME_ini(microBatchDF1, batchId):
    """Split one micro-batch by DATAFRAME_ID, join the slices and preview the result.

    The incoming DataFrame is filtered into six slices on its DATAFRAME_ID
    column ('1'..'6'); each slice is counted, logged, and registered as a
    global temp view.  The customer-account slice is aggregated to one row per
    ATHENA_BUSINESS_ID, and all views are then LEFT JOINed onto the
    business-summary slice.  The first three rows of the joined result are
    printed with ``show``; the Elasticsearch write is currently disabled.

    The (DataFrame, batchId) signature looks like a Structured Streaming
    ``foreachBatch`` handler - presumably that is how it is wired up; confirm
    against the caller.

    Relies on file-level names ``spark``, ``expr`` and ``current_ts``.

    Args:
        microBatchDF1: Spark DataFrame containing a DATAFRAME_ID column plus
            the columns referenced by the two SQL statements above.
        batchId: Identifier of the micro-batch; only used in the log output.

    Returns:
        None.
    """
    # The batch is scanned once per slice count and again by the joins, so it
    # is cached for the duration of this call and released in ``finally``
    # (the original never unpersisted it).
    microBatchDF1.cache()
    try:
        microBatchDF1_cnt = microBatchDF1.count()
        print(
            "----- Inside microBatchOpn_AMME: Starting for batchId:{} at:{} "
            "----------------------> ".format(batchId, current_ts())
        )
        print(
            "Inside microBatchOpn_AMME: microBatchDF1 count={} at: {}".format(
                microBatchDF1_cnt, current_ts()
            )
        )

        # Slice, log and register each logical source carried in the batch.
        for label, dataframe_id, view_name in _MICRO_BATCH_SLICES:
            slice_df = microBatchDF1.filter(
                expr("DATAFRAME_ID = '{}'".format(dataframe_id))
            )
            print(
                "Inside microBatchOpn_AMME: {} count={} at: {}".format(
                    label, slice_df.count(), current_ts()
                )
            )
            slice_df.createOrReplaceGlobalTempView(view_name)

        # One row per business for the customer-account attributes; must be
        # registered before the main query, which joins it as CAV.
        microBatchDF_customer_account_agg = spark.sql(_CUSTOMER_ACCOUNT_AGG_SQL)
        microBatchDF_customer_account_agg.createOrReplaceGlobalTempView(
            "CustomerAccount_Agg_V"
        )
        # microBatchDF_customer_account_agg.show(3)

        initial_load_df = spark.sql(_INITIAL_LOAD_SQL)
        initial_load_df.show(3)

        # Elasticsearch write / read-back, disabled in the original as well:
        # initial_load_df.write.format("org.elasticsearch.spark.sql")\
        # .option("es.nodes.wan.only","true")\
        # .option("es.port","443")\
        # .option("es.net.ssl","true")\
        # .option("es.nodes", esURL)\
        # .option("es.spark.dataframe.write.null","true")\
        # .option("es.index.auto.create", "true")\
        # .mode("Overwrite").save("smartlist_1")

        # print('write smartlist success')

        # reader=spark.read.format("org.elasticsearch.spark.sql")\
        # .option("es.nodes.wan.only","true")\
        # .option("es.port","443")\
        # .option("es.net.ssl","true")\
        # .option("es.nodes", esURL)

        # df = reader.load("smartlist_1")
        # print('read smartlist success')
        # df.show(2)
        # display(df)

        print(current_ts())
    finally:
        microBatchDF1.unpersist()

You might also like