savestate before github

This commit is contained in:
Rene Kaßeböhmer
2025-07-09 11:15:51 +02:00
parent 294c640096
commit b09b136ad7
14 changed files with 392 additions and 26 deletions

View File

@ -1,5 +1,7 @@
import pandas as pd
from tqdm import tqdm
import os
import shutil
country_mapping = {
'NL': 'Netherlands',
@ -45,6 +47,9 @@ read_df_qualificationprofile = pd.read_csv('../1_extract_data/results/SCQualific
# Parser settings shared by the extract reads below: the first row is the
# header, every column is loaded as text, and blank cells stay '' instead of
# being converted to NaN.
_extract_csv_opts = {'header': 0, 'keep_default_na': False, 'dtype': str}

read_df_qualificationprofileitem = pd.read_csv(
    '../1_extract_data/results/SCQualificationProfileItem__c.csv',
    **_extract_csv_opts,
)
read_df_lastmaintenance = pd.read_csv(
    '../1_extract_data/results/LastMaintenanceInformation.csv',
    **_extract_csv_opts,
)
read_df_worktype = pd.read_csv(
    '../1_extract_data/results/WorkType.csv',
    **_extract_csv_opts,
)
read_df_contractitem = pd.read_csv(
    '../1_extract_data/results/SCContractItem__c.csv',
    **_extract_csv_opts,
)

# Only the entry Id and the product code are loaded from the price book
# entries; the frame is used as a lookup table for the contract line items.
df_pricebookentry = pd.read_csv(
    '../1_extract_data/results/PricebookEntry.csv',
    usecols=['Id', 'Product2.Product_Code__c'],
    **_extract_csv_opts,
)

# Stock items are restricted to the columns listed here.
read_df_stockitem = pd.read_csv(
    '../1_extract_data/results/SCStockItem__c.csv',
    usecols=[
        'Id', 'PlantERP__c', 'StockERP__c', 'Stock__r.Name__c',
        'Stock__r.RunReplenishment__c', 'Stock__r.StockERP__c',
        'ArticleNo__c', 'ArticleNameCalc__c', 'Qty__c', 'ERPQty__c',
        'DifferenceQty__c', 'MinQty__c', 'MaxQty__c', 'LastModifiedDate',
        'Stock__c', 'Article__r.EANCode__c', 'Article__r.Name', 'Article__c',
    ],
    **_extract_csv_opts,
)

# Columns for reindexing
reindex_columns = [
    'Id', 'City__c', 'Country__c', 'GeoY__c', 'GeoX__c', 'PostalCode__c',
    'Street__c', 'Extension__c', 'HouseNo__c', 'FlatNo__c', 'Floor__c',
]
@ -53,16 +58,17 @@ reindex_columns_product2 = ['Id','Main_Product_Group__c','Family','MaterialType_
# Column selections (and their order) applied to each extract before mapping.
# Each list is assigned exactly once; the superseded definitions of the
# resource-assignment and service-contract lists (without
# 'ResourceNumberCalc__c' / 'Template__r.SAPContractCategory__c') were dead
# assignments and have been dropped.
reindex_columns_ibr = ['Id', 'InstalledBaseLocation__c', 'Role__c', 'ValidFrom__c', 'ValidTo__c', 'Account__c']
reindex_columns_pricelist = ['Id', 'Name', 'Brand__r.Name', 'Country__c']
reindex_columns_pricelistitem = ['Id', 'Article__r.Name', 'Article__r.EANCode__c', 'Price__c', 'PriceUnit__c', 'Pricelist__c', 'ValidFrom__c', 'ValidTo__c', 'Pricelist__r.Brand__r.Name', 'Pricelist__r.Country__c']
# 'ResourceNumberCalc__c' is merged onto the stock items via 'Stock__c'.
reindex_columns_resourceassignment = [
    'Id', 'ValidTo__c', 'ValidFrom__c', 'Country__c', 'City__c',
    'PostalCode__c', 'District__c', 'Street__c', 'HouseNo__c', 'Extension__c',
    'FlatNo__c', 'Floor__c', 'GeoY__c', 'GeoX__c', 'Resource__c',
    'Resource__r.Employee__r.Name', 'ResourceNumberCalc__c', 'Stock__c',
    'Stock__r.ID2__c',
]
reindex_columns_address_iot = ['Id', 'Country', 'CountryCode', 'Street', 'City', 'ParentId', 'PostalCode']
reindex_columns_location_iot = ['Id', 'Name']
reindex_columns_servicecontracttemplates = ['Id', 'Name', 'TemplateName__c', 'Status__c', 'Brand__r.Name', 'Country__c', 'Runtime__c']
# 'Template__r.SAPContractCategory__c' drives both the maintenance-plan filter
# and the contract-type mapping of the service contracts.
reindex_columns_servicecontracts = [
    'Id', 'Name', 'Template__c', 'Status__c', 'Brand__r.Name', 'Country__c',
    'Runtime__c', 'EndDate__c', 'StartDate__c', 'Account__c',
    'AccountOwner__c', 'IoT_Registration_Status__c',
    'Template__r.SAPContractCategory__c',
    'Maintenance_price_inclusion_of_VAT__c', 'MaintenanceAfterDue__c',
    'MaintenanceBeforeDue__c', 'MaintenanceCreationLeadtime__c',
    'MaintenanceCreationShiftFactor__c', 'MaintenanceDuration__c',
    'MaintenanceFirstDate__c', 'MaintenanceLastDate__c',
    'MaintenanceInterval__c', 'MaintenancePriceRequired__c',
    'util_MaintenancePrice__c', 'util_MaintenanceDuration__c',
    'util_MaintenanceNextDate__c',
]
reindex_columns_warrantyterm = ['Id', 'WarrantyTermName', 'WarrantyDuration', 'WarrantyType', 'Pricebook2']
reindex_columns_qualificationprofile = ['Id', 'Name', 'Description__c', 'ID2__c', 'Standard__c']
reindex_columns_qualificationprofileitem = ['CertificationId__c', 'Id', 'Info__c', 'Level__c', 'ValidFrom__c', 'ValidTo__c', 'Name', 'Qualification__c']
reindex_columns_lastmaintenance = ['Id', 'InstalledBase__r.SerialNo__c', 'Order__r.Closed__c']
reindex_columns_worktype = ['Id', 'Name']
reindex_columns_contractitem = ['Id', 'Name', 'InstalledBase__c', 'Contract__c']
# Bring the address extract into the expected column layout
df = read_df.reindex(columns=reindex_columns)
@ -81,6 +87,7 @@ df_qualificationprofile = read_df_qualificationprofile.reindex(reindex_columns_q
# Same column-layout step for the remaining extracts, using the keyword form
# of reindex.
df_qualificationprofileitem = read_df_qualificationprofileitem.reindex(
    columns=reindex_columns_qualificationprofileitem
)
df_lastmaintenance = read_df_lastmaintenance.reindex(
    columns=reindex_columns_lastmaintenance
)
df_worktype = read_df_worktype.reindex(columns=reindex_columns_worktype)
df_contractitem = read_df_contractitem.reindex(
    columns=reindex_columns_contractitem
)
##--------------------------------------------------------------------------##
## Update for IoT Addresses and Locations
@ -563,14 +570,16 @@ df_servicecontracttemplates['Term'] = df_servicecontracttemplates['Term'].fillna
## Service Contract
##--------------------------------------------------------------------------##
# Create the maintenance plan dataframe from the service contracts, keeping
# only rows whose Template__r.SAPContractCategory__c is one of
# 'BM10', 'FC10', 'MP20', 'WX40'.
# The frame is built once: an earlier unfiltered copy was overwritten
# immediately and has been removed.
maintenance_plan_categories = ['BM10', 'FC10', 'MP20', 'WX40']
maintenance_plan_columns = [
    'Id', 'StartDate__c', 'EndDate__c', 'Account__c',
    'Maintenance_price_inclusion_of_VAT__c', 'MaintenanceAfterDue__c',
    'MaintenanceBeforeDue__c', 'MaintenanceCreationLeadtime__c',
    'MaintenanceCreationShiftFactor__c', 'MaintenanceDuration__c',
    'MaintenanceFirstDate__c', 'MaintenanceLastDate__c',
    'MaintenanceInterval__c', 'MaintenancePriceRequired__c',
    'util_MaintenancePrice__c', 'util_MaintenanceDuration__c',
    'util_MaintenanceNextDate__c',
]
# .copy() detaches the result so later column renames do not touch
# df_servicecontract.
df_maintenanceplan = df_servicecontract.loc[
    df_servicecontract['Template__r.SAPContractCategory__c'].isin(maintenance_plan_categories),
    maintenance_plan_columns,
].copy()
# Drop maintenance fields from service contract dataframe, except Id
maintenance_fields = ['Maintenance_price_inclusion_of_VAT__c', 'MaintenanceAfterDue__c',
@ -591,7 +600,17 @@ df_servicecontract['Pricebook2.Name'] = (
# Brand name is no longer needed once Pricebook2.Name has been derived.
df_servicecontract = df_servicecontract.drop('Brand__r.Name', axis=1)

# Map Template__r.SAPContractCategory__c values to new names.
# This must happen BEFORE the positional column rename below, because the
# rename replaces the source column name with 'FSL_Type_of_Contract__c'.
# Codes not listed here are left unchanged by replace().
contract_category_mapping = {
    "BM10": "Basic Maintenance Contract",
    "FC10": "Basic Maintenance Contract",
    "MP20": "Basic Plus Maintenance Contract",
    "WX40": "Warranty Extension Contract",
    "RD30": "IoT",
}
df_servicecontract["Template__r.SAPContractCategory__c"] = df_servicecontract["Template__r.SAPContractCategory__c"].replace(contract_category_mapping)

# Positional rename to the target field names (13 columns, including the
# contract category). The previous 12-name assignment that ran ahead of the
# mapping did not account for the category column and has been removed.
df_servicecontract.columns = [
    'PKey__c', 'Name', 'TemplateId__r.PKey__c', 'Status', 'BillingCountryCode',
    'Term', 'EndDate', 'StartDate', 'AccountId', 'Service_Recipient__c',
    'IoT_Registration_Status__c', 'FSL_Type_of_Contract__c', 'Pricebook2.Name',
]

# Blank registration status defaults to 'Open'.
df_servicecontract['IoT_Registration_Status__c'] = df_servicecontract['IoT_Registration_Status__c'].replace('', 'Open')
#df_servicecontract['Name'] = df_servicecontract['PKey__c']
@ -603,10 +622,42 @@ df_servicecontract['TemplateCountry__c'] = df_servicecontract['BillingCountryCod
# Extract the first run of digits from Term and convert it to an integer;
# values without digits (or otherwise unparsable) become 0.
# The pattern is a raw string so that \d is not treated as an (invalid)
# Python string escape sequence.
df_servicecontract['Term'] = pd.to_numeric(df_servicecontract['Term'].str.extract(r'(\d+)')[0], errors='coerce')
df_servicecontract['Term'] = df_servicecontract['Term'].fillna(0).astype(int)
##--------------------------------------------------------------------------##
## ContractLineItem
##--------------------------------------------------------------------------##
# Start from the contract items, rename the columns positionally to the
# target field names, and keep only the asset and service contract keys.
df_contractlineitem = df_contractitem.copy()
df_contractlineitem.columns = ['Id', 'Name', 'Asset.PKey__c', 'ServiceContract.PKey__c']
df_contractlineitem = df_contractlineitem.drop(columns=['Id', 'Name'])

# Look up Product2.Product_Code__c for every line item through its asset key;
# the helper key column from the installed base frame is dropped afterwards.
asset_product_codes = merged_df_ib[['PKey__c', 'Product2.Product_Code__c']]
df_contractlineitem = df_contractlineitem.merge(
    asset_product_codes,
    how='left',
    left_on='Asset.PKey__c',
    right_on='PKey__c',
).drop(columns='PKey__c')

# Resolve the PricebookEntry Id from the product code and expose it as
# PricebookEntryId.
pricebook_lookup = df_pricebookentry[['Product2.Product_Code__c', 'Id']]
df_contractlineitem = df_contractlineitem.merge(
    pricebook_lookup,
    how='left',
    on='Product2.Product_Code__c',
).rename(columns={'Id': 'PricebookEntryId'})

print(df_contractlineitem)
##--------------------------------------------------------------------------##
## MaintenancePlan, MaintenanceAsset
##--------------------------------------------------------------------------##
print(df_maintenanceplan.columns)
df_maintenanceplan.columns = ['ServiceContract.PKey__c', 'StartDate', 'EndDate', 'AccountId', 'MaintenancePriceInclusionOfVAT__c',
'MaintenanceWindowEndDays', 'MaintenanceWindowStartDays', 'GenerationTimeframe',
@ -631,21 +682,77 @@ maintenance_worktype_id = df_worktype[df_worktype['Name'] == 'Planned Maintenanc
# Add WorkTypeId to maintenance plan
df_maintenanceplan['WorkTypeId'] = maintenance_worktype_id

print(df_maintenanceplan)

# Collect the InstalledBase__c values of each contract (Contract__c) as one
# semicolon-separated string of unique values.
# The contract item CSV is read with keep_default_na=False, so blank cells are
# '' rather than NaN and dropna() alone does not remove them; empty values are
# filtered out explicitly so the joined list has no empty entries.
contractitem_grouped = (
    df_contractitem.groupby('Contract__c')['InstalledBase__c']
    .apply(lambda ids: ';'.join(v for v in ids.dropna().unique() if v))
    .reset_index()
)

# One row per maintenance plan contract, with the aggregated installed bases
# attached; contracts without contract items keep a missing value (left join).
df_maintenanceasset = df_maintenanceplan[['ServiceContract.PKey__c']].merge(
    contractitem_grouped,
    left_on='ServiceContract.PKey__c',
    right_on='Contract__c',
    how='left',
)
# Drop the join helper column Contract__c.
df_maintenanceasset = df_maintenanceasset[['ServiceContract.PKey__c', 'InstalledBase__c']]
##--------------------------------------------------------------------------##
## Skills
##--------------------------------------------------------------------------##
##--------------------------------------------------------------------------##
## Stocks
##--------------------------------------------------------------------------##
# Attach ResourceNumberCalc__c from the resource assignments to every stock
# item, matching on Stock__c; stock items without an assignment are kept.
stock_resource_numbers = df_resourceassignment[['Stock__c', 'ResourceNumberCalc__c']]
read_df_stockitem = read_df_stockitem.merge(
    stock_resource_numbers,
    how='left',
    on='Stock__c',
)
##--------------------------------------------------------------------------##
## StocksOptimizationListItem
##--------------------------------------------------------------------------##
# The stock optimization list is handed to external systems unchanged, so the
# extract is copied as-is.
src_file = '../1_extract_data/results/SCStockOptimizationListItem__c.csv'
dst_file = './data_for_external_systems/SCStockOptimizationListItem__c.csv'
# This copy runs before the output directories are created further down, so
# make sure the destination folder exists first.
os.makedirs(os.path.dirname(dst_file), exist_ok=True)
shutil.copyfile(src_file, dst_file)
##--------------------------------------------------------------------------##
## Saving to CSV
##--------------------------------------------------------------------------##
# Write each DataFrame to a separate CSV file
# Create directories if they do not exist
output_dirs = [
    '../4_upsert_address_and_parent_location',
    '../6_upsert_child_location',
    '../9_upsert_assets',
    '../12_upsert_associated_location',
    '../13_insert_pricebook2_and_pricebookentries',
    '../3_update_address_and_location_data_for_migration',
    '../14_insert_servicecontracttemplates_dummies',
    '../16_insert_servicecontract',
    '../20_create_maintenance_plan',
    '../21_insert_maintenance_asset',
    '../19_create_contractlineitems',
    './data_for_external_systems',
]

for dir_path in output_dirs:
    # exist_ok=True already makes this a no-op for existing directories, so
    # no separate existence check is needed.
    os.makedirs(dir_path, exist_ok=True)
# Write the address and location frames to their load folders, without the
# index column.
for _out_frame, _out_path in (
    (address_df, '../4_upsert_address_and_parent_location/Address.csv'),
    (parent_df, '../4_upsert_address_and_parent_location/Location.csv'),
    (child_df, '../6_upsert_child_location/Location.csv'),
):
    _out_frame.to_csv(_out_path, index=False)
@ -658,7 +765,10 @@ df_servicecontracttemplates.to_csv('../14_insert_servicecontracttemplates_dummie
# Service contract, warranty and maintenance outputs (index column omitted).
df_servicecontract.to_csv('../16_insert_servicecontract/ServiceContract_beforetransform.csv', index=False)
df_assetwarranty_save.to_csv('../9_upsert_assets/AssetWarranty.csv', index=False)
df_maintenance.to_csv('../9_upsert_assets/FSL_Asset_Maintenance_Information__c.csv', index=False)
# The maintenance plan is written only to step 20. The former write to
# '../19_create_maintenance_plan' targeted a folder that is not among the
# created output directories and has been removed.
df_maintenanceplan.to_csv('../20_create_maintenance_plan/MaintenancePlan_beforetransform.csv', index=False)
df_maintenanceasset.to_csv('../21_insert_maintenance_asset/MaintenanceAsset_beforetransform.csv', index=False)
df_contractlineitem.to_csv('../19_create_contractlineitems/ContractLineItem.csv', index=False)
# Stock data enriched with ResourceNumberCalc__c, for external systems.
read_df_stockitem.to_csv('./data_for_external_systems/SAP_Stocks.csv', index=False)
## end mapping
print('Data has been successfully transformed and saved to CSV files.')