Export complete
This commit is contained in:
162
prepared_steps/2_transform_via_script/LocationScript.py
Normal file
@@ -0,0 +1,162 @@
import pandas as pd

country_mapping = {
    'NL': 'Netherlands'
}
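# Note: Series.map() returns NaN for any Country__c value that is not a key in
# country_mapping, so the dictionary would need to be extended for data sets
# containing countries other than 'NL'.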

# Read the input CSV files; the first row of each file is the header (header=0)
read_df = pd.read_csv('../1_extract_data/SCInstalledBaseLocation__c.csv', header=0, keep_default_na=False, dtype=str)
read_df_ib = pd.read_csv('../1_extract_data/SCInstalledBase__c.csv', header=0, keep_default_na=False, dtype=str)
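# Note: keep_default_na=False together with dtype=str reads empty cells as empty
# strings ('') rather than NaN; the string concatenations below rely on this.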

# Placeholder loop over the location rows; currently a no-op
for row in read_df.to_dict('records'):
    try:
        # Row-level processing logic would go here
        pass
    except KeyError as e:
        print(f'KeyError: {e}')

# Columns for reindexing the location and installed-base extracts
reindex_columns = ['City__c','Country__c','Extension__c','FlatNo__c','Floor__c','GeoX__c','GeoY__c','HouseNo__c','Id','PostalCode__c','Street__c']
reindex_columns_ib = ['ArticleNo__c','CommissioningDate__c','Id','InstallationDate__c','InstalledBaseLocation__c','InstalledBaseLocation__r.Id','Name','ProductEnergy__c','ProductUnitClass__c','ProductUnitType__c','SerialNo__c','SerialNoException__c']

# Reindex the columns to match the desired format
df = read_df.reindex(reindex_columns, axis=1)
df_ib = read_df_ib.reindex(reindex_columns_ib, axis=1)
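# Note: DataFrame.reindex() keeps only the listed columns, in the given order,
# and fills any column missing from the source CSV with NaN.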

# Build the street line from street name and house number
df['Street'] = (
    df['Street__c'].astype(str) + ' ' +
    df['HouseNo__c'].astype(str)
)

# Remove any trailing spaces that may result from missing values
df['Street'] = df['Street'].str.rstrip()

# Compose the location key used to link the generated files
df['PKey__c'] = (
    df['Street'].astype(str) + ', ' +
    df['PostalCode__c'].astype(str) + ' ' +
    df['City__c'].astype(str) + ', ' +
    df['Country__c'].astype(str)
)
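# Illustrative example (made-up values): Street__c 'Hoofdstraat', HouseNo__c '12',
# PostalCode__c '1234 AB', City__c 'Amsterdam', Country__c 'NL' yields
# PKey__c 'Hoofdstraat 12, 1234 AB Amsterdam, NL'.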

# Merge df_ib with the location key and unit columns from df
merged_df_ib = pd.merge(df_ib,
                        df[['Id', 'PKey__c', 'Extension__c', 'FlatNo__c', 'Floor__c']],
                        left_on='InstalledBaseLocation__c',
                        right_on='Id',
                        how='left')
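# Note: both frames carry an 'Id' column, so the merge suffixes them as 'Id_x'
# (the asset Id from df_ib) and 'Id_y' (the location Id from df); 'Id_y' is
# dropped again in the Assets section below.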

# If there are missing values (no match found), fill them with a placeholder
merged_df_ib['PKey__c'] = merged_df_ib['PKey__c'].fillna('Not Found')

# Extend the location key with the unit fields
merged_df_ib['PKey__c'] = (
    merged_df_ib['PKey__c'].astype(str) + ';' +
    merged_df_ib['Extension__c'].astype(str) + ';' +
    merged_df_ib['FlatNo__c'].astype(str) + ';' +
    merged_df_ib['Floor__c'].astype(str)
)
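# Illustrative example (made-up values): 'Hoofdstraat 12, 1234 AB Amsterdam, NL'
# with Extension__c 'A', FlatNo__c '2' and Floor__c '1' becomes
# 'Hoofdstraat 12, 1234 AB Amsterdam, NL;A;2;1'. This must stay in sync with the
# ExternalReference built for Child_Location below.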

merged_df_ib = merged_df_ib.drop('Extension__c', axis=1)
merged_df_ib = merged_df_ib.drop('FlatNo__c', axis=1)
merged_df_ib = merged_df_ib.drop('Floor__c', axis=1)

## 1. Address.csv
# Columns needed for the Address table based on the input CSV structure
address_columns = ['City__c', 'Country__c',
                   'PostalCode__c', 'Street',
                   'GeoY__c', 'GeoX__c', 'PKey__c']

# Extract data for Address
address_df = df[address_columns].copy()

# Map the country code to the full country name
address_df['CountryCode'] = address_df['Country__c'].map(country_mapping)

# Carry the location key as the parent location name
address_df['Parent.Name'] = address_df['PKey__c']

# Rename columns to match the desired format
address_df.columns = ['City', 'CountryCode', 'PostalCode', 'Street', 'Latitude', 'Longitude', 'PKey__c', 'Country', 'Parent.Name']
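# Note: the rename above is positional. The original Country__c code (e.g. 'NL')
# ends up in 'CountryCode', the mapped full name in 'Country', and GeoY__c /
# GeoX__c become 'Latitude' / 'Longitude'.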

# Drop duplicate addresses based on City, Country, PostalCode and Street
address_df = address_df.drop_duplicates(subset=['City', 'Country', 'PostalCode', 'Street'], keep='first')
## 2. Parent_Location.csv
|
||||
parent_columns = ['City__c', 'Country__c',
|
||||
'PostalCode__c', 'Street', 'PKey__c']
|
||||
parent_df = df[parent_columns]
|
||||
# Rename columns to match the desired format
|
||||
parent_df.columns = ['City', 'Country',
|
||||
'PostalCode', 'Street', 'Name']
|
||||
|
||||
parent_df['CountryCode'] = parent_df['Country'].map(country_mapping)
|
||||
|
||||
# Check for duplicates in Parent Location based on OldId and ConstructionEndDate
|
||||
parent_df = parent_df.drop_duplicates(subset=['Name'], keep='first')

parent_df = parent_df.drop('Street', axis=1)
parent_df = parent_df.drop('PostalCode', axis=1)
parent_df = parent_df.drop('City', axis=1)
parent_df = parent_df.drop('CountryCode', axis=1)
parent_df = parent_df.drop('Country', axis=1)

parent_df['DuplicateCheck__c'] = 'false'
parent_df['IsInventoryLocation'] = 'false'
parent_df['IsMobile'] = 'false'
parent_df['LocationType'] = 'Site'

## 3. Child_Location.csv
child_columns = ['Extension__c', 'FlatNo__c', 'Floor__c', 'City__c', 'Country__c',
                 'PostalCode__c', 'Street', 'PKey__c']
# Create an explicit copy so the columns added below do not trigger SettingWithCopyWarning
child_df = df[child_columns].copy()

# Create the 'Name' column by concatenating floor, flat number and extension
child_df['Name'] = (
    child_df['Floor__c'].astype(str) + '-' +
    child_df['FlatNo__c'].astype(str) + '-' +
    child_df['Extension__c'].astype(str)
)

# Rows where 'Floor__c', 'FlatNo__c' and 'Extension__c' are all empty produce '--'; replace that with "HOME"
child_df.replace({'Name': {'--': 'HOME'}}, inplace=True)
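# Illustrative example (made-up values): Floor__c '1', FlatNo__c '2' and
# Extension__c 'A' give Name '1-2-A'; a row with all three fields empty gives
# '--' and is renamed to 'HOME'.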

# Create the 'ExternalReference' column for Asset assignment
child_df['ExternalReference'] = (
    child_df['PKey__c'].astype(str) + ';' +
    child_df['Extension__c'].astype(str) + ';' +
    child_df['FlatNo__c'].astype(str) + ';' +
    child_df['Floor__c'].astype(str)
)
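# Note: this concatenation mirrors the PKey__c composite built on merged_df_ib
# above, so each row in Asset.csv can reference its child location via
# Location.ExternalReference.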

# Rename columns to match the desired format
child_df.columns = ['Extension__c', 'Flat__c', 'Floor__c', 'City', 'Country',
                    'PostalCode', 'Street', 'PKey__c', 'Name', 'ExternalReference']

child_df = child_df.drop_duplicates(subset=['Extension__c', 'Flat__c', 'Floor__c', 'City', 'Country', 'PostalCode', 'Street'], keep='first')

child_df = child_df.drop('Country', axis=1)
child_df = child_df.drop('PostalCode', axis=1)
child_df = child_df.drop('City', axis=1)
child_df = child_df.drop('Street', axis=1)

child_df['DuplicateCheck__c'] = 'false'
child_df['IsInventoryLocation'] = 'false'
child_df['IsMobile'] = 'false'
child_df['LocationType'] = 'Site'

## 4. Assets.csv
# ArticleNo__c,CommissioningDate__c,Id,InstallationDate__c,InstalledBaseLocation__c,InstalledBaseLocation__r.Extension__c,InstalledBaseLocation__r.FlatNo__c,InstalledBaseLocation__r.Floor__c,InstalledBaseLocation__r.Id,Name,ProductEnergy__c,ProductUnitClass__c,ProductUnitType__c,SerialNo__c,SerialNoException__c

# Drop the merge helper columns that are no longer needed
merged_df_ib = merged_df_ib.drop('InstalledBaseLocation__c', axis=1)
merged_df_ib = merged_df_ib.drop('InstalledBaseLocation__r.Id', axis=1)
merged_df_ib = merged_df_ib.drop('Id_y', axis=1)

# Quick sanity check of the remaining columns before the positional rename
print(merged_df_ib.columns)
merged_df_ib.columns = ['Product2.EAN_Product_Code__c', 'FSL_1st_Ignition_Date__c', 'Id', 'InstallDate', 'Name', 'Kind_of_Energy__c', 'Kind_of_Installation__c', 'Main_Product_Group__c', 'SerialNumber', 'Serialnumber_Exception__c', 'Location.ExternalReference']
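# Note: the positional rename above assumes exactly the eleven columns left after
# the drops, in the order shown by the print(); 'Id_x' (the asset Id surviving
# the merge) becomes 'Id'.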

# Write each DataFrame to a separate CSV file
address_df.to_csv('../3_upsert_address_and_parent_location/Address.csv', index=False)
parent_df.to_csv('../3_upsert_address_and_parent_location/Location.csv', index=False)
child_df.to_csv('../5_upsert_child_location/Location.csv', index=False)
merged_df_ib.to_csv('../7_upsert_assets/Asset.csv', index=False)

print('Data has been split into Address.csv, the parent and child Location.csv files, and Asset.csv, with duplicate checks applied.')
1
prepared_steps/2_transform_via_script/command.txt
Normal file
@@ -0,0 +1 @@
python .\LocationScript.py