Script Added + Retrieving data start

2025-04-01 11:28:44 +02:00
parent b1b9d3db8a
commit 9b1103268c
17 changed files with 187 additions and 39 deletions
--- a/prepared_steps/2/LocationScript.py
+++ b/prepared_steps/2/LocationScript.py
@ -0,0 +1,117 @@
+import pandas as pd
+
+country_mapping = {
+    'NL': 'Netherlands'
+    }   
+
+# Read the input CSV file, assuming the second row is the header
+read_df = pd.read_csv('../1/SCInstalledBaseLocation__c.csv', header=0, keep_default_na=False)
+for row in read_df.to_dict('records'):
+    try:
+        # Your processing logic here
+        pass
+    except KeyError as e:
+        print(f'KeyError: {e}')
+
+# Columns for reindexing
+reindex_columns = ['City__c','Country__c','Extension__c','FlatNo__c','Floor__c','GeoX__c','GeoY__c','HouseNo__c','Id','PostalCode__c','Street__c']
+
+# Reindex the columns to match the desired format
+df = read_df.reindex(reindex_columns, axis=1)
+
+df['Street'] = (
+    df['Street__c'].astype(str) + ' ' +
+    df['HouseNo__c'].astype(str) + ' ' +
+    df['Extension__c'].astype(str)
+)
+
+# Remove any trailing spaces that may result from missing values
+df['Street'] = df['Street'].str.rstrip()
+
+df['PKey__c'] = (
+    df['Street'].astype(str) + ', ' +
+    df['PostalCode__c'].astype(str) + ' ' +
+    df['City__c'].astype(str) + ', ' +
+    df['Country__c'].astype(str)
+)
+
+## 1. Address.csv
+# Columns needed for Address table based on the input CSV structure
+address_columns = ['City__c', 'Country__c', 
+                   'PostalCode__c', 'Street', 
+                   'GeoY__c', 'GeoX__c', 'PKey__c']
+
+# Extract data for Address
+address_df = df[address_columns].copy()
+
+address_df['CountryCode'] = address_df['Country__c'].map(country_mapping)
+
+# Now create the 'Name' column without any warnings
+address_df['Parent.Name'] = address_df['PKey__c']
+
+# Rename columns to match the desired format
+address_df.columns = ['City', 'CountryCode', 'PostalCode', 'Street', 'Latitude', 'Longitude', 'PKey__c', 'Country', 'Parent.Name']
+
+# Check for duplicates in Address table based on OldId, City, CountryCode, PostalCode, and Street
+address_df = address_df.drop_duplicates(subset=['City', 'Country', 'PostalCode', 'Street'], keep='first')
+
+## 2. Parent_Location.csv
+parent_columns = ['City__c', 'Country__c', 
+                   'PostalCode__c', 'Street', 'PKey__c']
+parent_df = df[parent_columns]
+# Rename columns to match the desired format
+parent_df.columns = ['City', 'Country',
+                     'PostalCode', 'Street', 'Name']
+
+parent_df['CountryCode'] = parent_df['Country'].map(country_mapping)
+
+# Check for duplicates in Parent Location based on OldId and ConstructionEndDate
+parent_df = parent_df.drop_duplicates(subset=['Name'], keep='first')
+
+parent_df = parent_df.drop('Street', axis=1)
+parent_df = parent_df.drop('PostalCode', axis=1)
+parent_df = parent_df.drop('City', axis=1)
+parent_df = parent_df.drop('CountryCode', axis=1)
+parent_df = parent_df.drop('Country', axis=1)
+
+parent_df['DuplicateCheck__c'] = 'false'
+parent_df['IsInventoryLocation'] = 'false'
+parent_df['IsMobile'] = 'false'
+parent_df['LocationType'] = 'Site'
+
+## 3. Child_Location.csv
+child_columns = ['Extension__c', 'FlatNo__c', 'Floor__c', 'City__c', 'Country__c', 
+                   'PostalCode__c', 'Street', 'PKey__c']
+# Modify child_df by explicitly creating a new DataFrame
+child_df = df[child_columns].copy()  # Add .copy() to create an explicit copy
+
+# Now create the 'Name' column without any warnings
+child_df['Name'] = (
+    child_df['Floor__c'].astype(str) + '-' +
+    child_df['FlatNo__c'].astype(str) + '-' +
+    child_df['Extension__c'].astype(str)
+)
+
+# Rename columns to match the desired format
+child_df.columns = ['Extension', 'Flat', 'Floor', 'City', 'Country',
+                     'PostalCode', 'Street', 'PKey__c', 'Name']
+
+child_df = child_df.drop_duplicates(subset=['Extension', 'Flat', 'Floor','City', 'Country', 'PostalCode', 'Street'], keep='first')
+
+child_df = child_df.drop('Country', axis=1)
+child_df = child_df.drop('PostalCode', axis=1)
+child_df = child_df.drop('City', axis=1)
+child_df = child_df.drop('Street', axis=1)
+
+child_df['DuplicateCheck__c'] = 'false'
+child_df['IsInventoryLocation'] = 'false'
+child_df['IsMobile'] = 'false'
+child_df['LocationType'] = 'Site'
+
+
+# Write each DataFrame to a separate CSV file
+address_df.to_csv('../3/Address.csv', index=False)
+parent_df.to_csv('../3/Location.csv', index=False)
+child_df.to_csv('../5/Location.csv', index=False)
+
+print('Data has been successfully split into Address.csv, Parent_Location.csv, and Child_Location.csv files with duplicate checks applied.')