# GEOG 6161 Capstone

# Python Code for Automated QA Process

import arcpy
import os
from arcpy import env
import datetime
from datetime import date
import time
import pandas as pd
from arcgis.features import GeoAccessor, GeoSeriesAccessor
# Establish the workspace - change the peliWorkspace variable to the folder that holds the data file.
peliWorkspace = 'C:/Users/1hall/Documents/Capstone'
arcpy.env.workspace = peliWorkspace
# Let geoprocessing tools overwrite outputs that already exist.
arcpy.env.overwriteOutput = True
# Create a file geodatabase named db_name inside the workspace folder to house outputs.
# NOTE(review): the feature-class paths defined later in this script point at DataQA\DataQA.gdb,
# not at this geodatabase - confirm which one is intended.
db_name = "PelicanSensorDataQA"
out_gdb = arcpy.CreateFileGDB_management(peliWorkspace, db_name)
# Print the tool result as confirmation that the geodatabase step ran.
print (out_gdb)
# Local variables:
# peliData is the input CSV table; PelicanLoc is the output point feature class.
# NOTE(review): PelicanLoc is written to DataQA\DataQA.gdb, which this script does not create - confirm that it already exists.
peliData = r"C:\Users\1hall\Documents\Capstone\AWPE_ARGOS_20200121.csv"
PelicanLoc = r"C:\Users\1hall\Documents\Capstone\DataQA\DataQA.gdb\PelicanLoc"

# Process: XY Table To Point - convert the CSV to the PelicanLoc point feature class,
# reading x from the "Long" column and y from the "Lat" column, in GCS_WGS_1984.
# This command reports an error in Jupyter Notebook ("ToolValidator' object has no attribute 'isLicensed");
# per the author this is a known bug and the tool still works as long as the print statement below prints.
arcpy.XYTableToPoint_management(in_table=peliData, out_feature_class=PelicanLoc, x_field="Long", y_field="Lat", z_field="", coordinate_system="GEOGCS['GCS_WGS_1984',DATUM['D_WGS_1984',SPHEROID['WGS_1984',6378137.0,298.257223563]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]];-400 -400 1000000000;-100000 10000;-100000 10000;8.98315284119521E-09;0.001;0.001;IsHighPrecision")

print('csv to feature class complete')
# Add the working fields to the new feature class in a single AddFields call:
#   RandNum  - running number used later to sample the points
#   AltNum   - text copy of the altitude with the text flags removed
#   yyyy, MM, dd, time - components parsed out of Date_time
#   BoolAlt  - flag for the altitude check
#   DispAlt  - numeric altitude for display
#   BoolDT   - flag for the Date_time check
#   BoolLoc  - flag for the location check
new_fields = [
    "RandNum LONG # 8 # #",
    "AltNum TEXT # 8 # #",
    "yyyy SHORT # 4 # #",
    "MM SHORT # 2 # #",
    "dd SHORT # 2 # #",
    "time LONG # 6 # #",
    "BoolAlt SHORT # 1 # #",
    "DispAlt LONG # 10 # #",
    "BoolDT SHORT # 1 # #",
    "BoolLoc SHORT # 1 # #",
]
# The tool takes one semicolon-delimited string of field descriptions.
arcpy.AddFields_management(in_table=PelicanLoc, field_description=";".join(new_fields))

print("New fields added")

# Process: Convert Time Field - write Date_time into a new field, Date_time_Converted,
# formatted as yyyyMMddHHmmss. The output field must be a text type for this format.
# Date_time_Converted is used later in this script as the sort field when the points are turned into a line.
arcpy.ConvertTimeField_management(in_table=PelicanLoc, input_time_field="Date_time", input_time_format="'Not Used'", output_time_field="Date_time_Converted", output_time_type="TEXT", output_time_format="yyyyMMddHHmmss")

print("New date time field added")
# This section normalizes the time data into usable fields and seeds the BoolDT flag.
# Process: Calculate Field - parse each component of Date_time with an Arcade expression.
# Only the hour is logged, so "time" holds just the hour (no minutes or seconds).
# Arcade's Month() is zero-based (January = 0), so 1 is added to store the calendar month (1-12) in MM.
# The IsEmpty guard keeps MM null when Date_time is null instead of turning it into a number.
date_parts = (
    ("yyyy", "Year($feature.Date_time)"),
    ("MM", "IIf(IsEmpty($feature.Date_time), null, Month($feature.Date_time) + 1)"),
    ("dd", "Day($feature.Date_time)"),
    ("time", "Hour($feature.Date_time)"),
)
for part_field, part_expression in date_parts:
    arcpy.CalculateField_management(
        in_table=PelicanLoc,
        field=part_field,
        expression=part_expression,
        expression_type="ARCADE",
        code_block="",
    )

# Process: Calculate Field - copy the hour from "time" into BoolDT; BoolDT is then reclassified into a 0/1 flag.
arcpy.CalculateField_management(in_table=PelicanLoc, field="BoolDT", expression="!time!", expression_type="PYTHON3", code_block="")

def Reclass(BoolDT):
    """Reclassify the hour value held in BoolDT into a 0/1 flag.

    Args:
        BoolDT: Hour value copied from the "time" field, or None when the
            field is null.

    Returns:
        0 when the value is greater than 0, otherwise 1. A None value is
        treated as a failed check and also returns 1 instead of raising
        TypeError on the comparison.
    """
    # NOTE(review): an hour of 0 (midnight) is also flagged as 1 - confirm that this is intended.
    if BoolDT is None:
        return 1
    return 0 if BoolDT > 0 else 1

# Process: Calculate Field - replace the hour stored in BoolDT with the 0/1 flag returned by Reclass.
# NOTE(review): code_block is empty, so Reclass must be resolvable from the session running the tool -
# confirm, or pass the function definition through code_block.
arcpy.CalculateField_management(in_table=PelicanLoc, field="BoolDT", expression="Reclass(!BoolDT!)", expression_type="PYTHON3", code_block="")
# Altitude normalization for the Argos output. If other sensor data is used, the text labels
# listed below have to be replaced with the labels that sensor writes into the altitude column.
#   AltNum  - copy of the alt field with every text label removed
#   DispAlt - numeric altitude (negative values are corrected to 0 in a later step)
#   BoolAlt - 1 for non-numeric values, 0 for numeric values; used for filtering
# Step 1: copy alt into AltNum.
arcpy.CalculateField_management(
    in_table=PelicanLoc,
    field="AltNum",
    expression="!alt!",
    expression_type="PYTHON3",
    code_block="",
)
# Step 2: blank out each text label in AltNum, one label per pass.
for alt_label in ("2D fix", "low alt", "neg alt"):
    arcpy.CalculateField_management(
        in_table=PelicanLoc,
        field="AltNum",
        expression="!AltNum!.replace('{}', '')".format(alt_label),
        expression_type="PYTHON3",
        code_block="",
    )
# Step 3: copy the remaining numeric text into DispAlt. Per the author, this reports a list of
# errors for the null values in the field.
arcpy.CalculateField_management(
    in_table=PelicanLoc,
    field="DispAlt",
    expression="!AltNum!",
    expression_type="PYTHON3",
    code_block="",
)

print('copied')
# ---------------------------------------------------------------------------
# Process: Select Layer By Attribute - select the PelicanLoc rows whose DispAlt value is less than 0.
# The tool result is kept in altselect so it can be passed to a later tool as its input.
altselect = arcpy.SelectLayerByAttribute_management(in_layer_or_view=PelicanLoc, selection_type="NEW_SELECTION", where_clause="DispAlt < 0", invert_where_clause="")

# Function - if the display value is less than 0, return 0; otherwise return the value unchanged.
def disp_alt_corr(DispAlt):
    """Clamp a negative display altitude to 0.

    Args:
        DispAlt: Numeric altitude value, or None when the field is null.

    Returns:
        0 when DispAlt is negative, DispAlt itself when it is zero or
        positive, and None when DispAlt is None.
    """
    # Pass nulls through so a null row stays null instead of raising TypeError on the comparison.
    if DispAlt is None:
        return None
    if DispAlt < 0:
        return 0
    # Return the value itself; the previous comparison statement here returned None
    # and would have nulled any non-negative altitude passed in.
    return DispAlt


# Process: Calculate Field - run disp_alt_corr over the rows selected in altselect (DispAlt < 0).
# NOTE(review): code_block is empty, so disp_alt_corr must be resolvable from the session running the tool -
# confirm, or pass the function definition through code_block.
arcpy.CalculateField_management(in_table=altselect, field="DispAlt", expression= "disp_alt_corr(!DispAlt!)", expression_type="PYTHON3", code_block="")
# Process: Select Layer By Attribute - create one selection for the rows where DispAlt is not null (nnselect)
# and one for the rows where DispAlt is null (inselect).
nnselect = arcpy.SelectLayerByAttribute_management(in_layer_or_view=PelicanLoc, selection_type="NEW_SELECTION", where_clause="DispAlt IS NOT NULL", invert_where_clause="")
inselect = arcpy.SelectLayerByAttribute_management(in_layer_or_view=PelicanLoc, selection_type="NEW_SELECTION", where_clause="DispAlt IS NULL", invert_where_clause="")

# Process: Calculate Field - use the two selections to fill BoolAlt: 0 where DispAlt has a value, 1 where it is null.
arcpy.CalculateField_management(in_table=nnselect, field="BoolAlt", expression="0", expression_type="PYTHON3", code_block="")
arcpy.CalculateField_management(in_table=inselect, field="BoolAlt", expression="1", expression_type="PYTHON3", code_block="")
# This section starts the location validation part of the QA.
# First step: select the PelicanLoc rows for a single transmitter (ptt_id = 104263).
ptselect = arcpy.SelectLayerByAttribute_management(in_layer_or_view=PelicanLoc, selection_type="NEW_SELECTION", where_clause="ptt_id = 104263", invert_where_clause="")

i = 0  # running counter shared across calls to numIt


def numIt(RandNum):
    """Return the next value of the module-level counter ``i``.

    The first call returns the start value (1); every later call returns the
    previous value plus the interval (1). ``RandNum`` is accepted as an
    argument but its value is never read.
    """
    global i
    first_value = 1  # adjust start value, if req'd
    step = 1  # adjust interval value, if req'd
    # A counter of 0 means nothing has been handed out yet.
    i = first_value if i == 0 else i + step
    return i

# Process: Calculate Field - write the value returned by numIt into RandNum for the rows selected in ptselect.
# NOTE(review): code_block is empty, so numIt must be resolvable from the session running the tool -
# confirm, or pass the function definition through code_block.
arcpy.CalculateField_management(in_table=ptselect, field="RandNum", expression="numIt(!RandNum!)", expression_type="PYTHON3", code_block="")
# Re-create the selection for the transmitter of interest, then derive the sampled path and its buffer.
ptid = 104263
ptt_filter = "ptt_id = {}".format(ptid)
ptselect = arcpy.SelectLayerByAttribute_management(
    in_layer_or_view=PelicanLoc,
    selection_type="NEW_SELECTION",
    where_clause=ptt_filter,
    invert_where_clause="",
)

# Output feature classes, all inside the same geodatabase.
qa_gdb = r"C:\Users\1hall\Documents\Capstone\DataQA\DataQA.gdb"
PelicanLoc_Select = qa_gdb + "\\PelicanLoc_Select"
PelicanLoc_PointsToLine = qa_gdb + "\\PelicanLoc_PointsToLine"
PelicanLoc_PointsToLine_32000Buff = qa_gdb + "\\PelicanLoc_PointsToLine_32000Buff_2"

# Process: Select - copy out the transmitter's points whose RandNum is divisible by 5
# (so roughly 20% of the data points).
arcpy.Select_analysis(
    in_features=PelicanLoc,
    out_feature_class=PelicanLoc_Select,
    where_clause=ptt_filter + " And MOD(RandNum, 5) = 0",
)

# Process: Points To Line - generate the path for the pelican, ordered by the converted time stamp.
arcpy.PointsToLine_management(
    Input_Features=PelicanLoc_Select,
    Output_Feature_Class=PelicanLoc_PointsToLine,
    Line_Field="ptt_id",
    Sort_Field="Date_time_Converted",
    Close_Line="NO_CLOSE",
)

# Process: Buffer - 32000 m buffer around the path, dissolved into a single feature.
arcpy.Buffer_analysis(
    in_features=PelicanLoc_PointsToLine,
    out_feature_class=PelicanLoc_PointsToLine_32000Buff,
    buffer_distance_or_field="32000 Meters",
    line_side="FULL",
    line_end_type="ROUND",
    dissolve_option="ALL",
    dissolve_field="",
    method="PLANAR",
)


# Process: Select Layer By Location - select the features of ptselect that fall within the buffer polygon.
# NOTE(review): selection_type is NEW_SELECTION, which presumably replaces the ptt_id selection held by
# ptselect rather than narrowing it - confirm whether SUBSET_SELECTION was intended.
locselect = arcpy.SelectLayerByLocation_management(in_layer=ptselect, overlap_type="WITHIN", select_features=PelicanLoc_PointsToLine_32000Buff, search_distance="", selection_type="NEW_SELECTION", invert_spatial_relationship="NOT_INVERT")

# Process: Calculate Field (4) - set BoolLoc for the features selected above.
# NOTE(review): the expression evaluates to the string '1'; presumably it is coerced when written to the
# numeric BoolLoc field - confirm.
arcpy.CalculateField_management(in_table=locselect, field="BoolLoc", expression="'1'", expression_type="PYTHON3", code_block="")
# Build a local-time stamp (YYYYMMDD-HHMMSS) and append it to the output name prefix.
timestr = time.strftime("%Y%m%d-%H%M%S", time.localtime())
out_name = "".join(("pelidata_clean_", timestr))