ns_tableprep.py

import getpass
import io
import os
import sys
import pyproj
import psycopg2
import psycopg2.extras
import string
import re
import json
import datetime

#Arguments
# q:=xxxx.sql - run query
# qd:=xxxx/ - run all queries in directory
# u:=xxxx.json - update records based on borough_ref
# s:=xxxx.json - update records based on permission_id
# am:=xxxx.json - insert new major records
# at:=xxxx.json - insert new tree records
# ab:=xxxx.json - insert new bat mitigation records

def connectToDb():
  n = 'ldd'
  u = raw_input("Username:")
  pw = getpass.getpass("Password:")
  db = {}
  db['connect'] = psycopg2.connect("dbname='ldd' user='" + u + "' host='localhost' password='" + pw + "'")
  #dbConnect = psycopg2.connect("dbname='ldd' user='ldd' host='localhost' password=''")
  db['cursor'] = db['connect'].cursor(cursor_factory=psycopg2.extras.DictCursor)
  bng = pyproj.Proj(init='epsg:27700')
  wgs84 = pyproj.Proj(init='epsg:4326')
  return db

def checkAndCreateTable(tablename, tablefile, db):
  checkTableSql = "SELECT EXISTS (  SELECT 1  FROM   pg_tables  WHERE  schemaname = 'app_ldd'  AND tablename = '" + tablename + "' );"
  db['cursor'].execute(checkTableSql)
  checkTable = db['cursor'].fetchone()
  print str(checkTable[0]) + "\n"
  if not checkTable[0]:
    print "no table... createing"
    with open(tablefile, 'r') as f:
      sql=f.read().replace('\n', ' ')
    print sql
    db['cursor'].execute(sql)
    db['connect'].commit()
  else:
    print "table present"

def runQueryDirectory(queryDir, db):
  queries = os.listdir(queryDir)
  queries.sort()
  for q in queries:
    if re.search(".sql$", q):
      print "Running: " + q
      chk = runQuery(queryDir + q, db)
      #chk = True
      if chk:
        print "Executed successfully"
      else:
        print "Error with query"
    else:
      print q + " does not end with .sql -- skipping"

def runQuery(fn, db):
  with open(fn, 'r') as f:
    sql=f.read().replace('\n', ' ')
  print sql
  db['cursor'].execute(sql)
  db['connect'].commit()
  return True

def updateUsingJson(fn, db, updatefield):
  i = 0
  with open(fn) as f:
    dataIn = json.loads(f.read())
  for pa in dataIn:
    cs = "commentStats"
    #print "updating " + str(pa[updatefield])
    sql = ""
    if (cs in pa) or ("constraints" in pa) or (checkFloorspaceOverAmount(pa, 1)) or ("housing" in pa):
      i += 1
      sql = "UPDATE app_ldd.ns_base "
      sql += "SET is_in_conservationarea = " + testConstraints(pa, "Conservation Area")
    if cs in pa:
      sql += ", comments_total=" + pa[cs]['Comments Received'] + ", comments_objecting=" + pa[cs]['Objections'] +", comments_supporting=" + pa[cs]['Supporting']
    if 'housing' in pa:
      sql += ", change_to_housing = TRUE"
    if checkFloorspaceOverAmount(pa,1):
      sql += ", change_to_floorspace = TRUE"
    if len(sql) > 1:
      if updatefield == "borough_ref" and 'ref' in pa:
        sql += " WHERE " + updatefield + "='" + pa['ref'] + "'"
      if updatefield == "borough_ref" and 'borough_ref' in pa:
        sql += " WHERE " + updatefield + "='" + pa[updatefield] + "'"
      else:
        sql += " WHERE " + updatefield + "=" + str(pa[updatefield])
      db['cursor'].execute(sql)
  db['connect'].commit()
  print "updated " + str(i) + " records"

def testConstraints(pa, ss):
  rtn = "FALSE"
  if 'constraints' in pa:
    for c in pa['constraints']:
      motype = re.match(r".*" + ss + ".*", c['type'], re.I|re.M)
      moname = re.match(r".*" + ss + ".*", c['name'], re.I|re.M)
      if motype or moname:
        rtn = "TRUE"
  return rtn

def checkCritMet(pa, nstype):
  rtn = False
  if nstype == "tree" or nstype == "bat":
    rtn = True
  elif nstype == "major":
    fscrit = checkFloorspaceOverAmount(pa, 100)
    if ('housing' in pa) or fscrit:
      rtn = True
  return rtn

def insertNewJsonRecords(args, db, intype, nstype):
  with open(args[intype]) as f:
    dataIn = json.loads(f.read())
  print str(len(dataIn)) + " records found"
  for pa in dataIn:
    checkCrit = checkCritMet(pa, nstype) 
    if checkCrit:
      br1 = pa['ref']
      br2 = pa['ref'].replace('/','-')
      sqlCheck = "SELECT COUNT(permission_id) FROM app_ldd.ns_base WHERE borough_ref='" + br1 + "' OR borough_ref='" + br2 + "'"
      db['cursor'].execute(sqlCheck)
      check = db['cursor'].fetchone()
      print br2 + ": " + str(check[0])
      if check[0] == 0:
        ins = {'s':{}, 'n':{}} #s is for string (or quoted) values, n is for numeric (or unquoted) values
        ins['s']['source'] = "onlinePlanDb"
        ins['s']['borough_ref'] = br1
        if "bn" in args:
          ins['s']['borough_name'] = args['bn']
        ins['s']['descr'] = pa['descr'].replace("'","''")
        ins['s']['ns_type'] = nstype
        ins['s']['status_rc'] = determineStatus(pa)
        ins['n']['is_in_conservationarea'] = testConstraints(pa, "Conservation Area")
        ins['n']['existing_socialhousing'] = determineSocialHousing(pa, "Proposed")
        ins['n']['proposed_socialhousing'] = determineSocialHousing(pa, "Existing")
        tf = "textFields"
        if tf in pa:
          if ("siteArea" in pa[tf]) and ("siteAreaUnit" in pa[tf]):
            ins['n']['sitearea_total'] = determineSiteArea(pa) 
        cs = "commentStats"
        if cs in pa:
          ins['n']['comments_total'] = pa[cs]['Comments Received'] 
          ins['n']['comments_objecting'] = pa[cs]['Objections']
          ins['n']['comments_supporting'] = pa[cs]['Supporting']
        if 'permission_date' in pa:
          ins['s']['permission_date'] = processPermissionDate(pa['permission_date'])
        if 'geo' in pa:
          ins['n']['lat'] = pa['geo']['lat']
          ins['n']['lon'] = pa['geo']['lon']
        sql = setupJsonInsertQuery(ins)
        db['cursor'].execute(sql)
  sqlpointfix = "UPDATE app_ldd.ns_base SET the_geom_pt = ST_SetSRID(ST_MakePoint(lon,lat), 4326) WHERE the_geom_pt IS NULL;"
  db['cursor'].execute(sqlpointfix)
  db['connect'].commit()

def processPermissionDate(pdate):
  mo1 = re.match(r"[A-Za-z]+\s\d\d\s[A-Za-z]+\s\d\d\d\d", pdate, re.I|re.M)
  mo2 = re.match(r"[0-9]+\/[0-9]+\/[0-9]+", pdate, re.I|re.M)
  rtndate = ""
  if mo1: 
    d = datetime.datetime.strptime(pdate, '%a %d %b %Y')
  elif mo2:
    d = datetime.datetime.strptime(pdate, '%d/%m/%Y')
  rtndate = d.strftime('%Y-%m-%d')
  return rtndate

def checkFloorspaceOverAmount(pa, amount):
  fscrit = False
  fscat = ['existing', 'proposed']
  for fs in fscat:
    if 'floorspace' in pa:
      if fs in pa['floorspace']:
        if 'Total' in pa['floorspace'][fs]:
          if pa['floorspace'][fs]['Total'] > amount:
            fscrit = True
  return fscrit

def determineSocialHousing(pa, ht):
  rtn = 0
  h = 'housing'
  if h in pa:
    if ht in pa[h]:
      for k in list(pa[h][ht].keys()):
        mo = re.match(r"(.*ocial.*)", k, re.I|re.M)
        if mo:
          rtn = pa[h][ht][k]
  return rtn  

def determineStatus(pa):
  status = "UNKNOWN"
  print "###STARTING status###"
  if pa['status'] == "Awaiting decision" or pa['status'] == "Under consideration/assessment" or pa['status'] == "REGISTERED":
    status = "UNDECIDED"
  elif pa['status'] == "Withdrawn":
    status = "WITHDRAWN"
  elif "decision" in pa:
    matchstr = [r".*Grant.*", r".+ - GRANTED", r".+cceptable.*", r".+granted.*", r"Approv.+", r".+Auto Permit"]
    status = multiRegexCheck(pa, "decision", matchstr, status, "GRANTED")
    matchstr2 = [r".*Refuse.*", r".+ - REFUSED", r".+refused.*", r"Refusal.*"]
    status = multiRegexCheck(pa, "decision", matchstr2, status, "REFUSED")
    matchstr3 = [r".*Withdrawn.*", r".+withdrawn.*"]
    status = multiRegexCheck(pa, "decision", matchstr3, status, "WITHDRAWN")
    matchstr4 = [r"Completed.*"]
    status = multiRegexCheck(pa, "decision", matchstr4, status, "COMPLETED")
  return status  

def multiRegexCheck(pa, field, matchstr, failedOutcome, passedOutcome):
  rtn = failedOutcome
  for ms in matchstr:
    print "comparing " + ms + " to " + pa[field]
    mo = re.match(ms, pa[field], re.I|re.M)
    if mo:
      rtn = passedOutcome
  return rtn

def determineSiteArea(pa):
  tf = "textFields"
  baseArea = float(pa[tf]['siteArea'])
  areaUnit = pa[tf]['siteAreaUnit']
  finalArea = 0.0
  if areaUnit == "hectares" or areaUnit == "Hectares":
    finalArea = baseArea
  elif areaUnit == "sq.metres" or areaUnit == "Sq. metres":
    finalArea = baseArea/10000
  return finalArea

def setupJsonInsertQuery(ins):
  fields = ""
  values = ""
  for k in list(ins['s'].keys()):
    print "adding " + k  
    print "adding " + k + " : " + ins['s'][k] + " to query " 
    fields += k + ","
    values += "'" + ins['s'][k] + "',"
  for k in list(ins['n'].keys()):
    print "adding " + k  
    print "adding " + k + " : " + str(ins['n'][k]) + " to query " 
    fields += k + ","
    values += str(ins['n'][k]) + ","
  fld = fields[:-1]
  val = values[:-1]
  sql = "INSERT INTO app_ldd.ns_base (" + fld + ") VALUES (" + val + ");"
  return sql

if __name__ == "__main__":
  db = connectToDb()
  tablefile = sys.argv[1]
  checkAndCreateTable("ns_base", tablefile, db)
  arglen = len(sys.argv)
  args = {}
  for a in sys.argv:
    mo = re.match(r"(.+):=(.*)", a, re.M|re.I)
    if mo:
      an = mo.group(1)
      av = mo.group(2)
      args[an] = av
  if "s" in args:
    updateUsingJson(args['s'], db, "permission_id") # update from json using permission_id as comparison field (when would I use this??)
  if "u" in args:
    updateUsingJson(args['u'], db, "borough_ref") # update from json using borough_ref as comparison field
  if "q" in args:
    runQuery(args['q'], db) # runs a stated query
  if "qd" in args:
    runQueryDirectory(args['qd'], db)

## Set of json inserts from scraped records
  if "am" in args:
    insertNewJsonRecords(args, db, "am", "major") # insert 'major' records
  if "at" in args:
    insertNewJsonRecords(args, db, "at", "tree")  # insert tree records
  if "ab" in args:
    insertNewJsonRecords(args, db, "ab", "bat") # insert bat related records