Head's Up! These forums are read-only. All users and content have migrated. Please join us at community.neo4j.com.
02-13-2021 05:11 AM
Hi everyone,
I'm trying to load in some files using the Neo4j python driver. the problem is that it skips over creating some nodes without throwing an error. It creates all the patient nodes but then doesnt create encounter nodes.
Does it overwrite my commit? or where am I going wrong?
I'm using the open healthcare Synthea csv dataset
My code:
from neo4j import GraphDatabase
import csv
from datetime import datetime, date
from math import floor
import pandas as pd
import os
csvprefix = "synthea_sample_data_csv_apr2020/csv/"
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "Neo4ja"))
def create_patient(tx, patient):
patient = list(patient)
id = patient[0]
birthday = str(patient[1])
agegroup = calc_agegroup(patient[1])
gender = patient[14]
#print(f"create(p =(Patient {{ id:'{id}', birthday: {birthday} }})-[:is_gender]->({gender})) create(p)-[:in_agegroup]-({agegroup})")
print(f"CREATE(p:Patient) SET p.id = {id} SET p.DOB = date({birthday})")
try:
result = tx.run(f"CREATE(p:Patient) SET p.id = '{id}' SET p.DOB = date('{birthday}') \
MERGE (a:Ages_{agegroup}) \
MERGE (g:Gender_{gender}) \
CREATE (p)-[:in_agegroup]->(a) \
CREATE (p)-[:has_gender]->(g) \
")
except Exception as e:
print("failed "+ patient[0])
print(e)
return 0
return 1
def create_encounter(tx, row):
#print(row)
Id,START,STOP,PATIENT,ORGANIZATION,PROVIDER,PAYER,ENCOUNTERCLASS,CODE,DESCRIPTION,BASE_ENCOUNTER_COST,TOTAL_CLAIM_COST,PAYER_COVERAGE,REASONCODE,REASONDESCRIPTION = row
try:
#MATCH (e2:Encounter) ORDER BY 'stop' RETURN e2 LIMIT 1) \ MATCH (p:Patient {{id: "{PATIENT}"}})-[:had_encounter]-(e2:Encounter) return e2 ORDER BY e2.stop DESC limit 1 \
#with e2, e CREATE (e2)-[:next]->(e) \ WITH e MATCH (p:Patient {{id: "{PATIENT}"}}) \
result = tx.run(f'\
MATCH (p:Patient {{id: "{Id}"}})-[:had_encounter]->(:Encounter)-[r:to_latest]->(e2:Encounter) \
WITH p, r, e2 MERGE (e:Encounter {{name: "{Id}", start: "{START}", stop: "{STOP}", encounterclass: "{ENCOUNTERCLASS}", code: "{CODE}", description: "{DESCRIPTION}", reasoncode: "{REASONCODE}", reason: "{REASONDESCRIPTION}"}}) \
CREATE (p)-[:had_encounter]->(e) \
CREATE (e2)-[:to_latest]->(e) \
CREATE (e2)-[:next]->(e) \
DELETE r \
')
print(f'create encounter: {Id}')
except Exception as e:
print("failed "+ Id)
print(e)
return 0
return 1
def create_file(tx, cfile, relation=None):
path = os.path.abspath(cfile)
name = os.path.basename(path)
storeDate = date.today()
data = pd.read_csv(open(cfile), sep=None)
dimensions = f"{data.shape[0]} Rows, {data.shape[1]} Columns"
try:
result = tx.run(f"MERGE(f:File) SET f.name = '{name}' SET f.storeDate = date('{storeDate}') SET f.location = '{path}' SET f.dimensions = '{dimensions}' \
WITH f MATCH {relation} \
CREATE (p)-[:from_file]->(f) \
")
except Exception as e:
print("failed "+ cfile)
print(e)
return 0
return 1
def calc_agegroup(birthday):
today = date.today()
bday = datetime.strptime(birthday, '%Y-%m-%d')
age = today.year - bday.year - ((today.month, today.day) < (bday.month, bday.day))
lowerbound = (floor(age / 10) * 10)
upperbound = (lowerbound + 10)
return f"{lowerbound}_{upperbound}"
def main():
failed = []
with driver.session() as session:
with open(csvprefix + 'patients.csv', 'r') as f:
reader = csv.reader(f)
for num, row in enumerate(reader):
if num == 0:
pass
else:
#pass
#print(row)
create_file(session, csvprefix + 'patients.csv', "(p:Patient {id: '"+list(row)[0]+"'})")
result = create_patient(session, row)
if result == 0:
failed.append(row)
print(f"failed: {len(failed)}")
with open(csvprefix + 'encounters.csv', 'r') as f:
create_file(session, csvprefix + 'encounters.csv', "(e:Encounter {id: '"+list(row)[0]+"'})")
reader = csv.reader(f)
reader.__next__
for row in reader:
create_encounter(session, row)
#print(Id)
main()
driver.close()
Hope someone can teach me what I'm doing wrong.
Regards,
Julian
02-14-2021 02:49 PM
One possible issue (I'm not 100% sure), is if the Id is actually an integer in this python statement:
id = patient[0]
and some cypher, later you have this statement which looks like you're assigning an integer:
SET p.id = {id}
Then this MATCH will fail because it's explicitly a string:
MATCH (p:Patient {{id: "{Id}"}})
and a string isn't going to match an integer.
02-15-2021 12:11 AM
Hi,
Thank you for the reply, I don't think this is the issue though as id is a string of characters in all cases. I did try it but it just complained about an invalid literal in the match instead.
I'm really stumped on what is going wrong here, I don't get any errors but it just does not create the nodes.
02-15-2021 08:56 AM
Then, it could be the second MATCH has a quote within the quote.
I'd recommend doing a print statement of Id each time before you use it (or go into the debugger)
That is, do something like:
cypher = f"CREATE(p:Patient) SET p.id = '{id}' SET p.DOB = date('{birthday}') \
MERGE (a:Ages_{agegroup}) \
MERGE (g:Gender_{gender}) \
CREATE (p)-[:in_agegroup]->(a) \
CREATE (p)-[:has_gender]->(g) \
"
print(cypher)
try:
result = tx.run(cyphyer)
At least, then you can copy paste the Cypher statements and see if they work within the Neo4J browser. That way, you can separate out whether Cypher vs. neo4J driver is the cause of your problem.
All the sessions of the conference are now available online