cancel
Showing results for 
Search instead for 
Did you mean: 

Heads up! These forums are read-only. All users and content have migrated. Please join us at community.neo4j.com.

How to build dependency graphs effectively in neo4j with python?

My main code is shown below. Can anyone give me some advice on how to speed up the construction of the dependency graph?

Sincere thanks!

from neo4j import GraphDatabase
# Crawl the npm registry breadth-first starting from `root_name` and mirror
# every package and "depends on" edge into Neo4j.
#
# `uri`, `user`, `password`, `create_basic`, `_create_dependency` and
# `check_neo4j` must already be defined when this script section runs.
driver = GraphDatabase.driver(uri, auth=(user, password), max_connection_lifetime=3600 * 24 * 30, keep_alive=True)
url = 'https://registry.npmjs.org/'
pro_queue = queue.Queue()  # pkg names to get information and store in neo4j
seen = set()  # record pkgs in the pro_queue

root_name = 'express'
pro_queue.put(root_name)
seen.add(root_name)

while not pro_queue.empty():
    # Take the next pending package off the queue; without this the loop
    # would never make progress.
    pkg_name = pro_queue.get()

    # Fetch the metadata of this specific package (registry root + name).
    # A timeout keeps one stalled request from hanging the whole crawl, and
    # raise_for_status() surfaces HTTP errors instead of a confusing
    # KeyError on the error payload further down.
    response = requests.get(url + pkg_name, timeout=30)
    response.raise_for_status()
    pkg_json = response.json()
    parent_name = pkg_json['name']

    with driver.session() as session:
        # create node, add its information in 'pkg_json'
        session.write_transaction(create_basic, pkg_json)
        # A package without dependencies may omit the key entirely (or
        # carry a null value); treat both as "no children".
        for child_name in pkg_json.get('dependencies') or {}:
            session.write_transaction(_create_dependency, parent_name, child_name)
            # Only enqueue packages that are neither pending/processed in
            # this run nor reported as present by check_neo4j().
            if child_name not in seen and not check_neo4j(child_name):
                pro_queue.put(child_name)
                seen.add(child_name)

The two transaction functions used above:

def create_basic(tx, pkg_json):
    """Create or update the ``Pkg`` node that represents one package.

    The node is merged on ``name`` and its ``id`` is set afterwards.
    ``_create_dependency`` creates child nodes that carry only a ``name``;
    merging on ``id`` here would not match such a node and would leave two
    ``Pkg`` nodes for the same package.

    Args:
        tx: Transaction object exposing ``run(query, **params)``.
        pkg_json: Mapping that provides at least the keys ``'id'`` and
            ``'name'``.

    Returns:
        A list with one ``{"pkg ": <node id>}`` dict per returned record.

    Raises:
        ServiceUnavailable: Logged and re-raised when running the query or
            consuming its result fails with this error.
    """
    pkg_id = pkg_json['id']
    pkg_name = pkg_json['name']
    query = (
        "MERGE (p1:Pkg {name: $pkg_name}) SET p1.id = $pkg_id "
        "RETURN p1"
    )
    try:
        # Run the query inside the try block so that a failure while
        # sending it is logged as well, not only a failure while reading
        # the records.
        result = tx.run(query, pkg_id=pkg_id, pkg_name=pkg_name)
        # NOTE(review): the key "pkg " has a trailing space; it is kept
        # unchanged so the return shape stays the same for callers.
        return [{"pkg ": record["p1"]["id"]} for record in result]
    except ServiceUnavailable as exception:
        logging.error(
            "CREATE BASIC- %s raised an error: \n %s", query, exception)
        raise


def _create_dependency(tx, parent_name, child_name):
    query = (
        "MATCH (p1:Pkg{name:$parent_name}) "
        "MERGE(p2:Pkg{name:$child_name}) "
        "MERGE (p1)-[depend:DependOn]->(p2) "
        "RETURN p1,p2"
    )
    result = tx.run(query, parent_name = parent_name, child_name = child_name)
    try:
        return [{"pkg": record["p1"]["id"], 'dependcy': record["p2"]["id"]}
                for record in result]
    except ServiceUnavailable as exception:
        logging.error("CREATE DEPENDENCY- {query} raised an error: \n {exception}".format(
            uery=query, exception=exception))
        raise
0 REPLIES 0
Nodes 2022
Nodes
NODES 2022, Neo4j Online Education Summit

All the sessions of the conference are now available online