Coverage for src / graphable / parsers / csv.py: 97%

35 statements  

« prev     ^ index     » next       coverage.py v7.13.3, created at 2026-02-16 21:32 +0000

1from csv import reader as csv_reader 

2from io import StringIO 

3from logging import getLogger 

4from pathlib import Path 

5from typing import Any 

6 

7from ..graph import Graph 

8from ..registry import register_parser 

9from .utils import build_graph_from_data, is_path 

10 

11logger = getLogger(__name__) 

12 

13 

@register_parser(".csv")
def load_graph_csv(source: str | Path, reference_type: type = str) -> Graph[Any]:
    """
    Load a Graph from a CSV edge list.

    Every data row contributes one edge taken from its first two columns
    (source, then target); additional columns are ignored and rows with
    fewer than two columns are skipped. If the first row is exactly
    ``source,target`` it is treated as a header and not as data.

    Args:
        source: CSV string or path to a CSV file.
        reference_type: The type to cast the reference string to
            (forwarded to ``build_graph_from_data``).

    Returns:
        Graph: The loaded Graph instance. An empty Graph is returned when
        the input contains no rows.
    """
    if is_path(source):
        with open(source, "r", newline="") as f:
            content = f.read()
    else:
        content = str(source)

    # Strip surrounding whitespace so leading/trailing blank lines do not
    # show up as empty rows.
    reader = csv_reader(StringIO(content.strip()))

    first_row = next(reader, None)
    if not first_row:
        return Graph()

    # The first row is data unless it is the standard header.
    data_rows = [] if first_row == ["source", "target"] else [first_row]
    data_rows.extend(reader)

    # Apply the same "at least two columns" rule to every row, including the
    # first one, so a short first row is skipped instead of raising IndexError.
    edges_data = [
        {"source": row[0], "target": row[1]} for row in data_rows if len(row) >= 2
    ]

    # Collect unique node IDs in first-seen order; a dict (unlike a set) keeps
    # the resulting node order deterministic across runs.
    node_ids = dict.fromkeys(
        nid for edge in edges_data for nid in (edge["source"], edge["target"])
    )
    nodes_data = [{"id": nid} for nid in node_ids]

    g = build_graph_from_data(nodes_data, edges_data, reference_type)
    logger.info(f"Loaded graph with {len(g)} nodes from CSV.")
    return g