# Source code for pysyrev.network

import networkx as nx
import pandas as pd

from pysyrev.bibdata import BibDataset
from pysyrev.core.config import BibNetworkConfig, BibNetworkExportConfig
from pysyrev.core.network import build_coupling_graph, build_cocitation_graph


[docs] class BibNetwork: """Network analysis built from a BibDataset. Two graph types are available: Bibliographic coupling — documents as nodes, linked when they share common references. See build_coupling_network(). Co-citation — references as nodes, linked when they appear together in the reference list of at least one document. Resolved references that exist in the corpus are marked node_type='internal'; unresolved external references are node_type='external'. See build_cocitation_network(). Both graphs can coexist on the same BibNetwork instance. Usage ----- net = BibNetwork(bib_dataset) net.build_coupling_network() net.build_cocitation_network() G_coupling = net.coupling_graph G_cocit = net.cocitation_graph """ _coupling_config = None _cocitation_config = None _export_config = None _doc_dataset = None _coupling_graph = None _cocitation_graph = None def __init__(self, bib_dataset=None): self._dataset = bib_dataset # ------------------------------------------------------------------ # # Bibliographic coupling # # ------------------------------------------------------------------ #
[docs] def build_coupling_network( self, use_resolved: bool = True, use_unresolved: bool = True, min_shared: int = 1, ) -> 'BibNetwork': """Build (or rebuild) the bibliographic coupling graph. Parameters ---------- use_resolved : bool Use resolved internal reference IDs (requires resolve_references() to have been called on the source dataset). use_unresolved : bool Use raw unresolved reference strings. min_shared : int Minimum shared references to add an edge between two documents. """ self._coupling_graph = build_coupling_graph( self._dataset.dataset, use_resolved=use_resolved, use_unresolved=use_unresolved, min_shared=min_shared, ) return self
@property def coupling_graph(self): if self._coupling_graph is None: raise ValueError( "Coupling graph not built yet — call build_coupling_network() first." ) return self._coupling_graph # ------------------------------------------------------------------ # # Co-citation # # ------------------------------------------------------------------ #
[docs] def build_cocitation_network( self, use_resolved: bool = True, use_unresolved: bool = True, min_cocitations: int = 1, ) -> 'BibNetwork': """Build (or rebuild) the co-citation graph. Parameters ---------- use_resolved : bool Include resolved internal reference IDs as co-citation nodes. use_unresolved : bool Include unresolved raw reference strings as co-citation nodes. min_cocitations : int Minimum co-citation count to add an edge between two references. """ self._cocitation_graph = build_cocitation_graph( self._dataset.dataset, use_resolved=use_resolved, use_unresolved=use_unresolved, min_cocitations=min_cocitations, ) return self
@property def cocitation_graph(self): if self._cocitation_graph is None: raise ValueError( "Co-citation graph not built yet — call build_cocitation_network() first." ) return self._cocitation_graph # ------ bridge from configuration ---------------------------------
[docs] @classmethod def from_config(cls, config: BibNetworkConfig) -> 'BibNetwork': instance = cls() instance._doc_dataset = config.doc_dataset instance._coupling_config = config.coupling_network instance._cocitation_config = config.cocitation_network instance._export_config = config.export return instance
# ------ runtime ---------------------------------------------------
[docs] def run(self, dataset: pd.DataFrame = None) -> 'BibNetwork': """Build coupling and co-citation graphs. Parameters ---------- dataset : pd.DataFrame, optional Reviewed-included dataset. If None, loaded from ``doc_dataset`` (set via config or auto-detected by Config.load). """ if dataset is not None: self._dataset = BibDataset(bib_dataset=dataset) elif self._dataset is None: if not self._doc_dataset: raise ValueError( "No dataset provided: pass a DataFrame to run() or set " "doc_dataset in the bib_network section of your config." ) self._dataset = BibDataset(bib_dataset=pd.read_csv(self._doc_dataset)) cfg = self._coupling_config self.build_coupling_network( use_resolved = cfg.use_resolved if cfg else True, use_unresolved = cfg.use_unresolved if cfg else True, min_shared = cfg.min_shared if cfg else 1, ) cfg = self._cocitation_config self.build_cocitation_network( use_resolved = cfg.use_resolved if cfg else True, use_unresolved = cfg.use_unresolved if cfg else True, min_cocitations = cfg.min_cocitations if cfg else 1, ) return self
[docs] def save(self, export_config: BibNetworkExportConfig = None) -> 'BibNetwork': """Export coupling and co-citation graphs to GraphML files. GraphML is compatible with Gephi, Cytoscape, and networkx. ``shared_refs`` edge sets are serialised as semicolon-separated strings. Falls back to the export config provided at construction time (from YAML). """ cfg = export_config or self._export_config if cfg is None: raise ValueError( "No export config: set bib_network.export in your config or " "pass a BibNetworkExportConfig to save()." ) cfg.resolve() if self._coupling_graph is not None: graph = self._coupling_graph.copy() for _, _, data in graph.edges(data=True): if isinstance(data.get('shared_refs'), set): data['shared_refs'] = '; '.join(sorted(data['shared_refs'])) nx.write_graphml(graph, cfg.coupling_graph) if self._cocitation_graph is not None: nx.write_graphml(self._cocitation_graph, cfg.cocitation_graph) return self
# ------------------------------------------------------------------ # # Generic stats # # ------------------------------------------------------------------ # @property def n_coupling_nodes(self) -> int: return self._coupling_graph.number_of_nodes() if self._coupling_graph is not None else 0 @property def n_coupling_edges(self) -> int: return self._coupling_graph.number_of_edges() if self._coupling_graph is not None else 0 @property def n_cocitation_nodes(self) -> int: return self._cocitation_graph.number_of_nodes() if self._cocitation_graph is not None else 0 @property def n_cocitation_edges(self) -> int: return self._cocitation_graph.number_of_edges() if self._cocitation_graph is not None else 0