# Source code for pysyrev.network
import networkx as nx
import pandas as pd
from pysyrev.bibdata import BibDataset
from pysyrev.core.config import BibNetworkConfig, BibNetworkExportConfig
from pysyrev.core.network import build_coupling_graph, build_cocitation_graph
# [docs]
class BibNetwork:
    """Network analysis built from a BibDataset.

    Two graph types are available:

    * Bibliographic coupling — documents as nodes, linked when they share
      common references. See build_coupling_network().
    * Co-citation — references as nodes, linked when they appear together
      in the reference list of at least one document. Resolved references
      that exist in the corpus are marked node_type='internal'; unresolved
      external references are node_type='external'. See
      build_cocitation_network().

    Both graphs can coexist on the same BibNetwork instance.

    Usage
    -----
    net = BibNetwork(bib_dataset)
    net.build_coupling_network()
    net.build_cocitation_network()
    G_coupling = net.coupling_graph
    G_cocit = net.cocitation_graph
    """

    # Class-level defaults: an instance that was not created through
    # from_config() simply has no configuration, and no graph is available
    # until the matching build_*_network() method has been called.
    _coupling_config = None
    _cocitation_config = None
    _export_config = None
    _doc_dataset = None
    _coupling_graph = None
    _cocitation_graph = None

    def __init__(self, bib_dataset=None):
        """Wrap *bib_dataset*; it may be None and supplied later via run()."""
        self._dataset = bib_dataset

    # ------------------------------------------------------------------ #
    # Internal helpers                                                   #
    # ------------------------------------------------------------------ #

    def _require_dataset(self):
        """Return the attached dataset wrapper.

        Raises
        ------
        ValueError
            If no dataset has been attached yet. Raised here so callers get
            an actionable message instead of an AttributeError on None.
        """
        if self._dataset is None:
            raise ValueError(
                "No dataset attached — construct BibNetwork with a BibDataset "
                "or call run() before building a network."
            )
        return self._dataset

    @staticmethod
    def _join_refs(refs) -> str:
        """Serialise a collection of references as a '; '-separated string.

        Members are converted with str() before sorting so that non-string
        or mixed-type identifiers neither break the sort nor the join.
        """
        return '; '.join(sorted(map(str, refs)))

    # ------------------------------------------------------------------ #
    # Bibliographic coupling                                             #
    # ------------------------------------------------------------------ #

    def build_coupling_network(
        self,
        use_resolved: bool = True,
        use_unresolved: bool = True,
        min_shared: int = 1,
    ) -> 'BibNetwork':
        """Build (or rebuild) the bibliographic coupling graph.

        Parameters
        ----------
        use_resolved : bool
            Use resolved internal reference IDs (requires resolve_references()
            to have been called on the source dataset).
        use_unresolved : bool
            Use raw unresolved reference strings.
        min_shared : int
            Minimum shared references to add an edge between two documents.

        Returns
        -------
        BibNetwork
            This instance, to allow call chaining.

        Raises
        ------
        ValueError
            If no dataset is attached to this instance.
        """
        # Validate before touching the builder so the failure is explicit.
        source = self._require_dataset()
        self._coupling_graph = build_coupling_graph(
            source.dataset,
            use_resolved=use_resolved,
            use_unresolved=use_unresolved,
            min_shared=min_shared,
        )
        return self

    @property
    def coupling_graph(self):
        """The coupling graph; raises ValueError if it has not been built."""
        if self._coupling_graph is None:
            raise ValueError(
                "Coupling graph not built yet — call build_coupling_network() first."
            )
        return self._coupling_graph

    # ------------------------------------------------------------------ #
    # Co-citation                                                        #
    # ------------------------------------------------------------------ #

    def build_cocitation_network(
        self,
        use_resolved: bool = True,
        use_unresolved: bool = True,
        min_cocitations: int = 1,
    ) -> 'BibNetwork':
        """Build (or rebuild) the co-citation graph.

        Parameters
        ----------
        use_resolved : bool
            Include resolved internal reference IDs as co-citation nodes.
        use_unresolved : bool
            Include unresolved raw reference strings as co-citation nodes.
        min_cocitations : int
            Minimum co-citation count to add an edge between two references.

        Returns
        -------
        BibNetwork
            This instance, to allow call chaining.

        Raises
        ------
        ValueError
            If no dataset is attached to this instance.
        """
        # Validate before touching the builder so the failure is explicit.
        source = self._require_dataset()
        self._cocitation_graph = build_cocitation_graph(
            source.dataset,
            use_resolved=use_resolved,
            use_unresolved=use_unresolved,
            min_cocitations=min_cocitations,
        )
        return self

    @property
    def cocitation_graph(self):
        """The co-citation graph; raises ValueError if it has not been built."""
        if self._cocitation_graph is None:
            raise ValueError(
                "Co-citation graph not built yet — call build_cocitation_network() first."
            )
        return self._cocitation_graph

    # ------ bridge from configuration ---------------------------------

    @classmethod
    def from_config(cls, config: 'BibNetworkConfig') -> 'BibNetwork':
        """Create an instance carrying the settings found in *config*.

        No dataset is attached and no graph is built here; call run() to
        load the data and build both graphs.
        """
        instance = cls()
        instance._doc_dataset = config.doc_dataset
        instance._coupling_config = config.coupling_network
        instance._cocitation_config = config.cocitation_network
        instance._export_config = config.export
        return instance

    # ------ runtime ---------------------------------------------------

    def run(self, dataset: 'pd.DataFrame | None' = None) -> 'BibNetwork':
        """Build coupling and co-citation graphs.

        Parameters
        ----------
        dataset : pd.DataFrame, optional
            Reviewed-included dataset. If None, loaded from ``doc_dataset``
            (set via config or auto-detected by Config.load).

        Returns
        -------
        BibNetwork
            This instance, to allow call chaining.

        Raises
        ------
        ValueError
            If no DataFrame is passed, no dataset is already attached and
            ``doc_dataset`` is not set.
        """
        if dataset is not None:
            # An explicit DataFrame always replaces whatever was attached.
            self._dataset = BibDataset(bib_dataset=dataset)
        elif self._dataset is None:
            if not self._doc_dataset:
                raise ValueError(
                    "No dataset provided: pass a DataFrame to run() or set "
                    "doc_dataset in the bib_network section of your config."
                )
            self._dataset = BibDataset(bib_dataset=pd.read_csv(self._doc_dataset))

        # Fall back to the build methods' own defaults when no config is set.
        cfg = self._coupling_config
        self.build_coupling_network(
            use_resolved=cfg.use_resolved if cfg else True,
            use_unresolved=cfg.use_unresolved if cfg else True,
            min_shared=cfg.min_shared if cfg else 1,
        )

        cfg = self._cocitation_config
        self.build_cocitation_network(
            use_resolved=cfg.use_resolved if cfg else True,
            use_unresolved=cfg.use_unresolved if cfg else True,
            min_cocitations=cfg.min_cocitations if cfg else 1,
        )
        return self

    def save(self, export_config: 'BibNetworkExportConfig | None' = None) -> 'BibNetwork':
        """Export coupling and co-citation graphs to GraphML files.

        GraphML is compatible with Gephi, Cytoscape, and networkx.
        ``shared_refs`` edge sets are serialised as '; '-separated strings.
        Falls back to the export config provided at construction time (from YAML).
        Graphs that have not been built are skipped.

        Returns
        -------
        BibNetwork
            This instance, to allow call chaining.

        Raises
        ------
        ValueError
            If neither *export_config* nor a stored export config is available.
        """
        cfg = export_config or self._export_config
        if cfg is None:
            raise ValueError(
                "No export config: set bib_network.export in your config or "
                "pass a BibNetworkExportConfig to save()."
            )
        cfg.resolve()

        if self._coupling_graph is not None:
            # Convert on a copy so the in-memory graph keeps its set-valued
            # attribute; GraphML cannot store Python sets.
            graph = self._coupling_graph.copy()
            for _, _, data in graph.edges(data=True):
                refs = data.get('shared_refs')
                if isinstance(refs, (set, frozenset)):
                    data['shared_refs'] = self._join_refs(refs)
            nx.write_graphml(graph, cfg.coupling_graph)

        if self._cocitation_graph is not None:
            nx.write_graphml(self._cocitation_graph, cfg.cocitation_graph)
        return self

    # ------------------------------------------------------------------ #
    # Generic stats                                                      #
    # ------------------------------------------------------------------ #

    @property
    def n_coupling_nodes(self) -> int:
        """Number of nodes in the coupling graph, or 0 if it is not built."""
        return self._coupling_graph.number_of_nodes() if self._coupling_graph is not None else 0

    @property
    def n_coupling_edges(self) -> int:
        """Number of edges in the coupling graph, or 0 if it is not built."""
        return self._coupling_graph.number_of_edges() if self._coupling_graph is not None else 0

    @property
    def n_cocitation_nodes(self) -> int:
        """Number of nodes in the co-citation graph, or 0 if it is not built."""
        return self._cocitation_graph.number_of_nodes() if self._cocitation_graph is not None else 0

    @property
    def n_cocitation_edges(self) -> int:
        """Number of edges in the co-citation graph, or 0 if it is not built."""
        return self._cocitation_graph.number_of_edges() if self._cocitation_graph is not None else 0