Skip to content

Commit ace1ff2

Browse files
authored
Merge pull request #861 from OpenKnowledgeMaps/bugfix/separate-field-instead-of-merging-the-link-one
bugfix: separate field instead of merging the link one
2 parents fadade2 + 2b22d69 commit ace1ff2

8 files changed

Lines changed: 59 additions & 4 deletions

File tree

server/workers/base/src/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ def filter_duplicates(df, service, params):
251251
df["doi_duplicate"] = False
252252
df["has_relations"] = False
253253
df["link_duplicate"] = False
254+
df["pdf_link_candidates_from_duplicates"] = ""
254255
df["duplicates"] = df.apply(
255256
lambda x: ",".join([x["id"], x["duplicates"]])
256257
if len(x["duplicates"].split(",")) >= 1

server/workers/common/common/enrichment.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,9 @@ def apply_oa_state_improvements(df, anchor_idx, accumulator):
261261

262262
def apply_link_improvements(df, anchor_idx, all_links):
263263
"""
264-
Applies improvements for link to the anchor element.
264+
Applies improvements for link to the anchor element: set in
265+
pdf_link_candidates_from_duplicates column if there are any links
266+
from duplicates that can be used for PDF lookup.
265267
266268
Args:
267269
df: DataFrame with data
@@ -271,5 +273,20 @@ def apply_link_improvements(df, anchor_idx, all_links):
271273
if all_links:
272274
unique_links = deduplicate_links(all_links)
273275
if unique_links:
274-
merged_links = '; '.join(sorted(unique_links))
275-
df.loc[anchor_idx, 'link'] = merged_links
276+
anchor_link = get_anchor_field_value(df, anchor_idx, 'link')
277+
unique_links_without_anchor_link = [x for x in unique_links if x != anchor_link]
278+
279+
merged_links = '; '.join(sorted(unique_links_without_anchor_link))
280+
df.loc[anchor_idx, 'pdf_link_candidates_from_duplicates'] = merged_links
281+
282+
def get_anchor_field_value(df, anchor_idx, column_name):
    """
    Returns the value of the given column for the anchor row, or None if
    the column is missing or the value is empty/NaN.

    Args:
        df: DataFrame with data
        anchor_idx: index label of the anchor row (looked up with df.loc)
        column_name: name of the column to read

    Returns:
        The cell value as stored in the DataFrame, or None when the column
        does not exist, the value is NaN/None/NA, or the value is a string
        that is empty or consists of whitespace only.
    """
    if column_name not in df.columns:
        return None

    value = df.loc[anchor_idx, column_name]

    # Strings are checked first: a whitespace-only string carries no usable
    # value, so it is treated the same way as an empty string.
    if isinstance(value, str):
        return value if value.strip() else None

    # pd.isna() returns an array for non-scalar input, which cannot be used
    # in a boolean context; only apply the missing-value check to scalars.
    if pd.api.types.is_scalar(value):
        return None if pd.isna(value) else value

    # Non-scalar cell content (e.g. a list) is returned unchanged.
    return value

server/workers/orcid/src/orcid_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -400,7 +400,7 @@ def enrich_metadata_with_base(self, params: Dict[str, str], metadata: pd.DataFra
400400
'relations', 'annotations', 'repo', 'source', 'volume', 'issue', 'page', 'issn',
401401
'citation_count', 'cited_by_wikipedia_count', 'cited_by_msm_count', 'cited_by_policies_count',
402402
'cited_by_patents_count', 'cited_by_accounts_count', 'cited_by_fbwalls_count',
403-
'merged_dois',
403+
'merged_dois', 'pdf_link_candidates_from_duplicates',
404404
'cited_by_feeds_count',
405405
'cited_by_gplus_count',
406406
'cited_by_rdts_count',

vis/js/dataprocessing/managers/DataManager.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
getListLink,
1515
getOpenAccessLink,
1616
getOutlink,
17+
getPdfLinkCandidatesFromDuplicates,
1718
getValueOrZero,
1819
getVisibleMetric,
1920
isOpenAccess,
@@ -257,6 +258,9 @@ class DataManager {
257258
paper.oa_link = getOpenAccessLink(paper, this.config);
258259
paper.outlink = getOutlink(paper, this.config);
259260
paper.list_link = getListLink(paper, this.config, this.context);
261+
262+
paper.pdf_link_candidates_from_duplicates =
263+
getPdfLinkCandidatesFromDuplicates(paper);
260264
}
261265

262266
__parseComments(paper: any) {

vis/js/types/models/paper.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ export interface CommonPaperDataForAllIntegrations {
6868
zoomedY: number;
6969
zoomedWidth: number;
7070
zoomedHeight: number;
71+
72+
pdf_link_candidates_from_duplicates: string[] | null;
7173
}
7274

7375
export interface PubmedPaper extends CommonPaperDataForAllIntegrations {

vis/js/utils/data.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,25 @@ export const getListLink = (paper, config, context) => {
285285
return {};
286286
};
287287

288+
/**
 * Parses the paper's PDF link candidates from duplicates into an array of strings.
 *
 * The candidates arrive as a single string in which the individual links are
 * separated by a semicolon followed by optional whitespace (for example
 * `"https://a.org/1; https://b.org/2"`). Each link is trimmed so that the
 * separator's whitespace does not leak into the result, and empty pieces
 * (caused by stray or trailing separators) are dropped.
 *
 * @param paper paper object; its `pdf_link_candidates_from_duplicates`
 * property is read and expected to be a string
 *
 * @returns array of trimmed, non-empty links, or null if the property is not
 * a string or contains no usable link
 */
export const getPdfLinkCandidatesFromDuplicates = (paper): string[] | null => {
  const rawCandidates = paper.pdf_link_candidates_from_duplicates;

  if (typeof rawCandidates !== "string") {
    return null;
  }

  const links = rawCandidates
    .split(";")
    // Remove the whitespace that follows each separator; otherwise it would
    // later be URL-encoded as part of the link.
    .map((link) => link.trim())
    .filter((link) => link.length > 0);

  return links.length > 0 ? links : null;
};
306+
288307
/**
289308
* Parses the paper's authors string into an object array.
290309
*

vis/js/utils/usePdfLookup.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,12 @@ const usePdfLookup = (paper: Paper, serverUrl: string, service: string) => {
5050
let possiblePDFs = "";
5151
let fallbackUrl = "";
5252
if (service === "base") {
53+
let pdfLinkCandidatesFromDuplicates = null;
54+
55+
if ("pdf_link_candidates_from_duplicates" in paper) {
56+
pdfLinkCandidatesFromDuplicates = paper.pdf_link_candidates_from_duplicates as string[] | null;
57+
}
58+
5359
possiblePDFs =
5460
encodeURIComponent(paper.link) +
5561
";" +
@@ -59,6 +65,10 @@ const usePdfLookup = (paper: Paper, serverUrl: string, service: string) => {
5965
.split("; ")
6066
.map((x) => encodeURIComponent(x))
6167
.join("; ");
68+
69+
if (pdfLinkCandidatesFromDuplicates) {
70+
possiblePDFs += ";" + pdfLinkCandidatesFromDuplicates.map((x: string) => encodeURIComponent(x)).join("; ");
71+
}
6272
}
6373

6474
if (service === "openaire") {

vis/test/data/papers.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ const MOCK_COMMON_PAPER_DATA: CommonPaperDataForAllIntegrations = {
5555
zoomedY: 1,
5656
zoomedWidth: 1,
5757
zoomedHeight: 1,
58+
59+
pdf_link_candidates_from_duplicates: null,
5860
};
5961

6062
export const MOCK_BASE_PAPER_DATA: BasePaper = {

0 commit comments

Comments
 (0)