please adapt both attached bash
doian.sh
SH 1.83KB
doipl.sh
SH 1.23KB
Please adapt both attached bash scripts to add q# (where # is a sequential number, followed by a comma) before the citation text, whether or not the citation has a DOI. Everything else should remain the same. For example: q1, {1} Burdușel A-C, Gherasim O, Grumezescu AM, Mogoantă L, Ficai A, Andronescu E. Biomedical applications of silver nanoparticles: An up-to-date overview. Nanomaterials. 2018;8(9):681.

To adapt both bash scripts to include q# (where # is a sequential number followed by a comma) before the citation text, you can modify the printf statements in both scripts. Here's how you can do it:

doian.sh Modifications

bash
Copy
#!/bin/bash

# Extract citations, with or without DOIs, from a saved HTML reference list.
# issn: 2581-9933 | 2581-9178, 0976-5042 | 0976-5050
#
# Each reference is expected to be an <li> element whose text starts with
#   <a name="ID-N"></a><strong>N</strong>
# optionally followed by a <div class="referenceLinks"> section that carries
# the DOI in a data-url="https://doi.org/..." attribute.
#
# Output, one line per citation:
#   q<N>,    {<N>} <citation text> [<chooser link> ! <crossmark link>]
# where <N> is a sequential number starting at 1.

# Input and output file paths
input_file="/storage/emulated/0/ccbys/up/doi.txt"
output_file="/storage/emulated/0/ccbys/up/oudoi.txt"

# Fail early with a clear message instead of letting awk report the problem.
if [[ ! -r "$input_file" ]]; then
    echo "Error: Cannot read input file $input_file." >&2
    exit 1
fi

# Extract and clean the required data
awk '
BEGIN {
    # Multi-character RS is not POSIX, but gawk, mawk and busybox awk accept it.
    RS = "<li>";                               # One record per <li> element
    FS = "<div class=\"referenceLinks\">";     # $1 = citation, $2 = link section
    count = 0;                                 # Citations printed so far
}
{
    entry = "";
    doi_number = "";

    # Match entries starting with <a name="ALPHANUMERIC-NUMBER"></a><strong>...</strong>
    if ($1 ~ /<a name="[A-Za-z0-9]+-[0-9]+"><\/a><strong>[0-9]+<\/strong>/) {
        entry = $1;
        gsub(/<[^>]*>/, "", entry);                  # Remove all HTML tags
        gsub(/[ \t\n]+/, " ", entry);                # Collapse whitespace runs
        gsub(/^[ \t]*[0-9]+[ \t]*/, "", entry);      # Drop the original reference number
        gsub(/^[ \t]+|[ \t]+$/, "", entry);          # Trim both ends
    }

    # Extract the DOI from the data-url attribute of the link section
    if ($2 ~ /data-url="https:\/\/doi\.org\//) {
        doi_number = $2;
        sub(/.*data-url="https:\/\/doi\.org\//, "", doi_number);  # Remove prefix
        sub(/".*/, "", doi_number);                               # Cut at closing quote
    }

    # Print the entry with the correct formatting
    if (entry != "") {
        # Increment in its own statement: awk does not define the order in
        # which printf arguments are evaluated, so "count, count++" could
        # print two different numbers on some implementations.
        count++;
        if (doi_number != "") {
            printf "q%d,    {%d} %s https://chooser.crossref.org/?doi=%s ! https://crossmark.crossref.org/dialog?doi=%s\n", count, count, entry, doi_number, doi_number;
        } else {
            printf "q%d,    {%d} %s\n", count, count, entry;
        }
    }
}' "$input_file" > "$output_file" || {
    echo "Error: awk failed while processing $input_file." >&2
    exit 1
}

if [[ ! -s "$output_file" ]]; then
    echo "Error: No data extracted. Please check the input file and patterns." >&2
    exit 1
fi

echo "Data has been extracted to $output_file."

doipl.sh Modifications

bash
Copy
#!/bin/bash

# Extract citations, with or without DOIs, from a saved HTML reference list.
# issn: 2321-9971 | 2347-4688, 2394-0565 | 0973-3469, 2322-0007 | 2347-467X
#
# Input lines are accumulated until one contains </li>; the accumulated text
# is then treated as one citation. The first https://doi.org/<doi> URL found
# in it (if any) supplies the DOI.
#
# Output, one line per citation:
#   q<N>,    {<N>} <citation text> [<chooser link> ! <crossmark link>]
# where <N> is a sequential number starting at 1.

# Input and output file paths
input_file="/storage/emulated/0/ccbys/up/doi.txt"
output_file="/storage/emulated/0/ccbys/up/oudoi.txt"

# Fail early with a clear message instead of letting awk report the problem.
if [[ ! -r "$input_file" ]]; then
    echo "Error: Cannot read input file $input_file." >&2
    exit 1
fi

# Process the file
awk '
BEGIN {
    doi_prefix_len = length("https://doi.org/");
    buffer = "";
    count = 0;   # Citations printed so far
}
{
    # Join physical lines with a space (a newline is whitespace in HTML);
    # runs of whitespace are collapsed further down.
    buffer = buffer " " $0;

    if ($0 ~ /<\/li>/) {
        line = buffer;
        buffer = "";

        # Two-argument match() with RSTART/RLENGTH is portable; the
        # three-argument form with a capture array is a gawk extension.
        # The DOI stops at a quote, angle bracket or whitespace so that a
        # URL used as link text does not swallow the following "</a".
        doi = "";
        if (match(line, /https:\/\/doi\.org\/[0-9]+\.[^"<> \t]+/)) {
            doi = substr(line, RSTART + doi_prefix_len, RLENGTH - doi_prefix_len);
            gsub(/https:\/\/doi\.org\/[0-9]+\.[^"<> \t]+/, "", line);
        }

        gsub(/<\/?[^>]+>/, "", line);                   # Strip HTML tags
        gsub(/&amp;/, "\\&", line);                     # "\\&" is a literal ampersand
        gsub(/CrossRef/, "", line);                     # Drop the link label
        gsub(/\302\240/, " ", line);                    # UTF-8 no-break space -> space
        gsub(/\342\200\216|\342\200\217/, "", line);    # UTF-8 LRM / RLM marks

        if (doi != "") {
            # Remove "doi:<doi>" as a literal string; a DOI may contain
            # characters that are special in a regular expression.
            needle = "doi:" doi;
            while ((pos = index(line, needle)) > 0) {
                line = substr(line, 1, pos - 1) substr(line, pos + length(needle));
            }
        }

        # Normalise whitespace before the links are appended.
        gsub(/[ \t\r]+/, " ", line);
        gsub(/^ +| +$/, "", line);

        if (doi != "") {
            line = line " https://chooser.crossref.org/?doi=" doi " ! https://crossmark.crossref.org/dialog?doi=" doi;
        }

        if (length(line) > 0) {
            # Increment in its own statement: awk does not define the order
            # in which printf arguments are evaluated, so "count, count++"
            # could print two different numbers on some implementations.
            count++;
            printf "q%d,    {%d} %s\n", count, count, line;
        }
    }
}' "$input_file" > "$output_file" || {
    echo "Error: awk failed while processing $input_file." >&2
    exit 1
}

if [[ ! -s "$output_file" ]]; then
    echo "Error: No data extracted. Please check the input file and patterns." >&2
    exit 1
fi

echo "Processing completed. Output saved to $output_file."

Explanation of Changes:

  1. doian.sh:

    • The printf statement now includes q%d, before the citation text, where %d is the sequential number.

    • The count variable is used to generate the sequential number.

  2. doipl.sh:

    • Similarly, the printf statement now includes q%d, before the citation text.

    • The count variable is used to generate the sequential number.

These changes ensure that each citation is prefixed with q# followed by a comma (e.g., "q1,", "q2,"), placed ahead of the existing {#} marker and the citation text, whether or not the citation includes a DOI.

New chat
DeepThink (R1)
Search
AI-generated, for reference only