This is JavaScript code for extracting ISBN numbers from a PDF document.
Code:
// Pattern for an "ISBN" label followed by its number. The label must be
// followed by ':', '=' or whitespace. Two alternatives are accepted:
//   1. Separated form: three digit groups each ending in '-' or ' ', then a
//      final group ending in a digit or x/X. The lookahead requires the next
//      13 characters to be digits, x/X, '-' or ' '.
//   2. Compact form: 9 to 12 digits followed by one digit or x/X, which covers
//      both 10-character and 13-digit unseparated ISBNs.
// The final class is [\dxX] rather than [\d|x]: inside a character class '|'
// is a literal pipe, and an uppercase X check character must be accepted.
var reISBNban = /(ISBN[\:\=\s][\s]*(?=[-0-9xX ]{13})(?:[0-9]+[- ]){3}[0-9]*[xX0-9])|(ISBN[\:\=\s][ ]*\d{9,12}[\dxX])/g;
// Suffix that replaces ".pdf" in the source path to name the report file.
var strExtban = "_Extracted_ISBN.pdf";
// Heading written at the top of the report, followed by the source path.
var strIntroban = "ISBN numbers extracted from document: ";
// Footer written at the end of the report, followed by the match count.
var strFinalban = "Total number of ISBN numbers extracted: ";
// Run the extraction against the current document.
ExtractFromDocument(reISBNban, strExtban, strIntroban, strFinalban);
/**
 * Scans every page of a document for text matching a pattern and saves the
 * unique matches, in first-seen order, to a new report PDF.
 *
 * The document is taken from `this`: the function reads `this.path`,
 * `this.numPages`, `this.getPageNumWords` and `this.getPageNthWord`, so it
 * must run with `this` bound to the document to scan.
 *
 * @param {RegExp} reMatch     Pattern applied to each page's text; it is
 *                             expected to carry the global flag so that
 *                             match() returns every occurrence on the page.
 * @param {string} strFileExt  Suffix that replaces the source path's ".pdf"
 *                             extension to form the report path.
 * @param {string} strMessage1 Heading text; the source path is appended.
 * @param {string} strMessage2 Footer text; the number of unique matches is
 *                             appended.
 * @returns {undefined} Nothing; errors raised while building or saving the
 *                      report are shown through app.alert.
 */
function ExtractFromDocument(reMatch, strFileExt, strMessage1, strMessage2)
{
    var doc = this;
    var sourcePath = doc.path;

    // Match the extension case-insensitively and fall back to appending the
    // suffix, so the report path can never equal the source document's path.
    var pdfSuffix = /\.pdf$/i;
    var outputPath = pdfSuffix.test(sourcePath)
        ? sourcePath.replace(pdfSuffix, strFileExt)
        : sourcePath + strFileExt;

    try {
        var report = new Report();
        var seen = {};           // match text -> true, for de-duplication
        var uniqueMatches = [];  // unique matches in first-seen order

        report.writeText(strMessage1 + sourcePath);
        report.divide(1);
        report.writeText(" ");

        for (var pageIndex = 0; pageIndex < doc.numPages; pageIndex++) {
            var wordCount = doc.getPageNumWords(pageIndex);

            // Rebuild the page text word by word. Passing false as the third
            // argument (bStrip in the Acrobat API) keeps the punctuation and
            // whitespace attached to each word, which the pattern relies on.
            var pageText = "";
            for (var wordIndex = 0; wordIndex < wordCount; wordIndex++) {
                pageText += doc.getPageNthWord(pageIndex, wordIndex, false);
            }

            var pageMatches = pageText.match(reMatch);
            if (pageMatches === null) {
                continue;
            }

            for (var m = 0; m < pageMatches.length; m++) {
                var found = pageMatches[m];
                // Own-property check so inherited Object members never count
                // as an already-seen match.
                if (!Object.prototype.hasOwnProperty.call(seen, found)) {
                    seen[found] = true;
                    uniqueMatches.push(found);
                }
            }
        }

        for (var k = 0; k < uniqueMatches.length; k++) {
            report.writeText(uniqueMatches[k]);
        }

        report.writeText(" ");
        report.divide(1);
        report.writeText(strMessage2 + uniqueMatches.length);
        report.save({
            cDIPath: outputPath
        });
    }
    catch (e) {
        app.alert("Processing error: " + e);
    }
}
Bookmarks