const axios = require("axios");
const cheerio = require("cheerio");
const Promise = require("bluebird");
var URL = require('url');
/**
 * Inserts spaces around the capitalized words of a camel-cased string.
 *
 * A "capitalized word" is one uppercase ASCII letter followed by one or more
 * lowercase letters or digits. Any text between such words (for example a
 * run of capitals like "HTML", or a lowercase prefix) is kept as one piece.
 *
 * @param {string} s - Text to split, e.g. "HTMLParser".
 * @returns {string} The pieces joined by single spaces, e.g. "HTML Parser".
 */
function SplitCamelCaseWithAbbreviations(s) {
  // The capture group makes split() keep the matched words in its result.
  const capitalizedWord = /([A-Z][a-z0-9]+)/;
  const pieces = s.split(capitalizedWord);
  // Adjacent matches leave empty strings behind; discard them.
  const nonEmptyPieces = pieces.filter((piece) => piece);
  return nonEmptyPieces.join(' ');
}
/**
 * Returns the string with its first character converted to upper case.
 *
 * @param {string} s - Input text; an empty string is returned unchanged.
 * @returns {string} `s` with the first character upper-cased and the rest untouched.
 */
function upperCaseFirstLetter(s) {
  const head = s.charAt(0);
  const tail = s.slice(1);
  return `${head.toUpperCase()}${tail}`;
}
// UID of the group that receives the "view" permission on every content item.
const PUBLIC_GROUP_UID = 'b3d7ce68-11dc-4e12-a43f-e6f23892b482';

/**
 * Escapes the characters that are significant in HTML so plain text can be
 * embedded safely inside markup.
 *
 * @param {string} text - Plain text.
 * @returns {string} The text with `&`, `<`, `>` and `"` replaced by entities.
 */
function escapeHtml(text) {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}

/**
 * Builds one page-zone entry holding a single English HTML content item that
 * is viewable by the public group.
 *
 * @param {string} zoneName - Name of the zone the content is placed in.
 * @param {string} contentTitle - Title of the content item.
 * @param {string} rawContent - HTML body of the content item.
 * @returns {Object} The zone object to append to `PageZones`.
 */
function buildContentZone(zoneName, contentTitle, rawContent) {
  return {
    zoneName,
    content: [{
      isHTML: 1,
      uid: '',
      permissions: [{
        functionname: 'view',
        allowed: '1',
        groupUIDs: [PUBLIC_GROUP_UID],
      }],
      languages: [{
        languageCode: 'en',
        contentTitle,
        description: '',
        rawContent,
      }],
    }],
  };
}

/**
 * Downloads a page and converts it into a page-description object: page
 * metadata plus the main content, a generated header and (if present) the
 * sidebar, each as a separate zone.
 *
 * The page title is the first `<h1>` of the main content; when that is empty
 * a title is derived from the last segment of the URL path.
 *
 * @param {string} url - Absolute URL of the page to fetch.
 * @returns {Promise<Object>} Resolves with the page description. The promise
 *   rejects if the HTTP request made through axios rejects.
 */
async function parsePage(url) {
  const response = await axios.get(url);
  // cheerio.load is synchronous, so there is nothing to await here.
  const $ = cheerio.load(response.data);
  const urlInfo = URL.parse(url, true);

  const titleSelector = 'div.main-content .defaultText h1';
  const pageTitle = ($(titleSelector).first().text() || '').trim();

  // Use `pathname` rather than `path`: `path` also carries the query string,
  // which would leak "?a=b" into the page name and the derived title.
  // Empty segments are dropped so that a trailing slash ("/about/") still
  // resolves to the last real segment.
  const pathSegments = (urlInfo.pathname || '').split('/').filter(Boolean);
  const lastPathSegment = pathSegments.length
    ? pathSegments[pathSegments.length - 1]
    : '';

  const pageTitleFromURL = SplitCamelCaseWithAbbreviations(upperCaseFirstLetter(lastPathSegment));
  const pageTitleToUse = pageTitle.length ? pageTitle : pageTitleFromURL;

  // Strip the title and the sub-navigation from the DOM before the main
  // content is serialized, so neither ends up in the content body.
  $(titleSelector).first().remove();
  $('.page-subnav').remove();

  const mainContent = ($('div.main-content .defaultText').html() || '').trim();
  const sidebarContent = ($('div.second-sidebar').html() || '').trim();
  const subsection = ($('div.first-sidebar .subnav a.selected').html() || '').trim();
  const mainsection = ($('div.mainnav #navbar a.selected').html() || '').trim();

  const pageZones = [buildContentZone('main', pageTitleToUse, mainContent)];

  if (pageTitleToUse.length) {
    // The title is plain text (entities were decoded by .text()), so it must
    // be escaped again before being embedded in HTML.
    pageZones.push(buildContentZone(
      'M',
      `${pageTitleToUse} Header`,
      `<ul><li><h2>${escapeHtml(pageTitleToUse)}</h2></li></ul>`,
    ));
  }

  if (sidebarContent.length) {
    pageZones.push(buildContentZone('N', `${pageTitleToUse} Sidebar`, sidebarContent));
  }

  return {
    url: urlInfo.href,
    pagename: lastPathSegment,
    // Main and sub navigation labels joined as "main/sub"; missing parts are skipped.
    section: [mainsection, subsection].filter(Boolean).join('/'),
    SiteResourceType: 'UserCreatedPage',
    Status: 'Active',
    QuickLink: urlInfo.path,
    InheritPlacements: 1,
    ModeOverride: '',
    TemplateOverride: '',
    ApplyNoIndex: 'no',
    ApplyNoFollow: 'no',
    ApplyNoArchive: 'no',
    languages: [{
      languageCode: 'en',
      PageTitle: pageTitleToUse,
      Keywords: '',
      Description: '',
    }],
    PageZones: pageZones,
  };
}
/**
 * Script entry point: parses every URL in `urls` and prints each resulting
 * page description to stdout as one JSON document per line.
 *
 * Wrapped in an async function because top-level `await` is not available in
 * CommonJS modules, which is the module style this file uses (`require`).
 *
 * NOTE(review): `urls` is not defined in this section of the file; it is
 * presumably supplied elsewhere — confirm before running.
 *
 * @returns {Promise<void>} Resolves once every page has been printed.
 */
async function main() {
  // `Promise` is the bluebird import here, which provides `Promise.map`.
  const results = await Promise.map(urls, (item) => parsePage(item));
  for (const element of results) {
    console.log(JSON.stringify(element));
  }
}

main().catch((err) => {
  // Report the failure and signal it through the exit code instead of
  // leaving an unhandled promise rejection.
  console.error(err);
  process.exitCode = 1;
});