You are on page 1 of 9

import config from "..

/config/globalConfig";
import { GcpData } from "../constants/gcpConstants";
import { addErrorLog } from "../fireBase/logHandler";
import { getFilesCBResponse } from "../model/common.interface";
import { findFile } from "./assetUtils";
import { removeTags,decodeHtmlEntities } from "./commonUtils";

// Google Cloud Storage client class, loaded with CommonJS `require`.
const { Storage } = require("@google-cloud/storage");


// xml-js converter; `convert.xml2json` is used by getContentFromXmlFile.
const convert = require("xml-js");

// Name of the bucket every helper in this file operates on.
// Falls back to an empty string when GcpData does not define `bucketName`.
const { bucketName = "" } = GcpData;


// Module-level Storage client, used by the helpers in this file that do not
// construct their own client.
const storage = new Storage();

/**
 * Picks out the folder names that begin with a number from a list of
 * slash-terminated folder paths (e.g. storage prefixes such as "a/b/12/").
 *
 * For each path the segment immediately before the trailing "/" is taken as
 * the folder name. A name is kept when `parseInt` can read a number from its
 * start, so "2024" and "12abc" are kept while "assets" is dropped. Paths that
 * contain no "/" have no folder segment and are skipped.
 *
 * The previous implementation filtered with `!= false`, which coerced the
 * names "0" and "00" to `false` and silently dropped them; names are now kept
 * based on the numeric check alone.
 *
 * @param folderNames Folder paths to inspect. A missing list is treated as empty.
 * @returns The matching folder names, in input order. On an unexpected error
 *          the error is logged and an empty array is returned.
 */
export const filterNumberFolderNames = async (folderNames: string[]): Promise<string[]> => {
  try {
    const numericNames: string[] = [];
    for (const folderPath of folderNames ?? []) {
      const segments = folderPath.split("/");
      // "a/b/12/" splits into ["a", "b", "12", ""], so the folder name is the
      // second-to-last segment.
      const folderName: string = segments.length > 1 ? segments[segments.length - 2] : "";
      if (!Number.isNaN(parseInt(folderName))) {
        numericNames.push(folderName);
      }
    }
    return numericNames;
  } catch (error: any) {
    await addErrorLog(error.message, "filterNumberFolderNames");
    return [];
  }
};

/**
 * Lists the sub-folder prefixes found under `path` in the configured bucket
 * and returns the ones whose folder name starts with a number
 * (see filterNumberFolderNames).
 *
 * The listing is awaited inside the `try` block. Previously the promise chain
 * was returned un-awaited, so a failed listing rejected straight to the caller
 * and never reached the logging/empty-array fallback below.
 *
 * @param path Prefix to list; "/" is used as the delimiter so only one level
 *             of prefixes is reported.
 * @returns Numeric folder names, or an empty array when the listing fails
 *          (the failure is logged via addErrorLog).
 */
export const fetchFolderNames = async (path: string) => {
  try {
    const bucket = storage.bucket(bucketName);
    const [, , apiResponse]: getFilesCBResponse = await bucket.getFiles({
      prefix: path,
      delimiter: "/",
      autoPaginate: false,
    });
    // `prefixes` may be absent from the response; treat that as "no folders".
    return await filterNumberFolderNames(apiResponse?.prefixes ?? []);
  } catch (error: any) {
    await addErrorLog(error?.message, "fetchFolderNames");
    return [];
  }
};
/**
 * Lists the files directly under `path` in the configured bucket and hands
 * them to `findFile` to select the entry/entries for `fileType`.
 *
 * The listing and `findFile` are awaited inside the `try` block. Previously
 * the promise chain was returned un-awaited, so failures rejected to the
 * caller instead of being logged and replaced by the fallback value.
 *
 * @param path     Prefix to list ("/" delimiter, single page of results).
 * @param fileType File kind forwarded to `findFile`.
 * @param ID       Optional identifier forwarded to `findFile`.
 * @returns Whatever `findFile` returns. On failure the error is logged and an
 *          empty array is returned for the "Ar" and "img" file types, and an
 *          empty string for every other file type.
 */
export const fetchFolderFiles = async (path: string, fileType: string, ID?: string) => {
  try {
    const bucket = storage.bucket(bucketName);
    const [files]: getFilesCBResponse = await bucket.getFiles({
      prefix: path,
      delimiter: "/",
      autoPaginate: false,
    });
    return await findFile(files, fileType, ID);
  } catch (error: any) {
    await addErrorLog(error?.message, "fetchFolderFiles");
    return fileType === "Ar" || fileType === "img" ? [] : "";
  }
};

/**
 * Downloads the XML object stored at `path` in the configured bucket and
 * returns it converted to a JavaScript value through xml-js.
 *
 * Text nodes are passed through `removeTags` during conversion; text is
 * exposed under the "text" key and attributes under the "attributes" key.
 *
 * @param path         Object name inside the bucket.
 * @param compactCheck Forwarded to xml-js as its `compact` option.
 * @returns The parsed conversion result, or "" when anything fails (the
 *          failure is logged via addErrorLog).
 */
export const getContentFromXmlFile = async (path: string, compactCheck: boolean) => {
  try {
    const client = new Storage();
    const xmlBucket = await client.bucket(bucketName);
    const [rawBytes] = await xmlBucket.file(path).download();

    const conversionOptions = {
      compact: compactCheck,
      spaces: 4,
      trim: true,
      textKey: "text",
      attributesKey: "attributes",
      textFn: (val: string) => removeTags(val),
    };

    // The downloaded buffer is decoded as UTF-8 before conversion.
    const jsonText = convert.xml2json(rawBytes.toString("utf-8"), conversionOptions);
    return JSON.parse(jsonText);
  } catch (error: any) {
    await addErrorLog(error?.message, "getContentFromXmlFile");
    return "";
  }
};
/**
 * Reports whether listing `folderPath` (with "/" as delimiter) in the
 * configured bucket yields at least one file.
 *
 * @param folderPath Prefix to probe.
 * @returns `true` when the listing contains files; `false` when it is empty
 *          or when the listing fails (the failure is logged).
 */
export async function doesFolderPathExist(folderPath: string) {
  try {
    const listing = await storage.bucket(bucketName).getFiles({
      prefix: folderPath,
      delimiter: "/",
    });
    const matchedFiles = listing[0];
    return matchedFiles.length > 0;
  } catch (error: any) {
    await addErrorLog(
      `Error checking folder path existence:${error?.message}`,
      "doesFolderPathExist"
    );
    return false;
  }
}

/**
 * Downloads the object stored at `imageUrl` in the configured bucket and
 * returns its contents.
 *
 * The logged error text previously read "Error checking folder path
 * existence", copied from doesFolderPathExist; it now describes the
 * operation that actually failed.
 *
 * @param imageUrl Object name of the image inside the bucket.
 * @returns The downloaded contents, or an empty array when the download
 *          fails (the failure is logged via addErrorLog).
 */
export const getBufferOfImage = async (imageUrl: string) => {
  try {
    const client = new Storage();
    const [imageBuffer] = await client.bucket(bucketName).file(imageUrl).download();
    return imageBuffer;
  } catch (err: any) {
    await addErrorLog(`Error downloading image:${err?.message}`, "getBufferOfImage");
    return [];
  }
};

// Module-level string-keyed map. No code visible in this file reads from or
// writes to it. NOTE(review): confirm it is unused elsewhere before removing.
const result: { [key: string]: any } = {};

/**
 * Walks every day-index JSON file found under `path` and, for each page and
 * article listed in it, copies the related images and collects one merged
 * record per article.
 *
 * For each entry of `DayIndex` in a file:
 *   - getAdvertisementDetails and getPagePhotosDetails(…, 'Page') are run for
 *     the page;
 *   - for each article, the summary fields from the index are merged with the
 *     results of getArticleDetails, getArticlePhotoDetails (stored under
 *     `articlePhotographs`) and getLinkDetails; getArticlePhotos is run for
 *     its image-copy side effect.
 *
 * Per-article data is now held in locals. Previously it was written into maps
 * shared by all concurrently processed files and keyed by the article's loop
 * index, so one file's task could overwrite another's values between awaits
 * and attach the wrong details or photographs to an article.
 *
 * @param path         Prefix of the day-index folder. The helpers derive
 *                     sibling folders by replacing "DayIndex" in this string.
 * @param compactCheck Not used by this function; kept for signature
 *                     compatibility.
 * @returns An array with one entry per listed file. Every entry is the SAME
 *          shared array holding the merged article records of all files
 *          (preserved from the original behavior). Returns "" when anything
 *          fails; the failure is logged via addErrorLog.
 */
export const getContentFromJsonFile = async (path: string, compactCheck: boolean) => {
  try {
    const client = new Storage();
    const bucket = client.bucket(bucketName);
    const [files] = await bucket.getFiles({ prefix: path });

    // Fields read from each article entry of the day index.
    interface ArticleItem {
      ArticleName: string;
      ColumnTitle: string;
      TagName: string;
    }

    // Single accumulator shared by every file task; see @returns.
    const mergedObject: Record<string, any>[] = [];

    const promises = files.map(async (file: any) => {
      const dayIndexFile = bucket.file(file.name);
      const [fileExists] = await dayIndexFile.exists();
      if (!fileExists) {
        return mergedObject;
      }

      const [contents] = await dayIndexFile.download();
      const jsonContent = JSON.parse(contents.toString());

      for (const dayKey in jsonContent.DayIndex) {
        const dayItem = jsonContent.DayIndex[dayKey];

        await getAdvertisementDetails(dayItem.PageName, path);
        await getPagePhotosDetails(dayItem.PageName, path, 'Page');

        if (!(dayItem.Articles && dayItem.Articles.length > 0)) {
          continue;
        }

        for (const articleKey in dayItem.Articles) {
          const articleItem: ArticleItem = dayItem.Articles[articleKey];

          const articleSummary = {
            pageName: dayItem.PageName || '',
            pageTitle: dayItem.PageTitle || '',
            tagName: articleItem.TagName || '',
            columnTitle: articleItem.ColumnTitle,
            status: "Extracted",
            pageNo: parseInt(dayItem.DisplayPageNumber) || '',
            sourceId: 'Harns',
            numberOfPages: parseInt(jsonContent.DayIndex.length.toString()),
            editionName: jsonContent.EditionName,
          };

          const articleDetails = await getArticleDetails(articleItem.ArticleName, path);
          // Called only for its side effect of copying the article image.
          await getArticlePhotos(articleItem.ArticleName, path);
          const articlePhotographs = await getArticlePhotoDetails(articleItem.ArticleName, path);
          const linkDetails = await getLinkDetails(articleItem.ArticleName, path);

          // Later sources override earlier ones on key collisions, in the
          // same order as before: summary, details, photographs, links.
          mergedObject.push(
            Object.assign({}, articleSummary, articleDetails, { articlePhotographs }, linkDetails)
          );
        }
      }
      return mergedObject;
    });

    const result = await Promise.all(promises);
    return result;
  } catch (error: any) {
    await addErrorLog(error?.message, "getContentFromJsonFile");
    return "";
  }
};

/**
 * Looks up continuation-link information for an article.
 *
 * Reads `<path with "DayIndex" replaced by "LinkJson">/<first three "_"
 * segments of articleId>_toim.json` and scans its `linkJson` entries for one
 * whose `ArticleLink` equals `articleId`. When several entries match, the
 * last one wins.
 *
 * Repairs applied here: the function's closing brace and the URL string had
 * been broken by line wrapping, and an unused split of `FromLinkArticle` was
 * removed.
 *
 * @param articleId Article identifier; its "_"-separated segments 0..3 are
 *                  used to build paths. NOTE(review): presumably
 *                  day_month_year_page — confirm against the data.
 * @param path      Day-index folder prefix.
 * @returns `{ continuationTo, continuationFrom, combinedArticleUrl }` for the
 *          matching link, or an empty object when the link file or a matching
 *          entry does not exist. Storage and JSON errors propagate to the caller.
 */
export const getLinkDetails = async (articleId: string, path: string) => {
  const bucket = new Storage().bucket(bucketName);
  const dateSplit = articleId.split("_");
  let linkData: Record<string, any> = {};

  const linkFolder = path.replace('DayIndex', 'LinkJson');
  const filePrefix = dateSplit.slice(0, 3).join("_");
  const file = bucket.file(`${linkFolder}/${filePrefix}_toim.json`);
  const [fileExists] = await file.exists();
  if (!fileExists) {
    return linkData;
  }

  const [contents] = await file.download();
  const jsonContent = JSON.parse(contents.toString());
  for (const linkKey in jsonContent.linkJson) {
    const link = jsonContent.linkJson[linkKey];
    if (link.ArticleLink === articleId) {
      linkData = {
        continuationTo: articleId,
        continuationFrom: link.FromLinkArticle,
        combinedArticleUrl: `/test/Harns/PublicationData/TOI/toim/${dateSplit[2]}/${dateSplit[1]}/${dateSplit[0]}/${parseInt(dateSplit[3])}/img/${link.FromLinkArticle}_${articleId}.jpg`,
      };
    }
  }
  return linkData;
};

/**
 * Loads one article's JSON and maps it to the record shape used downstream.
 *
 * Reads `<path with "DayIndex" replaced by "ArticleJson">/<segment 3>/
 * <articleName>.json`, where the segments come from splitting `articleName`
 * on "_".
 *
 * Repairs applied here: the URL template literals had been split between "$"
 * and "{", which emitted a literal "$" and line break instead of the
 * interpolated value, and the tail of a commented-out property had become
 * live code.
 *
 * NOTE(review): `editionISODate` is built from a local-time Date and then
 * serialized with toISOString(), so its value depends on the host time zone.
 * It is left unchanged here — confirm the intended zone.
 *
 * @param articleName Article identifier. Segments 0, 1, 2 are parsed as day,
 *                    month and year for the edition date; segment 3 is used
 *                    as a folder name and, parsed as an integer, in the URLs.
 * @param path        Day-index folder prefix.
 * @returns The mapped article record, or an empty object when the article
 *          file does not exist. Storage and JSON errors propagate to the caller.
 */
export const getArticleDetails = async (articleName: string, path: string) => {
  const bucket = new Storage().bucket(bucketName);
  const articleFolder = path.replace('DayIndex', 'ArticleJson');
  let articleData: Record<string, any> = {};

  const dateSplit = articleName.split("_");
  const edISODate = new Date(
    parseInt(dateSplit[2]),
    parseInt(dateSplit[1]) - 1,
    parseInt(dateSplit[0])
  );
  const endDate = await getEndDate(dateSplit[2] + "-" + dateSplit[1] + "-" + dateSplit[0]);

  const file = bucket.file(`${articleFolder}/${dateSplit[3]}/${articleName}.json`);
  const [fileExists] = await file.exists();

  if (fileExists) {
    const [contents] = await file.download();
    const jsonContent = JSON.parse(contents.toString());
    // Folder that holds the copied images for this page.
    const imageFolder = `/test/Harns/PublicationData/TOI/toim/${dateSplit[2]}/${dateSplit[1]}/${dateSplit[0]}/${parseInt(dateSplit[3])}/img`;

    articleData = {
      articleTitle: jsonContent.ArticleTitle ? decodeHtmlEntities(jsonContent.ArticleTitle) : "",
      articleBody: jsonContent.ArticleBody ? decodeHtmlEntities(jsonContent.ArticleBody) : "",
      id: jsonContent.ArticleName ? jsonContent.ArticleName.replace(/_toim/, '') : "",
      articleName: jsonContent.ArticleName ? jsonContent.ArticleName : "",
      articleAuthor: jsonContent.ArticleAuthor ? jsonContent.ArticleAuthor : "",
      editionDate: endDate,
      editionISODate: edISODate.toISOString(),
      articleURL: jsonContent.ArticleName ? `${imageFolder}/${jsonContent.ArticleName}.jpg` : "",
      pageUrl: jsonContent.ArticleName
        ? `${imageFolder}/${dateSplit[0]}_${dateSplit[1]}_${dateSplit[2]}_${dateSplit[3]}_toim.jpg`
        : "",
      // "totalContentCharacterCount" : (jsonContent.ArticleTitle+jsonContent.ArticleBody).length,
      articleStoryDate: jsonContent.articleStoryDate ? jsonContent.articleStoryDate : "",
    };
  }
  return articleData;
};

/**
 * Copies an article's image from the source folder to the publication
 * output folder inside the same bucket.
 *
 * Source: `<path with "DayIndex" replaced by "Article">/<segment 3>/
 * <articleName>.jpg`.
 * Target: `test/Harns/PublicationData/TOI/toim/<seg 2>/<seg 1>/<seg 0>/
 * <integer of seg 3>/img/<articleName>.jpg`.
 *
 * Repairs applied here: the function's closing brace was missing, and the
 * output-path template literal had been split between "$" and "{".
 *
 * @param articleName Article identifier, split on "_" to obtain the segments.
 * @param path        Day-index folder prefix.
 * @returns A success message containing the output path, or "" when the
 *          source image does not exist.
 * @throws Error carrying the underlying message when saving the copy fails.
 */
export const getArticlePhotos = async (articleName: string, path: string) => {
  const bucket = new Storage().bucket(bucketName);
  const outputFolder = "test/Harns/PublicationData/TOI/toim";
  const sourceFolder = path.replace('DayIndex', 'Article');
  const dateSplit = articleName.split("_");

  const file = bucket.file(`${sourceFolder}/${dateSplit[3]}/${articleName}.jpg`);
  const [fileExists] = await file.exists();
  if (!fileExists) {
    return "";
  }

  const [contents] = await file.download();
  const outputPath = `${outputFolder}/${dateSplit[2]}/${dateSplit[1]}/${dateSplit[0]}/${parseInt(dateSplit[3])}/img/${articleName}.jpg`;
  try {
    await bucket.file(outputPath).save(contents);
  } catch (err: any) {
    throw new Error(err.message);
  }
  return `success - article image saved to ${outputPath}`;
};

/**
 * Collects the photograph entries listed in an article's JSON and triggers a
 * copy of each photograph image.
 *
 * Reads `<path with "DayIndex" replaced by "ArticleJson">/<segment 3>/
 * <articleName>.json`. For every entry of `ArticlePhotographs` it records the
 * image URL, `PRS_ImgGroup` and `ImageCaption`, and calls
 * getPagePhotosDetails(<Photograph>, <ArticleJson path>, 'Photographs').
 *
 * Repairs applied here: the URL template literal had been split between "$"
 * and "{", and the 'Photographs' string literal was broken across two lines.
 *
 * @param articleName Article identifier, split on "_" to obtain the segments.
 * @param path        Day-index folder prefix.
 * @returns An object keyed by the photograph's index in `ArticlePhotographs`;
 *          empty when the article file or the photograph list is missing.
 *          Storage and JSON errors propagate to the caller.
 */
export const getArticlePhotoDetails = async (articleName: string, path: string) => {
  const bucket = new Storage().bucket(bucketName);
  const articleFolder = path.replace('DayIndex', 'ArticleJson');
  const photoData: Record<string, any> = {};
  const dateSplit = articleName.split("_");

  const file = bucket.file(`${articleFolder}/${dateSplit[3]}/${articleName}.json`);
  const [fileExists] = await file.exists();
  if (!fileExists) {
    return photoData;
  }

  const [contents] = await file.download();
  const jsonContent = JSON.parse(contents.toString());
  if (!(jsonContent?.ArticlePhotographs && jsonContent.ArticlePhotographs.length > 0)) {
    return photoData;
  }

  for (const photoKey in jsonContent.ArticlePhotographs) {
    const photo = jsonContent.ArticlePhotographs[photoKey];
    photoData[photoKey] = {
      photograph: `/test/Harns/PublicationData/TOI/toim/${dateSplit[2]}/${dateSplit[1]}/${dateSplit[0]}/${parseInt(dateSplit[3])}/img/${photo.Photograph}_toim.jpg`,
      prsImgGroup: photo.PRS_ImgGroup,
      imageCaption: photo.ImageCaption,
    };
    // The ArticleJson-based path is passed on purpose: getPagePhotosDetails
    // swaps "ArticleJson" for the 'Photographs' folder itself.
    await getPagePhotosDetails(photo.Photograph, articleFolder, 'Photographs');
  }
  return photoData;
};

/**
 * Scans a page's JSON for advertisement zones and triggers a copy of the
 * corresponding advertisement image.
 *
 * Reads `<path with "DayIndex" replaced by "PageJson">/<articleName>.json`.
 * For each `PageContent` item that has at least one zone whose `TagName` is
 * 'Advertisement', it calls getPagePhotosDetails with the item's
 * `ArticleName` (last "_toim" occurrence removed), the PageJson path and
 * 'Advertisement'.
 *
 * Changes: the copy is issued once per item instead of once per matching
 * zone, because every zone of an item produced the identical call. The
 * unused split of `articleName` was removed.
 *
 * @param articleName Page name used as the JSON file name.
 * @param path        Day-index folder prefix.
 * @returns Always an empty object; nothing is written into it. The function
 *          is run for its side effect. Storage and JSON errors propagate to
 *          the caller.
 */
export const getAdvertisementDetails = async (articleName: string, path: string) => {
  const bucket = new Storage().bucket(bucketName);
  const pageFolder = path.replace('DayIndex', 'PageJson');
  const photoData: Record<string, any> = {};

  const file = bucket.file(`${pageFolder}/${articleName}.json`);
  const [fileExists] = await file.exists();
  if (!fileExists) {
    return photoData;
  }

  const [contents] = await file.download();
  const jsonContent = JSON.parse(contents.toString());
  if (!(jsonContent?.PageContent && jsonContent.PageContent.length > 0)) {
    return photoData;
  }

  for (const adKey in jsonContent.PageContent) {
    const pageItem = jsonContent.PageContent[adKey];
    if (!(pageItem.Zones && pageItem.Zones.length > 0)) {
      continue;
    }
    for (const zoneKey in pageItem.Zones) {
      if (pageItem.Zones[zoneKey].TagName === 'Advertisement') {
        await getPagePhotosDetails(
          pageItem.ArticleName.replace(/_toim(?!.*_toim)/, ''),
          pageFolder,
          'Advertisement'
        );
        // Further advertisement zones of this item would repeat the same copy.
        break;
      }
    }
  }
  return photoData;
};

/**
 * Copies `sourcePath` to `outputPath` inside `bucket` when the source exists.
 * Returns the success message, or "" when the source object is missing.
 */
const copyImageIfPresent = async (bucket: any, sourcePath: string, outputPath: string) => {
  const sourceFile = bucket.file(sourcePath);
  const [fileExists] = await sourceFile.exists();
  if (!fileExists) {
    return "";
  }
  const [contents] = await sourceFile.download();
  try {
    await bucket.file(outputPath).save(contents);
  } catch (err: any) {
    throw new Error(err.message);
  }
  return `success - page image saved to ${outputPath}`;
};

/**
 * Copies a page, photograph or advertisement image from its source folder to
 * the publication output folder inside the same bucket.
 *
 * Source folder and file name depend on `folder`:
 *   - 'Page':        "DayIndex" in `path` is replaced by 'Page';
 *                    source is `<path>/<articleName>.jpg`; the output file is
 *                    named `<seg0>_<seg1>_<seg2>_<seg3>_toim.jpg`.
 *   - 'Photographs': "ArticleJson" in `path` is replaced by 'Photographs'.
 *   - anything else: "PageJson" in `path` is replaced by `folder`.
 *   For the last two the source is `<path>/<seg3>/<articleName>_toim.jpg`
 *   and the output file is `<articleName>_toim.jpg`.
 * The segments come from splitting `articleName` on "_".
 *
 * Repairs applied here: the function's closing brace was missing, the
 * template literals had been split between "$" and "{", and the duplicated
 * download/save sequence now lives in one helper.
 *
 * @param articleName Image base name.
 * @param path        Folder prefix to derive the source folder from.
 * @param folder      Source folder kind, as described above.
 * @returns A success message containing the output path, or "" when the
 *          source image does not exist.
 * @throws Error carrying the underlying message when saving the copy fails.
 */
export const getPagePhotosDetails = async (articleName: string, path: string, folder: string) => {
  const bucket = new Storage().bucket(bucketName);
  const outputFolder = "test/Harns/PublicationData/TOI/toim";
  const dateSplit = articleName.split("_");
  const outputDir = `${outputFolder}/${dateSplit[2]}/${dateSplit[1]}/${dateSplit[0]}/${parseInt(dateSplit[3])}/img`;

  if (folder == 'Page') {
    const pageFolder = path.replace('DayIndex', folder);
    return copyImageIfPresent(
      bucket,
      `${pageFolder}/${articleName}.jpg`,
      `${outputDir}/${dateSplit[0]}_${dateSplit[1]}_${dateSplit[2]}_${dateSplit[3]}_toim.jpg`
    );
  }

  const sourceFolder =
    folder == 'Photographs'
      ? path.replace('ArticleJson', folder)
      : path.replace('PageJson', folder);
  return copyImageIfPresent(
    bucket,
    `${sourceFolder}/${dateSplit[3]}/${articleName}_toim.jpg`,
    `${outputDir}/${articleName}_toim.jpg`
  );
};

/**
 * Formats a date as a long en-US string, e.g. "Monday, January 15, 2024".
 *
 * A date-only ISO string ("YYYY-MM-DD") is parsed by `Date` as midnight UTC.
 * Formatting that instant in the host's local zone showed the previous day
 * on hosts west of UTC, so such inputs are now formatted in UTC and the
 * calendar date comes out exactly as written. Every other input is formatted
 * in the local time zone, as before.
 *
 * @param date Anything accepted by the `Date` constructor.
 * @returns The formatted date.
 * @throws RangeError when `date` does not produce a valid Date.
 */
export async function getEndDate(date: any): Promise<string> {
  const options: Intl.DateTimeFormatOptions = {
    weekday: 'long',
    year: 'numeric',
    month: 'long',
    day: 'numeric',
  };
  if (typeof date === 'string' && /^\d{4}-\d{2}-\d{2}$/.test(date)) {
    options.timeZone = 'UTC';
  }
  return new Intl.DateTimeFormat('en-US', options).format(new Date(date));
}
// Default export bundling a subset of the helpers defined in this file.
// fetchFolderNames, getContentFromJsonFile, getEndDate and the
// article/page/link helpers are not included here; they are reachable only
// through their named exports.
export default {
filterNumberFolderNames,
fetchFolderFiles,
getContentFromXmlFile,
doesFolderPathExist,
getBufferOfImage,
};

You might also like