// 347 lines · 10 KiB · JavaScript
require("dotenv").config();
|
|
const { Builder, By, until } = require("selenium-webdriver");
|
|
const chrome = require("selenium-webdriver/chrome");
|
|
const fs = require("fs");
|
|
|
|
/**
 * Pauses an async flow for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds before the promise settles.
 * @returns {Promise<void>} Promise that resolves (with no value) once the
 *   timer fires.
 */
function sleep(ms) {
  return new Promise(function armTimer(done) {
    setTimeout(done, ms);
  });
}
|
|
|
|
/**
 * Builds a Chrome WebDriver session configured with the command-line flags
 * listed below (window size, sandbox/shared-memory switches, automation
 * blink feature disabled, and a fixed desktop user agent).
 *
 * @returns {Promise<object>} Whatever `Builder#build()` returns for the
 *   configured Chrome session.
 */
async function initializeDriver() {
  const chromeFlags = [
    "--start-maximized",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-blink-features=AutomationControlled",
    "--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  ];

  const options = new chrome.Options();
  // Flags are registered one at a time, in the order listed above.
  for (const flag of chromeFlags) {
    options.addArguments(flag);
  }

  const chromeBuilder = new Builder().forBrowser("chrome");
  const configuredBuilder = chromeBuilder.setChromeOptions(options);
  return configuredBuilder.build();
}
|
|
|
|
/**
 * Looks up a child element without throwing.
 *
 * @param {object} element - Object exposing `findElement(by)` (a driver or
 *   a previously located element).
 * @param {object} by - Locator handed straight to `findElement`.
 * @returns {Promise<object|null>} The located element, or `null` when the
 *   lookup fails for any reason.
 */
async function safeFindElement(element, by) {
  let located = null;
  try {
    located = await element.findElement(by);
  } catch (lookupError) {
    // Any lookup failure is reported to the caller as "not found".
    located = null;
  }
  return located;
}
|
|
|
|
/**
 * Logs in through the Facebook login form and stores the session cookies.
 *
 * The login page is opened first (a navigation failure propagates to the
 * caller). Everything after that is guarded: any error, including a final
 * URL that still contains "login" or "checkpoint", is logged and reported
 * as `false`.
 *
 * @param {object} driver - Selenium WebDriver instance.
 * @param {string} username - Value typed into the `email` field.
 * @param {string} password - Value typed into the `pass` field.
 * @param {string} cookieFilePath - Destination passed to `saveCookies`.
 * @returns {Promise<boolean>} `true` after a successful login, else `false`.
 */
async function loginFacebook(driver, username, password, cookieFilePath) {
  const FIELD_TIMEOUT_MS = 10000;

  // Waits for the form control with the given `name` attribute.
  const locateByName = (fieldName) =>
    driver.wait(until.elementLocated(By.name(fieldName)), FIELD_TIMEOUT_MS);

  // Clears a text field and types the supplied value into it.
  const typeInto = async (fieldName, text) => {
    const field = await locateByName(fieldName);
    await field.clear();
    await field.sendKeys(text);
  };

  await driver.get("https://www.facebook.com/login");

  let loggedIn = false;
  try {
    await typeInto("email", username);
    await typeInto("pass", password);

    const submitButton = await locateByName("login");
    await submitButton.click();

    // Fixed pause before inspecting where the browser ended up.
    await sleep(5000);

    const landedOn = await driver.getCurrentUrl();
    const stillGated = ["login", "checkpoint"].some((marker) =>
      landedOn.includes(marker)
    );
    if (stillGated) {
      throw new Error("Login failed!");
    }

    await saveCookies(driver, cookieFilePath);
    console.log("Auto login successful!");
    loggedIn = true;
  } catch (e) {
    console.log("Auto login error:", e.message);
  }
  return loggedIn;
}
|
|
|
|
/**
 * Restores a previously saved session by injecting cookies from a JSON file.
 *
 * Returns `true` only when at least one cookie was actually accepted by the
 * browser. A missing file, an unreadable/invalid file, an empty cookie list,
 * or a list in which every cookie is rejected all yield `false`, so the
 * caller can fall back to a regular login instead of trusting an empty
 * session.
 *
 * @param {object} driver - Selenium WebDriver instance.
 * @param {string} cookieFilePath - Path of the JSON file holding an array of
 *   cookie objects (as written by `saveCookies`).
 * @returns {Promise<boolean>} Whether a cookie-based session was restored.
 */
async function loadCookies(driver, cookieFilePath) {
  try {
    if (!fs.existsSync(cookieFilePath)) {
      return false;
    }

    const cookies = JSON.parse(fs.readFileSync(cookieFilePath, "utf8"));
    // Validate before touching the browser: nothing to restore means no
    // navigation is needed either.
    if (!Array.isArray(cookies) || cookies.length === 0) {
      console.log("Cookie file contains no cookies.");
      return false;
    }

    // WebDriver only accepts cookies for the domain of the page that is
    // currently loaded, so open the site before adding them.
    await driver.get("https://www.facebook.com");
    await sleep(2000);

    let addedCount = 0;
    for (const cookie of cookies) {
      try {
        await driver.manage().addCookie(cookie);
        addedCount += 1;
      } catch (error) {
        // A single bad cookie must not abort the rest; guard the name lookup
        // so a null/odd entry cannot throw from inside this handler.
        console.log(`Could not add cookie: ${cookie && cookie.name}`);
      }
    }

    if (addedCount === 0) {
      console.log("No cookies could be added.");
      return false;
    }

    console.log("Cookies loaded successfully!");
    // Reload so the page is rendered with the restored session.
    await driver.navigate().refresh();
    await sleep(3000);
    return true;
  } catch (error) {
    console.log("Error loading cookies:", error.message);
  }
  return false;
}
|
|
|
|
/**
 * Writes the browser's current cookies to disk as pretty-printed JSON.
 *
 * @param {object} driver - Selenium WebDriver instance.
 * @param {string} cookieFilePath - File that receives the serialized cookies.
 * @returns {Promise<boolean>} `true` when the file was written, `false` when
 *   reading the cookies or writing the file failed (the error is logged).
 */
async function saveCookies(driver, cookieFilePath) {
  let saved = false;
  try {
    const jar = await driver.manage().getCookies();
    const serialized = JSON.stringify(jar, null, 2);
    fs.writeFileSync(cookieFilePath, serialized);
    console.log("Cookies saved successfully!");
    saved = true;
  } catch (failure) {
    console.log("Error saving cookies:", failure.message);
  }
  return saved;
}
|
|
|
|
/**
 * Finds the first post on the currently loaded page and tries to expand it.
 *
 * Candidate CSS selectors are tried in order; the first one that matches at
 * least one element wins and its first match is used. The post's
 * "See more" / "Xem thêm" controls are then clicked up to three times
 * (best effort: failures during expansion are ignored).
 *
 * @param {object} driver - Selenium WebDriver instance.
 * @returns {Promise<{firstPost: object, usedSelector: string}|null>} The
 *   matched element and the selector that found it, or `null` when no
 *   selector matched.
 */
async function getFirstPostBySelector(driver) {
  const candidateSelectors = [
    '[data-pagelet*="FeedUnit"]',
    '[role="article"]',
    '[data-testid="fbfeed_story"]',
    ".userContentWrapper",
    "[data-ft]",
    'div[data-pagelet="ProfileTimeline"]',
    '[data-pagelet="ProfileTimeline"] > div > div',
    'div[data-ad-preview="message"]',
  ];

  // Runs in the page: clicks every control inside the post whose visible
  // text is exactly "See more" or "Xem thêm".
  const expandScript = `
      var post = arguments[0];
      var btns = post.querySelectorAll('div[role="button"], button, span');
      for (var j=0; j<btns.length; j++) {
        if (btns[j].innerText && (btns[j].innerText.trim() === 'See more' || btns[j].innerText.trim() === 'Xem thêm')) {
          btns[j].click();
        }
      }
      `;

  let match = null;
  for (const css of candidateSelectors) {
    try {
      const hits = await driver.findElements(By.css(css));
      if (!(hits.length > 0)) {
        continue;
      }
      match = { firstPost: hits[0], usedSelector: css };
      break;
    } catch (lookupError) {
      // A failing selector is simply skipped.
      continue;
    }
  }

  if (match === null) {
    console.log("No post found!");
    return null;
  }

  const { firstPost } = match;
  try {
    let attempt = 0;
    while (attempt < 3) {
      await driver.executeScript(expandScript, firstPost);
      await sleep(1200);

      const markup = await firstPost.getAttribute("innerHTML");
      const stillCollapsed =
        markup.includes("See more") || markup.includes("Xem thêm");
      if (!stillCollapsed) {
        break;
      }
      attempt += 1;
    }
  } catch (e) {}

  return match;
}
|
|
|
|
/**
 * Drops the query string from a link so the same post is recognised
 * regardless of the parameters appended to its URL.
 *
 * @param {string} link - Absolute or relative URL.
 * @returns {string} Everything before the first "?".
 */
function stripQuery(link) {
  return link.split("?")[0];
}

/**
 * Reads the list of posts saved by earlier runs.
 *
 * @param {string} filePath - JSON file written by `main`.
 * @returns {Array<object>} Saved posts; a single saved object is wrapped in
 *   an array. Returns `[]` when the file is missing or cannot be parsed
 *   (the parse error is logged instead of being silently dropped).
 */
function readSavedPosts(filePath) {
  if (!fs.existsSync(filePath)) {
    return [];
  }
  try {
    const saved = JSON.parse(fs.readFileSync(filePath, "utf8"));
    if (Array.isArray(saved)) {
      return saved;
    }
    return saved ? [saved] : [];
  } catch (error) {
    console.log(`Could not read ${filePath}:`, error.message);
    return [];
  }
}

/**
 * Extracts the text of a post, trying progressively broader sources.
 *
 * @param {object} post - WebElement of the post.
 * @returns {Promise<string>} Text of the first selector that yields any,
 *   else the whole post's text, else the placeholder "None content".
 */
async function extractPostContent(post) {
  const contentSelectors = [
    'div[data-ad-preview="message"]',
    ".userContent",
    'div[role="article"]',
    'div[data-testid="post_message"]',
  ];
  for (const selector of contentSelectors) {
    const element = await safeFindElement(post, By.css(selector));
    const text = element ? await element.getText() : "";
    if (text) {
      return text;
    }
  }
  return (await post.getText()) || "None content";
}

/**
 * Reads the text of the post's permalink anchor, which is used as the
 * displayed time.
 *
 * @param {object} post - WebElement of the post.
 * @returns {Promise<string>} Anchor text, or "" when unavailable.
 */
async function extractPostTime(post) {
  try {
    const timeElement = await safeFindElement(
      post,
      By.css('a[aria-label][href*="/posts/"]')
    );
    return timeElement ? await timeElement.getText() : "";
  } catch (error) {
    // Best effort: the time is optional metadata.
    return "";
  }
}

/**
 * Finds the first anchor in the post that looks like a permalink.
 *
 * @param {object} post - WebElement of the post.
 * @returns {Promise<string>} The href containing "/posts/", "/permalink/"
 *   or "fbid=", or "" when none is found or the lookup fails.
 */
async function extractPostLink(post) {
  try {
    const anchors = await post.findElements(By.css("a"));
    for (const anchor of anchors) {
      const href = await anchor.getAttribute("href");
      if (
        href &&
        (href.includes("/posts/") ||
          href.includes("/permalink/") ||
          href.includes("fbid="))
      ) {
        return href;
      }
    }
  } catch (error) {
    // Best effort: a post without a resolvable link is still recorded.
  }
  return "";
}

/**
 * Collects the image URLs of a post.
 *
 * @param {object} post - WebElement of the post.
 * @returns {Promise<string[]>} `src` values that start with "http" and do
 *   not contain "data:image" or "static.xx". Images gathered before a
 *   lookup failure are kept.
 */
async function extractPostImages(post) {
  const images = [];
  try {
    const imageElements = await post.findElements(By.css("img"));
    for (const img of imageElements) {
      const src = await img.getAttribute("src");
      if (
        src &&
        src.startsWith("http") &&
        !src.includes("data:image") &&
        !src.includes("static.xx")
      ) {
        images.push(src);
      }
    }
  } catch (error) {
    // Best effort: keep whatever was collected so far.
  }
  return images;
}

/**
 * Entry point: logs in (cookies first, credentials as fallback) and then
 * polls the target page forever, appending the first post to
 * facebook_first_post.json whenever it has not been recorded before.
 *
 * CLI: argv[2] = target URL, argv[4] = username, argv[5] = password;
 * FB_USERNAME / FB_PASSWORD are used when the arguments are absent.
 */
(async function main() {
  const cookieFilePath = "facebook_cookies.json";
  const outputFilePath = "facebook_first_post.json";
  const targetUrl =
    process.argv[2] || "https://www.facebook.com/logisticsarena.bacib.tdtu";
  // NOTE(review): argv[3] is skipped here; kept as-is so existing invocations
  // keep working — confirm whether argv[3]/argv[4] were intended.
  const username = process.argv[4] || process.env.FB_USERNAME || "";
  const password = process.argv[5] || process.env.FB_PASSWORD || "";
  // Pause between polls. Alternative value: 5 * 60 * 1000 (5 minutes).
  const timeRetry = 20 * 1000;

  // Validate credentials BEFORE launching Chrome so a misconfigured run does
  // not leave a browser process behind.
  if (!username || !password) {
    console.log(
      "Missing username or password! Pass via argv or set FB_USERNAME, FB_PASSWORD environment variables."
    );
    process.exitCode = 1;
    return;
  }

  // Keys of posts recorded by earlier runs, used to skip duplicates.
  const postSet = new Set();
  const contentSet = new Set();
  for (const item of readSavedPosts(outputFilePath)) {
    if (!item) continue;
    if (typeof item.postLink === "string" && item.postLink) {
      postSet.add(stripQuery(item.postLink));
    }
    if (typeof item.content === "string" && item.content) {
      contentSet.add(item.content.trim());
    }
  }

  const driver = await initializeDriver();

  try {
    const cookiesLoaded = await loadCookies(driver, cookieFilePath);
    let isLoggedIn = false;
    if (cookiesLoaded) {
      try {
        const currentUrl = await driver.getCurrentUrl();
        isLoggedIn =
          !currentUrl.includes("login") && !currentUrl.includes("checkpoint");
      } catch (e) {
        // Treat an unreadable URL as "not logged in" and fall through to
        // the credential login below.
      }
    }

    if (!isLoggedIn) {
      const loginSuccess = await loginFacebook(
        driver,
        username,
        password,
        cookieFilePath
      );
      if (!loginSuccess) {
        // Return instead of process.exit(): exit() would skip the finally
        // block and orphan the browser session.
        process.exitCode = 1;
        return;
      }
    }

    while (true) {
      await driver.get(targetUrl);
      await sleep(5000);

      const postResult = await getFirstPostBySelector(driver);
      if (!postResult) {
        console.log("No post found!");
      } else {
        const { firstPost, usedSelector } = postResult;
        const content = await extractPostContent(firstPost);
        const time = await extractPostTime(firstPost);
        const postLink = await extractPostLink(firstPost);
        const postLinkKey = postLink ? stripQuery(postLink) : "";
        const images = await extractPostImages(firstPost);
        const contentKey = content.trim();

        // NOTE(review): the "None content" placeholder is also used as a
        // dedupe key, so after one text-less post every later text-less
        // post is treated as a duplicate — confirm this is intended.
        const alreadySeen =
          (postLinkKey && postSet.has(postLinkKey)) ||
          contentSet.has(contentKey);

        if (alreadySeen) {
          console.log("Post already fetched or content duplicated, skipping!");
        } else {
          const newPost = {
            url: targetUrl,
            selector_used: usedSelector,
            content: content,
            postLink: postLink,
            images: images,
            timestamp: new Date().toISOString(),
            time: time,
          };
          // Re-read the file on every save so entries added outside this
          // process since startup are preserved.
          const postsArr = readSavedPosts(outputFilePath);
          postsArr.push(newPost);
          fs.writeFileSync(outputFilePath, JSON.stringify(postsArr, null, 2));

          // Only real link keys are remembered; "" carries no identity.
          if (postLinkKey) {
            postSet.add(postLinkKey);
          }
          contentSet.add(contentKey);
          console.log("Saved latest post to facebook_first_post.json");
        }
      }

      await sleep(timeRetry);
    }
  } catch (error) {
    console.error("Error:", error.message);
  } finally {
    // Keep the 10 s grace period before closing the browser, but await it
    // so a failing quit() is reported rather than rejecting unobserved.
    await sleep(10000);
    try {
      await driver.quit();
      console.log("Browser closed");
    } catch (quitError) {
      console.error("Error closing browser:", quitError.message);
    }
  }
})().catch((error) => {
  // Covers failures outside the try block (e.g. the browser failing to
  // start) so the promise is never left unhandled.
  console.error("Fatal error:", error.message);
  process.exitCode = 1;
});
|