diff --git a/README.md b/README.md
index 09f50b2..47abf9c 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,8 @@ npx slack-archive
 --channel-types Comma-separated list of channel types to fetch messages from.
 (public_channel, private_channel, mpim, im)
 --exclude-channels Comma-separated list of channels to exclude, in automatic mode
+--start-date Filter messages starting from this date (format: YYYY-MM-DD, local time)
+--end-date Filter messages up to and including this date (format: YYYY-MM-DD, local time)
 --no-backup: Don't create backups. Not recommended.
 --no-search: Don't create a search file, saving disk space.
 --no-file-download: Don't download files.
diff --git a/src/cli.ts b/src/cli.ts
index d54d62d..e12ba83 100644
--- a/src/cli.ts
+++ b/src/cli.ts
@@ -19,6 +19,8 @@ import {
   EMOJIS_DATA_PATH,
   NO_SLACK_CONNECT,
   EXCLUDE_CHANNELS,
+  START_DATE,
+  END_DATE,
 } from "./config.js";
 import { downloadExtras } from "./messages.js";
 import { downloadMessages } from "./messages.js";
@@ -273,6 +275,33 @@ export async function main() {
     console.log(`Not connecting to Slack and skipping all Slack API calls`);
   }
 
+  if (START_DATE) {
+    const startDate = parseISO(START_DATE);
+    if (!isValid(startDate)) {
+      console.error(`Invalid start date format: ${START_DATE}. Please use YYYY-MM-DD format.`);
+      process.exit(-1);
+    }
+    console.log(`Filtering messages from date: ${START_DATE}`);
+  }
+
+  if (END_DATE) {
+    const endDate = parseISO(END_DATE);
+    if (!isValid(endDate)) {
+      console.error(`Invalid end date format: ${END_DATE}. Please use YYYY-MM-DD format.`);
+      process.exit(-1);
+    }
+    console.log(`Filtering messages until date: ${END_DATE}`);
+  }
+
+  if (START_DATE && END_DATE) {
+    const startDate = parseISO(START_DATE);
+    const endDate = parseISO(END_DATE);
+    if (startDate > endDate) {
+      console.error(`Start date (${START_DATE}) must not be after end date (${END_DATE}).`);
+      process.exit(-1);
+    }
+  }
+
   await getToken();
   await createBackup();
 
diff --git a/src/config.ts b/src/config.ts
index 67a52f8..c1585cd 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -45,6 +45,8 @@ export const FORCE_HTML_GENERATION = findCliParameter(
   "--force-html-generation"
 );
 export const EXCLUDE_CHANNELS = getCliParameter("--exclude-channels");
+export const START_DATE = getCliParameter("--start-date");
+export const END_DATE = getCliParameter("--end-date");
 export const BASE_DIR = process.cwd();
 export const OUT_DIR = path.join(BASE_DIR, "slack-archive");
 export const TOKEN_FILE = path.join(OUT_DIR, ".token");
diff --git a/src/messages.ts b/src/messages.ts
index dae1f48..14d253b 100644
--- a/src/messages.ts
+++ b/src/messages.ts
@@ -11,9 +11,63 @@ import { getMessages } from "./data-load.js";
 import { isThread } from "./threads.js";
 import { downloadUser, getName } from "./users.js";
 import { getWebClient } from "./web-client.js";
+import { START_DATE, END_DATE } from "./config.js";
+import { slackTimestampToJavaScriptTimestamp } from "./timestamp.js";
 
 function isConversation(input: any): input is ConversationsHistoryResponse {
   return !!input.messages;
 }
 
+/**
+ * Parses a --start-date / --end-date value into a Date in local time.
+ *
+ * `new Date("YYYY-MM-DD")` yields UTC midnight; mixing that with the
+ * local-time `setHours` shifts the range by the UTC offset and, west of
+ * UTC, puts the end boundary on the previous day. Date-only values are
+ * therefore built from their components so they mean local midnight.
+ *
+ * @param value Raw CLI value, expected to be YYYY-MM-DD
+ * @param endOfDay When true, move the result to 23:59:59.999 of that day
+ * @returns The boundary; an invalid Date if `value` cannot be parsed
+ */
+function parseDateBoundary(value: string, endOfDay: boolean): Date {
+  const ymd = /^(\d{4})-(\d{2})-(\d{2})$/.exec(value);
+  const date = ymd
+    ? new Date(Number(ymd[1]), Number(ymd[2]) - 1, Number(ymd[3]))
+    : new Date(value);
+
+  if (endOfDay) {
+    date.setHours(23, 59, 59, 999);
+  }
+
+  return date;
+}
+
+// The range is fixed for the whole run, so resolve both boundaries once.
+const RANGE_START = START_DATE
+  ? parseDateBoundary(START_DATE, false)
+  : undefined;
+const RANGE_END = END_DATE ? parseDateBoundary(END_DATE, true) : undefined;
+
+/**
+ * Checks a message against the optional --start-date / --end-date range
+ * (both ends inclusive). Without either flag every message is kept; with
+ * a range active, messages lacking a timestamp are dropped.
+ *
+ * @param message Message to check
+ * @returns true if the message should be kept
+ */
+function isMessageInDateRange(message: Message | ArchiveMessage): boolean {
+  if (!RANGE_START && !RANGE_END) return true;
+  if (!message.ts) return false;
+
+  const messageTimestamp = slackTimestampToJavaScriptTimestamp(message.ts);
+  const messageDate = new Date(messageTimestamp);
+
+  if (RANGE_START && messageDate < RANGE_START) return false;
+  if (RANGE_END && messageDate > RANGE_END) return false;
+
+  return true;
+}
+
 interface DownloadMessagesResult {
@@ -37,11 +91,20 @@ export async function downloadMessages(
   }
 
   for (const message of await getMessages(channel.id)) {
-    result.messages.push(message);
+    if (isMessageInDateRange(message)) {
+      result.messages.push(message);
+    }
   }
 
-  const oldest =
+  // Calculate the oldest timestamp to start from
+  let oldest =
     result.messages.length > 0 ? parseInt(result.messages[0].ts || "0", 10) : 0;
+
+  // Nothing cached inside the range: start fetching at the start date
+  if (RANGE_START && !result.messages.length) {
+    oldest = Math.floor(RANGE_START.getTime() / 1000);
+  }
+
   const name =
     channel.name || channel.id || channel.purpose?.value || "Unknown channel";
 
@@ -49,10 +112,15 @@ export async function downloadMessages(
     `Downloading messages for channel ${i + 1}/${channelCount} (${name})...`
   ).start();
 
-  for await (const page of getWebClient().paginate("conversations.history", {
+  const params = {
     channel: channel.id,
     oldest,
-  })) {
+    // Slack treats `latest` as exclusive, so round the end of the last day
+    // up to the next full second to keep that day's final messages.
+    ...(RANGE_END ? { latest: Math.ceil(RANGE_END.getTime() / 1000) } : {}),
+  };
+
+  for await (const page of getWebClient().paginate("conversations.history", params)) {
     if (isConversation(page)) {
       const pageLength = page.messages?.length || 0;
       const fetched = `Fetched ${pageLength} messages`;
@@ -62,9 +130,10 @@ export async function downloadMessages(
         i + 1
       }/${channelCount} ${name}: ${fetched} ${total})`;
 
-      result.new = result.new + (page.messages || []).length;
+      const filteredMessages = (page.messages || []).filter(isMessageInDateRange);
+      result.new = result.new + filteredMessages.length;
 
-      result.messages.unshift(...(page.messages || []));
+      result.messages.unshift(...filteredMessages);
     }
   }
 
@@ -103,8 +172,8 @@ export async function downloadReplies(
     oldest,
   });
 
-  // First message is the parent
-  return (result.messages || []).slice(1);
+  // First message is the parent, filter replies by date range
+  return (result.messages || []).slice(1).filter(isMessageInDateRange);
 }
 
 export async function downloadExtras(