Browse Source

初步完成。待办:upload_at 加索引;S3 同步 MySQL 的 deleteBeforeDate 尚未限制 3-5 天。

master
review512jwy@163.com 2 weeks ago
parent
commit
fed6ee455e
  1. 28
      src/main/java/com/dashboard/aws/lambda/handler/S3BatchToMySQLHandler.java
  2. 8
      src/main/java/com/dashboard/aws/lambda/service/MySQLService.java
  3. 8
      src/test/java/com/dashboard/aws/lambda/S3CsvReorganizer.java

28
src/main/java/com/dashboard/aws/lambda/handler/S3BatchToMySQLHandler.java

@ -126,11 +126,25 @@ public class S3BatchToMySQLHandler implements RequestHandler<Map<String, Object>
return; return;
} }
// 3. 按文件名排序(确保 00000001.csv, 00000002.csv 顺序处理) // 3. 过滤掉 marker 文件
Collections.sort(keys); List<String> csvKeys = new ArrayList<>();
logger.info("[{}][{}] found {} files to process", DB_SCHEMA, table, keys.size()); for (String key : keys) {
String fileName = extractFileName(key);
if (!fileName.startsWith(".")) { // 跳过隐藏文件(如 .sync_to_db_processed)
csvKeys.add(key);
}
}
if (csvKeys.isEmpty()) {
logger.warn("[{}][{}] no CSV files found for prefix: {}", DB_SCHEMA, table, prefix);
return;
}
// 4. 按文件名排序(确保 00000001.csv, 00000002.csv 顺序处理)
Collections.sort(csvKeys);
logger.info("[{}][{}] found {} CSV files to process", DB_SCHEMA, table, csvKeys.size());
// 4. 删除旧数据(只在第一次处理时执行) // 5. 删除旧数据(只在第一次处理时执行)
if (processedFiles.isEmpty()) { if (processedFiles.isEmpty()) {
int deleted = mysqlService.deleteBeforeDate(DB_SCHEMA, table, fileDate); int deleted = mysqlService.deleteBeforeDate(DB_SCHEMA, table, fileDate);
logger.info("[{}][{}] deleted {} records before {}", DB_SCHEMA, table, deleted, fileDate); logger.info("[{}][{}] deleted {} records before {}", DB_SCHEMA, table, deleted, fileDate);
@ -142,8 +156,8 @@ public class S3BatchToMySQLHandler implements RequestHandler<Map<String, Object>
Map<String, Integer> progressThisTime = new HashMap<>(processedFiles); Map<String, Integer> progressThisTime = new HashMap<>(processedFiles);
boolean allProcessed = true; boolean allProcessed = true;
// 5. 依次处理每个CSV文件 // 6. 依次处理每个CSV文件
for (String s3Key : keys) { for (String s3Key : csvKeys) {
// 处理文件前检查超时 // 处理文件前检查超时
long elapsed = System.currentTimeMillis() - overallStartTime; long elapsed = System.currentTimeMillis() - overallStartTime;
if (elapsed >= timeoutThresholdMs) { if (elapsed >= timeoutThresholdMs) {
@ -171,7 +185,7 @@ public class S3BatchToMySQLHandler implements RequestHandler<Map<String, Object>
logger.info("[{}][{}] file {} progress: {}/{} rows", DB_SCHEMA, table, fileName, newProcessedRows, newProcessedRows); logger.info("[{}][{}] file {} progress: {}/{} rows", DB_SCHEMA, table, fileName, newProcessedRows, newProcessedRows);
} }
// 6. 记录处理结果 // 7. 记录处理结果
if (allProcessed) { if (allProcessed) {
logger.info("[{}][{}] date processing completed", DB_SCHEMA, table); logger.info("[{}][{}] date processing completed", DB_SCHEMA, table);
} else { } else {

8
src/main/java/com/dashboard/aws/lambda/service/MySQLService.java

@ -15,7 +15,7 @@ public class MySQLService {
private static final String MYSQL_URL = System.getenv("DB_URL"); // 统一实例URL private static final String MYSQL_URL = System.getenv("DB_URL"); // 统一实例URL
private static final String DB_USER = System.getenv("DB_USER"); private static final String DB_USER = System.getenv("DB_USER");
private static final String DB_PASSWORD = System.getenv("DB_PASSWORD"); private static final String DB_PASSWORD = System.getenv("DB_PASSWORD");
private static final String batchSizeEnv = System.getenv("BATCH_SIZE"); private static final String S3_2_DB_BATCH_SIZE = System.getenv("S3_2_DB_BATCH_SIZE");
/** /**
@ -90,11 +90,11 @@ public class MySQLService {
// 从环境变量读取批量大小,默认5000 // 从环境变量读取批量大小,默认5000
int batchSize = 5000; int batchSize = 5000;
if (batchSizeEnv != null && !batchSizeEnv.trim().isEmpty()) { if (S3_2_DB_BATCH_SIZE != null && !S3_2_DB_BATCH_SIZE.trim().isEmpty()) {
try { try {
batchSize = Integer.parseInt(batchSizeEnv.trim()); batchSize = Integer.parseInt(S3_2_DB_BATCH_SIZE.trim());
} catch (NumberFormatException e) { } catch (NumberFormatException e) {
logger.warn("Invalid BATCH_SIZE: {}, using default 5000", batchSizeEnv); logger.warn("Invalid BATCH_SIZE: {}, using default 5000", S3_2_DB_BATCH_SIZE);
} }
} }

8
src/test/java/com/dashboard/aws/lambda/S3CsvReorganizer.java

@ -42,8 +42,8 @@ public class S3CsvReorganizer {
.credentialsProvider( .credentialsProvider(
StaticCredentialsProvider.create( StaticCredentialsProvider.create(
AwsBasicCredentials.create( AwsBasicCredentials.create(
"AKA", "AKI",
"IFwUPLd") "IFwUPwW")
) )
) )
.build(); .build();
@ -79,7 +79,7 @@ public class S3CsvReorganizer {
String basePath = matcher.group(1); // dashboard-for-backup/dashboard_record_measure/ String basePath = matcher.group(1); // dashboard-for-backup/dashboard_record_measure/
String date = matcher.group(2); // 2025-12-25 String date = matcher.group(2); // 2025-12-25
String newKey = basePath + date + "/00000001.csv"; String newKey = basePath + date + "/001.csv";
// 如果目标文件和源文件相同则跳过 // 如果目标文件和源文件相同则跳过
if (oldKey.equals(newKey)) { if (oldKey.equals(newKey)) {
@ -122,7 +122,7 @@ public class S3CsvReorganizer {
S3CsvReorganizer reorganizer = new S3CsvReorganizer(); S3CsvReorganizer reorganizer = new S3CsvReorganizer();
reorganizer.reorganize( reorganizer.reorganize(
"dashboard_record_accumulate/" "dashboard_record_measure/"
); );
} }
} }

Loading…
Cancel
Save