import_einsaetze_csv.py 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. #!/usr/bin/env python3
  2. """
  3. Import Einsätze from FF-Agent CSV export and write JSON in the same format
  4. as fetch_einsaetze.py (for use by the ticker backend).
  5. CSV format: semicolon-delimited, quoted fields, header row.
  6. """
  7. import argparse
  8. import csv
  9. import json
  10. import logging
  11. import sys
  12. from datetime import datetime, timezone
  13. from pathlib import Path
  14. from typing import List
  15. logging.basicConfig(
  16. level=logging.INFO,
  17. format="%(asctime)s %(levelname)s %(message)s",
  18. datefmt="%Y-%m-%d %H:%M:%S",
  19. )
  20. logger = logging.getLogger(__name__)
  21. ENCODINGS = ("utf-8-sig", "utf-8", "cp1252")
  22. def detect_encoding(path: Path) -> str:
  23. """Try to read first bytes and return encoding that works for the header."""
  24. raw = path.read_bytes()
  25. for enc in ENCODINGS:
  26. try:
  27. raw.decode(enc)
  28. return enc
  29. except UnicodeDecodeError:
  30. continue
  31. return "utf-8"
  32. def parse_csv(path: str) -> List[dict]:
  33. """
  34. Parse semicolon-delimited CSV with quoted fields.
  35. Returns list of row dicts; keys are header names (quotes stripped).
  36. """
  37. p = Path(path)
  38. if not p.is_file():
  39. raise FileNotFoundError(f"CSV file not found: {path}")
  40. encoding = detect_encoding(p)
  41. logger.info("Using encoding: %s", encoding)
  42. with open(p, "r", encoding=encoding, newline="") as f:
  43. reader = csv.reader(f, delimiter=";", quotechar='"')
  44. rows = list(reader)
  45. if not rows:
  46. raise ValueError("CSV file is empty")
  47. header = [cell.strip('"') for cell in rows[0]]
  48. einsaetze = []
  49. for row in rows[1:]:
  50. # Pad row to header length so we don't lose columns
  51. while len(row) < len(header):
  52. row.append("")
  53. record = {}
  54. for i, key in enumerate(header):
  55. value = row[i].strip('"') if i < len(row) else ""
  56. record[key] = value
  57. # Skip completely empty rows
  58. if any(v for v in record.values()):
  59. einsaetze.append(record)
  60. return einsaetze
  61. def write_json(einsaetze: List[dict], output_path: str) -> None:
  62. """Write einsaetze to JSON with updated timestamp. Creates parent dir if needed."""
  63. out = Path(output_path)
  64. out.parent.mkdir(parents=True, exist_ok=True)
  65. payload = {
  66. "einsaetze": einsaetze,
  67. "updated": datetime.now(timezone.utc).isoformat(),
  68. }
  69. with open(out, "w", encoding="utf-8") as f:
  70. json.dump(payload, f, ensure_ascii=False, indent=2)
  71. logger.info("Wrote %d Einsätze to %s", len(einsaetze), output_path)
  72. def main() -> int:
  73. parser = argparse.ArgumentParser(
  74. description="Import FF-Agent CSV export and output JSON for ticker backend."
  75. )
  76. parser.add_argument(
  77. "input",
  78. nargs="?",
  79. default=None,
  80. help="Input CSV file path",
  81. )
  82. parser.add_argument(
  83. "--output",
  84. "-o",
  85. default="data/einsaetze.json",
  86. help="Output JSON path (default: data/einsaetze.json)",
  87. )
  88. args = parser.parse_args()
  89. if not args.input:
  90. parser.error("Input CSV path required (or pass as positional argument)")
  91. return 1
  92. try:
  93. einsaetze = parse_csv(args.input)
  94. write_json(einsaetze, args.output)
  95. return 0
  96. except (FileNotFoundError, ValueError) as e:
  97. logger.error("%s", e)
  98. return 1
  99. if __name__ == "__main__":
  100. sys.exit(main())