# MedBrief/config.example.yaml (Siddhant-K-code/MedBrief, main branch)
# MediBrief Configuration

# Credentials and authentication
# Every value below is a placeholder: fill them in on a private copy of this
# file and keep real secrets out of version control.
api_keys:
  # NCBI E-utilities (PubMed) API key.
  # Announcement and signup instructions:
  # https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/
  pubmed: 'YOUR_PUBMED_API_KEY'

  # ID of the Google Cloud project.
  gcp_project_id: 'YOUR_GCP_PROJECT_ID'

  # Path to a service account key file; intended for local development.
  # Cloud deployments should rely on service account roles instead.
  gcp_service_account_key: 'path/to/service-account-key.json'

  # Runway API key, created at https://app.runwayml.com/settings/api-keys
  runway: 'YOUR_RUNWAY_API_KEY'

# PubMed API settings
pubmed:
  # Upper bound on the number of papers fetched per request.
  max_results_per_query: 10

  # Default search fields.
  default_fields:
    - 'title'
    - 'abstract'
    - 'authors'
    - 'journal'
    - 'publication_date'
    - 'doi'

  # Medical specialties to search for.
  specialties:
    - 'cardiology'
    - 'neurology'
    - 'oncology'
    - 'pediatrics'
    - 'psychiatry'
    - 'infectious disease'
    - 'endocrinology'
    - 'gastroenterology'
    - 'rheumatology'
    - 'pulmonology'

  # Search window for papers, expressed in days.
  time_period_days: 7

  # Request rate cap, in requests per second.
  # NOTE(review): NCBI documents 3 requests/second without an API key and up
  # to 10 with one - confirm the applicable limit before raising this value.
  rate_limit: 3

# PDF processing
pdf_processing:
  # Directory where downloaded PDFs are stored temporarily.
  temp_storage_path: 'temp/pdfs'

  # OCR options used when extracting text from images.
  # NOTE(review): 'eng' and the --oem/--psm flags look like Tesseract options;
  # confirm against the OCR code that consumes them.
  ocr:
    language: 'eng'
    config: '--oem 3 --psm 6'

  # Figure extraction options.
  figure_extraction:
    # Minimum pixel size for something to be considered a figure.
    min_figure_size: 100
    # Caption keywords.
    # NOTE(review): presumably matched against caption text to locate
    # figures - verify against the extraction code.
    caption_keywords:
      - 'Figure'
      - 'Fig.'
      - 'Table'
      - 'Chart'
      - 'Graph'
      - 'Image'

# AI processing
ai_processing:
  # Vertex AI model selection.
  # NOTE(review): confirm that this model ID is still served in the chosen
  # location; older Gemini versions are retired on a published schedule.
  vertex_ai:
    location: 'us-central1'
    model_name: 'gemini-1.5-pro'

  # Summarization options.
  # NOTE(review): the unit of max_length/min_length (words, tokens or
  # characters) is not stated in this file - confirm against the consumer.
  summarization:
    max_length: 800
    min_length: 500
    temperature: 0.2
    top_p: 0.95

  # Key takeaways: how many to produce and the length cap for each one.
  key_takeaways:
    count: 5
    max_length_each: 100

# Image analysis
image_analysis:
  # Vision AI request options: result cap and the feature types to request.
  vision_ai:
    max_results: 10
    feature_types:
      - 'TEXT_DETECTION'
      - 'IMAGE_PROPERTIES'
      - 'OBJECT_LOCALIZATION'

  # Figure selection: cap on the number of figures and the minimum quality
  # score.
  # NOTE(review): the scale of min_quality_score is not stated here;
  # presumably 0.0-1.0 - confirm.
  figure_selection:
    max_figures: 5
    min_quality_score: 0.7

# Text-to-Speech
tts:
  # Voice settings
  voice:
    language_code: "en-US"
    # Professional female voice
    name: "en-US-Neural2-F"
    speaking_rate: 0.9
    pitch: 0.0

  # Audio settings
  audio:
    encoding: "MP3"
    sample_rate_hertz: 24000

  # Chunk settings for API limits: characters of text per API request.
  # The voice name above follows Google Cloud Text-to-Speech naming, and that
  # API caps the input of one request at 5000 *bytes*, not characters.
  # Multi-byte UTF-8 characters (Greek letters, the micro sign, comparison
  # symbols - all common in medical text) take more than one byte each, so a
  # chunk of exactly 5000 characters can be rejected. Staying below 5000
  # leaves headroom for such text.
  max_chunk_length: 4500

# Video Generation
# Settings for rendering the final video: output format, visual style, timing,
# the rendering method, and options specific to the Runway method.
video_generation:
  # Output settings
  output:
    resolution: "1080p"  # 1920x1080
    fps: 30
    format: "mp4"

  # Style settings
  # Colors are hex strings; they must stay quoted because an unquoted leading
  # "#" would start a YAML comment.
  # NOTE(review): the unit of the font sizes (points vs. pixels) is not stated
  # here - confirm against the renderer.
  style:
    background_color: "#FFFFFF"
    text_color: "#333333"
    highlight_color: "#4285F4"  # Google Blue
    font: "Arial"
    title_font_size: 48
    body_font_size: 32

  # Timing settings
  timing:
    intro_duration: 5  # seconds
    slide_duration: 10  # seconds
    transition_duration: 0.5  # seconds
    outro_duration: 5  # seconds

  # Video generation method
  # Options: "moviepy" or "runway"
  # NOTE(review): the `runway` section below presumably only takes effect when
  # this is set to "runway" - confirm against the pipeline code.
  method: "moviepy"

  # Runway Gen-2 API settings
  runway:
    # API settings
    max_retries: 5
    retry_delay: 10  # seconds
    timeout: 120  # seconds

    # Scene generation settings
    scene_duration: 4  # seconds per scene
    max_scenes: 10  # maximum number of scenes to generate

    # Style settings
    style_preset: "medical"  # style preset for scene generation

    # Prompt settings
    # One prompt per section (title, abstract, methods, results, conclusion).
    # "{title}" appears in the title and abstract templates.
    # NOTE(review): presumably a placeholder substituted with the paper title
    # before the prompt is sent - verify against the caller.
    prompt_templates:
      title: "Professional medical video title sequence showing '{title}'. Clean, modern design with blue and white color scheme."
      abstract: "Visual representation of medical research about {title}. Professional laboratory setting with researchers analyzing data."
      methods: "Medical laboratory with scientists conducting experiments. Clinical trial visualization with modern equipment and data analysis."
      results: "Data visualization of medical research results. Charts, graphs, and statistical analysis in a professional setting."
      conclusion: "Medical professionals discussing research findings in a conference room. Clinical implementation of research results."

    # Post-processing settings
    # NOTE(review): "#000000AA" looks like an 8-digit hex color whose last two
    # digits are the alpha channel - confirm the renderer accepts that form.
    add_subtitles: true
    subtitle_font: "Arial"
    subtitle_size: 24
    subtitle_color: "#FFFFFF"
    subtitle_background: "#000000AA"  # Semi-transparent black

# Cloud storage
cloud_storage:
  # Bucket name for each kind of artifact.
  # NOTE(review): if these are Google Cloud Storage buckets, names share one
  # global namespace, so these example names will likely need a unique prefix.
  buckets:
    videos: 'medbrief-videos'
    pdfs: 'medbrief-pdfs'
    images: 'medbrief-images'
    audio: 'medbrief-audio'

  # Storage class.
  storage_class: 'STANDARD'

  # Retention policy, in days.
  retention_days: 30

# YouTube upload
youtube:
  # Target channel (placeholder - replace with a real channel ID).
  channel_id: 'YOUR_YOUTUBE_CHANNEL_ID'

  # Per-video metadata.
  video:
    # Category 27 is Education. Kept as a quoted string so it is not parsed
    # as an integer.
    category_id: '27'
    # One of: public, unlisted, private.
    # NOTE(review): consider 'private' or 'unlisted' while testing the
    # pipeline.
    privacy_status: 'public'
    tags:
      - 'medical research'
      - 'healthcare'
      - 'medicine'
      - 'research summary'

  # Template for the video description. The {name} fields are placeholders.
  # NOTE(review): presumably filled in per paper - verify against the uploader.
  description_template: |
    This video summarizes the research paper: {title}

    Authors: {authors}
    Journal: {journal}
    Publication Date: {publication_date}
    DOI: {doi}

    Key Takeaways:
    {key_takeaways}

    This summary was automatically generated by MediBrief.
    For more medical research summaries, subscribe to our channel.

# Automation pipeline
pipeline:
  # Cron-format schedule; this expression means daily at midnight.
  schedule: '0 0 * * *'

  # Number of papers processed in parallel.
  max_concurrent_papers: 5

  # Error handling: retry count and the delay between retries, in seconds.
  max_retries: 3
  retry_delay_seconds: 60

  # Notifications: recipient address (placeholder) and which outcomes are
  # reported.
  notifications:
    email: 'your.email@example.com'
    error_reporting: true
    success_reporting: true

# Logging
logging:
  # One of: DEBUG, INFO, WARNING, ERROR, CRITICAL.
  level: 'INFO'
  # Must stay quoted: a plain YAML scalar cannot begin with "%".
  format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  file: 'logs/medbrief.log'